From a54c8f0f2d7df525ff997e2afe71866a1a013064 Mon Sep 17 00:00:00 2001 From: Cathy Avery Date: Fri, 2 Oct 2015 09:35:01 -0400 Subject: xen-blkfront: check for null drvdata in blkback_changed (XenbusStateClosing) xen-blkfront will crash if the check to talk_to_blkback() in blkback_changed()(XenbusStateInitWait) returns an error. The driver data is freed and info is set to NULL. Later during the close process via talk_to_blkback's call to xenbus_dev_fatal() the null pointer is passed to and dereference in blkfront_closing. CC: stable@vger.kernel.org Signed-off-by: Cathy Avery Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 6d89ed35d80c..c8fdbc77f9b1 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1968,7 +1968,8 @@ static void blkback_changed(struct xenbus_device *dev, break; /* Missed the backend's Closing state -- fallthrough */ case XenbusStateClosing: - blkfront_closing(info); + if (info) + blkfront_closing(info); break; } } -- cgit v1.2.3 From dcc909d90ccdbb73226397ff6d298f7af35b0e11 Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Tue, 6 Oct 2015 20:03:54 +0200 Subject: nbd: Add locking for tasks The timeout handling introduced in 7e2893a16d3e (nbd: Fix timeout detection) introduces a race condition which may lead to killing of tasks that are not in nbd context anymore. This was not observed or reproducable yet. This patch adds locking to critical use of task_recv and task_send to avoid killing tasks that already left the NBD thread functions. This lock is only acquired if a timeout occures or the nbd device starts/stops. Reported-by: Ben Hutchings Signed-off-by: Markus Pargmann Reviewed-by: Ben Hutchings Fixes: 7e2893a16d3e ("nbd: Fix timeout detection") Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 293495a75d3d..1b87623381e2 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -60,6 +60,7 @@ struct nbd_device { bool disconnect; /* a disconnect has been requested by user */ struct timer_list timeout_timer; + spinlock_t tasks_lock; struct task_struct *task_recv; struct task_struct *task_send; @@ -140,21 +141,23 @@ static void sock_shutdown(struct nbd_device *nbd) static void nbd_xmit_timeout(unsigned long arg) { struct nbd_device *nbd = (struct nbd_device *)arg; - struct task_struct *task; + unsigned long flags; if (list_empty(&nbd->queue_head)) return; nbd->disconnect = true; - task = READ_ONCE(nbd->task_recv); - if (task) - force_sig(SIGKILL, task); + spin_lock_irqsave(&nbd->tasks_lock, flags); + + if (nbd->task_recv) + force_sig(SIGKILL, nbd->task_recv); - task = READ_ONCE(nbd->task_send); - if (task) + if (nbd->task_send) force_sig(SIGKILL, nbd->task_send); + spin_unlock_irqrestore(&nbd->tasks_lock, flags); + dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n"); } @@ -403,17 +406,24 @@ static int nbd_thread_recv(struct nbd_device *nbd) { struct request *req; int ret; + unsigned long flags; BUG_ON(nbd->magic != NBD_MAGIC); sk_set_memalloc(nbd->sock->sk); + spin_lock_irqsave(&nbd->tasks_lock, flags); nbd->task_recv = current; + spin_unlock_irqrestore(&nbd->tasks_lock, flags); ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr); if (ret) { dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); + + spin_lock_irqsave(&nbd->tasks_lock, flags); nbd->task_recv = NULL; + spin_unlock_irqrestore(&nbd->tasks_lock, flags); + return ret; } @@ -429,7 +439,9 @@ static int nbd_thread_recv(struct nbd_device *nbd) device_remove_file(disk_to_dev(nbd->disk), &pid_attr); + spin_lock_irqsave(&nbd->tasks_lock, flags); nbd->task_recv = NULL; + spin_unlock_irqrestore(&nbd->tasks_lock, flags); if (signal_pending(current)) { siginfo_t info; @@ -534,8 +546,11 @@ static int nbd_thread_send(void *data) { struct nbd_device *nbd = data; struct request *req; + unsigned long flags; + spin_lock_irqsave(&nbd->tasks_lock, flags); nbd->task_send = current; + spin_unlock_irqrestore(&nbd->tasks_lock, flags); set_user_nice(current, MIN_NICE); while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) { @@ -572,7 +587,15 @@ static int nbd_thread_send(void *data) nbd_handle_req(nbd, req); } + spin_lock_irqsave(&nbd->tasks_lock, flags); nbd->task_send = NULL; + spin_unlock_irqrestore(&nbd->tasks_lock, flags); + + /* Clear maybe pending signals */ + if (signal_pending(current)) { + siginfo_t info; + dequeue_signal_lock(current, ¤t->blocked, &info); + } return 0; } @@ -1052,6 +1075,7 @@ static int __init nbd_init(void) nbd_dev[i].magic = NBD_MAGIC; INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); spin_lock_init(&nbd_dev[i].queue_lock); + spin_lock_init(&nbd_dev[i].tasks_lock); INIT_LIST_HEAD(&nbd_dev[i].queue_head); mutex_init(&nbd_dev[i].tx_lock); init_timer(&nbd_dev[i].timeout_timer); -- cgit v1.2.3 From 835da3f99d329b1160a1f7fc82c7ac81163d63d0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Oct 2015 22:29:48 +0200 Subject: nvme: fix 32-bit build warning Compiling the nvme driver on 32-bit warns about a cast from a __u64 variable to a pointer: drivers/block/nvme-core.c: In function 'nvme_submit_io': drivers/block/nvme-core.c:1847:4: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] (void __user *)io.addr, length, NULL, 0); The cast here is intentional and safe, so we can shut up the gcc warning by adding an intermediate cast to 'uintptr_t'. I had previously submitted a patch to fix this problem in the nvme driver, but it was accepted on the same day that two new warnings got added. For clarification, I also change the third instance of this cast to use uintptr_t instead of unsigned long now. Signed-off-by: Arnd Bergmann Fixes: d29ec8241c10e ("nvme: submit internal commands through the block layer") Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 6f04771f1019..fce353ba5f66 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -1804,7 +1804,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) length = (io.nblocks + 1) << ns->lba_shift; meta_len = (io.nblocks + 1) * ns->ms; - metadata = (void __user *)(unsigned long)io.metadata; + metadata = (void __user *)(uintptr_t)io.metadata; write = io.opcode & 1; if (ns->ext) { @@ -1844,7 +1844,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.metadata = cpu_to_le64(meta_dma); status = __nvme_submit_sync_cmd(ns->queue, &c, NULL, - (void __user *)io.addr, length, NULL, 0); + (void __user *)(uintptr_t)io.addr, length, NULL, 0); unmap: if (meta) { if (status == NVME_SC_SUCCESS && !write) { @@ -1886,7 +1886,7 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns, timeout = msecs_to_jiffies(cmd.timeout_ms); status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c, - NULL, (void __user *)cmd.addr, cmd.data_len, + NULL, (void __user *)(uintptr_t)cmd.addr, cmd.data_len, &cmd.result, timeout); if (status >= 0) { if (put_user(cmd.result, &ucmd->result)) -- cgit v1.2.3 From 81c04b943872e0332872df18cec1dec89b178b4d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Oct 2015 21:23:39 +0200 Subject: nvme: use an integer value to Linux errno values Use a separate integer variable to hold the signed Linux errno values we pass back to the block layer. Note that for pass through commands those might still be NVMe values, but those fit into the int as well. Fixes: f4829a9b7a61: ("blk-mq: fix racy updates of rq->errors") Reported-by: Dan Carpenter Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index fce353ba5f66..84e4a8088386 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -603,8 +603,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, struct nvme_iod *iod = ctx; struct request *req = iod_get_private(iod); struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req); - u16 status = le16_to_cpup(&cqe->status) >> 1; + int error = 0; if (unlikely(status)) { if (!(status & NVME_SC_DNR || blk_noretry_request(req)) @@ -621,9 +621,11 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, if (req->cmd_type == REQ_TYPE_DRV_PRIV) { if (cmd_rq->ctx == CMD_CTX_CANCELLED) - status = -EINTR; + error = -EINTR; + else + error = status; } else { - status = nvme_error_status(status); + error = nvme_error_status(status); } } @@ -635,7 +637,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, if (cmd_rq->aborted) dev_warn(nvmeq->dev->dev, "completing aborted command with status:%04x\n", - status); + error); if (iod->nents) { dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents, @@ -649,7 +651,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, } nvme_free_iod(nvmeq->dev, iod); - blk_mq_complete_request(req, status); + blk_mq_complete_request(req, error); } /* length is in bytes. gfp flags indicates whether we may sleep. */ -- cgit v1.2.3 From 0dfc70c33409afc232ef0b9ec210535dfbf9bc61 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 15 Oct 2015 13:38:48 -0600 Subject: NVMe: Fix memory leak on retried commands Resources are reallocated for requeued commands, so unmap and release the iod for the failed command. It's a pretty bad memory leak and causes a kernel hang if you remove a drive because of a busy dma pool. You'll get messages spewing like this: nvme 0000:xx:xx.x: dma_pool_destroy prp list 256, ffff880420dec000 busy and lock up pci and the driver since removal never completes while holding a lock. Cc: stable@vger.kernel.org Cc: # 4.0.x- Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 84e4a8088386..ccc0c1f93daa 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -604,6 +604,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, struct request *req = iod_get_private(iod); struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req); u16 status = le16_to_cpup(&cqe->status) >> 1; + bool requeue = false; int error = 0; if (unlikely(status)) { @@ -611,12 +612,13 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, && (jiffies - req->start_time) < req->timeout) { unsigned long flags; + requeue = true; blk_mq_requeue_request(req); spin_lock_irqsave(req->q->queue_lock, flags); if (!blk_queue_stopped(req->q)) blk_mq_kick_requeue_list(req->q); spin_unlock_irqrestore(req->q->queue_lock, flags); - return; + goto release_iod; } if (req->cmd_type == REQ_TYPE_DRV_PRIV) { @@ -639,6 +641,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, "completing aborted command with status:%04x\n", error); +release_iod: if (iod->nents) { dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents, rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); @@ -651,7 +654,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, } nvme_free_iod(nvmeq->dev, iod); - blk_mq_complete_request(req, error); + if (likely(!requeue)) + blk_mq_complete_request(req, error); } /* length is in bytes. gfp flags indicates whether we may sleep. */ -- cgit v1.2.3