From a54c8f0f2d7df525ff997e2afe71866a1a013064 Mon Sep 17 00:00:00 2001
From: Cathy Avery <cathy.avery@oracle.com>
Date: Fri, 2 Oct 2015 09:35:01 -0400
Subject: xen-blkfront: check for null drvdata in blkback_changed
 (XenbusStateClosing)

xen-blkfront will crash if the check to talk_to_blkback()
in blkback_changed()(XenbusStateInitWait) returns an error.
The driver data is freed and info is set to NULL. Later during
the close process via talk_to_blkback's call to xenbus_dev_fatal()
the null pointer is passed to and dereference in blkfront_closing.

CC: stable@vger.kernel.org
Signed-off-by: Cathy Avery <cathy.avery@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/block/xen-blkfront.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 6d89ed35d80c..c8fdbc77f9b1 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1968,7 +1968,8 @@ static void blkback_changed(struct xenbus_device *dev,
 			break;
 		/* Missed the backend's Closing state -- fallthrough */
 	case XenbusStateClosing:
-		blkfront_closing(info);
+		if (info)
+			blkfront_closing(info);
 		break;
 	}
 }
-- 
cgit v1.2.3


From dcc909d90ccdbb73226397ff6d298f7af35b0e11 Mon Sep 17 00:00:00 2001
From: Markus Pargmann <mpa@pengutronix.de>
Date: Tue, 6 Oct 2015 20:03:54 +0200
Subject: nbd: Add locking for tasks

The timeout handling introduced in
	7e2893a16d3e (nbd: Fix timeout detection)
introduces a race condition which may lead to killing of tasks that are
not in nbd context anymore. This was not observed or reproducable yet.

This patch adds locking to critical use of task_recv and task_send to
avoid killing tasks that already left the NBD thread functions. This
lock is only acquired if a timeout occures or the nbd device
starts/stops.

Reported-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Markus Pargmann <mpa@pengutronix.de>
Reviewed-by: Ben Hutchings <ben@decadent.org.uk>
Fixes: 7e2893a16d3e ("nbd: Fix timeout detection")
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nbd.c | 36 ++++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 293495a75d3d..1b87623381e2 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -60,6 +60,7 @@ struct nbd_device {
 	bool disconnect; /* a disconnect has been requested by user */
 
 	struct timer_list timeout_timer;
+	spinlock_t tasks_lock;
 	struct task_struct *task_recv;
 	struct task_struct *task_send;
 
@@ -140,21 +141,23 @@ static void sock_shutdown(struct nbd_device *nbd)
 static void nbd_xmit_timeout(unsigned long arg)
 {
 	struct nbd_device *nbd = (struct nbd_device *)arg;
-	struct task_struct *task;
+	unsigned long flags;
 
 	if (list_empty(&nbd->queue_head))
 		return;
 
 	nbd->disconnect = true;
 
-	task = READ_ONCE(nbd->task_recv);
-	if (task)
-		force_sig(SIGKILL, task);
+	spin_lock_irqsave(&nbd->tasks_lock, flags);
+
+	if (nbd->task_recv)
+		force_sig(SIGKILL, nbd->task_recv);
 
-	task = READ_ONCE(nbd->task_send);
-	if (task)
+	if (nbd->task_send)
 		force_sig(SIGKILL, nbd->task_send);
 
+	spin_unlock_irqrestore(&nbd->tasks_lock, flags);
+
 	dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n");
 }
 
@@ -403,17 +406,24 @@ static int nbd_thread_recv(struct nbd_device *nbd)
 {
 	struct request *req;
 	int ret;
+	unsigned long flags;
 
 	BUG_ON(nbd->magic != NBD_MAGIC);
 
 	sk_set_memalloc(nbd->sock->sk);
 
+	spin_lock_irqsave(&nbd->tasks_lock, flags);
 	nbd->task_recv = current;
+	spin_unlock_irqrestore(&nbd->tasks_lock, flags);
 
 	ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
 	if (ret) {
 		dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
+
+		spin_lock_irqsave(&nbd->tasks_lock, flags);
 		nbd->task_recv = NULL;
+		spin_unlock_irqrestore(&nbd->tasks_lock, flags);
+
 		return ret;
 	}
 
@@ -429,7 +439,9 @@ static int nbd_thread_recv(struct nbd_device *nbd)
 
 	device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
 
+	spin_lock_irqsave(&nbd->tasks_lock, flags);
 	nbd->task_recv = NULL;
+	spin_unlock_irqrestore(&nbd->tasks_lock, flags);
 
 	if (signal_pending(current)) {
 		siginfo_t info;
@@ -534,8 +546,11 @@ static int nbd_thread_send(void *data)
 {
 	struct nbd_device *nbd = data;
 	struct request *req;
+	unsigned long flags;
 
+	spin_lock_irqsave(&nbd->tasks_lock, flags);
 	nbd->task_send = current;
+	spin_unlock_irqrestore(&nbd->tasks_lock, flags);
 
 	set_user_nice(current, MIN_NICE);
 	while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
@@ -572,7 +587,15 @@ static int nbd_thread_send(void *data)
 		nbd_handle_req(nbd, req);
 	}
 
+	spin_lock_irqsave(&nbd->tasks_lock, flags);
 	nbd->task_send = NULL;
+	spin_unlock_irqrestore(&nbd->tasks_lock, flags);
+
+	/* Clear maybe pending signals */
+	if (signal_pending(current)) {
+		siginfo_t info;
+		dequeue_signal_lock(current, &current->blocked, &info);
+	}
 
 	return 0;
 }
@@ -1052,6 +1075,7 @@ static int __init nbd_init(void)
 		nbd_dev[i].magic = NBD_MAGIC;
 		INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
 		spin_lock_init(&nbd_dev[i].queue_lock);
+		spin_lock_init(&nbd_dev[i].tasks_lock);
 		INIT_LIST_HEAD(&nbd_dev[i].queue_head);
 		mutex_init(&nbd_dev[i].tx_lock);
 		init_timer(&nbd_dev[i].timeout_timer);
-- 
cgit v1.2.3


From 835da3f99d329b1160a1f7fc82c7ac81163d63d0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 6 Oct 2015 22:29:48 +0200
Subject: nvme: fix 32-bit build warning

Compiling the nvme driver on 32-bit warns about a cast from a __u64
variable to a pointer:

drivers/block/nvme-core.c: In function 'nvme_submit_io':
drivers/block/nvme-core.c:1847:4: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
    (void __user *)io.addr, length, NULL, 0);

The cast here is intentional and safe, so we can shut up the
gcc warning by adding an intermediate cast to 'uintptr_t'.

I had previously submitted a patch to fix this problem in the
nvme driver, but it was accepted on the same day that two new
warnings got added.

For clarification, I also change the third instance of this cast
to use uintptr_t instead of unsigned long now.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: d29ec8241c10e ("nvme: submit internal commands through the block layer")
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 6f04771f1019..fce353ba5f66 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1804,7 +1804,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 
 	length = (io.nblocks + 1) << ns->lba_shift;
 	meta_len = (io.nblocks + 1) * ns->ms;
-	metadata = (void __user *)(unsigned long)io.metadata;
+	metadata = (void __user *)(uintptr_t)io.metadata;
 	write = io.opcode & 1;
 
 	if (ns->ext) {
@@ -1844,7 +1844,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	c.rw.metadata = cpu_to_le64(meta_dma);
 
 	status = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
-			(void __user *)io.addr, length, NULL, 0);
+			(void __user *)(uintptr_t)io.addr, length, NULL, 0);
  unmap:
 	if (meta) {
 		if (status == NVME_SC_SUCCESS && !write) {
@@ -1886,7 +1886,7 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
 		timeout = msecs_to_jiffies(cmd.timeout_ms);
 
 	status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c,
-			NULL, (void __user *)cmd.addr, cmd.data_len,
+			NULL, (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
 			&cmd.result, timeout);
 	if (status >= 0) {
 		if (put_user(cmd.result, &ucmd->result))
-- 
cgit v1.2.3


From 81c04b943872e0332872df18cec1dec89b178b4d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 12 Oct 2015 21:23:39 +0200
Subject: nvme: use an integer value to Linux errno values

Use a separate integer variable to hold the signed Linux errno
values we pass back to the block layer.  Note that for pass through
commands those might still be NVMe values, but those fit into the
int as well.

Fixes: f4829a9b7a61: ("blk-mq: fix racy updates of rq->errors")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-core.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index fce353ba5f66..84e4a8088386 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -603,8 +603,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 	struct nvme_iod *iod = ctx;
 	struct request *req = iod_get_private(iod);
 	struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
-
 	u16 status = le16_to_cpup(&cqe->status) >> 1;
+	int error = 0;
 
 	if (unlikely(status)) {
 		if (!(status & NVME_SC_DNR || blk_noretry_request(req))
@@ -621,9 +621,11 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 
 		if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
 			if (cmd_rq->ctx == CMD_CTX_CANCELLED)
-				status = -EINTR;
+				error = -EINTR;
+			else
+				error = status;
 		} else {
-			status = nvme_error_status(status);
+			error = nvme_error_status(status);
 		}
 	}
 
@@ -635,7 +637,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 	if (cmd_rq->aborted)
 		dev_warn(nvmeq->dev->dev,
 			"completing aborted command with status:%04x\n",
-			status);
+			error);
 
 	if (iod->nents) {
 		dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents,
@@ -649,7 +651,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 	}
 	nvme_free_iod(nvmeq->dev, iod);
 
-	blk_mq_complete_request(req, status);
+	blk_mq_complete_request(req, error);
 }
 
 /* length is in bytes.  gfp flags indicates whether we may sleep. */
-- 
cgit v1.2.3


From 0dfc70c33409afc232ef0b9ec210535dfbf9bc61 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Thu, 15 Oct 2015 13:38:48 -0600
Subject: NVMe: Fix memory leak on retried commands

Resources are reallocated for requeued commands, so unmap and release
the iod for the failed command.

It's a pretty bad memory leak and causes a kernel hang if you remove a
drive because of a busy dma pool. You'll get messages spewing like this:

  nvme 0000:xx:xx.x: dma_pool_destroy prp list 256, ffff880420dec000 busy

and lock up pci and the driver since removal never completes while
holding a lock.

Cc: stable@vger.kernel.org
Cc: <stable@vger.kernel.org> # 4.0.x-
Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-core.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 84e4a8088386..ccc0c1f93daa 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -604,6 +604,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 	struct request *req = iod_get_private(iod);
 	struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
 	u16 status = le16_to_cpup(&cqe->status) >> 1;
+	bool requeue = false;
 	int error = 0;
 
 	if (unlikely(status)) {
@@ -611,12 +612,13 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 		    && (jiffies - req->start_time) < req->timeout) {
 			unsigned long flags;
 
+			requeue = true;
 			blk_mq_requeue_request(req);
 			spin_lock_irqsave(req->q->queue_lock, flags);
 			if (!blk_queue_stopped(req->q))
 				blk_mq_kick_requeue_list(req->q);
 			spin_unlock_irqrestore(req->q->queue_lock, flags);
-			return;
+			goto release_iod;
 		}
 
 		if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
@@ -639,6 +641,7 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 			"completing aborted command with status:%04x\n",
 			error);
 
+release_iod:
 	if (iod->nents) {
 		dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents,
 			rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
@@ -651,7 +654,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 	}
 	nvme_free_iod(nvmeq->dev, iod);
 
-	blk_mq_complete_request(req, error);
+	if (likely(!requeue))
+		blk_mq_complete_request(req, error);
 }
 
 /* length is in bytes.  gfp flags indicates whether we may sleep. */
-- 
cgit v1.2.3