From 2800b8e7d9dfca1fd9d044dcf7a046b5de5a7239 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Fri, 13 May 2016 12:38:09 -0600
Subject: NVMe: Allocate queues only for online cpus

The driver previously requested allocating queues for the total possible
number of CPUs so that blk-mq could rebalance these if CPUs were added
after initialization. The number of hardware contexts can now be changed
at runtime, so we only need to allocate the number of online queues
since we can add more later.

Suggested-by: Jeff Lien <jeff.lien@hgst.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 0f093f14d348..3c7b625a5e56 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1394,7 +1394,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 	int result, i, vecs, nr_io_queues, size;
 
-	nr_io_queues = num_possible_cpus();
+	nr_io_queues = num_online_cpus();
 	result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
 	if (result < 0)
 		return result;
-- 
cgit v1.2.3


From 014a0d609eb4721d1e416cf10da2d5602f9b34d5 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Fri, 6 May 2016 11:50:52 -0600
Subject: NVMe: Delete only created queues

Use the online queue count instead of the number of allocated queues. The
controller should just return an invalid queue identifier error to the
commands if a queue wasn't created. While it's not harmful, it's still
not correct.

Reported-by: Saar Gross <saar@annapurnalabs.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/pci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3c7b625a5e56..88ed43d0799c 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1551,12 +1551,12 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
 
 static void nvme_disable_io_queues(struct nvme_dev *dev)
 {
-	int pass;
+	int pass, queues = dev->online_queues - 1;
 	unsigned long timeout;
 	u8 opcode = nvme_admin_delete_sq;
 
 	for (pass = 0; pass < 2; pass++) {
-		int sent = 0, i = dev->queue_count - 1;
+		int sent = 0, i = queues;
 
 		reinit_completion(&dev->ioq_wait);
  retry:
-- 
cgit v1.2.3


From 921920ab32f290dafdb0359024d4587897712728 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Mon, 28 Mar 2016 16:03:21 -0600
Subject: NVMe: Unbind driver on failure

Instead of removing the PCI device from the kernel's topology on
controller failure, this patch simply requests unbinding the device
from the driver. This avoids concurrently running pci removal with the
hot plug event, which has been reported to be problematic when multiple
surprise events occur near simultaneously.

The other benefit is that we will have PCI config and memory space
available to poke around for debugging a failed controller, assuming
the device was not physically removed.

The down side occurs if the platform and/or kernel do not support any
type of surprise hot removal. The device will remain visible through
sysfs (and therefore lspci), and some manual work is necessary to get
the logical topology corrected. But if your platform and/or kernel don't
support surprise removal, you probably shouldn't be doing that anyway.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 88ed43d0799c..194e9014811b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1857,7 +1857,7 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
 
 	nvme_kill_queues(&dev->ctrl);
 	if (pci_get_drvdata(pdev))
-		pci_stop_and_remove_bus_device_locked(pdev);
+		device_release_driver(&pdev->dev);
 	nvme_put_ctrl(&dev->ctrl);
 }
 
-- 
cgit v1.2.3


From d011fb3164e8694d7839f10a497f8ab6c660149a Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Mon, 4 Apr 2016 15:07:41 -0600
Subject: NVMe: Reduce driver log spamming

Reduce error logging when no corrective action is required.

Suggessted-by: Chris Petersen <cpetersen@fb.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/pci.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 194e9014811b..4feaed591e83 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2060,14 +2060,17 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
 	 * shutdown the controller to quiesce. The controller will be restarted
 	 * after the slot reset through driver's slot_reset callback.
 	 */
-	dev_warn(dev->ctrl.device, "error detected: state:%d\n", state);
 	switch (state) {
 	case pci_channel_io_normal:
 		return PCI_ERS_RESULT_CAN_RECOVER;
 	case pci_channel_io_frozen:
+		dev_warn(dev->ctrl.device,
+			"frozen state error detected, reset controller\n");
 		nvme_dev_disable(dev, false);
 		return PCI_ERS_RESULT_NEED_RESET;
 	case pci_channel_io_perm_failure:
+		dev_warn(dev->ctrl.device,
+			"failure state error detected, request disconnect\n");
 		return PCI_ERS_RESULT_DISCONNECT;
 	}
 	return PCI_ERS_RESULT_NEED_RESET;
-- 
cgit v1.2.3


From 9ec3bb2f994bda9c8817856fdcbfaebe8f62fbd3 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Fri, 29 Apr 2016 15:45:18 -0600
Subject: NVMe: Allow user initiated rescan

This exposes ioctl and sysfs methods a user can invoke to request the
driver rescan a controller and its namespaces. This is less harsh than
doing a controller reset, which temporarilly halts all IO, just to
surface a newly attached namespace.

This is mainly useful for controllers that implement the namespace
management command, but do not support the namespace notify change
asynchronous event notification.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/core.c        | 15 +++++++++++++++
 include/uapi/linux/nvme_ioctl.h |  1 +
 2 files changed, 16 insertions(+)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 2de248bd462b..acc05adaa0d4 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1212,6 +1212,9 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
 		return ctrl->ops->reset_ctrl(ctrl);
 	case NVME_IOCTL_SUBSYS_RESET:
 		return nvme_reset_subsystem(ctrl);
+	case NVME_IOCTL_RESCAN:
+		nvme_queue_scan(ctrl);
+		return 0;
 	default:
 		return -ENOTTY;
 	}
@@ -1239,6 +1242,17 @@ static ssize_t nvme_sysfs_reset(struct device *dev,
 }
 static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
 
+static ssize_t nvme_sysfs_rescan(struct device *dev,
+				struct device_attribute *attr, const char *buf,
+				size_t count)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+	nvme_queue_scan(ctrl);
+	return count;
+}
+static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan);
+
 static ssize_t wwid_show(struct device *dev, struct device_attribute *attr,
 								char *buf)
 {
@@ -1342,6 +1356,7 @@ nvme_show_int_function(cntlid);
 
 static struct attribute *nvme_dev_attrs[] = {
 	&dev_attr_reset_controller.attr,
+	&dev_attr_rescan_controller.attr,
 	&dev_attr_model.attr,
 	&dev_attr_serial.attr,
 	&dev_attr_firmware_rev.attr,
diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h
index c4b2a3f90829..50ff21f748b6 100644
--- a/include/uapi/linux/nvme_ioctl.h
+++ b/include/uapi/linux/nvme_ioctl.h
@@ -61,5 +61,6 @@ struct nvme_passthru_cmd {
 #define NVME_IOCTL_IO_CMD	_IOWR('N', 0x43, struct nvme_passthru_cmd)
 #define NVME_IOCTL_RESET	_IO('N', 0x44)
 #define NVME_IOCTL_SUBSYS_RESET	_IO('N', 0x45)
+#define NVME_IOCTL_RESCAN	_IO('N', 0x46)
 
 #endif /* _UAPI_LINUX_NVME_IOCTL_H */
-- 
cgit v1.2.3


From 0ff9d4e1a284a9282a049bf064f123e27f838907 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Thu, 12 May 2016 08:37:14 -0600
Subject: NVMe: Short-cut removal on surprise hot-unplug

This patch adds a new state that when set has the core automatically
kill request queues prior to removing namespaces.

If PCI device is not present at the time the nvme driver's remove is
called, we can kill all IO queues immediately instead of waiting for
the watchdog thread to do that at its polling interval. This improves
scenarios where multiple hot plug events occur at the same time since
it doesn't block the pci enumeration for as long.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/core.c | 18 ++++++++++++++++++
 drivers/nvme/host/nvme.h |  1 +
 drivers/nvme/host/pci.c  |  4 ++++
 3 files changed, 23 insertions(+)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index acc05adaa0d4..beed3940786b 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -95,6 +95,15 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 			break;
 		}
 		break;
+	case NVME_CTRL_DEAD:
+		switch (old_state) {
+		case NVME_CTRL_DELETING:
+			changed = true;
+			/* FALLTHRU */
+		default:
+			break;
+		}
+		break;
 	default:
 		break;
 	}
@@ -1595,6 +1604,15 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns, *next;
 
+	/*
+	 * The dead states indicates the controller was not gracefully
+	 * disconnected. In that case, we won't be able to flush any data while
+	 * removing the namespaces' disks; fail all the queues now to avoid
+	 * potentially having to clean up the failed sync later.
+	 */
+	if (ctrl->state == NVME_CTRL_DEAD)
+		nvme_kill_queues(ctrl);
+
 	mutex_lock(&ctrl->namespaces_mutex);
 	list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
 		nvme_ns_remove(ns);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 114b92873894..1daa0482de0e 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -72,6 +72,7 @@ enum nvme_ctrl_state {
 	NVME_CTRL_LIVE,
 	NVME_CTRL_RESETTING,
 	NVME_CTRL_DELETING,
+	NVME_CTRL_DEAD,
 };
 
 struct nvme_ctrl {
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 4feaed591e83..3bdcf0e34fd6 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2017,6 +2017,10 @@ static void nvme_remove(struct pci_dev *pdev)
 	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
 
 	pci_set_drvdata(pdev, NULL);
+
+	if (!pci_device_is_present(pdev))
+		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
+
 	flush_work(&dev->reset_work);
 	nvme_uninit_ctrl(&dev->ctrl);
 	nvme_dev_disable(dev, true);
-- 
cgit v1.2.3


From 99466e708ddce8904c8635c213f2deb523ef4fb9 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Mon, 2 May 2016 15:14:24 -0600
Subject: NVMe: Add device ID's with stripe quirk

Adds two Intel controllers that have the "stripe" quirk.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/pci.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3bdcf0e34fd6..78dca3193ca4 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2109,6 +2109,12 @@ static const struct pci_device_id nvme_id_table[] = {
 	{ PCI_VDEVICE(INTEL, 0x0953),
 		.driver_data = NVME_QUIRK_STRIPE_SIZE |
 				NVME_QUIRK_DISCARD_ZEROES, },
+	{ PCI_VDEVICE(INTEL, 0x0a53),
+		.driver_data = NVME_QUIRK_STRIPE_SIZE |
+				NVME_QUIRK_DISCARD_ZEROES, },
+	{ PCI_VDEVICE(INTEL, 0x0a54),
+		.driver_data = NVME_QUIRK_STRIPE_SIZE |
+				NVME_QUIRK_DISCARD_ZEROES, },
 	{ PCI_VDEVICE(INTEL, 0x5845),	/* Qemu emulated controller */
 		.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
 	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
-- 
cgit v1.2.3


From ba36c21b0cd8b55fd7f010e9052656c2c03d9e5e Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Sat, 9 Apr 2016 03:04:42 +0000
Subject: nvme/host: Add missing blk_integrity tag_size + flags assignments

While doing recent bring-up of nvme/host with target-core T10-PI,
I noticed /sys/block/nvme*/integrity/device_is_integrity_capable
was false, and /sys/block/nvme*/integrity/tag_size contained
a bogus value.

AFAICT outside of blk_integrity_compare() for DM + MD these
are informational values, but go ahead and add the missing
assignments for nvme/host to match what SCSI does within
sd_dif_config_host() for consistency's sake.

Cc: Keith Busch <keith.busch@intel.com>
Cc: Jay Freyensee <james.p.freyensee@intel.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Sagi Grimberg <sagig@grimberg.me>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
Reviewed-by: Sagi Grimberg <sagi at grimberg.me>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/nvme/host/core.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index beed3940786b..1a51584a382b 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -729,10 +729,14 @@ static void nvme_init_integrity(struct nvme_ns *ns)
 	switch (ns->pi_type) {
 	case NVME_NS_DPS_PI_TYPE3:
 		integrity.profile = &t10_pi_type3_crc;
+		integrity.tag_size = sizeof(u16) + sizeof(u32);
+		integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
 		break;
 	case NVME_NS_DPS_PI_TYPE1:
 	case NVME_NS_DPS_PI_TYPE2:
 		integrity.profile = &t10_pi_type1_crc;
+		integrity.tag_size = sizeof(u16);
+		integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
 		break;
 	default:
 		integrity.profile = NULL;
-- 
cgit v1.2.3


From 7c87df9c159aa1d228f0d77b37942216cff34922 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Tue, 24 May 2016 16:38:10 +0200
Subject: bcache: bch_writeback_thread() is not freezable

bch_writeback_thread() is calling try_to_freeze(), but that's just an
expensive no-op given the fact that the thread is not marked freezable.

I/O helper kthreads, exactly such as the bcache writeback thread, actually
shouldn't be freezable, because they are potentially necessary for
finalizing the image write-out.

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/md/bcache/writeback.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index b9346cd9cda1..60123677b382 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -12,7 +12,6 @@
 #include "writeback.h"
 
 #include <linux/delay.h>
-#include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <trace/events/bcache.h>
 
@@ -228,7 +227,6 @@ static void read_dirty(struct cached_dev *dc)
 	 */
 
 	while (!kthread_should_stop()) {
-		try_to_freeze();
 
 		w = bch_keybuf_next(&dc->writeback_keys);
 		if (!w)
@@ -433,7 +431,6 @@ static int bch_writeback_thread(void *arg)
 			if (kthread_should_stop())
 				return 0;
 
-			try_to_freeze();
 			schedule();
 			continue;
 		}
-- 
cgit v1.2.3


From 770b8ce400123af89ac469361d7912f458915547 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Tue, 24 May 2016 16:38:15 +0200
Subject: bcache: bch_allocator_thread() is not freezable

bch_allocator_thread() is calling try_to_freeze(), but that's just an
expensive no-op given the fact that the thread is not marked freezable.

Bucket allocator has to be up and running to the very last stages of the
suspend, as the bcache I/O that's in flight (think of writing an
hibernation image to a swap device served by bcache).

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/md/bcache/alloc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 8eeab72b93e2..ca4abe1ccd8d 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -64,7 +64,6 @@
 #include "btree.h"
 
 #include <linux/blkdev.h>
-#include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/random.h>
 #include <trace/events/bcache.h>
@@ -288,7 +287,6 @@ do {									\
 		if (kthread_should_stop())				\
 			return 0;					\
 									\
-		try_to_freeze();					\
 		schedule();						\
 		mutex_lock(&(ca)->set->bucket_lock);			\
 	}								\
-- 
cgit v1.2.3


From 29e6c57cc78eb7cb8302088061a0721a41ade658 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Tue, 24 May 2016 16:38:34 +0200
Subject: bcache: bch_gc_thread() is not freezable

bch_gc_thread() doesn't mark itself freezable, so calling try_to_freeze()
in its context is just an expensive no-op.

Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/md/bcache/btree.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 22b9e34ceb75..eab505ee0027 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -27,7 +27,6 @@
 
 #include <linux/slab.h>
 #include <linux/bitops.h>
-#include <linux/freezer.h>
 #include <linux/hash.h>
 #include <linux/kthread.h>
 #include <linux/prefetch.h>
@@ -1787,7 +1786,6 @@ again:
 
 		mutex_unlock(&c->bucket_lock);
 
-		try_to_freeze();
 		schedule();
 	}
 
-- 
cgit v1.2.3


From 4d1034eb7c2f5e32d48ddc4dfce0f1a723d28667 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jikos@kernel.org>
Date: Tue, 24 May 2016 16:38:50 +0200
Subject: MAINTAINERS: mark bcache as orphan

The submitted patches are not being reacted upon, and Jens is only picking
up stable fixes on an rather ad-hoc basis.

Link: lkml.kernel.org/r/574462C5.40307@kernel.dk
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 20e634627a10..f6f479549f4e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2222,7 +2222,7 @@ BCACHE (BLOCK LAYER CACHE)
 M:	Kent Overstreet <kent.overstreet@gmail.com>
 L:	linux-bcache@vger.kernel.org
 W:	http://bcache.evilpiepirate.org
-S:	Maintained
+S:	Orphan
 F:	drivers/md/bcache/
 
 BDISP ST MEDIA DRIVER
-- 
cgit v1.2.3


From c7de5726307620711a4753b2a13d9e5daecc1081 Mon Sep 17 00:00:00 2001
From: Ming Lin <ming.l@samsung.com>
Date: Wed, 25 May 2016 23:23:27 -0700
Subject: blk-mq: clear q->mq_ops if init fail

blk_mq_init_queue() calls blk_mq_init_allocated_queue(), but q->mq_ops
was not cleared when blk_mq_init_allocated_queue() fails.
Then blk_cleanup_queue() calls blk_mq_free_queue() which will crash because:
- q->all_q_node is not added to all_q_list yet
- q->tag_set is NULL
- hctx was not setup yet or already freed

Fixed it by clearing q->mq_ops on error path.

Signed-off-by: Ming Lin <ming.l@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 7df9c9263b21..29cbc1b5fbdb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2020,7 +2020,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 
 	q->queue_ctx = alloc_percpu(struct blk_mq_ctx);
 	if (!q->queue_ctx)
-		return ERR_PTR(-ENOMEM);
+		goto err_exit;
 
 	q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)),
 						GFP_KERNEL, set->numa_node);
@@ -2084,6 +2084,8 @@ err_map:
 	kfree(q->queue_hw_ctx);
 err_percpu:
 	free_percpu(q->queue_ctx);
+err_exit:
+	q->mq_ops = NULL;
 	return ERR_PTR(-ENOMEM);
 }
 EXPORT_SYMBOL(blk_mq_init_allocated_queue);
-- 
cgit v1.2.3