From f4aa4c7bbac6c4afdd4adccf90898c1a3685396d Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Tue, 5 May 2015 19:49:54 +0800
Subject: block: loop: convert to per-device workqueue

Documentation/workqueue.txt:
	If there is dependency among multiple work items used
	during memory reclaim, they should be queued to separate
	wq each with WQ_MEM_RECLAIM.

Loop devices can be stacked, so we have to convert to per-device
workqueue. One example is Fedora live CD.

Fixes: b5dd2f6047ca108001328aac0e8588edd15f1778
Cc: stable@vger.kernel.org (v4.0)
Cc: Justin M. Forbes <jforbes@fedoraproject.org>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/loop.c | 30 ++++++++++++++----------------
 drivers/block/loop.h |  1 +
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ae3fcb4199e9..3dc15983d3fe 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -86,8 +86,6 @@ static DEFINE_MUTEX(loop_index_mutex);
 static int max_part;
 static int part_shift;
 
-static struct workqueue_struct *loop_wq;
-
 static int transfer_xor(struct loop_device *lo, int cmd,
 			struct page *raw_page, unsigned raw_off,
 			struct page *loop_page, unsigned loop_off,
@@ -725,6 +723,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	size = get_loop_size(lo, file);
 	if ((loff_t)(sector_t)size != size)
 		goto out_putf;
+	error = -ENOMEM;
+	lo->wq = alloc_workqueue("kloopd%d",
+			WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0,
+			lo->lo_number);
+	if (!lo->wq)
+		goto out_putf;
 
 	error = 0;
 
@@ -872,6 +876,8 @@ static int loop_clr_fd(struct loop_device *lo)
 	lo->lo_flags = 0;
 	if (!part_shift)
 		lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
+	destroy_workqueue(lo->wq);
+	lo->wq = NULL;
 	mutex_unlock(&lo->lo_ctl_mutex);
 	/*
 	 * Need not hold lo_ctl_mutex to fput backing file.
@@ -1425,9 +1431,13 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
 	struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+	struct loop_device *lo = cmd->rq->q->queuedata;
 
 	blk_mq_start_request(bd->rq);
 
+	if (lo->lo_state != Lo_bound)
+		return -EIO;
+
 	if (cmd->rq->cmd_flags & REQ_WRITE) {
 		struct loop_device *lo = cmd->rq->q->queuedata;
 		bool need_sched = true;
@@ -1441,9 +1451,9 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		spin_unlock_irq(&lo->lo_lock);
 
 		if (need_sched)
-			queue_work(loop_wq, &lo->write_work);
+			queue_work(lo->wq, &lo->write_work);
 	} else {
-		queue_work(loop_wq, &cmd->read_work);
+		queue_work(lo->wq, &cmd->read_work);
 	}
 
 	return BLK_MQ_RQ_QUEUE_OK;
@@ -1455,9 +1465,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
 	struct loop_device *lo = cmd->rq->q->queuedata;
 	int ret = -EIO;
 
-	if (lo->lo_state != Lo_bound)
-		goto failed;
-
 	if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY))
 		goto failed;
 
@@ -1806,13 +1813,6 @@ static int __init loop_init(void)
 		goto misc_out;
 	}
 
-	loop_wq = alloc_workqueue("kloopd",
-			WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0);
-	if (!loop_wq) {
-		err = -ENOMEM;
-		goto misc_out;
-	}
-
 	blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
 				  THIS_MODULE, loop_probe, NULL, NULL);
 
@@ -1850,8 +1850,6 @@ static void __exit loop_exit(void)
 	blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
 	unregister_blkdev(LOOP_MAJOR, "loop");
 
-	destroy_workqueue(loop_wq);
-
 	misc_deregister(&loop_misc);
 }
 
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 301c27f8323f..49564edf5581 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -54,6 +54,7 @@ struct loop_device {
 	gfp_t		old_gfp_mask;
 
 	spinlock_t		lo_lock;
+	struct workqueue_struct *wq;
 	struct list_head	write_cmd_head;
 	struct work_struct	write_work;
 	bool			write_started;
-- 
cgit v1.2.3


From 4d4e41aef9429872ea3b105e83426941f7185ab6 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Tue, 5 May 2015 19:49:55 +0800
Subject: block: loop: avoiding too many pending per work I/O

If there are too many pending per work I/O, too many
high priority work threads can be generated so that
system performance can be affected.

This patch limits the max_active parameter of workqueue as 16.

This patch fixes the Fedora 22 live booting performance
regression when it is booted from squashfs over dm
based on loop, and it looks like the following reasons are
related to the problem:

- unlike other filesystems (such as ext4), squashfs
is a bit special, and I observed that increasing I/O jobs
to access files in squashfs only improves I/O performance a
little, but it can make a big difference for ext4

- nested loop: both squashfs.img and ext3fs.img are mounted
as loop block, and ext3fs.img is inside the squashfs

- during booting, lots of tasks may run concurrently

Fixes: b5dd2f6047ca108001328aac0e8588edd15f1778
Cc: stable@vger.kernel.org (v4.0)
Cc: Justin M. Forbes <jforbes@fedoraproject.org>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/loop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 3dc15983d3fe..1bee523aa349 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -725,7 +725,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 		goto out_putf;
 	error = -ENOMEM;
 	lo->wq = alloc_workqueue("kloopd%d",
-			WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0,
+			WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 16,
 			lo->lo_number);
 	if (!lo->wq)
 		goto out_putf;
-- 
cgit v1.2.3


From 5aea3288d3706e812a8d6c4078669f38b7b72bda Mon Sep 17 00:00:00 2001
From: Tomas Henzl <thenzl@redhat.com>
Date: Tue, 17 Feb 2015 17:40:21 +0100
Subject: cciss: remove duplicate entries from board_type struct

and devices not supported by this driver from unresettable list

Signed-off-by: Tomas Henzl <thenzl@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/cciss.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index ff20f192b0f6..48498220cd6c 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -139,8 +139,6 @@ static struct board_type products[] = {
 	{0x3214103C, "Smart Array E200i", &SA5_access},
 	{0x3215103C, "Smart Array E200i", &SA5_access},
 	{0x3237103C, "Smart Array E500", &SA5_access},
-	{0x3223103C, "Smart Array P800", &SA5_access},
-	{0x3234103C, "Smart Array P400", &SA5_access},
 	{0x323D103C, "Smart Array P700m", &SA5_access},
 };
 
@@ -574,8 +572,6 @@ static void cciss_procinit(ctlr_info_t *h)
 
 /* List of controllers which cannot be hard reset on kexec with reset_devices */
 static u32 unresettable_controller[] = {
-	0x324a103C, /* Smart Array P712m */
-	0x324b103C, /* SmartArray P711m */
 	0x3223103C, /* Smart Array P800 */
 	0x3234103C, /* Smart Array P400 */
 	0x3235103C, /* Smart Array P400i */
-- 
cgit v1.2.3


From 8a0ee3b52df73c7b89376b03c789232b78dd2aff Mon Sep 17 00:00:00 2001
From: Tomas Henzl <thenzl@redhat.com>
Date: Tue, 17 Feb 2015 17:40:22 +0100
Subject: cciss: correct the non-resettable board list

The hpsa driver carries a more recent version,
copy the table from there.

Signed-off-by: Tomas Henzl <thenzl@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/cciss.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 48498220cd6c..0422c47261c3 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -582,12 +582,32 @@ static u32 unresettable_controller[] = {
 	0x3215103C, /* Smart Array E200i */
 	0x3237103C, /* Smart Array E500 */
 	0x323D103C, /* Smart Array P700m */
+	0x40800E11, /* Smart Array 5i */
 	0x409C0E11, /* Smart Array 6400 */
 	0x409D0E11, /* Smart Array 6400 EM */
+	0x40700E11, /* Smart Array 5300 */
+	0x40820E11, /* Smart Array 532 */
+	0x40830E11, /* Smart Array 5312 */
+	0x409A0E11, /* Smart Array 641 */
+	0x409B0E11, /* Smart Array 642 */
+	0x40910E11, /* Smart Array 6i */
 };
 
 /* List of controllers which cannot even be soft reset */
 static u32 soft_unresettable_controller[] = {
+	0x40800E11, /* Smart Array 5i */
+	0x40700E11, /* Smart Array 5300 */
+	0x40820E11, /* Smart Array 532 */
+	0x40830E11, /* Smart Array 5312 */
+	0x409A0E11, /* Smart Array 641 */
+	0x409B0E11, /* Smart Array 642 */
+	0x40910E11, /* Smart Array 6i */
+	/* Exclude 640x boards.  These are two pci devices in one slot
+	 * which share a battery backed cache module.  One controls the
+	 * cache, the other accesses the cache through the one that controls
+	 * it.  If we reset the one controlling the cache, the other will
+	 * likely not be happy.  Just forbid resetting this conjoined mess.
+	 */
 	0x409C0E11, /* Smart Array 6400 */
 	0x409D0E11, /* Smart Array 6400 EM */
 };
@@ -4663,8 +4683,7 @@ static int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
 	 */
 	cciss_lookup_board_id(pdev, &board_id);
 	if (!ctlr_is_resettable(board_id)) {
-		dev_warn(&pdev->dev, "Cannot reset Smart Array 640x "
-				"due to shared cache module.");
+		dev_warn(&pdev->dev, "Controller not resettable\n");
 		return -ENODEV;
 	}
 
-- 
cgit v1.2.3


From cddcd72bcec3b1dc9cef7f17d724a7fe42d64cc1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 7 May 2015 09:38:14 +0200
Subject: nvme: disable irqs in nvme_freeze_queues

The queue_lock needs to be taken with irqs disabled.  This is mostly
due to the old pre blk-mq usage pattern, but we've also picked it up
in most of the few places where we use the queue_lock with blk-mq.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 85b8036deaa3..00e641937a8e 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -2585,9 +2585,9 @@ static void nvme_freeze_queues(struct nvme_dev *dev)
 	list_for_each_entry(ns, &dev->namespaces, list) {
 		blk_mq_freeze_queue_start(ns->queue);
 
-		spin_lock(ns->queue->queue_lock);
+		spin_lock_irq(ns->queue->queue_lock);
 		queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
-		spin_unlock(ns->queue->queue_lock);
+		spin_unlock_irq(ns->queue->queue_lock);
 
 		blk_mq_cancel_requeue_work(ns->queue);
 		blk_mq_stop_hw_queues(ns->queue);
-- 
cgit v1.2.3


From f8933667953e8e61bb6104f5ca88e32e85656a93 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Wed, 6 May 2015 12:26:23 +0800
Subject: block: loop: don't hold lo_ctl_mutex in lo_open

The lo_ctl_mutex is held for running all ioctl handlers, and
in some ioctl handlers, ioctl_by_bdev(BLKRRPART) is called for
rereading partitions, which requires bd_mutex.

So it is easy to cause failure because trylock(bd_mutex) may
fail inside blkdev_reread_part(); the lock context is as follows:

blkid or other application:
	->open()
		->mutex_lock(bd_mutex)
		->lo_open()
			->mutex_lock(lo_ctl_mutex)

losetup(set fd ioctl):
	->mutex_lock(lo_ctl_mutex)
	->ioctl_by_bdev(BLKRRPART)
		->trylock(bd_mutex)

This patch tries to eliminate the ABBA lock dependency by removing
lo_ctl_mutex from lo_open() with the following approach:

1) make lo_refcnt an atomic_t and avoid acquiring lo_ctl_mutex in lo_open():
	- for open vs. add/del loop, there is no problem because of loop_index_mutex
	- freeze request queue during clr_fd, so I/O can't come until
	  clearing fd is completed, like the effect of holding lo_ctl_mutex
	  in lo_open
	- both open() and release() have been serialized by bd_mutex already

2) don't hold lo_ctl_mutex for decreasing/checking lo_refcnt in
lo_release(), then lo_ctl_mutex is only required for the last release.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Jarod Wilson <jarod@redhat.com>
Acked-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/loop.c | 21 ++++++++++++---------
 drivers/block/loop.h |  2 +-
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 1bee523aa349..b3e294e529ec 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -831,7 +831,7 @@ static int loop_clr_fd(struct loop_device *lo)
 	 * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
 	 * command to fail with EBUSY.
 	 */
-	if (lo->lo_refcnt > 1) {
+	if (atomic_read(&lo->lo_refcnt) > 1) {
 		lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
 		mutex_unlock(&lo->lo_ctl_mutex);
 		return 0;
@@ -840,6 +840,9 @@ static int loop_clr_fd(struct loop_device *lo)
 	if (filp == NULL)
 		return -EINVAL;
 
+	/* freeze request queue during the transition */
+	blk_mq_freeze_queue(lo->lo_queue);
+
 	spin_lock_irq(&lo->lo_lock);
 	lo->lo_state = Lo_rundown;
 	lo->lo_backing_file = NULL;
@@ -871,6 +874,8 @@ static int loop_clr_fd(struct loop_device *lo)
 	lo->lo_state = Lo_unbound;
 	/* This is safe: open() is still holding a reference. */
 	module_put(THIS_MODULE);
+	blk_mq_unfreeze_queue(lo->lo_queue);
+
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
 		ioctl_by_bdev(bdev, BLKRRPART, 0);
 	lo->lo_flags = 0;
@@ -1330,9 +1335,7 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
 		goto out;
 	}
 
-	mutex_lock(&lo->lo_ctl_mutex);
-	lo->lo_refcnt++;
-	mutex_unlock(&lo->lo_ctl_mutex);
+	atomic_inc(&lo->lo_refcnt);
 out:
 	mutex_unlock(&loop_index_mutex);
 	return err;
@@ -1343,11 +1346,10 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
 	struct loop_device *lo = disk->private_data;
 	int err;
 
-	mutex_lock(&lo->lo_ctl_mutex);
-
-	if (--lo->lo_refcnt)
-		goto out;
+	if (atomic_dec_return(&lo->lo_refcnt))
+		return;
 
+	mutex_lock(&lo->lo_ctl_mutex);
 	if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
 		/*
 		 * In autoclear mode, stop the loop thread
@@ -1601,6 +1603,7 @@ static int loop_add(struct loop_device **l, int i)
 		disk->flags |= GENHD_FL_NO_PART_SCAN;
 	disk->flags |= GENHD_FL_EXT_DEVT;
 	mutex_init(&lo->lo_ctl_mutex);
+	atomic_set(&lo->lo_refcnt, 0);
 	lo->lo_number		= i;
 	spin_lock_init(&lo->lo_lock);
 	disk->major		= LOOP_MAJOR;
@@ -1718,7 +1721,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
 			mutex_unlock(&lo->lo_ctl_mutex);
 			break;
 		}
-		if (lo->lo_refcnt > 0) {
+		if (atomic_read(&lo->lo_refcnt) > 0) {
 			ret = -EBUSY;
 			mutex_unlock(&lo->lo_ctl_mutex);
 			break;
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 49564edf5581..25e8997ed246 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -28,7 +28,7 @@ struct loop_func_table;
 
 struct loop_device {
 	int		lo_number;
-	int		lo_refcnt;
+	atomic_t	lo_refcnt;
 	loff_t		lo_offset;
 	loff_t		lo_sizelimit;
 	int		lo_flags;
-- 
cgit v1.2.3


From 06f0e9e68c0d81c7d822a405f6e35686a711c1fe Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Wed, 6 May 2015 12:26:24 +0800
Subject: block: loop: fix another reread part failure

loop_clr_fd() can be run piggyback with lo_release(), and
under this situation, reread partition may always fail because
bd_mutex has been held already.

This patch detects the situation by the reference count, and
calls __blkdev_reread_part() to avoid acquiring the lock again.

In the meantime, this patch switches to new kernel APIs
of blkdev_reread_part() and __blkdev_reread_part().

Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Jarod Wilson <jarod@redhat.com>
Acked-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/loop.c | 30 ++++++++++++++++++++++++++----
 1 file changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index b3e294e529ec..2b99e34f4253 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -474,6 +474,28 @@ static int loop_flush(struct loop_device *lo)
 	return loop_switch(lo, NULL);
 }
 
+static void loop_reread_partitions(struct loop_device *lo,
+				   struct block_device *bdev)
+{
+	int rc;
+
+	/*
+	 * bd_mutex has been held already in release path, so don't
+	 * acquire it if this function is called in such case.
+	 *
+	 * If the reread partition isn't from release path, lo_refcnt
+	 * must be at least one and it can only become zero when the
+	 * current holder is released.
+	 */
+	if (!atomic_read(&lo->lo_refcnt))
+		rc = __blkdev_reread_part(bdev);
+	else
+		rc = blkdev_reread_part(bdev);
+	if (rc)
+		pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
+			__func__, lo->lo_number, lo->lo_file_name, rc);
+}
+
 /*
  * loop_change_fd switched the backing store of a loopback device to
  * a new file. This is useful for operating system installers to free up
@@ -522,7 +544,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 
 	fput(old_file);
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN)
-		ioctl_by_bdev(bdev, BLKRRPART, 0);
+		loop_reread_partitions(lo, bdev);
 	return 0;
 
  out_putf:
@@ -759,7 +781,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	if (part_shift)
 		lo->lo_flags |= LO_FLAGS_PARTSCAN;
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN)
-		ioctl_by_bdev(bdev, BLKRRPART, 0);
+		loop_reread_partitions(lo, bdev);
 
 	/* Grab the block_device to prevent its destruction after we
 	 * put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev).
@@ -877,7 +899,7 @@ static int loop_clr_fd(struct loop_device *lo)
 	blk_mq_unfreeze_queue(lo->lo_queue);
 
 	if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
-		ioctl_by_bdev(bdev, BLKRRPART, 0);
+		loop_reread_partitions(lo, bdev);
 	lo->lo_flags = 0;
 	if (!part_shift)
 		lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
@@ -954,7 +976,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
 	     !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
 		lo->lo_flags |= LO_FLAGS_PARTSCAN;
 		lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
-		ioctl_by_bdev(lo->lo_device, BLKRRPART, 0);
+		loop_reread_partitions(lo, lo->lo_device);
 	}
 
 	lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
-- 
cgit v1.2.3


From 9dcd13795342f51994fc23a4949d31c77919271c Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Wed, 6 May 2015 12:26:25 +0800
Subject: block: nbd: convert to blkdev_reread_part()

Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Jarod Wilson <jarod@redhat.com>
Acked-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nbd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 83a7ba4a3eec..0e385d8e9b86 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -711,7 +711,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		bdev->bd_inode->i_size = 0;
 		set_capacity(nbd->disk, 0);
 		if (max_part > 0)
-			ioctl_by_bdev(bdev, BLKRRPART, 0);
+			blkdev_reread_part(bdev);
 		if (nbd->disconnect) /* user requested, ignore socket errors */
 			return 0;
 		return nbd->harderror;
-- 
cgit v1.2.3


From 6029a06c88b925467cb43e4b57dcede88f0457eb Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Wed, 6 May 2015 12:26:26 +0800
Subject: block: dasd_genhd: convert to blkdev_reread_part

Also remove the obsolete comment.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Jarod Wilson <jarod@redhat.com>
Acked-by: Jarod Wilson <jarod@redhat.com>
Acked-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/s390/block/dasd_genhd.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index 90f39f79f5d7..2af461923683 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -116,14 +116,11 @@ int dasd_scan_partitions(struct dasd_block *block)
 			      rc);
 		return -ENODEV;
 	}
-	/*
-	 * See fs/partition/check.c:register_disk,rescan_partitions
-	 * Can't call rescan_partitions directly. Use ioctl.
-	 */
-	rc = ioctl_by_bdev(bdev, BLKRRPART, 0);
+
+	rc = blkdev_reread_part(bdev);
 	while (rc == -EBUSY && retry > 0) {
 		schedule();
-		rc = ioctl_by_bdev(bdev, BLKRRPART, 0);
+		rc = blkdev_reread_part(bdev);
 		retry--;
 		DBF_DEV_EVENT(DBF_ERR, block->base,
 			      "scan partitions error, retry %d rc %d",
-- 
cgit v1.2.3


From a05e578055d7f9ec2d5c4465933eb424c4e8e25b Mon Sep 17 00:00:00 2001
From: Jarod Wilson <jarod@redhat.com>
Date: Wed, 6 May 2015 12:26:28 +0800
Subject: s390/block/dasd: remove obsolete while -EBUSY loop

With the mutex_trylock bit gone from blkdev_reread_part(), the retry logic
in dasd_scan_partitions() shouldn't be necessary.

CC: Christoph Hellwig <hch@infradead.org>
CC: Jens Axboe <axboe@kernel.dk>
CC: Tejun Heo <tj@kernel.org>
CC: Alexander Viro <viro@zeniv.linux.org.uk>
CC: Markus Pargmann <mpa@pengutronix.de>
CC: Stefan Weinhuber <wein@de.ibm.com>
CC: Stefan Haberland <stefan.haberland@de.ibm.com>
CC: Sebastian Ott <sebott@linux.vnet.ibm.com>
CC: Fabian Frederick <fabf@skynet.be>
CC: Ming Lei <ming.lei@canonical.com>
CC: David Herrmann <dh.herrmann@gmail.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Peter Zijlstra <peterz@infradead.org>
CC: nbd-general@lists.sourceforge.net
CC: linux-s390@vger.kernel.org
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jarod Wilson <jarod@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/s390/block/dasd_genhd.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index 2af461923683..ef1d9fb06cab 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -99,9 +99,8 @@ void dasd_gendisk_free(struct dasd_block *block)
 int dasd_scan_partitions(struct dasd_block *block)
 {
 	struct block_device *bdev;
-	int retry, rc;
+	int rc;
 
-	retry = 5;
 	bdev = bdget_disk(block->gdp, 0);
 	if (!bdev) {
 		DBF_DEV_EVENT(DBF_ERR, block->base, "%s",
@@ -118,14 +117,9 @@ int dasd_scan_partitions(struct dasd_block *block)
 	}
 
 	rc = blkdev_reread_part(bdev);
-	while (rc == -EBUSY && retry > 0) {
-		schedule();
-		rc = blkdev_reread_part(bdev);
-		retry--;
+	if (rc)
 		DBF_DEV_EVENT(DBF_ERR, block->base,
-			      "scan partitions error, retry %d rc %d",
-			      retry, rc);
-	}
+				"scan partitions error, rc %d", rc);
 
 	/*
 	 * Since the matching blkdev_put call to the blkdev_get in
-- 
cgit v1.2.3


From 6a9270075858a0586bc1a8415263e8d1134550f6 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Wed, 20 May 2015 09:54:35 -0600
Subject: loop: remove (now) unused 'out' label

gcc, rightfully, complains:

drivers/block/loop.c:1369:1: warning: label 'out' defined but not used [-Wunused-label]

Kill it.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/loop.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 2b99e34f4253..1797185a56af 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1388,7 +1388,6 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
 		loop_flush(lo);
 	}
 
-out:
 	mutex_unlock(&lo->lo_ctl_mutex);
 }
 
-- 
cgit v1.2.3


From f705f837c58ebe1ea69dfffff4dcc234e2fbc8dd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 22 May 2015 11:12:38 +0200
Subject: nvme: consolidate synchronous command submission helpers

Note that we keep the unused timeout argument, but allow callers to
pass 0 instead of a timeout if they want the default.  This will allow
adding a timeout to the pass through path later on.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-core.c | 96 ++++++++++++++++-------------------------------
 drivers/block/nvme-scsi.c | 17 ++++-----
 include/linux/nvme.h      |  6 +--
 3 files changed, 42 insertions(+), 77 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 00e641937a8e..e81b205ffd04 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -991,27 +991,40 @@ static void sync_completion(struct nvme_queue *nvmeq, void *ctx,
  * Returns 0 on success.  If the result is negative, it's a Linux error code;
  * if the result is positive, it's an NVM Express status code
  */
-static int nvme_submit_sync_cmd(struct request *req, struct nvme_command *cmd,
-						u32 *result, unsigned timeout)
+static int __nvme_submit_sync_cmd(struct request_queue *q,
+		struct nvme_command *cmd, u32 *result, unsigned timeout)
 {
 	struct sync_cmd_info cmdinfo;
-	struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
-	struct nvme_queue *nvmeq = cmd_rq->nvmeq;
+	struct nvme_cmd_info *cmd_rq;
+	struct request *req;
+	int res;
+
+	req = blk_mq_alloc_request(q, WRITE, GFP_KERNEL, false);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
 
 	cmdinfo.task = current;
 	cmdinfo.status = -EINTR;
 
 	cmd->common.command_id = req->tag;
 
+	cmd_rq = blk_mq_rq_to_pdu(req);
 	nvme_set_info(cmd_rq, &cmdinfo, sync_completion);
 
 	set_current_state(TASK_UNINTERRUPTIBLE);
-	nvme_submit_cmd(nvmeq, cmd);
+	nvme_submit_cmd(cmd_rq->nvmeq, cmd);
 	schedule();
 
 	if (result)
 		*result = cmdinfo.result;
-	return cmdinfo.status;
+	res = cmdinfo.status;
+	blk_mq_free_request(req);
+	return res;
+}
+
+int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd)
+{
+	return __nvme_submit_sync_cmd(q, cmd, NULL, 0);
 }
 
 static int nvme_submit_async_admin_req(struct nvme_dev *dev)
@@ -1060,41 +1073,6 @@ static int nvme_submit_admin_async_cmd(struct nvme_dev *dev,
 	return nvme_submit_cmd(nvmeq, cmd);
 }
 
-static int __nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
-						u32 *result, unsigned timeout)
-{
-	int res;
-	struct request *req;
-
-	req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_KERNEL, false);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-	res = nvme_submit_sync_cmd(req, cmd, result, timeout);
-	blk_mq_free_request(req);
-	return res;
-}
-
-int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
-								u32 *result)
-{
-	return __nvme_submit_admin_cmd(dev, cmd, result, ADMIN_TIMEOUT);
-}
-
-int nvme_submit_io_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
-					struct nvme_command *cmd, u32 *result)
-{
-	int res;
-	struct request *req;
-
-	req = blk_mq_alloc_request(ns->queue, WRITE, (GFP_KERNEL|__GFP_WAIT),
-									false);
-	if (IS_ERR(req))
-		return PTR_ERR(req);
-	res = nvme_submit_sync_cmd(req, cmd, result, NVME_IO_TIMEOUT);
-	blk_mq_free_request(req);
-	return res;
-}
-
 static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
 {
 	struct nvme_command c;
@@ -1103,7 +1081,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
 	c.delete_queue.opcode = opcode;
 	c.delete_queue.qid = cpu_to_le16(id);
 
-	return nvme_submit_admin_cmd(dev, &c, NULL);
+	return nvme_submit_sync_cmd(dev->admin_q, &c);
 }
 
 static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
@@ -1120,7 +1098,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
 	c.create_cq.cq_flags = cpu_to_le16(flags);
 	c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
 
-	return nvme_submit_admin_cmd(dev, &c, NULL);
+	return nvme_submit_sync_cmd(dev->admin_q, &c);
 }
 
 static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
@@ -1137,7 +1115,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
 	c.create_sq.sq_flags = cpu_to_le16(flags);
 	c.create_sq.cqid = cpu_to_le16(qid);
 
-	return nvme_submit_admin_cmd(dev, &c, NULL);
+	return nvme_submit_sync_cmd(dev->admin_q, &c);
 }
 
 static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
@@ -1161,7 +1139,7 @@ int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns,
 	c.identify.prp1 = cpu_to_le64(dma_addr);
 	c.identify.cns = cpu_to_le32(cns);
 
-	return nvme_submit_admin_cmd(dev, &c, NULL);
+	return nvme_submit_sync_cmd(dev->admin_q, &c);
 }
 
 int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
@@ -1175,7 +1153,7 @@ int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
 	c.features.prp1 = cpu_to_le64(dma_addr);
 	c.features.fid = cpu_to_le32(fid);
 
-	return nvme_submit_admin_cmd(dev, &c, result);
+	return __nvme_submit_sync_cmd(dev->admin_q, &c, result, 0);
 }
 
 int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
@@ -1189,7 +1167,7 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
 	c.features.fid = cpu_to_le32(fid);
 	c.features.dword11 = cpu_to_le32(dword11);
 
-	return nvme_submit_admin_cmd(dev, &c, result);
+	return __nvme_submit_sync_cmd(dev->admin_q, &c, result, 0);
 }
 
 /**
@@ -1813,7 +1791,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
 	c.rw.prp2 = cpu_to_le64(iod->first_dma);
 	c.rw.metadata = cpu_to_le64(meta_dma);
-	status = nvme_submit_io_cmd(dev, ns, &c, NULL);
+	status = nvme_submit_sync_cmd(ns->queue, &c);
  unmap:
 	nvme_unmap_user_pages(dev, write, iod);
 	nvme_free_iod(dev, iod);
@@ -1869,23 +1847,15 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
 	timeout = cmd.timeout_ms ? msecs_to_jiffies(cmd.timeout_ms) :
 								ADMIN_TIMEOUT;
 
-	if (length != cmd.data_len)
+	if (length != cmd.data_len) {
 		status = -ENOMEM;
-	else if (ns) {
-		struct request *req;
-
-		req = blk_mq_alloc_request(ns->queue, WRITE,
-						(GFP_KERNEL|__GFP_WAIT), false);
-		if (IS_ERR(req))
-			status = PTR_ERR(req);
-		else {
-			status = nvme_submit_sync_cmd(req, &c, &cmd.result,
-								timeout);
-			blk_mq_free_request(req);
-		}
-	} else
-		status = __nvme_submit_admin_cmd(dev, &c, &cmd.result, timeout);
+		goto out;
+	}
+
+	status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c,
+					&cmd.result, timeout);
 
+out:
 	if (cmd.data_len) {
 		nvme_unmap_user_pages(dev, cmd.opcode & 1, iod);
 		nvme_free_iod(dev, iod);
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 6b736b00f63e..ba1809fbd49e 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -1053,7 +1053,7 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
 	c.common.prp1 = cpu_to_le64(dma_addr);
 	c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) /
 			BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART);
-	res = nvme_submit_admin_cmd(dev, &c, NULL);
+	res = nvme_submit_sync_cmd(dev->admin_q, &c);
 	if (res != NVME_SC_SUCCESS) {
 		temp_c = LOG_TEMP_UNKNOWN;
 	} else {
@@ -1121,7 +1121,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	c.common.prp1 = cpu_to_le64(dma_addr);
 	c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) /
 			BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART);
-	res = nvme_submit_admin_cmd(dev, &c, NULL);
+	res = nvme_submit_sync_cmd(dev->admin_q, &c);
 	if (res != NVME_SC_SUCCESS) {
 		temp_c_cur = LOG_TEMP_UNKNOWN;
 	} else {
@@ -1609,7 +1609,7 @@ static int nvme_trans_send_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		c.common.cdw10[0] = cpu_to_le32(cdw10);
 	}
 
-	nvme_sc = nvme_submit_admin_cmd(dev, &c, NULL);
+	nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		goto out_unmap;
@@ -1971,7 +1971,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	c.format.nsid = cpu_to_le32(ns->ns_id);
 	c.format.cdw10 = cpu_to_le32(cdw10);
 
-	nvme_sc = nvme_submit_admin_cmd(dev, &c, NULL);
+	nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		goto out_dma;
@@ -2139,7 +2139,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 
 		nvme_offset += unit_num_blocks;
 
-		nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL);
+		nvme_sc = nvme_submit_sync_cmd(ns->queue, &c);
 		if (nvme_sc != NVME_SC_SUCCESS) {
 			nvme_unmap_user_pages(dev,
 				(is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
@@ -2696,7 +2696,7 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 			c.common.opcode = nvme_cmd_flush;
 			c.common.nsid = cpu_to_le32(ns->ns_id);
 
-			nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL);
+			nvme_sc = nvme_submit_sync_cmd(ns->queue, &c);
 			res = nvme_trans_status_code(hdr, nvme_sc);
 			if (res)
 				goto out;
@@ -2724,8 +2724,7 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
 	c.common.opcode = nvme_cmd_flush;
 	c.common.nsid = cpu_to_le32(ns->ns_id);
 
-	nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL);
-
+	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		goto out;
@@ -2932,7 +2931,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	c.dsm.nr = cpu_to_le32(ndesc - 1);
 	c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
 
-	nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL);
+	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 
 	dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range),
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 8dbd05e70f09..61488b2ae291 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -158,11 +158,7 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
 				unsigned long addr, unsigned length);
 void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
 			struct nvme_iod *iod);
-int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_ns *,
-						struct nvme_command *, u32 *);
-int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns);
-int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *,
-							u32 *result);
+int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd);
 int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns,
 							dma_addr_t dma_addr);
 int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
-- 
cgit v1.2.3


From e75ec752d725b7b612c0b2db1bca50a9e53c0879 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 22 May 2015 11:12:39 +0200
Subject: nvme: store a struct device pointer in struct nvme_dev

Most users want the generic device, so store that in struct nvme_dev
instead of the pci_dev.  This also happens to be a nice step towards
making some code reusable for non-PCI transports.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-core.c | 110 ++++++++++++++++++++++------------------------
 drivers/block/nvme-scsi.c |  63 +++++++++++---------------
 include/linux/nvme.h      |   2 +-
 3 files changed, 79 insertions(+), 96 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index e81b205ffd04..870a926e1ddc 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -610,17 +610,17 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 		req->errors = 0;
 
 	if (cmd_rq->aborted)
-		dev_warn(&nvmeq->dev->pci_dev->dev,
+		dev_warn(nvmeq->dev->dev,
 			"completing aborted command with status:%04x\n",
 			status);
 
 	if (iod->nents) {
-		dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg, iod->nents,
+		dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents,
 			rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 		if (blk_integrity_rq(req)) {
 			if (!rq_data_dir(req))
 				nvme_dif_remap(req, nvme_dif_complete);
-			dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->meta_sg, 1,
+			dma_unmap_sg(nvmeq->dev->dev, iod->meta_sg, 1,
 				rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 		}
 	}
@@ -861,7 +861,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 		if (blk_rq_bytes(req) !=
                     nvme_setup_prps(nvmeq->dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) {
-			dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg,
+			dma_unmap_sg(nvmeq->dev->dev, iod->sg,
 					iod->nents, dma_dir);
 			goto retry_cmd;
 		}
@@ -1192,8 +1192,7 @@ static void nvme_abort_req(struct request *req)
 		if (work_busy(&dev->reset_work))
 			goto out;
 		list_del_init(&dev->node);
-		dev_warn(&dev->pci_dev->dev,
-			"I/O %d QID %d timeout, reset controller\n",
+		dev_warn(dev->dev, "I/O %d QID %d timeout, reset controller\n",
 							req->tag, nvmeq->qid);
 		dev->reset_workfn = nvme_reset_failed_dev;
 		queue_work(nvme_workq, &dev->reset_work);
@@ -1362,22 +1361,21 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 							int depth)
 {
-	struct device *dmadev = &dev->pci_dev->dev;
 	struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL);
 	if (!nvmeq)
 		return NULL;
 
-	nvmeq->cqes = dma_zalloc_coherent(dmadev, CQ_SIZE(depth),
+	nvmeq->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth),
 					  &nvmeq->cq_dma_addr, GFP_KERNEL);
 	if (!nvmeq->cqes)
 		goto free_nvmeq;
 
-	nvmeq->sq_cmds = dma_alloc_coherent(dmadev, SQ_SIZE(depth),
+	nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
 					&nvmeq->sq_dma_addr, GFP_KERNEL);
 	if (!nvmeq->sq_cmds)
 		goto free_cqdma;
 
-	nvmeq->q_dmadev = dmadev;
+	nvmeq->q_dmadev = dev->dev;
 	nvmeq->dev = dev;
 	snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d",
 			dev->instance, qid);
@@ -1393,7 +1391,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	return nvmeq;
 
  free_cqdma:
-	dma_free_coherent(dmadev, CQ_SIZE(depth), (void *)nvmeq->cqes,
+	dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)nvmeq->cqes,
 							nvmeq->cq_dma_addr);
  free_nvmeq:
 	kfree(nvmeq);
@@ -1465,7 +1463,7 @@ static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
 		if (fatal_signal_pending(current))
 			return -EINTR;
 		if (time_after(jiffies, timeout)) {
-			dev_err(&dev->pci_dev->dev,
+			dev_err(dev->dev,
 				"Device not ready; aborting %s\n", enabled ?
 						"initialisation" : "reset");
 			return -ENODEV;
@@ -1515,7 +1513,7 @@ static int nvme_shutdown_ctrl(struct nvme_dev *dev)
 		if (fatal_signal_pending(current))
 			return -EINTR;
 		if (time_after(jiffies, timeout)) {
-			dev_err(&dev->pci_dev->dev,
+			dev_err(dev->dev,
 				"Device shutdown incomplete; abort shutdown\n");
 			return -ENODEV;
 		}
@@ -1558,7 +1556,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 		dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1;
 		dev->admin_tagset.reserved_tags = 1;
 		dev->admin_tagset.timeout = ADMIN_TIMEOUT;
-		dev->admin_tagset.numa_node = dev_to_node(&dev->pci_dev->dev);
+		dev->admin_tagset.numa_node = dev_to_node(dev->dev);
 		dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
 		dev->admin_tagset.driver_data = dev;
 
@@ -1591,14 +1589,14 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;
 
 	if (page_shift < dev_page_min) {
-		dev_err(&dev->pci_dev->dev,
+		dev_err(dev->dev,
 				"Minimum device page size (%u) too large for "
 				"host (%u)\n", 1 << dev_page_min,
 				1 << page_shift);
 		return -ENODEV;
 	}
 	if (page_shift > dev_page_max) {
-		dev_info(&dev->pci_dev->dev,
+		dev_info(dev->dev,
 				"Device maximum page size (%u) smaller than "
 				"host (%u); enabling work-around\n",
 				1 << dev_page_max, 1 << page_shift);
@@ -1689,7 +1687,7 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
 	sg_mark_end(&sg[i - 1]);
 	iod->nents = count;
 
-	nents = dma_map_sg(&dev->pci_dev->dev, sg, count,
+	nents = dma_map_sg(dev->dev, sg, count,
 				write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 	if (!nents)
 		goto free_iod;
@@ -1711,7 +1709,7 @@ void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
 {
 	int i;
 
-	dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
+	dma_unmap_sg(dev->dev, iod->sg, iod->nents,
 				write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 
 	for (i = 0; i < iod->nents; i++)
@@ -1762,7 +1760,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 		goto unmap;
 	}
 	if (meta_len) {
-		meta = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
+		meta = dma_alloc_coherent(dev->dev, meta_len,
 						&meta_dma, GFP_KERNEL);
 		if (!meta) {
 			status = -ENOMEM;
@@ -1801,7 +1799,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 								meta_len))
 				status = -EFAULT;
 		}
-		dma_free_coherent(&dev->pci_dev->dev, meta_len, meta, meta_dma);
+		dma_free_coherent(dev->dev, meta_len, meta, meta_dma);
 	}
 	return status;
 }
@@ -1961,15 +1959,13 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 	u16 old_ms;
 	unsigned short bs;
 
-	id = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
-								GFP_KERNEL);
+	id = dma_alloc_coherent(dev->dev, 4096, &dma_addr, GFP_KERNEL);
 	if (!id) {
-		dev_warn(&dev->pci_dev->dev, "%s: Memory alocation failure\n",
-								__func__);
+		dev_warn(dev->dev, "%s: Memory alocation failure\n", __func__);
 		return 0;
 	}
 	if (nvme_identify(dev, ns->ns_id, 0, dma_addr)) {
-		dev_warn(&dev->pci_dev->dev,
+		dev_warn(dev->dev,
 			"identify failed ns:%d, setting capacity to 0\n",
 			ns->ns_id);
 		memset(id, 0, sizeof(*id));
@@ -2014,7 +2010,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 	if (dev->oncs & NVME_CTRL_ONCS_DSM)
 		nvme_config_discard(ns);
 
-	dma_free_coherent(&dev->pci_dev->dev, 4096, id, dma_addr);
+	dma_free_coherent(dev->dev, 4096, id, dma_addr);
 	return 0;
 }
 
@@ -2041,7 +2037,7 @@ static int nvme_kthread(void *data)
 				if (work_busy(&dev->reset_work))
 					continue;
 				list_del_init(&dev->node);
-				dev_warn(&dev->pci_dev->dev,
+				dev_warn(dev->dev,
 					"Failed status: %x, reset controller\n",
 					readl(&dev->bar->csts));
 				dev->reset_workfn = nvme_reset_failed_dev;
@@ -2073,7 +2069,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
 {
 	struct nvme_ns *ns;
 	struct gendisk *disk;
-	int node = dev_to_node(&dev->pci_dev->dev);
+	int node = dev_to_node(dev->dev);
 
 	ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
 	if (!ns)
@@ -2156,8 +2152,7 @@ static int set_queue_count(struct nvme_dev *dev, int count)
 	if (status < 0)
 		return status;
 	if (status > 0) {
-		dev_err(&dev->pci_dev->dev, "Could not set queue count (%d)\n",
-									status);
+		dev_err(dev->dev, "Could not set queue count (%d)\n", status);
 		return 0;
 	}
 	return min(result & 0xffff, result >> 16) + 1;
@@ -2171,7 +2166,7 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
 static int nvme_setup_io_queues(struct nvme_dev *dev)
 {
 	struct nvme_queue *adminq = dev->queues[0];
-	struct pci_dev *pdev = dev->pci_dev;
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
 	int result, i, vecs, nr_io_queues, size;
 
 	nr_io_queues = num_possible_cpus();
@@ -2251,7 +2246,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
  */
 static int nvme_dev_add(struct nvme_dev *dev)
 {
-	struct pci_dev *pdev = dev->pci_dev;
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
 	int res;
 	unsigned nn, i;
 	struct nvme_id_ctrl *ctrl;
@@ -2259,14 +2254,14 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	dma_addr_t dma_addr;
 	int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
 
-	mem = dma_alloc_coherent(&pdev->dev, 4096, &dma_addr, GFP_KERNEL);
+	mem = dma_alloc_coherent(dev->dev, 4096, &dma_addr, GFP_KERNEL);
 	if (!mem)
 		return -ENOMEM;
 
 	res = nvme_identify(dev, 0, 1, dma_addr);
 	if (res) {
-		dev_err(&pdev->dev, "Identify Controller failed (%d)\n", res);
-		dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr);
+		dev_err(dev->dev, "Identify Controller failed (%d)\n", res);
+		dma_free_coherent(dev->dev, 4096, mem, dma_addr);
 		return -EIO;
 	}
 
@@ -2292,12 +2287,12 @@ static int nvme_dev_add(struct nvme_dev *dev)
 		} else
 			dev->max_hw_sectors = max_hw_sectors;
 	}
-	dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr);
+	dma_free_coherent(dev->dev, 4096, mem, dma_addr);
 
 	dev->tagset.ops = &nvme_mq_ops;
 	dev->tagset.nr_hw_queues = dev->online_queues - 1;
 	dev->tagset.timeout = NVME_IO_TIMEOUT;
-	dev->tagset.numa_node = dev_to_node(&dev->pci_dev->dev);
+	dev->tagset.numa_node = dev_to_node(dev->dev);
 	dev->tagset.queue_depth =
 				min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
 	dev->tagset.cmd_size = nvme_cmd_size(dev);
@@ -2317,7 +2312,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
 {
 	u64 cap;
 	int bars, result = -ENOMEM;
-	struct pci_dev *pdev = dev->pci_dev;
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
 	if (pci_enable_device_mem(pdev))
 		return result;
@@ -2331,8 +2326,8 @@ static int nvme_dev_map(struct nvme_dev *dev)
 	if (pci_request_selected_regions(pdev, bars, "nvme"))
 		goto disable_pci;
 
-	if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) &&
-	    dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
+	if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) &&
+	    dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32)))
 		goto disable;
 
 	dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
@@ -2373,19 +2368,21 @@ static int nvme_dev_map(struct nvme_dev *dev)
 
 static void nvme_dev_unmap(struct nvme_dev *dev)
 {
-	if (dev->pci_dev->msi_enabled)
-		pci_disable_msi(dev->pci_dev);
-	else if (dev->pci_dev->msix_enabled)
-		pci_disable_msix(dev->pci_dev);
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+	if (pdev->msi_enabled)
+		pci_disable_msi(pdev);
+	else if (pdev->msix_enabled)
+		pci_disable_msix(pdev);
 
 	if (dev->bar) {
 		iounmap(dev->bar);
 		dev->bar = NULL;
-		pci_release_regions(dev->pci_dev);
+		pci_release_regions(pdev);
 	}
 
-	if (pci_is_enabled(dev->pci_dev))
-		pci_disable_device(dev->pci_dev);
+	if (pci_is_enabled(pdev))
+		pci_disable_device(pdev);
 }
 
 struct nvme_delq_ctx {
@@ -2504,7 +2501,7 @@ static void nvme_disable_io_queues(struct nvme_dev *dev)
 					&worker, "nvme%d", dev->instance);
 
 	if (IS_ERR(kworker_task)) {
-		dev_err(&dev->pci_dev->dev,
+		dev_err(dev->dev,
 			"Failed to create queue del task\n");
 		for (i = dev->queue_count - 1; i > 0; i--)
 			nvme_disable_queue(dev, i);
@@ -2622,14 +2619,13 @@ static void nvme_dev_remove(struct nvme_dev *dev)
 
 static int nvme_setup_prp_pools(struct nvme_dev *dev)
 {
-	struct device *dmadev = &dev->pci_dev->dev;
-	dev->prp_page_pool = dma_pool_create("prp list page", dmadev,
+	dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
 						PAGE_SIZE, PAGE_SIZE, 0);
 	if (!dev->prp_page_pool)
 		return -ENOMEM;
 
 	/* Optimisation for I/Os between 4k and 128k */
-	dev->prp_small_pool = dma_pool_create("prp list 256", dmadev,
+	dev->prp_small_pool = dma_pool_create("prp list 256", dev->dev,
 						256, 256, 0);
 	if (!dev->prp_small_pool) {
 		dma_pool_destroy(dev->prp_page_pool);
@@ -2693,7 +2689,7 @@ static void nvme_free_dev(struct kref *kref)
 {
 	struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
 
-	pci_dev_put(dev->pci_dev);
+	put_device(dev->dev);
 	put_device(dev->device);
 	nvme_free_namespaces(dev);
 	nvme_release_instance(dev);
@@ -2837,7 +2833,7 @@ static int nvme_dev_start(struct nvme_dev *dev)
 static int nvme_remove_dead_ctrl(void *arg)
 {
 	struct nvme_dev *dev = (struct nvme_dev *)arg;
-	struct pci_dev *pdev = dev->pci_dev;
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
 
 	if (pci_get_drvdata(pdev))
 		pci_stop_and_remove_bus_device_locked(pdev);
@@ -2876,11 +2872,11 @@ static void nvme_dev_reset(struct nvme_dev *dev)
 {
 	nvme_dev_shutdown(dev);
 	if (nvme_dev_resume(dev)) {
-		dev_warn(&dev->pci_dev->dev, "Device failed to resume\n");
+		dev_warn(dev->dev, "Device failed to resume\n");
 		kref_get(&dev->kref);
 		if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
 							dev->instance))) {
-			dev_err(&dev->pci_dev->dev,
+			dev_err(dev->dev,
 				"Failed to start controller remove task\n");
 			kref_put(&dev->kref, nvme_free_dev);
 		}
@@ -2924,7 +2920,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	INIT_LIST_HEAD(&dev->namespaces);
 	dev->reset_workfn = nvme_reset_failed_dev;
 	INIT_WORK(&dev->reset_work, nvme_reset_workfn);
-	dev->pci_dev = pci_dev_get(pdev);
+	dev->dev = get_device(&pdev->dev);
 	pci_set_drvdata(pdev, dev);
 	result = nvme_set_instance(dev);
 	if (result)
@@ -2954,7 +2950,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
  release:
 	nvme_release_instance(dev);
  put_pci:
-	pci_dev_put(dev->pci_dev);
+	put_device(dev->dev);
  free:
 	kfree(dev->queues);
 	kfree(dev->entry);
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index ba1809fbd49e..f1c90f273132 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -684,7 +684,7 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
 	u8 cmdque = 0x01 << 1;
 	u8 fw_offset = sizeof(dev->firmware_rev);
 
-	mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
+	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns),
 				&dma_addr, GFP_KERNEL);
 	if (mem == NULL) {
 		res = -ENOMEM;
@@ -728,8 +728,7 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
 	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
 
  out_free:
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
-			  dma_addr);
+	dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr);
  out_dma:
 	return res;
 }
@@ -787,7 +786,7 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	int xfer_len;
 	__be32 tmp_id = cpu_to_be32(ns->ns_id);
 
-	mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
+	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns),
 					&dma_addr, GFP_KERNEL);
 	if (mem == NULL) {
 		res = -ENOMEM;
@@ -842,7 +841,7 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		inq_response[6] = 0x00;    /* Rsvd */
 		inq_response[7] = 0x44;    /* Designator Length */
 
-		sprintf(&inq_response[8], "%04x", dev->pci_dev->vendor);
+		sprintf(&inq_response[8], "%04x", to_pci_dev(dev->dev)->vendor);
 		memcpy(&inq_response[12], dev->model, sizeof(dev->model));
 		sprintf(&inq_response[52], "%04x", tmp_id);
 		memcpy(&inq_response[56], dev->serial, sizeof(dev->serial));
@@ -851,8 +850,7 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
 
  out_free:
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
-			  dma_addr);
+	dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr);
  out_dma:
 	return res;
 }
@@ -883,7 +881,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		goto out_mem;
 	}
 
-	mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
+	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns),
 							&dma_addr, GFP_KERNEL);
 	if (mem == NULL) {
 		res = -ENOMEM;
@@ -933,8 +931,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
 
  out_free:
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
-			  dma_addr);
+	dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr);
  out_dma:
 	kfree(inq_response);
  out_mem:
@@ -1038,8 +1035,7 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
 		goto out_mem;
 	}
 
-	mem = dma_alloc_coherent(&dev->pci_dev->dev,
-					sizeof(struct nvme_smart_log),
+	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_smart_log),
 					&dma_addr, GFP_KERNEL);
 	if (mem == NULL) {
 		res = -ENOMEM;
@@ -1077,7 +1073,7 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
 	xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH);
 	res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
 
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log),
+	dma_free_coherent(dev->dev, sizeof(struct nvme_smart_log),
 			  mem, dma_addr);
  out_dma:
 	kfree(log_response);
@@ -1106,8 +1102,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		goto out_mem;
 	}
 
-	mem = dma_alloc_coherent(&dev->pci_dev->dev,
-					sizeof(struct nvme_smart_log),
+	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_smart_log),
 					&dma_addr, GFP_KERNEL);
 	if (mem == NULL) {
 		res = -ENOMEM;
@@ -1158,7 +1153,7 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH);
 	res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
 
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log),
+	dma_free_coherent(dev->dev, sizeof(struct nvme_smart_log),
 			  mem, dma_addr);
  out_dma:
 	kfree(log_response);
@@ -1209,7 +1204,7 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN)
 		return SNTI_INTERNAL_ERROR;
 
-	mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
+	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns),
 							&dma_addr, GFP_KERNEL);
 	if (mem == NULL) {
 		res = -ENOMEM;
@@ -1246,8 +1241,7 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	}
 
  out_dma:
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
-			  dma_addr);
+	dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr);
  out:
 	return res;
 }
@@ -1494,8 +1488,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	unsigned ps_desired = 0;
 
 	/* NVMe Controller Identify */
-	mem = dma_alloc_coherent(&dev->pci_dev->dev,
-				sizeof(struct nvme_id_ctrl),
+	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ctrl),
 				&dma_addr, GFP_KERNEL);
 	if (mem == NULL) {
 		res = -ENOMEM;
@@ -1556,8 +1549,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	if (nvme_sc)
 		res = nvme_sc;
  out_dma:
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ctrl), mem,
-			  dma_addr);
+	dma_free_coherent(dev->dev, sizeof(struct nvme_id_ctrl), mem, dma_addr);
  out:
 	return res;
 }
@@ -1820,7 +1812,7 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
 	 */
 
 	if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) {
-		mem = dma_alloc_coherent(&dev->pci_dev->dev,
+		mem = dma_alloc_coherent(dev->dev,
 			sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL);
 		if (mem == NULL) {
 			res = -ENOMEM;
@@ -1845,7 +1837,7 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
 						(1 << (id_ns->lbaf[flbas].ds));
 		}
  out_dma:
-		dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
+		dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns),
 				  mem, dma_addr);
 	}
  out:
@@ -1928,7 +1920,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	struct nvme_command c;
 
 	/* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */
-	mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
+	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns),
 							&dma_addr, GFP_KERNEL);
 	if (mem == NULL) {
 		res = -ENOMEM;
@@ -1979,8 +1971,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		res = nvme_sc;
 
  out_dma:
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
-			  dma_addr);
+	dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr);
  out:
 	return res;
 }
@@ -2485,7 +2476,7 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		resp_size = READ_CAP_16_RESP_SIZE;
 	}
 
-	mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
+	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns),
 							&dma_addr, GFP_KERNEL);
 	if (mem == NULL) {
 		res = -ENOMEM;
@@ -2514,8 +2505,7 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 
 	kfree(response);
  out_dma:
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
-			  dma_addr);
+	dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr);
  out:
 	return res;
 }
@@ -2548,8 +2538,7 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		goto out;
 	} else {
 		/* NVMe Controller Identify */
-		mem = dma_alloc_coherent(&dev->pci_dev->dev,
-					sizeof(struct nvme_id_ctrl),
+		mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ctrl),
 					&dma_addr, GFP_KERNEL);
 		if (mem == NULL) {
 			res = -ENOMEM;
@@ -2600,8 +2589,7 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 
 	kfree(response);
  out_dma:
-	dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ctrl), mem,
-			  dma_addr);
+	dma_free_coherent(dev->dev, sizeof(struct nvme_id_ctrl), mem, dma_addr);
  out:
 	return res;
 }
@@ -2913,7 +2901,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		goto out;
 	}
 
-	range = dma_alloc_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range),
+	range = dma_alloc_coherent(dev->dev, ndesc * sizeof(*range),
 							&dma_addr, GFP_KERNEL);
 	if (!range)
 		goto out;
@@ -2934,8 +2922,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 
-	dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range),
-							range, dma_addr);
+	dma_free_coherent(dev->dev, ndesc * sizeof(*range), range, dma_addr);
  out:
 	kfree(plist);
 	return res;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 61488b2ae291..de0e49a716b8 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -74,7 +74,7 @@ struct nvme_dev {
 	struct blk_mq_tag_set tagset;
 	struct blk_mq_tag_set admin_tagset;
 	u32 __iomem *dbs;
-	struct pci_dev *pci_dev;
+	struct device *dev;
 	struct dma_pool *prp_page_pool;
 	struct dma_pool *prp_small_pool;
 	int instance;
-- 
cgit v1.2.3


From b90c48d0c11efe373a42a60e66e2ac2a503c287b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 22 May 2015 11:12:40 +0200
Subject: nvme: split nvme_trans_send_fw_cmd

This function handles two totally different opcodes, so split it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-scsi.c | 93 ++++++++++++++++++++++++-----------------------
 1 file changed, 47 insertions(+), 46 deletions(-)

diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index f1c90f273132..60415b52fd34 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -1554,10 +1554,25 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	return res;
 }
 
-/* Write Buffer Helper Functions */
-/* Also using this for Format Unit with hdr passed as NULL, and buffer_id, 0 */
+static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+					u8 buffer_id)
+{
+	struct nvme_command c;
+	int nvme_sc;
+	int res;
+
+	memset(&c, 0, sizeof(c));
+	c.common.opcode = nvme_admin_activate_fw;
+	c.common.cdw10[0] = cpu_to_le32(buffer_id | NVME_FWACT_REPL_ACTV);
+
+	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c);
+	res = nvme_trans_status_code(hdr, nvme_sc);
+	if (res)
+		return res;
+	return nvme_sc;
+}
 
-static int nvme_trans_send_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					u8 opcode, u32 tot_len, u32 offset,
 					u8 buffer_id)
 {
@@ -1569,38 +1584,31 @@ static int nvme_trans_send_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	unsigned length;
 
 	memset(&c, 0, sizeof(c));
-	c.common.opcode = opcode;
-	if (opcode == nvme_admin_download_fw) {
-		if (hdr->iovec_count > 0) {
-			/* Assuming SGL is not allowed for this command */
-			res = nvme_trans_completion(hdr,
-						SAM_STAT_CHECK_CONDITION,
-						ILLEGAL_REQUEST,
-						SCSI_ASC_INVALID_CDB,
-						SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-			goto out;
-		}
-		iod = nvme_map_user_pages(dev, DMA_TO_DEVICE,
-				(unsigned long)hdr->dxferp, tot_len);
-		if (IS_ERR(iod)) {
-			res = PTR_ERR(iod);
-			goto out;
-		}
-		length = nvme_setup_prps(dev, iod, tot_len, GFP_KERNEL);
-		if (length != tot_len) {
-			res = -ENOMEM;
-			goto out_unmap;
-		}
+	c.common.opcode = nvme_admin_download_fw;
 
-		c.dlfw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-		c.dlfw.prp2 = cpu_to_le64(iod->first_dma);
-		c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
-		c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
-	} else if (opcode == nvme_admin_activate_fw) {
-		u32 cdw10 = buffer_id | NVME_FWACT_REPL_ACTV;
-		c.common.cdw10[0] = cpu_to_le32(cdw10);
+	if (hdr->iovec_count > 0) {
+		/* Assuming SGL is not allowed for this command */
+		return nvme_trans_completion(hdr,
+					SAM_STAT_CHECK_CONDITION,
+					ILLEGAL_REQUEST,
+					SCSI_ASC_INVALID_CDB,
+					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+	}
+	iod = nvme_map_user_pages(dev, DMA_TO_DEVICE,
+			(unsigned long)hdr->dxferp, tot_len);
+	if (IS_ERR(iod))
+		return PTR_ERR(iod);
+	length = nvme_setup_prps(dev, iod, tot_len, GFP_KERNEL);
+	if (length != tot_len) {
+		res = -ENOMEM;
+		goto out_unmap;
 	}
 
+	c.dlfw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+	c.dlfw.prp2 = cpu_to_le64(iod->first_dma);
+	c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
+	c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
+
 	nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
@@ -1609,11 +1617,8 @@ static int nvme_trans_send_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		res = nvme_sc;
 
  out_unmap:
-	if (opcode == nvme_admin_download_fw) {
-		nvme_unmap_user_pages(dev, DMA_TO_DEVICE, iod);
-		nvme_free_iod(dev, iod);
-	}
- out:
+	nvme_unmap_user_pages(dev, DMA_TO_DEVICE, iod);
+	nvme_free_iod(dev, iod);
 	return res;
 }
 
@@ -2769,7 +2774,7 @@ static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	}
 
 	/* Attempt to activate any previously downloaded firmware image */
-	res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw, 0, 0, 0);
+	res = nvme_trans_send_activate_fw_cmd(ns, hdr, 0);
 
 	/* Determine Block size and count and send format command */
 	res = nvme_trans_fmt_set_blk_size_count(ns, hdr);
@@ -2829,24 +2834,20 @@ static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 
 	switch (mode) {
 	case DOWNLOAD_SAVE_ACTIVATE:
-		res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_download_fw,
+		res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
 						parm_list_length, buffer_offset,
 						buffer_id);
 		if (res != SNTI_TRANSLATION_SUCCESS)
 			goto out;
-		res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw,
-						parm_list_length, buffer_offset,
-						buffer_id);
+		res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
 		break;
 	case DOWNLOAD_SAVE_DEFER_ACTIVATE:
-		res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_download_fw,
+		res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
 						parm_list_length, buffer_offset,
 						buffer_id);
 		break;
 	case ACTIVATE_DEFERRED_MICROCODE:
-		res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw,
-						parm_list_length, buffer_offset,
-						buffer_id);
+		res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
 		break;
 	default:
 		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
-- 
cgit v1.2.3


From e61b0a86cac83c3bf501705c8d52a0a29cecf091 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 22 May 2015 11:12:41 +0200
Subject: nvme: fix scsi translation error handling

Error handling for the scsi translation was completely broken, as there
were two different positive error number spaces overlapping.  Fix this
up by removing one of them, and centralizing the generation of the other
positive values in a single place.  Also fix up a few places that didn't
handle the NVMe error codes properly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-scsi.c | 378 ++++++++++++++++------------------------------
 1 file changed, 127 insertions(+), 251 deletions(-)

diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 60415b52fd34..390c46dade0a 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -47,9 +47,6 @@
 
 static int sg_version_num = 30534;	/* 2 digits for each component */
 
-#define SNTI_TRANSLATION_SUCCESS			0
-#define SNTI_INTERNAL_ERROR				1
-
 /* VPD Page Codes */
 #define VPD_SUPPORTED_PAGES				0x00
 #define VPD_SERIAL_NUMBER				0x80
@@ -369,8 +366,6 @@ struct nvme_trans_io_cdb {
 static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
 								unsigned long n)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
-	unsigned long not_copied;
 	int i;
 	void *index = from;
 	size_t remaining = n;
@@ -380,29 +375,25 @@ static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
 		struct sg_iovec sgl;
 
 		for (i = 0; i < hdr->iovec_count; i++) {
-			not_copied = copy_from_user(&sgl, hdr->dxferp +
+			if (copy_from_user(&sgl, hdr->dxferp +
 						i * sizeof(struct sg_iovec),
-						sizeof(struct sg_iovec));
-			if (not_copied)
+						sizeof(struct sg_iovec)))
 				return -EFAULT;
 			xfer_len = min(remaining, sgl.iov_len);
-			not_copied = copy_to_user(sgl.iov_base, index,
-								xfer_len);
-			if (not_copied) {
-				res = -EFAULT;
-				break;
-			}
+			if (copy_to_user(sgl.iov_base, index, xfer_len))
+				return -EFAULT;
+
 			index += xfer_len;
 			remaining -= xfer_len;
 			if (remaining == 0)
 				break;
 		}
-		return res;
+		return 0;
 	}
-	not_copied = copy_to_user(hdr->dxferp, from, n);
-	if (not_copied)
-		res = -EFAULT;
-	return res;
+
+	if (copy_to_user(hdr->dxferp, from, n))
+		return -EFAULT;
+	return 0;
 }
 
 /* Copy data from userspace memory */
@@ -410,8 +401,6 @@ static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
 static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
 								unsigned long n)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
-	unsigned long not_copied;
 	int i;
 	void *index = to;
 	size_t remaining = n;
@@ -421,30 +410,24 @@ static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
 		struct sg_iovec sgl;
 
 		for (i = 0; i < hdr->iovec_count; i++) {
-			not_copied = copy_from_user(&sgl, hdr->dxferp +
+			if (copy_from_user(&sgl, hdr->dxferp +
 						i * sizeof(struct sg_iovec),
-						sizeof(struct sg_iovec));
-			if (not_copied)
+						sizeof(struct sg_iovec)))
 				return -EFAULT;
 			xfer_len = min(remaining, sgl.iov_len);
-			not_copied = copy_from_user(index, sgl.iov_base,
-								xfer_len);
-			if (not_copied) {
-				res = -EFAULT;
-				break;
-			}
+			if (copy_from_user(index, sgl.iov_base, xfer_len))
+				return -EFAULT;
 			index += xfer_len;
 			remaining -= xfer_len;
 			if (remaining == 0)
 				break;
 		}
-		return res;
+		return 0;
 	}
 
-	not_copied = copy_from_user(to, hdr->dxferp, n);
-	if (not_copied)
-		res = -EFAULT;
-	return res;
+	if (copy_from_user(to, hdr->dxferp, n))
+		return -EFAULT;
+	return 0;
 }
 
 /* Status/Sense Buffer Writeback */
@@ -452,7 +435,6 @@ static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
 static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key,
 				 u8 asc, u8 ascq)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
 	u8 xfer_len;
 	u8 resp[DESC_FMT_SENSE_DATA_SIZE];
 
@@ -477,25 +459,29 @@ static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key,
 		xfer_len = min_t(u8, hdr->mx_sb_len, DESC_FMT_SENSE_DATA_SIZE);
 		hdr->sb_len_wr = xfer_len;
 		if (copy_to_user(hdr->sbp, resp, xfer_len) > 0)
-			res = -EFAULT;
+			return -EFAULT;
 	}
 
-	return res;
+	return 0;
 }
 
+/*
+ * Take a status code from a lowlevel routine, and if it was a positive NVMe
+ * error code update the sense data based on it.  In either case the passed
+ * in value is returned again, unless an -EFAULT from copy_to_user overrides
+ * it.
+ */
 static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc)
 {
 	u8 status, sense_key, asc, ascq;
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 
 	/* For non-nvme (Linux) errors, simply return the error code */
 	if (nvme_sc < 0)
 		return nvme_sc;
 
 	/* Mask DNR, More, and reserved fields */
-	nvme_sc &= 0x7FF;
-
-	switch (nvme_sc) {
+	switch (nvme_sc & 0x7FF) {
 	/* Generic Command Status */
 	case NVME_SC_SUCCESS:
 		status = SAM_STAT_GOOD;
@@ -662,8 +648,7 @@ static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc)
 	}
 
 	res = nvme_trans_completion(hdr, status, sense_key, asc, ascq);
-
-	return res;
+	return res ? res : nvme_sc;
 }
 
 /* INQUIRY Helper Functions */
@@ -676,7 +661,7 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
 	dma_addr_t dma_addr;
 	void *mem;
 	struct nvme_id_ns *id_ns;
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int nvme_sc;
 	int xfer_len;
 	u8 resp_data_format = 0x02;
@@ -694,19 +679,9 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
 	/* nvme ns identify - use DPS value for PROTECT field */
 	nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
 	res = nvme_trans_status_code(hdr, nvme_sc);
-	/*
-	 * If nvme_sc was -ve, res will be -ve here.
-	 * If nvme_sc was +ve, the status would bace been translated, and res
-	 *  can only be 0 or -ve.
-	 *    - If 0 && nvme_sc > 0, then go into next if where res gets nvme_sc
-	 *    - If -ve, return because its a Linux error.
-	 */
 	if (res)
 		goto out_free;
-	if (nvme_sc) {
-		res = nvme_sc;
-		goto out_free;
-	}
+
 	id_ns = mem;
 	(id_ns->dps) ? (protect = 0x01) : (protect = 0);
 
@@ -737,7 +712,6 @@ static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns,
 					struct sg_io_hdr *hdr, u8 *inq_response,
 					int alloc_len)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
 	int xfer_len;
 
 	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
@@ -751,9 +725,7 @@ static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns,
 	inq_response[9] = INQ_BDEV_LIMITS_PAGE;
 
 	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
-	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
-	return res;
+	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
 }
 
 static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
@@ -761,7 +733,6 @@ static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
 					int alloc_len)
 {
 	struct nvme_dev *dev = ns->dev;
-	int res = SNTI_TRANSLATION_SUCCESS;
 	int xfer_len;
 
 	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
@@ -770,9 +741,7 @@ static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
 	strncpy(&inq_response[4], dev->serial, INQ_SERIAL_NUMBER_LENGTH);
 
 	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
-	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
-	return res;
+	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
 }
 
 static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
@@ -781,7 +750,7 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	struct nvme_dev *dev = ns->dev;
 	dma_addr_t dma_addr;
 	void *mem;
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int nvme_sc;
 	int xfer_len;
 	__be32 tmp_id = cpu_to_be32(ns->ns_id);
@@ -804,10 +773,6 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		res = nvme_trans_status_code(hdr, nvme_sc);
 		if (res)
 			goto out_free;
-		if (nvme_sc) {
-			res = nvme_sc;
-			goto out_free;
-		}
 
 		if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) {
 			if (bitmap_empty(eui, len * 8)) {
@@ -859,7 +824,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					int alloc_len)
 {
 	u8 *inq_response;
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
 	dma_addr_t dma_addr;
@@ -893,10 +858,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		goto out_free;
-	if (nvme_sc) {
-		res = nvme_sc;
-		goto out_free;
-	}
+
 	id_ns = mem;
 	spt = spt_lut[(id_ns->dpc) & 0x07] << 3;
 	(id_ns->dps) ? (protect = 0x01) : (protect = 0);
@@ -909,10 +871,7 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		goto out_free;
-	if (nvme_sc) {
-		res = nvme_sc;
-		goto out_free;
-	}
+
 	id_ctrl = mem;
 	v_sup = id_ctrl->vwc;
 
@@ -961,7 +920,7 @@ static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					int alloc_len)
 {
 	u8 *inq_response;
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int xfer_len;
 
 	inq_response = kzalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
@@ -990,7 +949,7 @@ static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					int alloc_len)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int xfer_len;
 	u8 *log_response;
 
@@ -1018,7 +977,7 @@ static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
 					struct sg_io_hdr *hdr, int alloc_len)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int xfer_len;
 	u8 *log_response;
 	struct nvme_command c;
@@ -1084,7 +1043,7 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
 static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					int alloc_len)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int xfer_len;
 	u8 *log_response;
 	struct nvme_command c;
@@ -1168,7 +1127,7 @@ static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa,
 {
 	/* Quick check to make sure I don't stomp on my own memory... */
 	if ((cdb10 && len < 8) || (!cdb10 && len < 4))
-		return SNTI_INTERNAL_ERROR;
+		return -EINVAL;
 
 	if (cdb10) {
 		resp[0] = (mode_data_length & 0xFF00) >> 8;
@@ -1184,13 +1143,13 @@ static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa,
 		resp[3] = (blk_desc_len & 0x00FF);
 	}
 
-	return SNTI_TRANSLATION_SUCCESS;
+	return 0;
 }
 
 static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 				    u8 *resp, int len, u8 llbaa)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
 	dma_addr_t dma_addr;
@@ -1200,9 +1159,9 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	u32 lba_length;
 
 	if (llbaa == 0 && len < MODE_PAGE_BLK_DES_LEN)
-		return SNTI_INTERNAL_ERROR;
+		return -EINVAL;
 	else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN)
-		return SNTI_INTERNAL_ERROR;
+		return -EINVAL;
 
 	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns),
 							&dma_addr, GFP_KERNEL);
@@ -1216,10 +1175,7 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		goto out_dma;
-	if (nvme_sc) {
-		res = nvme_sc;
-		goto out_dma;
-	}
+
 	id_ns = mem;
 	flbas = (id_ns->flbas) & 0x0F;
 	lba_length = (1 << (id_ns->lbaf[flbas].ds));
@@ -1251,7 +1207,7 @@ static int nvme_trans_fill_control_page(struct nvme_ns *ns,
 					int len)
 {
 	if (len < MODE_PAGE_CONTROL_LEN)
-		return SNTI_INTERNAL_ERROR;
+		return -EINVAL;
 
 	resp[0] = MODE_PAGE_CONTROL;
 	resp[1] = MODE_PAGE_CONTROL_LEN_FIELD;
@@ -1265,78 +1221,69 @@ static int nvme_trans_fill_control_page(struct nvme_ns *ns,
 	resp[9] = 0xFF;
 	/* Bytes 10,11: Extended selftest completion time = 0x0000 */
 
-	return SNTI_TRANSLATION_SUCCESS;
+	return 0;
 }
 
 static int nvme_trans_fill_caching_page(struct nvme_ns *ns,
 					struct sg_io_hdr *hdr,
 					u8 *resp, int len)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res = 0;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
 	u32 feature_resp;
 	u8 vwc;
 
 	if (len < MODE_PAGE_CACHING_LEN)
-		return SNTI_INTERNAL_ERROR;
+		return -EINVAL;
 
 	nvme_sc = nvme_get_features(dev, NVME_FEAT_VOLATILE_WC, 0, 0,
 								&feature_resp);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
-		goto out;
-	if (nvme_sc) {
-		res = nvme_sc;
-		goto out;
-	}
+		return res;
+
 	vwc = feature_resp & 0x00000001;
 
 	resp[0] = MODE_PAGE_CACHING;
 	resp[1] = MODE_PAGE_CACHING_LEN_FIELD;
 	resp[2] = vwc << 2;
-
- out:
-	return res;
+	return 0;
 }
 
 static int nvme_trans_fill_pow_cnd_page(struct nvme_ns *ns,
 					struct sg_io_hdr *hdr, u8 *resp,
 					int len)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
-
 	if (len < MODE_PAGE_POW_CND_LEN)
-		return SNTI_INTERNAL_ERROR;
+		return -EINVAL;
 
 	resp[0] = MODE_PAGE_POWER_CONDITION;
 	resp[1] = MODE_PAGE_POW_CND_LEN_FIELD;
 	/* All other bytes are zero */
 
-	return res;
+	return 0;
 }
 
 static int nvme_trans_fill_inf_exc_page(struct nvme_ns *ns,
 					struct sg_io_hdr *hdr, u8 *resp,
 					int len)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
-
 	if (len < MODE_PAGE_INF_EXC_LEN)
-		return SNTI_INTERNAL_ERROR;
+		return -EINVAL;
 
 	resp[0] = MODE_PAGE_INFO_EXCEP;
 	resp[1] = MODE_PAGE_INF_EXC_LEN_FIELD;
 	resp[2] = 0x88;
 	/* All other bytes are zero */
 
-	return res;
+	return 0;
 }
 
 static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 				     u8 *resp, int len)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	u16 mode_pages_offset_1 = 0;
 	u16 mode_pages_offset_2, mode_pages_offset_3, mode_pages_offset_4;
 
@@ -1346,23 +1293,18 @@ static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 
 	res = nvme_trans_fill_caching_page(ns, hdr, &resp[mode_pages_offset_1],
 					MODE_PAGE_CACHING_LEN);
-	if (res != SNTI_TRANSLATION_SUCCESS)
-		goto out;
+	if (res)
+		return res;
 	res = nvme_trans_fill_control_page(ns, hdr, &resp[mode_pages_offset_2],
 					MODE_PAGE_CONTROL_LEN);
-	if (res != SNTI_TRANSLATION_SUCCESS)
-		goto out;
+	if (res)
+		return res;
 	res = nvme_trans_fill_pow_cnd_page(ns, hdr, &resp[mode_pages_offset_3],
 					MODE_PAGE_POW_CND_LEN);
-	if (res != SNTI_TRANSLATION_SUCCESS)
-		goto out;
-	res = nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4],
+	if (res)
+		return res;
+	return nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4],
 					MODE_PAGE_INF_EXC_LEN);
-	if (res != SNTI_TRANSLATION_SUCCESS)
-		goto out;
-
- out:
-	return res;
 }
 
 static inline int nvme_trans_get_blk_desc_len(u8 dbd, u8 llbaa)
@@ -1383,7 +1325,7 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns,
 					struct sg_io_hdr *hdr, u8 *, int),
 					u16 mode_pages_tot_len)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int xfer_len;
 	u8 *response;
 	u8 dbd, llbaa;
@@ -1412,18 +1354,18 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns,
 
 	res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10,
 					llbaa, mode_data_length, blk_desc_len);
-	if (res != SNTI_TRANSLATION_SUCCESS)
+	if (res)
 		goto out_free;
 	if (blk_desc_len > 0) {
 		res = nvme_trans_fill_blk_desc(ns, hdr,
 					       &response[blk_desc_offset],
 					       blk_desc_len, llbaa);
-		if (res != SNTI_TRANSLATION_SUCCESS)
+		if (res)
 			goto out_free;
 	}
 	res = mode_page_fill_func(ns, hdr, &response[mode_pages_offset_1],
 					mode_pages_tot_len);
-	if (res != SNTI_TRANSLATION_SUCCESS)
+	if (res)
 		goto out_free;
 
 	xfer_len = min(alloc_len, resp_size);
@@ -1478,7 +1420,7 @@ static void nvme_trans_fill_read_cap(u8 *response, struct nvme_id_ns *id_ns,
 static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 						u8 pc, u8 pcmod, u8 start)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
 	dma_addr_t dma_addr;
@@ -1498,10 +1440,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		goto out_dma;
-	if (nvme_sc) {
-		res = nvme_sc;
-		goto out_dma;
-	}
+
 	id_ctrl = mem;
 	lowest_pow_st = max(POWER_STATE_0, (int)(id_ctrl->npss - 1));
 
@@ -1544,10 +1483,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	nvme_sc = nvme_set_features(dev, NVME_FEAT_POWER_MGMT, ps_desired, 0,
 				    NULL);
 	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		goto out_dma;
-	if (nvme_sc)
-		res = nvme_sc;
+
  out_dma:
 	dma_free_coherent(dev->dev, sizeof(struct nvme_id_ctrl), mem, dma_addr);
  out:
@@ -1559,24 +1495,20 @@ static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr
 {
 	struct nvme_command c;
 	int nvme_sc;
-	int res;
 
 	memset(&c, 0, sizeof(c));
 	c.common.opcode = nvme_admin_activate_fw;
 	c.common.cdw10[0] = cpu_to_le32(buffer_id | NVME_FWACT_REPL_ACTV);
 
 	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		return res;
-	return nvme_sc;
+	return nvme_trans_status_code(hdr, nvme_sc);
 }
 
 static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					u8 opcode, u32 tot_len, u32 offset,
 					u8 buffer_id)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_command c;
@@ -1611,10 +1543,6 @@ static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr
 
 	nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c);
 	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		goto out_unmap;
-	if (nvme_sc)
-		res = nvme_sc;
 
  out_unmap:
 	nvme_unmap_user_pages(dev, DMA_TO_DEVICE, iod);
@@ -1682,7 +1610,7 @@ static void nvme_trans_modesel_save_bd(struct nvme_ns *ns, u8 *parm_list,
 static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					u8 *mode_page, u8 page_code)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res = 0;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
 	unsigned dword11;
@@ -1693,12 +1621,6 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		nvme_sc = nvme_set_features(dev, NVME_FEAT_VOLATILE_WC, dword11,
 					    0, NULL);
 		res = nvme_trans_status_code(hdr, nvme_sc);
-		if (res)
-			break;
-		if (nvme_sc) {
-			res = nvme_sc;
-			break;
-		}
 		break;
 	case MODE_PAGE_CONTROL:
 		break;
@@ -1710,8 +1632,6 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 						ILLEGAL_REQUEST,
 						SCSI_ASC_INVALID_PARAMETER,
 						SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-			if (!res)
-				res = SNTI_INTERNAL_ERROR;
 			break;
 		}
 		break;
@@ -1719,8 +1639,6 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
 					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		if (!res)
-			res = SNTI_INTERNAL_ERROR;
 		break;
 	}
 
@@ -1731,7 +1649,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					u8 *cmd, u16 parm_list_len, u8 pf,
 					u8 sp, u8 cdb10)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	u8 *parm_list;
 	u16 bd_len;
 	u8 llbaa = 0;
@@ -1747,7 +1665,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	}
 
 	res = nvme_trans_copy_from_user(hdr, parm_list, parm_list_len);
-	if (res != SNTI_TRANSLATION_SUCCESS)
+	if (res)
 		goto out_mem;
 
 	nvme_trans_modesel_get_bd_len(parm_list, cdb10, &bd_len, &llbaa);
@@ -1785,7 +1703,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		mp_size = parm_list[index + 1] + 2;
 		res = nvme_trans_modesel_get_mp(ns, hdr, &parm_list[index],
 								page_code);
-		if (res != SNTI_TRANSLATION_SUCCESS)
+		if (res)
 			break;
 		index += mp_size;
 	} while (index < parm_list_len);
@@ -1801,7 +1719,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
 					     struct sg_io_hdr *hdr)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res = 0;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
 	dma_addr_t dma_addr;
@@ -1828,10 +1746,7 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
 		res = nvme_trans_status_code(hdr, nvme_sc);
 		if (res)
 			goto out_dma;
-		if (nvme_sc) {
-			res = nvme_sc;
-			goto out_dma;
-		}
+
 		id_ns = mem;
 
 		if (ns->mode_select_num_blocks == 0)
@@ -1852,7 +1767,7 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
 static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
 					u8 format_prot_info, u8 *nvme_pf_code)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	u8 *parm_list;
 	u8 pf_usage, pf_code;
 
@@ -1862,7 +1777,7 @@ static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
 		goto out;
 	}
 	res = nvme_trans_copy_from_user(hdr, parm_list, len);
-	if (res != SNTI_TRANSLATION_SUCCESS)
+	if (res)
 		goto out_mem;
 
 	if ((parm_list[FORMAT_UNIT_IMMED_OFFSET] &
@@ -1912,7 +1827,7 @@ static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
 static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 				   u8 prot_info)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
 	dma_addr_t dma_addr;
@@ -1936,10 +1851,7 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		goto out_dma;
-	if (nvme_sc) {
-		res = nvme_sc;
-		goto out_dma;
-	}
+
 	id_ns = mem;
 	flbas = (id_ns->flbas) & 0x0F;
 	nlbaf = id_ns->nlbaf;
@@ -1970,10 +1882,6 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 
 	nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c);
 	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		goto out_dma;
-	if (nvme_sc)
-		res = nvme_sc;
 
  out_dma:
 	dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr);
@@ -2059,8 +1967,7 @@ static u16 nvme_trans_io_get_control(struct nvme_ns *ns,
 static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 				struct nvme_trans_io_cdb *cdb_info, u8 is_write)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
-	int nvme_sc;
+	int nvme_sc = NVME_SC_SUCCESS;
 	struct nvme_dev *dev = ns->dev;
 	u32 num_cmds;
 	struct nvme_iod *iod;
@@ -2117,18 +2024,16 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		iod = nvme_map_user_pages(dev,
 			(is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
 			(unsigned long)next_mapping_addr, unit_len);
-		if (IS_ERR(iod)) {
-			res = PTR_ERR(iod);
-			goto out;
-		}
+		if (IS_ERR(iod))
+			return PTR_ERR(iod);
+
 		retcode = nvme_setup_prps(dev, iod, unit_len, GFP_KERNEL);
 		if (retcode != unit_len) {
 			nvme_unmap_user_pages(dev,
 				(is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
 				iod);
 			nvme_free_iod(dev, iod);
-			res = -ENOMEM;
-			goto out;
+			return -ENOMEM;
 		}
 		c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
 		c.rw.prp2 = cpu_to_le64(iod->first_dma);
@@ -2136,23 +2041,18 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		nvme_offset += unit_num_blocks;
 
 		nvme_sc = nvme_submit_sync_cmd(ns->queue, &c);
-		if (nvme_sc != NVME_SC_SUCCESS) {
-			nvme_unmap_user_pages(dev,
-				(is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
-				iod);
-			nvme_free_iod(dev, iod);
-			res = nvme_trans_status_code(hdr, nvme_sc);
-			goto out;
-		}
+
 		nvme_unmap_user_pages(dev,
 				(is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
 				iod);
 		nvme_free_iod(dev, iod);
+
+
+		if (nvme_sc != NVME_SC_SUCCESS)
+			break;
 	}
-	res = nvme_trans_status_code(hdr, NVME_SC_SUCCESS);
 
- out:
-	return res;
+	return nvme_trans_status_code(hdr, nvme_sc);
 }
 
 
@@ -2161,7 +2061,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res = 0;
 	struct nvme_trans_io_cdb cdb_info;
 	u8 opcode = cmd[0];
 	u64 xfer_bytes;
@@ -2190,7 +2090,7 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
 		break;
 	default:
 		/* Will never really reach here */
-		res = SNTI_INTERNAL_ERROR;
+		res = -EIO;
 		goto out;
 	}
 
@@ -2232,7 +2132,7 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
 
 	/* Send NVMe IO Command(s) */
 	res = nvme_trans_do_nvme_io(ns, hdr, &cdb_info, is_write);
-	if (res != SNTI_TRANSLATION_SUCCESS)
+	if (res)
 		goto out;
 
  out:
@@ -2242,7 +2142,7 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
 static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res = 0;
 	u8 evpd;
 	u8 page_code;
 	int alloc_len;
@@ -2310,7 +2210,7 @@ static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	u16 alloc_len;
 	u8 sp;
 	u8 pc;
@@ -2357,7 +2257,6 @@ static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
 	u8 cdb10 = 0;
 	u16 parm_list_len;
 	u8 page_format;
@@ -2383,17 +2282,17 @@ static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		 * According to SPC-4 r24, a paramter list length field of 0
 		 * shall not be considered an error
 		 */
-		res = nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len,
+		return nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len,
 						page_format, save_pages, cdb10);
 	}
 
-	return res;
+	return 0;
 }
 
 static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res = 0;
 	u16 alloc_len;
 	u8 cdb10 = 0;
 	u8 page_code;
@@ -2463,7 +2362,7 @@ static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int nvme_sc;
 	u32 alloc_len = READ_CAP_10_RESP_SIZE;
 	u32 resp_size = READ_CAP_10_RESP_SIZE;
@@ -2492,10 +2391,7 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
 		goto out_dma;
-	if (nvme_sc) {
-		res = nvme_sc;
-		goto out_dma;
-	}
+
 	id_ns = mem;
 
 	response = kzalloc(resp_size, GFP_KERNEL);
@@ -2518,7 +2414,7 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int nvme_sc;
 	u32 alloc_len, xfer_len, resp_size;
 	u8 select_report;
@@ -2553,10 +2449,7 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		res = nvme_trans_status_code(hdr, nvme_sc);
 		if (res)
 			goto out_dma;
-		if (nvme_sc) {
-			res = nvme_sc;
-			goto out_dma;
-		}
+
 		id_ctrl = mem;
 		ll_length = le32_to_cpu(id_ctrl->nn) * LUN_ENTRY_SIZE;
 		resp_size = ll_length + LUN_DATA_HEADER_SIZE;
@@ -2602,7 +2495,7 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	u8 alloc_len, xfer_len, resp_size;
 	u8 desc_format;
 	u8 *response;
@@ -2661,7 +2554,7 @@ static int nvme_trans_security_protocol(struct nvme_ns *ns,
 static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	int nvme_sc;
 	struct nvme_command c;
 	u8 immed, pcmod, pc, no_flush, start;
@@ -2679,7 +2572,7 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	start &= START_STOP_UNIT_CDB_START_MASK;
 
 	if (immed != 0) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
+		return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
 					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 	} else {
@@ -2692,24 +2585,16 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 			nvme_sc = nvme_submit_sync_cmd(ns->queue, &c);
 			res = nvme_trans_status_code(hdr, nvme_sc);
 			if (res)
-				goto out;
-			if (nvme_sc) {
-				res = nvme_sc;
-				goto out;
-			}
+				return res;
 		}
 		/* Setup the expected power state transition */
-		res = nvme_trans_power_state(ns, hdr, pc, pcmod, start);
+		return nvme_trans_power_state(ns, hdr, pc, pcmod, start);
 	}
-
- out:
-	return res;
 }
 
 static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
 					struct sg_io_hdr *hdr, u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
 	int nvme_sc;
 	struct nvme_command c;
 
@@ -2718,20 +2603,13 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
 	c.common.nsid = cpu_to_le32(ns->ns_id);
 
 	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-	if (res)
-		goto out;
-	if (nvme_sc)
-		res = nvme_sc;
-
- out:
-	return res;
+	return nvme_trans_status_code(hdr, nvme_sc);
 }
 
 static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res;
 	u8 parm_hdr_len = 0;
 	u8 nvme_pf_code = 0;
 	u8 format_prot_info, long_list, format_data;
@@ -2769,7 +2647,7 @@ static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	if (parm_hdr_len > 0) {
 		res = nvme_trans_fmt_get_parm_header(hdr, parm_hdr_len,
 					format_prot_info, &nvme_pf_code);
-		if (res != SNTI_TRANSLATION_SUCCESS)
+		if (res)
 			goto out;
 	}
 
@@ -2778,7 +2656,7 @@ static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 
 	/* Determine Block size and count and send format command */
 	res = nvme_trans_fmt_set_blk_size_count(ns, hdr);
-	if (res != SNTI_TRANSLATION_SUCCESS)
+	if (res)
 		goto out;
 
 	res = nvme_trans_fmt_send_cmd(ns, hdr, nvme_pf_code);
@@ -2791,23 +2669,20 @@ static int nvme_trans_test_unit_ready(struct nvme_ns *ns,
 					struct sg_io_hdr *hdr,
 					u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
 	struct nvme_dev *dev = ns->dev;
 
 	if (!(readl(&dev->bar->csts) & NVME_CSTS_RDY))
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
+		return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
 					    NOT_READY, SCSI_ASC_LUN_NOT_READY,
 					    SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 	else
-		res = nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0);
-
-	return res;
+		return nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0);
 }
 
 static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	int res = SNTI_TRANSLATION_SUCCESS;
+	int res = 0;
 	u32 buffer_offset, parm_list_length;
 	u8 buffer_id, mode;
 
@@ -2837,7 +2712,7 @@ static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
 						parm_list_length, buffer_offset,
 						buffer_id);
-		if (res != SNTI_TRANSLATION_SUCCESS)
+		if (res)
 			goto out;
 		res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
 		break;
@@ -2893,7 +2768,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		return -ENOMEM;
 
 	res = nvme_trans_copy_from_user(hdr, plist, list_len);
-	if (res != SNTI_TRANSLATION_SUCCESS)
+	if (res)
 		goto out;
 
 	ndesc = be16_to_cpu(plist->unmap_blk_desc_data_len) >> 4;
@@ -3038,15 +2913,16 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr)
 	if (hdr.cmd_len > BLK_MAX_CDB)
 		return -EINVAL;
 
+	/*
+	 * A positive return code means a NVMe status, which has been
+	 * translated to sense data.
+	 */
 	retcode = nvme_scsi_translate(ns, &hdr);
 	if (retcode < 0)
 		return retcode;
-	if (retcode > 0)
-		retcode = SNTI_TRANSLATION_SUCCESS;
 	if (copy_to_user(u_hdr, &hdr, sizeof(sg_io_hdr_t)) > 0)
 		return -EFAULT;
-
-	return retcode;
+	return 0;
 }
 
 int nvme_sg_get_version_num(int __user *ip)
-- 
cgit v1.2.3


From 3726897efde1d7a43b6f966ab81b0c143a176556 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 22 May 2015 11:12:42 +0200
Subject: nvme: first round at deobfuscating the SCSI translation code

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-scsi.c | 326 ++++++++++++----------------------------------
 1 file changed, 82 insertions(+), 244 deletions(-)

diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 390c46dade0a..9fe0a2c5a9be 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -41,6 +41,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/types.h>
+#include <asm/unaligned.h>
 #include <scsi/sg.h>
 #include <scsi/scsi.h>
 
@@ -55,49 +56,14 @@ static int sg_version_num = 30534;	/* 2 digits for each component */
 #define VPD_BLOCK_LIMITS				0xB0
 #define VPD_BLOCK_DEV_CHARACTERISTICS			0xB1
 
-/* CDB offsets */
-#define REPORT_LUNS_CDB_ALLOC_LENGTH_OFFSET		6
-#define REPORT_LUNS_SR_OFFSET				2
-#define READ_CAP_16_CDB_ALLOC_LENGTH_OFFSET		10
-#define REQUEST_SENSE_CDB_ALLOC_LENGTH_OFFSET		4
-#define REQUEST_SENSE_DESC_OFFSET			1
-#define REQUEST_SENSE_DESC_MASK				0x01
-#define DESCRIPTOR_FORMAT_SENSE_DATA_TYPE		1
-#define INQUIRY_EVPD_BYTE_OFFSET			1
-#define INQUIRY_PAGE_CODE_BYTE_OFFSET			2
-#define INQUIRY_EVPD_BIT_MASK				1
-#define INQUIRY_CDB_ALLOCATION_LENGTH_OFFSET		3
-#define START_STOP_UNIT_CDB_IMMED_OFFSET		1
-#define START_STOP_UNIT_CDB_IMMED_MASK			0x1
-#define START_STOP_UNIT_CDB_POWER_COND_MOD_OFFSET	3
-#define START_STOP_UNIT_CDB_POWER_COND_MOD_MASK		0xF
-#define START_STOP_UNIT_CDB_POWER_COND_OFFSET		4
-#define START_STOP_UNIT_CDB_POWER_COND_MASK		0xF0
-#define START_STOP_UNIT_CDB_NO_FLUSH_OFFSET		4
-#define START_STOP_UNIT_CDB_NO_FLUSH_MASK		0x4
-#define START_STOP_UNIT_CDB_START_OFFSET		4
-#define START_STOP_UNIT_CDB_START_MASK			0x1
-#define WRITE_BUFFER_CDB_MODE_OFFSET			1
-#define WRITE_BUFFER_CDB_MODE_MASK			0x1F
-#define WRITE_BUFFER_CDB_BUFFER_ID_OFFSET		2
-#define WRITE_BUFFER_CDB_BUFFER_OFFSET_OFFSET		3
-#define WRITE_BUFFER_CDB_PARM_LIST_LENGTH_OFFSET	6
-#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_OFFSET		1
-#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_MASK		0xC0
-#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_SHIFT		6
-#define FORMAT_UNIT_CDB_LONG_LIST_OFFSET		1
-#define FORMAT_UNIT_CDB_LONG_LIST_MASK			0x20
-#define FORMAT_UNIT_CDB_FORMAT_DATA_OFFSET		1
-#define FORMAT_UNIT_CDB_FORMAT_DATA_MASK		0x10
+/* format unit paramter list offsets */
 #define FORMAT_UNIT_SHORT_PARM_LIST_LEN			4
 #define FORMAT_UNIT_LONG_PARM_LIST_LEN			8
 #define FORMAT_UNIT_PROT_INT_OFFSET			3
 #define FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET		0
 #define FORMAT_UNIT_PROT_FIELD_USAGE_MASK		0x07
-#define UNMAP_CDB_PARAM_LIST_LENGTH_OFFSET		7
 
 /* Misc. defines */
-#define NIBBLE_SHIFT					4
 #define FIXED_SENSE_DATA				0x70
 #define DESC_FORMAT_SENSE_DATA				0x72
 #define FIXED_SENSE_DATA_ADD_LENGTH			10
@@ -145,22 +111,7 @@ static int sg_version_num = 30534;	/* 2 digits for each component */
 #define IO_CDB_WP_MASK					0xE0
 #define IO_CDB_WP_SHIFT					5
 #define IO_CDB_FUA_MASK					0x8
-#define IO_6_CDB_LBA_OFFSET				0
 #define IO_6_CDB_LBA_MASK				0x001FFFFF
-#define IO_6_CDB_TX_LEN_OFFSET				4
-#define IO_6_DEFAULT_TX_LEN				256
-#define IO_10_CDB_LBA_OFFSET				2
-#define IO_10_CDB_TX_LEN_OFFSET				7
-#define IO_10_CDB_WP_OFFSET				1
-#define IO_10_CDB_FUA_OFFSET				1
-#define IO_12_CDB_LBA_OFFSET				2
-#define IO_12_CDB_TX_LEN_OFFSET				6
-#define IO_12_CDB_WP_OFFSET				1
-#define IO_12_CDB_FUA_OFFSET				1
-#define IO_16_CDB_FUA_OFFSET				1
-#define IO_16_CDB_WP_OFFSET				1
-#define IO_16_CDB_LBA_OFFSET				2
-#define IO_16_CDB_TX_LEN_OFFSET				10
 
 /* Mode Sense/Select defines */
 #define MODE_PAGE_INFO_EXCEP				0x1C
@@ -176,23 +127,14 @@ static int sg_version_num = 30534;	/* 2 digits for each component */
 #define MODE_PAGE_INF_EXC_LEN				0x0C
 #define MODE_PAGE_ALL_LEN				0x54
 #define MODE_SENSE6_MPH_SIZE				4
-#define MODE_SENSE6_ALLOC_LEN_OFFSET			4
-#define MODE_SENSE_PAGE_CONTROL_OFFSET			2
 #define MODE_SENSE_PAGE_CONTROL_MASK			0xC0
 #define MODE_SENSE_PAGE_CODE_OFFSET			2
 #define MODE_SENSE_PAGE_CODE_MASK			0x3F
-#define MODE_SENSE_LLBAA_OFFSET				1
 #define MODE_SENSE_LLBAA_MASK				0x10
 #define MODE_SENSE_LLBAA_SHIFT				4
-#define MODE_SENSE_DBD_OFFSET				1
 #define MODE_SENSE_DBD_MASK				8
 #define MODE_SENSE_DBD_SHIFT				3
 #define MODE_SENSE10_MPH_SIZE				8
-#define MODE_SENSE10_ALLOC_LEN_OFFSET			7
-#define MODE_SELECT_CDB_PAGE_FORMAT_OFFSET		1
-#define MODE_SELECT_CDB_SAVE_PAGES_OFFSET		1
-#define MODE_SELECT_6_CDB_PARAM_LIST_LENGTH_OFFSET	4
-#define MODE_SELECT_10_CDB_PARAM_LIST_LENGTH_OFFSET	7
 #define MODE_SELECT_CDB_PAGE_FORMAT_MASK		0x10
 #define MODE_SELECT_CDB_SAVE_PAGES_MASK			0x1
 #define MODE_SELECT_6_BD_OFFSET				3
@@ -218,14 +160,11 @@ static int sg_version_num = 30534;	/* 2 digits for each component */
 #define LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH		0x07
 #define LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE		0x2F
 #define LOG_PAGE_TEMPERATURE_PAGE			0x0D
-#define LOG_SENSE_CDB_SP_OFFSET				1
 #define LOG_SENSE_CDB_SP_NOT_ENABLED			0
-#define LOG_SENSE_CDB_PC_OFFSET				2
 #define LOG_SENSE_CDB_PC_MASK				0xC0
 #define LOG_SENSE_CDB_PC_SHIFT				6
 #define LOG_SENSE_CDB_PC_CUMULATIVE_VALUES		1
 #define LOG_SENSE_CDB_PAGE_CODE_MASK			0x3F
-#define LOG_SENSE_CDB_ALLOC_LENGTH_OFFSET		7
 #define REMAINING_INFO_EXCP_PAGE_LENGTH			0x8
 #define LOG_INFO_EXCP_PAGE_LENGTH			0xC
 #define REMAINING_TEMP_PAGE_LENGTH			0xC
@@ -275,77 +214,11 @@ static int sg_version_num = 30534;	/* 2 digits for each component */
 #define SCSI_ASCQ_POWER_LOSS_EXPECTED			0x08
 #define SCSI_ASCQ_INVALID_LUN_ID			0x09
 
-/**
- * DEVICE_SPECIFIC_PARAMETER in mode parameter header (see sbc2r16) to
- * enable DPOFUA support type 0x10 value.
- */
-#define DEVICE_SPECIFIC_PARAMETER			0
-#define VPD_ID_DESCRIPTOR_LENGTH sizeof(VPD_IDENTIFICATION_DESCRIPTOR)
-
-/* MACROs to extract information from CDBs */
-
-#define GET_OPCODE(cdb)		cdb[0]
-
-#define GET_U8_FROM_CDB(cdb, index) (cdb[index] << 0)
-
-#define GET_U16_FROM_CDB(cdb, index) ((cdb[index] << 8) | (cdb[index + 1] << 0))
-
-#define GET_U24_FROM_CDB(cdb, index) ((cdb[index] << 16) | \
-(cdb[index + 1] <<  8) | \
-(cdb[index + 2] <<  0))
-
-#define GET_U32_FROM_CDB(cdb, index) ((cdb[index] << 24) | \
-(cdb[index + 1] << 16) | \
-(cdb[index + 2] <<  8) | \
-(cdb[index + 3] <<  0))
-
-#define GET_U64_FROM_CDB(cdb, index) ((((u64)cdb[index]) << 56) | \
-(((u64)cdb[index + 1]) << 48) | \
-(((u64)cdb[index + 2]) << 40) | \
-(((u64)cdb[index + 3]) << 32) | \
-(((u64)cdb[index + 4]) << 24) | \
-(((u64)cdb[index + 5]) << 16) | \
-(((u64)cdb[index + 6]) <<  8) | \
-(((u64)cdb[index + 7]) <<  0))
-
-/* Inquiry Helper Macros */
-#define GET_INQ_EVPD_BIT(cdb) \
-((GET_U8_FROM_CDB(cdb, INQUIRY_EVPD_BYTE_OFFSET) &		\
-INQUIRY_EVPD_BIT_MASK) ? 1 : 0)
-
-#define GET_INQ_PAGE_CODE(cdb)					\
-(GET_U8_FROM_CDB(cdb, INQUIRY_PAGE_CODE_BYTE_OFFSET))
-
-#define GET_INQ_ALLOC_LENGTH(cdb)				\
-(GET_U16_FROM_CDB(cdb, INQUIRY_CDB_ALLOCATION_LENGTH_OFFSET))
-
-/* Report LUNs Helper Macros */
-#define GET_REPORT_LUNS_ALLOC_LENGTH(cdb)			\
-(GET_U32_FROM_CDB(cdb, REPORT_LUNS_CDB_ALLOC_LENGTH_OFFSET))
-
-/* Read Capacity Helper Macros */
-#define GET_READ_CAP_16_ALLOC_LENGTH(cdb)			\
-(GET_U32_FROM_CDB(cdb, READ_CAP_16_CDB_ALLOC_LENGTH_OFFSET))
-
-#define IS_READ_CAP_16(cdb)					\
-((cdb[0] == SERVICE_ACTION_IN_16 && cdb[1] == SAI_READ_CAPACITY_16) ? 1 : 0)
-
-/* Request Sense Helper Macros */
-#define GET_REQUEST_SENSE_ALLOC_LENGTH(cdb)			\
-(GET_U8_FROM_CDB(cdb, REQUEST_SENSE_CDB_ALLOC_LENGTH_OFFSET))
-
-/* Mode Sense Helper Macros */
-#define GET_MODE_SENSE_DBD(cdb)					\
-((GET_U8_FROM_CDB(cdb, MODE_SENSE_DBD_OFFSET) & MODE_SENSE_DBD_MASK) >>	\
-MODE_SENSE_DBD_SHIFT)
-
-#define GET_MODE_SENSE_LLBAA(cdb)				\
-((GET_U8_FROM_CDB(cdb, MODE_SENSE_LLBAA_OFFSET) &		\
-MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT)
-
-#define GET_MODE_SENSE_MPH_SIZE(cdb10)				\
-(cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE)
-
+/* copied from drivers/usb/gadget/function/storage_common.h */
+static inline u32 get_unaligned_be24(u8 *buf)
+{
+	return 0xffffff & (u32) get_unaligned_be32(buf - 1);
+}
 
 /* Struct to gather data that needs to be extracted from a SCSI CDB.
    Not conforming to any particular CDB variant, but compatible with all. */
@@ -1334,9 +1207,10 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns,
 	u16 mode_pages_offset_1;
 	u16 blk_desc_len, blk_desc_offset, mode_data_length;
 
-	dbd = GET_MODE_SENSE_DBD(cmd);
-	llbaa = GET_MODE_SENSE_LLBAA(cmd);
-	mph_size = GET_MODE_SENSE_MPH_SIZE(cdb10);
+	dbd = (cmd[1] & MODE_SENSE_DBD_MASK) >> MODE_SENSE_DBD_SHIFT;
+	llbaa = (cmd[1] & MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT;
+	mph_size = cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE;
+
 	blk_desc_len = nvme_trans_get_blk_desc_len(dbd, llbaa);
 
 	resp_size = mph_size + blk_desc_len + mode_pages_tot_len;
@@ -1896,46 +1770,39 @@ static inline void nvme_trans_get_io_cdb6(u8 *cmd,
 {
 	cdb_info->fua = 0;
 	cdb_info->prot_info = 0;
-	cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_6_CDB_LBA_OFFSET) &
-					IO_6_CDB_LBA_MASK;
-	cdb_info->xfer_len = GET_U8_FROM_CDB(cmd, IO_6_CDB_TX_LEN_OFFSET);
+	cdb_info->lba = get_unaligned_be32(&cmd[0]) & IO_6_CDB_LBA_MASK;
+	cdb_info->xfer_len = cmd[4];
 
 	/* sbc3r27 sec 5.32 - TRANSFER LEN of 0 implies a 256 Block transfer */
 	if (cdb_info->xfer_len == 0)
-		cdb_info->xfer_len = IO_6_DEFAULT_TX_LEN;
+		cdb_info->xfer_len = 256;
 }
 
 static inline void nvme_trans_get_io_cdb10(u8 *cmd,
 					struct nvme_trans_io_cdb *cdb_info)
 {
-	cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_10_CDB_FUA_OFFSET) &
-					IO_CDB_FUA_MASK;
-	cdb_info->prot_info = GET_U8_FROM_CDB(cmd, IO_10_CDB_WP_OFFSET) &
-					IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT;
-	cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_10_CDB_LBA_OFFSET);
-	cdb_info->xfer_len = GET_U16_FROM_CDB(cmd, IO_10_CDB_TX_LEN_OFFSET);
+	cdb_info->fua = cmd[1] & IO_CDB_FUA_MASK;
+	cdb_info->prot_info = cmd[1] & IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT;
+	cdb_info->lba = get_unaligned_be32(&cmd[2]);
+	cdb_info->xfer_len = get_unaligned_be16(&cmd[7]);
 }
 
 static inline void nvme_trans_get_io_cdb12(u8 *cmd,
 					struct nvme_trans_io_cdb *cdb_info)
 {
-	cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_12_CDB_FUA_OFFSET) &
-					IO_CDB_FUA_MASK;
-	cdb_info->prot_info = GET_U8_FROM_CDB(cmd, IO_12_CDB_WP_OFFSET) &
-					IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT;
-	cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_12_CDB_LBA_OFFSET);
-	cdb_info->xfer_len = GET_U32_FROM_CDB(cmd, IO_12_CDB_TX_LEN_OFFSET);
+	cdb_info->fua = cmd[1] & IO_CDB_FUA_MASK;
+	cdb_info->prot_info = cmd[1] & IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT;
+	cdb_info->lba = get_unaligned_be32(&cmd[2]);
+	cdb_info->xfer_len = get_unaligned_be32(&cmd[6]);
 }
 
 static inline void nvme_trans_get_io_cdb16(u8 *cmd,
 					struct nvme_trans_io_cdb *cdb_info)
 {
-	cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_16_CDB_FUA_OFFSET) &
-					IO_CDB_FUA_MASK;
-	cdb_info->prot_info = GET_U8_FROM_CDB(cmd, IO_16_CDB_WP_OFFSET) &
-					IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT;
-	cdb_info->lba = GET_U64_FROM_CDB(cmd, IO_16_CDB_LBA_OFFSET);
-	cdb_info->xfer_len = GET_U32_FROM_CDB(cmd, IO_16_CDB_TX_LEN_OFFSET);
+	cdb_info->fua = cmd[1] & IO_CDB_FUA_MASK;
+	cdb_info->prot_info = cmd[1] & IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT;
+	cdb_info->lba = get_unaligned_be64(&cmd[2]);
+	cdb_info->xfer_len = get_unaligned_be32(&cmd[10]);
 }
 
 static inline u32 nvme_trans_io_get_num_cmds(struct sg_io_hdr *hdr,
@@ -2148,9 +2015,9 @@ static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	int alloc_len;
 	u8 *inq_response;
 
-	evpd = GET_INQ_EVPD_BIT(cmd);
-	page_code = GET_INQ_PAGE_CODE(cmd);
-	alloc_len = GET_INQ_ALLOC_LENGTH(cmd);
+	evpd = cmd[1] & 0x01;
+	page_code = cmd[2];
+	alloc_len = get_unaligned_be16(&cmd[3]);
 
 	inq_response = kmalloc(alloc_len, GFP_KERNEL);
 	if (inq_response == NULL) {
@@ -2212,27 +2079,25 @@ static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 {
 	int res;
 	u16 alloc_len;
-	u8 sp;
 	u8 pc;
 	u8 page_code;
 
-	sp = GET_U8_FROM_CDB(cmd, LOG_SENSE_CDB_SP_OFFSET);
-	if (sp != LOG_SENSE_CDB_SP_NOT_ENABLED) {
+	if (cmd[1] != LOG_SENSE_CDB_SP_NOT_ENABLED) {
 		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
 					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 		goto out;
 	}
-	pc = GET_U8_FROM_CDB(cmd, LOG_SENSE_CDB_PC_OFFSET);
-	page_code = pc & LOG_SENSE_CDB_PAGE_CODE_MASK;
-	pc = (pc & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT;
+
+	page_code = cmd[2] & LOG_SENSE_CDB_PAGE_CODE_MASK;
+	pc = (cmd[2] & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT;
 	if (pc != LOG_SENSE_CDB_PC_CUMULATIVE_VALUES) {
 		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
 					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 		goto out;
 	}
-	alloc_len = GET_U16_FROM_CDB(cmd, LOG_SENSE_CDB_ALLOC_LENGTH_OFFSET);
+	alloc_len = get_unaligned_be16(&cmd[7]);
 	switch (page_code) {
 	case LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE:
 		res = nvme_trans_log_supp_pages(ns, hdr, alloc_len);
@@ -2262,18 +2127,13 @@ static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	u8 page_format;
 	u8 save_pages;
 
-	page_format = GET_U8_FROM_CDB(cmd, MODE_SELECT_CDB_PAGE_FORMAT_OFFSET);
-	page_format &= MODE_SELECT_CDB_PAGE_FORMAT_MASK;
+	page_format = cmd[1] & MODE_SELECT_CDB_PAGE_FORMAT_MASK;
+	save_pages = cmd[1] & MODE_SELECT_CDB_SAVE_PAGES_MASK;
 
-	save_pages = GET_U8_FROM_CDB(cmd, MODE_SELECT_CDB_SAVE_PAGES_OFFSET);
-	save_pages &= MODE_SELECT_CDB_SAVE_PAGES_MASK;
-
-	if (GET_OPCODE(cmd) == MODE_SELECT) {
-		parm_list_len = GET_U8_FROM_CDB(cmd,
-				MODE_SELECT_6_CDB_PARAM_LIST_LENGTH_OFFSET);
+	if (cmd[0] == MODE_SELECT) {
+		parm_list_len = cmd[4];
 	} else {
-		parm_list_len = GET_U16_FROM_CDB(cmd,
-				MODE_SELECT_10_CDB_PARAM_LIST_LENGTH_OFFSET);
+		parm_list_len = cmd[7];
 		cdb10 = 1;
 	}
 
@@ -2295,29 +2155,23 @@ static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	int res = 0;
 	u16 alloc_len;
 	u8 cdb10 = 0;
-	u8 page_code;
-	u8 pc;
 
-	if (GET_OPCODE(cmd) == MODE_SENSE) {
-		alloc_len = GET_U8_FROM_CDB(cmd, MODE_SENSE6_ALLOC_LEN_OFFSET);
+	if (cmd[0] == MODE_SENSE) {
+		alloc_len = cmd[4];
 	} else {
-		alloc_len = GET_U16_FROM_CDB(cmd,
-						MODE_SENSE10_ALLOC_LEN_OFFSET);
+		alloc_len = get_unaligned_be16(&cmd[7]);
 		cdb10 = 1;
 	}
 
-	pc = GET_U8_FROM_CDB(cmd, MODE_SENSE_PAGE_CONTROL_OFFSET) &
-						MODE_SENSE_PAGE_CONTROL_MASK;
-	if (pc != MODE_SENSE_PC_CURRENT_VALUES) {
+	if ((cmd[2] & MODE_SENSE_PAGE_CONTROL_MASK) !=
+			MODE_SENSE_PC_CURRENT_VALUES) {
 		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
 					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 		goto out;
 	}
 
-	page_code = GET_U8_FROM_CDB(cmd, MODE_SENSE_PAGE_CODE_OFFSET) &
-					MODE_SENSE_PAGE_CODE_MASK;
-	switch (page_code) {
+	switch (cmd[2] & MODE_SENSE_PAGE_CODE_MASK) {
 	case MODE_PAGE_CACHING:
 		res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
 						cdb10,
@@ -2360,24 +2214,25 @@ static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 }
 
 static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
+							u8 *cmd, u8 cdb16)
 {
 	int res;
 	int nvme_sc;
-	u32 alloc_len = READ_CAP_10_RESP_SIZE;
-	u32 resp_size = READ_CAP_10_RESP_SIZE;
+	u32 alloc_len;
+	u32 resp_size;
 	u32 xfer_len;
-	u8 cdb16;
 	struct nvme_dev *dev = ns->dev;
 	dma_addr_t dma_addr;
 	void *mem;
 	struct nvme_id_ns *id_ns;
 	u8 *response;
 
-	cdb16 = IS_READ_CAP_16(cmd);
 	if (cdb16) {
-		alloc_len = GET_READ_CAP_16_ALLOC_LENGTH(cmd);
+		alloc_len = get_unaligned_be32(&cmd[10]);
 		resp_size = READ_CAP_16_RESP_SIZE;
+	} else {
+		alloc_len = READ_CAP_10_RESP_SIZE;
+		resp_size = READ_CAP_10_RESP_SIZE;
 	}
 
 	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns),
@@ -2417,7 +2272,6 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	int res;
 	int nvme_sc;
 	u32 alloc_len, xfer_len, resp_size;
-	u8 select_report;
 	u8 *response;
 	struct nvme_dev *dev = ns->dev;
 	dma_addr_t dma_addr;
@@ -2427,17 +2281,14 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET;
 	__be32 tmp_len;
 
-	alloc_len = GET_REPORT_LUNS_ALLOC_LENGTH(cmd);
-	select_report = GET_U8_FROM_CDB(cmd, REPORT_LUNS_SR_OFFSET);
-
-	if ((select_report != ALL_LUNS_RETURNED) &&
-	    (select_report != ALL_WELL_KNOWN_LUNS_RETURNED) &&
-	    (select_report != RESTRICTED_LUNS_RETURNED)) {
-		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
+	switch (cmd[2]) {
+	default:
+		return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
 					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-		goto out;
-	} else {
+	case ALL_LUNS_RETURNED:
+	case ALL_WELL_KNOWN_LUNS_RETURNED:
+	case RESTRICTED_LUNS_RETURNED:
 		/* NVMe Controller Identify */
 		mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ctrl),
 					&dma_addr, GFP_KERNEL);
@@ -2454,6 +2305,7 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		ll_length = le32_to_cpu(id_ctrl->nn) * LUN_ENTRY_SIZE;
 		resp_size = ll_length + LUN_DATA_HEADER_SIZE;
 
+		alloc_len = get_unaligned_be32(&cmd[6]);
 		if (alloc_len < resp_size) {
 			res = nvme_trans_completion(hdr,
 					SAM_STAT_CHECK_CONDITION,
@@ -2500,9 +2352,8 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	u8 desc_format;
 	u8 *response;
 
-	alloc_len = GET_REQUEST_SENSE_ALLOC_LENGTH(cmd);
-	desc_format = GET_U8_FROM_CDB(cmd, REQUEST_SENSE_DESC_OFFSET);
-	desc_format &= REQUEST_SENSE_DESC_MASK;
+	desc_format = cmd[1] & 0x01;
+	alloc_len = cmd[4];
 
 	resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) :
 					(FIXED_FMT_SENSE_DATA_SIZE));
@@ -2512,7 +2363,7 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		goto out;
 	}
 
-	if (desc_format == DESCRIPTOR_FORMAT_SENSE_DATA_TYPE) {
+	if (desc_format) {
 		/* Descriptor Format Sense Data */
 		response[0] = DESC_FORMAT_SENSE_DATA;
 		response[1] = NO_SENSE;
@@ -2559,17 +2410,11 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	struct nvme_command c;
 	u8 immed, pcmod, pc, no_flush, start;
 
-	immed = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_IMMED_OFFSET);
-	pcmod = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_POWER_COND_MOD_OFFSET);
-	pc = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_POWER_COND_OFFSET);
-	no_flush = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_NO_FLUSH_OFFSET);
-	start = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_START_OFFSET);
-
-	immed &= START_STOP_UNIT_CDB_IMMED_MASK;
-	pcmod &= START_STOP_UNIT_CDB_POWER_COND_MOD_MASK;
-	pc = (pc & START_STOP_UNIT_CDB_POWER_COND_MASK) >> NIBBLE_SHIFT;
-	no_flush &= START_STOP_UNIT_CDB_NO_FLUSH_MASK;
-	start &= START_STOP_UNIT_CDB_START_MASK;
+	immed = cmd[1] & 0x01;
+	pcmod = cmd[3] & 0x0f;
+	pc = (cmd[4] & 0xf0) >> 4;
+	no_flush = cmd[4] & 0x04;
+	start = cmd[4] & 0x01;
 
 	if (immed != 0) {
 		return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
@@ -2614,16 +2459,9 @@ static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	u8 nvme_pf_code = 0;
 	u8 format_prot_info, long_list, format_data;
 
-	format_prot_info = GET_U8_FROM_CDB(cmd,
-				FORMAT_UNIT_CDB_FORMAT_PROT_INFO_OFFSET);
-	long_list = GET_U8_FROM_CDB(cmd, FORMAT_UNIT_CDB_LONG_LIST_OFFSET);
-	format_data = GET_U8_FROM_CDB(cmd, FORMAT_UNIT_CDB_FORMAT_DATA_OFFSET);
-
-	format_prot_info = (format_prot_info &
-				FORMAT_UNIT_CDB_FORMAT_PROT_INFO_MASK) >>
-				FORMAT_UNIT_CDB_FORMAT_PROT_INFO_SHIFT;
-	long_list &= FORMAT_UNIT_CDB_LONG_LIST_MASK;
-	format_data &= FORMAT_UNIT_CDB_FORMAT_DATA_MASK;
+	format_prot_info = (cmd[1] & 0xc0) >> 6;
+	long_list = cmd[1] & 0x20;
+	format_data = cmd[1] & 0x10;
 
 	if (format_data != 0) {
 		if (format_prot_info != 0) {
@@ -2686,8 +2524,7 @@ static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	u32 buffer_offset, parm_list_length;
 	u8 buffer_id, mode;
 
-	parm_list_length =
-		GET_U24_FROM_CDB(cmd, WRITE_BUFFER_CDB_PARM_LIST_LENGTH_OFFSET);
+	parm_list_length = get_unaligned_be24(&cmd[6]);
 	if (parm_list_length % BYTES_TO_DWORDS != 0) {
 		/* NVMe expects Firmware file to be a whole number of DWORDS */
 		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
@@ -2695,17 +2532,15 @@ static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 		goto out;
 	}
-	buffer_id = GET_U8_FROM_CDB(cmd, WRITE_BUFFER_CDB_BUFFER_ID_OFFSET);
+	buffer_id = cmd[2];
 	if (buffer_id > NVME_MAX_FIRMWARE_SLOT) {
 		res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
 					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 		goto out;
 	}
-	mode = GET_U8_FROM_CDB(cmd, WRITE_BUFFER_CDB_MODE_OFFSET) &
-						WRITE_BUFFER_CDB_MODE_MASK;
-	buffer_offset =
-		GET_U24_FROM_CDB(cmd, WRITE_BUFFER_CDB_BUFFER_OFFSET_OFFSET);
+	mode = cmd[1] & 0x1f;
+	buffer_offset = get_unaligned_be24(&cmd[3]);
 
 	switch (mode) {
 	case DOWNLOAD_SAVE_ACTIVATE:
@@ -2759,7 +2594,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	u16 ndesc, list_len;
 	dma_addr_t dma_addr;
 
-	list_len = GET_U16_FROM_CDB(cmd, UNMAP_CDB_PARAM_LIST_LENGTH_OFFSET);
+	list_len = get_unaligned_be16(&cmd[7]);
 	if (!list_len)
 		return -EINVAL;
 
@@ -2853,13 +2688,16 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
 		retcode = nvme_trans_mode_sense(ns, hdr, cmd);
 		break;
 	case READ_CAPACITY:
-		retcode = nvme_trans_read_capacity(ns, hdr, cmd);
+		retcode = nvme_trans_read_capacity(ns, hdr, cmd, 0);
 		break;
 	case SERVICE_ACTION_IN_16:
-		if (IS_READ_CAP_16(cmd))
-			retcode = nvme_trans_read_capacity(ns, hdr, cmd);
-		else
+		switch (cmd[1]) {
+		case SAI_READ_CAPACITY_16:
+			retcode = nvme_trans_read_capacity(ns, hdr, cmd, 1);
+			break;
+		default:
 			goto out;
+		}
 		break;
 	case REPORT_LUNS:
 		retcode = nvme_trans_report_luns(ns, hdr, cmd);
-- 
cgit v1.2.3


From cbbb7a2ec6001a0c15297c85184c9cc7fae5f11e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 22 May 2015 11:12:43 +0200
Subject: nvme: simplify and cleanup the READ/WRITE SCSI CDB parsing code

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-scsi.c | 78 +++++++++++++++--------------------------------
 1 file changed, 24 insertions(+), 54 deletions(-)

diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 9fe0a2c5a9be..b119143e4433 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -107,12 +107,6 @@ static int sg_version_num = 30534;	/* 2 digits for each component */
 #define EXTENDED_INQUIRY_DATA_PAGE_LENGTH		0x3C
 #define RESERVED_FIELD					0
 
-/* SCSI READ/WRITE Defines */
-#define IO_CDB_WP_MASK					0xE0
-#define IO_CDB_WP_SHIFT					5
-#define IO_CDB_FUA_MASK					0x8
-#define IO_6_CDB_LBA_MASK				0x001FFFFF
-
 /* Mode Sense/Select defines */
 #define MODE_PAGE_INFO_EXCEP				0x1C
 #define MODE_PAGE_CACHING				0x08
@@ -1763,48 +1757,6 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	return res;
 }
 
-/* Read/Write Helper Functions */
-
-static inline void nvme_trans_get_io_cdb6(u8 *cmd,
-					struct nvme_trans_io_cdb *cdb_info)
-{
-	cdb_info->fua = 0;
-	cdb_info->prot_info = 0;
-	cdb_info->lba = get_unaligned_be32(&cmd[0]) & IO_6_CDB_LBA_MASK;
-	cdb_info->xfer_len = cmd[4];
-
-	/* sbc3r27 sec 5.32 - TRANSFER LEN of 0 implies a 256 Block transfer */
-	if (cdb_info->xfer_len == 0)
-		cdb_info->xfer_len = 256;
-}
-
-static inline void nvme_trans_get_io_cdb10(u8 *cmd,
-					struct nvme_trans_io_cdb *cdb_info)
-{
-	cdb_info->fua = cmd[1] & IO_CDB_FUA_MASK;
-	cdb_info->prot_info = cmd[1] & IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT;
-	cdb_info->lba = get_unaligned_be32(&cmd[2]);
-	cdb_info->xfer_len = get_unaligned_be16(&cmd[7]);
-}
-
-static inline void nvme_trans_get_io_cdb12(u8 *cmd,
-					struct nvme_trans_io_cdb *cdb_info)
-{
-	cdb_info->fua = cmd[1] & IO_CDB_FUA_MASK;
-	cdb_info->prot_info = cmd[1] & IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT;
-	cdb_info->lba = get_unaligned_be32(&cmd[2]);
-	cdb_info->xfer_len = get_unaligned_be32(&cmd[6]);
-}
-
-static inline void nvme_trans_get_io_cdb16(u8 *cmd,
-					struct nvme_trans_io_cdb *cdb_info)
-{
-	cdb_info->fua = cmd[1] & IO_CDB_FUA_MASK;
-	cdb_info->prot_info = cmd[1] & IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT;
-	cdb_info->lba = get_unaligned_be64(&cmd[2]);
-	cdb_info->xfer_len = get_unaligned_be32(&cmd[10]);
-}
-
 static inline u32 nvme_trans_io_get_num_cmds(struct sg_io_hdr *hdr,
 					struct nvme_trans_io_cdb *cdb_info,
 					u32 max_blocks)
@@ -1929,7 +1881,7 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
 							u8 *cmd)
 {
 	int res = 0;
-	struct nvme_trans_io_cdb cdb_info;
+	struct nvme_trans_io_cdb cdb_info = { 0, };
 	u8 opcode = cmd[0];
 	u64 xfer_bytes;
 	u64 sum_iov_len = 0;
@@ -1937,23 +1889,41 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
 	int i;
 	size_t not_copied;
 
-	/* Extract Fields from CDB */
+	/*
+	 * The FUA and WPROTECT fields are not supported in 6-byte CDBs,
+	 * but always in the same place for all others.
+	 */
+	switch (opcode) {
+	case WRITE_6:
+	case READ_6:
+		break;
+	default:
+		cdb_info.fua = cmd[1] & 0x8;
+		cdb_info.prot_info = (cmd[1] & 0xe0) >> 5;
+	}
+
 	switch (opcode) {
 	case WRITE_6:
 	case READ_6:
-		nvme_trans_get_io_cdb6(cmd, &cdb_info);
+		cdb_info.lba = get_unaligned_be24(&cmd[1]);
+		cdb_info.xfer_len = cmd[4];
+		if (cdb_info.xfer_len == 0)
+			cdb_info.xfer_len = 256;
 		break;
 	case WRITE_10:
 	case READ_10:
-		nvme_trans_get_io_cdb10(cmd, &cdb_info);
+		cdb_info.lba = get_unaligned_be32(&cmd[2]);
+		cdb_info.xfer_len = get_unaligned_be16(&cmd[7]);
 		break;
 	case WRITE_12:
 	case READ_12:
-		nvme_trans_get_io_cdb12(cmd, &cdb_info);
+		cdb_info.lba = get_unaligned_be32(&cmd[2]);
+		cdb_info.xfer_len = get_unaligned_be32(&cmd[6]);
 		break;
 	case WRITE_16:
 	case READ_16:
-		nvme_trans_get_io_cdb16(cmd, &cdb_info);
+		cdb_info.lba = get_unaligned_be64(&cmd[2]);
+		cdb_info.xfer_len = get_unaligned_be32(&cmd[10]);
 		break;
 	default:
 		/* Will never really reach here */
-- 
cgit v1.2.3


From 908517684807f3b3d93893da78c7906f5ff2c49b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 22 May 2015 11:12:44 +0200
Subject: nvme: report the DPOFUA in MODE_SENSE

NVMe devices always support the FUA bit, and the SCSI translation
accepts the DPO bit, which doesn't have much meaning for us.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-scsi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index b119143e4433..f53da60b657d 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -999,14 +999,14 @@ static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa,
 	if (cdb10) {
 		resp[0] = (mode_data_length & 0xFF00) >> 8;
 		resp[1] = (mode_data_length & 0x00FF);
-		/* resp[2] and [3] are zero */
+		resp[3] = 0x10 /* DPOFUA */;
 		resp[4] = llbaa;
 		resp[5] = RESERVED_FIELD;
 		resp[6] = (blk_desc_len & 0xFF00) >> 8;
 		resp[7] = (blk_desc_len & 0x00FF);
 	} else {
 		resp[0] = (mode_data_length & 0x00FF);
-		/* resp[1] and [2] are zero */
+		resp[2] = 0x10 /* DPOFUA */;
 		resp[3] = (blk_desc_len & 0x00FF);
 	}
 
-- 
cgit v1.2.3


From 772ce43559e076730ddff5907fabcb3485545e38 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 22 May 2015 11:12:45 +0200
Subject: nvme: fail SCSI read/write command with unsupported protection bit

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-scsi.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index f53da60b657d..342f5b7f840d 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -1900,6 +1900,13 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
 	default:
 		cdb_info.fua = cmd[1] & 0x8;
 		cdb_info.prot_info = (cmd[1] & 0xe0) >> 5;
+		if (cdb_info.prot_info && !ns->pi_type) {
+			return nvme_trans_completion(hdr,
+					SAM_STAT_CHECK_CONDITION,
+					ILLEGAL_REQUEST,
+					SCSI_ASC_INVALID_CDB,
+					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+		}
 	}
 
 	switch (opcode) {
-- 
cgit v1.2.3


From d29ec8241c10eacf59c23b3828a88dbae06e7e3f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 22 May 2015 11:12:46 +0200
Subject: nvme: submit internal commands through the block layer

Use block layer queues with an internal cmd_type to submit internally
generated NVMe commands.  This both simplifies the code a lot and allows
for a better structure.  For example, the LightNVM code can now construct
commands without knowing the details of the underlying I/O descriptors.
A future NVMe over network target could also inject commands, and the
SCSI translation and ioctl code could be reused for such a beast.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-core.c | 399 +++++++++++++++++++------------------------
 drivers/block/nvme-scsi.c | 422 +++++++++++++---------------------------------
 include/linux/nvme.h      |  24 +--
 3 files changed, 300 insertions(+), 545 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 870a926e1ddc..03bd638e76dd 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -445,7 +445,7 @@ static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev,
 				(unsigned long) rq, gfp);
 }
 
-void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
+static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
 {
 	const int last_prp = dev->page_size / 8 - 1;
 	int i;
@@ -605,7 +605,12 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 			spin_unlock_irqrestore(req->q->queue_lock, flags);
 			return;
 		}
-		req->errors = nvme_error_status(status);
+		if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
+			req->sense_len = le32_to_cpup(&cqe->result);
+			req->errors = status;
+		} else {
+			req->errors = nvme_error_status(status);
+		}
 	} else
 		req->errors = 0;
 
@@ -630,8 +635,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 }
 
 /* length is in bytes.  gfp flags indicates whether we may sleep. */
-int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
-								gfp_t gfp)
+static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
+		int total_len, gfp_t gfp)
 {
 	struct dma_pool *pool;
 	int length = total_len;
@@ -709,6 +714,23 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
 	return total_len;
 }
 
+static void nvme_submit_priv(struct nvme_queue *nvmeq, struct request *req,
+		struct nvme_iod *iod)
+{
+	struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
+
+	memcpy(cmnd, req->cmd, sizeof(struct nvme_command));
+	cmnd->rw.command_id = req->tag;
+	if (req->nr_phys_segments) {
+		cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+		cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
+	}
+
+	if (++nvmeq->sq_tail == nvmeq->q_depth)
+		nvmeq->sq_tail = 0;
+	writel(nvmeq->sq_tail, nvmeq->q_db);
+}
+
 /*
  * We reuse the small pool to allocate the 16-byte range here as it is not
  * worth having a special pool for these or additional cases to handle freeing
@@ -807,11 +829,15 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
 	return 0;
 }
 
+/*
+ * NOTE: ns is NULL when called on the admin queue.
+ */
 static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 			 const struct blk_mq_queue_data *bd)
 {
 	struct nvme_ns *ns = hctx->queue->queuedata;
 	struct nvme_queue *nvmeq = hctx->driver_data;
+	struct nvme_dev *dev = nvmeq->dev;
 	struct request *req = bd->rq;
 	struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
 	struct nvme_iod *iod;
@@ -822,7 +848,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	 * unless this namespace is formated such that the metadata can be
 	 * stripped/generated by the controller with PRACT=1.
 	 */
-	if (ns->ms && !blk_integrity_rq(req)) {
+	if (ns && ns->ms && !blk_integrity_rq(req)) {
 		if (!(ns->pi_type && ns->ms == 8)) {
 			req->errors = -EFAULT;
 			blk_mq_complete_request(req);
@@ -830,7 +856,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 		}
 	}
 
-	iod = nvme_alloc_iod(req, ns->dev, GFP_ATOMIC);
+	iod = nvme_alloc_iod(req, dev, GFP_ATOMIC);
 	if (!iod)
 		return BLK_MQ_RQ_QUEUE_BUSY;
 
@@ -841,8 +867,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 		 * as it is not worth having a special pool for these or
 		 * additional cases to handle freeing the iod.
 		 */
-		range = dma_pool_alloc(nvmeq->dev->prp_small_pool,
-						GFP_ATOMIC,
+		range = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC,
 						&iod->first_dma);
 		if (!range)
 			goto retry_cmd;
@@ -860,9 +885,8 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 			goto retry_cmd;
 
 		if (blk_rq_bytes(req) !=
-                    nvme_setup_prps(nvmeq->dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) {
-			dma_unmap_sg(nvmeq->dev->dev, iod->sg,
-					iod->nents, dma_dir);
+                    nvme_setup_prps(dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) {
+			dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
 			goto retry_cmd;
 		}
 		if (blk_integrity_rq(req)) {
@@ -884,7 +908,9 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	nvme_set_info(cmd, iod, req_completion);
 	spin_lock_irq(&nvmeq->q_lock);
-	if (req->cmd_flags & REQ_DISCARD)
+	if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+		nvme_submit_priv(nvmeq, req, iod);
+	else if (req->cmd_flags & REQ_DISCARD)
 		nvme_submit_discard(nvmeq, ns, req, iod);
 	else if (req->cmd_flags & REQ_FLUSH)
 		nvme_submit_flush(nvmeq, ns, req->tag);
@@ -896,10 +922,10 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return BLK_MQ_RQ_QUEUE_OK;
 
  error_cmd:
-	nvme_free_iod(nvmeq->dev, iod);
+	nvme_free_iod(dev, iod);
 	return BLK_MQ_RQ_QUEUE_ERROR;
  retry_cmd:
-	nvme_free_iod(nvmeq->dev, iod);
+	nvme_free_iod(dev, iod);
 	return BLK_MQ_RQ_QUEUE_BUSY;
 }
 
@@ -942,15 +968,6 @@ static int nvme_process_cq(struct nvme_queue *nvmeq)
 	return 1;
 }
 
-/* Admin queue isn't initialized as a request queue. If at some point this
- * happens anyway, make sure to notify the user */
-static int nvme_admin_queue_rq(struct blk_mq_hw_ctx *hctx,
-			       const struct blk_mq_queue_data *bd)
-{
-	WARN_ON_ONCE(1);
-	return BLK_MQ_RQ_QUEUE_ERROR;
-}
-
 static irqreturn_t nvme_irq(int irq, void *data)
 {
 	irqreturn_t result;
@@ -972,59 +989,61 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
 	return IRQ_WAKE_THREAD;
 }
 
-struct sync_cmd_info {
-	struct task_struct *task;
-	u32 result;
-	int status;
-};
-
-static void sync_completion(struct nvme_queue *nvmeq, void *ctx,
-						struct nvme_completion *cqe)
-{
-	struct sync_cmd_info *cmdinfo = ctx;
-	cmdinfo->result = le32_to_cpup(&cqe->result);
-	cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
-	wake_up_process(cmdinfo->task);
-}
-
 /*
  * Returns 0 on success.  If the result is negative, it's a Linux error code;
  * if the result is positive, it's an NVM Express status code
  */
-static int __nvme_submit_sync_cmd(struct request_queue *q,
-		struct nvme_command *cmd, u32 *result, unsigned timeout)
+int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void *buffer, void __user *ubuffer, unsigned bufflen,
+		u32 *result, unsigned timeout)
 {
-	struct sync_cmd_info cmdinfo;
-	struct nvme_cmd_info *cmd_rq;
+	bool write = cmd->common.opcode & 1;
+	struct bio *bio = NULL;
 	struct request *req;
-	int res;
+	int ret;
 
-	req = blk_mq_alloc_request(q, WRITE, GFP_KERNEL, false);
+	req = blk_mq_alloc_request(q, write, GFP_KERNEL, false);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
-	cmdinfo.task = current;
-	cmdinfo.status = -EINTR;
+	req->cmd_type = REQ_TYPE_DRV_PRIV;
+	req->__data_len = 0;
+	req->__sector = (sector_t) -1;
+	req->bio = req->biotail = NULL;
 
-	cmd->common.command_id = req->tag;
+	req->timeout = ADMIN_TIMEOUT;
 
-	cmd_rq = blk_mq_rq_to_pdu(req);
-	nvme_set_info(cmd_rq, &cmdinfo, sync_completion);
+	req->cmd = (unsigned char *)cmd;
+	req->cmd_len = sizeof(struct nvme_command);
+	req->sense = NULL;
+	req->sense_len = 0;
 
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	nvme_submit_cmd(cmd_rq->nvmeq, cmd);
-	schedule();
+	if (buffer && bufflen) {
+		ret = blk_rq_map_kern(q, req, buffer, bufflen, __GFP_WAIT);
+		if (ret)
+			goto out;
+	} else if (ubuffer && bufflen) {
+		ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, __GFP_WAIT);
+		if (ret)
+			goto out;
+		bio = req->bio;
+	}
 
+	blk_execute_rq(req->q, NULL, req, 0);
+	if (bio)
+		blk_rq_unmap_user(bio);
 	if (result)
-		*result = cmdinfo.result;
-	res = cmdinfo.status;
+		*result = req->sense_len;
+	ret = req->errors;
+ out:
 	blk_mq_free_request(req);
-	return res;
+	return ret;
 }
 
-int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd)
+int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void *buffer, unsigned bufflen)
 {
-	return __nvme_submit_sync_cmd(q, cmd, NULL, 0);
+	return __nvme_submit_sync_cmd(q, cmd, buffer, NULL, bufflen, NULL, 0);
 }
 
 static int nvme_submit_async_admin_req(struct nvme_dev *dev)
@@ -1081,7 +1100,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
 	c.delete_queue.opcode = opcode;
 	c.delete_queue.qid = cpu_to_le16(id);
 
-	return nvme_submit_sync_cmd(dev->admin_q, &c);
+	return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
 }
 
 static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
@@ -1090,6 +1109,10 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
 	struct nvme_command c;
 	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
 
+	/*
+	 * Note: we (ab)use the fact that the prp fields survive if no data
+	 * is attached to the request.
+	 */
 	memset(&c, 0, sizeof(c));
 	c.create_cq.opcode = nvme_admin_create_cq;
 	c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
@@ -1098,7 +1121,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
 	c.create_cq.cq_flags = cpu_to_le16(flags);
 	c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
 
-	return nvme_submit_sync_cmd(dev->admin_q, &c);
+	return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
 }
 
 static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
@@ -1107,6 +1130,10 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
 	struct nvme_command c;
 	int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;
 
+	/*
+	 * Note: we (ab)use the fact that the prp fields survive if no data
+	 * is attached to the request.
+	 */
 	memset(&c, 0, sizeof(c));
 	c.create_sq.opcode = nvme_admin_create_sq;
 	c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
@@ -1115,7 +1142,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
 	c.create_sq.sq_flags = cpu_to_le16(flags);
 	c.create_sq.cqid = cpu_to_le16(qid);
 
-	return nvme_submit_sync_cmd(dev->admin_q, &c);
+	return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
 }
 
 static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
@@ -1128,18 +1155,43 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
 	return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
 }
 
-int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns,
-							dma_addr_t dma_addr)
+int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id)
 {
-	struct nvme_command c;
+	struct nvme_command c = {
+		.identify.opcode = nvme_admin_identify,
+		.identify.cns = cpu_to_le32(1),
+	};
+	int error;
 
-	memset(&c, 0, sizeof(c));
-	c.identify.opcode = nvme_admin_identify;
-	c.identify.nsid = cpu_to_le32(nsid);
-	c.identify.prp1 = cpu_to_le64(dma_addr);
-	c.identify.cns = cpu_to_le32(cns);
+	*id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
+	if (!*id)
+		return -ENOMEM;
 
-	return nvme_submit_sync_cmd(dev->admin_q, &c);
+	error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
+			sizeof(struct nvme_id_ctrl));
+	if (error)
+		kfree(*id);
+	return error;
+}
+
+int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
+		struct nvme_id_ns **id)
+{
+	struct nvme_command c = {
+		.identify.opcode = nvme_admin_identify,
+		.identify.nsid = cpu_to_le32(nsid),
+	};
+	int error;
+
+	*id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
+	if (!*id)
+		return -ENOMEM;
+
+	error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
+			sizeof(struct nvme_id_ns));
+	if (error)
+		kfree(*id);
+	return error;
 }
 
 int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
@@ -1153,7 +1205,8 @@ int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
 	c.features.prp1 = cpu_to_le64(dma_addr);
 	c.features.fid = cpu_to_le32(fid);
 
-	return __nvme_submit_sync_cmd(dev->admin_q, &c, result, 0);
+	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
+			result, 0);
 }
 
 int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
@@ -1167,7 +1220,30 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
 	c.features.fid = cpu_to_le32(fid);
 	c.features.dword11 = cpu_to_le32(dword11);
 
-	return __nvme_submit_sync_cmd(dev->admin_q, &c, result, 0);
+	return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
+			result, 0);
+}
+
+int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log)
+{
+	struct nvme_command c = {
+		.common.opcode = nvme_admin_get_log_page,
+		.common.nsid = cpu_to_le32(0xFFFFFFFF),
+		.common.cdw10[0] = cpu_to_le32(
+			(((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
+			 NVME_LOG_SMART),
+	};
+	int error;
+
+	*log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
+	if (!*log)
+		return -ENOMEM;
+
+	error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
+			sizeof(struct nvme_smart_log));
+	if (error)
+		kfree(*log);
+	return error;
 }
 
 /**
@@ -1523,7 +1599,7 @@ static int nvme_shutdown_ctrl(struct nvme_dev *dev)
 }
 
 static struct blk_mq_ops nvme_mq_admin_ops = {
-	.queue_rq	= nvme_admin_queue_rq,
+	.queue_rq	= nvme_queue_rq,
 	.map_queue	= blk_mq_map_queue,
 	.init_hctx	= nvme_admin_init_hctx,
 	.exit_hctx	= nvme_exit_hctx,
@@ -1644,122 +1720,41 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	return result;
 }
 
-struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
-				unsigned long addr, unsigned length)
-{
-	int i, err, count, nents, offset;
-	struct scatterlist *sg;
-	struct page **pages;
-	struct nvme_iod *iod;
-
-	if (addr & 3)
-		return ERR_PTR(-EINVAL);
-	if (!length || length > INT_MAX - PAGE_SIZE)
-		return ERR_PTR(-EINVAL);
-
-	offset = offset_in_page(addr);
-	count = DIV_ROUND_UP(offset + length, PAGE_SIZE);
-	pages = kcalloc(count, sizeof(*pages), GFP_KERNEL);
-	if (!pages)
-		return ERR_PTR(-ENOMEM);
-
-	err = get_user_pages_fast(addr, count, 1, pages);
-	if (err < count) {
-		count = err;
-		err = -EFAULT;
-		goto put_pages;
-	}
-
-	err = -ENOMEM;
-	iod = __nvme_alloc_iod(count, length, dev, 0, GFP_KERNEL);
-	if (!iod)
-		goto put_pages;
-
-	sg = iod->sg;
-	sg_init_table(sg, count);
-	for (i = 0; i < count; i++) {
-		sg_set_page(&sg[i], pages[i],
-			    min_t(unsigned, length, PAGE_SIZE - offset),
-			    offset);
-		length -= (PAGE_SIZE - offset);
-		offset = 0;
-	}
-	sg_mark_end(&sg[i - 1]);
-	iod->nents = count;
-
-	nents = dma_map_sg(dev->dev, sg, count,
-				write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-	if (!nents)
-		goto free_iod;
-
-	kfree(pages);
-	return iod;
-
- free_iod:
-	kfree(iod);
- put_pages:
-	for (i = 0; i < count; i++)
-		put_page(pages[i]);
-	kfree(pages);
-	return ERR_PTR(err);
-}
-
-void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
-			struct nvme_iod *iod)
-{
-	int i;
-
-	dma_unmap_sg(dev->dev, iod->sg, iod->nents,
-				write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-
-	for (i = 0; i < iod->nents; i++)
-		put_page(sg_page(&iod->sg[i]));
-}
-
 static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 {
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_user_io io;
 	struct nvme_command c;
-	unsigned length, meta_len, prp_len;
+	unsigned length, meta_len;
 	int status, write;
-	struct nvme_iod *iod;
 	dma_addr_t meta_dma = 0;
 	void *meta = NULL;
 
 	if (copy_from_user(&io, uio, sizeof(io)))
 		return -EFAULT;
-	length = (io.nblocks + 1) << ns->lba_shift;
-	meta_len = (io.nblocks + 1) * ns->ms;
-
-	if (meta_len && ((io.metadata & 3) || !io.metadata) && !ns->ext)
-		return -EINVAL;
-	else if (meta_len && ns->ext) {
-		length += meta_len;
-		meta_len = 0;
-	}
-
-	write = io.opcode & 1;
 
 	switch (io.opcode) {
 	case nvme_cmd_write:
 	case nvme_cmd_read:
 	case nvme_cmd_compare:
-		iod = nvme_map_user_pages(dev, write, io.addr, length);
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	if (IS_ERR(iod))
-		return PTR_ERR(iod);
+	length = (io.nblocks + 1) << ns->lba_shift;
+	meta_len = (io.nblocks + 1) * ns->ms;
+	write = io.opcode & 1;
 
-	prp_len = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
-	if (length != prp_len) {
-		status = -ENOMEM;
-		goto unmap;
-	}
 	if (meta_len) {
+		if (((io.metadata & 3) || !io.metadata) && !ns->ext)
+			return -EINVAL;
+
+		if (ns->ext) {
+			length += meta_len;
+			meta_len = 0;
+		}
+
 		meta = dma_alloc_coherent(dev->dev, meta_len,
 						&meta_dma, GFP_KERNEL);
 		if (!meta) {
@@ -1786,13 +1781,11 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	c.rw.reftag = cpu_to_le32(io.reftag);
 	c.rw.apptag = cpu_to_le16(io.apptag);
 	c.rw.appmask = cpu_to_le16(io.appmask);
-	c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-	c.rw.prp2 = cpu_to_le64(iod->first_dma);
 	c.rw.metadata = cpu_to_le64(meta_dma);
-	status = nvme_submit_sync_cmd(ns->queue, &c);
+
+	status = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
+			(void __user *)io.addr, length, NULL, 0);
  unmap:
-	nvme_unmap_user_pages(dev, write, iod);
-	nvme_free_iod(dev, iod);
 	if (meta) {
 		if (status == NVME_SC_SUCCESS && !write) {
 			if (copy_to_user((void __user *)io.metadata, meta,
@@ -1809,9 +1802,8 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
 {
 	struct nvme_passthru_cmd cmd;
 	struct nvme_command c;
-	int status, length;
-	struct nvme_iod *uninitialized_var(iod);
-	unsigned timeout;
+	unsigned timeout = 0;
+	int status;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
@@ -1831,38 +1823,17 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
 	c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
 	c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
 
-	length = cmd.data_len;
-	if (cmd.data_len) {
-		iod = nvme_map_user_pages(dev, cmd.opcode & 1, cmd.addr,
-								length);
-		if (IS_ERR(iod))
-			return PTR_ERR(iod);
-		length = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
-		c.common.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-		c.common.prp2 = cpu_to_le64(iod->first_dma);
-	}
-
-	timeout = cmd.timeout_ms ? msecs_to_jiffies(cmd.timeout_ms) :
-								ADMIN_TIMEOUT;
-
-	if (length != cmd.data_len) {
-		status = -ENOMEM;
-		goto out;
-	}
+	if (cmd.timeout_ms)
+		timeout = msecs_to_jiffies(cmd.timeout_ms);
 
 	status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c,
-					&cmd.result, timeout);
-
-out:
-	if (cmd.data_len) {
-		nvme_unmap_user_pages(dev, cmd.opcode & 1, iod);
-		nvme_free_iod(dev, iod);
+			NULL, (void __user *)cmd.addr, cmd.data_len,
+			&cmd.result, timeout);
+	if (status >= 0) {
+		if (put_user(cmd.result, &ucmd->result))
+			return -EFAULT;
 	}
 
-	if ((status >= 0) && copy_to_user(&ucmd->result, &cmd.result,
-							sizeof(cmd.result)))
-		status = -EFAULT;
-
 	return status;
 }
 
@@ -1954,22 +1925,14 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 	struct nvme_ns *ns = disk->private_data;
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_id_ns *id;
-	dma_addr_t dma_addr;
 	u8 lbaf, pi_type;
 	u16 old_ms;
 	unsigned short bs;
 
-	id = dma_alloc_coherent(dev->dev, 4096, &dma_addr, GFP_KERNEL);
-	if (!id) {
-		dev_warn(dev->dev, "%s: Memory alocation failure\n", __func__);
+	if (nvme_identify_ns(dev, ns->ns_id, &id)) {
+		dev_warn(dev->dev, "%s: Identify failure\n", __func__);
 		return 0;
 	}
-	if (nvme_identify(dev, ns->ns_id, 0, dma_addr)) {
-		dev_warn(dev->dev,
-			"identify failed ns:%d, setting capacity to 0\n",
-			ns->ns_id);
-		memset(id, 0, sizeof(*id));
-	}
 
 	old_ms = ns->ms;
 	lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
@@ -2010,7 +1973,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 	if (dev->oncs & NVME_CTRL_ONCS_DSM)
 		nvme_config_discard(ns);
 
-	dma_free_coherent(dev->dev, 4096, id, dma_addr);
+	kfree(id);
 	return 0;
 }
 
@@ -2250,22 +2213,14 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	int res;
 	unsigned nn, i;
 	struct nvme_id_ctrl *ctrl;
-	void *mem;
-	dma_addr_t dma_addr;
 	int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
 
-	mem = dma_alloc_coherent(dev->dev, 4096, &dma_addr, GFP_KERNEL);
-	if (!mem)
-		return -ENOMEM;
-
-	res = nvme_identify(dev, 0, 1, dma_addr);
+	res = nvme_identify_ctrl(dev, &ctrl);
 	if (res) {
 		dev_err(dev->dev, "Identify Controller failed (%d)\n", res);
-		dma_free_coherent(dev->dev, 4096, mem, dma_addr);
 		return -EIO;
 	}
 
-	ctrl = mem;
 	nn = le32_to_cpup(&ctrl->nn);
 	dev->oncs = le16_to_cpup(&ctrl->oncs);
 	dev->abort_limit = ctrl->acl + 1;
@@ -2287,7 +2242,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 		} else
 			dev->max_hw_sectors = max_hw_sectors;
 	}
-	dma_free_coherent(dev->dev, 4096, mem, dma_addr);
+	kfree(ctrl);
 
 	dev->tagset.ops = &nvme_mq_ops;
 	dev->tagset.nr_hw_queues = dev->online_queues - 1;
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 342f5b7f840d..8e6223e5b670 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -525,8 +525,6 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
 					int alloc_len)
 {
 	struct nvme_dev *dev = ns->dev;
-	dma_addr_t dma_addr;
-	void *mem;
 	struct nvme_id_ns *id_ns;
 	int res;
 	int nvme_sc;
@@ -536,21 +534,17 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
 	u8 cmdque = 0x01 << 1;
 	u8 fw_offset = sizeof(dev->firmware_rev);
 
-	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns),
-				&dma_addr, GFP_KERNEL);
-	if (mem == NULL) {
-		res = -ENOMEM;
-		goto out_dma;
-	}
-
 	/* nvme ns identify - use DPS value for PROTECT field */
-	nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
-		goto out_free;
+		return res;
 
-	id_ns = mem;
-	(id_ns->dps) ? (protect = 0x01) : (protect = 0);
+	if (id_ns->dps)
+		protect = 0x01;
+	else
+		protect = 0;
+	kfree(id_ns);
 
 	memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
 	inq_response[2] = VERSION_SPC_4;
@@ -567,12 +561,7 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
 	strncpy(&inq_response[32], dev->firmware_rev + fw_offset, 4);
 
 	xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
-	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- out_free:
-	dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr);
- out_dma:
-	return res;
+	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
 }
 
 static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns,
@@ -615,40 +604,35 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					u8 *inq_response, int alloc_len)
 {
 	struct nvme_dev *dev = ns->dev;
-	dma_addr_t dma_addr;
-	void *mem;
 	int res;
 	int nvme_sc;
 	int xfer_len;
 	__be32 tmp_id = cpu_to_be32(ns->ns_id);
 
-	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns),
-					&dma_addr, GFP_KERNEL);
-	if (mem == NULL) {
-		res = -ENOMEM;
-		goto out_dma;
-	}
-
 	memset(inq_response, 0, alloc_len);
 	inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE;    /* Page Code */
 	if (readl(&dev->bar->vs) >= NVME_VS(1, 1)) {
-		struct nvme_id_ns *id_ns = mem;
-		void *eui = id_ns->eui64;
-		int len = sizeof(id_ns->eui64);
+		struct nvme_id_ns *id_ns;
+		void *eui;
+		int len;
 
-		nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+		nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
 		res = nvme_trans_status_code(hdr, nvme_sc);
 		if (res)
-			goto out_free;
+			return res;
 
+		eui = id_ns->eui64;
+		len = sizeof(id_ns->eui64);
 		if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) {
 			if (bitmap_empty(eui, len * 8)) {
 				eui = id_ns->nguid;
 				len = sizeof(id_ns->nguid);
 			}
 		}
-		if (bitmap_empty(eui, len * 8))
+		if (bitmap_empty(eui, len * 8)) {
+			kfree(id_ns);
 			goto scsi_string;
+		}
 
 		inq_response[3] = 4 + len; /* Page Length */
 		/* Designation Descriptor start */
@@ -657,14 +641,14 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		inq_response[6] = 0x00;    /* Rsvd */
 		inq_response[7] = len;     /* Designator Length */
 		memcpy(&inq_response[8], eui, len);
+		kfree(id_ns);
 	} else {
  scsi_string:
 		if (alloc_len < 72) {
-			res = nvme_trans_completion(hdr,
+			return nvme_trans_completion(hdr,
 					SAM_STAT_CHECK_CONDITION,
 					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-			goto out_free;
 		}
 		inq_response[3] = 0x48;    /* Page Length */
 		/* Designation Descriptor start */
@@ -679,12 +663,7 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		memcpy(&inq_response[56], dev->serial, sizeof(dev->serial));
 	}
 	xfer_len = alloc_len;
-	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- out_free:
-	dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr);
- out_dma:
-	return res;
+	return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
 }
 
 static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
@@ -694,8 +673,6 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	int res;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
-	dma_addr_t dma_addr;
-	void *mem;
 	struct nvme_id_ctrl *id_ctrl;
 	struct nvme_id_ns *id_ns;
 	int xfer_len;
@@ -708,39 +685,32 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	u8 luiclr = 0x01;
 
 	inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
-	if (inq_response == NULL) {
-		res = -ENOMEM;
-		goto out_mem;
-	}
-
-	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns),
-							&dma_addr, GFP_KERNEL);
-	if (mem == NULL) {
-		res = -ENOMEM;
-		goto out_dma;
-	}
+	if (inq_response == NULL)
+		return -ENOMEM;
 
-	/* nvme ns identify */
-	nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
-		goto out_free;
+		goto out_free_inq;
+
+	spt = spt_lut[id_ns->dpc & 0x07] << 3;
+	if (id_ns->dps)
+		protect = 0x01;
+	else
+		protect = 0;
+	kfree(id_ns);
 
-	id_ns = mem;
-	spt = spt_lut[(id_ns->dpc) & 0x07] << 3;
-	(id_ns->dps) ? (protect = 0x01) : (protect = 0);
 	grd_chk = protect << 2;
 	app_chk = protect << 1;
 	ref_chk = protect;
 
-	/* nvme controller identify */
-	nvme_sc = nvme_identify(dev, 0, 1, dma_addr);
+	nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
-		goto out_free;
+		goto out_free_inq;
 
-	id_ctrl = mem;
 	v_sup = id_ctrl->vwc;
+	kfree(id_ctrl);
 
 	memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
 	inq_response[1] = INQ_EXTENDED_INQUIRY_DATA_PAGE;    /* Page Code */
@@ -756,11 +726,8 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
 	res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
 
- out_free:
-	dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr);
- out_dma:
+ out_free_inq:
 	kfree(inq_response);
- out_mem:
 	return res;
 }
 
@@ -847,43 +814,27 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
 	int res;
 	int xfer_len;
 	u8 *log_response;
-	struct nvme_command c;
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_smart_log *smart_log;
-	dma_addr_t dma_addr;
-	void *mem;
 	u8 temp_c;
 	u16 temp_k;
 
 	log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL);
-	if (log_response == NULL) {
-		res = -ENOMEM;
-		goto out_mem;
-	}
+	if (log_response == NULL)
+		return -ENOMEM;
 
-	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_smart_log),
-					&dma_addr, GFP_KERNEL);
-	if (mem == NULL) {
-		res = -ENOMEM;
-		goto out_dma;
-	}
+	res = nvme_get_log_page(dev, &smart_log);
+	if (res < 0)
+		goto out_free_response;
 
-	/* Get SMART Log Page */
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = nvme_admin_get_log_page;
-	c.common.nsid = cpu_to_le32(0xFFFFFFFF);
-	c.common.prp1 = cpu_to_le64(dma_addr);
-	c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) /
-			BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART);
-	res = nvme_submit_sync_cmd(dev->admin_q, &c);
 	if (res != NVME_SC_SUCCESS) {
 		temp_c = LOG_TEMP_UNKNOWN;
 	} else {
-		smart_log = mem;
 		temp_k = (smart_log->temperature[1] << 8) +
 				(smart_log->temperature[0]);
 		temp_c = temp_k - KELVIN_TEMP_FACTOR;
 	}
+	kfree(smart_log);
 
 	log_response[0] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
 	/* Subpage=0x00, Page Length MSB=0 */
@@ -899,11 +850,8 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
 	xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH);
 	res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
 
-	dma_free_coherent(dev->dev, sizeof(struct nvme_smart_log),
-			  mem, dma_addr);
- out_dma:
+ out_free_response:
 	kfree(log_response);
- out_mem:
 	return res;
 }
 
@@ -913,44 +861,28 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	int res;
 	int xfer_len;
 	u8 *log_response;
-	struct nvme_command c;
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_smart_log *smart_log;
-	dma_addr_t dma_addr;
-	void *mem;
 	u32 feature_resp;
 	u8 temp_c_cur, temp_c_thresh;
 	u16 temp_k;
 
 	log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL);
-	if (log_response == NULL) {
-		res = -ENOMEM;
-		goto out_mem;
-	}
+	if (log_response == NULL)
+		return -ENOMEM;
 
-	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_smart_log),
-					&dma_addr, GFP_KERNEL);
-	if (mem == NULL) {
-		res = -ENOMEM;
-		goto out_dma;
-	}
+	res = nvme_get_log_page(dev, &smart_log);
+	if (res < 0)
+		goto out_free_response;
 
-	/* Get SMART Log Page */
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = nvme_admin_get_log_page;
-	c.common.nsid = cpu_to_le32(0xFFFFFFFF);
-	c.common.prp1 = cpu_to_le64(dma_addr);
-	c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) /
-			BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART);
-	res = nvme_submit_sync_cmd(dev->admin_q, &c);
 	if (res != NVME_SC_SUCCESS) {
 		temp_c_cur = LOG_TEMP_UNKNOWN;
 	} else {
-		smart_log = mem;
 		temp_k = (smart_log->temperature[1] << 8) +
 				(smart_log->temperature[0]);
 		temp_c_cur = temp_k - KELVIN_TEMP_FACTOR;
 	}
+	kfree(smart_log);
 
 	/* Get Features for Temp Threshold */
 	res = nvme_get_features(dev, NVME_FEAT_TEMP_THRESH, 0, 0,
@@ -979,11 +911,8 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH);
 	res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
 
-	dma_free_coherent(dev->dev, sizeof(struct nvme_smart_log),
-			  mem, dma_addr);
- out_dma:
+ out_free_response:
 	kfree(log_response);
- out_mem:
 	return res;
 }
 
@@ -1019,8 +948,6 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	int res;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
-	dma_addr_t dma_addr;
-	void *mem;
 	struct nvme_id_ns *id_ns;
 	u8 flbas;
 	u32 lba_length;
@@ -1030,20 +957,11 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN)
 		return -EINVAL;
 
-	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns),
-							&dma_addr, GFP_KERNEL);
-	if (mem == NULL) {
-		res = -ENOMEM;
-		goto out;
-	}
-
-	/* nvme ns identify */
-	nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
-		goto out_dma;
+		return res;
 
-	id_ns = mem;
 	flbas = (id_ns->flbas) & 0x0F;
 	lba_length = (1 << (id_ns->lbaf[flbas].ds));
 
@@ -1063,9 +981,7 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		memcpy(&resp[12], &tmp_len, sizeof(u32));
 	}
 
- out_dma:
-	dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr);
- out:
+	kfree(id_ns);
 	return res;
 }
 
@@ -1291,26 +1207,17 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	int res;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
-	dma_addr_t dma_addr;
-	void *mem;
 	struct nvme_id_ctrl *id_ctrl;
 	int lowest_pow_st;	/* max npss = lowest power consumption */
 	unsigned ps_desired = 0;
 
-	/* NVMe Controller Identify */
-	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ctrl),
-				&dma_addr, GFP_KERNEL);
-	if (mem == NULL) {
-		res = -ENOMEM;
-		goto out;
-	}
-	nvme_sc = nvme_identify(dev, 0, 1, dma_addr);
+	nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
-		goto out_dma;
+		return res;
 
-	id_ctrl = mem;
 	lowest_pow_st = max(POWER_STATE_0, (int)(id_ctrl->npss - 1));
+	kfree(id_ctrl);
 
 	switch (pc) {
 	case NVME_POWER_STATE_START_VALID:
@@ -1350,12 +1257,7 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	}
 	nvme_sc = nvme_set_features(dev, NVME_FEAT_POWER_MGMT, ps_desired, 0,
 				    NULL);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-
- out_dma:
-	dma_free_coherent(dev->dev, sizeof(struct nvme_id_ctrl), mem, dma_addr);
- out:
-	return res;
+	return nvme_trans_status_code(hdr, nvme_sc);
 }
 
 static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
@@ -1368,7 +1270,7 @@ static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr
 	c.common.opcode = nvme_admin_activate_fw;
 	c.common.cdw10[0] = cpu_to_le32(buffer_id | NVME_FWACT_REPL_ACTV);
 
-	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c);
+	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
 	return nvme_trans_status_code(hdr, nvme_sc);
 }
 
@@ -1376,15 +1278,9 @@ static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr
 					u8 opcode, u32 tot_len, u32 offset,
 					u8 buffer_id)
 {
-	int res;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
 	struct nvme_command c;
-	struct nvme_iod *iod = NULL;
-	unsigned length;
-
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = nvme_admin_download_fw;
 
 	if (hdr->iovec_count > 0) {
 		/* Assuming SGL is not allowed for this command */
@@ -1394,28 +1290,15 @@ static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr
 					SCSI_ASC_INVALID_CDB,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 	}
-	iod = nvme_map_user_pages(dev, DMA_TO_DEVICE,
-			(unsigned long)hdr->dxferp, tot_len);
-	if (IS_ERR(iod))
-		return PTR_ERR(iod);
-	length = nvme_setup_prps(dev, iod, tot_len, GFP_KERNEL);
-	if (length != tot_len) {
-		res = -ENOMEM;
-		goto out_unmap;
-	}
 
-	c.dlfw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-	c.dlfw.prp2 = cpu_to_le64(iod->first_dma);
+	memset(&c, 0, sizeof(c));
+	c.common.opcode = nvme_admin_download_fw;
 	c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
 	c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
 
-	nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c);
-	res = nvme_trans_status_code(hdr, nvme_sc);
-
- out_unmap:
-	nvme_unmap_user_pages(dev, DMA_TO_DEVICE, iod);
-	nvme_free_iod(dev, iod);
-	return res;
+	nvme_sc = __nvme_submit_sync_cmd(dev->admin_q, &c, NULL,
+			hdr->dxferp, tot_len, NULL, 0);
+	return nvme_trans_status_code(hdr, nvme_sc);
 }
 
 /* Mode Select Helper Functions */
@@ -1590,9 +1473,6 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
 	int res = 0;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
-	dma_addr_t dma_addr;
-	void *mem;
-	struct nvme_id_ns *id_ns;
 	u8 flbas;
 
 	/*
@@ -1603,19 +1483,12 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
 	 */
 
 	if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) {
-		mem = dma_alloc_coherent(dev->dev,
-			sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL);
-		if (mem == NULL) {
-			res = -ENOMEM;
-			goto out;
-		}
-		/* nvme ns identify */
-		nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+		struct nvme_id_ns *id_ns;
+
+		nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
 		res = nvme_trans_status_code(hdr, nvme_sc);
 		if (res)
-			goto out_dma;
-
-		id_ns = mem;
+			return res;
 
 		if (ns->mode_select_num_blocks == 0)
 			ns->mode_select_num_blocks = le64_to_cpu(id_ns->ncap);
@@ -1624,12 +1497,11 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
 			ns->mode_select_block_len =
 						(1 << (id_ns->lbaf[flbas].ds));
 		}
- out_dma:
-		dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns),
-				  mem, dma_addr);
+
+		kfree(id_ns);
 	}
- out:
-	return res;
+
+	return 0;
 }
 
 static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
@@ -1698,8 +1570,6 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	int res;
 	int nvme_sc;
 	struct nvme_dev *dev = ns->dev;
-	dma_addr_t dma_addr;
-	void *mem;
 	struct nvme_id_ns *id_ns;
 	u8 i;
 	u8 flbas, nlbaf;
@@ -1708,19 +1578,11 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	struct nvme_command c;
 
 	/* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */
-	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns),
-							&dma_addr, GFP_KERNEL);
-	if (mem == NULL) {
-		res = -ENOMEM;
-		goto out;
-	}
-	/* nvme ns identify */
-	nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
-		goto out_dma;
+		return res;
 
-	id_ns = mem;
 	flbas = (id_ns->flbas) & 0x0F;
 	nlbaf = id_ns->nlbaf;
 
@@ -1748,12 +1610,10 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	c.format.nsid = cpu_to_le32(ns->ns_id);
 	c.format.cdw10 = cpu_to_le32(cdw10);
 
-	nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c);
+	nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 
- out_dma:
-	dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr);
- out:
+	kfree(id_ns);
 	return res;
 }
 
@@ -1787,9 +1647,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 				struct nvme_trans_io_cdb *cdb_info, u8 is_write)
 {
 	int nvme_sc = NVME_SC_SUCCESS;
-	struct nvme_dev *dev = ns->dev;
 	u32 num_cmds;
-	struct nvme_iod *iod;
 	u64 unit_len;
 	u64 unit_num_blocks;	/* Number of blocks to xfer in each nvme cmd */
 	u32 retcode;
@@ -1840,35 +1698,17 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		control = nvme_trans_io_get_control(ns, cdb_info);
 		c.rw.control = cpu_to_le16(control);
 
-		iod = nvme_map_user_pages(dev,
-			(is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
-			(unsigned long)next_mapping_addr, unit_len);
-		if (IS_ERR(iod))
-			return PTR_ERR(iod);
-
-		retcode = nvme_setup_prps(dev, iod, unit_len, GFP_KERNEL);
-		if (retcode != unit_len) {
-			nvme_unmap_user_pages(dev,
-				(is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
-				iod);
-			nvme_free_iod(dev, iod);
-			return -ENOMEM;
+		if (get_capacity(ns->disk) - unit_num_blocks <
+				cdb_info->lba + nvme_offset) {
+			nvme_sc = NVME_SC_LBA_RANGE;
+			break;
 		}
-		c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
-		c.rw.prp2 = cpu_to_le64(iod->first_dma);
+		nvme_sc = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
+				next_mapping_addr, unit_len, NULL, 0);
+		if (nvme_sc)
+			break;
 
 		nvme_offset += unit_num_blocks;
-
-		nvme_sc = nvme_submit_sync_cmd(ns->queue, &c);
-
-		nvme_unmap_user_pages(dev,
-				(is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
-				iod);
-		nvme_free_iod(dev, iod);
-
-
-		if (nvme_sc != NVME_SC_SUCCESS)
-			break;
 	}
 
 	return nvme_trans_status_code(hdr, nvme_sc);
@@ -2199,8 +2039,6 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	u32 resp_size;
 	u32 xfer_len;
 	struct nvme_dev *dev = ns->dev;
-	dma_addr_t dma_addr;
-	void *mem;
 	struct nvme_id_ns *id_ns;
 	u8 *response;
 
@@ -2212,24 +2050,15 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		resp_size = READ_CAP_10_RESP_SIZE;
 	}
 
-	mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ns),
-							&dma_addr, GFP_KERNEL);
-	if (mem == NULL) {
-		res = -ENOMEM;
-		goto out;
-	}
-	/* nvme ns identify */
-	nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+	nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
 	res = nvme_trans_status_code(hdr, nvme_sc);
 	if (res)
-		goto out_dma;
-
-	id_ns = mem;
+		return res;	
 
 	response = kzalloc(resp_size, GFP_KERNEL);
 	if (response == NULL) {
 		res = -ENOMEM;
-		goto out_dma;
+		goto out_free_id;
 	}
 	nvme_trans_fill_read_cap(response, id_ns, cdb16);
 
@@ -2237,9 +2066,8 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	res = nvme_trans_copy_to_user(hdr, response, xfer_len);
 
 	kfree(response);
- out_dma:
-	dma_free_coherent(dev->dev, sizeof(struct nvme_id_ns), mem, dma_addr);
- out:
+ out_free_id:
+	kfree(id_ns);
 	return res;
 }
 
@@ -2251,8 +2079,6 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	u32 alloc_len, xfer_len, resp_size;
 	u8 *response;
 	struct nvme_dev *dev = ns->dev;
-	dma_addr_t dma_addr;
-	void *mem;
 	struct nvme_id_ctrl *id_ctrl;
 	u32 ll_length, lun_id;
 	u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET;
@@ -2266,19 +2092,11 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	case ALL_LUNS_RETURNED:
 	case ALL_WELL_KNOWN_LUNS_RETURNED:
 	case RESTRICTED_LUNS_RETURNED:
-		/* NVMe Controller Identify */
-		mem = dma_alloc_coherent(dev->dev, sizeof(struct nvme_id_ctrl),
-					&dma_addr, GFP_KERNEL);
-		if (mem == NULL) {
-			res = -ENOMEM;
-			goto out;
-		}
-		nvme_sc = nvme_identify(dev, 0, 1, dma_addr);
+		nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
 		res = nvme_trans_status_code(hdr, nvme_sc);
 		if (res)
-			goto out_dma;
+			return res;
 
-		id_ctrl = mem;
 		ll_length = le32_to_cpu(id_ctrl->nn) * LUN_ENTRY_SIZE;
 		resp_size = ll_length + LUN_DATA_HEADER_SIZE;
 
@@ -2288,13 +2106,13 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					SAM_STAT_CHECK_CONDITION,
 					ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
 					SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
-			goto out_dma;
+			goto out_free_id;
 		}
 
 		response = kzalloc(resp_size, GFP_KERNEL);
 		if (response == NULL) {
 			res = -ENOMEM;
-			goto out_dma;
+			goto out_free_id;
 		}
 
 		/* The first LUN ID will always be 0 per the SAM spec */
@@ -2315,9 +2133,8 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	res = nvme_trans_copy_to_user(hdr, response, xfer_len);
 
 	kfree(response);
- out_dma:
-	dma_free_coherent(dev->dev, sizeof(struct nvme_id_ctrl), mem, dma_addr);
- out:
+ out_free_id:
+	kfree(id_ctrl);
 	return res;
 }
 
@@ -2379,12 +2196,23 @@ static int nvme_trans_security_protocol(struct nvme_ns *ns,
 				SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
 }
 
-static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
-							u8 *cmd)
+static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
+					struct sg_io_hdr *hdr)
 {
-	int res;
 	int nvme_sc;
 	struct nvme_command c;
+
+	memset(&c, 0, sizeof(c));
+	c.common.opcode = nvme_cmd_flush;
+	c.common.nsid = cpu_to_le32(ns->ns_id);
+
+	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
+	return nvme_trans_status_code(hdr, nvme_sc);
+}
+
+static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+							u8 *cmd)
+{
 	u8 immed, pcmod, pc, no_flush, start;
 
 	immed = cmd[1] & 0x01;
@@ -2400,12 +2228,7 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	} else {
 		if (no_flush == 0) {
 			/* Issue NVME FLUSH command prior to START STOP UNIT */
-			memset(&c, 0, sizeof(c));
-			c.common.opcode = nvme_cmd_flush;
-			c.common.nsid = cpu_to_le32(ns->ns_id);
-
-			nvme_sc = nvme_submit_sync_cmd(ns->queue, &c);
-			res = nvme_trans_status_code(hdr, nvme_sc);
+			int res = nvme_trans_synchronize_cache(ns, hdr);
 			if (res)
 				return res;
 		}
@@ -2414,20 +2237,6 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	}
 }
 
-static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
-					struct sg_io_hdr *hdr, u8 *cmd)
-{
-	int nvme_sc;
-	struct nvme_command c;
-
-	memset(&c, 0, sizeof(c));
-	c.common.opcode = nvme_cmd_flush;
-	c.common.nsid = cpu_to_le32(ns->ns_id);
-
-	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c);
-	return nvme_trans_status_code(hdr, nvme_sc);
-}
-
 static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
@@ -2563,13 +2372,11 @@ struct scsi_unmap_parm_list {
 static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 							u8 *cmd)
 {
-	struct nvme_dev *dev = ns->dev;
 	struct scsi_unmap_parm_list *plist;
 	struct nvme_dsm_range *range;
 	struct nvme_command c;
 	int i, nvme_sc, res = -ENOMEM;
 	u16 ndesc, list_len;
-	dma_addr_t dma_addr;
 
 	list_len = get_unaligned_be16(&cmd[7]);
 	if (!list_len)
@@ -2589,8 +2396,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 		goto out;
 	}
 
-	range = dma_alloc_coherent(dev->dev, ndesc * sizeof(*range),
-							&dma_addr, GFP_KERNEL);
+	range = kcalloc(ndesc, sizeof(*range), GFP_KERNEL);
 	if (!range)
 		goto out;
 
@@ -2603,14 +2409,14 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	memset(&c, 0, sizeof(c));
 	c.dsm.opcode = nvme_cmd_dsm;
 	c.dsm.nsid = cpu_to_le32(ns->ns_id);
-	c.dsm.prp1 = cpu_to_le64(dma_addr);
 	c.dsm.nr = cpu_to_le32(ndesc - 1);
 	c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
 
-	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c);
+	nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, range,
+			ndesc * sizeof(*range));
 	res = nvme_trans_status_code(hdr, nvme_sc);
 
-	dma_free_coherent(dev->dev, ndesc * sizeof(*range), range, dma_addr);
+	kfree(range);
  out:
 	kfree(plist);
 	return res;
@@ -2690,7 +2496,7 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
 		retcode = nvme_trans_start_stop(ns, hdr, cmd);
 		break;
 	case SYNCHRONIZE_CACHE:
-		retcode = nvme_trans_synchronize_cache(ns, hdr, cmd);
+		retcode = nvme_trans_synchronize_cache(ns, hdr);
 		break;
 	case FORMAT_UNIT:
 		retcode = nvme_trans_format_unit(ns, hdr, cmd);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index de0e49a716b8..986bf8ad8e93 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -146,21 +146,15 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
 	return (sector >> (ns->lba_shift - 9));
 }
 
-/**
- * nvme_free_iod - frees an nvme_iod
- * @dev: The device that the I/O was submitted to
- * @iod: The memory to free
- */
-void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod);
-
-int nvme_setup_prps(struct nvme_dev *, struct nvme_iod *, int, gfp_t);
-struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
-				unsigned long addr, unsigned length);
-void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
-			struct nvme_iod *iod);
-int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd);
-int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns,
-							dma_addr_t dma_addr);
+int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void *buf, unsigned bufflen);
+int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+		void *buffer, void __user *ubuffer, unsigned bufflen,
+		u32 *result, unsigned timeout);
+int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id);
+int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
+		struct nvme_id_ns **id);
+int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log);
 int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
 			dma_addr_t dma_addr, u32 *result);
 int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
-- 
cgit v1.2.3


From a0a931d6a2c1fbc5d5966ebf0e7a043748692c22 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Fri, 22 May 2015 12:28:31 -0600
Subject: NVMe: Fix obtaining command result

Replaces the use of req->sense_len, which is not owned by the LLD, with
req->special to hold the command result for driver-created commands,
and sets the result unconditionally on completion.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@fb.com>
Fixes: d29ec8241c10 ("nvme: submit internal commands through the block layer")
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-core.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 03bd638e76dd..c42bc53f3765 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -606,13 +606,16 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 			return;
 		}
 		if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
-			req->sense_len = le32_to_cpup(&cqe->result);
 			req->errors = status;
 		} else {
 			req->errors = nvme_error_status(status);
 		}
 	} else
 		req->errors = 0;
+	if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
+		u32 result = le32_to_cpup(&cqe->result);
+		req->special = (void *)(uintptr_t)result;
+	}
 
 	if (cmd_rq->aborted)
 		dev_warn(nvmeq->dev->dev,
@@ -1015,8 +1018,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 
 	req->cmd = (unsigned char *)cmd;
 	req->cmd_len = sizeof(struct nvme_command);
-	req->sense = NULL;
-	req->sense_len = 0;
+	req->special = (void *)0;
 
 	if (buffer && bufflen) {
 		ret = blk_rq_map_kern(q, req, buffer, bufflen, __GFP_WAIT);
@@ -1033,7 +1035,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 	if (bio)
 		blk_rq_unmap_user(bio);
 	if (result)
-		*result = req->sense_len;
+		*result = (u32)(uintptr_t)req->special;
 	ret = req->errors;
  out:
 	blk_mq_free_request(req);
-- 
cgit v1.2.3


From f4ff414aeb472397d3b4fc15c22ca65bab219ec8 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Thu, 28 May 2015 09:48:54 -0600
Subject: NVMe: Use requested sync command timeout

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index c42bc53f3765..4eb9917b2a7a 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1014,7 +1014,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 	req->__sector = (sector_t) -1;
 	req->bio = req->biotail = NULL;
 
-	req->timeout = ADMIN_TIMEOUT;
+	req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
 
 	req->cmd = (unsigned char *)cmd;
 	req->cmd_len = sizeof(struct nvme_command);
-- 
cgit v1.2.3


From 75619bfa904d0f2840b4274eb92ce47b2e1c472e Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Thu, 28 May 2015 09:48:55 -0600
Subject: NVMe: End sync requests immediately on failure

Do not retry failed sync commands so the original status may be seen
without issuing unnecessary retries.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 4eb9917b2a7a..6ed1356e9eb5 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1010,6 +1010,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		return PTR_ERR(req);
 
 	req->cmd_type = REQ_TYPE_DRV_PRIV;
+	req->cmd_flags = REQ_FAILFAST_DRIVER;
 	req->__data_len = 0;
 	req->__sector = (sector_t) -1;
 	req->bio = req->biotail = NULL;
-- 
cgit v1.2.3


From 42483228d4c019ffc86b8dbea7dfbc3f9566fe7e Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Mon, 1 Jun 2015 09:29:54 -0600
Subject: NVMe: Remove hctx reliance for multi-namespace

The driver needs to track shared tags to support multiple namespaces
that may be dynamically allocated or deleted. Relying on the first
request_queue's hctx's is not appropriate as we cannot clear outstanding
tags for all namespaces using this handle, nor can the driver easily track
all request_queue's hctx as namespaces are attached/detached. Instead,
this patch uses the nvme_dev's tagset to get the shared tag resources
rather than going through a request_queue hctx.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-core.c | 52 +++++++++++++++++------------------------------
 1 file changed, 19 insertions(+), 33 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 6ed1356e9eb5..513908ff46c4 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -102,6 +102,7 @@ struct nvme_queue {
 	spinlock_t q_lock;
 	struct nvme_command *sq_cmds;
 	volatile struct nvme_completion *cqes;
+	struct blk_mq_tags **tags;
 	dma_addr_t sq_dma_addr;
 	dma_addr_t cq_dma_addr;
 	u32 __iomem *q_db;
@@ -114,7 +115,6 @@ struct nvme_queue {
 	u8 cq_phase;
 	u8 cqe_seen;
 	struct async_cmd_info cmdinfo;
-	struct blk_mq_hw_ctx *hctx;
 };
 
 /*
@@ -182,9 +182,12 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 	struct nvme_dev *dev = data;
 	struct nvme_queue *nvmeq = dev->queues[0];
 
-	WARN_ON(nvmeq->hctx);
-	nvmeq->hctx = hctx;
+	WARN_ON(hctx_idx != 0);
+	WARN_ON(dev->admin_tagset.tags[0] != hctx->tags);
+	WARN_ON(nvmeq->tags);
+
 	hctx->driver_data = nvmeq;
+	nvmeq->tags = &dev->admin_tagset.tags[0];
 	return 0;
 }
 
@@ -201,27 +204,16 @@ static int nvme_admin_init_request(void *data, struct request *req,
 	return 0;
 }
 
-static void nvme_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
-{
-	struct nvme_queue *nvmeq = hctx->driver_data;
-
-	nvmeq->hctx = NULL;
-}
-
 static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 			  unsigned int hctx_idx)
 {
 	struct nvme_dev *dev = data;
-	struct nvme_queue *nvmeq = dev->queues[
-					(hctx_idx % dev->queue_count) + 1];
-
-	if (!nvmeq->hctx)
-		nvmeq->hctx = hctx;
+	struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1];
 
-	/* nvmeq queues are shared between namespaces. We assume here that
-	 * blk-mq map the tags so they match up with the nvme queue tags. */
-	WARN_ON(nvmeq->hctx->tags != hctx->tags);
+	if (!nvmeq->tags)
+		nvmeq->tags = &dev->tagset.tags[hctx_idx];
 
+	WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags);
 	hctx->driver_data = nvmeq;
 	return 0;
 }
@@ -320,7 +312,7 @@ static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
 	u16 status = le16_to_cpup(&cqe->status) >> 1;
 	u32 result = le32_to_cpup(&cqe->result);
 
-	blk_mq_free_hctx_request(nvmeq->hctx, req);
+	blk_mq_free_request(req);
 
 	dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result);
 	++nvmeq->dev->abort_limit;
@@ -333,14 +325,13 @@ static void async_completion(struct nvme_queue *nvmeq, void *ctx,
 	cmdinfo->result = le32_to_cpup(&cqe->result);
 	cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
 	queue_kthread_work(cmdinfo->worker, &cmdinfo->work);
-	blk_mq_free_hctx_request(nvmeq->hctx, cmdinfo->req);
+	blk_mq_free_request(cmdinfo->req);
 }
 
 static inline struct nvme_cmd_info *get_cmd_from_tag(struct nvme_queue *nvmeq,
 				  unsigned int tag)
 {
-	struct blk_mq_hw_ctx *hctx = nvmeq->hctx;
-	struct request *req = blk_mq_tag_to_rq(hctx->tags, tag);
+	struct request *req = blk_mq_tag_to_rq(*nvmeq->tags, tag);
 
 	return blk_mq_rq_to_pdu(req);
 }
@@ -1068,7 +1059,7 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
 	c.common.opcode = nvme_admin_async_event;
 	c.common.command_id = req->tag;
 
-	blk_mq_free_hctx_request(nvmeq->hctx, req);
+	blk_mq_free_request(req);
 	return __nvme_submit_cmd(nvmeq, &c);
 }
 
@@ -1310,8 +1301,7 @@ static void nvme_abort_req(struct request *req)
 	}
 }
 
-static void nvme_cancel_queue_ios(struct blk_mq_hw_ctx *hctx,
-				struct request *req, void *data, bool reserved)
+static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved)
 {
 	struct nvme_queue *nvmeq = data;
 	void *ctx;
@@ -1408,11 +1398,9 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 
 static void nvme_clear_queue(struct nvme_queue *nvmeq)
 {
-	struct blk_mq_hw_ctx *hctx = nvmeq->hctx;
-
 	spin_lock_irq(&nvmeq->q_lock);
-	if (hctx && hctx->tags)
-		blk_mq_tag_busy_iter(hctx, nvme_cancel_queue_ios, nvmeq);
+	if (nvmeq->tags && *nvmeq->tags)
+		blk_mq_all_tag_busy_iter(*nvmeq->tags, nvme_cancel_queue_ios, nvmeq);
 	spin_unlock_irq(&nvmeq->q_lock);
 }
 
@@ -1605,7 +1593,6 @@ static struct blk_mq_ops nvme_mq_admin_ops = {
 	.queue_rq	= nvme_queue_rq,
 	.map_queue	= blk_mq_map_queue,
 	.init_hctx	= nvme_admin_init_hctx,
-	.exit_hctx	= nvme_exit_hctx,
 	.init_request	= nvme_admin_init_request,
 	.timeout	= nvme_timeout,
 };
@@ -1614,7 +1601,6 @@ static struct blk_mq_ops nvme_mq_ops = {
 	.queue_rq	= nvme_queue_rq,
 	.map_queue	= blk_mq_map_queue,
 	.init_hctx	= nvme_init_hctx,
-	.exit_hctx	= nvme_exit_hctx,
 	.init_request	= nvme_init_request,
 	.timeout	= nvme_timeout,
 };
@@ -2724,11 +2710,11 @@ static void nvme_set_irq_hints(struct nvme_dev *dev)
 	for (i = 0; i < dev->online_queues; i++) {
 		nvmeq = dev->queues[i];
 
-		if (!nvmeq->hctx)
+		if (!nvmeq->tags || !(*nvmeq->tags))
 			continue;
 
 		irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
-							nvmeq->hctx->cpumask);
+					blk_mq_tags_cpumask(*nvmeq->tags));
 	}
 }
 
-- 
cgit v1.2.3


From 419c21a3b6275d40a10901f700efcd40515b6db6 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Tue, 2 Jun 2015 08:35:09 +0900
Subject: null_blk: prevent timer handler running on a different CPU where
 started

When irqmode=2 (IRQ completion handler is timer), the timer handler
should be called on the same CPU where the timer was started.

Since completion_queues are per-cpu and the completion handler only
touches the completion_queue of the local CPU, we need to prevent the
handler from running on a CPU other than the one where the timer was
started. Otherwise, the IO cannot be completed until another completion
handler is executed on that CPU.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Jens Axboe <axboe@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/null_blk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 65cd61a4145e..6f0a58e7613d 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -257,7 +257,7 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
 	if (llist_add(&cmd->ll_list, &cq->list)) {
 		ktime_t kt = ktime_set(0, completion_nsec);
 
-		hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL);
+		hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL_PINNED);
 	}
 
 	put_cpu();
-- 
cgit v1.2.3


From 8b70f45e2eb275da886b9c9dee190436d12d876a Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Tue, 2 Jun 2015 08:35:10 +0900
Subject: null_blk: restart request processing on completion handler

When irqmode=2 (IRQ completion handler is timer) and queue_mode=1
(Block interface to use is rq), the completion handler should restart
request handling for any pending requests on a queue, because request
processing stops when more commands are queued than hw_queue_depth
(null_rq_prep_fn returns BLKPREP_DEFER).

Without this change, the following command cannot finish.

	# modprobe null_blk irqmode=2 queue_mode=1 hw_queue_depth=1
	# fio --name=t --rw=read --size=1g --direct=1 \
	  --ioengine=libaio --iodepth=64 --filename=/dev/nullb0

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Jens Axboe <axboe@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/null_blk.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 6f0a58e7613d..6f9b7534928e 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -243,6 +243,17 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
 			cmd = container_of(entry, struct nullb_cmd, ll_list);
 			entry = entry->next;
 			end_cmd(cmd);
+
+			if (cmd->rq) {
+				struct request_queue *q = cmd->rq->q;
+
+				if (!q->mq_ops && blk_queue_stopped(q)) {
+					spin_lock(q->queue_lock);
+					if (blk_queue_stopped(q))
+						blk_start_queue(q);
+					spin_unlock(q->queue_lock);
+				}
+			}
 		} while (entry);
 	}
 
@@ -334,6 +345,7 @@ static int null_rq_prep_fn(struct request_queue *q, struct request *req)
 		req->special = cmd;
 		return BLKPREP_OK;
 	}
+	blk_stop_queue(q);
 
 	return BLKPREP_DEFER;
 }
-- 
cgit v1.2.3


From 4cc06521ee1f153e0d292413a5bff7bbbdee92d0 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Fri, 5 Jun 2015 10:30:08 -0600
Subject: NVMe: add sysfs and ioctl controller reset

We need the ability to perform an nvme controller reset as discussed on
the mailing list thread:

  http://lists.infradead.org/pipermail/linux-nvme/2015-March/001585.html

This adds a sysfs entry that, when written to, will perform an NVMe
controller reset if the controller was successfully initialized in the
first place.

This also adds locking around resetting the device in the async probe
method so the driver can't schedule two resets.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Cc: Brandon Schultz <brandon.schulz@hgst.com>
Cc: David Sariel <david.sariel@pmcs.com>

Updated by Jens to:

1) Merge this with the ioctl reset patch from David Sariel. The ioctl
   path now shares the reset code from the sysfs path.

2) Don't flush work if we fail issuing the reset.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-core.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/nvme.h |  1 +
 2 files changed, 54 insertions(+)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 513908ff46c4..9682e29b4171 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -80,6 +80,7 @@ static wait_queue_head_t nvme_kthread_wait;
 static struct class *nvme_class;
 
 static void nvme_reset_failed_dev(struct work_struct *ws);
+static int nvme_reset(struct nvme_dev *dev);
 static int nvme_process_cq(struct nvme_queue *nvmeq);
 
 struct async_cmd_info {
@@ -2689,6 +2690,9 @@ static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 			return -ENOTTY;
 		ns = list_first_entry(&dev->namespaces, struct nvme_ns, list);
 		return nvme_user_cmd(dev, ns, (void __user *)arg);
+	case NVME_IOCTL_RESET:
+		dev_warn(dev->dev, "resetting controller\n");
+		return nvme_reset(dev);
 	default:
 		return -ENOTTY;
 	}
@@ -2839,6 +2843,44 @@ static void nvme_reset_workfn(struct work_struct *work)
 	dev->reset_workfn(work);
 }
 
+static int nvme_reset(struct nvme_dev *dev)
+{
+	int ret = -EBUSY;
+
+	if (!dev->admin_q || blk_queue_dying(dev->admin_q))
+		return -ENODEV;
+
+	spin_lock(&dev_list_lock);
+	if (!work_pending(&dev->reset_work)) {
+		dev->reset_workfn = nvme_reset_failed_dev;
+		queue_work(nvme_workq, &dev->reset_work);
+		ret = 0;
+	}
+	spin_unlock(&dev_list_lock);
+
+	if (!ret) {
+		flush_work(&dev->reset_work);
+		return 0;
+	}
+
+	return ret;
+}
+
+static ssize_t nvme_sysfs_reset(struct device *dev,
+				struct device_attribute *attr, const char *buf,
+				size_t count)
+{
+	struct nvme_dev *ndev = dev_get_drvdata(dev);
+	int ret;
+
+	ret = nvme_reset(ndev);
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
+
 static void nvme_async_probe(struct work_struct *work);
 static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
@@ -2883,12 +2925,20 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto release_pools;
 	}
 	get_device(dev->device);
+	dev_set_drvdata(dev->device, dev);
+
+	result = device_create_file(dev->device, &dev_attr_reset_controller);
+	if (result)
+		goto put_dev;
 
 	INIT_LIST_HEAD(&dev->node);
 	INIT_WORK(&dev->probe_work, nvme_async_probe);
 	schedule_work(&dev->probe_work);
 	return 0;
 
+ put_dev:
+	device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
+	put_device(dev->device);
  release_pools:
 	nvme_release_prp_pools(dev);
  release:
@@ -2919,10 +2969,12 @@ static void nvme_async_probe(struct work_struct *work)
 	nvme_set_irq_hints(dev);
 	return;
  reset:
+	spin_lock(&dev_list_lock);
 	if (!work_busy(&dev->reset_work)) {
 		dev->reset_workfn = nvme_reset_failed_dev;
 		queue_work(nvme_workq, &dev->reset_work);
 	}
+	spin_unlock(&dev_list_lock);
 }
 
 static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
@@ -2952,6 +3004,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	pci_set_drvdata(pdev, NULL);
 	flush_work(&dev->probe_work);
 	flush_work(&dev->reset_work);
+	device_remove_file(dev->device, &dev_attr_reset_controller);
 	nvme_dev_shutdown(dev);
 	nvme_dev_remove(dev);
 	nvme_dev_remove_admin(dev);
diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
index aef9a81b2d75..b660dc2fadfb 100644
--- a/include/uapi/linux/nvme.h
+++ b/include/uapi/linux/nvme.h
@@ -579,5 +579,6 @@ struct nvme_passthru_cmd {
 #define NVME_IOCTL_ADMIN_CMD	_IOWR('N', 0x41, struct nvme_admin_cmd)
 #define NVME_IOCTL_SUBMIT_IO	_IOW('N', 0x42, struct nvme_user_io)
 #define NVME_IOCTL_IO_CMD	_IOWR('N', 0x43, struct nvme_passthru_cmd)
+#define NVME_IOCTL_RESET	_IO('N', 0x44)
 
 #endif /* _UAPI_LINUX_NVME_H */
-- 
cgit v1.2.3


From 36a7e993eedb2c3f11de3b686b351f75e1edbbb5 Mon Sep 17 00:00:00 2001
From: Jon Derrick <jonathan.derrick@intel.com>
Date: Wed, 27 May 2015 12:26:23 -0600
Subject: NVMe: Memory barrier before queue_count is incremented

Protect against reordering and/or preemption, which would allow the
kthread to access the queue descriptor before it is set up.

Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
Acked-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-core.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 9682e29b4171..cae7cac6cc43 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1453,9 +1453,12 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
 	nvmeq->q_depth = depth;
 	nvmeq->qid = qid;
-	dev->queue_count++;
 	dev->queues[qid] = nvmeq;
 
+	/* make sure queue descriptor is set before queue count, for kthread */
+	mb();
+	dev->queue_count++;
+
 	return nvmeq;
 
  free_cqdma:
-- 
cgit v1.2.3


From a5768aa887fb636f0cc4c83a2f1242506aaf50f6 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Mon, 1 Jun 2015 14:28:14 -0600
Subject: NVMe: Automatic namespace rescan

Namespaces may be dynamically allocated and deleted or attached and
detached. This has the driver rescan the device for namespace changes
after each device reset or namespace change asynchronous event.

There could potentially be many detached namespaces that we don't want
polluting /dev/ with unusable block handles, so this will delete disks
if the namespace is not active as indicated by the response from identify
namespace. This also skips adding the disk if no capacity is provisioned
to the namespace in the first place.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-core.c | 159 ++++++++++++++++++++++++++++++++++++----------
 include/linux/nvme.h      |   1 +
 include/uapi/linux/nvme.h |   4 ++
 3 files changed, 132 insertions(+), 32 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index cae7cac6cc43..2072ae81c13a 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -29,6 +29,7 @@
 #include <linux/kdev_t.h>
 #include <linux/kthread.h>
 #include <linux/kernel.h>
+#include <linux/list_sort.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -300,9 +301,16 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
 
 	if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ)
 		++nvmeq->dev->event_limit;
-	if (status == NVME_SC_SUCCESS)
-		dev_warn(nvmeq->q_dmadev,
-			"async event result %08x\n", result);
+	if (status != NVME_SC_SUCCESS)
+		return;
+
+	switch (result & 0xff07) {
+	case NVME_AER_NOTICE_NS_CHANGED:
+		dev_info(nvmeq->q_dmadev, "rescanning\n");
+		schedule_work(&nvmeq->dev->scan_work);
+	default:
+		dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result);
+	}
 }
 
 static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
@@ -1923,8 +1931,13 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 	unsigned short bs;
 
 	if (nvme_identify_ns(dev, ns->ns_id, &id)) {
-		dev_warn(dev->dev, "%s: Identify failure\n", __func__);
-		return 0;
+		dev_warn(dev->dev, "%s: Identify failure nvme%dn%d\n", __func__,
+						dev->instance, ns->ns_id);
+		return -ENODEV;
+	}
+	if (id->ncap == 0) {
+		kfree(id);
+		return -ENODEV;
 	}
 
 	old_ms = ns->ms;
@@ -1958,7 +1971,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 								!ns->ext)
 		nvme_init_integrity(ns);
 
-	if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk)))
+	if (ns->ms && !blk_get_integrity(disk))
 		set_capacity(disk, 0);
 	else
 		set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
@@ -2073,11 +2086,16 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
 	 * requires it.
 	 */
 	set_capacity(disk, 0);
-	nvme_revalidate_disk(ns->disk);
+	if (nvme_revalidate_disk(ns->disk))
+		goto out_free_disk;
+
 	add_disk(ns->disk);
 	if (ns->ms)
 		revalidate_disk(ns->disk);
 	return;
+ out_free_disk:
+	kfree(disk);
+	list_del(&ns->list);
  out_free_queue:
 	blk_cleanup_queue(ns->queue);
  out_free_ns:
@@ -2194,6 +2212,99 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	return result;
 }
 
+static void nvme_free_namespace(struct nvme_ns *ns)
+{
+	list_del(&ns->list);
+
+	spin_lock(&dev_list_lock);
+	ns->disk->private_data = NULL;
+	spin_unlock(&dev_list_lock);
+
+	put_disk(ns->disk);
+	kfree(ns);
+}
+
+static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+	struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
+	struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
+
+	return nsa->ns_id - nsb->ns_id;
+}
+
+static struct nvme_ns *nvme_find_ns(struct nvme_dev *dev, unsigned nsid)
+{
+	struct nvme_ns *ns;
+
+	list_for_each_entry(ns, &dev->namespaces, list) {
+		if (ns->ns_id == nsid)
+			return ns;
+		if (ns->ns_id > nsid)
+			break;
+	}
+	return NULL;
+}
+
+static inline bool nvme_io_incapable(struct nvme_dev *dev)
+{
+	return (!dev->bar || readl(&dev->bar->csts) & NVME_CSTS_CFS ||
+							dev->online_queues < 2);
+}
+
+static void nvme_ns_remove(struct nvme_ns *ns)
+{
+	bool kill = nvme_io_incapable(ns->dev) && !blk_queue_dying(ns->queue);
+
+	if (kill)
+		blk_set_queue_dying(ns->queue);
+	if (ns->disk->flags & GENHD_FL_UP) {
+		if (blk_get_integrity(ns->disk))
+			blk_integrity_unregister(ns->disk);
+		del_gendisk(ns->disk);
+	}
+	if (kill || !blk_queue_dying(ns->queue)) {
+		blk_mq_abort_requeue_list(ns->queue);
+		blk_cleanup_queue(ns->queue);
+        }
+}
+
+static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
+{
+	struct nvme_ns *ns, *next;
+	unsigned i;
+
+	for (i = 1; i <= nn; i++) {
+		ns = nvme_find_ns(dev, i);
+		if (ns) {
+			if (revalidate_disk(ns->disk)) {
+				nvme_ns_remove(ns);
+				nvme_free_namespace(ns);
+			}
+		} else
+			nvme_alloc_ns(dev, i);
+	}
+	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
+		if (ns->ns_id > nn) {
+			nvme_ns_remove(ns);
+			nvme_free_namespace(ns);
+		}
+	}
+	list_sort(NULL, &dev->namespaces, ns_cmp);
+}
+
+static void nvme_dev_scan(struct work_struct *work)
+{
+	struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work);
+	struct nvme_id_ctrl *ctrl;
+
+	if (!dev->tagset.tags)
+		return;
+	if (nvme_identify_ctrl(dev, &ctrl))
+		return;
+	nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn));
+	kfree(ctrl);
+}
+
 /*
  * Return: error value if an error occurred setting up the queues or calling
  * Identify Device.  0 if these succeeded, even if adding some of the
@@ -2204,7 +2315,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 	int res;
-	unsigned nn, i;
+	unsigned nn;
 	struct nvme_id_ctrl *ctrl;
 	int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
 
@@ -2250,9 +2361,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	if (blk_mq_alloc_tag_set(&dev->tagset))
 		return 0;
 
-	for (i = 1; i <= nn; i++)
-		nvme_alloc_ns(dev, i);
-
+	schedule_work(&dev->scan_work);
 	return 0;
 }
 
@@ -2552,17 +2661,8 @@ static void nvme_dev_remove(struct nvme_dev *dev)
 {
 	struct nvme_ns *ns;
 
-	list_for_each_entry(ns, &dev->namespaces, list) {
-		if (ns->disk->flags & GENHD_FL_UP) {
-			if (blk_get_integrity(ns->disk))
-				blk_integrity_unregister(ns->disk);
-			del_gendisk(ns->disk);
-		}
-		if (!blk_queue_dying(ns->queue)) {
-			blk_mq_abort_requeue_list(ns->queue);
-			blk_cleanup_queue(ns->queue);
-		}
-	}
+	list_for_each_entry(ns, &dev->namespaces, list)
+		nvme_ns_remove(ns);
 }
 
 static int nvme_setup_prp_pools(struct nvme_dev *dev)
@@ -2621,16 +2721,8 @@ static void nvme_free_namespaces(struct nvme_dev *dev)
 {
 	struct nvme_ns *ns, *next;
 
-	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
-		list_del(&ns->list);
-
-		spin_lock(&dev_list_lock);
-		ns->disk->private_data = NULL;
-		spin_unlock(&dev_list_lock);
-
-		put_disk(ns->disk);
-		kfree(ns);
-	}
+	list_for_each_entry_safe(ns, next, &dev->namespaces, list)
+		nvme_free_namespace(ns);
 }
 
 static void nvme_free_dev(struct kref *kref)
@@ -2814,6 +2906,7 @@ static int nvme_dev_resume(struct nvme_dev *dev)
 		spin_unlock(&dev_list_lock);
 	} else {
 		nvme_unfreeze_queues(dev);
+		schedule_work(&dev->scan_work);
 		nvme_set_irq_hints(dev);
 	}
 	return 0;
@@ -2935,6 +3028,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto put_dev;
 
 	INIT_LIST_HEAD(&dev->node);
+	INIT_WORK(&dev->scan_work, nvme_dev_scan);
 	INIT_WORK(&dev->probe_work, nvme_async_probe);
 	schedule_work(&dev->probe_work);
 	return 0;
@@ -3007,6 +3101,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	pci_set_drvdata(pdev, NULL);
 	flush_work(&dev->probe_work);
 	flush_work(&dev->reset_work);
+	flush_work(&dev->scan_work);
 	device_remove_file(dev->device, &dev_attr_reset_controller);
 	nvme_dev_shutdown(dev);
 	nvme_dev_remove(dev);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 986bf8ad8e93..c0d94ed8ce9a 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -92,6 +92,7 @@ struct nvme_dev {
 	work_func_t reset_workfn;
 	struct work_struct reset_work;
 	struct work_struct probe_work;
+	struct work_struct scan_work;
 	char name[12];
 	char serial[20];
 	char model[40];
diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
index b660dc2fadfb..732b32e92b02 100644
--- a/include/uapi/linux/nvme.h
+++ b/include/uapi/linux/nvme.h
@@ -179,6 +179,10 @@ enum {
 	NVME_SMART_CRIT_VOLATILE_MEMORY	= 1 << 4,
 };
 
+enum {
+	NVME_AER_NOTICE_NS_CHANGED	= 0x0002,
+};
+
 struct nvme_lba_range_type {
 	__u8			type;
 	__u8			attributes;
-- 
cgit v1.2.3


From e7bdd17b0869782d89c371507ee45bb1425615a0 Mon Sep 17 00:00:00 2001
From: Geoff Levand <geoff@infradead.org>
Date: Wed, 10 Jun 2015 18:00:17 +0000
Subject: block/ps3vram: Fix sparse warnings

Fix sparse warnings like these:

 drivers/block/ps3vram.c: warning: incorrect type in assignment (different address spaces)
 drivers/block/ps3vram.c:    expected unsigned int [usertype] *ctrl
 drivers/block/ps3vram.c:    got void [noderef] <asn:2>*

Cc: Jim Paris <jim@jtan.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Geoff Levand <geoff@infradead.org>
Acked-by: Jim Paris <jim@jtan.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/ps3vram.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index ef45cfb98fd2..a7bf83639ba0 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -73,8 +73,8 @@ struct ps3vram_priv {
 
 	u64 memory_handle;
 	u64 context_handle;
-	u32 *ctrl;
-	void *reports;
+	u32 __iomem *ctrl;
+	void __iomem *reports;
 	u8 *xdr_buf;
 
 	u32 *fifo_base;
@@ -104,7 +104,7 @@ static char *size = "256M";
 module_param(size, charp, 0);
 MODULE_PARM_DESC(size, "memory size");
 
-static u32 *ps3vram_get_notifier(void *reports, int notifier)
+static u32 __iomem *ps3vram_get_notifier(void __iomem *reports, int notifier)
 {
 	return reports + DMA_NOTIFIER_OFFSET_BASE +
 	       DMA_NOTIFIER_SIZE * notifier;
@@ -113,22 +113,22 @@ static u32 *ps3vram_get_notifier(void *reports, int notifier)
 static void ps3vram_notifier_reset(struct ps3_system_bus_device *dev)
 {
 	struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
-	u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
+	u32 __iomem *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
 	int i;
 
 	for (i = 0; i < 4; i++)
-		notify[i] = 0xffffffff;
+		iowrite32be(0xffffffff, notify + i);
 }
 
 static int ps3vram_notifier_wait(struct ps3_system_bus_device *dev,
 				 unsigned int timeout_ms)
 {
 	struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
-	u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
+	u32 __iomem *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
 	unsigned long timeout;
 
 	for (timeout = 20; timeout; timeout--) {
-		if (!notify[3])
+		if (!ioread32be(notify + 3))
 			return 0;
 		udelay(10);
 	}
@@ -136,7 +136,7 @@ static int ps3vram_notifier_wait(struct ps3_system_bus_device *dev,
 	timeout = jiffies + msecs_to_jiffies(timeout_ms);
 
 	do {
-		if (!notify[3])
+		if (!ioread32be(notify + 3))
 			return 0;
 		msleep(1);
 	} while (time_before(jiffies, timeout));
@@ -148,8 +148,8 @@ static void ps3vram_init_ring(struct ps3_system_bus_device *dev)
 {
 	struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
 
-	priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
-	priv->ctrl[CTRL_GET] = FIFO_BASE + FIFO_OFFSET;
+	iowrite32be(FIFO_BASE + FIFO_OFFSET, priv->ctrl + CTRL_PUT);
+	iowrite32be(FIFO_BASE + FIFO_OFFSET, priv->ctrl + CTRL_GET);
 }
 
 static int ps3vram_wait_ring(struct ps3_system_bus_device *dev,
@@ -159,14 +159,14 @@ static int ps3vram_wait_ring(struct ps3_system_bus_device *dev,
 	unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
 
 	do {
-		if (priv->ctrl[CTRL_PUT] == priv->ctrl[CTRL_GET])
+		if (ioread32be(priv->ctrl + CTRL_PUT) == ioread32be(priv->ctrl + CTRL_GET))
 			return 0;
 		msleep(1);
 	} while (time_before(jiffies, timeout));
 
 	dev_warn(&dev->core, "FIFO timeout (%08x/%08x/%08x)\n",
-		 priv->ctrl[CTRL_PUT], priv->ctrl[CTRL_GET],
-		 priv->ctrl[CTRL_TOP]);
+		 ioread32be(priv->ctrl + CTRL_PUT), ioread32be(priv->ctrl + CTRL_GET),
+		 ioread32be(priv->ctrl + CTRL_TOP));
 
 	return -ETIMEDOUT;
 }
@@ -189,7 +189,7 @@ static void ps3vram_rewind_ring(struct ps3_system_bus_device *dev)
 
 	ps3vram_out_ring(priv, 0x20000000 | (FIFO_BASE + FIFO_OFFSET));
 
-	priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
+	iowrite32be(FIFO_BASE + FIFO_OFFSET, priv->ctrl + CTRL_PUT);
 
 	/* asking the HV for a blit will kick the FIFO */
 	status = lv1_gpu_fb_blit(priv->context_handle, 0, 0, 0, 0);
@@ -207,8 +207,8 @@ static void ps3vram_fire_ring(struct ps3_system_bus_device *dev)
 
 	mutex_lock(&ps3_gpu_mutex);
 
-	priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET +
-			       (priv->fifo_ptr - priv->fifo_base) * sizeof(u32);
+	iowrite32be(FIFO_BASE + FIFO_OFFSET + (priv->fifo_ptr - priv->fifo_base)
+		* sizeof(u32), priv->ctrl + CTRL_PUT);
 
 	/* asking the HV for a blit will kick the FIFO */
 	status = lv1_gpu_fb_blit(priv->context_handle, 0, 0, 0, 0);
-- 
cgit v1.2.3


From de667203fdbb77745ce7baa9ed280d2cc27b1753 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 10 Jun 2015 18:00:17 +0000
Subject: block/ps3vram: Remove obsolete reference to MTD

The ps3vram driver is a plain block device driver since commit
f507cd22035fdadd5dbb476dd05e9e7ee21c3b84 ("ps3/block: Replace mtd/ps3vram
by block/ps3vram").

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Geoff Levand <geoff@infradead.org>
Acked-by: Jim Paris <jim@jtan.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/ps3vram.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index a7bf83639ba0..b1612eb16172 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -1,5 +1,5 @@
 /*
- * ps3vram - Use extra PS3 video ram as MTD block device.
+ * ps3vram - Use extra PS3 video ram as block device.
  *
  * Copyright 2009 Sony Corporation
  *
-- 
cgit v1.2.3


From 3715a5d014e1326b8e6d008dfbf05615014a067e Mon Sep 17 00:00:00 2001
From: Geoff Levand <geoff@infradead.org>
Date: Wed, 10 Jun 2015 18:00:18 +0000
Subject: MAINTAINERS: Update ps3vram block driver

Add myself as co-maintainer of the ps3vram block driver, and add linuxppc-dev
as a relevant mailing list.

I have been acting as maintainer of this driver for the last several years, and
if there is some inquiry regarding it I would like to be notified.

Signed-off-by: Geoff Levand <geoff@infradead.org>
Acked-by: Jim Paris <jim@jtan.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 781e099495d3..ffcb4e5efe74 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7852,6 +7852,8 @@ F:	sound/ppc/snd_ps3*
 
 PS3VRAM DRIVER
 M:	Jim Paris <jim@jtan.com>
+M:	Geoff Levand <geoff@infradead.org>
+L:	linuxppc-dev@lists.ozlabs.org
 L:	cbe-oss-dev@lists.ozlabs.org
 S:	Maintained
 F:	drivers/block/ps3vram.c
-- 
cgit v1.2.3


From 02b48265e7437bfe153af16337b14ee74f00905f Mon Sep 17 00:00:00 2001
From: Asai Thambi SP <asamymuthupa@micron.com>
Date: Mon, 11 May 2015 15:48:00 -0700
Subject: mtip32xx: fix rmmod issue

put_disk() needs to be called after del_gendisk() to free the disk object structure.

Signed-off-by: Selvan Mani <smani@micron.com>
Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/mtip32xx/mtip32xx.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 3bd7ca9853a8..b79b59a696f1 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -2809,6 +2809,7 @@ static int mtip_free_orphan(struct driver_data *dd)
 				kobject_put(kobj);
 			}
 			del_gendisk(dd->disk);
+			put_disk(dd->disk);
 			dd->disk = NULL;
 		}
 		if (dd->queue) {
@@ -4095,13 +4096,13 @@ static int mtip_block_remove(struct driver_data *dd)
 			dd->bdev = NULL;
 		}
 		if (dd->disk) {
+			del_gendisk(dd->disk);
 			if (dd->disk->queue) {
-				del_gendisk(dd->disk);
 				blk_cleanup_queue(dd->queue);
 				blk_mq_free_tag_set(&dd->tags);
 				dd->queue = NULL;
-			} else
-				put_disk(dd->disk);
+			}
+			put_disk(dd->disk);
 		}
 		dd->disk  = NULL;
 
@@ -4140,12 +4141,12 @@ static int mtip_block_shutdown(struct driver_data *dd)
 		dev_info(&dd->pdev->dev,
 			"Shutting down %s ...\n", dd->disk->disk_name);
 
+		del_gendisk(dd->disk);
 		if (dd->disk->queue) {
-			del_gendisk(dd->disk);
 			blk_cleanup_queue(dd->queue);
 			blk_mq_free_tag_set(&dd->tags);
-		} else
-			put_disk(dd->disk);
+		}
+		put_disk(dd->disk);
 		dd->disk  = NULL;
 		dd->queue = NULL;
 	}
-- 
cgit v1.2.3


From a7806fadc5f68b1551e4fa85f5e655c0448727f1 Mon Sep 17 00:00:00 2001
From: Asai Thambi SP <asamymuthupa@micron.com>
Date: Mon, 11 May 2015 15:49:28 -0700
Subject: mtip32xx: remove unused variable 'port->allocated'

Remove unused variable 'port->allocated'

Signed-off-by: Selvan Mani <smani@micron.com>
Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/mtip32xx/mtip32xx.c | 15 +--------------
 drivers/block/mtip32xx/mtip32xx.h |  8 +-------
 2 files changed, 2 insertions(+), 21 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index b79b59a696f1..0dd5d7633b70 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -623,8 +623,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
 
 	set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
 
-	if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) &&
-			test_bit(MTIP_TAG_INTERNAL, port->allocated)) {
+	if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
 		cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
 		dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
 
@@ -2625,18 +2624,6 @@ static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf,
 				readl(dd->mmio + HOST_IRQ_STAT));
 	size += sprintf(&buf[size], "\n");
 
-	size += sprintf(&buf[size], "L/ Allocated     : [ 0x");
-
-	for (n = dd->slot_groups-1; n >= 0; n--) {
-		if (sizeof(long) > sizeof(u32))
-			group_allocated =
-				dd->port->allocated[n/2] >> (32*(n&1));
-		else
-			group_allocated = dd->port->allocated[n];
-		size += sprintf(&buf[size], "%08X ", group_allocated);
-	}
-	size += sprintf(&buf[size], "]\n");
-
 	size += sprintf(&buf[size], "L/ Commands in Q : [ 0x");
 
 	for (n = dd->slot_groups-1; n >= 0; n--) {
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index ba1b31ee22ec..d7a545974c78 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -412,19 +412,13 @@ struct mtip_port {
 	 * by the DMA when the driver issues internal commands.
 	 */
 	dma_addr_t sector_buffer_dma;
-	/*
-	 * Bit significant, used to determine if a command slot has
-	 * been allocated. i.e. the slot is in use.  Bits are cleared
-	 * when the command slot and all associated data structures
-	 * are no longer needed.
-	 */
+
 	u16 *log_buf;
 	dma_addr_t log_buf_dma;
 
 	u8 *smart_buf;
 	dma_addr_t smart_buf_dma;
 
-	unsigned long allocated[SLOTBITS_IN_LONGS];
 	/*
 	 * used to queue commands when an internal command is in progress
 	 * or error handling is active
-- 
cgit v1.2.3


From ee04bed690cb49a49512a641405bac42d13c2b2a Mon Sep 17 00:00:00 2001
From: Asai Thambi SP <asamymuthupa@micron.com>
Date: Mon, 11 May 2015 15:50:50 -0700
Subject: mtip32xx: fix incorrectly setting MTIP_DDF_SEC_LOCK_BIT

Fix incorrectly setting MTIP_DDF_SEC_LOCK_BIT

Signed-off-by: Selvan Mani <smani@micron.com>
Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/mtip32xx/mtip32xx.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 0dd5d7633b70..e905c81c0f55 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -990,15 +990,11 @@ static bool mtip_pause_ncq(struct mtip_port *port,
 	reply = port->rxfis + RX_FIS_D2H_REG;
 	task_file_data = readl(port->mmio+PORT_TFDATA);
 
-	if (fis->command == ATA_CMD_SEC_ERASE_UNIT)
-		clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
-
 	if ((task_file_data & 1))
 		return false;
 
 	if (fis->command == ATA_CMD_SEC_ERASE_PREP) {
 		set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
-		set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
 		port->ic_pause_timer = jiffies;
 		return true;
 	} else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) &&
@@ -1010,6 +1006,7 @@ static bool mtip_pause_ncq(struct mtip_port *port,
 		((fis->command == 0xFC) &&
 			(fis->features == 0x27 || fis->features == 0x72 ||
 			 fis->features == 0x62 || fis->features == 0x26))) {
+		clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
 		/* Com reset after secure erase or lowlevel format */
 		mtip_restart_port(port);
 		return false;
-- 
cgit v1.2.3


From 686d8e0bb5207c2a651eb5b28ac15db33adda59d Mon Sep 17 00:00:00 2001
From: Asai Thambi SP <asamymuthupa@micron.com>
Date: Mon, 11 May 2015 15:51:27 -0700
Subject: mtip32xx: Abort I/O during secure erase operation

Currently, I/Os are queued when a secure erase operation starts and are
issued after the operation completes. As all data will be gone when the
operation completes, any queued I/O doesn't make sense. Hence, abort I/O
(return -ENODATA) as soon as the driver receives it.

Signed-off-by: Selvan Mani <smani@micron.com>
Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/mtip32xx/mtip32xx.c | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index e905c81c0f55..92cb60194ed2 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -994,7 +994,6 @@ static bool mtip_pause_ncq(struct mtip_port *port,
 		return false;
 
 	if (fis->command == ATA_CMD_SEC_ERASE_PREP) {
-		set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
 		port->ic_pause_timer = jiffies;
 		return true;
 	} else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) &&
@@ -1009,6 +1008,7 @@ static bool mtip_pause_ncq(struct mtip_port *port,
 		clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
 		/* Com reset after secure erase or lowlevel format */
 		mtip_restart_port(port);
+		clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
 		return false;
 	}
 
@@ -1108,9 +1108,10 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 	int_cmd = mtip_get_int_command(dd);
 
 	set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
-	port->ic_pause_timer = 0;
 
-	clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
+	if (fis->command == ATA_CMD_SEC_ERASE_PREP)
+		set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
+
 	clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
 
 	if (atomic == GFP_KERNEL) {
@@ -1247,11 +1248,11 @@ static int mtip_exec_internal_command(struct mtip_port *port,
 exec_ic_exit:
 	/* Clear the allocated and active bits for the internal command. */
 	mtip_put_int_command(dd, int_cmd);
+	clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
 	if (rv >= 0 && mtip_pause_ncq(port, fis)) {
 		/* NCQ paused */
 		return rv;
 	}
-	clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
 	wake_up_interruptible(&port->svc_wait);
 
 	return rv;
@@ -3684,6 +3685,26 @@ static const struct block_device_operations mtip_block_ops = {
 	.owner		= THIS_MODULE
 };
 
+static inline bool is_se_active(struct driver_data *dd)
+{
+	if (unlikely(test_bit(MTIP_PF_SE_ACTIVE_BIT, &dd->port->flags))) {
+		if (dd->port->ic_pause_timer) {
+			unsigned long to = dd->port->ic_pause_timer +
+							msecs_to_jiffies(1000);
+			if (time_after(jiffies, to)) {
+				clear_bit(MTIP_PF_SE_ACTIVE_BIT,
+							&dd->port->flags);
+				clear_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag);
+				dd->port->ic_pause_timer = 0;
+				wake_up_interruptible(&dd->port->svc_wait);
+				return false;
+			}
+		}
+		return true;
+	}
+	return false;
+}
+
 /*
  * Block layer make request function.
  *
@@ -3701,6 +3722,9 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
 	struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
 	unsigned int nents;
 
+	if (is_se_active(dd))
+		return -ENODATA;
+
 	if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) {
 		if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
 							&dd->dd_flag))) {
-- 
cgit v1.2.3


From 2132a544727eb17f76bfef8b550a016a41c38821 Mon Sep 17 00:00:00 2001
From: Asai Thambi SP <asamymuthupa@micron.com>
Date: Mon, 11 May 2015 15:53:18 -0700
Subject: mtip32xx: fix crash on surprise removal of the drive

The PCI and block layers have changed a lot compared to when SRSI support was
added. Given the current state of the PCI and block layers, this driver does
not have to do any specific handling.

Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Selvan Mani <smani@micron.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/mtip32xx/mtip32xx.c | 143 ++++++++++++--------------------------
 1 file changed, 45 insertions(+), 98 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 92cb60194ed2..0c429b51e535 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -895,6 +895,10 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
 
 		/* Acknowledge the interrupt status on the port.*/
 		port_stat = readl(port->mmio + PORT_IRQ_STAT);
+		if (unlikely(port_stat == 0xFFFFFFFF)) {
+			mtip_check_surprise_removal(dd->pdev);
+			return IRQ_HANDLED;
+		}
 		writel(port_stat, port->mmio + PORT_IRQ_STAT);
 
 		/* Demux port status */
@@ -2765,49 +2769,6 @@ static void mtip_hw_debugfs_exit(struct driver_data *dd)
 		debugfs_remove_recursive(dd->dfs_node);
 }
 
-static int mtip_free_orphan(struct driver_data *dd)
-{
-	struct kobject *kobj;
-
-	if (dd->bdev) {
-		if (dd->bdev->bd_holders >= 1)
-			return -2;
-
-		bdput(dd->bdev);
-		dd->bdev = NULL;
-	}
-
-	mtip_hw_debugfs_exit(dd);
-
-	spin_lock(&rssd_index_lock);
-	ida_remove(&rssd_index_ida, dd->index);
-	spin_unlock(&rssd_index_lock);
-
-	if (!test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag) &&
-			test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) {
-		put_disk(dd->disk);
-	} else {
-		if (dd->disk) {
-			kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
-			if (kobj) {
-				mtip_hw_sysfs_exit(dd, kobj);
-				kobject_put(kobj);
-			}
-			del_gendisk(dd->disk);
-			put_disk(dd->disk);
-			dd->disk = NULL;
-		}
-		if (dd->queue) {
-			dd->queue->queuedata = NULL;
-			blk_cleanup_queue(dd->queue);
-			blk_mq_free_tag_set(&dd->tags);
-			dd->queue = NULL;
-		}
-	}
-	kfree(dd);
-	return 0;
-}
-
 /*
  * Perform any init/resume time hardware setup
  *
@@ -2955,7 +2916,6 @@ static int mtip_service_thread(void *data)
 	unsigned long slot, slot_start, slot_wrap;
 	unsigned int num_cmd_slots = dd->slot_groups * 32;
 	struct mtip_port *port = dd->port;
-	int ret;
 
 	while (1) {
 		if (kthread_should_stop() ||
@@ -3041,18 +3001,6 @@ restart_eh:
 		if (kthread_should_stop())
 			goto st_out;
 	}
-
-	while (1) {
-		ret = mtip_free_orphan(dd);
-		if (!ret) {
-			/* NOTE: All data structures are invalid, do not
-			 * access any here */
-			return 0;
-		}
-		msleep_interruptible(1000);
-		if (kthread_should_stop())
-			goto st_out;
-	}
 st_out:
 	return 0;
 }
@@ -3380,6 +3328,7 @@ static int mtip_hw_exit(struct driver_data *dd)
 	/* Release the IRQ. */
 	irq_set_affinity_hint(dd->pdev->irq, NULL);
 	devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
+	msleep(1000);
 
 	/* Free dma regions */
 	mtip_dma_free(dd);
@@ -4075,52 +4024,51 @@ static int mtip_block_remove(struct driver_data *dd)
 {
 	struct kobject *kobj;
 
-	if (!dd->sr) {
-		mtip_hw_debugfs_exit(dd);
+	mtip_hw_debugfs_exit(dd);
 
-		if (dd->mtip_svc_handler) {
-			set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags);
-			wake_up_interruptible(&dd->port->svc_wait);
-			kthread_stop(dd->mtip_svc_handler);
-		}
+	if (dd->mtip_svc_handler) {
+		set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags);
+		wake_up_interruptible(&dd->port->svc_wait);
+		kthread_stop(dd->mtip_svc_handler);
+	}
 
-		/* Clean up the sysfs attributes, if created */
-		if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) {
-			kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
-			if (kobj) {
-				mtip_hw_sysfs_exit(dd, kobj);
-				kobject_put(kobj);
-			}
+	/* Clean up the sysfs attributes, if created */
+	if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) {
+		kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
+		if (kobj) {
+			mtip_hw_sysfs_exit(dd, kobj);
+			kobject_put(kobj);
 		}
+	}
 
+	if (!dd->sr)
 		mtip_standby_drive(dd);
-
-		/*
-		 * Delete our gendisk structure. This also removes the device
-		 * from /dev
-		 */
-		if (dd->bdev) {
-			bdput(dd->bdev);
-			dd->bdev = NULL;
-		}
-		if (dd->disk) {
-			del_gendisk(dd->disk);
-			if (dd->disk->queue) {
-				blk_cleanup_queue(dd->queue);
-				blk_mq_free_tag_set(&dd->tags);
-				dd->queue = NULL;
-			}
-			put_disk(dd->disk);
-		}
-		dd->disk  = NULL;
-
-		spin_lock(&rssd_index_lock);
-		ida_remove(&rssd_index_ida, dd->index);
-		spin_unlock(&rssd_index_lock);
-	} else {
+	else
 		dev_info(&dd->pdev->dev, "device %s surprise removal\n",
 						dd->disk->disk_name);
+
+	/*
+	 * Delete our gendisk structure. This also removes the device
+	 * from /dev
+	 */
+	if (dd->bdev) {
+		bdput(dd->bdev);
+		dd->bdev = NULL;
+	}
+	if (dd->disk) {
+		del_gendisk(dd->disk);
+		if (dd->disk->queue) {
+			blk_cleanup_queue(dd->queue);
+			blk_mq_free_tag_set(&dd->tags);
+			dd->queue = NULL;
+		}
+		put_disk(dd->disk);
 	}
+	dd->disk  = NULL;
+
+	spin_lock(&rssd_index_lock);
+	ida_remove(&rssd_index_ida, dd->index);
+	spin_unlock(&rssd_index_lock);
 
 	/* De-initialize the protocol layer. */
 	mtip_hw_exit(dd);
@@ -4516,6 +4464,7 @@ static void mtip_pci_remove(struct pci_dev *pdev)
 			"Completion workers still active!\n");
 	}
 
+	blk_mq_stop_hw_queues(dd->queue);
 	/* Clean up the block layer. */
 	mtip_block_remove(dd);
 
@@ -4533,10 +4482,8 @@ static void mtip_pci_remove(struct pci_dev *pdev)
 	list_del_init(&dd->remove_list);
 	spin_unlock_irqrestore(&dev_lock, flags);
 
-	if (!dd->sr)
-		kfree(dd);
-	else
-		set_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag);
+	kfree(dd);
+	set_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag);
 
 	pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
 	pci_set_drvdata(pdev, NULL);
-- 
cgit v1.2.3


From 284eb9a202a24fec4aed02d7526abc29827f6cbb Mon Sep 17 00:00:00 2001
From: Asai Thambi SP <asamymuthupa@micron.com>
Date: Mon, 11 May 2015 15:54:19 -0700
Subject: mtip32xx: remove unnecessary sleep in mtip_ftl_rebuild_poll()

Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/mtip32xx/mtip32xx.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 0c429b51e535..378125004b6c 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -2891,7 +2891,6 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd)
 			mtip_block_initialize(dd);
 			return 0;
 		}
-		ssleep(10);
 	} while (time_before(jiffies, timeout));
 
 	/* Check for timeout */
-- 
cgit v1.2.3


From 75787265d61fdce212c45805b36779754392d034 Mon Sep 17 00:00:00 2001
From: Asai Thambi SP <asamymuthupa@micron.com>
Date: Mon, 11 May 2015 15:55:26 -0700
Subject: mtip32xx: fix minor number

When a device is surprise removed and inserted, it is assigned a new minor
number because the driver uses multiples of the 'instance' number. Modify the
driver to use multiples of 'index' for the minor number.

Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/mtip32xx/mtip32xx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 378125004b6c..0b223e3058c0 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3857,7 +3857,8 @@ static int mtip_block_initialize(struct driver_data *dd)
 
 	dd->disk->driverfs_dev	= &dd->pdev->dev;
 	dd->disk->major		= dd->major;
-	dd->disk->first_minor	= dd->instance * MTIP_MAX_MINORS;
+	dd->disk->first_minor	= index * MTIP_MAX_MINORS;
+	dd->disk->minors 	= MTIP_MAX_MINORS;
 	dd->disk->fops		= &mtip_block_ops;
 	dd->disk->private_data	= dd;
 	dd->index		= index;
-- 
cgit v1.2.3


From 2f17d71dd71fe62957f155eee028c6ba79c79f01 Mon Sep 17 00:00:00 2001
From: Asai Thambi SP <asamymuthupa@micron.com>
Date: Mon, 11 May 2015 15:57:16 -0700
Subject: mtip32xx: increase wait time for hba reset

In LUN failure conditions, the device takes longer to complete the HBA reset.
Increase the wait time from 2 seconds (the spec is 1 second) to 10 seconds.

Signed-off-by: Sam Bradshaw <sbradshaw@micron.com>
Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/mtip32xx/mtip32xx.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 0b223e3058c0..144e7d97b647 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -269,8 +269,11 @@ static int mtip_hba_reset(struct driver_data *dd)
 	/* Flush */
 	readl(dd->mmio + HOST_CTL);
 
-	/* Spin for up to 2 seconds, waiting for reset acknowledgement */
-	timeout = jiffies + msecs_to_jiffies(2000);
+	/*
+	 * Spin for up to 10 seconds waiting for reset acknowledgement. Spec
+	 * is 1 sec but in LUN failure conditions, up to 10 secs are required
+	 */
+	timeout = jiffies + msecs_to_jiffies(10000);
 	do {
 		mdelay(10);
 		if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))
-- 
cgit v1.2.3


From e112af0dc9f55099b948e55077504a44b4162c79 Mon Sep 17 00:00:00 2001
From: Matias Bjørling <m@bjorling.me>
Date: Fri, 5 Jun 2015 14:54:24 +0200
Subject: nvme: don't overwrite req->cmd_flags on sync cmd
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In __nvme_submit_sync_cmd, the request direction is overwritten when
the REQ_FAILFAST_DRIVER flag is set.

Signed-off-by: Matias Bjørling <m@bjorling.me>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Fixes: 75619bfa904d0 ("NVMe: End sync requests immediately on failure")
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 2072ae81c13a..12d5b7b03f9b 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1010,7 +1010,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		return PTR_ERR(req);
 
 	req->cmd_type = REQ_TYPE_DRV_PRIV;
-	req->cmd_flags = REQ_FAILFAST_DRIVER;
+	req->cmd_flags |= REQ_FAILFAST_DRIVER;
 	req->__data_len = 0;
 	req->__sector = (sector_t) -1;
 	req->bio = req->biotail = NULL;
-- 
cgit v1.2.3


From 71feb364e7faadc681e714f7fdc2bede208ba26c Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Fri, 19 Jun 2015 11:07:30 -0600
Subject: NVMe: Fix IO for extended metadata formats

This fixes io submit ioctl handling when using extended metadata
formats. When these formats are used, the user provides a single virtually
contiguous buffer containing both the block and metadata interleaved,
so the metadata size needs to be added to the total length and not mapped
as a separate transfer.

The command is also driver generated, so this patch does not enforce that
blk-integrity extensions provide the metadata buffer.

Reported-by: Marcin Dziegielewski <marcin.dziegielewski@intel.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-core.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 12d5b7b03f9b..a501d3e6ac92 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -852,7 +852,8 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	 * stripped/generated by the controller with PRACT=1.
 	 */
 	if (ns && ns->ms && !blk_integrity_rq(req)) {
-		if (!(ns->pi_type && ns->ms == 8)) {
+		if (!(ns->pi_type && ns->ms == 8) &&
+					req->cmd_type != REQ_TYPE_DRV_PRIV) {
 			req->errors = -EFAULT;
 			blk_mq_complete_request(req);
 			return BLK_MQ_RQ_QUEUE_OK;
@@ -1747,15 +1748,14 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	meta_len = (io.nblocks + 1) * ns->ms;
 	write = io.opcode & 1;
 
+	if (ns->ext) {
+		length += meta_len;
+		meta_len = 0;
+	}
 	if (meta_len) {
 		if (((io.metadata & 3) || !io.metadata) && !ns->ext)
 			return -EINVAL;
 
-		if (ns->ext) {
-			length += meta_len;
-			meta_len = 0;
-		}
-
 		meta = dma_alloc_coherent(dev->dev, meta_len,
 						&meta_dma, GFP_KERNEL);
 		if (!meta) {
-- 
cgit v1.2.3


From 51ef72bda70841fddd595142ed0e7e0fc571c500 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sat, 20 Jun 2015 16:29:14 +0800
Subject: block: nvme-scsi: Catch kcalloc failure

The res variable was initialized to -ENOMEM, but it is overwritten by
nvme_trans_copy_from_user(), so the current code returns 0 if kcalloc fails.
Fix it to return the proper error code.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-scsi.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 8e6223e5b670..ab6d1a0e5167 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -2375,7 +2375,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	struct scsi_unmap_parm_list *plist;
 	struct nvme_dsm_range *range;
 	struct nvme_command c;
-	int i, nvme_sc, res = -ENOMEM;
+	int i, nvme_sc, res;
 	u16 ndesc, list_len;
 
 	list_len = get_unaligned_be16(&cmd[7]);
@@ -2397,8 +2397,10 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	}
 
 	range = kcalloc(ndesc, sizeof(*range), GFP_KERNEL);
-	if (!range)
+	if (!range) {
+		res = -ENOMEM;
 		goto out;
+	}
 
 	for (i = 0; i < ndesc; i++) {
 		range[i].nlb = cpu_to_le32(be32_to_cpu(plist->desc[i].nlb));
-- 
cgit v1.2.3


From 98f57c5196f7a1f681246858f5860c1120d01ca9 Mon Sep 17 00:00:00 2001
From: Selvan Mani <smani@micron.com>
Date: Wed, 24 Jun 2015 08:48:46 -0600
Subject: mtip32xx: Fix accessing freed memory

In mtip_pci_remove(), driver data 'dd' is accessed after freeing it. This
is a residue of SRSI code cleanup in the patch 016a41c38821 "mtip32xx: fix
crash on surprise removal of the drive". Removed the bit flags
MTIP_DDF_REMOVE_DONE_BIT and MTIP_PF_SR_CLEANUP_BIT.

Reported-by: Julia Lawall <julia.lawall@lip6.fr>
Signed-off-by: Vignesh Gunasekaran <vgunasekaran@micron.com>
Signed-off-by: Selvan Mani <smani@micron.com>
Signed-off-by: Asai Thambi S P <asamymuthupa@micron.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/mtip32xx/mtip32xx.c | 19 -------------------
 drivers/block/mtip32xx/mtip32xx.h |  2 --
 2 files changed, 21 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 144e7d97b647..4a2ef09e6704 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -163,12 +163,6 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev)
 		else
 			dev_warn(&dd->pdev->dev,
 				"%s: dd->queue is NULL\n", __func__);
-		if (dd->port) {
-			set_bit(MTIP_PF_SR_CLEANUP_BIT, &dd->port->flags);
-			wake_up_interruptible(&dd->port->svc_wait);
-		} else
-			dev_warn(&dd->pdev->dev,
-				"%s: dd->port is NULL\n", __func__);
 		return true; /* device removed */
 	}
 
@@ -2938,10 +2932,6 @@ static int mtip_service_thread(void *data)
 			test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags))
 			goto st_out;
 
-		/* If I am an orphan, start self cleanup */
-		if (test_bit(MTIP_PF_SR_CLEANUP_BIT, &port->flags))
-			break;
-
 		if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
 				&dd->dd_flag)))
 			goto st_out;
@@ -2995,14 +2985,6 @@ restart_eh:
 		}
 	}
 
-	/* wait for pci remove to exit */
-	while (1) {
-		if (test_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag))
-			break;
-		msleep_interruptible(1000);
-		if (kthread_should_stop())
-			goto st_out;
-	}
 st_out:
 	return 0;
 }
@@ -4486,7 +4468,6 @@ static void mtip_pci_remove(struct pci_dev *pdev)
 	spin_unlock_irqrestore(&dev_lock, flags);
 
 	kfree(dd);
-	set_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag);
 
 	pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
 	pci_set_drvdata(pdev, NULL);
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index d7a545974c78..3274784008eb 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -142,7 +142,6 @@ enum {
 	MTIP_PF_SVC_THD_ACTIVE_BIT  = 4,
 	MTIP_PF_ISSUE_CMDS_BIT      = 5,
 	MTIP_PF_REBUILD_BIT         = 6,
-	MTIP_PF_SR_CLEANUP_BIT      = 7,
 	MTIP_PF_SVC_THD_STOP_BIT    = 8,
 
 	/* below are bit numbers in 'dd_flag' defined in driver_data */
@@ -150,7 +149,6 @@ enum {
 	MTIP_DDF_REMOVE_PENDING_BIT = 1,
 	MTIP_DDF_OVER_TEMP_BIT      = 2,
 	MTIP_DDF_WRITE_PROTECT_BIT  = 3,
-	MTIP_DDF_REMOVE_DONE_BIT    = 4,
 	MTIP_DDF_CLEANUP_BIT        = 5,
 	MTIP_DDF_RESUME_BIT         = 6,
 	MTIP_DDF_INIT_DONE_BIT      = 7,
-- 
cgit v1.2.3