author     Takashi Iwai <tiwai@suse.de>    2015-07-27 14:43:29 +0200
committer  Takashi Iwai <tiwai@suse.de>    2015-07-27 14:43:29 +0200
commit     527c465a3c8716d93201ae34b7fc52679610596d (patch)
tree       e436b076df094caba9809b9c622374d396e84435 /drivers/block
parent     fbce23a0b95763dfc4961ce6240e055c39f497ed (diff)
parent     5ce000b297a1c1bb126a14b02acb40318b88a903 (diff)
Merge branch 'for-linus' into for-next
... to make it easier to develop HDA ext code.
Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/Kconfig               |   12
-rw-r--r--  drivers/block/Makefile              |    1
-rw-r--r--  drivers/block/cciss.c               |   27
-rw-r--r--  drivers/block/cciss_scsi.c          |    1
-rw-r--r--  drivers/block/drbd/drbd_debugfs.c   |   10
-rw-r--r--  drivers/block/drbd/drbd_int.h       |    1
-rw-r--r--  drivers/block/drbd/drbd_main.c      |   10
-rw-r--r--  drivers/block/drbd/drbd_receiver.c  |    4
-rw-r--r--  drivers/block/loop.c                |   84
-rw-r--r--  drivers/block/loop.h                |    3
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.c   |  228
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.h   |   10
-rw-r--r--  drivers/block/nbd.c                 |   52
-rw-r--r--  drivers/block/null_blk.c            |   18
-rw-r--r--  drivers/block/nvme-core.c           |  928
-rw-r--r--  drivers/block/nvme-scsi.c           | 1230
-rw-r--r--  drivers/block/paride/paride.c       |   57
-rw-r--r--  drivers/block/paride/paride.h       |    2
-rw-r--r--  drivers/block/paride/pcd.c          |    9
-rw-r--r--  drivers/block/paride/pd.c           |   16
-rw-r--r--  drivers/block/paride/pf.c           |    7
-rw-r--r--  drivers/block/paride/pg.c           |    8
-rw-r--r--  drivers/block/paride/pt.c           |    8
-rw-r--r--  drivers/block/pktcdvd.c             |    1
-rw-r--r--  drivers/block/pmem.c                |  262
-rw-r--r--  drivers/block/ps3vram.c             |   34
-rw-r--r--  drivers/block/rbd.c                 |  111
-rw-r--r--  drivers/block/sx8.c                 |    4
-rw-r--r--  drivers/block/virtio_blk.c          |    6
-rw-r--r--  drivers/block/xen-blkback/blkback.c |   23
-rw-r--r--  drivers/block/xen-blkback/common.h  |    6
-rw-r--r--  drivers/block/xen-blkback/xenbus.c  |  167
-rw-r--r--  drivers/block/xen-blkfront.c        |  157
-rw-r--r--  drivers/block/zram/Kconfig          |   10
-rw-r--r--  drivers/block/zram/zcomp.c          |    7
-rw-r--r--  drivers/block/zram/zcomp.h          |    1
-rw-r--r--  drivers/block/zram/zram_drv.c       |  959
-rw-r--r--  drivers/block/zram/zram_drv.h       |   10
38 files changed, 2055 insertions(+), 2429 deletions(-)
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 3ccef9eba6f9..1b8094d4d7af 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -404,18 +404,6 @@ config BLK_DEV_RAM_DAX
and will prevent RAM block device backing store memory from being
allocated from highmem (only a problem for highmem systems).
-config BLK_DEV_PMEM
- tristate "Persistent memory block device support"
- depends on HAS_IOMEM
- help
- Saying Y here will allow you to use a contiguous range of reserved
- memory as one or more persistent block devices.
-
- To compile this driver as a module, choose M here: the module will be
- called 'pmem'.
-
- If unsure, say N.
-
config CDROM_PKTCDVD
tristate "Packet writing on CD/DVD media"
depends on !UML
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 9cc6c18a1c7e..02b688d1438d 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -14,7 +14,6 @@ obj-$(CONFIG_PS3_VRAM) += ps3vram.o
obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o
obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o
obj-$(CONFIG_BLK_DEV_RAM) += brd.o
-obj-$(CONFIG_BLK_DEV_PMEM) += pmem.o
obj-$(CONFIG_BLK_DEV_LOOP) += loop.o
obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o
obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index ff20f192b0f6..0422c47261c3 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -139,8 +139,6 @@ static struct board_type products[] = {
{0x3214103C, "Smart Array E200i", &SA5_access},
{0x3215103C, "Smart Array E200i", &SA5_access},
{0x3237103C, "Smart Array E500", &SA5_access},
- {0x3223103C, "Smart Array P800", &SA5_access},
- {0x3234103C, "Smart Array P400", &SA5_access},
{0x323D103C, "Smart Array P700m", &SA5_access},
};
@@ -574,8 +572,6 @@ static void cciss_procinit(ctlr_info_t *h)
/* List of controllers which cannot be hard reset on kexec with reset_devices */
static u32 unresettable_controller[] = {
- 0x324a103C, /* Smart Array P712m */
- 0x324b103C, /* SmartArray P711m */
0x3223103C, /* Smart Array P800 */
0x3234103C, /* Smart Array P400 */
0x3235103C, /* Smart Array P400i */
@@ -586,12 +582,32 @@ static u32 unresettable_controller[] = {
0x3215103C, /* Smart Array E200i */
0x3237103C, /* Smart Array E500 */
0x323D103C, /* Smart Array P700m */
+ 0x40800E11, /* Smart Array 5i */
0x409C0E11, /* Smart Array 6400 */
0x409D0E11, /* Smart Array 6400 EM */
+ 0x40700E11, /* Smart Array 5300 */
+ 0x40820E11, /* Smart Array 532 */
+ 0x40830E11, /* Smart Array 5312 */
+ 0x409A0E11, /* Smart Array 641 */
+ 0x409B0E11, /* Smart Array 642 */
+ 0x40910E11, /* Smart Array 6i */
};
/* List of controllers which cannot even be soft reset */
static u32 soft_unresettable_controller[] = {
+ 0x40800E11, /* Smart Array 5i */
+ 0x40700E11, /* Smart Array 5300 */
+ 0x40820E11, /* Smart Array 532 */
+ 0x40830E11, /* Smart Array 5312 */
+ 0x409A0E11, /* Smart Array 641 */
+ 0x409B0E11, /* Smart Array 642 */
+ 0x40910E11, /* Smart Array 6i */
+ /* Exclude 640x boards. These are two pci devices in one slot
+ * which share a battery backed cache module. One controls the
+ * cache, the other accesses the cache through the one that controls
+ * it. If we reset the one controlling the cache, the other will
+ * likely not be happy. Just forbid resetting this conjoined mess.
+ */
0x409C0E11, /* Smart Array 6400 */
0x409D0E11, /* Smart Array 6400 EM */
};
@@ -4667,8 +4683,7 @@ static int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
*/
cciss_lookup_board_id(pdev, &board_id);
if (!ctlr_is_resettable(board_id)) {
- dev_warn(&pdev->dev, "Cannot reset Smart Array 640x "
- "due to shared cache module.");
+ dev_warn(&pdev->dev, "Controller not resettable\n");
return -ENODEV;
}
diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index ecd845cd28d8..1537302e56e3 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c
@@ -84,7 +84,6 @@ static struct scsi_host_template cciss_driver_template = {
.show_info = cciss_scsi_show_info,
.queuecommand = cciss_scsi_queue_command,
.this_id = 7,
- .cmd_per_lun = 1,
.use_clustering = DISABLE_CLUSTERING,
/* Can't have eh_bus_reset_handler or eh_host_reset_handler for cciss */
.eh_device_reset_handler= cciss_eh_device_reset_handler,
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c
index a6ee3d750c30..6b88a35fb048 100644
--- a/drivers/block/drbd/drbd_debugfs.c
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -419,14 +419,6 @@ static int in_flight_summary_show(struct seq_file *m, void *pos)
return 0;
}
-/* simple_positive(file->f_path.dentry) respectively debugfs_positive(),
- * but neither is "reachable" from here.
- * So we have our own inline version of it above. :-( */
-static inline int debugfs_positive(struct dentry *dentry)
-{
- return d_really_is_positive(dentry) && !d_unhashed(dentry);
-}
-
/* make sure at *open* time that the respective object won't go away. */
static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, void *),
void *data, struct kref *kref,
@@ -444,7 +436,7 @@ static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, vo
/* serialize with d_delete() */
mutex_lock(&d_inode(parent)->i_mutex);
/* Make sure the object is still alive */
- if (debugfs_positive(file->f_path.dentry)
+ if (simple_positive(file->f_path.dentry)
&& kref_get_unless_zero(kref))
ret = 0;
mutex_unlock(&d_inode(parent)->i_mutex);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index b905e9888b88..efd19c2da9c2 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -38,6 +38,7 @@
#include <linux/mutex.h>
#include <linux/major.h>
#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
#include <linux/genhd.h>
#include <linux/idr.h>
#include <net/tcp.h>
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 81fde9ef7f8e..a1518539b858 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2359,7 +2359,7 @@ static void drbd_cleanup(void)
* @congested_data: User data
* @bdi_bits: Bits the BDI flusher thread is currently interested in
*
- * Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
+ * Returns 1<<WB_async_congested and/or 1<<WB_sync_congested if we are congested.
*/
static int drbd_congested(void *congested_data, int bdi_bits)
{
@@ -2376,14 +2376,14 @@ static int drbd_congested(void *congested_data, int bdi_bits)
}
if (test_bit(CALLBACK_PENDING, &first_peer_device(device)->connection->flags)) {
- r |= (1 << BDI_async_congested);
+ r |= (1 << WB_async_congested);
/* Without good local data, we would need to read from remote,
* and that would need the worker thread as well, which is
* currently blocked waiting for that usermode helper to
* finish.
*/
if (!get_ldev_if_state(device, D_UP_TO_DATE))
- r |= (1 << BDI_sync_congested);
+ r |= (1 << WB_sync_congested);
else
put_ldev(device);
r &= bdi_bits;
@@ -2399,9 +2399,9 @@ static int drbd_congested(void *congested_data, int bdi_bits)
reason = 'b';
}
- if (bdi_bits & (1 << BDI_async_congested) &&
+ if (bdi_bits & (1 << WB_async_congested) &&
test_bit(NET_CONGESTED, &first_peer_device(device)->connection->flags)) {
- r |= (1 << BDI_async_congested);
+ r |= (1 << WB_async_congested);
reason = reason == 'b' ? 'a' : 'n';
}
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index cee20354ac37..c097909c589c 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -598,7 +598,7 @@ static struct socket *drbd_try_connect(struct drbd_connection *connection)
memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);
what = "sock_create_kern";
- err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
+ err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family,
SOCK_STREAM, IPPROTO_TCP, &sock);
if (err < 0) {
sock = NULL;
@@ -693,7 +693,7 @@ static int prepare_listen_socket(struct drbd_connection *connection, struct acce
memcpy(&my_addr, &connection->my_addr, my_addr_len);
what = "sock_create_kern";
- err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
+ err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family,
SOCK_STREAM, IPPROTO_TCP, &s_listen);
if (err) {
s_listen = NULL;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index d7173cb1ea76..f7a4c9d7f721 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -86,8 +86,6 @@ static DEFINE_MUTEX(loop_index_mutex);
static int max_part;
static int part_shift;
-static struct workqueue_struct *loop_wq;
-
static int transfer_xor(struct loop_device *lo, int cmd,
struct page *raw_page, unsigned raw_off,
struct page *loop_page, unsigned loop_off,
@@ -476,6 +474,28 @@ static int loop_flush(struct loop_device *lo)
return loop_switch(lo, NULL);
}
+static void loop_reread_partitions(struct loop_device *lo,
+ struct block_device *bdev)
+{
+ int rc;
+
+ /*
+ * bd_mutex has been held already in release path, so don't
+ * acquire it if this function is called in such case.
+ *
+ * If the reread partition isn't from release path, lo_refcnt
+ * must be at least one and it can only become zero when the
+ * current holder is released.
+ */
+ if (!atomic_read(&lo->lo_refcnt))
+ rc = __blkdev_reread_part(bdev);
+ else
+ rc = blkdev_reread_part(bdev);
+ if (rc)
+ pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
+ __func__, lo->lo_number, lo->lo_file_name, rc);
+}
+
/*
* loop_change_fd switched the backing store of a loopback device to
* a new file. This is useful for operating system installers to free up
@@ -524,7 +544,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
fput(old_file);
if (lo->lo_flags & LO_FLAGS_PARTSCAN)
- ioctl_by_bdev(bdev, BLKRRPART, 0);
+ loop_reread_partitions(lo, bdev);
return 0;
out_putf:
@@ -568,7 +588,7 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
spin_lock_irq(&lo->lo_lock);
if (lo->lo_backing_file)
- p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1);
+ p = file_path(lo->lo_backing_file, buf, PAGE_SIZE - 1);
spin_unlock_irq(&lo->lo_lock);
if (IS_ERR_OR_NULL(p))
@@ -725,6 +745,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
size = get_loop_size(lo, file);
if ((loff_t)(sector_t)size != size)
goto out_putf;
+ error = -ENOMEM;
+ lo->wq = alloc_workqueue("kloopd%d",
+ WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 16,
+ lo->lo_number);
+ if (!lo->wq)
+ goto out_putf;
error = 0;
@@ -755,7 +781,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
if (part_shift)
lo->lo_flags |= LO_FLAGS_PARTSCAN;
if (lo->lo_flags & LO_FLAGS_PARTSCAN)
- ioctl_by_bdev(bdev, BLKRRPART, 0);
+ loop_reread_partitions(lo, bdev);
/* Grab the block_device to prevent its destruction after we
* put /dev/loopXX inode. Later in loop_clr_fd() we bdput(bdev).
@@ -827,7 +853,7 @@ static int loop_clr_fd(struct loop_device *lo)
* <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
* command to fail with EBUSY.
*/
- if (lo->lo_refcnt > 1) {
+ if (atomic_read(&lo->lo_refcnt) > 1) {
lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
mutex_unlock(&lo->lo_ctl_mutex);
return 0;
@@ -836,6 +862,9 @@ static int loop_clr_fd(struct loop_device *lo)
if (filp == NULL)
return -EINVAL;
+ /* freeze request queue during the transition */
+ blk_mq_freeze_queue(lo->lo_queue);
+
spin_lock_irq(&lo->lo_lock);
lo->lo_state = Lo_rundown;
lo->lo_backing_file = NULL;
@@ -867,11 +896,15 @@ static int loop_clr_fd(struct loop_device *lo)
lo->lo_state = Lo_unbound;
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
+ blk_mq_unfreeze_queue(lo->lo_queue);
+
if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
- ioctl_by_bdev(bdev, BLKRRPART, 0);
+ loop_reread_partitions(lo, bdev);
lo->lo_flags = 0;
if (!part_shift)
lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
+ destroy_workqueue(lo->wq);
+ lo->wq = NULL;
mutex_unlock(&lo->lo_ctl_mutex);
/*
* Need not hold lo_ctl_mutex to fput backing file.
@@ -943,7 +976,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
!(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
lo->lo_flags |= LO_FLAGS_PARTSCAN;
lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
- ioctl_by_bdev(lo->lo_device, BLKRRPART, 0);
+ loop_reread_partitions(lo, lo->lo_device);
}
lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
@@ -1324,9 +1357,7 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
goto out;
}
- mutex_lock(&lo->lo_ctl_mutex);
- lo->lo_refcnt++;
- mutex_unlock(&lo->lo_ctl_mutex);
+ atomic_inc(&lo->lo_refcnt);
out:
mutex_unlock(&loop_index_mutex);
return err;
@@ -1337,11 +1368,10 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
struct loop_device *lo = disk->private_data;
int err;
- mutex_lock(&lo->lo_ctl_mutex);
-
- if (--lo->lo_refcnt)
- goto out;
+ if (atomic_dec_return(&lo->lo_refcnt))
+ return;
+ mutex_lock(&lo->lo_ctl_mutex);
if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
/*
* In autoclear mode, stop the loop thread
@@ -1358,7 +1388,6 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
loop_flush(lo);
}
-out:
mutex_unlock(&lo->lo_ctl_mutex);
}
@@ -1425,9 +1454,13 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct loop_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+ struct loop_device *lo = cmd->rq->q->queuedata;
blk_mq_start_request(bd->rq);
+ if (lo->lo_state != Lo_bound)
+ return -EIO;
+
if (cmd->rq->cmd_flags & REQ_WRITE) {
struct loop_device *lo = cmd->rq->q->queuedata;
bool need_sched = true;
@@ -1441,9 +1474,9 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
spin_unlock_irq(&lo->lo_lock);
if (need_sched)
- queue_work(loop_wq, &lo->write_work);
+ queue_work(lo->wq, &lo->write_work);
} else {
- queue_work(loop_wq, &cmd->read_work);
+ queue_work(lo->wq, &cmd->read_work);
}
return BLK_MQ_RQ_QUEUE_OK;
@@ -1455,9 +1488,6 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
struct loop_device *lo = cmd->rq->q->queuedata;
int ret = -EIO;
- if (lo->lo_state != Lo_bound)
- goto failed;
-
if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY))
goto failed;
@@ -1594,6 +1624,7 @@ static int loop_add(struct loop_device **l, int i)
disk->flags |= GENHD_FL_NO_PART_SCAN;
disk->flags |= GENHD_FL_EXT_DEVT;
mutex_init(&lo->lo_ctl_mutex);
+ atomic_set(&lo->lo_refcnt, 0);
lo->lo_number = i;
spin_lock_init(&lo->lo_lock);
disk->major = LOOP_MAJOR;
@@ -1711,7 +1742,7 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
mutex_unlock(&lo->lo_ctl_mutex);
break;
}
- if (lo->lo_refcnt > 0) {
+ if (atomic_read(&lo->lo_refcnt) > 0) {
ret = -EBUSY;
mutex_unlock(&lo->lo_ctl_mutex);
break;
@@ -1806,13 +1837,6 @@ static int __init loop_init(void)
goto misc_out;
}
- loop_wq = alloc_workqueue("kloopd",
- WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_UNBOUND, 0);
- if (!loop_wq) {
- err = -ENOMEM;
- goto misc_out;
- }
-
blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
THIS_MODULE, loop_probe, NULL, NULL);
@@ -1850,8 +1874,6 @@ static void __exit loop_exit(void)
blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
unregister_blkdev(LOOP_MAJOR, "loop");
- destroy_workqueue(loop_wq);
-
misc_deregister(&loop_misc);
}
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index 301c27f8323f..25e8997ed246 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -28,7 +28,7 @@ struct loop_func_table;
struct loop_device {
int lo_number;
- int lo_refcnt;
+ atomic_t lo_refcnt;
loff_t lo_offset;
loff_t lo_sizelimit;
int lo_flags;
@@ -54,6 +54,7 @@ struct loop_device {
gfp_t old_gfp_mask;
spinlock_t lo_lock;
+ struct workqueue_struct *wq;
struct list_head write_cmd_head;
struct work_struct write_work;
bool write_started;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 3bd7ca9853a8..4a2ef09e6704 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -163,12 +163,6 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev)
else
dev_warn(&dd->pdev->dev,
"%s: dd->queue is NULL\n", __func__);
- if (dd->port) {
- set_bit(MTIP_PF_SR_CLEANUP_BIT, &dd->port->flags);
- wake_up_interruptible(&dd->port->svc_wait);
- } else
- dev_warn(&dd->pdev->dev,
- "%s: dd->port is NULL\n", __func__);
return true; /* device removed */
}
@@ -269,8 +263,11 @@ static int mtip_hba_reset(struct driver_data *dd)
/* Flush */
readl(dd->mmio + HOST_CTL);
- /* Spin for up to 2 seconds, waiting for reset acknowledgement */
- timeout = jiffies + msecs_to_jiffies(2000);
+ /*
+ * Spin for up to 10 seconds waiting for reset acknowledgement. Spec
+ * is 1 sec but in LUN failure conditions, up to 10 secs are required
+ */
+ timeout = jiffies + msecs_to_jiffies(10000);
do {
mdelay(10);
if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))
@@ -623,8 +620,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
- if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) &&
- test_bit(MTIP_TAG_INTERNAL, port->allocated)) {
+ if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL);
dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
@@ -896,6 +892,10 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
/* Acknowledge the interrupt status on the port.*/
port_stat = readl(port->mmio + PORT_IRQ_STAT);
+ if (unlikely(port_stat == 0xFFFFFFFF)) {
+ mtip_check_surprise_removal(dd->pdev);
+ return IRQ_HANDLED;
+ }
writel(port_stat, port->mmio + PORT_IRQ_STAT);
/* Demux port status */
@@ -991,15 +991,10 @@ static bool mtip_pause_ncq(struct mtip_port *port,
reply = port->rxfis + RX_FIS_D2H_REG;
task_file_data = readl(port->mmio+PORT_TFDATA);
- if (fis->command == ATA_CMD_SEC_ERASE_UNIT)
- clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
-
if ((task_file_data & 1))
return false;
if (fis->command == ATA_CMD_SEC_ERASE_PREP) {
- set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
- set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
port->ic_pause_timer = jiffies;
return true;
} else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) &&
@@ -1011,8 +1006,10 @@ static bool mtip_pause_ncq(struct mtip_port *port,
((fis->command == 0xFC) &&
(fis->features == 0x27 || fis->features == 0x72 ||
fis->features == 0x62 || fis->features == 0x26))) {
+ clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
/* Com reset after secure erase or lowlevel format */
mtip_restart_port(port);
+ clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
return false;
}
@@ -1112,9 +1109,10 @@ static int mtip_exec_internal_command(struct mtip_port *port,
int_cmd = mtip_get_int_command(dd);
set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
- port->ic_pause_timer = 0;
- clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
+ if (fis->command == ATA_CMD_SEC_ERASE_PREP)
+ set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
+
clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
if (atomic == GFP_KERNEL) {
@@ -1251,11 +1249,11 @@ static int mtip_exec_internal_command(struct mtip_port *port,
exec_ic_exit:
/* Clear the allocated and active bits for the internal command. */
mtip_put_int_command(dd, int_cmd);
+ clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
if (rv >= 0 && mtip_pause_ncq(port, fis)) {
/* NCQ paused */
return rv;
}
- clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
wake_up_interruptible(&port->svc_wait);
return rv;
@@ -2625,18 +2623,6 @@ static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf,
readl(dd->mmio + HOST_IRQ_STAT));
size += sprintf(&buf[size], "\n");
- size += sprintf(&buf[size], "L/ Allocated : [ 0x");
-
- for (n = dd->slot_groups-1; n >= 0; n--) {
- if (sizeof(long) > sizeof(u32))
- group_allocated =
- dd->port->allocated[n/2] >> (32*(n&1));
- else
- group_allocated = dd->port->allocated[n];
- size += sprintf(&buf[size], "%08X ", group_allocated);
- }
- size += sprintf(&buf[size], "]\n");
-
size += sprintf(&buf[size], "L/ Commands in Q : [ 0x");
for (n = dd->slot_groups-1; n >= 0; n--) {
@@ -2780,48 +2766,6 @@ static void mtip_hw_debugfs_exit(struct driver_data *dd)
debugfs_remove_recursive(dd->dfs_node);
}
-static int mtip_free_orphan(struct driver_data *dd)
-{
- struct kobject *kobj;
-
- if (dd->bdev) {
- if (dd->bdev->bd_holders >= 1)
- return -2;
-
- bdput(dd->bdev);
- dd->bdev = NULL;
- }
-
- mtip_hw_debugfs_exit(dd);
-
- spin_lock(&rssd_index_lock);
- ida_remove(&rssd_index_ida, dd->index);
- spin_unlock(&rssd_index_lock);
-
- if (!test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag) &&
- test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) {
- put_disk(dd->disk);
- } else {
- if (dd->disk) {
- kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
- if (kobj) {
- mtip_hw_sysfs_exit(dd, kobj);
- kobject_put(kobj);
- }
- del_gendisk(dd->disk);
- dd->disk = NULL;
- }
- if (dd->queue) {
- dd->queue->queuedata = NULL;
- blk_cleanup_queue(dd->queue);
- blk_mq_free_tag_set(&dd->tags);
- dd->queue = NULL;
- }
- }
- kfree(dd);
- return 0;
-}
-
/*
* Perform any init/resume time hardware setup
*
@@ -2944,7 +2888,6 @@ static int mtip_ftl_rebuild_poll(struct driver_data *dd)
mtip_block_initialize(dd);
return 0;
}
- ssleep(10);
} while (time_before(jiffies, timeout));
/* Check for timeout */
@@ -2969,7 +2912,6 @@ static int mtip_service_thread(void *data)
unsigned long slot, slot_start, slot_wrap;
unsigned int num_cmd_slots = dd->slot_groups * 32;
struct mtip_port *port = dd->port;
- int ret;
while (1) {
if (kthread_should_stop() ||
@@ -2990,10 +2932,6 @@ static int mtip_service_thread(void *data)
test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags))
goto st_out;
- /* If I am an orphan, start self cleanup */
- if (test_bit(MTIP_PF_SR_CLEANUP_BIT, &port->flags))
- break;
-
if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
&dd->dd_flag)))
goto st_out;
@@ -3047,26 +2985,6 @@ restart_eh:
}
}
- /* wait for pci remove to exit */
- while (1) {
- if (test_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag))
- break;
- msleep_interruptible(1000);
- if (kthread_should_stop())
- goto st_out;
- }
-
- while (1) {
- ret = mtip_free_orphan(dd);
- if (!ret) {
- /* NOTE: All data structures are invalid, do not
- * access any here */
- return 0;
- }
- msleep_interruptible(1000);
- if (kthread_should_stop())
- goto st_out;
- }
st_out:
return 0;
}
@@ -3394,6 +3312,7 @@ static int mtip_hw_exit(struct driver_data *dd)
/* Release the IRQ. */
irq_set_affinity_hint(dd->pdev->irq, NULL);
devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
+ msleep(1000);
/* Free dma regions */
mtip_dma_free(dd);
@@ -3699,6 +3618,26 @@ static const struct block_device_operations mtip_block_ops = {
.owner = THIS_MODULE
};
+static inline bool is_se_active(struct driver_data *dd)
+{
+ if (unlikely(test_bit(MTIP_PF_SE_ACTIVE_BIT, &dd->port->flags))) {
+ if (dd->port->ic_pause_timer) {
+ unsigned long to = dd->port->ic_pause_timer +
+ msecs_to_jiffies(1000);
+ if (time_after(jiffies, to)) {
+ clear_bit(MTIP_PF_SE_ACTIVE_BIT,
+ &dd->port->flags);
+ clear_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag);
+ dd->port->ic_pause_timer = 0;
+ wake_up_interruptible(&dd->port->svc_wait);
+ return false;
+ }
+ }
+ return true;
+ }
+ return false;
+}
+
/*
* Block layer make request function.
*
@@ -3716,6 +3655,9 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
unsigned int nents;
+ if (is_se_active(dd))
+ return -ENODATA;
+
if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) {
if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
&dd->dd_flag))) {
@@ -3900,7 +3842,8 @@ static int mtip_block_initialize(struct driver_data *dd)
dd->disk->driverfs_dev = &dd->pdev->dev;
dd->disk->major = dd->major;
- dd->disk->first_minor = dd->instance * MTIP_MAX_MINORS;
+ dd->disk->first_minor = index * MTIP_MAX_MINORS;
+ dd->disk->minors = MTIP_MAX_MINORS;
dd->disk->fops = &mtip_block_ops;
dd->disk->private_data = dd;
dd->index = index;
@@ -4066,52 +4009,51 @@ static int mtip_block_remove(struct driver_data *dd)
{
struct kobject *kobj;
- if (!dd->sr) {
- mtip_hw_debugfs_exit(dd);
+ mtip_hw_debugfs_exit(dd);
- if (dd->mtip_svc_handler) {
- set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags);
- wake_up_interruptible(&dd->port->svc_wait);
- kthread_stop(dd->mtip_svc_handler);
- }
+ if (dd->mtip_svc_handler) {
+ set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags);
+ wake_up_interruptible(&dd->port->svc_wait);
+ kthread_stop(dd->mtip_svc_handler);
+ }
- /* Clean up the sysfs attributes, if created */
- if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) {
- kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
- if (kobj) {
- mtip_hw_sysfs_exit(dd, kobj);
- kobject_put(kobj);
- }
+ /* Clean up the sysfs attributes, if created */
+ if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) {
+ kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
+ if (kobj) {
+ mtip_hw_sysfs_exit(dd, kobj);
+ kobject_put(kobj);
}
+ }
+ if (!dd->sr)
mtip_standby_drive(dd);
-
- /*
- * Delete our gendisk structure. This also removes the device
- * from /dev
- */
- if (dd->bdev) {
- bdput(dd->bdev);
- dd->bdev = NULL;
- }
- if (dd->disk) {
- if (dd->disk->queue) {
- del_gendisk(dd->disk);
- blk_cleanup_queue(dd->queue);
- blk_mq_free_tag_set(&dd->tags);
- dd->queue = NULL;
- } else
- put_disk(dd->disk);
- }
- dd->disk = NULL;
-
- spin_lock(&rssd_index_lock);
- ida_remove(&rssd_index_ida, dd->index);
- spin_unlock(&rssd_index_lock);
- } else {
+ else
dev_info(&dd->pdev->dev, "device %s surprise removal\n",
dd->disk->disk_name);
+
+ /*
+ * Delete our gendisk structure. This also removes the device
+ * from /dev
+ */
+ if (dd->bdev) {
+ bdput(dd->bdev);
+ dd->bdev = NULL;
}
+ if (dd->disk) {
+ del_gendisk(dd->disk);
+ if (dd->disk->queue) {
+ blk_cleanup_queue(dd->queue);
+ blk_mq_free_tag_set(&dd->tags);
+ dd->queue = NULL;
+ }
+ put_disk(dd->disk);
+ }
+ dd->disk = NULL;
+
+ spin_lock(&rssd_index_lock);
+ ida_remove(&rssd_index_ida, dd->index);
+ spin_unlock(&rssd_index_lock);
/* De-initialize the protocol layer. */
mtip_hw_exit(dd);
@@ -4140,12 +4082,12 @@ static int mtip_block_shutdown(struct driver_data *dd)
dev_info(&dd->pdev->dev,
"Shutting down %s ...\n", dd->disk->disk_name);
+ del_gendisk(dd->disk);
if (dd->disk->queue) {
- del_gendisk(dd->disk);
blk_cleanup_queue(dd->queue);
blk_mq_free_tag_set(&dd->tags);
- } else
- put_disk(dd->disk);
+ }
+ put_disk(dd->disk);
dd->disk = NULL;
dd->queue = NULL;
}
@@ -4507,6 +4449,7 @@ static void mtip_pci_remove(struct pci_dev *pdev)
"Completion workers still active!\n");
}
+ blk_mq_stop_hw_queues(dd->queue);
/* Clean up the block layer. */
mtip_block_remove(dd);
@@ -4524,10 +4467,7 @@ static void mtip_pci_remove(struct pci_dev *pdev)
list_del_init(&dd->remove_list);
spin_unlock_irqrestore(&dev_lock, flags);
- if (!dd->sr)
- kfree(dd);
- else
- set_bit(MTIP_DDF_REMOVE_DONE_BIT, &dd->dd_flag);
+ kfree(dd);
pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
pci_set_drvdata(pdev, NULL);
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index ba1b31ee22ec..3274784008eb 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -142,7 +142,6 @@ enum {
MTIP_PF_SVC_THD_ACTIVE_BIT = 4,
MTIP_PF_ISSUE_CMDS_BIT = 5,
MTIP_PF_REBUILD_BIT = 6,
- MTIP_PF_SR_CLEANUP_BIT = 7,
MTIP_PF_SVC_THD_STOP_BIT = 8,
/* below are bit numbers in 'dd_flag' defined in driver_data */
@@ -150,7 +149,6 @@ enum {
MTIP_DDF_REMOVE_PENDING_BIT = 1,
MTIP_DDF_OVER_TEMP_BIT = 2,
MTIP_DDF_WRITE_PROTECT_BIT = 3,
- MTIP_DDF_REMOVE_DONE_BIT = 4,
MTIP_DDF_CLEANUP_BIT = 5,
MTIP_DDF_RESUME_BIT = 6,
MTIP_DDF_INIT_DONE_BIT = 7,
@@ -412,19 +410,13 @@ struct mtip_port {
* by the DMA when the driver issues internal commands.
*/
dma_addr_t sector_buffer_dma;
- /*
- * Bit significant, used to determine if a command slot has
- * been allocated. i.e. the slot is in use. Bits are cleared
- * when the command slot and all associated data structures
- * are no longer needed.
- */
+
u16 *log_buf;
dma_addr_t log_buf_dma;
u8 *smart_buf;
dma_addr_t smart_buf_dma;
- unsigned long allocated[SLOTBITS_IN_LONGS];
/*
* used to queue commands when an internal command is in progress
* or error handling is active
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 39e5f7fae3ef..0e385d8e9b86 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -230,29 +230,40 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
int result, flags;
struct nbd_request request;
unsigned long size = blk_rq_bytes(req);
+ u32 type;
+
+ if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+ type = NBD_CMD_DISC;
+ else if (req->cmd_flags & REQ_DISCARD)
+ type = NBD_CMD_TRIM;
+ else if (req->cmd_flags & REQ_FLUSH)
+ type = NBD_CMD_FLUSH;
+ else if (rq_data_dir(req) == WRITE)
+ type = NBD_CMD_WRITE;
+ else
+ type = NBD_CMD_READ;
memset(&request, 0, sizeof(request));
request.magic = htonl(NBD_REQUEST_MAGIC);
- request.type = htonl(nbd_cmd(req));
-
- if (nbd_cmd(req) != NBD_CMD_FLUSH && nbd_cmd(req) != NBD_CMD_DISC) {
+ request.type = htonl(type);
+ if (type != NBD_CMD_FLUSH && type != NBD_CMD_DISC) {
request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
request.len = htonl(size);
}
memcpy(request.handle, &req, sizeof(req));
dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
- req, nbdcmd_to_ascii(nbd_cmd(req)),
+ req, nbdcmd_to_ascii(type),
(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
result = sock_xmit(nbd, 1, &request, sizeof(request),
- (nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
+ (type == NBD_CMD_WRITE) ? MSG_MORE : 0);
if (result <= 0) {
dev_err(disk_to_dev(nbd->disk),
"Send control failed (result %d)\n", result);
return -EIO;
}
- if (nbd_cmd(req) == NBD_CMD_WRITE) {
+ if (type == NBD_CMD_WRITE) {
struct req_iterator iter;
struct bio_vec bvec;
/*
@@ -352,7 +363,7 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
}
dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
- if (nbd_cmd(req) == NBD_CMD_READ) {
+ if (rq_data_dir(req) != WRITE) {
struct req_iterator iter;
struct bio_vec bvec;
@@ -452,23 +463,11 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
if (req->cmd_type != REQ_TYPE_FS)
goto error_out;
- nbd_cmd(req) = NBD_CMD_READ;
- if (rq_data_dir(req) == WRITE) {
- if ((req->cmd_flags & REQ_DISCARD)) {
- WARN_ON(!(nbd->flags & NBD_FLAG_SEND_TRIM));
- nbd_cmd(req) = NBD_CMD_TRIM;
- } else
- nbd_cmd(req) = NBD_CMD_WRITE;
- if (nbd->flags & NBD_FLAG_READ_ONLY) {
- dev_err(disk_to_dev(nbd->disk),
- "Write on read-only\n");
- goto error_out;
- }
- }
-
- if (req->cmd_flags & REQ_FLUSH) {
- BUG_ON(unlikely(blk_rq_sectors(req)));
- nbd_cmd(req) = NBD_CMD_FLUSH;
+ if (rq_data_dir(req) == WRITE &&
+ (nbd->flags & NBD_FLAG_READ_ONLY)) {
+ dev_err(disk_to_dev(nbd->disk),
+ "Write on read-only\n");
+ goto error_out;
}
req->errors = 0;
@@ -592,8 +591,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
fsync_bdev(bdev);
mutex_lock(&nbd->tx_lock);
blk_rq_init(NULL, &sreq);
- sreq.cmd_type = REQ_TYPE_SPECIAL;
- nbd_cmd(&sreq) = NBD_CMD_DISC;
+ sreq.cmd_type = REQ_TYPE_DRV_PRIV;
/* Check again after getting mutex back. */
if (!nbd->sock)
@@ -713,7 +711,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
bdev->bd_inode->i_size = 0;
set_capacity(nbd->disk, 0);
if (max_part > 0)
- ioctl_by_bdev(bdev, BLKRRPART, 0);
+ blkdev_reread_part(bdev);
if (nbd->disconnect) /* user requested, ignore socket errors */
return 0;
return nbd->harderror;
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 65cd61a4145e..69de41a87b74 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -99,7 +99,7 @@ static int null_set_queue_mode(const char *str, const struct kernel_param *kp)
return null_param_store_val(str, &queue_mode, NULL_Q_BIO, NULL_Q_MQ);
}
-static struct kernel_param_ops null_queue_mode_param_ops = {
+static const struct kernel_param_ops null_queue_mode_param_ops = {
.set = null_set_queue_mode,
.get = param_get_int,
};
@@ -127,7 +127,7 @@ static int null_set_irqmode(const char *str, const struct kernel_param *kp)
NULL_IRQ_TIMER);
}
-static struct kernel_param_ops null_irqmode_param_ops = {
+static const struct kernel_param_ops null_irqmode_param_ops = {
.set = null_set_irqmode,
.get = param_get_int,
};
@@ -243,6 +243,17 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
cmd = container_of(entry, struct nullb_cmd, ll_list);
entry = entry->next;
end_cmd(cmd);
+
+ if (cmd->rq) {
+ struct request_queue *q = cmd->rq->q;
+
+ if (!q->mq_ops && blk_queue_stopped(q)) {
+ spin_lock(q->queue_lock);
+ if (blk_queue_stopped(q))
+ blk_start_queue(q);
+ spin_unlock(q->queue_lock);
+ }
+ }
} while (entry);
}
@@ -257,7 +268,7 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
if (llist_add(&cmd->ll_list, &cq->list)) {
ktime_t kt = ktime_set(0, completion_nsec);
- hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL);
+ hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL_PINNED);
}
put_cpu();
@@ -334,6 +345,7 @@ static int null_rq_prep_fn(struct request_queue *q, struct request *req)
req->special = cmd;
return BLKPREP_OK;
}
+ blk_stop_queue(q);
return BLKPREP_DEFER;
}
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 683dff272562..7920c2741b47 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -29,6 +29,7 @@
#include <linux/kdev_t.h>
#include <linux/kthread.h>
#include <linux/kernel.h>
+#include <linux/list_sort.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -80,6 +81,7 @@ static wait_queue_head_t nvme_kthread_wait;
static struct class *nvme_class;
static void nvme_reset_failed_dev(struct work_struct *ws);
+static int nvme_reset(struct nvme_dev *dev);
static int nvme_process_cq(struct nvme_queue *nvmeq);
struct async_cmd_info {
@@ -102,6 +104,7 @@ struct nvme_queue {
spinlock_t q_lock;
struct nvme_command *sq_cmds;
volatile struct nvme_completion *cqes;
+ struct blk_mq_tags **tags;
dma_addr_t sq_dma_addr;
dma_addr_t cq_dma_addr;
u32 __iomem *q_db;
@@ -114,7 +117,6 @@ struct nvme_queue {
u8 cq_phase;
u8 cqe_seen;
struct async_cmd_info cmdinfo;
- struct blk_mq_hw_ctx *hctx;
};
/*
@@ -182,12 +184,22 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
struct nvme_dev *dev = data;
struct nvme_queue *nvmeq = dev->queues[0];
- WARN_ON(nvmeq->hctx);
- nvmeq->hctx = hctx;
+ WARN_ON(hctx_idx != 0);
+ WARN_ON(dev->admin_tagset.tags[0] != hctx->tags);
+ WARN_ON(nvmeq->tags);
+
hctx->driver_data = nvmeq;
+ nvmeq->tags = &dev->admin_tagset.tags[0];
return 0;
}
+static void nvme_admin_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
+{
+ struct nvme_queue *nvmeq = hctx->driver_data;
+
+ nvmeq->tags = NULL;
+}
+
static int nvme_admin_init_request(void *data, struct request *req,
unsigned int hctx_idx, unsigned int rq_idx,
unsigned int numa_node)
@@ -201,27 +213,16 @@ static int nvme_admin_init_request(void *data, struct request *req,
return 0;
}
-static void nvme_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
-{
- struct nvme_queue *nvmeq = hctx->driver_data;
-
- nvmeq->hctx = NULL;
-}
-
static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
struct nvme_dev *dev = data;
- struct nvme_queue *nvmeq = dev->queues[
- (hctx_idx % dev->queue_count) + 1];
-
- if (!nvmeq->hctx)
- nvmeq->hctx = hctx;
+ struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1];
- /* nvmeq queues are shared between namespaces. We assume here that
- * blk-mq map the tags so they match up with the nvme queue tags. */
- WARN_ON(nvmeq->hctx->tags != hctx->tags);
+ if (!nvmeq->tags)
+ nvmeq->tags = &dev->tagset.tags[hctx_idx];
+ WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags);
hctx->driver_data = nvmeq;
return 0;
}
@@ -307,9 +308,16 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ)
++nvmeq->dev->event_limit;
- if (status == NVME_SC_SUCCESS)
- dev_warn(nvmeq->q_dmadev,
- "async event result %08x\n", result);
+ if (status != NVME_SC_SUCCESS)
+ return;
+
+ switch (result & 0xff07) {
+ case NVME_AER_NOTICE_NS_CHANGED:
+ dev_info(nvmeq->q_dmadev, "rescanning\n");
+ schedule_work(&nvmeq->dev->scan_work);
+ default:
+ dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result);
+ }
}
static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
@@ -320,7 +328,7 @@ static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
u16 status = le16_to_cpup(&cqe->status) >> 1;
u32 result = le32_to_cpup(&cqe->result);
- blk_mq_free_hctx_request(nvmeq->hctx, req);
+ blk_mq_free_request(req);
dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result);
++nvmeq->dev->abort_limit;
@@ -333,14 +341,13 @@ static void async_completion(struct nvme_queue *nvmeq, void *ctx,
cmdinfo->result = le32_to_cpup(&cqe->result);
cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
queue_kthread_work(cmdinfo->worker, &cmdinfo->work);
- blk_mq_free_hctx_request(nvmeq->hctx, cmdinfo->req);
+ blk_mq_free_request(cmdinfo->req);
}
static inline struct nvme_cmd_info *get_cmd_from_tag(struct nvme_queue *nvmeq,
unsigned int tag)
{
- struct blk_mq_hw_ctx *hctx = nvmeq->hctx;
- struct request *req = blk_mq_tag_to_rq(hctx->tags, tag);
+ struct request *req = blk_mq_tag_to_rq(*nvmeq->tags, tag);
return blk_mq_rq_to_pdu(req);
}
@@ -445,7 +452,7 @@ static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev,
(unsigned long) rq, gfp);
}
-void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
+static void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
{
const int last_prp = dev->page_size / 8 - 1;
int i;
@@ -605,22 +612,33 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
spin_unlock_irqrestore(req->q->queue_lock, flags);
return;
}
- req->errors = nvme_error_status(status);
+ if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
+ if (cmd_rq->ctx == CMD_CTX_CANCELLED)
+ req->errors = -EINTR;
+ else
+ req->errors = status;
+ } else {
+ req->errors = nvme_error_status(status);
+ }
} else
req->errors = 0;
+ if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
+ u32 result = le32_to_cpup(&cqe->result);
+ req->special = (void *)(uintptr_t)result;
+ }
if (cmd_rq->aborted)
- dev_warn(&nvmeq->dev->pci_dev->dev,
+ dev_warn(nvmeq->dev->dev,
"completing aborted command with status:%04x\n",
status);
if (iod->nents) {
- dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg, iod->nents,
+ dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents,
rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
if (blk_integrity_rq(req)) {
if (!rq_data_dir(req))
nvme_dif_remap(req, nvme_dif_complete);
- dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->meta_sg, 1,
+ dma_unmap_sg(nvmeq->dev->dev, iod->meta_sg, 1,
rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
}
}
@@ -630,8 +648,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
}
/* length is in bytes. gfp flags indicates whether we may sleep. */
-int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
- gfp_t gfp)
+static int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod,
+ int total_len, gfp_t gfp)
{
struct dma_pool *pool;
int length = total_len;
@@ -709,6 +727,23 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
return total_len;
}
+static void nvme_submit_priv(struct nvme_queue *nvmeq, struct request *req,
+ struct nvme_iod *iod)
+{
+ struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail];
+
+ memcpy(cmnd, req->cmd, sizeof(struct nvme_command));
+ cmnd->rw.command_id = req->tag;
+ if (req->nr_phys_segments) {
+ cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
+ cmnd->rw.prp2 = cpu_to_le64(iod->first_dma);
+ }
+
+ if (++nvmeq->sq_tail == nvmeq->q_depth)
+ nvmeq->sq_tail = 0;
+ writel(nvmeq->sq_tail, nvmeq->q_db);
+}
+
/*
* We reuse the small pool to allocate the 16-byte range here as it is not
* worth having a special pool for these or additional cases to handle freeing
@@ -807,11 +842,15 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod,
return 0;
}
+/*
+ * NOTE: ns is NULL when called on the admin queue.
+ */
static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct nvme_ns *ns = hctx->queue->queuedata;
struct nvme_queue *nvmeq = hctx->driver_data;
+ struct nvme_dev *dev = nvmeq->dev;
struct request *req = bd->rq;
struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
struct nvme_iod *iod;
@@ -822,15 +861,16 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
* unless this namespace is formated such that the metadata can be
* stripped/generated by the controller with PRACT=1.
*/
- if (ns->ms && !blk_integrity_rq(req)) {
- if (!(ns->pi_type && ns->ms == 8)) {
+ if (ns && ns->ms && !blk_integrity_rq(req)) {
+ if (!(ns->pi_type && ns->ms == 8) &&
+ req->cmd_type != REQ_TYPE_DRV_PRIV) {
req->errors = -EFAULT;
blk_mq_complete_request(req);
return BLK_MQ_RQ_QUEUE_OK;
}
}
- iod = nvme_alloc_iod(req, ns->dev, GFP_ATOMIC);
+ iod = nvme_alloc_iod(req, dev, GFP_ATOMIC);
if (!iod)
return BLK_MQ_RQ_QUEUE_BUSY;
@@ -841,8 +881,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
* as it is not worth having a special pool for these or
* additional cases to handle freeing the iod.
*/
- range = dma_pool_alloc(nvmeq->dev->prp_small_pool,
- GFP_ATOMIC,
+ range = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC,
&iod->first_dma);
if (!range)
goto retry_cmd;
@@ -860,9 +899,8 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
goto retry_cmd;
if (blk_rq_bytes(req) !=
- nvme_setup_prps(nvmeq->dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) {
- dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg,
- iod->nents, dma_dir);
+ nvme_setup_prps(dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) {
+ dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
goto retry_cmd;
}
if (blk_integrity_rq(req)) {
@@ -884,7 +922,9 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
nvme_set_info(cmd, iod, req_completion);
spin_lock_irq(&nvmeq->q_lock);
- if (req->cmd_flags & REQ_DISCARD)
+ if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+ nvme_submit_priv(nvmeq, req, iod);
+ else if (req->cmd_flags & REQ_DISCARD)
nvme_submit_discard(nvmeq, ns, req, iod);
else if (req->cmd_flags & REQ_FLUSH)
nvme_submit_flush(nvmeq, ns, req->tag);
@@ -896,10 +936,10 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
return BLK_MQ_RQ_QUEUE_OK;
error_cmd:
- nvme_free_iod(nvmeq->dev, iod);
+ nvme_free_iod(dev, iod);
return BLK_MQ_RQ_QUEUE_ERROR;
retry_cmd:
- nvme_free_iod(nvmeq->dev, iod);
+ nvme_free_iod(dev, iod);
return BLK_MQ_RQ_QUEUE_BUSY;
}
@@ -942,15 +982,6 @@ static int nvme_process_cq(struct nvme_queue *nvmeq)
return 1;
}
-/* Admin queue isn't initialized as a request queue. If at some point this
- * happens anyway, make sure to notify the user */
-static int nvme_admin_queue_rq(struct blk_mq_hw_ctx *hctx,
- const struct blk_mq_queue_data *bd)
-{
- WARN_ON_ONCE(1);
- return BLK_MQ_RQ_QUEUE_ERROR;
-}
-
static irqreturn_t nvme_irq(int irq, void *data)
{
irqreturn_t result;
@@ -972,46 +1003,61 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
return IRQ_WAKE_THREAD;
}
-struct sync_cmd_info {
- struct task_struct *task;
- u32 result;
- int status;
-};
-
-static void sync_completion(struct nvme_queue *nvmeq, void *ctx,
- struct nvme_completion *cqe)
-{
- struct sync_cmd_info *cmdinfo = ctx;
- cmdinfo->result = le32_to_cpup(&cqe->result);
- cmdinfo->status = le16_to_cpup(&cqe->status) >> 1;
- wake_up_process(cmdinfo->task);
-}
-
/*
* Returns 0 on success. If the result is negative, it's a Linux error code;
* if the result is positive, it's an NVM Express status code
*/
-static int nvme_submit_sync_cmd(struct request *req, struct nvme_command *cmd,
- u32 *result, unsigned timeout)
+int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void *buffer, void __user *ubuffer, unsigned bufflen,
+ u32 *result, unsigned timeout)
{
- struct sync_cmd_info cmdinfo;
- struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req);
- struct nvme_queue *nvmeq = cmd_rq->nvmeq;
+ bool write = cmd->common.opcode & 1;
+ struct bio *bio = NULL;
+ struct request *req;
+ int ret;
- cmdinfo.task = current;
- cmdinfo.status = -EINTR;
+ req = blk_mq_alloc_request(q, write, GFP_KERNEL, false);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
- cmd->common.command_id = req->tag;
+ req->cmd_type = REQ_TYPE_DRV_PRIV;
+ req->cmd_flags |= REQ_FAILFAST_DRIVER;
+ req->__data_len = 0;
+ req->__sector = (sector_t) -1;
+ req->bio = req->biotail = NULL;
+
+ req->timeout = timeout ? timeout : ADMIN_TIMEOUT;
- nvme_set_info(cmd_rq, &cmdinfo, sync_completion);
+ req->cmd = (unsigned char *)cmd;
+ req->cmd_len = sizeof(struct nvme_command);
+ req->special = (void *)0;
- set_current_state(TASK_UNINTERRUPTIBLE);
- nvme_submit_cmd(nvmeq, cmd);
- schedule();
+ if (buffer && bufflen) {
+ ret = blk_rq_map_kern(q, req, buffer, bufflen, __GFP_WAIT);
+ if (ret)
+ goto out;
+ } else if (ubuffer && bufflen) {
+ ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen, __GFP_WAIT);
+ if (ret)
+ goto out;
+ bio = req->bio;
+ }
+ blk_execute_rq(req->q, NULL, req, 0);
+ if (bio)
+ blk_rq_unmap_user(bio);
if (result)
- *result = cmdinfo.result;
- return cmdinfo.status;
+ *result = (u32)(uintptr_t)req->special;
+ ret = req->errors;
+ out:
+ blk_mq_free_request(req);
+ return ret;
+}
+
+int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+ void *buffer, unsigned bufflen)
+{
+ return __nvme_submit_sync_cmd(q, cmd, buffer, NULL, bufflen, NULL, 0);
}
static int nvme_submit_async_admin_req(struct nvme_dev *dev)
@@ -1033,7 +1079,7 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
c.common.opcode = nvme_admin_async_event;
c.common.command_id = req->tag;
- blk_mq_free_hctx_request(nvmeq->hctx, req);
+ blk_mq_free_request(req);
return __nvme_submit_cmd(nvmeq, &c);
}
@@ -1060,41 +1106,6 @@ static int nvme_submit_admin_async_cmd(struct nvme_dev *dev,
return nvme_submit_cmd(nvmeq, cmd);
}
-static int __nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
- u32 *result, unsigned timeout)
-{
- int res;
- struct request *req;
-
- req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_KERNEL, false);
- if (IS_ERR(req))
- return PTR_ERR(req);
- res = nvme_submit_sync_cmd(req, cmd, result, timeout);
- blk_mq_free_request(req);
- return res;
-}
-
-int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd,
- u32 *result)
-{
- return __nvme_submit_admin_cmd(dev, cmd, result, ADMIN_TIMEOUT);
-}
-
-int nvme_submit_io_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
- struct nvme_command *cmd, u32 *result)
-{
- int res;
- struct request *req;
-
- req = blk_mq_alloc_request(ns->queue, WRITE, (GFP_KERNEL|__GFP_WAIT),
- false);
- if (IS_ERR(req))
- return PTR_ERR(req);
- res = nvme_submit_sync_cmd(req, cmd, result, NVME_IO_TIMEOUT);
- blk_mq_free_request(req);
- return res;
-}
-
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
struct nvme_command c;
@@ -1103,7 +1114,7 @@ static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
c.delete_queue.opcode = opcode;
c.delete_queue.qid = cpu_to_le16(id);
- return nvme_submit_admin_cmd(dev, &c, NULL);
+ return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
}
static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
@@ -1112,6 +1123,10 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
struct nvme_command c;
int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
+ /*
+ * Note: we (ab)use the fact that the prp fields survive if no data
+ * is attached to the request.
+ */
memset(&c, 0, sizeof(c));
c.create_cq.opcode = nvme_admin_create_cq;
c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
@@ -1120,7 +1135,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
c.create_cq.cq_flags = cpu_to_le16(flags);
c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector);
- return nvme_submit_admin_cmd(dev, &c, NULL);
+ return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
}
static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
@@ -1129,6 +1144,10 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
struct nvme_command c;
int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM;
+ /*
+ * Note: we (ab)use the fact that the prp fields survive if no data
+ * is attached to the request.
+ */
memset(&c, 0, sizeof(c));
c.create_sq.opcode = nvme_admin_create_sq;
c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
@@ -1137,7 +1156,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
c.create_sq.sq_flags = cpu_to_le16(flags);
c.create_sq.cqid = cpu_to_le16(qid);
- return nvme_submit_admin_cmd(dev, &c, NULL);
+ return nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
}
static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
@@ -1150,18 +1169,45 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
}
-int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns,
- dma_addr_t dma_addr)
+int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id)
{
- struct nvme_command c;
+ struct nvme_command c = { };
+ int error;
- memset(&c, 0, sizeof(c));
+ /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
c.identify.opcode = nvme_admin_identify;
- c.identify.nsid = cpu_to_le32(nsid);
- c.identify.prp1 = cpu_to_le64(dma_addr);
- c.identify.cns = cpu_to_le32(cns);
+ c.identify.cns = cpu_to_le32(1);
+
+ *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
+ if (!*id)
+ return -ENOMEM;
- return nvme_submit_admin_cmd(dev, &c, NULL);
+ error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
+ sizeof(struct nvme_id_ctrl));
+ if (error)
+ kfree(*id);
+ return error;
+}
+
+int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid,
+ struct nvme_id_ns **id)
+{
+ struct nvme_command c = { };
+ int error;
+
+ /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+ c.identify.opcode = nvme_admin_identify,
+ c.identify.nsid = cpu_to_le32(nsid),
+
+ *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
+ if (!*id)
+ return -ENOMEM;
+
+ error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
+ sizeof(struct nvme_id_ns));
+ if (error)
+ kfree(*id);
+ return error;
}
int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
@@ -1175,7 +1221,8 @@ int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid,
c.features.prp1 = cpu_to_le64(dma_addr);
c.features.fid = cpu_to_le32(fid);
- return nvme_submit_admin_cmd(dev, &c, result);
+ return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
+ result, 0);
}
int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
@@ -1189,7 +1236,30 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11,
c.features.fid = cpu_to_le32(fid);
c.features.dword11 = cpu_to_le32(dword11);
- return nvme_submit_admin_cmd(dev, &c, result);
+ return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, NULL, 0,
+ result, 0);
+}
+
+int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log)
+{
+ struct nvme_command c = { };
+ int error;
+
+ c.common.opcode = nvme_admin_get_log_page,
+ c.common.nsid = cpu_to_le32(0xFFFFFFFF),
+ c.common.cdw10[0] = cpu_to_le32(
+ (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
+ NVME_LOG_SMART),
+
+ *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
+ if (!*log)
+ return -ENOMEM;
+
+ error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
+ sizeof(struct nvme_smart_log));
+ if (error)
+ kfree(*log);
+ return error;
}
/**
@@ -1214,8 +1284,7 @@ static void nvme_abort_req(struct request *req)
if (work_busy(&dev->reset_work))
goto out;
list_del_init(&dev->node);
- dev_warn(&dev->pci_dev->dev,
- "I/O %d QID %d timeout, reset controller\n",
+ dev_warn(dev->dev, "I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
dev->reset_workfn = nvme_reset_failed_dev;
queue_work(nvme_workq, &dev->reset_work);
@@ -1254,8 +1323,7 @@ static void nvme_abort_req(struct request *req)
}
}
-static void nvme_cancel_queue_ios(struct blk_mq_hw_ctx *hctx,
- struct request *req, void *data, bool reserved)
+static void nvme_cancel_queue_ios(struct request *req, void *data, bool reserved)
{
struct nvme_queue *nvmeq = data;
void *ctx;
@@ -1352,11 +1420,9 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
static void nvme_clear_queue(struct nvme_queue *nvmeq)
{
- struct blk_mq_hw_ctx *hctx = nvmeq->hctx;
-
spin_lock_irq(&nvmeq->q_lock);
- if (hctx && hctx->tags)
- blk_mq_tag_busy_iter(hctx, nvme_cancel_queue_ios, nvmeq);
+ if (nvmeq->tags && *nvmeq->tags)
+ blk_mq_all_tag_busy_iter(*nvmeq->tags, nvme_cancel_queue_ios, nvmeq);
spin_unlock_irq(&nvmeq->q_lock);
}
@@ -1384,22 +1450,21 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
int depth)
{
- struct device *dmadev = &dev->pci_dev->dev;
struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL);
if (!nvmeq)
return NULL;
- nvmeq->cqes = dma_zalloc_coherent(dmadev, CQ_SIZE(depth),
+ nvmeq->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth),
&nvmeq->cq_dma_addr, GFP_KERNEL);
if (!nvmeq->cqes)
goto free_nvmeq;
- nvmeq->sq_cmds = dma_alloc_coherent(dmadev, SQ_SIZE(depth),
+ nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
&nvmeq->sq_dma_addr, GFP_KERNEL);
if (!nvmeq->sq_cmds)
goto free_cqdma;
- nvmeq->q_dmadev = dmadev;
+ nvmeq->q_dmadev = dev->dev;
nvmeq->dev = dev;
snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d",
dev->instance, qid);
@@ -1409,13 +1474,17 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
nvmeq->q_depth = depth;
nvmeq->qid = qid;
- dev->queue_count++;
+ nvmeq->cq_vector = -1;
dev->queues[qid] = nvmeq;
+ /* make sure queue descriptor is set before queue count, for kthread */
+ mb();
+ dev->queue_count++;
+
return nvmeq;
free_cqdma:
- dma_free_coherent(dmadev, CQ_SIZE(depth), (void *)nvmeq->cqes,
+ dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)nvmeq->cqes,
nvmeq->cq_dma_addr);
free_nvmeq:
kfree(nvmeq);
@@ -1487,7 +1556,7 @@ static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
if (fatal_signal_pending(current))
return -EINTR;
if (time_after(jiffies, timeout)) {
- dev_err(&dev->pci_dev->dev,
+ dev_err(dev->dev,
"Device not ready; aborting %s\n", enabled ?
"initialisation" : "reset");
return -ENODEV;
@@ -1537,7 +1606,7 @@ static int nvme_shutdown_ctrl(struct nvme_dev *dev)
if (fatal_signal_pending(current))
return -EINTR;
if (time_after(jiffies, timeout)) {
- dev_err(&dev->pci_dev->dev,
+ dev_err(dev->dev,
"Device shutdown incomplete; abort shutdown\n");
return -ENODEV;
}
@@ -1547,10 +1616,10 @@ static int nvme_shutdown_ctrl(struct nvme_dev *dev)
}
static struct blk_mq_ops nvme_mq_admin_ops = {
- .queue_rq = nvme_admin_queue_rq,
+ .queue_rq = nvme_queue_rq,
.map_queue = blk_mq_map_queue,
.init_hctx = nvme_admin_init_hctx,
- .exit_hctx = nvme_exit_hctx,
+ .exit_hctx = nvme_admin_exit_hctx,
.init_request = nvme_admin_init_request,
.timeout = nvme_timeout,
};
@@ -1559,7 +1628,6 @@ static struct blk_mq_ops nvme_mq_ops = {
.queue_rq = nvme_queue_rq,
.map_queue = blk_mq_map_queue,
.init_hctx = nvme_init_hctx,
- .exit_hctx = nvme_exit_hctx,
.init_request = nvme_init_request,
.timeout = nvme_timeout,
};
@@ -1580,7 +1648,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1;
dev->admin_tagset.reserved_tags = 1;
dev->admin_tagset.timeout = ADMIN_TIMEOUT;
- dev->admin_tagset.numa_node = dev_to_node(&dev->pci_dev->dev);
+ dev->admin_tagset.numa_node = dev_to_node(dev->dev);
dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
dev->admin_tagset.driver_data = dev;
@@ -1594,6 +1662,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
}
if (!blk_get_queue(dev->admin_q)) {
nvme_dev_remove_admin(dev);
+ dev->admin_q = NULL;
return -ENODEV;
}
} else
@@ -1613,14 +1682,14 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;
if (page_shift < dev_page_min) {
- dev_err(&dev->pci_dev->dev,
+ dev_err(dev->dev,
"Minimum device page size (%u) too large for "
"host (%u)\n", 1 << dev_page_min,
1 << page_shift);
return -ENODEV;
}
if (page_shift > dev_page_max) {
- dev_info(&dev->pci_dev->dev,
+ dev_info(dev->dev,
"Device maximum page size (%u) smaller than "
"host (%u); enabling work-around\n",
1 << dev_page_max, 1 << page_shift);
@@ -1658,8 +1727,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
nvmeq->cq_vector = 0;
result = queue_request_irq(dev, nvmeq, nvmeq->irqname);
- if (result)
+ if (result) {
+ nvmeq->cq_vector = -1;
goto free_nvmeq;
+ }
return result;
@@ -1668,126 +1739,43 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
return result;
}
-struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
- unsigned long addr, unsigned length)
-{
- int i, err, count, nents, offset;
- struct scatterlist *sg;
- struct page **pages;
- struct nvme_iod *iod;
-
- if (addr & 3)
- return ERR_PTR(-EINVAL);
- if (!length || length > INT_MAX - PAGE_SIZE)
- return ERR_PTR(-EINVAL);
-
- offset = offset_in_page(addr);
- count = DIV_ROUND_UP(offset + length, PAGE_SIZE);
- pages = kcalloc(count, sizeof(*pages), GFP_KERNEL);
- if (!pages)
- return ERR_PTR(-ENOMEM);
-
- err = get_user_pages_fast(addr, count, 1, pages);
- if (err < count) {
- count = err;
- err = -EFAULT;
- goto put_pages;
- }
-
- err = -ENOMEM;
- iod = __nvme_alloc_iod(count, length, dev, 0, GFP_KERNEL);
- if (!iod)
- goto put_pages;
-
- sg = iod->sg;
- sg_init_table(sg, count);
- for (i = 0; i < count; i++) {
- sg_set_page(&sg[i], pages[i],
- min_t(unsigned, length, PAGE_SIZE - offset),
- offset);
- length -= (PAGE_SIZE - offset);
- offset = 0;
- }
- sg_mark_end(&sg[i - 1]);
- iod->nents = count;
-
- nents = dma_map_sg(&dev->pci_dev->dev, sg, count,
- write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
- if (!nents)
- goto free_iod;
-
- kfree(pages);
- return iod;
-
- free_iod:
- kfree(iod);
- put_pages:
- for (i = 0; i < count; i++)
- put_page(pages[i]);
- kfree(pages);
- return ERR_PTR(err);
-}
-
-void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
- struct nvme_iod *iod)
-{
- int i;
-
- dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents,
- write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-
- for (i = 0; i < iod->nents; i++)
- put_page(sg_page(&iod->sg[i]));
-}
-
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
struct nvme_dev *dev = ns->dev;
struct nvme_user_io io;
struct nvme_command c;
- unsigned length, meta_len, prp_len;
+ unsigned length, meta_len;
int status, write;
- struct nvme_iod *iod;
dma_addr_t meta_dma = 0;
void *meta = NULL;
void __user *metadata;
if (copy_from_user(&io, uio, sizeof(io)))
return -EFAULT;
- length = (io.nblocks + 1) << ns->lba_shift;
- meta_len = (io.nblocks + 1) * ns->ms;
-
- if (meta_len && ((io.metadata & 3) || !io.metadata) && !ns->ext)
- return -EINVAL;
- else if (meta_len && ns->ext) {
- length += meta_len;
- meta_len = 0;
- }
-
- metadata = (void __user *)(unsigned long)io.metadata;
-
- write = io.opcode & 1;
switch (io.opcode) {
case nvme_cmd_write:
case nvme_cmd_read:
case nvme_cmd_compare:
- iod = nvme_map_user_pages(dev, write, io.addr, length);
break;
default:
return -EINVAL;
}
- if (IS_ERR(iod))
- return PTR_ERR(iod);
+ length = (io.nblocks + 1) << ns->lba_shift;
+ meta_len = (io.nblocks + 1) * ns->ms;
+ metadata = (void __user *)(unsigned long)io.metadata;
+ write = io.opcode & 1;
- prp_len = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
- if (length != prp_len) {
- status = -ENOMEM;
- goto unmap;
+ if (ns->ext) {
+ length += meta_len;
+ meta_len = 0;
}
if (meta_len) {
- meta = dma_alloc_coherent(&dev->pci_dev->dev, meta_len,
+ if (((io.metadata & 3) || !io.metadata) && !ns->ext)
+ return -EINVAL;
+
+ meta = dma_alloc_coherent(dev->dev, meta_len,
&meta_dma, GFP_KERNEL);
if (!meta) {
@@ -1813,19 +1801,17 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
c.rw.reftag = cpu_to_le32(io.reftag);
c.rw.apptag = cpu_to_le16(io.apptag);
c.rw.appmask = cpu_to_le16(io.appmask);
- c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
- c.rw.prp2 = cpu_to_le64(iod->first_dma);
c.rw.metadata = cpu_to_le64(meta_dma);
- status = nvme_submit_io_cmd(dev, ns, &c, NULL);
+
+ status = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
+ (void __user *)io.addr, length, NULL, 0);
unmap:
- nvme_unmap_user_pages(dev, write, iod);
- nvme_free_iod(dev, iod);
if (meta) {
if (status == NVME_SC_SUCCESS && !write) {
if (copy_to_user(metadata, meta, meta_len))
status = -EFAULT;
}
- dma_free_coherent(&dev->pci_dev->dev, meta_len, meta, meta_dma);
+ dma_free_coherent(dev->dev, meta_len, meta, meta_dma);
}
return status;
}
@@ -1835,9 +1821,8 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
{
struct nvme_passthru_cmd cmd;
struct nvme_command c;
- int status, length;
- struct nvme_iod *uninitialized_var(iod);
- unsigned timeout;
+ unsigned timeout = 0;
+ int status;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
@@ -1857,46 +1842,17 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns,
c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);
- length = cmd.data_len;
- if (cmd.data_len) {
- iod = nvme_map_user_pages(dev, cmd.opcode & 1, cmd.addr,
- length);
- if (IS_ERR(iod))
- return PTR_ERR(iod);
- length = nvme_setup_prps(dev, iod, length, GFP_KERNEL);
- c.common.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
- c.common.prp2 = cpu_to_le64(iod->first_dma);
- }
-
- timeout = cmd.timeout_ms ? msecs_to_jiffies(cmd.timeout_ms) :
- ADMIN_TIMEOUT;
-
- if (length != cmd.data_len)
- status = -ENOMEM;
- else if (ns) {
- struct request *req;
-
- req = blk_mq_alloc_request(ns->queue, WRITE,
- (GFP_KERNEL|__GFP_WAIT), false);
- if (IS_ERR(req))
- status = PTR_ERR(req);
- else {
- status = nvme_submit_sync_cmd(req, &c, &cmd.result,
- timeout);
- blk_mq_free_request(req);
- }
- } else
- status = __nvme_submit_admin_cmd(dev, &c, &cmd.result, timeout);
+ if (cmd.timeout_ms)
+ timeout = msecs_to_jiffies(cmd.timeout_ms);
- if (cmd.data_len) {
- nvme_unmap_user_pages(dev, cmd.opcode & 1, iod);
- nvme_free_iod(dev, iod);
+ status = __nvme_submit_sync_cmd(ns ? ns->queue : dev->admin_q, &c,
+ NULL, (void __user *)cmd.addr, cmd.data_len,
+ &cmd.result, timeout);
+ if (status >= 0) {
+ if (put_user(cmd.result, &ucmd->result))
+ return -EFAULT;
}
- if ((status >= 0) && copy_to_user(&ucmd->result, &cmd.result,
- sizeof(cmd.result)))
- status = -EFAULT;
-
return status;
}
@@ -1988,23 +1944,18 @@ static int nvme_revalidate_disk(struct gendisk *disk)
struct nvme_ns *ns = disk->private_data;
struct nvme_dev *dev = ns->dev;
struct nvme_id_ns *id;
- dma_addr_t dma_addr;
u8 lbaf, pi_type;
u16 old_ms;
unsigned short bs;
- id = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr,
- GFP_KERNEL);
- if (!id) {
- dev_warn(&dev->pci_dev->dev, "%s: Memory alocation failure\n",
- __func__);
- return 0;
+ if (nvme_identify_ns(dev, ns->ns_id, &id)) {
+ dev_warn(dev->dev, "%s: Identify failure nvme%dn%d\n", __func__,
+ dev->instance, ns->ns_id);
+ return -ENODEV;
}
- if (nvme_identify(dev, ns->ns_id, 0, dma_addr)) {
- dev_warn(&dev->pci_dev->dev,
- "identify failed ns:%d, setting capacity to 0\n",
- ns->ns_id);
- memset(id, 0, sizeof(*id));
+ if (id->ncap == 0) {
+ kfree(id);
+ return -ENODEV;
}
old_ms = ns->ms;
@@ -2038,7 +1989,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
!ns->ext)
nvme_init_integrity(ns);
- if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk)))
+ if (ns->ms && !blk_get_integrity(disk))
set_capacity(disk, 0);
else
set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
@@ -2046,7 +1997,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
if (dev->oncs & NVME_CTRL_ONCS_DSM)
nvme_config_discard(ns);
- dma_free_coherent(&dev->pci_dev->dev, 4096, id, dma_addr);
+ kfree(id);
return 0;
}
@@ -2073,7 +2024,7 @@ static int nvme_kthread(void *data)
if (work_busy(&dev->reset_work))
continue;
list_del_init(&dev->node);
- dev_warn(&dev->pci_dev->dev,
+ dev_warn(dev->dev,
"Failed status: %x, reset controller\n",
readl(&dev->bar->csts));
dev->reset_workfn = nvme_reset_failed_dev;
@@ -2105,7 +2056,7 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
{
struct nvme_ns *ns;
struct gendisk *disk;
- int node = dev_to_node(&dev->pci_dev->dev);
+ int node = dev_to_node(dev->dev);
ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
if (!ns)
@@ -2153,11 +2104,25 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
* requires it.
*/
set_capacity(disk, 0);
- nvme_revalidate_disk(ns->disk);
+ if (nvme_revalidate_disk(ns->disk))
+ goto out_free_disk;
+
add_disk(ns->disk);
- if (ns->ms)
- revalidate_disk(ns->disk);
+ if (ns->ms) {
+ struct block_device *bd = bdget_disk(ns->disk, 0);
+ if (!bd)
+ return;
+ if (blkdev_get(bd, FMODE_READ, NULL)) {
+ bdput(bd);
+ return;
+ }
+ blkdev_reread_part(bd);
+ blkdev_put(bd, FMODE_READ);
+ }
return;
+ out_free_disk:
+ kfree(disk);
+ list_del(&ns->list);
out_free_queue:
blk_cleanup_queue(ns->queue);
out_free_ns:
@@ -2188,8 +2153,7 @@ static int set_queue_count(struct nvme_dev *dev, int count)
if (status < 0)
return status;
if (status > 0) {
- dev_err(&dev->pci_dev->dev, "Could not set queue count (%d)\n",
- status);
+ dev_err(dev->dev, "Could not set queue count (%d)\n", status);
return 0;
}
return min(result & 0xffff, result >> 16) + 1;
@@ -2203,7 +2167,7 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
struct nvme_queue *adminq = dev->queues[0];
- struct pci_dev *pdev = dev->pci_dev;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
int result, i, vecs, nr_io_queues, size;
nr_io_queues = num_possible_cpus();
@@ -2261,8 +2225,10 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
dev->max_qid = nr_io_queues;
result = queue_request_irq(dev, adminq, adminq->irqname);
- if (result)
+ if (result) {
+ adminq->cq_vector = -1;
goto free_queues;
+ }
/* Free previously allocated queues that are no longer usable */
nvme_free_queues(dev, nr_io_queues + 1);
@@ -2275,6 +2241,99 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
return result;
}
+static void nvme_free_namespace(struct nvme_ns *ns)
+{
+ list_del(&ns->list);
+
+ spin_lock(&dev_list_lock);
+ ns->disk->private_data = NULL;
+ spin_unlock(&dev_list_lock);
+
+ put_disk(ns->disk);
+ kfree(ns);
+}
+
+static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+ struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
+ struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
+
+ return nsa->ns_id - nsb->ns_id;
+}
+
+static struct nvme_ns *nvme_find_ns(struct nvme_dev *dev, unsigned nsid)
+{
+ struct nvme_ns *ns;
+
+ list_for_each_entry(ns, &dev->namespaces, list) {
+ if (ns->ns_id == nsid)
+ return ns;
+ if (ns->ns_id > nsid)
+ break;
+ }
+ return NULL;
+}
+
+static inline bool nvme_io_incapable(struct nvme_dev *dev)
+{
+ return (!dev->bar || readl(&dev->bar->csts) & NVME_CSTS_CFS ||
+ dev->online_queues < 2);
+}
+
+static void nvme_ns_remove(struct nvme_ns *ns)
+{
+ bool kill = nvme_io_incapable(ns->dev) && !blk_queue_dying(ns->queue);
+
+ if (kill)
+ blk_set_queue_dying(ns->queue);
+ if (ns->disk->flags & GENHD_FL_UP) {
+ if (blk_get_integrity(ns->disk))
+ blk_integrity_unregister(ns->disk);
+ del_gendisk(ns->disk);
+ }
+ if (kill || !blk_queue_dying(ns->queue)) {
+ blk_mq_abort_requeue_list(ns->queue);
+ blk_cleanup_queue(ns->queue);
+ }
+}
+
+static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
+{
+ struct nvme_ns *ns, *next;
+ unsigned i;
+
+ for (i = 1; i <= nn; i++) {
+ ns = nvme_find_ns(dev, i);
+ if (ns) {
+ if (revalidate_disk(ns->disk)) {
+ nvme_ns_remove(ns);
+ nvme_free_namespace(ns);
+ }
+ } else
+ nvme_alloc_ns(dev, i);
+ }
+ list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
+ if (ns->ns_id > nn) {
+ nvme_ns_remove(ns);
+ nvme_free_namespace(ns);
+ }
+ }
+ list_sort(NULL, &dev->namespaces, ns_cmp);
+}
+
+static void nvme_dev_scan(struct work_struct *work)
+{
+ struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work);
+ struct nvme_id_ctrl *ctrl;
+
+ if (!dev->tagset.tags)
+ return;
+ if (nvme_identify_ctrl(dev, &ctrl))
+ return;
+ nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn));
+ kfree(ctrl);
+}
+
/*
* Return: error value if an error occurred setting up the queues or calling
* Identify Device. 0 if these succeeded, even if adding some of the
@@ -2283,26 +2342,18 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
*/
static int nvme_dev_add(struct nvme_dev *dev)
{
- struct pci_dev *pdev = dev->pci_dev;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
int res;
- unsigned nn, i;
+ unsigned nn;
struct nvme_id_ctrl *ctrl;
- void *mem;
- dma_addr_t dma_addr;
int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
- mem = dma_alloc_coherent(&pdev->dev, 4096, &dma_addr, GFP_KERNEL);
- if (!mem)
- return -ENOMEM;
-
- res = nvme_identify(dev, 0, 1, dma_addr);
+ res = nvme_identify_ctrl(dev, &ctrl);
if (res) {
- dev_err(&pdev->dev, "Identify Controller failed (%d)\n", res);
- dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr);
+ dev_err(dev->dev, "Identify Controller failed (%d)\n", res);
return -EIO;
}
- ctrl = mem;
nn = le32_to_cpup(&ctrl->nn);
dev->oncs = le16_to_cpup(&ctrl->oncs);
dev->abort_limit = ctrl->acl + 1;
@@ -2324,24 +2375,23 @@ static int nvme_dev_add(struct nvme_dev *dev)
} else
dev->max_hw_sectors = max_hw_sectors;
}
- dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr);
+ kfree(ctrl);
- dev->tagset.ops = &nvme_mq_ops;
- dev->tagset.nr_hw_queues = dev->online_queues - 1;
- dev->tagset.timeout = NVME_IO_TIMEOUT;
- dev->tagset.numa_node = dev_to_node(&dev->pci_dev->dev);
- dev->tagset.queue_depth =
+ if (!dev->tagset.tags) {
+ dev->tagset.ops = &nvme_mq_ops;
+ dev->tagset.nr_hw_queues = dev->online_queues - 1;
+ dev->tagset.timeout = NVME_IO_TIMEOUT;
+ dev->tagset.numa_node = dev_to_node(dev->dev);
+ dev->tagset.queue_depth =
min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
- dev->tagset.cmd_size = nvme_cmd_size(dev);
- dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
- dev->tagset.driver_data = dev;
-
- if (blk_mq_alloc_tag_set(&dev->tagset))
- return 0;
-
- for (i = 1; i <= nn; i++)
- nvme_alloc_ns(dev, i);
+ dev->tagset.cmd_size = nvme_cmd_size(dev);
+ dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
+ dev->tagset.driver_data = dev;
+ if (blk_mq_alloc_tag_set(&dev->tagset))
+ return 0;
+ }
+ schedule_work(&dev->scan_work);
return 0;
}
@@ -2349,7 +2399,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
{
u64 cap;
int bars, result = -ENOMEM;
- struct pci_dev *pdev = dev->pci_dev;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
if (pci_enable_device_mem(pdev))
return result;
@@ -2363,8 +2413,8 @@ static int nvme_dev_map(struct nvme_dev *dev)
if (pci_request_selected_regions(pdev, bars, "nvme"))
goto disable_pci;
- if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) &&
- dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
+ if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) &&
+ dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32)))
goto disable;
dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
@@ -2405,19 +2455,21 @@ static int nvme_dev_map(struct nvme_dev *dev)
static void nvme_dev_unmap(struct nvme_dev *dev)
{
- if (dev->pci_dev->msi_enabled)
- pci_disable_msi(dev->pci_dev);
- else if (dev->pci_dev->msix_enabled)
- pci_disable_msix(dev->pci_dev);
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ if (pdev->msi_enabled)
+ pci_disable_msi(pdev);
+ else if (pdev->msix_enabled)
+ pci_disable_msix(pdev);
if (dev->bar) {
iounmap(dev->bar);
dev->bar = NULL;
- pci_release_regions(dev->pci_dev);
+ pci_release_regions(pdev);
}
- if (pci_is_enabled(dev->pci_dev))
- pci_disable_device(dev->pci_dev);
+ if (pci_is_enabled(pdev))
+ pci_disable_device(pdev);
}
struct nvme_delq_ctx {
@@ -2536,7 +2588,7 @@ static void nvme_disable_io_queues(struct nvme_dev *dev)
&worker, "nvme%d", dev->instance);
if (IS_ERR(kworker_task)) {
- dev_err(&dev->pci_dev->dev,
+ dev_err(dev->dev,
"Failed to create queue del task\n");
for (i = dev->queue_count - 1; i > 0; i--)
nvme_disable_queue(dev, i);
@@ -2587,9 +2639,9 @@ static void nvme_freeze_queues(struct nvme_dev *dev)
list_for_each_entry(ns, &dev->namespaces, list) {
blk_mq_freeze_queue_start(ns->queue);
- spin_lock(ns->queue->queue_lock);
+ spin_lock_irq(ns->queue->queue_lock);
queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
- spin_unlock(ns->queue->queue_lock);
+ spin_unlock_irq(ns->queue->queue_lock);
blk_mq_cancel_requeue_work(ns->queue);
blk_mq_stop_hw_queues(ns->queue);
@@ -2639,29 +2691,19 @@ static void nvme_dev_remove(struct nvme_dev *dev)
{
struct nvme_ns *ns;
- list_for_each_entry(ns, &dev->namespaces, list) {
- if (ns->disk->flags & GENHD_FL_UP) {
- if (blk_get_integrity(ns->disk))
- blk_integrity_unregister(ns->disk);
- del_gendisk(ns->disk);
- }
- if (!blk_queue_dying(ns->queue)) {
- blk_mq_abort_requeue_list(ns->queue);
- blk_cleanup_queue(ns->queue);
- }
- }
+ list_for_each_entry(ns, &dev->namespaces, list)
+ nvme_ns_remove(ns);
}
static int nvme_setup_prp_pools(struct nvme_dev *dev)
{
- struct device *dmadev = &dev->pci_dev->dev;
- dev->prp_page_pool = dma_pool_create("prp list page", dmadev,
+ dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
PAGE_SIZE, PAGE_SIZE, 0);
if (!dev->prp_page_pool)
return -ENOMEM;
/* Optimisation for I/Os between 4k and 128k */
- dev->prp_small_pool = dma_pool_create("prp list 256", dmadev,
+ dev->prp_small_pool = dma_pool_create("prp list 256", dev->dev,
256, 256, 0);
if (!dev->prp_small_pool) {
dma_pool_destroy(dev->prp_page_pool);
@@ -2709,28 +2751,22 @@ static void nvme_free_namespaces(struct nvme_dev *dev)
{
struct nvme_ns *ns, *next;
- list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
- list_del(&ns->list);
-
- spin_lock(&dev_list_lock);
- ns->disk->private_data = NULL;
- spin_unlock(&dev_list_lock);
-
- put_disk(ns->disk);
- kfree(ns);
- }
+ list_for_each_entry_safe(ns, next, &dev->namespaces, list)
+ nvme_free_namespace(ns);
}
static void nvme_free_dev(struct kref *kref)
{
struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref);
- pci_dev_put(dev->pci_dev);
+ put_device(dev->dev);
put_device(dev->device);
nvme_free_namespaces(dev);
nvme_release_instance(dev);
- blk_mq_free_tag_set(&dev->tagset);
- blk_put_queue(dev->admin_q);
+ if (dev->tagset.tags)
+ blk_mq_free_tag_set(&dev->tagset);
+ if (dev->admin_q)
+ blk_put_queue(dev->admin_q);
kfree(dev->queues);
kfree(dev->entry);
kfree(dev);
@@ -2781,6 +2817,9 @@ static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
return -ENOTTY;
ns = list_first_entry(&dev->namespaces, struct nvme_ns, list);
return nvme_user_cmd(dev, ns, (void __user *)arg);
+ case NVME_IOCTL_RESET:
+ dev_warn(dev->dev, "resetting controller\n");
+ return nvme_reset(dev);
default:
return -ENOTTY;
}
@@ -2802,11 +2841,11 @@ static void nvme_set_irq_hints(struct nvme_dev *dev)
for (i = 0; i < dev->online_queues; i++) {
nvmeq = dev->queues[i];
- if (!nvmeq->hctx)
+ if (!nvmeq->tags || !(*nvmeq->tags))
continue;
irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
- nvmeq->hctx->cpumask);
+ blk_mq_tags_cpumask(*nvmeq->tags));
}
}
@@ -2858,6 +2897,9 @@ static int nvme_dev_start(struct nvme_dev *dev)
free_tags:
nvme_dev_remove_admin(dev);
+ blk_put_queue(dev->admin_q);
+ dev->admin_q = NULL;
+ dev->queues[0]->tags = NULL;
disable:
nvme_disable_queue(dev, 0);
nvme_dev_list_remove(dev);
@@ -2869,7 +2911,7 @@ static int nvme_dev_start(struct nvme_dev *dev)
static int nvme_remove_dead_ctrl(void *arg)
{
struct nvme_dev *dev = (struct nvme_dev *)arg;
- struct pci_dev *pdev = dev->pci_dev;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
if (pci_get_drvdata(pdev))
pci_stop_and_remove_bus_device_locked(pdev);
@@ -2899,24 +2941,43 @@ static int nvme_dev_resume(struct nvme_dev *dev)
spin_unlock(&dev_list_lock);
} else {
nvme_unfreeze_queues(dev);
+ nvme_dev_add(dev);
nvme_set_irq_hints(dev);
}
return 0;
}
+static void nvme_dead_ctrl(struct nvme_dev *dev)
+{
+ dev_warn(dev->dev, "Device failed to resume\n");
+ kref_get(&dev->kref);
+ if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
+ dev->instance))) {
+ dev_err(dev->dev,
+ "Failed to start controller remove task\n");
+ kref_put(&dev->kref, nvme_free_dev);
+ }
+}
+
static void nvme_dev_reset(struct nvme_dev *dev)
{
+ bool in_probe = work_busy(&dev->probe_work);
+
nvme_dev_shutdown(dev);
- if (nvme_dev_resume(dev)) {
- dev_warn(&dev->pci_dev->dev, "Device failed to resume\n");
- kref_get(&dev->kref);
- if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
- dev->instance))) {
- dev_err(&dev->pci_dev->dev,
- "Failed to start controller remove task\n");
- kref_put(&dev->kref, nvme_free_dev);
- }
+
+ /* Synchronize with device probe so that work will see failure status
+ * and exit gracefully without trying to schedule another reset */
+ flush_work(&dev->probe_work);
+
+	/* Fail this device if reset occurred during probe to avoid
+ * infinite initialization loops. */
+ if (in_probe) {
+ nvme_dead_ctrl(dev);
+ return;
}
+ /* Schedule device resume asynchronously so the reset work is available
+	 * to clean up errors that may occur during reinitialization */
+ schedule_work(&dev->probe_work);
}
static void nvme_reset_failed_dev(struct work_struct *ws)
@@ -2931,6 +2992,45 @@ static void nvme_reset_workfn(struct work_struct *work)
dev->reset_workfn(work);
}
+static int nvme_reset(struct nvme_dev *dev)
+{
+ int ret = -EBUSY;
+
+ if (!dev->admin_q || blk_queue_dying(dev->admin_q))
+ return -ENODEV;
+
+ spin_lock(&dev_list_lock);
+ if (!work_pending(&dev->reset_work)) {
+ dev->reset_workfn = nvme_reset_failed_dev;
+ queue_work(nvme_workq, &dev->reset_work);
+ ret = 0;
+ }
+ spin_unlock(&dev_list_lock);
+
+ if (!ret) {
+ flush_work(&dev->reset_work);
+ flush_work(&dev->probe_work);
+ return 0;
+ }
+
+ return ret;
+}
+
+static ssize_t nvme_sysfs_reset(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct nvme_dev *ndev = dev_get_drvdata(dev);
+ int ret;
+
+ ret = nvme_reset(ndev);
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
+
static void nvme_async_probe(struct work_struct *work);
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
@@ -2956,7 +3056,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
INIT_LIST_HEAD(&dev->namespaces);
dev->reset_workfn = nvme_reset_failed_dev;
INIT_WORK(&dev->reset_work, nvme_reset_workfn);
- dev->pci_dev = pci_dev_get(pdev);
+ dev->dev = get_device(&pdev->dev);
pci_set_drvdata(pdev, dev);
result = nvme_set_instance(dev);
if (result)
@@ -2975,18 +3075,27 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto release_pools;
}
get_device(dev->device);
+ dev_set_drvdata(dev->device, dev);
+
+ result = device_create_file(dev->device, &dev_attr_reset_controller);
+ if (result)
+ goto put_dev;
INIT_LIST_HEAD(&dev->node);
+ INIT_WORK(&dev->scan_work, nvme_dev_scan);
INIT_WORK(&dev->probe_work, nvme_async_probe);
schedule_work(&dev->probe_work);
return 0;
+ put_dev:
+ device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
+ put_device(dev->device);
release_pools:
nvme_release_prp_pools(dev);
release:
nvme_release_instance(dev);
put_pci:
- pci_dev_put(dev->pci_dev);
+ put_device(dev->dev);
free:
kfree(dev->queues);
kfree(dev->entry);
@@ -2997,24 +3106,9 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
static void nvme_async_probe(struct work_struct *work)
{
struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work);
- int result;
-
- result = nvme_dev_start(dev);
- if (result)
- goto reset;
- if (dev->online_queues > 1)
- result = nvme_dev_add(dev);
- if (result)
- goto reset;
-
- nvme_set_irq_hints(dev);
- return;
- reset:
- if (!work_busy(&dev->reset_work)) {
- dev->reset_workfn = nvme_reset_failed_dev;
- queue_work(nvme_workq, &dev->reset_work);
- }
+ if (nvme_dev_resume(dev) && !work_busy(&dev->reset_work))
+ nvme_dead_ctrl(dev);
}
static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
@@ -3044,8 +3138,10 @@ static void nvme_remove(struct pci_dev *pdev)
pci_set_drvdata(pdev, NULL);
flush_work(&dev->probe_work);
flush_work(&dev->reset_work);
- nvme_dev_shutdown(dev);
+ flush_work(&dev->scan_work);
+ device_remove_file(dev->device, &dev_attr_reset_controller);
nvme_dev_remove(dev);
+ nvme_dev_shutdown(dev);
nvme_dev_remove_admin(dev);
device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
nvme_free_queues(dev, 0);
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 44f2514fb775..e5a63f06fb0f 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -41,15 +41,13 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/types.h>
+#include <asm/unaligned.h>
#include <scsi/sg.h>
#include <scsi/scsi.h>
static int sg_version_num = 30534; /* 2 digits for each component */
-#define SNTI_TRANSLATION_SUCCESS 0
-#define SNTI_INTERNAL_ERROR 1
-
/* VPD Page Codes */
#define VPD_SUPPORTED_PAGES 0x00
#define VPD_SERIAL_NUMBER 0x80
@@ -58,49 +56,14 @@ static int sg_version_num = 30534; /* 2 digits for each component */
#define VPD_BLOCK_LIMITS 0xB0
#define VPD_BLOCK_DEV_CHARACTERISTICS 0xB1
-/* CDB offsets */
-#define REPORT_LUNS_CDB_ALLOC_LENGTH_OFFSET 6
-#define REPORT_LUNS_SR_OFFSET 2
-#define READ_CAP_16_CDB_ALLOC_LENGTH_OFFSET 10
-#define REQUEST_SENSE_CDB_ALLOC_LENGTH_OFFSET 4
-#define REQUEST_SENSE_DESC_OFFSET 1
-#define REQUEST_SENSE_DESC_MASK 0x01
-#define DESCRIPTOR_FORMAT_SENSE_DATA_TYPE 1
-#define INQUIRY_EVPD_BYTE_OFFSET 1
-#define INQUIRY_PAGE_CODE_BYTE_OFFSET 2
-#define INQUIRY_EVPD_BIT_MASK 1
-#define INQUIRY_CDB_ALLOCATION_LENGTH_OFFSET 3
-#define START_STOP_UNIT_CDB_IMMED_OFFSET 1
-#define START_STOP_UNIT_CDB_IMMED_MASK 0x1
-#define START_STOP_UNIT_CDB_POWER_COND_MOD_OFFSET 3
-#define START_STOP_UNIT_CDB_POWER_COND_MOD_MASK 0xF
-#define START_STOP_UNIT_CDB_POWER_COND_OFFSET 4
-#define START_STOP_UNIT_CDB_POWER_COND_MASK 0xF0
-#define START_STOP_UNIT_CDB_NO_FLUSH_OFFSET 4
-#define START_STOP_UNIT_CDB_NO_FLUSH_MASK 0x4
-#define START_STOP_UNIT_CDB_START_OFFSET 4
-#define START_STOP_UNIT_CDB_START_MASK 0x1
-#define WRITE_BUFFER_CDB_MODE_OFFSET 1
-#define WRITE_BUFFER_CDB_MODE_MASK 0x1F
-#define WRITE_BUFFER_CDB_BUFFER_ID_OFFSET 2
-#define WRITE_BUFFER_CDB_BUFFER_OFFSET_OFFSET 3
-#define WRITE_BUFFER_CDB_PARM_LIST_LENGTH_OFFSET 6
-#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_OFFSET 1
-#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_MASK 0xC0
-#define FORMAT_UNIT_CDB_FORMAT_PROT_INFO_SHIFT 6
-#define FORMAT_UNIT_CDB_LONG_LIST_OFFSET 1
-#define FORMAT_UNIT_CDB_LONG_LIST_MASK 0x20
-#define FORMAT_UNIT_CDB_FORMAT_DATA_OFFSET 1
-#define FORMAT_UNIT_CDB_FORMAT_DATA_MASK 0x10
+/* format unit parameter list offsets */
#define FORMAT_UNIT_SHORT_PARM_LIST_LEN 4
#define FORMAT_UNIT_LONG_PARM_LIST_LEN 8
#define FORMAT_UNIT_PROT_INT_OFFSET 3
#define FORMAT_UNIT_PROT_FIELD_USAGE_OFFSET 0
#define FORMAT_UNIT_PROT_FIELD_USAGE_MASK 0x07
-#define UNMAP_CDB_PARAM_LIST_LENGTH_OFFSET 7
/* Misc. defines */
-#define NIBBLE_SHIFT 4
#define FIXED_SENSE_DATA 0x70
#define DESC_FORMAT_SENSE_DATA 0x72
#define FIXED_SENSE_DATA_ADD_LENGTH 10
@@ -144,27 +107,6 @@ static int sg_version_num = 30534; /* 2 digits for each component */
#define EXTENDED_INQUIRY_DATA_PAGE_LENGTH 0x3C
#define RESERVED_FIELD 0
-/* SCSI READ/WRITE Defines */
-#define IO_CDB_WP_MASK 0xE0
-#define IO_CDB_WP_SHIFT 5
-#define IO_CDB_FUA_MASK 0x8
-#define IO_6_CDB_LBA_OFFSET 0
-#define IO_6_CDB_LBA_MASK 0x001FFFFF
-#define IO_6_CDB_TX_LEN_OFFSET 4
-#define IO_6_DEFAULT_TX_LEN 256
-#define IO_10_CDB_LBA_OFFSET 2
-#define IO_10_CDB_TX_LEN_OFFSET 7
-#define IO_10_CDB_WP_OFFSET 1
-#define IO_10_CDB_FUA_OFFSET 1
-#define IO_12_CDB_LBA_OFFSET 2
-#define IO_12_CDB_TX_LEN_OFFSET 6
-#define IO_12_CDB_WP_OFFSET 1
-#define IO_12_CDB_FUA_OFFSET 1
-#define IO_16_CDB_FUA_OFFSET 1
-#define IO_16_CDB_WP_OFFSET 1
-#define IO_16_CDB_LBA_OFFSET 2
-#define IO_16_CDB_TX_LEN_OFFSET 10
-
/* Mode Sense/Select defines */
#define MODE_PAGE_INFO_EXCEP 0x1C
#define MODE_PAGE_CACHING 0x08
@@ -179,23 +121,14 @@ static int sg_version_num = 30534; /* 2 digits for each component */
#define MODE_PAGE_INF_EXC_LEN 0x0C
#define MODE_PAGE_ALL_LEN 0x54
#define MODE_SENSE6_MPH_SIZE 4
-#define MODE_SENSE6_ALLOC_LEN_OFFSET 4
-#define MODE_SENSE_PAGE_CONTROL_OFFSET 2
#define MODE_SENSE_PAGE_CONTROL_MASK 0xC0
#define MODE_SENSE_PAGE_CODE_OFFSET 2
#define MODE_SENSE_PAGE_CODE_MASK 0x3F
-#define MODE_SENSE_LLBAA_OFFSET 1
#define MODE_SENSE_LLBAA_MASK 0x10
#define MODE_SENSE_LLBAA_SHIFT 4
-#define MODE_SENSE_DBD_OFFSET 1
#define MODE_SENSE_DBD_MASK 8
#define MODE_SENSE_DBD_SHIFT 3
#define MODE_SENSE10_MPH_SIZE 8
-#define MODE_SENSE10_ALLOC_LEN_OFFSET 7
-#define MODE_SELECT_CDB_PAGE_FORMAT_OFFSET 1
-#define MODE_SELECT_CDB_SAVE_PAGES_OFFSET 1
-#define MODE_SELECT_6_CDB_PARAM_LIST_LENGTH_OFFSET 4
-#define MODE_SELECT_10_CDB_PARAM_LIST_LENGTH_OFFSET 7
#define MODE_SELECT_CDB_PAGE_FORMAT_MASK 0x10
#define MODE_SELECT_CDB_SAVE_PAGES_MASK 0x1
#define MODE_SELECT_6_BD_OFFSET 3
@@ -221,14 +154,11 @@ static int sg_version_num = 30534; /* 2 digits for each component */
#define LOG_PAGE_SUPPORTED_LOG_PAGES_LENGTH 0x07
#define LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE 0x2F
#define LOG_PAGE_TEMPERATURE_PAGE 0x0D
-#define LOG_SENSE_CDB_SP_OFFSET 1
#define LOG_SENSE_CDB_SP_NOT_ENABLED 0
-#define LOG_SENSE_CDB_PC_OFFSET 2
#define LOG_SENSE_CDB_PC_MASK 0xC0
#define LOG_SENSE_CDB_PC_SHIFT 6
#define LOG_SENSE_CDB_PC_CUMULATIVE_VALUES 1
#define LOG_SENSE_CDB_PAGE_CODE_MASK 0x3F
-#define LOG_SENSE_CDB_ALLOC_LENGTH_OFFSET 7
#define REMAINING_INFO_EXCP_PAGE_LENGTH 0x8
#define LOG_INFO_EXCP_PAGE_LENGTH 0xC
#define REMAINING_TEMP_PAGE_LENGTH 0xC
@@ -278,77 +208,11 @@ static int sg_version_num = 30534; /* 2 digits for each component */
#define SCSI_ASCQ_POWER_LOSS_EXPECTED 0x08
#define SCSI_ASCQ_INVALID_LUN_ID 0x09
-/**
- * DEVICE_SPECIFIC_PARAMETER in mode parameter header (see sbc2r16) to
- * enable DPOFUA support type 0x10 value.
- */
-#define DEVICE_SPECIFIC_PARAMETER 0
-#define VPD_ID_DESCRIPTOR_LENGTH sizeof(VPD_IDENTIFICATION_DESCRIPTOR)
-
-/* MACROs to extract information from CDBs */
-
-#define GET_OPCODE(cdb) cdb[0]
-
-#define GET_U8_FROM_CDB(cdb, index) (cdb[index] << 0)
-
-#define GET_U16_FROM_CDB(cdb, index) ((cdb[index] << 8) | (cdb[index + 1] << 0))
-
-#define GET_U24_FROM_CDB(cdb, index) ((cdb[index] << 16) | \
-(cdb[index + 1] << 8) | \
-(cdb[index + 2] << 0))
-
-#define GET_U32_FROM_CDB(cdb, index) ((cdb[index] << 24) | \
-(cdb[index + 1] << 16) | \
-(cdb[index + 2] << 8) | \
-(cdb[index + 3] << 0))
-
-#define GET_U64_FROM_CDB(cdb, index) ((((u64)cdb[index]) << 56) | \
-(((u64)cdb[index + 1]) << 48) | \
-(((u64)cdb[index + 2]) << 40) | \
-(((u64)cdb[index + 3]) << 32) | \
-(((u64)cdb[index + 4]) << 24) | \
-(((u64)cdb[index + 5]) << 16) | \
-(((u64)cdb[index + 6]) << 8) | \
-(((u64)cdb[index + 7]) << 0))
-
-/* Inquiry Helper Macros */
-#define GET_INQ_EVPD_BIT(cdb) \
-((GET_U8_FROM_CDB(cdb, INQUIRY_EVPD_BYTE_OFFSET) & \
-INQUIRY_EVPD_BIT_MASK) ? 1 : 0)
-
-#define GET_INQ_PAGE_CODE(cdb) \
-(GET_U8_FROM_CDB(cdb, INQUIRY_PAGE_CODE_BYTE_OFFSET))
-
-#define GET_INQ_ALLOC_LENGTH(cdb) \
-(GET_U16_FROM_CDB(cdb, INQUIRY_CDB_ALLOCATION_LENGTH_OFFSET))
-
-/* Report LUNs Helper Macros */
-#define GET_REPORT_LUNS_ALLOC_LENGTH(cdb) \
-(GET_U32_FROM_CDB(cdb, REPORT_LUNS_CDB_ALLOC_LENGTH_OFFSET))
-
-/* Read Capacity Helper Macros */
-#define GET_READ_CAP_16_ALLOC_LENGTH(cdb) \
-(GET_U32_FROM_CDB(cdb, READ_CAP_16_CDB_ALLOC_LENGTH_OFFSET))
-
-#define IS_READ_CAP_16(cdb) \
-((cdb[0] == SERVICE_ACTION_IN_16 && cdb[1] == SAI_READ_CAPACITY_16) ? 1 : 0)
-
-/* Request Sense Helper Macros */
-#define GET_REQUEST_SENSE_ALLOC_LENGTH(cdb) \
-(GET_U8_FROM_CDB(cdb, REQUEST_SENSE_CDB_ALLOC_LENGTH_OFFSET))
-
-/* Mode Sense Helper Macros */
-#define GET_MODE_SENSE_DBD(cdb) \
-((GET_U8_FROM_CDB(cdb, MODE_SENSE_DBD_OFFSET) & MODE_SENSE_DBD_MASK) >> \
-MODE_SENSE_DBD_SHIFT)
-
-#define GET_MODE_SENSE_LLBAA(cdb) \
-((GET_U8_FROM_CDB(cdb, MODE_SENSE_LLBAA_OFFSET) & \
-MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT)
-
-#define GET_MODE_SENSE_MPH_SIZE(cdb10) \
-(cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE)
-
+/* copied from drivers/usb/gadget/function/storage_common.h */
+static inline u32 get_unaligned_be24(u8 *buf)
+{
+ return 0xffffff & (u32) get_unaligned_be32(buf - 1);
+}
/* Struct to gather data that needs to be extracted from a SCSI CDB.
Not conforming to any particular CDB variant, but compatible with all. */
@@ -369,8 +233,6 @@ struct nvme_trans_io_cdb {
static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
unsigned long n)
{
- int res = SNTI_TRANSLATION_SUCCESS;
- unsigned long not_copied;
int i;
void *index = from;
size_t remaining = n;
@@ -380,29 +242,25 @@ static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
struct sg_iovec sgl;
for (i = 0; i < hdr->iovec_count; i++) {
- not_copied = copy_from_user(&sgl, hdr->dxferp +
+ if (copy_from_user(&sgl, hdr->dxferp +
i * sizeof(struct sg_iovec),
- sizeof(struct sg_iovec));
- if (not_copied)
+ sizeof(struct sg_iovec)))
return -EFAULT;
xfer_len = min(remaining, sgl.iov_len);
- not_copied = copy_to_user(sgl.iov_base, index,
- xfer_len);
- if (not_copied) {
- res = -EFAULT;
- break;
- }
+ if (copy_to_user(sgl.iov_base, index, xfer_len))
+ return -EFAULT;
+
index += xfer_len;
remaining -= xfer_len;
if (remaining == 0)
break;
}
- return res;
+ return 0;
}
- not_copied = copy_to_user(hdr->dxferp, from, n);
- if (not_copied)
- res = -EFAULT;
- return res;
+
+ if (copy_to_user(hdr->dxferp, from, n))
+ return -EFAULT;
+ return 0;
}
/* Copy data from userspace memory */
@@ -410,8 +268,6 @@ static int nvme_trans_copy_to_user(struct sg_io_hdr *hdr, void *from,
static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
unsigned long n)
{
- int res = SNTI_TRANSLATION_SUCCESS;
- unsigned long not_copied;
int i;
void *index = to;
size_t remaining = n;
@@ -421,30 +277,24 @@ static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
struct sg_iovec sgl;
for (i = 0; i < hdr->iovec_count; i++) {
- not_copied = copy_from_user(&sgl, hdr->dxferp +
+ if (copy_from_user(&sgl, hdr->dxferp +
i * sizeof(struct sg_iovec),
- sizeof(struct sg_iovec));
- if (not_copied)
+ sizeof(struct sg_iovec)))
return -EFAULT;
xfer_len = min(remaining, sgl.iov_len);
- not_copied = copy_from_user(index, sgl.iov_base,
- xfer_len);
- if (not_copied) {
- res = -EFAULT;
- break;
- }
+ if (copy_from_user(index, sgl.iov_base, xfer_len))
+ return -EFAULT;
index += xfer_len;
remaining -= xfer_len;
if (remaining == 0)
break;
}
- return res;
+ return 0;
}
- not_copied = copy_from_user(to, hdr->dxferp, n);
- if (not_copied)
- res = -EFAULT;
- return res;
+ if (copy_from_user(to, hdr->dxferp, n))
+ return -EFAULT;
+ return 0;
}
/* Status/Sense Buffer Writeback */
@@ -452,7 +302,6 @@ static int nvme_trans_copy_from_user(struct sg_io_hdr *hdr, void *to,
static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key,
u8 asc, u8 ascq)
{
- int res = SNTI_TRANSLATION_SUCCESS;
u8 xfer_len;
u8 resp[DESC_FMT_SENSE_DATA_SIZE];
@@ -477,25 +326,29 @@ static int nvme_trans_completion(struct sg_io_hdr *hdr, u8 status, u8 sense_key,
xfer_len = min_t(u8, hdr->mx_sb_len, DESC_FMT_SENSE_DATA_SIZE);
hdr->sb_len_wr = xfer_len;
if (copy_to_user(hdr->sbp, resp, xfer_len) > 0)
- res = -EFAULT;
+ return -EFAULT;
}
- return res;
+ return 0;
}
+/*
+ * Take a status code from a low-level routine, and if it was a positive NVMe
+ * error code, update the sense data based on it. In either case the passed
+ * in value is returned again, unless an -EFAULT from copy_to_user overrides
+ * it.
+ */
static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc)
{
u8 status, sense_key, asc, ascq;
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
/* For non-nvme (Linux) errors, simply return the error code */
if (nvme_sc < 0)
return nvme_sc;
/* Mask DNR, More, and reserved fields */
- nvme_sc &= 0x7FF;
-
- switch (nvme_sc) {
+ switch (nvme_sc & 0x7FF) {
/* Generic Command Status */
case NVME_SC_SUCCESS:
status = SAM_STAT_GOOD;
@@ -662,8 +515,7 @@ static int nvme_trans_status_code(struct sg_io_hdr *hdr, int nvme_sc)
}
res = nvme_trans_completion(hdr, status, sense_key, asc, ascq);
-
- return res;
+ return res ? res : nvme_sc;
}
/* INQUIRY Helper Functions */
@@ -673,10 +525,8 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
int alloc_len)
{
struct nvme_dev *dev = ns->dev;
- dma_addr_t dma_addr;
- void *mem;
struct nvme_id_ns *id_ns;
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int nvme_sc;
int xfer_len;
u8 resp_data_format = 0x02;
@@ -684,31 +534,17 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
u8 cmdque = 0x01 << 1;
u8 fw_offset = sizeof(dev->firmware_rev);
- mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out_dma;
- }
-
/* nvme ns identify - use DPS value for PROTECT field */
- nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+ nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
- /*
- * If nvme_sc was -ve, res will be -ve here.
- * If nvme_sc was +ve, the status would bace been translated, and res
- * can only be 0 or -ve.
- * - If 0 && nvme_sc > 0, then go into next if where res gets nvme_sc
- * - If -ve, return because its a Linux error.
- */
if (res)
- goto out_free;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_free;
- }
- id_ns = mem;
- (id_ns->dps) ? (protect = 0x01) : (protect = 0);
+ return res;
+
+ if (id_ns->dps)
+ protect = 0x01;
+ else
+ protect = 0;
+ kfree(id_ns);
memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
inq_response[2] = VERSION_SPC_4;
@@ -725,20 +561,13 @@ static int nvme_trans_standard_inquiry_page(struct nvme_ns *ns,
strncpy(&inq_response[32], dev->firmware_rev + fw_offset, 4);
xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
- res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- out_free:
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
- dma_addr);
- out_dma:
- return res;
+ return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
}
static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns,
struct sg_io_hdr *hdr, u8 *inq_response,
int alloc_len)
{
- int res = SNTI_TRANSLATION_SUCCESS;
int xfer_len;
memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
@@ -752,9 +581,7 @@ static int nvme_trans_supported_vpd_pages(struct nvme_ns *ns,
inq_response[9] = INQ_BDEV_LIMITS_PAGE;
xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
- res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- return res;
+ return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
}
static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
@@ -762,7 +589,6 @@ static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
int alloc_len)
{
struct nvme_dev *dev = ns->dev;
- int res = SNTI_TRANSLATION_SUCCESS;
int xfer_len;
memset(inq_response, 0, STANDARD_INQUIRY_LENGTH);
@@ -771,53 +597,42 @@ static int nvme_trans_unit_serial_page(struct nvme_ns *ns,
strncpy(&inq_response[4], dev->serial, INQ_SERIAL_NUMBER_LENGTH);
xfer_len = min(alloc_len, STANDARD_INQUIRY_LENGTH);
- res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- return res;
+ return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
}
static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *inq_response, int alloc_len)
{
struct nvme_dev *dev = ns->dev;
- dma_addr_t dma_addr;
- void *mem;
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int nvme_sc;
int xfer_len;
__be32 tmp_id = cpu_to_be32(ns->ns_id);
- mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out_dma;
- }
-
memset(inq_response, 0, alloc_len);
inq_response[1] = INQ_DEVICE_IDENTIFICATION_PAGE; /* Page Code */
if (readl(&dev->bar->vs) >= NVME_VS(1, 1)) {
- struct nvme_id_ns *id_ns = mem;
- void *eui = id_ns->eui64;
- int len = sizeof(id_ns->eui64);
+ struct nvme_id_ns *id_ns;
+ void *eui;
+ int len;
- nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+ nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out_free;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_free;
- }
+ return res;
+ eui = id_ns->eui64;
+ len = sizeof(id_ns->eui64);
if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) {
if (bitmap_empty(eui, len * 8)) {
eui = id_ns->nguid;
len = sizeof(id_ns->nguid);
}
}
- if (bitmap_empty(eui, len * 8))
+ if (bitmap_empty(eui, len * 8)) {
+ kfree(id_ns);
goto scsi_string;
+ }
inq_response[3] = 4 + len; /* Page Length */
/* Designation Descriptor start */
@@ -826,14 +641,14 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
inq_response[6] = 0x00; /* Rsvd */
inq_response[7] = len; /* Designator Length */
memcpy(&inq_response[8], eui, len);
+ kfree(id_ns);
} else {
scsi_string:
if (alloc_len < 72) {
- res = nvme_trans_completion(hdr,
+ return nvme_trans_completion(hdr,
SAM_STAT_CHECK_CONDITION,
ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out_free;
}
inq_response[3] = 0x48; /* Page Length */
/* Designation Descriptor start */
@@ -842,30 +657,22 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
inq_response[6] = 0x00; /* Rsvd */
inq_response[7] = 0x44; /* Designator Length */
- sprintf(&inq_response[8], "%04x", dev->pci_dev->vendor);
+ sprintf(&inq_response[8], "%04x", to_pci_dev(dev->dev)->vendor);
memcpy(&inq_response[12], dev->model, sizeof(dev->model));
sprintf(&inq_response[52], "%04x", tmp_id);
memcpy(&inq_response[56], dev->serial, sizeof(dev->serial));
}
xfer_len = alloc_len;
- res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
-
- out_free:
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
- dma_addr);
- out_dma:
- return res;
+ return nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
}
static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
int alloc_len)
{
u8 *inq_response;
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int nvme_sc;
struct nvme_dev *dev = ns->dev;
- dma_addr_t dma_addr;
- void *mem;
struct nvme_id_ctrl *id_ctrl;
struct nvme_id_ns *id_ns;
int xfer_len;
@@ -878,45 +685,32 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 luiclr = 0x01;
inq_response = kmalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
- if (inq_response == NULL) {
- res = -ENOMEM;
- goto out_mem;
- }
-
- mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out_dma;
- }
+ if (inq_response == NULL)
+ return -ENOMEM;
- /* nvme ns identify */
- nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+ nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out_free;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_free;
- }
- id_ns = mem;
- spt = spt_lut[(id_ns->dpc) & 0x07] << 3;
- (id_ns->dps) ? (protect = 0x01) : (protect = 0);
+ goto out_free_inq;
+
+ spt = spt_lut[id_ns->dpc & 0x07] << 3;
+ if (id_ns->dps)
+ protect = 0x01;
+ else
+ protect = 0;
+ kfree(id_ns);
+
grd_chk = protect << 2;
app_chk = protect << 1;
ref_chk = protect;
- /* nvme controller identify */
- nvme_sc = nvme_identify(dev, 0, 1, dma_addr);
+ nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out_free;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_free;
- }
- id_ctrl = mem;
+ goto out_free_inq;
+
v_sup = id_ctrl->vwc;
+ kfree(id_ctrl);
memset(inq_response, 0, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
inq_response[1] = INQ_EXTENDED_INQUIRY_DATA_PAGE; /* Page Code */
@@ -932,12 +726,8 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
xfer_len = min(alloc_len, EXTENDED_INQUIRY_DATA_PAGE_LENGTH);
res = nvme_trans_copy_to_user(hdr, inq_response, xfer_len);
- out_free:
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
- dma_addr);
- out_dma:
+ out_free_inq:
kfree(inq_response);
- out_mem:
return res;
}
@@ -965,7 +755,7 @@ static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
int alloc_len)
{
u8 *inq_response;
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int xfer_len;
inq_response = kzalloc(EXTENDED_INQUIRY_DATA_PAGE_LENGTH, GFP_KERNEL);
@@ -994,7 +784,7 @@ static int nvme_trans_bdev_char_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
int alloc_len)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int xfer_len;
u8 *log_response;
@@ -1022,47 +812,30 @@ static int nvme_trans_log_supp_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
struct sg_io_hdr *hdr, int alloc_len)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int xfer_len;
u8 *log_response;
- struct nvme_command c;
struct nvme_dev *dev = ns->dev;
struct nvme_smart_log *smart_log;
- dma_addr_t dma_addr;
- void *mem;
u8 temp_c;
u16 temp_k;
log_response = kzalloc(LOG_INFO_EXCP_PAGE_LENGTH, GFP_KERNEL);
- if (log_response == NULL) {
- res = -ENOMEM;
- goto out_mem;
- }
+ if (log_response == NULL)
+ return -ENOMEM;
- mem = dma_alloc_coherent(&dev->pci_dev->dev,
- sizeof(struct nvme_smart_log),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out_dma;
- }
+ res = nvme_get_log_page(dev, &smart_log);
+ if (res < 0)
+ goto out_free_response;
- /* Get SMART Log Page */
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_admin_get_log_page;
- c.common.nsid = cpu_to_le32(0xFFFFFFFF);
- c.common.prp1 = cpu_to_le64(dma_addr);
- c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) /
- BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART);
- res = nvme_submit_admin_cmd(dev, &c, NULL);
if (res != NVME_SC_SUCCESS) {
temp_c = LOG_TEMP_UNKNOWN;
} else {
- smart_log = mem;
temp_k = (smart_log->temperature[1] << 8) +
(smart_log->temperature[0]);
temp_c = temp_k - KELVIN_TEMP_FACTOR;
}
+ kfree(smart_log);
log_response[0] = LOG_PAGE_INFORMATIONAL_EXCEPTIONS_PAGE;
/* Subpage=0x00, Page Length MSB=0 */
@@ -1078,59 +851,39 @@ static int nvme_trans_log_info_exceptions(struct nvme_ns *ns,
xfer_len = min(alloc_len, LOG_INFO_EXCP_PAGE_LENGTH);
res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log),
- mem, dma_addr);
- out_dma:
+ out_free_response:
kfree(log_response);
- out_mem:
return res;
}
static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
int alloc_len)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int xfer_len;
u8 *log_response;
- struct nvme_command c;
struct nvme_dev *dev = ns->dev;
struct nvme_smart_log *smart_log;
- dma_addr_t dma_addr;
- void *mem;
u32 feature_resp;
u8 temp_c_cur, temp_c_thresh;
u16 temp_k;
log_response = kzalloc(LOG_TEMP_PAGE_LENGTH, GFP_KERNEL);
- if (log_response == NULL) {
- res = -ENOMEM;
- goto out_mem;
- }
+ if (log_response == NULL)
+ return -ENOMEM;
- mem = dma_alloc_coherent(&dev->pci_dev->dev,
- sizeof(struct nvme_smart_log),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out_dma;
- }
+ res = nvme_get_log_page(dev, &smart_log);
+ if (res < 0)
+ goto out_free_response;
- /* Get SMART Log Page */
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_admin_get_log_page;
- c.common.nsid = cpu_to_le32(0xFFFFFFFF);
- c.common.prp1 = cpu_to_le64(dma_addr);
- c.common.cdw10[0] = cpu_to_le32((((sizeof(struct nvme_smart_log) /
- BYTES_TO_DWORDS) - 1) << 16) | NVME_LOG_SMART);
- res = nvme_submit_admin_cmd(dev, &c, NULL);
if (res != NVME_SC_SUCCESS) {
temp_c_cur = LOG_TEMP_UNKNOWN;
} else {
- smart_log = mem;
temp_k = (smart_log->temperature[1] << 8) +
(smart_log->temperature[0]);
temp_c_cur = temp_k - KELVIN_TEMP_FACTOR;
}
+ kfree(smart_log);
/* Get Features for Temp Threshold */
res = nvme_get_features(dev, NVME_FEAT_TEMP_THRESH, 0, 0,
@@ -1159,11 +912,8 @@ static int nvme_trans_log_temperature(struct nvme_ns *ns, struct sg_io_hdr *hdr,
xfer_len = min(alloc_len, LOG_TEMP_PAGE_LENGTH);
res = nvme_trans_copy_to_user(hdr, log_response, xfer_len);
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_smart_log),
- mem, dma_addr);
- out_dma:
+ out_free_response:
kfree(log_response);
- out_mem:
return res;
}
@@ -1174,59 +924,45 @@ static int nvme_trans_fill_mode_parm_hdr(u8 *resp, int len, u8 cdb10, u8 llbaa,
{
/* Quick check to make sure I don't stomp on my own memory... */
if ((cdb10 && len < 8) || (!cdb10 && len < 4))
- return SNTI_INTERNAL_ERROR;
+ return -EINVAL;
if (cdb10) {
resp[0] = (mode_data_length & 0xFF00) >> 8;
resp[1] = (mode_data_length & 0x00FF);
- /* resp[2] and [3] are zero */
+ resp[3] = 0x10 /* DPOFUA */;
resp[4] = llbaa;
resp[5] = RESERVED_FIELD;
resp[6] = (blk_desc_len & 0xFF00) >> 8;
resp[7] = (blk_desc_len & 0x00FF);
} else {
resp[0] = (mode_data_length & 0x00FF);
- /* resp[1] and [2] are zero */
+ resp[2] = 0x10 /* DPOFUA */;
resp[3] = (blk_desc_len & 0x00FF);
}
- return SNTI_TRANSLATION_SUCCESS;
+ return 0;
}
static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *resp, int len, u8 llbaa)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int nvme_sc;
struct nvme_dev *dev = ns->dev;
- dma_addr_t dma_addr;
- void *mem;
struct nvme_id_ns *id_ns;
u8 flbas;
u32 lba_length;
if (llbaa == 0 && len < MODE_PAGE_BLK_DES_LEN)
- return SNTI_INTERNAL_ERROR;
+ return -EINVAL;
else if (llbaa > 0 && len < MODE_PAGE_LLBAA_BLK_DES_LEN)
- return SNTI_INTERNAL_ERROR;
-
- mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out;
- }
+ return -EINVAL;
- /* nvme ns identify */
- nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+ nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out_dma;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_dma;
- }
- id_ns = mem;
+ return res;
+
flbas = (id_ns->flbas) & 0x0F;
lba_length = (1 << (id_ns->lbaf[flbas].ds));
@@ -1246,10 +982,7 @@ static int nvme_trans_fill_blk_desc(struct nvme_ns *ns, struct sg_io_hdr *hdr,
memcpy(&resp[12], &tmp_len, sizeof(u32));
}
- out_dma:
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
- dma_addr);
- out:
+ kfree(id_ns);
return res;
}
@@ -1258,7 +991,7 @@ static int nvme_trans_fill_control_page(struct nvme_ns *ns,
int len)
{
if (len < MODE_PAGE_CONTROL_LEN)
- return SNTI_INTERNAL_ERROR;
+ return -EINVAL;
resp[0] = MODE_PAGE_CONTROL;
resp[1] = MODE_PAGE_CONTROL_LEN_FIELD;
@@ -1272,78 +1005,69 @@ static int nvme_trans_fill_control_page(struct nvme_ns *ns,
resp[9] = 0xFF;
/* Bytes 10,11: Extended selftest completion time = 0x0000 */
- return SNTI_TRANSLATION_SUCCESS;
+ return 0;
}
static int nvme_trans_fill_caching_page(struct nvme_ns *ns,
struct sg_io_hdr *hdr,
u8 *resp, int len)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res = 0;
int nvme_sc;
struct nvme_dev *dev = ns->dev;
u32 feature_resp;
u8 vwc;
if (len < MODE_PAGE_CACHING_LEN)
- return SNTI_INTERNAL_ERROR;
+ return -EINVAL;
nvme_sc = nvme_get_features(dev, NVME_FEAT_VOLATILE_WC, 0, 0,
&feature_resp);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out;
- if (nvme_sc) {
- res = nvme_sc;
- goto out;
- }
+ return res;
+
vwc = feature_resp & 0x00000001;
resp[0] = MODE_PAGE_CACHING;
resp[1] = MODE_PAGE_CACHING_LEN_FIELD;
resp[2] = vwc << 2;
-
- out:
- return res;
+ return 0;
}
static int nvme_trans_fill_pow_cnd_page(struct nvme_ns *ns,
struct sg_io_hdr *hdr, u8 *resp,
int len)
{
- int res = SNTI_TRANSLATION_SUCCESS;
-
if (len < MODE_PAGE_POW_CND_LEN)
- return SNTI_INTERNAL_ERROR;
+ return -EINVAL;
resp[0] = MODE_PAGE_POWER_CONDITION;
resp[1] = MODE_PAGE_POW_CND_LEN_FIELD;
/* All other bytes are zero */
- return res;
+ return 0;
}
static int nvme_trans_fill_inf_exc_page(struct nvme_ns *ns,
struct sg_io_hdr *hdr, u8 *resp,
int len)
{
- int res = SNTI_TRANSLATION_SUCCESS;
-
if (len < MODE_PAGE_INF_EXC_LEN)
- return SNTI_INTERNAL_ERROR;
+ return -EINVAL;
resp[0] = MODE_PAGE_INFO_EXCEP;
resp[1] = MODE_PAGE_INF_EXC_LEN_FIELD;
resp[2] = 0x88;
/* All other bytes are zero */
- return res;
+ return 0;
}
static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *resp, int len)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
u16 mode_pages_offset_1 = 0;
u16 mode_pages_offset_2, mode_pages_offset_3, mode_pages_offset_4;
@@ -1353,23 +1077,18 @@ static int nvme_trans_fill_all_pages(struct nvme_ns *ns, struct sg_io_hdr *hdr,
res = nvme_trans_fill_caching_page(ns, hdr, &resp[mode_pages_offset_1],
MODE_PAGE_CACHING_LEN);
- if (res != SNTI_TRANSLATION_SUCCESS)
- goto out;
+ if (res)
+ return res;
res = nvme_trans_fill_control_page(ns, hdr, &resp[mode_pages_offset_2],
MODE_PAGE_CONTROL_LEN);
- if (res != SNTI_TRANSLATION_SUCCESS)
- goto out;
+ if (res)
+ return res;
res = nvme_trans_fill_pow_cnd_page(ns, hdr, &resp[mode_pages_offset_3],
MODE_PAGE_POW_CND_LEN);
- if (res != SNTI_TRANSLATION_SUCCESS)
- goto out;
- res = nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4],
+ if (res)
+ return res;
+ return nvme_trans_fill_inf_exc_page(ns, hdr, &resp[mode_pages_offset_4],
MODE_PAGE_INF_EXC_LEN);
- if (res != SNTI_TRANSLATION_SUCCESS)
- goto out;
-
- out:
- return res;
}
static inline int nvme_trans_get_blk_desc_len(u8 dbd, u8 llbaa)
@@ -1390,7 +1109,7 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns,
struct sg_io_hdr *hdr, u8 *, int),
u16 mode_pages_tot_len)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int xfer_len;
u8 *response;
u8 dbd, llbaa;
@@ -1399,9 +1118,10 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns,
u16 mode_pages_offset_1;
u16 blk_desc_len, blk_desc_offset, mode_data_length;
- dbd = GET_MODE_SENSE_DBD(cmd);
- llbaa = GET_MODE_SENSE_LLBAA(cmd);
- mph_size = GET_MODE_SENSE_MPH_SIZE(cdb10);
+ dbd = (cmd[1] & MODE_SENSE_DBD_MASK) >> MODE_SENSE_DBD_SHIFT;
+ llbaa = (cmd[1] & MODE_SENSE_LLBAA_MASK) >> MODE_SENSE_LLBAA_SHIFT;
+ mph_size = cdb10 ? MODE_SENSE10_MPH_SIZE : MODE_SENSE6_MPH_SIZE;
+
blk_desc_len = nvme_trans_get_blk_desc_len(dbd, llbaa);
resp_size = mph_size + blk_desc_len + mode_pages_tot_len;
@@ -1419,18 +1139,18 @@ static int nvme_trans_mode_page_create(struct nvme_ns *ns,
res = nvme_trans_fill_mode_parm_hdr(&response[0], mph_size, cdb10,
llbaa, mode_data_length, blk_desc_len);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out_free;
if (blk_desc_len > 0) {
res = nvme_trans_fill_blk_desc(ns, hdr,
&response[blk_desc_offset],
blk_desc_len, llbaa);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out_free;
}
res = mode_page_fill_func(ns, hdr, &response[mode_pages_offset_1],
mode_pages_tot_len);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out_free;
xfer_len = min(alloc_len, resp_size);
@@ -1485,33 +1205,20 @@ static void nvme_trans_fill_read_cap(u8 *response, struct nvme_id_ns *id_ns,
static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 pc, u8 pcmod, u8 start)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int nvme_sc;
struct nvme_dev *dev = ns->dev;
- dma_addr_t dma_addr;
- void *mem;
struct nvme_id_ctrl *id_ctrl;
int lowest_pow_st; /* max npss = lowest power consumption */
unsigned ps_desired = 0;
- /* NVMe Controller Identify */
- mem = dma_alloc_coherent(&dev->pci_dev->dev,
- sizeof(struct nvme_id_ctrl),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out;
- }
- nvme_sc = nvme_identify(dev, 0, 1, dma_addr);
+ nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out_dma;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_dma;
- }
- id_ctrl = mem;
+ return res;
+
lowest_pow_st = max(POWER_STATE_0, (int)(id_ctrl->npss - 1));
+ kfree(id_ctrl);
switch (pc) {
case NVME_POWER_STATE_START_VALID:
@@ -1551,79 +1258,48 @@ static int nvme_trans_power_state(struct nvme_ns *ns, struct sg_io_hdr *hdr,
}
nvme_sc = nvme_set_features(dev, NVME_FEAT_POWER_MGMT, ps_desired, 0,
NULL);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- goto out_dma;
- if (nvme_sc)
- res = nvme_sc;
- out_dma:
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ctrl), mem,
- dma_addr);
- out:
- return res;
+ return nvme_trans_status_code(hdr, nvme_sc);
}
-/* Write Buffer Helper Functions */
-/* Also using this for Format Unit with hdr passed as NULL, and buffer_id, 0 */
+static int nvme_trans_send_activate_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+ u8 buffer_id)
+{
+ struct nvme_command c;
+ int nvme_sc;
-static int nvme_trans_send_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+ memset(&c, 0, sizeof(c));
+ c.common.opcode = nvme_admin_activate_fw;
+ c.common.cdw10[0] = cpu_to_le32(buffer_id | NVME_FWACT_REPL_ACTV);
+
+ nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
+ return nvme_trans_status_code(hdr, nvme_sc);
+}
+
+static int nvme_trans_send_download_fw_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 opcode, u32 tot_len, u32 offset,
u8 buffer_id)
{
- int res = SNTI_TRANSLATION_SUCCESS;
int nvme_sc;
struct nvme_dev *dev = ns->dev;
struct nvme_command c;
- struct nvme_iod *iod = NULL;
- unsigned length;
- memset(&c, 0, sizeof(c));
- c.common.opcode = opcode;
- if (opcode == nvme_admin_download_fw) {
- if (hdr->iovec_count > 0) {
- /* Assuming SGL is not allowed for this command */
- res = nvme_trans_completion(hdr,
- SAM_STAT_CHECK_CONDITION,
- ILLEGAL_REQUEST,
- SCSI_ASC_INVALID_CDB,
- SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- }
- iod = nvme_map_user_pages(dev, DMA_TO_DEVICE,
- (unsigned long)hdr->dxferp, tot_len);
- if (IS_ERR(iod)) {
- res = PTR_ERR(iod);
- goto out;
- }
- length = nvme_setup_prps(dev, iod, tot_len, GFP_KERNEL);
- if (length != tot_len) {
- res = -ENOMEM;
- goto out_unmap;
- }
-
- c.dlfw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
- c.dlfw.prp2 = cpu_to_le64(iod->first_dma);
- c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
- c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
- } else if (opcode == nvme_admin_activate_fw) {
- u32 cdw10 = buffer_id | NVME_FWACT_REPL_ACTV;
- c.common.cdw10[0] = cpu_to_le32(cdw10);
+ if (hdr->iovec_count > 0) {
+ /* Assuming SGL is not allowed for this command */
+ return nvme_trans_completion(hdr,
+ SAM_STAT_CHECK_CONDITION,
+ ILLEGAL_REQUEST,
+ SCSI_ASC_INVALID_CDB,
+ SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
}
- nvme_sc = nvme_submit_admin_cmd(dev, &c, NULL);
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- goto out_unmap;
- if (nvme_sc)
- res = nvme_sc;
-
- out_unmap:
- if (opcode == nvme_admin_download_fw) {
- nvme_unmap_user_pages(dev, DMA_TO_DEVICE, iod);
- nvme_free_iod(dev, iod);
- }
- out:
- return res;
+ memset(&c, 0, sizeof(c));
+ c.common.opcode = nvme_admin_download_fw;
+ c.dlfw.numd = cpu_to_le32((tot_len/BYTES_TO_DWORDS) - 1);
+ c.dlfw.offset = cpu_to_le32(offset/BYTES_TO_DWORDS);
+
+ nvme_sc = __nvme_submit_sync_cmd(dev->admin_q, &c, NULL,
+ hdr->dxferp, tot_len, NULL, 0);
+ return nvme_trans_status_code(hdr, nvme_sc);
}
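
For reference, the download path above converts byte counts into the zero-based dword units that Firmware Image Download expects, which is also why the WRITE BUFFER handler rejects lengths that are not a whole number of dwords. A standalone sketch of that conversion (plain C; it assumes BYTES_TO_DWORDS is 4, matching the driver's definition):

    #include <stdint.h>
    #include <stdio.h>

    #define BYTES_TO_DWORDS 4  /* assumed to match the driver's definition */

    int main(void)
    {
        uint32_t tot_len = 8192;  /* bytes in this firmware chunk */
        uint32_t offset  = 4096;  /* byte offset of the chunk     */

        /* numd is a 0-based dword count, offset is expressed in dwords */
        uint32_t numd = tot_len / BYTES_TO_DWORDS - 1;
        uint32_t ofst = offset / BYTES_TO_DWORDS;

        printf("numd=%u offset=%u dwords\n", numd, ofst);
        return 0;
    }
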
/* Mode Select Helper Functions */
@@ -1686,7 +1362,7 @@ static void nvme_trans_modesel_save_bd(struct nvme_ns *ns, u8 *parm_list,
static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *mode_page, u8 page_code)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res = 0;
int nvme_sc;
struct nvme_dev *dev = ns->dev;
unsigned dword11;
@@ -1697,12 +1373,6 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
nvme_sc = nvme_set_features(dev, NVME_FEAT_VOLATILE_WC, dword11,
0, NULL);
res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- break;
- if (nvme_sc) {
- res = nvme_sc;
- break;
- }
break;
case MODE_PAGE_CONTROL:
break;
@@ -1714,8 +1384,6 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
ILLEGAL_REQUEST,
SCSI_ASC_INVALID_PARAMETER,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- if (!res)
- res = SNTI_INTERNAL_ERROR;
break;
}
break;
@@ -1723,8 +1391,6 @@ static int nvme_trans_modesel_get_mp(struct nvme_ns *ns, struct sg_io_hdr *hdr,
res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- if (!res)
- res = SNTI_INTERNAL_ERROR;
break;
}
@@ -1735,7 +1401,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd, u16 parm_list_len, u8 pf,
u8 sp, u8 cdb10)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
u8 *parm_list;
u16 bd_len;
u8 llbaa = 0;
@@ -1751,7 +1417,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
}
res = nvme_trans_copy_from_user(hdr, parm_list, parm_list_len);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out_mem;
nvme_trans_modesel_get_bd_len(parm_list, cdb10, &bd_len, &llbaa);
@@ -1789,7 +1455,7 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
mp_size = parm_list[index + 1] + 2;
res = nvme_trans_modesel_get_mp(ns, hdr, &parm_list[index],
page_code);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
break;
index += mp_size;
} while (index < parm_list_len);
@@ -1805,12 +1471,9 @@ static int nvme_trans_modesel_data(struct nvme_ns *ns, struct sg_io_hdr *hdr,
static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
struct sg_io_hdr *hdr)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res = 0;
int nvme_sc;
struct nvme_dev *dev = ns->dev;
- dma_addr_t dma_addr;
- void *mem;
- struct nvme_id_ns *id_ns;
u8 flbas;
/*
@@ -1821,22 +1484,12 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
*/
if (ns->mode_select_num_blocks == 0 || ns->mode_select_block_len == 0) {
- mem = dma_alloc_coherent(&dev->pci_dev->dev,
- sizeof(struct nvme_id_ns), &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out;
- }
- /* nvme ns identify */
- nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+ struct nvme_id_ns *id_ns;
+
+ nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out_dma;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_dma;
- }
- id_ns = mem;
+ return res;
if (ns->mode_select_num_blocks == 0)
ns->mode_select_num_blocks = le64_to_cpu(id_ns->ncap);
@@ -1845,18 +1498,17 @@ static int nvme_trans_fmt_set_blk_size_count(struct nvme_ns *ns,
ns->mode_select_block_len =
(1 << (id_ns->lbaf[flbas].ds));
}
- out_dma:
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
- mem, dma_addr);
+
+ kfree(id_ns);
}
- out:
- return res;
+
+ return 0;
}
static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
u8 format_prot_info, u8 *nvme_pf_code)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
u8 *parm_list;
u8 pf_usage, pf_code;
@@ -1866,7 +1518,7 @@ static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
goto out;
}
res = nvme_trans_copy_from_user(hdr, parm_list, len);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out_mem;
if ((parm_list[FORMAT_UNIT_IMMED_OFFSET] &
@@ -1916,11 +1568,9 @@ static int nvme_trans_fmt_get_parm_header(struct sg_io_hdr *hdr, u8 len,
static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 prot_info)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int nvme_sc;
struct nvme_dev *dev = ns->dev;
- dma_addr_t dma_addr;
- void *mem;
struct nvme_id_ns *id_ns;
u8 i;
u8 flbas, nlbaf;
@@ -1929,22 +1579,11 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
struct nvme_command c;
/* Loop thru LBAF's in id_ns to match reqd lbaf, put in cdw10 */
- mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out;
- }
- /* nvme ns identify */
- nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+ nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out_dma;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_dma;
- }
- id_ns = mem;
+ return res;
+
flbas = (id_ns->flbas) & 0x0F;
nlbaf = id_ns->nlbaf;
@@ -1972,69 +1611,13 @@ static int nvme_trans_fmt_send_cmd(struct nvme_ns *ns, struct sg_io_hdr *hdr,
c.format.nsid = cpu_to_le32(ns->ns_id);
c.format.cdw10 = cpu_to_le32(cdw10);
- nvme_sc = nvme_submit_admin_cmd(dev, &c, NULL);
+ nvme_sc = nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0);
res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- goto out_dma;
- if (nvme_sc)
- res = nvme_sc;
- out_dma:
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
- dma_addr);
- out:
+ kfree(id_ns);
return res;
}
-/* Read/Write Helper Functions */
-
-static inline void nvme_trans_get_io_cdb6(u8 *cmd,
- struct nvme_trans_io_cdb *cdb_info)
-{
- cdb_info->fua = 0;
- cdb_info->prot_info = 0;
- cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_6_CDB_LBA_OFFSET) &
- IO_6_CDB_LBA_MASK;
- cdb_info->xfer_len = GET_U8_FROM_CDB(cmd, IO_6_CDB_TX_LEN_OFFSET);
-
- /* sbc3r27 sec 5.32 - TRANSFER LEN of 0 implies a 256 Block transfer */
- if (cdb_info->xfer_len == 0)
- cdb_info->xfer_len = IO_6_DEFAULT_TX_LEN;
-}
-
-static inline void nvme_trans_get_io_cdb10(u8 *cmd,
- struct nvme_trans_io_cdb *cdb_info)
-{
- cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_10_CDB_FUA_OFFSET) &
- IO_CDB_FUA_MASK;
- cdb_info->prot_info = GET_U8_FROM_CDB(cmd, IO_10_CDB_WP_OFFSET) &
- IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT;
- cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_10_CDB_LBA_OFFSET);
- cdb_info->xfer_len = GET_U16_FROM_CDB(cmd, IO_10_CDB_TX_LEN_OFFSET);
-}
-
-static inline void nvme_trans_get_io_cdb12(u8 *cmd,
- struct nvme_trans_io_cdb *cdb_info)
-{
- cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_12_CDB_FUA_OFFSET) &
- IO_CDB_FUA_MASK;
- cdb_info->prot_info = GET_U8_FROM_CDB(cmd, IO_12_CDB_WP_OFFSET) &
- IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT;
- cdb_info->lba = GET_U32_FROM_CDB(cmd, IO_12_CDB_LBA_OFFSET);
- cdb_info->xfer_len = GET_U32_FROM_CDB(cmd, IO_12_CDB_TX_LEN_OFFSET);
-}
-
-static inline void nvme_trans_get_io_cdb16(u8 *cmd,
- struct nvme_trans_io_cdb *cdb_info)
-{
- cdb_info->fua = GET_U8_FROM_CDB(cmd, IO_16_CDB_FUA_OFFSET) &
- IO_CDB_FUA_MASK;
- cdb_info->prot_info = GET_U8_FROM_CDB(cmd, IO_16_CDB_WP_OFFSET) &
- IO_CDB_WP_MASK >> IO_CDB_WP_SHIFT;
- cdb_info->lba = GET_U64_FROM_CDB(cmd, IO_16_CDB_LBA_OFFSET);
- cdb_info->xfer_len = GET_U32_FROM_CDB(cmd, IO_16_CDB_TX_LEN_OFFSET);
-}
-
static inline u32 nvme_trans_io_get_num_cmds(struct sg_io_hdr *hdr,
struct nvme_trans_io_cdb *cdb_info,
u32 max_blocks)
@@ -2064,11 +1647,8 @@ static u16 nvme_trans_io_get_control(struct nvme_ns *ns,
static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
struct nvme_trans_io_cdb *cdb_info, u8 is_write)
{
- int res = SNTI_TRANSLATION_SUCCESS;
- int nvme_sc;
- struct nvme_dev *dev = ns->dev;
+ int nvme_sc = NVME_SC_SUCCESS;
u32 num_cmds;
- struct nvme_iod *iod;
u64 unit_len;
u64 unit_num_blocks; /* Number of blocks to xfer in each nvme cmd */
u32 retcode;
@@ -2119,45 +1699,20 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
control = nvme_trans_io_get_control(ns, cdb_info);
c.rw.control = cpu_to_le16(control);
- iod = nvme_map_user_pages(dev,
- (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
- (unsigned long)next_mapping_addr, unit_len);
- if (IS_ERR(iod)) {
- res = PTR_ERR(iod);
- goto out;
- }
- retcode = nvme_setup_prps(dev, iod, unit_len, GFP_KERNEL);
- if (retcode != unit_len) {
- nvme_unmap_user_pages(dev,
- (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
- iod);
- nvme_free_iod(dev, iod);
- res = -ENOMEM;
- goto out;
+ if (get_capacity(ns->disk) - unit_num_blocks <
+ cdb_info->lba + nvme_offset) {
+ nvme_sc = NVME_SC_LBA_RANGE;
+ break;
}
- c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
- c.rw.prp2 = cpu_to_le64(iod->first_dma);
+ nvme_sc = __nvme_submit_sync_cmd(ns->queue, &c, NULL,
+ next_mapping_addr, unit_len, NULL, 0);
+ if (nvme_sc)
+ break;
nvme_offset += unit_num_blocks;
-
- nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL);
- if (nvme_sc != NVME_SC_SUCCESS) {
- nvme_unmap_user_pages(dev,
- (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
- iod);
- nvme_free_iod(dev, iod);
- res = nvme_trans_status_code(hdr, nvme_sc);
- goto out;
- }
- nvme_unmap_user_pages(dev,
- (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
- iod);
- nvme_free_iod(dev, iod);
}
- res = nvme_trans_status_code(hdr, NVME_SC_SUCCESS);
- out:
- return res;
+ return nvme_trans_status_code(hdr, nvme_sc);
}
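
The rewritten loop also gains an explicit bounds check: a chunk is refused with NVME_SC_LBA_RANGE before submission if it would run past the namespace capacity. A standalone sketch of that check (plain C, illustrative values); it is written as capacity - nblocks < lba so a very large LBA cannot overflow the comparison, assuming nblocks never exceeds the capacity:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdbool.h>

    /* True if [lba, lba + nblocks) does not fit on a device of 'capacity'
     * blocks.  The subtraction form avoids overflowing lba + nblocks;
     * it assumes nblocks <= capacity. */
    static bool lba_out_of_range(uint64_t capacity, uint64_t lba, uint64_t nblocks)
    {
        return capacity - nblocks < lba;
    }

    int main(void)
    {
        uint64_t capacity = 1000;

        printf("%d\n", lba_out_of_range(capacity, 992, 8));  /* 0: fits exactly  */
        printf("%d\n", lba_out_of_range(capacity, 993, 8));  /* 1: one past end  */
        return 0;
    }
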
@@ -2166,8 +1721,8 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
- struct nvme_trans_io_cdb cdb_info;
+ int res = 0;
+ struct nvme_trans_io_cdb cdb_info = { 0, };
u8 opcode = cmd[0];
u64 xfer_bytes;
u64 sum_iov_len = 0;
@@ -2175,27 +1730,52 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
int i;
size_t not_copied;
- /* Extract Fields from CDB */
+ /*
+ * The FUA and WPROTECT fields are not supported in 6-byte CDBs,
+ * but always in the same place for all others.
+ */
+ switch (opcode) {
+ case WRITE_6:
+ case READ_6:
+ break;
+ default:
+ cdb_info.fua = cmd[1] & 0x8;
+ cdb_info.prot_info = (cmd[1] & 0xe0) >> 5;
+ if (cdb_info.prot_info && !ns->pi_type) {
+ return nvme_trans_completion(hdr,
+ SAM_STAT_CHECK_CONDITION,
+ ILLEGAL_REQUEST,
+ SCSI_ASC_INVALID_CDB,
+ SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
+ }
+ }
+
switch (opcode) {
case WRITE_6:
case READ_6:
- nvme_trans_get_io_cdb6(cmd, &cdb_info);
+ cdb_info.lba = get_unaligned_be24(&cmd[1]);
+ cdb_info.xfer_len = cmd[4];
+ if (cdb_info.xfer_len == 0)
+ cdb_info.xfer_len = 256;
break;
case WRITE_10:
case READ_10:
- nvme_trans_get_io_cdb10(cmd, &cdb_info);
+ cdb_info.lba = get_unaligned_be32(&cmd[2]);
+ cdb_info.xfer_len = get_unaligned_be16(&cmd[7]);
break;
case WRITE_12:
case READ_12:
- nvme_trans_get_io_cdb12(cmd, &cdb_info);
+ cdb_info.lba = get_unaligned_be32(&cmd[2]);
+ cdb_info.xfer_len = get_unaligned_be32(&cmd[6]);
break;
case WRITE_16:
case READ_16:
- nvme_trans_get_io_cdb16(cmd, &cdb_info);
+ cdb_info.lba = get_unaligned_be64(&cmd[2]);
+ cdb_info.xfer_len = get_unaligned_be32(&cmd[10]);
break;
default:
/* Will never really reach here */
- res = SNTI_INTERNAL_ERROR;
+ res = -EIO;
goto out;
}
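
The switch above replaces the old GET_*_FROM_CDB helpers with direct get_unaligned_be{24,32,64}() reads of the big-endian CDB fields, and the FUA/WRPROTECT bits are pulled from byte 1 for every CDB size except the 6-byte forms. A standalone sketch of the same extraction for a READ(10) CDB (plain C; the helpers here are simplified stand-ins for the kernel's get_unaligned_* routines):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t get_be16(const uint8_t *p) { return (uint32_t)p[0] << 8 | p[1]; }
    static uint32_t get_be32(const uint8_t *p)
    {
        return (uint32_t)p[0] << 24 | (uint32_t)p[1] << 16 |
               (uint32_t)p[2] << 8  | p[3];
    }

    int main(void)
    {
        /* READ(10): opcode 0x28, LBA in bytes 2-5, transfer length in 7-8 */
        uint8_t cdb[10] = { 0x28, 0, 0x00, 0x00, 0x12, 0x34, 0, 0x00, 0x10, 0 };

        uint32_t lba  = get_be32(&cdb[2]);
        uint32_t len  = get_be16(&cdb[7]);
        uint8_t  fua  = cdb[1] & 0x8;         /* same place in 10/12/16-byte CDBs */
        uint8_t  wprt = (cdb[1] & 0xe0) >> 5;

        printf("lba=%u len=%u fua=%u wrprotect=%u\n", lba, len, fua, wprt);
        return 0;
    }
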
@@ -2237,7 +1817,7 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
/* Send NVMe IO Command(s) */
res = nvme_trans_do_nvme_io(ns, hdr, &cdb_info, is_write);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out;
out:
@@ -2247,15 +1827,15 @@ static int nvme_trans_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, u8 is_write,
static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res = 0;
u8 evpd;
u8 page_code;
int alloc_len;
u8 *inq_response;
- evpd = GET_INQ_EVPD_BIT(cmd);
- page_code = GET_INQ_PAGE_CODE(cmd);
- alloc_len = GET_INQ_ALLOC_LENGTH(cmd);
+ evpd = cmd[1] & 0x01;
+ page_code = cmd[2];
+ alloc_len = get_unaligned_be16(&cmd[3]);
inq_response = kmalloc(max(alloc_len, STANDARD_INQUIRY_LENGTH),
GFP_KERNEL);
@@ -2316,29 +1896,27 @@ static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
u16 alloc_len;
- u8 sp;
u8 pc;
u8 page_code;
- sp = GET_U8_FROM_CDB(cmd, LOG_SENSE_CDB_SP_OFFSET);
- if (sp != LOG_SENSE_CDB_SP_NOT_ENABLED) {
+ if (cmd[1] != LOG_SENSE_CDB_SP_NOT_ENABLED) {
res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
goto out;
}
- pc = GET_U8_FROM_CDB(cmd, LOG_SENSE_CDB_PC_OFFSET);
- page_code = pc & LOG_SENSE_CDB_PAGE_CODE_MASK;
- pc = (pc & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT;
+
+ page_code = cmd[2] & LOG_SENSE_CDB_PAGE_CODE_MASK;
+ pc = (cmd[2] & LOG_SENSE_CDB_PC_MASK) >> LOG_SENSE_CDB_PC_SHIFT;
if (pc != LOG_SENSE_CDB_PC_CUMULATIVE_VALUES) {
res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
goto out;
}
- alloc_len = GET_U16_FROM_CDB(cmd, LOG_SENSE_CDB_ALLOC_LENGTH_OFFSET);
+ alloc_len = get_unaligned_be16(&cmd[7]);
switch (page_code) {
case LOG_PAGE_SUPPORTED_LOG_PAGES_PAGE:
res = nvme_trans_log_supp_pages(ns, hdr, alloc_len);
@@ -2363,24 +1941,18 @@ static int nvme_trans_log_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
u8 cdb10 = 0;
u16 parm_list_len;
u8 page_format;
u8 save_pages;
- page_format = GET_U8_FROM_CDB(cmd, MODE_SELECT_CDB_PAGE_FORMAT_OFFSET);
- page_format &= MODE_SELECT_CDB_PAGE_FORMAT_MASK;
+ page_format = cmd[1] & MODE_SELECT_CDB_PAGE_FORMAT_MASK;
+ save_pages = cmd[1] & MODE_SELECT_CDB_SAVE_PAGES_MASK;
- save_pages = GET_U8_FROM_CDB(cmd, MODE_SELECT_CDB_SAVE_PAGES_OFFSET);
- save_pages &= MODE_SELECT_CDB_SAVE_PAGES_MASK;
-
- if (GET_OPCODE(cmd) == MODE_SELECT) {
- parm_list_len = GET_U8_FROM_CDB(cmd,
- MODE_SELECT_6_CDB_PARAM_LIST_LENGTH_OFFSET);
+ if (cmd[0] == MODE_SELECT) {
+ parm_list_len = cmd[4];
} else {
- parm_list_len = GET_U16_FROM_CDB(cmd,
- MODE_SELECT_10_CDB_PARAM_LIST_LENGTH_OFFSET);
+ parm_list_len = cmd[7];
cdb10 = 1;
}
@@ -2389,42 +1961,36 @@ static int nvme_trans_mode_select(struct nvme_ns *ns, struct sg_io_hdr *hdr,
	 * According to SPC-4 r24, a parameter list length field of 0
* shall not be considered an error
*/
- res = nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len,
+ return nvme_trans_modesel_data(ns, hdr, cmd, parm_list_len,
page_format, save_pages, cdb10);
}
- return res;
+ return 0;
}
static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res = 0;
u16 alloc_len;
u8 cdb10 = 0;
- u8 page_code;
- u8 pc;
- if (GET_OPCODE(cmd) == MODE_SENSE) {
- alloc_len = GET_U8_FROM_CDB(cmd, MODE_SENSE6_ALLOC_LEN_OFFSET);
+ if (cmd[0] == MODE_SENSE) {
+ alloc_len = cmd[4];
} else {
- alloc_len = GET_U16_FROM_CDB(cmd,
- MODE_SENSE10_ALLOC_LEN_OFFSET);
+ alloc_len = get_unaligned_be16(&cmd[7]);
cdb10 = 1;
}
- pc = GET_U8_FROM_CDB(cmd, MODE_SENSE_PAGE_CONTROL_OFFSET) &
- MODE_SENSE_PAGE_CONTROL_MASK;
- if (pc != MODE_SENSE_PC_CURRENT_VALUES) {
+ if ((cmd[2] & MODE_SENSE_PAGE_CONTROL_MASK) !=
+ MODE_SENSE_PC_CURRENT_VALUES) {
res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
goto out;
}
- page_code = GET_U8_FROM_CDB(cmd, MODE_SENSE_PAGE_CODE_OFFSET) &
- MODE_SENSE_PAGE_CODE_MASK;
- switch (page_code) {
+ switch (cmd[2] & MODE_SENSE_PAGE_CODE_MASK) {
case MODE_PAGE_CACHING:
res = nvme_trans_mode_page_create(ns, hdr, cmd, alloc_len,
cdb10,
@@ -2467,47 +2033,34 @@ static int nvme_trans_mode_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
}
static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
+ u8 *cmd, u8 cdb16)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int nvme_sc;
- u32 alloc_len = READ_CAP_10_RESP_SIZE;
- u32 resp_size = READ_CAP_10_RESP_SIZE;
+ u32 alloc_len;
+ u32 resp_size;
u32 xfer_len;
- u8 cdb16;
struct nvme_dev *dev = ns->dev;
- dma_addr_t dma_addr;
- void *mem;
struct nvme_id_ns *id_ns;
u8 *response;
- cdb16 = IS_READ_CAP_16(cmd);
if (cdb16) {
- alloc_len = GET_READ_CAP_16_ALLOC_LENGTH(cmd);
+ alloc_len = get_unaligned_be32(&cmd[10]);
resp_size = READ_CAP_16_RESP_SIZE;
+ } else {
+ alloc_len = READ_CAP_10_RESP_SIZE;
+ resp_size = READ_CAP_10_RESP_SIZE;
}
- mem = dma_alloc_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out;
- }
- /* nvme ns identify */
- nvme_sc = nvme_identify(dev, ns->ns_id, 0, dma_addr);
+ nvme_sc = nvme_identify_ns(dev, ns->ns_id, &id_ns);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out_dma;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_dma;
- }
- id_ns = mem;
+ return res;
response = kzalloc(resp_size, GFP_KERNEL);
if (response == NULL) {
res = -ENOMEM;
- goto out_dma;
+ goto out_free_id;
}
nvme_trans_fill_read_cap(response, id_ns, cdb16);
@@ -2515,72 +2068,53 @@ static int nvme_trans_read_capacity(struct nvme_ns *ns, struct sg_io_hdr *hdr,
res = nvme_trans_copy_to_user(hdr, response, xfer_len);
kfree(response);
- out_dma:
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ns), mem,
- dma_addr);
- out:
+ out_free_id:
+ kfree(id_ns);
return res;
}
static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
int nvme_sc;
u32 alloc_len, xfer_len, resp_size;
- u8 select_report;
u8 *response;
struct nvme_dev *dev = ns->dev;
- dma_addr_t dma_addr;
- void *mem;
struct nvme_id_ctrl *id_ctrl;
u32 ll_length, lun_id;
u8 lun_id_offset = REPORT_LUNS_FIRST_LUN_OFFSET;
__be32 tmp_len;
- alloc_len = GET_REPORT_LUNS_ALLOC_LENGTH(cmd);
- select_report = GET_U8_FROM_CDB(cmd, REPORT_LUNS_SR_OFFSET);
-
- if ((select_report != ALL_LUNS_RETURNED) &&
- (select_report != ALL_WELL_KNOWN_LUNS_RETURNED) &&
- (select_report != RESTRICTED_LUNS_RETURNED)) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
+ switch (cmd[2]) {
+ default:
+ return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out;
- } else {
- /* NVMe Controller Identify */
- mem = dma_alloc_coherent(&dev->pci_dev->dev,
- sizeof(struct nvme_id_ctrl),
- &dma_addr, GFP_KERNEL);
- if (mem == NULL) {
- res = -ENOMEM;
- goto out;
- }
- nvme_sc = nvme_identify(dev, 0, 1, dma_addr);
+ case ALL_LUNS_RETURNED:
+ case ALL_WELL_KNOWN_LUNS_RETURNED:
+ case RESTRICTED_LUNS_RETURNED:
+ nvme_sc = nvme_identify_ctrl(dev, &id_ctrl);
res = nvme_trans_status_code(hdr, nvme_sc);
if (res)
- goto out_dma;
- if (nvme_sc) {
- res = nvme_sc;
- goto out_dma;
- }
- id_ctrl = mem;
+ return res;
+
ll_length = le32_to_cpu(id_ctrl->nn) * LUN_ENTRY_SIZE;
resp_size = ll_length + LUN_DATA_HEADER_SIZE;
+ alloc_len = get_unaligned_be32(&cmd[6]);
if (alloc_len < resp_size) {
res = nvme_trans_completion(hdr,
SAM_STAT_CHECK_CONDITION,
ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
- goto out_dma;
+ goto out_free_id;
}
response = kzalloc(resp_size, GFP_KERNEL);
if (response == NULL) {
res = -ENOMEM;
- goto out_dma;
+ goto out_free_id;
}
/* The first LUN ID will always be 0 per the SAM spec */
@@ -2601,24 +2135,21 @@ static int nvme_trans_report_luns(struct nvme_ns *ns, struct sg_io_hdr *hdr,
res = nvme_trans_copy_to_user(hdr, response, xfer_len);
kfree(response);
- out_dma:
- dma_free_coherent(&dev->pci_dev->dev, sizeof(struct nvme_id_ctrl), mem,
- dma_addr);
- out:
+ out_free_id:
+ kfree(id_ctrl);
return res;
}
static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
u8 alloc_len, xfer_len, resp_size;
u8 desc_format;
u8 *response;
- alloc_len = GET_REQUEST_SENSE_ALLOC_LENGTH(cmd);
- desc_format = GET_U8_FROM_CDB(cmd, REQUEST_SENSE_DESC_OFFSET);
- desc_format &= REQUEST_SENSE_DESC_MASK;
+ desc_format = cmd[1] & 0x01;
+ alloc_len = cmd[4];
resp_size = ((desc_format) ? (DESC_FMT_SENSE_DATA_SIZE) :
(FIXED_FMT_SENSE_DATA_SIZE));
@@ -2628,7 +2159,7 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr,
goto out;
}
- if (desc_format == DESCRIPTOR_FORMAT_SENSE_DATA_TYPE) {
+ if (desc_format) {
/* Descriptor Format Sense Data */
response[0] = DESC_FORMAT_SENSE_DATA;
response[1] = NO_SENSE;
@@ -2667,95 +2198,58 @@ static int nvme_trans_security_protocol(struct nvme_ns *ns,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
}
-static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
- u8 *cmd)
+static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
+ struct sg_io_hdr *hdr)
{
- int res = SNTI_TRANSLATION_SUCCESS;
int nvme_sc;
struct nvme_command c;
- u8 immed, pcmod, pc, no_flush, start;
- immed = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_IMMED_OFFSET);
- pcmod = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_POWER_COND_MOD_OFFSET);
- pc = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_POWER_COND_OFFSET);
- no_flush = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_NO_FLUSH_OFFSET);
- start = GET_U8_FROM_CDB(cmd, START_STOP_UNIT_CDB_START_OFFSET);
+ memset(&c, 0, sizeof(c));
+ c.common.opcode = nvme_cmd_flush;
+ c.common.nsid = cpu_to_le32(ns->ns_id);
- immed &= START_STOP_UNIT_CDB_IMMED_MASK;
- pcmod &= START_STOP_UNIT_CDB_POWER_COND_MOD_MASK;
- pc = (pc & START_STOP_UNIT_CDB_POWER_COND_MASK) >> NIBBLE_SHIFT;
- no_flush &= START_STOP_UNIT_CDB_NO_FLUSH_MASK;
- start &= START_STOP_UNIT_CDB_START_MASK;
+ nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, NULL, 0);
+ return nvme_trans_status_code(hdr, nvme_sc);
+}
+
+static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
+ u8 *cmd)
+{
+ u8 immed, pcmod, pc, no_flush, start;
+
+ immed = cmd[1] & 0x01;
+ pcmod = cmd[3] & 0x0f;
+ pc = (cmd[4] & 0xf0) >> 4;
+ no_flush = cmd[4] & 0x04;
+ start = cmd[4] & 0x01;
if (immed != 0) {
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
+ return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
} else {
if (no_flush == 0) {
/* Issue NVME FLUSH command prior to START STOP UNIT */
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_cmd_flush;
- c.common.nsid = cpu_to_le32(ns->ns_id);
-
- nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL);
- res = nvme_trans_status_code(hdr, nvme_sc);
+ int res = nvme_trans_synchronize_cache(ns, hdr);
if (res)
- goto out;
- if (nvme_sc) {
- res = nvme_sc;
- goto out;
- }
+ return res;
}
/* Setup the expected power state transition */
- res = nvme_trans_power_state(ns, hdr, pc, pcmod, start);
+ return nvme_trans_power_state(ns, hdr, pc, pcmod, start);
}
-
- out:
- return res;
-}
-
-static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
- struct sg_io_hdr *hdr, u8 *cmd)
-{
- int res = SNTI_TRANSLATION_SUCCESS;
- int nvme_sc;
- struct nvme_command c;
-
- memset(&c, 0, sizeof(c));
- c.common.opcode = nvme_cmd_flush;
- c.common.nsid = cpu_to_le32(ns->ns_id);
-
- nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL);
-
- res = nvme_trans_status_code(hdr, nvme_sc);
- if (res)
- goto out;
- if (nvme_sc)
- res = nvme_sc;
-
- out:
- return res;
}
static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res;
u8 parm_hdr_len = 0;
u8 nvme_pf_code = 0;
u8 format_prot_info, long_list, format_data;
- format_prot_info = GET_U8_FROM_CDB(cmd,
- FORMAT_UNIT_CDB_FORMAT_PROT_INFO_OFFSET);
- long_list = GET_U8_FROM_CDB(cmd, FORMAT_UNIT_CDB_LONG_LIST_OFFSET);
- format_data = GET_U8_FROM_CDB(cmd, FORMAT_UNIT_CDB_FORMAT_DATA_OFFSET);
-
- format_prot_info = (format_prot_info &
- FORMAT_UNIT_CDB_FORMAT_PROT_INFO_MASK) >>
- FORMAT_UNIT_CDB_FORMAT_PROT_INFO_SHIFT;
- long_list &= FORMAT_UNIT_CDB_LONG_LIST_MASK;
- format_data &= FORMAT_UNIT_CDB_FORMAT_DATA_MASK;
+ format_prot_info = (cmd[1] & 0xc0) >> 6;
+ long_list = cmd[1] & 0x20;
+ format_data = cmd[1] & 0x10;
if (format_data != 0) {
if (format_prot_info != 0) {
@@ -2779,16 +2273,16 @@ static int nvme_trans_format_unit(struct nvme_ns *ns, struct sg_io_hdr *hdr,
if (parm_hdr_len > 0) {
res = nvme_trans_fmt_get_parm_header(hdr, parm_hdr_len,
format_prot_info, &nvme_pf_code);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out;
}
/* Attempt to activate any previously downloaded firmware image */
- res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw, 0, 0, 0);
+ res = nvme_trans_send_activate_fw_cmd(ns, hdr, 0);
/* Determine Block size and count and send format command */
res = nvme_trans_fmt_set_blk_size_count(ns, hdr);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out;
res = nvme_trans_fmt_send_cmd(ns, hdr, nvme_pf_code);
@@ -2801,28 +2295,24 @@ static int nvme_trans_test_unit_ready(struct nvme_ns *ns,
struct sg_io_hdr *hdr,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
struct nvme_dev *dev = ns->dev;
if (!(readl(&dev->bar->csts) & NVME_CSTS_RDY))
- res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
+ return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
NOT_READY, SCSI_ASC_LUN_NOT_READY,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
else
- res = nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0);
-
- return res;
+ return nvme_trans_completion(hdr, SAM_STAT_GOOD, NO_SENSE, 0, 0);
}
static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd)
{
- int res = SNTI_TRANSLATION_SUCCESS;
+ int res = 0;
u32 buffer_offset, parm_list_length;
u8 buffer_id, mode;
- parm_list_length =
- GET_U24_FROM_CDB(cmd, WRITE_BUFFER_CDB_PARM_LIST_LENGTH_OFFSET);
+ parm_list_length = get_unaligned_be24(&cmd[6]);
if (parm_list_length % BYTES_TO_DWORDS != 0) {
/* NVMe expects Firmware file to be a whole number of DWORDS */
res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
@@ -2830,38 +2320,32 @@ static int nvme_trans_write_buffer(struct nvme_ns *ns, struct sg_io_hdr *hdr,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
goto out;
}
- buffer_id = GET_U8_FROM_CDB(cmd, WRITE_BUFFER_CDB_BUFFER_ID_OFFSET);
+ buffer_id = cmd[2];
if (buffer_id > NVME_MAX_FIRMWARE_SLOT) {
res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
ILLEGAL_REQUEST, SCSI_ASC_INVALID_CDB,
SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
goto out;
}
- mode = GET_U8_FROM_CDB(cmd, WRITE_BUFFER_CDB_MODE_OFFSET) &
- WRITE_BUFFER_CDB_MODE_MASK;
- buffer_offset =
- GET_U24_FROM_CDB(cmd, WRITE_BUFFER_CDB_BUFFER_OFFSET_OFFSET);
+ mode = cmd[1] & 0x1f;
+ buffer_offset = get_unaligned_be24(&cmd[3]);
switch (mode) {
case DOWNLOAD_SAVE_ACTIVATE:
- res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_download_fw,
+ res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
parm_list_length, buffer_offset,
buffer_id);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out;
- res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw,
- parm_list_length, buffer_offset,
- buffer_id);
+ res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
break;
case DOWNLOAD_SAVE_DEFER_ACTIVATE:
- res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_download_fw,
+ res = nvme_trans_send_download_fw_cmd(ns, hdr, nvme_admin_download_fw,
parm_list_length, buffer_offset,
buffer_id);
break;
case ACTIVATE_DEFERRED_MICROCODE:
- res = nvme_trans_send_fw_cmd(ns, hdr, nvme_admin_activate_fw,
- parm_list_length, buffer_offset,
- buffer_id);
+ res = nvme_trans_send_activate_fw_cmd(ns, hdr, buffer_id);
break;
default:
res = nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION,
@@ -2890,15 +2374,13 @@ struct scsi_unmap_parm_list {
static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
u8 *cmd)
{
- struct nvme_dev *dev = ns->dev;
struct scsi_unmap_parm_list *plist;
struct nvme_dsm_range *range;
struct nvme_command c;
- int i, nvme_sc, res = -ENOMEM;
+ int i, nvme_sc, res;
u16 ndesc, list_len;
- dma_addr_t dma_addr;
- list_len = GET_U16_FROM_CDB(cmd, UNMAP_CDB_PARAM_LIST_LENGTH_OFFSET);
+ list_len = get_unaligned_be16(&cmd[7]);
if (!list_len)
return -EINVAL;
@@ -2907,7 +2389,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
return -ENOMEM;
res = nvme_trans_copy_from_user(hdr, plist, list_len);
- if (res != SNTI_TRANSLATION_SUCCESS)
+ if (res)
goto out;
ndesc = be16_to_cpu(plist->unmap_blk_desc_data_len) >> 4;
@@ -2916,10 +2398,11 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
goto out;
}
- range = dma_alloc_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range),
- &dma_addr, GFP_KERNEL);
- if (!range)
+ range = kcalloc(ndesc, sizeof(*range), GFP_KERNEL);
+ if (!range) {
+ res = -ENOMEM;
goto out;
+ }
for (i = 0; i < ndesc; i++) {
range[i].nlb = cpu_to_le32(be32_to_cpu(plist->desc[i].nlb));
@@ -2930,15 +2413,14 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
memset(&c, 0, sizeof(c));
c.dsm.opcode = nvme_cmd_dsm;
c.dsm.nsid = cpu_to_le32(ns->ns_id);
- c.dsm.prp1 = cpu_to_le64(dma_addr);
c.dsm.nr = cpu_to_le32(ndesc - 1);
c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
- nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL);
+ nvme_sc = nvme_submit_sync_cmd(ns->queue, &c, range,
+ ndesc * sizeof(*range));
res = nvme_trans_status_code(hdr, nvme_sc);
- dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range),
- range, dma_addr);
+ kfree(range);
out:
kfree(plist);
return res;
@@ -2993,13 +2475,16 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
retcode = nvme_trans_mode_sense(ns, hdr, cmd);
break;
case READ_CAPACITY:
- retcode = nvme_trans_read_capacity(ns, hdr, cmd);
+ retcode = nvme_trans_read_capacity(ns, hdr, cmd, 0);
break;
case SERVICE_ACTION_IN_16:
- if (IS_READ_CAP_16(cmd))
- retcode = nvme_trans_read_capacity(ns, hdr, cmd);
- else
+ switch (cmd[1]) {
+ case SAI_READ_CAPACITY_16:
+ retcode = nvme_trans_read_capacity(ns, hdr, cmd, 1);
+ break;
+ default:
goto out;
+ }
break;
case REPORT_LUNS:
retcode = nvme_trans_report_luns(ns, hdr, cmd);
@@ -3015,7 +2500,7 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr)
retcode = nvme_trans_start_stop(ns, hdr, cmd);
break;
case SYNCHRONIZE_CACHE:
- retcode = nvme_trans_synchronize_cache(ns, hdr, cmd);
+ retcode = nvme_trans_synchronize_cache(ns, hdr);
break;
case FORMAT_UNIT:
retcode = nvme_trans_format_unit(ns, hdr, cmd);
@@ -3053,15 +2538,16 @@ int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr)
if (hdr.cmd_len > BLK_MAX_CDB)
return -EINVAL;
+ /*
+ * A positive return code means a NVMe status, which has been
+ * translated to sense data.
+ */
retcode = nvme_scsi_translate(ns, &hdr);
if (retcode < 0)
return retcode;
- if (retcode > 0)
- retcode = SNTI_TRANSLATION_SUCCESS;
if (copy_to_user(u_hdr, &hdr, sizeof(sg_io_hdr_t)) > 0)
return -EFAULT;
-
- return retcode;
+ return 0;
}
int nvme_sg_get_version_num(int __user *ip)
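
As the new comment spells out, a positive return from nvme_scsi_translate() is an NVMe status whose meaning has already been translated into the sense buffer, so nvme_sg_io() can report success once the header is copied back; userspace reads the outcome from the sg_io_hdr it passed to SG_IO. A minimal userspace sketch of that view (the device path is an assumption and error handling is abbreviated):

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <scsi/sg.h>
    #include <unistd.h>

    int main(void)
    {
        unsigned char cdb[6] = { 0x00, 0, 0, 0, 0, 0 };  /* TEST UNIT READY */
        unsigned char sense[32];
        struct sg_io_hdr hdr;
        int fd = open("/dev/nvme0n1", O_RDONLY);         /* assumed path */

        if (fd < 0)
            return 1;
        memset(&hdr, 0, sizeof(hdr));
        hdr.interface_id = 'S';
        hdr.cmd_len = sizeof(cdb);
        hdr.cmdp = cdb;
        hdr.dxfer_direction = SG_DXFER_NONE;
        hdr.sbp = sense;
        hdr.mx_sb_len = sizeof(sense);
        hdr.timeout = 5000;                              /* milliseconds */

        if (ioctl(fd, SG_IO, &hdr) == 0)
            printf("scsi status=0x%x, sense bytes=%u\n",
                   hdr.status, (unsigned)hdr.sb_len_wr);
        close(fd);
        return 0;
    }
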
diff --git a/drivers/block/paride/paride.c b/drivers/block/paride/paride.c
index 48c50f11f63b..0e287993b778 100644
--- a/drivers/block/paride/paride.c
+++ b/drivers/block/paride/paride.c
@@ -30,6 +30,7 @@
#include <linux/wait.h>
#include <linux/sched.h> /* TASK_* */
#include <linux/parport.h>
+#include <linux/slab.h>
#include "paride.h"
@@ -244,17 +245,19 @@ void paride_unregister(PIP * pr)
EXPORT_SYMBOL(paride_unregister);
-static int pi_register_parport(PIA * pi, int verbose)
+static int pi_register_parport(PIA *pi, int verbose, int unit)
{
struct parport *port;
+ struct pardev_cb par_cb;
port = parport_find_base(pi->port);
if (!port)
return 0;
-
- pi->pardev = parport_register_device(port,
- pi->device, NULL,
- pi_wake_up, NULL, 0, (void *) pi);
+ memset(&par_cb, 0, sizeof(par_cb));
+ par_cb.wakeup = pi_wake_up;
+ par_cb.private = (void *)pi;
+ pi->pardev = parport_register_dev_model(port, pi->device, &par_cb,
+ unit);
parport_put_port(port);
if (!pi->pardev)
return 0;
@@ -311,7 +314,7 @@ static int pi_probe_unit(PIA * pi, int unit, char *scratch, int verbose)
e = pi->proto->max_units;
}
- if (!pi_register_parport(pi, verbose))
+ if (!pi_register_parport(pi, verbose, s))
return 0;
if (pi->proto->test_port) {
@@ -432,3 +435,45 @@ int pi_init(PIA * pi, int autoprobe, int port, int mode,
}
EXPORT_SYMBOL(pi_init);
+
+static int pi_probe(struct pardevice *par_dev)
+{
+ struct device_driver *drv = par_dev->dev.driver;
+ int len = strlen(drv->name);
+
+ if (strncmp(par_dev->name, drv->name, len))
+ return -ENODEV;
+
+ return 0;
+}
+
+void *pi_register_driver(char *name)
+{
+ struct parport_driver *parp_drv;
+ int ret;
+
+ parp_drv = kzalloc(sizeof(*parp_drv), GFP_KERNEL);
+ if (!parp_drv)
+ return NULL;
+
+ parp_drv->name = name;
+ parp_drv->probe = pi_probe;
+ parp_drv->devmodel = true;
+
+ ret = parport_register_driver(parp_drv);
+ if (ret) {
+ kfree(parp_drv);
+ return NULL;
+ }
+ return (void *)parp_drv;
+}
+EXPORT_SYMBOL(pi_register_driver);
+
+void pi_unregister_driver(void *_drv)
+{
+ struct parport_driver *drv = _drv;
+
+ parport_unregister_driver(drv);
+ kfree(drv);
+}
+EXPORT_SYMBOL(pi_unregister_driver);
diff --git a/drivers/block/paride/paride.h b/drivers/block/paride/paride.h
index 2bddbf45518b..ddb9e589da7f 100644
--- a/drivers/block/paride/paride.h
+++ b/drivers/block/paride/paride.h
@@ -165,6 +165,8 @@ typedef struct pi_protocol PIP;
extern int paride_register( PIP * );
extern void paride_unregister ( PIP * );
+void *pi_register_driver(char *);
+void pi_unregister_driver(void *);
#endif /* __DRIVERS_PARIDE_H__ */
/* end of paride.h */
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 3b7c9f1be484..93362362aa55 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -221,6 +221,7 @@ static int pcd_busy; /* request being processed ? */
static int pcd_sector; /* address of next requested sector */
static int pcd_count; /* number of blocks still to do */
static char *pcd_buf; /* buffer for request in progress */
+static void *par_drv; /* reference of parport driver */
/* kernel glue structures */
@@ -690,6 +691,12 @@ static int pcd_detect(void)
printk("%s: %s version %s, major %d, nice %d\n",
name, name, PCD_VERSION, major, nice);
+ par_drv = pi_register_driver(name);
+ if (!par_drv) {
+ pr_err("failed to register %s driver\n", name);
+ return -1;
+ }
+
k = 0;
if (pcd_drive_count == 0) { /* nothing spec'd - so autoprobe for 1 */
cd = pcd;
@@ -723,6 +730,7 @@ static int pcd_detect(void)
printk("%s: No CD-ROM drive found\n", name);
for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++)
put_disk(cd->disk);
+ pi_unregister_driver(par_drv);
return -1;
}
@@ -984,6 +992,7 @@ static void __exit pcd_exit(void)
}
blk_cleanup_queue(pcd_queue);
unregister_blkdev(major, name);
+ pi_unregister_driver(par_drv);
}
MODULE_LICENSE("GPL");
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index d48715b287e6..b9242d78283d 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -247,6 +247,8 @@ static char *pd_errs[17] = { "ERR", "INDEX", "ECC", "DRQ", "SEEK", "WRERR",
"IDNF", "MC", "UNC", "???", "TMO"
};
+static void *par_drv; /* reference of parport driver */
+
static inline int status_reg(struct pd_unit *disk)
{
return pi_read_regr(disk->pi, 1, 6);
@@ -442,7 +444,7 @@ static char *pd_buf; /* buffer for request in progress */
static enum action do_pd_io_start(void)
{
- if (pd_req->cmd_type == REQ_TYPE_SPECIAL) {
+ if (pd_req->cmd_type == REQ_TYPE_DRV_PRIV) {
phase = pd_special;
return pd_special();
}
@@ -725,7 +727,7 @@ static int pd_special_command(struct pd_unit *disk,
if (IS_ERR(rq))
return PTR_ERR(rq);
- rq->cmd_type = REQ_TYPE_SPECIAL;
+ rq->cmd_type = REQ_TYPE_DRV_PRIV;
rq->special = func;
err = blk_execute_rq(disk->gd->queue, disk->gd, rq, 0);
@@ -872,6 +874,12 @@ static int pd_detect(void)
pd_drive_count++;
}
+ par_drv = pi_register_driver(name);
+ if (!par_drv) {
+ pr_err("failed to register %s driver\n", name);
+ return -1;
+ }
+
if (pd_drive_count == 0) { /* nothing spec'd - so autoprobe for 1 */
disk = pd;
if (pi_init(disk->pi, 1, -1, -1, -1, -1, -1, pd_scratch,
@@ -902,8 +910,10 @@ static int pd_detect(void)
found = 1;
}
}
- if (!found)
+ if (!found) {
printk("%s: no valid drive found\n", name);
+ pi_unregister_driver(par_drv);
+ }
return found;
}
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index 9a15fd3c9349..7a7d977a76c5 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -264,6 +264,7 @@ static int pf_cmd; /* current command READ/WRITE */
static struct pf_unit *pf_current;/* unit of current request */
static int pf_mask; /* stopper for pseudo-int */
static char *pf_buf; /* buffer for request in progress */
+static void *par_drv; /* reference of parport driver */
/* kernel glue structures */
@@ -703,6 +704,11 @@ static int pf_detect(void)
printk("%s: %s version %s, major %d, cluster %d, nice %d\n",
name, name, PF_VERSION, major, cluster, nice);
+ par_drv = pi_register_driver(name);
+ if (!par_drv) {
+ pr_err("failed to register %s driver\n", name);
+ return -1;
+ }
k = 0;
if (pf_drive_count == 0) {
if (pi_init(pf->pi, 1, -1, -1, -1, -1, -1, pf_scratch, PI_PF,
@@ -735,6 +741,7 @@ static int pf_detect(void)
printk("%s: No ATAPI disk detected\n", name);
for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++)
put_disk(pf->disk);
+ pi_unregister_driver(par_drv);
return -1;
}
diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c
index 876d0c3eaf58..bfbd4c852dd9 100644
--- a/drivers/block/paride/pg.c
+++ b/drivers/block/paride/pg.c
@@ -227,6 +227,7 @@ static int pg_identify(struct pg *dev, int log);
static char pg_scratch[512]; /* scratch block buffer */
static struct class *pg_class;
+static void *par_drv; /* reference of parport driver */
/* kernel glue structures */
@@ -481,6 +482,12 @@ static int pg_detect(void)
printk("%s: %s version %s, major %d\n", name, name, PG_VERSION, major);
+ par_drv = pi_register_driver(name);
+ if (!par_drv) {
+ pr_err("failed to register %s driver\n", name);
+ return -1;
+ }
+
k = 0;
if (pg_drive_count == 0) {
if (pi_init(dev->pi, 1, -1, -1, -1, -1, -1, pg_scratch,
@@ -511,6 +518,7 @@ static int pg_detect(void)
if (k)
return 0;
+ pi_unregister_driver(par_drv);
printk("%s: No ATAPI device detected\n", name);
return -1;
}
diff --git a/drivers/block/paride/pt.c b/drivers/block/paride/pt.c
index 2596042eb987..1740d75e8a32 100644
--- a/drivers/block/paride/pt.c
+++ b/drivers/block/paride/pt.c
@@ -232,6 +232,7 @@ static int pt_identify(struct pt_unit *tape);
static struct pt_unit pt[PT_UNITS];
static char pt_scratch[512]; /* scratch block buffer */
+static void *par_drv; /* reference of parport driver */
/* kernel glue structures */
@@ -605,6 +606,12 @@ static int pt_detect(void)
printk("%s: %s version %s, major %d\n", name, name, PT_VERSION, major);
+ par_drv = pi_register_driver(name);
+ if (!par_drv) {
+ pr_err("failed to register %s driver\n", name);
+ return -1;
+ }
+
specified = 0;
for (unit = 0; unit < PT_UNITS; unit++) {
struct pt_unit *tape = &pt[unit];
@@ -644,6 +651,7 @@ static int pt_detect(void)
if (found)
return 0;
+ pi_unregister_driver(par_drv);
printk("%s: No ATAPI tape drive detected\n", name);
return -1;
}
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 09e628dafd9d..4c20c228184c 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -61,6 +61,7 @@
#include <linux/freezer.h>
#include <linux/mutex.h>
#include <linux/slab.h>
+#include <linux/backing-dev.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_ioctl.h>
#include <scsi/scsi.h>
diff --git a/drivers/block/pmem.c b/drivers/block/pmem.c
deleted file mode 100644
index eabf4a8d0085..000000000000
--- a/drivers/block/pmem.c
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- * Persistent Memory Driver
- *
- * Copyright (c) 2014, Intel Corporation.
- * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>.
- * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- */
-
-#include <asm/cacheflush.h>
-#include <linux/blkdev.h>
-#include <linux/hdreg.h>
-#include <linux/init.h>
-#include <linux/platform_device.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/slab.h>
-
-#define PMEM_MINORS 16
-
-struct pmem_device {
- struct request_queue *pmem_queue;
- struct gendisk *pmem_disk;
-
- /* One contiguous memory region per device */
- phys_addr_t phys_addr;
- void *virt_addr;
- size_t size;
-};
-
-static int pmem_major;
-static atomic_t pmem_index;
-
-static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
- unsigned int len, unsigned int off, int rw,
- sector_t sector)
-{
- void *mem = kmap_atomic(page);
- size_t pmem_off = sector << 9;
-
- if (rw == READ) {
- memcpy(mem + off, pmem->virt_addr + pmem_off, len);
- flush_dcache_page(page);
- } else {
- flush_dcache_page(page);
- memcpy(pmem->virt_addr + pmem_off, mem + off, len);
- }
-
- kunmap_atomic(mem);
-}
-
-static void pmem_make_request(struct request_queue *q, struct bio *bio)
-{
- struct block_device *bdev = bio->bi_bdev;
- struct pmem_device *pmem = bdev->bd_disk->private_data;
- int rw;
- struct bio_vec bvec;
- sector_t sector;
- struct bvec_iter iter;
- int err = 0;
-
- if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) {
- err = -EIO;
- goto out;
- }
-
- BUG_ON(bio->bi_rw & REQ_DISCARD);
-
- rw = bio_data_dir(bio);
- sector = bio->bi_iter.bi_sector;
- bio_for_each_segment(bvec, bio, iter) {
- pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, bvec.bv_offset,
- rw, sector);
- sector += bvec.bv_len >> 9;
- }
-
-out:
- bio_endio(bio, err);
-}
-
-static int pmem_rw_page(struct block_device *bdev, sector_t sector,
- struct page *page, int rw)
-{
- struct pmem_device *pmem = bdev->bd_disk->private_data;
-
- pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector);
- page_endio(page, rw & WRITE, 0);
-
- return 0;
-}
-
-static long pmem_direct_access(struct block_device *bdev, sector_t sector,
- void **kaddr, unsigned long *pfn, long size)
-{
- struct pmem_device *pmem = bdev->bd_disk->private_data;
- size_t offset = sector << 9;
-
- if (!pmem)
- return -ENODEV;
-
- *kaddr = pmem->virt_addr + offset;
- *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT;
-
- return pmem->size - offset;
-}
-
-static const struct block_device_operations pmem_fops = {
- .owner = THIS_MODULE,
- .rw_page = pmem_rw_page,
- .direct_access = pmem_direct_access,
-};
-
-static struct pmem_device *pmem_alloc(struct device *dev, struct resource *res)
-{
- struct pmem_device *pmem;
- struct gendisk *disk;
- int idx, err;
-
- err = -ENOMEM;
- pmem = kzalloc(sizeof(*pmem), GFP_KERNEL);
- if (!pmem)
- goto out;
-
- pmem->phys_addr = res->start;
- pmem->size = resource_size(res);
-
- err = -EINVAL;
- if (!request_mem_region(pmem->phys_addr, pmem->size, "pmem")) {
- dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n", &pmem->phys_addr, pmem->size);
- goto out_free_dev;
- }
-
- /*
- * Map the memory as non-cachable, as we can't write back the contents
- * of the CPU caches in case of a crash.
- */
- err = -ENOMEM;
- pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size);
- if (!pmem->virt_addr)
- goto out_release_region;
-
- pmem->pmem_queue = blk_alloc_queue(GFP_KERNEL);
- if (!pmem->pmem_queue)
- goto out_unmap;
-
- blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
- blk_queue_max_hw_sectors(pmem->pmem_queue, 1024);
- blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
-
- disk = alloc_disk(PMEM_MINORS);
- if (!disk)
- goto out_free_queue;
-
- idx = atomic_inc_return(&pmem_index) - 1;
-
- disk->major = pmem_major;
- disk->first_minor = PMEM_MINORS * idx;
- disk->fops = &pmem_fops;
- disk->private_data = pmem;
- disk->queue = pmem->pmem_queue;
- disk->flags = GENHD_FL_EXT_DEVT;
- sprintf(disk->disk_name, "pmem%d", idx);
- disk->driverfs_dev = dev;
- set_capacity(disk, pmem->size >> 9);
- pmem->pmem_disk = disk;
-
- add_disk(disk);
-
- return pmem;
-
-out_free_queue:
- blk_cleanup_queue(pmem->pmem_queue);
-out_unmap:
- iounmap(pmem->virt_addr);
-out_release_region:
- release_mem_region(pmem->phys_addr, pmem->size);
-out_free_dev:
- kfree(pmem);
-out:
- return ERR_PTR(err);
-}
-
-static void pmem_free(struct pmem_device *pmem)
-{
- del_gendisk(pmem->pmem_disk);
- put_disk(pmem->pmem_disk);
- blk_cleanup_queue(pmem->pmem_queue);
- iounmap(pmem->virt_addr);
- release_mem_region(pmem->phys_addr, pmem->size);
- kfree(pmem);
-}
-
-static int pmem_probe(struct platform_device *pdev)
-{
- struct pmem_device *pmem;
- struct resource *res;
-
- if (WARN_ON(pdev->num_resources > 1))
- return -ENXIO;
-
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res)
- return -ENXIO;
-
- pmem = pmem_alloc(&pdev->dev, res);
- if (IS_ERR(pmem))
- return PTR_ERR(pmem);
-
- platform_set_drvdata(pdev, pmem);
-
- return 0;
-}
-
-static int pmem_remove(struct platform_device *pdev)
-{
- struct pmem_device *pmem = platform_get_drvdata(pdev);
-
- pmem_free(pmem);
- return 0;
-}
-
-static struct platform_driver pmem_driver = {
- .probe = pmem_probe,
- .remove = pmem_remove,
- .driver = {
- .owner = THIS_MODULE,
- .name = "pmem",
- },
-};
-
-static int __init pmem_init(void)
-{
- int error;
-
- pmem_major = register_blkdev(0, "pmem");
- if (pmem_major < 0)
- return pmem_major;
-
- error = platform_driver_register(&pmem_driver);
- if (error)
- unregister_blkdev(pmem_major, "pmem");
- return error;
-}
-module_init(pmem_init);
-
-static void pmem_exit(void)
-{
- platform_driver_unregister(&pmem_driver);
- unregister_blkdev(pmem_major, "pmem");
-}
-module_exit(pmem_exit);
-
-MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index ef45cfb98fd2..b1612eb16172 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -1,5 +1,5 @@
/*
- * ps3vram - Use extra PS3 video ram as MTD block device.
+ * ps3vram - Use extra PS3 video ram as block device.
*
* Copyright 2009 Sony Corporation
*
@@ -73,8 +73,8 @@ struct ps3vram_priv {
u64 memory_handle;
u64 context_handle;
- u32 *ctrl;
- void *reports;
+ u32 __iomem *ctrl;
+ void __iomem *reports;
u8 *xdr_buf;
u32 *fifo_base;
@@ -104,7 +104,7 @@ static char *size = "256M";
module_param(size, charp, 0);
MODULE_PARM_DESC(size, "memory size");
-static u32 *ps3vram_get_notifier(void *reports, int notifier)
+static u32 __iomem *ps3vram_get_notifier(void __iomem *reports, int notifier)
{
return reports + DMA_NOTIFIER_OFFSET_BASE +
DMA_NOTIFIER_SIZE * notifier;
@@ -113,22 +113,22 @@ static u32 *ps3vram_get_notifier(void *reports, int notifier)
static void ps3vram_notifier_reset(struct ps3_system_bus_device *dev)
{
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
- u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
+ u32 __iomem *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
int i;
for (i = 0; i < 4; i++)
- notify[i] = 0xffffffff;
+ iowrite32be(0xffffffff, notify + i);
}
static int ps3vram_notifier_wait(struct ps3_system_bus_device *dev,
unsigned int timeout_ms)
{
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
- u32 *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
+ u32 __iomem *notify = ps3vram_get_notifier(priv->reports, NOTIFIER);
unsigned long timeout;
for (timeout = 20; timeout; timeout--) {
- if (!notify[3])
+ if (!ioread32be(notify + 3))
return 0;
udelay(10);
}
@@ -136,7 +136,7 @@ static int ps3vram_notifier_wait(struct ps3_system_bus_device *dev,
timeout = jiffies + msecs_to_jiffies(timeout_ms);
do {
- if (!notify[3])
+ if (!ioread32be(notify + 3))
return 0;
msleep(1);
} while (time_before(jiffies, timeout));
@@ -148,8 +148,8 @@ static void ps3vram_init_ring(struct ps3_system_bus_device *dev)
{
struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
- priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
- priv->ctrl[CTRL_GET] = FIFO_BASE + FIFO_OFFSET;
+ iowrite32be(FIFO_BASE + FIFO_OFFSET, priv->ctrl + CTRL_PUT);
+ iowrite32be(FIFO_BASE + FIFO_OFFSET, priv->ctrl + CTRL_GET);
}
static int ps3vram_wait_ring(struct ps3_system_bus_device *dev,
@@ -159,14 +159,14 @@ static int ps3vram_wait_ring(struct ps3_system_bus_device *dev,
unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
do {
- if (priv->ctrl[CTRL_PUT] == priv->ctrl[CTRL_GET])
+ if (ioread32be(priv->ctrl + CTRL_PUT) == ioread32be(priv->ctrl + CTRL_GET))
return 0;
msleep(1);
} while (time_before(jiffies, timeout));
dev_warn(&dev->core, "FIFO timeout (%08x/%08x/%08x)\n",
- priv->ctrl[CTRL_PUT], priv->ctrl[CTRL_GET],
- priv->ctrl[CTRL_TOP]);
+ ioread32be(priv->ctrl + CTRL_PUT), ioread32be(priv->ctrl + CTRL_GET),
+ ioread32be(priv->ctrl + CTRL_TOP));
return -ETIMEDOUT;
}
@@ -189,7 +189,7 @@ static void ps3vram_rewind_ring(struct ps3_system_bus_device *dev)
ps3vram_out_ring(priv, 0x20000000 | (FIFO_BASE + FIFO_OFFSET));
- priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET;
+ iowrite32be(FIFO_BASE + FIFO_OFFSET, priv->ctrl + CTRL_PUT);
/* asking the HV for a blit will kick the FIFO */
status = lv1_gpu_fb_blit(priv->context_handle, 0, 0, 0, 0);
@@ -207,8 +207,8 @@ static void ps3vram_fire_ring(struct ps3_system_bus_device *dev)
mutex_lock(&ps3_gpu_mutex);
- priv->ctrl[CTRL_PUT] = FIFO_BASE + FIFO_OFFSET +
- (priv->fifo_ptr - priv->fifo_base) * sizeof(u32);
+ iowrite32be(FIFO_BASE + FIFO_OFFSET + (priv->fifo_ptr - priv->fifo_base)
+ * sizeof(u32), priv->ctrl + CTRL_PUT);
/* asking the HV for a blit will kick the FIFO */
status = lv1_gpu_fb_blit(priv->context_handle, 0, 0, 0, 0);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index ec6c5c6e1ac9..d94529d5c8e9 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -346,6 +346,7 @@ struct rbd_device {
struct rbd_image_header header;
unsigned long flags; /* possibly lock protected */
struct rbd_spec *spec;
+ struct rbd_options *opts;
char *header_name;
@@ -724,34 +725,36 @@ static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts)
}
/*
- * mount options
+ * (Per device) rbd map options
*/
enum {
+ Opt_queue_depth,
Opt_last_int,
/* int args above */
Opt_last_string,
/* string args above */
Opt_read_only,
Opt_read_write,
- /* Boolean args above */
- Opt_last_bool,
+ Opt_err
};
static match_table_t rbd_opts_tokens = {
+ {Opt_queue_depth, "queue_depth=%d"},
/* int args above */
/* string args above */
{Opt_read_only, "read_only"},
{Opt_read_only, "ro"}, /* Alternate spelling */
{Opt_read_write, "read_write"},
{Opt_read_write, "rw"}, /* Alternate spelling */
- /* Boolean args above */
- {-1, NULL}
+ {Opt_err, NULL}
};
struct rbd_options {
+ int queue_depth;
bool read_only;
};
+#define RBD_QUEUE_DEPTH_DEFAULT BLKDEV_MAX_RQ
#define RBD_READ_ONLY_DEFAULT false
static int parse_rbd_opts_token(char *c, void *private)
@@ -761,27 +764,27 @@ static int parse_rbd_opts_token(char *c, void *private)
int token, intval, ret;
token = match_token(c, rbd_opts_tokens, argstr);
- if (token < 0)
- return -EINVAL;
-
if (token < Opt_last_int) {
ret = match_int(&argstr[0], &intval);
if (ret < 0) {
- pr_err("bad mount option arg (not int) "
- "at '%s'\n", c);
+ pr_err("bad mount option arg (not int) at '%s'\n", c);
return ret;
}
dout("got int token %d val %d\n", token, intval);
} else if (token > Opt_last_int && token < Opt_last_string) {
- dout("got string token %d val %s\n", token,
- argstr[0].from);
- } else if (token > Opt_last_string && token < Opt_last_bool) {
- dout("got Boolean token %d\n", token);
+ dout("got string token %d val %s\n", token, argstr[0].from);
} else {
dout("got token %d\n", token);
}
switch (token) {
+ case Opt_queue_depth:
+ if (intval < 1) {
+ pr_err("queue_depth out of range\n");
+ return -EINVAL;
+ }
+ rbd_opts->queue_depth = intval;
+ break;
case Opt_read_only:
rbd_opts->read_only = true;
break;
@@ -789,9 +792,10 @@ static int parse_rbd_opts_token(char *c, void *private)
rbd_opts->read_only = false;
break;
default:
- rbd_assert(false);
- break;
+ /* libceph prints "bad option" msg */
+ return -EINVAL;
}
+
return 0;
}
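
parse_rbd_opts_token() above now understands an integer queue_depth option alongside the ro/rw booleans, and an unknown token fails the map instead of tripping an assertion. A plain-C stand-in for that per-token logic, independent of the kernel's match_token()/match_int() machinery (names and the sscanf-based matching are illustrative only):

    #include <stdio.h>
    #include <string.h>
    #include <stdbool.h>

    struct opts_example {
        int queue_depth;
        bool read_only;
    };

    /* Handle one comma-separated token the way parse_rbd_opts_token() does:
     * unknown options and queue_depth < 1 are rejected. */
    static int parse_one(const char *tok, struct opts_example *o)
    {
        int v;

        if (sscanf(tok, "queue_depth=%d", &v) == 1) {
            if (v < 1) {
                fprintf(stderr, "queue_depth out of range\n");
                return -1;
            }
            o->queue_depth = v;
        } else if (!strcmp(tok, "read_only") || !strcmp(tok, "ro")) {
            o->read_only = true;
        } else if (!strcmp(tok, "read_write") || !strcmp(tok, "rw")) {
            o->read_only = false;
        } else {
            return -1;  /* unknown option */
        }
        return 0;
    }

    int main(void)
    {
        /* defaults; the driver uses RBD_QUEUE_DEPTH_DEFAULT / RBD_READ_ONLY_DEFAULT */
        struct opts_example o = { .queue_depth = 128, .read_only = false };

        parse_one("queue_depth=256", &o);
        parse_one("ro", &o);
        printf("queue_depth=%d read_only=%d\n", o.queue_depth, o.read_only);
        return 0;
    }
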
@@ -1563,22 +1567,39 @@ static void rbd_obj_request_end(struct rbd_obj_request *obj_request)
/*
* Wait for an object request to complete. If interrupted, cancel the
* underlying osd request.
+ *
+ * @timeout: in jiffies, 0 means "wait forever"
*/
-static int rbd_obj_request_wait(struct rbd_obj_request *obj_request)
+static int __rbd_obj_request_wait(struct rbd_obj_request *obj_request,
+ unsigned long timeout)
{
- int ret;
+ long ret;
dout("%s %p\n", __func__, obj_request);
-
- ret = wait_for_completion_interruptible(&obj_request->completion);
- if (ret < 0) {
- dout("%s %p interrupted\n", __func__, obj_request);
+ ret = wait_for_completion_interruptible_timeout(
+ &obj_request->completion,
+ ceph_timeout_jiffies(timeout));
+ if (ret <= 0) {
+ if (ret == 0)
+ ret = -ETIMEDOUT;
rbd_obj_request_end(obj_request);
- return ret;
+ } else {
+ ret = 0;
}
- dout("%s %p done\n", __func__, obj_request);
- return 0;
+ dout("%s %p ret %d\n", __func__, obj_request, (int)ret);
+ return ret;
+}
+
+static int rbd_obj_request_wait(struct rbd_obj_request *obj_request)
+{
+ return __rbd_obj_request_wait(obj_request, 0);
+}
+
+static int rbd_obj_request_wait_timeout(struct rbd_obj_request *obj_request,
+ unsigned long timeout)
+{
+ return __rbd_obj_request_wait(obj_request, timeout);
}
static void rbd_img_request_complete(struct rbd_img_request *img_request)
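The new __rbd_obj_request_wait() folds the interruptible wait and the timeout into one call: wait_for_completion_interruptible_timeout() returns a negative value when interrupted, 0 on timeout, and the remaining jiffies otherwise, and the wrapper normalizes that to 0, -ETIMEDOUT, or the interrupt error, cancelling the underlying osd request in the failure cases. ceph_timeout_jiffies() presumably handles the "0 means wait forever" convention noted in the comment. A stripped-down sketch of the same normalization outside rbd:

    #include <linux/completion.h>
    #include <linux/sched.h>
    #include <linux/errno.h>

    /* Illustrative helper: @timeout in jiffies, 0 means wait forever. */
    static int example_wait(struct completion *done, unsigned long timeout)
    {
            long ret;

            if (!timeout)
                    timeout = MAX_SCHEDULE_TIMEOUT;

            ret = wait_for_completion_interruptible_timeout(done, timeout);
            if (ret == 0)
                    return -ETIMEDOUT;      /* timed out */
            if (ret < 0)
                    return ret;             /* interrupted (-ERESTARTSYS) */
            return 0;                       /* completed within the timeout */
    }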
@@ -2001,11 +2022,11 @@ static struct rbd_obj_request *rbd_obj_request_create(const char *object_name,
rbd_assert(obj_request_type_valid(type));
size = strlen(object_name) + 1;
- name = kmalloc(size, GFP_KERNEL);
+ name = kmalloc(size, GFP_NOIO);
if (!name)
return NULL;
- obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_KERNEL);
+ obj_request = kmem_cache_zalloc(rbd_obj_request_cache, GFP_NOIO);
if (!obj_request) {
kfree(name);
return NULL;
@@ -2376,7 +2397,7 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request,
}
if (opcode == CEPH_OSD_OP_DELETE)
- osd_req_op_init(osd_request, num_ops, opcode);
+ osd_req_op_init(osd_request, num_ops, opcode, 0);
else
osd_req_op_extent_init(osd_request, num_ops, opcode,
offset, length, 0, 0);
@@ -2848,7 +2869,7 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
goto out;
stat_request->callback = rbd_img_obj_exists_callback;
- osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT);
+ osd_req_op_init(stat_request->osd_req, 0, CEPH_OSD_OP_STAT, 0);
osd_req_op_raw_data_in_pages(stat_request->osd_req, 0, pages, size, 0,
false, false);
rbd_osd_req_format_read(stat_request);
@@ -3122,6 +3143,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper(
bool watch)
{
struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ struct ceph_options *opts = osdc->client->options;
struct rbd_obj_request *obj_request;
int ret;
@@ -3148,7 +3170,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper(
if (ret)
goto out;
- ret = rbd_obj_request_wait(obj_request);
+ ret = rbd_obj_request_wait_timeout(obj_request, opts->mount_timeout);
if (ret)
goto out;
@@ -3750,10 +3772,9 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set));
rbd_dev->tag_set.ops = &rbd_mq_ops;
- rbd_dev->tag_set.queue_depth = BLKDEV_MAX_RQ;
+ rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth;
rbd_dev->tag_set.numa_node = NUMA_NO_NODE;
- rbd_dev->tag_set.flags =
- BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+ rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
rbd_dev->tag_set.nr_hw_queues = 1;
rbd_dev->tag_set.cmd_size = sizeof(struct work_struct);
@@ -3773,6 +3794,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
/* set io sizes to object size */
segment_size = rbd_obj_bytes(&rbd_dev->header);
blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
+ blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
blk_queue_max_segment_size(q, segment_size);
blk_queue_io_min(q, segment_size);
blk_queue_io_opt(q, segment_size);
@@ -4044,7 +4066,8 @@ static void rbd_spec_free(struct kref *kref)
}
static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
- struct rbd_spec *spec)
+ struct rbd_spec *spec,
+ struct rbd_options *opts)
{
struct rbd_device *rbd_dev;
@@ -4058,8 +4081,9 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc,
INIT_LIST_HEAD(&rbd_dev->node);
init_rwsem(&rbd_dev->header_rwsem);
- rbd_dev->spec = spec;
rbd_dev->rbd_client = rbdc;
+ rbd_dev->spec = spec;
+ rbd_dev->opts = opts;
/* Initialize the layout used for all rbd requests */
@@ -4075,6 +4099,7 @@ static void rbd_dev_destroy(struct rbd_device *rbd_dev)
{
rbd_put_client(rbd_dev->rbd_client);
rbd_spec_put(rbd_dev->spec);
+ kfree(rbd_dev->opts);
kfree(rbd_dev);
}
@@ -4933,6 +4958,7 @@ static int rbd_add_parse_args(const char *buf,
goto out_mem;
rbd_opts->read_only = RBD_READ_ONLY_DEFAULT;
+ rbd_opts->queue_depth = RBD_QUEUE_DEPTH_DEFAULT;
copts = ceph_parse_options(options, mon_addrs,
mon_addrs + mon_addrs_size - 1,
@@ -4963,8 +4989,8 @@ out_err:
*/
static int rbd_add_get_pool_id(struct rbd_client *rbdc, const char *pool_name)
{
+ struct ceph_options *opts = rbdc->client->options;
u64 newest_epoch;
- unsigned long timeout = rbdc->client->options->mount_timeout * HZ;
int tries = 0;
int ret;
@@ -4979,7 +5005,8 @@ again:
if (rbdc->client->osdc.osdmap->epoch < newest_epoch) {
ceph_monc_request_next_osdmap(&rbdc->client->monc);
(void) ceph_monc_wait_osdmap(&rbdc->client->monc,
- newest_epoch, timeout);
+ newest_epoch,
+ opts->mount_timeout);
goto again;
} else {
/* the osdmap we have is new enough */
@@ -5148,7 +5175,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
rbdc = __rbd_get_client(rbd_dev->rbd_client);
ret = -ENOMEM;
- parent = rbd_dev_create(rbdc, parent_spec);
+ parent = rbd_dev_create(rbdc, parent_spec, NULL);
if (!parent)
goto out_err;
@@ -5394,9 +5421,6 @@ static ssize_t do_rbd_add(struct bus_type *bus,
rc = rbd_add_parse_args(buf, &ceph_opts, &rbd_opts, &spec);
if (rc < 0)
goto err_out_module;
- read_only = rbd_opts->read_only;
- kfree(rbd_opts);
- rbd_opts = NULL; /* done with this */
rbdc = rbd_get_client(ceph_opts);
if (IS_ERR(rbdc)) {
@@ -5422,11 +5446,12 @@ static ssize_t do_rbd_add(struct bus_type *bus,
goto err_out_client;
}
- rbd_dev = rbd_dev_create(rbdc, spec);
+ rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts);
if (!rbd_dev)
goto err_out_client;
rbdc = NULL; /* rbd_dev now owns this */
spec = NULL; /* rbd_dev now owns this */
+ rbd_opts = NULL; /* rbd_dev now owns this */
rc = rbd_dev_image_probe(rbd_dev, true);
if (rc < 0)
@@ -5434,6 +5459,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
/* If we are mapping a snapshot it must be marked read-only */
+ read_only = rbd_dev->opts->read_only;
if (rbd_dev->spec->snap_id != CEPH_NOSNAP)
read_only = true;
rbd_dev->mapping.read_only = read_only;
@@ -5458,6 +5484,7 @@ err_out_client:
rbd_put_client(rbdc);
err_out_args:
rbd_spec_put(spec);
+ kfree(rbd_opts);
err_out_module:
module_put(THIS_MODULE);
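A side effect of passing rbd_opts into rbd_dev_create() is the ownership hand-off visible in do_rbd_add(): once the device owns the options, the local pointer is set to NULL so the shared error path can kfree() it unconditionally without double-freeing. A small sketch of that pattern, with invented names:

    #include <linux/slab.h>

    struct example_opts { int queue_depth; };
    struct example_dev  { struct example_opts *opts; };

    /* Hypothetical constructor that takes ownership of @opts on success. */
    static struct example_dev *example_dev_create(struct example_opts *opts)
    {
            struct example_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

            if (dev)
                    dev->opts = opts;
            return dev;
    }

    static int example_add(void)
    {
            struct example_opts *opts;
            struct example_dev *dev;
            int rc = -ENOMEM;

            opts = kzalloc(sizeof(*opts), GFP_KERNEL);
            if (!opts)
                    return -ENOMEM;

            dev = example_dev_create(opts);
            if (!dev)
                    goto err_out;   /* opts is still ours, freed below */
            opts = NULL;            /* dev owns opts from here on */

            return 0;

    err_out:
            kfree(opts);            /* safe: pointer is NULL once ownership moved */
            return rc;
    }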
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 5d552857de41..59c91d49b14b 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -620,7 +620,7 @@ static int carm_array_info (struct carm_host *host, unsigned int array_idx)
spin_unlock_irq(&host->lock);
DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
- crq->rq->cmd_type = REQ_TYPE_SPECIAL;
+ crq->rq->cmd_type = REQ_TYPE_DRV_PRIV;
crq->rq->special = crq;
blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
@@ -661,7 +661,7 @@ static int carm_send_special (struct carm_host *host, carm_sspc_t func)
crq->msg_bucket = (u32) rc;
DPRINTK("blk_execute_rq_nowait, tag == %u\n", idx);
- crq->rq->cmd_type = REQ_TYPE_SPECIAL;
+ crq->rq->cmd_type = REQ_TYPE_DRV_PRIV;
crq->rq->special = crq;
blk_execute_rq_nowait(host->oob_q, NULL, crq->rq, true, NULL);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 5ea2f0bbbc7c..d4d05f064d39 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -124,7 +124,7 @@ static inline void virtblk_request_done(struct request *req)
req->resid_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.residual);
req->sense_len = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.sense_len);
req->errors = virtio32_to_cpu(vblk->vdev, vbr->in_hdr.errors);
- } else if (req->cmd_type == REQ_TYPE_SPECIAL) {
+ } else if (req->cmd_type == REQ_TYPE_DRV_PRIV) {
req->errors = (error != 0);
}
@@ -188,7 +188,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
vbr->out_hdr.sector = 0;
vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
break;
- case REQ_TYPE_SPECIAL:
+ case REQ_TYPE_DRV_PRIV:
vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_GET_ID);
vbr->out_hdr.sector = 0;
vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
@@ -251,7 +251,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
return PTR_ERR(req);
}
- req->cmd_type = REQ_TYPE_SPECIAL;
+ req->cmd_type = REQ_TYPE_DRV_PRIV;
err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
blk_put_request(req);
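The REQ_TYPE_SPECIAL to REQ_TYPE_DRV_PRIV changes in sx8 and virtio_blk above follow the block layer's rename of the driver-private request type; the virtblk_get_id() hunk shows the usual shape of such a command: allocate a request, mark it driver-private, execute it synchronously, release it. A rough sketch of that calling sequence against the block API of this era (the buffer and its length are hypothetical):

    #include <linux/blkdev.h>

    /* Illustrative: issue a synchronous driver-private command on @q. */
    static int example_issue_drv_priv(struct request_queue *q,
                                      struct gendisk *disk, void *buf, int len)
    {
            struct request *req;
            int err;

            req = blk_get_request(q, READ, GFP_KERNEL);
            if (IS_ERR(req))
                    return PTR_ERR(req);

            err = blk_rq_map_kern(q, req, buf, len, GFP_KERNEL);
            if (err)
                    goto out;

            req->cmd_type = REQ_TYPE_DRV_PRIV;      /* was REQ_TYPE_SPECIAL */
            err = blk_execute_rq(q, disk, req, false);
    out:
            blk_put_request(req);
            return err;
    }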
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 713fc9ff1149..ced96777b677 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -84,6 +84,13 @@ MODULE_PARM_DESC(max_persistent_grants,
"Maximum number of grants to map persistently");
/*
+ * Maximum order of pages to be used for the shared ring between front and
+ * backend, 4KB page granularity is used.
+ */
+unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER;
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
+/*
* The LRU mechanism to clean the lists of persistent grants needs to
* be executed periodically. The time interval between consecutive executions
* of the purge mechanism is set in ms.
@@ -729,7 +736,7 @@ static void xen_blkbk_unmap_and_respond(struct pending_req *req)
struct grant_page **pages = req->segments;
unsigned int invcount;
- invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_pages,
+ invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_segs,
req->unmap, req->unmap_pages);
work->data = req;
@@ -915,7 +922,7 @@ static int xen_blkbk_map_seg(struct pending_req *pending_req)
int rc;
rc = xen_blkbk_map(pending_req->blkif, pending_req->segments,
- pending_req->nr_pages,
+ pending_req->nr_segs,
(pending_req->operation != BLKIF_OP_READ));
return rc;
@@ -931,7 +938,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
int indirect_grefs, rc, n, nseg, i;
struct blkif_request_segment *segments = NULL;
- nseg = pending_req->nr_pages;
+ nseg = pending_req->nr_segs;
indirect_grefs = INDIRECT_PAGES(nseg);
BUG_ON(indirect_grefs > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
@@ -1251,7 +1258,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
pending_req->id = req->u.rw.id;
pending_req->operation = req_operation;
pending_req->status = BLKIF_RSP_OKAY;
- pending_req->nr_pages = nseg;
+ pending_req->nr_segs = nseg;
if (req->operation != BLKIF_OP_INDIRECT) {
preq.dev = req->u.rw.handle;
@@ -1372,7 +1379,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
fail_flush:
xen_blkbk_unmap(blkif, pending_req->segments,
- pending_req->nr_pages);
+ pending_req->nr_segs);
fail_response:
/* Haven't submitted any bio's yet. */
make_response(blkif, req->u.rw.id, req_operation, BLKIF_RSP_ERROR);
@@ -1438,6 +1445,12 @@ static int __init xen_blkif_init(void)
if (!xen_domain())
return -ENODEV;
+ if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) {
+ pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
+ xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER);
+ xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER;
+ }
+
rc = xen_blkif_interface_init();
if (rc)
goto failed_init;
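xen_blkif_init() validates the new max_ring_page_order parameter once at module load rather than on every frontend connect, clamping out-of-range values back to the xenbus limit. Reduced to a standalone sketch (EXAMPLE_MAX_ORDER stands in for XENBUS_MAX_RING_PAGE_ORDER):

    #include <linux/module.h>
    #include <linux/moduleparam.h>

    #define EXAMPLE_MAX_ORDER 4     /* stand-in for XENBUS_MAX_RING_PAGE_ORDER */

    static unsigned int example_ring_order = EXAMPLE_MAX_ORDER;
    module_param_named(max_ring_page_order, example_ring_order, uint, S_IRUGO);
    MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages used for the shared ring");

    static int __init example_init(void)
    {
            if (example_ring_order > EXAMPLE_MAX_ORDER) {
                    pr_info("Invalid max_ring_page_order (%u), clamping to %u\n",
                            example_ring_order, EXAMPLE_MAX_ORDER);
                    example_ring_order = EXAMPLE_MAX_ORDER;
            }
            return 0;
    }
    module_init(example_init);
    MODULE_LICENSE("GPL");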
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index f620b5d3f77c..45a044a53d1e 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -44,6 +44,7 @@
#include <xen/interface/io/blkif.h>
#include <xen/interface/io/protocols.h>
+extern unsigned int xen_blkif_max_ring_order;
/*
* This is the maximum number of segments that would be allowed in indirect
* requests. This value will also be passed to the frontend.
@@ -248,7 +249,7 @@ struct backend_info;
#define PERSISTENT_GNT_WAS_ACTIVE 1
/* Number of requests that we can fit in a ring */
-#define XEN_BLKIF_REQS 32
+#define XEN_BLKIF_REQS_PER_PAGE 32
struct persistent_gnt {
struct page *page;
@@ -320,6 +321,7 @@ struct xen_blkif {
struct work_struct free_work;
/* Thread shutdown wait queue. */
wait_queue_head_t shutdown_wq;
+ unsigned int nr_ring_pages;
};
struct seg_buf {
@@ -343,7 +345,7 @@ struct grant_page {
struct pending_req {
struct xen_blkif *blkif;
u64 id;
- int nr_pages;
+ int nr_segs;
atomic_t pendcnt;
unsigned short operation;
int status;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 6ab69ad61ee1..deb3f001791f 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -25,6 +25,7 @@
/* Enlarge the array size in order to fully show blkback name. */
#define BLKBACK_NAME_LEN (20)
+#define RINGREF_NAME_LEN (20)
struct backend_info {
struct xenbus_device *dev;
@@ -124,8 +125,6 @@ static void xen_update_blkif_status(struct xen_blkif *blkif)
static struct xen_blkif *xen_blkif_alloc(domid_t domid)
{
struct xen_blkif *blkif;
- struct pending_req *req, *n;
- int i, j;
BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
@@ -151,55 +150,15 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
INIT_LIST_HEAD(&blkif->pending_free);
INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
-
- for (i = 0; i < XEN_BLKIF_REQS; i++) {
- req = kzalloc(sizeof(*req), GFP_KERNEL);
- if (!req)
- goto fail;
- list_add_tail(&req->free_list,
- &blkif->pending_free);
- for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
- req->segments[j] = kzalloc(sizeof(*req->segments[0]),
- GFP_KERNEL);
- if (!req->segments[j])
- goto fail;
- }
- for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
- req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
- GFP_KERNEL);
- if (!req->indirect_pages[j])
- goto fail;
- }
- }
spin_lock_init(&blkif->pending_free_lock);
init_waitqueue_head(&blkif->pending_free_wq);
init_waitqueue_head(&blkif->shutdown_wq);
return blkif;
-
-fail:
- list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
- list_del(&req->free_list);
- for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
- if (!req->segments[j])
- break;
- kfree(req->segments[j]);
- }
- for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
- if (!req->indirect_pages[j])
- break;
- kfree(req->indirect_pages[j]);
- }
- kfree(req);
- }
-
- kmem_cache_free(xen_blkif_cachep, blkif);
-
- return ERR_PTR(-ENOMEM);
}
-static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
- unsigned int evtchn)
+static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
+ unsigned int nr_grefs, unsigned int evtchn)
{
int err;
@@ -207,7 +166,7 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
if (blkif->irq)
return 0;
- err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1,
+ err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
&blkif->blk_ring);
if (err < 0)
return err;
@@ -217,21 +176,21 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
{
struct blkif_sring *sring;
sring = (struct blkif_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
+ BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_32:
{
struct blkif_x86_32_sring *sring_x86_32;
sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
+ BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_64:
{
struct blkif_x86_64_sring *sring_x86_64;
sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
+ BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE * nr_grefs);
break;
}
default:
@@ -312,7 +271,7 @@ static void xen_blkif_free(struct xen_blkif *blkif)
i++;
}
- WARN_ON(i != XEN_BLKIF_REQS);
+ WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
kmem_cache_free(xen_blkif_cachep, blkif);
}
@@ -597,6 +556,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
if (err)
goto fail;
+ err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u",
+ xen_blkif_max_ring_order);
+ if (err)
+ pr_warn("%s write out 'max-ring-page-order' failed\n", __func__);
+
err = xenbus_switch_state(dev, XenbusStateInitWait);
if (err)
goto fail;
@@ -860,22 +824,66 @@ again:
static int connect_ring(struct backend_info *be)
{
struct xenbus_device *dev = be->dev;
- unsigned long ring_ref;
- unsigned int evtchn;
+ unsigned int ring_ref[XENBUS_MAX_RING_PAGES];
+ unsigned int evtchn, nr_grefs, ring_page_order;
unsigned int pers_grants;
char protocol[64] = "";
- int err;
+ struct pending_req *req, *n;
+ int err, i, j;
pr_debug("%s %s\n", __func__, dev->otherend);
- err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
- &ring_ref, "event-channel", "%u", &evtchn, NULL);
- if (err) {
- xenbus_dev_fatal(dev, err,
- "reading %s/ring-ref and event-channel",
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
+ &evtchn);
+ if (err != 1) {
+ err = -EINVAL;
+ xenbus_dev_fatal(dev, err, "reading %s/event-channel",
dev->otherend);
return err;
}
+ pr_info("event-channel %u\n", evtchn);
+
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
+ &ring_page_order);
+ if (err != 1) {
+ err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
+ "%u", &ring_ref[0]);
+ if (err != 1) {
+ err = -EINVAL;
+ xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
+ dev->otherend);
+ return err;
+ }
+ nr_grefs = 1;
+ pr_info("%s:using single page: ring-ref %d\n", dev->otherend,
+ ring_ref[0]);
+ } else {
+ unsigned int i;
+
+ if (ring_page_order > xen_blkif_max_ring_order) {
+ err = -EINVAL;
+			xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceeds max:%d",
+ dev->otherend, ring_page_order,
+ xen_blkif_max_ring_order);
+ return err;
+ }
+
+ nr_grefs = 1 << ring_page_order;
+ for (i = 0; i < nr_grefs; i++) {
+ char ring_ref_name[RINGREF_NAME_LEN];
+
+ snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+ err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name,
+ "%u", &ring_ref[i]);
+ if (err != 1) {
+ err = -EINVAL;
+ xenbus_dev_fatal(dev, err, "reading %s/%s",
+ dev->otherend, ring_ref_name);
+ return err;
+ }
+ pr_info("ring-ref%u: %u\n", i, ring_ref[i]);
+ }
+ }
be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
@@ -900,20 +908,55 @@ static int connect_ring(struct backend_info *be)
be->blkif->vbd.feature_gnt_persistent = pers_grants;
be->blkif->vbd.overflow_max_grants = 0;
+ be->blkif->nr_ring_pages = nr_grefs;
- pr_info("ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
- ring_ref, evtchn, be->blkif->blk_protocol, protocol,
+ pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n",
+ nr_grefs, evtchn, be->blkif->blk_protocol, protocol,
pers_grants ? "persistent grants" : "");
+ for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
+ req = kzalloc(sizeof(*req), GFP_KERNEL);
+ if (!req)
+ goto fail;
+ list_add_tail(&req->free_list, &be->blkif->pending_free);
+ for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+ req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL);
+ if (!req->segments[j])
+ goto fail;
+ }
+ for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+ req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
+ GFP_KERNEL);
+ if (!req->indirect_pages[j])
+ goto fail;
+ }
+ }
+
/* Map the shared frame, irq etc. */
- err = xen_blkif_map(be->blkif, ring_ref, evtchn);
+ err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn);
if (err) {
- xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
- ring_ref, evtchn);
+ xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
return err;
}
return 0;
+
+fail:
+ list_for_each_entry_safe(req, n, &be->blkif->pending_free, free_list) {
+ list_del(&req->free_list);
+ for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+ if (!req->segments[j])
+ break;
+ kfree(req->segments[j]);
+ }
+ for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+ if (!req->indirect_pages[j])
+ break;
+ kfree(req->indirect_pages[j]);
+ }
+ kfree(req);
+ }
+ return -ENOMEM;
}
static const struct xenbus_device_id xen_blkbk_ids[] = {
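The reworked connect_ring() probes ring-page-order first and falls back to the legacy single ring-ref key, so unmodified frontends keep working; with multiple pages, the grant references come from keys named ring-ref0 .. ring-refN-1. A condensed sketch of just that xenstore read side (the caller is assumed to pass an array large enough for 1 << max_order entries):

    #include <linux/kernel.h>
    #include <linux/errno.h>
    #include <xen/xenbus.h>

    #define EX_RINGREF_NAME_LEN 20

    /* Illustrative: read the negotiated ring references from @dir in xenstore. */
    static int example_read_ring_refs(const char *dir, unsigned int max_order,
                                      unsigned int *ring_ref, unsigned int *nr_grefs)
    {
            unsigned int order, i;

            if (xenbus_scanf(XBT_NIL, dir, "ring-page-order", "%u", &order) != 1) {
                    /* legacy frontend: single page, single "ring-ref" key */
                    if (xenbus_scanf(XBT_NIL, dir, "ring-ref", "%u", &ring_ref[0]) != 1)
                            return -EINVAL;
                    *nr_grefs = 1;
                    return 0;
            }

            if (order > max_order)
                    return -EINVAL;

            *nr_grefs = 1 << order;
            for (i = 0; i < *nr_grefs; i++) {
                    char name[EX_RINGREF_NAME_LEN];

                    snprintf(name, sizeof(name), "ring-ref%u", i);
                    if (xenbus_scanf(XBT_NIL, dir, name, "%u", &ring_ref[i]) != 1)
                            return -EINVAL;
            }
            return 0;
    }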
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2c61cf8c6f61..6d89ed35d80c 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -98,7 +98,21 @@ static unsigned int xen_blkif_max_segments = 32;
module_param_named(max, xen_blkif_max_segments, int, S_IRUGO);
MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)");
-#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
+/*
+ * Maximum order of pages to be used for the shared ring between front and
+ * backend, 4KB page granularity is used.
+ */
+static unsigned int xen_blkif_max_ring_order;
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
+
+#define BLK_RING_SIZE(info) __CONST_RING_SIZE(blkif, PAGE_SIZE * (info)->nr_ring_pages)
+#define BLK_MAX_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE * XENBUS_MAX_RING_PAGES)
+/*
+ * ring-ref%i i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19
+ * characters are enough. Define to 20 to keep consist with backend.
+ */
+#define RINGREF_NAME_LEN (20)
/*
* We have one of these per vbd, whether ide, scsi or 'other'. They
@@ -114,13 +128,14 @@ struct blkfront_info
int vdevice;
blkif_vdev_t handle;
enum blkif_state connected;
- int ring_ref;
+ int ring_ref[XENBUS_MAX_RING_PAGES];
+ unsigned int nr_ring_pages;
struct blkif_front_ring ring;
unsigned int evtchn, irq;
struct request_queue *rq;
struct work_struct work;
struct gnttab_free_callback callback;
- struct blk_shadow shadow[BLK_RING_SIZE];
+ struct blk_shadow shadow[BLK_MAX_RING_SIZE];
struct list_head grants;
struct list_head indirect_pages;
unsigned int persistent_gnts_c;
@@ -139,8 +154,6 @@ static unsigned int nr_minors;
static unsigned long *minors;
static DEFINE_SPINLOCK(minor_lock);
-#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
- (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
#define GRANT_INVALID_REF 0
#define PARTS_PER_DISK 16
@@ -170,7 +183,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info);
static int get_id_from_freelist(struct blkfront_info *info)
{
unsigned long free = info->shadow_free;
- BUG_ON(free >= BLK_RING_SIZE);
+ BUG_ON(free >= BLK_RING_SIZE(info));
info->shadow_free = info->shadow[free].req.u.rw.id;
info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
return free;
@@ -983,7 +996,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
}
}
- for (i = 0; i < BLK_RING_SIZE; i++) {
+ for (i = 0; i < BLK_RING_SIZE(info); i++) {
/*
* Clear persistent grants present in requests already
* on the shared ring
@@ -1033,12 +1046,15 @@ free_shadow:
flush_work(&info->work);
/* Free resources associated with old device channel. */
- if (info->ring_ref != GRANT_INVALID_REF) {
- gnttab_end_foreign_access(info->ring_ref, 0,
- (unsigned long)info->ring.sring);
- info->ring_ref = GRANT_INVALID_REF;
- info->ring.sring = NULL;
+ for (i = 0; i < info->nr_ring_pages; i++) {
+ if (info->ring_ref[i] != GRANT_INVALID_REF) {
+ gnttab_end_foreign_access(info->ring_ref[i], 0, 0);
+ info->ring_ref[i] = GRANT_INVALID_REF;
+ }
}
+ free_pages((unsigned long)info->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
+ info->ring.sring = NULL;
+
if (info->irq)
unbind_from_irqhandler(info->irq, info);
info->evtchn = info->irq = 0;
@@ -1058,12 +1074,6 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
- /*
- * Copy the data received from the backend into the bvec.
- * Since bv_offset can be different than 0, and bv_len different
- * than PAGE_SIZE, we have to keep track of the current offset,
- * to be sure we are copying the data from the right shared page.
- */
for_each_sg(s->sg, sg, nseg, i) {
BUG_ON(sg->offset + sg->length > PAGE_SIZE);
shared_data = kmap_atomic(
@@ -1157,7 +1167,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
* never have given to it (we stamp it up to BLK_RING_SIZE -
* look in get_id_from_freelist.
*/
- if (id >= BLK_RING_SIZE) {
+ if (id >= BLK_RING_SIZE(info)) {
WARN(1, "%s: response to %s has incorrect id (%ld)\n",
info->gd->disk_name, op_name(bret->operation), id);
/* We can't safely get the 'struct request' as
@@ -1245,26 +1255,30 @@ static int setup_blkring(struct xenbus_device *dev,
struct blkfront_info *info)
{
struct blkif_sring *sring;
- grant_ref_t gref;
- int err;
+ int err, i;
+ unsigned long ring_size = info->nr_ring_pages * PAGE_SIZE;
+ grant_ref_t gref[XENBUS_MAX_RING_PAGES];
- info->ring_ref = GRANT_INVALID_REF;
+ for (i = 0; i < info->nr_ring_pages; i++)
+ info->ring_ref[i] = GRANT_INVALID_REF;
- sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH);
+ sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
+ get_order(ring_size));
if (!sring) {
xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
return -ENOMEM;
}
SHARED_RING_INIT(sring);
- FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+ FRONT_RING_INIT(&info->ring, sring, ring_size);
- err = xenbus_grant_ring(dev, info->ring.sring, 1, &gref);
+ err = xenbus_grant_ring(dev, info->ring.sring, info->nr_ring_pages, gref);
if (err < 0) {
- free_page((unsigned long)sring);
+ free_pages((unsigned long)sring, get_order(ring_size));
info->ring.sring = NULL;
goto fail;
}
- info->ring_ref = gref;
+ for (i = 0; i < info->nr_ring_pages; i++)
+ info->ring_ref[i] = gref[i];
err = xenbus_alloc_evtchn(dev, &info->evtchn);
if (err)
@@ -1292,7 +1306,18 @@ static int talk_to_blkback(struct xenbus_device *dev,
{
const char *message = NULL;
struct xenbus_transaction xbt;
- int err;
+ int err, i;
+ unsigned int max_page_order = 0;
+ unsigned int ring_page_order = 0;
+
+ err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+ "max-ring-page-order", "%u", &max_page_order);
+ if (err != 1)
+ info->nr_ring_pages = 1;
+ else {
+ ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
+ info->nr_ring_pages = 1 << ring_page_order;
+ }
/* Create shared ring, alloc event channel. */
err = setup_blkring(dev, info);
@@ -1306,11 +1331,32 @@ again:
goto destroy_blkring;
}
- err = xenbus_printf(xbt, dev->nodename,
- "ring-ref", "%u", info->ring_ref);
- if (err) {
- message = "writing ring-ref";
- goto abort_transaction;
+ if (info->nr_ring_pages == 1) {
+ err = xenbus_printf(xbt, dev->nodename,
+ "ring-ref", "%u", info->ring_ref[0]);
+ if (err) {
+ message = "writing ring-ref";
+ goto abort_transaction;
+ }
+ } else {
+ err = xenbus_printf(xbt, dev->nodename,
+ "ring-page-order", "%u", ring_page_order);
+ if (err) {
+ message = "writing ring-page-order";
+ goto abort_transaction;
+ }
+
+ for (i = 0; i < info->nr_ring_pages; i++) {
+ char ring_ref_name[RINGREF_NAME_LEN];
+
+ snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+ err = xenbus_printf(xbt, dev->nodename, ring_ref_name,
+ "%u", info->ring_ref[i]);
+ if (err) {
+ message = "writing ring-ref";
+ goto abort_transaction;
+ }
+ }
}
err = xenbus_printf(xbt, dev->nodename,
"event-channel", "%u", info->evtchn);
@@ -1338,6 +1384,9 @@ again:
goto destroy_blkring;
}
+ for (i = 0; i < BLK_RING_SIZE(info); i++)
+ info->shadow[i].req.u.rw.id = i+1;
+ info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
xenbus_switch_state(dev, XenbusStateInitialised);
return 0;
@@ -1361,7 +1410,7 @@ again:
static int blkfront_probe(struct xenbus_device *dev,
const struct xenbus_device_id *id)
{
- int err, vdevice, i;
+ int err, vdevice;
struct blkfront_info *info;
/* FIXME: Use dynamic device id if this is not set. */
@@ -1422,21 +1471,10 @@ static int blkfront_probe(struct xenbus_device *dev,
info->connected = BLKIF_STATE_DISCONNECTED;
INIT_WORK(&info->work, blkif_restart_queue);
- for (i = 0; i < BLK_RING_SIZE; i++)
- info->shadow[i].req.u.rw.id = i+1;
- info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
-
/* Front end dir is a number, which is used as the id. */
info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
dev_set_drvdata(&dev->dev, info);
- err = talk_to_blkback(dev, info);
- if (err) {
- kfree(info);
- dev_set_drvdata(&dev->dev, NULL);
- return err;
- }
-
return 0;
}
@@ -1476,10 +1514,10 @@ static int blkif_recover(struct blkfront_info *info)
/* Stage 2: Set up free list. */
memset(&info->shadow, 0, sizeof(info->shadow));
- for (i = 0; i < BLK_RING_SIZE; i++)
+ for (i = 0; i < BLK_RING_SIZE(info); i++)
info->shadow[i].req.u.rw.id = i+1;
info->shadow_free = info->ring.req_prod_pvt;
- info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
+ info->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
rc = blkfront_setup_indirect(info);
if (rc) {
@@ -1491,7 +1529,7 @@ static int blkif_recover(struct blkfront_info *info)
blk_queue_max_segments(info->rq, segs);
bio_list_init(&bio_list);
INIT_LIST_HEAD(&requests);
- for (i = 0; i < BLK_RING_SIZE; i++) {
+ for (i = 0; i < BLK_RING_SIZE(info); i++) {
/* Not in use? */
if (!copy[i].request)
continue;
@@ -1697,7 +1735,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
segs = info->max_indirect_segments;
}
- err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE);
+ err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE(info));
if (err)
goto out_of_memory;
@@ -1707,7 +1745,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
* grants, we need to allocate a set of pages that can be
* used for mapping indirect grefs
*/
- int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE;
+ int num = INDIRECT_GREFS(segs) * BLK_RING_SIZE(info);
BUG_ON(!list_empty(&info->indirect_pages));
for (i = 0; i < num; i++) {
@@ -1718,7 +1756,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
}
}
- for (i = 0; i < BLK_RING_SIZE; i++) {
+ for (i = 0; i < BLK_RING_SIZE(info); i++) {
info->shadow[i].grants_used = kzalloc(
sizeof(info->shadow[i].grants_used[0]) * segs,
GFP_NOIO);
@@ -1740,7 +1778,7 @@ static int blkfront_setup_indirect(struct blkfront_info *info)
return 0;
out_of_memory:
- for (i = 0; i < BLK_RING_SIZE; i++) {
+ for (i = 0; i < BLK_RING_SIZE(info); i++) {
kfree(info->shadow[i].grants_used);
info->shadow[i].grants_used = NULL;
kfree(info->shadow[i].sg);
@@ -1906,8 +1944,15 @@ static void blkback_changed(struct xenbus_device *dev,
dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state);
switch (backend_state) {
- case XenbusStateInitialising:
case XenbusStateInitWait:
+ if (dev->state != XenbusStateInitialising)
+ break;
+ if (talk_to_blkback(dev, info)) {
+ kfree(info);
+ dev_set_drvdata(&dev->dev, NULL);
+ break;
+ }
+ case XenbusStateInitialising:
case XenbusStateInitialised:
case XenbusStateReconfiguring:
case XenbusStateReconfigured:
@@ -2091,6 +2136,12 @@ static int __init xlblk_init(void)
if (!xen_domain())
return -ENODEV;
+ if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) {
+ pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
+ xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER);
+ xen_blkif_max_ring_order = 0;
+ }
+
if (!xen_has_pv_disk_devices())
return -ENODEV;
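On the frontend side, talk_to_blkback() clamps the local max_ring_page_order module parameter against the backend's advertised max-ring-page-order, derives nr_ring_pages from the result, and that count then feeds BLK_RING_SIZE(info) and the shadow array sizing. The arithmetic, reduced to a standalone sketch:

    #include <linux/kernel.h>
    #include <linux/types.h>

    /*
     * Illustrative negotiation: the effective ring order is the smaller of what
     * this side is willing to use (module parameter) and what the peer
     * advertises; a missing max-ring-page-order key means "single page".
     */
    static unsigned int example_nr_ring_pages(bool backend_has_key,
                                              unsigned int local_max_order,
                                              unsigned int backend_max_order)
    {
            unsigned int order;

            if (!backend_has_key)
                    return 1;

            order = min(local_max_order, backend_max_order);
            /*
             * One 4KB ring page carries 32 request slots (XEN_BLKIF_REQS_PER_PAGE
             * on the backend), so an order-n ring scales that count by 2^n.
             */
            return 1U << order;
    }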
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index 6489c0fd0ea6..386ba3d1a6ee 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -23,12 +23,4 @@ config ZRAM_LZ4_COMPRESS
default n
help
This option enables LZ4 compression algorithm support. Compression
- algorithm can be changed using `comp_algorithm' device attribute.
-
-config ZRAM_DEBUG
- bool "Compressed RAM block device debug support"
- depends on ZRAM
- default n
- help
- This option adds additional debugging code to the compressed
- RAM block device driver.
+	algorithm can be changed using `comp_algorithm' device attribute.
\ No newline at end of file
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index f1ff39a3d1c1..965d1afb0eaa 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -274,7 +274,7 @@ ssize_t zcomp_available_show(const char *comp, char *buf)
int i = 0;
while (backends[i]) {
- if (sysfs_streq(comp, backends[i]->name))
+ if (!strcmp(comp, backends[i]->name))
sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2,
"[%s] ", backends[i]->name);
else
@@ -286,6 +286,11 @@ ssize_t zcomp_available_show(const char *comp, char *buf)
return sz;
}
+bool zcomp_available_algorithm(const char *comp)
+{
+ return find_backend(comp) != NULL;
+}
+
bool zcomp_set_max_streams(struct zcomp *comp, int num_strm)
{
return comp->set_max_streams(comp, num_strm);
diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h
index c59d1fca72c0..46e2b9f8f1f0 100644
--- a/drivers/block/zram/zcomp.h
+++ b/drivers/block/zram/zcomp.h
@@ -51,6 +51,7 @@ struct zcomp {
};
ssize_t zcomp_available_show(const char *comp, char *buf);
+bool zcomp_available_algorithm(const char *comp);
struct zcomp *zcomp_create(const char *comp, int max_strm);
void zcomp_destroy(struct zcomp *comp);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 6e134f4759c0..fb655e8d1e3b 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -15,10 +15,6 @@
#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-#ifdef CONFIG_ZRAM_DEBUG
-#define DEBUG
-#endif
-
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
@@ -32,12 +28,16 @@
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/sysfs.h>
#include "zram_drv.h"
-/* Globals */
+static DEFINE_IDR(zram_index_idr);
+/* idr index must be protected */
+static DEFINE_MUTEX(zram_index_mutex);
+
static int zram_major;
-static struct zram *zram_devices;
static const char *default_compressor = "lzo";
/* Module params (documentation at end) */
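The static zram_devices array is gone; devices are now tracked in an IDR keyed by device id, with zram_index_mutex serializing additions and removals (see zram_add()/zram_remove() further down). A stripped-down sketch of that allocation pattern, with invented structure names:

    #include <linux/idr.h>
    #include <linux/mutex.h>
    #include <linux/slab.h>

    static DEFINE_IDR(example_idr);
    static DEFINE_MUTEX(example_idr_lock);

    struct example_dev { int id; };

    /* Illustrative: allocate a device and reserve the lowest free id for it. */
    static int example_add(void)
    {
            struct example_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
            int id;

            if (!dev)
                    return -ENOMEM;

            mutex_lock(&example_idr_lock);
            id = idr_alloc(&example_idr, dev, 0, 0, GFP_KERNEL);
            mutex_unlock(&example_idr_lock);
            if (id < 0) {
                    kfree(dev);
                    return id;
            }
            dev->id = id;
            return id;
    }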
@@ -53,7 +53,7 @@ static inline void deprecated_attr_warn(const char *name)
}
#define ZRAM_ATTR_RO(name) \
-static ssize_t name##_show(struct device *d, \
+static ssize_t name##_show(struct device *d, \
struct device_attribute *attr, char *b) \
{ \
struct zram *zram = dev_to_zram(d); \
@@ -74,33 +74,117 @@ static inline struct zram *dev_to_zram(struct device *dev)
return (struct zram *)dev_to_disk(dev)->private_data;
}
-static ssize_t compact_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+/* flag operations require table entry bit_spin_lock() being held */
+static int zram_test_flag(struct zram_meta *meta, u32 index,
+ enum zram_pageflags flag)
{
- unsigned long nr_migrated;
- struct zram *zram = dev_to_zram(dev);
- struct zram_meta *meta;
+ return meta->table[index].value & BIT(flag);
+}
- down_read(&zram->init_lock);
- if (!init_done(zram)) {
- up_read(&zram->init_lock);
- return -EINVAL;
- }
+static void zram_set_flag(struct zram_meta *meta, u32 index,
+ enum zram_pageflags flag)
+{
+ meta->table[index].value |= BIT(flag);
+}
- meta = zram->meta;
- nr_migrated = zs_compact(meta->mem_pool);
- atomic64_add(nr_migrated, &zram->stats.num_migrated);
- up_read(&zram->init_lock);
+static void zram_clear_flag(struct zram_meta *meta, u32 index,
+ enum zram_pageflags flag)
+{
+ meta->table[index].value &= ~BIT(flag);
+}
- return len;
+static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
+{
+ return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
}
-static ssize_t disksize_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static void zram_set_obj_size(struct zram_meta *meta,
+ u32 index, size_t size)
{
- struct zram *zram = dev_to_zram(dev);
+ unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
- return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
+ meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
+}
+
+static inline int is_partial_io(struct bio_vec *bvec)
+{
+ return bvec->bv_len != PAGE_SIZE;
+}
+
+/*
+ * Check if request is within bounds and aligned on zram logical blocks.
+ */
+static inline int valid_io_request(struct zram *zram,
+ sector_t start, unsigned int size)
+{
+ u64 end, bound;
+
+ /* unaligned request */
+ if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
+ return 0;
+ if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
+ return 0;
+
+ end = start + (size >> SECTOR_SHIFT);
+ bound = zram->disksize >> SECTOR_SHIFT;
+ /* out of range range */
+ if (unlikely(start >= bound || end > bound || start > end))
+ return 0;
+
+ /* I/O request is valid */
+ return 1;
+}
+
+static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
+{
+ if (*offset + bvec->bv_len >= PAGE_SIZE)
+ (*index)++;
+ *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
+}
+
+static inline void update_used_max(struct zram *zram,
+ const unsigned long pages)
+{
+ unsigned long old_max, cur_max;
+
+ old_max = atomic_long_read(&zram->stats.max_used_pages);
+
+ do {
+ cur_max = old_max;
+ if (pages > cur_max)
+ old_max = atomic_long_cmpxchg(
+ &zram->stats.max_used_pages, cur_max, pages);
+ } while (old_max != cur_max);
+}
+
+static int page_zero_filled(void *ptr)
+{
+ unsigned int pos;
+ unsigned long *page;
+
+ page = (unsigned long *)ptr;
+
+ for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
+ if (page[pos])
+ return 0;
+ }
+
+ return 1;
+}
+
+static void handle_zero_page(struct bio_vec *bvec)
+{
+ struct page *page = bvec->bv_page;
+ void *user_mem;
+
+ user_mem = kmap_atomic(page);
+ if (is_partial_io(bvec))
+ memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
+ else
+ clear_page(user_mem);
+ kunmap_atomic(user_mem);
+
+ flush_dcache_page(page);
}
static ssize_t initstate_show(struct device *dev,
@@ -116,6 +200,14 @@ static ssize_t initstate_show(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}
+static ssize_t disksize_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct zram *zram = dev_to_zram(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
+}
+
static ssize_t orig_data_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -143,19 +235,6 @@ static ssize_t mem_used_total_show(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
}
-static ssize_t max_comp_streams_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- int val;
- struct zram *zram = dev_to_zram(dev);
-
- down_read(&zram->init_lock);
- val = zram->max_comp_streams;
- up_read(&zram->init_lock);
-
- return scnprintf(buf, PAGE_SIZE, "%d\n", val);
-}
-
static ssize_t mem_limit_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -225,6 +304,19 @@ static ssize_t mem_used_max_store(struct device *dev,
return len;
}
+static ssize_t max_comp_streams_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int val;
+ struct zram *zram = dev_to_zram(dev);
+
+ down_read(&zram->init_lock);
+ val = zram->max_comp_streams;
+ up_read(&zram->init_lock);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+}
+
static ssize_t max_comp_streams_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
@@ -271,6 +363,8 @@ static ssize_t comp_algorithm_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
struct zram *zram = dev_to_zram(dev);
+ size_t sz;
+
down_write(&zram->init_lock);
if (init_done(zram)) {
up_write(&zram->init_lock);
@@ -278,69 +372,108 @@ static ssize_t comp_algorithm_store(struct device *dev,
return -EBUSY;
}
strlcpy(zram->compressor, buf, sizeof(zram->compressor));
+
+ /* ignore trailing newline */
+ sz = strlen(zram->compressor);
+ if (sz > 0 && zram->compressor[sz - 1] == '\n')
+ zram->compressor[sz - 1] = 0x00;
+
+ if (!zcomp_available_algorithm(zram->compressor))
+ len = -EINVAL;
+
up_write(&zram->init_lock);
return len;
}
-/* flag operations needs meta->tb_lock */
-static int zram_test_flag(struct zram_meta *meta, u32 index,
- enum zram_pageflags flag)
+static ssize_t compact_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
{
- return meta->table[index].value & BIT(flag);
-}
+ unsigned long nr_migrated;
+ struct zram *zram = dev_to_zram(dev);
+ struct zram_meta *meta;
-static void zram_set_flag(struct zram_meta *meta, u32 index,
- enum zram_pageflags flag)
-{
- meta->table[index].value |= BIT(flag);
-}
+ down_read(&zram->init_lock);
+ if (!init_done(zram)) {
+ up_read(&zram->init_lock);
+ return -EINVAL;
+ }
-static void zram_clear_flag(struct zram_meta *meta, u32 index,
- enum zram_pageflags flag)
-{
- meta->table[index].value &= ~BIT(flag);
+ meta = zram->meta;
+ nr_migrated = zs_compact(meta->mem_pool);
+ atomic64_add(nr_migrated, &zram->stats.num_migrated);
+ up_read(&zram->init_lock);
+
+ return len;
}
-static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
+static ssize_t io_stat_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
+ struct zram *zram = dev_to_zram(dev);
+ ssize_t ret;
+
+ down_read(&zram->init_lock);
+ ret = scnprintf(buf, PAGE_SIZE,
+ "%8llu %8llu %8llu %8llu\n",
+ (u64)atomic64_read(&zram->stats.failed_reads),
+ (u64)atomic64_read(&zram->stats.failed_writes),
+ (u64)atomic64_read(&zram->stats.invalid_io),
+ (u64)atomic64_read(&zram->stats.notify_free));
+ up_read(&zram->init_lock);
+
+ return ret;
}
-static void zram_set_obj_size(struct zram_meta *meta,
- u32 index, size_t size)
+static ssize_t mm_stat_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
+ struct zram *zram = dev_to_zram(dev);
+ u64 orig_size, mem_used = 0;
+ long max_used;
+ ssize_t ret;
- meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
+ down_read(&zram->init_lock);
+ if (init_done(zram))
+ mem_used = zs_get_total_pages(zram->meta->mem_pool);
+
+ orig_size = atomic64_read(&zram->stats.pages_stored);
+ max_used = atomic_long_read(&zram->stats.max_used_pages);
+
+ ret = scnprintf(buf, PAGE_SIZE,
+ "%8llu %8llu %8llu %8lu %8ld %8llu %8llu\n",
+ orig_size << PAGE_SHIFT,
+ (u64)atomic64_read(&zram->stats.compr_data_size),
+ mem_used << PAGE_SHIFT,
+ zram->limit_pages << PAGE_SHIFT,
+ max_used << PAGE_SHIFT,
+ (u64)atomic64_read(&zram->stats.zero_pages),
+ (u64)atomic64_read(&zram->stats.num_migrated));
+ up_read(&zram->init_lock);
+
+ return ret;
}
-static inline int is_partial_io(struct bio_vec *bvec)
+static DEVICE_ATTR_RO(io_stat);
+static DEVICE_ATTR_RO(mm_stat);
+ZRAM_ATTR_RO(num_reads);
+ZRAM_ATTR_RO(num_writes);
+ZRAM_ATTR_RO(failed_reads);
+ZRAM_ATTR_RO(failed_writes);
+ZRAM_ATTR_RO(invalid_io);
+ZRAM_ATTR_RO(notify_free);
+ZRAM_ATTR_RO(zero_pages);
+ZRAM_ATTR_RO(compr_data_size);
+
+static inline bool zram_meta_get(struct zram *zram)
{
- return bvec->bv_len != PAGE_SIZE;
+ if (atomic_inc_not_zero(&zram->refcount))
+ return true;
+ return false;
}
-/*
- * Check if request is within bounds and aligned on zram logical blocks.
- */
-static inline int valid_io_request(struct zram *zram,
- sector_t start, unsigned int size)
+static inline void zram_meta_put(struct zram *zram)
{
- u64 end, bound;
-
- /* unaligned request */
- if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
- return 0;
- if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
- return 0;
-
- end = start + (size >> SECTOR_SHIFT);
- bound = zram->disksize >> SECTOR_SHIFT;
- /* out of range range */
- if (unlikely(start >= bound || end > bound || start > end))
- return 0;
-
- /* I/O request is valid */
- return 1;
+ atomic_dec(&zram->refcount);
}
static void zram_meta_free(struct zram_meta *meta, u64 disksize)
@@ -394,56 +527,6 @@ out_error:
return NULL;
}
-static inline bool zram_meta_get(struct zram *zram)
-{
- if (atomic_inc_not_zero(&zram->refcount))
- return true;
- return false;
-}
-
-static inline void zram_meta_put(struct zram *zram)
-{
- atomic_dec(&zram->refcount);
-}
-
-static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
-{
- if (*offset + bvec->bv_len >= PAGE_SIZE)
- (*index)++;
- *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
-}
-
-static int page_zero_filled(void *ptr)
-{
- unsigned int pos;
- unsigned long *page;
-
- page = (unsigned long *)ptr;
-
- for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
- if (page[pos])
- return 0;
- }
-
- return 1;
-}
-
-static void handle_zero_page(struct bio_vec *bvec)
-{
- struct page *page = bvec->bv_page;
- void *user_mem;
-
- user_mem = kmap_atomic(page);
- if (is_partial_io(bvec))
- memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
- else
- clear_page(user_mem);
- kunmap_atomic(user_mem);
-
- flush_dcache_page(page);
-}
-
-
/*
* To protect concurrent access to the same index entry,
* caller should hold this table index entry's bit_spinlock to
@@ -561,21 +644,6 @@ out_cleanup:
return ret;
}
-static inline void update_used_max(struct zram *zram,
- const unsigned long pages)
-{
- unsigned long old_max, cur_max;
-
- old_max = atomic_long_read(&zram->stats.max_used_pages);
-
- do {
- cur_max = old_max;
- if (pages > cur_max)
- old_max = atomic_long_cmpxchg(
- &zram->stats.max_used_pages, cur_max, pages);
- } while (old_max != cur_max);
-}
-
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
int offset)
{
@@ -585,8 +653,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
struct page *page;
unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
struct zram_meta *meta = zram->meta;
- struct zcomp_strm *zstrm;
- bool locked = false;
+ struct zcomp_strm *zstrm = NULL;
unsigned long alloced_pages;
page = bvec->bv_page;
@@ -606,7 +673,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
}
zstrm = zcomp_strm_find(zram->comp);
- locked = true;
user_mem = kmap_atomic(page);
if (is_partial_io(bvec)) {
@@ -678,7 +744,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
}
zcomp_strm_release(zram->comp, zstrm);
- locked = false;
+ zstrm = NULL;
zs_unmap_object(meta->mem_pool, handle);
/*
@@ -696,42 +762,13 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
atomic64_add(clen, &zram->stats.compr_data_size);
atomic64_inc(&zram->stats.pages_stored);
out:
- if (locked)
+ if (zstrm)
zcomp_strm_release(zram->comp, zstrm);
if (is_partial_io(bvec))
kfree(uncmem);
return ret;
}
-static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
- int offset, int rw)
-{
- unsigned long start_time = jiffies;
- int ret;
-
- generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT,
- &zram->disk->part0);
-
- if (rw == READ) {
- atomic64_inc(&zram->stats.num_reads);
- ret = zram_bvec_read(zram, bvec, index, offset);
- } else {
- atomic64_inc(&zram->stats.num_writes);
- ret = zram_bvec_write(zram, bvec, index, offset);
- }
-
- generic_end_io_acct(rw, &zram->disk->part0, start_time);
-
- if (unlikely(ret)) {
- if (rw == READ)
- atomic64_inc(&zram->stats.failed_reads);
- else
- atomic64_inc(&zram->stats.failed_writes);
- }
-
- return ret;
-}
-
/*
* zram_bio_discard - handler on discard request
* @index: physical block index in PAGE_SIZE units
@@ -771,151 +808,32 @@ static void zram_bio_discard(struct zram *zram, u32 index,
}
}
-static void zram_reset_device(struct zram *zram)
-{
- struct zram_meta *meta;
- struct zcomp *comp;
- u64 disksize;
-
- down_write(&zram->init_lock);
-
- zram->limit_pages = 0;
-
- if (!init_done(zram)) {
- up_write(&zram->init_lock);
- return;
- }
-
- meta = zram->meta;
- comp = zram->comp;
- disksize = zram->disksize;
- /*
- * Refcount will go down to 0 eventually and r/w handler
- * cannot handle further I/O so it will bail out by
- * check zram_meta_get.
- */
- zram_meta_put(zram);
- /*
- * We want to free zram_meta in process context to avoid
- * deadlock between reclaim path and any other locks.
- */
- wait_event(zram->io_done, atomic_read(&zram->refcount) == 0);
-
- /* Reset stats */
- memset(&zram->stats, 0, sizeof(zram->stats));
- zram->disksize = 0;
- zram->max_comp_streams = 1;
-
- set_capacity(zram->disk, 0);
- part_stat_set_all(&zram->disk->part0, 0);
-
- up_write(&zram->init_lock);
- /* I/O operation under all of CPU are done so let's free */
- zram_meta_free(meta, disksize);
- zcomp_destroy(comp);
-}
-
-static ssize_t disksize_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
-{
- u64 disksize;
- struct zcomp *comp;
- struct zram_meta *meta;
- struct zram *zram = dev_to_zram(dev);
- int err;
-
- disksize = memparse(buf, NULL);
- if (!disksize)
- return -EINVAL;
-
- disksize = PAGE_ALIGN(disksize);
- meta = zram_meta_alloc(zram->disk->first_minor, disksize);
- if (!meta)
- return -ENOMEM;
-
- comp = zcomp_create(zram->compressor, zram->max_comp_streams);
- if (IS_ERR(comp)) {
- pr_info("Cannot initialise %s compressing backend\n",
- zram->compressor);
- err = PTR_ERR(comp);
- goto out_free_meta;
- }
-
- down_write(&zram->init_lock);
- if (init_done(zram)) {
- pr_info("Cannot change disksize for initialized device\n");
- err = -EBUSY;
- goto out_destroy_comp;
- }
-
- init_waitqueue_head(&zram->io_done);
- atomic_set(&zram->refcount, 1);
- zram->meta = meta;
- zram->comp = comp;
- zram->disksize = disksize;
- set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
- up_write(&zram->init_lock);
-
- /*
- * Revalidate disk out of the init_lock to avoid lockdep splat.
- * It's okay because disk's capacity is protected by init_lock
- * so that revalidate_disk always sees up-to-date capacity.
- */
- revalidate_disk(zram->disk);
-
- return len;
-
-out_destroy_comp:
- up_write(&zram->init_lock);
- zcomp_destroy(comp);
-out_free_meta:
- zram_meta_free(meta, disksize);
- return err;
-}
-
-static ssize_t reset_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
+ int offset, int rw)
{
+ unsigned long start_time = jiffies;
int ret;
- unsigned short do_reset;
- struct zram *zram;
- struct block_device *bdev;
- zram = dev_to_zram(dev);
- bdev = bdget_disk(zram->disk, 0);
-
- if (!bdev)
- return -ENOMEM;
+ generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT,
+ &zram->disk->part0);
- mutex_lock(&bdev->bd_mutex);
- /* Do not reset an active device! */
- if (bdev->bd_openers) {
- ret = -EBUSY;
- goto out;
+ if (rw == READ) {
+ atomic64_inc(&zram->stats.num_reads);
+ ret = zram_bvec_read(zram, bvec, index, offset);
+ } else {
+ atomic64_inc(&zram->stats.num_writes);
+ ret = zram_bvec_write(zram, bvec, index, offset);
}
- ret = kstrtou16(buf, 10, &do_reset);
- if (ret)
- goto out;
+ generic_end_io_acct(rw, &zram->disk->part0, start_time);
- if (!do_reset) {
- ret = -EINVAL;
- goto out;
+ if (unlikely(ret)) {
+ if (rw == READ)
+ atomic64_inc(&zram->stats.failed_reads);
+ else
+ atomic64_inc(&zram->stats.failed_writes);
}
- /* Make sure all pending I/O is finished */
- fsync_bdev(bdev);
- zram_reset_device(zram);
-
- mutex_unlock(&bdev->bd_mutex);
- revalidate_disk(zram->disk);
- bdput(bdev);
-
- return len;
-
-out:
- mutex_unlock(&bdev->bd_mutex);
- bdput(bdev);
return ret;
}
@@ -1055,80 +973,185 @@ out:
return err;
}
-static const struct block_device_operations zram_devops = {
- .swap_slot_free_notify = zram_slot_free_notify,
- .rw_page = zram_rw_page,
- .owner = THIS_MODULE
-};
+static void zram_reset_device(struct zram *zram)
+{
+ struct zram_meta *meta;
+ struct zcomp *comp;
+ u64 disksize;
-static DEVICE_ATTR_WO(compact);
-static DEVICE_ATTR_RW(disksize);
-static DEVICE_ATTR_RO(initstate);
-static DEVICE_ATTR_WO(reset);
-static DEVICE_ATTR_RO(orig_data_size);
-static DEVICE_ATTR_RO(mem_used_total);
-static DEVICE_ATTR_RW(mem_limit);
-static DEVICE_ATTR_RW(mem_used_max);
-static DEVICE_ATTR_RW(max_comp_streams);
-static DEVICE_ATTR_RW(comp_algorithm);
+ down_write(&zram->init_lock);
-static ssize_t io_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ zram->limit_pages = 0;
+
+ if (!init_done(zram)) {
+ up_write(&zram->init_lock);
+ return;
+ }
+
+ meta = zram->meta;
+ comp = zram->comp;
+ disksize = zram->disksize;
+ /*
+ * Refcount will go down to 0 eventually and r/w handler
+ * cannot handle further I/O so it will bail out by
+ * check zram_meta_get.
+ */
+ zram_meta_put(zram);
+ /*
+ * We want to free zram_meta in process context to avoid
+ * deadlock between reclaim path and any other locks.
+ */
+ wait_event(zram->io_done, atomic_read(&zram->refcount) == 0);
+
+ /* Reset stats */
+ memset(&zram->stats, 0, sizeof(zram->stats));
+ zram->disksize = 0;
+ zram->max_comp_streams = 1;
+
+ set_capacity(zram->disk, 0);
+ part_stat_set_all(&zram->disk->part0, 0);
+
+ up_write(&zram->init_lock);
+ /* I/O operation under all of CPU are done so let's free */
+ zram_meta_free(meta, disksize);
+ zcomp_destroy(comp);
+}
+
+static ssize_t disksize_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
{
+ u64 disksize;
+ struct zcomp *comp;
+ struct zram_meta *meta;
struct zram *zram = dev_to_zram(dev);
- ssize_t ret;
+ int err;
- down_read(&zram->init_lock);
- ret = scnprintf(buf, PAGE_SIZE,
- "%8llu %8llu %8llu %8llu\n",
- (u64)atomic64_read(&zram->stats.failed_reads),
- (u64)atomic64_read(&zram->stats.failed_writes),
- (u64)atomic64_read(&zram->stats.invalid_io),
- (u64)atomic64_read(&zram->stats.notify_free));
- up_read(&zram->init_lock);
+ disksize = memparse(buf, NULL);
+ if (!disksize)
+ return -EINVAL;
- return ret;
+ disksize = PAGE_ALIGN(disksize);
+ meta = zram_meta_alloc(zram->disk->first_minor, disksize);
+ if (!meta)
+ return -ENOMEM;
+
+ comp = zcomp_create(zram->compressor, zram->max_comp_streams);
+ if (IS_ERR(comp)) {
+ pr_info("Cannot initialise %s compressing backend\n",
+ zram->compressor);
+ err = PTR_ERR(comp);
+ goto out_free_meta;
+ }
+
+ down_write(&zram->init_lock);
+ if (init_done(zram)) {
+ pr_info("Cannot change disksize for initialized device\n");
+ err = -EBUSY;
+ goto out_destroy_comp;
+ }
+
+ init_waitqueue_head(&zram->io_done);
+ atomic_set(&zram->refcount, 1);
+ zram->meta = meta;
+ zram->comp = comp;
+ zram->disksize = disksize;
+ set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
+ up_write(&zram->init_lock);
+
+ /*
+ * Revalidate disk out of the init_lock to avoid lockdep splat.
+ * It's okay because disk's capacity is protected by init_lock
+ * so that revalidate_disk always sees up-to-date capacity.
+ */
+ revalidate_disk(zram->disk);
+
+ return len;
+
+out_destroy_comp:
+ up_write(&zram->init_lock);
+ zcomp_destroy(comp);
+out_free_meta:
+ zram_meta_free(meta, disksize);
+ return err;
}
-static ssize_t mm_stat_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t reset_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
{
- struct zram *zram = dev_to_zram(dev);
- u64 orig_size, mem_used = 0;
- long max_used;
- ssize_t ret;
+ int ret;
+ unsigned short do_reset;
+ struct zram *zram;
+ struct block_device *bdev;
- down_read(&zram->init_lock);
- if (init_done(zram))
- mem_used = zs_get_total_pages(zram->meta->mem_pool);
+ ret = kstrtou16(buf, 10, &do_reset);
+ if (ret)
+ return ret;
- orig_size = atomic64_read(&zram->stats.pages_stored);
- max_used = atomic_long_read(&zram->stats.max_used_pages);
+ if (!do_reset)
+ return -EINVAL;
- ret = scnprintf(buf, PAGE_SIZE,
- "%8llu %8llu %8llu %8lu %8ld %8llu %8llu\n",
- orig_size << PAGE_SHIFT,
- (u64)atomic64_read(&zram->stats.compr_data_size),
- mem_used << PAGE_SHIFT,
- zram->limit_pages << PAGE_SHIFT,
- max_used << PAGE_SHIFT,
- (u64)atomic64_read(&zram->stats.zero_pages),
- (u64)atomic64_read(&zram->stats.num_migrated));
- up_read(&zram->init_lock);
+ zram = dev_to_zram(dev);
+ bdev = bdget_disk(zram->disk, 0);
+ if (!bdev)
+ return -ENOMEM;
+
+ mutex_lock(&bdev->bd_mutex);
+ /* Do not reset an active device or claimed device */
+ if (bdev->bd_openers || zram->claim) {
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdev);
+ return -EBUSY;
+ }
+
+	/* From now on, no one can open /dev/zram[0-9] */
+ zram->claim = true;
+ mutex_unlock(&bdev->bd_mutex);
+
+ /* Make sure all the pending I/O are finished */
+ fsync_bdev(bdev);
+ zram_reset_device(zram);
+ revalidate_disk(zram->disk);
+ bdput(bdev);
+
+ mutex_lock(&bdev->bd_mutex);
+ zram->claim = false;
+ mutex_unlock(&bdev->bd_mutex);
+
+ return len;
+}
+
+static int zram_open(struct block_device *bdev, fmode_t mode)
+{
+ int ret = 0;
+ struct zram *zram;
+
+ WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
+
+ zram = bdev->bd_disk->private_data;
+ /* zram was claimed to reset so open request fails */
+ if (zram->claim)
+ ret = -EBUSY;
return ret;
}
-static DEVICE_ATTR_RO(io_stat);
-static DEVICE_ATTR_RO(mm_stat);
-ZRAM_ATTR_RO(num_reads);
-ZRAM_ATTR_RO(num_writes);
-ZRAM_ATTR_RO(failed_reads);
-ZRAM_ATTR_RO(failed_writes);
-ZRAM_ATTR_RO(invalid_io);
-ZRAM_ATTR_RO(notify_free);
-ZRAM_ATTR_RO(zero_pages);
-ZRAM_ATTR_RO(compr_data_size);
+static const struct block_device_operations zram_devops = {
+ .open = zram_open,
+ .swap_slot_free_notify = zram_slot_free_notify,
+ .rw_page = zram_rw_page,
+ .owner = THIS_MODULE
+};
+
+static DEVICE_ATTR_WO(compact);
+static DEVICE_ATTR_RW(disksize);
+static DEVICE_ATTR_RO(initstate);
+static DEVICE_ATTR_WO(reset);
+static DEVICE_ATTR_RO(orig_data_size);
+static DEVICE_ATTR_RO(mem_used_total);
+static DEVICE_ATTR_RW(mem_limit);
+static DEVICE_ATTR_RW(mem_used_max);
+static DEVICE_ATTR_RW(max_comp_streams);
+static DEVICE_ATTR_RW(comp_algorithm);
static struct attribute *zram_disk_attrs[] = {
&dev_attr_disksize.attr,
@@ -1158,10 +1181,24 @@ static struct attribute_group zram_disk_attr_group = {
.attrs = zram_disk_attrs,
};
-static int create_device(struct zram *zram, int device_id)
+/*
+ * Allocate and initialize a new zram device. The function returns
+ * a device_id >= 0 on success, or a negative error code otherwise.
+ */
+static int zram_add(void)
{
+ struct zram *zram;
struct request_queue *queue;
- int ret = -ENOMEM;
+ int ret, device_id;
+
+ zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
+ if (!zram)
+ return -ENOMEM;
+
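+	/* end == 0 means no upper bound; the returned id becomes the device's minor number and name suffix */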
+ ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
+ if (ret < 0)
+ goto out_free_dev;
+ device_id = ret;
init_rwsem(&zram->init_lock);
@@ -1169,12 +1206,13 @@ static int create_device(struct zram *zram, int device_id)
if (!queue) {
pr_err("Error allocating disk queue for device %d\n",
device_id);
- goto out;
+ ret = -ENOMEM;
+ goto out_free_idr;
}
blk_queue_make_request(queue, zram_make_request);
- /* gendisk structure */
+ /* gendisk structure */
zram->disk = alloc_disk(1);
if (!zram->disk) {
pr_warn("Error allocating disk structure for device %d\n",
@@ -1232,90 +1270,177 @@ static int create_device(struct zram *zram, int device_id)
strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
zram->meta = NULL;
zram->max_comp_streams = 1;
- return 0;
+
+ pr_info("Added device: %s\n", zram->disk->disk_name);
+ return device_id;
out_free_disk:
del_gendisk(zram->disk);
put_disk(zram->disk);
out_free_queue:
blk_cleanup_queue(queue);
-out:
+out_free_idr:
+ idr_remove(&zram_index_idr, device_id);
+out_free_dev:
+ kfree(zram);
return ret;
}
-static void destroy_devices(unsigned int nr)
+static int zram_remove(struct zram *zram)
+{
+ struct block_device *bdev;
+
+ bdev = bdget_disk(zram->disk, 0);
+ if (!bdev)
+ return -ENOMEM;
+
+ mutex_lock(&bdev->bd_mutex);
+ if (bdev->bd_openers || zram->claim) {
+ mutex_unlock(&bdev->bd_mutex);
+ bdput(bdev);
+ return -EBUSY;
+ }
+
+ zram->claim = true;
+ mutex_unlock(&bdev->bd_mutex);
+
+ /*
+ * Remove sysfs first, so no one will perform a disksize
+ * store while we destroy the devices. This also helps during
+ * hot_remove -- zram_reset_device() is the last holder of
+ * ->init_lock, no later/concurrent disksize_store() or any
+	 * ->init_lock, so no later/concurrent disksize_store() or any
+ */
+ sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
+ &zram_disk_attr_group);
+
+	/* Make sure all pending I/O is finished */
+ fsync_bdev(bdev);
+ zram_reset_device(zram);
+ bdput(bdev);
+
+ pr_info("Removed device: %s\n", zram->disk->disk_name);
+
+ idr_remove(&zram_index_idr, zram->disk->first_minor);
+ blk_cleanup_queue(zram->disk->queue);
+ del_gendisk(zram->disk);
+ put_disk(zram->disk);
+ kfree(zram);
+ return 0;
+}
+
+/* zram-control sysfs attributes */
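+/*
+ * Reading /sys/class/zram-control/hot_add allocates a new device and
+ * returns its id; writing an id to hot_remove destroys that device.
+ */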
+static ssize_t hot_add_show(struct class *class,
+ struct class_attribute *attr,
+ char *buf)
+{
+ int ret;
+
+ mutex_lock(&zram_index_mutex);
+ ret = zram_add();
+ mutex_unlock(&zram_index_mutex);
+
+ if (ret < 0)
+ return ret;
+ return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+}
+
+static ssize_t hot_remove_store(struct class *class,
+ struct class_attribute *attr,
+ const char *buf,
+ size_t count)
{
struct zram *zram;
- unsigned int i;
+ int ret, dev_id;
- for (i = 0; i < nr; i++) {
- zram = &zram_devices[i];
- /*
- * Remove sysfs first, so no one will perform a disksize
- * store while we destroy the devices
- */
- sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
- &zram_disk_attr_group);
+ /* dev_id is gendisk->first_minor, which is `int' */
+ ret = kstrtoint(buf, 10, &dev_id);
+ if (ret)
+ return ret;
+ if (dev_id < 0)
+ return -EINVAL;
- zram_reset_device(zram);
+ mutex_lock(&zram_index_mutex);
- blk_cleanup_queue(zram->disk->queue);
- del_gendisk(zram->disk);
- put_disk(zram->disk);
- }
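+	/* look up under zram_index_mutex so the device cannot be removed concurrently by another hot_remove */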
+ zram = idr_find(&zram_index_idr, dev_id);
+ if (zram)
+ ret = zram_remove(zram);
+ else
+ ret = -ENODEV;
- kfree(zram_devices);
+ mutex_unlock(&zram_index_mutex);
+ return ret ? ret : count;
+}
+
+static struct class_attribute zram_control_class_attrs[] = {
+ __ATTR_RO(hot_add),
+ __ATTR_WO(hot_remove),
+ __ATTR_NULL,
+};
+
+static struct class zram_control_class = {
+ .name = "zram-control",
+ .owner = THIS_MODULE,
+ .class_attrs = zram_control_class_attrs,
+};
+
+static int zram_remove_cb(int id, void *ptr, void *data)
+{
+ zram_remove(ptr);
+ return 0;
+}
+
+static void destroy_devices(void)
+{
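+	/* unregister the class first so hot_add/hot_remove cannot create or destroy devices during teardown */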
+ class_unregister(&zram_control_class);
+ idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
+ idr_destroy(&zram_index_idr);
unregister_blkdev(zram_major, "zram");
- pr_info("Destroyed %u device(s)\n", nr);
}
static int __init zram_init(void)
{
- int ret, dev_id;
+ int ret;
- if (num_devices > max_num_devices) {
- pr_warn("Invalid value for num_devices: %u\n",
- num_devices);
- return -EINVAL;
+ ret = class_register(&zram_control_class);
+ if (ret) {
+ pr_warn("Unable to register zram-control class\n");
+ return ret;
}
zram_major = register_blkdev(0, "zram");
if (zram_major <= 0) {
pr_warn("Unable to get major number\n");
+ class_unregister(&zram_control_class);
return -EBUSY;
}
- /* Allocate the device array and initialize each one */
- zram_devices = kzalloc(num_devices * sizeof(struct zram), GFP_KERNEL);
- if (!zram_devices) {
- unregister_blkdev(zram_major, "zram");
- return -ENOMEM;
- }
-
- for (dev_id = 0; dev_id < num_devices; dev_id++) {
- ret = create_device(&zram_devices[dev_id], dev_id);
- if (ret)
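+	/* pre-create num_devices devices; more can be added later via the zram-control hot_add attribute */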
+ while (num_devices != 0) {
+ mutex_lock(&zram_index_mutex);
+ ret = zram_add();
+ mutex_unlock(&zram_index_mutex);
+ if (ret < 0)
goto out_error;
+ num_devices--;
}
- pr_info("Created %u device(s)\n", num_devices);
return 0;
out_error:
- destroy_devices(dev_id);
+ destroy_devices();
return ret;
}
static void __exit zram_exit(void)
{
- destroy_devices(num_devices);
+ destroy_devices();
}
module_init(zram_init);
module_exit(zram_exit);
module_param(num_devices, uint, 0);
-MODULE_PARM_DESC(num_devices, "Number of zram devices");
+MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 570c598f4ce9..6dbe2df506bf 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -20,12 +20,6 @@
#include "zcomp.h"
-/*
- * Some arbitrary value. This is just to catch
- * invalid value for num_devices module parameter.
- */
-static const unsigned max_num_devices = 32;
-
/*-- Configurable parameters */
/*
@@ -121,5 +115,9 @@ struct zram {
*/
u64 disksize; /* bytes */
char compressor[10];
+ /*
+	 * zram is claimed, so open requests will fail
+ */
+ bool claim; /* Protected by bdev->bd_mutex */
};
#endif