Merge branch 'for-3.13/core' of git://git.kernel.dk/linux-block

Pull block IO core updates from Jens Axboe: "This is the pull request for the core changes in the block layer for 3.13. It contains: - The new blk-mq request interface. This is a new and more scalable queueing model that marries the best part of the request based interface we currently have (which is fully featured, but scales poorly) and the bio based "interface" which the new drivers for high IOPS devices end up using because it's much faster than the request based one. The bio interface has no block layer support, since it taps into the stack much earlier. This means that drivers end up having to implement a lot of functionality on their own, like tagging, timeout handling, requeue, etc. The blk-mq interface provides all these. Some drivers even provide a switch to select bio or rq and has code to handle both, since things like merging only works in the rq model and hence is faster for some workloads. This is a huge mess. Conversion of these drivers nets us a substantial code reduction. Initial results on converting SCSI to this model even shows an 8x improvement on single queue devices. So while the model was intended to work on the newer multiqueue devices, it has substantial improvements for "classic" hardware as well. This code has gone through extensive testing and development, it's now ready to go. A pull request is coming to convert virtio-blk to this model will be will be coming as well, with more drivers scheduled for 3.14 conversion. - Two blktrace fixes from Jan and Chen Gang. - A plug merge fix from Alireza Haghdoost. - Conversion of __get_cpu_var() from Christoph Lameter. - Fix for sector_div() with 64-bit divider from Geert Uytterhoeven. - A fix for a race between request completion and the timeout handling from Jeff Moyer. This is what caused the merge conflict with blk-mq/core, in case you are looking at that. - A dm stacking fix from Mike Snitzer. - A code consolidation fix and duplicated code removal from Kent Overstreet. - A handful of block bug fixes from Mikulas Patocka, fixing a loop crash and memory corruption on blk cg. - Elevator switch bug fix from Tomoki Sekiyama. A heads-up that I had to rebase this branch. Initially the immutable bio_vecs had been queued up for inclusion, but a week later, it became clear that it wasn't fully cooked yet. So the decision was made to pull this out and postpone it until 3.14. It was a straight forward rebase, just pruning out the immutable series and the later fixes of problems with it. The rest of the patches applied directly and no further changes were made" * 'for-3.13/core' of git://git.kernel.dk/linux-block: (31 commits) block: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO block: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO block: Do not call sector_div() with a 64-bit divisor kernel: trace: blktrace: remove redundent memcpy() in compat_blk_trace_setup() block: Consolidate duplicated bio_trim() implementations block: Use rw_copy_check_uvector() block: Enable sysfs nomerge control for I/O requests in the plug list block: properly stack underlying max_segment_size to DM device elevator: acquire q->sysfs_lock in elevator_change() elevator: Fix a race in elevator switching and md device initialization block: Replace __get_cpu_var uses bdi: test bdi_init failure block: fix a probe argument to blk_register_region loop: fix crash if blk_alloc_queue fails blk-core: Fix memory corruption if blkcg_init_queue fails block: fix race between request completion and timeout handling blktrace: Send BLK_TN_PROCESS events to all running traces blk-mq: don't disallow request merges for req->special being set blk-mq: mq plug list breakage blk-mq: fix for flush deadlock ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2013-11-14 12:08:14 +0900
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-11-14 12:08:14 +0900
commit: 0910c0bdf7c291a41bc21e40a97389c9d4c1960d (patch)
tree: 177c4cb22ece78b18f64f548ae82b9a15edbb99c /drivers/block
parent: 2821fe6b00a1e902fd399bb4b7e40bc3041f4d44 (diff)
parent: e37459b8e2c7db6735e39e019e448b76e5e77647 (diff)
7 files changed, 647 insertions, 57 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index e67fa16e1938..5902bd006a9c 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -15,6 +15,9 @@ menuconfig BLK_DEV
 
 if BLK_DEV
 
+config BLK_DEV_NULL_BLK
+	tristate "Null test block driver"
+
 config BLK_DEV_FD
 	tristate "Normal floppy disk support"
 	depends on ARCH_MAY_HAVE_PC_FDC
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index ca07399a8d99..03b3b4a2bd8a 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_BLK_DEV_RBD)     += rbd.o
 obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX)	+= mtip32xx/
 
 obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
+obj-$(CONFIG_BLK_DEV_NULL_BLK)	+= null_blk.o
 
 nvme-y		:= nvme-core.o nvme-scsi.o
 swim_mod-y	:= swim.o swim_asm.o
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 9bf4371755f2..d91f1a56e861 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -545,7 +545,7 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data)
 
 	mutex_lock(&brd_devices_mutex);
 	brd = brd_init_one(MINOR(dev) >> part_shift);
-	kobj = brd ? get_disk(brd->brd_disk) : ERR_PTR(-ENOMEM);
+	kobj = brd ? get_disk(brd->brd_disk) : NULL;
 	mutex_unlock(&brd_devices_mutex);
 
 	*part = 0;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 04ceb7e2fadd..000abe2f105c 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2886,9 +2886,9 @@ static void do_fd_request(struct request_queue *q)
 		return;
 
 	if (WARN(atomic_read(&usage_count) == 0,
-		 "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%x\n",
+		 "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%llx\n",
 		 current_req, (long)blk_rq_pos(current_req), current_req->cmd_type,
-		 current_req->cmd_flags))
+		 (unsigned long long) current_req->cmd_flags))
 		return;
 
 	if (test_and_set_bit(0, &fdc_busy)) {
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 40e715531aa6..dbdb88a4976c 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1633,7 +1633,7 @@ static int loop_add(struct loop_device **l, int i)
 	err = -ENOMEM;
 	lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
 	if (!lo->lo_queue)
-		goto out_free_dev;
+		goto out_free_idr;
 
 	disk = lo->lo_disk = alloc_disk(1 << part_shift);
 	if (!disk)
@@ -1678,6 +1678,8 @@ static int loop_add(struct loop_device **l, int i)
 
 out_free_queue:
 	blk_cleanup_queue(lo->lo_queue);
+out_free_idr:
+	idr_remove(&loop_index_idr, i);
 out_free_dev:
 	kfree(lo);
 out:
@@ -1741,7 +1743,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data)
 	if (err < 0)
 		err = loop_add(&lo, MINOR(dev) >> part_shift);
 	if (err < 0)
-		kobj = ERR_PTR(err);
+		kobj = NULL;
 	else
 		kobj = get_disk(lo->lo_disk);
 	mutex_unlock(&loop_index_mutex);
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
new file mode 100644
index 000000000000..b5d842370cc9
--- /dev/null
+++ b/drivers/block/null_blk.c
@@ -0,0 +1,635 @@
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/blk-mq.h>
+#include <linux/hrtimer.h>
+
+struct nullb_cmd {
+	struct list_head list;
+	struct llist_node ll_list;
+	struct call_single_data csd;
+	struct request *rq;
+	struct bio *bio;
+	unsigned int tag;
+	struct nullb_queue *nq;
+};
+
+struct nullb_queue {
+	unsigned long *tag_map;
+	wait_queue_head_t wait;
+	unsigned int queue_depth;
+
+	struct nullb_cmd *cmds;
+};
+
+struct nullb {
+	struct list_head list;
+	unsigned int index;
+	struct request_queue *q;
+	struct gendisk *disk;
+	struct hrtimer timer;
+	unsigned int queue_depth;
+	spinlock_t lock;
+
+	struct nullb_queue *queues;
+	unsigned int nr_queues;
+};
+
+static LIST_HEAD(nullb_list);
+static struct mutex lock;
+static int null_major;
+static int nullb_indexes;
+
+struct completion_queue {
+	struct llist_head list;
+	struct hrtimer timer;
+};
+
+/*
+ * These are per-cpu for now, they will need to be configured by the
+ * complete_queues parameter and appropriately mapped.
+ */
+static DEFINE_PER_CPU(struct completion_queue, completion_queues);
+
+enum {
+	NULL_IRQ_NONE		= 0,
+	NULL_IRQ_SOFTIRQ	= 1,
+	NULL_IRQ_TIMER		= 2,
+
+	NULL_Q_BIO		= 0,
+	NULL_Q_RQ		= 1,
+	NULL_Q_MQ		= 2,
+};
+
+static int submit_queues = 1;
+module_param(submit_queues, int, S_IRUGO);
+MODULE_PARM_DESC(submit_queues, "Number of submission queues");
+
+static int home_node = NUMA_NO_NODE;
+module_param(home_node, int, S_IRUGO);
+MODULE_PARM_DESC(home_node, "Home node for the device");
+
+static int queue_mode = NULL_Q_MQ;
+module_param(queue_mode, int, S_IRUGO);
+MODULE_PARM_DESC(use_mq, "Use blk-mq interface (0=bio,1=rq,2=multiqueue)");
+
+static int gb = 250;
+module_param(gb, int, S_IRUGO);
+MODULE_PARM_DESC(gb, "Size in GB");
+
+static int bs = 512;
+module_param(bs, int, S_IRUGO);
+MODULE_PARM_DESC(bs, "Block size (in bytes)");
+
+static int nr_devices = 2;
+module_param(nr_devices, int, S_IRUGO);
+MODULE_PARM_DESC(nr_devices, "Number of devices to register");
+
+static int irqmode = NULL_IRQ_SOFTIRQ;
+module_param(irqmode, int, S_IRUGO);
+MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
+
+static int completion_nsec = 10000;
+module_param(completion_nsec, int, S_IRUGO);
+MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
+
+static int hw_queue_depth = 64;
+module_param(hw_queue_depth, int, S_IRUGO);
+MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
+
+static bool use_per_node_hctx = true;
+module_param(use_per_node_hctx, bool, S_IRUGO);
+MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: true");
+
+static void put_tag(struct nullb_queue *nq, unsigned int tag)
+{
+	clear_bit_unlock(tag, nq->tag_map);
+
+	if (waitqueue_active(&nq->wait))
+		wake_up(&nq->wait);
+}
+
+static unsigned int get_tag(struct nullb_queue *nq)
+{
+	unsigned int tag;
+
+	do {
+		tag = find_first_zero_bit(nq->tag_map, nq->queue_depth);
+		if (tag >= nq->queue_depth)
+			return -1U;
+	} while (test_and_set_bit_lock(tag, nq->tag_map));
+
+	return tag;
+}
+
+static void free_cmd(struct nullb_cmd *cmd)
+{
+	put_tag(cmd->nq, cmd->tag);
+}
+
+static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
+{
+	struct nullb_cmd *cmd;
+	unsigned int tag;
+
+	tag = get_tag(nq);
+	if (tag != -1U) {
+		cmd = &nq->cmds[tag];
+		cmd->tag = tag;
+		cmd->nq = nq;
+		return cmd;
+	}
+
+	return NULL;
+}
+
+static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
+{
+	struct nullb_cmd *cmd;
+	DEFINE_WAIT(wait);
+
+	cmd = __alloc_cmd(nq);
+	if (cmd || !can_wait)
+		return cmd;
+
+	do {
+		prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE);
+		cmd = __alloc_cmd(nq);
+		if (cmd)
+			break;
+
+		io_schedule();
+	} while (1);
+
+	finish_wait(&nq->wait, &wait);
+	return cmd;
+}
+
+static void end_cmd(struct nullb_cmd *cmd)
+{
+	if (cmd->rq) {
+		if (queue_mode == NULL_Q_MQ)
+			blk_mq_end_io(cmd->rq, 0);
+		else {
+			INIT_LIST_HEAD(&cmd->rq->queuelist);
+			blk_end_request_all(cmd->rq, 0);
+		}
+	} else if (cmd->bio)
+		bio_endio(cmd->bio, 0);
+
+	if (queue_mode != NULL_Q_MQ)
+		free_cmd(cmd);
+}
+
+static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
+{
+	struct completion_queue *cq;
+	struct llist_node *entry;
+	struct nullb_cmd *cmd;
+
+	cq = &per_cpu(completion_queues, smp_processor_id());
+
+	while ((entry = llist_del_all(&cq->list)) != NULL) {
+		do {
+			cmd = container_of(entry, struct nullb_cmd, ll_list);
+			end_cmd(cmd);
+			entry = entry->next;
+		} while (entry);
+	}
+
+	return HRTIMER_NORESTART;
+}
+
+static void null_cmd_end_timer(struct nullb_cmd *cmd)
+{
+	struct completion_queue *cq = &per_cpu(completion_queues, get_cpu());
+
+	cmd->ll_list.next = NULL;
+	if (llist_add(&cmd->ll_list, &cq->list)) {
+		ktime_t kt = ktime_set(0, completion_nsec);
+
+		hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL);
+	}
+
+	put_cpu();
+}
+
+static void null_softirq_done_fn(struct request *rq)
+{
+	blk_end_request_all(rq, 0);
+}
+
+#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+
+static void null_ipi_cmd_end_io(void *data)
+{
+	struct completion_queue *cq;
+	struct llist_node *entry, *next;
+	struct nullb_cmd *cmd;
+
+	cq = &per_cpu(completion_queues, smp_processor_id());
+
+	entry = llist_del_all(&cq->list);
+
+	while (entry) {
+		next = entry->next;
+		cmd = llist_entry(entry, struct nullb_cmd, ll_list);
+		end_cmd(cmd);
+		entry = next;
+	}
+}
+
+static void null_cmd_end_ipi(struct nullb_cmd *cmd)
+{
+	struct call_single_data *data = &cmd->csd;
+	int cpu = get_cpu();
+	struct completion_queue *cq = &per_cpu(completion_queues, cpu);
+
+	cmd->ll_list.next = NULL;
+
+	if (llist_add(&cmd->ll_list, &cq->list)) {
+		data->func = null_ipi_cmd_end_io;
+		data->flags = 0;
+		__smp_call_function_single(cpu, data, 0);
+	}
+
+	put_cpu();
+}
+
+#endif /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */
+
+static inline void null_handle_cmd(struct nullb_cmd *cmd)
+{
+	/* Complete IO by inline, softirq or timer */
+	switch (irqmode) {
+	case NULL_IRQ_NONE:
+		end_cmd(cmd);
+		break;
+	case NULL_IRQ_SOFTIRQ:
+#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+		null_cmd_end_ipi(cmd);
+#else
+		end_cmd(cmd);
+#endif
+		break;
+	case NULL_IRQ_TIMER:
+		null_cmd_end_timer(cmd);
+		break;
+	}
+}
+
+static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
+{
+	int index = 0;
+
+	if (nullb->nr_queues != 1)
+		index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues);
+
+	return &nullb->queues[index];
+}
+
+static void null_queue_bio(struct request_queue *q, struct bio *bio)
+{
+	struct nullb *nullb = q->queuedata;
+	struct nullb_queue *nq = nullb_to_queue(nullb);
+	struct nullb_cmd *cmd;
+
+	cmd = alloc_cmd(nq, 1);
+	cmd->bio = bio;
+
+	null_handle_cmd(cmd);
+}
+
+static int null_rq_prep_fn(struct request_queue *q, struct request *req)
+{
+	struct nullb *nullb = q->queuedata;
+	struct nullb_queue *nq = nullb_to_queue(nullb);
+	struct nullb_cmd *cmd;
+
+	cmd = alloc_cmd(nq, 0);
+	if (cmd) {
+		cmd->rq = req;
+		req->special = cmd;
+		return BLKPREP_OK;
+	}
+
+	return BLKPREP_DEFER;
+}
+
+static void null_request_fn(struct request_queue *q)
+{
+	struct request *rq;
+
+	while ((rq = blk_fetch_request(q)) != NULL) {
+		struct nullb_cmd *cmd = rq->special;
+
+		spin_unlock_irq(q->queue_lock);
+		null_handle_cmd(cmd);
+		spin_lock_irq(q->queue_lock);
+	}
+}
+
+static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
+{
+	struct nullb_cmd *cmd = rq->special;
+
+	cmd->rq = rq;
+	cmd->nq = hctx->driver_data;
+
+	null_handle_cmd(cmd);
+	return BLK_MQ_RQ_QUEUE_OK;
+}
+
+static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index)
+{
+	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL,
+				hctx_index);
+}
+
+static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index)
+{
+	kfree(hctx);
+}
+
+static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
+			  unsigned int index)
+{
+	struct nullb *nullb = data;
+	struct nullb_queue *nq = &nullb->queues[index];
+
+	init_waitqueue_head(&nq->wait);
+	nq->queue_depth = nullb->queue_depth;
+	nullb->nr_queues++;
+	hctx->driver_data = nq;
+
+	return 0;
+}
+
+static struct blk_mq_ops null_mq_ops = {
+	.queue_rq       = null_queue_rq,
+	.map_queue      = blk_mq_map_queue,
+	.init_hctx	= null_init_hctx,
+};
+
+static struct blk_mq_reg null_mq_reg = {
+	.ops		= &null_mq_ops,
+	.queue_depth	= 64,
+	.cmd_size	= sizeof(struct nullb_cmd),
+	.flags		= BLK_MQ_F_SHOULD_MERGE,
+};
+
+static void null_del_dev(struct nullb *nullb)
+{
+	list_del_init(&nullb->list);
+
+	del_gendisk(nullb->disk);
+	if (queue_mode == NULL_Q_MQ)
+		blk_mq_free_queue(nullb->q);
+	else
+		blk_cleanup_queue(nullb->q);
+	put_disk(nullb->disk);
+	kfree(nullb);
+}
+
+static int null_open(struct block_device *bdev, fmode_t mode)
+{
+	return 0;
+}
+
+static void null_release(struct gendisk *disk, fmode_t mode)
+{
+}
+
+static const struct block_device_operations null_fops = {
+	.owner =	THIS_MODULE,
+	.open =		null_open,
+	.release =	null_release,
+};
+
+static int setup_commands(struct nullb_queue *nq)
+{
+	struct nullb_cmd *cmd;
+	int i, tag_size;
+
+	nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL);
+	if (!nq->cmds)
+		return 1;
+
+	tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
+	nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL);
+	if (!nq->tag_map) {
+		kfree(nq->cmds);
+		return 1;
+	}
+
+	for (i = 0; i < nq->queue_depth; i++) {
+		cmd = &nq->cmds[i];
+		INIT_LIST_HEAD(&cmd->list);
+		cmd->ll_list.next = NULL;
+		cmd->tag = -1U;
+	}
+
+	return 0;
+}
+
+static void cleanup_queue(struct nullb_queue *nq)
+{
+	kfree(nq->tag_map);
+	kfree(nq->cmds);
+}
+
+static void cleanup_queues(struct nullb *nullb)
+{
+	int i;
+
+	for (i = 0; i < nullb->nr_queues; i++)
+		cleanup_queue(&nullb->queues[i]);
+
+	kfree(nullb->queues);
+}
+
+static int setup_queues(struct nullb *nullb)
+{
+	struct nullb_queue *nq;
+	int i;
+
+	nullb->queues = kzalloc(submit_queues * sizeof(*nq), GFP_KERNEL);
+	if (!nullb->queues)
+		return 1;
+
+	nullb->nr_queues = 0;
+	nullb->queue_depth = hw_queue_depth;
+
+	if (queue_mode == NULL_Q_MQ)
+		return 0;
+
+	for (i = 0; i < submit_queues; i++) {
+		nq = &nullb->queues[i];
+		init_waitqueue_head(&nq->wait);
+		nq->queue_depth = hw_queue_depth;
+		if (setup_commands(nq))
+			break;
+		nullb->nr_queues++;
+	}
+
+	if (i == submit_queues)
+		return 0;
+
+	cleanup_queues(nullb);
+	return 1;
+}
+
+static int null_add_dev(void)
+{
+	struct gendisk *disk;
+	struct nullb *nullb;
+	sector_t size;
+
+	nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
+	if (!nullb)
+		return -ENOMEM;
+
+	spin_lock_init(&nullb->lock);
+
+	if (setup_queues(nullb))
+		goto err;
+
+	if (queue_mode == NULL_Q_MQ) {
+		null_mq_reg.numa_node = home_node;
+		null_mq_reg.queue_depth = hw_queue_depth;
+
+		if (use_per_node_hctx) {
+			null_mq_reg.ops->alloc_hctx = null_alloc_hctx;
+			null_mq_reg.ops->free_hctx = null_free_hctx;
+
+			null_mq_reg.nr_hw_queues = nr_online_nodes;
+		} else {
+			null_mq_reg.ops->alloc_hctx = blk_mq_alloc_single_hw_queue;
+			null_mq_reg.ops->free_hctx = blk_mq_free_single_hw_queue;
+
+			null_mq_reg.nr_hw_queues = submit_queues;
+		}
+
+		nullb->q = blk_mq_init_queue(&null_mq_reg, nullb);
+	} else if (queue_mode == NULL_Q_BIO) {
+		nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
+		blk_queue_make_request(nullb->q, null_queue_bio);
+	} else {
+		nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
+		blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
+		if (nullb->q)
+			blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
+	}
+
+	if (!nullb->q)
+		goto queue_fail;
+
+	nullb->q->queuedata = nullb;
+	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
+
+	disk = nullb->disk = alloc_disk_node(1, home_node);
+	if (!disk) {
+queue_fail:
+		if (queue_mode == NULL_Q_MQ)
+			blk_mq_free_queue(nullb->q);
+		else
+			blk_cleanup_queue(nullb->q);
+		cleanup_queues(nullb);
+err:
+		kfree(nullb);
+		return -ENOMEM;
+	}
+
+	mutex_lock(&lock);
+	list_add_tail(&nullb->list, &nullb_list);
+	nullb->index = nullb_indexes++;
+	mutex_unlock(&lock);
+
+	blk_queue_logical_block_size(nullb->q, bs);
+	blk_queue_physical_block_size(nullb->q, bs);
+
+	size = gb * 1024 * 1024 * 1024ULL;
+	sector_div(size, bs);
+	set_capacity(disk, size);
+
+	disk->flags |= GENHD_FL_EXT_DEVT;
+	disk->major		= null_major;
+	disk->first_minor	= nullb->index;
+	disk->fops		= &null_fops;
+	disk->private_data	= nullb;
+	disk->queue		= nullb->q;
+	sprintf(disk->disk_name, "nullb%d", nullb->index);
+	add_disk(disk);
+	return 0;
+}
+
+static int __init null_init(void)
+{
+	unsigned int i;
+
+#if !defined(CONFIG_SMP) || !defined(CONFIG_USE_GENERIC_SMP_HELPERS)
+	if (irqmode == NULL_IRQ_SOFTIRQ) {
+		pr_warn("null_blk: softirq completions not available.\n");
+		pr_warn("null_blk: using direct completions.\n");
+		irqmode = NULL_IRQ_NONE;
+	}
+#endif
+
+	if (submit_queues > nr_cpu_ids)
+		submit_queues = nr_cpu_ids;
+	else if (!submit_queues)
+		submit_queues = 1;
+
+	mutex_init(&lock);
+
+	/* Initialize a separate list for each CPU for issuing softirqs */
+	for_each_possible_cpu(i) {
+		struct completion_queue *cq = &per_cpu(completion_queues, i);
+
+		init_llist_head(&cq->list);
+
+		if (irqmode != NULL_IRQ_TIMER)
+			continue;
+
+		hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		cq->timer.function = null_cmd_timer_expired;
+	}
+
+	null_major = register_blkdev(0, "nullb");
+	if (null_major < 0)
+		return null_major;
+
+	for (i = 0; i < nr_devices; i++) {
+		if (null_add_dev()) {
+			unregister_blkdev(null_major, "nullb");
+			return -EINVAL;
+		}
+	}
+
+	pr_info("null: module loaded\n");
+	return 0;
+}
+
+static void __exit null_exit(void)
+{
+	struct nullb *nullb;
+
+	unregister_blkdev(null_major, "nullb");
+
+	mutex_lock(&lock);
+	while (!list_empty(&nullb_list)) {
+		nullb = list_entry(nullb_list.next, struct nullb, list);
+		null_del_dev(nullb);
+	}
+	mutex_unlock(&lock);
+}
+
+module_init(null_init);
+module_exit(null_exit);
+
+MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index a4660bbee8a6..8d53ed293606 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1336,57 +1336,6 @@ static int blkfront_probe(struct xenbus_device *dev,
 	return 0;
 }
 
-/*
- * This is a clone of md_trim_bio, used to split a bio into smaller ones
- */
-static void trim_bio(struct bio *bio, int offset, int size)
-{
-	/* 'bio' is a cloned bio which we need to trim to match
-	 * the given offset and size.
-	 * This requires adjusting bi_sector, bi_size, and bi_io_vec
-	 */
-	int i;
-	struct bio_vec *bvec;
-	int sofar = 0;
-
-	size <<= 9;
-	if (offset == 0 && size == bio->bi_size)
-		return;
-
-	bio->bi_sector += offset;
-	bio->bi_size = size;
-	offset <<= 9;
-	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
-
-	while (bio->bi_idx < bio->bi_vcnt &&
-	       bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
-		/* remove this whole bio_vec */
-		offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
-		bio->bi_idx++;
-	}
-	if (bio->bi_idx < bio->bi_vcnt) {
-		bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
-		bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
-	}
-	/* avoid any complications with bi_idx being non-zero*/
-	if (bio->bi_idx) {
-		memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
-			(bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
-		bio->bi_vcnt -= bio->bi_idx;
-		bio->bi_idx = 0;
-	}
-	/* Make sure vcnt and last bv are not too big */
-	bio_for_each_segment(bvec, bio, i) {
-		if (sofar + bvec->bv_len > size)
-			bvec->bv_len = size - sofar;
-		if (bvec->bv_len == 0) {
-			bio->bi_vcnt = i;
-			break;
-		}
-		sofar += bvec->bv_len;
-	}
-}
-
 static void split_bio_end(struct bio *bio, int error)
 {
 	struct split_bio *split_bio = bio->bi_private;
@@ -1522,7 +1471,7 @@ static int blkif_recover(struct blkfront_info *info)
 					   (unsigned int)(bio->bi_size >> 9) - offset);
 				cloned_bio = bio_clone(bio, GFP_NOIO);
 				BUG_ON(cloned_bio == NULL);
-				trim_bio(cloned_bio, offset, size);
+				bio_trim(cloned_bio, offset, size);
 				cloned_bio->bi_private = split_bio;
 				cloned_bio->bi_end_io = split_bio_end;
 				submit_bio(cloned_bio->bi_rw, cloned_bio);
author	Linus Torvalds <torvalds@linux-foundation.org>	2013-11-14 12:08:14 +0900
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-11-14 12:08:14 +0900
commit	0910c0bdf7c291a41bc21e40a97389c9d4c1960d (patch)
tree	177c4cb22ece78b18f64f548ae82b9a15edbb99c /drivers/block
parent	2821fe6b00a1e902fd399bb4b7e40bc3041f4d44 (diff)
parent	e37459b8e2c7db6735e39e019e448b76e5e77647 (diff)