diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-14 12:08:14 +0900 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-14 12:08:14 +0900 |
commit | 0910c0bdf7c291a41bc21e40a97389c9d4c1960d (patch) | |
tree | 177c4cb22ece78b18f64f548ae82b9a15edbb99c /drivers/block | |
parent | 2821fe6b00a1e902fd399bb4b7e40bc3041f4d44 (diff) | |
parent | e37459b8e2c7db6735e39e019e448b76e5e77647 (diff) |
Merge branch 'for-3.13/core' of git://git.kernel.dk/linux-block
Pull block IO core updates from Jens Axboe:
"This is the pull request for the core changes in the block layer for
3.13. It contains:
- The new blk-mq request interface.
This is a new and more scalable queueing model that marries the
best part of the request based interface we currently have (which
is fully featured, but scales poorly) and the bio based "interface"
which the new drivers for high IOPS devices end up using because
it's much faster than the request based one.
The bio interface has no block layer support, since it taps into
the stack much earlier. This means that drivers end up having to
implement a lot of functionality on their own, like tagging,
timeout handling, requeue, etc. The blk-mq interface provides all
these. Some drivers even provide a switch to select bio or rq and
have code to handle both, since things like merging only work in
the rq model and hence is faster for some workloads. This is a
huge mess. Conversion of these drivers nets us a substantial code
reduction. Initial results on converting SCSI to this model even
show an 8x improvement on single queue devices. So while the
model was intended to work on the newer multiqueue devices, it has
substantial improvements for "classic" hardware as well. This code
has gone through extensive testing and development, it's now ready
to go. A pull request to convert virtio-blk to this
model will be coming as well, with more drivers scheduled
for 3.14 conversion.
- Two blktrace fixes from Jan and Chen Gang.
- A plug merge fix from Alireza Haghdoost.
- Conversion of __get_cpu_var() from Christoph Lameter.
- Fix for sector_div() with 64-bit divider from Geert Uytterhoeven.
- A fix for a race between request completion and the timeout
handling from Jeff Moyer. This is what caused the merge conflict
with blk-mq/core, in case you are looking at that.
- A dm stacking fix from Mike Snitzer.
- A code consolidation fix and duplicated code removal from Kent
Overstreet.
- A handful of block bug fixes from Mikulas Patocka, fixing a loop
crash and memory corruption on blk cg.
- Elevator switch bug fix from Tomoki Sekiyama.
A heads-up that I had to rebase this branch. Initially the immutable
bio_vecs had been queued up for inclusion, but a week later, it became
clear that it wasn't fully cooked yet. So the decision was made to
pull this out and postpone it until 3.14. It was a straightforward
rebase, just pruning out the immutable series and the later fixes of
problems with it. The rest of the patches applied directly and no
further changes were made"
* 'for-3.13/core' of git://git.kernel.dk/linux-block: (31 commits)
block: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
block: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
block: Do not call sector_div() with a 64-bit divisor
kernel: trace: blktrace: remove redundent memcpy() in compat_blk_trace_setup()
block: Consolidate duplicated bio_trim() implementations
block: Use rw_copy_check_uvector()
block: Enable sysfs nomerge control for I/O requests in the plug list
block: properly stack underlying max_segment_size to DM device
elevator: acquire q->sysfs_lock in elevator_change()
elevator: Fix a race in elevator switching and md device initialization
block: Replace __get_cpu_var uses
bdi: test bdi_init failure
block: fix a probe argument to blk_register_region
loop: fix crash if blk_alloc_queue fails
blk-core: Fix memory corruption if blkcg_init_queue fails
block: fix race between request completion and timeout handling
blktrace: Send BLK_TN_PROCESS events to all running traces
blk-mq: don't disallow request merges for req->special being set
blk-mq: mq plug list breakage
blk-mq: fix for flush deadlock
...
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/Kconfig | 3 | ||||
-rw-r--r-- | drivers/block/Makefile | 1 | ||||
-rw-r--r-- | drivers/block/brd.c | 2 | ||||
-rw-r--r-- | drivers/block/floppy.c | 4 | ||||
-rw-r--r-- | drivers/block/loop.c | 6 | ||||
-rw-r--r-- | drivers/block/null_blk.c | 635 | ||||
-rw-r--r-- | drivers/block/xen-blkfront.c | 53 |
7 files changed, 647 insertions, 57 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index e67fa16e1938..5902bd006a9c 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -15,6 +15,9 @@ menuconfig BLK_DEV if BLK_DEV +config BLK_DEV_NULL_BLK + tristate "Null test block driver" + config BLK_DEV_FD tristate "Normal floppy disk support" depends on ARCH_MAY_HAVE_PC_FDC diff --git a/drivers/block/Makefile b/drivers/block/Makefile index ca07399a8d99..03b3b4a2bd8a 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_BLK_DEV_RBD) += rbd.o obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ +obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o nvme-y := nvme-core.o nvme-scsi.o swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 9bf4371755f2..d91f1a56e861 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -545,7 +545,7 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data) mutex_lock(&brd_devices_mutex); brd = brd_init_one(MINOR(dev) >> part_shift); - kobj = brd ? get_disk(brd->brd_disk) : ERR_PTR(-ENOMEM); + kobj = brd ? 
get_disk(brd->brd_disk) : NULL; mutex_unlock(&brd_devices_mutex); *part = 0; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 04ceb7e2fadd..000abe2f105c 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2886,9 +2886,9 @@ static void do_fd_request(struct request_queue *q) return; if (WARN(atomic_read(&usage_count) == 0, - "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%x\n", + "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%llx\n", current_req, (long)blk_rq_pos(current_req), current_req->cmd_type, - current_req->cmd_flags)) + (unsigned long long) current_req->cmd_flags)) return; if (test_and_set_bit(0, &fdc_busy)) { diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 40e715531aa6..dbdb88a4976c 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1633,7 +1633,7 @@ static int loop_add(struct loop_device **l, int i) err = -ENOMEM; lo->lo_queue = blk_alloc_queue(GFP_KERNEL); if (!lo->lo_queue) - goto out_free_dev; + goto out_free_idr; disk = lo->lo_disk = alloc_disk(1 << part_shift); if (!disk) @@ -1678,6 +1678,8 @@ static int loop_add(struct loop_device **l, int i) out_free_queue: blk_cleanup_queue(lo->lo_queue); +out_free_idr: + idr_remove(&loop_index_idr, i); out_free_dev: kfree(lo); out: @@ -1741,7 +1743,7 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data) if (err < 0) err = loop_add(&lo, MINOR(dev) >> part_shift); if (err < 0) - kobj = ERR_PTR(err); + kobj = NULL; else kobj = get_disk(lo->lo_disk); mutex_unlock(&loop_index_mutex); diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c new file mode 100644 index 000000000000..b5d842370cc9 --- /dev/null +++ b/drivers/block/null_blk.c @@ -0,0 +1,635 @@ +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/blkdev.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/blk-mq.h> +#include <linux/hrtimer.h> + +struct 
nullb_cmd { + struct list_head list; + struct llist_node ll_list; + struct call_single_data csd; + struct request *rq; + struct bio *bio; + unsigned int tag; + struct nullb_queue *nq; +}; + +struct nullb_queue { + unsigned long *tag_map; + wait_queue_head_t wait; + unsigned int queue_depth; + + struct nullb_cmd *cmds; +}; + +struct nullb { + struct list_head list; + unsigned int index; + struct request_queue *q; + struct gendisk *disk; + struct hrtimer timer; + unsigned int queue_depth; + spinlock_t lock; + + struct nullb_queue *queues; + unsigned int nr_queues; +}; + +static LIST_HEAD(nullb_list); +static struct mutex lock; +static int null_major; +static int nullb_indexes; + +struct completion_queue { + struct llist_head list; + struct hrtimer timer; +}; + +/* + * These are per-cpu for now, they will need to be configured by the + * complete_queues parameter and appropriately mapped. + */ +static DEFINE_PER_CPU(struct completion_queue, completion_queues); + +enum { + NULL_IRQ_NONE = 0, + NULL_IRQ_SOFTIRQ = 1, + NULL_IRQ_TIMER = 2, + + NULL_Q_BIO = 0, + NULL_Q_RQ = 1, + NULL_Q_MQ = 2, +}; + +static int submit_queues = 1; +module_param(submit_queues, int, S_IRUGO); +MODULE_PARM_DESC(submit_queues, "Number of submission queues"); + +static int home_node = NUMA_NO_NODE; +module_param(home_node, int, S_IRUGO); +MODULE_PARM_DESC(home_node, "Home node for the device"); + +static int queue_mode = NULL_Q_MQ; +module_param(queue_mode, int, S_IRUGO); +MODULE_PARM_DESC(use_mq, "Use blk-mq interface (0=bio,1=rq,2=multiqueue)"); + +static int gb = 250; +module_param(gb, int, S_IRUGO); +MODULE_PARM_DESC(gb, "Size in GB"); + +static int bs = 512; +module_param(bs, int, S_IRUGO); +MODULE_PARM_DESC(bs, "Block size (in bytes)"); + +static int nr_devices = 2; +module_param(nr_devices, int, S_IRUGO); +MODULE_PARM_DESC(nr_devices, "Number of devices to register"); + +static int irqmode = NULL_IRQ_SOFTIRQ; +module_param(irqmode, int, S_IRUGO); +MODULE_PARM_DESC(irqmode, "IRQ completion 
handler. 0-none, 1-softirq, 2-timer"); + +static int completion_nsec = 10000; +module_param(completion_nsec, int, S_IRUGO); +MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns"); + +static int hw_queue_depth = 64; +module_param(hw_queue_depth, int, S_IRUGO); +MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64"); + +static bool use_per_node_hctx = true; +module_param(use_per_node_hctx, bool, S_IRUGO); +MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: true"); + +static void put_tag(struct nullb_queue *nq, unsigned int tag) +{ + clear_bit_unlock(tag, nq->tag_map); + + if (waitqueue_active(&nq->wait)) + wake_up(&nq->wait); +} + +static unsigned int get_tag(struct nullb_queue *nq) +{ + unsigned int tag; + + do { + tag = find_first_zero_bit(nq->tag_map, nq->queue_depth); + if (tag >= nq->queue_depth) + return -1U; + } while (test_and_set_bit_lock(tag, nq->tag_map)); + + return tag; +} + +static void free_cmd(struct nullb_cmd *cmd) +{ + put_tag(cmd->nq, cmd->tag); +} + +static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) +{ + struct nullb_cmd *cmd; + unsigned int tag; + + tag = get_tag(nq); + if (tag != -1U) { + cmd = &nq->cmds[tag]; + cmd->tag = tag; + cmd->nq = nq; + return cmd; + } + + return NULL; +} + +static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait) +{ + struct nullb_cmd *cmd; + DEFINE_WAIT(wait); + + cmd = __alloc_cmd(nq); + if (cmd || !can_wait) + return cmd; + + do { + prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE); + cmd = __alloc_cmd(nq); + if (cmd) + break; + + io_schedule(); + } while (1); + + finish_wait(&nq->wait, &wait); + return cmd; +} + +static void end_cmd(struct nullb_cmd *cmd) +{ + if (cmd->rq) { + if (queue_mode == NULL_Q_MQ) + blk_mq_end_io(cmd->rq, 0); + else { + INIT_LIST_HEAD(&cmd->rq->queuelist); + blk_end_request_all(cmd->rq, 0); + } + } else if (cmd->bio) + 
bio_endio(cmd->bio, 0); + + if (queue_mode != NULL_Q_MQ) + free_cmd(cmd); +} + +static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) +{ + struct completion_queue *cq; + struct llist_node *entry; + struct nullb_cmd *cmd; + + cq = &per_cpu(completion_queues, smp_processor_id()); + + while ((entry = llist_del_all(&cq->list)) != NULL) { + do { + cmd = container_of(entry, struct nullb_cmd, ll_list); + end_cmd(cmd); + entry = entry->next; + } while (entry); + } + + return HRTIMER_NORESTART; +} + +static void null_cmd_end_timer(struct nullb_cmd *cmd) +{ + struct completion_queue *cq = &per_cpu(completion_queues, get_cpu()); + + cmd->ll_list.next = NULL; + if (llist_add(&cmd->ll_list, &cq->list)) { + ktime_t kt = ktime_set(0, completion_nsec); + + hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL); + } + + put_cpu(); +} + +static void null_softirq_done_fn(struct request *rq) +{ + blk_end_request_all(rq, 0); +} + +#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) + +static void null_ipi_cmd_end_io(void *data) +{ + struct completion_queue *cq; + struct llist_node *entry, *next; + struct nullb_cmd *cmd; + + cq = &per_cpu(completion_queues, smp_processor_id()); + + entry = llist_del_all(&cq->list); + + while (entry) { + next = entry->next; + cmd = llist_entry(entry, struct nullb_cmd, ll_list); + end_cmd(cmd); + entry = next; + } +} + +static void null_cmd_end_ipi(struct nullb_cmd *cmd) +{ + struct call_single_data *data = &cmd->csd; + int cpu = get_cpu(); + struct completion_queue *cq = &per_cpu(completion_queues, cpu); + + cmd->ll_list.next = NULL; + + if (llist_add(&cmd->ll_list, &cq->list)) { + data->func = null_ipi_cmd_end_io; + data->flags = 0; + __smp_call_function_single(cpu, data, 0); + } + + put_cpu(); +} + +#endif /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */ + +static inline void null_handle_cmd(struct nullb_cmd *cmd) +{ + /* Complete IO by inline, softirq or timer */ + switch (irqmode) { + case NULL_IRQ_NONE: + end_cmd(cmd); 
+ break; + case NULL_IRQ_SOFTIRQ: +#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) + null_cmd_end_ipi(cmd); +#else + end_cmd(cmd); +#endif + break; + case NULL_IRQ_TIMER: + null_cmd_end_timer(cmd); + break; + } +} + +static struct nullb_queue *nullb_to_queue(struct nullb *nullb) +{ + int index = 0; + + if (nullb->nr_queues != 1) + index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues); + + return &nullb->queues[index]; +} + +static void null_queue_bio(struct request_queue *q, struct bio *bio) +{ + struct nullb *nullb = q->queuedata; + struct nullb_queue *nq = nullb_to_queue(nullb); + struct nullb_cmd *cmd; + + cmd = alloc_cmd(nq, 1); + cmd->bio = bio; + + null_handle_cmd(cmd); +} + +static int null_rq_prep_fn(struct request_queue *q, struct request *req) +{ + struct nullb *nullb = q->queuedata; + struct nullb_queue *nq = nullb_to_queue(nullb); + struct nullb_cmd *cmd; + + cmd = alloc_cmd(nq, 0); + if (cmd) { + cmd->rq = req; + req->special = cmd; + return BLKPREP_OK; + } + + return BLKPREP_DEFER; +} + +static void null_request_fn(struct request_queue *q) +{ + struct request *rq; + + while ((rq = blk_fetch_request(q)) != NULL) { + struct nullb_cmd *cmd = rq->special; + + spin_unlock_irq(q->queue_lock); + null_handle_cmd(cmd); + spin_lock_irq(q->queue_lock); + } +} + +static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) +{ + struct nullb_cmd *cmd = rq->special; + + cmd->rq = rq; + cmd->nq = hctx->driver_data; + + null_handle_cmd(cmd); + return BLK_MQ_RQ_QUEUE_OK; +} + +static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index) +{ + return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, + hctx_index); +} + +static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index) +{ + kfree(hctx); +} + +static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, + unsigned int index) +{ + struct nullb *nullb = data; + struct nullb_queue *nq = 
&nullb->queues[index]; + + init_waitqueue_head(&nq->wait); + nq->queue_depth = nullb->queue_depth; + nullb->nr_queues++; + hctx->driver_data = nq; + + return 0; +} + +static struct blk_mq_ops null_mq_ops = { + .queue_rq = null_queue_rq, + .map_queue = blk_mq_map_queue, + .init_hctx = null_init_hctx, +}; + +static struct blk_mq_reg null_mq_reg = { + .ops = &null_mq_ops, + .queue_depth = 64, + .cmd_size = sizeof(struct nullb_cmd), + .flags = BLK_MQ_F_SHOULD_MERGE, +}; + +static void null_del_dev(struct nullb *nullb) +{ + list_del_init(&nullb->list); + + del_gendisk(nullb->disk); + if (queue_mode == NULL_Q_MQ) + blk_mq_free_queue(nullb->q); + else + blk_cleanup_queue(nullb->q); + put_disk(nullb->disk); + kfree(nullb); +} + +static int null_open(struct block_device *bdev, fmode_t mode) +{ + return 0; +} + +static void null_release(struct gendisk *disk, fmode_t mode) +{ +} + +static const struct block_device_operations null_fops = { + .owner = THIS_MODULE, + .open = null_open, + .release = null_release, +}; + +static int setup_commands(struct nullb_queue *nq) +{ + struct nullb_cmd *cmd; + int i, tag_size; + + nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL); + if (!nq->cmds) + return 1; + + tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG; + nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL); + if (!nq->tag_map) { + kfree(nq->cmds); + return 1; + } + + for (i = 0; i < nq->queue_depth; i++) { + cmd = &nq->cmds[i]; + INIT_LIST_HEAD(&cmd->list); + cmd->ll_list.next = NULL; + cmd->tag = -1U; + } + + return 0; +} + +static void cleanup_queue(struct nullb_queue *nq) +{ + kfree(nq->tag_map); + kfree(nq->cmds); +} + +static void cleanup_queues(struct nullb *nullb) +{ + int i; + + for (i = 0; i < nullb->nr_queues; i++) + cleanup_queue(&nullb->queues[i]); + + kfree(nullb->queues); +} + +static int setup_queues(struct nullb *nullb) +{ + struct nullb_queue *nq; + int i; + + nullb->queues = kzalloc(submit_queues * sizeof(*nq), 
GFP_KERNEL); + if (!nullb->queues) + return 1; + + nullb->nr_queues = 0; + nullb->queue_depth = hw_queue_depth; + + if (queue_mode == NULL_Q_MQ) + return 0; + + for (i = 0; i < submit_queues; i++) { + nq = &nullb->queues[i]; + init_waitqueue_head(&nq->wait); + nq->queue_depth = hw_queue_depth; + if (setup_commands(nq)) + break; + nullb->nr_queues++; + } + + if (i == submit_queues) + return 0; + + cleanup_queues(nullb); + return 1; +} + +static int null_add_dev(void) +{ + struct gendisk *disk; + struct nullb *nullb; + sector_t size; + + nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node); + if (!nullb) + return -ENOMEM; + + spin_lock_init(&nullb->lock); + + if (setup_queues(nullb)) + goto err; + + if (queue_mode == NULL_Q_MQ) { + null_mq_reg.numa_node = home_node; + null_mq_reg.queue_depth = hw_queue_depth; + + if (use_per_node_hctx) { + null_mq_reg.ops->alloc_hctx = null_alloc_hctx; + null_mq_reg.ops->free_hctx = null_free_hctx; + + null_mq_reg.nr_hw_queues = nr_online_nodes; + } else { + null_mq_reg.ops->alloc_hctx = blk_mq_alloc_single_hw_queue; + null_mq_reg.ops->free_hctx = blk_mq_free_single_hw_queue; + + null_mq_reg.nr_hw_queues = submit_queues; + } + + nullb->q = blk_mq_init_queue(&null_mq_reg, nullb); + } else if (queue_mode == NULL_Q_BIO) { + nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node); + blk_queue_make_request(nullb->q, null_queue_bio); + } else { + nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node); + blk_queue_prep_rq(nullb->q, null_rq_prep_fn); + if (nullb->q) + blk_queue_softirq_done(nullb->q, null_softirq_done_fn); + } + + if (!nullb->q) + goto queue_fail; + + nullb->q->queuedata = nullb; + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q); + + disk = nullb->disk = alloc_disk_node(1, home_node); + if (!disk) { +queue_fail: + if (queue_mode == NULL_Q_MQ) + blk_mq_free_queue(nullb->q); + else + blk_cleanup_queue(nullb->q); + cleanup_queues(nullb); +err: + kfree(nullb); + return -ENOMEM; + } + + 
mutex_lock(&lock); + list_add_tail(&nullb->list, &nullb_list); + nullb->index = nullb_indexes++; + mutex_unlock(&lock); + + blk_queue_logical_block_size(nullb->q, bs); + blk_queue_physical_block_size(nullb->q, bs); + + size = gb * 1024 * 1024 * 1024ULL; + sector_div(size, bs); + set_capacity(disk, size); + + disk->flags |= GENHD_FL_EXT_DEVT; + disk->major = null_major; + disk->first_minor = nullb->index; + disk->fops = &null_fops; + disk->private_data = nullb; + disk->queue = nullb->q; + sprintf(disk->disk_name, "nullb%d", nullb->index); + add_disk(disk); + return 0; +} + +static int __init null_init(void) +{ + unsigned int i; + +#if !defined(CONFIG_SMP) || !defined(CONFIG_USE_GENERIC_SMP_HELPERS) + if (irqmode == NULL_IRQ_SOFTIRQ) { + pr_warn("null_blk: softirq completions not available.\n"); + pr_warn("null_blk: using direct completions.\n"); + irqmode = NULL_IRQ_NONE; + } +#endif + + if (submit_queues > nr_cpu_ids) + submit_queues = nr_cpu_ids; + else if (!submit_queues) + submit_queues = 1; + + mutex_init(&lock); + + /* Initialize a separate list for each CPU for issuing softirqs */ + for_each_possible_cpu(i) { + struct completion_queue *cq = &per_cpu(completion_queues, i); + + init_llist_head(&cq->list); + + if (irqmode != NULL_IRQ_TIMER) + continue; + + hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cq->timer.function = null_cmd_timer_expired; + } + + null_major = register_blkdev(0, "nullb"); + if (null_major < 0) + return null_major; + + for (i = 0; i < nr_devices; i++) { + if (null_add_dev()) { + unregister_blkdev(null_major, "nullb"); + return -EINVAL; + } + } + + pr_info("null: module loaded\n"); + return 0; +} + +static void __exit null_exit(void) +{ + struct nullb *nullb; + + unregister_blkdev(null_major, "nullb"); + + mutex_lock(&lock); + while (!list_empty(&nullb_list)) { + nullb = list_entry(nullb_list.next, struct nullb, list); + null_del_dev(nullb); + } + mutex_unlock(&lock); +} + +module_init(null_init); +module_exit(null_exit); + 
+MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index a4660bbee8a6..8d53ed293606 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1336,57 +1336,6 @@ static int blkfront_probe(struct xenbus_device *dev, return 0; } -/* - * This is a clone of md_trim_bio, used to split a bio into smaller ones - */ -static void trim_bio(struct bio *bio, int offset, int size) -{ - /* 'bio' is a cloned bio which we need to trim to match - * the given offset and size. - * This requires adjusting bi_sector, bi_size, and bi_io_vec - */ - int i; - struct bio_vec *bvec; - int sofar = 0; - - size <<= 9; - if (offset == 0 && size == bio->bi_size) - return; - - bio->bi_sector += offset; - bio->bi_size = size; - offset <<= 9; - clear_bit(BIO_SEG_VALID, &bio->bi_flags); - - while (bio->bi_idx < bio->bi_vcnt && - bio->bi_io_vec[bio->bi_idx].bv_len <= offset) { - /* remove this whole bio_vec */ - offset -= bio->bi_io_vec[bio->bi_idx].bv_len; - bio->bi_idx++; - } - if (bio->bi_idx < bio->bi_vcnt) { - bio->bi_io_vec[bio->bi_idx].bv_offset += offset; - bio->bi_io_vec[bio->bi_idx].bv_len -= offset; - } - /* avoid any complications with bi_idx being non-zero*/ - if (bio->bi_idx) { - memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx, - (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec)); - bio->bi_vcnt -= bio->bi_idx; - bio->bi_idx = 0; - } - /* Make sure vcnt and last bv are not too big */ - bio_for_each_segment(bvec, bio, i) { - if (sofar + bvec->bv_len > size) - bvec->bv_len = size - sofar; - if (bvec->bv_len == 0) { - bio->bi_vcnt = i; - break; - } - sofar += bvec->bv_len; - } -} - static void split_bio_end(struct bio *bio, int error) { struct split_bio *split_bio = bio->bi_private; @@ -1522,7 +1471,7 @@ static int blkif_recover(struct blkfront_info *info) (unsigned int)(bio->bi_size >> 9) - offset); cloned_bio = bio_clone(bio, GFP_NOIO); BUG_ON(cloned_bio == NULL); 
- trim_bio(cloned_bio, offset, size); + bio_trim(cloned_bio, offset, size); cloned_bio->bi_private = split_bio; cloned_bio->bi_end_io = split_bio_end; submit_bio(cloned_bio->bi_rw, cloned_bio); |