Diffstat (limited to 'drivers/md/dm-mpath.c')
-rw-r--r-- drivers/md/dm-mpath.c | 354
1 file changed, 318 insertions(+), 36 deletions(-)
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 52baf8a5b0f4..7eac080fcb18 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -7,7 +7,8 @@
 #include <linux/device-mapper.h>
 
-#include "dm.h"
+#include "dm-rq.h"
+#include "dm-bio-record.h"
 #include "dm-path-selector.h"
 #include "dm-uevent.h"
@@ -89,6 +90,8 @@ struct multipath {
 	atomic_t pg_init_in_progress;	/* Only one pg_init allowed at once */
 	atomic_t pg_init_count;		/* Number of times pg_init called */
 
+	unsigned queue_mode;
+
 	/*
 	 * We must use a mempool of dm_mpath_io structs so that we
 	 * can resubmit bios on error.
@@ -97,10 +100,13 @@ struct multipath {
 	struct mutex work_mutex;
 	struct work_struct trigger_event;
+
+	struct work_struct process_queued_bios;
+	struct bio_list queued_bios;
 };
 
 /*
- * Context information attached to each bio we process.
+ * Context information attached to each io we process.
  */
 struct dm_mpath_io {
 	struct pgpath *pgpath;
@@ -114,6 +120,7 @@ static struct kmem_cache *_mpio_cache;
 static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
 static void trigger_event(struct work_struct *work);
 static void activate_path(struct work_struct *work);
+static void process_queued_bios(struct work_struct *work);
 
 /*-----------------------------------------------
  * Multipath state flags.
@@ -185,7 +192,7 @@ static void free_priority_group(struct priority_group *pg,
 	kfree(pg);
 }
 
-static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq)
+static struct multipath *alloc_multipath(struct dm_target *ti)
 {
 	struct multipath *m;
@@ -203,15 +210,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq)
 		mutex_init(&m->work_mutex);
 
 		m->mpio_pool = NULL;
-		if (!use_blk_mq) {
-			unsigned min_ios = dm_get_reserved_rq_based_ios();
-
-			m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
-			if (!m->mpio_pool) {
-				kfree(m);
-				return NULL;
-			}
-		}
+		m->queue_mode = DM_TYPE_NONE;
 
 		m->ti = ti;
 		ti->private = m;
@@ -220,6 +219,39 @@ static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq)
 	return m;
 }
 
+static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
+{
+	if (m->queue_mode == DM_TYPE_NONE) {
+		/*
+		 * Default to request-based.
+		 */
+		if (dm_use_blk_mq(dm_table_get_md(ti->table)))
+			m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
+		else
+			m->queue_mode = DM_TYPE_REQUEST_BASED;
+	}
+
+	if (m->queue_mode == DM_TYPE_REQUEST_BASED) {
+		unsigned min_ios = dm_get_reserved_rq_based_ios();
+
+		m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
+		if (!m->mpio_pool)
+			return -ENOMEM;
+	}
+	else if (m->queue_mode == DM_TYPE_BIO_BASED) {
+		INIT_WORK(&m->process_queued_bios, process_queued_bios);
+		/*
+		 * bio-based doesn't support any direct scsi_dh management;
+		 * it just discovers if a scsi_dh is attached.
+		 */
+		set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
+	}
+
+	dm_table_set_type(ti->table, m->queue_mode);
+
+	return 0;
+}
+
 static void free_multipath(struct multipath *m)
 {
 	struct priority_group *pg, *tmp;
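The constructor is now split in two because the mempool decision depends on ->queue_mode, which is only known after the feature arguments have been parsed: multipath_ctr() calls alloc_multipath() before parsing and alloc_multipath_stage2() afterwards. Below is a minimal userspace model of the mode-selection cascade above; the enum values and the use_blk_mq flag are local stand-ins, not the kernel's DM_TYPE_* definitions.

/*
 * Minimal userspace model of the mode selection in
 * alloc_multipath_stage2().  Enum values and the use_blk_mq flag
 * are stand-ins, not the kernel's DM_TYPE_* definitions.
 */
#include <stdio.h>

enum queue_mode { TYPE_NONE, TYPE_BIO_BASED, TYPE_REQUEST_BASED, TYPE_MQ_REQUEST_BASED };

static enum queue_mode resolve_queue_mode(enum queue_mode requested, int use_blk_mq)
{
	if (requested != TYPE_NONE)
		return requested;	/* an explicit queue_mode feature wins */
	/* otherwise default to request-based, matching the underlying device */
	return use_blk_mq ? TYPE_MQ_REQUEST_BASED : TYPE_REQUEST_BASED;
}

int main(void)
{
	printf("%d\n", resolve_queue_mode(TYPE_NONE, 1));	/* 3: TYPE_MQ_REQUEST_BASED */
	printf("%d\n", resolve_queue_mode(TYPE_BIO_BASED, 1));	/* 1: TYPE_BIO_BASED */
	return 0;
}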
@@ -272,6 +304,41 @@ static void clear_request_fn_mpio(struct multipath *m, union map_info *info)
 	}
 }
 
+static size_t multipath_per_bio_data_size(void)
+{
+	return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details);
+}
+
+static struct dm_mpath_io *get_mpio_from_bio(struct bio *bio)
+{
+	return dm_per_bio_data(bio, multipath_per_bio_data_size());
+}
+
+static struct dm_bio_details *get_bio_details_from_bio(struct bio *bio)
+{
+	/* dm_bio_details is immediately after the dm_mpath_io in bio's per-bio-data */
+	struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
+	void *bio_details = mpio + 1;
+
+	return bio_details;
+}
+
+static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mpio_p,
+					struct dm_bio_details **bio_details_p)
+{
+	struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
+	struct dm_bio_details *bio_details = get_bio_details_from_bio(bio);
+
+	memset(mpio, 0, sizeof(*mpio));
+	memset(bio_details, 0, sizeof(*bio_details));
+	dm_bio_record(bio_details, bio);
+
+	if (mpio_p)
+		*mpio_p = mpio;
+	if (bio_details_p)
+		*bio_details_p = bio_details;
+}
+
 /*-----------------------------------------------
  * Path selection
  *-----------------------------------------------*/
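These helpers carve two structures out of the single per-bio-data region that DM core reserves for each bio (sized later through ti->per_io_data_size): the dm_mpath_io sits at the start and the dm_bio_details follows immediately, hence the mpio + 1 arithmetic. A standalone sketch of that layout, with placeholder struct bodies standing in for the kernel's:

/*
 * Userspace sketch of the per-bio-data layout used above: one
 * contiguous region holds a struct dm_mpath_io immediately followed
 * by a struct dm_bio_details.  The struct bodies are placeholders.
 */
#include <stdio.h>
#include <stdlib.h>

struct dm_mpath_io { void *pgpath; size_t nr_bytes; };
struct dm_bio_details { void *bi_bdev; unsigned long bi_sector; };

static size_t per_bio_data_size(void)
{
	return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details);
}

int main(void)
{
	void *data = calloc(1, per_bio_data_size());
	struct dm_mpath_io *mpio;
	struct dm_bio_details *details;

	if (!data)
		return 1;
	mpio = data;
	/* details sit right after the mpio, cf. get_bio_details_from_bio() */
	details = (struct dm_bio_details *)(mpio + 1);
	printf("mpio at %p, details at %p (offset %td)\n",
	       (void *)mpio, (void *)details, (char *)details - (char *)data);
	free(data);
	return 0;
}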
@@ -431,16 +498,26 @@ failed:
  * and multipath_resume() calls and we have no need to check
  * for the DMF_NOFLUSH_SUSPENDING flag.
  */
-static int must_push_back(struct multipath *m)
+static bool __must_push_back(struct multipath *m)
+{
+	return ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) !=
+		 test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) &&
+		dm_noflush_suspending(m->ti));
+}
+
+static bool must_push_back_rq(struct multipath *m)
 {
 	return (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) ||
-		((test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) !=
-		  test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) &&
-		 dm_noflush_suspending(m->ti)));
+		__must_push_back(m));
+}
+
+static bool must_push_back_bio(struct multipath *m)
+{
+	return __must_push_back(m);
 }
 
 /*
- * Map cloned requests
+ * Map cloned requests (request-based multipath)
  */
 static int __multipath_map(struct dm_target *ti, struct request *clone,
 			   union map_info *map_context,
@@ -459,7 +536,7 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
 		pgpath = choose_pgpath(m, nr_bytes);
 
 	if (!pgpath) {
-		if (!must_push_back(m))
+		if (!must_push_back_rq(m))
 			r = -EIO;	/* Failed */
 		return r;
 	} else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
@@ -530,6 +607,108 @@ static void multipath_release_clone(struct request *clone)
 }
 
 /*
+ * Map cloned bios (bio-based multipath)
+ */
+static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_mpath_io *mpio)
+{
+	size_t nr_bytes = bio->bi_iter.bi_size;
+	struct pgpath *pgpath;
+	unsigned long flags;
+	bool queue_io;
+
+	/* Do we need to select a new pgpath? */
+	pgpath = lockless_dereference(m->current_pgpath);
+	queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
+	if (!pgpath || !queue_io)
+		pgpath = choose_pgpath(m, nr_bytes);
+
+	if ((pgpath && queue_io) ||
+	    (!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) {
+		/* Queue for the daemon to resubmit */
+		spin_lock_irqsave(&m->lock, flags);
+		bio_list_add(&m->queued_bios, bio);
+		spin_unlock_irqrestore(&m->lock, flags);
+		/* PG_INIT_REQUIRED cannot be set without QUEUE_IO */
+		if (queue_io || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
+			pg_init_all_paths(m);
+		else if (!queue_io)
+			queue_work(kmultipathd, &m->process_queued_bios);
+		return DM_MAPIO_SUBMITTED;
+	}
+
+	if (!pgpath) {
+		if (!must_push_back_bio(m))
+			return -EIO;
+		return DM_MAPIO_REQUEUE;
+	}
+
+	mpio->pgpath = pgpath;
+	mpio->nr_bytes = nr_bytes;
+
+	bio->bi_error = 0;
+	bio->bi_bdev = pgpath->path.dev->bdev;
+	bio->bi_rw |= REQ_FAILFAST_TRANSPORT;
+
+	if (pgpath->pg->ps.type->start_io)
+		pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
+					      &pgpath->path,
+					      nr_bytes);
+	return DM_MAPIO_REMAPPED;
+}
+
+static int multipath_map_bio(struct dm_target *ti, struct bio *bio)
+{
+	struct multipath *m = ti->private;
+	struct dm_mpath_io *mpio = NULL;
+
+	multipath_init_per_bio_data(bio, &mpio, NULL);
+
+	return __multipath_map_bio(m, bio, mpio);
+}
+
+static void process_queued_bios_list(struct multipath *m)
+{
+	if (m->queue_mode == DM_TYPE_BIO_BASED)
+		queue_work(kmultipathd, &m->process_queued_bios);
+}
+
+static void process_queued_bios(struct work_struct *work)
+{
+	int r;
+	unsigned long flags;
+	struct bio *bio;
+	struct bio_list bios;
+	struct blk_plug plug;
+	struct multipath *m =
+		container_of(work, struct multipath, process_queued_bios);
+
+	bio_list_init(&bios);
+
+	spin_lock_irqsave(&m->lock, flags);
+
+	if (bio_list_empty(&m->queued_bios)) {
+		spin_unlock_irqrestore(&m->lock, flags);
+		return;
+	}
+
+	bio_list_merge(&bios, &m->queued_bios);
+	bio_list_init(&m->queued_bios);
+
+	spin_unlock_irqrestore(&m->lock, flags);
+
+	blk_start_plug(&plug);
+	while ((bio = bio_list_pop(&bios))) {
+		r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio));
+		if (r < 0 || r == DM_MAPIO_REQUEUE) {
+			bio->bi_error = r;
+			bio_endio(bio);
+		} else if (r == DM_MAPIO_REMAPPED)
+			generic_make_request(bio);
+	}
+	blk_finish_plug(&plug);
+}
+
+/*
  * If we run out of usable paths, should we queue I/O or error it?
  */
 static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
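process_queued_bios() uses a common drain idiom: the spinlock is held just long enough to steal the entire queued list (bio_list_merge() into a local list, then bio_list_init() on the shared one), and the bios are remapped and submitted outside the lock, under a blk_plug so consecutive submissions can merge. A userspace analogue of that steal-then-drain pattern; the node type and the "resubmit" step are illustrative only (build with cc -pthread):

/*
 * Userspace analogue of the drain pattern in process_queued_bios():
 * steal the whole queue under the lock, then process it unlocked.
 */
#include <pthread.h>
#include <stdio.h>

struct node { struct node *next; int id; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *queued;	/* shared list, protected by 'lock' */

static void drain(void)
{
	struct node *batch;

	pthread_mutex_lock(&lock);
	batch = queued;		/* take every queued item in one shot, cf. bio_list_merge() */
	queued = NULL;		/* reinitialize the shared list, cf. bio_list_init() */
	pthread_mutex_unlock(&lock);

	while (batch) {		/* submit outside the lock, cf. generic_make_request() */
		struct node *n = batch;
		batch = n->next;
		printf("resubmitting %d\n", n->id);
	}
}

int main(void)
{
	struct node b = { NULL, 2 }, a = { &b, 1 };

	pthread_mutex_lock(&lock);
	queued = &a;
	pthread_mutex_unlock(&lock);
	drain();
	return 0;
}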
@@ -557,8 +736,10 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
 
 	spin_unlock_irqrestore(&m->lock, flags);
 
-	if (!queue_if_no_path)
+	if (!queue_if_no_path) {
 		dm_table_run_md_queue_async(m->ti->table);
+		process_queued_bios_list(m);
+	}
 
 	return 0;
 }
@@ -798,6 +979,12 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
 	if (!hw_argc)
 		return 0;
 
+	if (m->queue_mode == DM_TYPE_BIO_BASED) {
+		dm_consume_args(as, hw_argc);
+		DMERR("bio-based multipath doesn't allow hardware handler args");
+		return 0;
+	}
+
 	m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
 
 	if (hw_argc > 1) {
@@ -833,7 +1020,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
 	const char *arg_name;
 
 	static struct dm_arg _args[] = {
-		{0, 6, "invalid number of feature args"},
+		{0, 8, "invalid number of feature args"},
 		{1, 50, "pg_init_retries must be between 1 and 50"},
 		{0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
 	};
@@ -873,6 +1060,24 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
 			continue;
 		}
 
+		if (!strcasecmp(arg_name, "queue_mode") &&
+		    (argc >= 1)) {
+			const char *queue_mode_name = dm_shift_arg(as);
+
+			if (!strcasecmp(queue_mode_name, "bio"))
+				m->queue_mode = DM_TYPE_BIO_BASED;
+			else if (!strcasecmp(queue_mode_name, "rq"))
+				m->queue_mode = DM_TYPE_REQUEST_BASED;
+			else if (!strcasecmp(queue_mode_name, "mq"))
+				m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
+			else {
+				ti->error = "Unknown 'queue_mode' requested";
+				r = -EINVAL;
+			}
+			argc--;
+			continue;
+		}
+
 		ti->error = "Unrecognised multipath feature request";
 		r = -EINVAL;
 	} while (argc && !r);
@@ -880,8 +1085,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
 	return r;
 }
 
-static int multipath_ctr(struct dm_target *ti, unsigned int argc,
-			 char **argv)
+static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
 {
 	/* target arguments */
 	static struct dm_arg _args[] = {
@@ -894,12 +1098,11 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
 	struct dm_arg_set as;
 	unsigned pg_count = 0;
 	unsigned next_pg_num;
-	bool use_blk_mq = dm_use_blk_mq(dm_table_get_md(ti->table));
 
 	as.argc = argc;
 	as.argv = argv;
 
-	m = alloc_multipath(ti, use_blk_mq);
+	m = alloc_multipath(ti);
 	if (!m) {
 		ti->error = "can't allocate multipath";
 		return -EINVAL;
 	}
@@ -909,6 +1112,10 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
 	if (r)
 		goto bad;
 
+	r = alloc_multipath_stage2(ti, m);
+	if (r)
+		goto bad;
+
 	r = parse_hw_handler(&as, m);
 	if (r)
 		goto bad;
@@ -958,7 +1165,9 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
 	ti->num_flush_bios = 1;
 	ti->num_discard_bios = 1;
 	ti->num_write_same_bios = 1;
-	if (use_blk_mq)
+	if (m->queue_mode == DM_TYPE_BIO_BASED)
+		ti->per_io_data_size = multipath_per_bio_data_size();
+	else if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
 		ti->per_io_data_size = sizeof(struct dm_mpath_io);
 
 	return 0;
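With the parsing above, the new optional feature is spelled queue_mode <bio|rq|mq> and consumes two feature arguments, which is why the upper bound rises from 6 to 8. Hypothetical example tables follow; the sector counts, major:minor numbers and path layout are made up for illustration:

# 1GiB bio-based multipath map over two paths:
dmsetup create mpath_bio --table \
  "0 2097152 multipath 2 queue_mode bio 0 1 1 round-robin 0 2 1 8:16 1 8:32 1"

# The same map pinned to the traditional request-based mode:
dmsetup create mpath_rq --table \
  "0 2097152 multipath 2 queue_mode rq 0 1 1 round-robin 0 2 1 8:16 1 8:32 1"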
@@ -1083,8 +1292,10 @@ static int reinstate_path(struct pgpath *pgpath)
 out:
 	spin_unlock_irqrestore(&m->lock, flags);
-	if (run_queue)
+	if (run_queue) {
 		dm_table_run_md_queue_async(m->ti->table);
+		process_queued_bios_list(m);
+	}
 
 	return r;
 }
@@ -1281,6 +1492,8 @@ static void pg_init_done(void *data, int errors)
 	}
 	clear_bit(MPATHF_QUEUE_IO, &m->flags);
 
+	process_queued_bios_list(m);
+
 	/*
 	 * Wake up any thread waiting to suspend.
 	 */
@@ -1328,7 +1541,7 @@ static int do_end_io(struct multipath *m, struct request *clone,
 	 * during end I/O handling, since those clone requests don't have
 	 * bio clones.  If we queue them inside the multipath target,
 	 * we need to make bio clones, that requires memory allocation.
-	 * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
+	 * (See drivers/md/dm-rq.c:end_clone_bio() about why the clone requests
 	 *  don't have bio clones.)
 	 * Instead of queueing the clone request here, we queue the original
 	 * request into dm core, which will remake a clone request and
@@ -1347,7 +1560,7 @@ static int do_end_io(struct multipath *m, struct request *clone,
 
 	if (!atomic_read(&m->nr_valid_paths)) {
 		if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
-			if (!must_push_back(m))
+			if (!must_push_back_rq(m))
 				r = -EIO;
 		} else {
 			if (error == -EBADE)
@@ -1381,6 +1594,64 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
 	return r;
 }
 
+static int do_end_io_bio(struct multipath *m, struct bio *clone,
+			 int error, struct dm_mpath_io *mpio)
+{
+	unsigned long flags;
+
+	if (!error)
+		return 0;	/* I/O complete */
+
+	if (noretry_error(error))
+		return error;
+
+	if (mpio->pgpath)
+		fail_path(mpio->pgpath);
+
+	if (!atomic_read(&m->nr_valid_paths)) {
+		if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
+			if (!must_push_back_bio(m))
+				return -EIO;
+			return DM_ENDIO_REQUEUE;
+		} else {
+			if (error == -EBADE)
+				return error;
+		}
+	}
+
+	/* Queue for the daemon to resubmit */
+	dm_bio_restore(get_bio_details_from_bio(clone), clone);
+
+	spin_lock_irqsave(&m->lock, flags);
+	bio_list_add(&m->queued_bios, clone);
+	spin_unlock_irqrestore(&m->lock, flags);
+	if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
+		queue_work(kmultipathd, &m->process_queued_bios);
+
+	return DM_ENDIO_INCOMPLETE;
+}
+
+static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int error)
+{
+	struct multipath *m = ti->private;
+	struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
+	struct pgpath *pgpath;
+	struct path_selector *ps;
+	int r;
+
+	BUG_ON(!mpio);
+
+	r = do_end_io_bio(m, clone, error, mpio);
+	pgpath = mpio->pgpath;
+	if (pgpath) {
+		ps = &pgpath->pg->ps;
+		if (ps->type->end_io)
+			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
+	}
+
+	return r;
+}
+
 /*
  * Suspend can't complete until all the I/O is processed so if
  * the last path fails we must error any remaining I/O.
  */
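do_end_io_bio() boils down to a small disposition table: success completes the bio, non-retryable errors propagate, and anything else is restored via dm_bio_restore() and queued for resubmission, failing outright only when no path remains and queueing is not allowed. A simplified userspace model of that decision; the constants and the no-retry test are stand-ins, and the must-push-back nuance is folded into the error branch:

/*
 * Userspace model of the do_end_io_bio() disposition above.
 * Constants are simplified stand-ins for the kernel's.
 */
#include <stdio.h>
#include <stdbool.h>

#define EIO 5
#define EBADE 52
enum { IO_COMPLETE, IO_ERROR, IO_QUEUED };

static int end_io_disposition(int error, bool path_left, bool queue_if_no_path)
{
	if (!error)
		return IO_COMPLETE;
	if (!path_left) {
		if (!queue_if_no_path)
			return IO_ERROR;	/* cf. -EIO / DM_ENDIO_REQUEUE */
		if (error == EBADE)
			return IO_ERROR;	/* reservation conflict: never retried */
	}
	return IO_QUEUED;	/* restore the bio and requeue it internally */
}

int main(void)
{
	printf("%d\n", end_io_disposition(EIO, false, true));	/* 2: IO_QUEUED */
	printf("%d\n", end_io_disposition(EIO, false, false));	/* 1: IO_ERROR */
	return 0;
}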
@@ -1454,7 +1725,9 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
 		DMEMIT("%u ", test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) +
 			      (m->pg_init_retries > 0) * 2 +
 			      (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 +
-			      test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags));
+			      test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags) +
+			      (m->queue_mode != DM_TYPE_REQUEST_BASED) * 2);
+
 		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
 			DMEMIT("queue_if_no_path ");
 		if (m->pg_init_retries)
@@ -1463,6 +1736,16 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
 			DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs);
 		if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags))
 			DMEMIT("retain_attached_hw_handler ");
+		if (m->queue_mode != DM_TYPE_REQUEST_BASED) {
+			switch(m->queue_mode) {
+			case DM_TYPE_BIO_BASED:
+				DMEMIT("queue_mode bio ");
+				break;
+			case DM_TYPE_MQ_REQUEST_BASED:
+				DMEMIT("queue_mode mq ");
+				break;
+			}
+		}
 	}
 
 	if (!m->hw_handler_name || type == STATUSTYPE_INFO)
@@ -1642,6 +1925,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
 		if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
 			pg_init_all_paths(m);
 		dm_table_run_md_queue_async(m->ti->table);
+		process_queued_bios_list(m);
 	}
 
 	/*
@@ -1748,7 +2032,7 @@ static int multipath_busy(struct dm_target *ti)
  *---------------------------------------------------------------*/
 static struct target_type multipath_target = {
 	.name = "multipath",
-	.version = {1, 11, 0},
+	.version = {1, 12, 0},
 	.features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE,
 	.module = THIS_MODULE,
 	.ctr = multipath_ctr,
@@ -1757,6 +2041,8 @@ static struct target_type multipath_target = {
 	.clone_and_map_rq = multipath_clone_and_map,
 	.release_clone_rq = multipath_release_clone,
 	.rq_end_io = multipath_end_io,
+	.map = multipath_map_bio,
+	.end_io = multipath_end_io_bio,
 	.presuspend = multipath_presuspend,
 	.postsuspend = multipath_postsuspend,
 	.resume = multipath_resume,
@@ -1771,14 +2057,14 @@ static int __init dm_multipath_init(void)
 {
 	int r;
 
-	/* allocate a slab for the dm_ios */
+	/* allocate a slab for the dm_mpath_ios */
 	_mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
 	if (!_mpio_cache)
 		return -ENOMEM;
 
 	r = dm_register_target(&multipath_target);
 	if (r < 0) {
-		DMERR("register failed %d", r);
+		DMERR("request-based register failed %d", r);
 		r = -EINVAL;
 		goto bad_register_target;
 	}
@@ -1804,10 +2090,6 @@ static int __init dm_multipath_init(void)
 		goto bad_alloc_kmpath_handlerd;
 	}
 
-	DMINFO("version %u.%u.%u loaded",
-	       multipath_target.version[0], multipath_target.version[1],
-	       multipath_target.version[2]);
-
 	return 0;
 
 bad_alloc_kmpath_handlerd:
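Note how the status output stays self-describing: queue_mode <mode> is two words, so the feature count emitted first gains 2 exactly when the mode differs from the request-based default. For the hypothetical bio-based table from the earlier example, dmsetup table would report something like:

0 2097152 multipath 2 queue_mode bio 0 1 1 round-robin 0 2 1 8:16 1 8:32 1

The version bump to 1.12.0 and the new .map/.end_io hooks let the one multipath target type serve both the request-based and the bio-based I/O paths.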