From 5686dee34dbfe0238c0274e0454fa0174ac0a57a Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Mon, 27 Apr 2020 20:39:11 -0400 Subject: dm multipath: use updated MPATHF_QUEUE_IO on mapping for bio-based mpath When adding devices that don't have a scsi_dh on a BIO based multipath, I was able to consistently hit the warning below and lock-up the system. The problem is that __map_bio reads the flag before it potentially being modified by choose_pgpath, and ends up using the older value. The WARN_ON below is not trivially linked to the issue. It goes like this: The activate_path delayed_work is not initialized for non-scsi_dh devices, but we always set MPATHF_QUEUE_IO, asking for initialization. That is fine, since MPATHF_QUEUE_IO would be cleared in choose_pgpath. Nevertheless, only for BIO-based mpath, we cache the flag before calling choose_pgpath, and use the older version when deciding if we should initialize the path. Therefore, we end up trying to initialize the paths, and calling the non-initialized activate_path work. [ 82.437100] ------------[ cut here ]------------ [ 82.437659] WARNING: CPU: 3 PID: 602 at kernel/workqueue.c:1624 __queue_delayed_work+0x71/0x90 [ 82.438436] Modules linked in: [ 82.438911] CPU: 3 PID: 602 Comm: systemd-udevd Not tainted 5.6.0-rc6+ #339 [ 82.439680] RIP: 0010:__queue_delayed_work+0x71/0x90 [ 82.440287] Code: c1 48 89 4a 50 81 ff 00 02 00 00 75 2a 4c 89 cf e9 94 d6 07 00 e9 7f e9 ff ff 0f 0b eb c7 0f 0b 48 81 7a 58 40 74 a8 94 74 a7 <0f> 0b 48 83 7a 48 00 74 a5 0f 0b eb a1 89 fe 4c 89 cf e9 c8 c4 07 [ 82.441719] RSP: 0018:ffffb738803977c0 EFLAGS: 00010007 [ 82.442121] RAX: ffffa086389f9740 RBX: 0000000000000002 RCX: 0000000000000000 [ 82.442718] RDX: ffffa086350dd930 RSI: ffffa0863d76f600 RDI: 0000000000000200 [ 82.443484] RBP: 0000000000000200 R08: 0000000000000000 R09: ffffa086350dd970 [ 82.444128] R10: 0000000000000000 R11: 0000000000000000 R12: ffffa086350dd930 [ 82.444773] R13: ffffa0863d76f600 R14: 0000000000000000 R15: ffffa08636738008 [ 82.445427] FS: 00007f6abfe9dd40(0000) GS:ffffa0863dd80000(0000) knlGS:00000 [ 82.446040] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 82.446478] CR2: 0000557d288db4e8 CR3: 0000000078b36000 CR4: 00000000000006e0 [ 82.447104] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 82.447561] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 82.448012] Call Trace: [ 82.448164] queue_delayed_work_on+0x6d/0x80 [ 82.448472] __pg_init_all_paths+0x7b/0xf0 [ 82.448714] pg_init_all_paths+0x26/0x40 [ 82.448980] __multipath_map_bio.isra.0+0x84/0x210 [ 82.449267] __map_bio+0x3c/0x1f0 [ 82.449468] __split_and_process_non_flush+0x14a/0x1b0 [ 82.449775] __split_and_process_bio+0xde/0x340 [ 82.450045] ? dm_get_live_table+0x5/0xb0 [ 82.450278] dm_process_bio+0x98/0x290 [ 82.450518] dm_make_request+0x54/0x120 [ 82.450778] generic_make_request+0xd2/0x3e0 [ 82.451038] ? submit_bio+0x3c/0x150 [ 82.451278] submit_bio+0x3c/0x150 [ 82.451492] mpage_readpages+0x129/0x160 [ 82.451756] ? bdev_evict_inode+0x1d0/0x1d0 [ 82.452033] read_pages+0x72/0x170 [ 82.452260] __do_page_cache_readahead+0x1ba/0x1d0 [ 82.452624] force_page_cache_readahead+0x96/0x110 [ 82.452903] generic_file_read_iter+0x84f/0xae0 [ 82.453192] ? __seccomp_filter+0x7c/0x670 [ 82.453547] new_sync_read+0x10e/0x190 [ 82.453883] vfs_read+0x9d/0x150 [ 82.454172] ksys_read+0x65/0xe0 [ 82.454466] do_syscall_64+0x4e/0x210 [ 82.454828] entry_SYSCALL_64_after_hwframe+0x49/0xbe [...] [ 82.462501] ---[ end trace bb39975e9cf45daa ]--- Cc: stable@vger.kernel.org Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 58fd137b6ae1..3e500098132f 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -585,10 +585,12 @@ static struct pgpath *__map_bio(struct multipath *m, struct bio *bio) /* Do we need to select a new pgpath? */ pgpath = READ_ONCE(m->current_pgpath); - queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags); - if (!pgpath || !queue_io) + if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags)) pgpath = choose_pgpath(m, bio->bi_iter.bi_size); + /* MPATHF_QUEUE_IO might have been cleared by choose_pgpath. */ + queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags); + if ((pgpath && queue_io) || (!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) { /* Queue for the daemon to resubmit */ -- cgit v1.2.3 From 2361ae595352dec015d14292f1b539242d8446d6 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Mon, 20 Apr 2020 22:29:09 +0200 Subject: dm mpath: switch paths in dm_blk_ioctl() code path SCSI LUN passthrough code such as qemu's "scsi-block" device model pass every IO to the host via SG_IO ioctls. Currently, dm-multipath calls choose_pgpath() only in the block IO code path, not in the ioctl code path (unless current_pgpath is NULL). This has the effect that no path switching and thus no load balancing is done for SCSI-passthrough IO, unless the active path fails. Fix this by using the same logic in multipath_prepare_ioctl() as in multipath_clone_and_map(). Note: The allegedly best path selection algorithm, service-time, still wouldn't work perfectly, because the io size of the current request is always set to 0. Changing that for the IO passthrough case would require the ioctl cmd and arg to be passed to dm's prepare_ioctl() method. Signed-off-by: Martin Wilck Reviewed-by: Hannes Reinecke Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 3e500098132f..e0c800cf87a9 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1918,7 +1918,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti, int r; current_pgpath = READ_ONCE(m->current_pgpath); - if (!current_pgpath) + if (!current_pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags)) current_pgpath = choose_pgpath(m, 0); if (current_pgpath) { -- cgit v1.2.3 From 087615bf3acdafd0ba7c7c9ed5286e7b7c80fe1b Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Thu, 30 Apr 2020 16:48:29 -0400 Subject: dm mpath: pass IO start time to path selector The HST path selector needs this information to perform path prediction. For request-based mpath, struct request's io_start_time_ns is used, while for bio-based, use the start_time stored in dm_io. Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 9 ++++++--- drivers/md/dm-path-selector.h | 2 +- drivers/md/dm-queue-length.c | 2 +- drivers/md/dm-service-time.c | 2 +- drivers/md/dm.c | 9 +++++++++ include/linux/device-mapper.h | 2 ++ 6 files changed, 20 insertions(+), 6 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index e0c800cf87a9..74246d7c7d68 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -567,7 +567,8 @@ static void multipath_release_clone(struct request *clone, if (pgpath && pgpath->pg->ps.type->end_io) pgpath->pg->ps.type->end_io(&pgpath->pg->ps, &pgpath->path, - mpio->nr_bytes); + mpio->nr_bytes, + clone->io_start_time_ns); } blk_put_request(clone); @@ -1617,7 +1618,8 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone, struct path_selector *ps = &pgpath->pg->ps; if (ps->type->end_io) - ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes); + ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes, + clone->io_start_time_ns); } return r; @@ -1661,7 +1663,8 @@ done: struct path_selector *ps = &pgpath->pg->ps; if (ps->type->end_io) - ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes); + ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes, + dm_start_time_ns_from_clone(clone)); } return r; diff --git a/drivers/md/dm-path-selector.h b/drivers/md/dm-path-selector.h index b6eb5365b1a4..c47bc0e20275 100644 --- a/drivers/md/dm-path-selector.h +++ b/drivers/md/dm-path-selector.h @@ -74,7 +74,7 @@ struct path_selector_type { int (*start_io) (struct path_selector *ps, struct dm_path *path, size_t nr_bytes); int (*end_io) (struct path_selector *ps, struct dm_path *path, - size_t nr_bytes); + size_t nr_bytes, u64 start_time); }; /* Register a path selector */ diff --git a/drivers/md/dm-queue-length.c b/drivers/md/dm-queue-length.c index 969c4f1a3633..5fd018d18418 100644 --- a/drivers/md/dm-queue-length.c +++ b/drivers/md/dm-queue-length.c @@ -227,7 +227,7 @@ static int ql_start_io(struct path_selector *ps, struct dm_path *path, } static int ql_end_io(struct path_selector *ps, struct dm_path *path, - size_t nr_bytes) + size_t nr_bytes, u64 start_time) { struct path_info *pi = path->pscontext; diff --git a/drivers/md/dm-service-time.c b/drivers/md/dm-service-time.c index f006a9005593..9cfda665e9eb 100644 --- a/drivers/md/dm-service-time.c +++ b/drivers/md/dm-service-time.c @@ -309,7 +309,7 @@ static int st_start_io(struct path_selector *ps, struct dm_path *path, } static int st_end_io(struct path_selector *ps, struct dm_path *path, - size_t nr_bytes) + size_t nr_bytes, u64 start_time) { struct path_info *pi = path->pscontext; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index db9e46114653..2fcb932eb4bd 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -675,6 +675,15 @@ static bool md_in_flight(struct mapped_device *md) return md_in_flight_bios(md); } +u64 dm_start_time_ns_from_clone(struct bio *bio) +{ + struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); + struct dm_io *io = tio->io; + + return jiffies_to_nsecs(io->start_time); +} +EXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone); + static void start_io_acct(struct dm_io *io) { struct mapped_device *md = io->md; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index af48d9da3916..934037d938b9 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -332,6 +332,8 @@ void *dm_per_bio_data(struct bio *bio, size_t data_size); struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size); unsigned dm_bio_get_target_bio_nr(const struct bio *bio); +u64 dm_start_time_ns_from_clone(struct bio *bio); + int dm_register_target(struct target_type *t); void dm_unregister_target(struct target_type *t); -- cgit v1.2.3 From ac75b09fc62df441eee90fecfe9b2a6ca24976f2 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 14 May 2020 12:55:39 -0400 Subject: dm: use DMDEBUG macros now that they use pr_debug variants Now that DMDEBUG uses pr_debug and DMDEBUG_LIMIT uses pr_debug_ratelimited cleanup DM's 2 direct pr_debug callers to use them to get the benefit of consistent DM_FMT formatting of debugging messages. While doing so, dm-mpath.c:dm_report_EIO() was switched over to using DMDEBUG_LIMIT due to the potential for error handling floods in the IO completion path. Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 12 ++++++------ drivers/md/dm.c | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 74246d7c7d68..95f16d816585 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -439,7 +439,7 @@ failed: } /* - * dm_report_EIO() is a macro instead of a function to make pr_debug() + * dm_report_EIO() is a macro instead of a function to make pr_debug_ratelimited() * report the function name and line number of the function from which * it has been invoked. */ @@ -447,11 +447,11 @@ failed: do { \ struct mapped_device *md = dm_table_get_md((m)->ti->table); \ \ - pr_debug("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d\n", \ - dm_device_name(md), \ - test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags), \ - test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags), \ - dm_noflush_suspending((m)->ti)); \ + DMDEBUG_LIMIT("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d", \ + dm_device_name(md), \ + test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags), \ + test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags), \ + dm_noflush_suspending((m)->ti)); \ } while (0) /* diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 2fcb932eb4bd..1fae647ef108 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2609,7 +2609,7 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map, if (noflush) set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); else - pr_debug("%s: suspending with flush\n", dm_device_name(md)); + DMDEBUG("%s: suspending with flush", dm_device_name(md)); /* * This gets reverted if there's an error later and the targets -- cgit v1.2.3 From a862e4e2154289fc12aa9e70f33614d9c70f3be4 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 26 May 2020 16:06:56 -0400 Subject: dm mpath: simplify __must_push_back Remove micro-optimization that infers device is between presuspend and resume (was done purely to avoid call to dm_noflush_suspending, which isn't expensive anyway). Remove flags argument since they are no longer checked. And remove must_push_back_bio() since it was simply a call to __must_push_back(). Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 95f16d816585..4c34d037aa35 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -457,33 +457,15 @@ do { \ /* * Check whether bios must be queued in the device-mapper core rather * than here in the target. - * - * If MPATHF_QUEUE_IF_NO_PATH and MPATHF_SAVED_QUEUE_IF_NO_PATH hold - * the same value then we are not between multipath_presuspend() - * and multipath_resume() calls and we have no need to check - * for the DMF_NOFLUSH_SUSPENDING flag. */ -static bool __must_push_back(struct multipath *m, unsigned long flags) +static bool __must_push_back(struct multipath *m) { - return ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &flags) != - test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &flags)) && - dm_noflush_suspending(m->ti)); + return dm_noflush_suspending(m->ti); } -/* - * Following functions use READ_ONCE to get atomic access to - * all m->flags to avoid taking spinlock - */ static bool must_push_back_rq(struct multipath *m) { - unsigned long flags = READ_ONCE(m->flags); - return test_bit(MPATHF_QUEUE_IF_NO_PATH, &flags) || __must_push_back(m, flags); -} - -static bool must_push_back_bio(struct multipath *m) -{ - unsigned long flags = READ_ONCE(m->flags); - return __must_push_back(m, flags); + return test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) || __must_push_back(m); } /* @@ -620,7 +602,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, return DM_MAPIO_SUBMITTED; if (!pgpath) { - if (must_push_back_bio(m)) + if (__must_push_back(m)) return DM_MAPIO_REQUEUE; dm_report_EIO(m); return DM_MAPIO_KILL; @@ -1642,7 +1624,7 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, if (atomic_read(&m->nr_valid_paths) == 0 && !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { - if (must_push_back_bio(m)) { + if (__must_push_back(m)) { r = DM_ENDIO_REQUEUE; } else { dm_report_EIO(m); -- cgit v1.2.3 From 553ec94cb4b4937b48f81e27de33f71325d1a227 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 27 May 2020 16:32:51 -0400 Subject: dm mpath: restrict queue_if_no_path state machine Do not allow saving disabled queue_if_no_path if already saved as enabled; implies multiple suspends (which shouldn't ever happen). Log if this unlikely scenario is ever triggered. Also, only write MPATHF_SAVED_QUEUE_IF_NO_PATH during presuspend or if "fail_if_no_path" message. MPATHF_SAVED_QUEUE_IF_NO_PATH is no longer always modified, e.g.: even if queue_if_no_path()'s save_old_value argument wasn't set. This just implies a bit tighter control over the management of MPATHF_SAVED_QUEUE_IF_NO_PATH. Side-effect is multipath_resume() doesn't reset MPATHF_QUEUE_IF_NO_PATH unless MPATHF_SAVED_QUEUE_IF_NO_PATH was set (during presuspend); and at that time the MPATHF_SAVED_QUEUE_IF_NO_PATH bit gets cleared. So MPATHF_SAVED_QUEUE_IF_NO_PATH's use is much more narrow in scope. Last, but not least, do _not_ disable queue_if_no_path during noflush suspend. There is no need/benefit to saving off queue_if_no_path via MPATHF_SAVED_QUEUE_IF_NO_PATH and clearing MPATHF_QUEUE_IF_NO_PATH for noflush suspend -- by avoiding this needless queue_if_no_path flag churn there is less potential for MPATHF_QUEUE_IF_NO_PATH to get lost. Which avoids potential for IOs to be errored back up to userspace during DM multipath's handling of path failures. That said, this last change papers over a reported issue concerning request-based dm-multipath's interaction with blk-mq, relative to suspend and resume: multipath_endio is being called _before_ multipath_resume. This should never happen if DM suspend's blk_mq_quiesce_queue() + dm_wait_for_completion() is genuinely waiting for all inflight blk-mq requests to complete. Similarly: drivers/md/dm.c:__dm_resume() clearly calls dm_table_resume_targets() _before_ dm_start_queue()'s blk_mq_unquiesce_queue() is called. If the queue isn't even restarted until after multipath_resume(); the BIG question that still needs answering is: how can multipath_end_io beat multipath_resume in a race!? Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 4c34d037aa35..bc846cf7b0d8 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -695,12 +695,25 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path, bool save_old_value) { unsigned long flags; + bool queue_if_no_path_bit, saved_queue_if_no_path_bit; spin_lock_irqsave(&m->lock, flags); - assign_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags, - (save_old_value && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) || - (!save_old_value && queue_if_no_path)); + + queue_if_no_path_bit = test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); + saved_queue_if_no_path_bit = test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); + + if (save_old_value) { + if (unlikely(!queue_if_no_path_bit && saved_queue_if_no_path_bit)) { + DMERR("%s: QIFNP disabled but saved as enabled, saving again loses state, not saving!", + dm_device_name(dm_table_get_md(m->ti->table))); + } else + assign_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path_bit); + } else if (!queue_if_no_path && saved_queue_if_no_path_bit) { + /* due to "fail_if_no_path" message, need to honor it. */ + clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); + } assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path); + spin_unlock_irqrestore(&m->lock, flags); if (!queue_if_no_path) { @@ -1653,16 +1666,19 @@ done: } /* - * Suspend can't complete until all the I/O is processed so if - * the last path fails we must error any remaining I/O. - * Note that if the freeze_bdev fails while suspending, the - * queue_if_no_path state is lost - userspace should reset it. + * Suspend with flush can't complete until all the I/O is processed + * so if the last path fails we must error any remaining I/O. + * - Note that if the freeze_bdev fails while suspending, the + * queue_if_no_path state is lost - userspace should reset it. + * Otherwise, during noflush suspend, queue_if_no_path will not change. */ static void multipath_presuspend(struct dm_target *ti) { struct multipath *m = ti->private; - queue_if_no_path(m, false, true); + /* FIXME: bio-based shouldn't need to always disable queue_if_no_path */ + if (m->queue_mode == DM_TYPE_BIO_BASED || !dm_noflush_suspending(m->ti)) + queue_if_no_path(m, false, true); } static void multipath_postsuspend(struct dm_target *ti) @@ -1683,8 +1699,10 @@ static void multipath_resume(struct dm_target *ti) unsigned long flags; spin_lock_irqsave(&m->lock, flags); - assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, - test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)); + if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) { + set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); + clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); + } spin_unlock_irqrestore(&m->lock, flags); } -- cgit v1.2.3 From 4c3f48380fedbd714fc95958f503c1b5adf3ee6b Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 29 May 2020 15:59:13 -0400 Subject: dm mpath: enhance queue_if_no_path debugging Add more DMDEBUG that shows arguments passed and caller, and another that shows state of related flags at end of queue_if_no_path(). Also add queue_if_no_path DMDEBUG to multipath_resume(). Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index bc846cf7b0d8..b17da3046611 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -692,10 +692,14 @@ static void process_queued_bios(struct work_struct *work) * If we run out of usable paths, should we queue I/O or error it? */ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path, - bool save_old_value) + bool save_old_value, const char *caller) { unsigned long flags; bool queue_if_no_path_bit, saved_queue_if_no_path_bit; + const char *dm_dev_name = dm_device_name(dm_table_get_md(m->ti->table)); + + DMDEBUG("%s: %s caller=%s queue_if_no_path=%d save_old_value=%d", + dm_dev_name, __func__, caller, queue_if_no_path, save_old_value); spin_lock_irqsave(&m->lock, flags); @@ -705,7 +709,7 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path, if (save_old_value) { if (unlikely(!queue_if_no_path_bit && saved_queue_if_no_path_bit)) { DMERR("%s: QIFNP disabled but saved as enabled, saving again loses state, not saving!", - dm_device_name(dm_table_get_md(m->ti->table))); + dm_dev_name); } else assign_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path_bit); } else if (!queue_if_no_path && saved_queue_if_no_path_bit) { @@ -714,6 +718,12 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path, } assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path); + DMDEBUG("%s: after %s changes; QIFNP = %d; SQIFNP = %d; DNFS = %d", + dm_dev_name, __func__, + test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags), + test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags), + dm_noflush_suspending(m->ti)); + spin_unlock_irqrestore(&m->lock, flags); if (!queue_if_no_path) { @@ -734,7 +744,7 @@ static void queue_if_no_path_timeout_work(struct timer_list *t) struct mapped_device *md = dm_table_get_md(m->ti->table); DMWARN("queue_if_no_path timeout on %s, failing queued IO", dm_device_name(md)); - queue_if_no_path(m, false, false); + queue_if_no_path(m, false, false, __func__); } /* @@ -1074,7 +1084,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) argc--; if (!strcasecmp(arg_name, "queue_if_no_path")) { - r = queue_if_no_path(m, true, false); + r = queue_if_no_path(m, true, false, __func__); continue; } @@ -1678,7 +1688,7 @@ static void multipath_presuspend(struct dm_target *ti) /* FIXME: bio-based shouldn't need to always disable queue_if_no_path */ if (m->queue_mode == DM_TYPE_BIO_BASED || !dm_noflush_suspending(m->ti)) - queue_if_no_path(m, false, true); + queue_if_no_path(m, false, true, __func__); } static void multipath_postsuspend(struct dm_target *ti) @@ -1703,6 +1713,12 @@ static void multipath_resume(struct dm_target *ti) set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); } + + DMDEBUG("%s: %s finished; QIFNP = %d; SQIFNP = %d", + dm_device_name(dm_table_get_md(m->ti->table)), __func__, + test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags), + test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)); + spin_unlock_irqrestore(&m->lock, flags); } @@ -1862,13 +1878,13 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv, if (argc == 1) { if (!strcasecmp(argv[0], "queue_if_no_path")) { - r = queue_if_no_path(m, true, false); + r = queue_if_no_path(m, true, false, __func__); spin_lock_irqsave(&m->lock, flags); enable_nopath_timeout(m); spin_unlock_irqrestore(&m->lock, flags); goto out; } else if (!strcasecmp(argv[0], "fail_if_no_path")) { - r = queue_if_no_path(m, false, false); + r = queue_if_no_path(m, false, false, __func__); disable_nopath_timeout(m); goto out; } -- cgit v1.2.3 From 04867370ec40b708bd335df641182ddab0c59685 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 4 Jun 2020 10:44:34 -0400 Subject: dm mpath: add DM device name to Failing/Reinstating path log messages When there are many DM multipath devices it really helps to have additional context for which DM device a failed or reinstated path is part of. Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/md/dm-mpath.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index b17da3046611..78cff42d987e 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1285,7 +1285,9 @@ static int fail_path(struct pgpath *pgpath) if (!pgpath->is_active) goto out; - DMWARN("Failing path %s.", pgpath->path.dev->name); + DMWARN("%s: Failing path %s.", + dm_device_name(dm_table_get_md(m->ti->table)), + pgpath->path.dev->name); pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path); pgpath->is_active = false; @@ -1324,7 +1326,9 @@ static int reinstate_path(struct pgpath *pgpath) if (pgpath->is_active) goto out; - DMWARN("Reinstating path %s.", pgpath->path.dev->name); + DMWARN("%s: Reinstating path %s.", + dm_device_name(dm_table_get_md(m->ti->table)), + pgpath->path.dev->name); r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path); if (r) -- cgit v1.2.3