author     Yu Kuai <yukuai3@huawei.com>        2023-11-29 10:02:34 +0800
committer  Song Liu <song@kernel.org>          2023-12-01 15:49:42 -0800
commit     fa2bbff7b0b4e211fec5e5686ef96350690597b5
tree       d175d0e0e8b31bd0e6f6145f6c2c6ca823b0e90e
parent     15da990f8dd7e9d0e1fd0275730f6fed6f6a8a57
md: synchronize flush io with array reconfiguration
Currently rcu is used to protect iterating rdev from submit_flushes():

submit_flushes                  remove_and_add_spares
                                 synchronize_rcu
                                 pers->hot_remove_disk()
 rcu_read_lock()
 rdev_for_each_rcu
  if (rdev->raid_disk >= 0)
                                 rdev->raid_disk = -1;
  atomic_inc(&rdev->nr_pending)
  rcu_read_unlock()
  bi = bio_alloc_bioset()
  bi->bi_end_io = md_end_flush
  bi->private = rdev
  submit_bio
  // issue io for removed rdev

rcu_read_lock() only keeps the rdev structure from being freed while it
is referenced; it does not stop the disk from being removed from the
array in the meantime, so the flush bio can be issued to an rdev that
is no longer part of the array.
Fix this problem by grabbing 'active_io' before iterating rdev, making
sure that remove_and_add_spares() won't run concurrently with
submit_flushes().
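
The fix turns 'active_io' into a drain-style barrier: every flush holds
a reference for its whole lifetime, and mddev_suspend() first kills the
ref and then waits for it to drop to zero before reconfiguration can
proceed. As a rough illustration only, here is a self-contained
userspace sketch of that pattern; the mutex/condvar stand in for the
kernel's percpu_ref machinery, and get_active_io(), put_active_io() and
suspend_array() are made-up names, not md functions:

```c
/*
 * Userspace sketch of the 'active_io' drain pattern (illustration
 * only; the kernel uses a percpu_ref, not a mutex and counter).
 */
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t drained = PTHREAD_COND_INITIALIZER;
static int active_io;     /* stands in for mddev->active_io */
static bool suspended;    /* set once the ref has been "killed" */

/* flush side: what percpu_ref_get(&mddev->active_io) achieves */
static bool get_active_io(void)
{
	bool ok;

	pthread_mutex_lock(&lock);
	ok = !suspended;
	if (ok)
		active_io++;
	pthread_mutex_unlock(&lock);
	return ok;
}

/* completion side: the percpu_ref_put() in md_end_flush() */
static void put_active_io(void)
{
	pthread_mutex_lock(&lock);
	if (--active_io == 0)
		pthread_cond_broadcast(&drained);
	pthread_mutex_unlock(&lock);
}

/* reconfiguration side: kill the ref, then wait for it to drain */
static void suspend_array(void)
{
	pthread_mutex_lock(&lock);
	suspended = true;
	while (active_io > 0)
		pthread_cond_wait(&drained, &lock);
	pthread_mutex_unlock(&lock);
	/* safe point: no flush holds a reference, rdevs may be removed */
}

int main(void)
{
	if (get_active_io()) {  /* md_flush_request() takes the ref... */
		/* ...submit_flushes() iterates rdevs, issues bios... */
		put_active_io(); /* ...md_end_flush() drops it */
	}
	suspend_array();        /* now remove_and_add_spares() may run */
	return 0;
}
```

Note that in the kernel the flush path never sees the ref at zero:
md_flush_request() runs under md_handle_request(), which already holds
'active_io'. That is why the patch below can use a plain
percpu_ref_get() guarded by a WARN_ON() instead of
percpu_ref_tryget_live().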
Fixes: a2826aa92e2e ("md: support barrier requests on all personalities.")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20231129020234.1586910-1-yukuai1@huaweicloud.com
Diffstat (limited to 'drivers/md')
 drivers/md/md.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 71b3397dea47..4e9fe5cbeedc 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -529,6 +529,9 @@ static void md_end_flush(struct bio *bio)
 	rdev_dec_pending(rdev, mddev);
 
 	if (atomic_dec_and_test(&mddev->flush_pending)) {
+		/* The pair is percpu_ref_get() from md_flush_request() */
+		percpu_ref_put(&mddev->active_io);
+
 		/* The pre-request flush has finished */
 		queue_work(md_wq, &mddev->flush_work);
 	}
@@ -548,12 +551,8 @@ static void submit_flushes(struct work_struct *ws)
 	rdev_for_each_rcu(rdev, mddev)
 		if (rdev->raid_disk >= 0 &&
 		    !test_bit(Faulty, &rdev->flags)) {
-			/* Take two references, one is dropped
-			 * when request finishes, one after
-			 * we reclaim rcu_read_lock
-			 */
 			struct bio *bi;
-			atomic_inc(&rdev->nr_pending);
+
 			atomic_inc(&rdev->nr_pending);
 			rcu_read_unlock();
 			bi = bio_alloc_bioset(rdev->bdev, 0,
@@ -564,7 +563,6 @@ static void submit_flushes(struct work_struct *ws)
 			atomic_inc(&mddev->flush_pending);
 			submit_bio(bi);
 			rcu_read_lock();
-			rdev_dec_pending(rdev, mddev);
 		}
 	rcu_read_unlock();
 	if (atomic_dec_and_test(&mddev->flush_pending))
@@ -617,6 +615,18 @@ bool md_flush_request(struct mddev *mddev, struct bio *bio)
 	/* new request after previous flush is completed */
 	if (ktime_after(req_start, mddev->prev_flush_start)) {
 		WARN_ON(mddev->flush_bio);
+		/*
+		 * Grab a reference to make sure mddev_suspend() will wait for
+		 * this flush to be done.
+		 *
+		 * md_flush_request() is called under md_handle_request() and
+		 * 'active_io' is already grabbed, hence percpu_ref_is_zero()
+		 * won't pass; percpu_ref_tryget_live() can't be used because
+		 * percpu_ref_kill() can be called by mddev_suspend()
+		 * concurrently.
+		 */
+		WARN_ON(percpu_ref_is_zero(&mddev->active_io));
+		percpu_ref_get(&mddev->active_io);
 		mddev->flush_bio = bio;
 		bio = NULL;
 	}
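
A second detail of the patch: the new percpu_ref_put() runs only in
whichever completion happens to be last, gated by
atomic_dec_and_test(&mddev->flush_pending), with flush_pending biased
by one while submission is in progress (the bias is dropped by the
final atomic_dec_and_test() in submit_flushes(), visible in the context
above). A minimal sketch of that "last one out" idiom follows;
flush_submitted() and flush_completed() are illustrative names, not md
functions:

```c
/* Sketch of the "last one out" completion idiom (illustration only). */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int flush_pending = 1;	/* submission-time bias */

/* one per flush bio, before submit_bio() */
static void flush_submitted(void)
{
	atomic_fetch_add(&flush_pending, 1);
}

/* per completion (and once for the bias); true only for the decrement
 * that reaches zero -- in the patch above, the point where
 * md_end_flush() drops 'active_io' and queues flush_work */
static bool flush_completed(void)
{
	return atomic_fetch_sub(&flush_pending, 1) == 1;
}

int main(void)
{
	flush_submitted();			/* one bio in flight */
	printf("%d\n", flush_completed());	/* 0: bias still held */
	printf("%d\n", flush_completed());	/* 1: last reference gone */
	return 0;
}
```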