diff options
author | Jens Axboe <axboe@kernel.dk> | 2023-06-28 16:08:19 -0600 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2023-06-28 16:08:19 -0600 |
commit | 3a08284ff22080e742814dad1dbabb4b66349642 (patch) | |
tree | 3de2163fedca694e9b71f5d54eff8847a7875e7e /drivers/md | |
parent | 89181f544ffa4da682b0145738342f9b78b9e8dc (diff) | |
parent | 6d85ebf95c44e52337ca1d07f0db4b435d1e6762 (diff) |
Merge branch 'for-6.5/block-late' into block-6.5
* for-6.5/block-late:
blk-sysfs: add a new attr_group for blk_mq
blk-iocost: move wbt_enable/disable_default() out of spinlock
blk-wbt: cleanup rwb_enabled() and wbt_disabled()
blk-wbt: remove dead code to handle wbt enable/disable with io inflight
blk-wbt: don't create wbt sysfs entry if CONFIG_BLK_WBT is disabled
blk-mq: fix two misuses on RQF_USE_SCHED
blk-throttle: Fix io statistics for cgroup v1
bcache: Fix bcache device claiming
bcache: Alloc holder object before async registration
raid10: avoid spin_lock from fastpath from raid10_unplug()
md: fix 'delete_mutex' deadlock
md: use mddev->external to select holder in export_rdev()
md/raid1-10: fix casting from randomized structure in raid1_submit_write()
md/raid10: fix the condition to call bio_end_io_acct()
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bcache/super.c | 123 | ||||
-rw-r--r-- | drivers/md/md.c | 32 | ||||
-rw-r--r-- | drivers/md/md.h | 4 | ||||
-rw-r--r-- | drivers/md/raid1-10.c | 2 | ||||
-rw-r--r-- | drivers/md/raid10.c | 6 |
5 files changed, 75 insertions, 92 deletions
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index e2a803683105..0ae2b3676293 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1369,7 +1369,7 @@ static void cached_dev_free(struct closure *cl) put_page(virt_to_page(dc->sb_disk)); if (!IS_ERR_OR_NULL(dc->bdev)) - blkdev_put(dc->bdev, bcache_kobj); + blkdev_put(dc->bdev, dc); wake_up(&unregister_wait); @@ -1453,7 +1453,6 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, memcpy(&dc->sb, sb, sizeof(struct cache_sb)); dc->bdev = bdev; - dc->bdev->bd_holder = dc; dc->sb_disk = sb_disk; if (cached_dev_init(dc, sb->block_size << 9)) @@ -2218,7 +2217,7 @@ void bch_cache_release(struct kobject *kobj) put_page(virt_to_page(ca->sb_disk)); if (!IS_ERR_OR_NULL(ca->bdev)) - blkdev_put(ca->bdev, bcache_kobj); + blkdev_put(ca->bdev, ca); kfree(ca); module_put(THIS_MODULE); @@ -2345,7 +2344,6 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, memcpy(&ca->sb, sb, sizeof(struct cache_sb)); ca->bdev = bdev; - ca->bdev->bd_holder = ca; ca->sb_disk = sb_disk; if (bdev_max_discard_sectors((bdev))) @@ -2359,7 +2357,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, * call blkdev_put() to bdev in bch_cache_release(). So we * explicitly call blkdev_put() here. */ - blkdev_put(bdev, bcache_kobj); + blkdev_put(bdev, ca); if (ret == -ENOMEM) err = "cache_alloc(): -ENOMEM"; else if (ret == -EPERM) @@ -2448,6 +2446,7 @@ struct async_reg_args { struct cache_sb *sb; struct cache_sb_disk *sb_disk; struct block_device *bdev; + void *holder; }; static void register_bdev_worker(struct work_struct *work) @@ -2455,22 +2454,13 @@ static void register_bdev_worker(struct work_struct *work) int fail = false; struct async_reg_args *args = container_of(work, struct async_reg_args, reg_work.work); - struct cached_dev *dc; - - dc = kzalloc(sizeof(*dc), GFP_KERNEL); - if (!dc) { - fail = true; - put_page(virt_to_page(args->sb_disk)); - blkdev_put(args->bdev, bcache_kobj); - goto out; - } mutex_lock(&bch_register_lock); - if (register_bdev(args->sb, args->sb_disk, args->bdev, dc) < 0) + if (register_bdev(args->sb, args->sb_disk, args->bdev, args->holder) + < 0) fail = true; mutex_unlock(&bch_register_lock); -out: if (fail) pr_info("error %s: fail to register backing device\n", args->path); @@ -2485,21 +2475,11 @@ static void register_cache_worker(struct work_struct *work) int fail = false; struct async_reg_args *args = container_of(work, struct async_reg_args, reg_work.work); - struct cache *ca; - - ca = kzalloc(sizeof(*ca), GFP_KERNEL); - if (!ca) { - fail = true; - put_page(virt_to_page(args->sb_disk)); - blkdev_put(args->bdev, bcache_kobj); - goto out; - } /* blkdev_put() will be called in bch_cache_release() */ - if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0) + if (register_cache(args->sb, args->sb_disk, args->bdev, args->holder)) fail = true; -out: if (fail) pr_info("error %s: fail to register cache device\n", args->path); @@ -2520,6 +2500,13 @@ static void register_device_async(struct async_reg_args *args) queue_delayed_work(system_wq, &args->reg_work, 10); } +static void *alloc_holder_object(struct cache_sb *sb) +{ + if (SB_IS_BDEV(sb)) + return kzalloc(sizeof(struct cached_dev), GFP_KERNEL); + return kzalloc(sizeof(struct cache), GFP_KERNEL); +} + static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, const char *buffer, size_t size) { @@ -2527,9 +2514,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, char *path = NULL; struct cache_sb *sb; struct cache_sb_disk *sb_disk; - struct block_device *bdev; + struct block_device *bdev, *bdev2; + void *holder = NULL; ssize_t ret; bool async_registration = false; + bool quiet = false; #ifdef CONFIG_BCACHE_ASYNC_REGISTRATION async_registration = true; @@ -2558,10 +2547,34 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, ret = -EINVAL; err = "failed to open device"; - bdev = blkdev_get_by_path(strim(path), BLK_OPEN_READ | BLK_OPEN_WRITE, - bcache_kobj, NULL); + bdev = blkdev_get_by_path(strim(path), BLK_OPEN_READ, NULL, NULL); + if (IS_ERR(bdev)) + goto out_free_sb; + + err = "failed to set blocksize"; + if (set_blocksize(bdev, 4096)) + goto out_blkdev_put; + + err = read_super(sb, bdev, &sb_disk); + if (err) + goto out_blkdev_put; + + holder = alloc_holder_object(sb); + if (!holder) { + ret = -ENOMEM; + err = "cannot allocate memory"; + goto out_put_sb_page; + } + + /* Now reopen in exclusive mode with proper holder */ + bdev2 = blkdev_get_by_dev(bdev->bd_dev, BLK_OPEN_READ | BLK_OPEN_WRITE, + holder, NULL); + blkdev_put(bdev, NULL); + bdev = bdev2; if (IS_ERR(bdev)) { - if (bdev == ERR_PTR(-EBUSY)) { + ret = PTR_ERR(bdev); + bdev = NULL; + if (ret == -EBUSY) { dev_t dev; mutex_lock(&bch_register_lock); @@ -2571,20 +2584,14 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, else err = "device busy"; mutex_unlock(&bch_register_lock); - if (attr == &ksysfs_register_quiet) - goto done; + if (attr == &ksysfs_register_quiet) { + quiet = true; + ret = size; + } } - goto out_free_sb; + goto out_free_holder; } - err = "failed to set blocksize"; - if (set_blocksize(bdev, 4096)) - goto out_blkdev_put; - - err = read_super(sb, bdev, &sb_disk); - if (err) - goto out_blkdev_put; - err = "failed to register device"; if (async_registration) { @@ -2595,59 +2602,46 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (!args) { ret = -ENOMEM; err = "cannot allocate memory"; - goto out_put_sb_page; + goto out_free_holder; } args->path = path; args->sb = sb; args->sb_disk = sb_disk; args->bdev = bdev; + args->holder = holder; register_device_async(args); /* No wait and returns to user space */ goto async_done; } if (SB_IS_BDEV(sb)) { - struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); - - if (!dc) { - ret = -ENOMEM; - err = "cannot allocate memory"; - goto out_put_sb_page; - } - mutex_lock(&bch_register_lock); - ret = register_bdev(sb, sb_disk, bdev, dc); + ret = register_bdev(sb, sb_disk, bdev, holder); mutex_unlock(&bch_register_lock); /* blkdev_put() will be called in cached_dev_free() */ if (ret < 0) goto out_free_sb; } else { - struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); - - if (!ca) { - ret = -ENOMEM; - err = "cannot allocate memory"; - goto out_put_sb_page; - } - /* blkdev_put() will be called in bch_cache_release() */ - ret = register_cache(sb, sb_disk, bdev, ca); + ret = register_cache(sb, sb_disk, bdev, holder); if (ret) goto out_free_sb; } -done: kfree(sb); kfree(path); module_put(THIS_MODULE); async_done: return size; +out_free_holder: + kfree(holder); out_put_sb_page: put_page(virt_to_page(sb_disk)); out_blkdev_put: - blkdev_put(bdev, register_bcache); + if (bdev) + blkdev_put(bdev, holder); out_free_sb: kfree(sb); out_free_path: @@ -2656,7 +2650,8 @@ out_free_path: out_module_put: module_put(THIS_MODULE); out: - pr_info("error %s: %s\n", path?path:"", err); + if (!quiet) + pr_info("error %s: %s\n", path?path:"", err); return ret; } diff --git a/drivers/md/md.c b/drivers/md/md.c index cf3733c90c47..2e38ef421d69 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -643,7 +643,6 @@ void mddev_init(struct mddev *mddev) { mutex_init(&mddev->open_mutex); mutex_init(&mddev->reconfig_mutex); - mutex_init(&mddev->delete_mutex); mutex_init(&mddev->bitmap_info.mutex); INIT_LIST_HEAD(&mddev->disks); INIT_LIST_HEAD(&mddev->all_mddevs); @@ -749,26 +748,15 @@ static void mddev_free(struct mddev *mddev) static const struct attribute_group md_redundancy_group; -static void md_free_rdev(struct mddev *mddev) +void mddev_unlock(struct mddev *mddev) { struct md_rdev *rdev; struct md_rdev *tmp; + LIST_HEAD(delete); - mutex_lock(&mddev->delete_mutex); - if (list_empty(&mddev->deleting)) - goto out; - - list_for_each_entry_safe(rdev, tmp, &mddev->deleting, same_set) { - list_del_init(&rdev->same_set); - kobject_del(&rdev->kobj); - export_rdev(rdev, mddev); - } -out: - mutex_unlock(&mddev->delete_mutex); -} + if (!list_empty(&mddev->deleting)) + list_splice_init(&mddev->deleting, &delete); -void mddev_unlock(struct mddev *mddev) -{ if (mddev->to_remove) { /* These cannot be removed under reconfig_mutex as * an access to the files will try to take reconfig_mutex @@ -808,7 +796,11 @@ void mddev_unlock(struct mddev *mddev) } else mutex_unlock(&mddev->reconfig_mutex); - md_free_rdev(mddev); + list_for_each_entry_safe(rdev, tmp, &delete, same_set) { + list_del_init(&rdev->same_set); + kobject_del(&rdev->kobj); + export_rdev(rdev, mddev); + } md_wakeup_thread(mddev->thread); wake_up(&mddev->sb_wait); @@ -2458,7 +2450,7 @@ static void export_rdev(struct md_rdev *rdev, struct mddev *mddev) if (test_bit(AutoDetected, &rdev->flags)) md_autodetect_dev(rdev->bdev->bd_dev); #endif - blkdev_put(rdev->bdev, mddev->major_version == -2 ? &claim_rdev : rdev); + blkdev_put(rdev->bdev, mddev->external ? &claim_rdev : rdev); rdev->bdev = NULL; kobject_put(&rdev->kobj); } @@ -2488,9 +2480,7 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev) * reconfig_mutex is held, hence it can't be called under * reconfig_mutex and it's delayed to mddev_unlock(). */ - mutex_lock(&mddev->delete_mutex); list_add(&rdev->same_set, &mddev->deleting); - mutex_unlock(&mddev->delete_mutex); } static void export_array(struct mddev *mddev) @@ -6140,7 +6130,7 @@ static void md_clean(struct mddev *mddev) mddev->resync_min = 0; mddev->resync_max = MaxSector; mddev->reshape_position = MaxSector; - mddev->external = 0; + /* we still need mddev->external in export_rdev, do not clear it yet */ mddev->persistent = 0; mddev->level = LEVEL_NONE; mddev->clevel[0] = 0; diff --git a/drivers/md/md.h b/drivers/md/md.h index bfd2306bc750..1aef86bf3fc3 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -531,11 +531,9 @@ struct mddev { /* * Temporarily store rdev that will be finally removed when - * reconfig_mutex is unlocked. + * reconfig_mutex is unlocked, protected by reconfig_mutex. */ struct list_head deleting; - /* Protect the deleting list */ - struct mutex delete_mutex; bool has_superblocks:1; bool fail_last_dev:1; diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c index 169ebe296f2d..3f22edec70e7 100644 --- a/drivers/md/raid1-10.c +++ b/drivers/md/raid1-10.c @@ -116,7 +116,7 @@ static void md_bio_reset_resync_pages(struct bio *bio, struct resync_pages *rp, static inline void raid1_submit_write(struct bio *bio) { - struct md_rdev *rdev = (struct md_rdev *)bio->bi_bdev; + struct md_rdev *rdev = (void *)bio->bi_bdev; bio->bi_next = NULL; bio_set_dev(bio, rdev->bdev); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d0de8c9fb3cf..5051149e27bb 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -325,7 +325,7 @@ static void raid_end_bio_io(struct r10bio *r10_bio) if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) bio->bi_status = BLK_STS_IOERR; - if (blk_queue_io_stat(bio->bi_bdev->bd_disk->queue)) + if (r10_bio->start_time) bio_end_io_acct(bio, r10_bio->start_time); bio_endio(bio); /* @@ -1118,7 +1118,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) spin_lock_irq(&conf->device_lock); bio_list_merge(&conf->pending_bio_list, &plug->pending); spin_unlock_irq(&conf->device_lock); - wake_up(&conf->wait_barrier); + wake_up_barrier(conf); md_wakeup_thread(mddev->thread); kfree(plug); return; @@ -1127,7 +1127,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) /* we aren't scheduling, so we can do the write-out directly. */ bio = bio_list_get(&plug->pending); raid1_prepare_flush_writes(mddev->bitmap); - wake_up(&conf->wait_barrier); + wake_up_barrier(conf); while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; |