diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-06-30 12:12:56 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-06-30 12:12:56 -0700 |
commit | df668a5fe461bb9d7e899c538acc7197746038f4 (patch) | |
tree | 315a71104f5cea7feeb56c9f2c768453408b72f7 /fs/block_dev.c | |
parent | df04fbe8680bfe07f3d7487eccff9f768bb02533 (diff) | |
parent | 2705dfb2094777e405e065105e307074af8965c1 (diff) |
Merge tag 'for-5.14/block-2021-06-29' of git://git.kernel.dk/linux-block
Pull core block updates from Jens Axboe:
- disk events cleanup (Christoph)
- gendisk and request queue allocation simplifications (Christoph)
- bdev_disk_changed cleanups (Christoph)
- IO priority improvements (Bart)
- Chained bio completion trace fix (Edward)
- blk-wbt fixes (Jan)
- blk-wbt enable/disable fix (Zhang)
- Scheduler dispatch improvements (Jan, Ming)
- Shared tagset scheduler improvements (John)
- BFQ updates (Paolo, Luca, Pietro)
- BFQ lock inversion fix (Jan)
- Documentation improvements (Kir)
- CLONE_IO block cgroup fix (Tejun)
- Remove of ancient and deprecated block dump feature (zhangyi)
- Discard merge fix (Ming)
- Misc fixes or followup fixes (Colin, Damien, Dan, Long, Max, Thomas,
Yang)
* tag 'for-5.14/block-2021-06-29' of git://git.kernel.dk/linux-block: (129 commits)
block: fix discard request merge
block/mq-deadline: Remove a WARN_ON_ONCE() call
blk-mq: update hctx->dispatch_busy in case of real scheduler
blk: Fix lock inversion between ioc lock and bfqd lock
bfq: Remove merged request already in bfq_requests_merged()
block: pass a gendisk to bdev_disk_changed
block: move bdev_disk_changed
block: add the events* attributes to disk_attrs
block: move the disk events code to a separate file
block: fix trace completion for chained bio
block/partitions/msdos: Fix typo inidicator -> indicator
block, bfq: reset waker pointer with shared queues
block, bfq: check waker only for queues with no in-flight I/O
block, bfq: avoid delayed merge of async queues
block, bfq: boost throughput by extending queue-merging times
block, bfq: consider also creation time in delayed stable merge
block, bfq: fix delayed stable merge check
block, bfq: let also stably merged queues enjoy weight raising
blk-wbt: make sure throttle is enabled properly
blk-wbt: introduce a new disable state to prevent false positive by rwb_enabled()
...
Diffstat (limited to 'fs/block_dev.c')
-rw-r--r-- | fs/block_dev.c | 244 |
1 files changed, 92 insertions, 152 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c index eb34f5c357cf..ca8bf1869ca8 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -895,7 +895,6 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) mapping_set_gfp_mask(&inode->i_data, GFP_USER); bdev = I_BDEV(inode); - mutex_init(&bdev->bd_mutex); mutex_init(&bdev->bd_fsfreeze_mutex); spin_lock_init(&bdev->bd_size_lock); bdev->bd_disk = disk; @@ -1154,7 +1153,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) struct bd_holder_disk *holder; int ret = 0; - mutex_lock(&bdev->bd_mutex); + mutex_lock(&bdev->bd_disk->open_mutex); WARN_ON_ONCE(!bdev->bd_holder); @@ -1199,7 +1198,7 @@ out_del: out_free: kfree(holder); out_unlock: - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&bdev->bd_disk->open_mutex); return ret; } EXPORT_SYMBOL_GPL(bd_link_disk_holder); @@ -1218,7 +1217,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) { struct bd_holder_disk *holder; - mutex_lock(&bdev->bd_mutex); + mutex_lock(&bdev->bd_disk->open_mutex); holder = bd_find_holder_disk(bdev, disk); @@ -1230,138 +1229,97 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) kfree(holder); } - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&bdev->bd_disk->open_mutex); } EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); #endif -static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); +static void blkdev_flush_mapping(struct block_device *bdev) +{ + WARN_ON_ONCE(bdev->bd_holders); + sync_blockdev(bdev); + kill_bdev(bdev); + bdev_write_inode(bdev); +} -int bdev_disk_changed(struct block_device *bdev, bool invalidate) +static int blkdev_get_whole(struct block_device *bdev, fmode_t mode) { struct gendisk *disk = bdev->bd_disk; int ret = 0; - lockdep_assert_held(&bdev->bd_mutex); - - if (!(disk->flags & GENHD_FL_UP)) - return -ENXIO; - -rescan: - if (bdev->bd_part_count) - return -EBUSY; - sync_blockdev(bdev); - invalidate_bdev(bdev); - blk_drop_partitions(disk); - - clear_bit(GD_NEED_PART_SCAN, &disk->state); - - /* - * Historically we only set the capacity to zero for devices that - * support partitions (independ of actually having partitions created). - * Doing that is rather inconsistent, but changing it broke legacy - * udisks polling for legacy ide-cdrom devices. Use the crude check - * below to get the sane behavior for most device while not breaking - * userspace for this particular setup. - */ - if (invalidate) { - if (disk_part_scan_enabled(disk) || - !(disk->flags & GENHD_FL_REMOVABLE)) - set_capacity(disk, 0); + if (disk->fops->open) { + ret = disk->fops->open(bdev, mode); + if (ret) { + /* avoid ghost partitions on a removed medium */ + if (ret == -ENOMEDIUM && + test_bit(GD_NEED_PART_SCAN, &disk->state)) + bdev_disk_changed(disk, true); + return ret; + } } - if (get_capacity(disk)) { - ret = blk_add_partitions(disk, bdev); - if (ret == -EAGAIN) - goto rescan; - } else if (invalidate) { - /* - * Tell userspace that the media / partition table may have - * changed. - */ - kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); + if (!bdev->bd_openers) { + set_init_blocksize(bdev); + if (bdev->bd_bdi == &noop_backing_dev_info) + bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info); } + if (test_bit(GD_NEED_PART_SCAN, &disk->state)) + bdev_disk_changed(disk, false); + bdev->bd_openers++; + return 0;; +} - return ret; +static void blkdev_put_whole(struct block_device *bdev, fmode_t mode) +{ + if (!--bdev->bd_openers) + blkdev_flush_mapping(bdev); + if (bdev->bd_disk->fops->release) + bdev->bd_disk->fops->release(bdev->bd_disk, mode); } -/* - * Only exported for loop and dasd for historic reasons. Don't use in new - * code! - */ -EXPORT_SYMBOL_GPL(bdev_disk_changed); -/* - * bd_mutex locking: - * - * mutex_lock(part->bd_mutex) - * mutex_lock_nested(whole->bd_mutex, 1) - */ -static int __blkdev_get(struct block_device *bdev, fmode_t mode) +static int blkdev_get_part(struct block_device *part, fmode_t mode) { - struct gendisk *disk = bdev->bd_disk; - int ret = 0; + struct gendisk *disk = part->bd_disk; + struct block_device *whole; + int ret; - if (!(disk->flags & GENHD_FL_UP)) - return -ENXIO; + if (part->bd_openers) + goto done; - if (!bdev->bd_openers) { - if (!bdev_is_partition(bdev)) { - ret = 0; - if (disk->fops->open) - ret = disk->fops->open(bdev, mode); + whole = bdgrab(disk->part0); + ret = blkdev_get_whole(whole, mode); + if (ret) + goto out_put_whole; - if (!ret) - set_init_blocksize(bdev); + ret = -ENXIO; + if (!bdev_nr_sectors(part)) + goto out_blkdev_put; - /* - * If the device is invalidated, rescan partition - * if open succeeded or failed with -ENOMEDIUM. - * The latter is necessary to prevent ghost - * partitions on a removed medium. - */ - if (test_bit(GD_NEED_PART_SCAN, &disk->state) && - (!ret || ret == -ENOMEDIUM)) - bdev_disk_changed(bdev, ret == -ENOMEDIUM); + disk->open_partitions++; + set_init_blocksize(part); + if (part->bd_bdi == &noop_backing_dev_info) + part->bd_bdi = bdi_get(disk->queue->backing_dev_info); +done: + part->bd_openers++; + return 0; - if (ret) - return ret; - } else { - struct block_device *whole = bdgrab(disk->part0); - - mutex_lock_nested(&whole->bd_mutex, 1); - ret = __blkdev_get(whole, mode); - if (ret) { - mutex_unlock(&whole->bd_mutex); - bdput(whole); - return ret; - } - whole->bd_part_count++; - mutex_unlock(&whole->bd_mutex); +out_blkdev_put: + blkdev_put_whole(whole, mode); +out_put_whole: + bdput(whole); + return ret; +} - if (!bdev_nr_sectors(bdev)) { - __blkdev_put(whole, mode, 1); - bdput(whole); - return -ENXIO; - } - set_init_blocksize(bdev); - } +static void blkdev_put_part(struct block_device *part, fmode_t mode) +{ + struct block_device *whole = bdev_whole(part); - if (bdev->bd_bdi == &noop_backing_dev_info) - bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info); - } else { - if (!bdev_is_partition(bdev)) { - if (bdev->bd_disk->fops->open) - ret = bdev->bd_disk->fops->open(bdev, mode); - /* the same as first opener case, read comment there */ - if (test_bit(GD_NEED_PART_SCAN, &disk->state) && - (!ret || ret == -ENOMEDIUM)) - bdev_disk_changed(bdev, ret == -ENOMEDIUM); - if (ret) - return ret; - } - } - bdev->bd_openers++; - return 0; + if (--part->bd_openers) + return; + blkdev_flush_mapping(part); + whole->bd_disk->open_partitions--; + blkdev_put_whole(whole, mode); + bdput(whole); } struct block_device *blkdev_get_no_open(dev_t dev) @@ -1447,8 +1405,14 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) disk_block_events(disk); - mutex_lock(&bdev->bd_mutex); - ret =__blkdev_get(bdev, mode); + mutex_lock(&disk->open_mutex); + ret = -ENXIO; + if (!(disk->flags & GENHD_FL_UP)) + goto abort_claiming; + if (bdev_is_partition(bdev)) + ret = blkdev_get_part(bdev, mode); + else + ret = blkdev_get_whole(bdev, mode); if (ret) goto abort_claiming; if (mode & FMODE_EXCL) { @@ -1467,7 +1431,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) unblock_events = false; } } - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&disk->open_mutex); if (unblock_events) disk_unblock_events(disk); @@ -1476,7 +1440,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) abort_claiming: if (mode & FMODE_EXCL) bd_abort_claiming(bdev, holder); - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&disk->open_mutex); disk_unblock_events(disk); put_blkdev: blkdev_put_no_open(bdev); @@ -1551,10 +1515,9 @@ static int blkdev_open(struct inode * inode, struct file * filp) return 0; } -static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) +void blkdev_put(struct block_device *bdev, fmode_t mode) { struct gendisk *disk = bdev->bd_disk; - struct block_device *victim = NULL; /* * Sync early if it looks like we're the last one. If someone else @@ -1566,41 +1529,14 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) if (bdev->bd_openers == 1) sync_blockdev(bdev); - mutex_lock_nested(&bdev->bd_mutex, for_part); - if (for_part) - bdev->bd_part_count--; - - if (!--bdev->bd_openers) { - WARN_ON_ONCE(bdev->bd_holders); - sync_blockdev(bdev); - kill_bdev(bdev); - bdev_write_inode(bdev); - if (bdev_is_partition(bdev)) - victim = bdev_whole(bdev); - } - - if (!bdev_is_partition(bdev) && disk->fops->release) - disk->fops->release(disk, mode); - mutex_unlock(&bdev->bd_mutex); - if (victim) { - __blkdev_put(victim, mode, 1); - bdput(victim); - } -} - -void blkdev_put(struct block_device *bdev, fmode_t mode) -{ - struct gendisk *disk = bdev->bd_disk; - - mutex_lock(&bdev->bd_mutex); - + mutex_lock(&disk->open_mutex); if (mode & FMODE_EXCL) { struct block_device *whole = bdev_whole(bdev); bool bdev_free; /* * Release a claim on the device. The holder fields - * are protected with bdev_lock. bd_mutex is to + * are protected with bdev_lock. open_mutex is to * synchronize disk_holder unlinking. */ spin_lock(&bdev_lock); @@ -1631,9 +1567,13 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) * from userland - e.g. eject(1). */ disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE); - mutex_unlock(&bdev->bd_mutex); - __blkdev_put(bdev, mode, 0); + if (bdev_is_partition(bdev)) + blkdev_put_part(bdev, mode); + else + blkdev_put_whole(bdev, mode); + mutex_unlock(&disk->open_mutex); + blkdev_put_no_open(bdev); } EXPORT_SYMBOL(blkdev_put); @@ -1941,10 +1881,10 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) old_inode = inode; bdev = I_BDEV(inode); - mutex_lock(&bdev->bd_mutex); + mutex_lock(&bdev->bd_disk->open_mutex); if (bdev->bd_openers) func(bdev, arg); - mutex_unlock(&bdev->bd_mutex); + mutex_unlock(&bdev->bd_disk->open_mutex); spin_lock(&blockdev_superblock->s_inode_list_lock); } |