author     Naohiro Aota <naohiro.aota@wdc.com>       2021-08-19 21:19:23 +0900
committer  David Sterba <dsterba@suse.com>           2021-10-26 19:08:00 +0200
commit     be1a1d7a5d243cc485a4d903976f1fb3a284cc65
tree       78450e6c9b28568bb826259ff62fc7220fe4325a
parent     a85f05e59bc15a83ad910dbcb71df5ad8fa77295
btrfs: zoned: finish fully written block group
If we have written up to the zone capacity, the device automatically
deactivates the zone. Sync the block group side (the active BG list and
the zone_is_active flag) up with that device state.
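In other words, the endio-side check reduces to comparing the end of the
write against the block group's usable capacity, which on zoned devices
can be smaller than the zone size. A minimal sketch of that condition,
using a stand-in struct; bg_fully_written() is a hypothetical helper, not
a function added by this patch:

#include <linux/types.h>

/* Stand-in for the two btrfs_block_group fields the check relies on. */
struct zoned_bg_sketch {
	u64 start;		/* logical start of the block group */
	u64 zone_capacity;	/* writable bytes, may be less than the zone size */
};

/* Hypothetical helper: true once a write reaches the capacity boundary. */
static bool bg_fully_written(const struct zoned_bg_sketch *bg,
			     u64 logical, u64 length)
{
	return logical + length >= bg->start + bg->zone_capacity;
}

This mirrors the early-out in btrfs_zone_finish_endio() below, which
returns without finishing while logical + length is still short of
start + zone_capacity.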
We need to do this for both data and metadata block groups. On the data
side, we add a hook to btrfs_finish_ordered_io(). On the metadata side,
we use end_extent_buffer_writeback().
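Both hooks funnel into the same helper added by this patch. Condensed
from the diff below, for orientation:

/* Data side, in btrfs_finish_ordered_io(), after the zone append
 * address rewrite has made disk_bytenr final:
 */
btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
			ordered_extent->disk_num_bytes);

/* Metadata side, in end_extent_buffer_writeback(), for extent buffers
 * marked with EXTENT_BUFFER_ZONE_FINISH:
 */
btrfs_zone_finish_endio(eb->fs_info, eb->start, eb->len);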
To avoid an excess block group lookup, we mark the last extent buffer in
a block group with the EXTENT_BUFFER_ZONE_FINISH flag. This cannot be
done for data (ordered_extent), because the address may change due to
REQ_OP_ZONE_APPEND.
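To see why the data address is not stable before completion: with
REQ_OP_ZONE_APPEND the device, not the filesystem, picks the final write
location inside the zone, so the on-disk bytenr is only known at endio
time. A minimal sketch of that completion-time step, with hypothetical
names (in the patch, the actual rewrite is done by
btrfs_rewrite_logical_zoned()):

#include <linux/types.h>

/* Stand-in for the ordered extent fields involved. */
struct ordered_sketch {
	u64 disk_bytenr;	/* provisional at submit time */
	u64 disk_num_bytes;
};

/*
 * Hypothetical endio step: the device has reported where the zone
 * append actually landed, so record the final address now.  Any
 * address-based decision, such as "did this write fill the block
 * group?", can only happen after this point -- hence no submit-time
 * marking for data.
 */
static void zone_append_endio_sketch(struct ordered_sketch *ordered,
				     u64 actual_bytenr)
{
	ordered->disk_bytenr = actual_bytenr;
}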
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat:
 fs/btrfs/extent_io.c | 10
 fs/btrfs/extent_io.h |  1
 fs/btrfs/inode.c     |  6
 fs/btrfs/zoned.c     | 50
 fs/btrfs/zoned.h     |  5

 5 files changed, 70 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fdc066cac572..5ad749e19ff3 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4159,6 +4159,9 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
 
 static void end_extent_buffer_writeback(struct extent_buffer *eb)
 {
+	if (test_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags))
+		btrfs_zone_finish_endio(eb->fs_info, eb->start, eb->len);
+
 	clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
 	smp_mb__after_atomic();
 	wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
@@ -4760,8 +4763,13 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
 		free_extent_buffer(eb);
 		return ret;
 	}
-	if (cache)
+	if (cache) {
+		/* Implies write in zoned mode */
 		btrfs_put_block_group(cache);
+		/* Mark the last eb in a block group */
+		if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
+			set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
+	}
 	ret = write_one_eb(eb, wbc, epd);
 	free_extent_buffer(eb);
 	if (ret < 0)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 53abdc280451..9f3e0a45a5e4 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -32,6 +32,7 @@ enum {
 	/* write IO error */
 	EXTENT_BUFFER_WRITE_ERR,
 	EXTENT_BUFFER_NO_CHECK,
+	EXTENT_BUFFER_ZONE_FINISH,
 };
 
 /* these are flags for __process_pages_contig */
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 487533c35ddb..10efab2e3bd9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3010,8 +3010,12 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 		goto out;
 	}
 
-	if (ordered_extent->bdev)
+	/* A valid bdev implies a write on a sequential zone */
+	if (ordered_extent->bdev) {
 		btrfs_rewrite_logical_zoned(ordered_extent);
+		btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
+					ordered_extent->disk_num_bytes);
+	}
 
 	btrfs_free_io_failure_record(inode, start, end);
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 798069484054..28a06c2d80ad 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1904,3 +1904,53 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, int raid_index
 
 	return ret;
 }
+
+void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length)
+{
+	struct btrfs_block_group *block_group;
+	struct map_lookup *map;
+	struct btrfs_device *device;
+	u64 physical;
+
+	if (!btrfs_is_zoned(fs_info))
+		return;
+
+	block_group = btrfs_lookup_block_group(fs_info, logical);
+	ASSERT(block_group);
+
+	if (logical + length < block_group->start + block_group->zone_capacity)
+		goto out;
+
+	spin_lock(&block_group->lock);
+
+	if (!block_group->zone_is_active) {
+		spin_unlock(&block_group->lock);
+		goto out;
+	}
+
+	block_group->zone_is_active = 0;
+	/* We should have consumed all the free space */
+	ASSERT(block_group->alloc_offset == block_group->zone_capacity);
+	ASSERT(block_group->free_space_ctl->free_space == 0);
+	btrfs_clear_treelog_bg(block_group);
+	spin_unlock(&block_group->lock);
+
+	map = block_group->physical_map;
+	device = map->stripes[0].dev;
+	physical = map->stripes[0].physical;
+
+	if (!device->zone_info->max_active_zones)
+		goto out;
+
+	btrfs_dev_clear_active_zone(device, physical);
+
+	spin_lock(&fs_info->zone_active_bgs_lock);
+	ASSERT(!list_empty(&block_group->active_bg_list));
+	list_del_init(&block_group->active_bg_list);
+	spin_unlock(&fs_info->zone_active_bgs_lock);
+
+	btrfs_put_block_group(block_group);
+
+out:
+	btrfs_put_block_group(block_group);
+}
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index ade6588c4ccd..9c512402d7f4 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -73,6 +73,8 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group);
 int btrfs_zone_finish(struct btrfs_block_group *block_group);
 bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
 			     int raid_index);
+void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
+			     u64 length);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
 				     struct blk_zone *zone)
@@ -224,6 +226,9 @@ static inline bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
 	return true;
 }
 
+static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
+					   u64 logical, u64 length) { }
+
 #endif
 
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)