author     Naohiro Aota <naohiro.aota@wdc.com>    2021-08-19 21:19:23 +0900
committer  David Sterba <dsterba@suse.com>        2021-10-26 19:08:00 +0200
commit     be1a1d7a5d243cc485a4d903976f1fb3a284cc65
tree       78450e6c9b28568bb826259ff62fc7220fe4325a
parent     a85f05e59bc15a83ad910dbcb71df5ad8fa77295
btrfs: zoned: finish fully written block group
If we have written to the zone capacity, the device automatically deactivates the zone. Sync up the block group side (the active BG list and the zone_is_active flag) with it.

We need to do this for both data and metadata block groups. On the data side, we add a hook to btrfs_finish_ordered_io(). On the metadata side, we use end_extent_buffer_writeback().

To reduce excess block group lookups, we mark the last extent buffer in a block group with the EXTENT_BUFFER_ZONE_FINISH flag. This cannot be done for data (ordered extents), because the address may change due to REQ_OP_ZONE_APPEND.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
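For orientation before the diff: the whole patch hinges on one test, "did this completed IO reach the zone capacity of its block group?". Below is a minimal standalone sketch of that test; the helper name io_reaches_zone_capacity() and the u64 typedef are invented for illustration and are not part of the patch, which open-codes the inverse comparison as an early exit in btrfs_zone_finish_endio().

#include <stdbool.h>
#include <stdint.h>

typedef uint64_t u64;

/*
 * Illustrative only: a zone's usable space (its capacity) can be smaller
 * than the zone size, so a block group counts as fully written when the
 * completed range [logical, logical + length) ends at or beyond
 * bg_start + zone_capacity, not at the end of the zone itself.
 */
static bool io_reaches_zone_capacity(u64 bg_start, u64 zone_capacity,
                                     u64 logical, u64 length)
{
        return logical + length >= bg_start + zone_capacity;
}

The metadata path can evaluate this at submit time and tag the last extent buffer with EXTENT_BUFFER_ZONE_FINISH; the data path has to wait for btrfs_finish_ordered_io(), because with REQ_OP_ZONE_APPEND the final address is only known at completion.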
 fs/btrfs/extent_io.c | 10
 fs/btrfs/extent_io.h |  1
 fs/btrfs/inode.c     |  6
 fs/btrfs/zoned.c     | 50
 fs/btrfs/zoned.h     |  5
 5 files changed, 70 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fdc066cac572..5ad749e19ff3 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4159,6 +4159,9 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
 static void end_extent_buffer_writeback(struct extent_buffer *eb)
 {
+        if (test_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags))
+                btrfs_zone_finish_endio(eb->fs_info, eb->start, eb->len);
+
         clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
         smp_mb__after_atomic();
         wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
@@ -4760,8 +4763,13 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
                 free_extent_buffer(eb);
                 return ret;
         }
-        if (cache)
+        if (cache) {
+                /* Implies write in zoned mode */
                 btrfs_put_block_group(cache);
+                /* Mark the last eb in a block group */
+                if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
+                        set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
+        }
         ret = write_one_eb(eb, wbc, epd);
         free_extent_buffer(eb);
         if (ret < 0)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 53abdc280451..9f3e0a45a5e4 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -32,6 +32,7 @@ enum {
         /* write IO error */
         EXTENT_BUFFER_WRITE_ERR,
         EXTENT_BUFFER_NO_CHECK,
+        EXTENT_BUFFER_ZONE_FINISH,
 };
 /* these are flags for __process_pages_contig */
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 487533c35ddb..10efab2e3bd9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3010,8 +3010,12 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                 goto out;
         }
-        if (ordered_extent->bdev)
+        /* A valid bdev implies a write on a sequential zone */
+        if (ordered_extent->bdev) {
                 btrfs_rewrite_logical_zoned(ordered_extent);
+                btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
+                                        ordered_extent->disk_num_bytes);
+        }
         btrfs_free_io_failure_record(inode, start, end);
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 798069484054..28a06c2d80ad 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1904,3 +1904,53 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, int raid_index
         return ret;
 }
+
+void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical, u64 length)
+{
+        struct btrfs_block_group *block_group;
+        struct map_lookup *map;
+        struct btrfs_device *device;
+        u64 physical;
+
+        if (!btrfs_is_zoned(fs_info))
+                return;
+
+        block_group = btrfs_lookup_block_group(fs_info, logical);
+        ASSERT(block_group);
+
+        if (logical + length < block_group->start + block_group->zone_capacity)
+                goto out;
+
+        spin_lock(&block_group->lock);
+
+        if (!block_group->zone_is_active) {
+                spin_unlock(&block_group->lock);
+                goto out;
+        }
+
+        block_group->zone_is_active = 0;
+        /* We should have consumed all the free space */
+        ASSERT(block_group->alloc_offset == block_group->zone_capacity);
+        ASSERT(block_group->free_space_ctl->free_space == 0);
+        btrfs_clear_treelog_bg(block_group);
+        spin_unlock(&block_group->lock);
+
+        map = block_group->physical_map;
+        device = map->stripes[0].dev;
+        physical = map->stripes[0].physical;
+
+        if (!device->zone_info->max_active_zones)
+                goto out;
+
+        btrfs_dev_clear_active_zone(device, physical);
+
+        spin_lock(&fs_info->zone_active_bgs_lock);
+        ASSERT(!list_empty(&block_group->active_bg_list));
+        list_del_init(&block_group->active_bg_list);
+        spin_unlock(&fs_info->zone_active_bgs_lock);
+
+        btrfs_put_block_group(block_group);
+
+out:
+        btrfs_put_block_group(block_group);
+}
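A note on the two btrfs_put_block_group() calls that close the function above, since they are easy to misread as a double put. My reading (an assumption based on the earlier patches in this series, where btrfs_zone_activate() takes an extra reference when it adds a block group to fs_info->zone_active_bgs) is that the put right after list_del_init() drops that list reference, while the put under the out: label drops the reference returned by btrfs_lookup_block_group() at the top of the function. A toy, self-contained sketch of that pairing, with all names invented:

#include <assert.h>

/* Toy stand-in for a refcounted block group; not kernel code. */
struct toy_bg {
        int refs;
};

static void toy_get(struct toy_bg *bg) { bg->refs++; }
static void toy_put(struct toy_bg *bg) { assert(bg->refs > 0); bg->refs--; }

int main(void)
{
        struct toy_bg bg = { .refs = 1 };       /* owner's reference */

        /* assumption: "btrfs_zone_activate()" takes a ref for the active list */
        toy_get(&bg);
        /* "btrfs_zone_finish_endio()": lookup takes its own reference */
        toy_get(&bg);

        toy_put(&bg);   /* pairs with the active-list reference after list_del_init() */
        toy_put(&bg);   /* pairs with the lookup, under the out: label */

        assert(bg.refs == 1);   /* only the owner's reference remains */
        return 0;
}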
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index ade6588c4ccd..9c512402d7f4 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -73,6 +73,8 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group);
 int btrfs_zone_finish(struct btrfs_block_group *block_group);
 bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
                              int raid_index);
+void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info, u64 logical,
+                             u64 length);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
                                      struct blk_zone *zone)
@@ -224,6 +226,9 @@ static inline bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices,
         return true;
 }
+static inline void btrfs_zone_finish_endio(struct btrfs_fs_info *fs_info,
+                                           u64 logical, u64 length) { }
+
 #endif
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)