btrfs: urgent periodic reclaim pass

Periodic reclaim attempts to avoid block_groups seeing active use with a sweep mark that gets cleared on allocation and set on a sweep. In urgent conditions where we have very little unallocated space (less than one chunk used by the threshold calculation for the unallocated target), we want to be able to override this mechanism. Introduce a second pass that only happens if we fail to find a reclaim candidate and reclaim is urgent. In that case, do a second pass where all block groups are eligible. Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Boris Burkov <boris@bur.io> Signed-off-by: David Sterba <dsterba@suse.com>
author: Boris Burkov <boris@bur.io> 2024-02-14 11:29:50 -0800
committer: David Sterba <dsterba@suse.com> 2024-07-11 15:33:27 +0200
commit: 0e962e755b2684185ec37c75d91df56e076782ec (patch)
tree: 9eded06accc737eb78ec019b0a82c85c3a3b0e1a /fs/btrfs/space-info.c
parent: 813d4c642251649352e9680e6278a1c02c0ebf95 (diff)
1 files changed, 34 insertions, 1 deletions
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 295c25101e23..9ac94d3119e8 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -1972,17 +1972,35 @@ int btrfs_calc_reclaim_threshold(struct btrfs_space_info *space_info)
 	return READ_ONCE(space_info->bg_reclaim_threshold);
 }
 
+/*
+ * Under "urgent" reclaim, we will reclaim even fresh block groups that have
+ * recently seen successful allocations, as we are desperate to reclaim
+ * whatever we can to avoid ENOSPC in a transaction leading to a readonly fs.
+ */
+static bool is_reclaim_urgent(struct btrfs_space_info *space_info)
+{
+	struct btrfs_fs_info *fs_info = space_info->fs_info;
+	u64 unalloc = atomic64_read(&fs_info->free_chunk_space);
+	u64 data_chunk_size = calc_effective_data_chunk_size(fs_info);
+
+	return unalloc < data_chunk_size;
+}
+
 static int do_reclaim_sweep(struct btrfs_fs_info *fs_info,
 			    struct btrfs_space_info *space_info, int raid)
 {
 	struct btrfs_block_group *bg;
 	int thresh_pct;
+	bool try_again = true;
+	bool urgent;
 
 	spin_lock(&space_info->lock);
+	urgent = is_reclaim_urgent(space_info);
 	thresh_pct = btrfs_calc_reclaim_threshold(space_info);
 	spin_unlock(&space_info->lock);
 
 	down_read(&space_info->groups_sem);
+again:
 	list_for_each_entry(bg, &space_info->block_groups[raid], list) {
 		u64 thresh;
 		bool reclaim = false;
@@ -1990,14 +2008,29 @@ static int do_reclaim_sweep(struct btrfs_fs_info *fs_info,
 		btrfs_get_block_group(bg);
 		spin_lock(&bg->lock);
 		thresh = mult_perc(bg->length, thresh_pct);
-		if (bg->used < thresh && bg->reclaim_mark)
+		if (bg->used < thresh && bg->reclaim_mark) {
+			try_again = false;
 			reclaim = true;
+		}
 		bg->reclaim_mark++;
 		spin_unlock(&bg->lock);
 		if (reclaim)
 			btrfs_mark_bg_to_reclaim(bg);
 		btrfs_put_block_group(bg);
 	}
+
+	/*
+	 * In situations where we are very motivated to reclaim (low unalloc)
+	 * use two passes to make the reclaim mark check best effort.
+	 *
+	 * If we have any staler groups, we don't touch the fresher ones, but if we
+	 * really need a block group, do take a fresh one.
+	 */
+	if (try_again && urgent) {
+		try_again = false;
+		goto again;
+	}
+
 	up_read(&space_info->groups_sem);
 	return 0;
 }
author	Boris Burkov <boris@bur.io>	2024-02-14 11:29:50 -0800
committer	David Sterba <dsterba@suse.com>	2024-07-11 15:33:27 +0200
commit	0e962e755b2684185ec37c75d91df56e076782ec (patch)
tree	9eded06accc737eb78ec019b0a82c85c3a3b0e1a /fs/btrfs/space-info.c
parent	813d4c642251649352e9680e6278a1c02c0ebf95 (diff)