summaryrefslogtreecommitdiff
path: root/fs/btrfs/space-info.c
diff options
context:
space:
mode:
authorJosef Bacik <josef@toxicpanda.com>2020-03-13 15:58:05 -0400
committerDavid Sterba <dsterba@suse.com>2020-05-25 11:25:22 +0200
commit7f9fe614407692f670601a634621138233ac00d7 (patch)
tree966a2257879bdbe6a6ab50d91d89265abea406db /fs/btrfs/space-info.c
parent876de781b0da240fcf8d29514c34607e147e5a94 (diff)
btrfs: improve global reserve stealing logic
For unlink transactions and block group removal btrfs_start_transaction_fallback_global_rsv will first try to start an ordinary transaction and if it fails it will fall back to reserving the required amount by stealing from the global reserve. This is problematic because of all the same reasons we had with previous iterations of the ENOSPC handling, thundering herd. We get a bunch of failures all at once, everybody tries to allocate from the global reserve, some win and some lose, we get an ENSOPC. Fix this behavior by introducing BTRFS_RESERVE_FLUSH_ALL_STEAL. It's used to mark unlink reservation. To fix this we need to integrate this logic into the normal ENOSPC infrastructure. We still go through all of the normal flushing work, and at the moment we begin to fail all the tickets we try to satisfy any tickets that are allowed to steal by stealing from the global reserve. If this works we start the flushing system over again just like we would with a normal ticket satisfaction. This serializes our global reserve stealing, so we don't have the thundering herd problem. Reviewed-by: Nikolay Borisov <nborisov@suse.com> Tested-by: Nikolay Borisov <nborisov@suse.com> Signed-off-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/space-info.c')
-rw-r--r--fs/btrfs/space-info.c37
1 files changed, 36 insertions, 1 deletions
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index ff17a4420358..f5ec50e6c48e 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -856,6 +856,34 @@ static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
!test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
}
+static bool steal_from_global_rsv(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *space_info,
+ struct reserve_ticket *ticket)
+{
+ struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
+ u64 min_bytes;
+
+ if (global_rsv->space_info != space_info)
+ return false;
+
+ spin_lock(&global_rsv->lock);
+ min_bytes = div_factor(global_rsv->size, 5);
+ if (global_rsv->reserved < min_bytes + ticket->bytes) {
+ spin_unlock(&global_rsv->lock);
+ return false;
+ }
+ global_rsv->reserved -= ticket->bytes;
+ ticket->bytes = 0;
+ list_del_init(&ticket->list);
+ wake_up(&ticket->wait);
+ space_info->tickets_id++;
+ if (global_rsv->reserved < global_rsv->size)
+ global_rsv->full = 0;
+ spin_unlock(&global_rsv->lock);
+
+ return true;
+}
+
/*
* maybe_fail_all_tickets - we've exhausted our flushing, start failing tickets
* @fs_info - fs_info for this fs
@@ -888,6 +916,10 @@ static bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info,
ticket = list_first_entry(&space_info->tickets,
struct reserve_ticket, list);
+ if (ticket->steal &&
+ steal_from_global_rsv(fs_info, space_info, ticket))
+ return true;
+
/*
* may_commit_transaction will avoid committing the transaction
* if it doesn't feel like the space reclaimed by the commit
@@ -1104,6 +1136,7 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
switch (flush) {
case BTRFS_RESERVE_FLUSH_ALL:
+ case BTRFS_RESERVE_FLUSH_ALL_STEAL:
wait_reserve_ticket(fs_info, space_info, ticket);
break;
case BTRFS_RESERVE_FLUSH_LIMIT:
@@ -1203,7 +1236,9 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
ticket.error = 0;
space_info->reclaim_size += ticket.bytes;
init_waitqueue_head(&ticket.wait);
- if (flush == BTRFS_RESERVE_FLUSH_ALL) {
+ ticket.steal = (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL);
+ if (flush == BTRFS_RESERVE_FLUSH_ALL ||
+ flush == BTRFS_RESERVE_FLUSH_ALL_STEAL) {
list_add_tail(&ticket.list, &space_info->tickets);
if (!space_info->flush) {
space_info->flush = 1;