summaryrefslogtreecommitdiff
path: root/fs/btrfs/scrub.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-26 11:41:38 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-26 11:41:38 -0700
commitcc423f6337d0a5ff1906f3b3d465d28c0d1705f6 (patch)
treefafc40aa7dc3ecd9800239f647d4fe21ee5db6af /fs/btrfs/scrub.c
parente940efa936be65866db9ce20798b13fdc6b3891a (diff)
parent8a4a0b2a3eaf75ca8854f856ef29690c12b2f531 (diff)
Merge tag 'for-6.5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "Mainly core changes, refactoring and optimizations. Performance is improved in some areas, overall there may be a cumulative improvement due to refactoring that removed lookups in the IO path or simplified IO submission tracking. Core: - submit IO synchronously for fast checksums (crc32c and xxhash), remove high priority worker kthread - read extent buffer in one go, simplify IO tracking, bio submission and locking - remove additional tracking of redirtied extent buffers, originally added for zoned mode but actually not needed - track ordered extent pointer in bio to avoid rbtree lookups during IO - scrub, use recovered data stripes as cache to avoid unnecessary read - in zoned mode, optimize logical to physical mappings of extents - remove PageError handling, not set by VFS nor writeback - cleanups, refactoring, better structure packing - lots of error handling improvements - more assertions, lockdep annotations - print assertion failure with the exact line where it happens - tracepoint updates - more debugging prints Performance: - speedup in fsync(), better tracking of inode logged status can avoid transaction commit - IO path structures track logical offsets in data structures and does not need to look it up User visible changes: - don't commit transaction for every created subvolume, this can reduce time when many subvolumes are created in a batch - print affected files when relocation fails - trigger orphan file cleanup during START_SYNC ioctl Notable fixes: - fix crash when disabling quota and relocation - fix crashes when removing roots from drity list - fix transacion abort during relocation when converting from newer profiles not covered by fallback - in zoned mode, stop reclaiming block groups if filesystem becomes read-only - fix rare race condition in tree mod log rewind that can miss some btree node slots - with enabled fsverity, drop up-to-date page bit in case the verification fails" * tag 'for-6.5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (194 commits) btrfs: fix race between quota disable and relocation btrfs: add comment to struct btrfs_fs_info::dirty_cowonly_roots btrfs: fix race when deleting free space root from the dirty cow roots list btrfs: fix race when deleting quota root from the dirty cow roots list btrfs: tracepoints: also show actual number of the outstanding extents btrfs: update i_version in update_dev_time btrfs: make btrfs_compressed_bioset static btrfs: add handling for RAID1C23/DUP to btrfs_reduce_alloc_profile btrfs: scrub: remove btrfs_fs_info::scrub_wr_completion_workers btrfs: scrub: remove scrub_ctx::csum_list member btrfs: do not BUG_ON after failure to migrate space during truncation btrfs: do not BUG_ON on failure to get dir index for new snapshot btrfs: send: do not BUG_ON() on unexpected symlink data extent btrfs: do not BUG_ON() when dropping inode items from log root btrfs: replace BUG_ON() at split_item() with proper error handling btrfs: do not BUG_ON() on tree mod log failures at btrfs_del_ptr() btrfs: do not BUG_ON() on tree mod log failures at insert_ptr() btrfs: do not BUG_ON() on tree mod log failure at insert_new_root() btrfs: do not BUG_ON() on tree mod log failures at push_nodes_for_insert() btrfs: abort transaction at update_ref_for_cow() when ref count is zero ...
Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r--fs/btrfs/scrub.c125
1 files changed, 35 insertions, 90 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 16c228344cbb..4cae41bd6de0 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -177,7 +177,6 @@ struct scrub_ctx {
struct btrfs_fs_info *fs_info;
int first_free;
int cur_stripe;
- struct list_head csum_list;
atomic_t cancel_req;
int readonly;
int sectors_per_bio;
@@ -309,17 +308,6 @@ static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
scrub_pause_off(fs_info);
}
-static void scrub_free_csums(struct scrub_ctx *sctx)
-{
- while (!list_empty(&sctx->csum_list)) {
- struct btrfs_ordered_sum *sum;
- sum = list_first_entry(&sctx->csum_list,
- struct btrfs_ordered_sum, list);
- list_del(&sum->list);
- kfree(sum);
- }
-}
-
static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
{
int i;
@@ -330,7 +318,6 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++)
release_scrub_stripe(&sctx->stripes[i]);
- scrub_free_csums(sctx);
kfree(sctx);
}
@@ -352,7 +339,6 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
refcount_set(&sctx->refs, 1);
sctx->is_dev_replace = is_dev_replace;
sctx->fs_info = fs_info;
- INIT_LIST_HEAD(&sctx->csum_list);
for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++) {
int ret;
@@ -479,11 +465,8 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
struct extent_buffer *eb;
struct btrfs_extent_item *ei;
struct scrub_warning swarn;
- unsigned long ptr = 0;
u64 flags = 0;
- u64 ref_root;
u32 item_size;
- u8 ref_level = 0;
int ret;
/* Super block error, no need to search extent tree. */
@@ -513,19 +496,28 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
item_size = btrfs_item_size(eb, path->slots[0]);
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- do {
+ unsigned long ptr = 0;
+ u8 ref_level;
+ u64 ref_root;
+
+ while (true) {
ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
item_size, &ref_root,
&ref_level);
+ if (ret < 0) {
+ btrfs_warn(fs_info,
+ "failed to resolve tree backref for logical %llu: %d",
+ swarn.logical, ret);
+ break;
+ }
+ if (ret > 0)
+ break;
btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
- errstr, swarn.logical,
- btrfs_dev_name(dev),
- swarn.physical,
- ref_level ? "node" : "leaf",
- ret < 0 ? -1 : ref_level,
- ret < 0 ? -1 : ref_root);
- } while (ret != 1);
+ errstr, swarn.logical, btrfs_dev_name(dev),
+ swarn.physical, (ref_level ? "node" : "leaf"),
+ ref_level, ref_root);
+ }
btrfs_release_path(path);
} else {
struct btrfs_backref_walk_ctx ctx = { 0 };
@@ -546,48 +538,6 @@ out:
btrfs_free_path(path);
}
-static inline int scrub_nr_raid_mirrors(struct btrfs_io_context *bioc)
-{
- if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5)
- return 2;
- else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6)
- return 3;
- else
- return (int)bioc->num_stripes;
-}
-
-static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
- u64 full_stripe_logical,
- int nstripes, int mirror,
- int *stripe_index,
- u64 *stripe_offset)
-{
- int i;
-
- if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
- const int nr_data_stripes = (map_type & BTRFS_BLOCK_GROUP_RAID5) ?
- nstripes - 1 : nstripes - 2;
-
- /* RAID5/6 */
- for (i = 0; i < nr_data_stripes; i++) {
- const u64 data_stripe_start = full_stripe_logical +
- (i * BTRFS_STRIPE_LEN);
-
- if (logical >= data_stripe_start &&
- logical < data_stripe_start + BTRFS_STRIPE_LEN)
- break;
- }
-
- *stripe_index = i;
- *stripe_offset = (logical - full_stripe_logical) &
- BTRFS_STRIPE_LEN_MASK;
- } else {
- /* The other RAID type */
- *stripe_index = mirror;
- *stripe_offset = 0;
- }
-}
-
static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical)
{
int ret = 0;
@@ -924,8 +874,9 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
/* For scrub, our mirror_num should always start at 1. */
ASSERT(stripe->mirror_num >= 1);
- ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
- stripe->logical, &mapped_len, &bioc);
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
+ stripe->logical, &mapped_len, &bioc,
+ NULL, NULL, 1);
/*
* If we failed, dev will be NULL, and later detailed reports
* will just be skipped.
@@ -1957,8 +1908,8 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
bio->bi_end_io = raid56_scrub_wait_endio;
btrfs_bio_counter_inc_blocked(fs_info);
- ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, full_stripe_start,
- &length, &bioc);
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, full_stripe_start,
+ &length, &bioc, NULL, NULL, 1);
if (ret < 0) {
btrfs_put_bioc(bioc);
btrfs_bio_counter_dec(fs_info);
@@ -1972,6 +1923,13 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
btrfs_bio_counter_dec(fs_info);
goto out;
}
+ /* Use the recovered stripes as cache to avoid read them from disk again. */
+ for (int i = 0; i < data_stripes; i++) {
+ stripe = &sctx->raid56_data_stripes[i];
+
+ raid56_parity_cache_data_pages(rbio, stripe->pages,
+ full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT));
+ }
raid56_parity_submit_scrub_rbio(rbio);
wait_for_completion_io(&io_done);
ret = blk_status_to_errno(bio->bi_status);
@@ -2740,17 +2698,12 @@ static void scrub_workers_put(struct btrfs_fs_info *fs_info)
if (refcount_dec_and_mutex_lock(&fs_info->scrub_workers_refcnt,
&fs_info->scrub_lock)) {
struct workqueue_struct *scrub_workers = fs_info->scrub_workers;
- struct workqueue_struct *scrub_wr_comp =
- fs_info->scrub_wr_completion_workers;
fs_info->scrub_workers = NULL;
- fs_info->scrub_wr_completion_workers = NULL;
mutex_unlock(&fs_info->scrub_lock);
if (scrub_workers)
destroy_workqueue(scrub_workers);
- if (scrub_wr_comp)
- destroy_workqueue(scrub_wr_comp);
}
}
@@ -2761,7 +2714,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
int is_dev_replace)
{
struct workqueue_struct *scrub_workers = NULL;
- struct workqueue_struct *scrub_wr_comp = NULL;
unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
int max_active = fs_info->thread_pool_size;
int ret = -ENOMEM;
@@ -2769,21 +2721,17 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
if (refcount_inc_not_zero(&fs_info->scrub_workers_refcnt))
return 0;
- scrub_workers = alloc_workqueue("btrfs-scrub", flags,
- is_dev_replace ? 1 : max_active);
+ if (is_dev_replace)
+ scrub_workers = alloc_ordered_workqueue("btrfs-scrub", flags);
+ else
+ scrub_workers = alloc_workqueue("btrfs-scrub", flags, max_active);
if (!scrub_workers)
- goto fail_scrub_workers;
-
- scrub_wr_comp = alloc_workqueue("btrfs-scrubwrc", flags, max_active);
- if (!scrub_wr_comp)
- goto fail_scrub_wr_completion_workers;
+ return -ENOMEM;
mutex_lock(&fs_info->scrub_lock);
if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
- ASSERT(fs_info->scrub_workers == NULL &&
- fs_info->scrub_wr_completion_workers == NULL);
+ ASSERT(fs_info->scrub_workers == NULL);
fs_info->scrub_workers = scrub_workers;
- fs_info->scrub_wr_completion_workers = scrub_wr_comp;
refcount_set(&fs_info->scrub_workers_refcnt, 1);
mutex_unlock(&fs_info->scrub_lock);
return 0;
@@ -2794,10 +2742,7 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
ret = 0;
- destroy_workqueue(scrub_wr_comp);
-fail_scrub_wr_completion_workers:
destroy_workqueue(scrub_workers);
-fail_scrub_workers:
return ret;
}