diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-26 11:41:38 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-06-26 11:41:38 -0700 |
commit | cc423f6337d0a5ff1906f3b3d465d28c0d1705f6 (patch) | |
tree | fafc40aa7dc3ecd9800239f647d4fe21ee5db6af /fs/btrfs/tree-checker.c | |
parent | e940efa936be65866db9ce20798b13fdc6b3891a (diff) | |
parent | 8a4a0b2a3eaf75ca8854f856ef29690c12b2f531 (diff) |
Merge tag 'for-6.5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"Mainly core changes, refactoring and optimizations.
Performance is improved in some areas, overall there may be a
cumulative improvement due to refactoring that removed lookups in the
IO path or simplified IO submission tracking.
Core:
- submit IO synchronously for fast checksums (crc32c and xxhash),
remove high priority worker kthread
- read extent buffer in one go, simplify IO tracking, bio submission
and locking
- remove additional tracking of redirtied extent buffers, originally
added for zoned mode but actually not needed
- track ordered extent pointer in bio to avoid rbtree lookups during
IO
- scrub, use recovered data stripes as cache to avoid unnecessary
read
- in zoned mode, optimize logical to physical mappings of extents
- remove PageError handling, not set by VFS nor writeback
- cleanups, refactoring, better structure packing
- lots of error handling improvements
- more assertions, lockdep annotations
- print assertion failure with the exact line where it happens
- tracepoint updates
- more debugging prints
Performance:
- speedup in fsync(), better tracking of inode logged status can
avoid transaction commit
- IO path structures track logical offsets in data structures and
does not need to look it up
User visible changes:
- don't commit transaction for every created subvolume, this can
reduce time when many subvolumes are created in a batch
- print affected files when relocation fails
- trigger orphan file cleanup during START_SYNC ioctl
Notable fixes:
- fix crash when disabling quota and relocation
- fix crashes when removing roots from drity list
- fix transacion abort during relocation when converting from newer
profiles not covered by fallback
- in zoned mode, stop reclaiming block groups if filesystem becomes
read-only
- fix rare race condition in tree mod log rewind that can miss some
btree node slots
- with enabled fsverity, drop up-to-date page bit in case the
verification fails"
* tag 'for-6.5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (194 commits)
btrfs: fix race between quota disable and relocation
btrfs: add comment to struct btrfs_fs_info::dirty_cowonly_roots
btrfs: fix race when deleting free space root from the dirty cow roots list
btrfs: fix race when deleting quota root from the dirty cow roots list
btrfs: tracepoints: also show actual number of the outstanding extents
btrfs: update i_version in update_dev_time
btrfs: make btrfs_compressed_bioset static
btrfs: add handling for RAID1C23/DUP to btrfs_reduce_alloc_profile
btrfs: scrub: remove btrfs_fs_info::scrub_wr_completion_workers
btrfs: scrub: remove scrub_ctx::csum_list member
btrfs: do not BUG_ON after failure to migrate space during truncation
btrfs: do not BUG_ON on failure to get dir index for new snapshot
btrfs: send: do not BUG_ON() on unexpected symlink data extent
btrfs: do not BUG_ON() when dropping inode items from log root
btrfs: replace BUG_ON() at split_item() with proper error handling
btrfs: do not BUG_ON() on tree mod log failures at btrfs_del_ptr()
btrfs: do not BUG_ON() on tree mod log failures at insert_ptr()
btrfs: do not BUG_ON() on tree mod log failure at insert_new_root()
btrfs: do not BUG_ON() on tree mod log failures at push_nodes_for_insert()
btrfs: abort transaction at update_ref_for_cow() when ref count is zero
...
Diffstat (limited to 'fs/btrfs/tree-checker.c')
-rw-r--r-- | fs/btrfs/tree-checker.c | 152 |
1 files changed, 112 insertions, 40 deletions
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 2138e9fc0564..038dfa8f1788 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -25,10 +25,10 @@ #include "compression.h" #include "volumes.h" #include "misc.h" -#include "btrfs_inode.h" #include "fs.h" #include "accessors.h" #include "file-item.h" +#include "inode-item.h" /* * Error message should follow the following format: @@ -1620,9 +1620,10 @@ static int check_inode_ref(struct extent_buffer *leaf, /* * Common point to switch the item-specific validation. */ -static int check_leaf_item(struct extent_buffer *leaf, - struct btrfs_key *key, int slot, - struct btrfs_key *prev_key) +static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf, + struct btrfs_key *key, + int slot, + struct btrfs_key *prev_key) { int ret = 0; struct btrfs_chunk *chunk; @@ -1671,10 +1672,13 @@ static int check_leaf_item(struct extent_buffer *leaf, ret = check_extent_data_ref(leaf, key, slot); break; } - return ret; + + if (ret) + return BTRFS_TREE_BLOCK_INVALID_ITEM; + return BTRFS_TREE_BLOCK_CLEAN; } -static int check_leaf(struct extent_buffer *leaf, bool check_item_data) +enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf) { struct btrfs_fs_info *fs_info = leaf->fs_info; /* No valid key type is 0, so all key should be larger than this key */ @@ -1687,7 +1691,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data) generic_err(leaf, 0, "invalid level for leaf, have %d expect 0", btrfs_header_level(leaf)); - return -EUCLEAN; + return BTRFS_TREE_BLOCK_INVALID_LEVEL; } /* @@ -1710,32 +1714,32 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data) generic_err(leaf, 0, "invalid root, root %llu must never be empty", owner); - return -EUCLEAN; + return BTRFS_TREE_BLOCK_INVALID_NRITEMS; } /* Unknown tree */ if (unlikely(owner == 0)) { generic_err(leaf, 0, "invalid owner, root 0 is not defined"); - return -EUCLEAN; + return BTRFS_TREE_BLOCK_INVALID_OWNER; } /* EXTENT_TREE_V2 can have empty extent trees. */ if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) - return 0; + return BTRFS_TREE_BLOCK_CLEAN; if (unlikely(owner == BTRFS_EXTENT_TREE_OBJECTID)) { generic_err(leaf, 0, "invalid root, root %llu must never be empty", owner); - return -EUCLEAN; + return BTRFS_TREE_BLOCK_INVALID_NRITEMS; } - return 0; + return BTRFS_TREE_BLOCK_CLEAN; } if (unlikely(nritems == 0)) - return 0; + return BTRFS_TREE_BLOCK_CLEAN; /* * Check the following things to make sure this is a good leaf, and @@ -1751,7 +1755,6 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data) for (slot = 0; slot < nritems; slot++) { u32 item_end_expected; u64 item_data_end; - int ret; btrfs_item_key_to_cpu(leaf, &key, slot); @@ -1762,7 +1765,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data) prev_key.objectid, prev_key.type, prev_key.offset, key.objectid, key.type, key.offset); - return -EUCLEAN; + return BTRFS_TREE_BLOCK_BAD_KEY_ORDER; } item_data_end = (u64)btrfs_item_offset(leaf, slot) + @@ -1781,7 +1784,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data) generic_err(leaf, slot, "unexpected item end, have %llu expect %u", item_data_end, item_end_expected); - return -EUCLEAN; + return BTRFS_TREE_BLOCK_INVALID_OFFSETS; } /* @@ -1793,7 +1796,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data) generic_err(leaf, slot, "slot end outside of leaf, have %llu expect range [0, %u]", item_data_end, BTRFS_LEAF_DATA_SIZE(fs_info)); - return -EUCLEAN; + return BTRFS_TREE_BLOCK_INVALID_OFFSETS; } /* Also check if the item pointer overlaps with btrfs item. */ @@ -1804,16 +1807,22 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data) btrfs_item_nr_offset(leaf, slot) + sizeof(struct btrfs_item), btrfs_item_ptr_offset(leaf, slot)); - return -EUCLEAN; + return BTRFS_TREE_BLOCK_INVALID_OFFSETS; } - if (check_item_data) { + /* + * We only want to do this if WRITTEN is set, otherwise the leaf + * may be in some intermediate state and won't appear valid. + */ + if (btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_WRITTEN)) { + enum btrfs_tree_block_status ret; + /* * Check if the item size and content meet other * criteria */ ret = check_leaf_item(leaf, &key, slot, &prev_key); - if (unlikely(ret < 0)) + if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN)) return ret; } @@ -1822,21 +1831,21 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data) prev_key.offset = key.offset; } - return 0; + return BTRFS_TREE_BLOCK_CLEAN; } -int btrfs_check_leaf_full(struct extent_buffer *leaf) +int btrfs_check_leaf(struct extent_buffer *leaf) { - return check_leaf(leaf, true); -} -ALLOW_ERROR_INJECTION(btrfs_check_leaf_full, ERRNO); + enum btrfs_tree_block_status ret; -int btrfs_check_leaf_relaxed(struct extent_buffer *leaf) -{ - return check_leaf(leaf, false); + ret = __btrfs_check_leaf(leaf); + if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN)) + return -EUCLEAN; + return 0; } +ALLOW_ERROR_INJECTION(btrfs_check_leaf, ERRNO); -int btrfs_check_node(struct extent_buffer *node) +enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node) { struct btrfs_fs_info *fs_info = node->fs_info; unsigned long nr = btrfs_header_nritems(node); @@ -1844,13 +1853,12 @@ int btrfs_check_node(struct extent_buffer *node) int slot; int level = btrfs_header_level(node); u64 bytenr; - int ret = 0; if (unlikely(level <= 0 || level >= BTRFS_MAX_LEVEL)) { generic_err(node, 0, "invalid level for node, have %d expect [1, %d]", level, BTRFS_MAX_LEVEL - 1); - return -EUCLEAN; + return BTRFS_TREE_BLOCK_INVALID_LEVEL; } if (unlikely(nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(fs_info))) { btrfs_crit(fs_info, @@ -1858,7 +1866,7 @@ int btrfs_check_node(struct extent_buffer *node) btrfs_header_owner(node), node->start, nr == 0 ? "small" : "large", nr, BTRFS_NODEPTRS_PER_BLOCK(fs_info)); - return -EUCLEAN; + return BTRFS_TREE_BLOCK_INVALID_NRITEMS; } for (slot = 0; slot < nr - 1; slot++) { @@ -1869,15 +1877,13 @@ int btrfs_check_node(struct extent_buffer *node) if (unlikely(!bytenr)) { generic_err(node, slot, "invalid NULL node pointer"); - ret = -EUCLEAN; - goto out; + return BTRFS_TREE_BLOCK_INVALID_BLOCKPTR; } if (unlikely(!IS_ALIGNED(bytenr, fs_info->sectorsize))) { generic_err(node, slot, "unaligned pointer, have %llu should be aligned to %u", bytenr, fs_info->sectorsize); - ret = -EUCLEAN; - goto out; + return BTRFS_TREE_BLOCK_INVALID_BLOCKPTR; } if (unlikely(btrfs_comp_cpu_keys(&key, &next_key) >= 0)) { @@ -1886,12 +1892,20 @@ int btrfs_check_node(struct extent_buffer *node) key.objectid, key.type, key.offset, next_key.objectid, next_key.type, next_key.offset); - ret = -EUCLEAN; - goto out; + return BTRFS_TREE_BLOCK_BAD_KEY_ORDER; } } -out: - return ret; + return BTRFS_TREE_BLOCK_CLEAN; +} + +int btrfs_check_node(struct extent_buffer *node) +{ + enum btrfs_tree_block_status ret; + + ret = __btrfs_check_node(node); + if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN)) + return -EUCLEAN; + return 0; } ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO); @@ -1949,3 +1963,61 @@ int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner) } return 0; } + +int btrfs_verify_level_key(struct extent_buffer *eb, int level, + struct btrfs_key *first_key, u64 parent_transid) +{ + struct btrfs_fs_info *fs_info = eb->fs_info; + int found_level; + struct btrfs_key found_key; + int ret; + + found_level = btrfs_header_level(eb); + if (found_level != level) { + WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), + KERN_ERR "BTRFS: tree level check failed\n"); + btrfs_err(fs_info, +"tree level mismatch detected, bytenr=%llu level expected=%u has=%u", + eb->start, level, found_level); + return -EIO; + } + + if (!first_key) + return 0; + + /* + * For live tree block (new tree blocks in current transaction), + * we need proper lock context to avoid race, which is impossible here. + * So we only checks tree blocks which is read from disk, whose + * generation <= fs_info->last_trans_committed. + */ + if (btrfs_header_generation(eb) > fs_info->last_trans_committed) + return 0; + + /* We have @first_key, so this @eb must have at least one item */ + if (btrfs_header_nritems(eb) == 0) { + btrfs_err(fs_info, + "invalid tree nritems, bytenr=%llu nritems=0 expect >0", + eb->start); + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); + return -EUCLEAN; + } + + if (found_level) + btrfs_node_key_to_cpu(eb, &found_key, 0); + else + btrfs_item_key_to_cpu(eb, &found_key, 0); + ret = btrfs_comp_cpu_keys(first_key, &found_key); + + if (ret) { + WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), + KERN_ERR "BTRFS: tree first key check failed\n"); + btrfs_err(fs_info, +"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)", + eb->start, parent_transid, first_key->objectid, + first_key->type, first_key->offset, + found_key.objectid, found_key.type, + found_key.offset); + } + return ret; +} |