diff options
-rw-r--r-- | fs/btrfs/ctree.c | 2 | ||||
-rw-r--r-- | fs/btrfs/delayed-ref.c | 4 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 25 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.h | 3 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 7 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 10 | ||||
-rw-r--r-- | fs/btrfs/qgroup.c | 10 | ||||
-rw-r--r-- | fs/btrfs/raid-stripe-tree.c | 2 | ||||
-rw-r--r-- | fs/btrfs/scrub.c | 10 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 6 | ||||
-rw-r--r-- | fs/btrfs/zoned.c | 7 | ||||
-rw-r--r-- | include/uapi/linux/btrfs_tree.h | 24 |
12 files changed, 69 insertions, 41 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 2a9344a3fcee..35c1d24d4a78 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -432,7 +432,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, if (btrfs_block_can_be_shared(trans, root, buf)) { ret = btrfs_lookup_extent_info(trans, fs_info, buf->start, btrfs_header_level(buf), 1, - &refs, &flags); + &refs, &flags, NULL); if (ret) return ret; if (unlikely(refs == 0)) { diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 9223934d95f4..891ea2fa263c 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -1041,7 +1041,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, return -ENOMEM; } - if (btrfs_qgroup_enabled(fs_info) && !generic_ref->skip_qgroup) { + if (btrfs_qgroup_full_accounting(fs_info) && !generic_ref->skip_qgroup) { record = kzalloc(sizeof(*record), GFP_NOFS); if (!record) { kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); @@ -1144,7 +1144,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, return -ENOMEM; } - if (btrfs_qgroup_enabled(fs_info) && !generic_ref->skip_qgroup) { + if (btrfs_qgroup_full_accounting(fs_info) && !generic_ref->skip_qgroup) { record = kzalloc(sizeof(*record), GFP_NOFS); if (!record) { kmem_cache_free(btrfs_delayed_data_ref_cachep, ref); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c8e5b4715b49..0455935ff558 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -102,7 +102,8 @@ int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len) */ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, - u64 offset, int metadata, u64 *refs, u64 *flags) + u64 offset, int metadata, u64 *refs, u64 *flags, + u64 *owning_root) { struct btrfs_root *extent_root; struct btrfs_delayed_ref_head *head; @@ -114,6 +115,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, u32 item_size; 
u64 num_refs; u64 extent_flags; + u64 owner = 0; int ret; /* @@ -167,6 +169,8 @@ search_again: struct btrfs_extent_item); num_refs = btrfs_extent_refs(leaf, ei); extent_flags = btrfs_extent_flags(leaf, ei); + owner = btrfs_get_extent_owner_root(fs_info, leaf, + path->slots[0]); } else { ret = -EUCLEAN; btrfs_err(fs_info, @@ -226,6 +230,8 @@ out: *refs = num_refs; if (flags) *flags = extent_flags; + if (owning_root) + *owning_root = owner; out_free: btrfs_free_path(path); return ret; @@ -5234,7 +5240,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, /* We don't lock the tree block, it's OK to be racy here */ ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, wc->level - 1, 1, &refs, - &flags); + &flags, NULL); /* We don't care about errors in readahead. */ if (ret < 0) continue; @@ -5301,7 +5307,8 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, ret = btrfs_lookup_extent_info(trans, fs_info, eb->start, level, 1, &wc->refs[level], - &wc->flags[level]); + &wc->flags[level], + NULL); BUG_ON(ret == -ENOMEM); if (ret) return ret; @@ -5391,6 +5398,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, u64 bytenr; u64 generation; u64 parent; + u64 owner_root = 0; struct btrfs_tree_parent_check check = { 0 }; struct btrfs_key key; struct btrfs_ref ref = { 0 }; @@ -5434,7 +5442,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1, &wc->refs[level - 1], - &wc->flags[level - 1]); + &wc->flags[level - 1], + &owner_root); if (ret < 0) goto out_unlock; @@ -5567,8 +5576,7 @@ skip: find_next_key(path, level, &wc->drop_progress); btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr, - fs_info->nodesize, parent, - btrfs_header_owner(next)); + fs_info->nodesize, parent, owner_root); btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid, 0, false); ret = btrfs_free_extent(trans, &ref); @@ -5635,7 +5643,8 @@ static 
noinline int walk_up_proc(struct btrfs_trans_handle *trans, ret = btrfs_lookup_extent_info(trans, fs_info, eb->start, level, 1, &wc->refs[level], - &wc->flags[level]); + &wc->flags[level], + NULL); if (ret < 0) { btrfs_tree_unlock_rw(eb, path->locks[level]); path->locks[level] = 0; @@ -5880,7 +5889,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc) ret = btrfs_lookup_extent_info(trans, fs_info, path->nodes[level]->start, level, 1, &wc->refs[level], - &wc->flags[level]); + &wc->flags[level], NULL); if (ret < 0) { err = ret; goto out_end_trans; diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h index 0716f65d9753..2e066035ccee 100644 --- a/fs/btrfs/extent-tree.h +++ b/fs/btrfs/extent-tree.h @@ -99,7 +99,8 @@ u64 btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len); int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, - u64 offset, int metadata, u64 *refs, u64 *flags); + u64 offset, int metadata, u64 *refs, u64 *flags, + u64 *owner_root); int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num, int reserved); int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5e3fccddde0c..9f5a9894f88f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6983,8 +6983,15 @@ static struct extent_map *btrfs_new_extent_direct(struct btrfs_inode *inode, int ret; alloc_hint = get_extent_allocation_hint(inode, start, len); +again: ret = btrfs_reserve_extent(root, len, len, fs_info->sectorsize, 0, alloc_hint, &ins, 1, 1); + if (ret == -EAGAIN) { + ASSERT(btrfs_is_zoned(fs_info)); + wait_on_bit_io(&inode->root->fs_info->flags, BTRFS_FS_NEED_ZONE_FINISH, + TASK_UNINTERRUPTIBLE); + goto again; + } if (ret) return ERR_PTR(ret); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 752acff2c734..dfe257e1845b 100644 
--- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1528,7 +1528,7 @@ static noinline int key_in_sk(struct btrfs_key *key, static noinline int copy_to_sk(struct btrfs_path *path, struct btrfs_key *key, struct btrfs_ioctl_search_key *sk, - size_t *buf_size, + u64 *buf_size, char __user *ubuf, unsigned long *sk_offset, int *num_found) @@ -1660,7 +1660,7 @@ out: static noinline int search_ioctl(struct inode *inode, struct btrfs_ioctl_search_key *sk, - size_t *buf_size, + u64 *buf_size, char __user *ubuf) { struct btrfs_fs_info *info = btrfs_sb(inode->i_sb); @@ -1733,7 +1733,7 @@ static noinline int btrfs_ioctl_tree_search(struct inode *inode, struct btrfs_ioctl_search_args __user *uargs = argp; struct btrfs_ioctl_search_key sk; int ret; - size_t buf_size; + u64 buf_size; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1763,8 +1763,8 @@ static noinline int btrfs_ioctl_tree_search_v2(struct inode *inode, struct btrfs_ioctl_search_args_v2 __user *uarg = argp; struct btrfs_ioctl_search_args_v2 args; int ret; - size_t buf_size; - const size_t buf_limit = SZ_16M; + u64 buf_size; + const u64 buf_limit = SZ_16M; if (!capable(CAP_SYS_ADMIN)) return -EPERM; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index edb84cc03237..ce446d9d7f23 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1888,7 +1888,7 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, u64 bytenr = record->bytenr; if (!btrfs_qgroup_full_accounting(fs_info)) - return 0; + return 1; lockdep_assert_held(&delayed_refs->lock); trace_btrfs_qgroup_trace_extent(fs_info, record); @@ -2875,12 +2875,18 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr, num_bytes, seq); /* + * We're done using the iterator, release all its qgroups while holding + * fs_info->qgroup_lock so that we don't race with btrfs_remove_qgroup() + * and trigger use-after-free accesses to qgroups. 
+ */ + qgroup_iterator_nested_clean(&qgroups); + + /* * Bump qgroup_seq to avoid seq overlap */ fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1; spin_unlock(&fs_info->qgroup_lock); out_free: - qgroup_iterator_nested_clean(&qgroups); ulist_free(old_roots); ulist_free(new_roots); return ret; diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c index 944e8f1862aa..9589362acfbf 100644 --- a/fs/btrfs/raid-stripe-tree.c +++ b/fs/btrfs/raid-stripe-tree.c @@ -145,7 +145,7 @@ int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans, btrfs_put_bioc(bioc); } - return ret; + return 0; } int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info, diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 9ce5be21b036..f62a408671cb 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1868,6 +1868,9 @@ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group * */ ASSERT(sctx->cur_stripe < SCRUB_TOTAL_STRIPES); + /* @found_logical_ret must be specified. */ + ASSERT(found_logical_ret); + stripe = &sctx->stripes[sctx->cur_stripe]; scrub_reset_stripe(stripe); ret = scrub_find_fill_first_stripe(bg, &sctx->extent_path, @@ -1876,8 +1879,7 @@ static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group * /* Either >0 as no more extents or <0 for error. */ if (ret) return ret; - if (found_logical_ret) - *found_logical_ret = stripe->logical; + *found_logical_ret = stripe->logical; sctx->cur_stripe++; /* We filled one group, submit it. */ @@ -2080,7 +2082,7 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx, /* Go through each extent items inside the logical range */ while (cur_logical < logical_end) { - u64 found_logical; + u64 found_logical = U64_MAX; u64 cur_physical = physical + cur_logical - logical_start; /* Canceled? */ @@ -2115,6 +2117,8 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx, if (ret < 0) break; + /* queue_scrub_stripe() returned 0, @found_logical must be updated. 
*/ + ASSERT(found_logical != U64_MAX); cur_logical = found_logical + BTRFS_STRIPE_LEN; /* Don't hold CPU for too long time */ diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c87e18827a0a..c6f16625af51 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -748,13 +748,13 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (!fs_devices) { fs_devices = alloc_fs_devices(disk_super->fsid); + if (IS_ERR(fs_devices)) + return ERR_CAST(fs_devices); + if (has_metadata_uuid) memcpy(fs_devices->metadata_uuid, disk_super->metadata_uuid, BTRFS_FSID_SIZE); - if (IS_ERR(fs_devices)) - return ERR_CAST(fs_devices); - if (same_fsid_diff_dev) { generate_random_uuid(fs_devices->fsid); fs_devices->temp_fsid = true; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 3504ade30cb0..188378ca19c7 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1661,13 +1661,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) } out: - if (cache->alloc_offset > fs_info->zone_size) { - btrfs_err(fs_info, - "zoned: invalid write pointer %llu in block group %llu", - cache->alloc_offset, cache->start); - ret = -EIO; - } - if (cache->alloc_offset > cache->zone_capacity) { btrfs_err(fs_info, "zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu", diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index c25fc9614594..d24e8e121507 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h @@ -219,6 +219,22 @@ */ #define BTRFS_METADATA_ITEM_KEY 169 +/* + * Special inline ref key which stores the id of the subvolume which originally + * created the extent. This subvolume owns the extent permanently from the + * perspective of simple quotas. Needed to know which subvolume to free quota + * usage from when the extent is deleted. + * + * Stored as an inline ref rather to avoid wasting space on a separate item on + * top of the existing extent item. 
However, unlike the other inline refs, + * there is only one owner ref per extent rather than one per reference. + * + * Because of this, it goes at the front of the list of inline refs, and thus + * must have a lower type value than any other inline ref type (to satisfy the + * disk format rule that inline refs have non-decreasing type). + */ +#define BTRFS_EXTENT_OWNER_REF_KEY 172 + #define BTRFS_TREE_BLOCK_REF_KEY 176 #define BTRFS_EXTENT_DATA_REF_KEY 178 @@ -234,14 +250,6 @@ #define BTRFS_SHARED_DATA_REF_KEY 184 /* - * Special inline ref key which stores the id of the subvolume which originally - * created the extent. This subvolume owns the extent permanently from the - * perspective of simple quotas. Needed to know which subvolume to free quota - * usage from when the extent is deleted. - */ -#define BTRFS_EXTENT_OWNER_REF_KEY 188 - -/* * block groups give us hints into the extent allocation trees. Which * blocks are free etc etc */ |