Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--  fs/btrfs/extent-tree.c | 293
1 file changed, 169 insertions, 124 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 911908ea5f6f..f356f08b55cb 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -69,27 +69,6 @@ static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
 	return (cache->flags & bits) == bits;
 }
 
-int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
-			      u64 start, u64 num_bytes)
-{
-	u64 end = start + num_bytes - 1;
-	set_extent_bit(&fs_info->excluded_extents, start, end,
-		       EXTENT_UPTODATE, NULL);
-	return 0;
-}
-
-void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
-{
-	struct btrfs_fs_info *fs_info = cache->fs_info;
-	u64 start, end;
-
-	start = cache->start;
-	end = start + cache->length - 1;
-
-	clear_extent_bits(&fs_info->excluded_extents, start, end,
-			  EXTENT_UPTODATE);
-}
-
 /* simple helper to search for an existing data extent at a given offset */
 int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
 {
@@ -187,8 +166,10 @@ search_again:
 			num_refs = btrfs_extent_refs(leaf, ei);
 			extent_flags = btrfs_extent_flags(leaf, ei);
 		} else {
-			ret = -EINVAL;
-			btrfs_print_v0_err(fs_info);
+			ret = -EUCLEAN;
+			btrfs_err(fs_info,
+			"unexpected extent item size, has %u expect >= %zu",
+				  item_size, sizeof(*ei));
 			if (trans)
 				btrfs_abort_transaction(trans, ret);
 			else
@@ -402,11 +383,11 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
 		}
 	}
 
+	WARN_ON(1);
 	btrfs_print_leaf(eb);
 	btrfs_err(eb->fs_info,
 		  "eb %llu iref 0x%lx invalid extent inline ref type %d",
 		  eb->start, (unsigned long)iref, type);
-	WARN_ON(1);
 
 	return BTRFS_REF_TYPE_INVALID;
 }
@@ -624,12 +605,12 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
 		ref2 = btrfs_item_ptr(leaf, path->slots[0],
 				      struct btrfs_shared_data_ref);
 		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
-	} else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
-		btrfs_print_v0_err(trans->fs_info);
-		btrfs_abort_transaction(trans, -EINVAL);
-		return -EINVAL;
 	} else {
-		BUG();
+		btrfs_err(trans->fs_info,
+			  "unrecognized backref key (%llu %u %llu)",
+			  key.objectid, key.type, key.offset);
+		btrfs_abort_transaction(trans, -EUCLEAN);
+		return -EUCLEAN;
 	}
 
 	BUG_ON(num_refs < refs_to_drop);
@@ -660,7 +641,6 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
 	leaf = path->nodes[0];
 	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 
-	BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
 	if (iref) {
 		/*
 		 * If type is invalid, we should have bailed out earlier than
@@ -869,6 +849,11 @@ again:
 			err = -ENOENT;
 			goto out;
 		} else if (WARN_ON(ret)) {
+			btrfs_print_leaf(path->nodes[0]);
+			btrfs_err(fs_info,
+"extent item not found for insert, bytenr %llu num_bytes %llu parent %llu root_objectid %llu owner %llu offset %llu",
+				  bytenr, num_bytes, parent, root_objectid, owner,
+				  offset);
 			err = -EIO;
 			goto out;
 		}
@@ -876,8 +861,10 @@ again:
 	leaf = path->nodes[0];
 	item_size = btrfs_item_size(leaf, path->slots[0]);
 	if (unlikely(item_size < sizeof(*ei))) {
-		err = -EINVAL;
-		btrfs_print_v0_err(fs_info);
+		err = -EUCLEAN;
+		btrfs_err(fs_info,
+			  "unexpected extent item size, has %llu expect >= %zu",
+			  item_size, sizeof(*ei));
 		btrfs_abort_transaction(trans, err);
 		goto out;
 	}
@@ -1079,13 +1066,13 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
 /*
  * helper to update/remove inline back ref
  */
-static noinline_for_stack
-void update_inline_extent_backref(struct btrfs_path *path,
+static noinline_for_stack int update_inline_extent_backref(struct btrfs_path *path,
 				  struct btrfs_extent_inline_ref *iref,
 				  int refs_to_mod,
 				  struct btrfs_delayed_extent_op *extent_op)
 {
 	struct extent_buffer *leaf = path->nodes[0];
+	struct btrfs_fs_info *fs_info = leaf->fs_info;
 	struct btrfs_extent_item *ei;
 	struct btrfs_extent_data_ref *dref = NULL;
 	struct btrfs_shared_data_ref *sref = NULL;
@@ -1098,18 +1085,33 @@ void update_inline_extent_backref(struct btrfs_path *path,
 
 	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
 	refs = btrfs_extent_refs(leaf, ei);
-	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
+	if (unlikely(refs_to_mod < 0 && refs + refs_to_mod <= 0)) {
+		struct btrfs_key key;
+		u32 extent_size;
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.type == BTRFS_METADATA_ITEM_KEY)
+			extent_size = fs_info->nodesize;
+		else
+			extent_size = key.offset;
+		btrfs_print_leaf(leaf);
+		btrfs_err(fs_info,
+	"invalid refs_to_mod for extent %llu num_bytes %u, has %d expect >= -%llu",
+			  key.objectid, extent_size, refs_to_mod, refs);
+		return -EUCLEAN;
+	}
 	refs += refs_to_mod;
 	btrfs_set_extent_refs(leaf, ei, refs);
 	if (extent_op)
 		__run_delayed_extent_op(extent_op, leaf, ei);
 
+	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
 	/*
-	 * If type is invalid, we should have bailed out after
-	 * lookup_inline_extent_backref().
+	 * Function btrfs_get_extent_inline_ref_type() has already printed
+	 * error messages.
 	 */
-	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
-	ASSERT(type != BTRFS_REF_TYPE_INVALID);
+	if (unlikely(type == BTRFS_REF_TYPE_INVALID))
+		return -EUCLEAN;
 
 	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
 		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
@@ -1119,10 +1121,43 @@ void update_inline_extent_backref(struct btrfs_path *path,
 		refs = btrfs_shared_data_ref_count(leaf, sref);
 	} else {
 		refs = 1;
-		BUG_ON(refs_to_mod != -1);
+		/*
+		 * For tree blocks we can only drop one ref for it, and tree
+		 * blocks should not have refs > 1.
+		 *
+		 * Furthermore if we're inserting a new inline backref, we
+		 * won't reach this path either. That would be
+		 * setup_inline_extent_backref().
+		 */
+		if (unlikely(refs_to_mod != -1)) {
+			struct btrfs_key key;
+
+			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+			btrfs_print_leaf(leaf);
+			btrfs_err(fs_info,
+			"invalid refs_to_mod for tree block %llu, has %d expect -1",
+				  key.objectid, refs_to_mod);
+			return -EUCLEAN;
+		}
 	}
 
-	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
+	if (unlikely(refs_to_mod < 0 && refs < -refs_to_mod)) {
+		struct btrfs_key key;
+		u32 extent_size;
+
+		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+		if (key.type == BTRFS_METADATA_ITEM_KEY)
+			extent_size = fs_info->nodesize;
+		else
+			extent_size = key.offset;
+		btrfs_print_leaf(leaf);
+		btrfs_err(fs_info,
+"invalid refs_to_mod for backref entry, iref %lu extent %llu num_bytes %u, has %d expect >= -%llu",
+			  (unsigned long)iref, key.objectid, extent_size,
+			  refs_to_mod, refs);
+		return -EUCLEAN;
+	}
 	refs += refs_to_mod;
 
 	if (refs > 0) {
@@ -1142,6 +1177,7 @@ void update_inline_extent_backref(struct btrfs_path *path,
 		btrfs_truncate_item(path, item_size, 1);
 	}
 	btrfs_mark_buffer_dirty(leaf);
+	return 0;
 }
 
 static noinline_for_stack
@@ -1170,7 +1206,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
 				  bytenr, num_bytes, root_objectid, path->slots[0]);
 			return -EUCLEAN;
 		}
-		update_inline_extent_backref(path, iref, refs_to_add, extent_op);
+		ret = update_inline_extent_backref(path, iref, refs_to_add, extent_op);
 	} else if (ret == -ENOENT) {
 		setup_inline_extent_backref(trans->fs_info, path, iref, parent,
 					    root_objectid, owner, offset,
@@ -1190,7 +1226,7 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
 
 	BUG_ON(!is_data && refs_to_drop != 1);
 	if (iref)
-		update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
+		ret = update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
 	else if (is_data)
 		ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
 	else
@@ -1629,8 +1665,10 @@ again:
 	item_size = btrfs_item_size(leaf, path->slots[0]);
 
 	if (unlikely(item_size < sizeof(*ei))) {
-		err = -EINVAL;
-		btrfs_print_v0_err(fs_info);
+		err = -EUCLEAN;
+		btrfs_err(fs_info,
+			  "unexpected extent item size, has %u expect >= %zu",
+			  item_size, sizeof(*ei));
 		btrfs_abort_transaction(trans, err);
 		goto out;
 	}
@@ -2751,9 +2789,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
 		struct extent_state *cached_state = NULL;
 
 		mutex_lock(&fs_info->unused_bg_unpin_mutex);
-		ret = find_first_extent_bit(unpin, 0, &start, &end,
-					    EXTENT_DIRTY, &cached_state);
-		if (ret) {
+		if (!find_first_extent_bit(unpin, 0, &start, &end,
+					   EXTENT_DIRTY, &cached_state)) {
 			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 			break;
 		}
@@ -3059,8 +3096,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 	leaf = path->nodes[0];
 	item_size = btrfs_item_size(leaf, extent_slot);
 	if (unlikely(item_size < sizeof(*ei))) {
-		ret = -EINVAL;
-		btrfs_print_v0_err(info);
+		ret = -EUCLEAN;
+		btrfs_err(trans->fs_info,
+			  "unexpected extent item size, has %u expect >= %zu",
+			  item_size, sizeof(*ei));
 		btrfs_abort_transaction(trans, ret);
 		goto out;
 	}
@@ -3351,11 +3390,38 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
 }
 
 enum btrfs_loop_type {
+	/*
+	 * Start caching block groups but do not wait for progress or for them
+	 * to be done.
+	 */
 	LOOP_CACHING_NOWAIT,
+
+	/*
+	 * Wait for the block group free_space >= the space we're waiting for if
+	 * the block group isn't cached.
+	 */
 	LOOP_CACHING_WAIT,
+
+	/*
+	 * Allow allocations to happen from block groups that do not yet have a
+	 * size classification.
+	 */
 	LOOP_UNSET_SIZE_CLASS,
+
+	/*
+	 * Allocate a chunk and then retry the allocation.
+	 */
 	LOOP_ALLOC_CHUNK,
+
+	/*
+	 * Ignore the size class restrictions for this allocation.
+	 */
 	LOOP_WRONG_SIZE_CLASS,
+
+	/*
+	 * Ignore the empty size, only try to allocate the number of bytes
+	 * needed for this allocation.
+	 */
 	LOOP_NO_EMPTY_SIZE,
 };
 
@@ -3427,7 +3493,6 @@ btrfs_release_block_group(struct btrfs_block_group *cache,
  * Helper function for find_free_extent().
  *
  * Return -ENOENT to inform caller that we need fallback to unclustered mode.
- * Return -EAGAIN to inform caller that we need to re-search this block group
 * Return >0 to inform caller that we find nothing
 * Return 0 means we have found a location and set ffe_ctl->found_offset.
 */
@@ -3508,14 +3573,6 @@ refill_cluster:
 			trace_btrfs_reserve_extent_cluster(bg, ffe_ctl);
 			return 0;
 		}
-	} else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
-		   !ffe_ctl->retry_clustered) {
-		spin_unlock(&last_ptr->refill_lock);
-
-		ffe_ctl->retry_clustered = true;
-		btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
-				ffe_ctl->empty_cluster + ffe_ctl->empty_size);
-		return -EAGAIN;
 	}
 	/*
 	 * At this point we either didn't find a cluster or we weren't able to
@@ -3530,7 +3587,6 @@ refill_cluster:
 /*
 * Return >0 to inform caller that we find nothing
 * Return 0 when we found an free extent and set ffe_ctrl->found_offset
- * Return -EAGAIN to inform caller that we need to re-search this block group
 */
 static int find_free_extent_unclustered(struct btrfs_block_group *bg,
 					struct find_free_extent_ctl *ffe_ctl)
@@ -3568,25 +3624,8 @@ static int find_free_extent_unclustered(struct btrfs_block_group *bg,
 	offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
 			ffe_ctl->num_bytes, ffe_ctl->empty_size,
 			&ffe_ctl->max_extent_size);
-
-	/*
-	 * If we didn't find a chunk, and we haven't failed on this block group
-	 * before, and this block group is in the middle of caching and we are
-	 * ok with waiting, then go ahead and wait for progress to be made, and
-	 * set @retry_unclustered to true.
-	 *
-	 * If @retry_unclustered is true then we've already waited on this
-	 * block group once and should move on to the next block group.
-	 */
-	if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
-	    ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
-		btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
-						      ffe_ctl->empty_size);
-		ffe_ctl->retry_unclustered = true;
-		return -EAGAIN;
-	} else if (!offset) {
+	if (!offset)
 		return 1;
-	}
 	ffe_ctl->found_offset = offset;
 	return 0;
 }
@@ -3600,7 +3639,7 @@ static int do_allocation_clustered(struct btrfs_block_group *block_group,
 	/* We want to try and use the cluster allocator, so lets look there */
 	if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
 		ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
-		if (ret >= 0 || ret == -EAGAIN)
+		if (ret >= 0)
 			return ret;
 		/* ret == -ENOENT case falls through */
 	}
@@ -3685,7 +3724,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 	}
 	spin_unlock(&block_group->lock);
 
-	if (!ret && !btrfs_zone_activate(block_group)) {
+	/* Metadata block group is activated at write time. */
+	if (!ret && (block_group->flags & BTRFS_BLOCK_GROUP_DATA) &&
+	    !btrfs_zone_activate(block_group)) {
 		ret = 1;
 		/*
 		 * May need to clear fs_info->{treelog,data_reloc}_bg.
@@ -3709,7 +3750,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 	       fs_info->data_reloc_bg == 0);
 
 	if (block_group->ro ||
-	    test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
+	    (!ffe_ctl->for_data_reloc &&
+	     test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags))) {
 		ret = 1;
 		goto out;
 	}
@@ -3752,8 +3794,26 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 	if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
 		fs_info->treelog_bg = block_group->start;
 
-	if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg)
-		fs_info->data_reloc_bg = block_group->start;
+	if (ffe_ctl->for_data_reloc) {
+		if (!fs_info->data_reloc_bg)
+			fs_info->data_reloc_bg = block_group->start;
+		/*
+		 * Do not allow allocations from this block group, unless it is
+		 * for data relocation. Compared to increasing the ->ro, setting
+		 * the ->zoned_data_reloc_ongoing flag still allows nocow
+		 * writers to come in. See btrfs_inc_nocow_writers().
+		 *
+		 * We need to disable an allocation to avoid an allocation of
+		 * regular (non-relocation data) extent. With mix of relocation
+		 * extents and regular extents, we can dispatch WRITE commands
+		 * (for relocation extents) and ZONE APPEND commands (for
+		 * regular extents) at the same time to the same zone, which
+		 * easily break the write pointer.
+		 *
+		 * Also, this flag avoids this block group to be zone finished.
+		 */
+		set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
+	}
 
 	ffe_ctl->found_offset = start + block_group->alloc_offset;
 	block_group->alloc_offset += num_bytes;
@@ -3771,24 +3831,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 out:
 	if (ret && ffe_ctl->for_treelog)
 		fs_info->treelog_bg = 0;
-	if (ret && ffe_ctl->for_data_reloc &&
-	    fs_info->data_reloc_bg == block_group->start) {
-		/*
-		 * Do not allow further allocations from this block group.
-		 * Compared to increasing the ->ro, setting the
-		 * ->zoned_data_reloc_ongoing flag still allows nocow
-		 * writers to come in. See btrfs_inc_nocow_writers().
-		 *
-		 * We need to disable an allocation to avoid an allocation of
-		 * regular (non-relocation data) extent. With mix of relocation
-		 * extents and regular extents, we can dispatch WRITE commands
-		 * (for relocation extents) and ZONE APPEND commands (for
-		 * regular extents) at the same time to the same zone, which
-		 * easily break the write pointer.
-		 */
-		set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
+	if (ret && ffe_ctl->for_data_reloc)
 		fs_info->data_reloc_bg = 0;
-	}
 	spin_unlock(&fs_info->relocation_bg_lock);
 	spin_unlock(&fs_info->treelog_bg_lock);
 	spin_unlock(&block_group->lock);
@@ -3816,8 +3860,7 @@ static void release_block_group(struct btrfs_block_group *block_group,
 {
 	switch (ffe_ctl->policy) {
 	case BTRFS_EXTENT_ALLOC_CLUSTERED:
-		ffe_ctl->retry_clustered = false;
-		ffe_ctl->retry_unclustered = false;
+		ffe_ctl->retry_uncached = false;
 		break;
 	case BTRFS_EXTENT_ALLOC_ZONED:
 		/* Nothing to do */
@@ -3861,6 +3904,10 @@ static void found_extent(struct find_free_extent_ctl *ffe_ctl,
 static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
 				    struct find_free_extent_ctl *ffe_ctl)
 {
+	/* Block group's activeness is not a requirement for METADATA block groups. */
+	if (!(ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA))
+		return 0;
+
 	/* If we can activate new zone, just allocate a chunk and use it */
 	if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
 		return 0;
@@ -3949,15 +3996,7 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
 	if (ffe_ctl->index < BTRFS_NR_RAID_TYPES)
 		return 1;
 
-	/*
-	 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
-	 *			caching kthreads as we move along
-	 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
-	 * LOOP_UNSET_SIZE_CLASS, allow unset size class
-	 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
-	 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
-	 *		       again
-	 */
+	/* See the comments for btrfs_loop_type for an explanation of the phases. */
 	if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
 		ffe_ctl->index = 0;
 		/*
@@ -4168,9 +4207,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
 	ffe_ctl->orig_have_caching_bg = false;
 	ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags);
 	ffe_ctl->loop = 0;
-	/* For clustered allocation */
-	ffe_ctl->retry_clustered = false;
-	ffe_ctl->retry_unclustered = false;
+	ffe_ctl->retry_uncached = false;
 	ffe_ctl->cached = 0;
 	ffe_ctl->max_extent_size = 0;
 	ffe_ctl->total_free_space = 0;
@@ -4310,24 +4347,23 @@ have_block_group:
 			ret = 0;
 		}
 
-		if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
+		if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) {
+			if (!cache_block_group_error)
+				cache_block_group_error = -EIO;
 			goto loop;
+		}
 
 		if (!find_free_extent_check_size_class(ffe_ctl, block_group))
 			goto loop;
 
 		bg_ret = NULL;
 		ret = do_allocation(block_group, ffe_ctl, &bg_ret);
-		if (ret == 0) {
-			if (bg_ret && bg_ret != block_group) {
-				btrfs_release_block_group(block_group,
-							  ffe_ctl->delalloc);
-				block_group = bg_ret;
-			}
-		} else if (ret == -EAGAIN) {
-			goto have_block_group;
-		} else if (ret > 0) {
+		if (ret > 0)
 			goto loop;
+
+		if (bg_ret && bg_ret != block_group) {
+			btrfs_release_block_group(block_group, ffe_ctl->delalloc);
+			block_group = bg_ret;
 		}
 
 		/* Checks */
@@ -4368,6 +4404,15 @@ have_block_group:
 		btrfs_release_block_group(block_group, ffe_ctl->delalloc);
 		break;
 loop:
+		if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
+		    !ffe_ctl->retry_uncached) {
+			ffe_ctl->retry_uncached = true;
+			btrfs_wait_block_group_cache_progress(block_group,
+						ffe_ctl->num_bytes +
+						ffe_ctl->empty_cluster +
+						ffe_ctl->empty_size);
+			goto have_block_group;
+		}
 		release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
 		cond_resched();
 	}
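The recurring change above is the replacement of BUG()/WARN_ON()/btrfs_print_v0_err() with a printed leaf, a descriptive btrfs_err() message and a returned -EUCLEAN that the caller turns into an aborted transaction. The sketch below is a minimal, self-contained userspace illustration of that "validate, report, return" pattern, not kernel code; the names check_item_size(), handle_item() and MIN_ITEM_SIZE are invented for the example, and only -EUCLEAN ("structure needs cleaning") is taken from the Linux errno set.

/* Illustrative userspace sketch of the "validate, report, return -EUCLEAN"
 * pattern; all names here are hypothetical and not from fs/btrfs.
 */
#include <errno.h>
#include <stdio.h>

struct item { unsigned int size; };

#define MIN_ITEM_SIZE 16u

/* Old style would crash here (BUG_ON(item->size < MIN_ITEM_SIZE)).
 * New style: print enough context to debug the corruption, then hand the
 * error back so the caller can fail gracefully (in the kernel: abort the
 * transaction and flip the filesystem read-only).
 */
static int check_item_size(const struct item *item)
{
	if (item->size < MIN_ITEM_SIZE) {
		fprintf(stderr,
			"unexpected extent item size, has %u expect >= %u\n",
			item->size, MIN_ITEM_SIZE);
		return -EUCLEAN;	/* "structure needs cleaning" */
	}
	return 0;
}

static int handle_item(const struct item *item)
{
	int ret = check_item_size(item);

	if (ret < 0)
		return ret;	/* propagate instead of crashing */
	/* ... use the item ... */
	return 0;
}

int main(void)
{
	struct item bad = { .size = 8 };

	return handle_item(&bad) ? 1 : 0;
}

The design point the series makes is that on-disk corruption should be reported and returned as an error, never asserted on, since the data is outside the kernel's control.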
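The allocator portion of the diff drops the per-policy -EAGAIN/retry_clustered/retry_unclustered handling in favour of a single retry_uncached retry taken at the common loop: label of find_free_extent(). The following is a hypothetical, heavily simplified sketch of that control flow only; every name and the toy "caching" step are invented for illustration and do not reflect the real btrfs data structures.

/* Hypothetical sketch of the single retry point shared by all policies. */
#include <stdbool.h>
#include <stdio.h>

struct bg { int id; bool cached; int free; };

static int try_alloc(struct bg *bg, int need)
{
	return bg->free >= need ? 0 : 1;	/* >0: nothing found here */
}

static void wait_cache_progress(struct bg *bg, int need)
{
	/* stand-in for waiting on the caching thread to make progress */
	bg->cached = true;
	bg->free += need;	/* pretend caching discovered more free space */
}

static int find_free_extent(struct bg *groups, int ngroups, int need)
{
	for (int i = 0; i < ngroups; i++) {
		struct bg *bg = &groups[i];
		bool retry_uncached = false;

have_block_group:
		if (try_alloc(bg, need) == 0)
			return bg->id;			/* found */
		/* One retry per uncached block group, taken in the common
		 * path instead of inside each allocation policy helper.
		 */
		if (!bg->cached && !retry_uncached) {
			retry_uncached = true;
			wait_cache_progress(bg, need);
			goto have_block_group;
		}
	}
	return -1;
}

int main(void)
{
	struct bg groups[] = { { 1, false, 0 }, { 2, true, 64 } };

	printf("allocated from block group %d\n",
	       find_free_extent(groups, 2, 16));
	return 0;
}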