From f0486c68e4bd9a06a5904d3eeb3a0d73a83befb8 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 16 May 2010 10:46:25 -0400 Subject: Btrfs: Introduce contexts for metadata reservation Introducing metadata reseravtion contexts has two major advantages. First, it makes metadata reseravtion more traceable. Second, it can reclaim freed space and re-add them to the itself after transaction committed. Besides add btrfs_block_rsv structure and related helper functions, This patch contains following changes: Move code that decides if freed tree block should be pinned into btrfs_free_tree_block(). Make space accounting more accurate, mainly for handling read only block groups. Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e558dd941ded..d565b45a6352 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3514,6 +3514,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) struct btrfs_fs_info *fs_info = extent_root->fs_info; struct reloc_control *rc; int ret; + int rw = 0; int err = 0; rc = kzalloc(sizeof(*rc), GFP_NOFS); @@ -3524,15 +3525,22 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); INIT_LIST_HEAD(&rc->reloc_roots); + rc->extent_root = extent_root; rc->block_group = btrfs_lookup_block_group(fs_info, group_start); BUG_ON(!rc->block_group); + if (!rc->block_group->ro) { + ret = btrfs_set_block_group_ro(extent_root, rc->block_group); + if (ret) { + err = ret; + goto out; + } + rw = 1; + } + btrfs_init_workers(&rc->workers, "relocate", fs_info->thread_pool_size, NULL); - rc->extent_root = extent_root; - btrfs_prepare_block_group_relocation(extent_root, rc->block_group); - rc->data_inode = create_reloc_inode(fs_info, rc->block_group); if (IS_ERR(rc->data_inode)) { err = PTR_ERR(rc->data_inode); @@ -3597,6 +3605,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) WARN_ON(rc->block_group->reserved > 0); WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); out: + if (err && rw) + btrfs_set_block_group_rw(extent_root, rc->block_group); iput(rc->data_inode); btrfs_stop_workers(&rc->workers); btrfs_put_block_group(rc->block_group); -- cgit v1.2.3 From a22285a6a32390195235171b89d157ed1a1fe932 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 16 May 2010 10:48:46 -0400 Subject: Btrfs: Integrate metadata reservation with start_transaction Besides simplify the code, this change makes sure all metadata reservation for normal metadata operations are released after committing transaction. Changes since V1: Add code that check if unlink and rmdir will free space. Add ENOSPC handling for clone ioctl. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 20 ++- fs/btrfs/delayed-ref.c | 101 ------------- fs/btrfs/delayed-ref.h | 3 - fs/btrfs/disk-io.c | 6 +- fs/btrfs/extent-tree.c | 283 ++++++++++++++++++++-------------- fs/btrfs/file.c | 13 +- fs/btrfs/inode-item.c | 27 ++++ fs/btrfs/inode.c | 403 +++++++++++++++++++++++++++++++------------------ fs/btrfs/ioctl.c | 157 +++++++++---------- fs/btrfs/relocation.c | 18 +-- fs/btrfs/super.c | 2 +- fs/btrfs/transaction.c | 131 +++++++++++----- fs/btrfs/transaction.h | 13 +- fs/btrfs/volumes.c | 17 +-- fs/btrfs/xattr.c | 12 +- 15 files changed, 678 insertions(+), 528 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7d2479694a58..e0aa9fb563e2 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -34,6 +34,7 @@ struct btrfs_trans_handle; struct btrfs_transaction; +struct btrfs_pending_snapshot; extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_transaction_cachep; extern struct kmem_cache *btrfs_bit_radix_cachep; @@ -970,6 +971,7 @@ struct btrfs_fs_info { int do_barriers; int closing; int log_root_recovering; + int enospc_unlink; u64 total_pinned; @@ -1995,6 +1997,9 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, unsigned long count); int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); +int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u64 *refs, u64 *flags); int btrfs_pin_extent(struct btrfs_root *root, u64 bytenr, u64 num, int reserved); int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, @@ -2075,8 +2080,6 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); void btrfs_clear_space_info_full(struct btrfs_fs_info *info); -int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items); -int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items); int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, struct inode *inode, int num_items); int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, @@ -2089,6 +2092,13 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, u64 bytes); void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, u64 bytes); +int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + int num_items, int *retries); +void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, + struct btrfs_pending_snapshot *pending); void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv); struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root); void btrfs_free_block_rsv(struct btrfs_root *root, @@ -2296,6 +2306,12 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, u64 inode_objectid, u64 ref_objectid, u64 *index); +struct btrfs_inode_ref * +btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + const char *name, int name_len, + u64 inode_objectid, u64 ref_objectid, int mod); int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid); diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 902ce507c4e3..e807b143b857 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -318,107 +318,6 @@ out: return ret; } -/* - * helper function to lookup reference count and flags of extent. - * - * the head node for delayed ref is used to store the sum of all the - * reference count modifications queued up in the rbtree. the head - * node may also store the extent flags to set. This way you can check - * to see what the reference count and extent flags would be if all of - * the delayed refs are not processed. - */ -int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u64 *refs, u64 *flags) -{ - struct btrfs_delayed_ref_node *ref; - struct btrfs_delayed_ref_head *head; - struct btrfs_delayed_ref_root *delayed_refs; - struct btrfs_path *path; - struct btrfs_extent_item *ei; - struct extent_buffer *leaf; - struct btrfs_key key; - u32 item_size; - u64 num_refs; - u64 extent_flags; - int ret; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - key.objectid = bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = num_bytes; - delayed_refs = &trans->transaction->delayed_refs; -again: - ret = btrfs_search_slot(trans, root->fs_info->extent_root, - &key, path, 0, 0); - if (ret < 0) - goto out; - - if (ret == 0) { - leaf = path->nodes[0]; - item_size = btrfs_item_size_nr(leaf, path->slots[0]); - if (item_size >= sizeof(*ei)) { - ei = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_extent_item); - num_refs = btrfs_extent_refs(leaf, ei); - extent_flags = btrfs_extent_flags(leaf, ei); - } else { -#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 - struct btrfs_extent_item_v0 *ei0; - BUG_ON(item_size != sizeof(*ei0)); - ei0 = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_extent_item_v0); - num_refs = btrfs_extent_refs_v0(leaf, ei0); - /* FIXME: this isn't correct for data */ - extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; -#else - BUG(); -#endif - } - BUG_ON(num_refs == 0); - } else { - num_refs = 0; - extent_flags = 0; - ret = 0; - } - - spin_lock(&delayed_refs->lock); - ref = find_ref_head(&delayed_refs->root, bytenr, NULL); - if (ref) { - head = btrfs_delayed_node_to_head(ref); - if (!mutex_trylock(&head->mutex)) { - atomic_inc(&ref->refs); - spin_unlock(&delayed_refs->lock); - - btrfs_release_path(root->fs_info->extent_root, path); - - mutex_lock(&head->mutex); - mutex_unlock(&head->mutex); - btrfs_put_delayed_ref(ref); - goto again; - } - if (head->extent_op && head->extent_op->update_flags) - extent_flags |= head->extent_op->flags_to_set; - else - BUG_ON(num_refs == 0); - - num_refs += ref->ref_mod; - mutex_unlock(&head->mutex); - } - WARN_ON(num_refs == 0); - if (refs) - *refs = num_refs; - if (flags) - *flags = extent_flags; -out: - spin_unlock(&delayed_refs->lock); - btrfs_free_path(path); - return ret; -} - /* * helper function to update an extent delayed ref in the * rbtree. existing and update must both have the same diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index f6fc67ddad36..50e3cf92fbda 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -167,9 +167,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_head * btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); -int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr, - u64 num_bytes, u64 *refs, u64 *flags); int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 orig_parent, u64 parent, u64 orig_ref_root, u64 ref_root, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 574594cf6b51..054b4475c757 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1522,7 +1522,7 @@ static int transaction_kthread(void *arg) goto sleep; } mutex_unlock(&root->fs_info->trans_mutex); - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); sleep: @@ -2409,11 +2409,11 @@ int btrfs_commit_super(struct btrfs_root *root) down_write(&root->fs_info->cleanup_work_sem); up_write(&root->fs_info->cleanup_work_sem); - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); ret = btrfs_commit_transaction(trans, root); BUG_ON(ret); /* run commit again to drop the original snapshot */ - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); btrfs_commit_transaction(trans, root); ret = btrfs_write_and_wait_transaction(NULL, root); BUG_ON(ret); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3367278ac6a1..657df6e002d3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -615,6 +615,113 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) return ret; } +/* + * helper function to lookup reference count and flags of extent. + * + * the head node for delayed ref is used to store the sum of all the + * reference count modifications queued up in the rbtree. the head + * node may also store the extent flags to set. This way you can check + * to see what the reference count and extent flags would be if all of + * the delayed refs are not processed. + */ +int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 num_bytes, u64 *refs, u64 *flags) +{ + struct btrfs_delayed_ref_head *head; + struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_path *path; + struct btrfs_extent_item *ei; + struct extent_buffer *leaf; + struct btrfs_key key; + u32 item_size; + u64 num_refs; + u64 extent_flags; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = bytenr; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = num_bytes; + if (!trans) { + path->skip_locking = 1; + path->search_commit_root = 1; + } +again: + ret = btrfs_search_slot(trans, root->fs_info->extent_root, + &key, path, 0, 0); + if (ret < 0) + goto out_free; + + if (ret == 0) { + leaf = path->nodes[0]; + item_size = btrfs_item_size_nr(leaf, path->slots[0]); + if (item_size >= sizeof(*ei)) { + ei = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_item); + num_refs = btrfs_extent_refs(leaf, ei); + extent_flags = btrfs_extent_flags(leaf, ei); + } else { +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + struct btrfs_extent_item_v0 *ei0; + BUG_ON(item_size != sizeof(*ei0)); + ei0 = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_item_v0); + num_refs = btrfs_extent_refs_v0(leaf, ei0); + /* FIXME: this isn't correct for data */ + extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; +#else + BUG(); +#endif + } + BUG_ON(num_refs == 0); + } else { + num_refs = 0; + extent_flags = 0; + ret = 0; + } + + if (!trans) + goto out; + + delayed_refs = &trans->transaction->delayed_refs; + spin_lock(&delayed_refs->lock); + head = btrfs_find_delayed_ref_head(trans, bytenr); + if (head) { + if (!mutex_trylock(&head->mutex)) { + atomic_inc(&head->node.refs); + spin_unlock(&delayed_refs->lock); + + btrfs_release_path(root->fs_info->extent_root, path); + + mutex_lock(&head->mutex); + mutex_unlock(&head->mutex); + btrfs_put_delayed_ref(&head->node); + goto again; + } + if (head->extent_op && head->extent_op->update_flags) + extent_flags |= head->extent_op->flags_to_set; + else + BUG_ON(num_refs == 0); + + num_refs += head->node.ref_mod; + mutex_unlock(&head->mutex); + } + spin_unlock(&delayed_refs->lock); +out: + WARN_ON(num_refs == 0); + if (refs) + *refs = num_refs; + if (flags) + *flags = extent_flags; +out_free: + btrfs_free_path(path); + return ret; +} + /* * Back reference rules. Back refs have three main goals: * @@ -2948,113 +3055,6 @@ again: return 0; } -/* - * unreserve num_items number of items worth of metadata space. This needs to - * be paired with btrfs_reserve_metadata_space. - * - * NOTE: if you have the option, run this _AFTER_ you do a - * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref - * oprations which will result in more used metadata, so we want to make sure we - * can do that without issue. - */ -int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) -{ - struct btrfs_fs_info *info = root->fs_info; - struct btrfs_space_info *meta_sinfo; - u64 num_bytes; - u64 alloc_target; - bool bug = false; - - /* get the space info for where the metadata will live */ - alloc_target = btrfs_get_alloc_profile(root, 0); - meta_sinfo = __find_space_info(info, alloc_target); - - num_bytes = calculate_bytes_needed(root, num_items); - - spin_lock(&meta_sinfo->lock); - if (meta_sinfo->bytes_may_use < num_bytes) { - bug = true; - meta_sinfo->bytes_may_use = 0; - } else { - meta_sinfo->bytes_may_use -= num_bytes; - } - spin_unlock(&meta_sinfo->lock); - - BUG_ON(bug); - - return 0; -} - -/* - * Reserve some metadata space for use. We'll calculate the worste case number - * of bytes that would be needed to modify num_items number of items. If we - * have space, fantastic, if not, you get -ENOSPC. Please call - * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of - * items you reserved, since whatever metadata you needed should have already - * been allocated. - * - * This will commit the transaction to make more space if we don't have enough - * metadata space. THe only time we don't do this is if we're reserving space - * inside of a transaction, then we will just return -ENOSPC and it is the - * callers responsibility to handle it properly. - */ -int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) -{ - struct btrfs_fs_info *info = root->fs_info; - struct btrfs_space_info *meta_sinfo; - u64 num_bytes; - u64 used; - u64 alloc_target; - int retries = 0; - - /* get the space info for where the metadata will live */ - alloc_target = btrfs_get_alloc_profile(root, 0); - meta_sinfo = __find_space_info(info, alloc_target); - - num_bytes = calculate_bytes_needed(root, num_items); -again: - spin_lock(&meta_sinfo->lock); - - if (unlikely(!meta_sinfo->bytes_root)) - meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); - - if (!retries) - meta_sinfo->bytes_may_use += num_bytes; - - used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + - meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + - meta_sinfo->bytes_super + meta_sinfo->bytes_root + - meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; - - if (used > meta_sinfo->total_bytes) { - retries++; - if (retries == 1) { - if (maybe_allocate_chunk(NULL, root, meta_sinfo, - num_bytes)) - goto again; - retries++; - } else { - spin_unlock(&meta_sinfo->lock); - } - - if (retries == 2) { - shrink_delalloc(NULL, root, meta_sinfo, num_bytes); - goto again; - } - spin_lock(&meta_sinfo->lock); - meta_sinfo->bytes_may_use -= num_bytes; - spin_unlock(&meta_sinfo->lock); - - dump_space_info(meta_sinfo, 0, 0); - return -ENOSPC; - } - - check_force_delalloc(meta_sinfo); - spin_unlock(&meta_sinfo->lock); - - return 0; -} - /* * This will check the space that the inode allocates from to make sure we have * enough space for bytes. @@ -3095,9 +3095,9 @@ again: spin_unlock(&data_sinfo->lock); alloc: alloc_target = btrfs_get_alloc_profile(root, 1); - trans = btrfs_start_transaction(root, 1); - if (!trans) - return -ENOMEM; + trans = btrfs_join_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); ret = do_chunk_alloc(trans, root->fs_info->extent_root, bytes + 2 * 1024 * 1024, @@ -3118,8 +3118,8 @@ alloc: if (!committed && !root->fs_info->open_ioctl_trans) { committed = 1; trans = btrfs_join_transaction(root, 1); - if (!trans) - return -ENOMEM; + if (IS_ERR(trans)) + return PTR_ERR(trans); ret = btrfs_commit_transaction(trans, root); if (ret) return ret; @@ -3701,6 +3701,59 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; } +static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items) +{ + return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * + 3 * num_items; +} + +int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + int num_items, int *retries) +{ + u64 num_bytes; + int ret; + + if (num_items == 0 || root->fs_info->chunk_root == root) + return 0; + + num_bytes = calc_trans_metadata_size(root, num_items); + ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, + num_bytes, retries); + if (!ret) { + trans->bytes_reserved += num_bytes; + trans->block_rsv = &root->fs_info->trans_block_rsv; + } + return ret; +} + +void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + if (!trans->bytes_reserved) + return; + + BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv); + btrfs_block_rsv_release(root, trans->block_rsv, + trans->bytes_reserved); + trans->bytes_reserved = 0; +} + +int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, + struct btrfs_pending_snapshot *pending) +{ + struct btrfs_root *root = pending->root; + struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); + struct btrfs_block_rsv *dst_rsv = &pending->block_rsv; + /* + * two for root back/forward refs, two for directory entries + * and one for root of the snapshot. + */ + u64 num_bytes = calc_trans_metadata_size(root, 5); + dst_rsv->space_info = src_rsv->space_info; + return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); +} + static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc) @@ -5824,7 +5877,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) wc = kzalloc(sizeof(*wc), GFP_NOFS); BUG_ON(!wc); - trans = btrfs_start_transaction(tree_root, 1); + trans = btrfs_start_transaction(tree_root, 0); if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { level = btrfs_header_level(root->node); @@ -5920,7 +5973,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) BUG_ON(ret); btrfs_end_transaction(trans, tree_root); - trans = btrfs_start_transaction(tree_root, 1); + trans = btrfs_start_transaction(tree_root, 0); + if (IS_ERR(trans)) + return PTR_ERR(trans); } else { unsigned long update; update = trans->delayed_ref_updates; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 29ff749ff4ca..41e09e24e295 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -126,8 +126,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, end_of_last_block = start_pos + num_bytes - 1; err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, NULL); - if (err) - return err; + BUG_ON(err); for (i = 0; i < num_pages; i++) { struct page *p = pages[i]; @@ -142,7 +141,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, * at this time. */ } - return err; + return 0; } /* @@ -1008,7 +1007,7 @@ out_nolock: num_written = err; if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_transaction(root, 0); ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); if (ret == 0) { @@ -1104,9 +1103,9 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) if (file && file->private_data) btrfs_ioctl_trans_end(file); - trans = btrfs_start_transaction(root, 1); - if (!trans) { - ret = -ENOMEM; + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); goto out; } diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 72ce3c173d6a..64f1150bb48d 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -49,6 +49,33 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name, return 0; } +struct btrfs_inode_ref * +btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + const char *name, int name_len, + u64 inode_objectid, u64 ref_objectid, int mod) +{ + struct btrfs_key key; + struct btrfs_inode_ref *ref; + int ins_len = mod < 0 ? -1 : 0; + int cow = mod != 0; + int ret; + + key.objectid = inode_objectid; + key.type = BTRFS_INODE_REF_KEY; + key.offset = ref_objectid; + + ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); + if (ret < 0) + return ERR_PTR(ret); + if (ret > 0) + return NULL; + if (!find_name_in_backref(path, name, name_len, &ref)) + return NULL; + return ref; +} + int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, const char *name, int name_len, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f44425081c02..c4b0fd12df68 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2135,7 +2135,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) * do a destroy_inode */ if (is_bad_inode(inode)) { - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_transaction(root, 0); btrfs_orphan_del(trans, inode); btrfs_end_transaction(trans, root); iput(inode); @@ -2478,29 +2478,201 @@ out: return ret; } -static int btrfs_unlink(struct inode *dir, struct dentry *dentry) +/* helper to check if there is any shared block in the path */ +static int check_path_shared(struct btrfs_root *root, + struct btrfs_path *path) +{ + struct extent_buffer *eb; + int level; + int ret; + u64 refs; + + for (level = 0; level < BTRFS_MAX_LEVEL; level++) { + if (!path->nodes[level]) + break; + eb = path->nodes[level]; + if (!btrfs_block_can_be_shared(root, eb)) + continue; + ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len, + &refs, NULL); + if (refs > 1) + return 1; + } + return 0; +} + +/* + * helper to start transaction for unlink and rmdir. + * + * unlink and rmdir are special in btrfs, they do not always free space. + * so in enospc case, we should make sure they will free space before + * allowing them to use the global metadata reservation. + */ +static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, + struct dentry *dentry) { - struct btrfs_root *root; struct btrfs_trans_handle *trans; + struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_path *path; + struct btrfs_inode_ref *ref; + struct btrfs_dir_item *di; struct inode *inode = dentry->d_inode; + u64 index; + int check_link = 1; + int err = -ENOSPC; int ret; - unsigned long nr = 0; - root = BTRFS_I(dir)->root; + trans = btrfs_start_transaction(root, 10); + if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) + return trans; - /* - * 5 items for unlink inode - * 1 for orphan - */ - ret = btrfs_reserve_metadata_space(root, 6); - if (ret) - return ret; + if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) + return ERR_PTR(-ENOSPC); - trans = btrfs_start_transaction(root, 1); + /* check if there is someone else holds reference */ + if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1) + return ERR_PTR(-ENOSPC); + + if (atomic_read(&inode->i_count) > 2) + return ERR_PTR(-ENOSPC); + + if (xchg(&root->fs_info->enospc_unlink, 1)) + return ERR_PTR(-ENOSPC); + + path = btrfs_alloc_path(); + if (!path) { + root->fs_info->enospc_unlink = 0; + return ERR_PTR(-ENOMEM); + } + + trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { - btrfs_unreserve_metadata_space(root, 6); - return PTR_ERR(trans); + btrfs_free_path(path); + root->fs_info->enospc_unlink = 0; + return trans; + } + + path->skip_locking = 1; + path->search_commit_root = 1; + + ret = btrfs_lookup_inode(trans, root, path, + &BTRFS_I(dir)->location, 0); + if (ret < 0) { + err = ret; + goto out; + } + if (ret == 0) { + if (check_path_shared(root, path)) + goto out; + } else { + check_link = 0; } + btrfs_release_path(root, path); + + ret = btrfs_lookup_inode(trans, root, path, + &BTRFS_I(inode)->location, 0); + if (ret < 0) { + err = ret; + goto out; + } + if (ret == 0) { + if (check_path_shared(root, path)) + goto out; + } else { + check_link = 0; + } + btrfs_release_path(root, path); + + if (ret == 0 && S_ISREG(inode->i_mode)) { + ret = btrfs_lookup_file_extent(trans, root, path, + inode->i_ino, (u64)-1, 0); + if (ret < 0) { + err = ret; + goto out; + } + BUG_ON(ret == 0); + if (check_path_shared(root, path)) + goto out; + btrfs_release_path(root, path); + } + + if (!check_link) { + err = 0; + goto out; + } + + di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, + dentry->d_name.name, dentry->d_name.len, 0); + if (IS_ERR(di)) { + err = PTR_ERR(di); + goto out; + } + if (di) { + if (check_path_shared(root, path)) + goto out; + } else { + err = 0; + goto out; + } + btrfs_release_path(root, path); + + ref = btrfs_lookup_inode_ref(trans, root, path, + dentry->d_name.name, dentry->d_name.len, + inode->i_ino, dir->i_ino, 0); + if (IS_ERR(ref)) { + err = PTR_ERR(ref); + goto out; + } + BUG_ON(!ref); + if (check_path_shared(root, path)) + goto out; + index = btrfs_inode_ref_index(path->nodes[0], ref); + btrfs_release_path(root, path); + + di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index, + dentry->d_name.name, dentry->d_name.len, 0); + if (IS_ERR(di)) { + err = PTR_ERR(di); + goto out; + } + BUG_ON(ret == -ENOENT); + if (check_path_shared(root, path)) + goto out; + + err = 0; +out: + btrfs_free_path(path); + if (err) { + btrfs_end_transaction(trans, root); + root->fs_info->enospc_unlink = 0; + return ERR_PTR(err); + } + + trans->block_rsv = &root->fs_info->global_block_rsv; + return trans; +} + +static void __unlink_end_trans(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + if (trans->block_rsv == &root->fs_info->global_block_rsv) { + BUG_ON(!root->fs_info->enospc_unlink); + root->fs_info->enospc_unlink = 0; + } + btrfs_end_transaction_throttle(trans, root); +} + +static int btrfs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct btrfs_root *root = BTRFS_I(dir)->root; + struct btrfs_trans_handle *trans; + struct inode *inode = dentry->d_inode; + int ret; + unsigned long nr = 0; + + trans = __unlink_start_trans(dir, dentry); + if (IS_ERR(trans)) + return PTR_ERR(trans); btrfs_set_trans_block_group(trans, dir); @@ -2508,14 +2680,15 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, dentry->d_name.name, dentry->d_name.len); + BUG_ON(ret); - if (inode->i_nlink == 0) + if (inode->i_nlink == 0) { ret = btrfs_orphan_add(trans, inode); + BUG_ON(ret); + } nr = trans->blocks_used; - - btrfs_end_transaction_throttle(trans, root); - btrfs_unreserve_metadata_space(root, 6); + __unlink_end_trans(trans, root); btrfs_btree_balance_dirty(root, nr); return ret; } @@ -2587,7 +2760,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; int err = 0; - int ret; struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_trans_handle *trans; unsigned long nr = 0; @@ -2596,15 +2768,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) return -ENOTEMPTY; - ret = btrfs_reserve_metadata_space(root, 5); - if (ret) - return ret; - - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - btrfs_unreserve_metadata_space(root, 5); + trans = __unlink_start_trans(dir, dentry); + if (IS_ERR(trans)) return PTR_ERR(trans); - } btrfs_set_trans_block_group(trans, dir); @@ -2627,12 +2793,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) btrfs_i_size_write(inode, 0); out: nr = trans->blocks_used; - ret = btrfs_end_transaction_throttle(trans, root); - btrfs_unreserve_metadata_space(root, 5); + __unlink_end_trans(trans, root); btrfs_btree_balance_dirty(root, nr); - if (ret && !err) - err = ret; return err; } @@ -3145,7 +3308,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; - struct extent_map *em; + struct extent_map *em = NULL; struct extent_state *cached_state = NULL; u64 mask = root->sectorsize - 1; u64 hole_start = (inode->i_size + mask) & ~mask; @@ -3183,11 +3346,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) u64 hint_byte = 0; hole_size = last_byte - cur_offset; - err = btrfs_reserve_metadata_space(root, 2); - if (err) + trans = btrfs_start_transaction(root, 2); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); break; - - trans = btrfs_start_transaction(root, 1); + } btrfs_set_trans_block_group(trans, inode); err = btrfs_drop_extents(trans, inode, cur_offset, @@ -3205,14 +3368,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) last_byte - 1, 0); btrfs_end_transaction(trans, root); - btrfs_unreserve_metadata_space(root, 2); } free_extent_map(em); + em = NULL; cur_offset = last_byte; if (cur_offset >= block_end) break; } + free_extent_map(em); unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, GFP_NOFS); return err; @@ -3239,10 +3403,6 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) } } - ret = btrfs_reserve_metadata_space(root, 1); - if (ret) - return ret; - trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); @@ -3251,7 +3411,6 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) nr = trans->blocks_used; btrfs_end_transaction(trans, root); - btrfs_unreserve_metadata_space(root, 1); btrfs_btree_balance_dirty(root, nr); if (attr->ia_size > inode->i_size) { @@ -4223,26 +4382,21 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (!new_valid_dev(rdev)) return -EINVAL; + err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); + if (err) + return err; + /* * 2 for inode item and ref * 2 for dir items * 1 for xattr if selinux is on */ - err = btrfs_reserve_metadata_space(root, 5); - if (err) - return err; + trans = btrfs_start_transaction(root, 5); + if (IS_ERR(trans)) + return PTR_ERR(trans); - trans = btrfs_start_transaction(root, 1); - if (!trans) - goto fail; btrfs_set_trans_block_group(trans, dir); - err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); - if (err) { - err = -ENOSPC; - goto out_unlock; - } - inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, objectid, @@ -4271,13 +4425,11 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, out_unlock: nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); -fail: - btrfs_unreserve_metadata_space(root, 5); + btrfs_btree_balance_dirty(root, nr); if (drop_inode) { inode_dec_link_count(inode); iput(inode); } - btrfs_btree_balance_dirty(root, nr); return err; } @@ -4287,32 +4439,26 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(dir)->root; struct inode *inode = NULL; - int err; int drop_inode = 0; + int err; unsigned long nr = 0; u64 objectid; u64 index = 0; + err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); + if (err) + return err; /* * 2 for inode item and ref * 2 for dir items * 1 for xattr if selinux is on */ - err = btrfs_reserve_metadata_space(root, 5); - if (err) - return err; + trans = btrfs_start_transaction(root, 5); + if (IS_ERR(trans)) + return PTR_ERR(trans); - trans = btrfs_start_transaction(root, 1); - if (!trans) - goto fail; btrfs_set_trans_block_group(trans, dir); - err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); - if (err) { - err = -ENOSPC; - goto out_unlock; - } - inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, @@ -4344,8 +4490,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, out_unlock: nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); -fail: - btrfs_unreserve_metadata_space(root, 5); if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -4372,21 +4516,21 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (root->objectid != BTRFS_I(inode)->root->objectid) return -EPERM; - /* - * 1 item for inode ref - * 2 items for dir items - */ - err = btrfs_reserve_metadata_space(root, 3); - if (err) - return err; - btrfs_inc_nlink(inode); err = btrfs_set_inode_index(dir, &index); if (err) goto fail; - trans = btrfs_start_transaction(root, 1); + /* + * 1 item for inode ref + * 2 items for dir items + */ + trans = btrfs_start_transaction(root, 3); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto fail; + } btrfs_set_trans_block_group(trans, dir); atomic_inc(&inode->i_count); @@ -4405,7 +4549,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); fail: - btrfs_unreserve_metadata_space(root, 3); if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -4425,28 +4568,20 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) u64 index = 0; unsigned long nr = 1; + err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); + if (err) + return err; + /* * 2 items for inode and ref * 2 items for dir items * 1 for xattr if selinux is on */ - err = btrfs_reserve_metadata_space(root, 5); - if (err) - return err; - - trans = btrfs_start_transaction(root, 1); - if (!trans) { - err = -ENOMEM; - goto out_unlock; - } + trans = btrfs_start_transaction(root, 5); + if (IS_ERR(trans)) + return PTR_ERR(trans); btrfs_set_trans_block_group(trans, dir); - err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); - if (err) { - err = -ENOSPC; - goto out_fail; - } - inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, objectid, @@ -4486,9 +4621,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) out_fail: nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); - -out_unlock: - btrfs_unreserve_metadata_space(root, 5); if (drop_on_err) iput(inode); btrfs_btree_balance_dirty(root, nr); @@ -5426,19 +5558,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (S_ISDIR(old_inode->i_mode) && new_inode && new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) return -ENOTEMPTY; - - /* - * We want to reserve the absolute worst case amount of items. So if - * both inodes are subvols and we need to unlink them then that would - * require 4 item modifications, but if they are both normal inodes it - * would require 5 item modifications, so we'll assume their normal - * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items - * should cover the worst case number of items we'll modify. - */ - ret = btrfs_reserve_metadata_space(root, 11); - if (ret) - return ret; - /* * we're using rename to replace one file with another. * and the replacement file is large. Start IO on it now so @@ -5451,8 +5570,18 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, /* close the racy window with snapshot create/destroy ioctl */ if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) down_read(&root->fs_info->subvol_sem); + /* + * We want to reserve the absolute worst case amount of items. So if + * both inodes are subvols and we need to unlink them then that would + * require 4 item modifications, but if they are both normal inodes it + * would require 5 item modifications, so we'll assume their normal + * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items + * should cover the worst case number of items we'll modify. + */ + trans = btrfs_start_transaction(root, 20); + if (IS_ERR(trans)) + return PTR_ERR(trans); - trans = btrfs_start_transaction(root, 1); btrfs_set_trans_block_group(trans, new_dir); if (dest != root) @@ -5551,7 +5680,6 @@ out_fail: if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) up_read(&root->fs_info->subvol_sem); - btrfs_unreserve_metadata_space(root, 11); return ret; } @@ -5658,26 +5786,20 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) return -ENAMETOOLONG; + err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); + if (err) + return err; /* * 2 items for inode item and ref * 2 items for dir items * 1 item for xattr if selinux is on */ - err = btrfs_reserve_metadata_space(root, 5); - if (err) - return err; + trans = btrfs_start_transaction(root, 5); + if (IS_ERR(trans)) + return PTR_ERR(trans); - trans = btrfs_start_transaction(root, 1); - if (!trans) - goto out_fail; btrfs_set_trans_block_group(trans, dir); - err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); - if (err) { - err = -ENOSPC; - goto out_unlock; - } - inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, dentry->d_name.len, dentry->d_parent->d_inode->i_ino, objectid, @@ -5749,8 +5871,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, out_unlock: nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); -out_fail: - btrfs_unreserve_metadata_space(root, 5); if (drop_inode) { inode_dec_link_count(inode); iput(inode); @@ -5771,21 +5891,18 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, u64 i_size; while (num_bytes > 0) { - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_transaction(root, 3); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } ret = btrfs_reserve_extent(trans, root, num_bytes, root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { - WARN_ON(1); - goto stop_trans; - } - - ret = btrfs_reserve_metadata_space(root, 3); - if (ret) { - btrfs_free_reserved_extent(root, ins.objectid, - ins.offset); - goto stop_trans; + btrfs_end_transaction(trans, root); + break; } ret = insert_reserved_file_extent(trans, inode, @@ -5819,14 +5936,8 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, BUG_ON(ret); btrfs_end_transaction(trans, root); - btrfs_unreserve_metadata_space(root, 3); } return ret; - -stop_trans: - btrfs_end_transaction(trans, root); - return ret; - } static long btrfs_fallocate(struct inode *inode, int mode, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 97a97839a867..3066da468c6d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -239,23 +239,19 @@ static noinline int create_subvol(struct btrfs_root *root, u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; u64 index = 0; + ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, + 0, &objectid); + if (ret) + return ret; /* * 1 - inode item * 2 - refs * 1 - root item * 2 - dir items */ - ret = btrfs_reserve_metadata_space(root, 6); - if (ret) - return ret; - - trans = btrfs_start_transaction(root, 1); - BUG_ON(!trans); - - ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, - 0, &objectid); - if (ret) - goto fail; + trans = btrfs_start_transaction(root, 6); + if (IS_ERR(trans)) + return PTR_ERR(trans); leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, objectid, NULL, 0, 0, 0); @@ -345,13 +341,10 @@ fail: err = btrfs_commit_transaction(trans, root); if (err && !ret) ret = err; - - btrfs_unreserve_metadata_space(root, 6); return ret; } -static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, - char *name, int namelen) +static int create_snapshot(struct btrfs_root *root, struct dentry *dentry) { struct inode *inode; struct btrfs_pending_snapshot *pending_snapshot; @@ -361,40 +354,33 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, if (!root->ref_cows) return -EINVAL; - /* - * 1 - inode item - * 2 - refs - * 1 - root item - * 2 - dir items - */ - ret = btrfs_reserve_metadata_space(root, 6); - if (ret) - goto fail; - pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); - if (!pending_snapshot) { - ret = -ENOMEM; - btrfs_unreserve_metadata_space(root, 6); - goto fail; - } - pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); - if (!pending_snapshot->name) { - ret = -ENOMEM; - kfree(pending_snapshot); - btrfs_unreserve_metadata_space(root, 6); - goto fail; - } - memcpy(pending_snapshot->name, name, namelen); - pending_snapshot->name[namelen] = '\0'; + if (!pending_snapshot) + return -ENOMEM; + + btrfs_init_block_rsv(&pending_snapshot->block_rsv); pending_snapshot->dentry = dentry; - trans = btrfs_start_transaction(root, 1); - BUG_ON(!trans); pending_snapshot->root = root; + + trans = btrfs_start_transaction(root->fs_info->extent_root, 5); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto fail; + } + + ret = btrfs_snap_reserve_metadata(trans, pending_snapshot); + BUG_ON(ret); + list_add(&pending_snapshot->list, &trans->transaction->pending_snapshots); - ret = btrfs_commit_transaction(trans, root); + ret = btrfs_commit_transaction(trans, root->fs_info->extent_root); BUG_ON(ret); - btrfs_unreserve_metadata_space(root, 6); + + ret = pending_snapshot->error; + if (ret) + goto fail; + + btrfs_orphan_cleanup(pending_snapshot->snap); inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); if (IS_ERR(inode)) { @@ -405,6 +391,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, d_instantiate(dentry, inode); ret = 0; fail: + kfree(pending_snapshot); return ret; } @@ -456,8 +443,7 @@ static noinline int btrfs_mksubvol(struct path *parent, goto out_up_read; if (snap_src) { - error = create_snapshot(snap_src, dentry, - name, namelen); + error = create_snapshot(snap_src, dentry); } else { error = create_subvol(BTRFS_I(dir)->root, dentry, name, namelen); @@ -811,7 +797,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, device->name, (unsigned long long)new_size); if (new_size > old_size) { - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_transaction(root, 0); ret = btrfs_grow_device(trans, device, new_size); btrfs_commit_transaction(trans, root); } else { @@ -1300,7 +1286,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, if (err) goto out_up_write; - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + goto out; + } + trans->block_rsv = &root->fs_info->global_block_rsv; + ret = btrfs_unlink_subvol(trans, root, dir, dest->root_key.objectid, dentry->d_name.name, @@ -1550,12 +1542,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, btrfs_wait_ordered_range(src, off, off+len); } - trans = btrfs_start_transaction(root, 1); - BUG_ON(!trans); - - /* punch hole in destination first */ - btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1); - /* clone data */ key.objectid = src->i_ino; key.type = BTRFS_EXTENT_DATA_KEY; @@ -1566,7 +1552,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, * note the key will change type as we walk through the * tree. */ - ret = btrfs_search_slot(trans, root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; @@ -1629,12 +1615,31 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, new_key.objectid = inode->i_ino; new_key.offset = key.offset + destoff - off; + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + if (type == BTRFS_FILE_EXTENT_REG || type == BTRFS_FILE_EXTENT_PREALLOC) { + if (off > key.offset) { + datao += off - key.offset; + datal -= off - key.offset; + } + + if (key.offset + datal > off + len) + datal = off + len - key.offset; + + ret = btrfs_drop_extents(trans, inode, + new_key.offset, + new_key.offset + datal, + &hint_byte, 1); + BUG_ON(ret); + ret = btrfs_insert_empty_item(trans, root, path, &new_key, size); - if (ret) - goto out; + BUG_ON(ret); leaf = path->nodes[0]; slot = path->slots[0]; @@ -1645,14 +1650,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, extent = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - if (off > key.offset) { - datao += off - key.offset; - datal -= off - key.offset; - } - - if (key.offset + datal > off + len) - datal = off + len - key.offset; - /* disko == 0 means it's a hole */ if (!disko) datao = 0; @@ -1683,14 +1680,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, if (comp && (skip || trim)) { ret = -EINVAL; + btrfs_end_transaction(trans, root); goto out; } size -= skip + trim; datal -= skip + trim; + + ret = btrfs_drop_extents(trans, inode, + new_key.offset, + new_key.offset + datal, + &hint_byte, 1); + BUG_ON(ret); + ret = btrfs_insert_empty_item(trans, root, path, &new_key, size); - if (ret) - goto out; + BUG_ON(ret); if (skip) { u32 start = @@ -1708,8 +1712,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, } btrfs_mark_buffer_dirty(leaf); - } + btrfs_release_path(root, path); + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + if (new_key.offset + datal > inode->i_size) + btrfs_i_size_write(inode, + new_key.offset + datal); + BTRFS_I(inode)->flags = BTRFS_I(src)->flags; + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + btrfs_end_transaction(trans, root); + } next: btrfs_release_path(root, path); key.offset++; @@ -1717,17 +1730,7 @@ next: ret = 0; out: btrfs_release_path(root, path); - if (ret == 0) { - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - if (destoff + olen > inode->i_size) - btrfs_i_size_write(inode, destoff + olen); - BTRFS_I(inode)->flags = BTRFS_I(src)->flags; - ret = btrfs_update_inode(trans, root, inode); - } - btrfs_end_transaction(trans, root); unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); - if (ret) - vmtruncate(inode, 0); out_unlock: mutex_unlock(&src->i_mutex); mutex_unlock(&inode->i_mutex); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index d565b45a6352..145a468c300d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1649,7 +1649,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, } if (level == 0 && rc->stage == UPDATE_DATA_PTRS) { - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_transaction(root, 0); leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, 0); @@ -1675,7 +1675,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, while (1) { leaf = NULL; replaced = 0; - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_transaction(root, 0); max_level = level; ret = walk_down_reloc_tree(reloc_root, path, &level); @@ -1803,7 +1803,7 @@ static void merge_func(struct btrfs_work *work) merge_reloc_root(async->rc, root); - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_transaction(root, 0); btrfs_update_reloc_root(trans, root); btrfs_end_transaction(trans, root); } @@ -3297,11 +3297,11 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) rc->create_reloc_root = 1; set_reloc_control(rc); - trans = btrfs_start_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root, 1); btrfs_commit_transaction(trans, rc->extent_root); while (1) { - trans = btrfs_start_transaction(rc->extent_root, 1); + trans = btrfs_start_transaction(rc->extent_root, 0); ret = find_next_extent(trans, rc, path); if (ret < 0) @@ -3411,7 +3411,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) smp_mb(); if (rc->extents_found > 0) { - trans = btrfs_start_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root, 1); btrfs_commit_transaction(trans, rc->extent_root); } @@ -3420,7 +3420,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) unset_reloc_control(rc); /* get rid of pinned extents */ - trans = btrfs_start_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root, 1); btrfs_commit_transaction(trans, rc->extent_root); return err; @@ -3475,7 +3475,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, if (IS_ERR(root)) return ERR_CAST(root); - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_transaction(root, 6); BUG_ON(!trans); err = btrfs_find_free_objectid(trans, root, objectid, &objectid); @@ -3619,7 +3619,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) struct btrfs_trans_handle *trans; int ret; - trans = btrfs_start_transaction(root->fs_info->tree_root, 1); + trans = btrfs_start_transaction(root->fs_info->tree_root, 0); memset(&root->root_item.drop_progress, 0, sizeof(root->root_item.drop_progress)); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 55265c24ab96..38d91c7e8db2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -498,7 +498,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) btrfs_start_delalloc_inodes(root, 0); btrfs_wait_ordered_extents(root, 0, 0); - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_transaction(root, 0); ret = btrfs_commit_transaction(trans, root); return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 21ad37c05199..2616491a5c5b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -165,53 +165,89 @@ enum btrfs_trans_type { TRANS_USERSPACE, }; +static int may_wait_transaction(struct btrfs_root *root, int type) +{ + if (!root->fs_info->log_root_recovering && + ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || + type == TRANS_USERSPACE)) + return 1; + return 0; +} + static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, - int num_blocks, int type) + u64 num_items, int type) { - struct btrfs_trans_handle *h = - kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); + struct btrfs_trans_handle *h; + struct btrfs_transaction *cur_trans; + int retries = 0; int ret; +again: + h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); + if (!h) + return ERR_PTR(-ENOMEM); mutex_lock(&root->fs_info->trans_mutex); - if (!root->fs_info->log_root_recovering && - ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || - type == TRANS_USERSPACE)) + if (may_wait_transaction(root, type)) wait_current_trans(root); + ret = join_transaction(root); BUG_ON(ret); - h->transid = root->fs_info->running_transaction->transid; - h->transaction = root->fs_info->running_transaction; - h->blocks_reserved = num_blocks; + cur_trans = root->fs_info->running_transaction; + cur_trans->use_count++; + mutex_unlock(&root->fs_info->trans_mutex); + + h->transid = cur_trans->transid; + h->transaction = cur_trans; h->blocks_used = 0; h->block_group = 0; + h->bytes_reserved = 0; h->delayed_ref_updates = 0; h->block_rsv = NULL; - if (!current->journal_info && type != TRANS_USERSPACE) - current->journal_info = h; + smp_mb(); + if (cur_trans->blocked && may_wait_transaction(root, type)) { + btrfs_commit_transaction(h, root); + goto again; + } + + if (num_items > 0) { + ret = btrfs_trans_reserve_metadata(h, root, num_items, + &retries); + if (ret == -EAGAIN) { + btrfs_commit_transaction(h, root); + goto again; + } + if (ret < 0) { + btrfs_end_transaction(h, root); + return ERR_PTR(ret); + } + } - root->fs_info->running_transaction->use_count++; + mutex_lock(&root->fs_info->trans_mutex); record_root_in_trans(h, root); mutex_unlock(&root->fs_info->trans_mutex); + + if (!current->journal_info && type != TRANS_USERSPACE) + current->journal_info = h; return h; } struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, - int num_blocks) + int num_items) { - return start_transaction(root, num_blocks, TRANS_START); + return start_transaction(root, num_items, TRANS_START); } struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, int num_blocks) { - return start_transaction(root, num_blocks, TRANS_JOIN); + return start_transaction(root, 0, TRANS_JOIN); } struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, int num_blocks) { - return start_transaction(r, num_blocks, TRANS_USERSPACE); + return start_transaction(r, 0, TRANS_USERSPACE); } /* wait for a transaction commit to be fully complete */ @@ -312,6 +348,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, count++; } + btrfs_trans_release_metadata(trans, root); + mutex_lock(&info->trans_mutex); cur_trans = info->running_transaction; WARN_ON(cur_trans != trans->transaction); @@ -757,47 +795,49 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *root = pending->root; struct btrfs_root *parent_root; struct inode *parent_inode; + struct dentry *dentry; struct extent_buffer *tmp; struct extent_buffer *old; int ret; - u64 objectid; - int namelen; u64 index = 0; - - parent_inode = pending->dentry->d_parent->d_inode; - parent_root = BTRFS_I(parent_inode)->root; + u64 objectid; new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); if (!new_root_item) { - ret = -ENOMEM; + pending->error = -ENOMEM; goto fail; } + ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); - if (ret) + if (ret) { + pending->error = ret; goto fail; + } key.objectid = objectid; - /* record when the snapshot was created in key.offset */ - key.offset = trans->transid; - btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); + key.offset = (u64)-1; + key.type = BTRFS_ROOT_ITEM_KEY; - memcpy(&pending->root_key, &key, sizeof(key)); - pending->root_key.offset = (u64)-1; + trans->block_rsv = &pending->block_rsv; + dentry = pending->dentry; + parent_inode = dentry->d_parent->d_inode; + parent_root = BTRFS_I(parent_inode)->root; record_root_in_trans(trans, parent_root); + /* * insert the directory item */ - namelen = strlen(pending->name); ret = btrfs_set_inode_index(parent_inode, &index); BUG_ON(ret); ret = btrfs_insert_dir_item(trans, parent_root, - pending->name, namelen, - parent_inode->i_ino, - &pending->root_key, BTRFS_FT_DIR, index); + dentry->d_name.name, dentry->d_name.len, + parent_inode->i_ino, &key, + BTRFS_FT_DIR, index); BUG_ON(ret); - btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2); + btrfs_i_size_write(parent_inode, parent_inode->i_size + + dentry->d_name.len * 2); ret = btrfs_update_inode(trans, parent_root, parent_inode); BUG_ON(ret); @@ -814,22 +854,29 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, free_extent_buffer(old); btrfs_set_root_node(new_root_item, tmp); - ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, - new_root_item); - BUG_ON(ret); + /* record when the snapshot was created in key.offset */ + key.offset = trans->transid; + ret = btrfs_insert_root(trans, tree_root, &key, new_root_item); btrfs_tree_unlock(tmp); free_extent_buffer(tmp); + BUG_ON(ret); - ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, - pending->root_key.objectid, + /* + * insert root back/forward references + */ + ret = btrfs_add_root_ref(trans, tree_root, objectid, parent_root->root_key.objectid, - parent_inode->i_ino, index, pending->name, - namelen); + parent_inode->i_ino, index, + dentry->d_name.name, dentry->d_name.len); BUG_ON(ret); + key.offset = (u64)-1; + pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); + BUG_ON(IS_ERR(pending->snap)); fail: kfree(new_root_item); - return ret; + btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); + return 0; } /* @@ -898,6 +945,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = btrfs_run_delayed_refs(trans, root, 0); BUG_ON(ret); + btrfs_trans_release_metadata(trans, root); + cur_trans = trans->transaction; /* * set the flushing flag so procs in this transaction have to diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 56e728d70e39..14b3841b75d5 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -57,8 +57,11 @@ struct btrfs_trans_handle { struct btrfs_pending_snapshot { struct dentry *dentry; struct btrfs_root *root; - char *name; - struct btrfs_key root_key; + struct btrfs_root *snap; + /* block reservation for the operation */ + struct btrfs_block_rsv block_rsv; + /* extra metadata reseration for relocation */ + int error; struct list_head list; }; @@ -85,11 +88,11 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, - int num_blocks); + int num_items); struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, - int num_blocks); + int num_blocks); struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, - int num_blocks); + int num_blocks); int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8db7b14bbae8..d6e3af8be95b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1097,7 +1097,7 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, if (!path) return -ENOMEM; - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_transaction(root, 0); key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.type = BTRFS_DEV_ITEM_KEY; key.offset = device->devid; @@ -1486,7 +1486,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) goto error; } - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_transaction(root, 0); lock_chunks(root); device->barriers = 1; @@ -1751,9 +1751,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, /* step one, relocate all the extents inside this chunk */ ret = btrfs_relocate_block_group(extent_root, chunk_offset); - BUG_ON(ret); + if (ret) + return ret; - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_transaction(root, 0); BUG_ON(!trans); lock_chunks(root); @@ -1925,7 +1926,7 @@ int btrfs_balance(struct btrfs_root *dev_root) break; BUG_ON(ret); - trans = btrfs_start_transaction(dev_root, 1); + trans = btrfs_start_transaction(dev_root, 0); BUG_ON(!trans); ret = btrfs_grow_device(trans, device, old_size); @@ -2094,11 +2095,7 @@ again: } /* Shrinking succeeded, else we would be at "done". */ - trans = btrfs_start_transaction(root, 1); - if (!trans) { - ret = -ENOMEM; - goto done; - } + trans = btrfs_start_transaction(root, 0); lock_chunks(root); device->disk_total_bytes = new_size; diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 193b58f7d3f3..007fae581a04 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -154,15 +154,10 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, if (trans) return do_setxattr(trans, inode, name, value, size, flags); - ret = btrfs_reserve_metadata_space(root, 2); - if (ret) - return ret; + trans = btrfs_start_transaction(root, 2); + if (IS_ERR(trans)) + return PTR_ERR(trans); - trans = btrfs_start_transaction(root, 1); - if (!trans) { - ret = -ENOMEM; - goto out; - } btrfs_set_trans_block_group(trans, inode); ret = do_setxattr(trans, inode, name, value, size, flags); @@ -174,7 +169,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, BUG_ON(ret); out: btrfs_end_transaction_throttle(trans, root); - btrfs_unreserve_metadata_space(root, 2); return ret; } -- cgit v1.2.3 From efa56464562991b8c24f965199888806bd8c4b38 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 16 May 2010 10:49:59 -0400 Subject: Btrfs: Pre-allocate space for data relocation Pre-allocate space for data relocation. This can detect ENOPSC condition caused by fragmentation of free space. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 ++ fs/btrfs/inode.c | 44 +++++++++++++------------ fs/btrfs/relocation.c | 90 +++++++++++++++++++++++++++++++++++++-------------- 3 files changed, 92 insertions(+), 45 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fc324f9fcb42..65530837d04b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2420,6 +2420,9 @@ int btrfs_cont_expand(struct inode *inode, loff_t size); int btrfs_invalidate_inodes(struct btrfs_root *root); void btrfs_add_delayed_iput(struct inode *inode); void btrfs_run_delayed_iputs(struct btrfs_root *root); +int btrfs_prealloc_file_range(struct inode *inode, int mode, + u64 start, u64 num_bytes, u64 min_size, + loff_t actual_len, u64 *alloc_hint); extern const struct dentry_operations btrfs_dentry_operations; /* ioctl.c */ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bef69bedf3cf..460dd512eebd 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1175,6 +1175,13 @@ out_check: num_bytes, num_bytes, type); BUG_ON(ret); + if (root->root_key.objectid == + BTRFS_DATA_RELOC_TREE_OBJECTID) { + ret = btrfs_reloc_clone_csums(inode, cur_offset, + num_bytes); + BUG_ON(ret); + } + extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, cur_offset, cur_offset + num_bytes - 1, locked_page, EXTENT_CLEAR_UNLOCK_PAGE | @@ -6080,16 +6087,15 @@ out_unlock: return err; } -static int prealloc_file_range(struct inode *inode, u64 start, u64 end, - u64 alloc_hint, int mode, loff_t actual_len) +int btrfs_prealloc_file_range(struct inode *inode, int mode, + u64 start, u64 num_bytes, u64 min_size, + loff_t actual_len, u64 *alloc_hint) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; u64 cur_offset = start; - u64 num_bytes = end - start; int ret = 0; - u64 i_size; while (num_bytes > 0) { trans = btrfs_start_transaction(root, 3); @@ -6098,9 +6104,8 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, break; } - ret = btrfs_reserve_extent(trans, root, num_bytes, - root->sectorsize, 0, alloc_hint, - (u64)-1, &ins, 1); + ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, + 0, *alloc_hint, (u64)-1, &ins, 1); if (ret) { btrfs_end_transaction(trans, root); break; @@ -6117,20 +6122,19 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, num_bytes -= ins.offset; cur_offset += ins.offset; - alloc_hint = ins.objectid + ins.offset; + *alloc_hint = ins.objectid + ins.offset; inode->i_ctime = CURRENT_TIME; BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; if (!(mode & FALLOC_FL_KEEP_SIZE) && - (actual_len > inode->i_size) && - (cur_offset > inode->i_size)) { - + (actual_len > inode->i_size) && + (cur_offset > inode->i_size)) { if (cur_offset > actual_len) - i_size = actual_len; + i_size_write(inode, actual_len); else - i_size = cur_offset; - i_size_write(inode, i_size); - btrfs_ordered_update_i_size(inode, i_size, NULL); + i_size_write(inode, cur_offset); + i_size_write(inode, cur_offset); + btrfs_ordered_update_i_size(inode, cur_offset, NULL); } ret = btrfs_update_inode(trans, root, inode); @@ -6216,16 +6220,16 @@ static long btrfs_fallocate(struct inode *inode, int mode, if (em->block_start == EXTENT_MAP_HOLE || (cur_offset >= inode->i_size && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { - ret = prealloc_file_range(inode, - cur_offset, last_byte, - alloc_hint, mode, offset+len); + ret = btrfs_prealloc_file_range(inode, 0, cur_offset, + last_byte - cur_offset, + 1 << inode->i_blkbits, + offset + len, + &alloc_hint); if (ret < 0) { free_extent_map(em); break; } } - if (em->block_start <= EXTENT_MAP_LAST_BYTE) - alloc_hint = em->block_start; free_extent_map(em); cur_offset = last_byte; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 145a468c300d..3943526b7348 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2545,6 +2545,50 @@ out: return err; } +static noinline_for_stack +int prealloc_file_extent_cluster(struct inode *inode, + struct file_extent_cluster *cluster) +{ + u64 alloc_hint = 0; + u64 start; + u64 end; + u64 offset = BTRFS_I(inode)->index_cnt; + u64 num_bytes; + int nr = 0; + int ret = 0; + + BUG_ON(cluster->start != cluster->boundary[0]); + mutex_lock(&inode->i_mutex); + + ret = btrfs_check_data_free_space(inode, cluster->end + + 1 - cluster->start); + if (ret) + goto out; + + while (nr < cluster->nr) { + start = cluster->boundary[nr] - offset; + if (nr + 1 < cluster->nr) + end = cluster->boundary[nr + 1] - 1 - offset; + else + end = cluster->end - offset; + + lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); + num_bytes = end + 1 - start; + ret = btrfs_prealloc_file_range(inode, 0, start, + num_bytes, num_bytes, + end + 1, &alloc_hint); + unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); + if (ret) + break; + nr++; + } + btrfs_free_reserved_data_space(inode, cluster->end + + 1 - cluster->start); +out: + mutex_unlock(&inode->i_mutex); + return ret; +} + static noinline_for_stack int setup_extent_mapping(struct inode *inode, u64 start, u64 end, u64 block_start) @@ -2588,7 +2632,6 @@ static int relocate_file_extent_cluster(struct inode *inode, u64 offset = BTRFS_I(inode)->index_cnt; unsigned long index; unsigned long last_index; - unsigned int dirty_page = 0; struct page *page; struct file_ra_state *ra; int nr = 0; @@ -2601,21 +2644,24 @@ static int relocate_file_extent_cluster(struct inode *inode, if (!ra) return -ENOMEM; - index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; - last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; + ret = prealloc_file_extent_cluster(inode, cluster); + if (ret) + goto out; - mutex_lock(&inode->i_mutex); + file_ra_state_init(ra, inode->i_mapping); - i_size_write(inode, cluster->end + 1 - offset); ret = setup_extent_mapping(inode, cluster->start - offset, cluster->end - offset, cluster->start); if (ret) - goto out_unlock; - - file_ra_state_init(ra, inode->i_mapping); + goto out; - WARN_ON(cluster->start != cluster->boundary[0]); + index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; + last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; while (index <= last_index) { + ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); + if (ret) + goto out; + page = find_lock_page(inode->i_mapping, index); if (!page) { page_cache_sync_readahead(inode->i_mapping, @@ -2623,8 +2669,10 @@ static int relocate_file_extent_cluster(struct inode *inode, last_index + 1 - index); page = grab_cache_page(inode->i_mapping, index); if (!page) { + btrfs_delalloc_release_metadata(inode, + PAGE_CACHE_SIZE); ret = -ENOMEM; - goto out_unlock; + goto out; } } @@ -2640,8 +2688,10 @@ static int relocate_file_extent_cluster(struct inode *inode, if (!PageUptodate(page)) { unlock_page(page); page_cache_release(page); + btrfs_delalloc_release_metadata(inode, + PAGE_CACHE_SIZE); ret = -EIO; - goto out_unlock; + goto out; } } @@ -2660,10 +2710,9 @@ static int relocate_file_extent_cluster(struct inode *inode, EXTENT_BOUNDARY, GFP_NOFS); nr++; } - btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); + btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); set_page_dirty(page); - dirty_page++; unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); @@ -2671,20 +2720,11 @@ static int relocate_file_extent_cluster(struct inode *inode, page_cache_release(page); index++; - if (nr < cluster->nr && - page_end + 1 + offset == cluster->boundary[nr]) { - balance_dirty_pages_ratelimited_nr(inode->i_mapping, - dirty_page); - dirty_page = 0; - } - } - if (dirty_page) { - balance_dirty_pages_ratelimited_nr(inode->i_mapping, - dirty_page); + balance_dirty_pages_ratelimited(inode->i_mapping); + btrfs_throttle(BTRFS_I(inode)->root); } WARN_ON(nr != cluster->nr); -out_unlock: - mutex_unlock(&inode->i_mutex); +out: kfree(ra); return ret; } -- cgit v1.2.3 From 3fd0a5585eb98e074fb9934549c8d85c49756c0d Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 16 May 2010 10:49:59 -0400 Subject: Btrfs: Metadata ENOSPC handling for balance This patch adds metadata ENOSPC handling for the balance code. It is consisted by following major changes: 1. Avoid COW tree leave in the phrase of merging tree. 2. Handle interaction with snapshot creation. 3. make the backref cache can live across transactions. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 3 + fs/btrfs/ctree.h | 11 +- fs/btrfs/extent-tree.c | 23 +- fs/btrfs/relocation.c | 1881 +++++++++++++++++++++++++++++------------------- fs/btrfs/transaction.c | 6 +- 5 files changed, 1170 insertions(+), 754 deletions(-) (limited to 'fs/btrfs/relocation.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6bee8e5204fb..acd532af8e55 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -447,6 +447,9 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, update_ref_for_cow(trans, root, buf, cow, &last_ref); + if (root->ref_cows) + btrfs_reloc_cow_block(trans, root, buf, cow); + if (buf == root->node) { WARN_ON(parent && parent != buf); if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 65530837d04b..5ed0223d1cbe 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2205,7 +2205,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); -int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref); +int btrfs_drop_snapshot(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, int update_ref); int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, @@ -2479,4 +2480,12 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_recover_relocation(struct btrfs_root *root); int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); +void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf, + struct extent_buffer *cow); +void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, + struct btrfs_pending_snapshot *pending, + u64 *bytes_to_reserve); +void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, + struct btrfs_pending_snapshot *pending); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a713f69f0c7a..d61a799fe323 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5875,7 +5875,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, * also make sure backrefs for the shared block and all lower level * blocks are properly updated. */ -int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) +int btrfs_drop_snapshot(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, int update_ref) { struct btrfs_path *path; struct btrfs_trans_handle *trans; @@ -5894,6 +5895,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) BUG_ON(!wc); trans = btrfs_start_transaction(tree_root, 0); + if (block_rsv) + trans->block_rsv = block_rsv; if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { level = btrfs_header_level(root->node); @@ -5981,24 +5984,16 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) } BUG_ON(wc->level == 0); - if (trans->transaction->in_commit || - trans->transaction->delayed_refs.flushing) { + if (btrfs_should_end_transaction(trans, tree_root)) { ret = btrfs_update_root(trans, tree_root, &root->root_key, root_item); BUG_ON(ret); - btrfs_end_transaction(trans, tree_root); + btrfs_end_transaction_throttle(trans, tree_root); trans = btrfs_start_transaction(tree_root, 0); - if (IS_ERR(trans)) - return PTR_ERR(trans); - } else { - unsigned long update; - update = trans->delayed_ref_updates; - trans->delayed_ref_updates = 0; - if (update) - btrfs_run_delayed_refs(trans, tree_root, - update); + if (block_rsv) + trans->block_rsv = block_rsv; } } btrfs_release_path(root, path); @@ -6026,7 +6021,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) kfree(root); } out: - btrfs_end_transaction(trans, tree_root); + btrfs_end_transaction_throttle(trans, tree_root); kfree(wc); btrfs_free_path(path); return err; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 3943526b7348..05d41e569236 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -44,8 +44,12 @@ struct tree_entry { struct backref_node { struct rb_node rb_node; u64 bytenr; - /* objectid tree block owner */ + + u64 new_bytenr; + /* objectid of tree block owner, can be not uptodate */ u64 owner; + /* link to pending, changed or detached list */ + struct list_head list; /* list of upper level blocks reference this block */ struct list_head upper; /* list of child blocks in the cache */ @@ -56,9 +60,9 @@ struct backref_node { struct extent_buffer *eb; /* level of tree block */ unsigned int level:8; - /* 1 if the block is root of old snapshot */ - unsigned int old_root:1; - /* 1 if no child blocks in the cache */ + /* is the block in non-reference counted tree */ + unsigned int cowonly:1; + /* 1 if no child node in the cache */ unsigned int lowest:1; /* is the extent buffer locked */ unsigned int locked:1; @@ -66,6 +70,16 @@ struct backref_node { unsigned int processed:1; /* have backrefs of this block been checked */ unsigned int checked:1; + /* + * 1 if corresponding block has been cowed but some upper + * level block pointers may not point to the new location + */ + unsigned int pending:1; + /* + * 1 if the backref node isn't connected to any other + * backref node. + */ + unsigned int detached:1; }; /* @@ -74,7 +88,6 @@ struct backref_node { struct backref_edge { struct list_head list[2]; struct backref_node *node[2]; - u64 blockptr; }; #define LOWER 0 @@ -83,9 +96,25 @@ struct backref_edge { struct backref_cache { /* red black tree of all backref nodes in the cache */ struct rb_root rb_root; - /* list of backref nodes with no child block in the cache */ + /* for passing backref nodes to btrfs_reloc_cow_block */ + struct backref_node *path[BTRFS_MAX_LEVEL]; + /* + * list of blocks that have been cowed but some block + * pointers in upper level blocks may not reflect the + * new location + */ struct list_head pending[BTRFS_MAX_LEVEL]; - spinlock_t lock; + /* list of backref nodes with no child node */ + struct list_head leaves; + /* list of blocks that have been cowed in current transaction */ + struct list_head changed; + /* list of detached backref node. */ + struct list_head detached; + + u64 last_trans; + + int nr_nodes; + int nr_edges; }; /* @@ -113,15 +142,6 @@ struct tree_block { unsigned int key_ready:1; }; -/* inode vector */ -#define INODEVEC_SIZE 16 - -struct inodevec { - struct list_head list; - struct inode *inode[INODEVEC_SIZE]; - int nr; -}; - #define MAX_EXTENTS 128 struct file_extent_cluster { @@ -138,36 +158,43 @@ struct reloc_control { struct btrfs_root *extent_root; /* inode for moving data */ struct inode *data_inode; - struct btrfs_workers workers; + + struct btrfs_block_rsv *block_rsv; + + struct backref_cache backref_cache; + + struct file_extent_cluster cluster; /* tree blocks have been processed */ struct extent_io_tree processed_blocks; /* map start of tree root to corresponding reloc tree */ struct mapping_tree reloc_root_tree; /* list of reloc trees */ struct list_head reloc_roots; + /* size of metadata reservation for merging reloc trees */ + u64 merging_rsv_size; + /* size of relocated tree nodes */ + u64 nodes_relocated; + u64 search_start; u64 extents_found; - u64 extents_skipped; - int stage; - int create_reloc_root; + + int block_rsv_retries; + + unsigned int stage:8; + unsigned int create_reloc_tree:1; + unsigned int merge_reloc_tree:1; unsigned int found_file_extent:1; - unsigned int found_old_snapshot:1; + unsigned int commit_transaction:1; }; /* stages of data relocation */ #define MOVE_DATA_EXTENTS 0 #define UPDATE_DATA_PTRS 1 -/* - * merge reloc tree to corresponding fs tree in worker threads - */ -struct async_merge { - struct btrfs_work work; - struct reloc_control *rc; - struct btrfs_root *root; - struct completion *done; - atomic_t *num_pending; -}; +static void remove_backref_node(struct backref_cache *cache, + struct backref_node *node); +static void __mark_block_processed(struct reloc_control *rc, + struct backref_node *node); static void mapping_tree_init(struct mapping_tree *tree) { @@ -181,15 +208,80 @@ static void backref_cache_init(struct backref_cache *cache) cache->rb_root = RB_ROOT; for (i = 0; i < BTRFS_MAX_LEVEL; i++) INIT_LIST_HEAD(&cache->pending[i]); - spin_lock_init(&cache->lock); + INIT_LIST_HEAD(&cache->changed); + INIT_LIST_HEAD(&cache->detached); + INIT_LIST_HEAD(&cache->leaves); +} + +static void backref_cache_cleanup(struct backref_cache *cache) +{ + struct backref_node *node; + int i; + + while (!list_empty(&cache->detached)) { + node = list_entry(cache->detached.next, + struct backref_node, list); + remove_backref_node(cache, node); + } + + while (!list_empty(&cache->leaves)) { + node = list_entry(cache->leaves.next, + struct backref_node, lower); + remove_backref_node(cache, node); + } + + cache->last_trans = 0; + + for (i = 0; i < BTRFS_MAX_LEVEL; i++) + BUG_ON(!list_empty(&cache->pending[i])); + BUG_ON(!list_empty(&cache->changed)); + BUG_ON(!list_empty(&cache->detached)); + BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); + BUG_ON(cache->nr_nodes); + BUG_ON(cache->nr_edges); +} + +static struct backref_node *alloc_backref_node(struct backref_cache *cache) +{ + struct backref_node *node; + + node = kzalloc(sizeof(*node), GFP_NOFS); + if (node) { + INIT_LIST_HEAD(&node->list); + INIT_LIST_HEAD(&node->upper); + INIT_LIST_HEAD(&node->lower); + RB_CLEAR_NODE(&node->rb_node); + cache->nr_nodes++; + } + return node; +} + +static void free_backref_node(struct backref_cache *cache, + struct backref_node *node) +{ + if (node) { + cache->nr_nodes--; + kfree(node); + } +} + +static struct backref_edge *alloc_backref_edge(struct backref_cache *cache) +{ + struct backref_edge *edge; + + edge = kzalloc(sizeof(*edge), GFP_NOFS); + if (edge) + cache->nr_edges++; + return edge; } -static void backref_node_init(struct backref_node *node) +static void free_backref_edge(struct backref_cache *cache, + struct backref_edge *edge) { - memset(node, 0, sizeof(*node)); - INIT_LIST_HEAD(&node->upper); - INIT_LIST_HEAD(&node->lower); - RB_CLEAR_NODE(&node->rb_node); + if (edge) { + cache->nr_edges--; + kfree(edge); + } } static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, @@ -250,6 +342,7 @@ static struct backref_node *walk_up_backref(struct backref_node *node, edges[idx++] = edge; node = edge->node[UPPER]; } + BUG_ON(node->detached); *index = idx; return node; } @@ -281,13 +374,18 @@ static struct backref_node *walk_down_backref(struct backref_edge *edges[], return NULL; } +static void unlock_node_buffer(struct backref_node *node) +{ + if (node->locked) { + btrfs_tree_unlock(node->eb); + node->locked = 0; + } +} + static void drop_node_buffer(struct backref_node *node) { if (node->eb) { - if (node->locked) { - btrfs_tree_unlock(node->eb); - node->locked = 0; - } + unlock_node_buffer(node); free_extent_buffer(node->eb); node->eb = NULL; } @@ -296,14 +394,14 @@ static void drop_node_buffer(struct backref_node *node) static void drop_backref_node(struct backref_cache *tree, struct backref_node *node) { - BUG_ON(!node->lowest); BUG_ON(!list_empty(&node->upper)); drop_node_buffer(node); + list_del(&node->list); list_del(&node->lower); - - rb_erase(&node->rb_node, &tree->rb_root); - kfree(node); + if (!RB_EMPTY_NODE(&node->rb_node)) + rb_erase(&node->rb_node, &tree->rb_root); + free_backref_node(tree, node); } /* @@ -318,27 +416,121 @@ static void remove_backref_node(struct backref_cache *cache, if (!node) return; - BUG_ON(!node->lowest); + BUG_ON(!node->lowest && !node->detached); while (!list_empty(&node->upper)) { edge = list_entry(node->upper.next, struct backref_edge, list[LOWER]); upper = edge->node[UPPER]; list_del(&edge->list[LOWER]); list_del(&edge->list[UPPER]); - kfree(edge); + free_backref_edge(cache, edge); + + if (RB_EMPTY_NODE(&upper->rb_node)) { + BUG_ON(!list_empty(&node->upper)); + drop_backref_node(cache, node); + node = upper; + node->lowest = 1; + continue; + } /* - * add the node to pending list if no other + * add the node to leaf node list if no other * child block cached. */ if (list_empty(&upper->lower)) { - list_add_tail(&upper->lower, - &cache->pending[upper->level]); + list_add_tail(&upper->lower, &cache->leaves); upper->lowest = 1; } } + drop_backref_node(cache, node); } +static void update_backref_node(struct backref_cache *cache, + struct backref_node *node, u64 bytenr) +{ + struct rb_node *rb_node; + rb_erase(&node->rb_node, &cache->rb_root); + node->bytenr = bytenr; + rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); + BUG_ON(rb_node); +} + +/* + * update backref cache after a transaction commit + */ +static int update_backref_cache(struct btrfs_trans_handle *trans, + struct backref_cache *cache) +{ + struct backref_node *node; + int level = 0; + + if (cache->last_trans == 0) { + cache->last_trans = trans->transid; + return 0; + } + + if (cache->last_trans == trans->transid) + return 0; + + /* + * detached nodes are used to avoid unnecessary backref + * lookup. transaction commit changes the extent tree. + * so the detached nodes are no longer useful. + */ + while (!list_empty(&cache->detached)) { + node = list_entry(cache->detached.next, + struct backref_node, list); + remove_backref_node(cache, node); + } + + while (!list_empty(&cache->changed)) { + node = list_entry(cache->changed.next, + struct backref_node, list); + list_del_init(&node->list); + BUG_ON(node->pending); + update_backref_node(cache, node, node->new_bytenr); + } + + /* + * some nodes can be left in the pending list if there were + * errors during processing the pending nodes. + */ + for (level = 0; level < BTRFS_MAX_LEVEL; level++) { + list_for_each_entry(node, &cache->pending[level], list) { + BUG_ON(!node->pending); + if (node->bytenr == node->new_bytenr) + continue; + update_backref_node(cache, node, node->new_bytenr); + } + } + + cache->last_trans = 0; + return 1; +} + +static int should_ignore_root(struct btrfs_root *root) +{ + struct btrfs_root *reloc_root; + + if (!root->ref_cows) + return 0; + + reloc_root = root->reloc_root; + if (!reloc_root) + return 0; + + if (btrfs_root_last_snapshot(&reloc_root->root_item) == + root->fs_info->running_transaction->transid - 1) + return 0; + /* + * if there is reloc tree and it was created in previous + * transaction backref lookup can find the reloc tree, + * so backref node for the fs tree root is useless for + * relocation. + */ + return 1; +} + /* * find reloc tree by address of tree root */ @@ -453,11 +645,12 @@ int find_inline_backref(struct extent_buffer *leaf, int slot, * for all upper level blocks that directly/indirectly reference the * block are also cached. */ -static struct backref_node *build_backref_tree(struct reloc_control *rc, - struct backref_cache *cache, - struct btrfs_key *node_key, - int level, u64 bytenr) +static noinline_for_stack +struct backref_node *build_backref_tree(struct reloc_control *rc, + struct btrfs_key *node_key, + int level, u64 bytenr) { + struct backref_cache *cache = &rc->backref_cache; struct btrfs_path *path1; struct btrfs_path *path2; struct extent_buffer *eb; @@ -473,6 +666,8 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc, unsigned long end; unsigned long ptr; LIST_HEAD(list); + LIST_HEAD(useless); + int cowonly; int ret; int err = 0; @@ -483,15 +678,13 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc, goto out; } - node = kmalloc(sizeof(*node), GFP_NOFS); + node = alloc_backref_node(cache); if (!node) { err = -ENOMEM; goto out; } - backref_node_init(node); node->bytenr = bytenr; - node->owner = 0; node->level = level; node->lowest = 1; cur = node; @@ -587,17 +780,20 @@ again: #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || key.type == BTRFS_EXTENT_REF_V0_KEY) { - if (key.objectid == key.offset && - key.type == BTRFS_EXTENT_REF_V0_KEY) { + if (key.type == BTRFS_EXTENT_REF_V0_KEY) { struct btrfs_extent_ref_v0 *ref0; ref0 = btrfs_item_ptr(eb, path1->slots[0], struct btrfs_extent_ref_v0); root = find_tree_root(rc, eb, ref0); - if (root) - cur->root = root; - else - cur->old_root = 1; - break; + if (!root->ref_cows) + cur->cowonly = 1; + if (key.objectid == key.offset) { + if (root && !should_ignore_root(root)) + cur->root = root; + else + list_add(&cur->list, &useless); + break; + } } #else BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); @@ -614,22 +810,20 @@ again: break; } - edge = kzalloc(sizeof(*edge), GFP_NOFS); + edge = alloc_backref_edge(cache); if (!edge) { err = -ENOMEM; goto out; } rb_node = tree_search(&cache->rb_root, key.offset); if (!rb_node) { - upper = kmalloc(sizeof(*upper), GFP_NOFS); + upper = alloc_backref_node(cache); if (!upper) { - kfree(edge); + free_backref_edge(cache, edge); err = -ENOMEM; goto out; } - backref_node_init(upper); upper->bytenr = key.offset; - upper->owner = 0; upper->level = cur->level + 1; /* * backrefs for the upper level block isn't @@ -639,11 +833,12 @@ again: } else { upper = rb_entry(rb_node, struct backref_node, rb_node); + BUG_ON(!upper->checked); INIT_LIST_HEAD(&edge->list[UPPER]); } - list_add(&edge->list[LOWER], &cur->upper); - edge->node[UPPER] = upper; + list_add_tail(&edge->list[LOWER], &cur->upper); edge->node[LOWER] = cur; + edge->node[UPPER] = upper; goto next; } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { @@ -657,11 +852,17 @@ again: goto out; } + if (!root->ref_cows) + cur->cowonly = 1; + if (btrfs_root_level(&root->root_item) == cur->level) { /* tree root */ BUG_ON(btrfs_root_bytenr(&root->root_item) != cur->bytenr); - cur->root = root; + if (should_ignore_root(root)) + list_add(&cur->list, &useless); + else + cur->root = root; break; } @@ -692,11 +893,14 @@ again: if (!path2->nodes[level]) { BUG_ON(btrfs_root_bytenr(&root->root_item) != lower->bytenr); - lower->root = root; + if (should_ignore_root(root)) + list_add(&lower->list, &useless); + else + lower->root = root; break; } - edge = kzalloc(sizeof(*edge), GFP_NOFS); + edge = alloc_backref_edge(cache); if (!edge) { err = -ENOMEM; goto out; @@ -705,16 +909,17 @@ again: eb = path2->nodes[level]; rb_node = tree_search(&cache->rb_root, eb->start); if (!rb_node) { - upper = kmalloc(sizeof(*upper), GFP_NOFS); + upper = alloc_backref_node(cache); if (!upper) { - kfree(edge); + free_backref_edge(cache, edge); err = -ENOMEM; goto out; } - backref_node_init(upper); upper->bytenr = eb->start; upper->owner = btrfs_header_owner(eb); upper->level = lower->level + 1; + if (!root->ref_cows) + upper->cowonly = 1; /* * if we know the block isn't shared @@ -744,10 +949,12 @@ again: rb_node); BUG_ON(!upper->checked); INIT_LIST_HEAD(&edge->list[UPPER]); + if (!upper->owner) + upper->owner = btrfs_header_owner(eb); } list_add_tail(&edge->list[LOWER], &lower->upper); - edge->node[UPPER] = upper; edge->node[LOWER] = lower; + edge->node[UPPER] = upper; if (rb_node) break; @@ -785,8 +992,13 @@ next: * into the cache. */ BUG_ON(!node->checked); - rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); - BUG_ON(rb_node); + cowonly = node->cowonly; + if (!cowonly) { + rb_node = tree_insert(&cache->rb_root, node->bytenr, + &node->rb_node); + BUG_ON(rb_node); + list_add_tail(&node->lower, &cache->leaves); + } list_for_each_entry(edge, &node->upper, list[LOWER]) list_add_tail(&edge->list[UPPER], &list); @@ -795,6 +1007,14 @@ next: edge = list_entry(list.next, struct backref_edge, list[UPPER]); list_del_init(&edge->list[UPPER]); upper = edge->node[UPPER]; + if (upper->detached) { + list_del(&edge->list[LOWER]); + lower = edge->node[LOWER]; + free_backref_edge(cache, edge); + if (list_empty(&lower->upper)) + list_add(&lower->list, &useless); + continue; + } if (!RB_EMPTY_NODE(&upper->rb_node)) { if (upper->lowest) { @@ -807,25 +1027,69 @@ next: } BUG_ON(!upper->checked); - rb_node = tree_insert(&cache->rb_root, upper->bytenr, - &upper->rb_node); - BUG_ON(rb_node); + BUG_ON(cowonly != upper->cowonly); + if (!cowonly) { + rb_node = tree_insert(&cache->rb_root, upper->bytenr, + &upper->rb_node); + BUG_ON(rb_node); + } list_add_tail(&edge->list[UPPER], &upper->lower); list_for_each_entry(edge, &upper->upper, list[LOWER]) list_add_tail(&edge->list[UPPER], &list); } + /* + * process useless backref nodes. backref nodes for tree leaves + * are deleted from the cache. backref nodes for upper level + * tree blocks are left in the cache to avoid unnecessary backref + * lookup. + */ + while (!list_empty(&useless)) { + upper = list_entry(useless.next, struct backref_node, list); + list_del_init(&upper->list); + BUG_ON(!list_empty(&upper->upper)); + if (upper == node) + node = NULL; + if (upper->lowest) { + list_del_init(&upper->lower); + upper->lowest = 0; + } + while (!list_empty(&upper->lower)) { + edge = list_entry(upper->lower.next, + struct backref_edge, list[UPPER]); + list_del(&edge->list[UPPER]); + list_del(&edge->list[LOWER]); + lower = edge->node[LOWER]; + free_backref_edge(cache, edge); + + if (list_empty(&lower->upper)) + list_add(&lower->list, &useless); + } + __mark_block_processed(rc, upper); + if (upper->level > 0) { + list_add(&upper->list, &cache->detached); + upper->detached = 1; + } else { + rb_erase(&upper->rb_node, &cache->rb_root); + free_backref_node(cache, upper); + } + } out: btrfs_free_path(path1); btrfs_free_path(path2); if (err) { - INIT_LIST_HEAD(&list); + while (!list_empty(&useless)) { + lower = list_entry(useless.next, + struct backref_node, upper); + list_del_init(&lower->upper); + } upper = node; + INIT_LIST_HEAD(&list); while (upper) { if (RB_EMPTY_NODE(&upper->rb_node)) { list_splice_tail(&upper->upper, &list); - kfree(upper); + free_backref_node(cache, upper); } if (list_empty(&list)) @@ -833,14 +1097,103 @@ out: edge = list_entry(list.next, struct backref_edge, list[LOWER]); + list_del(&edge->list[LOWER]); upper = edge->node[UPPER]; - kfree(edge); + free_backref_edge(cache, edge); } return ERR_PTR(err); } + BUG_ON(node && node->detached); return node; } +/* + * helper to add backref node for the newly created snapshot. + * the backref node is created by cloning backref node that + * corresponds to root of source tree + */ +static int clone_backref_node(struct btrfs_trans_handle *trans, + struct reloc_control *rc, + struct btrfs_root *src, + struct btrfs_root *dest) +{ + struct btrfs_root *reloc_root = src->reloc_root; + struct backref_cache *cache = &rc->backref_cache; + struct backref_node *node = NULL; + struct backref_node *new_node; + struct backref_edge *edge; + struct backref_edge *new_edge; + struct rb_node *rb_node; + + if (cache->last_trans > 0) + update_backref_cache(trans, cache); + + rb_node = tree_search(&cache->rb_root, src->commit_root->start); + if (rb_node) { + node = rb_entry(rb_node, struct backref_node, rb_node); + if (node->detached) + node = NULL; + else + BUG_ON(node->new_bytenr != reloc_root->node->start); + } + + if (!node) { + rb_node = tree_search(&cache->rb_root, + reloc_root->commit_root->start); + if (rb_node) { + node = rb_entry(rb_node, struct backref_node, + rb_node); + BUG_ON(node->detached); + } + } + + if (!node) + return 0; + + new_node = alloc_backref_node(cache); + if (!new_node) + return -ENOMEM; + + new_node->bytenr = dest->node->start; + new_node->level = node->level; + new_node->lowest = node->lowest; + new_node->root = dest; + + if (!node->lowest) { + list_for_each_entry(edge, &node->lower, list[UPPER]) { + new_edge = alloc_backref_edge(cache); + if (!new_edge) + goto fail; + + new_edge->node[UPPER] = new_node; + new_edge->node[LOWER] = edge->node[LOWER]; + list_add_tail(&new_edge->list[UPPER], + &new_node->lower); + } + } + + rb_node = tree_insert(&cache->rb_root, new_node->bytenr, + &new_node->rb_node); + BUG_ON(rb_node); + + if (!new_node->lowest) { + list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) { + list_add_tail(&new_edge->list[LOWER], + &new_edge->node[LOWER]->upper); + } + } + return 0; +fail: + while (!list_empty(&new_node->lower)) { + new_edge = list_entry(new_node->lower.next, + struct backref_edge, list[UPPER]); + list_del(&new_edge->list[UPPER]); + free_backref_edge(cache, new_edge); + } + free_backref_node(cache, new_node); + return -ENOMEM; +} + /* * helper to add 'address of tree root -> reloc tree' mapping */ @@ -901,12 +1254,8 @@ static int __update_reloc_root(struct btrfs_root *root, int del) return 0; } -/* - * create reloc tree for a given fs tree. reloc tree is just a - * snapshot of the fs tree with special root objectid. - */ -int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid) { struct btrfs_root *reloc_root; struct extent_buffer *eb; @@ -914,36 +1263,45 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, struct btrfs_key root_key; int ret; - if (root->reloc_root) { - reloc_root = root->reloc_root; - reloc_root->last_trans = trans->transid; - return 0; - } - - if (!root->fs_info->reloc_ctl || - !root->fs_info->reloc_ctl->create_reloc_root || - root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) - return 0; - root_item = kmalloc(sizeof(*root_item), GFP_NOFS); BUG_ON(!root_item); root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; root_key.type = BTRFS_ROOT_ITEM_KEY; - root_key.offset = root->root_key.objectid; + root_key.offset = objectid; - ret = btrfs_copy_root(trans, root, root->commit_root, &eb, - BTRFS_TREE_RELOC_OBJECTID); - BUG_ON(ret); + if (root->root_key.objectid == objectid) { + /* called by btrfs_init_reloc_root */ + ret = btrfs_copy_root(trans, root, root->commit_root, &eb, + BTRFS_TREE_RELOC_OBJECTID); + BUG_ON(ret); + + btrfs_set_root_last_snapshot(&root->root_item, + trans->transid - 1); + } else { + /* + * called by btrfs_reloc_post_snapshot_hook. + * the source tree is a reloc tree, all tree blocks + * modified after it was created have RELOC flag + * set in their headers. so it's OK to not update + * the 'last_snapshot'. + */ + ret = btrfs_copy_root(trans, root, root->node, &eb, + BTRFS_TREE_RELOC_OBJECTID); + BUG_ON(ret); + } - btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1); memcpy(root_item, &root->root_item, sizeof(*root_item)); - btrfs_set_root_refs(root_item, 1); btrfs_set_root_bytenr(root_item, eb->start); btrfs_set_root_level(root_item, btrfs_header_level(eb)); btrfs_set_root_generation(root_item, trans->transid); - memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key)); - root_item->drop_level = 0; + + if (root->root_key.objectid == objectid) { + btrfs_set_root_refs(root_item, 0); + memset(&root_item->drop_progress, 0, + sizeof(struct btrfs_disk_key)); + root_item->drop_level = 0; + } btrfs_tree_unlock(eb); free_extent_buffer(eb); @@ -957,6 +1315,37 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, &root_key); BUG_ON(IS_ERR(reloc_root)); reloc_root->last_trans = trans->transid; + return reloc_root; +} + +/* + * create reloc tree for a given fs tree. reloc tree is just a + * snapshot of the fs tree with special root objectid. + */ +int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_root *reloc_root; + struct reloc_control *rc = root->fs_info->reloc_ctl; + int clear_rsv = 0; + + if (root->reloc_root) { + reloc_root = root->reloc_root; + reloc_root->last_trans = trans->transid; + return 0; + } + + if (!rc || !rc->create_reloc_tree || + root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) + return 0; + + if (!trans->block_rsv) { + trans->block_rsv = rc->block_rsv; + clear_rsv = 1; + } + reloc_root = create_reloc_root(trans, root, root->root_key.objectid); + if (clear_rsv) + trans->block_rsv = NULL; __add_reloc_root(reloc_root); root->reloc_root = reloc_root; @@ -980,7 +1369,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, reloc_root = root->reloc_root; root_item = &reloc_root->root_item; - if (btrfs_root_refs(root_item) == 0) { + if (root->fs_info->reloc_ctl->merge_reloc_tree && + btrfs_root_refs(root_item) == 0) { root->reloc_root = NULL; del = 1; } @@ -1102,8 +1492,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, goto out; } - if (new_bytenr) - *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); ret = 0; out: btrfs_free_path(path); @@ -1114,19 +1503,18 @@ out: * update file extent items in the tree leaf to point to * the new locations. */ -static int replace_file_extents(struct btrfs_trans_handle *trans, - struct reloc_control *rc, - struct btrfs_root *root, - struct extent_buffer *leaf, - struct list_head *inode_list) +static noinline_for_stack +int replace_file_extents(struct btrfs_trans_handle *trans, + struct reloc_control *rc, + struct btrfs_root *root, + struct extent_buffer *leaf) { struct btrfs_key key; struct btrfs_file_extent_item *fi; struct inode *inode = NULL; - struct inodevec *ivec = NULL; u64 parent; u64 bytenr; - u64 new_bytenr; + u64 new_bytenr = 0; u64 num_bytes; u64 end; u32 nritems; @@ -1166,21 +1554,12 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, * to complete and drop the extent cache */ if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { - if (!ivec || ivec->nr == INODEVEC_SIZE) { - ivec = kmalloc(sizeof(*ivec), GFP_NOFS); - BUG_ON(!ivec); - ivec->nr = 0; - list_add_tail(&ivec->list, inode_list); - } if (first) { inode = find_next_inode(root, key.objectid); - if (inode) - ivec->inode[ivec->nr++] = inode; first = 0; } else if (inode && inode->i_ino < key.objectid) { + btrfs_add_delayed_iput(inode); inode = find_next_inode(root, key.objectid); - if (inode) - ivec->inode[ivec->nr++] = inode; } if (inode && inode->i_ino == key.objectid) { end = key.offset + @@ -1204,8 +1583,10 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, ret = get_new_location(rc->data_inode, &new_bytenr, bytenr, num_bytes); - if (ret > 0) + if (ret > 0) { + WARN_ON(1); continue; + } BUG_ON(ret < 0); btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); @@ -1225,6 +1606,8 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, } if (dirty) btrfs_mark_buffer_dirty(leaf); + if (inode) + btrfs_add_delayed_iput(inode); return 0; } @@ -1248,11 +1631,11 @@ int memcmp_node_keys(struct extent_buffer *eb, int slot, * if no block got replaced, 0 is returned. if there are other * errors, a negative error number is returned. */ -static int replace_path(struct btrfs_trans_handle *trans, - struct btrfs_root *dest, struct btrfs_root *src, - struct btrfs_path *path, struct btrfs_key *next_key, - struct extent_buffer **leaf, - int lowest_level, int max_level) +static noinline_for_stack +int replace_path(struct btrfs_trans_handle *trans, + struct btrfs_root *dest, struct btrfs_root *src, + struct btrfs_path *path, struct btrfs_key *next_key, + int lowest_level, int max_level) { struct extent_buffer *eb; struct extent_buffer *parent; @@ -1263,16 +1646,16 @@ static int replace_path(struct btrfs_trans_handle *trans, u64 new_ptr_gen; u64 last_snapshot; u32 blocksize; + int cow = 0; int level; int ret; int slot; BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); - BUG_ON(lowest_level > 1 && leaf); last_snapshot = btrfs_root_last_snapshot(&src->root_item); - +again: slot = path->slots[lowest_level]; btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); @@ -1286,8 +1669,10 @@ static int replace_path(struct btrfs_trans_handle *trans, return 0; } - ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); - BUG_ON(ret); + if (cow) { + ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); + BUG_ON(ret); + } btrfs_set_lock_blocking(eb); if (next_key) { @@ -1331,7 +1716,7 @@ static int replace_path(struct btrfs_trans_handle *trans, if (new_bytenr == 0 || old_ptr_gen > last_snapshot || memcmp_node_keys(parent, slot, path, level)) { - if (level <= lowest_level && !leaf) { + if (level <= lowest_level) { ret = 0; break; } @@ -1339,16 +1724,12 @@ static int replace_path(struct btrfs_trans_handle *trans, eb = read_tree_block(dest, old_bytenr, blocksize, old_ptr_gen); btrfs_tree_lock(eb); - ret = btrfs_cow_block(trans, dest, eb, parent, - slot, &eb); - BUG_ON(ret); - btrfs_set_lock_blocking(eb); - - if (level <= lowest_level) { - *leaf = eb; - ret = 0; - break; + if (cow) { + ret = btrfs_cow_block(trans, dest, eb, parent, + slot, &eb); + BUG_ON(ret); } + btrfs_set_lock_blocking(eb); btrfs_tree_unlock(parent); free_extent_buffer(parent); @@ -1357,6 +1738,13 @@ static int replace_path(struct btrfs_trans_handle *trans, continue; } + if (!cow) { + btrfs_tree_unlock(parent); + free_extent_buffer(parent); + cow = 1; + goto again; + } + btrfs_node_key_to_cpu(path->nodes[level], &key, path->slots[level]); btrfs_release_path(src, path); @@ -1562,20 +1950,6 @@ static int invalidate_extent_cache(struct btrfs_root *root, return 0; } -static void put_inodes(struct list_head *list) -{ - struct inodevec *ivec; - while (!list_empty(list)) { - ivec = list_entry(list->next, struct inodevec, list); - list_del(&ivec->list); - while (ivec->nr > 0) { - ivec->nr--; - iput(ivec->inode[ivec->nr]); - } - kfree(ivec); - } -} - static int find_next_key(struct btrfs_path *path, int level, struct btrfs_key *key) @@ -1608,13 +1982,14 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, struct btrfs_root *reloc_root; struct btrfs_root_item *root_item; struct btrfs_path *path; - struct extent_buffer *leaf = NULL; + struct extent_buffer *leaf; unsigned long nr; int level; int max_level; int replaced = 0; int ret; int err = 0; + u32 min_reserved; path = btrfs_alloc_path(); if (!path) @@ -1648,34 +2023,23 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, btrfs_unlock_up_safe(path, 0); } - if (level == 0 && rc->stage == UPDATE_DATA_PTRS) { - trans = btrfs_start_transaction(root, 0); + min_reserved = root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; + memset(&next_key, 0, sizeof(next_key)); - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, 0); - btrfs_release_path(reloc_root, path); + while (1) { + trans = btrfs_start_transaction(root, 0); + trans->block_rsv = rc->block_rsv; - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); - if (ret < 0) { - err = ret; - goto out; + ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, + min_reserved, 0); + if (ret) { + BUG_ON(ret != -EAGAIN); + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + continue; } - leaf = path->nodes[0]; - btrfs_unlock_up_safe(path, 1); - ret = replace_file_extents(trans, rc, root, leaf, - &inode_list); - if (ret < 0) - err = ret; - goto out; - } - - memset(&next_key, 0, sizeof(next_key)); - - while (1) { - leaf = NULL; replaced = 0; - trans = btrfs_start_transaction(root, 0); max_level = level; ret = walk_down_reloc_tree(reloc_root, path, &level); @@ -1689,14 +2053,9 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, if (!find_next_key(path, level, &key) && btrfs_comp_cpu_keys(&next_key, &key) >= 0) { ret = 0; - } else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) { - ret = replace_path(trans, root, reloc_root, - path, &next_key, &leaf, - level, max_level); } else { - ret = replace_path(trans, root, reloc_root, - path, &next_key, NULL, - level, max_level); + ret = replace_path(trans, root, reloc_root, path, + &next_key, level, max_level); } if (ret < 0) { err = ret; @@ -1708,16 +2067,6 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, btrfs_node_key_to_cpu(path->nodes[level], &key, path->slots[level]); replaced = 1; - } else if (leaf) { - /* - * no block got replaced, try replacing file extents - */ - btrfs_item_key_to_cpu(leaf, &key, 0); - ret = replace_file_extents(trans, rc, root, leaf, - &inode_list); - btrfs_tree_unlock(leaf); - free_extent_buffer(leaf); - BUG_ON(ret < 0); } ret = walk_up_reloc_tree(reloc_root, path, &level); @@ -1734,15 +2083,10 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, root_item->drop_level = level; nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); btrfs_btree_balance_dirty(root, nr); - /* - * put inodes outside transaction, otherwise we may deadlock. - */ - put_inodes(&inode_list); - if (replaced && rc->stage == UPDATE_DATA_PTRS) invalidate_extent_cache(root, &key, &next_key); } @@ -1765,87 +2109,125 @@ out: sizeof(root_item->drop_progress)); root_item->drop_level = 0; btrfs_set_root_refs(root_item, 0); + btrfs_update_reloc_root(trans, root); } nr = trans->blocks_used; - btrfs_end_transaction(trans, root); + btrfs_end_transaction_throttle(trans, root); btrfs_btree_balance_dirty(root, nr); - put_inodes(&inode_list); - if (replaced && rc->stage == UPDATE_DATA_PTRS) invalidate_extent_cache(root, &key, &next_key); return err; } -/* - * callback for the work threads. - * this function merges reloc tree with corresponding fs tree, - * and then drops the reloc tree. - */ -static void merge_func(struct btrfs_work *work) +static noinline_for_stack +int prepare_to_merge(struct reloc_control *rc, int err) { - struct btrfs_trans_handle *trans; - struct btrfs_root *root; + struct btrfs_root *root = rc->extent_root; struct btrfs_root *reloc_root; - struct async_merge *async; + struct btrfs_trans_handle *trans; + LIST_HEAD(reloc_roots); + u64 num_bytes = 0; + int ret; + int retries = 0; + + mutex_lock(&root->fs_info->trans_mutex); + rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; + rc->merging_rsv_size += rc->nodes_relocated * 2; + mutex_unlock(&root->fs_info->trans_mutex); +again: + if (!err) { + num_bytes = rc->merging_rsv_size; + ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, + num_bytes, &retries); + if (ret) + err = ret; + } + + trans = btrfs_join_transaction(rc->extent_root, 1); + + if (!err) { + if (num_bytes != rc->merging_rsv_size) { + btrfs_end_transaction(trans, rc->extent_root); + btrfs_block_rsv_release(rc->extent_root, + rc->block_rsv, num_bytes); + retries = 0; + goto again; + } + } - async = container_of(work, struct async_merge, work); - reloc_root = async->root; + rc->merge_reloc_tree = 1; + + while (!list_empty(&rc->reloc_roots)) { + reloc_root = list_entry(rc->reloc_roots.next, + struct btrfs_root, root_list); + list_del_init(&reloc_root->root_list); - if (btrfs_root_refs(&reloc_root->root_item) > 0) { root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset); BUG_ON(IS_ERR(root)); BUG_ON(root->reloc_root != reloc_root); - merge_reloc_root(async->rc, root); - - trans = btrfs_start_transaction(root, 0); + /* + * set reference count to 1, so btrfs_recover_relocation + * knows it should resumes merging + */ + if (!err) + btrfs_set_root_refs(&reloc_root->root_item, 1); btrfs_update_reloc_root(trans, root); - btrfs_end_transaction(trans, root); - } - btrfs_drop_snapshot(reloc_root, 0); + list_add(&reloc_root->root_list, &reloc_roots); + } - if (atomic_dec_and_test(async->num_pending)) - complete(async->done); + list_splice(&reloc_roots, &rc->reloc_roots); - kfree(async); + if (!err) + btrfs_commit_transaction(trans, rc->extent_root); + else + btrfs_end_transaction(trans, rc->extent_root); + return err; } -static int merge_reloc_roots(struct reloc_control *rc) +static noinline_for_stack +int merge_reloc_roots(struct reloc_control *rc) { - struct async_merge *async; struct btrfs_root *root; - struct completion done; - atomic_t num_pending; + struct btrfs_root *reloc_root; + LIST_HEAD(reloc_roots); + int found = 0; + int ret; +again: + root = rc->extent_root; + mutex_lock(&root->fs_info->trans_mutex); + list_splice_init(&rc->reloc_roots, &reloc_roots); + mutex_unlock(&root->fs_info->trans_mutex); - init_completion(&done); - atomic_set(&num_pending, 1); + while (!list_empty(&reloc_roots)) { + found = 1; + reloc_root = list_entry(reloc_roots.next, + struct btrfs_root, root_list); - while (!list_empty(&rc->reloc_roots)) { - root = list_entry(rc->reloc_roots.next, - struct btrfs_root, root_list); - list_del_init(&root->root_list); + if (btrfs_root_refs(&reloc_root->root_item) > 0) { + root = read_fs_root(reloc_root->fs_info, + reloc_root->root_key.offset); + BUG_ON(IS_ERR(root)); + BUG_ON(root->reloc_root != reloc_root); - async = kmalloc(sizeof(*async), GFP_NOFS); - BUG_ON(!async); - async->work.func = merge_func; - async->work.flags = 0; - async->rc = rc; - async->root = root; - async->done = &done; - async->num_pending = &num_pending; - atomic_inc(&num_pending); - btrfs_queue_worker(&rc->workers, &async->work); + ret = merge_reloc_root(rc, root); + BUG_ON(ret); + } else { + list_del_init(&reloc_root->root_list); + } + btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0); } - if (!atomic_dec_and_test(&num_pending)) - wait_for_completion(&done); - + if (found) { + found = 0; + goto again; + } BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); return 0; } @@ -1876,119 +2258,169 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans, return btrfs_record_root_in_trans(trans, root); } -/* - * select one tree from trees that references the block. - * for blocks in refernce counted trees, we preper reloc tree. - * if no reloc tree found and reloc_only is true, NULL is returned. - */ -static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans, - struct backref_node *node, - struct backref_edge *edges[], - int *nr, int reloc_only) +static noinline_for_stack +struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, + struct reloc_control *rc, + struct backref_node *node, + struct backref_edge *edges[], int *nr) { struct backref_node *next; struct btrfs_root *root; - int index; - int loop = 0; -again: - index = 0; + int index = 0; + next = node; while (1) { cond_resched(); next = walk_up_backref(next, edges, &index); root = next->root; - if (!root) { - BUG_ON(!node->old_root); - goto skip; - } - - /* no other choice for non-refernce counted tree */ - if (!root->ref_cows) { - BUG_ON(reloc_only); - break; - } + BUG_ON(!root); + BUG_ON(!root->ref_cows); if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { record_reloc_root_in_trans(trans, root); break; } - if (loop) { - btrfs_record_root_in_trans(trans, root); + btrfs_record_root_in_trans(trans, root); + root = root->reloc_root; + + if (next->new_bytenr != root->node->start) { + BUG_ON(next->new_bytenr); + BUG_ON(!list_empty(&next->list)); + next->new_bytenr = root->node->start; + next->root = root; + list_add_tail(&next->list, + &rc->backref_cache.changed); + __mark_block_processed(rc, next); break; } - if (reloc_only || next != node) { - if (!root->reloc_root) - btrfs_record_root_in_trans(trans, root); - root = root->reloc_root; - /* - * if the reloc tree was created in current - * transation, there is no node in backref tree - * corresponds to the root of the reloc tree. - */ - if (btrfs_root_last_snapshot(&root->root_item) == - trans->transid - 1) - break; - } -skip: + WARN_ON(1); root = NULL; next = walk_down_backref(edges, &index); if (!next || next->level <= node->level) break; } + if (!root) + return NULL; - if (!root && !loop && !reloc_only) { - loop = 1; - goto again; + *nr = index; + next = node; + /* setup backref node path for btrfs_reloc_cow_block */ + while (1) { + rc->backref_cache.path[next->level] = next; + if (--index < 0) + break; + next = edges[index]->node[UPPER]; } - - if (root) - *nr = index; - else - *nr = 0; - return root; } +/* + * select a tree root for relocation. return NULL if the block + * is reference counted. we should use do_relocation() in this + * case. return a tree root pointer if the block isn't reference + * counted. return -ENOENT if the block is root of reloc tree. + */ static noinline_for_stack struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, struct backref_node *node) { + struct backref_node *next; + struct btrfs_root *root; + struct btrfs_root *fs_root = NULL; struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; - int nr; - return __select_one_root(trans, node, edges, &nr, 0); + int index = 0; + + next = node; + while (1) { + cond_resched(); + next = walk_up_backref(next, edges, &index); + root = next->root; + BUG_ON(!root); + + /* no other choice for non-refernce counted tree */ + if (!root->ref_cows) + return root; + + if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) + fs_root = root; + + if (next != node) + return NULL; + + next = walk_down_backref(edges, &index); + if (!next || next->level <= node->level) + break; + } + + if (!fs_root) + return ERR_PTR(-ENOENT); + return fs_root; } static noinline_for_stack -struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, - struct backref_node *node, - struct backref_edge *edges[], int *nr) +u64 calcu_metadata_size(struct reloc_control *rc, + struct backref_node *node, int reserve) { - return __select_one_root(trans, node, edges, nr, 1); + struct backref_node *next = node; + struct backref_edge *edge; + struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; + u64 num_bytes = 0; + int index = 0; + + BUG_ON(reserve && node->processed); + + while (next) { + cond_resched(); + while (1) { + if (next->processed && (reserve || next != node)) + break; + + num_bytes += btrfs_level_size(rc->extent_root, + next->level); + + if (list_empty(&next->upper)) + break; + + edge = list_entry(next->upper.next, + struct backref_edge, list[LOWER]); + edges[index++] = edge; + next = edge->node[UPPER]; + } + next = walk_down_backref(edges, &index); + } + return num_bytes; } -static void grab_path_buffers(struct btrfs_path *path, - struct backref_node *node, - struct backref_edge *edges[], int nr) +static int reserve_metadata_space(struct btrfs_trans_handle *trans, + struct reloc_control *rc, + struct backref_node *node) { - int i = 0; - while (1) { - drop_node_buffer(node); - node->eb = path->nodes[node->level]; - BUG_ON(!node->eb); - if (path->locks[node->level]) - node->locked = 1; - path->nodes[node->level] = NULL; - path->locks[node->level] = 0; - - if (i >= nr) - break; + struct btrfs_root *root = rc->extent_root; + u64 num_bytes; + int ret; + + num_bytes = calcu_metadata_size(rc, node, 1) * 2; - edges[i]->blockptr = node->eb->start; - node = edges[i]->node[UPPER]; - i++; + trans->block_rsv = rc->block_rsv; + ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes, + &rc->block_rsv_retries); + if (ret) { + if (ret == -EAGAIN) + rc->commit_transaction = 1; + return ret; } + + rc->block_rsv_retries = 0; + return 0; +} + +static void release_metadata_space(struct reloc_control *rc, + struct backref_node *node) +{ + u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2; + btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes); } /* @@ -1999,6 +2431,7 @@ static void grab_path_buffers(struct btrfs_path *path, * in that case this function just updates pointers. */ static int do_relocation(struct btrfs_trans_handle *trans, + struct reloc_control *rc, struct backref_node *node, struct btrfs_key *key, struct btrfs_path *path, int lowest) @@ -2019,18 +2452,25 @@ static int do_relocation(struct btrfs_trans_handle *trans, BUG_ON(lowest && node->eb); path->lowest_level = node->level + 1; + rc->backref_cache.path[node->level] = node; list_for_each_entry(edge, &node->upper, list[LOWER]) { cond_resched(); - if (node->eb && node->eb->start == edge->blockptr) - continue; upper = edge->node[UPPER]; - root = select_reloc_root(trans, upper, edges, &nr); - if (!root) - continue; - - if (upper->eb && !upper->locked) + root = select_reloc_root(trans, rc, upper, edges, &nr); + BUG_ON(!root); + + if (upper->eb && !upper->locked) { + if (!lowest) { + ret = btrfs_bin_search(upper->eb, key, + upper->level, &slot); + BUG_ON(ret); + bytenr = btrfs_node_blockptr(upper->eb, slot); + if (node->eb->start == bytenr) + goto next; + } drop_node_buffer(upper); + } if (!upper->eb) { ret = btrfs_search_slot(trans, root, key, path, 0, 1); @@ -2040,11 +2480,17 @@ static int do_relocation(struct btrfs_trans_handle *trans, } BUG_ON(ret > 0); - slot = path->slots[upper->level]; + if (!upper->eb) { + upper->eb = path->nodes[upper->level]; + path->nodes[upper->level] = NULL; + } else { + BUG_ON(upper->eb != path->nodes[upper->level]); + } - btrfs_unlock_up_safe(path, upper->level + 1); - grab_path_buffers(path, upper, edges, nr); + upper->locked = 1; + path->locks[upper->level] = 0; + slot = path->slots[upper->level]; btrfs_release_path(NULL, path); } else { ret = btrfs_bin_search(upper->eb, key, upper->level, @@ -2053,14 +2499,11 @@ static int do_relocation(struct btrfs_trans_handle *trans, } bytenr = btrfs_node_blockptr(upper->eb, slot); - if (!lowest) { - if (node->eb->start == bytenr) { - btrfs_tree_unlock(upper->eb); - upper->locked = 0; - continue; - } + if (lowest) { + BUG_ON(bytenr != node->bytenr); } else { - BUG_ON(node->bytenr != bytenr); + if (node->eb->start == bytenr) + goto next; } blocksize = btrfs_level_size(root, node->level); @@ -2072,13 +2515,13 @@ static int do_relocation(struct btrfs_trans_handle *trans, if (!node->eb) { ret = btrfs_cow_block(trans, root, eb, upper->eb, slot, &eb); + btrfs_tree_unlock(eb); + free_extent_buffer(eb); if (ret < 0) { err = ret; - break; + goto next; } - btrfs_set_lock_blocking(eb); - node->eb = eb; - node->locked = 1; + BUG_ON(node->eb != eb); } else { btrfs_set_node_blockptr(upper->eb, slot, node->eb->start); @@ -2096,67 +2539,80 @@ static int do_relocation(struct btrfs_trans_handle *trans, ret = btrfs_drop_subtree(trans, root, eb, upper->eb); BUG_ON(ret); } - if (!lowest) { - btrfs_tree_unlock(upper->eb); - upper->locked = 0; - } +next: + if (!upper->pending) + drop_node_buffer(upper); + else + unlock_node_buffer(upper); + if (err) + break; } + + if (!err && node->pending) { + drop_node_buffer(node); + list_move_tail(&node->list, &rc->backref_cache.changed); + node->pending = 0; + } + path->lowest_level = 0; + BUG_ON(err == -ENOSPC); return err; } static int link_to_upper(struct btrfs_trans_handle *trans, + struct reloc_control *rc, struct backref_node *node, struct btrfs_path *path) { struct btrfs_key key; - if (!node->eb || list_empty(&node->upper)) - return 0; btrfs_node_key_to_cpu(node->eb, &key, 0); - return do_relocation(trans, node, &key, path, 0); + return do_relocation(trans, rc, node, &key, path, 0); } static int finish_pending_nodes(struct btrfs_trans_handle *trans, - struct backref_cache *cache, - struct btrfs_path *path) + struct reloc_control *rc, + struct btrfs_path *path, int err) { + LIST_HEAD(list); + struct backref_cache *cache = &rc->backref_cache; struct backref_node *node; int level; int ret; - int err = 0; for (level = 0; level < BTRFS_MAX_LEVEL; level++) { while (!list_empty(&cache->pending[level])) { node = list_entry(cache->pending[level].next, - struct backref_node, lower); - BUG_ON(node->level != level); + struct backref_node, list); + list_move_tail(&node->list, &list); + BUG_ON(!node->pending); - ret = link_to_upper(trans, node, path); - if (ret < 0) - err = ret; - /* - * this remove the node from the pending list and - * may add some other nodes to the level + 1 - * pending list - */ - remove_backref_node(cache, node); + if (!err) { + ret = link_to_upper(trans, rc, node, path); + if (ret < 0) + err = ret; + } } + list_splice_init(&list, &cache->pending[level]); } - BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); return err; } static void mark_block_processed(struct reloc_control *rc, - struct backref_node *node) + u64 bytenr, u32 blocksize) +{ + set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1, + EXTENT_DIRTY, GFP_NOFS); +} + +static void __mark_block_processed(struct reloc_control *rc, + struct backref_node *node) { u32 blocksize; if (node->level == 0 || in_block_group(node->bytenr, rc->block_group)) { blocksize = btrfs_level_size(rc->extent_root, node->level); - set_extent_bits(&rc->processed_blocks, node->bytenr, - node->bytenr + blocksize - 1, EXTENT_DIRTY, - GFP_NOFS); + mark_block_processed(rc, node->bytenr, blocksize); } node->processed = 1; } @@ -2179,7 +2635,7 @@ static void update_processed_blocks(struct reloc_control *rc, if (next->processed) break; - mark_block_processed(rc, next); + __mark_block_processed(rc, next); if (list_empty(&next->upper)) break; @@ -2193,145 +2649,13 @@ static void update_processed_blocks(struct reloc_control *rc, } } -static int tree_block_processed(u64 bytenr, u32 blocksize, - struct reloc_control *rc) -{ - if (test_range_bit(&rc->processed_blocks, bytenr, - bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL)) - return 1; - return 0; -} - -/* - * check if there are any file extent pointers in the leaf point to - * data require processing - */ -static int check_file_extents(struct reloc_control *rc, - u64 bytenr, u32 blocksize, u64 ptr_gen) -{ - struct btrfs_key found_key; - struct btrfs_file_extent_item *fi; - struct extent_buffer *leaf; - u32 nritems; - int i; - int ret = 0; - - leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen); - - nritems = btrfs_header_nritems(leaf); - for (i = 0; i < nritems; i++) { - cond_resched(); - btrfs_item_key_to_cpu(leaf, &found_key, i); - if (found_key.type != BTRFS_EXTENT_DATA_KEY) - continue; - fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, fi) == - BTRFS_FILE_EXTENT_INLINE) - continue; - bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); - if (bytenr == 0) - continue; - if (in_block_group(bytenr, rc->block_group)) { - ret = 1; - break; - } - } - free_extent_buffer(leaf); - return ret; -} - -/* - * scan child blocks of a given block to find blocks require processing - */ -static int add_child_blocks(struct btrfs_trans_handle *trans, - struct reloc_control *rc, - struct backref_node *node, - struct rb_root *blocks) -{ - struct tree_block *block; - struct rb_node *rb_node; - u64 bytenr; - u64 ptr_gen; - u32 blocksize; - u32 nritems; - int i; - int err = 0; - - nritems = btrfs_header_nritems(node->eb); - blocksize = btrfs_level_size(rc->extent_root, node->level - 1); - for (i = 0; i < nritems; i++) { - cond_resched(); - bytenr = btrfs_node_blockptr(node->eb, i); - ptr_gen = btrfs_node_ptr_generation(node->eb, i); - if (ptr_gen == trans->transid) - continue; - if (!in_block_group(bytenr, rc->block_group) && - (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) - continue; - if (tree_block_processed(bytenr, blocksize, rc)) - continue; - - readahead_tree_block(rc->extent_root, - bytenr, blocksize, ptr_gen); - } - - for (i = 0; i < nritems; i++) { - cond_resched(); - bytenr = btrfs_node_blockptr(node->eb, i); - ptr_gen = btrfs_node_ptr_generation(node->eb, i); - if (ptr_gen == trans->transid) - continue; - if (!in_block_group(bytenr, rc->block_group) && - (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) - continue; - if (tree_block_processed(bytenr, blocksize, rc)) - continue; - if (!in_block_group(bytenr, rc->block_group) && - !check_file_extents(rc, bytenr, blocksize, ptr_gen)) - continue; - - block = kmalloc(sizeof(*block), GFP_NOFS); - if (!block) { - err = -ENOMEM; - break; - } - block->bytenr = bytenr; - btrfs_node_key_to_cpu(node->eb, &block->key, i); - block->level = node->level - 1; - block->key_ready = 1; - rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); - BUG_ON(rb_node); - } - if (err) - free_block_list(blocks); - return err; -} - -/* - * find adjacent blocks require processing - */ -static noinline_for_stack -int add_adjacent_blocks(struct btrfs_trans_handle *trans, - struct reloc_control *rc, - struct backref_cache *cache, - struct rb_root *blocks, int level, - struct backref_node **upper) -{ - struct backref_node *node; - int ret = 0; - - WARN_ON(!list_empty(&cache->pending[level])); - - if (list_empty(&cache->pending[level + 1])) - return 1; - - node = list_entry(cache->pending[level + 1].next, - struct backref_node, lower); - if (node->eb) - ret = add_child_blocks(trans, rc, node, blocks); - - *upper = node; - return ret; +static int tree_block_processed(u64 bytenr, u32 blocksize, + struct reloc_control *rc) +{ + if (test_range_bit(&rc->processed_blocks, bytenr, + bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL)) + return 1; + return 0; } static int get_tree_block_key(struct reloc_control *rc, @@ -2371,40 +2695,53 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, struct btrfs_path *path) { struct btrfs_root *root; - int ret; + int release = 0; + int ret = 0; + + if (!node) + return 0; + BUG_ON(node->processed); root = select_one_root(trans, node); - if (unlikely(!root)) { - rc->found_old_snapshot = 1; + if (root == ERR_PTR(-ENOENT)) { update_processed_blocks(rc, node); - return 0; + goto out; } - if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { - ret = do_relocation(trans, node, key, path, 1); - if (ret < 0) - goto out; - if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) { - ret = replace_file_extents(trans, rc, root, - node->eb, NULL); - if (ret < 0) - goto out; - } - drop_node_buffer(node); - } else if (!root->ref_cows) { - path->lowest_level = node->level; - ret = btrfs_search_slot(trans, root, key, path, 0, 1); - btrfs_release_path(root, path); - if (ret < 0) + if (!root || root->ref_cows) { + ret = reserve_metadata_space(trans, rc, node); + if (ret) goto out; - } else if (root != node->root) { - WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS); + release = 1; } - update_processed_blocks(rc, node); - ret = 0; + if (root) { + if (root->ref_cows) { + BUG_ON(node->new_bytenr); + BUG_ON(!list_empty(&node->list)); + btrfs_record_root_in_trans(trans, root); + root = root->reloc_root; + node->new_bytenr = root->node->start; + node->root = root; + list_add_tail(&node->list, &rc->backref_cache.changed); + } else { + path->lowest_level = node->level; + ret = btrfs_search_slot(trans, root, key, path, 0, 1); + btrfs_release_path(root, path); + if (ret > 0) + ret = 0; + } + if (!ret) + update_processed_blocks(rc, node); + } else { + ret = do_relocation(trans, rc, node, key, path, 1); + } out: - drop_node_buffer(node); + if (ret || node->level == 0 || node->cowonly) { + if (release) + release_metadata_space(rc, node); + remove_backref_node(&rc->backref_cache, node); + } return ret; } @@ -2415,12 +2752,10 @@ static noinline_for_stack int relocate_tree_blocks(struct btrfs_trans_handle *trans, struct reloc_control *rc, struct rb_root *blocks) { - struct backref_cache *cache; struct backref_node *node; struct btrfs_path *path; struct tree_block *block; struct rb_node *rb_node; - int level = -1; int ret; int err = 0; @@ -2428,21 +2763,9 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - cache = kmalloc(sizeof(*cache), GFP_NOFS); - if (!cache) { - btrfs_free_path(path); - return -ENOMEM; - } - - backref_cache_init(cache); - rb_node = rb_first(blocks); while (rb_node) { block = rb_entry(rb_node, struct tree_block, rb_node); - if (level == -1) - level = block->level; - else - BUG_ON(level != block->level); if (!block->key_ready) reada_tree_block(rc, block); rb_node = rb_next(rb_node); @@ -2460,7 +2783,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, while (rb_node) { block = rb_entry(rb_node, struct tree_block, rb_node); - node = build_backref_tree(rc, cache, &block->key, + node = build_backref_tree(rc, &block->key, block->level, block->bytenr); if (IS_ERR(node)) { err = PTR_ERR(node); @@ -2470,77 +2793,16 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, ret = relocate_tree_block(trans, rc, node, &block->key, path); if (ret < 0) { - err = ret; + if (ret != -EAGAIN || rb_node == rb_first(blocks)) + err = ret; goto out; } - remove_backref_node(cache, node); rb_node = rb_next(rb_node); } - - if (level > 0) - goto out; - - free_block_list(blocks); - - /* - * now backrefs of some upper level tree blocks have been cached, - * try relocating blocks referenced by these upper level blocks. - */ - while (1) { - struct backref_node *upper = NULL; - if (trans->transaction->in_commit || - trans->transaction->delayed_refs.flushing) - break; - - ret = add_adjacent_blocks(trans, rc, cache, blocks, level, - &upper); - if (ret < 0) - err = ret; - if (ret != 0) - break; - - rb_node = rb_first(blocks); - while (rb_node) { - block = rb_entry(rb_node, struct tree_block, rb_node); - if (trans->transaction->in_commit || - trans->transaction->delayed_refs.flushing) - goto out; - BUG_ON(!block->key_ready); - node = build_backref_tree(rc, cache, &block->key, - level, block->bytenr); - if (IS_ERR(node)) { - err = PTR_ERR(node); - goto out; - } - - ret = relocate_tree_block(trans, rc, node, - &block->key, path); - if (ret < 0) { - err = ret; - goto out; - } - remove_backref_node(cache, node); - rb_node = rb_next(rb_node); - } - free_block_list(blocks); - - if (upper) { - ret = link_to_upper(trans, upper, path); - if (ret < 0) { - err = ret; - break; - } - remove_backref_node(cache, upper); - } - } out: free_block_list(blocks); + err = finish_pending_nodes(trans, rc, path, err); - ret = finish_pending_nodes(trans, cache, path); - if (ret < 0) - err = ret; - - kfree(cache); btrfs_free_path(path); return err; } @@ -2910,9 +3172,6 @@ out: static int block_use_full_backref(struct reloc_control *rc, struct extent_buffer *eb) { - struct btrfs_path *path; - struct btrfs_extent_item *ei; - struct btrfs_key key; u64 flags; int ret; @@ -2920,28 +3179,14 @@ static int block_use_full_backref(struct reloc_control *rc, btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) return 1; - path = btrfs_alloc_path(); - BUG_ON(!path); - - key.objectid = eb->start; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = eb->len; - - path->search_commit_root = 1; - path->skip_locking = 1; - ret = btrfs_search_slot(NULL, rc->extent_root, - &key, path, 0, 0); + ret = btrfs_lookup_extent_info(NULL, rc->extent_root, + eb->start, eb->len, NULL, &flags); BUG_ON(ret); - ei = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_extent_item); - flags = btrfs_extent_flags(path->nodes[0], ei); - BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) ret = 1; else ret = 0; - btrfs_free_path(path); return ret; } @@ -3114,22 +3359,10 @@ int add_data_references(struct reloc_control *rc, struct btrfs_extent_inline_ref *iref; unsigned long ptr; unsigned long end; - u32 blocksize; + u32 blocksize = btrfs_level_size(rc->extent_root, 0); int ret; int err = 0; - ret = get_new_location(rc->data_inode, NULL, extent_key->objectid, - extent_key->offset); - BUG_ON(ret < 0); - if (ret > 0) { - /* the relocated data is fragmented */ - rc->extents_skipped++; - btrfs_release_path(rc->extent_root, path); - return 0; - } - - blocksize = btrfs_level_size(rc->extent_root, 0); - eb = path->nodes[0]; ptr = btrfs_item_ptr_offset(eb, path->slots[0]); end = ptr + btrfs_item_size_nr(eb, path->slots[0]); @@ -3210,7 +3443,8 @@ int add_data_references(struct reloc_control *rc, */ static noinline_for_stack int find_next_extent(struct btrfs_trans_handle *trans, - struct reloc_control *rc, struct btrfs_path *path) + struct reloc_control *rc, struct btrfs_path *path, + struct btrfs_key *extent_key) { struct btrfs_key key; struct extent_buffer *leaf; @@ -3265,6 +3499,7 @@ next: rc->search_start = end + 1; } else { rc->search_start = key.objectid + key.offset; + memcpy(extent_key, &key, sizeof(key)); return 0; } } @@ -3302,12 +3537,49 @@ static int check_extent_flags(u64 flags) return 0; } +static noinline_for_stack +int prepare_to_relocate(struct reloc_control *rc) +{ + struct btrfs_trans_handle *trans; + int ret; + + rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root); + if (!rc->block_rsv) + return -ENOMEM; + + /* + * reserve some space for creating reloc trees. + * btrfs_init_reloc_root will use them when there + * is no reservation in transaction handle. + */ + ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, + rc->extent_root->nodesize * 256, + &rc->block_rsv_retries); + if (ret) + return ret; + + rc->block_rsv->refill_used = 1; + btrfs_add_durable_block_rsv(rc->extent_root->fs_info, rc->block_rsv); + + memset(&rc->cluster, 0, sizeof(rc->cluster)); + rc->search_start = rc->block_group->key.objectid; + rc->extents_found = 0; + rc->nodes_relocated = 0; + rc->merging_rsv_size = 0; + rc->block_rsv_retries = 0; + + rc->create_reloc_tree = 1; + set_reloc_control(rc); + + trans = btrfs_join_transaction(rc->extent_root, 1); + btrfs_commit_transaction(trans, rc->extent_root); + return 0; +} static noinline_for_stack int relocate_block_group(struct reloc_control *rc) { struct rb_root blocks = RB_ROOT; struct btrfs_key key; - struct file_extent_cluster *cluster; struct btrfs_trans_handle *trans = NULL; struct btrfs_path *path; struct btrfs_extent_item *ei; @@ -3317,33 +3589,25 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) int ret; int err = 0; - cluster = kzalloc(sizeof(*cluster), GFP_NOFS); - if (!cluster) - return -ENOMEM; - path = btrfs_alloc_path(); - if (!path) { - kfree(cluster); + if (!path) return -ENOMEM; - } - - rc->extents_found = 0; - rc->extents_skipped = 0; - - rc->search_start = rc->block_group->key.objectid; - clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, - GFP_NOFS); - - rc->create_reloc_root = 1; - set_reloc_control(rc); - trans = btrfs_join_transaction(rc->extent_root, 1); - btrfs_commit_transaction(trans, rc->extent_root); + ret = prepare_to_relocate(rc); + if (ret) { + err = ret; + goto out_free; + } while (1) { trans = btrfs_start_transaction(rc->extent_root, 0); - ret = find_next_extent(trans, rc, path); + if (update_backref_cache(trans, &rc->backref_cache)) { + btrfs_end_transaction(trans, rc->extent_root); + continue; + } + + ret = find_next_extent(trans, rc, path, &key); if (ret < 0) err = ret; if (ret != 0) @@ -3353,9 +3617,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) ei = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_extent_item); - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); - item_size = btrfs_item_size_nr(path->nodes[0], - path->slots[0]); + item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); if (item_size >= sizeof(*ei)) { flags = btrfs_extent_flags(path->nodes[0], ei); ret = check_extent_flags(flags); @@ -3396,73 +3658,100 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { ret = add_tree_block(rc, &key, path, &blocks); } else if (rc->stage == UPDATE_DATA_PTRS && - (flags & BTRFS_EXTENT_FLAG_DATA)) { + (flags & BTRFS_EXTENT_FLAG_DATA)) { ret = add_data_references(rc, &key, path, &blocks); } else { btrfs_release_path(rc->extent_root, path); ret = 0; } if (ret < 0) { - err = 0; + err = ret; break; } if (!RB_EMPTY_ROOT(&blocks)) { ret = relocate_tree_blocks(trans, rc, &blocks); if (ret < 0) { + if (ret != -EAGAIN) { + err = ret; + break; + } + rc->extents_found--; + rc->search_start = key.objectid; + } + } + + ret = btrfs_block_rsv_check(trans, rc->extent_root, + rc->block_rsv, 0, 5); + if (ret < 0) { + if (ret != -EAGAIN) { err = ret; + WARN_ON(1); break; } + rc->commit_transaction = 1; } - nr = trans->blocks_used; - btrfs_end_transaction(trans, rc->extent_root); + if (rc->commit_transaction) { + rc->commit_transaction = 0; + ret = btrfs_commit_transaction(trans, rc->extent_root); + BUG_ON(ret); + } else { + nr = trans->blocks_used; + btrfs_end_transaction_throttle(trans, rc->extent_root); + btrfs_btree_balance_dirty(rc->extent_root, nr); + } trans = NULL; - btrfs_btree_balance_dirty(rc->extent_root, nr); if (rc->stage == MOVE_DATA_EXTENTS && (flags & BTRFS_EXTENT_FLAG_DATA)) { rc->found_file_extent = 1; ret = relocate_data_extent(rc->data_inode, - &key, cluster); + &key, &rc->cluster); if (ret < 0) { err = ret; break; } } } - btrfs_free_path(path); + + btrfs_release_path(rc->extent_root, path); + clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, + GFP_NOFS); if (trans) { nr = trans->blocks_used; - btrfs_end_transaction(trans, rc->extent_root); + btrfs_end_transaction_throttle(trans, rc->extent_root); btrfs_btree_balance_dirty(rc->extent_root, nr); } if (!err) { - ret = relocate_file_extent_cluster(rc->data_inode, cluster); + ret = relocate_file_extent_cluster(rc->data_inode, + &rc->cluster); if (ret < 0) err = ret; } - kfree(cluster); + rc->create_reloc_tree = 0; + set_reloc_control(rc); - rc->create_reloc_root = 0; - smp_mb(); + backref_cache_cleanup(&rc->backref_cache); + btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); - if (rc->extents_found > 0) { - trans = btrfs_join_transaction(rc->extent_root, 1); - btrfs_commit_transaction(trans, rc->extent_root); - } + err = prepare_to_merge(rc, err); merge_reloc_roots(rc); + rc->merge_reloc_tree = 0; unset_reloc_control(rc); + btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); /* get rid of pinned extents */ trans = btrfs_join_transaction(rc->extent_root, 1); btrfs_commit_transaction(trans, rc->extent_root); - +out_free: + btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); + btrfs_free_path(path); return err; } @@ -3488,7 +3777,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, btrfs_set_inode_generation(leaf, item, 1); btrfs_set_inode_size(leaf, item, 0); btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); - btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); + btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | + BTRFS_INODE_PREALLOC); btrfs_mark_buffer_dirty(leaf); btrfs_release_path(root, path); out: @@ -3500,8 +3790,9 @@ out: * helper to create inode for data relocation. * the inode is in data relocation tree and its link count is 0 */ -static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache *group) +static noinline_for_stack +struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *group) { struct inode *inode = NULL; struct btrfs_trans_handle *trans; @@ -3516,7 +3807,8 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, return ERR_CAST(root); trans = btrfs_start_transaction(root, 6); - BUG_ON(!trans); + if (IS_ERR(trans)) + return ERR_CAST(trans); err = btrfs_find_free_objectid(trans, root, objectid, &objectid); if (err) @@ -3536,7 +3828,6 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, out: nr = trans->blocks_used; btrfs_end_transaction(trans, root); - btrfs_btree_balance_dirty(root, nr); if (err) { if (inode) @@ -3546,6 +3837,21 @@ out: return inode; } +static struct reloc_control *alloc_reloc_control(void) +{ + struct reloc_control *rc; + + rc = kzalloc(sizeof(*rc), GFP_NOFS); + if (!rc) + return NULL; + + INIT_LIST_HEAD(&rc->reloc_roots); + backref_cache_init(&rc->backref_cache); + mapping_tree_init(&rc->reloc_root_tree); + extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); + return rc; +} + /* * function to relocate all extents in a block group. */ @@ -3557,15 +3863,12 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) int rw = 0; int err = 0; - rc = kzalloc(sizeof(*rc), GFP_NOFS); + rc = alloc_reloc_control(); if (!rc) return -ENOMEM; - mapping_tree_init(&rc->reloc_root_tree); - extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); - INIT_LIST_HEAD(&rc->reloc_roots); - rc->extent_root = extent_root; + rc->block_group = btrfs_lookup_block_group(fs_info, group_start); BUG_ON(!rc->block_group); @@ -3578,9 +3881,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) rw = 1; } - btrfs_init_workers(&rc->workers, "relocate", - fs_info->thread_pool_size, NULL); - rc->data_inode = create_reloc_inode(fs_info, rc->block_group); if (IS_ERR(rc->data_inode)) { err = PTR_ERR(rc->data_inode); @@ -3596,9 +3896,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); while (1) { - rc->extents_found = 0; - rc->extents_skipped = 0; - mutex_lock(&fs_info->cleaner_mutex); btrfs_clean_old_snapshots(fs_info->tree_root); @@ -3607,7 +3904,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) mutex_unlock(&fs_info->cleaner_mutex); if (ret < 0) { err = ret; - break; + goto out; } if (rc->extents_found == 0) @@ -3621,18 +3918,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) invalidate_mapping_pages(rc->data_inode->i_mapping, 0, -1); rc->stage = UPDATE_DATA_PTRS; - } else if (rc->stage == UPDATE_DATA_PTRS && - rc->extents_skipped >= rc->extents_found) { - iput(rc->data_inode); - rc->data_inode = create_reloc_inode(fs_info, - rc->block_group); - if (IS_ERR(rc->data_inode)) { - err = PTR_ERR(rc->data_inode); - rc->data_inode = NULL; - break; - } - rc->stage = MOVE_DATA_EXTENTS; - rc->found_file_extent = 0; } } @@ -3648,7 +3933,6 @@ out: if (err && rw) btrfs_set_block_group_rw(extent_root, rc->block_group); iput(rc->data_inode); - btrfs_stop_workers(&rc->workers); btrfs_put_block_group(rc->block_group); kfree(rc); return err; @@ -3752,20 +4036,20 @@ int btrfs_recover_relocation(struct btrfs_root *root) if (list_empty(&reloc_roots)) goto out; - rc = kzalloc(sizeof(*rc), GFP_NOFS); + rc = alloc_reloc_control(); if (!rc) { err = -ENOMEM; goto out; } - mapping_tree_init(&rc->reloc_root_tree); - INIT_LIST_HEAD(&rc->reloc_roots); - btrfs_init_workers(&rc->workers, "relocate", - root->fs_info->thread_pool_size, NULL); rc->extent_root = root->fs_info->extent_root; set_reloc_control(rc); + trans = btrfs_join_transaction(rc->extent_root, 1); + + rc->merge_reloc_tree = 1; + while (!list_empty(&reloc_roots)) { reloc_root = list_entry(reloc_roots.next, struct btrfs_root, root_list); @@ -3785,20 +4069,16 @@ int btrfs_recover_relocation(struct btrfs_root *root) fs_root->reloc_root = reloc_root; } - trans = btrfs_start_transaction(rc->extent_root, 1); btrfs_commit_transaction(trans, rc->extent_root); merge_reloc_roots(rc); unset_reloc_control(rc); - trans = btrfs_start_transaction(rc->extent_root, 1); + trans = btrfs_join_transaction(rc->extent_root, 1); btrfs_commit_transaction(trans, rc->extent_root); out: - if (rc) { - btrfs_stop_workers(&rc->workers); - kfree(rc); - } + kfree(rc); while (!list_empty(&reloc_roots)) { reloc_root = list_entry(reloc_roots.next, struct btrfs_root, root_list); @@ -3864,3 +4144,130 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) btrfs_put_ordered_extent(ordered); return 0; } + +void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf, + struct extent_buffer *cow) +{ + struct reloc_control *rc; + struct backref_node *node; + int first_cow = 0; + int level; + int ret; + + rc = root->fs_info->reloc_ctl; + if (!rc) + return; + + BUG_ON(rc->stage == UPDATE_DATA_PTRS && + root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); + + level = btrfs_header_level(buf); + if (btrfs_header_generation(buf) <= + btrfs_root_last_snapshot(&root->root_item)) + first_cow = 1; + + if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID && + rc->create_reloc_tree) { + WARN_ON(!first_cow && level == 0); + + node = rc->backref_cache.path[level]; + BUG_ON(node->bytenr != buf->start && + node->new_bytenr != buf->start); + + drop_node_buffer(node); + extent_buffer_get(cow); + node->eb = cow; + node->new_bytenr = cow->start; + + if (!node->pending) { + list_move_tail(&node->list, + &rc->backref_cache.pending[level]); + node->pending = 1; + } + + if (first_cow) + __mark_block_processed(rc, node); + + if (first_cow && level > 0) + rc->nodes_relocated += buf->len; + } + + if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) { + ret = replace_file_extents(trans, rc, root, cow); + BUG_ON(ret); + } +} + +/* + * called before creating snapshot. it calculates metadata reservation + * requried for relocating tree blocks in the snapshot + */ +void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, + struct btrfs_pending_snapshot *pending, + u64 *bytes_to_reserve) +{ + struct btrfs_root *root; + struct reloc_control *rc; + + root = pending->root; + if (!root->reloc_root) + return; + + rc = root->fs_info->reloc_ctl; + if (!rc->merge_reloc_tree) + return; + + root = root->reloc_root; + BUG_ON(btrfs_root_refs(&root->root_item) == 0); + /* + * relocation is in the stage of merging trees. the space + * used by merging a reloc tree is twice the size of + * relocated tree nodes in the worst case. half for cowing + * the reloc tree, half for cowing the fs tree. the space + * used by cowing the reloc tree will be freed after the + * tree is dropped. if we create snapshot, cowing the fs + * tree may use more space than it frees. so we need + * reserve extra space. + */ + *bytes_to_reserve += rc->nodes_relocated; +} + +/* + * called after snapshot is created. migrate block reservation + * and create reloc root for the newly created snapshot + */ +void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, + struct btrfs_pending_snapshot *pending) +{ + struct btrfs_root *root = pending->root; + struct btrfs_root *reloc_root; + struct btrfs_root *new_root; + struct reloc_control *rc; + int ret; + + if (!root->reloc_root) + return; + + rc = root->fs_info->reloc_ctl; + rc->merging_rsv_size += rc->nodes_relocated; + + if (rc->merge_reloc_tree) { + ret = btrfs_block_rsv_migrate(&pending->block_rsv, + rc->block_rsv, + rc->nodes_relocated); + BUG_ON(ret); + } + + new_root = pending->snap; + reloc_root = create_reloc_root(trans, root->reloc_root, + new_root->root_key.objectid); + + __add_reloc_root(reloc_root); + new_root->reloc_root = reloc_root; + + if (rc->create_reloc_tree) { + ret = clone_backref_node(trans, rc, root, reloc_root); + BUG_ON(ret); + } +} diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index cfe7f588ef05..c346d320173a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -853,6 +853,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, goto fail; } + btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); btrfs_orphan_pre_snapshot(trans, pending, &to_reserve); if (to_reserve > 0) { @@ -924,6 +925,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); BUG_ON(IS_ERR(pending->snap)); + btrfs_reloc_post_snapshot(trans, pending); btrfs_orphan_post_snapshot(trans, pending); fail: kfree(new_root_item); @@ -1213,9 +1215,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) if (btrfs_header_backref_rev(root->node) < BTRFS_MIXED_BACKREF_REV) - btrfs_drop_snapshot(root, 0); + btrfs_drop_snapshot(root, NULL, 0); else - btrfs_drop_snapshot(root, 1); + btrfs_drop_snapshot(root, NULL, 1); } return 0; } -- cgit v1.2.3