diff options
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 238 |
1 files changed, 129 insertions, 109 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2ce16f97730..44f06201f37 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2670,8 +2670,10 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, node = rb_prev(node); if (node) { + int seq = ref->seq; + ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); - if (ref->bytenr == bytenr) + if (ref->bytenr == bytenr && ref->seq == seq) goto out_unlock; } @@ -2992,8 +2994,13 @@ again: } spin_lock(&block_group->lock); - if (block_group->cached != BTRFS_CACHE_FINISHED) { - /* We're not cached, don't bother trying to write stuff out */ + if (block_group->cached != BTRFS_CACHE_FINISHED || + !btrfs_test_opt(root, SPACE_CACHE)) { + /* + * don't bother trying to write stuff out _if_ + * a) we're not cached, + * b) we're with nospace_cache mount option. + */ dcs = BTRFS_DC_WRITTEN; spin_unlock(&block_group->lock); goto out_put; @@ -3223,6 +3230,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, init_waitqueue_head(&found->wait); *space_info = found; list_add_rcu(&found->list, &info->space_info); + if (flags & BTRFS_BLOCK_GROUP_DATA) + info->data_sinfo = found; return 0; } @@ -3352,12 +3361,6 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) return get_alloc_profile(root, flags); } -void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) -{ - BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, - BTRFS_BLOCK_GROUP_DATA); -} - /* * This will check the space that the inode allocates from to make sure we have * enough space for bytes. @@ -3366,6 +3369,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) { struct btrfs_space_info *data_sinfo; struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = root->fs_info; u64 used; int ret = 0, committed = 0, alloc_chunk = 1; @@ -3378,7 +3382,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes) committed = 1; } - data_sinfo = BTRFS_I(inode)->space_info; + data_sinfo = fs_info->data_sinfo; if (!data_sinfo) goto alloc; @@ -3419,10 +3423,9 @@ alloc: goto commit_trans; } - if (!data_sinfo) { - btrfs_set_inode_space_info(root, inode); - data_sinfo = BTRFS_I(inode)->space_info; - } + if (!data_sinfo) + data_sinfo = fs_info->data_sinfo; + goto again; } @@ -3469,7 +3472,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) /* make sure bytes are sectorsize aligned */ bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); - data_sinfo = BTRFS_I(inode)->space_info; + data_sinfo = root->fs_info->data_sinfo; spin_lock(&data_sinfo->lock); data_sinfo->bytes_may_use -= bytes; trace_btrfs_space_reservation(root->fs_info, "space_info", @@ -3675,89 +3678,58 @@ out: /* * shrink metadata reservation for delalloc */ -static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, - bool wait_ordered) +static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, + bool wait_ordered) { struct btrfs_block_rsv *block_rsv; struct btrfs_space_info *space_info; struct btrfs_trans_handle *trans; - u64 reserved; + u64 delalloc_bytes; u64 max_reclaim; - u64 reclaimed = 0; long time_left; unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; int loops = 0; - unsigned long progress; trans = (struct btrfs_trans_handle *)current->journal_info; block_rsv = &root->fs_info->delalloc_block_rsv; space_info = block_rsv->space_info; smp_mb(); - reserved = space_info->bytes_may_use; - progress = space_info->reservation_progress; - - if (reserved == 0) - return 0; - - smp_mb(); - if (root->fs_info->delalloc_bytes == 0) { + delalloc_bytes = root->fs_info->delalloc_bytes; + if (delalloc_bytes == 0) { if (trans) - return 0; + return; btrfs_wait_ordered_extents(root, 0, 0); - return 0; + return; } - max_reclaim = min(reserved, to_reclaim); - nr_pages = max_t(unsigned long, nr_pages, - max_reclaim >> PAGE_CACHE_SHIFT); - while (loops < 1024) { - /* have the flusher threads jump in and do some IO */ - smp_mb(); - nr_pages = min_t(unsigned long, nr_pages, - root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT); + while (delalloc_bytes && loops < 3) { + max_reclaim = min(delalloc_bytes, to_reclaim); + nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages, - WB_REASON_FS_FREE_SPACE); + WB_REASON_FS_FREE_SPACE); spin_lock(&space_info->lock); - if (reserved > space_info->bytes_may_use) - reclaimed += reserved - space_info->bytes_may_use; - reserved = space_info->bytes_may_use; + if (space_info->bytes_used + space_info->bytes_reserved + + space_info->bytes_pinned + space_info->bytes_readonly + + space_info->bytes_may_use + orig <= + space_info->total_bytes) { + spin_unlock(&space_info->lock); + break; + } spin_unlock(&space_info->lock); loops++; - - if (reserved == 0 || reclaimed >= max_reclaim) - break; - - if (trans && trans->transaction->blocked) - return -EAGAIN; - if (wait_ordered && !trans) { btrfs_wait_ordered_extents(root, 0, 0); } else { - time_left = schedule_timeout_interruptible(1); - - /* We were interrupted, exit */ + time_left = schedule_timeout_killable(1); if (time_left) break; } - - /* we've kicked the IO a few times, if anything has been freed, - * exit. There is no sense in looping here for a long time - * when we really need to commit the transaction, or there are - * just too many writers without enough free space - */ - - if (loops > 3) { - smp_mb(); - if (progress != space_info->reservation_progress) - break; - } - + smp_mb(); + delalloc_bytes = root->fs_info->delalloc_bytes; } - - return reclaimed >= to_reclaim; } /** @@ -3817,6 +3789,58 @@ commit: return btrfs_commit_transaction(trans, root); } +enum flush_state { + FLUSH_DELALLOC = 1, + FLUSH_DELALLOC_WAIT = 2, + FLUSH_DELAYED_ITEMS_NR = 3, + FLUSH_DELAYED_ITEMS = 4, + COMMIT_TRANS = 5, +}; + +static int flush_space(struct btrfs_root *root, + struct btrfs_space_info *space_info, u64 num_bytes, + u64 orig_bytes, int state) +{ + struct btrfs_trans_handle *trans; + int nr; + int ret = 0; + + switch (state) { + case FLUSH_DELALLOC: + case FLUSH_DELALLOC_WAIT: + shrink_delalloc(root, num_bytes, orig_bytes, + state == FLUSH_DELALLOC_WAIT); + break; + case FLUSH_DELAYED_ITEMS_NR: + case FLUSH_DELAYED_ITEMS: + if (state == FLUSH_DELAYED_ITEMS_NR) { + u64 bytes = btrfs_calc_trans_metadata_size(root, 1); + + nr = (int)div64_u64(num_bytes, bytes); + if (!nr) + nr = 1; + nr *= 2; + } else { + nr = -1; + } + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } + ret = btrfs_run_delayed_items_nr(trans, root, nr); + btrfs_end_transaction(trans, root); + break; + case COMMIT_TRANS: + ret = may_commit_transaction(root, space_info, orig_bytes, 0); + break; + default: + ret = -ENOSPC; + break; + } + + return ret; +} /** * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space * @root - the root we're allocating for @@ -3838,11 +3862,10 @@ static int reserve_metadata_bytes(struct btrfs_root *root, struct btrfs_space_info *space_info = block_rsv->space_info; u64 used; u64 num_bytes = orig_bytes; - int retries = 0; + int flush_state = FLUSH_DELALLOC; int ret = 0; - bool committed = false; bool flushing = false; - bool wait_ordered = false; + bool committed = false; again: ret = 0; @@ -3901,9 +3924,8 @@ again: * amount plus the amount of bytes that we need for this * reservation. */ - wait_ordered = true; num_bytes = used - space_info->total_bytes + - (orig_bytes * (retries + 1)); + (orig_bytes * 2); } if (ret) { @@ -3956,8 +3978,6 @@ again: trace_btrfs_space_reservation(root->fs_info, "space_info", space_info->flags, orig_bytes, 1); ret = 0; - } else { - wait_ordered = true; } } @@ -3976,36 +3996,13 @@ again: if (!ret || !flush) goto out; - /* - * We do synchronous shrinking since we don't actually unreserve - * metadata until after the IO is completed. - */ - ret = shrink_delalloc(root, num_bytes, wait_ordered); - if (ret < 0) - goto out; - - ret = 0; - - /* - * So if we were overcommitted it's possible that somebody else flushed - * out enough space and we simply didn't have enough space to reclaim, - * so go back around and try again. - */ - if (retries < 2) { - wait_ordered = true; - retries++; + ret = flush_space(root, space_info, num_bytes, orig_bytes, + flush_state); + flush_state++; + if (!ret) goto again; - } - - ret = -ENOSPC; - if (committed) - goto out; - - ret = may_commit_transaction(root, space_info, orig_bytes, 0); - if (!ret) { - committed = true; + else if (flush_state <= COMMIT_TRANS) goto again; - } out: if (flushing) { @@ -4023,7 +4020,10 @@ static struct btrfs_block_rsv *get_block_rsv( { struct btrfs_block_rsv *block_rsv = NULL; - if (root->ref_cows || root == root->fs_info->csum_root) + if (root->ref_cows) + block_rsv = trans->block_rsv; + + if (root == root->fs_info->csum_root && trans->adding_csums) block_rsv = trans->block_rsv; if (!block_rsv) @@ -4375,6 +4375,9 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, struct btrfs_root *root) { + if (!trans->block_rsv) + return; + if (!trans->bytes_reserved) return; @@ -4533,7 +4536,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) int ret; /* Need to be holding the i_mutex here if we aren't free space cache */ - if (btrfs_is_free_space_inode(root, inode)) + if (btrfs_is_free_space_inode(inode)) flush = 0; if (flush && btrfs_transaction_in_commit(root->fs_info)) @@ -5849,7 +5852,11 @@ loop: ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024, data, CHUNK_ALLOC_LIMITED); - if (ret < 0) { + /* + * Do not bail out on ENOSPC since we + * can do more things. + */ + if (ret < 0 && ret != -ENOSPC) { btrfs_abort_transaction(trans, root, ret); goto out; @@ -5917,13 +5924,13 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, again: list_for_each_entry(cache, &info->block_groups[index], list) { spin_lock(&cache->lock); - printk(KERN_INFO "block group %llu has %llu bytes, %llu used " - "%llu pinned %llu reserved\n", + printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n", (unsigned long long)cache->key.objectid, (unsigned long long)cache->key.offset, (unsigned long long)btrfs_block_group_used(&cache->item), (unsigned long long)cache->pinned, - (unsigned long long)cache->reserved); + (unsigned long long)cache->reserved, + cache->ro ? "[readonly]" : ""); btrfs_dump_free_space(cache, bytes); spin_unlock(&cache->lock); } @@ -7711,8 +7718,21 @@ int btrfs_read_block_groups(struct btrfs_root *root) INIT_LIST_HEAD(&cache->list); INIT_LIST_HEAD(&cache->cluster_list); - if (need_clear) + if (need_clear) { + /* + * When we mount with old space cache, we need to + * set BTRFS_DC_CLEAR and set dirty flag. + * + * a) Setting 'BTRFS_DC_CLEAR' makes sure that we + * truncate the old free space cache inode and + * setup a new one. + * b) Setting 'dirty flag' makes sure that we flush + * the new space cache info onto disk. + */ cache->disk_cache_state = BTRFS_DC_CLEAR; + if (btrfs_test_opt(root, SPACE_CACHE)) + cache->dirty = 1; + } read_extent_buffer(leaf, &cache->item, btrfs_item_ptr_offset(leaf, path->slots[0]), |