summaryrefslogtreecommitdiff
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c238
1 files changed, 129 insertions, 109 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2ce16f97730..44f06201f37 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2670,8 +2670,10 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
node = rb_prev(node);
if (node) {
+ int seq = ref->seq;
+
ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
- if (ref->bytenr == bytenr)
+ if (ref->bytenr == bytenr && ref->seq == seq)
goto out_unlock;
}
@@ -2992,8 +2994,13 @@ again:
}
spin_lock(&block_group->lock);
- if (block_group->cached != BTRFS_CACHE_FINISHED) {
- /* We're not cached, don't bother trying to write stuff out */
+ if (block_group->cached != BTRFS_CACHE_FINISHED ||
+ !btrfs_test_opt(root, SPACE_CACHE)) {
+ /*
+ * don't bother trying to write stuff out _if_
+ * a) we're not cached,
+ * b) we're with nospace_cache mount option.
+ */
dcs = BTRFS_DC_WRITTEN;
spin_unlock(&block_group->lock);
goto out_put;
@@ -3223,6 +3230,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
init_waitqueue_head(&found->wait);
*space_info = found;
list_add_rcu(&found->list, &info->space_info);
+ if (flags & BTRFS_BLOCK_GROUP_DATA)
+ info->data_sinfo = found;
return 0;
}
@@ -3352,12 +3361,6 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
return get_alloc_profile(root, flags);
}
-void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
-{
- BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
- BTRFS_BLOCK_GROUP_DATA);
-}
-
/*
* This will check the space that the inode allocates from to make sure we have
* enough space for bytes.
@@ -3366,6 +3369,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
{
struct btrfs_space_info *data_sinfo;
struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
u64 used;
int ret = 0, committed = 0, alloc_chunk = 1;
@@ -3378,7 +3382,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
committed = 1;
}
- data_sinfo = BTRFS_I(inode)->space_info;
+ data_sinfo = fs_info->data_sinfo;
if (!data_sinfo)
goto alloc;
@@ -3419,10 +3423,9 @@ alloc:
goto commit_trans;
}
- if (!data_sinfo) {
- btrfs_set_inode_space_info(root, inode);
- data_sinfo = BTRFS_I(inode)->space_info;
- }
+ if (!data_sinfo)
+ data_sinfo = fs_info->data_sinfo;
+
goto again;
}
@@ -3469,7 +3472,7 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
/* make sure bytes are sectorsize aligned */
bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
- data_sinfo = BTRFS_I(inode)->space_info;
+ data_sinfo = root->fs_info->data_sinfo;
spin_lock(&data_sinfo->lock);
data_sinfo->bytes_may_use -= bytes;
trace_btrfs_space_reservation(root->fs_info, "space_info",
@@ -3675,89 +3678,58 @@ out:
/*
* shrink metadata reservation for delalloc
*/
-static int shrink_delalloc(struct btrfs_root *root, u64 to_reclaim,
- bool wait_ordered)
+static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
+ bool wait_ordered)
{
struct btrfs_block_rsv *block_rsv;
struct btrfs_space_info *space_info;
struct btrfs_trans_handle *trans;
- u64 reserved;
+ u64 delalloc_bytes;
u64 max_reclaim;
- u64 reclaimed = 0;
long time_left;
unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
int loops = 0;
- unsigned long progress;
trans = (struct btrfs_trans_handle *)current->journal_info;
block_rsv = &root->fs_info->delalloc_block_rsv;
space_info = block_rsv->space_info;
smp_mb();
- reserved = space_info->bytes_may_use;
- progress = space_info->reservation_progress;
-
- if (reserved == 0)
- return 0;
-
- smp_mb();
- if (root->fs_info->delalloc_bytes == 0) {
+ delalloc_bytes = root->fs_info->delalloc_bytes;
+ if (delalloc_bytes == 0) {
if (trans)
- return 0;
+ return;
btrfs_wait_ordered_extents(root, 0, 0);
- return 0;
+ return;
}
- max_reclaim = min(reserved, to_reclaim);
- nr_pages = max_t(unsigned long, nr_pages,
- max_reclaim >> PAGE_CACHE_SHIFT);
- while (loops < 1024) {
- /* have the flusher threads jump in and do some IO */
- smp_mb();
- nr_pages = min_t(unsigned long, nr_pages,
- root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
+ while (delalloc_bytes && loops < 3) {
+ max_reclaim = min(delalloc_bytes, to_reclaim);
+ nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
- WB_REASON_FS_FREE_SPACE);
+ WB_REASON_FS_FREE_SPACE);
spin_lock(&space_info->lock);
- if (reserved > space_info->bytes_may_use)
- reclaimed += reserved - space_info->bytes_may_use;
- reserved = space_info->bytes_may_use;
+ if (space_info->bytes_used + space_info->bytes_reserved +
+ space_info->bytes_pinned + space_info->bytes_readonly +
+ space_info->bytes_may_use + orig <=
+ space_info->total_bytes) {
+ spin_unlock(&space_info->lock);
+ break;
+ }
spin_unlock(&space_info->lock);
loops++;
-
- if (reserved == 0 || reclaimed >= max_reclaim)
- break;
-
- if (trans && trans->transaction->blocked)
- return -EAGAIN;
-
if (wait_ordered && !trans) {
btrfs_wait_ordered_extents(root, 0, 0);
} else {
- time_left = schedule_timeout_interruptible(1);
-
- /* We were interrupted, exit */
+ time_left = schedule_timeout_killable(1);
if (time_left)
break;
}
-
- /* we've kicked the IO a few times, if anything has been freed,
- * exit. There is no sense in looping here for a long time
- * when we really need to commit the transaction, or there are
- * just too many writers without enough free space
- */
-
- if (loops > 3) {
- smp_mb();
- if (progress != space_info->reservation_progress)
- break;
- }
-
+ smp_mb();
+ delalloc_bytes = root->fs_info->delalloc_bytes;
}
-
- return reclaimed >= to_reclaim;
}
/**
@@ -3817,6 +3789,58 @@ commit:
return btrfs_commit_transaction(trans, root);
}
+enum flush_state {
+ FLUSH_DELALLOC = 1,
+ FLUSH_DELALLOC_WAIT = 2,
+ FLUSH_DELAYED_ITEMS_NR = 3,
+ FLUSH_DELAYED_ITEMS = 4,
+ COMMIT_TRANS = 5,
+};
+
+static int flush_space(struct btrfs_root *root,
+ struct btrfs_space_info *space_info, u64 num_bytes,
+ u64 orig_bytes, int state)
+{
+ struct btrfs_trans_handle *trans;
+ int nr;
+ int ret = 0;
+
+ switch (state) {
+ case FLUSH_DELALLOC:
+ case FLUSH_DELALLOC_WAIT:
+ shrink_delalloc(root, num_bytes, orig_bytes,
+ state == FLUSH_DELALLOC_WAIT);
+ break;
+ case FLUSH_DELAYED_ITEMS_NR:
+ case FLUSH_DELAYED_ITEMS:
+ if (state == FLUSH_DELAYED_ITEMS_NR) {
+ u64 bytes = btrfs_calc_trans_metadata_size(root, 1);
+
+ nr = (int)div64_u64(num_bytes, bytes);
+ if (!nr)
+ nr = 1;
+ nr *= 2;
+ } else {
+ nr = -1;
+ }
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ break;
+ }
+ ret = btrfs_run_delayed_items_nr(trans, root, nr);
+ btrfs_end_transaction(trans, root);
+ break;
+ case COMMIT_TRANS:
+ ret = may_commit_transaction(root, space_info, orig_bytes, 0);
+ break;
+ default:
+ ret = -ENOSPC;
+ break;
+ }
+
+ return ret;
+}
/**
* reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
* @root - the root we're allocating for
@@ -3838,11 +3862,10 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
struct btrfs_space_info *space_info = block_rsv->space_info;
u64 used;
u64 num_bytes = orig_bytes;
- int retries = 0;
+ int flush_state = FLUSH_DELALLOC;
int ret = 0;
- bool committed = false;
bool flushing = false;
- bool wait_ordered = false;
+ bool committed = false;
again:
ret = 0;
@@ -3901,9 +3924,8 @@ again:
* amount plus the amount of bytes that we need for this
* reservation.
*/
- wait_ordered = true;
num_bytes = used - space_info->total_bytes +
- (orig_bytes * (retries + 1));
+ (orig_bytes * 2);
}
if (ret) {
@@ -3956,8 +3978,6 @@ again:
trace_btrfs_space_reservation(root->fs_info,
"space_info", space_info->flags, orig_bytes, 1);
ret = 0;
- } else {
- wait_ordered = true;
}
}
@@ -3976,36 +3996,13 @@ again:
if (!ret || !flush)
goto out;
- /*
- * We do synchronous shrinking since we don't actually unreserve
- * metadata until after the IO is completed.
- */
- ret = shrink_delalloc(root, num_bytes, wait_ordered);
- if (ret < 0)
- goto out;
-
- ret = 0;
-
- /*
- * So if we were overcommitted it's possible that somebody else flushed
- * out enough space and we simply didn't have enough space to reclaim,
- * so go back around and try again.
- */
- if (retries < 2) {
- wait_ordered = true;
- retries++;
+ ret = flush_space(root, space_info, num_bytes, orig_bytes,
+ flush_state);
+ flush_state++;
+ if (!ret)
goto again;
- }
-
- ret = -ENOSPC;
- if (committed)
- goto out;
-
- ret = may_commit_transaction(root, space_info, orig_bytes, 0);
- if (!ret) {
- committed = true;
+ else if (flush_state <= COMMIT_TRANS)
goto again;
- }
out:
if (flushing) {
@@ -4023,7 +4020,10 @@ static struct btrfs_block_rsv *get_block_rsv(
{
struct btrfs_block_rsv *block_rsv = NULL;
- if (root->ref_cows || root == root->fs_info->csum_root)
+ if (root->ref_cows)
+ block_rsv = trans->block_rsv;
+
+ if (root == root->fs_info->csum_root && trans->adding_csums)
block_rsv = trans->block_rsv;
if (!block_rsv)
@@ -4375,6 +4375,9 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
+ if (!trans->block_rsv)
+ return;
+
if (!trans->bytes_reserved)
return;
@@ -4533,7 +4536,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
int ret;
/* Need to be holding the i_mutex here if we aren't free space cache */
- if (btrfs_is_free_space_inode(root, inode))
+ if (btrfs_is_free_space_inode(inode))
flush = 0;
if (flush && btrfs_transaction_in_commit(root->fs_info))
@@ -5849,7 +5852,11 @@ loop:
ret = do_chunk_alloc(trans, root, num_bytes +
2 * 1024 * 1024, data,
CHUNK_ALLOC_LIMITED);
- if (ret < 0) {
+ /*
+ * Do not bail out on ENOSPC since we
+ * can do more things.
+ */
+ if (ret < 0 && ret != -ENOSPC) {
btrfs_abort_transaction(trans,
root, ret);
goto out;
@@ -5917,13 +5924,13 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
again:
list_for_each_entry(cache, &info->block_groups[index], list) {
spin_lock(&cache->lock);
- printk(KERN_INFO "block group %llu has %llu bytes, %llu used "
- "%llu pinned %llu reserved\n",
+ printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n",
(unsigned long long)cache->key.objectid,
(unsigned long long)cache->key.offset,
(unsigned long long)btrfs_block_group_used(&cache->item),
(unsigned long long)cache->pinned,
- (unsigned long long)cache->reserved);
+ (unsigned long long)cache->reserved,
+ cache->ro ? "[readonly]" : "");
btrfs_dump_free_space(cache, bytes);
spin_unlock(&cache->lock);
}
@@ -7711,8 +7718,21 @@ int btrfs_read_block_groups(struct btrfs_root *root)
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
- if (need_clear)
+ if (need_clear) {
+ /*
+ * When we mount with old space cache, we need to
+ * set BTRFS_DC_CLEAR and set dirty flag.
+ *
+ * a) Setting 'BTRFS_DC_CLEAR' makes sure that we
+ * truncate the old free space cache inode and
+ * setup a new one.
+ * b) Setting 'dirty flag' makes sure that we flush
+ * the new space cache info onto disk.
+ */
cache->disk_cache_state = BTRFS_DC_CLEAR;
+ if (btrfs_test_opt(root, SPACE_CACHE))
+ cache->dirty = 1;
+ }
read_extent_buffer(leaf, &cache->item,
btrfs_item_ptr_offset(leaf, path->slots[0]),