summaryrefslogtreecommitdiff
path: root/fs/btrfs/ordered-data.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/ordered-data.c')
-rw-r--r--fs/btrfs/ordered-data.c364
1 files changed, 212 insertions, 152 deletions
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index a9778a91511e..a629532283bc 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -146,35 +146,11 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
return ret;
}
-/*
- * Add an ordered extent to the per-inode tree.
- *
- * @inode: Inode that this extent is for.
- * @file_offset: Logical offset in file where the extent starts.
- * @num_bytes: Logical length of extent in file.
- * @ram_bytes: Full length of unencoded data.
- * @disk_bytenr: Offset of extent on disk.
- * @disk_num_bytes: Size of extent on disk.
- * @offset: Offset into unencoded data where file data starts.
- * @flags: Flags specifying type of extent (1 << BTRFS_ORDERED_*).
- * @compress_type: Compression algorithm used for data.
- *
- * Most of these parameters correspond to &struct btrfs_file_extent_item. The
- * tree is given a single reference on the ordered extent that was inserted, and
- * the returned pointer is given a second reference.
- *
- * Return: the new ordered extent or error pointer.
- */
-struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
- struct btrfs_inode *inode, u64 file_offset,
- u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
- u64 disk_num_bytes, u64 offset, unsigned long flags,
- int compress_type)
+static struct btrfs_ordered_extent *alloc_ordered_extent(
+ struct btrfs_inode *inode, u64 file_offset, u64 num_bytes,
+ u64 ram_bytes, u64 disk_bytenr, u64 disk_num_bytes,
+ u64 offset, unsigned long flags, int compress_type)
{
- struct btrfs_root *root = inode->root;
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
- struct rb_node *node;
struct btrfs_ordered_extent *entry;
int ret;
@@ -184,7 +160,6 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
if (ret < 0)
return ERR_PTR(ret);
- ret = 0;
} else {
/*
* The ordered extent has reserved qgroup space, release now
@@ -209,15 +184,7 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
entry->compress_type = compress_type;
entry->truncated_len = (u64)-1;
entry->qgroup_rsv = ret;
- entry->physical = (u64)-1;
-
- ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
entry->flags = flags;
-
- percpu_counter_add_batch(&fs_info->ordered_bytes, num_bytes,
- fs_info->delalloc_batch);
-
- /* one ref for the tree */
refcount_set(&entry->refs, 1);
init_waitqueue_head(&entry->wait);
INIT_LIST_HEAD(&entry->list);
@@ -226,15 +193,40 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
INIT_LIST_HEAD(&entry->work_list);
init_completion(&entry->completion);
+ /*
+ * We don't need the count_max_extents here, we can assume that all of
+ * that work has been done at higher layers, so this is truly the
+ * smallest the extent is going to get.
+ */
+ spin_lock(&inode->lock);
+ btrfs_mod_outstanding_extents(inode, 1);
+ spin_unlock(&inode->lock);
+
+ return entry;
+}
+
+static void insert_ordered_extent(struct btrfs_ordered_extent *entry)
+{
+ struct btrfs_inode *inode = BTRFS_I(entry->inode);
+ struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct rb_node *node;
+
trace_btrfs_ordered_extent_add(inode, entry);
+ percpu_counter_add_batch(&fs_info->ordered_bytes, entry->num_bytes,
+ fs_info->delalloc_batch);
+
+ /* One ref for the tree. */
+ refcount_inc(&entry->refs);
+
spin_lock_irq(&tree->lock);
- node = tree_insert(&tree->tree, file_offset,
- &entry->rb_node);
+ node = tree_insert(&tree->tree, entry->file_offset, &entry->rb_node);
if (node)
btrfs_panic(fs_info, -EEXIST,
"inconsistency in ordered tree at offset %llu",
- file_offset);
+ entry->file_offset);
spin_unlock_irq(&tree->lock);
spin_lock(&root->ordered_extent_lock);
@@ -248,43 +240,43 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
spin_unlock(&fs_info->ordered_root_lock);
}
spin_unlock(&root->ordered_extent_lock);
-
- /*
- * We don't need the count_max_extents here, we can assume that all of
- * that work has been done at higher layers, so this is truly the
- * smallest the extent is going to get.
- */
- spin_lock(&inode->lock);
- btrfs_mod_outstanding_extents(inode, 1);
- spin_unlock(&inode->lock);
-
- /* One ref for the returned entry to match semantics of lookup. */
- refcount_inc(&entry->refs);
-
- return entry;
}
/*
- * Add a new btrfs_ordered_extent for the range, but drop the reference instead
- * of returning it to the caller.
+ * Add an ordered extent to the per-inode tree.
+ *
+ * @inode: Inode that this extent is for.
+ * @file_offset: Logical offset in file where the extent starts.
+ * @num_bytes: Logical length of extent in file.
+ * @ram_bytes: Full length of unencoded data.
+ * @disk_bytenr: Offset of extent on disk.
+ * @disk_num_bytes: Size of extent on disk.
+ * @offset: Offset into unencoded data where file data starts.
+ * @flags: Flags specifying type of extent (1 << BTRFS_ORDERED_*).
+ * @compress_type: Compression algorithm used for data.
+ *
+ * Most of these parameters correspond to &struct btrfs_file_extent_item. The
+ * tree is given a single reference on the ordered extent that was inserted, and
+ * the returned pointer is given a second reference.
+ *
+ * Return: the new ordered extent or error pointer.
*/
-int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
- u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
- u64 disk_num_bytes, u64 offset, unsigned long flags,
- int compress_type)
+struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
+ struct btrfs_inode *inode, u64 file_offset,
+ u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
+ u64 disk_num_bytes, u64 offset, unsigned long flags,
+ int compress_type)
{
- struct btrfs_ordered_extent *ordered;
-
- ordered = btrfs_alloc_ordered_extent(inode, file_offset, num_bytes,
- ram_bytes, disk_bytenr,
- disk_num_bytes, offset, flags,
- compress_type);
+ struct btrfs_ordered_extent *entry;
- if (IS_ERR(ordered))
- return PTR_ERR(ordered);
- btrfs_put_ordered_extent(ordered);
+ ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
- return 0;
+ entry = alloc_ordered_extent(inode, file_offset, num_bytes, ram_bytes,
+ disk_bytenr, disk_num_bytes, offset, flags,
+ compress_type);
+ if (!IS_ERR(entry))
+ insert_ordered_extent(entry);
+ return entry;
}
/*
@@ -311,6 +303,90 @@ static void finish_ordered_fn(struct btrfs_work *work)
btrfs_finish_ordered_io(ordered_extent);
}
+static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
+ struct page *page, u64 file_offset,
+ u64 len, bool uptodate)
+{
+ struct btrfs_inode *inode = BTRFS_I(ordered->inode);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+
+ lockdep_assert_held(&inode->ordered_tree.lock);
+
+ if (page) {
+ ASSERT(page->mapping);
+ ASSERT(page_offset(page) <= file_offset);
+ ASSERT(file_offset + len <= page_offset(page) + PAGE_SIZE);
+
+ /*
+ * Ordered (Private2) bit indicates whether we still have
+ * pending io unfinished for the ordered extent.
+ *
+ * If there's no such bit, we need to skip to next range.
+ */
+ if (!btrfs_page_test_ordered(fs_info, page, file_offset, len))
+ return false;
+ btrfs_page_clear_ordered(fs_info, page, file_offset, len);
+ }
+
+ /* Now we're fine to update the accounting. */
+ if (WARN_ON_ONCE(len > ordered->bytes_left)) {
+ btrfs_crit(fs_info,
+"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%llu left=%llu",
+ inode->root->root_key.objectid, btrfs_ino(inode),
+ ordered->file_offset, ordered->num_bytes,
+ len, ordered->bytes_left);
+ ordered->bytes_left = 0;
+ } else {
+ ordered->bytes_left -= len;
+ }
+
+ if (!uptodate)
+ set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
+
+ if (ordered->bytes_left)
+ return false;
+
+ /*
+ * All the IO of the ordered extent is finished, we need to queue
+ * the finish_func to be executed.
+ */
+ set_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags);
+ cond_wake_up(&ordered->wait);
+ refcount_inc(&ordered->refs);
+ trace_btrfs_ordered_extent_mark_finished(inode, ordered);
+ return true;
+}
+
+static void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered)
+{
+ struct btrfs_inode *inode = BTRFS_I(ordered->inode);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct btrfs_workqueue *wq = btrfs_is_free_space_inode(inode) ?
+ fs_info->endio_freespace_worker : fs_info->endio_write_workers;
+
+ btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL);
+ btrfs_queue_work(wq, &ordered->work);
+}
+
+bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
+ struct page *page, u64 file_offset, u64 len,
+ bool uptodate)
+{
+ struct btrfs_inode *inode = BTRFS_I(ordered->inode);
+ unsigned long flags;
+ bool ret;
+
+ trace_btrfs_finish_ordered_extent(inode, file_offset, len, uptodate);
+
+ spin_lock_irqsave(&inode->ordered_tree.lock, flags);
+ ret = can_finish_ordered_extent(ordered, page, file_offset, len, uptodate);
+ spin_unlock_irqrestore(&inode->ordered_tree.lock, flags);
+
+ if (ret)
+ btrfs_queue_ordered_fn(ordered);
+ return ret;
+}
+
/*
* Mark all ordered extents io inside the specified range finished.
*
@@ -329,22 +405,11 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
u64 num_bytes, bool uptodate)
{
struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct btrfs_workqueue *wq;
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
unsigned long flags;
u64 cur = file_offset;
- if (btrfs_is_free_space_inode(inode))
- wq = fs_info->endio_freespace_worker;
- else
- wq = fs_info->endio_write_workers;
-
- if (page)
- ASSERT(page->mapping && page_offset(page) <= file_offset &&
- file_offset + num_bytes <= page_offset(page) + PAGE_SIZE);
-
spin_lock_irqsave(&tree->lock, flags);
while (cur < file_offset + num_bytes) {
u64 entry_end;
@@ -397,50 +462,9 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
ASSERT(end + 1 - cur < U32_MAX);
len = end + 1 - cur;
- if (page) {
- /*
- * Ordered (Private2) bit indicates whether we still
- * have pending io unfinished for the ordered extent.
- *
- * If there's no such bit, we need to skip to next range.
- */
- if (!btrfs_page_test_ordered(fs_info, page, cur, len)) {
- cur += len;
- continue;
- }
- btrfs_page_clear_ordered(fs_info, page, cur, len);
- }
-
- /* Now we're fine to update the accounting */
- if (unlikely(len > entry->bytes_left)) {
- WARN_ON(1);
- btrfs_crit(fs_info,
-"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%u left=%llu",
- inode->root->root_key.objectid,
- btrfs_ino(inode),
- entry->file_offset,
- entry->num_bytes,
- len, entry->bytes_left);
- entry->bytes_left = 0;
- } else {
- entry->bytes_left -= len;
- }
-
- if (!uptodate)
- set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
-
- /*
- * All the IO of the ordered extent is finished, we need to queue
- * the finish_func to be executed.
- */
- if (entry->bytes_left == 0) {
- set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
- cond_wake_up(&entry->wait);
- refcount_inc(&entry->refs);
- trace_btrfs_ordered_extent_mark_finished(inode, entry);
+ if (can_finish_ordered_extent(entry, page, cur, len, uptodate)) {
spin_unlock_irqrestore(&tree->lock, flags);
- btrfs_init_work(&entry->work, finish_ordered_fn, NULL, NULL);
- btrfs_queue_work(wq, &entry->work);
+ btrfs_queue_ordered_fn(entry);
spin_lock_irqsave(&tree->lock, flags);
}
cur += len;
@@ -564,7 +588,7 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
freespace_inode = btrfs_is_free_space_inode(btrfs_inode);
btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered);
- /* This is paired with btrfs_add_ordered_extent. */
+ /* This is paired with btrfs_alloc_ordered_extent. */
spin_lock(&btrfs_inode->lock);
btrfs_mod_outstanding_extents(btrfs_inode, -1);
spin_unlock(&btrfs_inode->lock);
@@ -1117,17 +1141,22 @@ bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,
}
/* Split out a new ordered extent for this first @len bytes of @ordered. */
-int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len)
+struct btrfs_ordered_extent *btrfs_split_ordered_extent(
+ struct btrfs_ordered_extent *ordered, u64 len)
{
- struct inode *inode = ordered->inode;
- struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_inode *inode = BTRFS_I(ordered->inode);
+ struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
+ struct btrfs_root *root = inode->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
u64 file_offset = ordered->file_offset;
u64 disk_bytenr = ordered->disk_bytenr;
- unsigned long flags = ordered->flags & BTRFS_ORDERED_TYPE_FLAGS;
+ unsigned long flags = ordered->flags;
+ struct btrfs_ordered_sum *sum, *tmpsum;
+ struct btrfs_ordered_extent *new;
struct rb_node *node;
+ u64 offset = 0;
- trace_btrfs_ordered_extent_split(BTRFS_I(inode), ordered);
+ trace_btrfs_ordered_extent_split(inode, ordered);
ASSERT(!(flags & (1U << BTRFS_ORDERED_COMPRESSED)));
@@ -1136,18 +1165,27 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len)
* reduce the original extent to a zero length either.
*/
if (WARN_ON_ONCE(len >= ordered->num_bytes))
- return -EINVAL;
- /* We cannot split once ordered extent is past end_bio. */
- if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes))
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
+ /* We cannot split partially completed ordered extents. */
+ if (ordered->bytes_left) {
+ ASSERT(!(flags & ~BTRFS_ORDERED_TYPE_FLAGS));
+ if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes))
+ return ERR_PTR(-EINVAL);
+ }
/* We cannot split a compressed ordered extent. */
if (WARN_ON_ONCE(ordered->disk_num_bytes != ordered->num_bytes))
- return -EINVAL;
- /* Checksum list should be empty. */
- if (WARN_ON_ONCE(!list_empty(&ordered->list)))
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
- spin_lock_irq(&tree->lock);
+ new = alloc_ordered_extent(inode, file_offset, len, len, disk_bytenr,
+ len, 0, flags, ordered->compress_type);
+ if (IS_ERR(new))
+ return new;
+
+ /* One ref for the tree. */
+ refcount_inc(&new->refs);
+
+ spin_lock_irq(&root->ordered_extent_lock);
+ spin_lock(&tree->lock);
/* Remove from tree once */
node = &ordered->rb_node;
rb_erase(node, &tree->tree);
@@ -1159,26 +1197,48 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len)
ordered->disk_bytenr += len;
ordered->num_bytes -= len;
ordered->disk_num_bytes -= len;
- ordered->bytes_left -= len;
+
+ if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) {
+ ASSERT(ordered->bytes_left == 0);
+ new->bytes_left = 0;
+ } else {
+ ordered->bytes_left -= len;
+ }
+
+ if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags)) {
+ if (ordered->truncated_len > len) {
+ ordered->truncated_len -= len;
+ } else {
+ new->truncated_len = ordered->truncated_len;
+ ordered->truncated_len = 0;
+ }
+ }
+
+ list_for_each_entry_safe(sum, tmpsum, &ordered->list, list) {
+ if (offset == len)
+ break;
+ list_move_tail(&sum->list, &new->list);
+ offset += sum->len;
+ }
/* Re-insert the node */
node = tree_insert(&tree->tree, ordered->file_offset, &ordered->rb_node);
if (node)
btrfs_panic(fs_info, -EEXIST,
"zoned: inconsistency in ordered tree at offset %llu",
- ordered->file_offset);
+ ordered->file_offset);
- spin_unlock_irq(&tree->lock);
-
- /*
- * The splitting extent is already counted and will be added again in
- * btrfs_add_ordered_extent(). Subtract len to avoid double counting.
- */
- percpu_counter_add_batch(&fs_info->ordered_bytes, -len, fs_info->delalloc_batch);
+ node = tree_insert(&tree->tree, new->file_offset, &new->rb_node);
+ if (node)
+ btrfs_panic(fs_info, -EEXIST,
+ "zoned: inconsistency in ordered tree at offset %llu",
+ new->file_offset);
+ spin_unlock(&tree->lock);
- return btrfs_add_ordered_extent(BTRFS_I(inode), file_offset, len, len,
- disk_bytenr, len, 0, flags,
- ordered->compress_type);
+ list_add_tail(&new->root_extent_list, &root->ordered_extents);
+ root->nr_ordered_extents++;
+ spin_unlock_irq(&root->ordered_extent_lock);
+ return new;
}
int __init ordered_data_init(void)