summary | refs | log | tree | commit | diff
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/btrfs_inode.h 4
-rw-r--r-- fs/btrfs/ctree.h 19
-rw-r--r-- fs/btrfs/disk-io.c 13
-rw-r--r-- fs/btrfs/extent-tree.c 132
-rw-r--r-- fs/btrfs/extent_io.c 52
-rw-r--r-- fs/btrfs/extent_io.h 14
-rw-r--r-- fs/btrfs/extent_map.c 5
-rw-r--r-- fs/btrfs/file-item.c 62
-rw-r--r-- fs/btrfs/file.c 67
-rw-r--r-- fs/btrfs/inode.c 447
-rw-r--r-- fs/btrfs/ordered-data.c 455
-rw-r--r-- fs/btrfs/ordered-data.h 71
-rw-r--r-- fs/btrfs/transaction.c 67
-rw-r--r-- fs/btrfs/transaction.h 4
14 files changed, 910 insertions, 502 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 40b4e0c9cd09..8d03687510e0 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -21,6 +21,7 @@
#include "extent_map.h"
#include "extent_io.h"
+#include "ordered-data.h"
/* in memory btrfs inode */
struct btrfs_inode {
@@ -32,9 +33,8 @@ struct btrfs_inode {
struct extent_io_tree io_failure_tree;
struct mutex csum_mutex;
struct inode vfs_inode;
- atomic_t ordered_writeback;
+ struct btrfs_ordered_inode_tree ordered_tree;
- u64 ordered_trans;
/*
* transid of the trans_handle that last modified this inode
*/
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f3783dbd9b60..ceebc052ddcb 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -25,6 +25,7 @@
#include <linux/fs.h>
#include <linux/completion.h>
#include <linux/backing-dev.h>
+#include <linux/wait.h>
#include <asm/kmap_types.h>
#include "bit-radix.h"
#include "extent_io.h"
@@ -37,6 +38,7 @@ extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_transaction_cachep;
extern struct kmem_cache *btrfs_bit_radix_cachep;
extern struct kmem_cache *btrfs_path_cachep;
+struct btrfs_ordered_sum;
#define BTRFS_MAGIC "_B5RfS_M"
@@ -510,6 +512,7 @@ struct btrfs_fs_info {
u64 max_inline;
u64 alloc_start;
struct btrfs_transaction *running_transaction;
+ wait_queue_head_t transaction_throttle;
struct btrfs_super_block super_copy;
struct btrfs_super_block super_for_commit;
struct block_device *__bdev;
@@ -541,6 +544,7 @@ struct btrfs_fs_info {
*/
struct btrfs_workers workers;
struct btrfs_workers endio_workers;
+ struct btrfs_workers endio_write_workers;
struct btrfs_workers submit_workers;
struct task_struct *transaction_kthread;
struct task_struct *cleaner_kthread;
@@ -1384,6 +1388,17 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
u64 owner, u64 owner_offset,
u64 empty_size, u64 hint_byte,
u64 search_end, struct btrfs_key *ins, u64 data);
+int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner, u64 owner_offset,
+ struct btrfs_key *ins);
+int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 num_bytes, u64 min_alloc_size,
+ u64 empty_size, u64 hint_byte,
+ u64 search_end, struct btrfs_key *ins,
+ u64 data);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf);
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
@@ -1556,9 +1571,9 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
u64 bytenr, int mod);
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
- struct bio *bio, char *sums);
+ struct btrfs_ordered_sum *sums);
int btrfs_csum_one_bio(struct btrfs_root *root,
- struct bio *bio, char **sums_ret);
+ struct bio *bio, struct btrfs_ordered_sum **sums_ret);
struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b01b3f4f92a9..4a5ebafb935a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -407,7 +407,11 @@ static int end_workqueue_bio(struct bio *bio,
end_io_wq->error = err;
end_io_wq->work.func = end_workqueue_fn;
end_io_wq->work.flags = 0;
- btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work);
+ if (bio->bi_rw & (1 << BIO_RW))
+ btrfs_queue_worker(&fs_info->endio_write_workers,
+ &end_io_wq->work);
+ else
+ btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
return 0;
@@ -1286,6 +1290,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
mutex_init(&fs_info->transaction_kthread_mutex);
mutex_init(&fs_info->cleaner_mutex);
mutex_init(&fs_info->volume_mutex);
+ init_waitqueue_head(&fs_info->transaction_throttle);
#if 0
ret = add_hasher(fs_info, "crc32c");
@@ -1325,9 +1330,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size);
btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size);
btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
+ btrfs_init_workers(&fs_info->endio_write_workers,
+ fs_info->thread_pool_size);
btrfs_start_workers(&fs_info->workers, 1);
btrfs_start_workers(&fs_info->submit_workers, 1);
btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
+ btrfs_start_workers(&fs_info->endio_write_workers,
+ fs_info->thread_pool_size);
err = -EINVAL;
if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) {
@@ -1447,6 +1456,7 @@ fail_sb_buffer:
extent_io_tree_empty_lru(&BTRFS_I(fs_info->btree_inode)->io_tree);
btrfs_stop_workers(&fs_info->workers);
btrfs_stop_workers(&fs_info->endio_workers);
+ btrfs_stop_workers(&fs_info->endio_write_workers);
btrfs_stop_workers(&fs_info->submit_workers);
fail_iput:
iput(fs_info->btree_inode);
@@ -1702,6 +1712,7 @@ int close_ctree(struct btrfs_root *root)
btrfs_stop_workers(&fs_info->workers);
btrfs_stop_workers(&fs_info->endio_workers);
+ btrfs_stop_workers(&fs_info->endio_write_workers);
btrfs_stop_workers(&fs_info->submit_workers);
iput(fs_info->btree_inode);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 8ebfa6be0790..343d1101c31c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1895,36 +1895,17 @@ error:
return ret;
}
-/*
- * finds a free extent and does all the dirty work required for allocation
- * returns the key for the extent through ins, and a tree buffer for
- * the first block of the extent through buf.
- *
- * returns 0 if everything worked, non-zero otherwise.
- */
-int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 num_bytes, u64 min_alloc_size,
- u64 root_objectid, u64 ref_generation,
- u64 owner, u64 owner_offset,
- u64 empty_size, u64 hint_byte,
- u64 search_end, struct btrfs_key *ins, u64 data)
+static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 num_bytes, u64 min_alloc_size,
+ u64 empty_size, u64 hint_byte,
+ u64 search_end, struct btrfs_key *ins,
+ u64 data)
{
int ret;
- int pending_ret;
- u64 super_used;
- u64 root_used;
u64 search_start = 0;
u64 alloc_profile;
- u32 sizes[2];
struct btrfs_fs_info *info = root->fs_info;
- struct btrfs_root *extent_root = info->extent_root;
- struct btrfs_extent_item *extent_item;
- struct btrfs_extent_ref *ref;
- struct btrfs_path *path;
- struct btrfs_key keys[2];
-
- maybe_lock_mutex(root);
if (data) {
alloc_profile = info->avail_data_alloc_bits &
@@ -1974,11 +1955,48 @@ again:
}
if (ret) {
printk("allocation failed flags %Lu\n", data);
- }
- if (ret) {
BUG();
- goto out;
}
+ clear_extent_dirty(&root->fs_info->free_space_cache,
+ ins->objectid, ins->objectid + ins->offset - 1,
+ GFP_NOFS);
+ return 0;
+}
+
+/*
+ * Locked entry point for reserving an extent.
+ *
+ * Wraps __btrfs_reserve_extent() in maybe_lock_mutex()/maybe_unlock_mutex()
+ * on @root, forwards every argument unchanged and hands back the helper's
+ * return value.  The extent that was picked is reported through @ins.
+ */
+int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root,
+			 u64 num_bytes, u64 min_alloc_size,
+			 u64 empty_size, u64 hint_byte,
+			 u64 search_end, struct btrfs_key *ins,
+			 u64 data)
+{
+	int err;
+
+	maybe_lock_mutex(root);
+	err = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
+				     empty_size, hint_byte, search_end,
+				     ins, data);
+	maybe_unlock_mutex(root);
+
+	return err;
+}
+
+static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner, u64 owner_offset,
+ struct btrfs_key *ins)
+{
+ int ret;
+ int pending_ret;
+ u64 super_used;
+ u64 root_used;
+ u64 num_bytes = ins->offset;
+ u32 sizes[2];
+ struct btrfs_fs_info *info = root->fs_info;
+ struct btrfs_root *extent_root = info->extent_root;
+ struct btrfs_extent_item *extent_item;
+ struct btrfs_extent_ref *ref;
+ struct btrfs_path *path;
+ struct btrfs_key keys[2];
/* block accounting for super block */
spin_lock_irq(&info->delalloc_lock);
@@ -1990,10 +2008,6 @@ again:
root_used = btrfs_root_used(&root->root_item);
btrfs_set_root_used(&root->root_item, root_used + num_bytes);
- clear_extent_dirty(&root->fs_info->free_space_cache,
- ins->objectid, ins->objectid + ins->offset - 1,
- GFP_NOFS);
-
if (root == extent_root) {
set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
ins->objectid + ins->offset - 1,
@@ -2001,10 +2015,6 @@ again:
goto update_block;
}
- WARN_ON(trans->alloc_exclude_nr);
- trans->alloc_exclude_start = ins->objectid;
- trans->alloc_exclude_nr = ins->offset;
-
memcpy(&keys[0], ins, sizeof(*ins));
keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
owner, owner_offset);
@@ -2054,6 +2064,51 @@ update_block:
BUG();
}
out:
+ return ret;
+}
+
+/*
+ * Locked entry point for turning an already reserved extent into an
+ * allocated one.
+ *
+ * @ins describes the reserved extent.  The work is done by
+ * __btrfs_alloc_reserved_extent(), called between maybe_lock_mutex() and
+ * maybe_unlock_mutex() on @root; its return value is passed straight back
+ * to the caller.
+ */
+int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
+				struct btrfs_root *root,
+				u64 root_objectid, u64 ref_generation,
+				u64 owner, u64 owner_offset,
+				struct btrfs_key *ins)
+{
+	int err;
+
+	maybe_lock_mutex(root);
+	err = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
+					    ref_generation, owner,
+					    owner_offset, ins);
+	maybe_unlock_mutex(root);
+
+	return err;
+}
+/*
+ * finds a free extent and does all the dirty work required for allocation
+ * returns the key for the extent through ins.
+ *
+ * This is the one-shot path: between maybe_lock_mutex(root) and
+ * maybe_unlock_mutex(root) it first reserves space with
+ * __btrfs_reserve_extent() and then finishes the allocation of that
+ * reservation with __btrfs_alloc_reserved_extent().
+ *
+ * A non-zero result from either step trips BUG_ON(), so whenever this
+ * function returns, the value returned is 0.
+ */
+int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 num_bytes, u64 min_alloc_size,
+ u64 root_objectid, u64 ref_generation,
+ u64 owner, u64 owner_offset,
+ u64 empty_size, u64 hint_byte,
+ u64 search_end, struct btrfs_key *ins, u64 data)
+{
+ int ret;
+
+ maybe_lock_mutex(root);
+
+ ret = __btrfs_reserve_extent(trans, root, num_bytes,
+ min_alloc_size, empty_size, hint_byte,
+ search_end, ins, data);
+ BUG_ON(ret);
+ ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
+ ref_generation, owner,
+ owner_offset, ins);
+ BUG_ON(ret);
+
maybe_unlock_mutex(root);
return ret;
}
@@ -2288,8 +2343,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
mutex_lock(&root->fs_info->alloc_mutex);
/* we've dropped the lock, double check */
- ret = drop_snap_lookup_refcount(root, bytenr,
- blocksize, &refs);
+ ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
+ &refs);
BUG_ON(ret);
if (refs != 1) {
parent = path->nodes[*level];
@@ -2584,7 +2639,6 @@ out_unlock:
kfree(ra);
trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
if (trans) {
- btrfs_add_ordered_inode(inode);
btrfs_end_transaction(trans, BTRFS_I(inode)->root);
mark_inode_dirty(inode);
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 40a5f53cb040..3f82a6e9ca4f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -793,6 +793,13 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
}
EXPORT_SYMBOL(set_extent_dirty);
+/*
+ * Set the EXTENT_ORDERED bit on the range start..end of @tree.
+ *
+ * Thin wrapper around set_extent_bit(); @mask is the allocation mask handed
+ * through to it.  Returns whatever set_extent_bit() returns.
+ */
+int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+		       gfp_t mask)
+{
+	const int bits = EXTENT_ORDERED;
+
+	return set_extent_bit(tree, start, end, bits, 0, NULL, mask);
+}
+EXPORT_SYMBOL(set_extent_ordered);
+
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask)
{
@@ -812,8 +819,8 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
return set_extent_bit(tree, start, end,
- EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
- mask);
+ EXTENT_DELALLOC | EXTENT_DIRTY,
+ 0, NULL, mask);
}
EXPORT_SYMBOL(set_extent_delalloc);
@@ -825,6 +832,13 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
}
EXPORT_SYMBOL(clear_extent_dirty);
+/*
+ * Clear the EXTENT_ORDERED bit on the range start..end of @tree.
+ *
+ * Thin wrapper around clear_extent_bit(), called with wake = 1 and
+ * delete = 0; @mask is the allocation mask handed through to it.
+ * Returns whatever clear_extent_bit() returns.
+ */
+int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+			 gfp_t mask)
+{
+	const int bits = EXTENT_ORDERED;
+	const int wake = 1;
+	const int delete = 0;
+
+	return clear_extent_bit(tree, start, end, bits, wake, delete, mask);
+}
+EXPORT_SYMBOL(clear_extent_ordered);
+
int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask)
{
@@ -1395,10 +1409,9 @@ static int end_bio_extent_writepage(struct bio *bio,
if (--bvec >= bio->bi_io_vec)
prefetchw(&bvec->bv_page->flags);
-
if (tree->ops && tree->ops->writepage_end_io_hook) {
ret = tree->ops->writepage_end_io_hook(page, start,
- end, state);
+ end, state, uptodate);
if (ret)
uptodate = 0;
}
@@ -1868,9 +1881,14 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
unlock_extent(tree, cur, end, GFP_NOFS);
break;
}
-
extent_offset = cur - em->start;
+ if (extent_map_end(em) <= cur) {
+printk("bad mapping em [%Lu %Lu] cur %Lu\n", em->start, extent_map_end(em), cur);
+ }
BUG_ON(extent_map_end(em) <= cur);
+ if (end < cur) {
+printk("2bad mapping end %Lu cur %Lu\n", end, cur);
+ }
BUG_ON(end < cur);
iosize = min(extent_map_end(em) - cur, end - cur + 1);
@@ -1976,6 +1994,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
u64 last_byte = i_size_read(inode);
u64 block_start;
u64 iosize;
+ u64 unlock_start;
sector_t sector;
struct extent_map *em;
struct block_device *bdev;
@@ -1988,7 +2007,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
u64 nr_delalloc;
u64 delalloc_end;
-
WARN_ON(!PageLocked(page));
page_offset = i_size & (PAGE_CACHE_SIZE - 1);
if (page->index > end_index ||
@@ -2030,6 +2048,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
delalloc_start = delalloc_end + 1;
}
lock_extent(tree, start, page_end, GFP_NOFS);
+ unlock_start = start;
end = page_end;
if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
@@ -2038,6 +2057,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
if (last_byte <= start) {
clear_extent_dirty(tree, start, page_end, GFP_NOFS);
+ unlock_extent(tree, start, page_end, GFP_NOFS);
+ if (tree->ops && tree->ops->writepage_end_io_hook)
+ tree->ops->writepage_end_io_hook(page, start,
+ page_end, NULL, 1);
+ unlock_start = page_end + 1;
goto done;
}
@@ -2047,6 +2071,11 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
while (cur <= end) {
if (cur >= last_byte) {
clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
+ unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
+ if (tree->ops && tree->ops->writepage_end_io_hook)
+ tree->ops->writepage_end_io_hook(page, cur,
+ page_end, NULL, 1);
+ unlock_start = page_end + 1;
break;
}
em = epd->get_extent(inode, page, page_offset, cur,
@@ -2071,8 +2100,16 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
block_start == EXTENT_MAP_INLINE) {
clear_extent_dirty(tree, cur,
cur + iosize - 1, GFP_NOFS);
+
+ unlock_extent(tree, unlock_start, cur + iosize -1,
+ GFP_NOFS);
+ if (tree->ops && tree->ops->writepage_end_io_hook)
+ tree->ops->writepage_end_io_hook(page, cur,
+ cur + iosize - 1,
+ NULL, 1);
cur = cur + iosize;
page_offset += iosize;
+ unlock_start = cur;
continue;
}
@@ -2119,7 +2156,8 @@ done:
set_page_writeback(page);
end_page_writeback(page);
}
- unlock_extent(tree, start, page_end, GFP_NOFS);
+ if (unlock_start <= page_end)
+ unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
unlock_page(page);
return 0;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index f1960dafaa19..2268a7995896 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -13,6 +13,8 @@
#define EXTENT_DEFRAG (1 << 6)
#define EXTENT_DEFRAG_DONE (1 << 7)
#define EXTENT_BUFFER_FILLED (1 << 8)
+#define EXTENT_ORDERED (1 << 9)
+#define EXTENT_ORDERED_METADATA (1 << 10)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
/*
@@ -42,7 +44,7 @@ struct extent_io_ops {
int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state);
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
- struct extent_state *state);
+ struct extent_state *state, int uptodate);
int (*set_bit_hook)(struct inode *inode, u64 start, u64 end,
unsigned long old, unsigned long bits);
int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end,
@@ -131,6 +133,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bits, int filled);
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask);
+int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ int bits, int wake, int delete, gfp_t mask);
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int bits, gfp_t mask);
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
@@ -141,8 +145,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
+int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask);
+int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start,
+ u64 end, gfp_t mask);
int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
+int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
+ gfp_t mask);
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, int bits);
struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
@@ -209,6 +219,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
unsigned long start, unsigned long len);
int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
struct extent_buffer *eb);
+int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end);
+int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
int clear_extent_buffer_dirty(struct extent_io_tree *tree,
struct extent_buffer *eb);
int set_extent_buffer_dirty(struct extent_io_tree *tree,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index f5a04eb9a2ac..81123277c2b8 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -206,10 +206,11 @@ int add_extent_mapping(struct extent_map_tree *tree,
struct extent_map *merge = NULL;
struct rb_node *rb;
+ BUG_ON(spin_trylock(&tree->lock));
rb = tree_insert(&tree->map, em->start, &em->rb_node);
if (rb) {
- merge = rb_entry(rb, struct extent_map, rb_node);
ret = -EEXIST;
+ free_extent_map(merge);
goto out;
}
atomic_inc(&em->refs);
@@ -268,6 +269,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
struct rb_node *next = NULL;
u64 end = range_end(start, len);
+ BUG_ON(spin_trylock(&tree->lock));
em = tree->last;
if (em && end > em->start && start < extent_map_end(em))
goto found;
@@ -318,6 +320,7 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
{
int ret = 0;
+ BUG_ON(spin_trylock(&tree->lock));
rb_erase(&em->rb_node, &tree->map);
em->in_tree = 0;
if (tree->last == em)
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index f537eb43c2c6..345caf8ff516 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -135,26 +135,37 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
}
int btrfs_csum_one_bio(struct btrfs_root *root,
- struct bio *bio, char **sums_ret)
+ struct bio *bio, struct btrfs_ordered_sum **sums_ret)
{
- u32 *sums;
+ struct btrfs_ordered_sum *sums;
+ struct btrfs_sector_sum *sector_sum;
char *data;
struct bio_vec *bvec = bio->bi_io_vec;
int bio_index = 0;
- sums = kmalloc(bio->bi_vcnt * BTRFS_CRC32_SIZE, GFP_NOFS);
+ WARN_ON(bio->bi_vcnt <= 0);
+ sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS);
if (!sums)
return -ENOMEM;
- *sums_ret = (char *)sums;
+ *sums_ret = sums;
+ sector_sum = &sums->sums;
+ sums->file_offset = page_offset(bvec->bv_page);
+ sums->len = bio->bi_size;
+ INIT_LIST_HEAD(&sums->list);
while(bio_index < bio->bi_vcnt) {
data = kmap_atomic(bvec->bv_page, KM_USER0);
- *sums = ~(u32)0;
- *sums = btrfs_csum_data(root, data + bvec->bv_offset,
- *sums, bvec->bv_len);
+ sector_sum->sum = ~(u32)0;
+ sector_sum->sum = btrfs_csum_data(root,
+ data + bvec->bv_offset,
+ sector_sum->sum,
+ bvec->bv_len);
kunmap_atomic(data, KM_USER0);
- btrfs_csum_final(*sums, (char *)sums);
- sums++;
+ btrfs_csum_final(sector_sum->sum,
+ (char *)&sector_sum->sum);
+ sector_sum->offset = page_offset(bvec->bv_page) +
+ bvec->bv_offset;
+ sector_sum++;
bio_index++;
bvec++;
}
@@ -163,7 +174,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root,
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
- struct bio *bio, char *sums)
+ struct btrfs_ordered_sum *sums)
{
u64 objectid = inode->i_ino;
u64 offset;
@@ -171,17 +182,16 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_key file_key;
struct btrfs_key found_key;
u64 next_offset;
+ u64 total_bytes = 0;
int found_next;
struct btrfs_path *path;
struct btrfs_csum_item *item;
struct btrfs_csum_item *item_end;
struct extent_buffer *leaf = NULL;
u64 csum_offset;
- u32 *sums32 = (u32 *)sums;
+ struct btrfs_sector_sum *sector_sum;
u32 nritems;
u32 ins_size;
- int bio_index = 0;
- struct bio_vec *bvec = bio->bi_io_vec;
char *eb_map;
char *eb_token;
unsigned long map_len;
@@ -189,10 +199,11 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
BUG_ON(!path);
+ sector_sum = &sums->sums;
again:
next_offset = (u64)-1;
found_next = 0;
- offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+ offset = sector_sum->offset;
file_key.objectid = objectid;
file_key.offset = offset;
btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
@@ -303,7 +314,7 @@ found:
item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
btrfs_item_size_nr(leaf, path->slots[0]));
eb_token = NULL;
-next_bvec:
+next_sector:
if (!eb_token ||
(unsigned long)item + BTRFS_CRC32_SIZE >= map_start + map_len) {
@@ -321,21 +332,20 @@ next_bvec:
}
if (eb_token) {
memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)),
- sums32, BTRFS_CRC32_SIZE);
+ &sector_sum->sum, BTRFS_CRC32_SIZE);
} else {
- write_extent_buffer(leaf, sums32, (unsigned long)item,
- BTRFS_CRC32_SIZE);
+ write_extent_buffer(leaf, &sector_sum->sum,
+ (unsigned long)item, BTRFS_CRC32_SIZE);
}
- bio_index++;
- bvec++;
- sums32++;
- if (bio_index < bio->bi_vcnt) {
+ total_bytes += root->sectorsize;
+ sector_sum++;
+ if (total_bytes < sums->len) {
item = (struct btrfs_csum_item *)((char *)item +
BTRFS_CRC32_SIZE);
if (item < item_end && offset + PAGE_CACHE_SIZE ==
- page_offset(bvec->bv_page)) {
- offset = page_offset(bvec->bv_page);
- goto next_bvec;
+ sector_sum->offset) {
+ offset = sector_sum->offset;
+ goto next_sector;
}
}
if (eb_token) {
@@ -343,7 +353,7 @@ next_bvec:
eb_token = NULL;
}
btrfs_mark_buffer_dirty(path->nodes[0]);
- if (bio_index < bio->bi_vcnt) {
+ if (total_bytes < sums->len) {
btrfs_release_path(root, path);
goto again;
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 8037792f8789..12e765f7e0d4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -34,7 +34,6 @@
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
-#include "ordered-data.h"
#include "ioctl.h"
#include "print-tree.h"
#include "compat.h"
@@ -273,7 +272,9 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
u64 mask = root->sectorsize - 1;
last_pos_in_file = (isize + mask) & ~mask;
hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
- if (last_pos_in_file < start_pos) {
+ if (hole_size > 0) {
+ btrfs_wait_ordered_range(inode, last_pos_in_file,
+ last_pos_in_file + hole_size);
err = btrfs_drop_extents(trans, root, inode,
last_pos_in_file,
last_pos_in_file + hole_size,
@@ -303,19 +304,17 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
inline_size > root->fs_info->max_inline ||
(inline_size & (root->sectorsize -1)) == 0 ||
inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
- u64 last_end;
-
+ /* check for reserved extents on each page, we don't want
+ * to reset the delalloc bit on things that already have
+ * extents reserved.
+ */
+ set_extent_delalloc(io_tree, start_pos,
+ end_of_last_block, GFP_NOFS);
for (i = 0; i < num_pages; i++) {
struct page *p = pages[i];
SetPageUptodate(p);
set_page_dirty(p);
}
- last_end = (u64)(pages[num_pages -1]->index) <<
- PAGE_CACHE_SHIFT;
- last_end += PAGE_CACHE_SIZE - 1;
- set_extent_delalloc(io_tree, start_pos, end_of_last_block,
- GFP_NOFS);
- btrfs_add_ordered_inode(inode);
} else {
u64 aligned_end;
/* step one, delete the existing extents in this range */
@@ -350,10 +349,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
struct extent_map *split = NULL;
struct extent_map *split2 = NULL;
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map *tmp;
u64 len = end - start + 1;
+ u64 next_start;
int ret;
int testend = 1;
+ WARN_ON(end < start);
if (end == (u64)-1) {
len = (u64)-1;
testend = 0;
@@ -370,6 +372,8 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
spin_unlock(&em_tree->lock);
break;
}
+ tmp = rb_entry(&em->rb_node, struct extent_map, rb_node);
+ next_start = tmp->start;
remove_extent_mapping(em_tree, em);
if (em->block_start < EXTENT_MAP_LAST_BYTE &&
@@ -778,37 +782,58 @@ static int prepare_pages(struct btrfs_root *root, struct file *file,
struct inode *inode = fdentry(file)->d_inode;
int err = 0;
u64 start_pos;
+ u64 last_pos;
start_pos = pos & ~((u64)root->sectorsize - 1);
+ last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
memset(pages, 0, num_pages * sizeof(struct page *));
-
+again:
for (i = 0; i < num_pages; i++) {
pages[i] = grab_cache_page(inode->i_mapping, index + i);
if (!pages[i]) {
err = -ENOMEM;
BUG_ON(1);
}
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
- ClearPageDirty(pages[i]);
-#else
- cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
-#endif
wait_on_page_writeback(pages[i]);
- set_page_extent_mapped(pages[i]);
- WARN_ON(!PageLocked(pages[i]));
}
if (start_pos < inode->i_size) {
- u64 last_pos;
- last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
+ struct btrfs_ordered_extent *ordered;
lock_extent(&BTRFS_I(inode)->io_tree,
start_pos, last_pos - 1, GFP_NOFS);
+ ordered = btrfs_lookup_first_ordered_extent(inode, last_pos -1);
+ if (ordered &&
+ ordered->file_offset + ordered->len > start_pos &&
+ ordered->file_offset < last_pos) {
+ btrfs_put_ordered_extent(ordered);
+ unlock_extent(&BTRFS_I(inode)->io_tree,
+ start_pos, last_pos - 1, GFP_NOFS);
+ for (i = 0; i < num_pages; i++) {
+ unlock_page(pages[i]);
+ page_cache_release(pages[i]);
+ }
+ btrfs_wait_ordered_range(inode, start_pos,
+ last_pos - start_pos);
+ goto again;
+ }
+ if (ordered)
+ btrfs_put_ordered_extent(ordered);
+
clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos,
last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC,
GFP_NOFS);
unlock_extent(&BTRFS_I(inode)->io_tree,
start_pos, last_pos - 1, GFP_NOFS);
}
+ for (i = 0; i < num_pages; i++) {
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
+ ClearPageDirty(pages[i]);
+#else
+ cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
+#endif
+ set_page_extent_mapped(pages[i]);
+ WARN_ON(!PageLocked(pages[i]));
+ }
return 0;
}
@@ -969,13 +994,11 @@ out_nolock:
(start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
}
current->backing_dev_info = NULL;
- btrfs_ordered_throttle(root, inode);
return num_written ? num_written : err;
}
int btrfs_release_file(struct inode * inode, struct file * filp)
{
- btrfs_del_ordered_inode(inode, 0);
if (filp->private_data)
btrfs_ioctl_trans_end(filp);
return 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d39433dfb2c7..c5a62f0b9595 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -43,6 +43,7 @@
#include "ioctl.h"
#include "print-tree.h"
#include "volumes.h"
+#include "ordered-data.h"
struct btrfs_iget_args {
u64 ino;
@@ -109,10 +110,11 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
u64 num_bytes;
u64 cur_alloc_size;
u64 blocksize = root->sectorsize;
- u64 orig_start = start;
u64 orig_num_bytes;
struct btrfs_key ins;
- int ret;
+ struct extent_map *em;
+ struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ int ret = 0;
trans = btrfs_start_transaction(root, 1);
BUG_ON(!trans);
@@ -120,33 +122,44 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
num_bytes = (end - start + blocksize) & ~(blocksize - 1);
num_bytes = max(blocksize, num_bytes);
- ret = btrfs_drop_extents(trans, root, inode,
- start, start + num_bytes, start, &alloc_hint);
orig_num_bytes = num_bytes;
if (alloc_hint == EXTENT_MAP_INLINE)
goto out;
BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy));
+ btrfs_drop_extent_cache(inode, start, start + num_bytes - 1);
while(num_bytes > 0) {
cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
- ret = btrfs_alloc_extent(trans, root, cur_alloc_size,
- root->sectorsize,
- root->root_key.objectid,
- trans->transid,
- inode->i_ino, start, 0,
- alloc_hint, (u64)-1, &ins, 1);
+ ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
+ root->sectorsize, 0, 0,
+ (u64)-1, &ins, 1);
if (ret) {
WARN_ON(1);
goto out;
}
+ em = alloc_extent_map(GFP_NOFS);
+ em->start = start;
+ em->len = ins.offset;
+ em->block_start = ins.objectid;
+ em->bdev = root->fs_info->fs_devices->latest_bdev;
+ while(1) {
+ spin_lock(&em_tree->lock);
+ ret = add_extent_mapping(em_tree, em);
+ spin_unlock(&em_tree->lock);
+ if (ret != -EEXIST) {
+ free_extent_map(em);
+ break;
+ }
+ btrfs_drop_extent_cache(inode, start,
+ start + ins.offset - 1);
+ }
+
cur_alloc_size = ins.offset;
- ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
- start, ins.objectid, ins.offset,
- ins.offset, 0);
- inode->i_blocks += ins.offset >> 9;
- btrfs_check_file(root, inode);
+ ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
+ ins.offset);
+ BUG_ON(ret);
if (num_bytes < cur_alloc_size) {
printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes,
cur_alloc_size);
@@ -156,10 +169,6 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
alloc_hint = ins.objectid + ins.offset;
start += cur_alloc_size;
}
- btrfs_drop_extent_cache(inode, orig_start,
- orig_start + orig_num_bytes - 1);
- btrfs_add_ordered_inode(inode);
- btrfs_update_inode(trans, root, inode);
out:
btrfs_end_transaction(trans, root);
return ret;
@@ -341,25 +350,15 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
int mirror_num)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
int ret = 0;
- char *sums = NULL;
+ struct btrfs_ordered_sum *sums;
ret = btrfs_csum_one_bio(root, bio, &sums);
BUG_ON(ret);
- trans = btrfs_start_transaction(root, 1);
-
- btrfs_set_trans_block_group(trans, inode);
- mutex_lock(&BTRFS_I(inode)->csum_mutex);
- btrfs_csum_file_blocks(trans, root, inode, bio, sums);
- mutex_unlock(&BTRFS_I(inode)->csum_mutex);
-
- ret = btrfs_end_transaction(trans, root);
+ ret = btrfs_add_ordered_sum(inode, sums);
BUG_ON(ret);
- kfree(sums);
-
return btrfs_map_bio(root, rw, bio, mirror_num, 1);
}
@@ -369,14 +368,10 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
- if (!(rw & (1 << BIO_RW))) {
- ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
- BUG_ON(ret);
- goto mapit;
- }
+ ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+ BUG_ON(ret);
- if (btrfs_test_opt(root, NODATASUM) ||
- btrfs_test_flag(inode, NODATASUM)) {
+ if (!(rw & (1 << BIO_RW))) {
goto mapit;
}
@@ -387,6 +382,96 @@ mapit:
return btrfs_map_bio(root, rw, bio, mirror_num, 0);
}
+static int add_pending_csums(struct btrfs_trans_handle *trans,
+ struct inode *inode, u64 file_offset,
+ struct list_head *list)
+{
+ struct list_head *cur;
+ struct btrfs_ordered_sum *sum;
+
+ btrfs_set_trans_block_group(trans, inode);
+ while(!list_empty(list)) {
+ cur = list->next;
+ sum = list_entry(cur, struct btrfs_ordered_sum, list);
+ mutex_lock(&BTRFS_I(inode)->csum_mutex);
+ btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root,
+ inode, sum);
+ mutex_unlock(&BTRFS_I(inode)->csum_mutex);
+ list_del(&sum->list);
+ kfree(sum);
+ }
+ return 0;
+}
+
+int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
+ struct extent_state *state, int uptodate)
+{
+ struct inode *inode = page->mapping->host;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_ordered_extent *ordered_extent;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ u64 alloc_hint = 0;
+ struct list_head list;
+ struct btrfs_key ins;
+ int ret;
+
+ ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1);
+ if (!ret) {
+ return 0;
+ }
+
+ trans = btrfs_start_transaction(root, 1);
+
+ ordered_extent = btrfs_lookup_ordered_extent(inode, start);
+ BUG_ON(!ordered_extent);
+
+ lock_extent(io_tree, ordered_extent->file_offset,
+ ordered_extent->file_offset + ordered_extent->len - 1,
+ GFP_NOFS);
+
+ INIT_LIST_HEAD(&list);
+
+ ins.objectid = ordered_extent->start;
+ ins.offset = ordered_extent->len;
+ ins.type = BTRFS_EXTENT_ITEM_KEY;
+ ret = btrfs_alloc_reserved_extent(trans, root, root->root_key.objectid,
+ trans->transid, inode->i_ino,
+ ordered_extent->file_offset, &ins);
+ BUG_ON(ret);
+ ret = btrfs_drop_extents(trans, root, inode,
+ ordered_extent->file_offset,
+ ordered_extent->file_offset +
+ ordered_extent->len,
+ ordered_extent->file_offset, &alloc_hint);
+ BUG_ON(ret);
+ ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
+ ordered_extent->file_offset,
+ ordered_extent->start,
+ ordered_extent->len,
+ ordered_extent->len, 0);
+ BUG_ON(ret);
+ btrfs_drop_extent_cache(inode, ordered_extent->file_offset,
+ ordered_extent->file_offset +
+ ordered_extent->len - 1);
+ inode->i_blocks += ordered_extent->len >> 9;
+ unlock_extent(io_tree, ordered_extent->file_offset,
+ ordered_extent->file_offset + ordered_extent->len - 1,
+ GFP_NOFS);
+ add_pending_csums(trans, inode, ordered_extent->file_offset,
+ &ordered_extent->list);
+
+ btrfs_remove_ordered_extent(inode, ordered_extent);
+ /* once for us */
+ btrfs_put_ordered_extent(ordered_extent);
+ /* once for the tree */
+ btrfs_put_ordered_extent(ordered_extent);
+
+ btrfs_update_inode(trans, root, inode);
+ btrfs_end_transaction(trans, root);
+ return 0;
+}
+
int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
{
int ret = 0;
@@ -409,7 +494,8 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
if (ret == -ENOENT || ret == -EFBIG)
ret = 0;
csum = 0;
- printk("no csum found for inode %lu start %Lu\n", inode->i_ino, start);
+ printk("no csum found for inode %lu start %Lu\n", inode->i_ino,
+ start);
goto out;
}
read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
@@ -833,7 +919,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
{
struct btrfs_root *root;
struct btrfs_trans_handle *trans;
- struct inode *inode = dentry->d_inode;
int ret;
unsigned long nr = 0;
@@ -849,14 +934,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
ret = btrfs_unlink_trans(trans, root, dir, dentry);
nr = trans->blocks_used;
- if (inode->i_nlink == 0) {
- /* if the inode isn't linked anywhere,
- * we don't need to worry about
- * data=ordered
- */
- btrfs_del_ordered_inode(inode, 1);
- }
-
btrfs_end_transaction_throttle(trans, root);
fail:
btrfs_btree_balance_dirty(root, nr);
@@ -931,6 +1008,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
int extent_type = -1;
u64 mask = root->sectorsize - 1;
+ btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1);
path = btrfs_alloc_path();
path->reada = -1;
@@ -1117,34 +1195,6 @@ error:
return ret;
}
-static int btrfs_cow_one_page(struct inode *inode, struct page *page,
- size_t zero_start)
-{
- char *kaddr;
- struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
- u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
- u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
- int ret = 0;
-
- WARN_ON(!PageLocked(page));
- set_page_extent_mapped(page);
-
- lock_extent(io_tree, page_start, page_end, GFP_NOFS);
- set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
- page_end, GFP_NOFS);
-
- if (zero_start != PAGE_CACHE_SIZE) {
- kaddr = kmap(page);
- memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
- flush_dcache_page(page);
- kunmap(page);
- }
- set_page_dirty(page);
- unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
-
- return ret;
-}
-
/*
* taken from block_truncate_page, but does cow as it zeros out
* any bytes left in the last page in the file.
@@ -1153,12 +1203,16 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
{
struct inode *inode = mapping->host;
struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ struct btrfs_ordered_extent *ordered;
+ char *kaddr;
u32 blocksize = root->sectorsize;
pgoff_t index = from >> PAGE_CACHE_SHIFT;
unsigned offset = from & (PAGE_CACHE_SIZE-1);
struct page *page;
int ret = 0;
u64 page_start;
+ u64 page_end;
if ((offset & (blocksize - 1)) == 0)
goto out;
@@ -1168,6 +1222,10 @@ again:
page = grab_cache_page(mapping, index);
if (!page)
goto out;
+
+ page_start = page_offset(page);
+ page_end = page_start + PAGE_CACHE_SIZE - 1;
+
if (!PageUptodate(page)) {
ret = btrfs_readpage(NULL, page);
lock_page(page);
@@ -1181,10 +1239,32 @@ again:
goto out;
}
}
-
- page_start = (u64)page->index << PAGE_CACHE_SHIFT;
wait_on_page_writeback(page);
- ret = btrfs_cow_one_page(inode, page, offset);
+
+ lock_extent(io_tree, page_start, page_end, GFP_NOFS);
+ set_page_extent_mapped(page);
+
+ ordered = btrfs_lookup_ordered_extent(inode, page_start);
+ if (ordered) {
+ unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+ unlock_page(page);
+ page_cache_release(page);
+ btrfs_wait_ordered_extent(inode, ordered);
+ btrfs_put_ordered_extent(ordered);
+ goto again;
+ }
+
+ set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
+ page_end, GFP_NOFS);
+ ret = 0;
+ if (offset != PAGE_CACHE_SIZE) {
+ kaddr = kmap(page);
+ memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
+ flush_dcache_page(page);
+ kunmap(page);
+ }
+ set_page_dirty(page);
+ unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
unlock_page(page);
page_cache_release(page);
@@ -1222,8 +1302,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
btrfs_truncate_page(inode->i_mapping, inode->i_size);
- lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
hole_size = block_end - hole_start;
+ btrfs_wait_ordered_range(inode, hole_start, hole_size);
+ lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
trans = btrfs_start_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
@@ -1258,6 +1339,7 @@ void btrfs_delete_inode(struct inode *inode)
unsigned long nr;
int ret;
+ btrfs_wait_ordered_range(inode, 0, (u64)-1);
truncate_inode_pages(&inode->i_data, 0);
if (is_bad_inode(inode)) {
goto no_delete;
@@ -1403,7 +1485,6 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
inode->i_mapping, GFP_NOFS);
mutex_init(&BTRFS_I(inode)->csum_mutex);
- atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
return 0;
}
@@ -1705,7 +1786,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
inode->i_mapping, GFP_NOFS);
mutex_init(&BTRFS_I(inode)->csum_mutex);
- atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
BTRFS_I(inode)->delalloc_bytes = 0;
BTRFS_I(inode)->root = root;
@@ -1930,7 +2010,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
inode->i_mapping, GFP_NOFS);
mutex_init(&BTRFS_I(inode)->csum_mutex);
BTRFS_I(inode)->delalloc_bytes = 0;
- atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
}
dir->i_sb->s_dirt = 1;
@@ -2066,64 +2145,18 @@ out_unlock:
static int merge_extent_mapping(struct extent_map_tree *em_tree,
struct extent_map *existing,
- struct extent_map *em)
+ struct extent_map *em,
+ u64 map_start, u64 map_len)
{
u64 start_diff;
- u64 new_end;
- int ret = 0;
- int real_blocks = existing->block_start < EXTENT_MAP_LAST_BYTE;
-
- if (real_blocks && em->block_start >= EXTENT_MAP_LAST_BYTE)
- goto invalid;
-
- if (!real_blocks && em->block_start != existing->block_start)
- goto invalid;
-
- new_end = max(existing->start + existing->len, em->start + em->len);
-
- if (existing->start >= em->start) {
- if (em->start + em->len < existing->start)
- goto invalid;
- start_diff = existing->start - em->start;
- if (real_blocks && em->block_start + start_diff !=
- existing->block_start)
- goto invalid;
-
- em->len = new_end - em->start;
-
- remove_extent_mapping(em_tree, existing);
- /* free for the tree */
- free_extent_map(existing);
- ret = add_extent_mapping(em_tree, em);
-
- } else if (em->start > existing->start) {
-
- if (existing->start + existing->len < em->start)
- goto invalid;
-
- start_diff = em->start - existing->start;
- if (real_blocks && existing->block_start + start_diff !=
- em->block_start)
- goto invalid;
-
- remove_extent_mapping(em_tree, existing);
- em->block_start = existing->block_start;
- em->start = existing->start;
- em->len = new_end - existing->start;
- free_extent_map(existing);
-
- ret = add_extent_mapping(em_tree, em);
- } else {
- goto invalid;
- }
- return ret;
-
-invalid:
- printk("invalid extent map merge [%Lu %Lu %Lu] [%Lu %Lu %Lu]\n",
- existing->start, existing->len, existing->block_start,
- em->start, em->len, em->block_start);
- return -EIO;
+ BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
+ start_diff = map_start - em->start;
+ em->start = map_start;
+ em->len = map_len;
+ if (em->block_start < EXTENT_MAP_LAST_BYTE)
+ em->block_start += start_diff;
+ return add_extent_mapping(em_tree, em);
}
struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
@@ -2170,10 +2203,9 @@ again:
err = -ENOMEM;
goto out;
}
-
+ em->bdev = root->fs_info->fs_devices->latest_bdev;
em->start = EXTENT_MAP_HOLE;
em->len = (u64)-1;
- em->bdev = root->fs_info->fs_devices->latest_bdev;
ret = btrfs_lookup_file_extent(trans, root, path,
objectid, start, trans != NULL);
if (ret < 0) {
@@ -2314,6 +2346,9 @@ insert:
*/
if (ret == -EEXIST) {
struct extent_map *existing;
+
+ ret = 0;
+
existing = lookup_extent_mapping(em_tree, start, len);
if (existing && (existing->start > start ||
existing->start + existing->len <= start)) {
@@ -2325,7 +2360,8 @@ insert:
em->len);
if (existing) {
err = merge_extent_mapping(em_tree, existing,
- em);
+ em, start,
+ root->sectorsize);
free_extent_map(existing);
if (err) {
free_extent_map(em);
@@ -2341,6 +2377,7 @@ insert:
} else {
free_extent_map(em);
em = existing;
+ err = 0;
}
}
spin_unlock(&em_tree->lock);
@@ -2348,8 +2385,9 @@ out:
btrfs_free_path(path);
if (trans) {
ret = btrfs_end_transaction(trans, root);
- if (!err)
+ if (!err) {
err = ret;
+ }
}
if (err) {
free_extent_map(em);
@@ -2474,8 +2512,7 @@ btrfs_readpages(struct file *file, struct address_space *mapping,
return extent_readpages(tree, mapping, pages, nr_pages,
btrfs_get_extent);
}
-
-static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
+static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
{
struct extent_io_tree *tree;
struct extent_map_tree *map;
@@ -2493,15 +2530,54 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
return ret;
}
+static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
+{
+ struct btrfs_ordered_extent *ordered;
+
+ ordered = btrfs_lookup_ordered_extent(page->mapping->host,
+ page_offset(page));
+ if (ordered) {
+ btrfs_put_ordered_extent(ordered);
+ return 0;
+ }
+ return __btrfs_releasepage(page, gfp_flags);
+}
+
static void btrfs_invalidatepage(struct page *page, unsigned long offset)
{
struct extent_io_tree *tree;
+ struct btrfs_ordered_extent *ordered;
+ u64 page_start = page_offset(page);
+ u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+ wait_on_page_writeback(page);
tree = &BTRFS_I(page->mapping->host)->io_tree;
- extent_invalidatepage(tree, page, offset);
- btrfs_releasepage(page, GFP_NOFS);
+ if (offset) {
+ btrfs_releasepage(page, GFP_NOFS);
+ return;
+ }
+
+ lock_extent(tree, page_start, page_end, GFP_NOFS);
+ ordered = btrfs_lookup_ordered_extent(page->mapping->host,
+ page_offset(page));
+ if (ordered) {
+ clear_extent_bit(tree, page_start, page_end,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_LOCKED, 1, 0, GFP_NOFS);
+ btrfs_writepage_end_io_hook(page, page_start,
+ page_end, NULL, 1);
+ btrfs_put_ordered_extent(ordered);
+ lock_extent(tree, page_start, page_end, GFP_NOFS);
+ }
+ clear_extent_bit(tree, page_start, page_end,
+ EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_ORDERED,
+ 1, 1, GFP_NOFS);
+ __btrfs_releasepage(page, GFP_NOFS);
+
if (PagePrivate(page)) {
- invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE);
+ invalidate_extent_lru(tree, page_offset(page),
+ PAGE_CACHE_SIZE);
ClearPagePrivate(page);
set_page_private(page, 0);
page_cache_release(page);
@@ -2527,35 +2603,63 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
{
struct inode *inode = fdentry(vma->vm_file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
- unsigned long end;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ struct btrfs_ordered_extent *ordered;
+ char *kaddr;
+ unsigned long zero_start;
loff_t size;
int ret;
u64 page_start;
+ u64 page_end;
ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
if (ret)
goto out;
ret = -EINVAL;
-
+again:
lock_page(page);
- wait_on_page_writeback(page);
size = i_size_read(inode);
- page_start = (u64)page->index << PAGE_CACHE_SHIFT;
+ page_start = page_offset(page);
+ page_end = page_start + PAGE_CACHE_SIZE - 1;
if ((page->mapping != inode->i_mapping) ||
- (page_start > size)) {
+ (page_start >= size)) {
/* page got truncated out from underneath us */
goto out_unlock;
}
+ wait_on_page_writeback(page);
+
+ lock_extent(io_tree, page_start, page_end, GFP_NOFS);
+ set_page_extent_mapped(page);
+
+ ordered = btrfs_lookup_ordered_extent(inode, page_start);
+ if (ordered) {
+ unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+ unlock_page(page);
+ btrfs_wait_ordered_extent(inode, ordered);
+ btrfs_put_ordered_extent(ordered);
+ goto again;
+ }
+
+ set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
+ page_end, GFP_NOFS);
+ ret = 0;
/* page is wholly or partially inside EOF */
if (page_start + PAGE_CACHE_SIZE > size)
- end = size & ~PAGE_CACHE_MASK;
+ zero_start = size & ~PAGE_CACHE_MASK;
else
- end = PAGE_CACHE_SIZE;
+ zero_start = PAGE_CACHE_SIZE;
- ret = btrfs_cow_one_page(inode, page, end);
+ if (zero_start != PAGE_CACHE_SIZE) {
+ kaddr = kmap(page);
+ memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
+ flush_dcache_page(page);
+ kunmap(page);
+ }
+ set_page_dirty(page);
+ unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
out_unlock:
unlock_page(page);
@@ -2662,15 +2766,28 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
if (!ei)
return NULL;
ei->last_trans = 0;
- ei->ordered_trans = 0;
+ btrfs_ordered_inode_tree_init(&ei->ordered_tree);
return &ei->vfs_inode;
}
void btrfs_destroy_inode(struct inode *inode)
{
+ struct btrfs_ordered_extent *ordered;
WARN_ON(!list_empty(&inode->i_dentry));
WARN_ON(inode->i_data.nrpages);
+ while(1) {
+ ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
+ if (!ordered)
+ break;
+ else {
+ printk("found ordered extent %Lu %Lu\n",
+ ordered->file_offset, ordered->len);
+ btrfs_remove_ordered_extent(inode, ordered);
+ btrfs_put_ordered_extent(ordered);
+ btrfs_put_ordered_extent(ordered);
+ }
+ }
btrfs_drop_extent_cache(inode, 0, (u64)-1);
kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
}
@@ -2869,7 +2986,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
inode->i_mapping, GFP_NOFS);
mutex_init(&BTRFS_I(inode)->csum_mutex);
BTRFS_I(inode)->delalloc_bytes = 0;
- atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
}
dir->i_sb->s_dirt = 1;
@@ -2921,6 +3037,20 @@ out_fail:
return err;
}
+static int btrfs_set_page_dirty(struct page *page)
+{
+ struct inode *inode = page->mapping->host;
+ u64 page_start = page_offset(page);
+ u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+
+ if (!test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
+ EXTENT_DELALLOC, 0)) {
+printk("inode %lu page %Lu not delalloc\n", inode->i_ino, page_offset(page));
+WARN_ON(1);
+ }
+ return __set_page_dirty_nobuffers(page);
+}
+
static int btrfs_permission(struct inode *inode, int mask,
struct nameidata *nd)
{
@@ -2967,6 +3097,7 @@ static struct extent_io_ops btrfs_extent_io_ops = {
.merge_bio_hook = btrfs_merge_bio_hook,
.readpage_io_hook = btrfs_readpage_io_hook,
.readpage_end_io_hook = btrfs_readpage_end_io_hook,
+ .writepage_end_io_hook = btrfs_writepage_end_io_hook,
.readpage_io_failed_hook = btrfs_io_failed_hook,
.set_bit_hook = btrfs_set_bit_hook,
.clear_bit_hook = btrfs_clear_bit_hook,
@@ -2982,7 +3113,7 @@ static struct address_space_operations btrfs_aops = {
.direct_IO = btrfs_direct_IO,
.invalidatepage = btrfs_invalidatepage,
.releasepage = btrfs_releasepage,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .set_page_dirty = btrfs_set_page_dirty,
};
static struct address_space_operations btrfs_symlink_aops = {
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 254da8225664..6513270f054c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -22,48 +22,30 @@
#include "ctree.h"
#include "transaction.h"
#include "btrfs_inode.h"
+#include "extent_io.h"
-struct tree_entry {
- u64 root_objectid;
- u64 objectid;
- struct inode *inode;
- struct rb_node rb_node;
-};
-/*
- * returns > 0 if entry passed (root, objectid) is > entry,
- * < 0 if (root, objectid) < entry and zero if they are equal
- */
-static int comp_entry(struct tree_entry *entry, u64 root_objectid,
- u64 objectid)
+static u64 entry_end(struct btrfs_ordered_extent *entry)
{
- if (root_objectid < entry->root_objectid)
- return -1;
- if (root_objectid > entry->root_objectid)
- return 1;
- if (objectid < entry->objectid)
- return -1;
- if (objectid > entry->objectid)
- return 1;
- return 0;
+ if (entry->file_offset + entry->len < entry->file_offset)
+ return (u64)-1;
+ return entry->file_offset + entry->len;
}
-static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid,
- u64 objectid, struct rb_node *node)
+static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
+ struct rb_node *node)
{
struct rb_node ** p = &root->rb_node;
struct rb_node * parent = NULL;
- struct tree_entry *entry;
- int comp;
+ struct btrfs_ordered_extent *entry;
while(*p) {
parent = *p;
- entry = rb_entry(parent, struct tree_entry, rb_node);
+ entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node);
- comp = comp_entry(entry, root_objectid, objectid);
- if (comp < 0)
+ if (file_offset < entry->file_offset)
p = &(*p)->rb_left;
- else if (comp > 0)
+ else if (file_offset >= entry_end(entry))
p = &(*p)->rb_right;
else
return parent;
@@ -74,24 +56,23 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid,
return NULL;
}
-static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid,
- u64 objectid, struct rb_node **prev_ret)
+static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
+ struct rb_node **prev_ret)
{
struct rb_node * n = root->rb_node;
struct rb_node *prev = NULL;
- struct tree_entry *entry;
- struct tree_entry *prev_entry = NULL;
- int comp;
+ struct rb_node *test;
+ struct btrfs_ordered_extent *entry;
+ struct btrfs_ordered_extent *prev_entry = NULL;
while(n) {
- entry = rb_entry(n, struct tree_entry, rb_node);
+ entry = rb_entry(n, struct btrfs_ordered_extent, rb_node);
prev = n;
prev_entry = entry;
- comp = comp_entry(entry, root_objectid, objectid);
- if (comp < 0)
+ if (file_offset < entry->file_offset)
n = n->rb_left;
- else if (comp > 0)
+ else if (file_offset >= entry_end(entry))
n = n->rb_right;
else
return n;
@@ -99,195 +80,329 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid,
if (!prev_ret)
return NULL;
- while(prev && comp_entry(prev_entry, root_objectid, objectid) >= 0) {
- prev = rb_next(prev);
- prev_entry = rb_entry(prev, struct tree_entry, rb_node);
+ while(prev && file_offset >= entry_end(prev_entry)) {
+ test = rb_next(prev);
+ if (!test)
+ break;
+ prev_entry = rb_entry(test, struct btrfs_ordered_extent,
+ rb_node);
+ if (file_offset < entry_end(prev_entry))
+ break;
+
+ prev = test;
+ }
+ if (prev)
+ prev_entry = rb_entry(prev, struct btrfs_ordered_extent,
+ rb_node);
+ while(prev && file_offset < entry_end(prev_entry)) {
+ test = rb_prev(prev);
+ if (!test)
+ break;
+ prev_entry = rb_entry(test, struct btrfs_ordered_extent,
+ rb_node);
+ prev = test;
}
*prev_ret = prev;
return NULL;
}
-static inline struct rb_node *tree_search(struct rb_root *root,
- u64 root_objectid, u64 objectid)
+static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
+{
+ if (file_offset < entry->file_offset ||
+ entry->file_offset + entry->len <= file_offset)
+ return 0;
+ return 1;
+}
+
+static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
+ u64 file_offset)
{
+ struct rb_root *root = &tree->tree;
struct rb_node *prev;
struct rb_node *ret;
- ret = __tree_search(root, root_objectid, objectid, &prev);
+ struct btrfs_ordered_extent *entry;
+
+ if (tree->last) {
+ entry = rb_entry(tree->last, struct btrfs_ordered_extent,
+ rb_node);
+ if (offset_in_entry(entry, file_offset))
+ return tree->last;
+ }
+ ret = __tree_search(root, file_offset, &prev);
if (!ret)
- return prev;
+ ret = prev;
+ if (ret)
+ tree->last = ret;
return ret;
}
-int btrfs_add_ordered_inode(struct inode *inode)
+int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
+ u64 start, u64 len)
{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 root_objectid = root->root_key.objectid;
- u64 transid = root->fs_info->running_transaction->transid;
- struct tree_entry *entry;
- struct rb_node *node;
struct btrfs_ordered_inode_tree *tree;
+ struct rb_node *node;
+ struct btrfs_ordered_extent *entry;
- if (transid <= BTRFS_I(inode)->ordered_trans)
- return 0;
-
- tree = &root->fs_info->running_transaction->ordered_inode_tree;
-
- read_lock(&tree->lock);
- node = __tree_search(&tree->tree, root_objectid, inode->i_ino, NULL);
- read_unlock(&tree->lock);
- if (node) {
- return 0;
- }
-
- entry = kmalloc(sizeof(*entry), GFP_NOFS);
+ tree = &BTRFS_I(inode)->ordered_tree;
+ entry = kzalloc(sizeof(*entry), GFP_NOFS);
if (!entry)
return -ENOMEM;
- write_lock(&tree->lock);
- entry->objectid = inode->i_ino;
- entry->root_objectid = root_objectid;
+ mutex_lock(&tree->mutex);
+ entry->file_offset = file_offset;
+ entry->start = start;
+ entry->len = len;
entry->inode = inode;
+ /* one ref for the tree */
+ atomic_set(&entry->refs, 1);
+ init_waitqueue_head(&entry->wait);
+ INIT_LIST_HEAD(&entry->list);
- node = tree_insert(&tree->tree, root_objectid,
- inode->i_ino, &entry->rb_node);
-
- BTRFS_I(inode)->ordered_trans = transid;
- if (!node)
- igrab(inode);
-
- write_unlock(&tree->lock);
+ node = tree_insert(&tree->tree, file_offset,
+ &entry->rb_node);
+ if (node) {
+ entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+ atomic_inc(&entry->refs);
+ }
+ set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
+ entry_end(entry) - 1, GFP_NOFS);
- if (node)
- kfree(entry);
+ set_bit(BTRFS_ORDERED_START, &entry->flags);
+ mutex_unlock(&tree->mutex);
+ BUG_ON(node);
return 0;
}
-int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
- u64 *root_objectid, u64 *objectid,
- struct inode **inode)
+int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum)
{
- struct tree_entry *entry;
+ struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
+ struct btrfs_ordered_extent *entry;
- write_lock(&tree->lock);
- node = tree_search(&tree->tree, *root_objectid, *objectid);
+ tree = &BTRFS_I(inode)->ordered_tree;
+ mutex_lock(&tree->mutex);
+ node = tree_search(tree, sum->file_offset);
if (!node) {
- write_unlock(&tree->lock);
- return 0;
+search_fail:
+printk("add ordered sum failed to find a node for inode %lu offset %Lu\n", inode->i_ino, sum->file_offset);
+ node = rb_first(&tree->tree);
+ while(node) {
+ entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+ printk("entry %Lu %Lu %Lu\n", entry->file_offset, entry->file_offset + entry->len, entry->start);
+ node = rb_next(node);
+ }
+ BUG();
}
- entry = rb_entry(node, struct tree_entry, rb_node);
+ BUG_ON(!node);
- while(comp_entry(entry, *root_objectid, *objectid) >= 0) {
- node = rb_next(node);
- if (!node)
- break;
- entry = rb_entry(node, struct tree_entry, rb_node);
- }
- if (!node) {
- write_unlock(&tree->lock);
- return 0;
+ entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+ if (!offset_in_entry(entry, sum->file_offset)) {
+ goto search_fail;
}
- *root_objectid = entry->root_objectid;
- *inode = entry->inode;
- atomic_inc(&entry->inode->i_count);
- *objectid = entry->objectid;
- write_unlock(&tree->lock);
- return 1;
+ list_add_tail(&sum->list, &entry->list);
+ mutex_unlock(&tree->mutex);
+ return 0;
}
-int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
- u64 *root_objectid, u64 *objectid,
- struct inode **inode)
+int btrfs_dec_test_ordered_pending(struct inode *inode,
+ u64 file_offset, u64 io_size)
{
- struct tree_entry *entry;
+ struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
-
- write_lock(&tree->lock);
- node = tree_search(&tree->tree, *root_objectid, *objectid);
+ struct btrfs_ordered_extent *entry;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ int ret;
+
+ tree = &BTRFS_I(inode)->ordered_tree;
+ mutex_lock(&tree->mutex);
+ clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1,
+ GFP_NOFS);
+ node = tree_search(tree, file_offset);
if (!node) {
- write_unlock(&tree->lock);
- return 0;
+ ret = 1;
+ goto out;
}
- entry = rb_entry(node, struct tree_entry, rb_node);
- while(comp_entry(entry, *root_objectid, *objectid) >= 0) {
- node = rb_next(node);
- if (!node)
- break;
- entry = rb_entry(node, struct tree_entry, rb_node);
+ entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+ if (!offset_in_entry(entry, file_offset)) {
+ ret = 1;
+ goto out;
}
- if (!node) {
- write_unlock(&tree->lock);
- return 0;
+
+ ret = test_range_bit(io_tree, entry->file_offset,
+ entry->file_offset + entry->len - 1,
+ EXTENT_ORDERED, 0);
+ if (!test_bit(BTRFS_ORDERED_START, &entry->flags)) {
+printk("inode %lu not ready yet for extent %Lu %Lu\n", inode->i_ino, entry->file_offset, entry_end(entry));
}
+ if (ret == 0)
+ ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
+out:
+ mutex_unlock(&tree->mutex);
+ return ret == 0;
+}
- *root_objectid = entry->root_objectid;
- *objectid = entry->objectid;
- *inode = entry->inode;
- atomic_inc(&entry->inode->i_count);
- rb_erase(node, &tree->tree);
- write_unlock(&tree->lock);
- kfree(entry);
- return 1;
+int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
+{
+ if (atomic_dec_and_test(&entry->refs))
+ kfree(entry);
+ return 0;
}
-static void __btrfs_del_ordered_inode(struct btrfs_ordered_inode_tree *tree,
- struct inode *inode,
- u64 root_objectid, u64 objectid)
+int btrfs_remove_ordered_extent(struct inode *inode,
+ struct btrfs_ordered_extent *entry)
{
- struct tree_entry *entry;
+ struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
- struct rb_node *prev;
- write_lock(&tree->lock);
- node = __tree_search(&tree->tree, root_objectid, objectid, &prev);
- if (!node) {
- write_unlock(&tree->lock);
- return;
- }
+ tree = &BTRFS_I(inode)->ordered_tree;
+ mutex_lock(&tree->mutex);
+ node = &entry->rb_node;
rb_erase(node, &tree->tree);
- BTRFS_I(inode)->ordered_trans = 0;
- write_unlock(&tree->lock);
- atomic_dec(&inode->i_count);
- entry = rb_entry(node, struct tree_entry, rb_node);
- kfree(entry);
- return;
+ tree->last = NULL;
+ set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
+ mutex_unlock(&tree->mutex);
+ wake_up(&entry->wait);
+ return 0;
}
-void btrfs_del_ordered_inode(struct inode *inode, int force)
+void btrfs_wait_ordered_extent(struct inode *inode,
+ struct btrfs_ordered_extent *entry)
{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- u64 root_objectid = root->root_key.objectid;
+ u64 start = entry->file_offset;
+ u64 end = start + entry->len - 1;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
+ do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE);
+#else
+ do_sync_mapping_range(inode->i_mapping, start, end,
+ SYNC_FILE_RANGE_WRITE);
+#endif
+ wait_event(entry->wait,
+ test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags));
+}
- if (!BTRFS_I(inode)->ordered_trans) {
- return;
- }
+static void btrfs_start_ordered_extent(struct inode *inode,
+ struct btrfs_ordered_extent *entry, int wait)
+{
+ u64 start = entry->file_offset;
+ u64 end = start + entry->len - 1;
- if (!force && (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) ||
- mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
- return;
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
+ do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE);
+#else
+ do_sync_mapping_range(inode->i_mapping, start, end,
+ SYNC_FILE_RANGE_WRITE);
+#endif
+ if (wait)
+ wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
+ &entry->flags));
+}
- spin_lock(&root->fs_info->new_trans_lock);
- if (root->fs_info->running_transaction) {
- struct btrfs_ordered_inode_tree *tree;
- tree = &root->fs_info->running_transaction->ordered_inode_tree;
- __btrfs_del_ordered_inode(tree, inode, root_objectid,
- inode->i_ino);
+void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
+{
+ u64 end;
+ struct btrfs_ordered_extent *ordered;
+ int found;
+ int should_wait = 0;
+
+again:
+ if (start + len < start)
+ end = (u64)-1;
+ else
+ end = start + len - 1;
+ found = 0;
+ while(1) {
+ ordered = btrfs_lookup_first_ordered_extent(inode, end);
+ if (!ordered) {
+ break;
+ }
+ if (ordered->file_offset >= start + len) {
+ btrfs_put_ordered_extent(ordered);
+ break;
+ }
+ if (ordered->file_offset + ordered->len < start) {
+ btrfs_put_ordered_extent(ordered);
+ break;
+ }
+ btrfs_start_ordered_extent(inode, ordered, should_wait);
+ found++;
+ end = ordered->file_offset;
+ btrfs_put_ordered_extent(ordered);
+ if (end == 0)
+ break;
+ end--;
+ }
+ if (should_wait && found) {
+ should_wait = 0;
+ goto again;
}
- spin_unlock(&root->fs_info->new_trans_lock);
}
-int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode)
+int btrfs_add_ordered_pending(struct inode *inode,
+ struct btrfs_ordered_extent *ordered,
+ u64 start, u64 len)
{
- struct btrfs_transaction *cur = root->fs_info->running_transaction;
- while(cur == root->fs_info->running_transaction &&
- atomic_read(&BTRFS_I(inode)->ordered_writeback)) {
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
- congestion_wait(WRITE, HZ/20);
-#else
- blk_congestion_wait(WRITE, HZ/20);
-#endif
- }
+ WARN_ON(1);
return 0;
+#if 0
+ int ret;
+ struct btrfs_ordered_inode_tree *tree;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+
+ tree = &BTRFS_I(inode)->ordered_tree;
+ mutex_lock(&tree->mutex);
+ if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ set_extent_ordered(io_tree, start, start + len - 1, GFP_NOFS);
+ ret = 0;
+out:
+ mutex_unlock(&tree->mutex);
+ return ret;
+#endif
+}
+
+struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
+ u64 file_offset)
+{
+ struct btrfs_ordered_inode_tree *tree;
+ struct rb_node *node;
+ struct btrfs_ordered_extent *entry = NULL;
+
+ tree = &BTRFS_I(inode)->ordered_tree;
+ mutex_lock(&tree->mutex);
+ node = tree_search(tree, file_offset);
+ if (!node)
+ goto out;
+
+ entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+ if (!offset_in_entry(entry, file_offset))
+ entry = NULL;
+ if (entry)
+ atomic_inc(&entry->refs);
+out:
+ mutex_unlock(&tree->mutex);
+ return entry;
+}
+
+struct btrfs_ordered_extent *
+btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset)
+{
+ struct btrfs_ordered_inode_tree *tree;
+ struct rb_node *node;
+ struct btrfs_ordered_extent *entry = NULL;
+
+ tree = &BTRFS_I(inode)->ordered_tree;
+ mutex_lock(&tree->mutex);
+ node = tree_search(tree, file_offset);
+ if (!node)
+ goto out;
+
+ entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+ atomic_inc(&entry->refs);
+out:
+ mutex_unlock(&tree->mutex);
+ return entry;
}
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 4fa78736423e..33292c5fe90c 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -20,24 +20,73 @@
#define __BTRFS_ORDERED_DATA__
+/*
+ * Tree of ordered extents.  struct btrfs_inode embeds one of these as its
+ * ordered_tree member.
+ */
struct btrfs_ordered_inode_tree {
- rwlock_t lock;
+ /* taken by the lookup helpers around tree_search() and the refs bump */
+ struct mutex mutex;
+ /* rb-tree root; entries are reached through btrfs_ordered_extent.rb_node */
struct rb_root tree;
+ /* set to NULL at init; NOTE(review): looks like a cached search position - confirm in ordered-data.c */
+ struct rb_node *last;
};
+/*
+ * A single checksum record: an offset paired with a 32-bit sum.
+ * btrfs_ordered_sum_size() budgets one of these per sector, so presumably
+ * each record covers one sector - confirm against the code that fills it.
+ */
+struct btrfs_sector_sum {
+ u64 offset;
+ u32 sum;
+};
+
+/*
+ * Header for a group of btrfs_sector_sum records that can be linked onto a
+ * list.
+ *
+ * btrfs_ordered_sum_size() sizes an allocation as sizeof(this struct) plus
+ * one btrfs_sector_sum per sector, so 'sums' looks like the first slot of a
+ * trailing array of records.  NOTE(review): confirm against the users, and
+ * note that the embedded slot makes the computed size one record larger
+ * than the sector count alone would need.
+ */
+struct btrfs_ordered_sum {
+ /* NOTE(review): presumably the file range these sums describe - confirm */
+ u64 file_offset;
+ u64 len;
+ struct list_head list;
+ struct btrfs_sector_sum sums;
+};
+
+/* bits for the flags field */
+#define BTRFS_ORDERED_IO_DONE 0 /* set when all the pages are written */
+#define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */
+#define BTRFS_ORDERED_START 2 /* set when tree setup */
+
+/*
+ * One entry in a btrfs_ordered_inode_tree.  The lookup helpers hand these
+ * out with 'refs' incremented.
+ */
+struct btrfs_ordered_extent {
+ /* offset that the tree lookups compare against */
+ u64 file_offset;
+ /* NOTE(review): start/len mirror the btrfs_add_ordered_extent() arguments; presumably the extent's start and byte length - confirm */
+ u64 start;
+ u64 len;
+ /* holds the BTRFS_ORDERED_* bit numbers defined above */
+ unsigned long flags;
+ /* incremented by btrfs_lookup_ordered_extent() and btrfs_lookup_first_ordered_extent() */
+ atomic_t refs;
+ struct list_head list;
+ struct inode *inode;
+ wait_queue_head_t wait;
+ /* tree linkage; lookups recover the entry from it with rb_entry() */
+ struct rb_node rb_node;
+};
+
+
+/*
+ * Number of bytes to allocate for a btrfs_ordered_sum describing 'bytes' of
+ * data: the struct itself plus one btrfs_sector_sum for every sector of
+ * root->sectorsize bytes, with a partial trailing sector counted as a whole
+ * one.
+ */
+static inline int btrfs_ordered_sum_size(struct btrfs_root *root, u64 bytes)
+{
+	unsigned long nr_sectors;
+
+	/* round up so a partial last sector still gets a record */
+	nr_sectors = (bytes + root->sectorsize - 1) / root->sectorsize;
+	return sizeof(struct btrfs_ordered_sum) +
+	       nr_sectors * sizeof(struct btrfs_sector_sum);
+}
+
+/*
+ * Put an ordered tree into its empty state: initialise the mutex, clear the
+ * rb-tree root and clear the 'last' node pointer.
+ */
static inline void
btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
{
- rwlock_init(&t->lock);
+ mutex_init(&t->mutex);
t->tree.rb_node = NULL;
+ t->last = NULL;
}
-int btrfs_add_ordered_inode(struct inode *inode);
-int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
- u64 *root_objectid, u64 *objectid,
- struct inode **inode);
-int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
- u64 *root_objectid, u64 *objectid,
- struct inode **inode);
-void btrfs_del_ordered_inode(struct inode *inode, int force);
-int btrfs_ordered_throttle(struct btrfs_root *root, struct inode *inode);
+/* NOTE(review): presumably drops a reference taken by the lookup helpers below - confirm in ordered-data.c */
+int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
+int btrfs_remove_ordered_extent(struct inode *inode,
+ struct btrfs_ordered_extent *entry);
+int btrfs_dec_test_ordered_pending(struct inode *inode,
+ u64 file_offset, u64 io_size);
+int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
+ u64 start, u64 len);
+int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum);
+/* returns the entry containing file_offset with its refs incremented, or NULL */
+struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
+ u64 file_offset);
+void btrfs_wait_ordered_extent(struct inode *inode,
+ struct btrfs_ordered_extent *entry);
+void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
+/* returns the entry tree_search() finds for file_offset (no containment check) with its refs incremented, or NULL */
+struct btrfs_ordered_extent *
+btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
+int btrfs_add_ordered_pending(struct inode *inode,
+ struct btrfs_ordered_extent *ordered,
+ u64 start, u64 len);
#endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index a8a3cb03de59..86a5acc19ce7 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -67,7 +67,6 @@ static noinline int join_transaction(struct btrfs_root *root)
cur_trans->start_time = get_seconds();
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
- btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree);
extent_io_tree_init(&cur_trans->dirty_pages,
root->fs_info->btree_inode->i_mapping,
GFP_NOFS);
@@ -158,10 +157,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
wake_up(&cur_trans->writer_wait);
if (cur_trans->in_commit && throttle) {
- int ret;
+ DEFINE_WAIT(wait);
mutex_unlock(&root->fs_info->trans_mutex);
- ret = wait_for_commit(root, cur_trans);
- BUG_ON(ret);
+ prepare_to_wait(&root->fs_info->transaction_throttle, &wait,
+ TASK_UNINTERRUPTIBLE);
+ schedule();
+ finish_wait(&root->fs_info->transaction_throttle, &wait);
mutex_lock(&root->fs_info->trans_mutex);
}
@@ -486,58 +487,6 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
return ret;
}
-int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- struct btrfs_transaction *cur_trans = trans->transaction;
- struct inode *inode;
- u64 root_objectid = 0;
- u64 objectid = 0;
- int ret;
-
- atomic_inc(&root->fs_info->throttles);
- while(1) {
- ret = btrfs_find_first_ordered_inode(
- &cur_trans->ordered_inode_tree,
- &root_objectid, &objectid, &inode);
- if (!ret)
- break;
-
- mutex_unlock(&root->fs_info->trans_mutex);
-
- if (S_ISREG(inode->i_mode)) {
- atomic_inc(&BTRFS_I(inode)->ordered_writeback);
- filemap_fdatawrite(inode->i_mapping);
- atomic_dec(&BTRFS_I(inode)->ordered_writeback);
- }
- iput(inode);
-
- mutex_lock(&root->fs_info->trans_mutex);
- }
- while(1) {
- root_objectid = 0;
- objectid = 0;
- ret = btrfs_find_del_first_ordered_inode(
- &cur_trans->ordered_inode_tree,
- &root_objectid, &objectid, &inode);
- if (!ret)
- break;
- mutex_unlock(&root->fs_info->trans_mutex);
-
- if (S_ISREG(inode->i_mode)) {
- atomic_inc(&BTRFS_I(inode)->ordered_writeback);
- filemap_write_and_wait(inode->i_mapping);
- atomic_dec(&BTRFS_I(inode)->ordered_writeback);
- }
- atomic_dec(&inode->i_count);
- iput(inode);
-
- mutex_lock(&root->fs_info->trans_mutex);
- }
- atomic_dec(&root->fs_info->throttles);
- return 0;
-}
-
static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_pending_snapshot *pending)
@@ -666,6 +615,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
extent_io_tree_init(pinned_copy,
root->fs_info->btree_inode->i_mapping, GFP_NOFS);
+printk("commit trans %Lu\n", trans->transid);
trans->transaction->in_commit = 1;
cur_trans = trans->transaction;
if (cur_trans->list.prev != &root->fs_info->trans_list) {
@@ -699,8 +649,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
mutex_lock(&root->fs_info->trans_mutex);
finish_wait(&cur_trans->writer_wait, &wait);
- ret = btrfs_write_ordered_inodes(trans, root);
-
} while (cur_trans->num_writers > 1 ||
(cur_trans->num_joined != joined));
@@ -736,6 +684,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
btrfs_copy_pinned(root, pinned_copy);
+ wake_up(&root->fs_info->transaction_throttle);
+
mutex_unlock(&root->fs_info->trans_mutex);
ret = btrfs_write_and_wait_transaction(trans, root);
BUG_ON(ret);
@@ -758,6 +708,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
mutex_unlock(&root->fs_info->trans_mutex);
+printk("done commit trans %Lu\n", trans->transid);
kmem_cache_free(btrfs_trans_handle_cachep, trans);
if (root->fs_info->closing) {
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 9ccd5a5b170f..910350cd4cf0 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -19,7 +19,6 @@
#ifndef __BTRFS_TRANSACTION__
#define __BTRFS_TRANSACTION__
#include "btrfs_inode.h"
-#include "ordered-data.h"
struct btrfs_transaction {
u64 transid;
@@ -31,7 +30,6 @@ struct btrfs_transaction {
struct list_head list;
struct extent_io_tree dirty_pages;
unsigned long start_time;
- struct btrfs_ordered_inode_tree ordered_inode_tree;
wait_queue_head_t writer_wait;
wait_queue_head_t commit_wait;
struct list_head pending_snapshots;
@@ -88,8 +86,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
int btrfs_clean_old_snapshots(struct btrfs_root *root);
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
-int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
- struct btrfs_root *root);
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
#endif