summaryrefslogtreecommitdiff
path: root/fs/ext4/extents.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r--fs/ext4/extents.c394
1 files changed, 296 insertions, 98 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 715264b4bae..94c8ee81f5e 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -195,8 +195,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
if (S_ISREG(inode->i_mode))
block_group++;
}
- bg_start = (block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
- le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
+ bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
/*
@@ -296,29 +295,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
* to allocate @blocks
* Worse case is one block per extent
*/
-int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
+int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
{
- int lcap, icap, rcap, leafs, idxs, num;
- int newextents = blocks;
-
- rcap = ext4_ext_space_root_idx(inode, 0);
- lcap = ext4_ext_space_block(inode, 0);
- icap = ext4_ext_space_block_idx(inode, 0);
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ int idxs, num = 0;
- /* number of new leaf blocks needed */
- num = leafs = (newextents + lcap - 1) / lcap;
+ idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
+ / sizeof(struct ext4_extent_idx));
/*
- * Worse case, we need separate index block(s)
- * to link all new leaf blocks
+ * If the new delayed allocation block is contiguous with the
+ * previous da block, it can share index blocks with the
+ * previous block, so we only need to allocate a new index
+ * block every idxs leaf blocks. At ldxs**2 blocks, we need
+ * an additional index block, and at ldxs**3 blocks, yet
+ * another index blocks.
*/
- idxs = (leafs + icap - 1) / icap;
- do {
- num += idxs;
- idxs = (idxs + icap - 1) / icap;
- } while (idxs > rcap);
+ if (ei->i_da_metadata_calc_len &&
+ ei->i_da_metadata_calc_last_lblock+1 == lblock) {
+ if ((ei->i_da_metadata_calc_len % idxs) == 0)
+ num++;
+ if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
+ num++;
+ if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
+ num++;
+ ei->i_da_metadata_calc_len = 0;
+ } else
+ ei->i_da_metadata_calc_len++;
+ ei->i_da_metadata_calc_last_lblock++;
+ return num;
+ }
- return num;
+ /*
+ * In the worst case we need a new set of index blocks at
+ * every level of the inode's extent tree.
+ */
+ ei->i_da_metadata_calc_len = 1;
+ ei->i_da_metadata_calc_last_lblock = lblock;
+ return ext_depth(inode) + 1;
}
static int
@@ -425,7 +439,7 @@ static int __ext4_ext_check(const char *function, struct inode *inode,
return 0;
corrupted:
- ext4_error(inode->i_sb, function,
+ __ext4_error(inode->i_sb, function,
"bad header/extent in inode #%lu: %s - magic %x, "
"entries %u, max %u(%u), depth %u(%u)",
inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
@@ -688,7 +702,12 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
}
eh = ext_block_hdr(bh);
ppos++;
- BUG_ON(ppos > depth);
+ if (unlikely(ppos > depth)) {
+ put_bh(bh);
+ EXT4_ERROR_INODE(inode,
+ "ppos %d > depth %d", ppos, depth);
+ goto err;
+ }
path[ppos].p_bh = bh;
path[ppos].p_hdr = eh;
i--;
@@ -734,7 +753,12 @@ int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
if (err)
return err;
- BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block));
+ if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
+ EXT4_ERROR_INODE(inode,
+ "logical %d == ei_block %d!",
+ logical, le32_to_cpu(curp->p_idx->ei_block));
+ return -EIO;
+ }
len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
/* insert after */
@@ -764,9 +788,17 @@ int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
ext4_idx_store_pblock(ix, ptr);
le16_add_cpu(&curp->p_hdr->eh_entries, 1);
- BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
- > le16_to_cpu(curp->p_hdr->eh_max));
- BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr));
+ if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
+ > le16_to_cpu(curp->p_hdr->eh_max))) {
+ EXT4_ERROR_INODE(inode,
+ "logical %d == ei_block %d!",
+ logical, le32_to_cpu(curp->p_idx->ei_block));
+ return -EIO;
+ }
+ if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
+ EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
+ return -EIO;
+ }
err = ext4_ext_dirty(handle, inode, curp);
ext4_std_error(inode->i_sb, err);
@@ -804,7 +836,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
/* if current leaf will be split, then we should use
* border from split point */
- BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr));
+ if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
+ EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
+ return -EIO;
+ }
if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
border = path[depth].p_ext[1].ee_block;
ext_debug("leaf will be split."
@@ -845,7 +880,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
/* initialize new leaf */
newblock = ablocks[--a];
- BUG_ON(newblock == 0);
+ if (unlikely(newblock == 0)) {
+ EXT4_ERROR_INODE(inode, "newblock == 0!");
+ err = -EIO;
+ goto cleanup;
+ }
bh = sb_getblk(inode->i_sb, newblock);
if (!bh) {
err = -EIO;
@@ -865,7 +904,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
ex = EXT_FIRST_EXTENT(neh);
/* move remainder of path[depth] to the new leaf */
- BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max);
+ if (unlikely(path[depth].p_hdr->eh_entries !=
+ path[depth].p_hdr->eh_max)) {
+ EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
+ path[depth].p_hdr->eh_entries,
+ path[depth].p_hdr->eh_max);
+ err = -EIO;
+ goto cleanup;
+ }
/* start copy from next extent */
/* TODO: we could do it by single memmove */
m = 0;
@@ -912,7 +958,11 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
/* create intermediate indexes */
k = depth - at - 1;
- BUG_ON(k < 0);
+ if (unlikely(k < 0)) {
+ EXT4_ERROR_INODE(inode, "k %d < 0!", k);
+ err = -EIO;
+ goto cleanup;
+ }
if (k)
ext_debug("create %d intermediate indices\n", k);
/* insert new index into current index block */
@@ -949,8 +999,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
EXT_MAX_INDEX(path[i].p_hdr));
- BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) !=
- EXT_LAST_INDEX(path[i].p_hdr));
+ if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
+ EXT_LAST_INDEX(path[i].p_hdr))) {
+ EXT4_ERROR_INODE(inode,
+ "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
+ le32_to_cpu(path[i].p_ext->ee_block));
+ err = -EIO;
+ goto cleanup;
+ }
while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
ext_debug("%d: move %d:%llu in new index %llu\n", i,
le32_to_cpu(path[i].p_idx->ei_block),
@@ -1007,7 +1063,8 @@ cleanup:
for (i = 0; i < depth; i++) {
if (!ablocks[i])
continue;
- ext4_free_blocks(handle, inode, ablocks[i], 1, 1);
+ ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
+ EXT4_FREE_BLOCKS_METADATA);
}
}
kfree(ablocks);
@@ -1187,7 +1244,10 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
struct ext4_extent *ex;
int depth, ee_len;
- BUG_ON(path == NULL);
+ if (unlikely(path == NULL)) {
+ EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
+ return -EIO;
+ }
depth = path->p_depth;
*phys = 0;
@@ -1201,15 +1261,33 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
ex = path[depth].p_ext;
ee_len = ext4_ext_get_actual_len(ex);
if (*logical < le32_to_cpu(ex->ee_block)) {
- BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex);
+ if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
+ EXT4_ERROR_INODE(inode,
+ "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
+ *logical, le32_to_cpu(ex->ee_block));
+ return -EIO;
+ }
while (--depth >= 0) {
ix = path[depth].p_idx;
- BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr));
+ if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
+ EXT4_ERROR_INODE(inode,
+ "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
+ ix != NULL ? ix->ei_block : 0,
+ EXT_FIRST_INDEX(path[depth].p_hdr) != NULL ?
+ EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block : 0,
+ depth);
+ return -EIO;
+ }
}
return 0;
}
- BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len));
+ if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
+ EXT4_ERROR_INODE(inode,
+ "logical %d < ee_block %d + ee_len %d!",
+ *logical, le32_to_cpu(ex->ee_block), ee_len);
+ return -EIO;
+ }
*logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
*phys = ext_pblock(ex) + ee_len - 1;
@@ -1235,7 +1313,10 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
int depth; /* Note, NOT eh_depth; depth from top of tree */
int ee_len;
- BUG_ON(path == NULL);
+ if (unlikely(path == NULL)) {
+ EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
+ return -EIO;
+ }
depth = path->p_depth;
*phys = 0;
@@ -1249,17 +1330,32 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
ex = path[depth].p_ext;
ee_len = ext4_ext_get_actual_len(ex);
if (*logical < le32_to_cpu(ex->ee_block)) {
- BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex);
+ if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
+ EXT4_ERROR_INODE(inode,
+ "first_extent(path[%d].p_hdr) != ex",
+ depth);
+ return -EIO;
+ }
while (--depth >= 0) {
ix = path[depth].p_idx;
- BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr));
+ if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
+ EXT4_ERROR_INODE(inode,
+ "ix != EXT_FIRST_INDEX *logical %d!",
+ *logical);
+ return -EIO;
+ }
}
*logical = le32_to_cpu(ex->ee_block);
*phys = ext_pblock(ex);
return 0;
}
- BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len));
+ if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
+ EXT4_ERROR_INODE(inode,
+ "logical %d < ee_block %d + ee_len %d!",
+ *logical, le32_to_cpu(ex->ee_block), ee_len);
+ return -EIO;
+ }
if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
/* next allocated block in this leaf */
@@ -1398,8 +1494,12 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
eh = path[depth].p_hdr;
ex = path[depth].p_ext;
- BUG_ON(ex == NULL);
- BUG_ON(eh == NULL);
+
+ if (unlikely(ex == NULL || eh == NULL)) {
+ EXT4_ERROR_INODE(inode,
+ "ex %p == NULL or eh %p == NULL", ex, eh);
+ return -EIO;
+ }
if (depth == 0) {
/* there is no tree at all */
@@ -1522,8 +1622,9 @@ int ext4_ext_try_to_merge(struct inode *inode,
merge_done = 1;
WARN_ON(eh->eh_entries == 0);
if (!eh->eh_entries)
- ext4_error(inode->i_sb, "ext4_ext_try_to_merge",
- "inode#%lu, eh->eh_entries = 0!", inode->i_ino);
+ ext4_error(inode->i_sb,
+ "inode#%lu, eh->eh_entries = 0!",
+ inode->i_ino);
}
return merge_done;
@@ -1596,13 +1697,19 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
ext4_lblk_t next;
unsigned uninitialized = 0;
- BUG_ON(ext4_ext_get_actual_len(newext) == 0);
+ if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
+ EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
+ return -EIO;
+ }
depth = ext_depth(inode);
ex = path[depth].p_ext;
- BUG_ON(path[depth].p_hdr == NULL);
+ if (unlikely(path[depth].p_hdr == NULL)) {
+ EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
+ return -EIO;
+ }
/* try to insert block into found extent and return */
- if (ex && (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT)
+ if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
&& ext4_can_extents_be_merged(inode, ex, newext)) {
ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
ext4_ext_is_uninitialized(newext),
@@ -1723,7 +1830,7 @@ has_space:
merge:
/* try to merge extents to the right */
- if (flag != EXT4_GET_BLOCKS_DIO_CREATE_EXT)
+ if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
ext4_ext_try_to_merge(inode, path, nearex);
/* try to merge extents to the left */
@@ -1761,7 +1868,9 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
while (block < last && block != EXT_MAX_BLOCK) {
num = last - block;
/* find extent for this block */
+ down_read(&EXT4_I(inode)->i_data_sem);
path = ext4_ext_find_extent(inode, block, path);
+ up_read(&EXT4_I(inode)->i_data_sem);
if (IS_ERR(path)) {
err = PTR_ERR(path);
path = NULL;
@@ -1769,7 +1878,11 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
}
depth = ext_depth(inode);
- BUG_ON(path[depth].p_hdr == NULL);
+ if (unlikely(path[depth].p_hdr == NULL)) {
+ EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
+ err = -EIO;
+ break;
+ }
ex = path[depth].p_ext;
next = ext4_ext_next_allocated_block(path);
@@ -1820,7 +1933,11 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
}
- BUG_ON(cbex.ec_len == 0);
+ if (unlikely(cbex.ec_len == 0)) {
+ EXT4_ERROR_INODE(inode, "cbex.ec_len == 0");
+ err = -EIO;
+ break;
+ }
err = func(inode, path, &cbex, ex, cbdata);
ext4_ext_drop_refs(path);
@@ -1934,7 +2051,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
cex->ec_type != EXT4_EXT_CACHE_EXTENT);
- if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) {
+ if (in_range(block, cex->ec_block, cex->ec_len)) {
ex->ee_block = cpu_to_le32(cex->ec_block);
ext4_ext_store_pblock(ex, cex->ec_start);
ex->ee_len = cpu_to_le16(cex->ec_len);
@@ -1957,14 +2074,16 @@ errout:
static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path)
{
- struct buffer_head *bh;
int err;
ext4_fsblk_t leaf;
/* free index block */
path--;
leaf = idx_pblock(path->p_idx);
- BUG_ON(path->p_hdr->eh_entries == 0);
+ if (unlikely(path->p_hdr->eh_entries == 0)) {
+ EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
+ return -EIO;
+ }
err = ext4_ext_get_access(handle, inode, path);
if (err)
return err;
@@ -1973,9 +2092,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
if (err)
return err;
ext_debug("index is empty, remove it, free block %llu\n", leaf);
- bh = sb_find_get_block(inode->i_sb, leaf);
- ext4_forget(handle, 1, inode, bh, leaf);
- ext4_free_blocks(handle, inode, leaf, 1, 1);
+ ext4_free_blocks(handle, inode, 0, leaf, 1,
+ EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
return err;
}
@@ -2042,12 +2160,11 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
struct ext4_extent *ex,
ext4_lblk_t from, ext4_lblk_t to)
{
- struct buffer_head *bh;
unsigned short ee_len = ext4_ext_get_actual_len(ex);
- int i, metadata = 0;
+ int flags = EXT4_FREE_BLOCKS_FORGET;
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
- metadata = 1;
+ flags |= EXT4_FREE_BLOCKS_METADATA;
#ifdef EXTENTS_STATS
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2072,11 +2189,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
num = le32_to_cpu(ex->ee_block) + ee_len - from;
start = ext_pblock(ex) + ee_len - num;
ext_debug("free last %u blocks starting %llu\n", num, start);
- for (i = 0; i < num; i++) {
- bh = sb_find_get_block(inode->i_sb, start + i);
- ext4_forget(handle, 0, inode, bh, start + i);
- }
- ext4_free_blocks(handle, inode, start, num, metadata);
+ ext4_free_blocks(handle, inode, 0, start, num, flags);
} else if (from == le32_to_cpu(ex->ee_block)
&& to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -2108,8 +2221,10 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
if (!path[depth].p_hdr)
path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
eh = path[depth].p_hdr;
- BUG_ON(eh == NULL);
-
+ if (unlikely(path[depth].p_hdr == NULL)) {
+ EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
+ return -EIO;
+ }
/* find where to start removing */
ex = EXT_LAST_EXTENT(eh);
@@ -2167,7 +2282,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
correct_index = 1;
credits += (ext_depth(inode)) + 1;
}
- credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
+ credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
err = ext4_ext_truncate_extend_restart(handle, inode, credits);
if (err)
@@ -2972,7 +3087,7 @@ fix_extent_len:
ext4_ext_dirty(handle, inode, path + depth);
return err;
}
-static int ext4_convert_unwritten_extents_dio(handle_t *handle,
+static int ext4_convert_unwritten_extents_endio(handle_t *handle,
struct inode *inode,
struct ext4_ext_path *path)
{
@@ -3027,6 +3142,14 @@ out:
return err;
}
+static void unmap_underlying_metadata_blocks(struct block_device *bdev,
+ sector_t block, int count)
+{
+ int i;
+ for (i = 0; i < count; i++)
+ unmap_underlying_metadata(bdev, block + i);
+}
+
static int
ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock, unsigned int max_blocks,
@@ -3044,8 +3167,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
flags, allocated);
ext4_ext_show_leaf(inode, path);
- /* DIO get_block() before submit the IO, split the extent */
- if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) {
+ /* get_block() before submit the IO, split the extent */
+ if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
ret = ext4_split_unwritten_extents(handle,
inode, path, iblock,
max_blocks, flags);
@@ -3055,15 +3178,19 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
* completed
*/
if (io)
- io->flag = DIO_AIO_UNWRITTEN;
+ io->flag = EXT4_IO_UNWRITTEN;
else
- EXT4_I(inode)->i_state |= EXT4_STATE_DIO_UNWRITTEN;
+ ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
+ if (ext4_should_dioread_nolock(inode))
+ set_buffer_uninit(bh_result);
goto out;
}
- /* async DIO end_io complete, convert the filled extent to written */
- if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
- ret = ext4_convert_unwritten_extents_dio(handle, inode,
+ /* IO end_io complete, convert the filled extent to written */
+ if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
+ ret = ext4_convert_unwritten_extents_endio(handle, inode,
path);
+ if (ret >= 0)
+ ext4_update_inode_fsync_trans(handle, inode, 1);
goto out2;
}
/* buffered IO case */
@@ -3091,6 +3218,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
ret = ext4_ext_convert_to_initialized(handle, inode,
path, iblock,
max_blocks);
+ if (ret >= 0)
+ ext4_update_inode_fsync_trans(handle, inode, 1);
out:
if (ret <= 0) {
err = ret;
@@ -3098,6 +3227,30 @@ out:
} else
allocated = ret;
set_buffer_new(bh_result);
+ /*
+ * if we allocated more blocks than requested
+ * we need to make sure we unmap the extra block
+ * allocated. The actual needed block will get
+ * unmapped later when we find the buffer_head marked
+ * new.
+ */
+ if (allocated > max_blocks) {
+ unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
+ newblock + max_blocks,
+ allocated - max_blocks);
+ allocated = max_blocks;
+ }
+
+ /*
+ * If we have done fallocate with the offset that is already
+ * delayed allocated, we would have block reservation
+ * and quota reservation done in the delayed write path.
+ * But fallocate would have already updated quota and block
+ * count for this offset. So cancel these reservation
+ */
+ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+ ext4_da_update_reserve_space(inode, allocated, 0);
+
map_out:
set_buffer_mapped(bh_result);
out1:
@@ -3138,7 +3291,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
{
struct ext4_ext_path *path = NULL;
struct ext4_extent_header *eh;
- struct ext4_extent newex, *ex;
+ struct ext4_extent newex, *ex, *last_ex;
ext4_fsblk_t newblock;
int err = 0, depth, ret, cache_type;
unsigned int allocated = 0;
@@ -3190,7 +3343,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
* this situation is possible, though, _during_ tree modification;
* this is why assert can't be put in ext4_ext_find_extent()
*/
- BUG_ON(path[depth].p_ext == NULL && depth != 0);
+ if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
+ EXT4_ERROR_INODE(inode, "bad extent address "
+ "iblock: %d, depth: %d pblock %lld",
+ iblock, depth, path[depth].p_block);
+ err = -EIO;
+ goto out2;
+ }
eh = path[depth].p_hdr;
ex = path[depth].p_ext;
@@ -3205,7 +3364,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
*/
ee_len = ext4_ext_get_actual_len(ex);
/* if found extent covers block, simply return it */
- if (iblock >= ee_block && iblock < ee_block + ee_len) {
+ if (in_range(iblock, ee_block, ee_len)) {
newblock = iblock - ee_block + ee_start;
/* number of remaining blocks in the extent */
allocated = ee_len - (iblock - ee_block);
@@ -3297,21 +3456,35 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
ext4_ext_mark_uninitialized(&newex);
/*
- * io_end structure was created for every async
- * direct IO write to the middle of the file.
- * To avoid unecessary convertion for every aio dio rewrite
- * to the mid of file, here we flag the IO that is really
- * need the convertion.
+ * io_end structure was created for every IO write to an
+ * uninitialized extent. To avoid unecessary conversion,
+ * here we flag the IO that really needs the conversion.
* For non asycn direct IO case, flag the inode state
* that we need to perform convertion when IO is done.
*/
- if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) {
+ if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
if (io)
- io->flag = DIO_AIO_UNWRITTEN;
+ io->flag = EXT4_IO_UNWRITTEN;
else
- EXT4_I(inode)->i_state |=
- EXT4_STATE_DIO_UNWRITTEN;;
+ ext4_set_inode_state(inode,
+ EXT4_STATE_DIO_UNWRITTEN);
}
+ if (ext4_should_dioread_nolock(inode))
+ set_buffer_uninit(bh_result);
+ }
+
+ if (unlikely(EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL)) {
+ if (unlikely(!eh->eh_entries)) {
+ EXT4_ERROR_INODE(inode,
+ "eh->eh_entries == 0 ee_block %d",
+ ex->ee_block);
+ err = -EIO;
+ goto out2;
+ }
+ last_ex = EXT_LAST_EXTENT(eh);
+ if (iblock + ar.len > le32_to_cpu(last_ex->ee_block)
+ + ext4_ext_get_actual_len(last_ex))
+ EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
}
err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
if (err) {
@@ -3319,20 +3492,35 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
/* not a good idea to call discard here directly,
* but otherwise we'd need to call it every free() */
ext4_discard_preallocations(inode);
- ext4_free_blocks(handle, inode, ext_pblock(&newex),
- ext4_ext_get_actual_len(&newex), 0);
+ ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
+ ext4_ext_get_actual_len(&newex), 0);
goto out2;
}
/* previous routine could use block we allocated */
newblock = ext_pblock(&newex);
allocated = ext4_ext_get_actual_len(&newex);
+ if (allocated > max_blocks)
+ allocated = max_blocks;
set_buffer_new(bh_result);
- /* Cache only when it is _not_ an uninitialized extent */
- if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
+ /*
+ * Update reserved blocks/metadata blocks after successful
+ * block allocation which had been deferred till now.
+ */
+ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+ ext4_da_update_reserve_space(inode, allocated, 1);
+
+ /*
+ * Cache the extent and update transaction to commit on fdatasync only
+ * when it is _not_ an uninitialized extent.
+ */
+ if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
EXT4_EXT_CACHE_EXTENT);
+ ext4_update_inode_fsync_trans(handle, inode, 1);
+ } else
+ ext4_update_inode_fsync_trans(handle, inode, 0);
out:
if (allocated > max_blocks)
allocated = max_blocks;
@@ -3431,6 +3619,13 @@ static void ext4_falloc_update_inode(struct inode *inode,
i_size_write(inode, new_size);
if (new_size > EXT4_I(inode)->i_disksize)
ext4_update_i_disksize(inode, new_size);
+ } else {
+ /*
+ * Mark that we allocate beyond EOF so the subsequent truncate
+ * can proceed even if the new size is the same as i_size.
+ */
+ if (new_size > i_size_read(inode))
+ EXT4_I(inode)->i_flags |= EXT4_EOFBLOCKS_FL;
}
}
@@ -3535,7 +3730,7 @@ retry:
* Returns 0 on success.
*/
int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
- loff_t len)
+ ssize_t len)
{
handle_t *handle;
ext4_lblk_t block;
@@ -3567,7 +3762,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
map_bh.b_state = 0;
ret = ext4_get_blocks(handle, inode, block,
max_blocks, &map_bh,
- EXT4_GET_BLOCKS_DIO_CONVERT_EXT);
+ EXT4_GET_BLOCKS_IO_CONVERT_EXT);
if (ret <= 0) {
WARN_ON(ret <= 0);
printk(KERN_ERR "%s: ext4_ext_get_blocks "
@@ -3671,7 +3866,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
int error = 0;
/* in-inode? */
- if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) {
+ if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
struct ext4_iloc iloc;
int offset; /* offset of xattr in inode */
@@ -3699,7 +3894,6 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len)
{
ext4_lblk_t start_blk;
- ext4_lblk_t len_blks;
int error = 0;
/* fallback to generic here if not in extents fmt */
@@ -3713,17 +3907,21 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
error = ext4_xattr_fiemap(inode, fieinfo);
} else {
+ ext4_lblk_t len_blks;
+ __u64 last_blk;
+
start_blk = start >> inode->i_sb->s_blocksize_bits;
- len_blks = len >> inode->i_sb->s_blocksize_bits;
+ last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
+ if (last_blk >= EXT_MAX_BLOCK)
+ last_blk = EXT_MAX_BLOCK-1;
+ len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
/*
* Walk the extent tree gathering extent information.
* ext4_ext_fiemap_cb will push extents back to user.
*/
- down_read(&EXT4_I(inode)->i_data_sem);
error = ext4_ext_walk_space(inode, start_blk, len_blks,
ext4_ext_fiemap_cb, fieinfo);
- up_read(&EXT4_I(inode)->i_data_sem);
}
return error;