diff options
Diffstat (limited to 'fs/ext4/extents.c')
-rw-r--r-- | fs/ext4/extents.c | 117 |
1 files changed, 71 insertions, 46 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0554c48cb1fd..7516fb9c0bd5 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -117,11 +117,33 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, struct ext4_extent *ex; depth = path->p_depth; - /* try to predict block placement */ + /* + * Try to predict block placement assuming that we are + * filling in a file which will eventually be + * non-sparse --- i.e., in the case of libbfd writing + * an ELF object sections out-of-order but in a way + * the eventually results in a contiguous object or + * executable file, or some database extending a table + * space file. However, this is actually somewhat + * non-ideal if we are writing a sparse file such as + * qemu or KVM writing a raw image file that is going + * to stay fairly sparse, since it will end up + * fragmenting the file system's free space. Maybe we + * should have some hueristics or some way to allow + * userspace to pass a hint to file system, + * especially if the latter case turns out to be + * common. + */ ex = path[depth].p_ext; - if (ex) - return (ext4_ext_pblock(ex) + - (block - le32_to_cpu(ex->ee_block))); + if (ex) { + ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex); + ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block); + + if (block > ext_block) + return ext_pblk + (block - ext_block); + else + return ext_pblk - (ext_block - block); + } /* it looks like index is empty; * try to find starting block from index itself */ @@ -244,7 +266,7 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check) * to allocate @blocks * Worse case is one block per extent */ -int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock) +int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock) { struct ext4_inode_info *ei = EXT4_I(inode); int idxs, num = 0; @@ -1872,12 +1894,10 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, cbex.ec_block = start; cbex.ec_len = end - start; cbex.ec_start = 0; - cbex.ec_type = EXT4_EXT_CACHE_GAP; } else { cbex.ec_block = le32_to_cpu(ex->ee_block); cbex.ec_len = ext4_ext_get_actual_len(ex); cbex.ec_start = ext4_ext_pblock(ex); - cbex.ec_type = EXT4_EXT_CACHE_EXTENT; } if (unlikely(cbex.ec_len == 0)) { @@ -1917,13 +1937,12 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, static void ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, - __u32 len, ext4_fsblk_t start, int type) + __u32 len, ext4_fsblk_t start) { struct ext4_ext_cache *cex; BUG_ON(len == 0); spin_lock(&EXT4_I(inode)->i_block_reservation_lock); cex = &EXT4_I(inode)->i_cached_extent; - cex->ec_type = type; cex->ec_block = block; cex->ec_len = len; cex->ec_start = start; @@ -1976,15 +1995,18 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, } ext_debug(" -> %u:%lu\n", lblock, len); - ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP); + ext4_ext_put_in_cache(inode, lblock, len, 0); } +/* + * Return 0 if cache is invalid; 1 if the cache is valid + */ static int ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, struct ext4_extent *ex) { struct ext4_ext_cache *cex; - int ret = EXT4_EXT_CACHE_NO; + int ret = 0; /* * We borrow i_block_reservation_lock to protect i_cached_extent @@ -1993,11 +2015,9 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, cex = &EXT4_I(inode)->i_cached_extent; /* has cache valid data? */ - if (cex->ec_type == EXT4_EXT_CACHE_NO) + if (cex->ec_len == 0) goto errout; - BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && - cex->ec_type != EXT4_EXT_CACHE_EXTENT); if (in_range(block, cex->ec_block, cex->ec_len)) { ex->ee_block = cpu_to_le32(cex->ec_block); ext4_ext_store_pblock(ex, cex->ec_start); @@ -2005,7 +2025,7 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block, ext_debug("%u cached by %u:%u:%llu\n", block, cex->ec_block, cex->ec_len, cex->ec_start); - ret = cex->ec_type; + ret = 1; } errout: spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); @@ -2824,15 +2844,15 @@ fix_extent_len: * ext4_get_blocks_dio_write() when DIO to write * to an uninitialized extent. * - * Writing to an uninitized extent may result in splitting the uninitialized - * extent into multiple /intialized unintialized extents (up to three) + * Writing to an uninitialized extent may result in splitting the uninitialized + * extent into multiple /initialized uninitialized extents (up to three) * There are three possibilities: * a> There is no split required: Entire extent should be uninitialized * b> Splits in two extents: Write is happening at either end of the extent * c> Splits in three extents: Somone is writing in middle of the extent * * One of more index blocks maybe needed if the extent tree grow after - * the unintialized extent split. To prevent ENOSPC occur at the IO + * the uninitialized extent split. To prevent ENOSPC occur at the IO * complete, we need to split the uninitialized extent before DIO submit * the IO. The uninitialized extent called at this time will be split * into three uninitialized extent(at most). After IO complete, the part @@ -3082,7 +3102,7 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev, * Handle EOFBLOCKS_FL flag, clearing it if necessary */ static int check_eofblocks_fl(handle_t *handle, struct inode *inode, - struct ext4_map_blocks *map, + ext4_lblk_t lblk, struct ext4_ext_path *path, unsigned int len) { @@ -3112,7 +3132,7 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode, * this turns out to be false, we can bail out from this * function immediately. */ - if (map->m_lblk + len < le32_to_cpu(last_ex->ee_block) + + if (lblk + len < le32_to_cpu(last_ex->ee_block) + ext4_ext_get_actual_len(last_ex)) return 0; /* @@ -3154,9 +3174,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, * that this IO needs to convertion to written when IO is * completed */ - if (io) + if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { io->flag = EXT4_IO_END_UNWRITTEN; - else + atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); + } else ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); if (ext4_should_dioread_nolock(inode)) map->m_flags |= EXT4_MAP_UNINIT; @@ -3168,8 +3189,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, path); if (ret >= 0) { ext4_update_inode_fsync_trans(handle, inode, 1); - err = check_eofblocks_fl(handle, inode, map, path, - map->m_len); + err = check_eofblocks_fl(handle, inode, map->m_lblk, + path, map->m_len); } else err = ret; goto out2; @@ -3199,7 +3220,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, ret = ext4_ext_convert_to_initialized(handle, inode, map, path); if (ret >= 0) { ext4_update_inode_fsync_trans(handle, inode, 1); - err = check_eofblocks_fl(handle, inode, map, path, map->m_len); + err = check_eofblocks_fl(handle, inode, map->m_lblk, path, + map->m_len); if (err < 0) goto out2; } @@ -3276,7 +3298,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, struct ext4_extent_header *eh; struct ext4_extent newex, *ex; ext4_fsblk_t newblock; - int err = 0, depth, ret, cache_type; + int err = 0, depth, ret; unsigned int allocated = 0; struct ext4_allocation_request ar; ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; @@ -3285,9 +3307,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, map->m_lblk, map->m_len, inode->i_ino); /* check in cache */ - cache_type = ext4_ext_in_cache(inode, map->m_lblk, &newex); - if (cache_type) { - if (cache_type == EXT4_EXT_CACHE_GAP) { + if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) { + if (!newex.ee_start_lo && !newex.ee_start_hi) { if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { /* * block isn't allocated yet and @@ -3296,7 +3317,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, goto out2; } /* we should allocate requested block */ - } else if (cache_type == EXT4_EXT_CACHE_EXTENT) { + } else { /* block is already allocated */ newblock = map->m_lblk - le32_to_cpu(newex.ee_block) @@ -3305,8 +3326,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, allocated = ext4_ext_get_actual_len(&newex) - (map->m_lblk - le32_to_cpu(newex.ee_block)); goto out; - } else { - BUG(); } } @@ -3357,8 +3376,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, /* Do not put uninitialized extent in the cache */ if (!ext4_ext_is_uninitialized(ex)) { ext4_ext_put_in_cache(inode, ee_block, - ee_len, ee_start, - EXT4_EXT_CACHE_EXTENT); + ee_len, ee_start); goto out; } ret = ext4_ext_handle_uninitialized_extents(handle, @@ -3446,9 +3464,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * that we need to perform convertion when IO is done. */ if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { - if (io) + if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) { io->flag = EXT4_IO_END_UNWRITTEN; - else + atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); + } else ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); } @@ -3456,7 +3475,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, map->m_flags |= EXT4_MAP_UNINIT; } - err = check_eofblocks_fl(handle, inode, map, path, ar.len); + err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len); if (err) goto out2; @@ -3490,8 +3509,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * when it is _not_ an uninitialized extent. */ if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) { - ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock, - EXT4_EXT_CACHE_EXTENT); + ext4_ext_put_in_cache(inode, map->m_lblk, allocated, newblock); ext4_update_inode_fsync_trans(handle, inode, 1); } else ext4_update_inode_fsync_trans(handle, inode, 0); @@ -3519,6 +3537,12 @@ void ext4_ext_truncate(struct inode *inode) int err = 0; /* + * finish any pending end_io work so we won't run the risk of + * converting any truncated blocks to initialized later + */ + ext4_flush_completed_IO(inode); + + /* * probably first extent we're gonna free will be last in block */ err = ext4_writepage_trans_blocks(inode); @@ -3605,14 +3629,15 @@ static void ext4_falloc_update_inode(struct inode *inode, } /* - * preallocate space for a file. This implements ext4's fallocate inode + * preallocate space for a file. This implements ext4's fallocate file * operation, which gets called from sys_fallocate system call. * For block-mapped files, posix_fallocate should fall back to the method * of writing zeroes to the required new blocks (the same behavior which is * expected for file systems which do not support fallocate() system call). */ -long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) +long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { + struct inode *inode = file->f_path.dentry->d_inode; handle_t *handle; loff_t new_size; unsigned int max_blocks; @@ -3622,6 +3647,10 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) struct ext4_map_blocks map; unsigned int credits, blkbits = inode->i_blkbits; + /* We only support the FALLOC_FL_KEEP_SIZE mode */ + if (mode & ~FALLOC_FL_KEEP_SIZE) + return -EOPNOTSUPP; + /* * currently supporting (pre)allocate mode for extent-based * files _only_ @@ -3629,10 +3658,6 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) return -EOPNOTSUPP; - /* preallocation to directories is currently not supported */ - if (S_ISDIR(inode->i_mode)) - return -ENODEV; - map.m_lblk = offset >> blkbits; /* * We can't just convert len to max_blocks because @@ -3767,7 +3792,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, logical = (__u64)newex->ec_block << blksize_bits; - if (newex->ec_type == EXT4_EXT_CACHE_GAP) { + if (newex->ec_start == 0) { pgoff_t offset; struct page *page; struct buffer_head *bh = NULL; |