diff options
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/balloc.c | 49 | ||||
-rw-r--r-- | fs/ext4/dir.c | 3 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 6 | ||||
-rw-r--r-- | fs/ext4/ext4_extents.h | 1 | ||||
-rw-r--r-- | fs/ext4/ext4_i.h | 7 | ||||
-rw-r--r-- | fs/ext4/extents.c | 32 | ||||
-rw-r--r-- | fs/ext4/inode.c | 184 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 20 | ||||
-rw-r--r-- | fs/ext4/super.c | 5 |
9 files changed, 257 insertions, 50 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 6369bacf0dcb..495ab21b9832 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -1701,7 +1701,12 @@ ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, } sbi = EXT4_SB(sb); - *count = ext4_has_free_blocks(sbi, *count); + if (!EXT4_I(inode)->i_delalloc_reserved_flag) { + /* + * With delalloc we already reserved the blocks + */ + *count = ext4_has_free_blocks(sbi, *count); + } if (*count == 0) { *errp = -ENOSPC; return 0; /*return with ENOSPC error */ @@ -1902,7 +1907,8 @@ allocated: le16_add_cpu(&gdp->bg_free_blocks_count, -num); gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); spin_unlock(sb_bgl_lock(sbi, group_no)); - percpu_counter_sub(&sbi->s_freeblocks_counter, num); + if (!EXT4_I(inode)->i_delalloc_reserved_flag) + percpu_counter_sub(&sbi->s_freeblocks_counter, num); if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, group_no); @@ -1976,40 +1982,49 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, } /* - * ext4_new_meta_block() -- allocate block for meta data (indexing) blocks + * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks * * @handle: handle to this transaction * @inode: file inode * @goal: given target block(filesystem wide) + * @count: total number of blocks need * @errp: error code * - * Return allocated block number on success + * Return 1st allocated block numberon success, *count stores total account + * error stores in errp pointer */ -ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, int *errp) +ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, unsigned long *count, int *errp) { - unsigned long count = 1; - return do_blk_alloc(handle, inode, 0, goal, - &count, errp, EXT4_META_BLOCK); + ext4_fsblk_t ret; + ret = do_blk_alloc(handle, inode, 0, goal, + count, errp, EXT4_META_BLOCK); + /* + * Account for the allocated meta blocks + */ + if (!(*errp)) { + spin_lock(&EXT4_I(inode)->i_block_reservation_lock); + EXT4_I(inode)->i_allocated_meta_blocks += *count; + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + } + return ret; } /* - * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks + * ext4_new_meta_block() -- allocate block for meta data (indexing) blocks * * @handle: handle to this transaction * @inode: file inode * @goal: given target block(filesystem wide) - * @count: total number of blocks need * @errp: error code * - * Return 1st allocated block numberon success, *count stores total account - * error stores in errp pointer + * Return allocated block number on success */ -ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, unsigned long *count, int *errp) +ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, int *errp) { - return do_blk_alloc(handle, inode, 0, goal, - count, errp, EXT4_META_BLOCK); + unsigned long count = 1; + return ext4_new_meta_blocks(handle, inode, goal, &count, errp); } /* diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 5ed5108766c1..d3d23d73c08b 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -129,7 +129,8 @@ static int ext4_readdir(struct file * filp, struct buffer_head *bh = NULL; map_bh.b_state = 0; - err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 0, 0); + err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, + 0, 0, 0); if (err > 0) { pgoff_t index = map_bh.b_blocknr >> (PAGE_CACHE_SHIFT - inode->i_blkbits); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ee9576dc0ba1..0962f4e26579 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -74,6 +74,9 @@ #define EXT4_MB_HINT_GOAL_ONLY 256 /* goal is meaningful */ #define EXT4_MB_HINT_TRY_GOAL 512 +/* blocks already pre-reserved by delayed allocation */ +#define EXT4_MB_DELALLOC_RESERVED 1024 + struct ext4_allocation_request { /* target inode for block we're allocating */ @@ -1041,6 +1044,7 @@ extern void ext4_mb_update_group_info(struct ext4_group_info *grp, /* inode.c */ +void ext4_da_release_space(struct inode *inode, int used, int to_free); int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, struct buffer_head *bh, ext4_fsblk_t blocknr); struct buffer_head *ext4_getblk(handle_t *, struct inode *, @@ -1234,7 +1238,7 @@ extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, unsigned long max_blocks, struct buffer_head *bh, int create, - int extend_disksize); + int extend_disksize, int flag); #endif /* __KERNEL__ */ #endif /* _EXT4_H */ diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 75333b595fab..6c166c0a54b7 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -212,6 +212,7 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); } +extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); extern int ext4_extent_tree_init(handle_t *, struct inode *); diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h index c2903ef72159..ef7409f0e7e4 100644 --- a/fs/ext4/ext4_i.h +++ b/fs/ext4/ext4_i.h @@ -163,6 +163,13 @@ struct ext4_inode_info { /* mballoc */ struct list_head i_prealloc_list; spinlock_t i_prealloc_lock; + + /* allocation reservation info for delalloc */ + unsigned long i_reserved_data_blocks; + unsigned long i_reserved_meta_blocks; + unsigned long i_allocated_meta_blocks; + unsigned short i_delalloc_reserved_flag; + spinlock_t i_block_reservation_lock; }; #endif /* _EXT4_I */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7844bbb2bac0..dabc3b68d249 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -248,6 +248,36 @@ static int ext4_ext_space_root_idx(struct inode *inode) return size; } +/* + * Calculate the number of metadata blocks needed + * to allocate @blocks + * Worse case is one block per extent + */ +int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) +{ + int lcap, icap, rcap, leafs, idxs, num; + int newextents = blocks; + + rcap = ext4_ext_space_root_idx(inode); + lcap = ext4_ext_space_block(inode); + icap = ext4_ext_space_block_idx(inode); + + /* number of new leaf blocks needed */ + num = leafs = (newextents + lcap - 1) / lcap; + + /* + * Worse case, we need separate index block(s) + * to link all new leaf blocks + */ + idxs = (leafs + icap - 1) / icap; + do { + num += idxs; + idxs = (idxs + icap - 1) / icap; + } while (idxs > rcap); + + return num; +} + static int ext4_ext_max_entries(struct inode *inode, int depth) { @@ -2910,7 +2940,7 @@ retry: } ret = ext4_get_blocks_wrap(handle, inode, block, max_blocks, &map_bh, - EXT4_CREATE_UNINITIALIZED_EXT, 0); + EXT4_CREATE_UNINITIALIZED_EXT, 0, 0); if (ret <= 0) { #ifdef EXT4FS_DEBUG WARN_ON(ret <= 0); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2bef4f879e4b..a6b800c58474 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -39,6 +39,7 @@ #include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" +#include "ext4_extents.h" static inline int ext4_begin_ordered_truncate(struct inode *inode, loff_t new_size) @@ -982,7 +983,7 @@ out: */ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, unsigned long max_blocks, struct buffer_head *bh, - int create, int extend_disksize) + int create, int extend_disksize, int flag) { int retval; @@ -1023,6 +1024,15 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, * with create == 1 flag. */ down_write((&EXT4_I(inode)->i_data_sem)); + + /* + * if the caller is from delayed allocation writeout path + * we have already reserved fs blocks for allocation + * let the underlying get_block() function know to + * avoid double accounting + */ + if (flag) + EXT4_I(inode)->i_delalloc_reserved_flag = 1; /* * We need to check for EXT4 here because migrate * could have changed the inode type in between @@ -1044,6 +1054,18 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, ~EXT4_EXT_MIGRATE; } } + + if (flag) { + EXT4_I(inode)->i_delalloc_reserved_flag = 0; + /* + * Update reserved blocks/metadata blocks + * after successful block allocation + * which were deferred till now + */ + if ((retval > 0) && buffer_delay(bh)) + ext4_da_release_space(inode, retval, 0); + } + up_write((&EXT4_I(inode)->i_data_sem)); return retval; } @@ -1069,7 +1091,7 @@ static int ext4_get_block(struct inode *inode, sector_t iblock, } ret = ext4_get_blocks_wrap(handle, inode, iblock, - max_blocks, bh_result, create, 0); + max_blocks, bh_result, create, 0, 0); if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); ret = 0; @@ -1095,7 +1117,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, dummy.b_blocknr = -1000; buffer_trace_init(&dummy.b_history); err = ext4_get_blocks_wrap(handle, inode, block, 1, - &dummy, create, 1); + &dummy, create, 1, 0); /* * ext4_get_blocks_handle() returns number of blocks * mapped. 0 in case of a HOLE. @@ -1409,6 +1431,122 @@ static int ext4_journalled_write_end(struct file *file, return ret ? ret : copied; } +/* + * Calculate the number of metadata blocks need to reserve + * to allocate @blocks for non extent file based file + */ +static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) +{ + int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); + int ind_blks, dind_blks, tind_blks; + + /* number of new indirect blocks needed */ + ind_blks = (blocks + icap - 1) / icap; + + dind_blks = (ind_blks + icap - 1) / icap; + + tind_blks = 1; + + return ind_blks + dind_blks + tind_blks; +} + +/* + * Calculate the number of metadata blocks need to reserve + * to allocate given number of blocks + */ +static int ext4_calc_metadata_amount(struct inode *inode, int blocks) +{ + if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) + return ext4_ext_calc_metadata_amount(inode, blocks); + + return ext4_indirect_calc_metadata_amount(inode, blocks); +} + +static int ext4_da_reserve_space(struct inode *inode, int nrblocks) +{ + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + unsigned long md_needed, mdblocks, total = 0; + + /* + * recalculate the amount of metadata blocks to reserve + * in order to allocate nrblocks + * worse case is one extent per block + */ + spin_lock(&EXT4_I(inode)->i_block_reservation_lock); + total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; + mdblocks = ext4_calc_metadata_amount(inode, total); + BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks); + + md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; + total = md_needed + nrblocks; + + if (ext4_has_free_blocks(sbi, total) < total) { + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + return -ENOSPC; + } + + /* reduce fs free blocks counter */ + percpu_counter_sub(&sbi->s_freeblocks_counter, total); + + EXT4_I(inode)->i_reserved_data_blocks += nrblocks; + EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; + + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + return 0; /* success */ +} + +void ext4_da_release_space(struct inode *inode, int used, int to_free) +{ + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + int total, mdb, mdb_free, release; + + spin_lock(&EXT4_I(inode)->i_block_reservation_lock); + /* recalculate the number of metablocks still need to be reserved */ + total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free; + mdb = ext4_calc_metadata_amount(inode, total); + + /* figure out how many metablocks to release */ + BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); + mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; + + /* Account for allocated meta_blocks */ + mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; + + release = to_free + mdb_free; + + /* update fs free blocks counter for truncate case */ + percpu_counter_add(&sbi->s_freeblocks_counter, release); + + /* update per-inode reservations */ + BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks); + EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free); + + BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); + EXT4_I(inode)->i_reserved_meta_blocks = mdb; + EXT4_I(inode)->i_allocated_meta_blocks = 0; + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); +} + +static void ext4_da_page_release_reservation(struct page *page, + unsigned long offset) +{ + int to_release = 0; + struct buffer_head *head, *bh; + unsigned int curr_off = 0; + + head = page_buffers(page); + bh = head; + do { + unsigned int next_off = curr_off + bh->b_size; + + if ((offset <= curr_off) && (buffer_delay(bh))) { + to_release++; + clear_buffer_delay(bh); + } + curr_off = next_off; + } while ((bh = bh->b_this_page) != head); + ext4_da_release_space(page->mapping->host, 0, to_release); +} /* * Delayed allocation stuff @@ -1829,14 +1967,18 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, * preallocated blocks are unmapped but should treated * the same as allocated blocks. */ - ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0); - if (ret == 0) { - /* the block isn't allocated yet, let's reserve space */ - /* XXX: call reservation here */ + ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0, 0); + if ((ret == 0) && !buffer_delay(bh_result)) { + /* the block isn't (pre)allocated yet, let's reserve space */ /* * XXX: __block_prepare_write() unmaps passed block, * is it OK? */ + ret = ext4_da_reserve_space(inode, 1); + if (ret) + /* not enough space to reserve */ + return ret; + map_bh(bh_result, inode->i_sb, 0); set_buffer_new(bh_result); set_buffer_delay(bh_result); @@ -1847,7 +1989,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, return ret; } - +#define EXT4_DELALLOC_RSVED 1 static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { @@ -1865,7 +2007,7 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, } ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, - bh_result, create, 0); + bh_result, create, 0, EXT4_DELALLOC_RSVED); if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); @@ -1952,7 +2094,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - int ret; + int ret, retries = 0; struct page *page; pgoff_t index; unsigned from, to; @@ -1963,6 +2105,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; +retry: /* * With delayed allocation, we don't log the i_disksize update * if there is delayed block allocation. But we still need @@ -1988,6 +2131,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, page_cache_release(page); } + if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) + goto retry; out: return ret; } @@ -2040,9 +2185,6 @@ static int ext4_da_write_end(struct file *file, static void ext4_da_invalidatepage(struct page *page, unsigned long offset) { - struct buffer_head *head, *bh; - unsigned int curr_off = 0; - /* * Drop reserved blocks */ @@ -2050,21 +2192,7 @@ static void ext4_da_invalidatepage(struct page *page, unsigned long offset) if (!page_has_buffers(page)) goto out; - head = page_buffers(page); - bh = head; - do { - unsigned int next_off = curr_off + bh->b_size; - - /* - * is this block fully invalidated? - */ - if (offset <= curr_off && buffer_delay(bh)) { - clear_buffer_delay(bh); - /* XXX: add real stuff here */ - } - curr_off = next_off; - bh = bh->b_this_page; - } while (bh != head); + ext4_da_page_release_reservation(page, offset); out: ext4_invalidatepage(page, offset); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8d254ca83d9e..8d141a25bbee 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2964,7 +2964,15 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); - percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); + + /* + * free blocks account has already be reduced/reserved + * at write_begin() time for delayed allocation + * do not double accounting + */ + if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) + percpu_counter_sub(&sbi->s_freeblocks_counter, + ac->ac_b_ex.fe_len); if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, @@ -4169,7 +4177,12 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, &(ar->len), errp); return block; } - ar->len = ext4_has_free_blocks(sbi, ar->len); + if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { + /* + * With delalloc we already reserved the blocks + */ + ar->len = ext4_has_free_blocks(sbi, ar->len); + } if (ar->len == 0) { *errp = -ENOSPC; @@ -4186,6 +4199,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, } inquota = ar->len; + if (EXT4_I(ar->inode)->i_delalloc_reserved_flag) + ar->flags |= EXT4_MB_DELALLOC_RESERVED; + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); if (!ac) { ar->len = 0; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index de9d3d0eb203..25e2f2488cd2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -574,6 +574,11 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&ei->i_prealloc_list); spin_lock_init(&ei->i_prealloc_lock); jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode); + ei->i_reserved_data_blocks = 0; + ei->i_reserved_meta_blocks = 0; + ei->i_allocated_meta_blocks = 0; + ei->i_delalloc_reserved_flag = 0; + spin_lock_init(&(ei->i_block_reservation_lock)); return &ei->vfs_inode; } |