From 650495dedc34daf8590c708a5b48f82ed2787b75 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 May 2013 08:38:35 +0900 Subject: f2fs: fix the inconsistent state of data pages In get_lock_data_page, if there is a data race between get_dnode_of_data for node and grab_cache_page for data, f2fs can hit the following BUG_ON(dn.data_blkaddr == NEW_ADDR). kernel BUG at /home/zeus/f2fs_test/src/fs/f2fs/data.c:251! [] get_lock_data_page+0x1ec/0x210 [f2fs] Call Trace: [] f2fs_readdir+0x89/0x210 [f2fs] [] ? fillonedir+0x100/0x100 [] ? fillonedir+0x100/0x100 [] vfs_readdir+0xb8/0xe0 [] sys_getdents+0x8f/0x110 [] system_call_fastpath+0x16/0x1b This bug can occur when the block address of the data block is changed after f2fs_put_dnode(). In order to avoid that, this patch fixes the lock order of node and data blocks so that the node block lock is covered by the data block lock. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 91ff93b0b0f4..05fb5c6077b8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -233,18 +233,23 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) struct page *page; int err; +repeat: + page = grab_cache_page(mapping, index); + if (!page) + return ERR_PTR(-ENOMEM); + set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, index, LOOKUP_NODE); - if (err) + if (err) { + f2fs_put_page(page, 1); return ERR_PTR(err); + } f2fs_put_dnode(&dn); - if (dn.data_blkaddr == NULL_ADDR) + if (dn.data_blkaddr == NULL_ADDR) { + f2fs_put_page(page, 1); return ERR_PTR(-ENOENT); -repeat: - page = grab_cache_page(mapping, index); - if (!page) - return ERR_PTR(-ENOMEM); + } if (PageUptodate(page)) return page; -- cgit v1.2.3 From 64aa7ed98db489d1c41ef140876ada38498678ab Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 09:55:50 +0900 Subject: f2fs: 
change get_new_data_page to pass a locked node page This patch is for passing a locked node page to get_dnode_of_data. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 12 +++++++----- fs/f2fs/dir.c | 4 ++-- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 05fb5c6077b8..af7454939362 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -280,8 +280,8 @@ repeat: * Also, caller should grab and release a mutex by calling mutex_lock_op() and * mutex_unlock_op(). */ -struct page *get_new_data_page(struct inode *inode, pgoff_t index, - bool new_i_size) +struct page *get_new_data_page(struct inode *inode, + struct page *npage, pgoff_t index, bool new_i_size) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct address_space *mapping = inode->i_mapping; @@ -289,18 +289,20 @@ struct page *get_new_data_page(struct inode *inode, pgoff_t index, struct dnode_of_data dn; int err; - set_new_dnode(&dn, inode, NULL, NULL, 0); + set_new_dnode(&dn, inode, npage, npage, 0); err = get_dnode_of_data(&dn, index, ALLOC_NODE); if (err) return ERR_PTR(err); if (dn.data_blkaddr == NULL_ADDR) { if (reserve_new_block(&dn)) { - f2fs_put_dnode(&dn); + if (!npage) + f2fs_put_dnode(&dn); return ERR_PTR(-ENOSPC); } } - f2fs_put_dnode(&dn); + if (!npage) + f2fs_put_dnode(&dn); repeat: page = grab_cache_page(mapping, index); if (!page) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 1ac6b93036b7..7db6e58622d9 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -287,7 +287,7 @@ static int make_empty_dir(struct inode *inode, struct inode *parent) struct f2fs_dir_entry *de; void *kaddr; - dentry_page = get_new_data_page(inode, 0, true); + dentry_page = get_new_data_page(inode, NULL, 0, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); @@ -448,7 +448,7 @@ start: bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); for (block = bidx; block <= (bidx + 
nblock - 1); block++) { - dentry_page = get_new_data_page(dir, block, true); + dentry_page = get_new_data_page(dir, NULL, block, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ef6cac8c16a5..cbae2b663eba 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1027,7 +1027,7 @@ int reserve_new_block(struct dnode_of_data *); void update_extent_cache(block_t, struct dnode_of_data *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); -struct page *get_new_data_page(struct inode *, pgoff_t, bool); +struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); int do_write_data_page(struct page *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1cae864f8dfc..b8e34db37ae8 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -387,7 +387,7 @@ static void fill_zero(struct inode *inode, pgoff_t index, f2fs_balance_fs(sbi); ilock = mutex_lock_op(sbi); - page = get_new_data_page(inode, index, false); + page = get_new_data_page(inode, NULL, index, false); mutex_unlock_op(sbi, ilock); if (!IS_ERR(page)) { -- cgit v1.2.3 From 44a83ff6a81d84ab83bcb43a49ff1ba6c7e17cd1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 10:10:29 +0900 Subject: f2fs: update inode page after creation I found a bug when testing power-off-recovery as follows. [Bug Scenario] 1. create a file 2. fsync the file 3. reboot w/o any sync 4. try to recover the file - found its fsync mark - found its dentry mark : try to recover its dentry - get its file name - get its parent inode number : here we got zero value The reason why we get the wrong parent inode number is that we didn't synchronize the inode page with its newly created inode information perfectly. 
Especially, previous f2fs stores fi->i_pino and writes it to the cached node page in a wrong order, which incurs the zero-valued i_pino during the recovery. So, this patch modifies the creation flow to fix the synchronization order of inode page with its inode. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 + fs/f2fs/dir.c | 85 +++++++++++++++++++++++++++++++--------------------------- fs/f2fs/f2fs.h | 3 +-- fs/f2fs/node.c | 12 +++------ 4 files changed, 51 insertions(+), 50 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index af7454939362..c320f7f31327 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -279,6 +279,7 @@ repeat: * * Also, caller should grab and release a mutex by calling mutex_lock_op() and * mutex_unlock_op(). + * Note that, npage is set only by make_empty_dir. */ struct page *get_new_data_page(struct inode *inode, struct page *npage, pgoff_t index, bool new_i_size) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 7db6e58622d9..fc1dacf55b3a 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -264,15 +264,10 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, f2fs_put_page(page, 1); } -void init_dent_inode(const struct qstr *name, struct page *ipage) +static void init_dent_inode(const struct qstr *name, struct page *ipage) { struct f2fs_node *rn; - if (IS_ERR(ipage)) - return; - - wait_on_page_writeback(ipage); - /* copy name info. 
to this inode page */ rn = (struct f2fs_node *)page_address(ipage); rn->i.i_namelen = cpu_to_le32(name->len); @@ -280,14 +275,15 @@ void init_dent_inode(const struct qstr *name, struct page *ipage) set_page_dirty(ipage); } -static int make_empty_dir(struct inode *inode, struct inode *parent) +static int make_empty_dir(struct inode *inode, + struct inode *parent, struct page *page) { struct page *dentry_page; struct f2fs_dentry_block *dentry_blk; struct f2fs_dir_entry *de; void *kaddr; - dentry_page = get_new_data_page(inode, NULL, 0, true); + dentry_page = get_new_data_page(inode, page, 0, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); @@ -317,42 +313,47 @@ static int make_empty_dir(struct inode *inode, struct inode *parent) return 0; } -static int init_inode_metadata(struct inode *inode, +static struct page *init_inode_metadata(struct inode *inode, struct inode *dir, const struct qstr *name) { + struct page *page; + int err; + if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { - int err; - err = new_inode_page(inode, name); - if (err) - return err; + page = new_inode_page(inode, name); + if (IS_ERR(page)) + return page; if (S_ISDIR(inode->i_mode)) { - err = make_empty_dir(inode, dir); - if (err) { - remove_inode_page(inode); - return err; - } + err = make_empty_dir(inode, dir, page); + if (err) + goto error; } err = f2fs_init_acl(inode, dir); - if (err) { - remove_inode_page(inode); - return err; - } + if (err) + goto error; + + wait_on_page_writeback(page); } else { - struct page *ipage; - ipage = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); - set_cold_node(inode, ipage); - init_dent_inode(name, ipage); - f2fs_put_page(ipage, 1); + page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); + if (IS_ERR(page)) + return page; + + wait_on_page_writeback(page); + set_cold_node(inode, page); } - if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { + + init_dent_inode(name, page); + + if 
(is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) inc_nlink(inode); - update_inode_page(inode); - } - return 0; + return page; + +error: + f2fs_put_page(page, 1); + remove_inode_page(inode); + return ERR_PTR(err); } static void update_parent_metadata(struct inode *dir, struct inode *inode, @@ -423,6 +424,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *in struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; int slots = GET_DENTRY_SLOTS(namelen); + struct page *page; int err = 0; int i; @@ -465,12 +467,13 @@ start: ++level; goto start; add_dentry: - err = init_inode_metadata(inode, dir, name); - if (err) - goto fail; - wait_on_page_writeback(dentry_page); + page = init_inode_metadata(inode, dir, name); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } de = &dentry_blk->dentry[bit_pos]; de->hash_code = dentry_hash; de->name_len = cpu_to_le16(namelen); @@ -481,10 +484,12 @@ add_dentry: test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); set_page_dirty(dentry_page); - update_parent_metadata(dir, inode, current_depth); - - /* update parent inode number before releasing dentry page */ + /* we don't need to mark_inode_dirty now */ F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + + update_parent_metadata(dir, inode, current_depth); fail: kunmap(dentry_page); f2fs_put_page(dentry_page, 1); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cbae2b663eba..9360a03fcc96 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -914,7 +914,6 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); ino_t f2fs_inode_by_name(struct inode *, struct qstr *); void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); -void init_dent_inode(const struct qstr *, struct page *); int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct 
inode *); int f2fs_make_empty(struct inode *, struct inode *); @@ -949,7 +948,7 @@ void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); int truncate_inode_blocks(struct inode *, pgoff_t); int remove_inode_page(struct inode *); -int new_inode_page(struct inode *, const struct qstr *); +struct page *new_inode_page(struct inode *, const struct qstr *); struct page *new_node_page(struct dnode_of_data *, unsigned int); void ra_node_page(struct f2fs_sb_info *, nid_t); struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f63f0a4046c6..b41482de492f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -806,19 +806,15 @@ int remove_inode_page(struct inode *inode) return 0; } -int new_inode_page(struct inode *inode, const struct qstr *name) +struct page *new_inode_page(struct inode *inode, const struct qstr *name) { - struct page *page; struct dnode_of_data dn; /* allocate inode page for new inode */ set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); - page = new_node_page(&dn, 0); - init_dent_inode(name, page); - if (IS_ERR(page)) - return PTR_ERR(page); - f2fs_put_page(page, 1); - return 0; + + /* caller should f2fs_put_page(page, 1); */ + return new_node_page(&dn, 0); } struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) -- cgit v1.2.3 From 6f85b3520325a67ee4ac33e75bbcdbc25c79ce69 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 16:15:22 +0900 Subject: f2fs: avoid RECLAIM_FS-ON-W: deadlock This patch tries to avoid the following deadlock condition of which the reclaim path can trigger f2fs_balance_fs again. ================================= [ INFO: inconsistent lock state ] --------------------------------- inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage. 
kswapd0/41 [HC0[0]:SC0[0]:HE1:SE1] takes: (&sbi->gc_mutex){+.+.?.}, at: f2fs_balance_fs+0xe6/0x100 [f2fs] {RECLAIM_FS-ON-W} state was registered at: [] mark_held_locks+0xb9/0x140 [] lockdep_trace_alloc+0x85/0xf0 [] __alloc_pages_nodemask+0x7c/0x9b0 [] alloc_pages_current+0xb8/0x180 [] __page_cache_alloc+0xaf/0xd0 [] find_or_create_page+0x4c/0xb0 [] find_data_page+0x14e/0x210 [f2fs] [] f2fs_gc+0x9eb/0xd90 [f2fs] [] f2fs_balance_fs+0xee/0x100 [f2fs] [] f2fs_setattr+0x6c/0x200 [f2fs] [] notify_change+0x1db/0x3a0 [] do_truncate+0x60/0xa0 [] vfs_truncate+0x185/0x1b0 [] do_sys_truncate+0x5c/0xa0 [] SyS_truncate+0xe/0x10 [] system_call_fastpath+0x16/0x1b Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- fs/f2fs/inode.c | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c320f7f31327..1644fffea251 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -199,7 +199,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) if (dn.data_blkaddr == NEW_ADDR) return ERR_PTR(-EINVAL); - page = grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); if (!page) return ERR_PTR(-ENOMEM); @@ -234,7 +234,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) int err; repeat: - page = grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); if (!page) return ERR_PTR(-ENOMEM); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 91ac7f9d88ee..a18946e2a8b4 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -130,8 +130,7 @@ make_now: inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER_MOVABLE | - __GFP_ZERO); + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); } else if (S_ISLNK(inode->i_mode)) { inode->i_op = &f2fs_symlink_inode_operations; 
inode->i_mapping->a_ops = &f2fs_dblock_aops; -- cgit v1.2.3 From 35b09d82c3cf3fc0b8b6d923e7fd82ff7926aafc Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 23 May 2013 22:57:53 +0900 Subject: f2fs: push some variables to debug part Some counters are needed only for the statistical information while debugging. So, those can be controlled using CONFIG_F2FS_STAT_FS, moving the usage of a few variables under this flag. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ++++ fs/f2fs/data.c | 6 ++++++ fs/f2fs/f2fs.h | 6 ++++-- fs/f2fs/gc.c | 2 ++ fs/f2fs/segment.c | 5 +++++ 5 files changed, 21 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 3d1144908ac6..01ddc911ac9b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -478,7 +478,9 @@ retry: } } list_add_tail(&new->list, head); +#ifdef CONFIG_F2FS_STAT_FS sbi->n_dirty_dirs++; +#endif BUG_ON(!S_ISDIR(inode->i_mode)); out: @@ -508,7 +510,9 @@ void remove_dirty_dir_inode(struct inode *inode) if (entry->inode == inode) { list_del(&entry->list); kmem_cache_free(inode_entry_slab, entry); +#ifdef CONFIG_F2FS_STAT_FS sbi->n_dirty_dirs--; +#endif break; } } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1644fffea251..93917e31dbdf 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -68,7 +68,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, struct buffer_head *bh_result) { struct f2fs_inode_info *fi = F2FS_I(inode); +#ifdef CONFIG_F2FS_STAT_FS struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); +#endif pgoff_t start_fofs, end_fofs; block_t start_blkaddr; @@ -78,7 +80,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, return 0; } +#ifdef CONFIG_F2FS_STAT_FS sbi->total_hit_ext++; +#endif start_fofs = fi->ext.fofs; end_fofs = fi->ext.fofs + fi->ext.len - 1; start_blkaddr = fi->ext.blk_addr; @@ -96,7 +100,9 @@ static int 
check_extent_cache(struct inode *inode, pgoff_t pgofs, else bh_result->b_size = UINT_MAX; +#ifdef CONFIG_F2FS_STAT_FS sbi->read_hit_ext++; +#endif read_unlock(&fi->ext.ext_lock); return 1; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 92fd4e9285c0..40b137acb8a2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -372,7 +372,6 @@ struct f2fs_sb_info { /* for directory inode management */ struct list_head dir_inode_list; /* dir inode list */ spinlock_t dir_inode_lock; /* for dir inode list lock */ - unsigned int n_dirty_dirs; /* # of dir inodes */ /* basic file system units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ @@ -409,12 +408,15 @@ struct f2fs_sb_info { * for stat information. * one is for the LFS mode, and the other is for the SSR mode. */ +#ifdef CONFIG_F2FS_STAT_FS struct f2fs_stat_info *stat_info; /* FS status information */ unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ - unsigned int last_victim[2]; /* last victim segment # */ int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ int bg_gc; /* background gc calls */ + unsigned int n_dirty_dirs; /* # of dir inodes */ +#endif + unsigned int last_victim[2]; /* last victim segment # */ spinlock_t stat_lock; /* lock for stat operations */ }; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 14961593e93c..25b083c81d50 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -76,7 +76,9 @@ static int gc_thread_func(void *data) else wait_ms = increase_sleep_time(wait_ms); +#ifdef CONFIG_F2FS_STAT_FS sbi->bg_gc++; +#endif /* if return value is not zero, no victim was selected */ if (f2fs_gc(sbi)) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3a0d027aad7e..be668ffb001c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -610,7 +610,10 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, else new_curseg(sbi, type, false); out: +#ifdef CONFIG_F2FS_STAT_FS 
sbi->segment_count[curseg->alloc_type]++; +#endif + return; } void allocate_new_segments(struct f2fs_sb_info *sbi) @@ -846,7 +849,9 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, mutex_lock(&sit_i->sentry_lock); __refresh_next_blkoff(sbi, curseg); +#ifdef CONFIG_F2FS_STAT_FS sbi->block_count[curseg->alloc_type]++; +#endif /* * SIT information should be updated before segment allocation, -- cgit v1.2.3 From 699489bbbea4fc3b9b735d69941cf4fca91ce1d5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 7 Jun 2013 22:08:23 +0900 Subject: f2fs: sync dir->i_size with its block allocation If a new dentry block is allocated and its i_size is updated, we should update its inode block together in order to sync i_size and its block allocation. Otherwise, we can lose the additional dentry block due to the inconsistent i_size. Erroneous Scenario ------------------- In the recovery routine, - recovery_dentry | - __f2fs_add_link | | - get_new_data_page | | | - i_size_write(new_i_size) | | | - mark_inode_dirty_sync(dir) | | - update_parent_metadata | | | - mark_inode_dirty(dir) | - write_checkpoint - sync_dirty_dir_inodes - filemap_flush(dentry_blocks) - f2fs_write_data_page - skip to write the last dentry block due to index < i_size In the above flow, new_i_size is not updated to its inode block so that the last dentry block will be lost accordingly. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ fs/f2fs/dir.c | 9 ++++----- fs/f2fs/f2fs.h | 1 + 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 93917e31dbdf..5b145fcc2864 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -339,6 +339,8 @@ repeat: if (new_i_size && i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); + /* Only the directory inode sets new_i_size */ + set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); mark_inode_dirty_sync(inode); } return page; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index eaea5b50d9c1..69ca049b5168 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -370,22 +370,20 @@ error: static void update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { - bool need_dir_update = false; - if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { if (S_ISDIR(inode->i_mode)) { inc_nlink(dir); - need_dir_update = true; + set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); } clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); } dir->i_mtime = dir->i_ctime = CURRENT_TIME; if (F2FS_I(dir)->i_current_depth != current_depth) { F2FS_I(dir)->i_current_depth = current_depth; - need_dir_update = true; + set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); } - if (need_dir_update) + if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) update_inode_page(dir); else mark_inode_dirty(dir); @@ -502,6 +500,7 @@ add_dentry: update_parent_metadata(dir, inode, current_depth); fail: + clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); kunmap(dentry_page); f2fs_put_page(dentry_page, 1); return err; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c344a4d640cb..27edf59ac12c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -859,6 +859,7 @@ enum { FI_INC_LINK, /* need to increment i_nlink */ FI_ACL_MODE, /* indicate acl mode */ FI_NO_ALLOC, /* should not allocate any blocks */ + FI_UPDATE_DIR, /* should update inode 
block for consistency */ FI_DELAY_IPUT, /* used for the recovery */ }; -- cgit v1.2.3 From b25958b6ecf1dce087e62b9aa27cf8f2fe9b5c86 Mon Sep 17 00:00:00 2001 From: Haicheng Li Date: Thu, 13 Jun 2013 16:59:29 +0800 Subject: f2fs: optimize do_write_data_page() Since "need_inplace_update() == true" is a very rare case, use unlikely() to give the compiler a chance to optimize the code. Signed-off-by: Haicheng Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5b145fcc2864..6d4a743caf86 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -497,8 +497,9 @@ int do_write_data_page(struct page *page) * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (old_blk_addr != NEW_ADDR && !is_cold_data(page) && - need_inplace_update(inode)) { + if (unlikely(old_blk_addr != NEW_ADDR && + !is_cold_data(page) && + need_inplace_update(inode))) { rewrite_data_page(F2FS_SB(inode->i_sb), page, old_blk_addr); } else { -- cgit v1.2.3 From a1dd3c13ce65b726fddfe72b9d2f1009db983ce6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 27 Jun 2013 13:04:08 +0900 Subject: f2fs: fix to recover i_size from roll-forward If a user requests many data writes and fsync together, the last updated i_size should be stored to the inode block consistently. But the previous write_end just marks the inode as dirty and doesn't update its metadata into its inode block. After that, fsync just writes the inode block with newly updated data index excluding inode metadata updates. So, this patch introduces a write_end which also updates the inode block when the i_size is changed. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/data.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6d4a743caf86..e88f46f122aa 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -701,6 +701,27 @@ err: return err; } +static int f2fs_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = page->mapping->host; + + SetPageUptodate(page); + set_page_dirty(page); + + if (pos + copied > i_size_read(inode)) { + i_size_write(inode, pos + copied); + mark_inode_dirty(inode); + update_inode_page(inode); + } + + unlock_page(page); + page_cache_release(page); + return copied; +} + static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { @@ -757,7 +778,7 @@ const struct address_space_operations f2fs_dblock_aops = { .writepage = f2fs_write_data_page, .writepages = f2fs_write_data_pages, .write_begin = f2fs_write_begin, - .write_end = nobh_write_end, + .write_end = f2fs_write_end, .set_page_dirty = f2fs_set_data_page_dirty, .invalidatepage = f2fs_invalidate_data_page, .releasepage = f2fs_release_data_page, -- cgit v1.2.3