From 1228b482c4ee26f3334dd83eb13bcf7e462f9c93 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 29 Sep 2017 13:59:39 +0800 Subject: f2fs: fix to flush multiple device in checkpoint If f2fs manages multiple devices, in checkpoint, we need to issue flush in those devices which contain dirty data/node in their cache before we write checkpoint region, otherwise, filesystem metadata could be corrupted if hitting SPO after checkpoint. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 933c3d529e65..57e79cc2e792 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1965,6 +1965,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi) for (j = HOT; j < NR_TEMP_TYPE; j++) mutex_init(&sbi->wio_mutex[i][j]); spin_lock_init(&sbi->cp_lock); + + sbi->dirty_device = 0; + spin_lock_init(&sbi->dev_lock); } static int init_percpu_info(struct f2fs_sb_info *sbi) -- cgit v1.2.3 From 8412663d177d95beff799c8bb256fccef86d09e6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 4 Oct 2017 09:08:32 +0800 Subject: f2fs: support issuing/waiting discard in range Fstrim intends to trim invalid blocks of filesystem only with specified range and granularity, but actually, it will issue all previous cached discard commands which may be out-of-range and be with unmatched granularity, it's unneeded. In order to fix above issues, this patch introduces new helps to support to issue and wait discard in range and adds a new fstrim_list for tracking in-flight discard from ->fstrim. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 +- fs/f2fs/segment.c | 127 +++++++++++++++++++++++++++++++++++++++++++++--------- fs/f2fs/super.c | 2 +- 3 files changed, 109 insertions(+), 23 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4a42ab61fc93..07e57b5d9ec3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -249,6 +249,7 @@ struct discard_cmd_control { struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */ unsigned char pend_list_tag[MAX_PLIST_NUM];/* tag for pending entries */ struct list_head wait_list; /* store on-flushing entries */ + struct list_head fstrim_list; /* in-flight discard from fstrim */ wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */ unsigned int discard_wake; /* to wake up discard thread */ struct mutex cmd_lock; @@ -2535,7 +2536,7 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new); void stop_discard_thread(struct f2fs_sb_info *sbi); -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount); +void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); void release_discard_addrs(struct f2fs_sb_info *sbi); int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d5bad5aedded..a0a0a887fc31 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -873,9 +873,11 @@ void __check_sit_bitmap(struct f2fs_sb_info *sbi, /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, - struct discard_cmd *dc) + struct discard_cmd *dc, bool fstrim) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *wait_list = fstrim ? &(dcc->fstrim_list) : + &(dcc->wait_list); struct bio *bio = NULL; if (dc->state != D_PREP) @@ -897,7 +899,7 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, bio->bi_end_io = f2fs_submit_discard_endio; bio->bi_opf |= REQ_SYNC; submit_bio(bio); - list_move_tail(&dc->list, &dcc->wait_list); + list_move_tail(&dc->list, wait_list); __check_sit_bitmap(sbi, dc->start, dc->start + dc->len); f2fs_update_iostat(sbi, FS_DISCARD, 1); @@ -1082,6 +1084,68 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, return 0; } +static void __issue_discard_cmd_range(struct f2fs_sb_info *sbi, + unsigned int start, unsigned int end, + unsigned int granularity) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_cmd *prev_dc = NULL, *next_dc = NULL; + struct rb_node **insert_p = NULL, *insert_parent = NULL; + struct discard_cmd *dc; + struct blk_plug plug; + int issued; + +next: + issued = 0; + + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); + + dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root, + NULL, start, + (struct rb_entry **)&prev_dc, + (struct rb_entry **)&next_dc, + &insert_p, &insert_parent, true); + if (!dc) + dc = next_dc; + + blk_start_plug(&plug); + + while (dc && dc->lstart <= end) { + struct rb_node *node; + + if (dc->len < granularity) + goto skip; + + if (dc->state != D_PREP) { + list_move_tail(&dc->list, &dcc->fstrim_list); + goto skip; + } + + __submit_discard_cmd(sbi, dc, true); + + if (++issued >= DISCARD_ISSUE_RATE) { + start = dc->lstart + dc->len; + + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); + + schedule(); + + goto next; + } +skip: + node = rb_next(&dc->rb_node); + dc = rb_entry_safe(node, struct discard_cmd, rb_node); + + if (fatal_signal_pending(current)) + break; + } + + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); +} + static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; @@ -1104,22 +1168,19 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) /* Hurry up to finish fstrim */ if (dcc->pend_list_tag[i] & P_TRIM) { - __submit_discard_cmd(sbi, dc); + __submit_discard_cmd(sbi, dc, false); issued++; - - if (fatal_signal_pending(current)) - break; continue; } if (!issue_cond) { - __submit_discard_cmd(sbi, dc); + __submit_discard_cmd(sbi, dc, false); issued++; continue; } if (is_idle(sbi)) { - __submit_discard_cmd(sbi, dc); + __submit_discard_cmd(sbi, dc, false); issued++; } else { io_interrupted = true; @@ -1173,10 +1234,14 @@ static void __wait_one_discard_bio(struct f2fs_sb_info *sbi, mutex_unlock(&dcc->cmd_lock); } -static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) +static void __wait_discard_cmd_range(struct f2fs_sb_info *sbi, bool wait_cond, + block_t start, block_t end, + unsigned int granularity, + bool fstrim) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *wait_list = &(dcc->wait_list); + struct list_head *wait_list = fstrim ? &(dcc->fstrim_list) : + &(dcc->wait_list); struct discard_cmd *dc, *tmp; bool need_wait; @@ -1185,6 +1250,10 @@ next: mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(dc, tmp, wait_list, list) { + if (dc->lstart + dc->len <= start || end <= dc->lstart) + continue; + if (dc->len < granularity) + continue; if (!wait_cond || (dc->state == D_DONE && !dc->ref)) { wait_for_completion_io(&dc->wait); __remove_discard_cmd(sbi, dc); @@ -1202,6 +1271,11 @@ next: } } +static void __wait_all_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) +{ + __wait_discard_cmd_range(sbi, wait_cond, 0, UINT_MAX, 1, false); +} + /* This should be covered by global mutex, &sit_i->sentry_lock */ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { @@ -1237,12 +1311,12 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) } } -/* This comes from f2fs_put_super and f2fs_trim_fs */ -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi, bool umount) +/* This comes from f2fs_put_super */ +void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { __issue_discard_cmd(sbi, false); __drop_discard_cmd(sbi); - __wait_discard_cmd(sbi, !umount); + __wait_all_discard_cmd(sbi, false); } static void mark_discard_range_all(struct f2fs_sb_info *sbi) @@ -1286,7 +1360,7 @@ static int issue_discard_thread(void *data) issued = __issue_discard_cmd(sbi, true); if (issued) { - __wait_discard_cmd(sbi, true); + __wait_all_discard_cmd(sbi, true); wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; } else { wait_ms = DEF_MAX_DISCARD_ISSUE_TIME; @@ -1597,6 +1671,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) dcc->pend_list_tag[i] |= P_ACTIVE; } INIT_LIST_HEAD(&dcc->wait_list); + INIT_LIST_HEAD(&dcc->fstrim_list); mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); atomic_set(&dcc->issing_discard, 0); @@ -2224,7 +2299,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) { __u64 start = F2FS_BYTES_TO_BLK(range->start); __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1; - unsigned int start_segno, end_segno; + unsigned int start_segno, end_segno, cur_segno; + block_t start_block, end_block; struct cp_control cpc; int err = 0; @@ -2245,12 +2321,17 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : GET_SEGNO(sbi, end); + + start_block = START_BLOCK(sbi, start_segno); + end_block = START_BLOCK(sbi, end_segno + 1); + cpc.reason = CP_DISCARD; cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen)); /* do checkpoint to issue discard commands safely */ - for (; start_segno <= end_segno; start_segno = cpc.trim_end + 1) { - cpc.trim_start = start_segno; + for (cur_segno = start_segno; cur_segno <= end_segno; + cur_segno = cpc.trim_end + 1) { + cpc.trim_start = cur_segno; if (sbi->discard_blks == 0) break; @@ -2258,7 +2339,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) cpc.trim_end = end_segno; else cpc.trim_end = min_t(unsigned int, - rounddown(start_segno + + rounddown(cur_segno + BATCHED_TRIM_SEGMENTS(sbi), sbi->segs_per_sec) - 1, end_segno); @@ -2270,9 +2351,13 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) schedule(); } - /* It's time to issue all the filed discards */ - mark_discard_range_all(sbi); - f2fs_wait_discard_bios(sbi, false); + + start_block = START_BLOCK(sbi, start_segno); + end_block = START_BLOCK(sbi, min(cur_segno, end_segno) + 1); + + __issue_discard_cmd_range(sbi, start_block, end_block, cpc.trim_minlen); + __wait_discard_cmd_range(sbi, true, start_block, end_block, + cpc.trim_minlen, true); out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); return err; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 57e79cc2e792..f98a07c859e9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -801,7 +801,7 @@ static void f2fs_put_super(struct super_block *sb) } /* be sure to wait for any on-going discard commands */ - f2fs_wait_discard_bios(sbi, true); + f2fs_wait_discard_bios(sbi); if (f2fs_discard_en(sbi) && !sbi->discard_blks) { struct cp_control cpc = { -- cgit v1.2.3 From cf5c759f927dc92452fa214691be0c8a3e7c115d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 4 Oct 2017 09:08:37 +0800 Subject: f2fs: give up CP_TRIMMED_FLAG if it drops discards In ->umount, once we drop remained discard entries, we should not set CP_TRIMMED_FLAG with another checkpoint. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/segment.c | 13 ++++++++++--- fs/f2fs/super.c | 5 +++-- 3 files changed, 14 insertions(+), 6 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3173d0393a48..dac50d8b2bb8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2553,7 +2553,7 @@ void init_discard_policy(struct discard_policy *dpolicy, int discard_type, unsigned int granularity); void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new); void stop_discard_thread(struct f2fs_sb_info *sbi); -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); +bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); void release_discard_addrs(struct f2fs_sb_info *sbi); int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d6614037e04e..bfbcff8339c5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1196,12 +1196,13 @@ skip: return issued; } -static void __drop_discard_cmd(struct f2fs_sb_info *sbi) +static bool __drop_discard_cmd(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *pend_list; struct discard_cmd *dc, *tmp; int i; + bool dropped = false; mutex_lock(&dcc->cmd_lock); for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { @@ -1209,9 +1210,12 @@ static void __drop_discard_cmd(struct f2fs_sb_info *sbi) list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); __remove_discard_cmd(sbi, dc); + dropped = true; } } mutex_unlock(&dcc->cmd_lock); + + return dropped; } static void __wait_one_discard_bio(struct f2fs_sb_info *sbi, @@ -1306,15 +1310,18 @@ void stop_discard_thread(struct f2fs_sb_info *sbi) } /* This comes from f2fs_put_super */ -void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) +bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_policy dpolicy; + bool dropped; init_discard_policy(&dpolicy, DPOLICY_UMOUNT, dcc->discard_granularity); __issue_discard_cmd(sbi, &dpolicy); - __drop_discard_cmd(sbi); + dropped = __drop_discard_cmd(sbi); __wait_all_discard_cmd(sbi, &dpolicy); + + return dropped; } static int issue_discard_thread(void *data) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f98a07c859e9..3a47b3271d0d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -781,6 +781,7 @@ static void f2fs_put_super(struct super_block *sb) { struct f2fs_sb_info *sbi = F2FS_SB(sb); int i; + bool dropped; f2fs_quota_off_umount(sb); @@ -801,9 +802,9 @@ static void f2fs_put_super(struct super_block *sb) } /* be sure to wait for any on-going discard commands */ - f2fs_wait_discard_bios(sbi); + dropped = f2fs_wait_discard_bios(sbi); - if (f2fs_discard_en(sbi) && !sbi->discard_blks) { + if (f2fs_discard_en(sbi) && !sbi->discard_blks && !dropped) { struct cp_control cpc = { .reason = CP_UMOUNT | CP_TRIMMED, }; -- cgit v1.2.3 From 204b4ae06739ddfe59643c44a205e739938c51e5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 7 Oct 2017 00:08:05 -0700 Subject: f2fs/crypto: drop crypto key at evict_inode only This patch avoids dropping crypto key in f2fs_drop_inode, so we can guarantee it happens only at evict_inode. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3a47b3271d0d..dce5c4e48e23 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -673,7 +673,6 @@ static int f2fs_drop_inode(struct inode *inode) sb_end_intwrite(inode->i_sb); - fscrypt_put_encryption_info(inode, NULL); spin_lock(&inode->i_lock); atomic_dec(&inode->i_count); } -- cgit v1.2.3 From 57864ae5ce3ab5c6e3137dd03edefdb2e5531ba1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 18 Oct 2017 19:05:57 -0700 Subject: f2fs: limit # of inmemory pages If some abnormal users try lots of atomic write operations, f2fs is able to produce pinned pages in the main memory which affects system performance. This patch limits that as 20% over total memory size, and if f2fs reaches to the limit, it will drop all the inmemory pages. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 8 ++++++++ fs/f2fs/f2fs.h | 3 +++ fs/f2fs/node.c | 4 ++++ fs/f2fs/node.h | 1 + fs/f2fs/segment.c | 38 ++++++++++++++++++++++++++++++++++++++ fs/f2fs/super.c | 1 + 6 files changed, 55 insertions(+) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 95fdbe3e6cca..7fd09837820a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1944,6 +1944,12 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, trace_f2fs_write_begin(inode, pos, len, flags); + if (f2fs_is_atomic_file(inode) && + !available_free_memory(sbi, INMEM_PAGES)) { + err = -ENOMEM; + goto fail; + } + /* * We should check this at this moment to avoid deadlock on inode page * and #0 page. The locking rule for inline_data conversion should be: @@ -2021,6 +2027,8 @@ repeat: fail: f2fs_put_page(page, 1); f2fs_write_failed(mapping, pos + len); + if (f2fs_is_atomic_file(inode)) + drop_inmem_pages_all(sbi); return err; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3cf7494408ff..0b0e4fba69c4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -601,6 +601,7 @@ struct f2fs_inode_info { #endif struct list_head dirty_list; /* dirty list for dirs and files */ struct list_head gdirty_list; /* linked in global dirty list */ + struct list_head inmem_ilist; /* list for inmem inodes */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct task_struct *inmem_task; /* store inmemory task */ struct mutex inmem_lock; /* lock for inmemory pages */ @@ -965,6 +966,7 @@ enum inode_type { DIR_INODE, /* for dirty dir inode */ FILE_INODE, /* for dirty regular/symlink inode */ DIRTY_META, /* for all dirtied inode metadata */ + ATOMIC_FILE, /* for all atomic files */ NR_INODE_TYPE, }; @@ -2544,6 +2546,7 @@ void destroy_node_manager_caches(void); */ bool need_SSR(struct f2fs_sb_info *sbi); void register_inmem_page(struct inode *inode, struct page *page); +void drop_inmem_pages_all(struct f2fs_sb_info *sbi); void drop_inmem_pages(struct inode *inode); void drop_inmem_page(struct inode *inode, struct page *page); int commit_inmem_pages(struct inode *inode); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 20f60f9d524b..d571961f42a7 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -74,6 +74,10 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) atomic_read(&sbi->total_ext_node) * sizeof(struct extent_node)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); + } else if (type == INMEM_PAGES) { + /* it allows 20% / total_ram for inmemory pages */ + mem_size = get_pages(sbi, F2FS_INMEM_PAGES); + res = mem_size < (val.totalram / 5); } else { if (!sbi->sb->s_bdi->wb.dirty_exceeded) return true; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index e91b08b4a51a..0ee3e5ff49a3 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -140,6 +140,7 @@ enum mem_type { DIRTY_DENTS, /* indicates dirty dentry pages */ INO_ENTRIES, /* indicates inode entries */ EXTENT_CACHE, /* indicates extent cache */ + INMEM_PAGES, /* indicates inmemory pages */ BASE_CHECK, /* check kernel status */ }; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index bfbcff8339c5..049bbeb8ebff 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -186,6 +186,7 @@ bool need_SSR(struct f2fs_sb_info *sbi) void register_inmem_page(struct inode *inode, struct page *page) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); struct inmem_pages *new; @@ -204,6 +205,10 @@ void register_inmem_page(struct inode *inode, struct page *page) mutex_lock(&fi->inmem_lock); get_page(page); list_add_tail(&new->list, &fi->inmem_pages); + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (list_empty(&fi->inmem_ilist)) + list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); mutex_unlock(&fi->inmem_lock); @@ -262,12 +267,41 @@ next: return err; } +void drop_inmem_pages_all(struct f2fs_sb_info *sbi) +{ + struct list_head *head = &sbi->inode_list[ATOMIC_FILE]; + struct inode *inode; + struct f2fs_inode_info *fi; +next: + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (list_empty(head)) { + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + return; + } + fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist); + inode = igrab(&fi->vfs_inode); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); + + if (inode) { + drop_inmem_pages(inode); + iput(inode); + } + congestion_wait(BLK_RW_ASYNC, HZ/50); + cond_resched(); + goto next; +} + void drop_inmem_pages(struct inode *inode) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); mutex_lock(&fi->inmem_lock); __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (!list_empty(&fi->inmem_ilist)) + list_del_init(&fi->inmem_ilist); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); mutex_unlock(&fi->inmem_lock); clear_inode_flag(inode, FI_ATOMIC_FILE); @@ -399,6 +433,10 @@ int commit_inmem_pages(struct inode *inode) /* drop all uncommitted pages */ __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); } + spin_lock(&sbi->inode_lock[ATOMIC_FILE]); + if (!list_empty(&fi->inmem_ilist)) + list_del_init(&fi->inmem_ilist); + spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); mutex_unlock(&fi->inmem_lock); clear_inode_flag(inode, FI_ATOMIC_COMMIT); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index dce5c4e48e23..21e524ecdb1d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -625,6 +625,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) init_rwsem(&fi->i_sem); INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); + INIT_LIST_HEAD(&fi->inmem_ilist); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); init_rwsem(&fi->dio_rwsem[READ]); -- cgit v1.2.3 From 4e46a023c515de61df61770e03673c3f72cc6b20 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 19 Oct 2017 09:43:56 -0700 Subject: f2fs: retry ENOMEM for quota_read|write This gives another chance to read or write quota data. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 21e524ecdb1d..c32a1e1db240 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1321,8 +1321,13 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread); repeat: page = read_mapping_page(mapping, blkidx, NULL); - if (IS_ERR(page)) + if (IS_ERR(page)) { + if (PTR_ERR(page) == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto repeat; + } return PTR_ERR(page); + } lock_page(page); @@ -1365,11 +1370,16 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type, while (towrite > 0) { tocopy = min_t(unsigned long, sb->s_blocksize - offset, towrite); - +retry: err = a_ops->write_begin(NULL, mapping, off, tocopy, 0, &page, NULL); - if (unlikely(err)) + if (unlikely(err)) { + if (err == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } break; + } kaddr = kmap_atomic(page); memcpy(kaddr + offset, data, tocopy); -- cgit v1.2.3 From 6e5b5d41c93ef79e9f297f9a3b40730bf0b41f31 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 19 Oct 2017 12:07:11 -0700 Subject: Revert "f2fs: return wrong error number on f2fs_quota_write" This reverts commit 4f31d26b0c17f2aae6a6afeb823a87e20671ab4b. It turns out that we need to report error number if nothing was written. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c32a1e1db240..491dce39d47a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1396,7 +1396,7 @@ retry: } if (len == towrite) - return 0; + return err; inode->i_version++; inode->i_mtime = inode->i_ctime = current_time(inode); f2fs_mark_inode_dirty_sync(inode, false); -- cgit v1.2.3 From 1f227a3e215d3613ea6cbc2fbe29caafbcaa8f71 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 23 Oct 2017 23:48:49 +0200 Subject: f2fs: stop all the operations by cp_error flag This patch replaces to use cp_error flag instead of RDONLY for quota off. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 3 +++ fs/f2fs/checkpoint.c | 1 - fs/f2fs/file.c | 26 ++++++++++++++++++++++++++ fs/f2fs/namei.c | 30 ++++++++++++++++++++++++++++++ fs/f2fs/super.c | 3 +++ 5 files changed, 62 insertions(+), 1 deletion(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 436b3a1464d9..2bb7c9fc5144 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -250,6 +250,9 @@ static int __f2fs_set_acl(struct inode *inode, int type, int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + return __f2fs_set_acl(inode, type, acl, NULL); } diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 201608281681..6b52d4b66c7b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -29,7 +29,6 @@ struct kmem_cache *inode_entry_slab; void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) { set_ckpt_flags(sbi, CP_ERROR_FLAG); - sbi->sb->s_flags |= MS_RDONLY; if (!end_io) f2fs_flush_merged_writes(sbi); } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 56232a72d2a3..ee12bd42af13 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -53,6 +53,11 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) struct dnode_of_data dn; int err; + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; + goto err; + } + sb_start_pagefault(inode->i_sb); f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); @@ -114,6 +119,7 @@ out_sem: out: sb_end_pagefault(inode->i_sb); f2fs_update_time(sbi, REQ_TIME); +err: return block_page_mkwrite_return(err); } @@ -310,6 +316,8 @@ out: int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file))))) + return -EIO; return f2fs_do_sync_file(file, start, end, datasync, false); } @@ -446,6 +454,9 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) struct inode *inode = file_inode(file); int err; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + /* we don't need to use inline_data strictly */ err = f2fs_convert_inline_inode(inode); if (err) @@ -632,6 +643,9 @@ int f2fs_truncate(struct inode *inode) { int err; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return 0; @@ -731,6 +745,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) int err; bool size_changed = false; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + err = setattr_prepare(dentry, attr); if (err) return err; @@ -1459,6 +1476,9 @@ static long f2fs_fallocate(struct file *file, int mode, struct inode *inode = file_inode(file); long ret = 0; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + /* f2fs only support ->fallocate for regular file */ if (!S_ISREG(inode->i_mode)) return -EINVAL; @@ -2637,6 +2657,9 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) + return -EIO; + switch (cmd) { case F2FS_IOC_GETFLAGS: return f2fs_ioc_getflags(filp, arg); @@ -2694,6 +2717,9 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct blk_plug plug; ssize_t ret; + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + return -EIO; + inode_lock(inode); ret = generic_write_checks(iocb, from); if (ret > 0) { diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 6ab8ac9fe043..2c5dd1d0140f 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -177,6 +177,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, nid_t ino = 0; int err; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = dquot_initialize(dir); if (err) return err; @@ -221,6 +224,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, struct f2fs_sb_info *sbi = F2FS_I_SB(dir); int err; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + if (f2fs_encrypted_inode(dir) && !fscrypt_has_permitted_context(dir, inode)) return -EPERM; @@ -420,6 +426,9 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) trace_f2fs_unlink_enter(dir, dentry); + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = dquot_initialize(dir); if (err) return err; @@ -475,6 +484,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, struct fscrypt_symlink_data *sd = NULL; int err; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + if (f2fs_encrypted_inode(dir)) { err = fscrypt_get_encryption_info(dir); if (err) @@ -581,6 +593,9 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct inode *inode; int err; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = dquot_initialize(dir); if (err) return err; @@ -633,6 +648,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, struct inode *inode; int err = 0; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + err = dquot_initialize(dir); if (err) return err; @@ -727,6 +745,9 @@ out: static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(dir)))) + return -EIO; + if (f2fs_encrypted_inode(dir)) { int err = fscrypt_get_encryption_info(dir); if (err) @@ -738,6 +759,9 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) static int f2fs_create_whiteout(struct inode *dir, struct inode **whiteout) { + if (unlikely(f2fs_cp_error(F2FS_I_SB(dir)))) + return -EIO; + return __f2fs_tmpfile(dir, NULL, S_IFCHR | WHITEOUT_MODE, whiteout); } @@ -757,6 +781,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, bool is_old_inline = f2fs_has_inline_dentry(old_dir); int err = -ENOENT; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + if ((f2fs_encrypted_inode(old_dir) && !fscrypt_has_encryption_key(old_dir)) || (f2fs_encrypted_inode(new_dir) && @@ -950,6 +977,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, int old_nlink = 0, new_nlink = 0; int err = -ENOENT; + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + if ((f2fs_encrypted_inode(old_dir) && !fscrypt_has_encryption_key(old_dir)) || (f2fs_encrypted_inode(new_dir) && diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 491dce39d47a..97fa81503e1f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -859,6 +859,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync) struct f2fs_sb_info *sbi = F2FS_SB(sb); int err = 0; + if (unlikely(f2fs_cp_error(sbi))) + return 0; + trace_f2fs_sync_fs(sb, sync); if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) -- cgit v1.2.3 From 6afc662e68b5f988282ff20afd58a89b1c279dca Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 6 Sep 2017 21:59:50 +0800 Subject: f2fs: support flexible inline xattr size Now, in product, more and more features based on file encryption were introduced, their demand of xattr space is increasing, however, inline xattr has fixed-size of 200 bytes, once inline xattr space is full, new increased xattr data would occupy additional xattr block which may bring us more space usage and performance regression during persisting. In order to resolve above issue, it's better to expand inline xattr size flexibly according to user's requirement. So this patch introduces new filesystem feature 'flexible inline xattr', and new mount option 'inline_xattr_size=%u', once mkfs enables the feature, we can use the option to make f2fs supporting flexible inline xattr size. To support this feature, we add extra attribute i_inline_xattr_size in inode layout, indicating that how many space inline xattr borrows from block address mapping space in inode layout, by this, we can easily locate and store flexible-sized inline xattr data in inode. Inode disk layout: +----------------------+ | .i_mode | | ... | | .i_ext | +----------------------+ | .i_extra_isize | | .i_inline_xattr_size |-----------+ | ... | | +----------------------+ | | .i_addr | | | - block address or | | | - inline data | | +----------------------+<---+ v | inline xattr | +---inline xattr range +----------------------+<---+ | .i_nid | +----------------------+ | node_footer | | (nid, ino, offset) | +----------------------+ Note that, we have to cnosider backward compatibility which reserved inline_data space, 200 bytes, all the time, reported by Sheng Yong. Previous inline data or directory always reserved 200 bytes in inode layout, even if inline_xattr is disabled. In order to keep inline_dentry's structure for backward compatibility, we get the space back only from inline_data. Signed-off-by: Chao Yu Reported-by: Sheng Yong Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 39 ++++++++++++++++++++++++++------------- fs/f2fs/inode.c | 21 +++++++++++++++++++++ fs/f2fs/namei.c | 13 +++++++++++++ fs/f2fs/node.c | 10 ++++++++-- fs/f2fs/super.c | 32 +++++++++++++++++++++++++++++++- fs/f2fs/sysfs.c | 7 +++++++ fs/f2fs/xattr.c | 18 +++++++++--------- include/linux/f2fs_fs.h | 5 +++-- 8 files changed, 118 insertions(+), 27 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4034459f1f5d..13a96b8aa2d7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -93,6 +93,7 @@ extern char *fault_name[FAULT_MAX]; #define F2FS_MOUNT_GRPQUOTA 0x00100000 #define F2FS_MOUNT_PRJQUOTA 0x00200000 #define F2FS_MOUNT_QUOTA 0x00400000 +#define F2FS_MOUNT_INLINE_XATTR_SIZE 0x00800000 #define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) @@ -118,6 +119,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_EXTRA_ATTR 0x0008 #define F2FS_FEATURE_PRJQUOTA 0x0010 #define F2FS_FEATURE_INODE_CHKSUM 0x0020 +#define F2FS_FEATURE_FLEXIBLE_INLINE_XATTR 0x0040 #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) @@ -397,11 +399,14 @@ struct f2fs_flush_device { /* for inline stuff */ #define DEF_INLINE_RESERVED_SIZE 1 +#define DEF_MIN_INLINE_SIZE 1 static inline int get_extra_isize(struct inode *inode); -#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \ - (CUR_ADDRS_PER_INODE(inode) - \ - DEF_INLINE_RESERVED_SIZE - \ - F2FS_INLINE_XATTR_ADDRS)) +static inline int get_inline_xattr_addrs(struct inode *inode); +#define F2FS_INLINE_XATTR_ADDRS(inode) get_inline_xattr_addrs(inode) +#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \ + (CUR_ADDRS_PER_INODE(inode) - \ + F2FS_INLINE_XATTR_ADDRS(inode) - \ + DEF_INLINE_RESERVED_SIZE)) /* for inline dir */ #define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \ @@ -612,6 +617,7 @@ struct f2fs_inode_info { int i_extra_isize; /* size of extra space located in i_addr */ kprojid_t i_projid; /* id for project quota */ + int i_inline_xattr_size; /* inline xattr size */ }; static inline void get_extent_info(struct extent_info *ext, @@ -1069,6 +1075,7 @@ struct f2fs_sb_info { loff_t max_file_blocks; /* max block index of file */ int active_logs; /* # of active logs */ int dir_level; /* directory level */ + int inline_xattr_size; /* inline xattr size */ block_t user_block_count; /* # of user blocks */ block_t total_valid_block_count; /* # of valid blocks */ @@ -2186,25 +2193,20 @@ static inline int f2fs_has_inline_xattr(struct inode *inode) static inline unsigned int addrs_per_inode(struct inode *inode) { - if (f2fs_has_inline_xattr(inode)) - return CUR_ADDRS_PER_INODE(inode) - F2FS_INLINE_XATTR_ADDRS; - return CUR_ADDRS_PER_INODE(inode); + return CUR_ADDRS_PER_INODE(inode) - F2FS_INLINE_XATTR_ADDRS(inode); } -static inline void *inline_xattr_addr(struct page *page) +static inline void *inline_xattr_addr(struct inode *inode, struct page *page) { struct f2fs_inode *ri = F2FS_INODE(page); return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - - F2FS_INLINE_XATTR_ADDRS]); + F2FS_INLINE_XATTR_ADDRS(inode)]); } static inline int inline_xattr_size(struct inode *inode) { - if (f2fs_has_inline_xattr(inode)) - return F2FS_INLINE_XATTR_ADDRS << 2; - else - return 0; + return get_inline_xattr_addrs(inode) * sizeof(__le32); } static inline int f2fs_has_inline_data(struct inode *inode) @@ -2354,6 +2356,12 @@ static inline int get_extra_isize(struct inode *inode) return F2FS_I(inode)->i_extra_isize / sizeof(__le32); } +static inline int f2fs_sb_has_flexible_inline_xattr(struct super_block *sb); +static inline int get_inline_xattr_addrs(struct inode *inode) +{ + return F2FS_I(inode)->i_inline_xattr_size; +} + #define get_inode_mode(i) \ ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) @@ -3023,6 +3031,11 @@ static inline int f2fs_sb_has_inode_chksum(struct super_block *sb) return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_INODE_CHKSUM); } +static inline int f2fs_sb_has_flexible_inline_xattr(struct super_block *sb) +{ + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_FLEXIBLE_INLINE_XATTR); +} + #ifdef CONFIG_BLK_DEV_ZONED static inline int get_blkz_type(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkaddr) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 3617e7fca930..9684d53563f1 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -232,6 +232,23 @@ static int do_read_inode(struct inode *inode) fi->i_extra_isize = f2fs_has_extra_attr(inode) ? le16_to_cpu(ri->i_extra_isize) : 0; + if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) { + f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode)); + fi->i_inline_xattr_size = le16_to_cpu(ri->i_inline_xattr_size); + } else if (f2fs_has_inline_xattr(inode) || + f2fs_has_inline_dentry(inode)) { + fi->i_inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; + } else { + + /* + * Previous inline data or directory always reserved 200 bytes + * in inode layout, even if inline_xattr is disabled. In order + * to keep inline_dentry's structure for backward compatibility, + * we get the space back only from inline_data. + */ + fi->i_inline_xattr_size = 0; + } + /* check data exist */ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) __recover_inline_status(inode, node_page); @@ -384,6 +401,10 @@ int update_inode(struct inode *inode, struct page *node_page) if (f2fs_has_extra_attr(inode)) { ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize); + if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)->sb)) + ri->i_inline_xattr_size = + cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size); + if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) && F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, i_projid)) { diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 773d327f8fc1..28bdf8828e73 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -29,6 +29,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) nid_t ino; struct inode *inode; bool nid_free = false; + int xattr_size = 0; int err; inode = new_inode(dir->i_sb); @@ -86,11 +87,23 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (test_opt(sbi, INLINE_XATTR)) set_inode_flag(inode, FI_INLINE_XATTR); + if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) set_inode_flag(inode, FI_INLINE_DATA); if (f2fs_may_inline_dentry(inode)) set_inode_flag(inode, FI_INLINE_DENTRY); + if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)) { + f2fs_bug_on(sbi, !f2fs_has_extra_attr(inode)); + if (f2fs_has_inline_xattr(inode)) + xattr_size = sbi->inline_xattr_size; + /* Otherwise, will be 0 */ + } else if (f2fs_has_inline_xattr(inode) || + f2fs_has_inline_dentry(inode)) { + xattr_size = DEFAULT_INLINE_XATTR_ADDRS; + } + F2FS_I(inode)->i_inline_xattr_size = xattr_size; + f2fs_init_extent_tree(inode, NULL); stat_inc_inline_xattr(inode); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index bdb7e92636b4..f44d83705245 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2191,8 +2191,8 @@ void recover_inline_xattr(struct inode *inode, struct page *page) goto update_inode; } - dst_addr = inline_xattr_addr(ipage); - src_addr = inline_xattr_addr(page); + dst_addr = inline_xattr_addr(inode, ipage); + src_addr = inline_xattr_addr(inode, page); inline_size = inline_xattr_size(inode); f2fs_wait_on_page_writeback(ipage, NODE, true); @@ -2281,6 +2281,12 @@ retry: dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR); if (dst->i_inline & F2FS_EXTRA_ATTR) { dst->i_extra_isize = src->i_extra_isize; + + if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) && + F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), + i_inline_xattr_size)) + dst->i_inline_xattr_size = src->i_inline_xattr_size; + if (f2fs_sb_has_project_quota(sbi->sb) && F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize), i_projid)) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 97fa81503e1f..213d2c1e5759 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -92,6 +92,7 @@ enum { Opt_disable_ext_identify, Opt_inline_xattr, Opt_noinline_xattr, + Opt_inline_xattr_size, Opt_inline_data, Opt_inline_dentry, Opt_noinline_dentry, @@ -141,6 +142,7 @@ static match_table_t f2fs_tokens = { {Opt_disable_ext_identify, "disable_ext_identify"}, {Opt_inline_xattr, "inline_xattr"}, {Opt_noinline_xattr, "noinline_xattr"}, + {Opt_inline_xattr_size, "inline_xattr_size=%u"}, {Opt_inline_data, "inline_data"}, {Opt_inline_dentry, "inline_dentry"}, {Opt_noinline_dentry, "noinline_dentry"}, @@ -383,6 +385,12 @@ static int parse_options(struct super_block *sb, char *options) case Opt_noinline_xattr: clear_opt(sbi, INLINE_XATTR); break; + case Opt_inline_xattr_size: + if (args->from && match_int(args, &arg)) + return -EINVAL; + set_opt(sbi, INLINE_XATTR_SIZE); + sbi->inline_xattr_size = arg; + break; #else case Opt_user_xattr: f2fs_msg(sb, KERN_INFO, @@ -604,6 +612,24 @@ static int parse_options(struct super_block *sb, char *options) F2FS_IO_SIZE_KB(sbi)); return -EINVAL; } + + if (test_opt(sbi, INLINE_XATTR_SIZE)) { + if (!test_opt(sbi, INLINE_XATTR)) { + f2fs_msg(sb, KERN_ERR, + "inline_xattr_size option should be " + "set with inline_xattr option"); + return -EINVAL; + } + if (!sbi->inline_xattr_size || + sbi->inline_xattr_size >= DEF_ADDRS_PER_INODE - + F2FS_TOTAL_EXTRA_ATTR_SIZE - + DEF_INLINE_RESERVED_SIZE - + DEF_MIN_INLINE_SIZE) { + f2fs_msg(sb, KERN_ERR, + "inline xattr size is out of range"); + return -EINVAL; + } + } return 0; } @@ -1050,6 +1076,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",inline_xattr"); else seq_puts(seq, ",noinline_xattr"); + if (test_opt(sbi, INLINE_XATTR_SIZE)) + seq_printf(seq, ",inline_xattr_size=%u", + sbi->inline_xattr_size); #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL if (test_opt(sbi, POSIX_ACL)) @@ -1112,6 +1141,7 @@ static void default_options(struct f2fs_sb_info *sbi) { /* init some FS parameters */ sbi->active_logs = NR_CURSEG_TYPE; + sbi->inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS; set_opt(sbi, BG_GC); set_opt(sbi, INLINE_XATTR); @@ -1670,7 +1700,7 @@ static loff_t max_file_blocks(void) /* * note: previously, result is equal to (DEF_ADDRS_PER_INODE - - * F2FS_INLINE_XATTR_ADDRS), but now f2fs try to reserve more + * DEFAULT_INLINE_XATTR_ADDRS), but now f2fs try to reserve more * space in inode.i_addr, it will be more safe to reassign * result as zero. */ diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 48ebe6153cc5..e09e59cc678a 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -107,6 +107,9 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_inode_chksum(sb)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "inode_checksum"); + if (f2fs_sb_has_flexible_inline_xattr(sb)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "flexible_inline_xattr"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } @@ -216,6 +219,7 @@ enum feat_id { FEAT_EXTRA_ATTR, FEAT_PROJECT_QUOTA, FEAT_INODE_CHECKSUM, + FEAT_FLEXIBLE_INLINE_XATTR, }; static ssize_t f2fs_feature_show(struct f2fs_attr *a, @@ -228,6 +232,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_EXTRA_ATTR: case FEAT_PROJECT_QUOTA: case FEAT_INODE_CHECKSUM: + case FEAT_FLEXIBLE_INLINE_XATTR: return snprintf(buf, PAGE_SIZE, "supported\n"); } return 0; @@ -299,6 +304,7 @@ F2FS_FEATURE_RO_ATTR(atomic_write, FEAT_ATOMIC_WRITE); F2FS_FEATURE_RO_ATTR(extra_attr, FEAT_EXTRA_ATTR); F2FS_FEATURE_RO_ATTR(project_quota, FEAT_PROJECT_QUOTA); F2FS_FEATURE_RO_ATTR(inode_checksum, FEAT_INODE_CHECKSUM); +F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -346,6 +352,7 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(extra_attr), ATTR_LIST(project_quota), ATTR_LIST(inode_checksum), + ATTR_LIST(flexible_inline_xattr), NULL, }; diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 9fbcb7687d3b..52b46f598470 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -217,12 +217,12 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index, return entry; } -static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr, - void **last_addr, int index, - size_t len, const char *name) +static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode, + void *base_addr, void **last_addr, int index, + size_t len, const char *name) { struct f2fs_xattr_entry *entry; - unsigned int inline_size = F2FS_INLINE_XATTR_ADDRS << 2; + unsigned int inline_size = inline_xattr_size(inode); list_for_each_xattr(entry, base_addr) { if ((void *)entry + sizeof(__u32) > base_addr + inline_size || @@ -250,13 +250,13 @@ static int read_inline_xattr(struct inode *inode, struct page *ipage, void *inline_addr; if (ipage) { - inline_addr = inline_xattr_addr(ipage); + inline_addr = inline_xattr_addr(inode, ipage); } else { page = get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) return PTR_ERR(page); - inline_addr = inline_xattr_addr(page); + inline_addr = inline_xattr_addr(inode, page); } memcpy(txattr_addr, inline_addr, inline_size); f2fs_put_page(page, 1); @@ -309,7 +309,7 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, if (err) goto out; - *xe = __find_inline_xattr(txattr_addr, &last_addr, + *xe = __find_inline_xattr(inode, txattr_addr, &last_addr, index, len, name); if (*xe) goto check; @@ -404,7 +404,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, void *inline_addr; if (ipage) { - inline_addr = inline_xattr_addr(ipage); + inline_addr = inline_xattr_addr(inode, ipage); f2fs_wait_on_page_writeback(ipage, NODE, true); set_page_dirty(ipage); } else { @@ -413,7 +413,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, alloc_nid_failed(sbi, new_nid); return PTR_ERR(page); } - inline_addr = inline_xattr_addr(page); + inline_addr = inline_xattr_addr(inode, page); f2fs_wait_on_page_writeback(page, NODE, true); } memcpy(inline_addr, txattr_addr, inline_size); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 2a0c453d7235..50a8ee501bf1 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -184,7 +184,8 @@ struct f2fs_extent { } __packed; #define F2FS_NAME_LEN 255 -#define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */ +/* 200 bytes for inline xattrs by default */ +#define DEFAULT_INLINE_XATTR_ADDRS 50 #define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ #define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \ get_extra_isize(inode)) @@ -238,7 +239,7 @@ struct f2fs_inode { union { struct { __le16 i_extra_isize; /* extra inode attribute size */ - __le16 i_padding; /* padding */ + __le16 i_inline_xattr_size; /* inline xattr size, unit: 4 bytes */ __le32 i_projid; /* project id */ __le32 i_inode_checksum;/* inode meta checksum */ __le32 i_extra_end[0]; /* for attribute size calculation */ -- cgit v1.2.3 From 80d421450187e894af841849d66614a0456912f9 Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Fri, 27 Oct 2017 20:45:05 +0800 Subject: f2fs: support soft block reservation It supports to extend reserved_blocks sysfs interface to be soft threshold, which allows user configure it exceeding current available user space. This patch also introduces a new sysfs interface called current_reserved_blocks, which shows the current blocks that have already been reserved. Signed-off-by: Yunlong Song Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/ABI/testing/sysfs-fs-f2fs | 13 ++++++++++++- fs/f2fs/f2fs.h | 13 +++++++++++-- fs/f2fs/super.c | 3 ++- fs/f2fs/sysfs.c | 15 ++++++++++++--- 4 files changed, 37 insertions(+), 7 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 11b7f4ebea7c..7562b0d42105 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -138,7 +138,18 @@ What: /sys/fs/f2fs//reserved_blocks Date: June 2017 Contact: "Chao Yu" Description: - Controls current reserved blocks in system. + Controls target reserved blocks in system, the threshold + is soft, it could exceed current available user space. + +What: /sys/fs/f2fs//current_reserved_blocks +Date: October 2017 +Contact: "Yunlong Song" +Contact: "Chao Yu" +Description: + Shows current reserved blocks in system, it may be temporarily + smaller than target_reserved_blocks, but will gradually + increase to target_reserved_blocks when more free blocks are + freed by user later. What: /sys/fs/f2fs//gc_urgent Date: August 2017 diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 13a96b8aa2d7..5ed40fbd1a36 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1082,6 +1082,7 @@ struct f2fs_sb_info { block_t discard_blks; /* discard command candidats */ block_t last_valid_block_count; /* for recovery */ block_t reserved_blocks; /* configurable reserved blocks */ + block_t current_reserved_blocks; /* current reserved blocks */ u32 s_next_generation; /* for NFS support */ @@ -1557,7 +1558,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); sbi->total_valid_block_count += (block_t)(*count); - avail_user_block_count = sbi->user_block_count - sbi->reserved_blocks; + avail_user_block_count = sbi->user_block_count - + sbi->current_reserved_blocks; if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { diff = sbi->total_valid_block_count - avail_user_block_count; *count -= diff; @@ -1591,6 +1593,10 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); f2fs_bug_on(sbi, inode->i_blocks < sectors); sbi->total_valid_block_count -= (block_t)count; + if (sbi->reserved_blocks && + sbi->current_reserved_blocks < sbi->reserved_blocks) + sbi->current_reserved_blocks = min(sbi->reserved_blocks, + sbi->current_reserved_blocks + count); spin_unlock(&sbi->stat_lock); f2fs_i_blocks_write(inode, count, false, true); } @@ -1737,7 +1743,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); valid_block_count = sbi->total_valid_block_count + 1; - if (unlikely(valid_block_count + sbi->reserved_blocks > + if (unlikely(valid_block_count + sbi->current_reserved_blocks > sbi->user_block_count)) { spin_unlock(&sbi->stat_lock); goto enospc; @@ -1780,6 +1786,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, sbi->total_valid_node_count--; sbi->total_valid_block_count--; + if (sbi->reserved_blocks && + sbi->current_reserved_blocks < sbi->reserved_blocks) + sbi->current_reserved_blocks++; spin_unlock(&sbi->stat_lock); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 213d2c1e5759..dab1fedf7148 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -987,7 +987,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = total_count - start_count; buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count; buf->f_bavail = user_block_count - valid_user_blocks(sbi) - - sbi->reserved_blocks; + sbi->current_reserved_blocks; avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; @@ -2458,6 +2458,7 @@ try_onemore: le64_to_cpu(sbi->ckpt->valid_block_count); sbi->last_valid_block_count = sbi->total_valid_block_count; sbi->reserved_blocks = 0; + sbi->current_reserved_blocks = 0; for (i = 0; i < NR_INODE_TYPE; i++) { INIT_LIST_HEAD(&sbi->inode_list[i]); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index e09e59cc678a..4166ac74e837 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -30,7 +30,7 @@ enum { FAULT_INFO_RATE, /* struct f2fs_fault_info */ FAULT_INFO_TYPE, /* struct f2fs_fault_info */ #endif - RESERVED_BLOCKS, + RESERVED_BLOCKS, /* struct f2fs_sb_info */ }; struct f2fs_attr { @@ -114,6 +114,12 @@ static ssize_t features_show(struct f2fs_attr *a, return len; } +static ssize_t current_reserved_blocks_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%u\n", sbi->current_reserved_blocks); +} + static ssize_t f2fs_sbi_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -153,12 +159,13 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a, #endif if (a->struct_type == RESERVED_BLOCKS) { spin_lock(&sbi->stat_lock); - if ((unsigned long)sbi->total_valid_block_count + t > - (unsigned long)sbi->user_block_count) { + if (t > (unsigned long)sbi->user_block_count) { spin_unlock(&sbi->stat_lock); return -EINVAL; } *ui = t; + sbi->current_reserved_blocks = min(sbi->reserved_blocks, + sbi->user_block_count - valid_user_blocks(sbi)); spin_unlock(&sbi->stat_lock); return count; } @@ -293,6 +300,7 @@ F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); F2FS_GENERAL_RO_ATTR(dirty_segments); F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); F2FS_GENERAL_RO_ATTR(features); +F2FS_GENERAL_RO_ATTR(current_reserved_blocks); #ifdef CONFIG_F2FS_FS_ENCRYPTION F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO); @@ -338,6 +346,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(lifetime_write_kbytes), ATTR_LIST(features), ATTR_LIST(reserved_blocks), + ATTR_LIST(current_reserved_blocks), NULL, }; -- cgit v1.2.3 From 01eccef7930f137bed9501bf0923931f45909b94 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 28 Oct 2017 16:52:30 +0800 Subject: f2fs: support get_page error injection This patch adds to support get_page error injection to simulate out-of-memory test scenario. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 14 ++++++++++++++ fs/f2fs/gc.c | 4 ++-- fs/f2fs/node.c | 3 ++- fs/f2fs/super.c | 1 + 5 files changed, 20 insertions(+), 4 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7fd09837820a..acb531788c43 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1965,7 +1965,7 @@ repeat: * Do not use grab_cache_page_write_begin() to avoid deadlock due to * wait_for_stable_page. Will wait that below with our IO control. */ - page = pagecache_get_page(mapping, index, + page = f2fs_pagecache_get_page(mapping, index, FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS); if (!page) { err = -ENOMEM; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5ed40fbd1a36..7ef3193db438 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -46,6 +46,7 @@ enum { FAULT_KMALLOC, FAULT_PAGE_ALLOC, + FAULT_PAGE_GET, FAULT_ALLOC_NID, FAULT_ORPHAN, FAULT_BLOCK, @@ -1835,6 +1836,19 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, return grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); } +static inline struct page *f2fs_pagecache_get_page( + struct address_space *mapping, pgoff_t index, + int fgp_flags, gfp_t gfp_mask) +{ +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) { + f2fs_show_injection_info(FAULT_PAGE_GET); + return NULL; + } +#endif + return pagecache_get_page(mapping, index, fgp_flags, gfp_mask); +} + static inline void f2fs_copy_page(struct page *src, struct page *dst) { char *src_kaddr = kmap(src); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 197ebf45e4e4..c7b1d704846a 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -650,8 +650,8 @@ static void move_data_block(struct inode *inode, block_t bidx, allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, &sum, CURSEG_COLD_DATA, NULL, false); - fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), newaddr, - FGP_LOCK | FGP_CREAT, GFP_NOFS); + fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi), + newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); if (!fio.encrypted_page) { err = -ENOMEM; goto recover_block; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f44d83705245..819671febd6d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1218,7 +1218,8 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) if (!inode) return; - page = pagecache_get_page(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0); + page = f2fs_pagecache_get_page(inode->i_mapping, 0, + FGP_LOCK|FGP_NOWAIT, 0); if (!page) goto iput_out; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index dab1fedf7148..4a5af8398f0c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -44,6 +44,7 @@ static struct kmem_cache *f2fs_inode_cachep; char *fault_name[FAULT_MAX] = { [FAULT_KMALLOC] = "kmalloc", [FAULT_PAGE_ALLOC] = "page alloc", + [FAULT_PAGE_GET] = "page get", [FAULT_ALLOC_NID] = "alloc nid", [FAULT_ORPHAN] = "orphan", [FAULT_BLOCK] = "no more block", -- cgit v1.2.3 From d62fe971485f15fa08fec2a75ac4e4ffead40a5a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 28 Oct 2017 16:52:31 +0800 Subject: f2fs: support bio allocation error injection This patch adds to support bio allocation error injection to simulate out-of-memory test scenario. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- fs/f2fs/f2fs.h | 23 +++++++++++++++++------ fs/f2fs/segment.c | 2 +- fs/f2fs/super.c | 1 + 4 files changed, 21 insertions(+), 9 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index acb531788c43..b0781edc9ada 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -173,7 +173,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, { struct bio *bio; - bio = f2fs_bio_alloc(npages); + bio = f2fs_bio_alloc(sbi, npages, true); f2fs_target_device(sbi, blk_addr, bio); bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; @@ -473,7 +473,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, f2fs_wait_on_block_writeback(sbi, blkaddr); } - bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES)); + bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false); if (!bio) { if (ctx) fscrypt_release_ctx(ctx); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7ef3193db438..d89728c946a9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -47,6 +47,7 @@ enum { FAULT_KMALLOC, FAULT_PAGE_ALLOC, FAULT_PAGE_GET, + FAULT_ALLOC_BIO, FAULT_ALLOC_NID, FAULT_ORPHAN, FAULT_BLOCK, @@ -1898,15 +1899,25 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, return entry; } -static inline struct bio *f2fs_bio_alloc(int npages) +static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, + int npages, bool no_fail) { struct bio *bio; - /* No failure on bio allocation */ - bio = bio_alloc(GFP_NOIO, npages); - if (!bio) - bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); - return bio; + if (no_fail) { + /* No failure on bio allocation */ + bio = bio_alloc(GFP_NOIO, npages); + if (!bio) + bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); + return bio; + } +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(sbi, FAULT_ALLOC_BIO)) { + f2fs_show_injection_info(FAULT_ALLOC_BIO); + return NULL; + } +#endif + return bio_alloc(GFP_KERNEL, npages); } static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 46dfbca61b35..5c3293afeb8e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -511,7 +511,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) static int __submit_flush_wait(struct f2fs_sb_info *sbi, struct block_device *bdev) { - struct bio *bio = f2fs_bio_alloc(0); + struct bio *bio = f2fs_bio_alloc(sbi, 0, true); int ret; bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4a5af8398f0c..96e145c34ba2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -45,6 +45,7 @@ char *fault_name[FAULT_MAX] = { [FAULT_KMALLOC] = "kmalloc", [FAULT_PAGE_ALLOC] = "page alloc", [FAULT_PAGE_GET] = "page get", + [FAULT_ALLOC_BIO] = "alloc bio", [FAULT_ALLOC_NID] = "alloc nid", [FAULT_ORPHAN] = "orphan", [FAULT_BLOCK] = "no more block", -- cgit v1.2.3 From ea6767337f86312ebe473585899a159bf50ef1b7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 6 Oct 2017 09:14:28 -0700 Subject: f2fs: support quota sys files This patch supports hidden quota files in the system, which will be used for Android. It requires up-to-date f2fs-tools later than v1.9.0. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 9 ++- fs/f2fs/f2fs.h | 9 ++- fs/f2fs/recovery.c | 8 ++- fs/f2fs/super.c | 149 +++++++++++++++++++++++++++++++++++++++++++----- include/linux/f2fs_fs.h | 1 - 5 files changed, 157 insertions(+), 19 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6b52d4b66c7b..78e1b2998bbd 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -615,6 +615,9 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) block_t start_blk, orphan_blocks, i, j; unsigned int s_flags = sbi->sb->s_flags; int err = 0; +#ifdef CONFIG_QUOTA + int quota_enabled; +#endif if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG)) return 0; @@ -627,8 +630,9 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) #ifdef CONFIG_QUOTA /* Needed for iput() to work correctly and not trash data */ sbi->sb->s_flags |= MS_ACTIVE; + /* Turn on quotas so that they are updated correctly */ - f2fs_enable_quota_files(sbi); + quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY); #endif start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); @@ -656,7 +660,8 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) out: #ifdef CONFIG_QUOTA /* Turn quotas off */ - f2fs_quota_off_umount(sbi->sb); + if (quota_enabled) + f2fs_quota_off_umount(sbi->sb); #endif sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9a1c7ffa6845..484b5c6ac36b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1384,6 +1384,13 @@ static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) return le64_to_cpu(cp->checkpoint_ver); } +static inline unsigned long f2fs_qf_ino(struct super_block *sb, int type) +{ + if (type < F2FS_MAX_QUOTAS) + return le32_to_cpu(F2FS_SB(sb)->raw_super->qf_ino[type]); + return 0; +} + static inline __u64 cur_cp_crc(struct f2fs_checkpoint *cp) { size_t crc_offset = le32_to_cpu(cp->checksum_offset); @@ -2526,7 +2533,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) */ int f2fs_inode_dirtied(struct inode *inode, bool sync); void f2fs_inode_synced(struct inode *inode); -void f2fs_enable_quota_files(struct f2fs_sb_info *sbi); +int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly); void f2fs_quota_off_umount(struct super_block *sb); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 9626758bc762..92c57ace1939 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -594,6 +594,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) int ret = 0; unsigned long s_flags = sbi->sb->s_flags; bool need_writecp = false; +#ifdef CONFIG_QUOTA + int quota_enabled; +#endif if (s_flags & MS_RDONLY) { f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); @@ -604,7 +607,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) /* Needed for iput() to work correctly and not trash data */ sbi->sb->s_flags |= MS_ACTIVE; /* Turn on quotas so that they are updated correctly */ - f2fs_enable_quota_files(sbi); + quota_enabled = f2fs_enable_quota_files(sbi, s_flags & MS_RDONLY); #endif fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", @@ -665,7 +668,8 @@ skip: out: #ifdef CONFIG_QUOTA /* Turn quotas off */ - f2fs_quota_off_umount(sbi->sb); + if (quota_enabled) + f2fs_quota_off_umount(sbi->sb); #endif sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 96e145c34ba2..91eaa7b235eb 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -213,6 +213,12 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, "quota options when quota turned on"); return -EINVAL; } + if (f2fs_sb_has_quota_ino(sb)) { + f2fs_msg(sb, KERN_INFO, + "QUOTA feature is enabled, so ignore qf_name"); + return 0; + } + qname = match_strdup(args); if (!qname) { f2fs_msg(sb, KERN_ERR, @@ -291,6 +297,18 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) return -1; } } + + if (f2fs_sb_has_quota_ino(sbi->sb) && sbi->s_jquota_fmt) { + f2fs_msg(sbi->sb, KERN_INFO, + "QUOTA feature is enabled, so ignore jquota_fmt"); + sbi->s_jquota_fmt = 0; + } + if (f2fs_sb_has_quota_ino(sbi->sb) && sb_rdonly(sbi->sb)) { + f2fs_msg(sbi->sb, KERN_INFO, + "Filesystem with quota feature cannot be mounted RDWR " + "without CONFIG_QUOTA"); + return -1; + } return 0; } #endif @@ -1172,6 +1190,9 @@ static void default_options(struct f2fs_sb_info *sbi) #endif } +#ifdef CONFIG_QUOTA +static int f2fs_enable_quotas(struct super_block *sb); +#endif static int f2fs_remount(struct super_block *sb, int *flags, char *data) { struct f2fs_sb_info *sbi = F2FS_SB(sb); @@ -1238,6 +1259,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) if (f2fs_readonly(sb) && (*flags & MS_RDONLY)) goto skip; +#ifdef CONFIG_QUOTA if (!f2fs_readonly(sb) && (*flags & MS_RDONLY)) { err = dquot_suspend(sb, -1); if (err < 0) @@ -1245,9 +1267,15 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) } else { /* dquot_resume needs RW */ sb->s_flags &= ~MS_RDONLY; - dquot_resume(sb, -1); + if (sb_any_quota_suspended(sb)) { + dquot_resume(sb, -1); + } else if (f2fs_sb_has_quota_ino(sb)) { + err = f2fs_enable_quotas(sb); + if (err) + goto restore_opts; + } } - +#endif /* disallow enable/disable extent_cache dynamically */ if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) { err = -EINVAL; @@ -1454,19 +1482,91 @@ static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type) sbi->s_jquota_fmt, type); } -void f2fs_enable_quota_files(struct f2fs_sb_info *sbi) +int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) { - int i, ret; + int enabled = 0; + int i, err; + + if (f2fs_sb_has_quota_ino(sbi->sb) && rdonly) { + err = f2fs_enable_quotas(sbi->sb); + if (err) { + f2fs_msg(sbi->sb, KERN_ERR, + "Cannot turn on quota_ino: %d", err); + return 0; + } + return 1; + } for (i = 0; i < MAXQUOTAS; i++) { if (sbi->s_qf_names[i]) { - ret = f2fs_quota_on_mount(sbi, i); - if (ret < 0) - f2fs_msg(sbi->sb, KERN_ERR, - "Cannot turn on journaled " - "quota: error %d", ret); + err = f2fs_quota_on_mount(sbi, i); + if (!err) { + enabled = 1; + continue; + } + f2fs_msg(sbi->sb, KERN_ERR, + "Cannot turn on quotas: %d on %d", err, i); + } + } + return enabled; +} + +static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, + unsigned int flags) +{ + struct inode *qf_inode; + unsigned long qf_inum; + int err; + + BUG_ON(!f2fs_sb_has_quota_ino(sb)); + + qf_inum = f2fs_qf_ino(sb, type); + if (!qf_inum) + return -EPERM; + + qf_inode = f2fs_iget(sb, qf_inum); + if (IS_ERR(qf_inode)) { + f2fs_msg(sb, KERN_ERR, + "Bad quota inode %u:%lu", type, qf_inum); + return PTR_ERR(qf_inode); + } + + /* Don't account quota for quota files to avoid recursion */ + qf_inode->i_flags |= S_NOQUOTA; + err = dquot_enable(qf_inode, type, format_id, flags); + iput(qf_inode); + return err; +} + +static int f2fs_enable_quotas(struct super_block *sb) +{ + int type, err = 0; + unsigned long qf_inum; + bool quota_mopt[MAXQUOTAS] = { + test_opt(F2FS_SB(sb), USRQUOTA), + test_opt(F2FS_SB(sb), GRPQUOTA), + test_opt(F2FS_SB(sb), PRJQUOTA), + }; + + sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY; + for (type = 0; type < MAXQUOTAS; type++) { + qf_inum = f2fs_qf_ino(sb, type); + if (qf_inum) { + err = f2fs_quota_enable(sb, type, QFMT_VFS_V1, + DQUOT_USAGE_ENABLED | + (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0)); + if (err) { + f2fs_msg(sb, KERN_ERR, + "Failed to enable quota tracking " + "(type=%d, err=%d). Please run " + "fsck to fix.", type, err); + for (type--; type >= 0; type--) + dquot_quota_off(sb, type); + return err; + } } } + return 0; } static int f2fs_quota_sync(struct super_block *sb, int type) @@ -1537,7 +1637,7 @@ static int f2fs_quota_off(struct super_block *sb, int type) f2fs_quota_sync(sb, type); err = dquot_quota_off(sb, type); - if (err) + if (err || f2fs_sb_has_quota_ino(sb)) goto out_put; inode_lock(inode); @@ -2364,7 +2464,10 @@ try_onemore: #ifdef CONFIG_QUOTA sb->dq_op = &f2fs_quota_operations; - sb->s_qcop = &f2fs_quotactl_ops; + if (f2fs_sb_has_quota_ino(sb)) + sb->s_qcop = &dquot_quotactl_sysfile_ops; + else + sb->s_qcop = &f2fs_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; #endif @@ -2535,10 +2638,24 @@ try_onemore: if (err) goto free_root_inode; +#ifdef CONFIG_QUOTA + /* + * Turn on quotas which were not enabled for read-only mounts if + * filesystem has quota feature, so that they are updated correctly. + */ + if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb)) { + err = f2fs_enable_quotas(sb); + if (err) { + f2fs_msg(sb, KERN_ERR, + "Cannot turn on quotas: error %d", err); + goto free_sysfs; + } + } +#endif /* if there are nt orphan nodes free them */ err = recover_orphan_inodes(sbi); if (err) - goto free_sysfs; + goto free_meta; /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { @@ -2572,7 +2689,7 @@ try_onemore: err = -EINVAL; f2fs_msg(sb, KERN_ERR, "Need to recover fsync data"); - goto free_sysfs; + goto free_meta; } } skip_recovery: @@ -2606,6 +2723,10 @@ skip_recovery: return 0; free_meta: +#ifdef CONFIG_QUOTA + if (f2fs_sb_has_quota_ino(sb) && !sb_rdonly(sb)) + f2fs_quota_off_umount(sbi->sb); +#endif f2fs_sync_inode_meta(sbi); /* * Some dirty meta pages can be produced by recover_orphan_inodes() @@ -2614,7 +2735,9 @@ free_meta: * falls into an infinite loop in sync_meta_pages(). */ truncate_inode_pages_final(META_MAPPING(sbi)); +#ifdef CONFIG_QUOTA free_sysfs: +#endif f2fs_unregister_sysfs(sbi); free_root_inode: dput(sb->s_root); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index ce34007972c3..43e98d30d2df 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -36,7 +36,6 @@ #define F2FS_NODE_INO(sbi) ((sbi)->node_ino_num) #define F2FS_META_INO(sbi) ((sbi)->meta_ino_num) -#define F2FS_QUOTA_INO 3 #define F2FS_MAX_QUOTAS 3 #define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */ -- cgit v1.2.3 From d1954ab4c9aa67a0d5a2457120cbc8c02594de6a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Oct 2017 11:11:56 -0400 Subject: f2fs: don't bother with inode->i_version f2fs does not set the SB_I_VERSION flag, so the i_version will never be incremented on write. It was recently changed to increment the i_version on a quota write, which isn't necessary here. Signed-off-by: Jeff Layton Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/f2fs/super.c') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 91eaa7b235eb..031cb26a3a76 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -664,7 +664,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) init_once((void *) fi); /* Initialize f2fs-specific inode info */ - fi->vfs_inode.i_version = 1; atomic_set(&fi->dirty_pages, 0); fi->i_current_depth = 1; fi->i_advise = 0; @@ -1460,7 +1459,6 @@ retry: if (len == towrite) return err; - inode->i_version++; inode->i_mtime = inode->i_ctime = current_time(inode); f2fs_mark_inode_dirty_sync(inode, false); return len - towrite; -- cgit v1.2.3