diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-24 15:12:38 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-24 15:12:38 -0700 |
commit | 79952bdcbcea53e57c2ca97e7448f8a6bdb6106a (patch) | |
tree | 0596983639bfae68fefe771edb8fe04470148f53 /fs/f2fs | |
parent | fa8380a06bd0523e51f826520aac1beb8c585521 (diff) | |
parent | ae87b9c2dc9800e6ab52febd09341140599ff8e3 (diff) |
Merge tag 'f2fs-for-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
"The main changes include converting major IO paths to use folio, and
adding various knobs to control GC more flexibly for Zoned devices.
In addition, there are several patches to address corner cases of
atomic file operations and better support for file pinning on zoned
device.
Enhancement:
- add knobs to tune foreground/background GCs for Zoned devices
- convert IO paths to use folio
- reduce expensive checkpoint trigger frequency
- allow F2FS_IPU_NOCACHE for pinned file
- forcibly migrate to secure space for zoned device file pinning
- get rid of buffer_head use
- add write priority option based on zone UFS
- get rid of online repair on corrupted directory
Bug fixes:
- fix to don't panic system for no free segment fault injection
- fix to don't set SB_RDONLY in f2fs_handle_critical_error()
- avoid unused block when dio write in LFS mode
- compress: don't redirty sparse cluster during {,de}compress
- check discard support for conventional zones
- atomic: prevent atomic file from being dirtied before commit
- atomic: fix to check atomic_file in f2fs ioctl interfaces
- atomic: fix to forbid dio in atomic_file
- atomic: fix to truncate pagecache before on-disk metadata truncation
- atomic: create COW inode from parent dentry
- atomic: fix to avoid racing w/ GC
- atomic: require FMODE_WRITE for atomic write ioctls
- fix to wait page writeback before setting gcing flag
- fix to avoid racing in between read and OPU dio write, dio completion
- fix several potential integer overflows in file offsets and dir_block_index
- fix to avoid use-after-free in f2fs_stop_gc_thread()
As usual, there are several code clean-ups and refactorings"
* tag 'f2fs-for-6.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (60 commits)
f2fs: allow F2FS_IPU_NOCACHE for pinned file
f2fs: forcibly migrate to secure space for zoned device file pinning
f2fs: remove unused parameters
f2fs: fix to don't panic system for no free segment fault injection
f2fs: fix to don't set SB_RDONLY in f2fs_handle_critical_error()
f2fs: add valid block ratio not to do excessive GC for one time GC
f2fs: create gc_no_zoned_gc_percent and gc_boost_zoned_gc_percent
f2fs: do FG_GC when GC boosting is required for zoned devices
f2fs: increase BG GC migration window granularity when boosted for zoned devices
f2fs: add reserved_segments sysfs node
f2fs: introduce migration_window_granularity
f2fs: make BG GC more aggressive for zoned devices
f2fs: avoid unused block when dio write in LFS mode
f2fs: fix to check atomic_file in f2fs ioctl interfaces
f2fs: get rid of online repaire on corrupted directory
f2fs: prevent atomic file from being dirtied before commit
f2fs: get rid of page->index
f2fs: convert read_node_page() to use folio
f2fs: convert __write_node_page() to use folio
f2fs: convert f2fs_write_data_page() to use folio
...
Diffstat (limited to 'fs/f2fs')
-rw-r--r-- | fs/f2fs/checkpoint.c | 17 | ||||
-rw-r--r-- | fs/f2fs/compress.c | 63 | ||||
-rw-r--r-- | fs/f2fs/data.c | 164 | ||||
-rw-r--r-- | fs/f2fs/debug.c | 2 | ||||
-rw-r--r-- | fs/f2fs/dir.c | 8 | ||||
-rw-r--r-- | fs/f2fs/extent_cache.c | 4 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 148 | ||||
-rw-r--r-- | fs/f2fs/file.c | 199 | ||||
-rw-r--r-- | fs/f2fs/gc.c | 113 | ||||
-rw-r--r-- | fs/f2fs/gc.h | 29 | ||||
-rw-r--r-- | fs/f2fs/inline.c | 31 | ||||
-rw-r--r-- | fs/f2fs/inode.c | 9 | ||||
-rw-r--r-- | fs/f2fs/namei.c | 68 | ||||
-rw-r--r-- | fs/f2fs/node.c | 46 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 72 | ||||
-rw-r--r-- | fs/f2fs/segment.h | 5 | ||||
-rw-r--r-- | fs/f2fs/super.c | 119 | ||||
-rw-r--r-- | fs/f2fs/sysfs.c | 82 | ||||
-rw-r--r-- | fs/f2fs/verity.c | 5 | ||||
-rw-r--r-- | fs/f2fs/xattr.c | 14 |
20 files changed, 738 insertions, 460 deletions
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index bdd96329dddd..7f76460b721f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -99,7 +99,7 @@ repeat: } if (unlikely(!PageUptodate(page))) { - f2fs_handle_page_eio(sbi, page->index, META); + f2fs_handle_page_eio(sbi, page_folio(page), META); f2fs_put_page(page, 1); return ERR_PTR(-EIO); } @@ -345,30 +345,31 @@ static int __f2fs_write_meta_page(struct page *page, enum iostat_type io_type) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); + struct folio *folio = page_folio(page); - trace_f2fs_writepage(page_folio(page), META); + trace_f2fs_writepage(folio, META); if (unlikely(f2fs_cp_error(sbi))) { if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); dec_page_count(sbi, F2FS_DIRTY_META); - unlock_page(page); + folio_unlock(folio); return 0; } goto redirty_out; } if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) + if (wbc->for_reclaim && folio->index < GET_SUM_BLOCK(sbi, 0)) goto redirty_out; - f2fs_do_write_meta_page(sbi, page, io_type); + f2fs_do_write_meta_page(sbi, folio, io_type); dec_page_count(sbi, F2FS_DIRTY_META); if (wbc->for_reclaim) f2fs_submit_merged_write_cond(sbi, NULL, page, 0, META); - unlock_page(page); + folio_unlock(folio); if (unlikely(f2fs_cp_error(sbi))) f2fs_submit_merged_write(sbi, META); @@ -1551,7 +1552,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) blk = start_blk + BLKS_PER_SEG(sbi) - nm_i->nat_bits_blocks; for (i = 0; i < nm_i->nat_bits_blocks; i++) f2fs_update_meta_page(sbi, nm_i->nat_bits + - (i << F2FS_BLKSIZE_BITS), blk + i); + F2FS_BLK_TO_BYTES(i), blk + i); } /* write out checkpoint buffer at block 0 */ diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 990b93689b46..7f26440e8595 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -90,11 +90,13 @@ bool f2fs_is_compressed_page(struct page *page) static void f2fs_set_compressed_page(struct page *page, struct inode *inode, pgoff_t index, void *data) { - attach_page_private(page, (void *)data); + struct folio *folio = page_folio(page); + + folio_attach_private(folio, (void *)data); /* i_crypto_info and iv index */ - page->index = index; - page->mapping = inode->i_mapping; + folio->index = index; + folio->mapping = inode->i_mapping; } static void f2fs_drop_rpages(struct compress_ctx *cc, int len, bool unlock) @@ -160,17 +162,17 @@ void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse) cc->cluster_idx = NULL_CLUSTER; } -void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page) +void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct folio *folio) { unsigned int cluster_ofs; - if (!f2fs_cluster_can_merge_page(cc, page->index)) + if (!f2fs_cluster_can_merge_page(cc, folio->index)) f2fs_bug_on(F2FS_I_SB(cc->inode), 1); - cluster_ofs = offset_in_cluster(cc, page->index); - cc->rpages[cluster_ofs] = page; + cluster_ofs = offset_in_cluster(cc, folio->index); + cc->rpages[cluster_ofs] = folio_page(folio, 0); cc->nr_rpages++; - cc->cluster_idx = cluster_idx(cc, page->index); + cc->cluster_idx = cluster_idx(cc, folio->index); } #ifdef CONFIG_F2FS_FS_LZO @@ -879,7 +881,7 @@ static bool cluster_has_invalid_data(struct compress_ctx *cc) f2fs_bug_on(F2FS_I_SB(cc->inode), !page); /* beyond EOF */ - if (page->index >= nr_pages) + if (page_folio(page)->index >= nr_pages) return true; } return false; @@ -945,7 +947,7 @@ static int __f2fs_get_cluster_blocks(struct inode *inode, unsigned int cluster_size = F2FS_I(inode)->i_cluster_size; int count, i; - for (i = 1, count = 1; i < cluster_size; i++) { + for (i = 0, count = 0; i < cluster_size; i++) { block_t blkaddr = data_blkaddr(dn->inode, dn->node_page, dn->ofs_in_node + i); @@ -956,8 +958,8 @@ static int __f2fs_get_cluster_blocks(struct inode *inode, return count; } -static int __f2fs_cluster_blocks(struct inode *inode, - unsigned int cluster_idx, bool compr_blks) +static int __f2fs_cluster_blocks(struct inode *inode, unsigned int cluster_idx, + enum cluster_check_type type) { struct dnode_of_data dn; unsigned int start_idx = cluster_idx << @@ -978,10 +980,12 @@ static int __f2fs_cluster_blocks(struct inode *inode, } if (dn.data_blkaddr == COMPRESS_ADDR) { - if (compr_blks) - ret = __f2fs_get_cluster_blocks(inode, &dn); - else + if (type == CLUSTER_COMPR_BLKS) + ret = 1 + __f2fs_get_cluster_blocks(inode, &dn); + else if (type == CLUSTER_IS_COMPR) ret = 1; + } else if (type == CLUSTER_RAW_BLKS) { + ret = __f2fs_get_cluster_blocks(inode, &dn); } fail: f2fs_put_dnode(&dn); @@ -991,7 +995,16 @@ fail: /* return # of compressed blocks in compressed cluster */ static int f2fs_compressed_blocks(struct compress_ctx *cc) { - return __f2fs_cluster_blocks(cc->inode, cc->cluster_idx, true); + return __f2fs_cluster_blocks(cc->inode, cc->cluster_idx, + CLUSTER_COMPR_BLKS); +} + +/* return # of raw blocks in non-compressed cluster */ +static int f2fs_decompressed_blocks(struct inode *inode, + unsigned int cluster_idx) +{ + return __f2fs_cluster_blocks(inode, cluster_idx, + CLUSTER_RAW_BLKS); } /* return whether cluster is compressed one or not */ @@ -999,7 +1012,16 @@ int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) { return __f2fs_cluster_blocks(inode, index >> F2FS_I(inode)->i_log_cluster_size, - false); + CLUSTER_IS_COMPR); +} + +/* return whether cluster contains non raw blocks or not */ +bool f2fs_is_sparse_cluster(struct inode *inode, pgoff_t index) +{ + unsigned int cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size; + + return f2fs_decompressed_blocks(inode, cluster_idx) != + F2FS_I(inode)->i_cluster_size; } static bool cluster_may_compress(struct compress_ctx *cc) @@ -1093,7 +1115,7 @@ retry: if (PageUptodate(page)) f2fs_put_page(page, 1); else - f2fs_compress_ctx_add_page(cc, page); + f2fs_compress_ctx_add_page(cc, page_folio(page)); } if (!f2fs_cluster_is_empty(cc)) { @@ -1123,7 +1145,7 @@ retry: } f2fs_wait_on_page_writeback(page, DATA, true, true); - f2fs_compress_ctx_add_page(cc, page); + f2fs_compress_ctx_add_page(cc, page_folio(page)); if (!PageUptodate(page)) { release_and_retry: @@ -1523,7 +1545,8 @@ continue_unlock: if (!clear_page_dirty_for_io(cc->rpages[i])) goto continue_unlock; - ret = f2fs_write_single_data_page(cc->rpages[i], &submitted, + ret = f2fs_write_single_data_page(page_folio(cc->rpages[i]), + &submitted, NULL, NULL, wbc, io_type, compr_blocks, false); if (ret) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5dfa0207ad8f..94f7b084f601 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -7,7 +7,6 @@ */ #include <linux/fs.h> #include <linux/f2fs_fs.h> -#include <linux/buffer_head.h> #include <linux/sched/mm.h> #include <linux/mpage.h> #include <linux/writeback.h> @@ -355,7 +354,7 @@ static void f2fs_write_end_io(struct bio *bio) } f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) && - page->index != nid_of_node(page)); + page_folio(page)->index != nid_of_node(page)); dec_page_count(sbi, type); if (f2fs_in_warm_node_list(sbi, page)) @@ -704,7 +703,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) bio = __bio_alloc(fio, 1); f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, - fio->page->index, fio, GFP_NOIO); + page_folio(fio->page)->index, fio, GFP_NOIO); if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); @@ -803,7 +802,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, fio->new_blkaddr)); if (f2fs_crypt_mergeable_bio(*bio, fio->page->mapping->host, - fio->page->index, fio) && + page_folio(fio->page)->index, fio) && bio_add_page(*bio, page, PAGE_SIZE, 0) == PAGE_SIZE) { ret = 0; @@ -903,7 +902,7 @@ alloc_new: if (!bio) { bio = __bio_alloc(fio, BIO_MAX_VECS); f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, - fio->page->index, fio, GFP_NOIO); + page_folio(fio->page)->index, fio, GFP_NOIO); add_bio_entry(fio->sbi, bio, page, fio->temp); } else { @@ -996,13 +995,13 @@ next: (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio, fio->new_blkaddr) || !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host, - bio_page->index, fio))) + page_folio(bio_page)->index, fio))) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { io->bio = __bio_alloc(fio, BIO_MAX_VECS); f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host, - bio_page->index, fio, GFP_NOIO); + page_folio(bio_page)->index, fio, GFP_NOIO); io->fio = *fio; } @@ -1087,7 +1086,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, } /* This can handle encryption stuffs */ -static int f2fs_submit_page_read(struct inode *inode, struct page *page, +static int f2fs_submit_page_read(struct inode *inode, struct folio *folio, block_t blkaddr, blk_opf_t op_flags, bool for_write) { @@ -1095,14 +1094,14 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page, struct bio *bio; bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags, - page->index, for_write); + folio->index, for_write); if (IS_ERR(bio)) return PTR_ERR(bio); /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, blkaddr); - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + if (!bio_add_folio(bio, folio, PAGE_SIZE, 0)) { iostat_update_and_unbind_ctx(bio); if (bio->bi_private) mempool_free(bio->bi_private, bio_post_read_ctx_pool); @@ -1270,7 +1269,7 @@ got_it: return page; } - err = f2fs_submit_page_read(inode, page, dn.data_blkaddr, + err = f2fs_submit_page_read(inode, page_folio(page), dn.data_blkaddr, op_flags, for_write); if (err) goto put_err; @@ -1713,6 +1712,14 @@ skip: dn.ofs_in_node = end_offset; } + if (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) && + map->m_may_create) { + /* the next block to be allocated may not be contiguous. */ + if (GET_SEGOFF_FROM_SEG0(sbi, blkaddr) % BLKS_PER_SEC(sbi) == + CAP_BLKS_PER_SEC(sbi) - 1) + goto sync_out; + } + if (pgofs >= end) goto sync_out; else if (dn.ofs_in_node < end_offset) @@ -1939,7 +1946,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, inode_lock_shared(inode); - maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS; + maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); if (start > maxbytes) { ret = -EFBIG; goto out; @@ -2064,7 +2071,7 @@ out: static inline loff_t f2fs_readpage_limit(struct inode *inode) { if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode)) - return inode->i_sb->s_maxbytes; + return F2FS_BLK_TO_BYTES(max_file_blocks(inode)); return i_size_read(inode); } @@ -2208,19 +2215,22 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, /* get rid of pages beyond EOF */ for (i = 0; i < cc->cluster_size; i++) { struct page *page = cc->rpages[i]; + struct folio *folio; if (!page) continue; - if ((sector_t)page->index >= last_block_in_file) { - zero_user_segment(page, 0, PAGE_SIZE); - if (!PageUptodate(page)) - SetPageUptodate(page); - } else if (!PageUptodate(page)) { + + folio = page_folio(page); + if ((sector_t)folio->index >= last_block_in_file) { + folio_zero_segment(folio, 0, folio_size(folio)); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); + } else if (!folio_test_uptodate(folio)) { continue; } - unlock_page(page); + folio_unlock(folio); if (for_write) - put_page(page); + folio_put(folio); cc->rpages[i] = NULL; cc->nr_rpages--; } @@ -2280,7 +2290,7 @@ skip_reading_dnode: } for (i = 0; i < cc->nr_cpages; i++) { - struct page *page = dic->cpages[i]; + struct folio *folio = page_folio(dic->cpages[i]); block_t blkaddr; struct bio_post_read_ctx *ctx; @@ -2290,7 +2300,8 @@ skip_reading_dnode: f2fs_wait_on_block_writeback(inode, blkaddr); - if (f2fs_load_compressed_page(sbi, page, blkaddr)) { + if (f2fs_load_compressed_page(sbi, folio_page(folio, 0), + blkaddr)) { if (atomic_dec_and_test(&dic->remaining_pages)) { f2fs_decompress_cluster(dic, true); break; @@ -2300,7 +2311,7 @@ skip_reading_dnode: if (bio && (!page_is_mergeable(sbi, bio, *last_block_in_bio, blkaddr) || - !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) { + !f2fs_crypt_mergeable_bio(bio, inode, folio->index, NULL))) { submit_and_realloc: f2fs_submit_read_bio(sbi, bio, DATA); bio = NULL; @@ -2309,7 +2320,7 @@ submit_and_realloc: if (!bio) { bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, f2fs_ra_op_flags(rac), - page->index, for_write); + folio->index, for_write); if (IS_ERR(bio)) { ret = PTR_ERR(bio); f2fs_decompress_end_io(dic, ret, true); @@ -2319,7 +2330,7 @@ submit_and_realloc: } } - if (bio_add_page(bio, page, blocksize, 0) < blocksize) + if (!bio_add_folio(bio, folio, blocksize, 0)) goto submit_and_realloc; ctx = get_post_read_ctx(bio); @@ -2430,7 +2441,7 @@ static int f2fs_mpage_readpages(struct inode *inode, if (ret) goto set_error_page; - f2fs_compress_ctx_add_page(&cc, &folio->page); + f2fs_compress_ctx_add_page(&cc, folio); goto next_page; read_single_page: @@ -2645,21 +2656,24 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio) int f2fs_do_write_data_page(struct f2fs_io_info *fio) { - struct page *page = fio->page; - struct inode *inode = page->mapping->host; + struct folio *folio = page_folio(fio->page); + struct inode *inode = folio->mapping->host; struct dnode_of_data dn; struct node_info ni; bool ipu_force = false; + bool atomic_commit; int err = 0; /* Use COW inode to make dnode_of_data for atomic write */ - if (f2fs_is_atomic_file(inode)) + atomic_commit = f2fs_is_atomic_file(inode) && + page_private_atomic(folio_page(folio, 0)); + if (atomic_commit) set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0); else set_new_dnode(&dn, inode, NULL, NULL, 0); if (need_inplace_update(fio) && - f2fs_lookup_read_extent_cache_block(inode, page->index, + f2fs_lookup_read_extent_cache_block(inode, folio->index, &fio->old_blkaddr)) { if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, DATA_GENERIC_ENHANCE)) @@ -2674,7 +2688,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi)) return -EAGAIN; - err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE); if (err) goto out; @@ -2682,8 +2696,8 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) /* This page is already truncated */ if (fio->old_blkaddr == NULL_ADDR) { - ClearPageUptodate(page); - clear_page_private_gcing(page); + folio_clear_uptodate(folio); + clear_page_private_gcing(folio_page(folio, 0)); goto out_writepage; } got_it: @@ -2709,7 +2723,7 @@ got_it: if (err) goto out_writepage; - set_page_writeback(page); + folio_start_writeback(folio); f2fs_put_dnode(&dn); if (fio->need_lock == LOCK_REQ) f2fs_unlock_op(fio->sbi); @@ -2717,11 +2731,11 @@ got_it: if (err) { if (fscrypt_inode_uses_fs_layer_crypto(inode)) fscrypt_finalize_bounce_page(&fio->encrypted_page); - end_page_writeback(page); + folio_end_writeback(folio); } else { set_inode_flag(inode, FI_UPDATE_WRITE); } - trace_f2fs_do_write_data_page(page_folio(page), IPU); + trace_f2fs_do_write_data_page(folio, IPU); return err; } @@ -2743,15 +2757,17 @@ got_it: if (err) goto out_writepage; - set_page_writeback(page); + folio_start_writeback(folio); if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR) f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false); /* LFS mode write path */ f2fs_outplace_write_data(&dn, fio); - trace_f2fs_do_write_data_page(page_folio(page), OPU); + trace_f2fs_do_write_data_page(folio, OPU); set_inode_flag(inode, FI_APPEND_WRITE); + if (atomic_commit) + clear_page_private_atomic(folio_page(folio, 0)); out_writepage: f2fs_put_dnode(&dn); out: @@ -2760,7 +2776,7 @@ out: return err; } -int f2fs_write_single_data_page(struct page *page, int *submitted, +int f2fs_write_single_data_page(struct folio *folio, int *submitted, struct bio **bio, sector_t *last_block, struct writeback_control *wbc, @@ -2768,12 +2784,13 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, int compr_blocks, bool allow_balance) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; + struct page *page = folio_page(folio, 0); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); loff_t i_size = i_size_read(inode); const pgoff_t end_index = ((unsigned long long)i_size) >> PAGE_SHIFT; - loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT; + loff_t psize = (loff_t)(folio->index + 1) << PAGE_SHIFT; unsigned offset = 0; bool need_balance_fs = false; bool quota_inode = IS_NOQUOTA(inode); @@ -2797,11 +2814,11 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, .last_block = last_block, }; - trace_f2fs_writepage(page_folio(page), DATA); + trace_f2fs_writepage(folio, DATA); /* we should bypass data pages to proceed the kworker jobs */ if (unlikely(f2fs_cp_error(sbi))) { - mapping_set_error(page->mapping, -EIO); + mapping_set_error(folio->mapping, -EIO); /* * don't drop any dirty dentry pages for keeping lastest * directory structure. @@ -2819,7 +2836,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (page->index < end_index || + if (folio->index < end_index || f2fs_verity_in_progress(inode) || compr_blocks) goto write; @@ -2829,10 +2846,10 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, * this page does not have to be written to disk. */ offset = i_size & (PAGE_SIZE - 1); - if ((page->index >= end_index + 1) || !offset) + if ((folio->index >= end_index + 1) || !offset) goto out; - zero_user_segment(page, offset, PAGE_SIZE); + folio_zero_segment(folio, offset, folio_size(folio)); write: /* Dentry/quota blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode) || quota_inode) { @@ -2862,7 +2879,7 @@ write: err = -EAGAIN; if (f2fs_has_inline_data(inode)) { - err = f2fs_write_inline_data(inode, page); + err = f2fs_write_inline_data(inode, folio); if (!err) goto out; } @@ -2892,7 +2909,7 @@ done: out: inode_dec_dirty_pages(inode); if (err) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); clear_page_private_gcing(page); } @@ -2902,7 +2919,7 @@ out: f2fs_remove_dirty_inode(inode); submitted = NULL; } - unlock_page(page); + folio_unlock(folio); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && !F2FS_I(inode)->wb_task && allow_balance) f2fs_balance_fs(sbi, need_balance_fs); @@ -2920,7 +2937,7 @@ out: return 0; redirty_out: - redirty_page_for_writepage(wbc, page); + folio_redirty_for_writepage(wbc, folio); /* * pageout() in MM translates EAGAIN, so calls handle_write_error() * -> mapping_set_error() -> set_bit(AS_EIO, ...). @@ -2929,29 +2946,30 @@ redirty_out: */ if (!err || wbc->for_reclaim) return AOP_WRITEPAGE_ACTIVATE; - unlock_page(page); + folio_unlock(folio); return err; } static int f2fs_write_data_page(struct page *page, struct writeback_control *wbc) { + struct folio *folio = page_folio(page); #ifdef CONFIG_F2FS_FS_COMPRESSION - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) goto out; if (f2fs_compressed_file(inode)) { - if (f2fs_is_compressed_cluster(inode, page->index)) { - redirty_page_for_writepage(wbc, page); + if (f2fs_is_compressed_cluster(inode, folio->index)) { + folio_redirty_for_writepage(wbc, folio); return AOP_WRITEPAGE_ACTIVATE; } } out: #endif - return f2fs_write_single_data_page(page, NULL, NULL, NULL, + return f2fs_write_single_data_page(folio, NULL, NULL, NULL, wbc, FS_DATA_IO, 0, true); } @@ -3157,11 +3175,11 @@ continue_unlock: #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { folio_get(folio); - f2fs_compress_ctx_add_page(&cc, &folio->page); + f2fs_compress_ctx_add_page(&cc, folio); continue; } #endif - ret = f2fs_write_single_data_page(&folio->page, + ret = f2fs_write_single_data_page(folio, &submitted, &bio, &last_block, wbc, io_type, 0, true); if (ret == AOP_WRITEPAGE_ACTIVATE) @@ -3369,11 +3387,11 @@ void f2fs_write_failed(struct inode *inode, loff_t to) } static int prepare_write_begin(struct f2fs_sb_info *sbi, - struct page *page, loff_t pos, unsigned len, + struct folio *folio, loff_t pos, unsigned int len, block_t *blk_addr, bool *node_changed) { - struct inode *inode = page->mapping->host; - pgoff_t index = page->index; + struct inode *inode = folio->mapping->host; + pgoff_t index = folio->index; struct dnode_of_data dn; struct page *ipage; bool locked = false; @@ -3410,13 +3428,13 @@ restart: if (f2fs_has_inline_data(inode)) { if (pos + len <= MAX_INLINE_DATA(inode)) { - f2fs_do_read_inline_data(page_folio(page), ipage); + f2fs_do_read_inline_data(folio, ipage); set_inode_flag(inode, FI_DATA_EXIST); if (inode->i_nlink) set_page_private_inline(ipage); goto out; } - err = f2fs_convert_inline_page(&dn, page); + err = f2fs_convert_inline_page(&dn, folio_page(folio, 0)); if (err || dn.data_blkaddr != NULL_ADDR) goto out; } @@ -3509,12 +3527,12 @@ unlock_out: } static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, - struct page *page, loff_t pos, unsigned int len, + struct folio *folio, loff_t pos, unsigned int len, block_t *blk_addr, bool *node_changed, bool *use_cow) { - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct inode *cow_inode = F2FS_I(inode)->cow_inode; - pgoff_t index = page->index; + pgoff_t index = folio->index; int err = 0; block_t ori_blk_addr = NULL_ADDR; @@ -3620,10 +3638,10 @@ repeat: *foliop = folio; if (f2fs_is_atomic_file(inode)) - err = prepare_atomic_write_begin(sbi, &folio->page, pos, len, + err = prepare_atomic_write_begin(sbi, folio, pos, len, &blkaddr, &need_balance, &use_cow); else - err = prepare_write_begin(sbi, &folio->page, pos, len, + err = prepare_write_begin(sbi, folio, pos, len, &blkaddr, &need_balance); if (err) goto put_folio; @@ -3648,7 +3666,7 @@ repeat: if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) && !f2fs_verity_in_progress(inode)) { - folio_zero_segment(folio, len, PAGE_SIZE); + folio_zero_segment(folio, len, folio_size(folio)); return 0; } @@ -3662,8 +3680,8 @@ repeat: goto put_folio; } err = f2fs_submit_page_read(use_cow ? - F2FS_I(inode)->cow_inode : inode, &folio->page, - blkaddr, 0, true); + F2FS_I(inode)->cow_inode : inode, + folio, blkaddr, 0, true); if (err) goto put_folio; @@ -3727,6 +3745,9 @@ static int f2fs_write_end(struct file *file, folio_mark_dirty(folio); + if (f2fs_is_atomic_file(inode)) + set_page_private_atomic(folio_page(folio, 0)); + if (pos + copied > i_size_read(inode) && !f2fs_verity_in_progress(inode)) { f2fs_i_size_write(inode, pos + copied); @@ -4117,9 +4138,8 @@ const struct address_space_operations f2fs_dblock_aops = { .swap_deactivate = f2fs_swap_deactivate, }; -void f2fs_clear_page_cache_dirty_tag(struct page *page) +void f2fs_clear_page_cache_dirty_tag(struct folio *folio) { - struct folio *folio = page_folio(page); struct address_space *mapping = folio->mapping; unsigned long flags; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8b0e1e71b667..546b8ba91261 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -275,7 +275,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) /* build nm */ si->base_mem += sizeof(struct f2fs_nm_info); si->base_mem += __bitmap_size(sbi, NAT_BITMAP); - si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS); + si->base_mem += F2FS_BLK_TO_BYTES(NM_I(sbi)->nat_bits_blocks); si->base_mem += NM_I(sbi)->nat_blocks * f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK); si->base_mem += NM_I(sbi)->nat_blocks / 8; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index cbd7a5e96a37..1136539a57a8 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -166,7 +166,8 @@ static unsigned long dir_block_index(unsigned int level, unsigned long bidx = 0; for (i = 0; i < level; i++) - bidx += dir_buckets(i, dir_level) * bucket_blocks(i); + bidx += mul_u32_u32(dir_buckets(i, dir_level), + bucket_blocks(i)); bidx += idx * bucket_blocks(level); return bidx; } @@ -841,6 +842,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, struct f2fs_dentry_block *dentry_blk; unsigned int bit_pos; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); + pgoff_t index = page_folio(page)->index; int i; f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); @@ -866,8 +868,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, set_page_dirty(page); if (bit_pos == NR_DENTRY_IN_BLOCK && - !f2fs_truncate_hole(dir, page->index, page->index + 1)) { - f2fs_clear_page_cache_dirty_tag(page); + !f2fs_truncate_hole(dir, index, index + 1)) { + f2fs_clear_page_cache_dirty_tag(page_folio(page)); clear_page_dirty_for_io(page); ClearPageUptodate(page); clear_page_private_all(page); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index fd1fc06359ee..62ac440d9416 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -366,7 +366,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, static void __drop_largest_extent(struct extent_tree *et, pgoff_t fofs, unsigned int len) { - if (fofs < et->largest.fofs + et->largest.len && + if (fofs < (pgoff_t)et->largest.fofs + et->largest.len && fofs + len > et->largest.fofs) { et->largest.len = 0; et->largest_updated = true; @@ -456,7 +456,7 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, if (type == EX_READ && et->largest.fofs <= pgofs && - et->largest.fofs + et->largest.len > pgofs) { + (pgoff_t)et->largest.fofs + et->largest.len > pgofs) { *ei = et->largest; ret = true; stat_inc_largest_node_hit(sbi); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ac19c61f0c3e..33f5449dc22d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -11,7 +11,6 @@ #include <linux/uio.h> #include <linux/types.h> #include <linux/page-flags.h> -#include <linux/buffer_head.h> #include <linux/slab.h> #include <linux/crc32.h> #include <linux/magic.h> @@ -134,6 +133,12 @@ typedef u32 nid_t; #define COMPRESS_EXT_NUM 16 +enum blkzone_allocation_policy { + BLKZONE_ALLOC_PRIOR_SEQ, /* Prioritize writing to sequential zones */ + BLKZONE_ALLOC_ONLY_SEQ, /* Only allow writing to sequential zones */ + BLKZONE_ALLOC_PRIOR_CONV, /* Prioritize writing to conventional zones */ +}; + /* * An implementation of an rwsem that is explicitly unfair to readers. This * prevents priority inversion when a low-priority reader acquires the read lock @@ -285,6 +290,7 @@ enum { APPEND_INO, /* for append ino list */ UPDATE_INO, /* for update ino list */ TRANS_DIR_INO, /* for transactions dir ino list */ + XATTR_DIR_INO, /* for xattr updated dir ino list */ FLUSH_INO, /* for multiple device flushing */ MAX_INO_ENTRY, /* max. list */ }; @@ -784,7 +790,6 @@ enum { FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ FI_DATA_EXIST, /* indicate data exists */ - FI_INLINE_DOTS, /* indicate inline dot dentries */ FI_SKIP_WRITES, /* should skip data page writeback */ FI_OPU_WRITE, /* used for opu per file */ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ @@ -802,6 +807,7 @@ enum { FI_ALIGNED_WRITE, /* enable aligned write */ FI_COW_FILE, /* indicate COW file */ FI_ATOMIC_COMMITTED, /* indicate atomic commit completed except disk sync */ + FI_ATOMIC_DIRTIED, /* indicate atomic file is dirtied */ FI_ATOMIC_REPLACE, /* indicate atomic replace */ FI_OPENED_FILE, /* indicate file has been opened */ FI_MAX, /* max flag, never be used */ @@ -1155,6 +1161,7 @@ enum cp_reason_type { CP_FASTBOOT_MODE, CP_SPEC_LOG_NUM, CP_RECOVER_DIR, + CP_XATTR_DIR, }; enum iostat_type { @@ -1293,6 +1300,7 @@ struct f2fs_gc_control { bool no_bg_gc; /* check the space and stop bg_gc */ bool should_migrate_blocks; /* should migrate blocks */ bool err_gc_skipped; /* return EAGAIN if GC skipped */ + bool one_time; /* require one time GC in one migration unit */ unsigned int nr_free_secs; /* # of free sections to do GC */ }; @@ -1418,7 +1426,8 @@ static inline void f2fs_clear_bit(unsigned int nr, char *addr); * bit 1 PAGE_PRIVATE_ONGOING_MIGRATION * bit 2 PAGE_PRIVATE_INLINE_INODE * bit 3 PAGE_PRIVATE_REF_RESOURCE - * bit 4- f2fs private data + * bit 4 PAGE_PRIVATE_ATOMIC_WRITE + * bit 5- f2fs private data * * Layout B: lowest bit should be 0 * page.private is a wrapped pointer. @@ -1428,6 +1437,7 @@ enum { PAGE_PRIVATE_ONGOING_MIGRATION, /* data page which is on-going migrating */ PAGE_PRIVATE_INLINE_INODE, /* inode page contains inline data */ PAGE_PRIVATE_REF_RESOURCE, /* dirty page has referenced resources */ + PAGE_PRIVATE_ATOMIC_WRITE, /* data page from atomic write path */ PAGE_PRIVATE_MAX }; @@ -1559,6 +1569,8 @@ struct f2fs_sb_info { #ifdef CONFIG_BLK_DEV_ZONED unsigned int blocks_per_blkz; /* F2FS blocks per zone */ unsigned int max_open_zones; /* max open zone resources of the zoned device */ + /* For adjust the priority writing position of data in zone UFS */ + unsigned int blkzone_alloc_policy; #endif /* for node-related operations */ @@ -1685,6 +1697,8 @@ struct f2fs_sb_info { unsigned int max_victim_search; /* migration granularity of garbage collection, unit: segment */ unsigned int migration_granularity; + /* migration window granularity of garbage collection, unit: segment */ + unsigned int migration_window_granularity; /* * for stat information. @@ -1994,6 +2008,16 @@ static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) return (struct f2fs_super_block *)(sbi->raw_super); } +static inline struct f2fs_super_block *F2FS_SUPER_BLOCK(struct folio *folio, + pgoff_t index) +{ + pgoff_t idx_in_folio = index % (1 << folio_order(folio)); + + return (struct f2fs_super_block *) + (page_address(folio_page(folio, idx_in_folio)) + + F2FS_SUPER_OFFSET); +} + static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) { return (struct f2fs_checkpoint *)(sbi->ckpt); @@ -2396,14 +2420,17 @@ static inline void clear_page_private_##name(struct page *page) \ PAGE_PRIVATE_GET_FUNC(nonpointer, NOT_POINTER); PAGE_PRIVATE_GET_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_GET_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_GET_FUNC(atomic, ATOMIC_WRITE); PAGE_PRIVATE_SET_FUNC(reference, REF_RESOURCE); PAGE_PRIVATE_SET_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_SET_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_SET_FUNC(atomic, ATOMIC_WRITE); PAGE_PRIVATE_CLEAR_FUNC(reference, REF_RESOURCE); PAGE_PRIVATE_CLEAR_FUNC(inline, INLINE_INODE); PAGE_PRIVATE_CLEAR_FUNC(gcing, ONGOING_MIGRATION); +PAGE_PRIVATE_CLEAR_FUNC(atomic, ATOMIC_WRITE); static inline unsigned long get_page_private_data(struct page *page) { @@ -2435,6 +2462,7 @@ static inline void clear_page_private_all(struct page *page) clear_page_private_reference(page); clear_page_private_gcing(page); clear_page_private_inline(page); + clear_page_private_atomic(page); f2fs_bug_on(F2FS_P_SB(page), page_private(page)); } @@ -2854,13 +2882,26 @@ static inline bool is_inflight_io(struct f2fs_sb_info *sbi, int type) return false; } +static inline bool is_inflight_read_io(struct f2fs_sb_info *sbi) +{ + return get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_DIO_READ); +} + static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { + bool zoned_gc = (type == GC_TIME && + F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_BLKZONED)); + if (sbi->gc_mode == GC_URGENT_HIGH) return true; - if (is_inflight_io(sbi, type)) - return false; + if (zoned_gc) { + if (is_inflight_read_io(sbi)) + return false; + } else { + if (is_inflight_io(sbi, type)) + return false; + } if (sbi->gc_mode == GC_URGENT_MID) return true; @@ -2869,6 +2910,9 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type) (type == DISCARD_TIME || type == GC_TIME)) return true; + if (zoned_gc) + return true; + return f2fs_time_over(sbi, type); } @@ -2900,26 +2944,27 @@ static inline __le32 *blkaddr_in_node(struct f2fs_node *node) } static inline int f2fs_has_extra_attr(struct inode *inode); -static inline block_t data_blkaddr(struct inode *inode, - struct page *node_page, unsigned int offset) +static inline unsigned int get_dnode_base(struct inode *inode, + struct page *node_page) { - struct f2fs_node *raw_node; - __le32 *addr_array; - int base = 0; - bool is_inode = IS_INODE(node_page); + if (!IS_INODE(node_page)) + return 0; - raw_node = F2FS_NODE(node_page); + return inode ? get_extra_isize(inode) : + offset_in_addr(&F2FS_NODE(node_page)->i); +} - if (is_inode) { - if (!inode) - /* from GC path only */ - base = offset_in_addr(&raw_node->i); - else if (f2fs_has_extra_attr(inode)) - base = get_extra_isize(inode); - } +static inline __le32 *get_dnode_addr(struct inode *inode, + struct page *node_page) +{ + return blkaddr_in_node(F2FS_NODE(node_page)) + + get_dnode_base(inode, node_page); +} - addr_array = blkaddr_in_node(raw_node); - return le32_to_cpu(addr_array[base + offset]); +static inline block_t data_blkaddr(struct inode *inode, + struct page *node_page, unsigned int offset) +{ + return le32_to_cpu(*(get_dnode_addr(inode, node_page) + offset)); } static inline block_t f2fs_data_blkaddr(struct dnode_of_data *dn) @@ -3038,10 +3083,8 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, return; fallthrough; case FI_DATA_EXIST: - case FI_INLINE_DOTS: case FI_PIN_FILE: case FI_COMPRESS_RELEASED: - case FI_ATOMIC_COMMITTED: f2fs_mark_inode_dirty_sync(inode, true); } } @@ -3163,8 +3206,6 @@ static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) set_bit(FI_INLINE_DENTRY, fi->flags); if (ri->i_inline & F2FS_DATA_EXIST) set_bit(FI_DATA_EXIST, fi->flags); - if (ri->i_inline & F2FS_INLINE_DOTS) - set_bit(FI_INLINE_DOTS, fi->flags); if (ri->i_inline & F2FS_EXTRA_ATTR) set_bit(FI_EXTRA_ATTR, fi->flags); if (ri->i_inline & F2FS_PIN_FILE) @@ -3185,8 +3226,6 @@ static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) ri->i_inline |= F2FS_INLINE_DENTRY; if (is_inode_flag_set(inode, FI_DATA_EXIST)) ri->i_inline |= F2FS_DATA_EXIST; - if (is_inode_flag_set(inode, FI_INLINE_DOTS)) - ri->i_inline |= F2FS_INLINE_DOTS; if (is_inode_flag_set(inode, FI_EXTRA_ATTR)) ri->i_inline |= F2FS_EXTRA_ATTR; if (is_inode_flag_set(inode, FI_PIN_FILE)) @@ -3267,11 +3306,6 @@ static inline int f2fs_exist_data(struct inode *inode) return is_inode_flag_set(inode, FI_DATA_EXIST); } -static inline int f2fs_has_inline_dots(struct inode *inode) -{ - return is_inode_flag_set(inode, FI_INLINE_DOTS); -} - static inline int f2fs_is_mmap_file(struct inode *inode) { return is_inode_flag_set(inode, FI_MMAP_FILE); @@ -3292,8 +3326,6 @@ static inline bool f2fs_is_cow_file(struct inode *inode) return is_inode_flag_set(inode, FI_COW_FILE); } -static inline __le32 *get_dnode_addr(struct inode *inode, - struct page *node_page); static inline void *inline_data_addr(struct inode *inode, struct page *page) { __le32 *addr = get_dnode_addr(inode, page); @@ -3432,17 +3464,6 @@ static inline int get_inline_xattr_addrs(struct inode *inode) return F2FS_I(inode)->i_inline_xattr_size; } -static inline __le32 *get_dnode_addr(struct inode *inode, - struct page *node_page) -{ - int base = 0; - - if (IS_INODE(node_page) && f2fs_has_extra_attr(inode)) - base = get_extra_isize(inode); - - return blkaddr_in_node(F2FS_NODE(node_page)) + base; -} - #define f2fs_get_inode_mode(i) \ ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) @@ -3495,7 +3516,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count); int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, - bool readonly); + bool readonly, bool need_lock); int f2fs_precache_extents(struct inode *inode); int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa); int f2fs_fileattr_set(struct mnt_idmap *idmap, @@ -3719,7 +3740,7 @@ bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno); void f2fs_update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr); -void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, +void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio, enum iostat_type io_type); void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio); void f2fs_outplace_write_data(struct dnode_of_data *dn, @@ -3759,8 +3780,7 @@ void f2fs_destroy_segment_manager_caches(void); int f2fs_rw_hint_to_seg_type(struct f2fs_sb_info *sbi, enum rw_hint hint); enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, enum page_type type, enum temp_type temp); -unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, - unsigned int segno); +unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi); unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, unsigned int segno); @@ -3868,7 +3888,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int f2fs_encrypt_one_page(struct f2fs_io_info *fio); bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio); bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio); -int f2fs_write_single_data_page(struct page *page, int *submitted, +int f2fs_write_single_data_page(struct folio *folio, int *submitted, struct bio **bio, sector_t *last_block, struct writeback_control *wbc, enum iostat_type io_type, @@ -3877,7 +3897,7 @@ void f2fs_write_failed(struct inode *inode, loff_t to); void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length); bool f2fs_release_folio(struct folio *folio, gfp_t wait); bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len); -void f2fs_clear_page_cache_dirty_tag(struct page *page); +void f2fs_clear_page_cache_dirty_tag(struct folio *folio); int f2fs_init_post_read_processing(void); void f2fs_destroy_post_read_processing(void); int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi); @@ -3901,7 +3921,7 @@ void f2fs_destroy_garbage_collection_cache(void); /* victim selection function for cleaning and SSR */ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, int gc_type, int type, char alloc_mode, - unsigned long long age); + unsigned long long age, bool one_time); /* * recovery.c @@ -3987,7 +4007,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_cp_call_count(sbi, foreground) \ atomic_inc(&sbi->cp_call_count[(foreground)]) -#define stat_inc_cp_count(si) (F2FS_STAT(sbi)->cp_count++) +#define stat_inc_cp_count(sbi) (F2FS_STAT(sbi)->cp_count++) #define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++) #define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) @@ -4172,7 +4192,7 @@ int f2fs_read_inline_data(struct inode *inode, struct folio *folio); int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); int f2fs_convert_inline_inode(struct inode *inode); int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry); -int f2fs_write_inline_data(struct inode *inode, struct page *page); +int f2fs_write_inline_data(struct inode *inode, struct folio *folio); int f2fs_recover_inline_data(struct inode *inode, struct page *npage); struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, const struct f2fs_filename *fname, @@ -4289,6 +4309,11 @@ static inline bool f2fs_meta_inode_gc_required(struct inode *inode) * compress.c */ #ifdef CONFIG_F2FS_FS_COMPRESSION +enum cluster_check_type { + CLUSTER_IS_COMPR, /* check only if compressed cluster */ + CLUSTER_COMPR_BLKS, /* return # of compressed blocks in a cluster */ + CLUSTER_RAW_BLKS /* return # of raw blocks in a cluster */ +}; bool f2fs_is_compressed_page(struct page *page); struct page *f2fs_compress_control_page(struct page *page); int f2fs_prepare_compress_overwrite(struct inode *inode, @@ -4309,12 +4334,13 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages, int index, int nr_pages, bool uptodate); bool f2fs_sanity_check_cluster(struct dnode_of_data *dn); -void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page); +void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct folio *folio); int f2fs_write_multi_pages(struct compress_ctx *cc, int *submitted, struct writeback_control *wbc, enum iostat_type io_type); int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index); +bool f2fs_is_sparse_cluster(struct inode *inode, pgoff_t index); void f2fs_update_read_extent_tree_range_compressed(struct inode *inode, pgoff_t fofs, block_t blkaddr, unsigned int llen, unsigned int c_len); @@ -4401,6 +4427,12 @@ static inline bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino) { } #define inc_compr_inode_stat(inode) do { } while (0) +static inline int f2fs_is_compressed_cluster( + struct inode *inode, + pgoff_t index) { return 0; } +static inline bool f2fs_is_sparse_cluster( + struct inode *inode, + pgoff_t index) { return true; } static inline void f2fs_update_read_extent_tree_range_compressed( struct inode *inode, pgoff_t fofs, block_t blkaddr, @@ -4653,9 +4685,11 @@ static inline void f2fs_io_schedule_timeout(long timeout) io_schedule_timeout(timeout); } -static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, pgoff_t ofs, - enum page_type type) +static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, + struct folio *folio, enum page_type type) { + pgoff_t ofs = folio->index; + if (unlikely(f2fs_cp_error(sbi))) return; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 903337f8d21a..9ae54c4c72fe 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -8,7 +8,6 @@ #include <linux/fs.h> #include <linux/f2fs_fs.h> #include <linux/stat.h> -#include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/blkdev.h> #include <linux/falloc.h> @@ -54,7 +53,7 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) { - struct page *page = vmf->page; + struct folio *folio = page_folio(vmf->page); struct inode *inode = file_inode(vmf->vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; @@ -86,7 +85,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { - int ret = f2fs_is_compressed_cluster(inode, page->index); + int ret = f2fs_is_compressed_cluster(inode, folio->index); if (ret < 0) { err = ret; @@ -106,11 +105,11 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) file_update_time(vmf->vma->vm_file); filemap_invalidate_lock_shared(inode->i_mapping); - lock_page(page); - if (unlikely(page->mapping != inode->i_mapping || - page_offset(page) > i_size_read(inode) || - !PageUptodate(page))) { - unlock_page(page); + folio_lock(folio); + if (unlikely(folio->mapping != inode->i_mapping || + folio_pos(folio) > i_size_read(inode) || + !folio_test_uptodate(folio))) { + folio_unlock(folio); err = -EFAULT; goto out_sem; } @@ -118,9 +117,9 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) set_new_dnode(&dn, inode, NULL, NULL, 0); if (need_alloc) { /* block allocation */ - err = f2fs_get_block_locked(&dn, page->index); + err = f2fs_get_block_locked(&dn, folio->index); } else { - err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); + err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE); f2fs_put_dnode(&dn); if (f2fs_is_pinned_file(inode) && !__is_valid_data_blkaddr(dn.data_blkaddr)) @@ -128,11 +127,11 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) } if (err) { - unlock_page(page); + folio_unlock(folio); goto out_sem; } - f2fs_wait_on_page_writeback(page, DATA, false, true); + f2fs_wait_on_page_writeback(folio_page(folio, 0), DATA, false, true); /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); @@ -140,18 +139,18 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) /* * check to see if the page is mapped already (no holes) */ - if (PageMappedToDisk(page)) + if (folio_test_mappedtodisk(folio)) goto out_sem; /* page is wholly or partially inside EOF */ - if (((loff_t)(page->index + 1) << PAGE_SHIFT) > + if (((loff_t)(folio->index + 1) << PAGE_SHIFT) > i_size_read(inode)) { loff_t offset; offset = i_size_read(inode) & ~PAGE_MASK; - zero_user_segment(page, offset, PAGE_SIZE); + folio_zero_segment(folio, offset, folio_size(folio)); } - set_page_dirty(page); + folio_mark_dirty(folio); f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE); f2fs_update_time(sbi, REQ_TIME); @@ -163,7 +162,7 @@ out_sem: out: ret = vmf_fs_error(err); - trace_f2fs_vm_page_mkwrite(inode, page->index, vmf->vma->vm_flags, ret); + trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret); return ret; } @@ -218,6 +217,9 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, TRANS_DIR_INO)) cp_reason = CP_RECOVER_DIR; + else if (f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino, + XATTR_DIR_INO)) + cp_reason = CP_XATTR_DIR; return cp_reason; } @@ -373,8 +375,7 @@ sync_nodes: f2fs_remove_ino_entry(sbi, ino, APPEND_INO); clear_inode_flag(inode, FI_APPEND_WRITE); flush_out: - if ((!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) || - (atomic && !test_opt(sbi, NOBARRIER) && f2fs_sb_has_blkzoned(sbi))) + if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) ret = f2fs_issue_flush(sbi, inode->i_ino); if (!ret) { f2fs_remove_ino_entry(sbi, ino, UPDATE_INO); @@ -431,7 +432,7 @@ static bool __found_offset(struct address_space *mapping, static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; - loff_t maxbytes = inode->i_sb->s_maxbytes; + loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); struct dnode_of_data dn; pgoff_t pgofs, end_offset; loff_t data_ofs = offset; @@ -513,10 +514,7 @@ fail: static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; - loff_t maxbytes = inode->i_sb->s_maxbytes; - - if (f2fs_compressed_file(inode)) - maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS; + loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); switch (whence) { case SEEK_SET: @@ -1052,6 +1050,13 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, return err; } + /* + * wait for inflight dio, blocks should be removed after + * IO completion. + */ + if (attr->ia_size < old_size) + inode_dio_wait(inode); + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); @@ -1888,6 +1893,12 @@ static long f2fs_fallocate(struct file *file, int mode, if (ret) goto out; + /* + * wait for inflight dio, blocks should be removed after IO + * completion. + */ + inode_dio_wait(inode); + if (mode & FALLOC_FL_PUNCH_HOLE) { if (offset >= inode->i_size) goto out; @@ -2116,10 +2127,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) struct mnt_idmap *idmap = file_mnt_idmap(filp); struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct inode *pinode; loff_t isize; int ret; + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; @@ -2149,6 +2162,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) goto out; f2fs_down_write(&fi->i_gc_rwsem[WRITE]); + f2fs_down_write(&fi->i_gc_rwsem[READ]); /* * Should wait end_io to count F2FS_WB_CP_DATA correctly by @@ -2158,27 +2172,18 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u", inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); - if (ret) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - goto out; - } + if (ret) + goto out_unlock; /* Check if the inode already has a COW inode */ if (fi->cow_inode == NULL) { /* Create a COW inode for atomic write */ - pinode = f2fs_iget(inode->i_sb, fi->i_pino); - if (IS_ERR(pinode)) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - ret = PTR_ERR(pinode); - goto out; - } + struct dentry *dentry = file_dentry(filp); + struct inode *dir = d_inode(dentry->d_parent); - ret = f2fs_get_tmpfile(idmap, pinode, &fi->cow_inode); - iput(pinode); - if (ret) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - goto out; - } + ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode); + if (ret) + goto out_unlock; set_inode_flag(fi->cow_inode, FI_COW_FILE); clear_inode_flag(fi->cow_inode, FI_INLINE_DATA); @@ -2187,11 +2192,13 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) F2FS_I(fi->cow_inode)->atomic_inode = inode; } else { /* Reuse the already created COW inode */ + f2fs_bug_on(sbi, get_dirty_pages(fi->cow_inode)); + + invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1); + ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true); - if (ret) { - f2fs_up_write(&fi->i_gc_rwsem[WRITE]); - goto out; - } + if (ret) + goto out_unlock; } f2fs_write_inode(inode, NULL); @@ -2210,7 +2217,11 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate) } f2fs_i_size_write(fi->cow_inode, isize); +out_unlock: + f2fs_up_write(&fi->i_gc_rwsem[READ]); f2fs_up_write(&fi->i_gc_rwsem[WRITE]); + if (ret) + goto out; f2fs_update_time(sbi, REQ_TIME); fi->atomic_write_task = current; @@ -2228,6 +2239,9 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) struct mnt_idmap *idmap = file_mnt_idmap(filp); int ret; + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; @@ -2260,6 +2274,9 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp) struct mnt_idmap *idmap = file_mnt_idmap(filp); int ret; + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + if (!inode_owner_or_capable(idmap, inode)) return -EACCES; @@ -2279,7 +2296,7 @@ static int f2fs_ioc_abort_atomic_write(struct file *filp) } int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, - bool readonly) + bool readonly, bool need_lock) { struct super_block *sb = sbi->sb; int ret = 0; @@ -2326,12 +2343,19 @@ int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, if (readonly) goto out; + /* grab sb->s_umount to avoid racing w/ remount() */ + if (need_lock) + down_read(&sbi->sb->s_umount); + f2fs_stop_gc_thread(sbi); f2fs_stop_discard_thread(sbi); f2fs_drop_discard_cmd(sbi); clear_opt(sbi, DISCARD); + if (need_lock) + up_read(&sbi->sb->s_umount); + f2fs_update_time(sbi, REQ_TIME); out: @@ -2368,7 +2392,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) } } - ret = f2fs_do_shutdown(sbi, in, readonly); + ret = f2fs_do_shutdown(sbi, in, readonly, true); if (need_drop) mnt_drop_write_file(filp); @@ -2686,7 +2710,8 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, (range->start + range->len) >> PAGE_SHIFT, DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE)); - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) || + f2fs_is_atomic_file(inode)) { err = -EINVAL; goto unlock_out; } @@ -2710,7 +2735,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, * block addresses are continuous. */ if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) { - if (ei.fofs + ei.len >= pg_end) + if ((pgoff_t)ei.fofs + ei.len >= pg_end) goto out; } @@ -2793,6 +2818,8 @@ do_map: goto clear_out; } + f2fs_wait_on_page_writeback(page, DATA, true, true); + set_page_dirty(page); set_page_private_gcing(page); f2fs_put_page(page, 1); @@ -2917,6 +2944,11 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, goto out_unlock; } + if (f2fs_is_atomic_file(src) || f2fs_is_atomic_file(dst)) { + ret = -EINVAL; + goto out_unlock; + } + ret = -EINVAL; if (pos_in + len > src->i_size || pos_in + len < pos_in) goto out_unlock; @@ -2968,9 +3000,9 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, } f2fs_lock_op(sbi); - ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS, - pos_out >> F2FS_BLKSIZE_BITS, - len >> F2FS_BLKSIZE_BITS, false); + ret = __exchange_data_block(src, dst, F2FS_BYTES_TO_BLK(pos_in), + F2FS_BYTES_TO_BLK(pos_out), + F2FS_BYTES_TO_BLK(len), false); if (!ret) { if (dst_max_i_size) @@ -3300,6 +3332,11 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) inode_lock(inode); + if (f2fs_is_atomic_file(inode)) { + ret = -EINVAL; + goto out; + } + if (!pin) { clear_inode_flag(inode, FI_PIN_FILE); f2fs_i_gc_failures_write(inode, 0); @@ -4193,6 +4230,8 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) /* It will never fail, when page has pinned above */ f2fs_bug_on(F2FS_I_SB(inode), !page); + f2fs_wait_on_page_writeback(page, DATA, true, true); + set_page_dirty(page); set_page_private_gcing(page); f2fs_put_page(page, 1); @@ -4207,9 +4246,8 @@ static int f2fs_ioc_decompress_file(struct file *filp) struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - pgoff_t page_idx = 0, last_idx; - int cluster_size = fi->i_cluster_size; - int count, ret; + pgoff_t page_idx = 0, last_idx, cluster_idx; + int ret; if (!f2fs_sb_has_compression(sbi) || F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) @@ -4244,10 +4282,15 @@ static int f2fs_ioc_decompress_file(struct file *filp) goto out; last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + last_idx >>= fi->i_log_cluster_size; - count = last_idx - page_idx; - while (count && count >= cluster_size) { - ret = redirty_blocks(inode, page_idx, cluster_size); + for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { + page_idx = cluster_idx << fi->i_log_cluster_size; + + if (!f2fs_is_compressed_cluster(inode, page_idx)) + continue; + + ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); if (ret < 0) break; @@ -4257,9 +4300,6 @@ static int f2fs_ioc_decompress_file(struct file *filp) break; } - count -= cluster_size; - page_idx += cluster_size; - cond_resched(); if (fatal_signal_pending(current)) { ret = -EINTR; @@ -4286,9 +4326,9 @@ static int f2fs_ioc_compress_file(struct file *filp) { struct inode *inode = file_inode(filp); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - pgoff_t page_idx = 0, last_idx; - int cluster_size = F2FS_I(inode)->i_cluster_size; - int count, ret; + struct f2fs_inode_info *fi = F2FS_I(inode); + pgoff_t page_idx = 0, last_idx, cluster_idx; + int ret; if (!f2fs_sb_has_compression(sbi) || F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER) @@ -4322,10 +4362,15 @@ static int f2fs_ioc_compress_file(struct file *filp) set_inode_flag(inode, FI_ENABLE_COMPRESS); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); + last_idx >>= fi->i_log_cluster_size; + + for (cluster_idx = 0; cluster_idx < last_idx; cluster_idx++) { + page_idx = cluster_idx << fi->i_log_cluster_size; + + if (f2fs_is_sparse_cluster(inode, page_idx)) + continue; - count = last_idx - page_idx; - while (count && count >= cluster_size) { - ret = redirty_blocks(inode, page_idx, cluster_size); + ret = redirty_blocks(inode, page_idx, fi->i_cluster_size); if (ret < 0) break; @@ -4335,9 +4380,6 @@ static int f2fs_ioc_compress_file(struct file *filp) break; } - count -= cluster_size; - page_idx += cluster_size; - cond_resched(); if (fatal_signal_pending(current)) { ret = -EINTR; @@ -4538,6 +4580,13 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_down_read(&fi->i_gc_rwsem[READ]); } + /* dio is not compatible w/ atomic file */ + if (f2fs_is_atomic_file(inode)) { + f2fs_up_read(&fi->i_gc_rwsem[READ]); + ret = -EOPNOTSUPP; + goto out; + } + /* * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of * the higher-level function iomap_dio_rw() in order to ensure that the @@ -4597,6 +4646,10 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) f2fs_trace_rw_file_path(iocb->ki_filp, iocb->ki_pos, iov_iter_count(to), READ); + /* In LFS mode, if there is inflight dio, wait for its completion */ + if (f2fs_lfs_mode(F2FS_I_SB(inode))) + inode_dio_wait(inode); + if (f2fs_should_use_dio(inode, iocb, to)) { ret = f2fs_dio_read_iter(iocb, to); } else { @@ -4949,6 +5002,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) /* Determine whether we will do a direct write or a buffered write. */ dio = f2fs_should_use_dio(inode, iocb, from); + /* dio is not compatible w/ atomic write */ + if (dio && f2fs_is_atomic_file(inode)) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + /* Possibly preallocate the blocks for the write. */ target_size = iocb->ki_pos + iov_iter_count(from); preallocated = f2fs_preallocate_blocks(iocb, from, dio); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 724bbcb447d3..9322a7200e31 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -81,6 +81,8 @@ static int gc_thread_func(void *data) continue; } + gc_control.one_time = false; + /* * [GC triggering condition] * 0. GC is not conducted currently. @@ -116,15 +118,30 @@ static int gc_thread_func(void *data) goto next; } - if (has_enough_invalid_blocks(sbi)) + if (f2fs_sb_has_blkzoned(sbi)) { + if (has_enough_free_blocks(sbi, + gc_th->no_zoned_gc_percent)) { + wait_ms = gc_th->no_gc_sleep_time; + f2fs_up_write(&sbi->gc_lock); + goto next; + } + if (wait_ms == gc_th->no_gc_sleep_time) + wait_ms = gc_th->max_sleep_time; + } + + if (need_to_boost_gc(sbi)) { decrease_sleep_time(gc_th, &wait_ms); - else + if (f2fs_sb_has_blkzoned(sbi)) + gc_control.one_time = true; + } else { increase_sleep_time(gc_th, &wait_ms); + } do_gc: stat_inc_gc_call_count(sbi, foreground ? FOREGROUND : BACKGROUND); - sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC; + sync_mode = (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) || + gc_control.one_time; /* foreground GC was been triggered via f2fs_balance_fs() */ if (foreground) @@ -179,9 +196,21 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi) return -ENOMEM; gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME; - gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; - gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; - gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; + gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO; + + if (f2fs_sb_has_blkzoned(sbi)) { + gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED; + gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME_ZONED; + gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME_ZONED; + gc_th->no_zoned_gc_percent = LIMIT_NO_ZONED_GC; + gc_th->boost_zoned_gc_percent = LIMIT_BOOST_ZONED_GC; + } else { + gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME; + gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME; + gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME; + gc_th->no_zoned_gc_percent = 0; + gc_th->boost_zoned_gc_percent = 0; + } gc_th->gc_wake = false; @@ -339,7 +368,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) unsigned char age = 0; unsigned char u; unsigned int i; - unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi, segno); + unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi); for (i = 0; i < usable_segs_per_sec; i++) mtime += get_seg_entry(sbi, start + i)->mtime; @@ -368,6 +397,11 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi, if (p->alloc_mode == SSR) return get_seg_entry(sbi, segno)->ckpt_valid_blocks; + if (p->one_time_gc && (get_valid_blocks(sbi, segno, true) >= + CAP_BLKS_PER_SEC(sbi) * sbi->gc_thread->valid_thresh_ratio / + 100)) + return UINT_MAX; + /* alloc_mode == LFS */ if (p->gc_mode == GC_GREEDY) return get_valid_blocks(sbi, segno, true); @@ -742,7 +776,7 @@ static int f2fs_gc_pinned_control(struct inode *inode, int gc_type, */ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, int gc_type, int type, char alloc_mode, - unsigned long long age) + unsigned long long age, bool one_time) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct sit_info *sm = SIT_I(sbi); @@ -759,6 +793,7 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result, p.alloc_mode = alloc_mode; p.age = age; p.age_threshold = sbi->am.age_threshold; + p.one_time_gc = one_time; retry: select_policy(sbi, gc_type, type, &p); @@ -1670,13 +1705,14 @@ next_step: } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, - int gc_type) + int gc_type, bool one_time) { struct sit_info *sit_i = SIT_I(sbi); int ret; down_write(&sit_i->sentry_lock); - ret = f2fs_get_victim(sbi, victim, gc_type, NO_CHECK_TYPE, LFS, 0); + ret = f2fs_get_victim(sbi, victim, gc_type, NO_CHECK_TYPE, + LFS, 0, one_time); up_write(&sit_i->sentry_lock); return ret; } @@ -1684,30 +1720,49 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int start_segno, struct gc_inode_list *gc_list, int gc_type, - bool force_migrate) + bool force_migrate, bool one_time) { struct page *sum_page; struct f2fs_summary_block *sum; struct blk_plug plug; unsigned int segno = start_segno; unsigned int end_segno = start_segno + SEGS_PER_SEC(sbi); + unsigned int sec_end_segno; int seg_freed = 0, migrated = 0; unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? SUM_TYPE_DATA : SUM_TYPE_NODE; unsigned char data_type = (type == SUM_TYPE_DATA) ? DATA : NODE; int submitted = 0; - if (__is_large_section(sbi)) - end_segno = rounddown(end_segno, SEGS_PER_SEC(sbi)); + if (__is_large_section(sbi)) { + sec_end_segno = rounddown(end_segno, SEGS_PER_SEC(sbi)); - /* - * zone-capacity can be less than zone-size in zoned devices, - * resulting in less than expected usable segments in the zone, - * calculate the end segno in the zone which can be garbage collected - */ - if (f2fs_sb_has_blkzoned(sbi)) - end_segno -= SEGS_PER_SEC(sbi) - - f2fs_usable_segs_in_sec(sbi, segno); + /* + * zone-capacity can be less than zone-size in zoned devices, + * resulting in less than expected usable segments in the zone, + * calculate the end segno in the zone which can be garbage + * collected + */ + if (f2fs_sb_has_blkzoned(sbi)) + sec_end_segno -= SEGS_PER_SEC(sbi) - + f2fs_usable_segs_in_sec(sbi); + + if (gc_type == BG_GC || one_time) { + unsigned int window_granularity = + sbi->migration_window_granularity; + + if (f2fs_sb_has_blkzoned(sbi) && + !has_enough_free_blocks(sbi, + sbi->gc_thread->boost_zoned_gc_percent)) + window_granularity *= + BOOST_GC_MULTIPLE; + + end_segno = start_segno + window_granularity; + } + + if (end_segno > sec_end_segno) + end_segno = sec_end_segno; + } sanity_check_seg_type(sbi, get_seg_entry(sbi, segno)->type); @@ -1786,7 +1841,8 @@ freed: if (__is_large_section(sbi)) sbi->next_victim_seg[gc_type] = - (segno + 1 < end_segno) ? segno + 1 : NULL_SEGNO; + (segno + 1 < sec_end_segno) ? + segno + 1 : NULL_SEGNO; skip: f2fs_put_page(sum_page, 0); } @@ -1863,7 +1919,7 @@ gc_more: goto stop; } retry: - ret = __get_victim(sbi, &segno, gc_type); + ret = __get_victim(sbi, &segno, gc_type, gc_control->one_time); if (ret) { /* allow to search victim from sections has pinned data */ if (ret == -ENODATA && gc_type == FG_GC && @@ -1875,17 +1931,21 @@ retry: } seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type, - gc_control->should_migrate_blocks); + gc_control->should_migrate_blocks, + gc_control->one_time); if (seg_freed < 0) goto stop; total_freed += seg_freed; - if (seg_freed == f2fs_usable_segs_in_sec(sbi, segno)) { + if (seg_freed == f2fs_usable_segs_in_sec(sbi)) { sec_freed++; total_sec_freed++; } + if (gc_control->one_time) + goto stop; + if (gc_type == FG_GC) { sbi->cur_victim_sec = NULL_SEGNO; @@ -2010,8 +2070,7 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi, .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), }; - do_garbage_collect(sbi, segno, &gc_list, FG_GC, - dry_run_sections == 0); + do_garbage_collect(sbi, segno, &gc_list, FG_GC, true, false); put_gc_inode(&gc_list); if (!dry_run && get_valid_blocks(sbi, segno, true)) diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index a8ea3301b815..2914b678bf8f 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -15,16 +15,27 @@ #define DEF_GC_THREAD_MAX_SLEEP_TIME 60000 #define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */ +/* GC sleep parameters for zoned deivces */ +#define DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED 10 +#define DEF_GC_THREAD_MAX_SLEEP_TIME_ZONED 20 +#define DEF_GC_THREAD_NOGC_SLEEP_TIME_ZONED 60000 + /* choose candidates from sections which has age of more than 7 days */ #define DEF_GC_THREAD_AGE_THRESHOLD (60 * 60 * 24 * 7) #define DEF_GC_THREAD_CANDIDATE_RATIO 20 /* select 20% oldest sections as candidates */ #define DEF_GC_THREAD_MAX_CANDIDATE_COUNT 10 /* select at most 10 sections as candidates */ #define DEF_GC_THREAD_AGE_WEIGHT 60 /* age weight */ +#define DEF_GC_THREAD_VALID_THRESH_RATIO 95 /* do not GC over 95% valid block ratio for one time GC */ #define DEFAULT_ACCURACY_CLASS 10000 /* accuracy class */ #define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */ #define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */ +#define LIMIT_NO_ZONED_GC 60 /* percentage over total user space of no gc for zoned devices */ +#define LIMIT_BOOST_ZONED_GC 25 /* percentage over total user space of boosted gc for zoned devices */ +#define DEF_MIGRATION_WINDOW_GRANULARITY_ZONED 3 +#define BOOST_GC_MULTIPLE 5 + #define DEF_GC_FAILED_PINNED_FILES 2048 #define MAX_GC_FAILED_PINNED_FILES USHRT_MAX @@ -51,6 +62,11 @@ struct f2fs_gc_kthread { * caller of f2fs_balance_fs() * will wait on this wait queue. */ + + /* for gc control for zoned devices */ + unsigned int no_zoned_gc_percent; + unsigned int boost_zoned_gc_percent; + unsigned int valid_thresh_ratio; }; struct gc_inode_list { @@ -152,6 +168,12 @@ static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th, *wait -= min_time; } +static inline bool has_enough_free_blocks(struct f2fs_sb_info *sbi, + unsigned int limit_perc) +{ + return free_sections(sbi) > ((sbi->total_sections * limit_perc) / 100); +} + static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) { block_t user_block_count = sbi->user_block_count; @@ -167,3 +189,10 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) free_user_blocks(sbi) < limit_free_user_blocks(invalid_user_blocks)); } + +static inline bool need_to_boost_gc(struct f2fs_sb_info *sbi) +{ + if (f2fs_sb_has_blkzoned(sbi)) + return !has_enough_free_blocks(sbi, LIMIT_BOOST_ZONED_GC); + return has_enough_invalid_blocks(sbi); +} diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index cca7d448e55c..005babf1bed1 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -260,35 +260,34 @@ out: return err; } -int f2fs_write_inline_data(struct inode *inode, struct page *page) +int f2fs_write_inline_data(struct inode *inode, struct folio *folio) { - struct dnode_of_data dn; - int err; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct page *ipage; - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE); - if (err) - return err; + ipage = f2fs_get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); if (!f2fs_has_inline_data(inode)) { - f2fs_put_dnode(&dn); + f2fs_put_page(ipage, 1); return -EAGAIN; } - f2fs_bug_on(F2FS_I_SB(inode), page->index); + f2fs_bug_on(F2FS_I_SB(inode), folio->index); - f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true); - memcpy_from_page(inline_data_addr(inode, dn.inode_page), - page, 0, MAX_INLINE_DATA(inode)); - set_page_dirty(dn.inode_page); + f2fs_wait_on_page_writeback(ipage, NODE, true, true); + memcpy_from_folio(inline_data_addr(inode, ipage), + folio, 0, MAX_INLINE_DATA(inode)); + set_page_dirty(ipage); - f2fs_clear_page_cache_dirty_tag(page); + f2fs_clear_page_cache_dirty_tag(folio); set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); - clear_page_private_inline(dn.inode_page); - f2fs_put_dnode(&dn); + clear_page_private_inline(ipage); + f2fs_put_page(ipage, 1); return 0; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index aef57172014f..1ed86df343a5 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -7,7 +7,6 @@ */ #include <linux/fs.h> #include <linux/f2fs_fs.h> -#include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/sched/mm.h> #include <linux/lz4.h> @@ -35,6 +34,11 @@ void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync) if (f2fs_inode_dirtied(inode, sync)) return; + if (f2fs_is_atomic_file(inode)) { + set_inode_flag(inode, FI_ATOMIC_DIRTIED); + return; + } + mark_inode_dirty_sync(inode); } @@ -175,7 +179,8 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) if (provided != calculated) f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x", - page->index, ino_of_node(page), provided, calculated); + page_folio(page)->index, ino_of_node(page), + provided, calculated); return provided == calculated; } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 38b4750475db..57d46e1439de 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -457,62 +457,6 @@ struct dentry *f2fs_get_parent(struct dentry *child) return d_obtain_alias(f2fs_iget(child->d_sb, ino)); } -static int __recover_dot_dentries(struct inode *dir, nid_t pino) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - struct qstr dot = QSTR_INIT(".", 1); - struct f2fs_dir_entry *de; - struct page *page; - int err = 0; - - if (f2fs_readonly(sbi->sb)) { - f2fs_info(sbi, "skip recovering inline_dots inode (ino:%lu, pino:%u) in readonly mountpoint", - dir->i_ino, pino); - return 0; - } - - if (!S_ISDIR(dir->i_mode)) { - f2fs_err(sbi, "inconsistent inode status, skip recovering inline_dots inode (ino:%lu, i_mode:%u, pino:%u)", - dir->i_ino, dir->i_mode, pino); - set_sbi_flag(sbi, SBI_NEED_FSCK); - return -ENOTDIR; - } - - err = f2fs_dquot_initialize(dir); - if (err) - return err; - - f2fs_balance_fs(sbi, true); - - f2fs_lock_op(sbi); - - de = f2fs_find_entry(dir, &dot, &page); - if (de) { - f2fs_put_page(page, 0); - } else if (IS_ERR(page)) { - err = PTR_ERR(page); - goto out; - } else { - err = f2fs_do_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR); - if (err) - goto out; - } - - de = f2fs_find_entry(dir, &dotdot_name, &page); - if (de) - f2fs_put_page(page, 0); - else if (IS_ERR(page)) - err = PTR_ERR(page); - else - err = f2fs_do_add_link(dir, &dotdot_name, NULL, pino, S_IFDIR); -out: - if (!err) - clear_inode_flag(dir, FI_INLINE_DOTS); - - f2fs_unlock_op(sbi); - return err; -} - static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { @@ -522,7 +466,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, struct dentry *new; nid_t ino = -1; int err = 0; - unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir)); struct f2fs_filename fname; trace_f2fs_lookup_start(dir, dentry, flags); @@ -558,17 +501,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - if ((dir->i_ino == root_ino) && f2fs_has_inline_dots(dir)) { - err = __recover_dot_dentries(dir, root_ino); - if (err) - goto out_iput; - } - - if (f2fs_has_inline_dots(inode)) { - err = __recover_dot_dentries(inode, dir->i_ino); - if (err) - goto out_iput; - } if (IS_ENCRYPTED(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && !fscrypt_has_permitted_context(dir, inode)) { diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b72ef96f7e33..59b13ff243fa 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -20,7 +20,7 @@ #include "iostat.h" #include <trace/events/f2fs.h> -#define on_f2fs_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock) +#define on_f2fs_build_free_nids(nm_i) mutex_is_locked(&(nm_i)->build_lock) static struct kmem_cache *nat_entry_slab; static struct kmem_cache *free_nid_slab; @@ -123,7 +123,7 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) static void clear_node_page_dirty(struct page *page) { if (PageDirty(page)) { - f2fs_clear_page_cache_dirty_tag(page); + f2fs_clear_page_cache_dirty_tag(page_folio(page)); clear_page_dirty_for_io(page); dec_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); } @@ -919,7 +919,7 @@ static int truncate_node(struct dnode_of_data *dn) clear_node_page_dirty(dn->node_page); set_sbi_flag(sbi, SBI_IS_DIRTY); - index = dn->node_page->index; + index = page_folio(dn->node_page)->index; f2fs_put_page(dn->node_page, 1); invalidate_mapping_pages(NODE_MAPPING(sbi), @@ -1369,6 +1369,7 @@ fail: */ static int read_node_page(struct page *page, blk_opf_t op_flags) { + struct folio *folio = page_folio(page); struct f2fs_sb_info *sbi = F2FS_P_SB(page); struct node_info ni; struct f2fs_io_info fio = { @@ -1381,21 +1382,21 @@ static int read_node_page(struct page *page, blk_opf_t op_flags) }; int err; - if (PageUptodate(page)) { + if (folio_test_uptodate(folio)) { if (!f2fs_inode_chksum_verify(sbi, page)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); return -EFSBADCRC; } return LOCKED_PAGE; } - err = f2fs_get_node_info(sbi, page->index, &ni, false); + err = f2fs_get_node_info(sbi, folio->index, &ni, false); if (err) return err; /* NEW_ADDR can be seen, after cp_error drops some dirty node pages */ if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); return -ENOENT; } @@ -1492,7 +1493,7 @@ out_err: out_put_err: /* ENOENT comes from read_node_page which is not an error. */ if (err != -ENOENT) - f2fs_handle_page_eio(sbi, page->index, NODE); + f2fs_handle_page_eio(sbi, page_folio(page), NODE); f2fs_put_page(page, 1); return ERR_PTR(err); } @@ -1535,7 +1536,7 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) if (!clear_page_dirty_for_io(page)) goto page_out; - ret = f2fs_write_inline_data(inode, page); + ret = f2fs_write_inline_data(inode, page_folio(page)); inode_dec_dirty_pages(inode); f2fs_remove_dirty_inode(inode); if (ret) @@ -1608,6 +1609,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, enum iostat_type io_type, unsigned int *seq_id) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); + struct folio *folio = page_folio(page); nid_t nid; struct node_info ni; struct f2fs_io_info fio = { @@ -1624,15 +1626,15 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, }; unsigned int seq; - trace_f2fs_writepage(page_folio(page), NODE); + trace_f2fs_writepage(folio, NODE); if (unlikely(f2fs_cp_error(sbi))) { /* keep node pages in remount-ro mode */ if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY) goto redirty_out; - ClearPageUptodate(page); + folio_clear_uptodate(folio); dec_page_count(sbi, F2FS_DIRTY_NODES); - unlock_page(page); + folio_unlock(folio); return 0; } @@ -1646,7 +1648,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, /* get old block addr of this node page */ nid = nid_of_node(page); - f2fs_bug_on(sbi, page->index != nid); + f2fs_bug_on(sbi, folio->index != nid); if (f2fs_get_node_info(sbi, nid, &ni, !do_balance)) goto redirty_out; @@ -1660,10 +1662,10 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, /* This page is already truncated */ if (unlikely(ni.blk_addr == NULL_ADDR)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); dec_page_count(sbi, F2FS_DIRTY_NODES); f2fs_up_read(&sbi->node_write); - unlock_page(page); + folio_unlock(folio); return 0; } @@ -1674,7 +1676,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, goto redirty_out; } - if (atomic && !test_opt(sbi, NOBARRIER) && !f2fs_sb_has_blkzoned(sbi)) + if (atomic && !test_opt(sbi, NOBARRIER)) fio.op_flags |= REQ_PREFLUSH | REQ_FUA; /* should add to global list before clearing PAGECACHE status */ @@ -1684,7 +1686,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, *seq_id = seq; } - set_page_writeback(page); + folio_start_writeback(folio); fio.old_blkaddr = ni.blk_addr; f2fs_do_write_node_page(nid, &fio); @@ -1697,7 +1699,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, submitted = NULL; } - unlock_page(page); + folio_unlock(folio); if (unlikely(f2fs_cp_error(sbi))) { f2fs_submit_merged_write(sbi, NODE); @@ -1711,7 +1713,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, return 0; redirty_out: - redirty_page_for_writepage(wbc, page); + folio_redirty_for_writepage(wbc, folio); return AOP_WRITEPAGE_ACTIVATE; } @@ -1867,7 +1869,7 @@ continue_unlock: } if (!ret && atomic && !marked) { f2fs_debug(sbi, "Retry to write fsync mark: ino=%u, idx=%lx", - ino, last_page->index); + ino, page_folio(last_page)->index); lock_page(last_page); f2fs_wait_on_page_writeback(last_page, NODE, true, true); set_page_dirty(last_page); @@ -3166,7 +3168,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8); nm_i->nat_bits = f2fs_kvzalloc(sbi, - nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL); + F2FS_BLK_TO_BYTES(nm_i->nat_bits_blocks), GFP_KERNEL); if (!nm_i->nat_bits) return -ENOMEM; @@ -3185,7 +3187,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) if (IS_ERR(page)) return PTR_ERR(page); - memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS), + memcpy(nm_i->nat_bits + F2FS_BLK_TO_BYTES(i), page_address(page), F2FS_BLKSIZE); f2fs_put_page(page, 1); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 78c3198a6308..1766254279d2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -199,6 +199,10 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) clear_inode_flag(inode, FI_ATOMIC_COMMITTED); clear_inode_flag(inode, FI_ATOMIC_REPLACE); clear_inode_flag(inode, FI_ATOMIC_FILE); + if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) { + clear_inode_flag(inode, FI_ATOMIC_DIRTIED); + f2fs_mark_inode_dirty_sync(inode, true); + } stat_dec_atomic_inode(inode); F2FS_I(inode)->atomic_write_task = NULL; @@ -366,6 +370,10 @@ out: } else { sbi->committed_atomic_block += fi->atomic_write_cnt; set_inode_flag(inode, FI_ATOMIC_COMMITTED); + if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) { + clear_inode_flag(inode, FI_ATOMIC_DIRTIED); + f2fs_mark_inode_dirty_sync(inode, true); + } } __complete_revoke_list(inode, &revoke_list, ret ? true : false); @@ -1282,6 +1290,13 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, wait_list, issued); return 0; } + + /* + * Issue discard for conventional zones only if the device + * supports discard. + */ + if (!bdev_max_discard_sectors(bdev)) + return -EOPNOTSUPP; } #endif @@ -2686,22 +2701,47 @@ static int get_new_segment(struct f2fs_sb_info *sbi, goto got_it; } +#ifdef CONFIG_BLK_DEV_ZONED /* * If we format f2fs on zoned storage, let's try to get pinned sections * from beginning of the storage, which should be a conventional one. */ if (f2fs_sb_has_blkzoned(sbi)) { - segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg); + /* Prioritize writing to conventional zones */ + if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_PRIOR_CONV || pinning) + segno = 0; + else + segno = max(first_zoned_segno(sbi), *newseg); hint = GET_SEC_FROM_SEG(sbi, segno); } +#endif find_other_zone: secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); + +#ifdef CONFIG_BLK_DEV_ZONED + if (secno >= MAIN_SECS(sbi) && f2fs_sb_has_blkzoned(sbi)) { + /* Write only to sequential zones */ + if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_ONLY_SEQ) { + hint = GET_SEC_FROM_SEG(sbi, first_zoned_segno(sbi)); + secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); + } else + secno = find_first_zero_bit(free_i->free_secmap, + MAIN_SECS(sbi)); + if (secno >= MAIN_SECS(sbi)) { + ret = -ENOSPC; + f2fs_bug_on(sbi, 1); + goto out_unlock; + } + } +#endif + if (secno >= MAIN_SECS(sbi)) { secno = find_first_zero_bit(free_i->free_secmap, MAIN_SECS(sbi)); if (secno >= MAIN_SECS(sbi)) { ret = -ENOSPC; + f2fs_bug_on(sbi, 1); goto out_unlock; } } @@ -2743,10 +2783,8 @@ got_it: out_unlock: spin_unlock(&free_i->segmap_lock); - if (ret == -ENOSPC) { + if (ret == -ENOSPC) f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_NO_SEGMENT); - f2fs_bug_on(sbi, 1); - } return ret; } @@ -3052,7 +3090,8 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, sanity_check_seg_type(sbi, seg_type); /* f2fs_need_SSR() already forces to do this */ - if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) { + if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, + alloc_mode, age, false)) { curseg->next_segno = segno; return 1; } @@ -3079,7 +3118,8 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type, for (; cnt-- > 0; reversed ? i-- : i++) { if (i == seg_type) continue; - if (!f2fs_get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) { + if (!f2fs_get_victim(sbi, &segno, BG_GC, i, + alloc_mode, age, false)) { curseg->next_segno = segno; return 1; } @@ -3522,7 +3562,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (file_is_cold(inode) || f2fs_need_compress_data(inode)) return CURSEG_COLD_DATA; - type = __get_age_segment_type(inode, fio->page->index); + type = __get_age_segment_type(inode, + page_folio(fio->page)->index); if (type != NO_CHECK_TYPE) return type; @@ -3781,7 +3822,7 @@ out: f2fs_up_read(&fio->sbi->io_order_lock); } -void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, +void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio, enum iostat_type io_type) { struct f2fs_io_info fio = { @@ -3790,20 +3831,20 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, .temp = HOT, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_META | REQ_PRIO, - .old_blkaddr = page->index, - .new_blkaddr = page->index, - .page = page, + .old_blkaddr = folio->index, + .new_blkaddr = folio->index, + .page = folio_page(folio, 0), .encrypted_page = NULL, .in_list = 0, }; - if (unlikely(page->index >= MAIN_BLKADDR(sbi))) + if (unlikely(folio->index >= MAIN_BLKADDR(sbi))) fio.op_flags &= ~REQ_META; - set_page_writeback(page); + folio_start_writeback(folio); f2fs_submit_page_write(&fio); - stat_inc_meta_count(sbi, page->index); + stat_inc_meta_count(sbi, folio->index); f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE); } @@ -5381,8 +5422,7 @@ unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, return BLKS_PER_SEG(sbi); } -unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, - unsigned int segno) +unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi) { if (f2fs_sb_has_blkzoned(sbi)) return CAP_SEGS_PER_SEC(sbi); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index bfc01a521cb9..71adb4a43bec 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -188,6 +188,7 @@ struct victim_sel_policy { unsigned int min_segno; /* segment # having min. cost */ unsigned long long age; /* mtime of GCed section*/ unsigned long long age_threshold;/* age threshold */ + bool one_time_gc; /* one time GC */ }; struct seg_entry { @@ -430,7 +431,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; - unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno); + unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi); spin_lock(&free_i->segmap_lock); clear_bit(segno, free_i->free_segmap); @@ -464,7 +465,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; - unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno); + unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi); spin_lock(&free_i->segmap_lock); if (test_and_clear_bit(segno, free_i->free_segmap)) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 176b5177c89d..87ab5696bd48 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -11,7 +11,6 @@ #include <linux/fs_context.h> #include <linux/sched/mm.h> #include <linux/statfs.h> -#include <linux/buffer_head.h> #include <linux/kthread.h> #include <linux/parser.h> #include <linux/mount.h> @@ -707,6 +706,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) if (!strcmp(name, "on")) { F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON; } else if (!strcmp(name, "off")) { + if (f2fs_sb_has_blkzoned(sbi)) { + f2fs_warn(sbi, "zoned devices need bggc"); + kfree(name); + return -EINVAL; + } F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF; } else if (!strcmp(name, "sync")) { F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC; @@ -2561,7 +2565,7 @@ restore_opts: static void f2fs_shutdown(struct super_block *sb) { - f2fs_do_shutdown(F2FS_SB(sb), F2FS_GOING_DOWN_NOSYNC, false); + f2fs_do_shutdown(F2FS_SB(sb), F2FS_GOING_DOWN_NOSYNC, false, false); } #ifdef CONFIG_QUOTA @@ -3318,29 +3322,47 @@ loff_t max_file_blocks(struct inode *inode) * fit within U32_MAX + 1 data units. */ - result = min(result, (((loff_t)U32_MAX + 1) * 4096) >> F2FS_BLKSIZE_BITS); + result = min(result, F2FS_BYTES_TO_BLK(((loff_t)U32_MAX + 1) * 4096)); return result; } -static int __f2fs_commit_super(struct buffer_head *bh, - struct f2fs_super_block *super) +static int __f2fs_commit_super(struct f2fs_sb_info *sbi, struct folio *folio, + pgoff_t index, bool update) { - lock_buffer(bh); - if (super) - memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super)); - set_buffer_dirty(bh); - unlock_buffer(bh); - + struct bio *bio; /* it's rare case, we can do fua all the time */ - return __sync_dirty_buffer(bh, REQ_SYNC | REQ_PREFLUSH | REQ_FUA); + blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA; + int ret; + + folio_lock(folio); + folio_wait_writeback(folio); + if (update) + memcpy(F2FS_SUPER_BLOCK(folio, index), F2FS_RAW_SUPER(sbi), + sizeof(struct f2fs_super_block)); + folio_mark_dirty(folio); + folio_clear_dirty_for_io(folio); + folio_start_writeback(folio); + folio_unlock(folio); + + bio = bio_alloc(sbi->sb->s_bdev, 1, opf, GFP_NOFS); + + /* it doesn't need to set crypto context for superblock update */ + bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(folio_index(folio)); + + if (!bio_add_folio(bio, folio, folio_size(folio), 0)) + f2fs_bug_on(sbi, 1); + + ret = submit_bio_wait(bio); + folio_end_writeback(folio); + + return ret; } static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, - struct buffer_head *bh) + struct folio *folio, pgoff_t index) { - struct f2fs_super_block *raw_super = (struct f2fs_super_block *) - (bh->b_data + F2FS_SUPER_OFFSET); + struct f2fs_super_block *raw_super = F2FS_SUPER_BLOCK(folio, index); struct super_block *sb = sbi->sb; u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr); u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr); @@ -3356,9 +3378,9 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, u32 segment_count = le32_to_cpu(raw_super->segment_count); u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); u64 main_end_blkaddr = main_blkaddr + - (segment_count_main << log_blocks_per_seg); + ((u64)segment_count_main << log_blocks_per_seg); u64 seg_end_blkaddr = segment0_blkaddr + - (segment_count << log_blocks_per_seg); + ((u64)segment_count << log_blocks_per_seg); if (segment0_blkaddr != cp_blkaddr) { f2fs_info(sbi, "Mismatch start address, segment0(%u) cp_blkaddr(%u)", @@ -3415,7 +3437,7 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, set_sbi_flag(sbi, SBI_NEED_SB_WRITE); res = "internally"; } else { - err = __f2fs_commit_super(bh, NULL); + err = __f2fs_commit_super(sbi, folio, index, false); res = err ? "failed" : "done"; } f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%llu) block(%u)", @@ -3428,12 +3450,11 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, } static int sanity_check_raw_super(struct f2fs_sb_info *sbi, - struct buffer_head *bh) + struct folio *folio, pgoff_t index) { block_t segment_count, segs_per_sec, secs_per_zone, segment_count_main; block_t total_sections, blocks_per_seg; - struct f2fs_super_block *raw_super = (struct f2fs_super_block *) - (bh->b_data + F2FS_SUPER_OFFSET); + struct f2fs_super_block *raw_super = F2FS_SUPER_BLOCK(folio, index); size_t crc_offset = 0; __u32 crc = 0; @@ -3591,7 +3612,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, } /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ - if (sanity_check_area_boundary(sbi, bh)) + if (sanity_check_area_boundary(sbi, folio, index)) return -EFSCORRUPTED; return 0; @@ -3786,6 +3807,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) sbi->next_victim_seg[FG_GC] = NULL_SEGNO; sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; sbi->migration_granularity = SEGS_PER_SEC(sbi); + sbi->migration_window_granularity = f2fs_sb_has_blkzoned(sbi) ? + DEF_MIGRATION_WINDOW_GRANULARITY_ZONED : SEGS_PER_SEC(sbi); sbi->seq_file_ra_mul = MIN_RA_MUL; sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE; sbi->max_fragment_hole = DEF_FRAGMENT_SIZE; @@ -3938,7 +3961,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, { struct super_block *sb = sbi->sb; int block; - struct buffer_head *bh; + struct folio *folio; struct f2fs_super_block *super; int err = 0; @@ -3947,32 +3970,32 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, return -ENOMEM; for (block = 0; block < 2; block++) { - bh = sb_bread(sb, block); - if (!bh) { + folio = read_mapping_folio(sb->s_bdev->bd_mapping, block, NULL); + if (IS_ERR(folio)) { f2fs_err(sbi, "Unable to read %dth superblock", block + 1); - err = -EIO; + err = PTR_ERR(folio); *recovery = 1; continue; } /* sanity checking of raw super */ - err = sanity_check_raw_super(sbi, bh); + err = sanity_check_raw_super(sbi, folio, block); if (err) { f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock", block + 1); - brelse(bh); + folio_put(folio); *recovery = 1; continue; } if (!*raw_super) { - memcpy(super, bh->b_data + F2FS_SUPER_OFFSET, + memcpy(super, F2FS_SUPER_BLOCK(folio, block), sizeof(*super)); *valid_super_block = block; *raw_super = super; } - brelse(bh); + folio_put(folio); } /* No valid superblock */ @@ -3986,7 +4009,8 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) { - struct buffer_head *bh; + struct folio *folio; + pgoff_t index; __u32 crc = 0; int err; @@ -4004,22 +4028,24 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) } /* write back-up superblock first */ - bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1); - if (!bh) - return -EIO; - err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); - brelse(bh); + index = sbi->valid_super_block ? 0 : 1; + folio = read_mapping_folio(sbi->sb->s_bdev->bd_mapping, index, NULL); + if (IS_ERR(folio)) + return PTR_ERR(folio); + err = __f2fs_commit_super(sbi, folio, index, true); + folio_put(folio); /* if we are in recovery path, skip writing valid superblock */ if (recover || err) return err; /* write current valid superblock */ - bh = sb_bread(sbi->sb, sbi->valid_super_block); - if (!bh) - return -EIO; - err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi)); - brelse(bh); + index = sbi->valid_super_block; + folio = read_mapping_folio(sbi->sb->s_bdev->bd_mapping, index, NULL); + if (IS_ERR(folio)) + return PTR_ERR(folio); + err = __f2fs_commit_super(sbi, folio, index, true); + folio_put(folio); return err; } @@ -4173,12 +4199,14 @@ void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, } f2fs_warn(sbi, "Remounting filesystem read-only"); + /* - * Make sure updated value of ->s_mount_flags will be visible before - * ->s_flags update + * We have already set CP_ERROR_FLAG flag to stop all updates + * to filesystem, so it doesn't need to set SB_RDONLY flag here + * because the flag should be set covered w/ sb->s_umount semaphore + * via remount procedure, otherwise, it will confuse code like + * freeze_super() which will lead to deadlocks and other problems. */ - smp_wmb(); - sb->s_flags |= SB_RDONLY; } static void f2fs_record_error_work(struct work_struct *work) @@ -4219,6 +4247,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) sbi->aligned_blksize = true; #ifdef CONFIG_BLK_DEV_ZONED sbi->max_open_zones = UINT_MAX; + sbi->blkzone_alloc_policy = BLKZONE_ALLOC_PRIOR_SEQ; #endif for (i = 0; i < max_devices; i++) { diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index fee7ee45ceaa..c56e8c873935 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -170,6 +170,12 @@ static ssize_t undiscard_blks_show(struct f2fs_attr *a, SM_I(sbi)->dcc_info->undiscard_blks); } +static ssize_t atgc_enabled_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return sysfs_emit(buf, "%d\n", sbi->am.atgc_enabled ? 1 : 0); +} + static ssize_t gc_mode_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -182,50 +188,50 @@ static ssize_t features_show(struct f2fs_attr *a, int len = 0; if (f2fs_sb_has_encrypt(sbi)) - len += scnprintf(buf, PAGE_SIZE - len, "%s", + len += sysfs_emit_at(buf, len, "%s", "encryption"); if (f2fs_sb_has_blkzoned(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "blkzoned"); if (f2fs_sb_has_extra_attr(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "extra_attr"); if (f2fs_sb_has_project_quota(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "projquota"); if (f2fs_sb_has_inode_chksum(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "inode_checksum"); if (f2fs_sb_has_flexible_inline_xattr(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "flexible_inline_xattr"); if (f2fs_sb_has_quota_ino(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "quota_ino"); if (f2fs_sb_has_inode_crtime(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "inode_crtime"); if (f2fs_sb_has_lost_found(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "lost_found"); if (f2fs_sb_has_verity(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "verity"); if (f2fs_sb_has_sb_chksum(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "sb_checksum"); if (f2fs_sb_has_casefold(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "casefold"); if (f2fs_sb_has_readonly(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "readonly"); if (f2fs_sb_has_compression(sbi)) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "compression"); - len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s", + len += sysfs_emit_at(buf, len, "%s%s", len ? ", " : "", "pin_file"); - len += scnprintf(buf + len, PAGE_SIZE - len, "\n"); + len += sysfs_emit_at(buf, len, "\n"); return len; } @@ -323,17 +329,14 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, int hot_count = sbi->raw_super->hot_ext_count; int len = 0, i; - len += scnprintf(buf + len, PAGE_SIZE - len, - "cold file extension:\n"); + len += sysfs_emit_at(buf, len, "cold file extension:\n"); for (i = 0; i < cold_count; i++) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n", - extlist[i]); + len += sysfs_emit_at(buf, len, "%s\n", extlist[i]); - len += scnprintf(buf + len, PAGE_SIZE - len, - "hot file extension:\n"); + len += sysfs_emit_at(buf, len, "hot file extension:\n"); for (i = cold_count; i < cold_count + hot_count; i++) - len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n", - extlist[i]); + len += sysfs_emit_at(buf, len, "%s\n", extlist[i]); + return len; } @@ -561,6 +564,11 @@ out: return -EINVAL; } + if (!strcmp(a->attr.name, "migration_window_granularity")) { + if (t == 0 || t > SEGS_PER_SEC(sbi)) + return -EINVAL; + } + if (!strcmp(a->attr.name, "gc_urgent")) { if (t == 0) { sbi->gc_mode = GC_NORMAL; @@ -627,6 +635,15 @@ out: } #endif +#ifdef CONFIG_BLK_DEV_ZONED + if (!strcmp(a->attr.name, "blkzone_alloc_policy")) { + if (t < BLKZONE_ALLOC_PRIOR_SEQ || t > BLKZONE_ALLOC_PRIOR_CONV) + return -EINVAL; + sbi->blkzone_alloc_policy = t; + return count; + } +#endif + #ifdef CONFIG_F2FS_FS_COMPRESSION if (!strcmp(a->attr.name, "compr_written_block") || !strcmp(a->attr.name, "compr_saved_block")) { @@ -775,7 +792,8 @@ out: if (!strcmp(a->attr.name, "ipu_policy")) { if (t >= BIT(F2FS_IPU_MAX)) return -EINVAL; - if (t && f2fs_lfs_mode(sbi)) + /* allow F2FS_IPU_NOCACHE only for IPU in the pinned file */ + if (f2fs_lfs_mode(sbi) && (t & ~BIT(F2FS_IPU_NOCACHE))) return -EINVAL; SM_I(sbi)->ipu_policy = (unsigned int)t; return count; @@ -960,6 +978,9 @@ GC_THREAD_RW_ATTR(gc_urgent_sleep_time, urgent_sleep_time); GC_THREAD_RW_ATTR(gc_min_sleep_time, min_sleep_time); GC_THREAD_RW_ATTR(gc_max_sleep_time, max_sleep_time); GC_THREAD_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time); +GC_THREAD_RW_ATTR(gc_no_zoned_gc_percent, no_zoned_gc_percent); +GC_THREAD_RW_ATTR(gc_boost_zoned_gc_percent, boost_zoned_gc_percent); +GC_THREAD_RW_ATTR(gc_valid_thresh_ratio, valid_thresh_ratio); /* SM_INFO ATTR */ SM_INFO_RW_ATTR(reclaim_segments, rec_prefree_segments); @@ -969,6 +990,7 @@ SM_INFO_GENERAL_RW_ATTR(min_fsync_blocks); SM_INFO_GENERAL_RW_ATTR(min_seq_blocks); SM_INFO_GENERAL_RW_ATTR(min_hot_blocks); SM_INFO_GENERAL_RW_ATTR(min_ssr_sections); +SM_INFO_GENERAL_RW_ATTR(reserved_segments); /* DCC_INFO ATTR */ DCC_INFO_RW_ATTR(max_small_discards, max_discards); @@ -1001,6 +1023,7 @@ F2FS_SBI_RW_ATTR(gc_pin_file_thresh, gc_pin_file_threshold); F2FS_SBI_RW_ATTR(gc_reclaimed_segments, gc_reclaimed_segs); F2FS_SBI_GENERAL_RW_ATTR(max_victim_search); F2FS_SBI_GENERAL_RW_ATTR(migration_granularity); +F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity); F2FS_SBI_GENERAL_RW_ATTR(dir_level); #ifdef CONFIG_F2FS_IOSTAT F2FS_SBI_GENERAL_RW_ATTR(iostat_enable); @@ -1033,6 +1056,7 @@ F2FS_SBI_GENERAL_RW_ATTR(warm_data_age_threshold); F2FS_SBI_GENERAL_RW_ATTR(last_age_weight); #ifdef CONFIG_BLK_DEV_ZONED F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec); +F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy); #endif /* STAT_INFO ATTR */ @@ -1072,6 +1096,7 @@ F2FS_GENERAL_RO_ATTR(encoding); F2FS_GENERAL_RO_ATTR(mounted_time_sec); F2FS_GENERAL_RO_ATTR(main_blkaddr); F2FS_GENERAL_RO_ATTR(pending_discard); +F2FS_GENERAL_RO_ATTR(atgc_enabled); F2FS_GENERAL_RO_ATTR(gc_mode); #ifdef CONFIG_F2FS_STAT_FS F2FS_GENERAL_RO_ATTR(moved_blocks_background); @@ -1116,6 +1141,9 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_min_sleep_time), ATTR_LIST(gc_max_sleep_time), ATTR_LIST(gc_no_gc_sleep_time), + ATTR_LIST(gc_no_zoned_gc_percent), + ATTR_LIST(gc_boost_zoned_gc_percent), + ATTR_LIST(gc_valid_thresh_ratio), ATTR_LIST(gc_idle), ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), @@ -1138,8 +1166,10 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(min_seq_blocks), ATTR_LIST(min_hot_blocks), ATTR_LIST(min_ssr_sections), + ATTR_LIST(reserved_segments), ATTR_LIST(max_victim_search), ATTR_LIST(migration_granularity), + ATTR_LIST(migration_window_granularity), ATTR_LIST(dir_level), ATTR_LIST(ram_thresh), ATTR_LIST(ra_nid_pages), @@ -1187,6 +1217,7 @@ static struct attribute *f2fs_attrs[] = { #endif #ifdef CONFIG_BLK_DEV_ZONED ATTR_LIST(unusable_blocks_per_sec), + ATTR_LIST(blkzone_alloc_policy), #endif #ifdef CONFIG_F2FS_FS_COMPRESSION ATTR_LIST(compr_written_block), @@ -1200,6 +1231,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(atgc_candidate_count), ATTR_LIST(atgc_age_weight), ATTR_LIST(atgc_age_threshold), + ATTR_LIST(atgc_enabled), ATTR_LIST(seq_file_ra_mul), ATTR_LIST(gc_segment_mode), ATTR_LIST(gc_reclaimed_segments), diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index 84a33fe49bed..2287f238ae09 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -74,7 +74,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count, struct address_space *mapping = inode->i_mapping; const struct address_space_operations *aops = mapping->a_ops; - if (pos + count > inode->i_sb->s_maxbytes) + if (pos + count > F2FS_BLK_TO_BYTES(max_file_blocks(inode))) return -EFBIG; while (count) { @@ -237,7 +237,8 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, pos = le64_to_cpu(dloc.pos); /* Get the descriptor */ - if (pos + size < pos || pos + size > inode->i_sb->s_maxbytes || + if (pos + size < pos || + pos + size > F2FS_BLK_TO_BYTES(max_file_blocks(inode)) || pos < f2fs_verity_metadata_pos(inode) || size > INT_MAX) { f2fs_warn(F2FS_I_SB(inode), "invalid verity xattr"); f2fs_handle_error(F2FS_I_SB(inode), diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index f290fe9327c4..3f3874943679 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -629,6 +629,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, struct page *ipage, int flags) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_xattr_entry *here, *last; void *base_addr, *last_base_addr; int found, newsize; @@ -772,9 +773,18 @@ retry: if (index == F2FS_XATTR_INDEX_ENCRYPTION && !strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT)) f2fs_set_encrypted_inode(inode); - if (S_ISDIR(inode->i_mode)) - set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP); + if (!S_ISDIR(inode->i_mode)) + goto same; + /* + * In restrict mode, fsync() always try to trigger checkpoint for all + * metadata consistency, in other mode, it triggers checkpoint when + * parent's xattr metadata was updated. + */ + if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) + set_sbi_flag(sbi, SBI_NEED_CP); + else + f2fs_add_ino_entry(sbi, inode->i_ino, XATTR_DIR_INO); same: if (is_inode_flag_set(inode, FI_ACL_MODE)) { inode->i_mode = F2FS_I(inode)->i_acl_mode; |