author		Linus Torvalds <torvalds@linux-foundation.org>	2025-01-25 20:03:04 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2025-01-25 20:03:04 -0800
commit		c2da8b3f914f83fb9089d26a692eb8f22146ddb9
tree		3d8ada017dc3c01a6542ed9c9d3efacc9c7d6bd9
parent		aa22f4da2a46b484a257d167c67a2adc1b7aaf68
parent		8f9530aeeb4f756bdfa70510b40e5d28ea3c742e
Merge tag 'erofs-for-6.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs updates from Gao Xiang:
"Still no new features for this cycle, as some ongoing improvements
remain premature for now.
This includes a micro-optimization for the superblock checksum, along
with minor bugfixes and code cleanups, as usual:
- Micro-optimize superblock checksum
- Avoid overly large bvecs[] for file-backed mounts
- Some leftover folio conversion in z_erofs_bind_cache()
- Minor bugfixes and cleanups"
* tag 'erofs-for-6.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
erofs: refine z_erofs_get_extent_compressedlen()
erofs: remove dead code in erofs_fc_parse_param
erofs: return SHRINK_EMPTY if no objects to free
erofs: convert z_erofs_bind_cache() to folios
erofs: tidy up zdata.c
erofs: get rid of `z_erofs_next_pcluster_t`
erofs: simplify z_erofs_load_compact_lcluster()
erofs: fix potential return value overflow of z_erofs_shrink_scan()
erofs: shorten bvecs[] for file-backed mounts
erofs: micro-optimize superblock checksum
fs: erofs: xattr.c change kzalloc to kcalloc
-rw-r--r--	fs/erofs/compress.h	 23
-rw-r--r--	fs/erofs/erofs_fs.h	  3
-rw-r--r--	fs/erofs/fileio.c	  4
-rw-r--r--	fs/erofs/super.c	 32
-rw-r--r--	fs/erofs/xattr.c	  2
-rw-r--r--	fs/erofs/zdata.c	243
-rw-r--r--	fs/erofs/zmap.c		125
-rw-r--r--	fs/erofs/zutil.c	  2
8 files changed, 162 insertions(+), 272 deletions(-)
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 7bfe251680ec..65ff39401020 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -29,29 +29,8 @@ struct z_erofs_decompressor {
 	char *name;
 };
 
-/* some special page->private (unsigned long, see below) */
 #define Z_EROFS_SHORTLIVED_PAGE		(-1UL << 2)
-#define Z_EROFS_PREALLOCATED_PAGE	(-2UL << 2)
-
-/*
- * For all pages in a pcluster, page->private should be one of
- * Type				Last 2bits	page->private
- * short-lived page		00		Z_EROFS_SHORTLIVED_PAGE
- * preallocated page (tryalloc)	00		Z_EROFS_PREALLOCATED_PAGE
- * cached/managed page		00		pointer to z_erofs_pcluster
- * online page (file-backed,	01/10/11	sub-index << 2 | count
- *		some pages can be used for inplace I/O)
- *
- * page->mapping should be one of
- * Type			page->mapping
- * short-lived page	NULL
- * preallocated page	NULL
- * cached/managed page	non-NULL or NULL (invalidated/truncated page)
- * online page		non-NULL
- *
- * For all managed pages, PG_private should be set with 1 extra refcount,
- * which is used for page reclaim / migration.
- */
+#define Z_EROFS_PREALLOCATED_FOLIO	((void *)(-2UL << 2))
 
 /*
  * Currently, short-lived pages are pages directly from buddy system
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index c8f2ae845bd2..199395ed1c1f 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -9,6 +9,7 @@
 #ifndef __EROFS_FS_H
 #define __EROFS_FS_H
 
+/* to allow for x86 boot sectors and other oddities. */
 #define EROFS_SUPER_OFFSET	1024
 
 #define EROFS_FEATURE_COMPAT_SB_CHKSUM		0x00000001
@@ -54,7 +55,7 @@ struct erofs_deviceslot {
 /* erofs on-disk super block (currently 128 bytes) */
 struct erofs_super_block {
 	__le32 magic;           /* file system magic number */
-	__le32 checksum;        /* crc32c(super_block) */
+	__le32 checksum;        /* crc32c to avoid unexpected on-disk overlap */
 	__le32 feature_compat;
 	__u8 blkszbits;         /* filesystem block size in bit shift */
 	__u8 sb_extslots;	/* superblock size = 128 + sb_extslots * 16 */
diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
index 33f8539dda4a..0ffd1c63beeb 100644
--- a/fs/erofs/fileio.c
+++ b/fs/erofs/fileio.c
@@ -6,7 +6,7 @@
 #include <trace/events/erofs.h>
 
 struct erofs_fileio_rq {
-	struct bio_vec bvecs[BIO_MAX_VECS];
+	struct bio_vec bvecs[16];
 	struct bio bio;
 	struct kiocb iocb;
 	struct super_block *sb;
@@ -68,7 +68,7 @@ static struct erofs_fileio_rq *erofs_fileio_rq_alloc(struct erofs_map_dev *mdev)
 	struct erofs_fileio_rq *rq = kzalloc(sizeof(*rq),
 					     GFP_KERNEL | __GFP_NOFAIL);
 
-	bio_init(&rq->bio, NULL, rq->bvecs, BIO_MAX_VECS, REQ_OP_READ);
+	bio_init(&rq->bio, NULL, rq->bvecs, ARRAY_SIZE(rq->bvecs), REQ_OP_READ);
 	rq->iocb.ki_filp = mdev->m_dif->file;
 	rq->sb = mdev->m_sb;
 	return rq;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index f5956474bfde..827b62665649 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -39,29 +39,21 @@ void _erofs_printk(struct super_block *sb, const char *fmt, ...)
static int erofs_superblock_csum_verify(struct super_block *sb, void *sbdata) { - size_t len = 1 << EROFS_SB(sb)->blkszbits; - struct erofs_super_block *dsb; - u32 expected_crc, crc; + struct erofs_super_block *dsb = sbdata + EROFS_SUPER_OFFSET; + u32 len = 1 << EROFS_SB(sb)->blkszbits, crc; if (len > EROFS_SUPER_OFFSET) len -= EROFS_SUPER_OFFSET; + len -= offsetof(struct erofs_super_block, checksum) + + sizeof(dsb->checksum); - dsb = kmemdup(sbdata + EROFS_SUPER_OFFSET, len, GFP_KERNEL); - if (!dsb) - return -ENOMEM; - - expected_crc = le32_to_cpu(dsb->checksum); - dsb->checksum = 0; - /* to allow for x86 boot sectors and other oddities. */ - crc = crc32c(~0, dsb, len); - kfree(dsb); - - if (crc != expected_crc) { - erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected", - crc, expected_crc); - return -EBADMSG; - } - return 0; + /* skip .magic(pre-verified) and .checksum(0) fields */ + crc = crc32c(0x5045B54A, (&dsb->checksum) + 1, len); + if (crc == le32_to_cpu(dsb->checksum)) + return 0; + erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected", + crc, le32_to_cpu(dsb->checksum)); + return -EBADMSG; } static void erofs_inode_init_once(void *ptr) @@ -516,8 +508,6 @@ static int erofs_fc_parse_param(struct fs_context *fc, errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name); #endif break; - default: - return -ENOPARAM; } return 0; } diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c index a90d7d649739..7940241d9355 100644 --- a/fs/erofs/xattr.c +++ b/fs/erofs/xattr.c @@ -478,7 +478,7 @@ int erofs_xattr_prefixes_init(struct super_block *sb) if (!sbi->xattr_prefix_count) return 0; - pfs = kzalloc(sbi->xattr_prefix_count * sizeof(*pfs), GFP_KERNEL); + pfs = kcalloc(sbi->xattr_prefix_count, sizeof(*pfs), GFP_KERNEL); if (!pfs) return -ENOMEM; diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 33a128252687..29f8963bb523 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -12,12 +12,6 @@ #define Z_EROFS_PCLUSTER_MAX_PAGES (Z_EROFS_PCLUSTER_MAX_SIZE / PAGE_SIZE) #define Z_EROFS_INLINE_BVECS 2 -/* - * let's leave a type here in case of introducing - * another tagged pointer later. - */ -typedef void *z_erofs_next_pcluster_t; - struct z_erofs_bvec { struct page *page; int offset; @@ -48,7 +42,7 @@ struct z_erofs_pcluster { struct lockref lockref; /* A: point to next chained pcluster or TAILs */ - z_erofs_next_pcluster_t next; + struct z_erofs_pcluster *next; /* I: start block address of this pcluster */ erofs_off_t index; @@ -94,12 +88,11 @@ struct z_erofs_pcluster { /* the end of a chain of pclusters */ #define Z_EROFS_PCLUSTER_TAIL ((void *) 0x700 + POISON_POINTER_DELTA) -#define Z_EROFS_PCLUSTER_NIL (NULL) struct z_erofs_decompressqueue { struct super_block *sb; + struct z_erofs_pcluster *head; atomic_t pending_bios; - z_erofs_next_pcluster_t head; union { struct completion done; @@ -462,39 +455,32 @@ err_decompressor: } enum z_erofs_pclustermode { + /* It has previously been linked into another processing chain */ Z_EROFS_PCLUSTER_INFLIGHT, /* - * a weak form of Z_EROFS_PCLUSTER_FOLLOWED, the difference is that it - * could be dispatched into bypass queue later due to uptodated managed - * pages. All related online pages cannot be reused for inplace I/O (or - * bvpage) since it can be directly decoded without I/O submission. + * A weaker form of Z_EROFS_PCLUSTER_FOLLOWED; the difference is that it + * may be dispatched to the bypass queue later due to uptodated managed + * folios. 
All file-backed folios related to this pcluster cannot be + * reused for in-place I/O (or bvpage) since the pcluster may be decoded + * in a separate queue (and thus out of order). */ Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE, /* - * The pcluster was just linked to a decompression chain by us. It can - * also be linked with the remaining pclusters, which means if the - * processing page is the tail page of a pcluster, this pcluster can - * safely use the whole page (since the previous pcluster is within the - * same chain) for in-place I/O, as illustrated below: - * ___________________________________________________ - * | tail (partial) page | head (partial) page | - * | (of the current pcl) | (of the previous pcl) | - * |___PCLUSTER_FOLLOWED___|_____PCLUSTER_FOLLOWED_____| - * - * [ (*) the page above can be used as inplace I/O. ] + * The pcluster has just been linked to our processing chain. + * File-backed folios (except for the head page) related to it can be + * used for in-place I/O (or bvpage). */ Z_EROFS_PCLUSTER_FOLLOWED, }; -struct z_erofs_decompress_frontend { +struct z_erofs_frontend { struct inode *const inode; struct erofs_map_blocks map; struct z_erofs_bvec_iter biter; struct page *pagepool; struct page *candidate_bvpage; - struct z_erofs_pcluster *pcl; - z_erofs_next_pcluster_t owned_head; + struct z_erofs_pcluster *pcl, *head; enum z_erofs_pclustermode mode; erofs_off_t headoffset; @@ -503,11 +489,11 @@ struct z_erofs_decompress_frontend { unsigned int icur; }; -#define DECOMPRESS_FRONTEND_INIT(__i) { \ - .inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \ - .mode = Z_EROFS_PCLUSTER_FOLLOWED } +#define Z_EROFS_DEFINE_FRONTEND(fe, i, ho) struct z_erofs_frontend fe = { \ + .inode = i, .head = Z_EROFS_PCLUSTER_TAIL, \ + .mode = Z_EROFS_PCLUSTER_FOLLOWED, .headoffset = ho } -static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe) +static bool z_erofs_should_alloc_cache(struct z_erofs_frontend *fe) { unsigned int cachestrategy = EROFS_I_SB(fe->inode)->opt.cache_strategy; @@ -524,19 +510,17 @@ static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe) return false; } -static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) +static void z_erofs_bind_cache(struct z_erofs_frontend *fe) { struct address_space *mc = MNGD_MAPPING(EROFS_I_SB(fe->inode)); struct z_erofs_pcluster *pcl = fe->pcl; unsigned int pclusterpages = z_erofs_pclusterpages(pcl); bool shouldalloc = z_erofs_should_alloc_cache(fe); - bool standalone = true; - /* - * optimistic allocation without direct reclaim since inplace I/O - * can be used if low memory otherwise. 
- */ + bool may_bypass = true; + /* Optimistic allocation, as in-place I/O can be used as a fallback */ gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; + struct folio *folio, *newfolio; unsigned int i; if (i_blocksize(fe->inode) != PAGE_SIZE || @@ -544,47 +528,42 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) return; for (i = 0; i < pclusterpages; ++i) { - struct page *page, *newpage; - /* Inaccurate check w/o locking to avoid unneeded lookups */ if (READ_ONCE(pcl->compressed_bvecs[i].page)) continue; - page = find_get_page(mc, pcl->index + i); - if (!page) { - /* I/O is needed, no possible to decompress directly */ - standalone = false; + folio = filemap_get_folio(mc, pcl->index + i); + if (IS_ERR(folio)) { + may_bypass = false; if (!shouldalloc) continue; /* - * Try cached I/O if allocation succeeds or fallback to - * in-place I/O instead to avoid any direct reclaim. + * Allocate a managed folio for cached I/O, or it may be + * then filled with a file-backed folio for in-place I/O */ - newpage = erofs_allocpage(&fe->pagepool, gfp); - if (!newpage) + newfolio = filemap_alloc_folio(gfp, 0); + if (!newfolio) continue; - set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE); + newfolio->private = Z_EROFS_PREALLOCATED_FOLIO; + folio = NULL; } spin_lock(&pcl->lockref.lock); if (!pcl->compressed_bvecs[i].page) { - pcl->compressed_bvecs[i].page = page ? page : newpage; + pcl->compressed_bvecs[i].page = + folio_page(folio ?: newfolio, 0); spin_unlock(&pcl->lockref.lock); continue; } spin_unlock(&pcl->lockref.lock); - - if (page) - put_page(page); - else if (newpage) - erofs_pagepool_add(&fe->pagepool, newpage); + folio_put(folio ?: newfolio); } /* - * don't do inplace I/O if all compressed pages are available in - * managed cache since it can be moved to the bypass queue instead. + * Don't perform in-place I/O if all compressed pages are available in + * the managed cache, as the pcluster can be moved to the bypass queue. 
*/ - if (standalone) + if (may_bypass) fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE; } @@ -681,7 +660,7 @@ int erofs_init_managed_cache(struct super_block *sb) } /* callers must be with pcluster lock held */ -static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe, +static int z_erofs_attach_page(struct z_erofs_frontend *fe, struct z_erofs_bvec *bvec, bool exclusive) { struct z_erofs_pcluster *pcl = fe->pcl; @@ -727,7 +706,7 @@ static bool z_erofs_get_pcluster(struct z_erofs_pcluster *pcl) return true; } -static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe) +static int z_erofs_register_pcluster(struct z_erofs_frontend *fe) { struct erofs_map_blocks *map = &fe->map; struct super_block *sb = fe->inode->i_sb; @@ -751,9 +730,7 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe) pcl->algorithmformat = map->m_algorithmformat; pcl->length = 0; pcl->partial = true; - - /* new pclusters should be claimed as type 1, primary and followed */ - pcl->next = fe->owned_head; + pcl->next = fe->head; pcl->pageofs_out = map->m_la & ~PAGE_MASK; fe->mode = Z_EROFS_PCLUSTER_FOLLOWED; @@ -789,8 +766,7 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe) goto err_out; } } - fe->owned_head = &pcl->next; - fe->pcl = pcl; + fe->head = fe->pcl = pcl; return 0; err_out: @@ -799,7 +775,7 @@ err_out: return err; } -static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe) +static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe) { struct erofs_map_blocks *map = &fe->map; struct super_block *sb = fe->inode->i_sb; @@ -809,7 +785,7 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe) DBG_BUGON(fe->pcl); /* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous pcluster */ - DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL); + DBG_BUGON(!fe->head); if (!(map->m_flags & EROFS_MAP_META)) { while (1) { @@ -837,10 +813,9 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe) if (ret == -EEXIST) { mutex_lock(&fe->pcl->lock); /* check if this pcluster hasn't been linked into any chain. */ - if (cmpxchg(&fe->pcl->next, Z_EROFS_PCLUSTER_NIL, - fe->owned_head) == Z_EROFS_PCLUSTER_NIL) { + if (!cmpxchg(&fe->pcl->next, NULL, fe->head)) { /* .. so it can be attached to our submission chain */ - fe->owned_head = &fe->pcl->next; + fe->head = fe->pcl; fe->mode = Z_EROFS_PCLUSTER_FOLLOWED; } else { /* otherwise, it belongs to an inflight chain */ fe->mode = Z_EROFS_PCLUSTER_INFLIGHT; @@ -873,14 +848,9 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe) return 0; } -/* - * keep in mind that no referenced pclusters will be freed - * only after a RCU grace period. 
- */ static void z_erofs_rcu_callback(struct rcu_head *head) { - z_erofs_free_pcluster(container_of(head, - struct z_erofs_pcluster, rcu)); + z_erofs_free_pcluster(container_of(head, struct z_erofs_pcluster, rcu)); } static bool __erofs_try_to_release_pcluster(struct erofs_sb_info *sbi, @@ -922,12 +892,10 @@ static bool erofs_try_to_release_pcluster(struct erofs_sb_info *sbi, return free; } -unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi, - unsigned long nr_shrink) +unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi, unsigned long nr) { struct z_erofs_pcluster *pcl; - unsigned int freed = 0; - unsigned long index; + unsigned long index, freed = 0; xa_lock(&sbi->managed_pslots); xa_for_each(&sbi->managed_pslots, index, pcl) { @@ -937,7 +905,7 @@ unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi, xa_unlock(&sbi->managed_pslots); ++freed; - if (!--nr_shrink) + if (!--nr) return freed; xa_lock(&sbi->managed_pslots); } @@ -966,7 +934,7 @@ static void z_erofs_put_pcluster(struct erofs_sb_info *sbi, call_rcu(&pcl->rcu, z_erofs_rcu_callback); } -static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe) +static void z_erofs_pcluster_end(struct z_erofs_frontend *fe) { struct z_erofs_pcluster *pcl = fe->pcl; @@ -979,13 +947,9 @@ static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe) if (fe->candidate_bvpage) fe->candidate_bvpage = NULL; - /* - * if all pending pages are added, don't hold its reference - * any longer if the pcluster isn't hosted by ourselves. - */ + /* Drop refcount if it doesn't belong to our processing chain */ if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE) z_erofs_put_pcluster(EROFS_I_SB(fe->inode), pcl, false); - fe->pcl = NULL; } @@ -1014,7 +978,7 @@ static int z_erofs_read_fragment(struct super_block *sb, struct folio *folio, return 0; } -static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *f, +static int z_erofs_scan_folio(struct z_erofs_frontend *f, struct folio *folio, bool ra) { struct inode *const inode = f->inode; @@ -1129,7 +1093,7 @@ static bool z_erofs_page_is_invalidated(struct page *page) return !page_folio(page)->mapping && !z_erofs_is_shortlived_page(page); } -struct z_erofs_decompress_backend { +struct z_erofs_backend { struct page *onstack_pages[Z_EROFS_ONSTACK_PAGES]; struct super_block *sb; struct z_erofs_pcluster *pcl; @@ -1149,7 +1113,7 @@ struct z_erofs_bvec_item { struct list_head list; }; -static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be, +static void z_erofs_do_decompressed_bvec(struct z_erofs_backend *be, struct z_erofs_bvec *bvec) { struct z_erofs_bvec_item *item; @@ -1172,8 +1136,7 @@ static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be, list_add(&item->list, &be->decompressed_secondary_bvecs); } -static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be, - int err) +static void z_erofs_fill_other_copies(struct z_erofs_backend *be, int err) { unsigned int off0 = be->pcl->pageofs_out; struct list_head *p, *n; @@ -1214,7 +1177,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be, } } -static void z_erofs_parse_out_bvecs(struct z_erofs_decompress_backend *be) +static void z_erofs_parse_out_bvecs(struct z_erofs_backend *be) { struct z_erofs_pcluster *pcl = be->pcl; struct z_erofs_bvec_iter biter; @@ -1239,8 +1202,7 @@ static void z_erofs_parse_out_bvecs(struct z_erofs_decompress_backend *be) z_erofs_put_shortlivedpage(be->pagepool, old_bvpage); } -static int 
z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be, - bool *overlapped) +static int z_erofs_parse_in_bvecs(struct z_erofs_backend *be, bool *overlapped) { struct z_erofs_pcluster *pcl = be->pcl; unsigned int pclusterpages = z_erofs_pclusterpages(pcl); @@ -1275,8 +1237,7 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be, return err; } -static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, - int err) +static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err) { struct erofs_sb_info *const sbi = EROFS_SB(be->sb); struct z_erofs_pcluster *pcl = be->pcl; @@ -1393,7 +1354,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, pcl->vcnt = 0; /* pcluster lock MUST be taken before the following line */ - WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_NIL); + WRITE_ONCE(pcl->next, NULL); mutex_unlock(&pcl->lock); if (z_erofs_is_inline_pcluster(pcl)) @@ -1406,21 +1367,19 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, static int z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io, struct page **pagepool) { - struct z_erofs_decompress_backend be = { + struct z_erofs_backend be = { .sb = io->sb, .pagepool = pagepool, .decompressed_secondary_bvecs = LIST_HEAD_INIT(be.decompressed_secondary_bvecs), + .pcl = io->head, }; - z_erofs_next_pcluster_t owned = io->head; + struct z_erofs_pcluster *next; int err = io->eio ? -EIO : 0; - while (owned != Z_EROFS_PCLUSTER_TAIL) { - DBG_BUGON(owned == Z_EROFS_PCLUSTER_NIL); - - be.pcl = container_of(owned, struct z_erofs_pcluster, next); - owned = READ_ONCE(be.pcl->next); - + for (; be.pcl != Z_EROFS_PCLUSTER_TAIL; be.pcl = next) { + DBG_BUGON(!be.pcl); + next = READ_ONCE(be.pcl->next); err = z_erofs_decompress_pcluster(&be, err) ?: err; } return err; @@ -1486,7 +1445,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, } static void z_erofs_fill_bio_vec(struct bio_vec *bvec, - struct z_erofs_decompress_frontend *f, + struct z_erofs_frontend *f, struct z_erofs_pcluster *pcl, unsigned int nr, struct address_space *mc) @@ -1513,12 +1472,8 @@ repeat: DBG_BUGON(z_erofs_is_shortlived_page(bvec->bv_page)); folio = page_folio(zbv.page); - /* - * Handle preallocated cached folios. We tried to allocate such folios - * without triggering direct reclaim. If allocation failed, inplace - * file-backed folios will be used instead. 
- */ - if (folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) { + /* For preallocated managed folios, add them to page cache here */ + if (folio->private == Z_EROFS_PREALLOCATED_FOLIO) { tocache = true; goto out_tocache; } @@ -1630,18 +1585,13 @@ enum { NR_JOBQUEUES, }; -static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl, - z_erofs_next_pcluster_t qtail[], - z_erofs_next_pcluster_t owned_head) +static void z_erofs_move_to_bypass_queue(struct z_erofs_pcluster *pcl, + struct z_erofs_pcluster *next, + struct z_erofs_pcluster **qtail[]) { - z_erofs_next_pcluster_t *const submit_qtail = qtail[JQ_SUBMIT]; - z_erofs_next_pcluster_t *const bypass_qtail = qtail[JQ_BYPASS]; - WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_TAIL); - - WRITE_ONCE(*submit_qtail, owned_head); - WRITE_ONCE(*bypass_qtail, &pcl->next); - + WRITE_ONCE(*qtail[JQ_SUBMIT], next); + WRITE_ONCE(*qtail[JQ_BYPASS], pcl); qtail[JQ_BYPASS] = &pcl->next; } @@ -1670,15 +1620,15 @@ static void z_erofs_endio(struct bio *bio) bio_put(bio); } -static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, +static void z_erofs_submit_queue(struct z_erofs_frontend *f, struct z_erofs_decompressqueue *fgq, bool *force_fg, bool readahead) { struct super_block *sb = f->inode->i_sb; struct address_space *mc = MNGD_MAPPING(EROFS_SB(sb)); - z_erofs_next_pcluster_t qtail[NR_JOBQUEUES]; + struct z_erofs_pcluster **qtail[NR_JOBQUEUES]; struct z_erofs_decompressqueue *q[NR_JOBQUEUES]; - z_erofs_next_pcluster_t owned_head = f->owned_head; + struct z_erofs_pcluster *pcl, *next; /* bio is NULL initially, so no need to initialize last_{index,bdev} */ erofs_off_t last_pa; unsigned int nr_bios = 0; @@ -1694,22 +1644,19 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head; /* by default, all need io submission */ - q[JQ_SUBMIT]->head = owned_head; + q[JQ_SUBMIT]->head = next = f->head; do { struct erofs_map_dev mdev; - struct z_erofs_pcluster *pcl; erofs_off_t cur, end; struct bio_vec bvec; unsigned int i = 0; bool bypass = true; - DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_NIL); - pcl = container_of(owned_head, struct z_erofs_pcluster, next); - owned_head = READ_ONCE(pcl->next); - + pcl = next; + next = READ_ONCE(pcl->next); if (z_erofs_is_inline_pcluster(pcl)) { - move_to_bypass_jobqueue(pcl, qtail, owned_head); + z_erofs_move_to_bypass_queue(pcl, next, qtail); continue; } @@ -1781,8 +1728,8 @@ drain_io: if (!bypass) qtail[JQ_SUBMIT] = &pcl->next; else - move_to_bypass_jobqueue(pcl, qtail, owned_head); - } while (owned_head != Z_EROFS_PCLUSTER_TAIL); + z_erofs_move_to_bypass_queue(pcl, next, qtail); + } while (next != Z_EROFS_PCLUSTER_TAIL); if (bio) { if (erofs_is_fileio_mode(EROFS_SB(sb))) @@ -1806,17 +1753,16 @@ drain_io: z_erofs_decompress_kickoff(q[JQ_SUBMIT], nr_bios); } -static int z_erofs_runqueue(struct z_erofs_decompress_frontend *f, - unsigned int ra_folios) +static int z_erofs_runqueue(struct z_erofs_frontend *f, unsigned int rapages) { struct z_erofs_decompressqueue io[NR_JOBQUEUES]; struct erofs_sb_info *sbi = EROFS_I_SB(f->inode); - bool force_fg = z_erofs_is_sync_decompress(sbi, ra_folios); + bool force_fg = z_erofs_is_sync_decompress(sbi, rapages); int err; - if (f->owned_head == Z_EROFS_PCLUSTER_TAIL) + if (f->head == Z_EROFS_PCLUSTER_TAIL) return 0; - z_erofs_submit_queue(f, io, &force_fg, !!ra_folios); + z_erofs_submit_queue(f, io, &force_fg, !!rapages); /* handle bypass queue (no i/o pclusters) immediately */ err = z_erofs_decompress_queue(&io[JQ_BYPASS], 
&f->pagepool); @@ -1834,7 +1780,7 @@ static int z_erofs_runqueue(struct z_erofs_decompress_frontend *f, * Since partial uptodate is still unimplemented for now, we have to use * approximate readmore strategies as a start. */ -static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, +static void z_erofs_pcluster_readmore(struct z_erofs_frontend *f, struct readahead_control *rac, bool backmost) { struct inode *inode = f->inode; @@ -1889,12 +1835,10 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, static int z_erofs_read_folio(struct file *file, struct folio *folio) { struct inode *const inode = folio->mapping->host; - struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); + Z_EROFS_DEFINE_FRONTEND(f, inode, folio_pos(folio)); int err; trace_erofs_read_folio(folio, false); - f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT; - z_erofs_pcluster_readmore(&f, NULL, true); err = z_erofs_scan_folio(&f, folio, false); z_erofs_pcluster_readmore(&f, NULL, false); @@ -1914,17 +1858,14 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio) static void z_erofs_readahead(struct readahead_control *rac) { struct inode *const inode = rac->mapping->host; - struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); + Z_EROFS_DEFINE_FRONTEND(f, inode, readahead_pos(rac)); struct folio *head = NULL, *folio; - unsigned int nr_folios; + unsigned int nrpages = readahead_count(rac); int err; - f.headoffset = readahead_pos(rac); - z_erofs_pcluster_readmore(&f, rac, true); - nr_folios = readahead_count(rac); - trace_erofs_readpages(inode, readahead_index(rac), nr_folios, false); - + nrpages = readahead_count(rac); + trace_erofs_readpages(inode, readahead_index(rac), nrpages, false); while ((folio = readahead_folio(rac))) { folio->private = head; head = folio; @@ -1943,7 +1884,7 @@ static void z_erofs_readahead(struct readahead_control *rac) z_erofs_pcluster_readmore(&f, rac, false); z_erofs_pcluster_end(&f); - (void)z_erofs_runqueue(&f, nr_folios); + (void)z_erofs_runqueue(&f, nrpages); erofs_put_metabuf(&f.map.buf); erofs_release_pages(&f.pagepool); } diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 4535f2f0a014..689437e99a5a 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -97,17 +97,48 @@ static int get_compacted_la_distance(unsigned int lobits, return d1; } -static int unpack_compacted_index(struct z_erofs_maprecorder *m, - unsigned int amortizedshift, - erofs_off_t pos, bool lookahead) +static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m, + unsigned long lcn, bool lookahead) { - struct erofs_inode *const vi = EROFS_I(m->inode); + struct inode *const inode = m->inode; + struct erofs_inode *const vi = EROFS_I(inode); + const erofs_off_t ebase = sizeof(struct z_erofs_map_header) + + ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8); const unsigned int lclusterbits = vi->z_logical_clusterbits; + const unsigned int totalidx = erofs_iblks(inode); + unsigned int compacted_4b_initial, compacted_2b, amortizedshift; unsigned int vcnt, lo, lobits, encodebits, nblk, bytes; - bool big_pcluster; + bool big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1; + erofs_off_t pos; u8 *in, type; int i; + if (lcn >= totalidx || lclusterbits > 14) + return -EINVAL; + + m->lcn = lcn; + /* used to align to 32-byte (compacted_2b) alignment */ + compacted_4b_initial = ((32 - ebase % 32) / 4) & 7; + compacted_2b = 0; + if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) && + 
compacted_4b_initial < totalidx) + compacted_2b = rounddown(totalidx - compacted_4b_initial, 16); + + pos = ebase; + amortizedshift = 2; /* compact_4b */ + if (lcn >= compacted_4b_initial) { + pos += compacted_4b_initial * 4; + lcn -= compacted_4b_initial; + if (lcn < compacted_2b) { + amortizedshift = 1; + } else { + pos += compacted_2b * 2; + lcn -= compacted_2b; + } + } + pos += lcn * (1 << amortizedshift); + + /* figure out the lcluster count in this pack */ if (1 << amortizedshift == 4 && lclusterbits <= 14) vcnt = 2; else if (1 << amortizedshift == 2 && lclusterbits <= 12) @@ -122,7 +153,6 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, /* it doesn't equal to round_up(..) */ m->nextpackoff = round_down(pos, vcnt << amortizedshift) + (vcnt << amortizedshift); - big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1; lobits = max(lclusterbits, ilog2(Z_EROFS_LI_D0_CBLKCNT) + 1U); encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt; bytes = pos & ((vcnt << amortizedshift) - 1); @@ -207,53 +237,6 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, return 0; } -static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m, - unsigned long lcn, bool lookahead) -{ - struct inode *const inode = m->inode; - struct erofs_inode *const vi = EROFS_I(inode); - const erofs_off_t ebase = sizeof(struct z_erofs_map_header) + - ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8); - unsigned int totalidx = erofs_iblks(inode); - unsigned int compacted_4b_initial, compacted_2b; - unsigned int amortizedshift; - erofs_off_t pos; - - if (lcn >= totalidx || vi->z_logical_clusterbits > 14) - return -EINVAL; - - m->lcn = lcn; - /* used to align to 32-byte (compacted_2b) alignment */ - compacted_4b_initial = (32 - ebase % 32) / 4; - if (compacted_4b_initial == 32 / 4) - compacted_4b_initial = 0; - - if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) && - compacted_4b_initial < totalidx) - compacted_2b = rounddown(totalidx - compacted_4b_initial, 16); - else - compacted_2b = 0; - - pos = ebase; - if (lcn < compacted_4b_initial) { - amortizedshift = 2; - goto out; - } - pos += compacted_4b_initial * 4; - lcn -= compacted_4b_initial; - - if (lcn < compacted_2b) { - amortizedshift = 1; - goto out; - } - pos += compacted_2b * 2; - lcn -= compacted_2b; - amortizedshift = 2; -out: - pos += lcn * (1 << amortizedshift); - return unpack_compacted_index(m, amortizedshift, pos, lookahead); -} - static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m, unsigned int lcn, bool lookahead) { @@ -311,27 +294,23 @@ err_bogus: static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, unsigned int initial_lcn) { - struct super_block *sb = m->inode->i_sb; - struct erofs_inode *const vi = EROFS_I(m->inode); - struct erofs_map_blocks *const map = m->map; - const unsigned int lclusterbits = vi->z_logical_clusterbits; - unsigned long lcn; + struct inode *inode = m->inode; + struct super_block *sb = inode->i_sb; + struct erofs_inode *vi = EROFS_I(inode); + bool bigpcl1 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1; + bool bigpcl2 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2; + unsigned long lcn = m->lcn + 1; int err; - DBG_BUGON(m->type != Z_EROFS_LCLUSTER_TYPE_PLAIN && - m->type != Z_EROFS_LCLUSTER_TYPE_HEAD1 && - m->type != Z_EROFS_LCLUSTER_TYPE_HEAD2); + DBG_BUGON(m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD); DBG_BUGON(m->type != m->headtype); - if (m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN || - ((m->headtype == 
Z_EROFS_LCLUSTER_TYPE_HEAD1) && - !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) || - ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) && - !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) { - map->m_plen = 1ULL << lclusterbits; - return 0; - } - lcn = m->lcn + 1; + if ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1 && !bigpcl1) || + ((m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN || + m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) && !bigpcl2) || + (lcn << vi->z_logical_clusterbits) >= inode->i_size) + m->compressedblks = 1; + if (m->compressedblks) goto out; @@ -356,9 +335,9 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, case Z_EROFS_LCLUSTER_TYPE_HEAD2: /* * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type - * rather than CBLKCNT, it's a 1 lcluster-sized pcluster. + * rather than CBLKCNT, it's a 1 block-sized pcluster. */ - m->compressedblks = 1 << (lclusterbits - sb->s_blocksize_bits); + m->compressedblks = 1; break; case Z_EROFS_LCLUSTER_TYPE_NONHEAD: if (m->delta[0] != 1) @@ -373,7 +352,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, return -EFSCORRUPTED; } out: - map->m_plen = erofs_pos(sb, m->compressedblks); + m->map->m_plen = erofs_pos(sb, m->compressedblks); return 0; err_bonus_cblkcnt: erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid); diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c index 0dd65cefce33..83fbcab70a92 100644 --- a/fs/erofs/zutil.c +++ b/fs/erofs/zutil.c @@ -243,7 +243,7 @@ void erofs_shrinker_unregister(struct super_block *sb) static unsigned long erofs_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { - return atomic_long_read(&erofs_global_shrink_cnt); + return atomic_long_read(&erofs_global_shrink_cnt) ?: SHRINK_EMPTY; } static unsigned long erofs_shrink_scan(struct shrinker *shrink, |
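
Notes on selected changes above, with illustrative sketches.

The fileio.c hunks shrink the inline bio_vec array used for file-backed mounts from BIO_MAX_VECS (currently 256) entries down to 16, and size the bio with ARRAY_SIZE() so the capacity passed to bio_init() can never drift from the array definition. A small kernel-style sketch of the pattern; demo_rq and demo_rq_alloc are illustrative names, not erofs code:

#include <linux/bio.h>
#include <linux/slab.h>

struct demo_rq {
	struct bio_vec bvecs[16];	/* far smaller than BIO_MAX_VECS */
	struct bio bio;
};

static struct demo_rq *demo_rq_alloc(void)
{
	struct demo_rq *rq = kzalloc(sizeof(*rq), GFP_KERNEL);

	if (!rq)
		return NULL;
	/* ARRAY_SIZE() keeps the bio's vector capacity in sync with the array */
	bio_init(&rq->bio, NULL, rq->bvecs, ARRAY_SIZE(rq->bvecs), REQ_OP_READ);
	return rq;
}

The real allocation above uses __GFP_NOFAIL and therefore never returns NULL; the NULL check exists only because this sketch uses plain GFP_KERNEL.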
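The super.c rewrite drops the kmemdup()-and-zero dance from checksum verification: the magic number is already verified, the on-disk checksum field is defined to be hashed as zero, so the verifier can start crc32c from a constant (0x5045B54A in the patch, presumably precomputed over those fixed first 8 bytes) and hash only the remaining bytes in place. Below is a self-contained userspace sketch of the underlying incremental-CRC property it relies on; the seed here is recomputed at runtime rather than hard-coded, and the buffer contents are made up for illustration:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* bitwise CRC32C (Castagnoli); caller seeds with ~0, no final inversion,
 * roughly mirroring how the kernel crc32c() helper is used here */
static uint32_t crc32c(uint32_t crc, const void *buf, size_t len)
{
	const uint8_t *p = buf;

	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
	}
	return crc;
}

int main(void)
{
	/* fake superblock: 4-byte magic (little-endian), zeroed checksum, payload */
	uint8_t sb[64] = { 0xe2, 0xe1, 0xf5, 0xe0 };

	memset(sb + 8, 0xab, sizeof(sb) - 8);

	/* old approach: hash the whole block with the checksum field zeroed */
	uint32_t whole = crc32c(~0u, sb, sizeof(sb));

	/* new approach: seed with the CRC state of the fixed 8-byte prefix,
	 * then hash only the tail — crc(seed, a || b) == crc(crc(seed, a), b) */
	uint32_t seed = crc32c(~0u, sb, 8);
	uint32_t tail = crc32c(seed, sb + 8, sizeof(sb) - 8);

	printf("%08x %08x -> %s\n", whole, tail,
	       whole == tail ? "match" : "mismatch");
	return 0;
}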
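The xattr.c one-liner swaps kzalloc(n * size, ...) for kcalloc(n, size, ...): both return zeroed memory, but kcalloc also rejects a multiplication that would overflow instead of silently allocating a short buffer. A minimal kernel-style sketch of the idiom; the identifiers are illustrative, not the erofs ones:

#include <linux/slab.h>

struct demo_prefix {
	void *base;
	unsigned int len;
};

/* zeroed array of 'count' slots; fails cleanly if count * size overflows */
static struct demo_prefix *demo_alloc_prefixes(unsigned int count)
{
	return kcalloc(count, sizeof(struct demo_prefix), GFP_KERNEL);
}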
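Many of the zdata.c hunks follow from dropping the z_erofs_next_pcluster_t typedef: a processing chain is now just a singly linked list of struct z_erofs_pcluster, where a NULL ->next means "not on any chain", a TAIL sentinel terminates a chain, and ownership is still claimed with cmpxchg(). A hedged sketch of that convention using illustrative names rather than the actual erofs structures:

struct demo_pcluster {
	struct demo_pcluster *next;
	/* ... */
};

#define DEMO_PCLUSTER_TAIL	((struct demo_pcluster *)0x700)

/* claim an existing pcluster for our chain only if it is not chained yet */
static bool demo_try_claim(struct demo_pcluster *pcl, struct demo_pcluster *head)
{
	return cmpxchg(&pcl->next, NULL, head) == NULL;
}

/* walk a chain up to the TAIL sentinel, as the decompression queue now does */
static void demo_walk(struct demo_pcluster *pcl)
{
	struct demo_pcluster *next;

	for (; pcl != DEMO_PCLUSTER_TAIL; pcl = next) {
		next = READ_ONCE(pcl->next);
		/* process pcl ... */
	}
}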
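The z_erofs_bind_cache() conversion replaces find_get_page()/erofs_allocpage() with filemap_get_folio()/filemap_alloc_folio(). One detail worth calling out: filemap_get_folio() reports a cache miss as ERR_PTR(-ENOENT) rather than NULL, which is why the converted code tests IS_ERR(). A minimal sketch of that lookup idiom; demo_lookup is a hypothetical helper, not an erofs function:

#include <linux/pagemap.h>

static struct folio *demo_lookup(struct address_space *mapping, pgoff_t index)
{
	struct folio *folio = filemap_get_folio(mapping, index);

	if (IS_ERR(folio))
		return NULL;	/* cache miss */
	return folio;		/* reference held; folio_put() when done */
}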
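Finally, the zutil.c change makes the shrinker's count callback return SHRINK_EMPTY instead of 0 when there is nothing cached; as I understand the shrinker API, 0 only means "nothing to scan right now", while SHRINK_EMPTY marks the cache as genuinely empty so memcg-aware shrinkers can be skipped until objects reappear. A hedged kernel-style sketch of that convention, with a hypothetical counter standing in for the erofs one:

#include <linux/shrinker.h>
#include <linux/atomic.h>

static atomic_long_t demo_nr_objects;	/* hypothetical cache object counter */

static unsigned long demo_shrink_count(struct shrinker *shrink,
				       struct shrink_control *sc)
{
	/* report SHRINK_EMPTY rather than 0 when the cache holds nothing */
	return atomic_long_read(&demo_nr_objects) ?: SHRINK_EMPTY;
}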