summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2025-01-25 20:03:04 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2025-01-25 20:03:04 -0800
commit c2da8b3f914f83fb9089d26a692eb8f22146ddb9 (patch)
tree 3d8ada017dc3c01a6542ed9c9d3efacc9c7d6bd9
parent aa22f4da2a46b484a257d167c67a2adc1b7aaf68 (diff)
parent 8f9530aeeb4f756bdfa70510b40e5d28ea3c742e (diff)
Merge tag 'erofs-for-6.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs updates from Gao Xiang:
 "Still no new features for this cycle, as some ongoing improvements
  remain premature for now.

  This includes a micro-optimization for the superblock checksum, along
  with minor bugfixes and code cleanups, as usual:

   - Micro-optimize superblock checksum

   - Avoid overly large bvecs[] for file-backed mounts

   - Some leftover folio conversion in z_erofs_bind_cache()

   - Minor bugfixes and cleanups"

* tag 'erofs-for-6.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
  erofs: refine z_erofs_get_extent_compressedlen()
  erofs: remove dead code in erofs_fc_parse_param
  erofs: return SHRINK_EMPTY if no objects to free
  erofs: convert z_erofs_bind_cache() to folios
  erofs: tidy up zdata.c
  erofs: get rid of `z_erofs_next_pcluster_t`
  erofs: simplify z_erofs_load_compact_lcluster()
  erofs: fix potential return value overflow of z_erofs_shrink_scan()
  erofs: shorten bvecs[] for file-backed mounts
  erofs: micro-optimize superblock checksum
  fs: erofs: xattr.c change kzalloc to kcalloc
-rw-r--r-- fs/erofs/compress.h 23
-rw-r--r-- fs/erofs/erofs_fs.h 3
-rw-r--r-- fs/erofs/fileio.c 4
-rw-r--r-- fs/erofs/super.c 32
-rw-r--r-- fs/erofs/xattr.c 2
-rw-r--r-- fs/erofs/zdata.c 243
-rw-r--r-- fs/erofs/zmap.c 125
-rw-r--r-- fs/erofs/zutil.c 2
8 files changed, 162 insertions, 272 deletions
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 7bfe251680ec..65ff39401020 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -29,29 +29,8 @@ struct z_erofs_decompressor {
char *name;
};
-/* some special page->private (unsigned long, see below) */
#define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2)
-#define Z_EROFS_PREALLOCATED_PAGE (-2UL << 2)
-
-/*
- * For all pages in a pcluster, page->private should be one of
- * Type Last 2bits page->private
- * short-lived page 00 Z_EROFS_SHORTLIVED_PAGE
- * preallocated page (tryalloc) 00 Z_EROFS_PREALLOCATED_PAGE
- * cached/managed page 00 pointer to z_erofs_pcluster
- * online page (file-backed, 01/10/11 sub-index << 2 | count
- * some pages can be used for inplace I/O)
- *
- * page->mapping should be one of
- * Type page->mapping
- * short-lived page NULL
- * preallocated page NULL
- * cached/managed page non-NULL or NULL (invalidated/truncated page)
- * online page non-NULL
- *
- * For all managed pages, PG_private should be set with 1 extra refcount,
- * which is used for page reclaim / migration.
- */
+#define Z_EROFS_PREALLOCATED_FOLIO ((void *)(-2UL << 2))
/*
* Currently, short-lived pages are pages directly from buddy system
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index c8f2ae845bd2..199395ed1c1f 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -9,6 +9,7 @@
#ifndef __EROFS_FS_H
#define __EROFS_FS_H
+/* to allow for x86 boot sectors and other oddities. */
#define EROFS_SUPER_OFFSET 1024
#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001
@@ -54,7 +55,7 @@ struct erofs_deviceslot {
/* erofs on-disk super block (currently 128 bytes) */
struct erofs_super_block {
__le32 magic; /* file system magic number */
- __le32 checksum; /* crc32c(super_block) */
+ __le32 checksum; /* crc32c to avoid unexpected on-disk overlap */
__le32 feature_compat;
__u8 blkszbits; /* filesystem block size in bit shift */
__u8 sb_extslots; /* superblock size = 128 + sb_extslots * 16 */
diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
index 33f8539dda4a..0ffd1c63beeb 100644
--- a/fs/erofs/fileio.c
+++ b/fs/erofs/fileio.c
@@ -6,7 +6,7 @@
#include <trace/events/erofs.h>
struct erofs_fileio_rq {
- struct bio_vec bvecs[BIO_MAX_VECS];
+ struct bio_vec bvecs[16];
struct bio bio;
struct kiocb iocb;
struct super_block *sb;
@@ -68,7 +68,7 @@ static struct erofs_fileio_rq *erofs_fileio_rq_alloc(struct erofs_map_dev *mdev)
struct erofs_fileio_rq *rq = kzalloc(sizeof(*rq),
GFP_KERNEL | __GFP_NOFAIL);
- bio_init(&rq->bio, NULL, rq->bvecs, BIO_MAX_VECS, REQ_OP_READ);
+ bio_init(&rq->bio, NULL, rq->bvecs, ARRAY_SIZE(rq->bvecs), REQ_OP_READ);
rq->iocb.ki_filp = mdev->m_dif->file;
rq->sb = mdev->m_sb;
return rq;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index f5956474bfde..827b62665649 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -39,29 +39,21 @@ void _erofs_printk(struct super_block *sb, const char *fmt, ...)
static int erofs_superblock_csum_verify(struct super_block *sb, void *sbdata)
{
- size_t len = 1 << EROFS_SB(sb)->blkszbits;
- struct erofs_super_block *dsb;
- u32 expected_crc, crc;
+ struct erofs_super_block *dsb = sbdata + EROFS_SUPER_OFFSET;
+ u32 len = 1 << EROFS_SB(sb)->blkszbits, crc;
if (len > EROFS_SUPER_OFFSET)
len -= EROFS_SUPER_OFFSET;
+ len -= offsetof(struct erofs_super_block, checksum) +
+ sizeof(dsb->checksum);
- dsb = kmemdup(sbdata + EROFS_SUPER_OFFSET, len, GFP_KERNEL);
- if (!dsb)
- return -ENOMEM;
-
- expected_crc = le32_to_cpu(dsb->checksum);
- dsb->checksum = 0;
- /* to allow for x86 boot sectors and other oddities. */
- crc = crc32c(~0, dsb, len);
- kfree(dsb);
-
- if (crc != expected_crc) {
- erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected",
- crc, expected_crc);
- return -EBADMSG;
- }
- return 0;
+ /* skip .magic(pre-verified) and .checksum(0) fields */
+ crc = crc32c(0x5045B54A, (&dsb->checksum) + 1, len);
+ if (crc == le32_to_cpu(dsb->checksum))
+ return 0;
+ erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected",
+ crc, le32_to_cpu(dsb->checksum));
+ return -EBADMSG;
}
static void erofs_inode_init_once(void *ptr)
@@ -516,8 +508,6 @@ static int erofs_fc_parse_param(struct fs_context *fc,
errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
#endif
break;
- default:
- return -ENOPARAM;
}
return 0;
}
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index a90d7d649739..7940241d9355 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -478,7 +478,7 @@ int erofs_xattr_prefixes_init(struct super_block *sb)
if (!sbi->xattr_prefix_count)
return 0;
- pfs = kzalloc(sbi->xattr_prefix_count * sizeof(*pfs), GFP_KERNEL);
+ pfs = kcalloc(sbi->xattr_prefix_count, sizeof(*pfs), GFP_KERNEL);
if (!pfs)
return -ENOMEM;
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 33a128252687..29f8963bb523 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -12,12 +12,6 @@
#define Z_EROFS_PCLUSTER_MAX_PAGES (Z_EROFS_PCLUSTER_MAX_SIZE / PAGE_SIZE)
#define Z_EROFS_INLINE_BVECS 2
-/*
- * let's leave a type here in case of introducing
- * another tagged pointer later.
- */
-typedef void *z_erofs_next_pcluster_t;
-
struct z_erofs_bvec {
struct page *page;
int offset;
@@ -48,7 +42,7 @@ struct z_erofs_pcluster {
struct lockref lockref;
/* A: point to next chained pcluster or TAILs */
- z_erofs_next_pcluster_t next;
+ struct z_erofs_pcluster *next;
/* I: start block address of this pcluster */
erofs_off_t index;
@@ -94,12 +88,11 @@ struct z_erofs_pcluster {
/* the end of a chain of pclusters */
#define Z_EROFS_PCLUSTER_TAIL ((void *) 0x700 + POISON_POINTER_DELTA)
-#define Z_EROFS_PCLUSTER_NIL (NULL)
struct z_erofs_decompressqueue {
struct super_block *sb;
+ struct z_erofs_pcluster *head;
atomic_t pending_bios;
- z_erofs_next_pcluster_t head;
union {
struct completion done;
@@ -462,39 +455,32 @@ err_decompressor:
}
enum z_erofs_pclustermode {
+ /* It has previously been linked into another processing chain */
Z_EROFS_PCLUSTER_INFLIGHT,
/*
- * a weak form of Z_EROFS_PCLUSTER_FOLLOWED, the difference is that it
- * could be dispatched into bypass queue later due to uptodated managed
- * pages. All related online pages cannot be reused for inplace I/O (or
- * bvpage) since it can be directly decoded without I/O submission.
+ * A weaker form of Z_EROFS_PCLUSTER_FOLLOWED; the difference is that it
+ * may be dispatched to the bypass queue later due to uptodated managed
+ * folios. All file-backed folios related to this pcluster cannot be
+ * reused for in-place I/O (or bvpage) since the pcluster may be decoded
+ * in a separate queue (and thus out of order).
*/
Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE,
/*
- * The pcluster was just linked to a decompression chain by us. It can
- * also be linked with the remaining pclusters, which means if the
- * processing page is the tail page of a pcluster, this pcluster can
- * safely use the whole page (since the previous pcluster is within the
- * same chain) for in-place I/O, as illustrated below:
- * ___________________________________________________
- * | tail (partial) page | head (partial) page |
- * | (of the current pcl) | (of the previous pcl) |
- * |___PCLUSTER_FOLLOWED___|_____PCLUSTER_FOLLOWED_____|
- *
- * [ (*) the page above can be used as inplace I/O. ]
+ * The pcluster has just been linked to our processing chain.
+ * File-backed folios (except for the head page) related to it can be
+ * used for in-place I/O (or bvpage).
*/
Z_EROFS_PCLUSTER_FOLLOWED,
};
-struct z_erofs_decompress_frontend {
+struct z_erofs_frontend {
struct inode *const inode;
struct erofs_map_blocks map;
struct z_erofs_bvec_iter biter;
struct page *pagepool;
struct page *candidate_bvpage;
- struct z_erofs_pcluster *pcl;
- z_erofs_next_pcluster_t owned_head;
+ struct z_erofs_pcluster *pcl, *head;
enum z_erofs_pclustermode mode;
erofs_off_t headoffset;
@@ -503,11 +489,11 @@ struct z_erofs_decompress_frontend {
unsigned int icur;
};
-#define DECOMPRESS_FRONTEND_INIT(__i) { \
- .inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \
- .mode = Z_EROFS_PCLUSTER_FOLLOWED }
+#define Z_EROFS_DEFINE_FRONTEND(fe, i, ho) struct z_erofs_frontend fe = { \
+ .inode = i, .head = Z_EROFS_PCLUSTER_TAIL, \
+ .mode = Z_EROFS_PCLUSTER_FOLLOWED, .headoffset = ho }
-static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe)
+static bool z_erofs_should_alloc_cache(struct z_erofs_frontend *fe)
{
unsigned int cachestrategy = EROFS_I_SB(fe->inode)->opt.cache_strategy;
@@ -524,19 +510,17 @@ static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe)
return false;
}
-static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
+static void z_erofs_bind_cache(struct z_erofs_frontend *fe)
{
struct address_space *mc = MNGD_MAPPING(EROFS_I_SB(fe->inode));
struct z_erofs_pcluster *pcl = fe->pcl;
unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
bool shouldalloc = z_erofs_should_alloc_cache(fe);
- bool standalone = true;
- /*
- * optimistic allocation without direct reclaim since inplace I/O
- * can be used if low memory otherwise.
- */
+ bool may_bypass = true;
+ /* Optimistic allocation, as in-place I/O can be used as a fallback */
gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
+ struct folio *folio, *newfolio;
unsigned int i;
if (i_blocksize(fe->inode) != PAGE_SIZE ||
@@ -544,47 +528,42 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
return;
for (i = 0; i < pclusterpages; ++i) {
- struct page *page, *newpage;
-
/* Inaccurate check w/o locking to avoid unneeded lookups */
if (READ_ONCE(pcl->compressed_bvecs[i].page))
continue;
- page = find_get_page(mc, pcl->index + i);
- if (!page) {
- /* I/O is needed, no possible to decompress directly */
- standalone = false;
+ folio = filemap_get_folio(mc, pcl->index + i);
+ if (IS_ERR(folio)) {
+ may_bypass = false;
if (!shouldalloc)
continue;
/*
- * Try cached I/O if allocation succeeds or fallback to
- * in-place I/O instead to avoid any direct reclaim.
+ * Allocate a managed folio for cached I/O, or it may be
+ * then filled with a file-backed folio for in-place I/O
*/
- newpage = erofs_allocpage(&fe->pagepool, gfp);
- if (!newpage)
+ newfolio = filemap_alloc_folio(gfp, 0);
+ if (!newfolio)
continue;
- set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
+ newfolio->private = Z_EROFS_PREALLOCATED_FOLIO;
+ folio = NULL;
}
spin_lock(&pcl->lockref.lock);
if (!pcl->compressed_bvecs[i].page) {
- pcl->compressed_bvecs[i].page = page ? page : newpage;
+ pcl->compressed_bvecs[i].page =
+ folio_page(folio ?: newfolio, 0);
spin_unlock(&pcl->lockref.lock);
continue;
}
spin_unlock(&pcl->lockref.lock);
-
- if (page)
- put_page(page);
- else if (newpage)
- erofs_pagepool_add(&fe->pagepool, newpage);
+ folio_put(folio ?: newfolio);
}
/*
- * don't do inplace I/O if all compressed pages are available in
- * managed cache since it can be moved to the bypass queue instead.
+ * Don't perform in-place I/O if all compressed pages are available in
+ * the managed cache, as the pcluster can be moved to the bypass queue.
*/
- if (standalone)
+ if (may_bypass)
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
}
@@ -681,7 +660,7 @@ int erofs_init_managed_cache(struct super_block *sb)
}
/* callers must be with pcluster lock held */
-static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
+static int z_erofs_attach_page(struct z_erofs_frontend *fe,
struct z_erofs_bvec *bvec, bool exclusive)
{
struct z_erofs_pcluster *pcl = fe->pcl;
@@ -727,7 +706,7 @@ static bool z_erofs_get_pcluster(struct z_erofs_pcluster *pcl)
return true;
}
-static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
+static int z_erofs_register_pcluster(struct z_erofs_frontend *fe)
{
struct erofs_map_blocks *map = &fe->map;
struct super_block *sb = fe->inode->i_sb;
@@ -751,9 +730,7 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
pcl->algorithmformat = map->m_algorithmformat;
pcl->length = 0;
pcl->partial = true;
-
- /* new pclusters should be claimed as type 1, primary and followed */
- pcl->next = fe->owned_head;
+ pcl->next = fe->head;
pcl->pageofs_out = map->m_la & ~PAGE_MASK;
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED;
@@ -789,8 +766,7 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
goto err_out;
}
}
- fe->owned_head = &pcl->next;
- fe->pcl = pcl;
+ fe->head = fe->pcl = pcl;
return 0;
err_out:
@@ -799,7 +775,7 @@ err_out:
return err;
}
-static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
+static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
{
struct erofs_map_blocks *map = &fe->map;
struct super_block *sb = fe->inode->i_sb;
@@ -809,7 +785,7 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
DBG_BUGON(fe->pcl);
/* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous pcluster */
- DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL);
+ DBG_BUGON(!fe->head);
if (!(map->m_flags & EROFS_MAP_META)) {
while (1) {
@@ -837,10 +813,9 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
if (ret == -EEXIST) {
mutex_lock(&fe->pcl->lock);
/* check if this pcluster hasn't been linked into any chain. */
- if (cmpxchg(&fe->pcl->next, Z_EROFS_PCLUSTER_NIL,
- fe->owned_head) == Z_EROFS_PCLUSTER_NIL) {
+ if (!cmpxchg(&fe->pcl->next, NULL, fe->head)) {
/* .. so it can be attached to our submission chain */
- fe->owned_head = &fe->pcl->next;
+ fe->head = fe->pcl;
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED;
} else { /* otherwise, it belongs to an inflight chain */
fe->mode = Z_EROFS_PCLUSTER_INFLIGHT;
@@ -873,14 +848,9 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
return 0;
}
-/*
- * keep in mind that no referenced pclusters will be freed
- * only after a RCU grace period.
- */
static void z_erofs_rcu_callback(struct rcu_head *head)
{
- z_erofs_free_pcluster(container_of(head,
- struct z_erofs_pcluster, rcu));
+ z_erofs_free_pcluster(container_of(head, struct z_erofs_pcluster, rcu));
}
static bool __erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
@@ -922,12 +892,10 @@ static bool erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
return free;
}
-unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi,
- unsigned long nr_shrink)
+unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi, unsigned long nr)
{
struct z_erofs_pcluster *pcl;
- unsigned int freed = 0;
- unsigned long index;
+ unsigned long index, freed = 0;
xa_lock(&sbi->managed_pslots);
xa_for_each(&sbi->managed_pslots, index, pcl) {
@@ -937,7 +905,7 @@ unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi,
xa_unlock(&sbi->managed_pslots);
++freed;
- if (!--nr_shrink)
+ if (!--nr)
return freed;
xa_lock(&sbi->managed_pslots);
}
@@ -966,7 +934,7 @@ static void z_erofs_put_pcluster(struct erofs_sb_info *sbi,
call_rcu(&pcl->rcu, z_erofs_rcu_callback);
}
-static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
+static void z_erofs_pcluster_end(struct z_erofs_frontend *fe)
{
struct z_erofs_pcluster *pcl = fe->pcl;
@@ -979,13 +947,9 @@ static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
if (fe->candidate_bvpage)
fe->candidate_bvpage = NULL;
- /*
- * if all pending pages are added, don't hold its reference
- * any longer if the pcluster isn't hosted by ourselves.
- */
+ /* Drop refcount if it doesn't belong to our processing chain */
if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE)
z_erofs_put_pcluster(EROFS_I_SB(fe->inode), pcl, false);
-
fe->pcl = NULL;
}
@@ -1014,7 +978,7 @@ static int z_erofs_read_fragment(struct super_block *sb, struct folio *folio,
return 0;
}
-static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *f,
+static int z_erofs_scan_folio(struct z_erofs_frontend *f,
struct folio *folio, bool ra)
{
struct inode *const inode = f->inode;
@@ -1129,7 +1093,7 @@ static bool z_erofs_page_is_invalidated(struct page *page)
return !page_folio(page)->mapping && !z_erofs_is_shortlived_page(page);
}
-struct z_erofs_decompress_backend {
+struct z_erofs_backend {
struct page *onstack_pages[Z_EROFS_ONSTACK_PAGES];
struct super_block *sb;
struct z_erofs_pcluster *pcl;
@@ -1149,7 +1113,7 @@ struct z_erofs_bvec_item {
struct list_head list;
};
-static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be,
+static void z_erofs_do_decompressed_bvec(struct z_erofs_backend *be,
struct z_erofs_bvec *bvec)
{
struct z_erofs_bvec_item *item;
@@ -1172,8 +1136,7 @@ static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be,
list_add(&item->list, &be->decompressed_secondary_bvecs);
}
-static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be,
- int err)
+static void z_erofs_fill_other_copies(struct z_erofs_backend *be, int err)
{
unsigned int off0 = be->pcl->pageofs_out;
struct list_head *p, *n;
@@ -1214,7 +1177,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be,
}
}
-static void z_erofs_parse_out_bvecs(struct z_erofs_decompress_backend *be)
+static void z_erofs_parse_out_bvecs(struct z_erofs_backend *be)
{
struct z_erofs_pcluster *pcl = be->pcl;
struct z_erofs_bvec_iter biter;
@@ -1239,8 +1202,7 @@ static void z_erofs_parse_out_bvecs(struct z_erofs_decompress_backend *be)
z_erofs_put_shortlivedpage(be->pagepool, old_bvpage);
}
-static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be,
- bool *overlapped)
+static int z_erofs_parse_in_bvecs(struct z_erofs_backend *be, bool *overlapped)
{
struct z_erofs_pcluster *pcl = be->pcl;
unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
@@ -1275,8 +1237,7 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be,
return err;
}
-static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
- int err)
+static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
{
struct erofs_sb_info *const sbi = EROFS_SB(be->sb);
struct z_erofs_pcluster *pcl = be->pcl;
@@ -1393,7 +1354,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
pcl->vcnt = 0;
/* pcluster lock MUST be taken before the following line */
- WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_NIL);
+ WRITE_ONCE(pcl->next, NULL);
mutex_unlock(&pcl->lock);
if (z_erofs_is_inline_pcluster(pcl))
@@ -1406,21 +1367,19 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
static int z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
struct page **pagepool)
{
- struct z_erofs_decompress_backend be = {
+ struct z_erofs_backend be = {
.sb = io->sb,
.pagepool = pagepool,
.decompressed_secondary_bvecs =
LIST_HEAD_INIT(be.decompressed_secondary_bvecs),
+ .pcl = io->head,
};
- z_erofs_next_pcluster_t owned = io->head;
+ struct z_erofs_pcluster *next;
int err = io->eio ? -EIO : 0;
- while (owned != Z_EROFS_PCLUSTER_TAIL) {
- DBG_BUGON(owned == Z_EROFS_PCLUSTER_NIL);
-
- be.pcl = container_of(owned, struct z_erofs_pcluster, next);
- owned = READ_ONCE(be.pcl->next);
-
+ for (; be.pcl != Z_EROFS_PCLUSTER_TAIL; be.pcl = next) {
+ DBG_BUGON(!be.pcl);
+ next = READ_ONCE(be.pcl->next);
err = z_erofs_decompress_pcluster(&be, err) ?: err;
}
return err;
@@ -1486,7 +1445,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
}
static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
- struct z_erofs_decompress_frontend *f,
+ struct z_erofs_frontend *f,
struct z_erofs_pcluster *pcl,
unsigned int nr,
struct address_space *mc)
@@ -1513,12 +1472,8 @@ repeat:
DBG_BUGON(z_erofs_is_shortlived_page(bvec->bv_page));
folio = page_folio(zbv.page);
- /*
- * Handle preallocated cached folios. We tried to allocate such folios
- * without triggering direct reclaim. If allocation failed, inplace
- * file-backed folios will be used instead.
- */
- if (folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
+ /* For preallocated managed folios, add them to page cache here */
+ if (folio->private == Z_EROFS_PREALLOCATED_FOLIO) {
tocache = true;
goto out_tocache;
}
@@ -1630,18 +1585,13 @@ enum {
NR_JOBQUEUES,
};
-static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
- z_erofs_next_pcluster_t qtail[],
- z_erofs_next_pcluster_t owned_head)
+static void z_erofs_move_to_bypass_queue(struct z_erofs_pcluster *pcl,
+ struct z_erofs_pcluster *next,
+ struct z_erofs_pcluster **qtail[])
{
- z_erofs_next_pcluster_t *const submit_qtail = qtail[JQ_SUBMIT];
- z_erofs_next_pcluster_t *const bypass_qtail = qtail[JQ_BYPASS];
-
WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_TAIL);
-
- WRITE_ONCE(*submit_qtail, owned_head);
- WRITE_ONCE(*bypass_qtail, &pcl->next);
-
+ WRITE_ONCE(*qtail[JQ_SUBMIT], next);
+ WRITE_ONCE(*qtail[JQ_BYPASS], pcl);
qtail[JQ_BYPASS] = &pcl->next;
}
@@ -1670,15 +1620,15 @@ static void z_erofs_endio(struct bio *bio)
bio_put(bio);
}
-static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
+static void z_erofs_submit_queue(struct z_erofs_frontend *f,
struct z_erofs_decompressqueue *fgq,
bool *force_fg, bool readahead)
{
struct super_block *sb = f->inode->i_sb;
struct address_space *mc = MNGD_MAPPING(EROFS_SB(sb));
- z_erofs_next_pcluster_t qtail[NR_JOBQUEUES];
+ struct z_erofs_pcluster **qtail[NR_JOBQUEUES];
struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
- z_erofs_next_pcluster_t owned_head = f->owned_head;
+ struct z_erofs_pcluster *pcl, *next;
/* bio is NULL initially, so no need to initialize last_{index,bdev} */
erofs_off_t last_pa;
unsigned int nr_bios = 0;
@@ -1694,22 +1644,19 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head;
/* by default, all need io submission */
- q[JQ_SUBMIT]->head = owned_head;
+ q[JQ_SUBMIT]->head = next = f->head;
do {
struct erofs_map_dev mdev;
- struct z_erofs_pcluster *pcl;
erofs_off_t cur, end;
struct bio_vec bvec;
unsigned int i = 0;
bool bypass = true;
- DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_NIL);
- pcl = container_of(owned_head, struct z_erofs_pcluster, next);
- owned_head = READ_ONCE(pcl->next);
-
+ pcl = next;
+ next = READ_ONCE(pcl->next);
if (z_erofs_is_inline_pcluster(pcl)) {
- move_to_bypass_jobqueue(pcl, qtail, owned_head);
+ z_erofs_move_to_bypass_queue(pcl, next, qtail);
continue;
}
@@ -1781,8 +1728,8 @@ drain_io:
if (!bypass)
qtail[JQ_SUBMIT] = &pcl->next;
else
- move_to_bypass_jobqueue(pcl, qtail, owned_head);
- } while (owned_head != Z_EROFS_PCLUSTER_TAIL);
+ z_erofs_move_to_bypass_queue(pcl, next, qtail);
+ } while (next != Z_EROFS_PCLUSTER_TAIL);
if (bio) {
if (erofs_is_fileio_mode(EROFS_SB(sb)))
@@ -1806,17 +1753,16 @@ drain_io:
z_erofs_decompress_kickoff(q[JQ_SUBMIT], nr_bios);
}
-static int z_erofs_runqueue(struct z_erofs_decompress_frontend *f,
- unsigned int ra_folios)
+static int z_erofs_runqueue(struct z_erofs_frontend *f, unsigned int rapages)
{
struct z_erofs_decompressqueue io[NR_JOBQUEUES];
struct erofs_sb_info *sbi = EROFS_I_SB(f->inode);
- bool force_fg = z_erofs_is_sync_decompress(sbi, ra_folios);
+ bool force_fg = z_erofs_is_sync_decompress(sbi, rapages);
int err;
- if (f->owned_head == Z_EROFS_PCLUSTER_TAIL)
+ if (f->head == Z_EROFS_PCLUSTER_TAIL)
return 0;
- z_erofs_submit_queue(f, io, &force_fg, !!ra_folios);
+ z_erofs_submit_queue(f, io, &force_fg, !!rapages);
/* handle bypass queue (no i/o pclusters) immediately */
err = z_erofs_decompress_queue(&io[JQ_BYPASS], &f->pagepool);
@@ -1834,7 +1780,7 @@ static int z_erofs_runqueue(struct z_erofs_decompress_frontend *f,
* Since partial uptodate is still unimplemented for now, we have to use
* approximate readmore strategies as a start.
*/
-static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
+static void z_erofs_pcluster_readmore(struct z_erofs_frontend *f,
struct readahead_control *rac, bool backmost)
{
struct inode *inode = f->inode;
@@ -1889,12 +1835,10 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
static int z_erofs_read_folio(struct file *file, struct folio *folio)
{
struct inode *const inode = folio->mapping->host;
- struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
+ Z_EROFS_DEFINE_FRONTEND(f, inode, folio_pos(folio));
int err;
trace_erofs_read_folio(folio, false);
- f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT;
-
z_erofs_pcluster_readmore(&f, NULL, true);
err = z_erofs_scan_folio(&f, folio, false);
z_erofs_pcluster_readmore(&f, NULL, false);
@@ -1914,17 +1858,14 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
static void z_erofs_readahead(struct readahead_control *rac)
{
struct inode *const inode = rac->mapping->host;
- struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
+ Z_EROFS_DEFINE_FRONTEND(f, inode, readahead_pos(rac));
struct folio *head = NULL, *folio;
- unsigned int nr_folios;
+ unsigned int nrpages = readahead_count(rac);
int err;
- f.headoffset = readahead_pos(rac);
-
z_erofs_pcluster_readmore(&f, rac, true);
- nr_folios = readahead_count(rac);
- trace_erofs_readpages(inode, readahead_index(rac), nr_folios, false);
-
+ nrpages = readahead_count(rac);
+ trace_erofs_readpages(inode, readahead_index(rac), nrpages, false);
while ((folio = readahead_folio(rac))) {
folio->private = head;
head = folio;
@@ -1943,7 +1884,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
z_erofs_pcluster_readmore(&f, rac, false);
z_erofs_pcluster_end(&f);
- (void)z_erofs_runqueue(&f, nr_folios);
+ (void)z_erofs_runqueue(&f, nrpages);
erofs_put_metabuf(&f.map.buf);
erofs_release_pages(&f.pagepool);
}
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 4535f2f0a014..689437e99a5a 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -97,17 +97,48 @@ static int get_compacted_la_distance(unsigned int lobits,
return d1;
}
-static int unpack_compacted_index(struct z_erofs_maprecorder *m,
- unsigned int amortizedshift,
- erofs_off_t pos, bool lookahead)
+static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m,
+ unsigned long lcn, bool lookahead)
{
- struct erofs_inode *const vi = EROFS_I(m->inode);
+ struct inode *const inode = m->inode;
+ struct erofs_inode *const vi = EROFS_I(inode);
+ const erofs_off_t ebase = sizeof(struct z_erofs_map_header) +
+ ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
const unsigned int lclusterbits = vi->z_logical_clusterbits;
+ const unsigned int totalidx = erofs_iblks(inode);
+ unsigned int compacted_4b_initial, compacted_2b, amortizedshift;
unsigned int vcnt, lo, lobits, encodebits, nblk, bytes;
- bool big_pcluster;
+ bool big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
+ erofs_off_t pos;
u8 *in, type;
int i;
+ if (lcn >= totalidx || lclusterbits > 14)
+ return -EINVAL;
+
+ m->lcn = lcn;
+ /* used to align to 32-byte (compacted_2b) alignment */
+ compacted_4b_initial = ((32 - ebase % 32) / 4) & 7;
+ compacted_2b = 0;
+ if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) &&
+ compacted_4b_initial < totalidx)
+ compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);
+
+ pos = ebase;
+ amortizedshift = 2; /* compact_4b */
+ if (lcn >= compacted_4b_initial) {
+ pos += compacted_4b_initial * 4;
+ lcn -= compacted_4b_initial;
+ if (lcn < compacted_2b) {
+ amortizedshift = 1;
+ } else {
+ pos += compacted_2b * 2;
+ lcn -= compacted_2b;
+ }
+ }
+ pos += lcn * (1 << amortizedshift);
+
+ /* figure out the lcluster count in this pack */
if (1 << amortizedshift == 4 && lclusterbits <= 14)
vcnt = 2;
else if (1 << amortizedshift == 2 && lclusterbits <= 12)
@@ -122,7 +153,6 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
/* it doesn't equal to round_up(..) */
m->nextpackoff = round_down(pos, vcnt << amortizedshift) +
(vcnt << amortizedshift);
- big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
lobits = max(lclusterbits, ilog2(Z_EROFS_LI_D0_CBLKCNT) + 1U);
encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
bytes = pos & ((vcnt << amortizedshift) - 1);
@@ -207,53 +237,6 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
return 0;
}
-static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m,
- unsigned long lcn, bool lookahead)
-{
- struct inode *const inode = m->inode;
- struct erofs_inode *const vi = EROFS_I(inode);
- const erofs_off_t ebase = sizeof(struct z_erofs_map_header) +
- ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
- unsigned int totalidx = erofs_iblks(inode);
- unsigned int compacted_4b_initial, compacted_2b;
- unsigned int amortizedshift;
- erofs_off_t pos;
-
- if (lcn >= totalidx || vi->z_logical_clusterbits > 14)
- return -EINVAL;
-
- m->lcn = lcn;
- /* used to align to 32-byte (compacted_2b) alignment */
- compacted_4b_initial = (32 - ebase % 32) / 4;
- if (compacted_4b_initial == 32 / 4)
- compacted_4b_initial = 0;
-
- if ((vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B) &&
- compacted_4b_initial < totalidx)
- compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);
- else
- compacted_2b = 0;
-
- pos = ebase;
- if (lcn < compacted_4b_initial) {
- amortizedshift = 2;
- goto out;
- }
- pos += compacted_4b_initial * 4;
- lcn -= compacted_4b_initial;
-
- if (lcn < compacted_2b) {
- amortizedshift = 1;
- goto out;
- }
- pos += compacted_2b * 2;
- lcn -= compacted_2b;
- amortizedshift = 2;
-out:
- pos += lcn * (1 << amortizedshift);
- return unpack_compacted_index(m, amortizedshift, pos, lookahead);
-}
-
static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m,
unsigned int lcn, bool lookahead)
{
@@ -311,27 +294,23 @@ err_bogus:
static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
unsigned int initial_lcn)
{
- struct super_block *sb = m->inode->i_sb;
- struct erofs_inode *const vi = EROFS_I(m->inode);
- struct erofs_map_blocks *const map = m->map;
- const unsigned int lclusterbits = vi->z_logical_clusterbits;
- unsigned long lcn;
+ struct inode *inode = m->inode;
+ struct super_block *sb = inode->i_sb;
+ struct erofs_inode *vi = EROFS_I(inode);
+ bool bigpcl1 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1;
+ bool bigpcl2 = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2;
+ unsigned long lcn = m->lcn + 1;
int err;
- DBG_BUGON(m->type != Z_EROFS_LCLUSTER_TYPE_PLAIN &&
- m->type != Z_EROFS_LCLUSTER_TYPE_HEAD1 &&
- m->type != Z_EROFS_LCLUSTER_TYPE_HEAD2);
+ DBG_BUGON(m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD);
DBG_BUGON(m->type != m->headtype);
- if (m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
- ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1) &&
- !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) ||
- ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) &&
- !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) {
- map->m_plen = 1ULL << lclusterbits;
- return 0;
- }
- lcn = m->lcn + 1;
+ if ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1 && !bigpcl1) ||
+ ((m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
+ m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) && !bigpcl2) ||
+ (lcn << vi->z_logical_clusterbits) >= inode->i_size)
+ m->compressedblks = 1;
+
if (m->compressedblks)
goto out;
@@ -356,9 +335,9 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
case Z_EROFS_LCLUSTER_TYPE_HEAD2:
/*
* if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
- * rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
+ * rather than CBLKCNT, it's a 1 block-sized pcluster.
*/
- m->compressedblks = 1 << (lclusterbits - sb->s_blocksize_bits);
+ m->compressedblks = 1;
break;
case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
if (m->delta[0] != 1)
@@ -373,7 +352,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
return -EFSCORRUPTED;
}
out:
- map->m_plen = erofs_pos(sb, m->compressedblks);
+ m->map->m_plen = erofs_pos(sb, m->compressedblks);
return 0;
err_bonus_cblkcnt:
erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid);
diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c
index 0dd65cefce33..83fbcab70a92 100644
--- a/fs/erofs/zutil.c
+++ b/fs/erofs/zutil.c
@@ -243,7 +243,7 @@ void erofs_shrinker_unregister(struct super_block *sb)
static unsigned long erofs_shrink_count(struct shrinker *shrink,
struct shrink_control *sc)
{
- return atomic_long_read(&erofs_global_shrink_cnt);
+ return atomic_long_read(&erofs_global_shrink_cnt) ?: SHRINK_EMPTY;
}
static unsigned long erofs_shrink_scan(struct shrinker *shrink,