From ef851d44a83ed625ec22eae6bd36a1348c8af571 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 15 Aug 2024 16:44:05 +0900 Subject: nilfs2: add support for FS_IOC_GETUUID Patch series "nilfs2: add support for some common ioctls". This series adds support for common ioctls to nilfs2 for getting the volume UUID and the relative path of an FS instance within the sysfs namespace, and also implements ioctls for nilfs2 to get and set the volume label. This patch (of 2): Expose the UUID of a file system instance using the super_set_uuid helper and support the FS_IOC_GETUUID ioctl. Link: https://lkml.kernel.org/r/20240815074408.5550-1-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/20240815074408.5550-2-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/super.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index e835e1f5a712..167050b3ce7e 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1063,6 +1063,9 @@ nilfs_fill_super(struct super_block *sb, struct fs_context *fc) if (err) goto failed_nilfs; + super_set_uuid(sb, nilfs->ns_sbp[0]->s_uuid, + sizeof(nilfs->ns_sbp[0]->s_uuid)); + cno = nilfs_last_cno(nilfs); err = nilfs_attach_checkpoint(sb, cno, true, &fsroot); if (err) { -- cgit v1.2.3 From 8d1dba2e7cc74381087ae8ef03673abee758fcd0 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 15 Aug 2024 16:44:06 +0900 Subject: nilfs2: add support for FS_IOC_GETFSSYSFSPATH Use the standard helper super_set_sysfs_name_bdev() to give the sysfs subpath of the filesystem for the FS_IOC_GETFSSYSFSPATH ioctl. For nilfs2, it will output "nilfs2/". 
Link: https://lkml.kernel.org/r/20240815074408.5550-3-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 167050b3ce7e..76e35e6773d1 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1065,6 +1065,7 @@ nilfs_fill_super(struct super_block *sb, struct fs_context *fc) super_set_uuid(sb, nilfs->ns_sbp[0]->s_uuid, sizeof(nilfs->ns_sbp[0]->s_uuid)); + super_set_sysfs_name_bdev(sb); cno = nilfs_last_cno(nilfs); err = nilfs_attach_checkpoint(sb, cno, true, &fsroot); -- cgit v1.2.3 From 4b901256a7bf6db3ca84ee2b2e87a1af4d40b8a3 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 15 Aug 2024 16:44:07 +0900 Subject: nilfs2: add support for FS_IOC_GETFSLABEL Implement support for FS_IOC_GETFSLABEL ioctl to read filesystem label. Link: https://lkml.kernel.org/r/20240815074408.5550-4-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/ioctl.c | 27 +++++++++++++++++++++++++++ fs/nilfs2/nilfs.h | 12 ++++++++++++ 2 files changed, 39 insertions(+) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 8be471ce4f19..b5c6a50d6d5d 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -17,6 +17,7 @@ #include /* mnt_want_write_file(), mnt_drop_write_file() */ #include #include +#include #include "nilfs.h" #include "segment.h" #include "bmap.h" @@ -1266,6 +1267,29 @@ out: return ret; } +/** + * nilfs_ioctl_get_fslabel - get the volume name of the file system + * @sb: super block instance + * @argp: pointer to userspace memory where the volume name should be stored + * + * Return: 0 on success, %-EFAULT if copying to userspace memory fails. 
+ */ +static int nilfs_ioctl_get_fslabel(struct super_block *sb, void __user *argp) +{ + struct the_nilfs *nilfs = sb->s_fs_info; + char label[NILFS_MAX_VOLUME_NAME + 1]; + + BUILD_BUG_ON(NILFS_MAX_VOLUME_NAME >= FSLABEL_MAX); + + down_read(&nilfs->ns_sem); + memtostr_pad(label, nilfs->ns_sbp[0]->s_volume_name); + up_read(&nilfs->ns_sem); + + if (copy_to_user(argp, label, sizeof(label))) + return -EFAULT; + return 0; +} + long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -1308,6 +1332,8 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_set_alloc_range(inode, argp); case FITRIM: return nilfs_ioctl_trim_fs(inode, argp); + case FS_IOC_GETFSLABEL: + return nilfs_ioctl_get_fslabel(inode->i_sb, argp); default: return -ENOTTY; } @@ -1334,6 +1360,7 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case NILFS_IOCTL_RESIZE: case NILFS_IOCTL_SET_ALLOC_RANGE: case FITRIM: + case FS_IOC_GETFSLABEL: break; default: return -ENOIOCTLCMD; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 4017f7856440..3097490b6621 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -103,6 +103,18 @@ enum { NILFS_SB_COMMIT_ALL /* Commit both super blocks */ }; +/** + * define NILFS_MAX_VOLUME_NAME - maximum number of characters (bytes) in a + * file system volume name + * + * Defined by the size of the volume name field in the on-disk superblocks. + * This volume name does not include the terminating NULL byte if the string + * length matches the field size, so use (NILFS_MAX_VOLUME_NAME + 1) for the + * size of the buffer that requires a NULL byte termination. 
+ */ +#define NILFS_MAX_VOLUME_NAME \ + sizeof_field(struct nilfs_super_block, s_volume_name) + /* * Macros to check inode numbers */ -- cgit v1.2.3 From 79785f7801275bc070035e3982f8ff4b336a1ceb Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 15 Aug 2024 16:44:08 +0900 Subject: nilfs2: add support for FS_IOC_SETFSLABEL Implement support for FS_IOC_SETFSLABEL ioctl to write filesystem label. Link: https://lkml.kernel.org/r/20240815074408.5550-5-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/ioctl.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index b5c6a50d6d5d..297989e51ee6 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -1290,6 +1290,68 @@ static int nilfs_ioctl_get_fslabel(struct super_block *sb, void __user *argp) return 0; } +/** + * nilfs_ioctl_set_fslabel - set the volume name of the file system + * @sb: super block instance + * @filp: file object + * @argp: pointer to userspace memory that contains the volume name + * + * Return: 0 on success, or the following negative error code on failure. + * * %-EFAULT - Error copying input data. + * * %-EINVAL - Label length exceeds record size in superblock. + * * %-EIO - I/O error. + * * %-EPERM - Operation not permitted (insufficient permissions). + * * %-EROFS - Read only file system. 
+ */ +static int nilfs_ioctl_set_fslabel(struct super_block *sb, struct file *filp, + void __user *argp) +{ + char label[NILFS_MAX_VOLUME_NAME + 1]; + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_super_block **sbp; + size_t len; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + if (copy_from_user(label, argp, NILFS_MAX_VOLUME_NAME + 1)) { + ret = -EFAULT; + goto out_drop_write; + } + + len = strnlen(label, NILFS_MAX_VOLUME_NAME + 1); + if (len > NILFS_MAX_VOLUME_NAME) { + nilfs_err(sb, "unable to set label with more than %zu bytes", + NILFS_MAX_VOLUME_NAME); + ret = -EINVAL; + goto out_drop_write; + } + + down_write(&nilfs->ns_sem); + sbp = nilfs_prepare_super(sb, false); + if (unlikely(!sbp)) { + ret = -EIO; + goto out_unlock; + } + + strtomem_pad(sbp[0]->s_volume_name, label, 0); + if (sbp[1]) + strtomem_pad(sbp[1]->s_volume_name, label, 0); + + ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); + +out_unlock: + up_write(&nilfs->ns_sem); +out_drop_write: + mnt_drop_write_file(filp); + return ret; +} + long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -1334,6 +1396,8 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_trim_fs(inode, argp); case FS_IOC_GETFSLABEL: return nilfs_ioctl_get_fslabel(inode->i_sb, argp); + case FS_IOC_SETFSLABEL: + return nilfs_ioctl_set_fslabel(inode->i_sb, filp, argp); default: return -ENOTTY; } @@ -1361,6 +1425,7 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case NILFS_IOCTL_SET_ALLOC_RANGE: case FITRIM: case FS_IOC_GETFSLABEL: + case FS_IOC_SETFSLABEL: break; default: return -ENOIOCTLCMD; -- cgit v1.2.3 From 299910dcb4525ac0274f3efa9527876315ba4f67 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 16 Aug 2024 18:01:28 +0900 Subject: nilfs2: do not output warnings when clearing dirty buffers 
After detecting file system corruption and degrading to a read-only mount, dirty folios and buffers in the page cache are cleared, and a large number of warnings are output at that time, often filling up the kernel log. In this case, since the degrading to a read-only mount is output to the kernel log, these warnings are not very meaningful, and are rather a nuisance in system management and debugging. The related nilfs2-specific page/folio routines have a silent argument that suppresses the warning output, but since it is not currently used meaningfully, remove both the silent argument and the warning output. Link: https://lkml.kernel.org/r/20240816090128.4561-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/inode.c | 4 ++-- fs/nilfs2/mdt.c | 6 +++--- fs/nilfs2/page.c | 19 +++---------------- fs/nilfs2/page.h | 4 ++-- 4 files changed, 10 insertions(+), 23 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 7340a01d80e1..c39bc940e6f2 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -162,7 +162,7 @@ static int nilfs_writepages(struct address_space *mapping, int err = 0; if (sb_rdonly(inode->i_sb)) { - nilfs_clear_dirty_pages(mapping, false); + nilfs_clear_dirty_pages(mapping); return -EROFS; } @@ -186,7 +186,7 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc) * have dirty pages that try to be flushed in background. * So, here we simply discard this dirty page. */ - nilfs_clear_folio_dirty(folio, false); + nilfs_clear_folio_dirty(folio); folio_unlock(folio); return -EROFS; } diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 4f792a0ad0f0..ceb7dc0b5bad 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -411,7 +411,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) * have dirty folios that try to be flushed in background. * So, here we simply discard this dirty folio. 
*/ - nilfs_clear_folio_dirty(folio, false); + nilfs_clear_folio_dirty(folio); folio_unlock(folio); return -EROFS; } @@ -638,10 +638,10 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) if (mi->mi_palloc_cache) nilfs_palloc_clear_cache(inode); - nilfs_clear_dirty_pages(inode->i_mapping, true); + nilfs_clear_dirty_pages(inode->i_mapping); nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping); - nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true); + nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping); nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping, NILFS_I(shadow->inode)->i_assoc_inode->i_mapping); diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 14e470fb8870..7797903e014e 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -357,9 +357,8 @@ repeat: /** * nilfs_clear_dirty_pages - discard dirty pages in address space * @mapping: address space with dirty pages for discarding - * @silent: suppress [true] or print [false] warning messages */ -void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) +void nilfs_clear_dirty_pages(struct address_space *mapping) { struct folio_batch fbatch; unsigned int i; @@ -380,7 +379,7 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) * was acquired. Skip processing in that case. 
*/ if (likely(folio->mapping == mapping)) - nilfs_clear_folio_dirty(folio, silent); + nilfs_clear_folio_dirty(folio); folio_unlock(folio); } @@ -392,20 +391,13 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) /** * nilfs_clear_folio_dirty - discard dirty folio * @folio: dirty folio that will be discarded - * @silent: suppress [true] or print [false] warning messages */ -void nilfs_clear_folio_dirty(struct folio *folio, bool silent) +void nilfs_clear_folio_dirty(struct folio *folio) { - struct inode *inode = folio->mapping->host; - struct super_block *sb = inode->i_sb; struct buffer_head *bh, *head; BUG_ON(!folio_test_locked(folio)); - if (!silent) - nilfs_warn(sb, "discard dirty page: offset=%lld, ino=%lu", - folio_pos(folio), inode->i_ino); - folio_clear_uptodate(folio); folio_clear_mappedtodisk(folio); @@ -419,11 +411,6 @@ void nilfs_clear_folio_dirty(struct folio *folio, bool silent) bh = head; do { lock_buffer(bh); - if (!silent) - nilfs_warn(sb, - "discard dirty block: blocknr=%llu, size=%zu", - (u64)bh->b_blocknr, bh->b_size); - set_mask_bits(&bh->b_state, clear_bits, 0); unlock_buffer(bh); } while (bh = bh->b_this_page, bh != head); diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index 7e1a2c455a10..64521a03a19e 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h @@ -41,8 +41,8 @@ void nilfs_folio_bug(struct folio *); int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); void nilfs_copy_back_pages(struct address_space *, struct address_space *); -void nilfs_clear_folio_dirty(struct folio *, bool); -void nilfs_clear_dirty_pages(struct address_space *, bool); +void nilfs_clear_folio_dirty(struct folio *folio); +void nilfs_clear_dirty_pages(struct address_space *mapping); unsigned int nilfs_page_count_clean_buffers(struct page *, unsigned int, unsigned int); unsigned long nilfs_find_uncommitted_extent(struct inode *inode, -- cgit v1.2.3 From b79bdfdd31a4ffe441ad347f4d869fde997bb69c Mon Sep 17 00:00:00 2001 
From: Ryusuke Konishi Date: Fri, 16 Aug 2024 16:43:12 +0900 Subject: nilfs2: add missing argument description for __nilfs_error() Patch series "This series fixes a number of formatting issues in kernel doc comments" This series fixes a number of formatting issues in kernel doc comments that were detected as warnings by the kernel-doc script, making violations more noticeable when adding or modifying kernel doc. There are still warnings output by "kernel-doc -Wall", but they are widespread, so I plan to fix them at another time while considering priorities. This patch (of 8): Add missing argument description to __nilfs_error function and remove the following warnings from kernel-doc script output: fs/nilfs2/super.c:121: warning: Function parameter or struct member 'sb' not described in '__nilfs_error' fs/nilfs2/super.c:121: warning: Function parameter or struct member 'function' not described in '__nilfs_error' fs/nilfs2/super.c:121: warning: Function parameter or struct member 'fmt' not described in '__nilfs_error' Link: https://lkml.kernel.org/r/20240816074319.3253-1-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/20240816074319.3253-2-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/super.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 76e35e6773d1..8eb8dbc9f51c 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -105,6 +105,10 @@ static void nilfs_set_error(struct super_block *sb) /** * __nilfs_error() - report failure condition on a filesystem + * @sb: super block instance + * @function: name of calling function + * @fmt: format string for message to be output + * @...: optional arguments to @fmt * * __nilfs_error() sets an ERROR_FS flag on the superblock as well as * reporting an error message. 
This function should be called when -- cgit v1.2.3 From 3e62c5d7d0a4e8fa826d6e2f8e19c805045edb82 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 16 Aug 2024 16:43:13 +0900 Subject: nilfs2: add missing argument descriptions for ioctl-related helpers Add missing argument descriptions and return value information to the kernel-doc comments for ioctl helper functions, and eliminate the following warnings output by the kernel-doc script: fs/nilfs2/ioctl.c:120: warning: Function parameter or struct member 'dentry' not described in 'nilfs_fileattr_get' fs/nilfs2/ioctl.c:120: warning: Function parameter or struct member 'fa' not described in 'nilfs_fileattr_get' fs/nilfs2/ioctl.c:133: warning: Function parameter or struct member 'idmap' not described in 'nilfs_fileattr_set' fs/nilfs2/ioctl.c:133: warning: Function parameter or struct member 'dentry' not described in 'nilfs_fileattr_set' fs/nilfs2/ioctl.c:133: warning: Function parameter or struct member 'fa' not described in 'nilfs_fileattr_set' fs/nilfs2/ioctl.c:164: warning: Function parameter or struct member 'inode' not described in 'nilfs_ioctl_getversion' fs/nilfs2/ioctl.c:164: warning: Function parameter or struct member 'argp' not described in 'nilfs_ioctl_getversion' Link: https://lkml.kernel.org/r/20240816074319.3253-3-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/ioctl.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 297989e51ee6..fa77f78df681 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -115,7 +115,11 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, } /** - * nilfs_fileattr_get - ioctl to support lsattr + * nilfs_fileattr_get - retrieve miscellaneous file attributes + * @dentry: the object to retrieve from + * @fa: fileattr pointer + * + * Return: always 0 as success. 
*/ int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) { @@ -127,7 +131,12 @@ int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) } /** - * nilfs_fileattr_set - ioctl to support chattr + * nilfs_fileattr_set - change miscellaneous file attributes + * @idmap: idmap of the mount + * @dentry: the object to change + * @fa: fileattr pointer + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) @@ -160,6 +169,10 @@ int nilfs_fileattr_set(struct mnt_idmap *idmap, /** * nilfs_ioctl_getversion - get info about a file's version (generation number) + * @inode: inode object + * @argp: userspace memory where the generation number of @inode is stored + * + * Return: 0 on success, or %-EFAULT on error. */ static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp) { -- cgit v1.2.3 From 60d8b01e55b2c45e73d442b379844d6efd9d16a7 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 16 Aug 2024 16:43:14 +0900 Subject: nilfs2: improve kernel-doc comments for b-tree node helpers Revise kernel-doc comments for helper functions related to changing the search key for b-tree node blocks, and eliminate the following warnings output by the kernel-doc script: fs/nilfs2/btnode.c:175: warning: Function parameter or struct member 'btnc' not described in 'nilfs_btnode_prepare_change_key' fs/nilfs2/btnode.c:175: warning: Function parameter or struct member 'ctxt' not described in 'nilfs_btnode_prepare_change_key' fs/nilfs2/btnode.c:238: warning: Function parameter or struct member 'btnc' not described in 'nilfs_btnode_commit_change_key' fs/nilfs2/btnode.c:238: warning: Function parameter or struct member 'ctxt' not described in 'nilfs_btnode_commit_change_key' fs/nilfs2/btnode.c:278: warning: Function parameter or struct member 'btnc' not described in 'nilfs_btnode_abort_change_key' fs/nilfs2/btnode.c:278: warning: Function parameter or struct 
member 'ctxt' not described in 'nilfs_btnode_abort_change_key' Link: https://lkml.kernel.org/r/20240816074319.3253-4-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/btnode.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 9 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index c034080c334b..57b4af5ad646 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -179,11 +179,32 @@ void nilfs_btnode_delete(struct buffer_head *bh) } /** - * nilfs_btnode_prepare_change_key - * prepare to move contents of the block for old key to one of new key. - * the old buffer will not be removed, but might be reused for new buffer. - * it might return -ENOMEM because of memory allocation errors, - * and might return -EIO because of disk read errors. + * nilfs_btnode_prepare_change_key - prepare to change the search key of a + * b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_prepare_change_key() prepares to move the contents of the + * b-tree node block of the old key given in the "oldkey" member of @ctxt to + * the position of the new key given in the "newkey" member of @ctxt in the + * page cache @btnc. Here, the key of the block is an index in units of + * blocks, and if the page and block sizes match, it matches the page index + * in the page cache. + * + * If the page size and block size match, this function attempts to move the + * entire folio, and in preparation for this, inserts the original folio into + * the new index of the cache. 
If this insertion fails or if the page size + * and block size are different, it falls back to a copy preparation using + * nilfs_btnode_create_block(), inserts a new block at the position + * corresponding to "newkey", and stores the buffer head pointer in the + * "newbh" member of @ctxt. + * + * Note that the current implementation does not support folio sizes larger + * than the page size. + * + * Return: 0 on success, or the following negative error code on failure. + * * %-EIO - I/O error (metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_btnode_prepare_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) @@ -245,8 +266,21 @@ retry: } /** - * nilfs_btnode_commit_change_key - * commit the change_key operation prepared by prepare_change_key(). + * nilfs_btnode_commit_change_key - commit the change of the search key of + * a b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_commit_change_key() executes the key change based on the + * context @ctxt prepared by nilfs_btnode_prepare_change_key(). If no valid + * block buffer is prepared in "newbh" of @ctxt (i.e., a full folio move), + * this function removes the folio from the old index and completes the move. + * Otherwise, it copies the block data and inherited flag states of "oldbh" + * to "newbh" and clears the "oldbh" from the cache. In either case, the + * relocated buffer is marked as dirty. + * + * As with nilfs_btnode_prepare_change_key(), the current implementation does + * not support folio sizes larger than the page size. */ void nilfs_btnode_commit_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) @@ -285,8 +319,19 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc, } /** - * nilfs_btnode_abort_change_key - * abort the change_key operation prepared by prepare_change_key(). 
+ * nilfs_btnode_abort_change_key - abort the change of the search key of a + * b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_abort_change_key() cancels the key change associated with the + * context @ctxt prepared via nilfs_btnode_prepare_change_key() and performs + * any necessary cleanup. If no valid block buffer is prepared in "newbh" of + * @ctxt, this function removes the folio from the destination index and aborts + * the move. Otherwise, it clears "newbh" from the cache. + * + * As with nilfs_btnode_prepare_change_key(), the current implementation does + * not support folio sizes larger than the page size. */ void nilfs_btnode_abort_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) -- cgit v1.2.3 From 89a6c1775089eae99940d4a86f2ba34cbe848726 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 16 Aug 2024 16:43:15 +0900 Subject: nilfs2: fix incorrect kernel-doc declaration of nilfs_palloc_req structure The "struct" keyword is missing from the kernel-doc comment of the nilfs_palloc_req structure, so add it to eliminate the following warning output by the kernel-doc script: fs/nilfs2/alloc.h:46: warning: cannot understand function prototype: 'struct nilfs_palloc_req ' Link: https://lkml.kernel.org/r/20240816074319.3253-5-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/alloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index d825a9faca6d..e19d7eb10084 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -37,7 +37,7 @@ void *nilfs_palloc_block_get_entry(const struct inode *, __u64, int nilfs_palloc_count_max_entries(struct inode *, u64, u64 *); /** - * nilfs_palloc_req - persistent allocator request and reply + * struct nilfs_palloc_req - persistent allocator 
request and reply * @pr_entry_nr: entry number (vblocknr or inode number) * @pr_desc_bh: buffer head of the buffer containing block group descriptors * @pr_bitmap_bh: buffer head of the buffer containing a block group bitmap -- cgit v1.2.3 From 0e13ddee285ffa0815fa66e1eac4bf0fafd06ce4 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 16 Aug 2024 16:43:16 +0900 Subject: nilfs2: add missing description of nilfs_btree_path structure Add missing kernel-doc comment for the 'bp_ctxt' member variable of the nilfs_btree_path structure, and eliminate the following warning output by the kernel-doc script: fs/nilfs2/btree.h:39: warning: Function parameter or struct member 'bp_ctxt' not described in 'nilfs_btree_path' Link: https://lkml.kernel.org/r/20240816074319.3253-6-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/btree.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h index 92868e1a48ca..2a220f716c91 100644 --- a/fs/nilfs2/btree.h +++ b/fs/nilfs2/btree.h @@ -24,6 +24,7 @@ * @bp_index: index of child node * @bp_oldreq: ptr end request for old ptr * @bp_newreq: ptr alloc request for new ptr + * @bp_ctxt: context information for changing the key of a b-tree node block * @bp_op: rebalance operation */ struct nilfs_btree_path { -- cgit v1.2.3 From d9e5551ea101203151077c42af0bebeb6825f636 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 16 Aug 2024 16:43:17 +0900 Subject: nilfs2: describe the members of nilfs_bmap_operations structure Add missing member variable descriptions in the kernel-doc comments for the nilfs_bmap_operations structure, hiding the internal operations with the "private:" tag. 
This eliminates the following warnings output by the kernel-doc script: fs/nilfs2/bmap.h:74: warning: Function parameter or struct member 'bop_lookup' not described in 'nilfs_bmap_operations' fs/nilfs2/bmap.h:74: warning: Function parameter or struct member 'bop_lookup_contig' not described in 'nilfs_bmap_operations' ... fs/nilfs2/bmap.h:74: warning: Function parameter or struct member 'bop_gather_data' not described in 'nilfs_bmap_operations' Link: https://lkml.kernel.org/r/20240816074319.3253-7-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/bmap.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index 608168a5cb88..5f8c8c552620 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -44,6 +44,19 @@ struct nilfs_bmap_stats { /** * struct nilfs_bmap_operations - bmap operation table + * @bop_lookup: single block search operation + * @bop_lookup_contig: consecutive block search operation + * @bop_insert: block insertion operation + * @bop_delete: block delete operation + * @bop_clear: block mapping resource release operation + * @bop_propagate: operation to propagate dirty state towards the + * mapping root + * @bop_lookup_dirty_buffers: operation to collect dirty block buffers + * @bop_assign: disk block address assignment operation + * @bop_mark: operation to mark in-use blocks as dirty for + * relocation by GC + * @bop_seek_key: find valid block key operation + * @bop_last_key: find last valid block key operation */ struct nilfs_bmap_operations { int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, __u64 *); @@ -66,7 +79,7 @@ struct nilfs_bmap_operations { int (*bop_seek_key)(const struct nilfs_bmap *, __u64, __u64 *); int (*bop_last_key)(const struct nilfs_bmap *, __u64 *); - /* The following functions are internal use only. 
*/ + /* private: internal use only */ int (*bop_check_insert)(const struct nilfs_bmap *, __u64); int (*bop_check_delete)(struct nilfs_bmap *, __u64); int (*bop_gather_data)(struct nilfs_bmap *, __u64 *, __u64 *, int); -- cgit v1.2.3 From 7876bc1bd6e89723edd1cb68f7d7bd83568ce82b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 16 Aug 2024 16:43:18 +0900 Subject: nilfs2: fix inconsistencies in kernel-doc comments in segment.h Fix incorrect or missing variable names in the member variable descriptions in the nilfs_recovery_info and nilfs_sc_info structures, thereby eliminating the following warnings output by the kernel-doc script: fs/nilfs2/segment.h:49: warning: Function parameter or struct member 'ri_cno' not described in 'nilfs_recovery_info' fs/nilfs2/segment.h:49: warning: Function parameter or struct member 'ri_lsegs_start_seq' not described in 'nilfs_recovery_info' fs/nilfs2/segment.h:49: warning: Excess struct member 'ri_ri_cno' description in 'nilfs_recovery_info' fs/nilfs2/segment.h:49: warning: Excess struct member 'ri_lseg_start_seq' description in 'nilfs_recovery_info' fs/nilfs2/segment.h:177: warning: Function parameter or struct member 'sc_seq_accepted' not described in 'nilfs_sc_info' fs/nilfs2/segment.h:177: warning: Function parameter or struct member 'sc_timer_task' not described in 'nilfs_sc_info' fs/nilfs2/segment.h:177: warning: Excess struct member 'sc_seq_accept' description in 'nilfs_sc_info' Link: https://lkml.kernel.org/r/20240816074319.3253-8-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/segment.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 1060f72ebf5a..2499721ebcc9 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -22,10 +22,10 @@ struct nilfs_root; * struct nilfs_recovery_info - Recovery information * @ri_need_recovery: Recovery status * @ri_super_root: Block 
number of the last super root - * @ri_ri_cno: Number of the last checkpoint + * @ri_cno: Number of the last checkpoint * @ri_lsegs_start: Region for roll-forwarding (start block number) * @ri_lsegs_end: Region for roll-forwarding (end block number) - * @ri_lseg_start_seq: Sequence value of the segment at ri_lsegs_start + * @ri_lsegs_start_seq: Sequence value of the segment at ri_lsegs_start * @ri_used_segments: List of segments to be mark active * @ri_pseg_start: Block number of the last partial segment * @ri_seq: Sequence number on the last partial segment @@ -107,7 +107,7 @@ struct nilfs_segsum_pointer { * @sc_wait_daemon: Daemon wait queue * @sc_wait_task: Start/end wait queue to control segctord task * @sc_seq_request: Request counter - * @sc_seq_accept: Accepted request count + * @sc_seq_accepted: Accepted request count * @sc_seq_done: Completion counter * @sc_sync: Request of explicit sync operation * @sc_interval: Timeout value of background construction @@ -115,6 +115,7 @@ struct nilfs_segsum_pointer { * @sc_lseg_stime: Start time of the latest logical segment * @sc_watermark: Watermark for the number of dirty buffers * @sc_timer: Timer for segctord + * @sc_timer_task: Thread woken up by @sc_timer * @sc_task: current thread of segctord */ struct nilfs_sc_info { -- cgit v1.2.3 From caaab56609ce48076af7361163b6a8f7f14d53b3 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 16 Aug 2024 16:43:19 +0900 Subject: nilfs2: fix missing initial short descriptions of kernel-doc comments Update some kernel-doc comments that are missing the initial short description and fix the following warnings output by the kernel-doc script: fs/nilfs2/bmap.c:353: warning: missing initial short description on line: * nilfs_bmap_lookup_dirty_buffers - fs/nilfs2/cpfile.c:708: warning: missing initial short description on line: * nilfs_cpfile_delete_checkpoint - fs/nilfs2/cpfile.c:972: warning: missing initial short description on line: * nilfs_cpfile_is_snapshot - 
fs/nilfs2/dat.c:275: warning: missing initial short description on line: * nilfs_dat_mark_dirty - fs/nilfs2/sufile.c:844: warning: missing initial short description on line: * nilfs_sufile_get_suinfo - Link: https://lkml.kernel.org/r/20240816074319.3253-9-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/bmap.c | 2 +- fs/nilfs2/cpfile.c | 32 ++++++++++++++++---------------- fs/nilfs2/dat.c | 17 +++++++---------- fs/nilfs2/sufile.c | 20 ++++++++------------ 4 files changed, 32 insertions(+), 39 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index cd14ea25968c..c9e8d9a7d820 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -349,7 +349,7 @@ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) } /** - * nilfs_bmap_lookup_dirty_buffers - + * nilfs_bmap_lookup_dirty_buffers - collect dirty block buffers * @bmap: bmap * @listp: pointer to buffer head list */ diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 69a5cced1e84..9c8d531cffa7 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -704,9 +704,15 @@ ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode, } /** - * nilfs_cpfile_delete_checkpoint - - * @cpfile: - * @cno: + * nilfs_cpfile_delete_checkpoint - delete a checkpoint + * @cpfile: checkpoint file inode + * @cno: checkpoint number to delete + * + * Return: 0 on success, or the following negative error code on failure. + * * %-EBUSY - Checkpoint in use (snapshot specified). + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No valid checkpoint found. + * * %-ENOMEM - Insufficient memory available. 
*/ int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno) { @@ -968,21 +974,15 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) } /** - * nilfs_cpfile_is_snapshot - + * nilfs_cpfile_is_snapshot - determine if checkpoint is a snapshot * @cpfile: inode of checkpoint file - * @cno: checkpoint number - * - * Description: - * - * Return Value: On success, 1 is returned if the checkpoint specified by - * @cno is a snapshot, or 0 if not. On error, one of the following negative - * error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * @cno: checkpoint number * - * %-ENOENT - No such checkpoint. + * Return: 1 if the checkpoint specified by @cno is a snapshot, 0 if not, or + * the following negative error code on failure. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No such checkpoint. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) { diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index fc1caf63a42a..0bef662176a4 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -271,18 +271,15 @@ void nilfs_dat_abort_update(struct inode *dat, } /** - * nilfs_dat_mark_dirty - - * @dat: DAT file inode + * nilfs_dat_mark_dirty - mark the DAT block buffer containing the specified + * virtual block address entry as dirty + * @dat: DAT file inode * @vblocknr: virtual block number * - * Description: - * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or the following negative error code on failure. + * * %-EINVAL - Invalid DAT entry (internal code). + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. 
*/ int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr) { diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 6748218be7c5..7bfc0860acee 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -840,21 +840,17 @@ out: } /** - * nilfs_sufile_get_suinfo - + * nilfs_sufile_get_suinfo - get segment usage information * @sufile: inode of segment usage file * @segnum: segment number to start looking - * @buf: array of suinfo - * @sisz: byte size of suinfo - * @nsi: size of suinfo array + * @buf: array of suinfo + * @sisz: byte size of suinfo + * @nsi: size of suinfo array * - * Description: - * - * Return Value: On success, 0 is returned and .... On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: Count of segment usage info items stored in the output buffer on + * success, or the following negative error code on failure. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, unsigned int sisz, size_t nsi) -- cgit v1.2.3 From 62e6e7841701619c2390e4e6cc4089f38c2a6798 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 22 Aug 2024 00:46:23 +0900 Subject: nilfs2: treat missing sufile header block as metadata corruption Patch series "nilfs2: prevent unexpected ENOENT propagation". This series fixes potential issues where the result code -ENOENT, which is returned internally when a metadata file operation encounters a hole block, is exposed to user space without being properly handled. Several issues with the same cause leading to hangs or WARN_ON check failures have been reported by syzbot and fixed each time in the past. This collectively fixes the missing -ENOENT conversions that do not cause stability issues and are not covered by syzbot.
This patch (of 5): The sufile, a metadata file that holds metadata for segment management, has statistical information in its first block, but if reading this block fails, it receives the internal code -ENOENT and returns it unchanged to the callers. To prevent this -ENOENT from being propagated to system calls, if reading the header block fails, return -EIO (or -EINVAL depending on the context) instead. Link: https://lkml.kernel.org/r/20240821154627.11848-1-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/20240821154627.11848-2-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/sufile.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 7bfc0860acee..f071eba48163 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -79,10 +79,17 @@ nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum, NILFS_MDT(sufile)->mi_entry_size; } -static inline int nilfs_sufile_get_header_block(struct inode *sufile, - struct buffer_head **bhp) +static int nilfs_sufile_get_header_block(struct inode *sufile, + struct buffer_head **bhp) { - return nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp); + int err = nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp); + + if (unlikely(err == -ENOENT)) { + nilfs_error(sufile->i_sb, + "missing header block in segment usage metadata"); + err = -EIO; + } + return err; } static inline int @@ -1237,9 +1244,15 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, if (err) goto failed; - err = nilfs_sufile_get_header_block(sufile, &header_bh); - if (err) + err = nilfs_mdt_get_block(sufile, 0, 0, NULL, &header_bh); + if (unlikely(err)) { + if (err == -ENOENT) { + nilfs_err(sb, + "missing header block in segment usage metadata"); + err = -EINVAL; + } goto failed; + } sui = NILFS_SUI(sufile); kaddr = kmap_local_page(header_bh->b_page); -- cgit v1.2.3 From 
d07d8ba4cee7b56aa8ff499776ce76323562660e Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 22 Aug 2024 00:46:24 +0900 Subject: nilfs2: treat missing cpfile header block as metadata corruption The cpfile, a metadata file that holds metadata for checkpoint management, also has statistical information in its first block, and if reading this block fails, it receives the internal code -ENOENT and returns that code to the callers. As with sufile, to prevent this -ENOENT from being propagated to system calls, return -EIO instead when reading the header block fails. Link: https://lkml.kernel.org/r/20240821154627.11848-3-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/cpfile.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 9c8d531cffa7..f0ce37552446 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -125,10 +125,17 @@ static void nilfs_cpfile_block_init(struct inode *cpfile, } } -static inline int nilfs_cpfile_get_header_block(struct inode *cpfile, - struct buffer_head **bhp) +static int nilfs_cpfile_get_header_block(struct inode *cpfile, + struct buffer_head **bhp) { - return nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp); + int err = nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp); + + if (unlikely(err == -ENOENT)) { + nilfs_error(cpfile->i_sb, + "missing header block in checkpoint metadata"); + err = -EIO; + } + return err; } static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile, @@ -283,14 +290,9 @@ int nilfs_cpfile_create_checkpoint(struct inode *cpfile, __u64 cno) down_write(&NILFS_MDT(cpfile)->mi_sem); ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); - if (unlikely(ret < 0)) { - if (ret == -ENOENT) { - nilfs_error(cpfile->i_sb, - "checkpoint creation failed due to metadata corruption."); - ret = -EIO; - } + if (unlikely(ret < 0)) goto out_sem; - } + ret = 
nilfs_cpfile_get_checkpoint_block(cpfile, cno, 1, &cp_bh); if (unlikely(ret < 0)) goto out_header; -- cgit v1.2.3 From 5b527d38644686dc11e29468463aa7affa282e31 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 22 Aug 2024 00:46:25 +0900 Subject: nilfs2: do not propagate ENOENT error from sufile during recovery nilfs_sufile_free() returns the error code -ENOENT when the block where the segment usage should be placed does not exist (hole block case), but this error should not be propagated upwards to the mount system call. In nilfs_prepare_segment_for_recovery(), one of the recovery steps during mount, nilfs_sufile_free() is used and may return -ENOENT as is, so in that case return -EINVAL instead. Link: https://lkml.kernel.org/r/20240821154627.11848-4-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/recovery.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index b638dc06df2f..fe3a5a767700 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -433,8 +433,17 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, * The next segment is invalidated by this recovery. 
*/ err = nilfs_sufile_free(sufile, segnum[1]); - if (unlikely(err)) + if (unlikely(err)) { + if (err == -ENOENT) { + nilfs_err(sb, + "checkpoint log inconsistency at block %llu (segment %llu): next segment %llu is unallocated", + (unsigned long long)nilfs->ns_last_pseg, + (unsigned long long)nilfs->ns_segnum, + (unsigned long long)segnum[1]); + err = -EINVAL; + } goto failed; + } for (i = 1; i < 4; i++) { err = nilfs_segment_list_add(head, segnum[i]); -- cgit v1.2.3 From 0b9aad46c1634527c6a9f951f72c31be67f9b25c Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 22 Aug 2024 00:46:26 +0900 Subject: nilfs2: do not propagate ENOENT error from sufile during GC nilfs_sufile_freev(), which is used to free segments in GC, aborts with -ENOENT if the target segment usage is on a hole block. This error only occurs if one of the segment numbers to be freed passed by the GC ioctl is invalid, so return -EINVAL instead. To avoid impairing readability, introduce a wrapper function that encapsulates error handling including the error code conversion (and error message output). 
Link: https://lkml.kernel.org/r/20240821154627.11848-5-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/segment.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 8 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 0ca3110d6386..2a771e222d86 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1102,12 +1102,64 @@ static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci, return err; } +/** + * nilfs_free_segments - free the segments given by an array of segment numbers + * @nilfs: nilfs object + * @segnumv: array of segment numbers to be freed + * @nsegs: number of segments to be freed in @segnumv + * + * nilfs_free_segments() wraps nilfs_sufile_freev() and + * nilfs_sufile_cancel_freev(), and edits the segment usage metadata file + * (sufile) to free all segments given by @segnumv and @nsegs at once. If + * it fails midway, it cancels the changes so that none of the segments are + * freed. If @nsegs is 0, this function does nothing. + * + * The freeing of segments is not finalized until the writing of a log with + * a super root block containing this sufile change is complete, and it can + * be canceled with nilfs_sufile_cancel_freev() until then. + * + * Return: 0 on success, or the following negative error code on failure. + * * %-EINVAL - Invalid segment number. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + */ +static int nilfs_free_segments(struct the_nilfs *nilfs, __u64 *segnumv, + size_t nsegs) +{ + size_t ndone; + int ret; + + if (!nsegs) + return 0; + + ret = nilfs_sufile_freev(nilfs->ns_sufile, segnumv, nsegs, &ndone); + if (unlikely(ret)) { + nilfs_sufile_cancel_freev(nilfs->ns_sufile, segnumv, ndone, + NULL); + /* + * If a segment usage of the segments to be freed is in a + * hole block, nilfs_sufile_freev() will return -ENOENT. 
+ * In this case, -EINVAL should be returned to the caller + * since there is something wrong with the given segment + * number array. This error can only occur during GC, so + * there is no need to worry about it propagating to other + * callers (such as fsync). + */ + if (ret == -ENOENT) { + nilfs_err(nilfs->ns_sb, + "The segment usage entry %llu to be freed is invalid (in a hole)", + (unsigned long long)segnumv[ndone]); + ret = -EINVAL; + } + } + return ret; +} + static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) { struct the_nilfs *nilfs = sci->sc_super->s_fs_info; struct list_head *head; struct nilfs_inode_info *ii; - size_t ndone; int err = 0; switch (nilfs_sc_cstage_get(sci)) { @@ -1201,14 +1253,10 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) nilfs_sc_cstage_inc(sci); fallthrough; case NILFS_ST_SUFILE: - err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs, - sci->sc_nfreesegs, &ndone); - if (unlikely(err)) { - nilfs_sufile_cancel_freev(nilfs->ns_sufile, - sci->sc_freesegs, ndone, - NULL); + err = nilfs_free_segments(nilfs, sci->sc_freesegs, + sci->sc_nfreesegs); + if (unlikely(err)) break; - } sci->sc_stage.flags |= NILFS_CF_SUFREED; err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile, -- cgit v1.2.3 From d18e4233d88b1ed95016a465ad5751629f9d70b9 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 22 Aug 2024 00:46:27 +0900 Subject: nilfs2: do not propagate ENOENT error from nilfs_sufile_mark_dirty() nilfs_sufile_mark_dirty(), which marks a block in the sufile metadata file as dirty in preparation for log writing, returns -ENOENT to the caller if the block containing the segment usage of the specified segment is missing. This internal code can propagate through the log writer to system calls such as fsync. To prevent this, treat this case as a filesystem error and return -EIO instead. 
Link: https://lkml.kernel.org/r/20240821154627.11848-6-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/sufile.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index f071eba48163..eea5a6a12f7b 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -513,8 +513,15 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); - if (ret) + if (unlikely(ret)) { + if (ret == -ENOENT) { + nilfs_error(sufile->i_sb, + "segment usage for segment %llu is unreadable due to a hole block", + (unsigned long long)segnum); + ret = -EIO; + } goto out_sem; + } kaddr = kmap_local_page(bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); -- cgit v1.2.3 From 9abca1a71c0e5f78e7ce91af8ad03e8d9893dc54 Mon Sep 17 00:00:00 2001 From: Huang Xiaojia Date: Tue, 27 Aug 2024 02:41:09 +0900 Subject: nilfs2: use common implementation of file type Patch series "nilfs2: assorted cleanups". This is a collection of cleanup patches, with only the last three focused on the log writer thread, the rest are miscellaneous. Patches 1/8, 4/8, and 7/8 adopt common implementations, 2/8 uses a generic macro, 5/8 removes dead code, 6/8 removes an unnecessary reference, and 3/8 and 8/8 each simplify a particular messy implementation. This patch (of 8): Deduplicate the nilfs2 file type conversion implementation.
Link: https://lkml.kernel.org/r/20240826174116.5008-1-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/20240815013442.1220909-1-huangxiaojia2@huawei.com Link: https://lkml.kernel.org/r/20240826174116.5008-2-konishi.ryusuke@gmail.com Signed-off-by: Huang Xiaojia Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/dir.c | 44 +++++--------------------------------------- 1 file changed, 5 insertions(+), 39 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 4a29b0138d75..ba6bc6efcf11 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -231,37 +231,6 @@ static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p) nilfs_rec_len_from_disk(p->rec_len)); } -static unsigned char -nilfs_filetype_table[NILFS_FT_MAX] = { - [NILFS_FT_UNKNOWN] = DT_UNKNOWN, - [NILFS_FT_REG_FILE] = DT_REG, - [NILFS_FT_DIR] = DT_DIR, - [NILFS_FT_CHRDEV] = DT_CHR, - [NILFS_FT_BLKDEV] = DT_BLK, - [NILFS_FT_FIFO] = DT_FIFO, - [NILFS_FT_SOCK] = DT_SOCK, - [NILFS_FT_SYMLINK] = DT_LNK, -}; - -#define S_SHIFT 12 -static unsigned char -nilfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = { - [S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = NILFS_FT_DIR, - [S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = NILFS_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = NILFS_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = NILFS_FT_SOCK, - [S_IFLNK >> S_SHIFT] = NILFS_FT_SYMLINK, -}; - -static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode) -{ - umode_t mode = inode->i_mode; - - de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; -} - static int nilfs_readdir(struct file *file, struct dir_context *ctx) { loff_t pos = ctx->pos; @@ -297,10 +266,7 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx) if (de->inode) { unsigned char t; - if (de->file_type < NILFS_FT_MAX) - t = nilfs_filetype_table[de->file_type]; - else - t = DT_UNKNOWN; + t = fs_ftype_to_dtype(de->file_type); if 
(!dir_emit(ctx, de->name, de->name_len, le64_to_cpu(de->inode), t)) { @@ -444,7 +410,7 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, err = nilfs_prepare_chunk(folio, from, to); BUG_ON(err); de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); + de->file_type = fs_umode_to_ftype(inode->i_mode); nilfs_commit_chunk(folio, mapping, from, to); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); } @@ -531,7 +497,7 @@ got_it: de->name_len = namelen; memcpy(de->name, name, namelen); de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); + de->file_type = fs_umode_to_ftype(inode->i_mode); nilfs_commit_chunk(folio, folio->mapping, from, to); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); nilfs_mark_inode_dirty(dir); @@ -612,14 +578,14 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent) de->rec_len = nilfs_rec_len_to_disk(NILFS_DIR_REC_LEN(1)); memcpy(de->name, ".\0\0", 4); de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); + de->file_type = fs_umode_to_ftype(inode->i_mode); de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1)); de->name_len = 2; de->rec_len = nilfs_rec_len_to_disk(chunk_size - NILFS_DIR_REC_LEN(1)); de->inode = cpu_to_le64(parent->i_ino); memcpy(de->name, "..\0", 4); - nilfs_set_de_type(de, inode); + de->file_type = fs_umode_to_ftype(inode->i_mode); kunmap_local(kaddr); nilfs_commit_chunk(folio, mapping, 0, chunk_size); fail: -- cgit v1.2.3 From 21176c0ae4ac9ab5ca7e1b7e6c234dee2a0022f0 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 27 Aug 2024 02:41:10 +0900 Subject: nilfs2: use the BITS_PER_LONG macro The macros NILFS_BMAP_KEY_BIT and NILFS_BMAP_NEW_PTR_INIT calculate, within their definitions, the number of bits in an unsigned long variable. Use the BITS_PER_LONG macro to make them simpler. 
Link: https://lkml.kernel.org/r/20240826174116.5008-3-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Cc: Huang Xiaojia Signed-off-by: Andrew Morton --- fs/nilfs2/bmap.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index 5f8c8c552620..4656df392722 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -87,9 +87,8 @@ struct nilfs_bmap_operations { #define NILFS_BMAP_SIZE (NILFS_INODE_BMAP_SIZE * sizeof(__le64)) -#define NILFS_BMAP_KEY_BIT (sizeof(unsigned long) * 8 /* CHAR_BIT */) -#define NILFS_BMAP_NEW_PTR_INIT \ - (1UL << (sizeof(unsigned long) * 8 /* CHAR_BIT */ - 1)) +#define NILFS_BMAP_KEY_BIT BITS_PER_LONG +#define NILFS_BMAP_NEW_PTR_INIT (1UL << (BITS_PER_LONG - 1)) static inline int nilfs_bmap_is_new_ptr(unsigned long ptr) { -- cgit v1.2.3 From d7cee0b342cd90abe5d09976a69d1a22ad0c3441 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 27 Aug 2024 02:41:11 +0900 Subject: nilfs2: separate inode type information from i_state field In nilfs_iget_locked() and nilfs_ilookup(), which are used to find or obtain nilfs2 inodes, the nilfs_iget_args structure used to identify inodes has type information divided into multiple booleans, making type determination complicated. Simplify inode type determination by consolidating inode type information into an unsigned integer represented by a combination of flags and by separating the type identification information for on-memory inodes from the i_state member in the nilfs_inode_info structure.
Link: https://lkml.kernel.org/r/20240826174116.5008-4-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Cc: Huang Xiaojia Signed-off-by: Andrew Morton --- fs/nilfs2/inode.c | 56 ++++++++++++++--------------------------------------- fs/nilfs2/nilfs.h | 15 +++++++++++--- fs/nilfs2/segment.c | 2 +- fs/nilfs2/super.c | 1 + 4 files changed, 28 insertions(+), 46 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index c39bc940e6f2..35f966cb4ece 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -28,17 +28,13 @@ * @ino: inode number * @cno: checkpoint number * @root: pointer on NILFS root object (mounted checkpoint) - * @for_gc: inode for GC flag - * @for_btnc: inode for B-tree node cache flag - * @for_shadow: inode for shadowed page cache flag + * @type: inode type */ struct nilfs_iget_args { u64 ino; __u64 cno; struct nilfs_root *root; - bool for_gc; - bool for_btnc; - bool for_shadow; + unsigned int type; }; static int nilfs_iget_test(struct inode *inode, void *opaque); @@ -315,8 +311,7 @@ static int nilfs_insert_inode_locked(struct inode *inode, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); @@ -343,6 +338,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) root = NILFS_I(dir)->i_root; ii = NILFS_I(inode); ii->i_state = BIT(NILFS_I_NEW); + ii->i_type = NILFS_I_TYPE_NORMAL; ii->i_root = root; err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); @@ -546,23 +542,10 @@ static int nilfs_iget_test(struct inode *inode, void *opaque) return 0; ii = NILFS_I(inode); - if (test_bit(NILFS_I_BTNC, &ii->i_state)) { - if (!args->for_btnc) - return 0; - } else if (args->for_btnc) { + if (ii->i_type != args->type) return 0; - } - if (test_bit(NILFS_I_SHADOW, &ii->i_state)) { - if 
(!args->for_shadow) - return 0; - } else if (args->for_shadow) { - return 0; - } - if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) - return !args->for_gc; - - return args->for_gc && args->cno == ii->i_cno; + return !(args->type & NILFS_I_TYPE_GC) || args->cno == ii->i_cno; } static int nilfs_iget_set(struct inode *inode, void *opaque) @@ -572,15 +555,9 @@ static int nilfs_iget_set(struct inode *inode, void *opaque) inode->i_ino = args->ino; NILFS_I(inode)->i_cno = args->cno; NILFS_I(inode)->i_root = args->root; + NILFS_I(inode)->i_type = args->type; if (args->root && args->ino == NILFS_ROOT_INO) nilfs_get_root(args->root); - - if (args->for_gc) - NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE); - if (args->for_btnc) - NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC); - if (args->for_shadow) - NILFS_I(inode)->i_state |= BIT(NILFS_I_SHADOW); return 0; } @@ -588,8 +565,7 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return ilookup5(sb, ino, nilfs_iget_test, &args); @@ -599,8 +575,7 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -631,8 +606,7 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, __u64 cno) { struct nilfs_iget_args args = { - .ino = ino, .root = NULL, .cno = cno, .for_gc = true, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = NULL, .cno = cno, .type = NILFS_I_TYPE_GC }; struct inode *inode; int err; @@ -677,9 +651,7 @@ int 
nilfs_attach_btree_node_cache(struct inode *inode) args.ino = inode->i_ino; args.root = ii->i_root; args.cno = ii->i_cno; - args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0; - args.for_btnc = true; - args.for_shadow = test_bit(NILFS_I_SHADOW, &ii->i_state) != 0; + args.type = ii->i_type | NILFS_I_TYPE_BTNC; btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -733,8 +705,8 @@ void nilfs_detach_btree_node_cache(struct inode *inode) struct inode *nilfs_iget_for_shadow(struct inode *inode) { struct nilfs_iget_args args = { - .ino = inode->i_ino, .root = NULL, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = true + .ino = inode->i_ino, .root = NULL, .cno = 0, + .type = NILFS_I_TYPE_SHADOW }; struct inode *s_inode; int err; @@ -900,7 +872,7 @@ static void nilfs_clear_inode(struct inode *inode) if (test_bit(NILFS_I_BMAP, &ii->i_state)) nilfs_bmap_clear(ii->i_bmap); - if (!test_bit(NILFS_I_BTNC, &ii->i_state)) + if (!(ii->i_type & NILFS_I_TYPE_BTNC)) nilfs_detach_btree_node_cache(inode); if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 3097490b6621..fb1c4c5bae7c 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -22,6 +22,7 @@ /** * struct nilfs_inode_info - nilfs inode data in memory * @i_flags: inode flags + * @i_type: inode type (combination of flags that inidicate usage) * @i_state: dynamic state flags * @i_bmap: pointer on i_bmap_data * @i_bmap_data: raw block mapping @@ -37,6 +38,7 @@ */ struct nilfs_inode_info { __u32 i_flags; + unsigned int i_type; unsigned long i_state; /* Dynamic state flags */ struct nilfs_bmap *i_bmap; struct nilfs_bmap i_bmap_data; @@ -90,9 +92,16 @@ enum { NILFS_I_UPDATED, /* The file has been written back */ NILFS_I_INODE_SYNC, /* dsync is not allowed for inode */ NILFS_I_BMAP, /* has bmap and btnode_cache */ - NILFS_I_GCINODE, /* inode for GC, on memory only */ - NILFS_I_BTNC, /* inode for btree node cache */ 
- NILFS_I_SHADOW, /* inode for shadowed page cache */ +}; + +/* + * Flags to identify the usage of on-memory inodes (i_type) + */ +enum { + NILFS_I_TYPE_NORMAL = 0, + NILFS_I_TYPE_GC = 0x0001, /* For data caching during GC */ + NILFS_I_TYPE_BTNC = 0x0002, /* For btree node cache */ + NILFS_I_TYPE_SHADOW = 0x0004, /* For shadowed page cache */ }; /* diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 2a771e222d86..e4ec36d66607 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -519,7 +519,7 @@ static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci, ii = NILFS_I(inode); - if (test_bit(NILFS_I_GCINODE, &ii->i_state)) + if (ii->i_type & NILFS_I_TYPE_GC) cno = ii->i_cno; else if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) cno = 0; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 8eb8dbc9f51c..eca79cca3803 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -160,6 +160,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) return NULL; ii->i_bh = NULL; ii->i_state = 0; + ii->i_type = 0; ii->i_cno = 0; ii->i_assoc_inode = NULL; ii->i_bmap = &ii->i_bmap_data; -- cgit v1.2.3 From 9860f434056e6358ac63e7068254aeaf339cf71e Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 27 Aug 2024 02:41:12 +0900 Subject: nilfs2: eliminate the shared counter and spinlock for i_generation Use get_random_u32() as the source for inode->i_generation for new inodes, and eliminate the original source, the shared counter ns_next_generation along with its exclusive access spinlock ns_next_gen_lock. 
Link: https://lkml.kernel.org/r/20240826174116.5008-5-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Cc: Huang Xiaojia Signed-off-by: Andrew Morton --- fs/nilfs2/inode.c | 6 ++---- fs/nilfs2/the_nilfs.c | 5 ----- fs/nilfs2/the_nilfs.h | 6 ------ 3 files changed, 2 insertions(+), 15 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 35f966cb4ece..3c4a0577bc71 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "nilfs.h" #include "btnode.h" #include "segment.h" @@ -320,7 +321,6 @@ static int nilfs_insert_inode_locked(struct inode *inode, struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) { struct super_block *sb = dir->i_sb; - struct the_nilfs *nilfs = sb->s_fs_info; struct inode *inode; struct nilfs_inode_info *ii; struct nilfs_root *root; @@ -381,9 +381,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) /* ii->i_dir_acl = 0; */ ii->i_dir_start_lookup = 0; nilfs_set_inode_flags(inode); - spin_lock(&nilfs->ns_next_gen_lock); - inode->i_generation = nilfs->ns_next_generation++; - spin_unlock(&nilfs->ns_next_gen_lock); + inode->i_generation = get_random_u32(); if (nilfs_insert_inode_locked(inode, root, ino) < 0) { err = -EIO; goto failed_after_creation; diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index e44dde57ab65..ac03fd3c330c 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include "nilfs.h" @@ -69,7 +68,6 @@ struct the_nilfs *alloc_nilfs(struct super_block *sb) INIT_LIST_HEAD(&nilfs->ns_dirty_files); INIT_LIST_HEAD(&nilfs->ns_gc_inodes); spin_lock_init(&nilfs->ns_inode_lock); - spin_lock_init(&nilfs->ns_next_gen_lock); spin_lock_init(&nilfs->ns_last_segment_lock); nilfs->ns_cptree = RB_ROOT; spin_lock_init(&nilfs->ns_cptree_lock); @@ -754,9 +752,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb) 
 nilfs->ns_blocksize_bits = sb->s_blocksize_bits; nilfs->ns_blocksize = blocksize; - get_random_bytes(&nilfs->ns_next_generation, - sizeof(nilfs->ns_next_generation)); - err = nilfs_store_disk_layout(nilfs, sbp); if (err) goto failed_sbh; diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 1e829ed7b0ef..4776a70f01ae 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -71,8 +71,6 @@ enum { * @ns_dirty_files: list of dirty files * @ns_inode_lock: lock protecting @ns_dirty_files * @ns_gc_inodes: dummy inodes to keep live blocks - * @ns_next_generation: next generation number for inodes - * @ns_next_gen_lock: lock protecting @ns_next_generation * @ns_mount_opt: mount options * @ns_resuid: uid for reserved blocks * @ns_resgid: gid for reserved blocks @@ -161,10 +159,6 @@ struct the_nilfs { /* GC inode list */ struct list_head ns_gc_inodes; - /* Inode allocator */ - u32 ns_next_generation; - spinlock_t ns_next_gen_lock; - /* Mount options */ unsigned long ns_mount_opt; -- cgit v1.2.3 From 33d23d849610df330ffb02420df705730a79f8e8 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 27 Aug 2024 02:41:13 +0900 Subject: nilfs2: do not repair reserved inode bitmap in nilfs_new_inode() After commit 93aef9eda1ce ("nilfs2: fix incorrect inode allocation from reserved inodes") is applied, the inode number returned by nilfs_ifile_create_inode() is guaranteed to always be greater than or equal to NILFS_USER_INO, so the code immediately following it, which repairs the bitmap if the inode number is a reserved inode number (less than NILFS_USER_INO), is no longer executed. So, delete it.
Link: https://lkml.kernel.org/r/20240826174116.5008-6-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Cc: Huang Xiaojia Signed-off-by: Andrew Morton --- fs/nilfs2/inode.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 3c4a0577bc71..3742baec4920 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -345,19 +345,6 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) if (unlikely(err)) goto failed_ifile_create_inode; /* reference count of i_bh inherits from nilfs_mdt_read_block() */ - - if (unlikely(ino < NILFS_USER_INO)) { - nilfs_warn(sb, - "inode bitmap is inconsistent for reserved inodes"); - do { - brelse(bh); - err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); - if (unlikely(err)) - goto failed_ifile_create_inode; - } while (ino < NILFS_USER_INO); - - nilfs_info(sb, "repaired inode bitmap for reserved inodes"); - } ii->i_bh = bh; atomic64_inc(&root->inodes_count); -- cgit v1.2.3 From cfdfe9e17c4142cb556f34f25f93f4f85ca494f0 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 27 Aug 2024 02:41:14 +0900 Subject: nilfs2: remove sc_timer_task After commit f5d4e04634c9 ("nilfs2: fix use-after-free of timer for log writer thread") is applied, nilfs_construction_timeout(), which is called by a timer and wakes up the log writer thread, is never called after the log writer thread has terminated. As a result, the member variable "sc_timer_task" of the "nilfs_sc_info" structure, which was added when timer_setup() was adopted to retain a reference to the log writer thread's task even after it had terminated, is no longer needed; "sc_task", which holds a reference to the log writer thread's task for its lifetime, can simply be used instead. So, eliminate "sc_timer_task" and use "sc_task" in its place.
Link: https://lkml.kernel.org/r/20240826174116.5008-7-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Cc: Huang Xiaojia Signed-off-by: Andrew Morton --- fs/nilfs2/segment.c | 3 +-- fs/nilfs2/segment.h | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index e4ec36d66607..ed14e5d4a354 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2502,7 +2502,7 @@ static void nilfs_construction_timeout(struct timer_list *t) { struct nilfs_sc_info *sci = from_timer(sci, t, sc_timer); - wake_up_process(sci->sc_timer_task); + wake_up_process(sci->sc_task); } static void @@ -2640,7 +2640,6 @@ static int nilfs_segctor_thread(void *arg) struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int timeout = 0; - sci->sc_timer_task = current; timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); /* start sync. */ diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 2499721ebcc9..7d1160a266df 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -115,7 +115,6 @@ struct nilfs_segsum_pointer { * @sc_lseg_stime: Start time of the latest logical segment * @sc_watermark: Watermark for the number of dirty buffers * @sc_timer: Timer for segctord - * @sc_timer_task: Thread woken up by @sc_timer * @sc_task: current thread of segctord */ struct nilfs_sc_info { @@ -172,7 +171,6 @@ struct nilfs_sc_info { unsigned long sc_watermark; struct timer_list sc_timer; - struct task_struct *sc_timer_task; struct task_struct *sc_task; }; -- cgit v1.2.3 From 3f66cc261ccb54a8e4d8d5aa51c389c19453b00c Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 27 Aug 2024 02:41:15 +0900 Subject: nilfs2: use kthread_create and kthread_stop for the log writer thread By using kthread_create() and kthread_stop() to start and stop the log writer thread, eliminate custom thread start and stop helpers, as well as the wait queue "sc_wait_task" on the "nilfs_sc_info" struct and NILFS_SEGCTOR_QUIT flag 
that exist only to implement them. Also, update the kernel doc comments of the changed functions as appropriate. Link: https://lkml.kernel.org/r/20240826174116.5008-8-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Cc: Huang Xiaojia Signed-off-by: Andrew Morton --- fs/nilfs2/segment.c | 82 ++++++++++++++++++++--------------------------------- fs/nilfs2/segment.h | 3 -- 2 files changed, 31 insertions(+), 54 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index ed14e5d4a354..4ff219f90f47 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2628,11 +2628,15 @@ static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci) } /** - * nilfs_segctor_thread - main loop of the segment constructor thread. + * nilfs_segctor_thread - main loop of the log writer thread * @arg: pointer to a struct nilfs_sc_info. * - * nilfs_segctor_thread() initializes a timer and serves as a daemon - * to execute segment constructions. + * nilfs_segctor_thread() is the main loop function of the log writer kernel + * thread, which determines whether log writing is necessary, and if so, + * performs the log write in the background, or waits if not. It is also + * used to decide the background writeback of the superblock. + * + * Return: Always 0. */ static int nilfs_segctor_thread(void *arg) { @@ -2640,11 +2644,6 @@ static int nilfs_segctor_thread(void *arg) struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int timeout = 0; - timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); - - /* start sync. */ - sci->sc_task = current; - wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */ nilfs_info(sci->sc_super, "segctord starting. 
Construction interval = %lu seconds, CP frequency < %lu seconds", sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); @@ -2655,7 +2654,7 @@ static int nilfs_segctor_thread(void *arg) for (;;) { int mode; - if (sci->sc_state & NILFS_SEGCTOR_QUIT) + if (kthread_should_stop()) goto end_thread; if (timeout || sci->sc_seq_request != sci->sc_seq_done) @@ -2709,41 +2708,10 @@ static int nilfs_segctor_thread(void *arg) /* end sync. */ sci->sc_task = NULL; timer_shutdown_sync(&sci->sc_timer); - wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */ spin_unlock(&sci->sc_state_lock); return 0; } -static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci) -{ - struct task_struct *t; - - t = kthread_run(nilfs_segctor_thread, sci, "segctord"); - if (IS_ERR(t)) { - int err = PTR_ERR(t); - - nilfs_err(sci->sc_super, "error %d creating segctord thread", - err); - return err; - } - wait_event(sci->sc_wait_task, sci->sc_task != NULL); - return 0; -} - -static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) - __acquires(&sci->sc_state_lock) - __releases(&sci->sc_state_lock) -{ - sci->sc_state |= NILFS_SEGCTOR_QUIT; - - while (sci->sc_task) { - wake_up(&sci->sc_wait_daemon); - spin_unlock(&sci->sc_state_lock); - wait_event(sci->sc_wait_task, sci->sc_task == NULL); - spin_lock(&sci->sc_state_lock); - } -} - /* * Setup & clean-up functions */ @@ -2764,7 +2732,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, init_waitqueue_head(&sci->sc_wait_request); init_waitqueue_head(&sci->sc_wait_daemon); - init_waitqueue_head(&sci->sc_wait_task); spin_lock_init(&sci->sc_state_lock); INIT_LIST_HEAD(&sci->sc_dirty_files); INIT_LIST_HEAD(&sci->sc_segbufs); @@ -2819,8 +2786,12 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) up_write(&nilfs->ns_segctor_sem); + if (sci->sc_task) { + wake_up(&sci->sc_wait_daemon); + kthread_stop(sci->sc_task); + } + spin_lock(&sci->sc_state_lock); - nilfs_segctor_kill_thread(sci); flag = ((sci->sc_state & 
NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request || sci->sc_seq_request != sci->sc_seq_done); spin_unlock(&sci->sc_state_lock); @@ -2868,14 +2839,15 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) * This allocates a log writer object, initializes it, and starts the * log writer. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error code is returned. - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or the following negative error code on failure. + * * %-EINTR - Log writer thread creation failed due to interruption. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) { struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci; + struct task_struct *t; int err; if (nilfs->ns_writer) { @@ -2888,15 +2860,23 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) return 0; } - nilfs->ns_writer = nilfs_segctor_new(sb, root); - if (!nilfs->ns_writer) + sci = nilfs_segctor_new(sb, root); + if (unlikely(!sci)) return -ENOMEM; - err = nilfs_segctor_start_thread(nilfs->ns_writer); - if (unlikely(err)) + nilfs->ns_writer = sci; + t = kthread_create(nilfs_segctor_thread, sci, "segctord"); + if (IS_ERR(t)) { + err = PTR_ERR(t); + nilfs_err(sb, "error %d creating segctord thread", err); nilfs_detach_log_writer(sb); + return err; + } + sci->sc_task = t; + timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); - return err; + wake_up_process(sci->sc_task); + return 0; } /** diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 7d1160a266df..f723f47ddc4e 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -105,7 +105,6 @@ struct nilfs_segsum_pointer { * @sc_flush_request: inode bitmap of metadata files to be flushed * @sc_wait_request: Client request queue * @sc_wait_daemon: Daemon wait queue - * @sc_wait_task: Start/end wait queue to control segctord task * 
@sc_seq_request: Request counter * @sc_seq_accepted: Accepted request count * @sc_seq_done: Completion counter @@ -158,7 +157,6 @@ struct nilfs_sc_info { wait_queue_head_t sc_wait_request; wait_queue_head_t sc_wait_daemon; - wait_queue_head_t sc_wait_task; __u32 sc_seq_request; __u32 sc_seq_accepted; @@ -191,7 +189,6 @@ enum { }; /* sc_state */ -#define NILFS_SEGCTOR_QUIT 0x0001 /* segctord is being destroyed */ #define NILFS_SEGCTOR_COMMIT 0x0004 /* committed transaction exists */ /* -- cgit v1.2.3 From 74b0099340e0be96b37f5f8b0b5d02b48bb25a2b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Tue, 27 Aug 2024 02:41:16 +0900 Subject: nilfs2: refactor nilfs_segctor_thread() Simplify nilfs_segctor_thread(), the main loop function of the log writer thread, to make the basic structure easier to understand. In particular, the acquisition and release of the sc_state_lock spinlock was scattered throughout the function, so extract the determination of whether log writing is required into a helper function and make the spinlock lock sections clearer. Link: https://lkml.kernel.org/r/20240826174116.5008-9-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Cc: Huang Xiaojia Signed-off-by: Andrew Morton --- fs/nilfs2/segment.c | 85 +++++++++++++++++++++++++---------------------------- 1 file changed, 40 insertions(+), 45 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 4ff219f90f47..7c99d71204f1 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2627,6 +2627,32 @@ static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci) return SC_LSEG_SR; } +/** + * nilfs_log_write_required - determine whether log writing is required + * @sci: nilfs_sc_info struct + * @modep: location for storing log writing mode + * + * Return: true if log writing is required, false otherwise. If log writing + * is required, the mode is stored in the location pointed to by @modep. 
+ */ +static bool nilfs_log_write_required(struct nilfs_sc_info *sci, int *modep) +{ + bool timedout, ret = true; + + spin_lock(&sci->sc_state_lock); + timedout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && + time_after_eq(jiffies, sci->sc_timer.expires)); + if (timedout || sci->sc_seq_request != sci->sc_seq_done) + *modep = SC_LSEG_SR; + else if (sci->sc_flush_request) + *modep = nilfs_segctor_flush_mode(sci); + else + ret = false; + + spin_unlock(&sci->sc_state_lock); + return ret; +} + /** * nilfs_segctor_thread - main loop of the log writer thread * @arg: pointer to a struct nilfs_sc_info. @@ -2642,70 +2668,39 @@ static int nilfs_segctor_thread(void *arg) { struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; struct the_nilfs *nilfs = sci->sc_super->s_fs_info; - int timeout = 0; nilfs_info(sci->sc_super, "segctord starting. Construction interval = %lu seconds, CP frequency < %lu seconds", sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); set_freezable(); - spin_lock(&sci->sc_state_lock); - loop: - for (;;) { - int mode; - - if (kthread_should_stop()) - goto end_thread; - - if (timeout || sci->sc_seq_request != sci->sc_seq_done) - mode = SC_LSEG_SR; - else if (sci->sc_flush_request) - mode = nilfs_segctor_flush_mode(sci); - else - break; - - spin_unlock(&sci->sc_state_lock); - nilfs_segctor_thread_construct(sci, mode); - spin_lock(&sci->sc_state_lock); - timeout = 0; - } - - if (freezing(current)) { - spin_unlock(&sci->sc_state_lock); - try_to_freeze(); - spin_lock(&sci->sc_state_lock); - } else { + while (!kthread_should_stop()) { DEFINE_WAIT(wait); - int should_sleep = 1; + bool should_write; + int mode; + + if (freezing(current)) { + try_to_freeze(); + continue; + } prepare_to_wait(&sci->sc_wait_daemon, &wait, TASK_INTERRUPTIBLE); - - if (sci->sc_seq_request != sci->sc_seq_done) - should_sleep = 0; - else if (sci->sc_flush_request) - should_sleep = 0; - else if (sci->sc_state & NILFS_SEGCTOR_COMMIT) - should_sleep = time_before(jiffies, - 
sci->sc_timer.expires); - - if (should_sleep) { - spin_unlock(&sci->sc_state_lock); + should_write = nilfs_log_write_required(sci, &mode); + if (!should_write) schedule(); - spin_lock(&sci->sc_state_lock); - } finish_wait(&sci->sc_wait_daemon, &wait); - timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && - time_after_eq(jiffies, sci->sc_timer.expires)); if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs)) set_nilfs_discontinued(nilfs); + + if (should_write) + nilfs_segctor_thread_construct(sci, mode); } - goto loop; - end_thread: /* end sync. */ + spin_lock(&sci->sc_state_lock); sci->sc_task = NULL; timer_shutdown_sync(&sci->sc_timer); spin_unlock(&sci->sc_state_lock); -- cgit v1.2.3 From 9403001ad65ae4f4c5de368bdda3a0636b51d51a Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 4 Sep 2024 17:13:07 +0900 Subject: nilfs2: fix potential null-ptr-deref in nilfs_btree_insert() Patch series "nilfs2: fix potential issues with empty b-tree nodes". This series addresses three potential issues with empty b-tree nodes that can occur with corrupted filesystem images, including one recently discovered by syzbot. This patch (of 3): If a b-tree is broken on the device, and the b-tree height is greater than 2 (the level of the root node is greater than 1) even if the number of child nodes of the b-tree root is 0, a NULL pointer dereference occurs in nilfs_btree_prepare_insert(), which is called from nilfs_btree_insert(). This is because, when the number of child nodes of the b-tree root is 0, nilfs_btree_do_lookup() does not set the block buffer head in any of path[x].bp_bh, leaving it as the initial value of NULL, but if the level of the b-tree root node is greater than 1, nilfs_btree_get_nonroot_node(), which accesses the buffer memory of path[x].bp_bh, is called. Fix this issue by adding a check to nilfs_btree_root_broken(), which performs sanity checks when reading the root node from the device, to detect this inconsistency. 
Thanks to Lizhi Xu for trying to solve the bug and clarifying the cause early on. Link: https://lkml.kernel.org/r/20240904081401.16682-1-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/20240902084101.138971-1-lizhi.xu@windriver.com Link: https://lkml.kernel.org/r/20240904081401.16682-2-konishi.ryusuke@gmail.com Fixes: 17c76b0104e4 ("nilfs2: B-tree based block mapping") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+9bff4c7b992038a7409f@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=9bff4c7b992038a7409f Cc: Lizhi Xu Signed-off-by: Andrew Morton --- fs/nilfs2/btree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 862bdf23120e..d390b8ba00d4 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -381,7 +381,8 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX || nchildren < 0 || - nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { + nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX || + (nchildren == 0 && level > NILFS_BTREE_LEVEL_NODE_MIN))) { nilfs_crit(inode->i_sb, "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", inode->i_ino, level, flags, nchildren); -- cgit v1.2.3 From 111b812d3662f3a1b831d19208f83aa711583fe6 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 4 Sep 2024 17:13:08 +0900 Subject: nilfs2: determine empty node blocks as corrupted Due to the nature of b-trees, nilfs2 itself and admin tools such as mkfs.nilfs2 will never create an intermediate b-tree node block with 0 child nodes, nor will they delete (key, pointer)-entries that would result in such a state. However, it is possible that a b-tree node block is corrupted on the backing device and is read with 0 child nodes. 
Because operation is not guaranteed if the number of child nodes is 0 for intermediate node blocks other than the root node, modify nilfs_btree_node_broken(), which performs sanity checks when reading a b-tree node block, so that such cases will be judged as metadata corruption. Link: https://lkml.kernel.org/r/20240904081401.16682-3-konishi.ryusuke@gmail.com Fixes: 17c76b0104e4 ("nilfs2: B-tree based block mapping") Signed-off-by: Ryusuke Konishi Cc: Lizhi Xu Signed-off-by: Andrew Morton --- fs/nilfs2/btree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index d390b8ba00d4..dedd3c480842 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -350,7 +350,7 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX || (flags & NILFS_BTREE_NODE_ROOT) || - nchildren < 0 || + nchildren <= 0 || nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { nilfs_crit(inode->i_sb, "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", -- cgit v1.2.3 From f9c96351aa6718b42a9f42eaf7adce0356bdb5e8 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 4 Sep 2024 17:13:09 +0900 Subject: nilfs2: fix potential oob read in nilfs_btree_check_delete() The function nilfs_btree_check_delete(), which checks whether degeneration to direct mapping occurs before deleting a b-tree entry, causes memory access outside the block buffer when retrieving the maximum key if the root node has no entries. This does not usually happen because b-tree mappings with 0 child nodes are never created by mkfs.nilfs2 or nilfs2 itself. However, it can happen if the b-tree root node read from a device is configured that way, so fix this potential issue by adding a check for that case. 
Link: https://lkml.kernel.org/r/20240904081401.16682-4-konishi.ryusuke@gmail.com Fixes: 17c76b0104e4 ("nilfs2: B-tree based block mapping") Signed-off-by: Ryusuke Konishi Cc: Lizhi Xu Signed-off-by: Andrew Morton --- fs/nilfs2/btree.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index dedd3c480842..ef5061bb56da 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -1659,13 +1659,16 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) int nchildren, ret; root = nilfs_btree_get_root(btree); + nchildren = nilfs_btree_node_get_nchildren(root); + if (unlikely(nchildren == 0)) + return 0; + switch (nilfs_btree_height(btree)) { case 2: bh = NULL; node = root; break; case 3: - nchildren = nilfs_btree_node_get_nchildren(root); if (nchildren > 1) return 0; ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, @@ -1674,12 +1677,12 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) if (ret < 0) return ret; node = (struct nilfs_btree_node *)bh->b_data; + nchildren = nilfs_btree_node_get_nchildren(node); break; default: return 0; } - nchildren = nilfs_btree_node_get_nchildren(node); maxkey = nilfs_btree_node_get_key(node, nchildren - 1); nextmaxkey = (nchildren > 1) ? 
 nilfs_btree_node_get_key(node, nchildren - 2) : 0; -- cgit v1.2.3 From fd127b155523bbfaa91a5872f4d93a80f70b8238 Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Wed, 4 Sep 2024 19:16:03 +0900 Subject: nilfs2: remove duplicate 'unlikely()' usage Remove the nested unlikely() call, since IS_ERR() already uses unlikely() internally. Link: https://lkml.kernel.org/r/20240904101618.17716-1-konishi.ryusuke@gmail.com Signed-off-by: Kunwu Chan Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton --- fs/nilfs2/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 7797903e014e..9c0b7cddeaae 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -262,7 +262,7 @@ repeat: NILFS_FOLIO_BUG(folio, "inconsistent dirty state"); dfolio = filemap_grab_folio(dmap, folio->index); - if (unlikely(IS_ERR(dfolio))) { + if (IS_ERR(dfolio)) { /* No empty page is added to the page cache */ folio_unlock(folio); err = PTR_ERR(dfolio); -- cgit v1.2.3