diff options
53 files changed, 525 insertions, 153 deletions
diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c index 5b2e47c246f3..6f59045be0f9 100644 --- a/drivers/staging/lustre/lustre/llite/dir.c +++ b/drivers/staging/lustre/lustre/llite/dir.c @@ -369,8 +369,6 @@ static int ll_readdir(struct file *filp, struct dir_context *ctx) } ctx->pos = pos; ll_finish_md_op_data(op_data); - filp->f_version = inode->i_version; - out: if (!rc) ll_stats_ops_tally(sbi, LPROC_LL_READDIR, 1); @@ -1678,7 +1676,6 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) else fd->lfd_pos = offset; file->f_pos = offset; - file->f_version = 0; } ret = offset; } diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 0f0e6925e97d..14a6c1b90c9f 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -10,6 +10,7 @@ */ #include <linux/math64.h> +#include <linux/iversion.h> #include "affs.h" /* @@ -60,7 +61,7 @@ affs_insert_hash(struct inode *dir, struct buffer_head *bh) affs_brelse(dir_bh); dir->i_mtime = dir->i_ctime = current_time(dir); - dir->i_version++; + inode_inc_iversion(dir); mark_inode_dirty(dir); return 0; @@ -114,7 +115,7 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh) affs_brelse(bh); dir->i_mtime = dir->i_ctime = current_time(dir); - dir->i_version++; + inode_inc_iversion(dir); mark_inode_dirty(dir); return retval; diff --git a/fs/affs/dir.c b/fs/affs/dir.c index a105e77df2c1..d180b46453cf 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -14,6 +14,7 @@ * */ +#include <linux/iversion.h> #include "affs.h" static int affs_readdir(struct file *, struct dir_context *); @@ -80,7 +81,7 @@ affs_readdir(struct file *file, struct dir_context *ctx) * we can jump directly to where we left off. */ ino = (u32)(long)file->private_data; - if (ino && file->f_version == inode->i_version) { + if (ino && inode_cmp_iversion(inode, file->f_version) == 0) { pr_debug("readdir() left off=%d\n", ino); goto inside; } @@ -130,7 +131,7 @@ inside: } while (ino); } done: - file->f_version = inode->i_version; + file->f_version = inode_query_iversion(inode); file->private_data = (void *)(long)ino; affs_brelse(fh_bh); diff --git a/fs/affs/super.c b/fs/affs/super.c index 1117e36134cc..e602619aed9d 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -21,6 +21,7 @@ #include <linux/writeback.h> #include <linux/blkdev.h> #include <linux/seq_file.h> +#include <linux/iversion.h> #include "affs.h" static int affs_statfs(struct dentry *dentry, struct kstatfs *buf); @@ -102,7 +103,7 @@ static struct inode *affs_alloc_inode(struct super_block *sb) if (!i) return NULL; - i->vfs_inode.i_version = 1; + inode_set_iversion(&i->vfs_inode, 1); i->i_lc = NULL; i->i_ext_bh = NULL; i->i_pa_cnt = 0; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index b90ef39ae914..88ec38c2d83c 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/sched.h> #include <linux/circ_buf.h> +#include <linux/iversion.h> #include "internal.h" #include "afs_fs.h" @@ -124,7 +125,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp, vnode->vfs_inode.i_ctime.tv_sec = status->mtime_client; vnode->vfs_inode.i_mtime = vnode->vfs_inode.i_ctime; vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime; - vnode->vfs_inode.i_version = data_version; + inode_set_iversion_raw(&vnode->vfs_inode, data_version); } expected_version = status->data_version; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 1e81864ef0b2..c7f17c44c7ce 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -21,6 +21,7 @@ #include <linux/sched.h> #include <linux/mount.h> #include <linux/namei.h> +#include <linux/iversion.h> #include "internal.h" static const struct inode_operations afs_symlink_inode_operations = { @@ -89,7 +90,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key) inode->i_atime = inode->i_mtime = inode->i_ctime; inode->i_blocks = 0; inode->i_generation = vnode->fid.unique; - inode->i_version = vnode->status.data_version; + inode_set_iversion_raw(inode, vnode->status.data_version); inode->i_mapping->a_ops = &afs_fs_aops; read_sequnlock_excl(&vnode->cb_lock); @@ -218,7 +219,7 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name, inode->i_ctime.tv_nsec = 0; inode->i_atime = inode->i_mtime = inode->i_ctime; inode->i_blocks = 0; - inode->i_version = 0; + inode_set_iversion_raw(inode, 0); inode->i_generation = 0; set_bit(AFS_VNODE_PSEUDODIR, &vnode->flags); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index a6226cd6063c..d4db406e2d39 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -18,6 +18,7 @@ */ #include <linux/slab.h> +#include <linux/iversion.h> #include "delayed-inode.h" #include "disk-io.h" #include "transaction.h" @@ -1713,7 +1714,8 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans, btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode)); btrfs_set_stack_inode_generation(inode_item, BTRFS_I(inode)->generation); - btrfs_set_stack_inode_sequence(inode_item, inode->i_version); + btrfs_set_stack_inode_sequence(inode_item, + inode_peek_iversion(inode)); btrfs_set_stack_inode_transid(inode_item, trans->transid); btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev); btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags); @@ -1767,7 +1769,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev) BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item); BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item); - inode->i_version = btrfs_stack_inode_sequence(inode_item); + inode_set_iversion_queried(inode, + btrfs_stack_inode_sequence(inode_item)); inode->i_rdev = 0; *rdev = btrfs_stack_inode_rdev(inode_item); BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index eb1bac7c8553..c95d7b2efefb 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -31,6 +31,7 @@ #include <linux/slab.h> #include <linux/btrfs.h> #include <linux/uio.h> +#include <linux/iversion.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cb1e2d201434..734b37d9d459 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -43,6 +43,7 @@ #include <linux/posix_acl_xattr.h> #include <linux/uio.h> #include <linux/magic.h> +#include <linux/iversion.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -3777,7 +3778,8 @@ static int btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->generation = btrfs_inode_generation(leaf, inode_item); BTRFS_I(inode)->last_trans = btrfs_inode_transid(leaf, inode_item); - inode->i_version = btrfs_inode_sequence(leaf, inode_item); + inode_set_iversion_queried(inode, + btrfs_inode_sequence(leaf, inode_item)); inode->i_generation = BTRFS_I(inode)->generation; inode->i_rdev = 0; rdev = btrfs_inode_rdev(leaf, inode_item); @@ -3945,7 +3947,8 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, &token); btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation, &token); - btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token); + btrfs_set_token_inode_sequence(leaf, item, inode_peek_iversion(inode), + &token); btrfs_set_token_inode_transid(leaf, item, trans->transid, &token); btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token); btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token); @@ -6104,19 +6107,20 @@ static int btrfs_update_time(struct inode *inode, struct timespec *now, int flags) { struct btrfs_root *root = BTRFS_I(inode)->root; + bool dirty = flags & ~S_VERSION; if (btrfs_root_readonly(root)) return -EROFS; if (flags & S_VERSION) - inode_inc_iversion(inode); + dirty |= inode_maybe_inc_iversion(inode, dirty); if (flags & S_CTIME) inode->i_ctime = *now; if (flags & S_MTIME) inode->i_mtime = *now; if (flags & S_ATIME) inode->i_atime = *now; - return btrfs_dirty_inode(inode); + return dirty ? btrfs_dirty_inode(inode) : 0; } /* diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2ef8acaac688..aa452c9e2eff 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -43,6 +43,7 @@ #include <linux/uuid.h> #include <linux/btrfs.h> #include <linux/uaccess.h> +#include <linux/iversion.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 7bf9b31561db..1b7d92075c1f 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -20,6 +20,7 @@ #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/list_sort.h> +#include <linux/iversion.h> #include "tree-log.h" #include "disk-io.h" #include "locking.h" @@ -3609,7 +3610,8 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), &token); - btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token); + btrfs_set_token_inode_sequence(leaf, item, + inode_peek_iversion(inode), &token); btrfs_set_token_inode_transid(leaf, item, trans->transid, &token); btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token); btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 2c7e53f9ff1b..5258c1714830 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -23,6 +23,7 @@ #include <linux/xattr.h> #include <linux/security.h> #include <linux/posix_acl_xattr.h> +#include <linux/iversion.h> #include "ctree.h" #include "btrfs_inode.h" #include "transaction.h" diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 98233a97b7b8..c5a53fcc43ea 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c @@ -31,6 +31,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <linux/iversion.h> #include "exofs.h" static inline unsigned exofs_chunk_size(struct inode *inode) @@ -60,7 +61,7 @@ static int exofs_commit_chunk(struct page *page, loff_t pos, unsigned len) struct inode *dir = mapping->host; int err = 0; - dir->i_version++; + inode_inc_iversion(dir); if (!PageUptodate(page)) SetPageUptodate(page); @@ -241,7 +242,7 @@ exofs_readdir(struct file *file, struct dir_context *ctx) unsigned long n = pos >> PAGE_SHIFT; unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(exofs_chunk_size(inode)-1); - int need_revalidate = (file->f_version != inode->i_version); + bool need_revalidate = inode_cmp_iversion(inode, file->f_version); if (pos > inode->i_size - EXOFS_DIR_REC_LEN(1)) return 0; @@ -264,8 +265,8 @@ exofs_readdir(struct file *file, struct dir_context *ctx) chunk_mask); ctx->pos = (n<<PAGE_SHIFT) + offset; } - file->f_version = inode->i_version; - need_revalidate = 0; + file->f_version = inode_query_iversion(inode); + need_revalidate = false; } de = (struct exofs_dir_entry *)(kaddr + offset); limit = kaddr + exofs_last_byte(inode, n) - diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 819624cfc8da..7e244093c0e5 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -38,6 +38,7 @@ #include <linux/module.h> #include <linux/exportfs.h> #include <linux/slab.h> +#include <linux/iversion.h> #include "exofs.h" @@ -159,7 +160,7 @@ static struct inode *exofs_alloc_inode(struct super_block *sb) if (!oi) return NULL; - oi->vfs_inode.i_version = 1; + inode_set_iversion(&oi->vfs_inode, 1); return &oi->vfs_inode; } diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 987647986f47..4111085a129f 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -26,6 +26,7 @@ #include <linux/buffer_head.h> #include <linux/pagemap.h> #include <linux/swap.h> +#include <linux/iversion.h> typedef struct ext2_dir_entry_2 ext2_dirent; @@ -92,7 +93,7 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len) struct inode *dir = mapping->host; int err = 0; - dir->i_version++; + inode_inc_iversion(dir); block_write_end(NULL, mapping, pos, len, len, page, NULL); if (pos+len > dir->i_size) { @@ -293,7 +294,7 @@ ext2_readdir(struct file *file, struct dir_context *ctx) unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); unsigned char *types = NULL; - int need_revalidate = file->f_version != inode->i_version; + bool need_revalidate = inode_cmp_iversion(inode, file->f_version); if (pos > inode->i_size - EXT2_DIR_REC_LEN(1)) return 0; @@ -319,8 +320,8 @@ ext2_readdir(struct file *file, struct dir_context *ctx) offset = ext2_validate_entry(kaddr, offset, chunk_mask); ctx->pos = (n<<PAGE_SHIFT) + offset; } - file->f_version = inode->i_version; - need_revalidate = 0; + file->f_version = inode_query_iversion(inode); + need_revalidate = false; } de = (ext2_dirent *)(kaddr+offset); limit = kaddr + ext2_last_byte(inode, n) - EXT2_DIR_REC_LEN(1); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 7646818ab266..554c98b8a93a 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -33,6 +33,7 @@ #include <linux/quotaops.h> #include <linux/uaccess.h> #include <linux/dax.h> +#include <linux/iversion.h> #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -184,7 +185,7 @@ static struct inode *ext2_alloc_inode(struct super_block *sb) if (!ei) return NULL; ei->i_block_alloc_info = NULL; - ei->vfs_inode.i_version = 1; + inode_set_iversion(&ei->vfs_inode, 1); #ifdef CONFIG_QUOTA memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); #endif @@ -1569,7 +1570,7 @@ out: return err; if (inode->i_size < off+len-towrite) i_size_write(inode, off+len-towrite); - inode->i_version++; + inode_inc_iversion(inode); inode->i_mtime = inode->i_ctime = current_time(inode); mark_inode_dirty(inode); return len - towrite; diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index d5babc9f222b..afda0a0499ce 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -25,6 +25,7 @@ #include <linux/fs.h> #include <linux/buffer_head.h> #include <linux/slab.h> +#include <linux/iversion.h> #include "ext4.h" #include "xattr.h" @@ -208,7 +209,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the block * to make sure. */ - if (file->f_version != inode->i_version) { + if (inode_cmp_iversion(inode, file->f_version)) { for (i = 0; i < sb->s_blocksize && i < offset; ) { de = (struct ext4_dir_entry_2 *) (bh->b_data + i); @@ -227,7 +228,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) offset = i; ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1)) | offset; - file->f_version = inode->i_version; + file->f_version = inode_query_iversion(inode); } while (ctx->pos < inode->i_size @@ -568,10 +569,10 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) * cached entries. */ if ((!info->curr_node) || - (file->f_version != inode->i_version)) { + inode_cmp_iversion(inode, file->f_version)) { info->curr_node = NULL; free_rb_tree_fname(&info->root); - file->f_version = inode->i_version; + file->f_version = inode_query_iversion(inode); ret = ext4_htree_fill_tree(file, info->curr_hash, info->curr_minor_hash, &info->next_hash); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 1367553c43bb..a8b987b71173 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -14,6 +14,7 @@ #include <linux/iomap.h> #include <linux/fiemap.h> +#include <linux/iversion.h> #include "ext4_jbd2.h" #include "ext4.h" @@ -1042,7 +1043,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle, */ dir->i_mtime = dir->i_ctime = current_time(dir); ext4_update_dx_flag(dir); - dir->i_version++; + inode_inc_iversion(dir); return 1; } @@ -1494,7 +1495,7 @@ int ext4_read_inline_dir(struct file *file, * dirent right now. Scan from the start of the inline * dir to make sure. */ - if (file->f_version != inode->i_version) { + if (inode_cmp_iversion(inode, file->f_version)) { for (i = 0; i < extra_size && i < offset;) { /* * "." is with offset 0 and @@ -1526,7 +1527,7 @@ int ext4_read_inline_dir(struct file *file, } offset = i; ctx->pos = offset; - file->f_version = inode->i_version; + file->f_version = inode_query_iversion(inode); } while (ctx->pos < extra_size) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 534a9130f625..0eff5b761c6e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -39,6 +39,7 @@ #include <linux/slab.h> #include <linux/bitops.h> #include <linux/iomap.h> +#include <linux/iversion.h> #include "ext4_jbd2.h" #include "xattr.h" @@ -4882,12 +4883,14 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { - inode->i_version = le32_to_cpu(raw_inode->i_disk_version); + u64 ivers = le32_to_cpu(raw_inode->i_disk_version); + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) - inode->i_version |= + ivers |= (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; } + inode_set_iversion_queried(inode, ivers); } ret = 0; @@ -5173,11 +5176,13 @@ static int ext4_do_update_inode(handle_t *handle, } if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { - raw_inode->i_disk_version = cpu_to_le32(inode->i_version); + u64 ivers = inode_peek_iversion(inode); + + raw_inode->i_disk_version = cpu_to_le32(ivers); if (ei->i_extra_isize) { if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) raw_inode->i_version_hi = - cpu_to_le32(inode->i_version >> 32); + cpu_to_le32(ivers >> 32); raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 1eec25014f62..7e99ad02f1ba 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -19,6 +19,7 @@ #include <linux/uuid.h> #include <linux/uaccess.h> #include <linux/delay.h> +#include <linux/iversion.h> #include "ext4_jbd2.h" #include "ext4.h" #include <linux/fsmap.h> @@ -144,7 +145,7 @@ static long swap_inode_boot_loader(struct super_block *sb, i_gid_write(inode_bl, 0); inode_bl->i_flags = 0; ei_bl->i_flags = 0; - inode_bl->i_version = 1; + inode_set_iversion(inode_bl, 1); i_size_write(inode_bl, 0); inode_bl->i_mode = S_IFREG; if (ext4_has_feature_extents(sb)) { diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index e750d68fbcb5..6660686e505a 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -34,6 +34,7 @@ #include <linux/quotaops.h> #include <linux/buffer_head.h> #include <linux/bio.h> +#include <linux/iversion.h> #include "ext4.h" #include "ext4_jbd2.h" @@ -2959,7 +2960,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) "empty directory '%.*s' has too many links (%u)", dentry->d_name.len, dentry->d_name.name, inode->i_nlink); - inode->i_version++; + inode_inc_iversion(inode); clear_nlink(inode); /* There's no need to set i_disksize: the fact that i_nlink is * zero will ensure that the right thing happens during any @@ -3365,7 +3366,7 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent, ent->de->inode = cpu_to_le32(ino); if (ext4_has_feature_filetype(ent->dir->i_sb)) ent->de->file_type = file_type; - ent->dir->i_version++; + inode_inc_iversion(ent->dir); ent->dir->i_ctime = ent->dir->i_mtime = current_time(ent->dir); ext4_mark_inode_dirty(handle, ent->dir); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7c46693a14d7..5de959fb0244 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -40,6 +40,7 @@ #include <linux/dax.h> #include <linux/cleancache.h> #include <linux/uaccess.h> +#include <linux/iversion.h> #include <linux/kthread.h> #include <linux/freezer.h> @@ -967,7 +968,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) if (!ei) return NULL; - ei->vfs_inode.i_version = 1; + inode_set_iversion(&ei->vfs_inode, 1); spin_lock_init(&ei->i_raw_lock); INIT_LIST_HEAD(&ei->i_prealloc_list); spin_lock_init(&ei->i_prealloc_lock); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 218a7ba57819..63656dbafdc4 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -56,6 +56,7 @@ #include <linux/slab.h> #include <linux/mbcache.h> #include <linux/quotaops.h> +#include <linux/iversion.h> #include "ext4_jbd2.h" #include "ext4.h" #include "xattr.h" @@ -294,13 +295,13 @@ ext4_xattr_inode_hash(struct ext4_sb_info *sbi, const void *buffer, size_t size) static u64 ext4_xattr_inode_get_ref(struct inode *ea_inode) { return ((u64)ea_inode->i_ctime.tv_sec << 32) | - ((u32)ea_inode->i_version); + (u32) inode_peek_iversion_raw(ea_inode); } static void ext4_xattr_inode_set_ref(struct inode *ea_inode, u64 ref_count) { ea_inode->i_ctime.tv_sec = (u32)(ref_count >> 32); - ea_inode->i_version = (u32)ref_count; + inode_set_iversion_raw(ea_inode, ref_count & 0xffffffff); } static u32 ext4_xattr_inode_get_hash(struct inode *ea_inode) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index b833ffeee1e1..8e100c3bf72c 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -16,6 +16,7 @@ #include <linux/slab.h> #include <linux/compat.h> #include <linux/uaccess.h> +#include <linux/iversion.h> #include "fat.h" /* @@ -1055,7 +1056,7 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo) brelse(bh); if (err) return err; - dir->i_version++; + inode_inc_iversion(dir); if (nr_slots) { /* diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 20a0a89eaca5..ffbbf0520d9e 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -20,6 +20,7 @@ #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <asm/unaligned.h> +#include <linux/iversion.h> #include "fat.h" #ifndef CONFIG_FAT_DEFAULT_IOCHARSET @@ -507,7 +508,7 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) MSDOS_I(inode)->i_pos = 0; inode->i_uid = sbi->options.fs_uid; inode->i_gid = sbi->options.fs_gid; - inode->i_version++; + inode_inc_iversion(inode); inode->i_generation = get_seconds(); if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) { @@ -590,7 +591,7 @@ struct inode *fat_build_inode(struct super_block *sb, goto out; } inode->i_ino = iunique(sb, MSDOS_ROOT_INO); - inode->i_version = 1; + inode_set_iversion(inode, 1); err = fat_fill_inode(inode, de); if (err) { iput(inode); @@ -1377,7 +1378,7 @@ static int fat_read_root(struct inode *inode) MSDOS_I(inode)->i_pos = MSDOS_ROOT_INO; inode->i_uid = sbi->options.fs_uid; inode->i_gid = sbi->options.fs_gid; - inode->i_version++; + inode_inc_iversion(inode); inode->i_generation = 0; inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO); inode->i_op = sbi->dir_ops; @@ -1828,7 +1829,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, if (!root_inode) goto out_fail; root_inode->i_ino = MSDOS_ROOT_INO; - root_inode->i_version = 1; + inode_set_iversion(root_inode, 1); error = fat_read_root(root_inode); if (error < 0) { iput(root_inode); diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index d24d2758a363..582ca731a6c9 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -7,6 +7,7 @@ */ #include <linux/module.h> +#include <linux/iversion.h> #include "fat.h" /* Characters that are undesirable in an MS-DOS file name */ @@ -480,7 +481,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, } else mark_inode_dirty(old_inode); - old_dir->i_version++; + inode_inc_iversion(old_dir); old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir); if (IS_DIRSYNC(old_dir)) (void)fat_sync_inode(old_dir); @@ -508,7 +509,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, goto out; new_i_pos = sinfo.i_pos; } - new_dir->i_version++; + inode_inc_iversion(new_dir); fat_detach(old_inode); fat_attach(old_inode, new_i_pos); @@ -540,7 +541,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, old_sinfo.bh = NULL; if (err) goto error_dotdot; - old_dir->i_version++; + inode_inc_iversion(old_dir); old_dir->i_ctime = old_dir->i_mtime = ts; if (IS_DIRSYNC(old_dir)) (void)fat_sync_inode(old_dir); diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 02c066663a3a..cefea792cde8 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -20,7 +20,7 @@ #include <linux/slab.h> #include <linux/namei.h> #include <linux/kernel.h> - +#include <linux/iversion.h> #include "fat.h" static inline unsigned long vfat_d_version(struct dentry *dentry) @@ -46,7 +46,7 @@ static int vfat_revalidate_shortname(struct dentry *dentry) { int ret = 1; spin_lock(&dentry->d_lock); - if (vfat_d_version(dentry) != d_inode(dentry->d_parent)->i_version) + if (inode_cmp_iversion(d_inode(dentry->d_parent), vfat_d_version(dentry))) ret = 0; spin_unlock(&dentry->d_lock); return ret; @@ -759,7 +759,7 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, out: mutex_unlock(&MSDOS_SB(sb)->s_lock); if (!inode) - vfat_d_version_set(dentry, dir->i_version); + vfat_d_version_set(dentry, inode_query_iversion(dir)); return d_splice_alias(inode, dentry); error: mutex_unlock(&MSDOS_SB(sb)->s_lock); @@ -781,7 +781,7 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, err = vfat_add_entry(dir, &dentry->d_name, 0, 0, &ts, &sinfo); if (err) goto out; - dir->i_version++; + inode_inc_iversion(dir); inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); brelse(sinfo.bh); @@ -789,7 +789,7 @@ static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, err = PTR_ERR(inode); goto out; } - inode->i_version++; + inode_inc_iversion(inode); inode->i_mtime = inode->i_atime = inode->i_ctime = ts; /* timestamp is already written, so mark_inode_dirty() is unneeded. */ @@ -823,7 +823,7 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) clear_nlink(inode); inode->i_mtime = inode->i_atime = current_time(inode); fat_detach(inode); - vfat_d_version_set(dentry, dir->i_version); + vfat_d_version_set(dentry, inode_query_iversion(dir)); out: mutex_unlock(&MSDOS_SB(sb)->s_lock); @@ -849,7 +849,7 @@ static int vfat_unlink(struct inode *dir, struct dentry *dentry) clear_nlink(inode); inode->i_mtime = inode->i_atime = current_time(inode); fat_detach(inode); - vfat_d_version_set(dentry, dir->i_version); + vfat_d_version_set(dentry, inode_query_iversion(dir)); out: mutex_unlock(&MSDOS_SB(sb)->s_lock); @@ -875,7 +875,7 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) err = vfat_add_entry(dir, &dentry->d_name, 1, cluster, &ts, &sinfo); if (err) goto out_free; - dir->i_version++; + inode_inc_iversion(dir); inc_nlink(dir); inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); @@ -885,7 +885,7 @@ static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) /* the directory was completed, just return a error */ goto out; } - inode->i_version++; + inode_inc_iversion(inode); set_nlink(inode, 2); inode->i_mtime = inode->i_atime = inode->i_ctime = ts; /* timestamp is already written, so mark_inode_dirty() is unneeded. */ @@ -951,7 +951,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; new_i_pos = sinfo.i_pos; } - new_dir->i_version++; + inode_inc_iversion(new_dir); fat_detach(old_inode); fat_attach(old_inode, new_i_pos); @@ -979,7 +979,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, old_sinfo.bh = NULL; if (err) goto error_dotdot; - old_dir->i_version++; + inode_inc_iversion(old_dir); old_dir->i_ctime = old_dir->i_mtime = ts; if (IS_DIRSYNC(old_dir)) (void)fat_sync_inode(old_dir); diff --git a/fs/inode.c b/fs/inode.c index 03102d6ef044..e2ca0f4b5151 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -18,6 +18,7 @@ #include <linux/buffer_head.h> /* for inode_has_buffers */ #include <linux/ratelimit.h> #include <linux/list_lru.h> +#include <linux/iversion.h> #include <trace/events/writeback.h> #include "internal.h" @@ -1634,17 +1635,21 @@ static int relatime_need_update(const struct path *path, struct inode *inode, int generic_update_time(struct inode *inode, struct timespec *time, int flags) { int iflags = I_DIRTY_TIME; + bool dirty = false; if (flags & S_ATIME) inode->i_atime = *time; if (flags & S_VERSION) - inode_inc_iversion(inode); + dirty = inode_maybe_inc_iversion(inode, false); if (flags & S_CTIME) inode->i_ctime = *time; if (flags & S_MTIME) inode->i_mtime = *time; + if ((flags & (S_ATIME | S_CTIME | S_MTIME)) && + !(inode->i_sb->s_flags & SB_LAZYTIME)) + dirty = true; - if (!(inode->i_sb->s_flags & SB_LAZYTIME) || (flags & S_VERSION)) + if (dirty) iflags |= I_DIRTY_SYNC; __mark_inode_dirty(inode, iflags); return 0; @@ -1863,7 +1868,7 @@ int file_update_time(struct file *file) if (!timespec_equal(&inode->i_ctime, &now)) sync_it |= S_CTIME; - if (IS_I_VERSION(inode)) + if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode)) sync_it |= S_VERSION; if (!sync_it) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index ade44ca0c66c..d8b47624fee2 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -12,6 +12,7 @@ #include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <linux/iversion.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> @@ -347,7 +348,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs4_stateid_copy(&delegation->stateid, &res->delegation); delegation->type = res->delegation_type; delegation->pagemod_limit = res->pagemod_limit; - delegation->change_attr = inode->i_version; + delegation->change_attr = inode_peek_iversion_raw(inode); delegation->cred = get_rpccred(cred); delegation->inode = inode; delegation->flags = 1<<NFS_DELEGATION_REFERENCED; diff --git a/fs/nfs/fscache-index.c b/fs/nfs/fscache-index.c index 3025fe8584a0..0ee4b93d36ea 100644 --- a/fs/nfs/fscache-index.c +++ b/fs/nfs/fscache-index.c @@ -16,6 +16,7 @@ #include <linux/nfs_fs.h> #include <linux/nfs_fs_sb.h> #include <linux/in6.h> +#include <linux/iversion.h> #include "internal.h" #include "fscache.h" @@ -211,7 +212,7 @@ static uint16_t nfs_fscache_inode_get_aux(const void *cookie_netfs_data, auxdata.ctime = nfsi->vfs_inode.i_ctime; if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) - auxdata.change_attr = nfsi->vfs_inode.i_version; + auxdata.change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode); if (bufmax > sizeof(auxdata)) bufmax = sizeof(auxdata); @@ -243,7 +244,7 @@ enum fscache_checkaux nfs_fscache_inode_check_aux(void *cookie_netfs_data, auxdata.ctime = nfsi->vfs_inode.i_ctime; if (NFS_SERVER(&nfsi->vfs_inode)->nfs_client->rpc_ops->version == 4) - auxdata.change_attr = nfsi->vfs_inode.i_version; + auxdata.change_attr = inode_peek_iversion_raw(&nfsi->vfs_inode); if (memcmp(data, &auxdata, datalen) != 0) return FSCACHE_CHECKAUX_OBSOLETE; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b992d2382ffa..93552c482992 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -38,8 +38,8 @@ #include <linux/slab.h> #include <linux/compat.h> #include <linux/freezer.h> - #include <linux/uaccess.h> +#include <linux/iversion.h> #include "nfs4_fs.h" #include "callback.h" @@ -483,7 +483,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st memset(&inode->i_atime, 0, sizeof(inode->i_atime)); memset(&inode->i_mtime, 0, sizeof(inode->i_mtime)); memset(&inode->i_ctime, 0, sizeof(inode->i_ctime)); - inode->i_version = 0; + inode_set_iversion_raw(inode, 0); inode->i_size = 0; clear_nlink(inode); inode->i_uid = make_kuid(&init_user_ns, -2); @@ -508,7 +508,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st else if (nfs_server_capable(inode, NFS_CAP_CTIME)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) - inode->i_version = fattr->change_attr; + inode_set_iversion_raw(inode, fattr->change_attr); else nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE); @@ -1289,8 +1289,8 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE) && (fattr->valid & NFS_ATTR_FATTR_CHANGE) - && inode->i_version == fattr->pre_change_attr) { - inode->i_version = fattr->change_attr; + && !inode_cmp_iversion_raw(inode, fattr->pre_change_attr)) { + inode_set_iversion_raw(inode, fattr->change_attr); if (S_ISDIR(inode->i_mode)) nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA); ret |= NFS_INO_INVALID_ATTR; @@ -1348,7 +1348,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (!nfs_file_has_buffered_writers(nfsi)) { /* Verify a few of the more important attributes */ - if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && inode->i_version != fattr->change_attr) + if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && inode_cmp_iversion_raw(inode, fattr->change_attr)) invalid |= NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE; if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime)) @@ -1642,7 +1642,7 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa } if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && (fattr->valid & NFS_ATTR_FATTR_PRECHANGE) == 0) { - fattr->pre_change_attr = inode->i_version; + fattr->pre_change_attr = inode_peek_iversion_raw(inode); fattr->valid |= NFS_ATTR_FATTR_PRECHANGE; } if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 && @@ -1778,7 +1778,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* More cache consistency checks */ if (fattr->valid & NFS_ATTR_FATTR_CHANGE) { - if (inode->i_version != fattr->change_attr) { + if (inode_cmp_iversion_raw(inode, fattr->change_attr)) { dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); /* Could it be a race with writeback? */ @@ -1790,7 +1790,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (S_ISDIR(inode->i_mode)) nfs_force_lookup_revalidate(inode); } - inode->i_version = fattr->change_attr; + inode_set_iversion_raw(inode, fattr->change_attr); } } else { nfsi->cache_validity |= save_cache_validity; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 56fa5a16e097..17a03f2c4330 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -54,6 +54,7 @@ #include <linux/xattr.h> #include <linux/utsname.h> #include <linux/freezer.h> +#include <linux/iversion.h> #include "nfs4_fs.h" #include "delegation.h" @@ -1045,16 +1046,16 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo, spin_lock(&dir->i_lock); nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; - if (cinfo->atomic && cinfo->before == dir->i_version) { + if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(dir)) { nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; nfsi->attrtimeo_timestamp = jiffies; } else { nfs_force_lookup_revalidate(dir); - if (cinfo->before != dir->i_version) + if (cinfo->before != inode_peek_iversion_raw(dir)) nfsi->cache_validity |= NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; } - dir->i_version = cinfo->after; + inode_set_iversion_raw(dir, cinfo->after); nfsi->read_cache_jiffies = timestamp; nfsi->attr_gencount = nfs_inc_attr_generation_counter(); nfs_fscache_invalidate(dir); @@ -2454,7 +2455,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) data->file_created = true; else if (o_res->cinfo.before != o_res->cinfo.after) data->file_created = true; - if (data->file_created || dir->i_version != o_res->cinfo.after) + if (data->file_created || + inode_peek_iversion_raw(dir) != o_res->cinfo.after) update_changeattr(dir, &o_res->cinfo, o_res->f_attr->time_start); } diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 093290c42d7c..610d89d8942e 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -9,6 +9,7 @@ #define _TRACE_NFS_H #include <linux/tracepoint.h> +#include <linux/iversion.h> #define nfs_show_file_type(ftype) \ __print_symbolic(ftype, \ @@ -61,7 +62,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event, __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); - __entry->version = inode->i_version; + __entry->version = inode_peek_iversion_raw(inode); ), TP_printk( @@ -100,7 +101,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event_done, __entry->fileid = nfsi->fileid; __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); __entry->type = nfs_umode_to_dtype(inode->i_mode); - __entry->version = inode->i_version; + __entry->version = inode_peek_iversion_raw(inode); __entry->size = i_size_read(inode); __entry->nfsi_flags = nfsi->flags; __entry->cache_validity = nfsi->cache_validity; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4a379d7918f2..12b2d477836b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -23,6 +23,7 @@ #include <linux/export.h> #include <linux/freezer.h> #include <linux/wait.h> +#include <linux/iversion.h> #include <linux/uaccess.h> @@ -753,11 +754,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) */ spin_lock(&mapping->private_lock); if (!nfs_have_writebacks(inode) && - NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) { - spin_lock(&inode->i_lock); - inode->i_version++; - spin_unlock(&inode->i_lock); - } + NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) + inode_inc_iversion_raw(inode); if (likely(!PageSwapCache(req->wb_page))) { set_bit(PG_MAPPED, &req->wb_flags); SetPagePrivate(req->wb_page); diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 43f31cf49bae..b8444189223b 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -11,6 +11,7 @@ #include <linux/crc32.h> #include <linux/sunrpc/svc.h> #include <uapi/linux/nfsd/nfsfh.h> +#include <linux/iversion.h> static inline __u32 ino_t_to_u32(ino_t ino) { @@ -259,7 +260,7 @@ static inline u64 nfsd4_change_attribute(struct inode *inode) chattr = inode->i_ctime.tv_sec; chattr <<= 30; chattr += inode->i_ctime.tv_nsec; - chattr += inode->i_version; + chattr += inode_query_iversion(inode); return chattr; } diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index 7c410f879412..1c1ee489284b 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -560,13 +560,6 @@ static int ntfs_read_locked_inode(struct inode *vi) ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino); /* Setup the generic vfs inode parts now. */ - - /* - * This is for checking whether an inode has changed w.r.t. a file so - * that the file can be updated if necessary (compare with f_version). - */ - vi->i_version = 1; - vi->i_uid = vol->uid; vi->i_gid = vol->gid; vi->i_mode = 0; @@ -1240,7 +1233,6 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) base_ni = NTFS_I(base_vi); /* Just mirror the values from the base inode. */ - vi->i_version = base_vi->i_version; vi->i_uid = base_vi->i_uid; vi->i_gid = base_vi->i_gid; set_nlink(vi, base_vi->i_nlink); @@ -1507,7 +1499,6 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi) ni = NTFS_I(vi); base_ni = NTFS_I(base_vi); /* Just mirror the values from the base inode. */ - vi->i_version = base_vi->i_version; vi->i_uid = base_vi->i_uid; vi->i_gid = base_vi->i_gid; set_nlink(vi, base_vi->i_nlink); diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index ee8392aee9f6..2831f495a674 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -2641,12 +2641,6 @@ mft_rec_already_initialized: goto undo_mftbmp_alloc; } vi->i_ino = bit; - /* - * This is for checking whether an inode has changed w.r.t. a - * file so that the file can be updated if necessary (compare - * with f_version). - */ - vi->i_version = 1; /* The owner and group come from the ntfs volume. */ vi->i_uid = vol->uid; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index febe6312ceff..32f9c72dff17 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -42,6 +42,7 @@ #include <linux/highmem.h> #include <linux/quotaops.h> #include <linux/sort.h> +#include <linux/iversion.h> #include <cluster/masklog.h> @@ -1174,7 +1175,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir, le16_add_cpu(&pde->rec_len, le16_to_cpu(de->rec_len)); de->inode = 0; - dir->i_version++; + inode_inc_iversion(dir); ocfs2_journal_dirty(handle, bh); goto bail; } @@ -1729,7 +1730,7 @@ int __ocfs2_add_entry(handle_t *handle, if (ocfs2_dir_indexed(dir)) ocfs2_recalc_free_list(dir, handle, lookup); - dir->i_version++; + inode_inc_iversion(dir); ocfs2_journal_dirty(handle, insert_bh); retval = 0; goto bail; @@ -1775,7 +1776,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode, * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the block * to make sure. */ - if (*f_version != inode->i_version) { + if (inode_cmp_iversion(inode, *f_version)) { for (i = 0; i < i_size_read(inode) && i < offset; ) { de = (struct ocfs2_dir_entry *) (data->id_data + i); @@ -1791,7 +1792,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode, i += le16_to_cpu(de->rec_len); } ctx->pos = offset = i; - *f_version = inode->i_version; + *f_version = inode_query_iversion(inode); } de = (struct ocfs2_dir_entry *) (data->id_data + ctx->pos); @@ -1869,7 +1870,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the block * to make sure. */ - if (*f_version != inode->i_version) { + if (inode_cmp_iversion(inode, *f_version)) { for (i = 0; i < sb->s_blocksize && i < offset; ) { de = (struct ocfs2_dir_entry *) (bh->b_data + i); /* It's too expensive to do a full @@ -1886,7 +1887,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, offset = i; ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1)) | offset; - *f_version = inode->i_version; + *f_version = inode_query_iversion(inode); } while (ctx->pos < i_size_read(inode) @@ -1940,7 +1941,7 @@ static int ocfs2_dir_foreach_blk(struct inode *inode, u64 *f_version, */ int ocfs2_dir_foreach(struct inode *inode, struct dir_context *ctx) { - u64 version = inode->i_version; + u64 version = inode_query_iversion(inode); ocfs2_dir_foreach_blk(inode, &version, ctx, true); return 0; } diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 1a1e0078ab38..d51b80edd972 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -28,6 +28,7 @@ #include <linux/highmem.h> #include <linux/pagemap.h> #include <linux/quotaops.h> +#include <linux/iversion.h> #include <asm/byteorder.h> @@ -302,7 +303,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features); - inode->i_version = 1; + inode_set_iversion(inode, 1); inode->i_generation = le32_to_cpu(fe->i_generation); inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); inode->i_mode = le16_to_cpu(fe->i_mode); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 3b0a10d9b36f..c801eddc4bf3 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -41,6 +41,7 @@ #include <linux/slab.h> #include <linux/highmem.h> #include <linux/quotaops.h> +#include <linux/iversion.h> #include <cluster/masklog.h> @@ -1520,7 +1521,7 @@ static int ocfs2_rename(struct inode *old_dir, mlog_errno(status); goto bail; } - new_dir->i_version++; + inode_inc_iversion(new_dir); if (S_ISDIR(new_inode->i_mode)) ocfs2_set_links_count(newfe, 0); diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index b39d14cbfa34..7a922190a8c7 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -12,6 +12,7 @@ #include <linux/writeback.h> #include <linux/workqueue.h> #include <linux/llist.h> +#include <linux/iversion.h> #include <cluster/masklog.h> @@ -289,7 +290,7 @@ out: mlog_errno(err); return err; } - gqinode->i_version++; + inode_inc_iversion(gqinode); ocfs2_mark_inode_dirty(handle, gqinode, oinfo->dqi_gqi_bh); return len; } diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 2edc1755b7c5..50dfce000864 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -20,6 +20,7 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/swap.h> +#include <linux/iversion.h> #include "ufs_fs.h" #include "ufs.h" @@ -47,7 +48,7 @@ static int ufs_commit_chunk(struct page *page, loff_t pos, unsigned len) struct inode *dir = mapping->host; int err = 0; - dir->i_version++; + inode_inc_iversion(dir); block_write_end(NULL, mapping, pos, len, len, page, NULL); if (pos+len > dir->i_size) { i_size_write(dir, pos+len); @@ -428,7 +429,7 @@ ufs_readdir(struct file *file, struct dir_context *ctx) unsigned long n = pos >> PAGE_SHIFT; unsigned long npages = dir_pages(inode); unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); - int need_revalidate = file->f_version != inode->i_version; + bool need_revalidate = inode_cmp_iversion(inode, file->f_version); unsigned flags = UFS_SB(sb)->s_flags; UFSD("BEGIN\n"); @@ -455,8 +456,8 @@ ufs_readdir(struct file *file, struct dir_context *ctx) offset = ufs_validate_entry(sb, kaddr, offset, chunk_mask); ctx->pos = (n<<PAGE_SHIFT) + offset; } - file->f_version = inode->i_version; - need_revalidate = 0; + file->f_version = inode_query_iversion(inode); + need_revalidate = false; } de = (struct ufs_dir_entry *)(kaddr+offset); limit = kaddr + ufs_last_byte(inode, n) - UFS_DIR_REC_LEN(1); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index afb601c0dda0..c843ec858cf7 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -36,6 +36,7 @@ #include <linux/mm.h> #include <linux/buffer_head.h> #include <linux/writeback.h> +#include <linux/iversion.h> #include "ufs_fs.h" #include "ufs.h" @@ -693,7 +694,7 @@ struct inode *ufs_iget(struct super_block *sb, unsigned long ino) if (err) goto bad_inode; - inode->i_version++; + inode_inc_iversion(inode); ufsi->i_lastfrag = (inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift; ufsi->i_dir_start_lookup = 0; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 4d497e9c6883..b6ba80e05bff 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -88,6 +88,7 @@ #include <linux/log2.h> #include <linux/mount.h> #include <linux/seq_file.h> +#include <linux/iversion.h> #include "ufs_fs.h" #include "ufs.h" @@ -1440,7 +1441,7 @@ static struct inode *ufs_alloc_inode(struct super_block *sb) if (!ei) return NULL; - ei->vfs_inode.i_version = 1; + inode_set_iversion(&ei->vfs_inode, 1); seqlock_init(&ei->meta_lock); mutex_init(&ei->truncate_mutex); return &ei->vfs_inode; diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 6b7989038d75..b9c0bf80669c 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -32,6 +32,8 @@ #include "xfs_ialloc.h" #include "xfs_dir2.h" +#include <linux/iversion.h> + /* * Check that none of the inode's in the buffer have a next * unlinked field of 0. @@ -264,7 +266,8 @@ xfs_inode_from_disk( to->di_flags = be16_to_cpu(from->di_flags); if (to->di_version == 3) { - inode->i_version = be64_to_cpu(from->di_changecount); + inode_set_iversion_queried(inode, + be64_to_cpu(from->di_changecount)); to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec); to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec); to->di_flags2 = be64_to_cpu(from->di_flags2); @@ -314,7 +317,7 @@ xfs_inode_to_disk( to->di_flags = cpu_to_be16(from->di_flags); if (from->di_version == 3) { - to->di_changecount = cpu_to_be64(inode->i_version); + to->di_changecount = cpu_to_be64(inode_peek_iversion(inode)); to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec); to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec); to->di_flags2 = cpu_to_be64(from->di_flags2); diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 3861d61fb265..3bcb8fd2a826 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -37,6 +37,7 @@ #include <linux/kthread.h> #include <linux/freezer.h> +#include <linux/iversion.h> /* * Allocate and initialise an xfs_inode. @@ -293,14 +294,14 @@ xfs_reinit_inode( int error; uint32_t nlink = inode->i_nlink; uint32_t generation = inode->i_generation; - uint64_t version = inode->i_version; + uint64_t version = inode_peek_iversion(inode); umode_t mode = inode->i_mode; error = inode_init_always(mp->m_super, inode); set_nlink(inode, nlink); inode->i_generation = generation; - inode->i_version = version; + inode_set_iversion_queried(inode, version); inode->i_mode = mode; return error; } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 6f95bdb408ce..9f424e0aef1f 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -16,6 +16,7 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include <linux/log2.h> +#include <linux/iversion.h> #include "xfs.h" #include "xfs_fs.h" @@ -832,7 +833,7 @@ xfs_ialloc( ip->i_d.di_flags = 0; if (ip->i_d.di_version == 3) { - inode->i_version = 1; + inode_set_iversion(inode, 1); ip->i_d.di_flags2 = 0; ip->i_d.di_cowextsize = 0; ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 6ee5c3bf19ad..7571abf5dfb3 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -30,6 +30,7 @@ #include "xfs_buf_item.h" #include "xfs_log.h" +#include <linux/iversion.h> kmem_zone_t *xfs_ili_zone; /* inode log item zone */ @@ -354,7 +355,7 @@ xfs_inode_to_log_dinode( to->di_next_unlinked = NULLAGINO; if (from->di_version == 3) { - to->di_changecount = inode->i_version; + to->di_changecount = inode_peek_iversion(inode); to->di_crtime.t_sec = from->di_crtime.t_sec; to->di_crtime.t_nsec = from->di_crtime.t_nsec; to->di_flags2 = from->di_flags2; diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index daa7615497f9..4a89da4b6fe7 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -28,6 +28,8 @@ #include "xfs_inode_item.h" #include "xfs_trace.h" +#include <linux/iversion.h> + /* * Add a locked inode to the transaction. * @@ -110,15 +112,17 @@ xfs_trans_log_inode( /* * First time we log the inode in a transaction, bump the inode change - * counter if it is configured for this to occur. We don't use - * inode_inc_version() because there is no need for extra locking around - * i_version as we already hold the inode locked exclusively for - * metadata modification. + * counter if it is configured for this to occur. While we have the + * inode locked exclusively for metadata modification, we can usually + * avoid setting XFS_ILOG_CORE if no one has queried the value since + * the last time it was incremented. If we have XFS_ILOG_CORE already + * set however, then go ahead and bump the i_version counter + * unconditionally. */ if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) && IS_I_VERSION(VFS_I(ip))) { - VFS_I(ip)->i_version++; - flags |= XFS_ILOG_CORE; + if (inode_maybe_inc_iversion(VFS_I(ip), flags & XFS_ILOG_CORE)) + flags |= XFS_ILOG_CORE; } tp->t_flags |= XFS_TRANS_DIRTY; diff --git a/include/linux/fs.h b/include/linux/fs.h index 511fbaabf624..6804d075933e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -639,7 +639,7 @@ struct inode { struct hlist_head i_dentry; struct rcu_head i_rcu; }; - u64 i_version; + atomic64_t i_version; atomic_t i_count; atomic_t i_dio_count; atomic_t i_writecount; @@ -2036,21 +2036,6 @@ static inline void inode_dec_link_count(struct inode *inode) mark_inode_dirty(inode); } -/** - * inode_inc_iversion - increments i_version - * @inode: inode that need to be updated - * - * Every time the inode is modified, the i_version field will be incremented. - * The filesystem has to be mounted with i_version flag - */ - -static inline void inode_inc_iversion(struct inode *inode) -{ - spin_lock(&inode->i_lock); - inode->i_version++; - spin_unlock(&inode->i_lock); -} - enum file_time_flags { S_ATIME = 1, S_MTIME = 2, diff --git a/include/linux/iversion.h b/include/linux/iversion.h new file mode 100644 index 000000000000..858463fca249 --- /dev/null +++ b/include/linux/iversion.h @@ -0,0 +1,341 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_IVERSION_H +#define _LINUX_IVERSION_H + +#include <linux/fs.h> + +/* + * The inode->i_version field: + * --------------------------- + * The change attribute (i_version) is mandated by NFSv4 and is mostly for + * knfsd, but is also used for other purposes (e.g. IMA). The i_version must + * appear different to observers if there was a change to the inode's data or + * metadata since it was last queried. + * + * Observers see the i_version as a 64-bit number that never decreases. If it + * remains the same since it was last checked, then nothing has changed in the + * inode. If it's different then something has changed. Observers cannot infer + * anything about the nature or magnitude of the changes from the value, only + * that the inode has changed in some fashion. + * + * Not all filesystems properly implement the i_version counter. Subsystems that + * want to use i_version field on an inode should first check whether the + * filesystem sets the SB_I_VERSION flag (usually via the IS_I_VERSION macro). + * + * Those that set SB_I_VERSION will automatically have their i_version counter + * incremented on writes to normal files. If the SB_I_VERSION is not set, then + * the VFS will not touch it on writes, and the filesystem can use it how it + * wishes. Note that the filesystem is always responsible for updating the + * i_version on namespace changes in directories (mkdir, rmdir, unlink, etc.). + * We consider these sorts of filesystems to have a kernel-managed i_version. + * + * It may be impractical for filesystems to keep i_version updates atomic with + * respect to the changes that cause them. They should, however, guarantee + * that i_version updates are never visible before the changes that caused + * them. Also, i_version updates should never be delayed longer than it takes + * the original change to reach disk. + * + * This implementation uses the low bit in the i_version field as a flag to + * track when the value has been queried. If it has not been queried since it + * was last incremented, we can skip the increment in most cases. + * + * In the event that we're updating the ctime, we will usually go ahead and + * bump the i_version anyway. Since that has to go to stable storage in some + * fashion, we might as well increment it as well. + * + * With this implementation, the value should always appear to observers to + * increase over time if the file has changed. It's recommended to use + * inode_cmp_iversion() helper to compare values. + * + * Note that some filesystems (e.g. NFS and AFS) just use the field to store + * a server-provided value (for the most part). For that reason, those + * filesystems do not set SB_I_VERSION. These filesystems are considered to + * have a self-managed i_version. + * + * Persistently storing the i_version + * ---------------------------------- + * Queries of the i_version field are not gated on them hitting the backing + * store. It's always possible that the host could crash after allowing + * a query of the value but before it has made it to disk. + * + * To mitigate this problem, filesystems should always use + * inode_set_iversion_queried when loading an existing inode from disk. This + * ensures that the next attempted inode increment will result in the value + * changing. + * + * Storing the value to disk therefore does not count as a query, so those + * filesystems should use inode_peek_iversion to grab the value to be stored. + * There is no need to flag the value as having been queried in that case. + */ + +/* + * We borrow the lowest bit in the i_version to use as a flag to tell whether + * it has been queried since we last incremented it. If it has, then we must + * increment it on the next change. After that, we can clear the flag and + * avoid incrementing it again until it has again been queried. + */ +#define I_VERSION_QUERIED_SHIFT (1) +#define I_VERSION_QUERIED (1ULL << (I_VERSION_QUERIED_SHIFT - 1)) +#define I_VERSION_INCREMENT (1ULL << I_VERSION_QUERIED_SHIFT) + +/** + * inode_set_iversion_raw - set i_version to the specified raw value + * @inode: inode to set + * @val: new i_version value to set + * + * Set @inode's i_version field to @val. This function is for use by + * filesystems that self-manage the i_version. + * + * For example, the NFS client stores its NFSv4 change attribute in this way, + * and the AFS client stores the data_version from the server here. + */ +static inline void +inode_set_iversion_raw(struct inode *inode, u64 val) +{ + atomic64_set(&inode->i_version, val); +} + +/** + * inode_peek_iversion_raw - grab a "raw" iversion value + * @inode: inode from which i_version should be read + * + * Grab a "raw" inode->i_version value and return it. The i_version is not + * flagged or converted in any way. This is mostly used to access a self-managed + * i_version. + * + * With those filesystems, we want to treat the i_version as an entirely + * opaque value. + */ +static inline u64 +inode_peek_iversion_raw(const struct inode *inode) +{ + return atomic64_read(&inode->i_version); +} + +/** + * inode_set_iversion - set i_version to a particular value + * @inode: inode to set + * @val: new i_version value to set + * + * Set @inode's i_version field to @val. This function is for filesystems with + * a kernel-managed i_version, for initializing a newly-created inode from + * scratch. + * + * In this case, we do not set the QUERIED flag since we know that this value + * has never been queried. + */ +static inline void +inode_set_iversion(struct inode *inode, u64 val) +{ + inode_set_iversion_raw(inode, val << I_VERSION_QUERIED_SHIFT); +} + +/** + * inode_set_iversion_queried - set i_version to a particular value as quereied + * @inode: inode to set + * @val: new i_version value to set + * + * Set @inode's i_version field to @val, and flag it for increment on the next + * change. + * + * Filesystems that persistently store the i_version on disk should use this + * when loading an existing inode from disk. + * + * When loading in an i_version value from a backing store, we can't be certain + * that it wasn't previously viewed before being stored. Thus, we must assume + * that it was, to ensure that we don't end up handing out the same value for + * different versions of the same inode. + */ +static inline void +inode_set_iversion_queried(struct inode *inode, u64 val) +{ + inode_set_iversion_raw(inode, (val << I_VERSION_QUERIED_SHIFT) | + I_VERSION_QUERIED); +} + +/** + * inode_maybe_inc_iversion - increments i_version + * @inode: inode with the i_version that should be updated + * @force: increment the counter even if it's not necessary? + * + * Every time the inode is modified, the i_version field must be seen to have + * changed by any observer. + * + * If "force" is set or the QUERIED flag is set, then ensure that we increment + * the value, and clear the queried flag. + * + * In the common case where neither is set, then we can return "false" without + * updating i_version. + * + * If this function returns false, and no other metadata has changed, then we + * can avoid logging the metadata. + */ +static inline bool +inode_maybe_inc_iversion(struct inode *inode, bool force) +{ + u64 cur, old, new; + + /* + * The i_version field is not strictly ordered with any other inode + * information, but the legacy inode_inc_iversion code used a spinlock + * to serialize increments. + * + * Here, we add full memory barriers to ensure that any de-facto + * ordering with other info is preserved. + * + * This barrier pairs with the barrier in inode_query_iversion() + */ + smp_mb(); + cur = inode_peek_iversion_raw(inode); + for (;;) { + /* If flag is clear then we needn't do anything */ + if (!force && !(cur & I_VERSION_QUERIED)) + return false; + + /* Since lowest bit is flag, add 2 to avoid it */ + new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT; + + old = atomic64_cmpxchg(&inode->i_version, cur, new); + if (likely(old == cur)) + break; + cur = old; + } + return true; +} + + +/** + * inode_inc_iversion - forcibly increment i_version + * @inode: inode that needs to be updated + * + * Forcbily increment the i_version field. This always results in a change to + * the observable value. + */ +static inline void +inode_inc_iversion(struct inode *inode) +{ + inode_maybe_inc_iversion(inode, true); +} + +/** + * inode_iversion_need_inc - is the i_version in need of being incremented? + * @inode: inode to check + * + * Returns whether the inode->i_version counter needs incrementing on the next + * change. Just fetch the value and check the QUERIED flag. + */ +static inline bool +inode_iversion_need_inc(struct inode *inode) +{ + return inode_peek_iversion_raw(inode) & I_VERSION_QUERIED; +} + +/** + * inode_inc_iversion_raw - forcibly increment raw i_version + * @inode: inode that needs to be updated + * + * Forcbily increment the raw i_version field. This always results in a change + * to the raw value. + * + * NFS will use the i_version field to store the value from the server. It + * mostly treats it as opaque, but in the case where it holds a write + * delegation, it must increment the value itself. This function does that. + */ +static inline void +inode_inc_iversion_raw(struct inode *inode) +{ + atomic64_inc(&inode->i_version); +} + +/** + * inode_peek_iversion - read i_version without flagging it to be incremented + * @inode: inode from which i_version should be read + * + * Read the inode i_version counter for an inode without registering it as a + * query. + * + * This is typically used by local filesystems that need to store an i_version + * on disk. In that situation, it's not necessary to flag it as having been + * viewed, as the result won't be used to gauge changes from that point. + */ +static inline u64 +inode_peek_iversion(const struct inode *inode) +{ + return inode_peek_iversion_raw(inode) >> I_VERSION_QUERIED_SHIFT; +} + +/** + * inode_query_iversion - read i_version for later use + * @inode: inode from which i_version should be read + * + * Read the inode i_version counter. This should be used by callers that wish + * to store the returned i_version for later comparison. This will guarantee + * that a later query of the i_version will result in a different value if + * anything has changed. + * + * In this implementation, we fetch the current value, set the QUERIED flag and + * then try to swap it into place with a cmpxchg, if it wasn't already set. If + * that fails, we try again with the newly fetched value from the cmpxchg. + */ +static inline u64 +inode_query_iversion(struct inode *inode) +{ + u64 cur, old, new; + + cur = inode_peek_iversion_raw(inode); + for (;;) { + /* If flag is already set, then no need to swap */ + if (cur & I_VERSION_QUERIED) { + /* + * This barrier (and the implicit barrier in the + * cmpxchg below) pairs with the barrier in + * inode_maybe_inc_iversion(). + */ + smp_mb(); + break; + } + + new = cur | I_VERSION_QUERIED; + old = atomic64_cmpxchg(&inode->i_version, cur, new); + if (likely(old == cur)) + break; + cur = old; + } + return cur >> I_VERSION_QUERIED_SHIFT; +} + +/** + * inode_cmp_iversion_raw - check whether the raw i_version counter has changed + * @inode: inode to check + * @old: old value to check against its i_version + * + * Compare the current raw i_version counter with a previous one. Returns 0 if + * they are the same or non-zero if they are different. + */ +static inline s64 +inode_cmp_iversion_raw(const struct inode *inode, u64 old) +{ + return (s64)inode_peek_iversion_raw(inode) - (s64)old; +} + +/** + * inode_cmp_iversion - check whether the i_version counter has changed + * @inode: inode to check + * @old: old value to check against its i_version + * + * Compare an i_version counter with a previous one. Returns 0 if they are + * the same, a positive value if the one in the inode appears newer than @old, + * and a negative value if @old appears to be newer than the one in the + * inode. + * + * Note that we don't need to set the QUERIED flag in this case, as the value + * in the inode is not being recorded for later use. + */ + +static inline s64 +inode_cmp_iversion(const struct inode *inode, u64 old) +{ + return (s64)(inode_peek_iversion_raw(inode) & ~I_VERSION_QUERIED) - + (s64)(old << I_VERSION_QUERIED_SHIFT); +} +#endif diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index c7e8db0ea4c0..c6ae42266270 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -18,6 +18,7 @@ #include <linux/fs.h> #include <linux/xattr.h> #include <linux/evm.h> +#include <linux/iversion.h> #include "ima.h" @@ -215,7 +216,7 @@ int ima_collect_measurement(struct integrity_iint_cache *iint, * which do not support i_version, support is limited to an initial * measurement/appraisal/audit. */ - i_version = file_inode(file)->i_version; + i_version = inode_query_iversion(inode); hash.hdr.algo = algo; /* Initialize hash digest to 0's in case of failure */ diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 770654694efc..06a70c5a2329 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -24,6 +24,7 @@ #include <linux/slab.h> #include <linux/xattr.h> #include <linux/ima.h> +#include <linux/iversion.h> #include "ima.h" @@ -127,7 +128,8 @@ static void ima_check_last_writer(struct integrity_iint_cache *iint, inode_lock(inode); if (atomic_read(&inode->i_writecount) == 1) { - if ((iint->version != inode->i_version) || + if (!IS_I_VERSION(inode) || + inode_cmp_iversion(inode, iint->version) || (iint->flags & IMA_NEW_FILE)) { iint->flags &= ~(IMA_DONE_MASK | IMA_NEW_FILE); iint->measured_pcrs = 0; |