From f2b20f6ee842313a0d681dbbf7f87b70291a6a3b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 16 Sep 2016 12:44:20 +0200 Subject: vfs: move permission checking into notify_change() for utimes(NULL) This fixes a bug where the permission was not properly checked in overlayfs. The testcase is ltp/utimensat01. It is also cleaner and safer to do the permission checking in the vfs helper instead of the caller. This patch introduces an additional ia_valid flag ATTR_TOUCH (since touch(1) is the most obvious user of utimes(NULL)) that is passed into notify_change whenever the conditions for this special permission checking mode are met. Reported-by: Aihua Zhang Signed-off-by: Miklos Szeredi Tested-by: Aihua Zhang Cc: # v3.18+ --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 901e25d495cc..7c391366fb43 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -224,6 +224,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define ATTR_KILL_PRIV (1 << 14) #define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ #define ATTR_TIMES_SET (1 << 16) +#define ATTR_TOUCH (1 << 17) /* * Whiteout is represented by a char device. The following constants define the -- cgit v1.2.3 From 598e3c8f72f5b77c84d2cb26cfd936ffb3cfdbaa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 16 Sep 2016 12:44:20 +0200 Subject: vfs: update ovl inode before relatime check On overlayfs relatime_need_update() needs inode times to be correct on overlay inode. But i_mtime and i_ctime are updated by filesystem code on underlying inode only, so they will be out-of-date on the overlay inode. This patch copies the times from the underlying inode if needed. This can't be done if called from RCU lookup (link following) but link m/ctime are not updated by fs, so this is all right. This patch doesn't change functionality for anything but overlayfs. 
Signed-off-by: Miklos Szeredi --- fs/inode.c | 33 +++++++++++++++++++++++++++------ fs/internal.h | 9 +++++++++ fs/namei.c | 2 +- include/linux/fs.h | 1 - 4 files changed, 37 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/inode.c b/fs/inode.c index 7e3ef3af3db9..4a1fc1631e00 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1535,17 +1535,37 @@ sector_t bmap(struct inode *inode, sector_t block) } EXPORT_SYMBOL(bmap); +/* + * Update times in overlayed inode from underlying real inode + */ +static void update_ovl_inode_times(struct dentry *dentry, struct inode *inode, + bool rcu) +{ + if (!rcu) { + struct inode *realinode = d_real_inode(dentry); + + if (unlikely(inode != realinode) && + (!timespec_equal(&inode->i_mtime, &realinode->i_mtime) || + !timespec_equal(&inode->i_ctime, &realinode->i_ctime))) { + inode->i_mtime = realinode->i_mtime; + inode->i_ctime = realinode->i_ctime; + } + } +} + /* * With relative atime, only update atime if the previous atime is * earlier than either the ctime or mtime or if at least a day has * passed since the last atime update. */ -static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, - struct timespec now) +static int relatime_need_update(const struct path *path, struct inode *inode, + struct timespec now, bool rcu) { - if (!(mnt->mnt_flags & MNT_RELATIME)) + if (!(path->mnt->mnt_flags & MNT_RELATIME)) return 1; + + update_ovl_inode_times(path->dentry, inode, rcu); /* * Is mtime younger than atime? If yes, update atime: */ @@ -1612,7 +1632,8 @@ static int update_time(struct inode *inode, struct timespec *time, int flags) * This function automatically handles read only file systems and media, * as well as the "noatime" flag and inode specific "noatime" markers. 
*/ -bool atime_needs_update(const struct path *path, struct inode *inode) +bool __atime_needs_update(const struct path *path, struct inode *inode, + bool rcu) { struct vfsmount *mnt = path->mnt; struct timespec now; @@ -1638,7 +1659,7 @@ bool atime_needs_update(const struct path *path, struct inode *inode) now = current_fs_time(inode->i_sb); - if (!relatime_need_update(mnt, inode, now)) + if (!relatime_need_update(path, inode, now, rcu)) return false; if (timespec_equal(&inode->i_atime, &now)) @@ -1653,7 +1674,7 @@ void touch_atime(const struct path *path) struct inode *inode = d_inode(path->dentry); struct timespec now; - if (!atime_needs_update(path, inode)) + if (!__atime_needs_update(path, inode, false)) return; if (!sb_start_write_trylock(inode->i_sb)) diff --git a/fs/internal.h b/fs/internal.h index ba0737649d4a..a63da5e96148 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -120,6 +120,15 @@ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc); extern void inode_add_lru(struct inode *inode); extern int dentry_needs_remove_privs(struct dentry *dentry); +extern bool __atime_needs_update(const struct path *, struct inode *, bool); +static inline bool atime_needs_update_rcu(const struct path *path, + struct inode *inode) +{ + return __atime_needs_update(path, inode, true); +} + +extern bool atime_needs_update_rcu(const struct path *, struct inode *); + /* * fs-writeback.c */ diff --git a/fs/namei.c b/fs/namei.c index adb04146df09..4bbcae1ba58e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1015,7 +1015,7 @@ const char *get_link(struct nameidata *nd) if (!(nd->flags & LOOKUP_RCU)) { touch_atime(&last->link); cond_resched(); - } else if (atime_needs_update(&last->link, inode)) { + } else if (atime_needs_update_rcu(&last->link, inode)) { if (unlikely(unlazy_walk(nd, NULL, 0))) return ERR_PTR(-ECHILD); touch_atime(&last->link); diff --git a/include/linux/fs.h b/include/linux/fs.h index 7c391366fb43..7db097d673a8 100644 --- 
a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2007,7 +2007,6 @@ enum file_time_flags { S_VERSION = 8, }; -extern bool atime_needs_update(const struct path *, struct inode *); extern void touch_atime(const struct path *); static inline void file_accessed(struct file *file) { -- cgit v1.2.3 From f3fbbb079263bd29ae592478de6808db7e708267 Mon Sep 17 00:00:00 2001 From: Aihua Zhang Date: Thu, 7 Jul 2016 15:37:53 +0800 Subject: fsnotify: support overlayfs When an event occurs direct it to the overlay inode instead of the real underlying inode. This will work even if the file was first on the lower layer and then copied up, while the watch is there. This is because the watch is on the overlay inode, which stays the same through the copy-up. For filesystems other than overlayfs this is a no-op, except for the performance impact of an extra pointer dereference. Verified to work correctly with the inotify/fanotify tests in LTP. Signed-off-by: Aihua Zhang Signed-off-by: Miklos Szeredi Cc: Jan Kara Cc: Eric Paris --- include/linux/fsnotify.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index eed9e853a06f..b8bcc058e031 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -29,7 +29,11 @@ static inline int fsnotify_parent(struct path *path, struct dentry *dentry, __u3 static inline int fsnotify_perm(struct file *file, int mask) { struct path *path = &file->f_path; - struct inode *inode = file_inode(file); + /* + * Do not use file_inode() here or anywhere in this file to get the + * inode. That would break *notify on overlayfs. 
+ */ + struct inode *inode = path->dentry->d_inode; __u32 fsnotify_mask = 0; int ret; @@ -173,7 +177,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) static inline void fsnotify_access(struct file *file) { struct path *path = &file->f_path; - struct inode *inode = file_inode(file); + struct inode *inode = path->dentry->d_inode; __u32 mask = FS_ACCESS; if (S_ISDIR(inode->i_mode)) @@ -191,7 +195,7 @@ static inline void fsnotify_access(struct file *file) static inline void fsnotify_modify(struct file *file) { struct path *path = &file->f_path; - struct inode *inode = file_inode(file); + struct inode *inode = path->dentry->d_inode; __u32 mask = FS_MODIFY; if (S_ISDIR(inode->i_mode)) @@ -209,7 +213,7 @@ static inline void fsnotify_modify(struct file *file) static inline void fsnotify_open(struct file *file) { struct path *path = &file->f_path; - struct inode *inode = file_inode(file); + struct inode *inode = path->dentry->d_inode; __u32 mask = FS_OPEN; if (S_ISDIR(inode->i_mode)) @@ -225,7 +229,7 @@ static inline void fsnotify_open(struct file *file) static inline void fsnotify_close(struct file *file) { struct path *path = &file->f_path; - struct inode *inode = file_inode(file); + struct inode *inode = path->dentry->d_inode; fmode_t mode = file->f_mode; __u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; -- cgit v1.2.3 From c568d68341be7030f5647def68851e469b21ca11 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 16 Sep 2016 12:44:20 +0200 Subject: locks: fix file locking on overlayfs This patch allows flock, posix locks, ofd locks and leases to work correctly on overlayfs. Instead of using the underlying inode for storing lock context use the overlay inode. This allows locks to be persistent across copy-up. This is done by introducing locks_inode() helper and using it instead of file_inode() to get the inode in locking code. 
For non-overlayfs the two are equivalent, except for an extra pointer dereference in locks_inode(). Since lock operations are in "struct file_operations" we must also make sure not to call underlying filesystem's lock operations. Introduce a super block flag MS_NOREMOTELOCK to this effect. Signed-off-by: Miklos Szeredi Acked-by: Jeff Layton Cc: "J. Bruce Fields" --- fs/locks.c | 50 +++++++++++++++++++++++++++---------------------- fs/namespace.c | 2 +- fs/open.c | 2 +- fs/overlayfs/super.c | 2 +- include/linux/fs.h | 16 ++++++++++++++-- include/uapi/linux/fs.h | 1 + 6 files changed, 46 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/fs/locks.c b/fs/locks.c index ee1b15f6fc13..c1656cff53ee 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -139,6 +139,11 @@ #define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT)) #define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK) +static inline bool is_remote_lock(struct file *filp) +{ + return likely(!(filp->f_path.dentry->d_sb->s_flags & MS_NOREMOTELOCK)); +} + static bool lease_breaking(struct file_lock *fl) { return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING); @@ -791,7 +796,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) { struct file_lock *cfl; struct file_lock_context *ctx; - struct inode *inode = file_inode(filp); + struct inode *inode = locks_inode(filp); ctx = smp_load_acquire(&inode->i_flctx); if (!ctx || list_empty_careful(&ctx->flc_posix)) { @@ -1192,7 +1197,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, int posix_lock_file(struct file *filp, struct file_lock *fl, struct file_lock *conflock) { - return posix_lock_inode(file_inode(filp), fl, conflock); + return posix_lock_inode(locks_inode(filp), fl, conflock); } EXPORT_SYMBOL(posix_lock_file); @@ -1232,7 +1237,7 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) int locks_mandatory_locked(struct file *file) { int ret; - struct inode *inode = 
file_inode(file); + struct inode *inode = locks_inode(file); struct file_lock_context *ctx; struct file_lock *fl; @@ -1572,7 +1577,7 @@ EXPORT_SYMBOL(lease_get_mtime); int fcntl_getlease(struct file *filp) { struct file_lock *fl; - struct inode *inode = file_inode(filp); + struct inode *inode = locks_inode(filp); struct file_lock_context *ctx; int type = F_UNLCK; LIST_HEAD(dispose); @@ -1580,7 +1585,7 @@ int fcntl_getlease(struct file *filp) ctx = smp_load_acquire(&inode->i_flctx); if (ctx && !list_empty_careful(&ctx->flc_lease)) { spin_lock(&ctx->flc_lock); - time_out_leases(file_inode(filp), &dispose); + time_out_leases(inode, &dispose); list_for_each_entry(fl, &ctx->flc_lease, fl_list) { if (fl->fl_file != filp) continue; @@ -1628,7 +1633,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr { struct file_lock *fl, *my_fl = NULL, *lease; struct dentry *dentry = filp->f_path.dentry; - struct inode *inode = file_inode(filp); + struct inode *inode = dentry->d_inode; struct file_lock_context *ctx; bool is_deleg = (*flp)->fl_flags & FL_DELEG; int error; @@ -1742,7 +1747,7 @@ static int generic_delete_lease(struct file *filp, void *owner) { int error = -EAGAIN; struct file_lock *fl, *victim = NULL; - struct inode *inode = file_inode(filp); + struct inode *inode = locks_inode(filp); struct file_lock_context *ctx; LIST_HEAD(dispose); @@ -1782,7 +1787,7 @@ static int generic_delete_lease(struct file *filp, void *owner) int generic_setlease(struct file *filp, long arg, struct file_lock **flp, void **priv) { - struct inode *inode = file_inode(filp); + struct inode *inode = locks_inode(filp); int error; if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE)) @@ -1830,7 +1835,7 @@ EXPORT_SYMBOL(generic_setlease); int vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv) { - if (filp->f_op->setlease) + if (filp->f_op->setlease && is_remote_lock(filp)) return filp->f_op->setlease(filp, arg, lease, priv); 
else return generic_setlease(filp, arg, lease, priv); @@ -1979,7 +1984,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) if (error) goto out_free; - if (f.file->f_op->flock) + if (f.file->f_op->flock && is_remote_lock(f.file)) error = f.file->f_op->flock(f.file, (can_sleep) ? F_SETLKW : F_SETLK, lock); @@ -2005,7 +2010,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) */ int vfs_test_lock(struct file *filp, struct file_lock *fl) { - if (filp->f_op->lock) + if (filp->f_op->lock && is_remote_lock(filp)) return filp->f_op->lock(filp, F_GETLK, fl); posix_test_lock(filp, fl); return 0; @@ -2129,7 +2134,7 @@ out: */ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) { - if (filp->f_op->lock) + if (filp->f_op->lock && is_remote_lock(filp)) return filp->f_op->lock(filp, cmd, fl); else return posix_lock_file(filp, fl, conf); @@ -2191,7 +2196,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, if (file_lock == NULL) return -ENOLCK; - inode = file_inode(filp); + inode = locks_inode(filp); /* * This might block, so we do it before checking the inode. @@ -2343,7 +2348,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, if (copy_from_user(&flock, l, sizeof(flock))) goto out; - inode = file_inode(filp); + inode = locks_inode(filp); /* Don't allow mandatory locks on files that may be memory mapped * and shared. @@ -2426,6 +2431,7 @@ out: void locks_remove_posix(struct file *filp, fl_owner_t owner) { int error; + struct inode *inode = locks_inode(filp); struct file_lock lock; struct file_lock_context *ctx; @@ -2434,7 +2440,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) * posix_lock_file(). Another process could be setting a lock on this * file at the same time, but we wouldn't remove that lock anyway. 
*/ - ctx = smp_load_acquire(&file_inode(filp)->i_flctx); + ctx = smp_load_acquire(&inode->i_flctx); if (!ctx || list_empty(&ctx->flc_posix)) return; @@ -2452,7 +2458,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner) if (lock.fl_ops && lock.fl_ops->fl_release_private) lock.fl_ops->fl_release_private(&lock); - trace_locks_remove_posix(file_inode(filp), &lock, error); + trace_locks_remove_posix(inode, &lock, error); } EXPORT_SYMBOL(locks_remove_posix); @@ -2469,12 +2475,12 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx) .fl_type = F_UNLCK, .fl_end = OFFSET_MAX, }; - struct inode *inode = file_inode(filp); + struct inode *inode = locks_inode(filp); if (list_empty(&flctx->flc_flock)) return; - if (filp->f_op->flock) + if (filp->f_op->flock && is_remote_lock(filp)) filp->f_op->flock(filp, F_SETLKW, &fl); else flock_lock_inode(inode, &fl); @@ -2508,7 +2514,7 @@ void locks_remove_file(struct file *filp) { struct file_lock_context *ctx; - ctx = smp_load_acquire(&file_inode(filp)->i_flctx); + ctx = smp_load_acquire(&locks_inode(filp)->i_flctx); if (!ctx) return; @@ -2552,7 +2558,7 @@ EXPORT_SYMBOL(posix_unblock_lock); */ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) { - if (filp->f_op->lock) + if (filp->f_op->lock && is_remote_lock(filp)) return filp->f_op->lock(filp, F_CANCELLK, fl); return 0; } @@ -2580,7 +2586,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, fl_pid = fl->fl_pid; if (fl->fl_file != NULL) - inode = file_inode(fl->fl_file); + inode = locks_inode(fl->fl_file); seq_printf(f, "%lld:%s ", id, pfx); if (IS_POSIX(fl)) { @@ -2682,7 +2688,7 @@ static void __show_fd_locks(struct seq_file *f, void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files) { - struct inode *inode = file_inode(filp); + struct inode *inode = locks_inode(filp); struct file_lock_context *ctx; int id = 0; diff --git a/fs/namespace.c b/fs/namespace.c index 7bb2cda3bfef..dcd9afe21e62 
100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2700,7 +2700,7 @@ long do_mount(const char *dev_name, const char __user *dir_name, flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN | MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT | - MS_STRICTATIME); + MS_STRICTATIME | MS_NOREMOTELOCK); if (flags & MS_REMOUNT) retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, diff --git a/fs/open.c b/fs/open.c index 4fd6e256f4f4..648fb9d3e97a 100644 --- a/fs/open.c +++ b/fs/open.c @@ -726,7 +726,7 @@ static int do_dentry_open(struct file *f, if (error) goto cleanup_all; - error = break_lease(inode, f->f_flags); + error = break_lease(locks_inode(f), f->f_flags); if (error) goto cleanup_all; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index e2a94a26767b..3d0b9dee2b76 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1320,7 +1320,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_xattr = ovl_xattr_handlers; sb->s_root = root_dentry; sb->s_fs_info = ufs; - sb->s_flags |= MS_POSIXACL; + sb->s_flags |= MS_POSIXACL | MS_NOREMOTELOCK; return 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index 7db097d673a8..8ee0f011547f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1065,6 +1065,18 @@ struct file_lock_context { extern void send_sigio(struct fown_struct *fown, int fd, int band); +/* + * Return the inode to use for locking + * + * For overlayfs this should be the overlay inode, not the real inode returned + * by file_inode(). For any other fs file_inode(filp) and locks_inode(filp) are + * equal. 
+ */ +static inline struct inode *locks_inode(const struct file *f) +{ + return f->f_path.dentry->d_inode; +} + #ifdef CONFIG_FILE_LOCKING extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *); extern int fcntl_setlk(unsigned int, struct file *, unsigned int, @@ -1252,7 +1264,7 @@ static inline struct dentry *file_dentry(const struct file *file) static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) { - return locks_lock_inode_wait(file_inode(filp), fl); + return locks_lock_inode_wait(locks_inode(filp), fl); } struct fasync_struct { @@ -2155,7 +2167,7 @@ static inline int mandatory_lock(struct inode *ino) static inline int locks_verify_locked(struct file *file) { - if (mandatory_lock(file_inode(file))) + if (mandatory_lock(locks_inode(file))) return locks_mandatory_locked(file); return 0; } diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 3b00f7c8943f..2473272169f2 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -132,6 +132,7 @@ struct inodes_stat_t { #define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ /* These sb flags are internal to the kernel */ +#define MS_NOREMOTELOCK (1<<27) #define MS_NOSEC (1<<28) #define MS_BORN (1<<29) #define MS_ACTIVE (1<<30) -- cgit v1.2.3 From 7b1742eb06ead6d02a6cf3c44587088e5392d1aa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 16 Sep 2016 12:44:20 +0200 Subject: vfs: make argument of d_real_inode() const d_op->d_real() leaves the dentry alone except if the third argument is non-zero. Unfortunately very difficult to explain to the compiler without a cast. 
Signed-off-by: Miklos Szeredi Acked-by: Jeff Layton --- include/linux/dcache.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 5ff3e9a4fe5f..5beed7b30561 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -584,9 +584,10 @@ static inline struct dentry *d_real(struct dentry *dentry, * If dentry is on an union/overlay, then return the underlying, real inode. * Otherwise return d_inode(). */ -static inline struct inode *d_real_inode(struct dentry *dentry) +static inline struct inode *d_real_inode(const struct dentry *dentry) { - return d_backing_inode(d_real(dentry, NULL, 0)); + /* This usage of d_real() results in const dentry */ + return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0)); } -- cgit v1.2.3