From 41124db869b7e00e12052555f8987867ac01d70c Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 1 Jun 2017 11:08:01 -0700 Subject: fs: warn in case userspace lied about modprobe return kmod <= v19 was broken -- it could return 0 to modprobe calls, incorrectly assuming that a kernel module was built-in, whereas in reality the module was just forming in the kernel. The reason for this is an incorrect userspace heuristic. A userspace kmod fix is available for it [0], however should userspace break again we could go on with a failed get_fs_type() which is hard to debug as the request_module() is detected as returning 0. The first suspect would be that there is something wrong with the kernel's module loader and obviously in this case that is not the issue. Since these issues are painful to debug, complain when we know userspace has outright lied to us. [0] http://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4 Suggested-by: Rusty Russell Cc: Jessica Yu Signed-off-by: Luis R. Rodriguez Signed-off-by: Al Viro --- fs/filesystems.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/filesystems.c b/fs/filesystems.c index cac75547d35c..8b99955e3504 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -275,8 +275,10 @@ struct file_system_type *get_fs_type(const char *name) int len = dot ?
dot - name : strlen(name); fs = __get_fs_type(name, len); - if (!fs && (request_module("fs-%.*s", len, name) == 0)) + if (!fs && (request_module("fs-%.*s", len, name) == 0)) { fs = __get_fs_type(name, len); + WARN_ONCE(!fs, "request_module fs-%.*s succeeded, but still no fs?\n", len, name); + } if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) { put_filesystem(fs); -- cgit v1.2.3 From cc658db47d6897a8571fb6227f59d1d18151b0b2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 21 Jun 2017 09:53:06 -0700 Subject: fs: Reorder inode_owner_or_capable() to avoid needless Checking for capabilities should be the last operation when performing access control tests so that PF_SUPERPRIV is set only when it was required for success (implying that the capability was needed for the operation). Reported-by: Solar Designer Signed-off-by: Kees Cook Acked-by: Serge Hallyn Reviewed-by: Andy Lutomirski Signed-off-by: Al Viro --- fs/inode.c | 2 +- fs/namei.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index db5914783a71..7092debe90cc 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2023,7 +2023,7 @@ bool inode_owner_or_capable(const struct inode *inode) return true; ns = current_user_ns(); - if (ns_capable(ns, CAP_FOWNER) && kuid_has_mapping(ns, inode->i_uid)) + if (kuid_has_mapping(ns, inode->i_uid) && ns_capable(ns, CAP_FOWNER)) return true; return false; } diff --git a/fs/namei.c b/fs/namei.c index 6571a5f5112e..efe53a5d0737 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1008,7 +1008,7 @@ static int may_linkat(struct path *link) /* Source inode owner (or CAP_FOWNER) can hardlink all they like, * otherwise, it must be a safe source. 
*/ - if (inode_owner_or_capable(inode) || safe_hardlink_source(inode)) + if (safe_hardlink_source(inode) || inode_owner_or_capable(inode)) return 0; audit_log_link_denied("linkat", link); -- cgit v1.2.3 From 4f2ed694148131f93baffca9e68b0cd8dcc96c38 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 21 Jun 2017 19:46:47 +0200 Subject: minix: Deinline get_block, save 2691 bytes This function compiles to 1402 bytes of machine code. It has 2 callsites, and also a not-inlined copy gets created by compiler anyway since its address gets passed as a parameter to block_truncate_page(). Signed-off-by: Denys Vlasenko CC: Al Viro CC: linux-fsdevel@vger.kernel.org CC: linux-kernel@vger.kernel.org Signed-off-by: Al Viro --- fs/minix/itree_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/minix/itree_common.c b/fs/minix/itree_common.c index 4c57c9af6946..2d1ca08870f7 100644 --- a/fs/minix/itree_common.c +++ b/fs/minix/itree_common.c @@ -142,7 +142,7 @@ changed: return -EAGAIN; } -static inline int get_block(struct inode * inode, sector_t block, +static int get_block(struct inode * inode, sector_t block, struct buffer_head *bh, int create) { int err = -EIO; -- cgit v1.2.3 From 6916363f3083837ed5adb3df2dd90d6b97017dff Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 27 Jun 2017 18:19:11 +0200 Subject: fs/dcache: init in_lookup_hashtable in_lookup_hashtable was introduced in commit 94bdd655caba ("parallel lookups machinery, part 3") and never initialized but since it is in the data it is all zeros. But we need this for -RT. 
Cc: Alexander Viro Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Al Viro --- fs/dcache.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/dcache.c b/fs/dcache.c index a9f995f6859e..b85da8897ffa 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3608,6 +3608,11 @@ EXPORT_SYMBOL(d_genocide); void __init vfs_caches_init_early(void) { + int i; + + for (i = 0; i < ARRAY_SIZE(in_lookup_hashtable); i++) + INIT_HLIST_BL_HEAD(&in_lookup_hashtable[i]); + dcache_init_early(); inode_init_early(); } -- cgit v1.2.3 From a8e2b6367794e6cee9eecba6d5ff425f338e0754 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 29 Jun 2017 11:25:40 +0200 Subject: Make statfs properly return read-only state after emergency remount Emergency remount (sysrq-u) sets MS_RDONLY on the superblock but doesn't set MNT_READONLY on the mount point. Since calculate_f_flags() only checks the mount point's read-only state when setting kstatfs flags, statfs does not report the filesystem as read-only after an emergency remount, even though it is. Enable flags_by_sb() to also check for superblock read only state, so the kstatfs and consequently statfs can properly show the read-only state of the filesystem.
Signed-off-by: Carlos Maiolino Signed-off-by: Al Viro --- fs/statfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/statfs.c b/fs/statfs.c index 4e4623c7a126..c1dfc374e3c1 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -38,6 +38,8 @@ static int flags_by_sb(int s_flags) flags |= ST_SYNCHRONOUS; if (s_flags & MS_MANDLOCK) flags |= ST_MANDLOCK; + if (s_flags & MS_RDONLY) + flags |= ST_RDONLY; return flags; } -- cgit v1.2.3 From 49d31c2f389acfe83417083e1208422b4091cd9e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 7 Jul 2017 14:51:19 -0400 Subject: dentry name snapshots take_dentry_name_snapshot() takes a safe snapshot of dentry name; if the name is a short one, it gets copied into caller-supplied structure, otherwise an extra reference to external name is grabbed (those are never modified). In either case the pointer to stable string is stored into the same structure. dentry must be held by the caller of take_dentry_name_snapshot(), but may be freely dropped afterwards - the snapshot will stay until destroyed by release_dentry_name_snapshot(). Intended use: struct name_snapshot s; take_dentry_name_snapshot(&s, dentry); ... access s.name ... release_dentry_name_snapshot(&s); Replaces fsnotify_oldname_...(), gets used in fsnotify to obtain the name to pass down with event. 
Signed-off-by: Al Viro --- fs/dcache.c | 27 +++++++++++++++++++++++++++ fs/debugfs/inode.c | 10 +++++----- fs/namei.c | 8 ++++---- fs/notify/fsnotify.c | 8 ++++++-- include/linux/dcache.h | 6 ++++++ include/linux/fsnotify.h | 31 ------------------------------- 6 files changed, 48 insertions(+), 42 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index b85da8897ffa..831f3a9a8f05 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -277,6 +277,33 @@ static inline int dname_external(const struct dentry *dentry) return dentry->d_name.name != dentry->d_iname; } +void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + if (unlikely(dname_external(dentry))) { + struct external_name *p = external_name(dentry); + atomic_inc(&p->u.count); + spin_unlock(&dentry->d_lock); + name->name = p->name; + } else { + memcpy(name->inline_name, dentry->d_iname, DNAME_INLINE_LEN); + spin_unlock(&dentry->d_lock); + name->name = name->inline_name; + } +} +EXPORT_SYMBOL(take_dentry_name_snapshot); + +void release_dentry_name_snapshot(struct name_snapshot *name) +{ + if (unlikely(name->name != name->inline_name)) { + struct external_name *p; + p = container_of(name->name, struct external_name, name[0]); + if (unlikely(atomic_dec_and_test(&p->u.count))) + kfree_rcu(p, u.head); + } +} +EXPORT_SYMBOL(release_dentry_name_snapshot); + static inline void __d_set_inode_and_type(struct dentry *dentry, struct inode *inode, unsigned type_flags) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index e892ae7d89f8..acd3be2cc691 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -766,7 +766,7 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, { int error; struct dentry *dentry = NULL, *trap; - const char *old_name; + struct name_snapshot old_name; trap = lock_rename(new_dir, old_dir); /* Source or destination directories don't exist? 
*/ @@ -781,19 +781,19 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, if (IS_ERR(dentry) || dentry == trap || d_really_is_positive(dentry)) goto exit; - old_name = fsnotify_oldname_init(old_dentry->d_name.name); + take_dentry_name_snapshot(&old_name, old_dentry); error = simple_rename(d_inode(old_dir), old_dentry, d_inode(new_dir), dentry, 0); if (error) { - fsnotify_oldname_free(old_name); + release_dentry_name_snapshot(&old_name); goto exit; } d_move(old_dentry, dentry); - fsnotify_move(d_inode(old_dir), d_inode(new_dir), old_name, + fsnotify_move(d_inode(old_dir), d_inode(new_dir), old_name.name, d_is_dir(old_dentry), NULL, old_dentry); - fsnotify_oldname_free(old_name); + release_dentry_name_snapshot(&old_name); unlock_rename(new_dir, old_dir); dput(dentry); return old_dentry; diff --git a/fs/namei.c b/fs/namei.c index efe53a5d0737..c5588e837b15 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4362,11 +4362,11 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, { int error; bool is_dir = d_is_dir(old_dentry); - const unsigned char *old_name; struct inode *source = old_dentry->d_inode; struct inode *target = new_dentry->d_inode; bool new_is_dir = false; unsigned max_links = new_dir->i_sb->s_max_links; + struct name_snapshot old_name; if (source == target) return 0; @@ -4413,7 +4413,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (error) return error; - old_name = fsnotify_oldname_init(old_dentry->d_name.name); + take_dentry_name_snapshot(&old_name, old_dentry); dget(new_dentry); if (!is_dir || (flags & RENAME_EXCHANGE)) lock_two_nondirectories(source, target); @@ -4468,14 +4468,14 @@ out: inode_unlock(target); dput(new_dentry); if (!error) { - fsnotify_move(old_dir, new_dir, old_name, is_dir, + fsnotify_move(old_dir, new_dir, old_name.name, is_dir, !(flags & RENAME_EXCHANGE) ? 
target : NULL, old_dentry); if (flags & RENAME_EXCHANGE) { fsnotify_move(new_dir, old_dir, old_dentry->d_name.name, new_is_dir, NULL, new_dentry); } } - fsnotify_oldname_free(old_name); + release_dentry_name_snapshot(&old_name); return error; } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 01a9f0f007d4..0c4583b61717 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -161,16 +161,20 @@ int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask if (unlikely(!fsnotify_inode_watches_children(p_inode))) __fsnotify_update_child_dentry_flags(p_inode); else if (p_inode->i_fsnotify_mask & mask) { + struct name_snapshot name; + /* we are notifying a parent so come up with the new mask which * specifies these are events which came from a child. */ mask |= FS_EVENT_ON_CHILD; + take_dentry_name_snapshot(&name, dentry); if (path) ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH, - dentry->d_name.name, 0); + name.name, 0); else ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, - dentry->d_name.name, 0); + name.name, 0); + release_dentry_name_snapshot(&name); } dput(parent); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index d2e38dc6172c..025727bf6797 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -591,5 +591,11 @@ static inline struct inode *d_real_inode(const struct dentry *dentry) return d_backing_inode(d_real((struct dentry *) dentry, NULL, 0)); } +struct name_snapshot { + const char *name; + char inline_name[DNAME_INLINE_LEN]; +}; +void take_dentry_name_snapshot(struct name_snapshot *, struct dentry *); +void release_dentry_name_snapshot(struct name_snapshot *); #endif /* __LINUX_DCACHE_H */ diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index b43d3f5bd9ea..b78aa7ac77ce 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -293,35 +293,4 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) } } 
-#if defined(CONFIG_FSNOTIFY) /* notify helpers */ - -/* - * fsnotify_oldname_init - save off the old filename before we change it - */ -static inline const unsigned char *fsnotify_oldname_init(const unsigned char *name) -{ - return kstrdup(name, GFP_KERNEL); -} - -/* - * fsnotify_oldname_free - free the name we got from fsnotify_oldname_init - */ -static inline void fsnotify_oldname_free(const unsigned char *old_name) -{ - kfree(old_name); -} - -#else /* CONFIG_FSNOTIFY */ - -static inline const char *fsnotify_oldname_init(const unsigned char *name) -{ - return NULL; -} - -static inline void fsnotify_oldname_free(const unsigned char *old_name) -{ -} - -#endif /* CONFIG_FSNOTIFY */ - #endif /* _LINUX_FS_NOTIFY_H */ -- cgit v1.2.3