summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-08-28 09:00:09 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-08-28 09:00:09 -0700
commit84ab1277ce5a90a8d1f377707d662ac43cc0918a (patch)
treeac7ca4f1213806f6489bb85f0049090ffd6a2de1 /fs
parent2dde18cd1d8fac735875f2e4987f11817cc0bc2c (diff)
parent22ed7ecdaefe0cac0c6e6295e83048af60435b13 (diff)
Merge tag 'v6.6-vfs.fs_context' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull mount API updates from Christian Brauner: "This introduces FSCONFIG_CMD_CREATE_EXCL which allows userspace to implement something like $ mount -t ext4 --exclusive /dev/sda /B which fails if a superblock for the requested filesystem does already exist instead of silently reusing an existing superblock. Without it, in the sequence $ move-mount -f xfs -o source=/dev/sda4 /A $ move-mount -f xfs -o noacl,source=/dev/sda4 /B the initial mounter will create a superblock. The second mounter will reuse the existing superblock, creating a bind-mount (see [1] for the source of the move-mount binary). The problem is that reusing an existing superblock means all mount options other than read-only and read-write will be silently ignored even if they are incompatible requests. For example, the second mount has requested no POSIX ACL support but since the existing superblock is reused POSIX ACL support will remain enabled. Such silent superblock reuse can easily become a security issue. After adding support for FSCONFIG_CMD_CREATE_EXCL to mount(8) in util-linux this can be fixed: $ move-mount -f xfs --exclusive -o source=/dev/sda4 /A $ move-mount -f xfs --exclusive -o noacl,source=/dev/sda4 /B Device or resource busy | move-mount.c: 300: do_fsconfig: i xfs: reusing existing filesystem not allowed This requires the new mount api. With the old mount api it would be necessary to plumb this through every legacy filesystem's file_system_type->mount() method. If they want this feature they are most welcome to switch to the new mount api" Link: https://github.com/brauner/move-mount-beneath [1] Link: https://lore.kernel.org/linux-block/20230704-fasching-wertarbeit-7c6ffb01c83d@brauner Link: https://lore.kernel.org/linux-block/20230705-pumpwerk-vielversprechend-a4b1fd947b65@brauner Link: https://lore.kernel.org/linux-fsdevel/20230725-einnahmen-warnschilder-17779aec0a97@brauner Link: https://lore.kernel.org/lkml/20230824-anzog-allheilmittel-e8c63e429a79@brauner/ * tag 'v6.6-vfs.fs_context' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: fs: add FSCONFIG_CMD_CREATE_EXCL fs: add vfs_cmd_reconfigure() fs: add vfs_cmd_create() super: remove get_tree_single_reconf()
Diffstat (limited to 'fs')
-rw-r--r--fs/fs_context.c1
-rw-r--r--fs/fsopen.c106
-rw-r--r--fs/super.c64
3 files changed, 104 insertions, 67 deletions
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 851214d1d013..30d82d2979af 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -692,6 +692,7 @@ void vfs_clean_context(struct fs_context *fc)
security_free_mnt_opts(&fc->security);
kfree(fc->source);
fc->source = NULL;
+ fc->exclusive = false;
fc->purpose = FS_CONTEXT_FOR_RECONFIGURE;
fc->phase = FS_CONTEXT_AWAITING_RECONF;
diff --git a/fs/fsopen.c b/fs/fsopen.c
index fc9d2d9fd234..ce03f6521c88 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -209,6 +209,72 @@ err:
return ret;
}
+static int vfs_cmd_create(struct fs_context *fc, bool exclusive)
+{
+ struct super_block *sb;
+ int ret;
+
+ if (fc->phase != FS_CONTEXT_CREATE_PARAMS)
+ return -EBUSY;
+
+ if (!mount_capable(fc))
+ return -EPERM;
+
+ /* require the new mount api */
+ if (exclusive && fc->ops == &legacy_fs_context_ops)
+ return -EOPNOTSUPP;
+
+ fc->phase = FS_CONTEXT_CREATING;
+ fc->exclusive = exclusive;
+
+ ret = vfs_get_tree(fc);
+ if (ret) {
+ fc->phase = FS_CONTEXT_FAILED;
+ return ret;
+ }
+
+ sb = fc->root->d_sb;
+ ret = security_sb_kern_mount(sb);
+ if (unlikely(ret)) {
+ fc_drop_locked(fc);
+ fc->phase = FS_CONTEXT_FAILED;
+ return ret;
+ }
+
+ /* vfs_get_tree() callchains will have grabbed @s_umount */
+ up_write(&sb->s_umount);
+ fc->phase = FS_CONTEXT_AWAITING_MOUNT;
+ return 0;
+}
+
+static int vfs_cmd_reconfigure(struct fs_context *fc)
+{
+ struct super_block *sb;
+ int ret;
+
+ if (fc->phase != FS_CONTEXT_RECONF_PARAMS)
+ return -EBUSY;
+
+ fc->phase = FS_CONTEXT_RECONFIGURING;
+
+ sb = fc->root->d_sb;
+ if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
+ fc->phase = FS_CONTEXT_FAILED;
+ return -EPERM;
+ }
+
+ down_write(&sb->s_umount);
+ ret = reconfigure_super(fc);
+ up_write(&sb->s_umount);
+ if (ret) {
+ fc->phase = FS_CONTEXT_FAILED;
+ return ret;
+ }
+
+ vfs_clean_context(fc);
+ return 0;
+}
+
/*
* Check the state and apply the configuration. Note that this function is
* allowed to 'steal' the value by setting param->xxx to NULL before returning.
@@ -216,7 +282,6 @@ err:
static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
struct fs_parameter *param)
{
- struct super_block *sb;
int ret;
ret = finish_clean_context(fc);
@@ -224,39 +289,11 @@ static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
return ret;
switch (cmd) {
case FSCONFIG_CMD_CREATE:
- if (fc->phase != FS_CONTEXT_CREATE_PARAMS)
- return -EBUSY;
- if (!mount_capable(fc))
- return -EPERM;
- fc->phase = FS_CONTEXT_CREATING;
- ret = vfs_get_tree(fc);
- if (ret)
- break;
- sb = fc->root->d_sb;
- ret = security_sb_kern_mount(sb);
- if (unlikely(ret)) {
- fc_drop_locked(fc);
- break;
- }
- up_write(&sb->s_umount);
- fc->phase = FS_CONTEXT_AWAITING_MOUNT;
- return 0;
+ return vfs_cmd_create(fc, false);
+ case FSCONFIG_CMD_CREATE_EXCL:
+ return vfs_cmd_create(fc, true);
case FSCONFIG_CMD_RECONFIGURE:
- if (fc->phase != FS_CONTEXT_RECONF_PARAMS)
- return -EBUSY;
- fc->phase = FS_CONTEXT_RECONFIGURING;
- sb = fc->root->d_sb;
- if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
- ret = -EPERM;
- break;
- }
- down_write(&sb->s_umount);
- ret = reconfigure_super(fc);
- up_write(&sb->s_umount);
- if (ret)
- break;
- vfs_clean_context(fc);
- return 0;
+ return vfs_cmd_reconfigure(fc);
default:
if (fc->phase != FS_CONTEXT_CREATE_PARAMS &&
fc->phase != FS_CONTEXT_RECONF_PARAMS)
@@ -264,8 +301,6 @@ static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
return vfs_parse_fs_param(fc, param);
}
- fc->phase = FS_CONTEXT_FAILED;
- return ret;
}
/**
@@ -353,6 +388,7 @@ SYSCALL_DEFINE5(fsconfig,
return -EINVAL;
break;
case FSCONFIG_CMD_CREATE:
+ case FSCONFIG_CMD_CREATE_EXCL:
case FSCONFIG_CMD_RECONFIGURE:
if (_key || _value || aux)
return -EINVAL;
diff --git a/fs/super.c b/fs/super.c
index e781226e2880..e04a403dbffd 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -546,17 +546,31 @@ bool mount_capable(struct fs_context *fc)
* @test: Comparison callback
* @set: Setup callback
*
- * Find or create a superblock using the parameters stored in the filesystem
- * context and the two callback functions.
+ * Create a new superblock or find an existing one.
*
- * If an extant superblock is matched, then that will be returned with an
- * elevated reference count that the caller must transfer or discard.
+ * The @test callback is used to find a matching existing superblock.
+ * Whether or not the requested parameters in @fc are taken into account
+ * is specific to the @test callback that is used. They may even be
+ * completely ignored.
+ *
+ * If an extant superblock is matched, it will be returned unless:
+ *
+ * (1) the namespace the filesystem context @fc and the extant
+ * superblock's namespace differ
+ *
+ * (2) the filesystem context @fc has requested that reusing an extant
+ * superblock is not allowed
+ *
+ * In both cases EBUSY will be returned.
*
* If no match is made, a new superblock will be allocated and basic
- * initialisation will be performed (s_type, s_fs_info and s_id will be set and
- * the set() callback will be invoked), the superblock will be published and it
- * will be returned in a partially constructed state with SB_BORN and SB_ACTIVE
- * as yet unset.
+ * initialisation will be performed (s_type, s_fs_info and s_id will be
+ * set and the @set callback will be invoked), the superblock will be
+ * published and it will be returned in a partially constructed state
+ * with SB_BORN and SB_ACTIVE as yet unset.
+ *
+ * Return: On success, an extant or newly created superblock is
+ * returned. On failure an error pointer is returned.
*/
struct super_block *sget_fc(struct fs_context *fc,
int (*test)(struct super_block *, struct fs_context *),
@@ -603,9 +617,13 @@ retry:
return s;
share_extant_sb:
- if (user_ns != old->s_user_ns) {
+ if (user_ns != old->s_user_ns || fc->exclusive) {
spin_unlock(&sb_lock);
destroy_unused_super(s);
+ if (fc->exclusive)
+ warnfc(fc, "reusing existing filesystem not allowed");
+ else
+ warnfc(fc, "reusing existing filesystem in another namespace not allowed");
return ERR_PTR(-EBUSY);
}
if (!grab_super(old))
@@ -1136,7 +1154,7 @@ static int test_single_super(struct super_block *s, struct fs_context *fc)
return 1;
}
-static int vfs_get_super(struct fs_context *fc, bool reconf,
+static int vfs_get_super(struct fs_context *fc,
int (*test)(struct super_block *, struct fs_context *),
int (*fill_super)(struct super_block *sb,
struct fs_context *fc))
@@ -1154,19 +1172,9 @@ static int vfs_get_super(struct fs_context *fc, bool reconf,
goto error;
sb->s_flags |= SB_ACTIVE;
- fc->root = dget(sb->s_root);
- } else {
- fc->root = dget(sb->s_root);
- if (reconf) {
- err = reconfigure_super(fc);
- if (err < 0) {
- dput(fc->root);
- fc->root = NULL;
- goto error;
- }
- }
}
+ fc->root = dget(sb->s_root);
return 0;
error:
@@ -1178,7 +1186,7 @@ int get_tree_nodev(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc))
{
- return vfs_get_super(fc, false, NULL, fill_super);
+ return vfs_get_super(fc, NULL, fill_super);
}
EXPORT_SYMBOL(get_tree_nodev);
@@ -1186,25 +1194,17 @@ int get_tree_single(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc))
{
- return vfs_get_super(fc, false, test_single_super, fill_super);
+ return vfs_get_super(fc, test_single_super, fill_super);
}
EXPORT_SYMBOL(get_tree_single);
-int get_tree_single_reconf(struct fs_context *fc,
- int (*fill_super)(struct super_block *sb,
- struct fs_context *fc))
-{
- return vfs_get_super(fc, true, test_single_super, fill_super);
-}
-EXPORT_SYMBOL(get_tree_single_reconf);
-
int get_tree_keyed(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc),
void *key)
{
fc->s_fs_info = key;
- return vfs_get_super(fc, false, test_keyed_super, fill_super);
+ return vfs_get_super(fc, test_keyed_super, fill_super);
}
EXPORT_SYMBOL(get_tree_keyed);