diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-15 12:34:01 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-15 12:34:01 -0700 |
commit | 98f3a9a4fd449641010c77abca16aebb0b8d4419 (patch) | |
tree | 20387bef96d75fa921c37ae6695ae9175b00752c /fs | |
parent | 1b074abe885f43b2c207b5e748ffa60604dbc020 (diff) | |
parent | 5b08bd408534bfb3a7cf5778da5b27d4e4fffe12 (diff) |
Merge tag 'vfs-6.11.pidfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull pidfs updates from Christian Brauner:
"This contains work to make it possible to derive namespace file
descriptors from pidfd file descriptors.
Right now it is already possible to use a pidfd with setns() to
atomically change multiple namespaces at the same time. In other
words, it is possible to switch to the namespace context of a process
using a pidfd. There is no need to first open namespace file
descriptors via procfs.
The work included here extends these abilities by allowing namespace
file descriptors to be opened using a pidfd. This means it is now
possible to interact with namespaces without ever touching procfs.
To this end, a new set of ioctl()s on pidfds is introduced, covering all
supported namespace types."
* tag 'vfs-6.11.pidfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
pidfs: allow retrieval of namespace file descriptors
nsfs: add open_namespace()
nsproxy: add helper to go from arbitrary namespace to ns_common
nsproxy: add a cleanup helper for nsproxy
file: add take_fd() cleanup helper
Diffstat (limited to 'fs')
-rw-r--r-- | fs/internal.h | 2 | ||||
-rw-r--r-- | fs/nsfs.c | 55 | ||||
-rw-r--r-- | fs/pidfs.c | 90 |
3 files changed, 123 insertions, 24 deletions
diff --git a/fs/internal.h b/fs/internal.h index f26454c60a98..cdd73209eecb 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -17,6 +17,7 @@ struct fs_context; struct pipe_inode_info; struct iov_iter; struct mnt_idmap; +struct ns_common; /* * block/bdev.c @@ -239,6 +240,7 @@ extern void mnt_pin_kill(struct mount *m); * fs/nsfs.c */ extern const struct dentry_operations ns_dentry_operations; +int open_namespace(struct ns_common *ns); /* * fs/stat.c: diff --git a/fs/nsfs.c b/fs/nsfs.c index ad6bb91a3e23..a4a925dce331 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -84,40 +84,47 @@ int ns_get_path(struct path *path, struct task_struct *task, return ns_get_path_cb(path, ns_get_path_task, &args); } -int open_related_ns(struct ns_common *ns, - struct ns_common *(*get_ns)(struct ns_common *ns)) +/** + * open_namespace - open a namespace + * @ns: the namespace to open + * + * This will consume a reference to @ns indendent of success or failure. + * + * Return: A file descriptor on success or a negative error code on failure. 
+ */ +int open_namespace(struct ns_common *ns) { - struct path path = {}; - struct ns_common *relative; + struct path path __free(path_put) = {}; struct file *f; int err; - int fd; - fd = get_unused_fd_flags(O_CLOEXEC); + /* call first to consume reference */ + err = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path); + if (err < 0) + return err; + + CLASS(get_unused_fd, fd)(O_CLOEXEC); if (fd < 0) return fd; + f = dentry_open(&path, O_RDONLY, current_cred()); + if (IS_ERR(f)) + return PTR_ERR(f); + + fd_install(fd, f); + return take_fd(fd); +} + +int open_related_ns(struct ns_common *ns, + struct ns_common *(*get_ns)(struct ns_common *ns)) +{ + struct ns_common *relative; + relative = get_ns(ns); - if (IS_ERR(relative)) { - put_unused_fd(fd); + if (IS_ERR(relative)) return PTR_ERR(relative); - } - err = path_from_stashed(&relative->stashed, nsfs_mnt, relative, &path); - if (err < 0) { - put_unused_fd(fd); - return err; - } - - f = dentry_open(&path, O_RDONLY, current_cred()); - path_put(&path); - if (IS_ERR(f)) { - put_unused_fd(fd); - fd = PTR_ERR(f); - } else - fd_install(fd, f); - - return fd; + return open_namespace(relative); } EXPORT_SYMBOL_GPL(open_related_ns); diff --git a/fs/pidfs.c b/fs/pidfs.c index dbb9d854d1c5..c9cb14181def 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -11,10 +11,16 @@ #include <linux/proc_fs.h> #include <linux/proc_ns.h> #include <linux/pseudo_fs.h> +#include <linux/ptrace.h> #include <linux/seq_file.h> #include <uapi/linux/pidfd.h> +#include <linux/ipc_namespace.h> +#include <linux/time_namespace.h> +#include <linux/utsname.h> +#include <net/net_namespace.h> #include "internal.h" +#include "mount.h" #ifdef CONFIG_PROC_FS /** @@ -108,11 +114,95 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) return poll_flags; } +static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct task_struct *task __free(put_task) = NULL; + struct nsproxy *nsp __free(put_nsproxy) = NULL; + 
struct pid *pid = pidfd_pid(file); + struct ns_common *ns_common; + + if (arg) + return -EINVAL; + + task = get_pid_task(pid, PIDTYPE_PID); + if (!task) + return -ESRCH; + + scoped_guard(task_lock, task) { + nsp = task->nsproxy; + if (nsp) + get_nsproxy(nsp); + } + if (!nsp) + return -ESRCH; /* just pretend it didn't exist */ + + /* + * We're trying to open a file descriptor to the namespace so perform a + * filesystem cred ptrace check. Also, we mirror nsfs behavior. + */ + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) + return -EACCES; + + switch (cmd) { + /* Namespaces that hang of nsproxy. */ + case PIDFD_GET_CGROUP_NAMESPACE: + get_cgroup_ns(nsp->cgroup_ns); + ns_common = to_ns_common(nsp->cgroup_ns); + break; + case PIDFD_GET_IPC_NAMESPACE: + get_ipc_ns(nsp->ipc_ns); + ns_common = to_ns_common(nsp->ipc_ns); + break; + case PIDFD_GET_MNT_NAMESPACE: + get_mnt_ns(nsp->mnt_ns); + ns_common = to_ns_common(nsp->mnt_ns); + break; + case PIDFD_GET_NET_NAMESPACE: + ns_common = to_ns_common(nsp->net_ns); + get_net_ns(ns_common); + break; + case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE: + get_pid_ns(nsp->pid_ns_for_children); + ns_common = to_ns_common(nsp->pid_ns_for_children); + break; + case PIDFD_GET_TIME_NAMESPACE: + get_time_ns(nsp->time_ns); + ns_common = to_ns_common(nsp->time_ns); + break; + case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE: + get_time_ns(nsp->time_ns_for_children); + ns_common = to_ns_common(nsp->time_ns_for_children); + break; + case PIDFD_GET_UTS_NAMESPACE: + get_uts_ns(nsp->uts_ns); + ns_common = to_ns_common(nsp->uts_ns); + break; + /* Namespaces that don't hang of nsproxy. 
*/ + case PIDFD_GET_USER_NAMESPACE: + rcu_read_lock(); + ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns))); + rcu_read_unlock(); + break; + case PIDFD_GET_PID_NAMESPACE: + rcu_read_lock(); + ns_common = to_ns_common(get_pid_ns(task_active_pid_ns(task))); + rcu_read_unlock(); + break; + default: + return -ENOIOCTLCMD; + } + + /* open_namespace() unconditionally consumes the reference */ + return open_namespace(ns_common); +} + static const struct file_operations pidfs_file_operations = { .poll = pidfd_poll, #ifdef CONFIG_PROC_FS .show_fdinfo = pidfd_show_fdinfo, #endif + .unlocked_ioctl = pidfd_ioctl, + .compat_ioctl = compat_ptr_ioctl, }; struct pid *pidfd_pid(const struct file *file) |