summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-07-15 12:34:01 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-07-15 12:34:01 -0700
commit98f3a9a4fd449641010c77abca16aebb0b8d4419 (patch)
tree20387bef96d75fa921c37ae6695ae9175b00752c /fs
parent1b074abe885f43b2c207b5e748ffa60604dbc020 (diff)
parent5b08bd408534bfb3a7cf5778da5b27d4e4fffe12 (diff)
Merge tag 'vfs-6.11.pidfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull pidfs updates from Christian Brauner: "This contains work to make it possible to derive namespace file descriptors from pidfd file descriptors. Right now it is already possible to use a pidfd with setns() to atomically change multiple namespaces at the same time. In other words, it is possible to switch to the namespace context of a process using a pidfd. There is no need to first open namespace file descriptors via procfs. The work included here is an extension of these abilities by allowing to open namespace file descriptors using a pidfd. This means it is now possible to interact with namespaces without ever touching procfs. To this end a new set of ioctls() on pidfds is introduced covering all supported namespace types" * tag 'vfs-6.11.pidfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: pidfs: allow retrieval of namespace file descriptors nsfs: add open_namespace() nsproxy: add helper to go from arbitrary namespace to ns_common nsproxy: add a cleanup helper for nsproxy file: add take_fd() cleanup helper
Diffstat (limited to 'fs')
-rw-r--r--fs/internal.h2
-rw-r--r--fs/nsfs.c55
-rw-r--r--fs/pidfs.c90
3 files changed, 123 insertions, 24 deletions
diff --git a/fs/internal.h b/fs/internal.h
index f26454c60a98..cdd73209eecb 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -17,6 +17,7 @@ struct fs_context;
struct pipe_inode_info;
struct iov_iter;
struct mnt_idmap;
+struct ns_common;
/*
* block/bdev.c
@@ -239,6 +240,7 @@ extern void mnt_pin_kill(struct mount *m);
* fs/nsfs.c
*/
extern const struct dentry_operations ns_dentry_operations;
+int open_namespace(struct ns_common *ns);
/*
* fs/stat.c:
diff --git a/fs/nsfs.c b/fs/nsfs.c
index ad6bb91a3e23..a4a925dce331 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -84,40 +84,47 @@ int ns_get_path(struct path *path, struct task_struct *task,
return ns_get_path_cb(path, ns_get_path_task, &args);
}
-int open_related_ns(struct ns_common *ns,
- struct ns_common *(*get_ns)(struct ns_common *ns))
+/**
+ * open_namespace - open a namespace
+ * @ns: the namespace to open
+ *
+ * This will consume a reference to @ns indendent of success or failure.
+ *
+ * Return: A file descriptor on success or a negative error code on failure.
+ */
+int open_namespace(struct ns_common *ns)
{
- struct path path = {};
- struct ns_common *relative;
+ struct path path __free(path_put) = {};
struct file *f;
int err;
- int fd;
- fd = get_unused_fd_flags(O_CLOEXEC);
+ /* call first to consume reference */
+ err = path_from_stashed(&ns->stashed, nsfs_mnt, ns, &path);
+ if (err < 0)
+ return err;
+
+ CLASS(get_unused_fd, fd)(O_CLOEXEC);
if (fd < 0)
return fd;
+ f = dentry_open(&path, O_RDONLY, current_cred());
+ if (IS_ERR(f))
+ return PTR_ERR(f);
+
+ fd_install(fd, f);
+ return take_fd(fd);
+}
+
+int open_related_ns(struct ns_common *ns,
+ struct ns_common *(*get_ns)(struct ns_common *ns))
+{
+ struct ns_common *relative;
+
relative = get_ns(ns);
- if (IS_ERR(relative)) {
- put_unused_fd(fd);
+ if (IS_ERR(relative))
return PTR_ERR(relative);
- }
- err = path_from_stashed(&relative->stashed, nsfs_mnt, relative, &path);
- if (err < 0) {
- put_unused_fd(fd);
- return err;
- }
-
- f = dentry_open(&path, O_RDONLY, current_cred());
- path_put(&path);
- if (IS_ERR(f)) {
- put_unused_fd(fd);
- fd = PTR_ERR(f);
- } else
- fd_install(fd, f);
-
- return fd;
+ return open_namespace(relative);
}
EXPORT_SYMBOL_GPL(open_related_ns);
diff --git a/fs/pidfs.c b/fs/pidfs.c
index dbb9d854d1c5..c9cb14181def 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -11,10 +11,16 @@
#include <linux/proc_fs.h>
#include <linux/proc_ns.h>
#include <linux/pseudo_fs.h>
+#include <linux/ptrace.h>
#include <linux/seq_file.h>
#include <uapi/linux/pidfd.h>
+#include <linux/ipc_namespace.h>
+#include <linux/time_namespace.h>
+#include <linux/utsname.h>
+#include <net/net_namespace.h>
#include "internal.h"
+#include "mount.h"
#ifdef CONFIG_PROC_FS
/**
@@ -108,11 +114,95 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
return poll_flags;
}
+static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct task_struct *task __free(put_task) = NULL;
+ struct nsproxy *nsp __free(put_nsproxy) = NULL;
+ struct pid *pid = pidfd_pid(file);
+ struct ns_common *ns_common;
+
+ if (arg)
+ return -EINVAL;
+
+ task = get_pid_task(pid, PIDTYPE_PID);
+ if (!task)
+ return -ESRCH;
+
+ scoped_guard(task_lock, task) {
+ nsp = task->nsproxy;
+ if (nsp)
+ get_nsproxy(nsp);
+ }
+ if (!nsp)
+ return -ESRCH; /* just pretend it didn't exist */
+
+ /*
+ * We're trying to open a file descriptor to the namespace so perform a
+ * filesystem cred ptrace check. Also, we mirror nsfs behavior.
+ */
+ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
+ return -EACCES;
+
+ switch (cmd) {
+ /* Namespaces that hang of nsproxy. */
+ case PIDFD_GET_CGROUP_NAMESPACE:
+ get_cgroup_ns(nsp->cgroup_ns);
+ ns_common = to_ns_common(nsp->cgroup_ns);
+ break;
+ case PIDFD_GET_IPC_NAMESPACE:
+ get_ipc_ns(nsp->ipc_ns);
+ ns_common = to_ns_common(nsp->ipc_ns);
+ break;
+ case PIDFD_GET_MNT_NAMESPACE:
+ get_mnt_ns(nsp->mnt_ns);
+ ns_common = to_ns_common(nsp->mnt_ns);
+ break;
+ case PIDFD_GET_NET_NAMESPACE:
+ ns_common = to_ns_common(nsp->net_ns);
+ get_net_ns(ns_common);
+ break;
+ case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE:
+ get_pid_ns(nsp->pid_ns_for_children);
+ ns_common = to_ns_common(nsp->pid_ns_for_children);
+ break;
+ case PIDFD_GET_TIME_NAMESPACE:
+ get_time_ns(nsp->time_ns);
+ ns_common = to_ns_common(nsp->time_ns);
+ break;
+ case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE:
+ get_time_ns(nsp->time_ns_for_children);
+ ns_common = to_ns_common(nsp->time_ns_for_children);
+ break;
+ case PIDFD_GET_UTS_NAMESPACE:
+ get_uts_ns(nsp->uts_ns);
+ ns_common = to_ns_common(nsp->uts_ns);
+ break;
+ /* Namespaces that don't hang of nsproxy. */
+ case PIDFD_GET_USER_NAMESPACE:
+ rcu_read_lock();
+ ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns)));
+ rcu_read_unlock();
+ break;
+ case PIDFD_GET_PID_NAMESPACE:
+ rcu_read_lock();
+ ns_common = to_ns_common(get_pid_ns(task_active_pid_ns(task)));
+ rcu_read_unlock();
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+
+ /* open_namespace() unconditionally consumes the reference */
+ return open_namespace(ns_common);
+}
+
static const struct file_operations pidfs_file_operations = {
.poll = pidfd_poll,
#ifdef CONFIG_PROC_FS
.show_fdinfo = pidfd_show_fdinfo,
#endif
+ .unlocked_ioctl = pidfd_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
};
struct pid *pidfd_pid(const struct file *file)