diff options
author | Andrei Vagin <avagin@gmail.com> | 2022-09-13 03:25:51 -0700 |
---|---|---|
committer | Kees Cook <keescook@chromium.org> | 2022-09-13 10:38:43 -0700 |
commit | 33a2d6bc3480f9f8ac8c8def29854f98cc8bfee2 (patch) | |
tree | 466386c55362d0c87eefaa71305d21908190df46 | |
parent | 2b1e8921fc355ff5afda65690d6e40ce25b80cd7 (diff) |
Revert "fs/exec: allow to unshare a time namespace on vfork+exec"
This reverts commit 133e2d3e81de5d9706cab2dd1d52d231c27382e5.
Alexey pointed out a few undesirable side effects of the reverted change.
First, it doesn't take into account that CLONE_VFORK can be used with
CLONE_THREAD. Second, a child process doesn't enter a target time name-space,
if its parent dies before the child calls exec. It happens because the parent
clears vfork_done.
Eric W. Biederman suggests installing a time namespace as a task gets a new mm.
It includes all new processes cloned without CLONE_VM and all tasks that call
exec(). This is an user API change, but we think there aren't users that depend
on the old behavior.
It is too late to make such changes in this release, so let's roll back
this patch and introduce the right one in the next release.
Cc: Alexey Izbyshev <izbyshev@ispras.ru>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Dmitry Safonov <0x7f454c46@gmail.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220913102551.1121611-3-avagin@google.com
-rw-r--r-- | fs/exec.c | 7 | ||||
-rw-r--r-- | kernel/fork.c | 5 | ||||
-rw-r--r-- | kernel/nsproxy.c | 3 |
3 files changed, 2 insertions, 13 deletions
diff --git a/fs/exec.c b/fs/exec.c index 9a5ca7b82bfc..d046dbb9cbd0 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -65,7 +65,6 @@ #include <linux/io_uring.h> #include <linux/syscall_user_dispatch.h> #include <linux/coredump.h> -#include <linux/time_namespace.h> #include <linux/uaccess.h> #include <asm/mmu_context.h> @@ -979,12 +978,10 @@ static int exec_mmap(struct mm_struct *mm) { struct task_struct *tsk; struct mm_struct *old_mm, *active_mm; - bool vfork; int ret; /* Notify parent that we're no longer interested in the old VM */ tsk = current; - vfork = !!tsk->vfork_done; old_mm = current->mm; exec_mm_release(tsk, old_mm); if (old_mm) @@ -1029,10 +1026,6 @@ static int exec_mmap(struct mm_struct *mm) tsk->mm->vmacache_seqnum = 0; vmacache_flush(tsk); task_unlock(tsk); - - if (vfork) - timens_on_fork(tsk->nsproxy, tsk); - if (old_mm) { mmap_read_unlock(old_mm); BUG_ON(active_mm != old_mm); diff --git a/kernel/fork.c b/kernel/fork.c index 90c85b17bf69..66f43e0d4547 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2046,11 +2046,8 @@ static __latent_entropy struct task_struct *copy_process( /* * If the new process will be in a different time namespace * do not allow it to share VM or a thread group with the forking task. - * - * On vfork, the child process enters the target time namespace only - * after exec. */ - if ((clone_flags & (CLONE_VM | CLONE_VFORK)) == CLONE_VM) { + if (clone_flags & (CLONE_THREAD | CLONE_VM)) { if (nsp->time_ns != nsp->time_ns_for_children) return ERR_PTR(-EINVAL); } diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index b4cbb406bc28..eec72ca962e2 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -179,8 +179,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) if (IS_ERR(new_ns)) return PTR_ERR(new_ns); - if ((flags & CLONE_VM) == 0) - timens_on_fork(new_ns, tsk); + timens_on_fork(new_ns, tsk); tsk->nsproxy = new_ns; return 0; |