summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorChristian Brauner <christian.brauner@ubuntu.com>2020-05-31 13:50:29 +0200
committerKees Cook <keescook@chromium.org>2020-07-10 16:01:51 -0700
commit3a15fb6ed92cb32b0a83f406aa4a96f28c9adbc3 (patch)
tree3ca1c281e1a283385214bd49eaa0c3e4a33330b5 /kernel
parentb707ddee11d1dc4518ab7f1aa5e7af9ceaa23317 (diff)
seccomp: release filter after task is fully dead
The seccomp filter used to be released in free_task() which is called asynchronously via call_rcu() and assorted mechanisms. Since we need to inform tasks waiting on the seccomp notifier when a filter goes empty we will notify them as soon as a task has been marked fully dead in release_task(). To not split seccomp cleanup into two parts, move filter release out of free_task() and into release_task() after we've unhashed struct task from struct pid, exited signals, and unlinked it from the threadgroups' thread list. We'll put the empty filter notification infrastructure into it in a follow up patch. This also renames put_seccomp_filter() to seccomp_filter_release() which is a more descriptive name of what we're doing here especially once we've added the empty filter notification mechanism in there. We're also NULL-ing the task's filter tree entrypoint which seems cleaner than leaving a dangling pointer in there. Note that this shouldn't need any memory barriers since we're calling this when the task is in release_task() which means it's EXIT_DEAD. So it can't modify its seccomp filters anymore. You can also see this from the point where we're calling seccomp_filter_release(). It's after __exit_signal() and at this point, tsk->sighand will already have been NULLed which is required for thread-sync and filter installation alike. Cc: Tycho Andersen <tycho@tycho.ws> Cc: Kees Cook <keescook@chromium.org> Cc: Matt Denton <mpdenton@google.com> Cc: Sargun Dhillon <sargun@sargun.me> Cc: Jann Horn <jannh@google.com> Cc: Chris Palmer <palmer@google.com> Cc: Aleksa Sarai <cyphar@cyphar.com> Cc: Robert Sesek <rsesek@google.com> Cc: Jeffrey Vander Stoep <jeffv@google.com> Cc: Linux Containers <containers@lists.linux-foundation.org> Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com> Link: https://lore.kernel.org/r/20200531115031.391515-2-christian.brauner@ubuntu.com Signed-off-by: Kees Cook <keescook@chromium.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/seccomp.c62
3 files changed, 38 insertions, 26 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 727150f28103..00d77e5ba700 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -217,6 +217,7 @@ repeat:
}
write_unlock_irq(&tasklist_lock);
+ seccomp_filter_release(p);
proc_flush_pid(thread_pid);
put_pid(thread_pid);
release_thread(p);
diff --git a/kernel/fork.c b/kernel/fork.c
index 142b23645d82..c51a9cd824c5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -473,7 +473,6 @@ void free_task(struct task_struct *tsk)
#endif
rt_mutex_debug_task_free(tsk);
ftrace_graph_exit_task(tsk);
- put_seccomp_filter(tsk);
arch_release_task_struct(tsk);
if (tsk->flags & PF_KTHREAD)
free_kthread_struct(tsk);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d4dd3344e312..0ca6d5243427 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -368,6 +368,42 @@ static inline pid_t seccomp_can_sync_threads(void)
return 0;
}
+static inline void seccomp_filter_free(struct seccomp_filter *filter)
+{
+ if (filter) {
+ bpf_prog_destroy(filter->prog);
+ kfree(filter);
+ }
+}
+
+static void __put_seccomp_filter(struct seccomp_filter *orig)
+{
+ /* Clean up single-reference branches iteratively. */
+ while (orig && refcount_dec_and_test(&orig->refs)) {
+ struct seccomp_filter *freeme = orig;
+ orig = orig->prev;
+ seccomp_filter_free(freeme);
+ }
+}
+
+/**
+ * seccomp_filter_release - Detach the task from its filter tree
+ * and drop its reference count during
+ * exit.
+ *
+ * This function should only be called when the task is exiting as
+ * it detaches it from its filter tree. As such, READ_ONCE() and
+ * barriers are not needed here, as would normally be needed.
+ */
+void seccomp_filter_release(struct task_struct *tsk)
+{
+ struct seccomp_filter *orig = tsk->seccomp.filter;
+
+ /* Detach task from its filter tree. */
+ tsk->seccomp.filter = NULL;
+ __put_seccomp_filter(orig);
+}
+
/**
* seccomp_sync_threads: sets all threads to use current's filter
*
@@ -397,7 +433,7 @@ static inline void seccomp_sync_threads(unsigned long flags)
* current's path will hold a reference. (This also
* allows a put before the assignment.)
*/
- put_seccomp_filter(thread);
+ __put_seccomp_filter(thread->seccomp.filter);
smp_store_release(&thread->seccomp.filter,
caller->seccomp.filter);
atomic_set(&thread->seccomp.filter_count,
@@ -571,30 +607,6 @@ void get_seccomp_filter(struct task_struct *tsk)
__get_seccomp_filter(orig);
}
-static inline void seccomp_filter_free(struct seccomp_filter *filter)
-{
- if (filter) {
- bpf_prog_destroy(filter->prog);
- kfree(filter);
- }
-}
-
-static void __put_seccomp_filter(struct seccomp_filter *orig)
-{
- /* Clean up single-reference branches iteratively. */
- while (orig && refcount_dec_and_test(&orig->refs)) {
- struct seccomp_filter *freeme = orig;
- orig = orig->prev;
- seccomp_filter_free(freeme);
- }
-}
-
-/* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
-void put_seccomp_filter(struct task_struct *tsk)
-{
- __put_seccomp_filter(tsk->seccomp.filter);
-}
-
static void seccomp_init_siginfo(kernel_siginfo_t *info, int syscall, int reason)
{
clear_siginfo(info);