diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-08-28 12:38:26 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-08-28 12:38:26 -0700 |
commit | b03a4342142be0c608061a91fa52ec21f6853152 (patch) | |
tree | fd6e479d544afb432e1e3260897010e8aefafb59 /tools | |
parent | 5b07aaca1809f459d74589c38b20f87da554027f (diff) | |
parent | 46822860a5a9a5a558475d323a55c8aab0b54012 (diff) |
Merge tag 'seccomp-v6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux
Pull seccomp updates from Kees Cook:
- Provide USER_NOTIFY flag for synchronous mode (Andrei Vagin, Peter
Oskolkov). This touches the scheduler and perf but has been Acked by
Peter Zijlstra.
- Fix regression in syscall skipping and restart tracing on arm32. This
touches arch/arm/ but has been Acked by Arnd Bergmann.
* tag 'seccomp-v6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
seccomp: Add missing kerndoc notations
ARM: ptrace: Restore syscall skipping for tracers
ARM: ptrace: Restore syscall restart tracing
selftests/seccomp: Handle arm32 corner cases better
perf/benchmark: add a new benchmark for seccom_unotify
selftest/seccomp: add a new test for the sync mode of seccomp_user_notify
seccomp: add the synchronous mode for seccomp_unotify
sched: add a few helpers to wake up tasks on the current cpu
sched: add WF_CURRENT_CPU and externise ttwu
seccomp: don't use semaphore and wait_queue together
Diffstat (limited to 'tools')
-rw-r--r-- | tools/arch/x86/include/uapi/asm/unistd_32.h | 3 | ||||
-rw-r--r-- | tools/arch/x86/include/uapi/asm/unistd_64.h | 3 | ||||
-rw-r--r-- | tools/perf/bench/Build | 1 | ||||
-rw-r--r-- | tools/perf/bench/bench.h | 1 | ||||
-rw-r--r-- | tools/perf/bench/sched-seccomp-notify.c | 178 | ||||
-rw-r--r-- | tools/perf/builtin-bench.c | 1 | ||||
-rw-r--r-- | tools/testing/selftests/seccomp/seccomp_bpf.c | 67 |
7 files changed, 253 insertions, 1 deletions
diff --git a/tools/arch/x86/include/uapi/asm/unistd_32.h b/tools/arch/x86/include/uapi/asm/unistd_32.h
index bc48a4dabe5d..4798f9d18fe8 100644
--- a/tools/arch/x86/include/uapi/asm/unistd_32.h
+++ b/tools/arch/x86/include/uapi/asm/unistd_32.h
@@ -26,3 +26,6 @@
 #ifndef __NR_setns
 #define __NR_setns 346
 #endif
+#ifndef __NR_seccomp
+#define __NR_seccomp 354
+#endif
diff --git a/tools/arch/x86/include/uapi/asm/unistd_64.h b/tools/arch/x86/include/uapi/asm/unistd_64.h
index f70d2cada256..d0f2043d7132 100644
--- a/tools/arch/x86/include/uapi/asm/unistd_64.h
+++ b/tools/arch/x86/include/uapi/asm/unistd_64.h
@@ -26,3 +26,6 @@
 #ifndef __NR_getcpu
 #define __NR_getcpu 309
 #endif
+#ifndef __NR_seccomp
+#define __NR_seccomp 317
+#endif
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 0f158dc8139b..07bbc449329e 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -1,5 +1,6 @@
 perf-y += sched-messaging.o
 perf-y += sched-pipe.o
+perf-y += sched-seccomp-notify.o
 perf-y += syscall.o
 perf-y += mem-functions.o
 perf-y += futex-hash.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 0d2b65976212..a0625c77bea3 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -21,6 +21,7 @@ extern struct timeval bench__start, bench__end, bench__runtime;
 int bench_numa(int argc, const char **argv);
 int bench_sched_messaging(int argc, const char **argv);
 int bench_sched_pipe(int argc, const char **argv);
+int bench_sched_seccomp_notify(int argc, const char **argv);
 int bench_syscall_basic(int argc, const char **argv);
 int bench_syscall_getpgid(int argc, const char **argv);
 int bench_syscall_fork(int argc, const char **argv);
diff --git a/tools/perf/bench/sched-seccomp-notify.c b/tools/perf/bench/sched-seccomp-notify.c
new file mode 100644
index 000000000000..b04ebcde4036
--- /dev/null
+++ b/tools/perf/bench/sched-seccomp-notify.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <subcmd/parse-options.h>
+#include "bench.h"
+
+#include <uapi/linux/filter.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <linux/unistd.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <linux/time64.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+
+#include <unistd.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <string.h>
+#include <errno.h>
+#include <err.h>
+#include <inttypes.h>
+
+#define LOOPS_DEFAULT 1000000UL
+static uint64_t loops = LOOPS_DEFAULT;
+static bool sync_mode;
+
+static const struct option options[] = {
+	OPT_U64('l', "loop", &loops, "Specify number of loops"),
+	OPT_BOOLEAN('s', "sync-mode", &sync_mode,
+		    "Enable the synchronous mode for seccomp notifications"),
+	OPT_END()
+};
+
+static const char * const bench_seccomp_usage[] = {
+	"perf bench sched seccomp-notify <options>",
+	NULL
+};
+
+static int seccomp(unsigned int op, unsigned int flags, void *args)
+{
+	return syscall(__NR_seccomp, op, flags, args);
+}
+
+static int user_notif_syscall(int nr, unsigned int flags)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+
+	return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
+}
+
+#define USER_NOTIF_MAGIC INT_MAX
+static void user_notification_sync_loop(int listener)
+{
+	struct seccomp_notif_resp resp;
+	struct seccomp_notif req;
+	uint64_t nr;
+
+	for (nr = 0; nr < loops; nr++) {
+		memset(&req, 0, sizeof(req));
+		if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req))
+			err(EXIT_FAILURE, "SECCOMP_IOCTL_NOTIF_RECV failed");
+
+		if (req.data.nr != __NR_gettid)
+			errx(EXIT_FAILURE, "unexpected syscall: %d", req.data.nr);
+
+		resp.id = req.id;
+		resp.error = 0;
+		resp.val = USER_NOTIF_MAGIC;
+		resp.flags = 0;
+		if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp))
+			err(EXIT_FAILURE, "SECCOMP_IOCTL_NOTIF_SEND failed");
+	}
+}
+
+#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP
+#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
+#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64)
+#endif
+int bench_sched_seccomp_notify(int argc, const char **argv)
+{
+	struct timeval start, stop, diff;
+	unsigned long long result_usec = 0;
+	int status, listener;
+	pid_t pid;
+	long ret;
+
+	argc = parse_options(argc, argv, options, bench_seccomp_usage, 0);
+
+	gettimeofday(&start, NULL);
+
+	prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	listener = user_notif_syscall(__NR_gettid,
+				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
+	if (listener < 0)
+		err(EXIT_FAILURE, "can't create a notification descriptor");
+
+	pid = fork();
+	if (pid < 0)
+		err(EXIT_FAILURE, "fork");
+	if (pid == 0) {
+		if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0))
+			err(EXIT_FAILURE, "can't set the parent death signal");
+		while (1) {
+			ret = syscall(__NR_gettid);
+			if (ret == USER_NOTIF_MAGIC)
+				continue;
+			break;
+		}
+		_exit(1);
+	}
+
+	if (sync_mode) {
+		if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
+			     SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0))
+			err(EXIT_FAILURE,
+			    "can't set SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP");
+	}
+	user_notification_sync_loop(listener);
+
+	kill(pid, SIGKILL);
+	if (waitpid(pid, &status, 0) != pid)
+		err(EXIT_FAILURE, "waitpid(%d) failed", pid);
+	if (!WIFSIGNALED(status) || WTERMSIG(status) != SIGKILL)
+		errx(EXIT_FAILURE, "unexpected exit code: %d", status);
+
+	gettimeofday(&stop, NULL);
+	timersub(&stop, &start, &diff);
+
+	switch (bench_format) {
+	case BENCH_FORMAT_DEFAULT:
+		printf("# Executed %" PRIu64 " system calls\n\n",
+			loops);
+
+		result_usec = diff.tv_sec * USEC_PER_SEC;
+		result_usec += diff.tv_usec;
+
+		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+		       (unsigned long) diff.tv_sec,
+		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+
+		printf(" %14lf usecs/op\n",
+		       (double)result_usec / (double)loops);
+		printf(" %14d ops/sec\n",
+		       (int)((double)loops /
+			     ((double)result_usec / (double)USEC_PER_SEC)));
+		break;
+
+	case BENCH_FORMAT_SIMPLE:
+		printf("%lu.%03lu\n",
+		       (unsigned long) diff.tv_sec,
+		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+		break;
+
+	default:
+		/* reaching here is something disaster */
+		fprintf(stderr, "Unknown format:%d\n", bench_format);
+		exit(1);
+		break;
+	}
+
+	return 0;
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index db435b791a09..5033e8bab276 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -47,6 +47,7 @@ static struct bench numa_benchmarks[] = {
 static struct bench sched_benchmarks[] = {
 	{ "messaging", "Benchmark for scheduling and IPC", bench_sched_messaging },
 	{ "pipe", "Benchmark for pipe() between two processes", bench_sched_pipe },
+	{ "seccomp-notify", "Benchmark for seccomp user notify", bench_sched_seccomp_notify},
 	{ "all", "Run all scheduler benchmarks", NULL },
 	{ NULL, NULL, NULL }
 };
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 43ec36b179dc..38f651469968 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -2184,6 +2184,9 @@ FIXTURE_TEARDOWN(TRACE_syscall)
 
 TEST(negative_ENOSYS)
 {
+#if defined(__arm__)
+	SKIP(return, "arm32 does not support calling syscall -1");
+#endif
 	/*
 	 * There should be no difference between an "internal" skip
 	 * and userspace asking for syscall "-1".
@@ -3072,7 +3075,8 @@ TEST(syscall_restart)
 	timeout.tv_sec = 1;
 	errno = 0;
 	EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
-		TH_LOG("Call to nanosleep() failed (errno %d)", errno);
+		TH_LOG("Call to nanosleep() failed (errno %d: %s)",
+			errno, strerror(errno));
 	}
 
 	/* Read final sync from parent. */
@@ -3908,6 +3912,9 @@ TEST(user_notification_filter_empty)
 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 	}
 
+	if (__NR_clone3 < 0)
+		SKIP(return, "Test not built with clone3 support");
+
 	pid = sys_clone3(&args, sizeof(args));
 	ASSERT_GE(pid, 0);
 
@@ -3962,6 +3969,9 @@ TEST(user_notification_filter_empty_threaded)
 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
 	}
 
+	if (__NR_clone3 < 0)
+		SKIP(return, "Test not built with clone3 support");
+
 	pid = sys_clone3(&args, sizeof(args));
 	ASSERT_GE(pid, 0);
 
@@ -4255,6 +4265,61 @@ TEST(user_notification_addfd_rlimit)
 	close(memfd);
 }
 
+#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP
+#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
+#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64)
+#endif
+
+TEST(user_notification_sync)
+{
+	struct seccomp_notif req = {};
+	struct seccomp_notif_resp resp = {};
+	int status, listener;
+	pid_t pid;
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	listener = user_notif_syscall(__NR_getppid,
+				      SECCOMP_FILTER_FLAG_NEW_LISTENER);
+	ASSERT_GE(listener, 0);
+
+	/* Try to set invalid flags. */
+	EXPECT_SYSCALL_RETURN(-EINVAL,
+		ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 0xffffffff, 0));
+
+	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
+			SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0), 0);
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0) {
+		ret = syscall(__NR_getppid);
+		ASSERT_EQ(ret, USER_NOTIF_MAGIC) {
+			_exit(1);
+		}
+		_exit(0);
+	}
+
+	req.pid = 0;
+	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+
+	ASSERT_EQ(req.data.nr, __NR_getppid);
+
+	resp.id = req.id;
+	resp.error = 0;
+	resp.val = USER_NOTIF_MAGIC;
+	resp.flags = 0;
+	ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+	ASSERT_EQ(waitpid(pid, &status, 0), pid);
+	ASSERT_EQ(status, 0);
+}
+
+
 /* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */
 FIXTURE(O_SUSPEND_SECCOMP) {
 	pid_t pid;