diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-06-29 12:44:51 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-06-29 12:44:51 -0700 |
commit | 1dfb0f47aca11350f45f8c04c3b83f0e829adfa9 (patch) | |
tree | 52a32a16ea2963739d12df5be505aca460ae91e5 /tools/testing | |
parent | a22c3f615a6fef6553e20c559d31ea817216b4e6 (diff) | |
parent | 48f7eee81cd53a94699d28959566b41a9dcac1d9 (diff) |
Merge tag 'x86-entry-2021-06-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 entry code related updates from Thomas Gleixner:
- Consolidate the macros for .byte ... opcode sequences
- Deduplicate register offset defines in include files
- Simplify the ia32,x32 compat handling of the related syscall tables
to get rid of #ifdeffery.
- Clear all EFLAGS which are not required for syscall handling
- Consolidate the syscall tables and switch the generation over to the
generic shell script and remove the CFLAGS tweaks which are no
longer required.
- Use 'int' type for system call numbers to match the generic code.
- Add more selftests for syscalls
* tag 'x86-entry-2021-06-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/syscalls: Don't adjust CFLAGS for syscall tables
x86/syscalls: Remove -Wno-override-init for syscall tables
x86/uml/syscalls: Remove array index from syscall initializers
x86/syscalls: Clear 'offset' and 'prefix' in case they are set in env
x86/entry: Use int everywhere for system call numbers
x86/entry: Treat out of range and gap system calls the same
x86/entry/64: Sign-extend system calls on entry to int
selftests/x86/syscall: Add tests under ptrace to syscall_numbering_64
selftests/x86/syscall: Simplify message reporting in syscall_numbering
selftests/x86/syscall: Update and extend syscall_numbering_64
x86/syscalls: Switch to generic syscallhdr.sh
x86/syscalls: Use __NR_syscalls instead of __NR_syscall_max
x86/unistd: Define X32_NR_syscalls only for 64-bit kernel
x86/syscalls: Stop filling syscall arrays with *_sys_ni_syscall
x86/syscalls: Switch to generic syscalltbl.sh
x86/entry/x32: Rename __x32_compat_sys_* to __x64_compat_sys_*
Diffstat (limited to 'tools/testing')
-rw-r--r-- | tools/testing/selftests/x86/syscall_numbering.c | 491 |
1 files changed, 442 insertions, 49 deletions
diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c index d6b09cb1aa2c..991591718bb0 100644 --- a/tools/testing/selftests/x86/syscall_numbering.c +++ b/tools/testing/selftests/x86/syscall_numbering.c @@ -1,6 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args + * syscall_numbering.c - test calling the x86-64 kernel with various + * valid and invalid system call numbers. + * * Copyright (c) 2018 Andrew Lutomirski */ @@ -11,79 +13,470 @@ #include <stdbool.h> #include <errno.h> #include <unistd.h> -#include <syscall.h> +#include <string.h> +#include <fcntl.h> +#include <limits.h> +#include <signal.h> +#include <sysexits.h> -static int nerrs; +#include <sys/ptrace.h> +#include <sys/user.h> +#include <sys/wait.h> +#include <sys/mman.h> -#define X32_BIT 0x40000000UL +#include <linux/ptrace.h> -static void check_enosys(unsigned long nr, bool *ok) +/* Common system call numbers */ +#define SYS_READ 0 +#define SYS_WRITE 1 +#define SYS_GETPID 39 +/* x64-only system call numbers */ +#define X64_IOCTL 16 +#define X64_READV 19 +#define X64_WRITEV 20 +/* x32-only system call numbers (without X32_BIT) */ +#define X32_IOCTL 514 +#define X32_READV 515 +#define X32_WRITEV 516 + +#define X32_BIT 0x40000000 + +static int nullfd = -1; /* File descriptor for /dev/null */ +static bool with_x32; /* x32 supported on this kernel? */ + +enum ptrace_pass { + PTP_NOTHING, + PTP_GETREGS, + PTP_WRITEBACK, + PTP_FUZZRET, + PTP_FUZZHIGH, + PTP_INTNUM, + PTP_DONE +}; + +static const char * const ptrace_pass_name[] = { - /* If this fails, a segfault is reasonably likely. 
*/ - fflush(stdout); + [PTP_NOTHING] = "just stop, no data read", + [PTP_GETREGS] = "only getregs", + [PTP_WRITEBACK] = "getregs, unmodified setregs", + [PTP_FUZZRET] = "modifying the default return", + [PTP_FUZZHIGH] = "clobbering the top 32 bits", + [PTP_INTNUM] = "sign-extending the syscall number", +}; - long ret = syscall(nr, 0, 0, 0, 0, 0, 0); - if (ret == 0) { - printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr); - *ok = false; - } else if (errno != ENOSYS) { - printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, errno); - *ok = false; - } +/* + * Shared memory block between tracer and test + */ +struct shared { + unsigned int nerr; /* Total error count */ + unsigned int indent; /* Message indentation level */ + enum ptrace_pass ptrace_pass; + bool probing_syscall; /* In probe_syscall() */ +}; +static volatile struct shared *sh; + +static inline unsigned int offset(void) +{ + unsigned int level = sh ? sh->indent : 0; + + return 8 + level * 4; } -static void test_x32_without_x32_bit(void) +#define msg(lvl, fmt, ...) printf("%-*s" fmt, offset(), "[" #lvl "]", \ + ## __VA_ARGS__) + +#define run(fmt, ...) msg(RUN, fmt, ## __VA_ARGS__) +#define info(fmt, ...) msg(INFO, fmt, ## __VA_ARGS__) +#define ok(fmt, ...) msg(OK, fmt, ## __VA_ARGS__) + +#define fail(fmt, ...) \ + do { \ + msg(FAIL, fmt, ## __VA_ARGS__); \ + sh->nerr++; \ + } while (0) + +#define crit(fmt, ...) \ + do { \ + sh->indent = 0; \ + msg(FAIL, fmt, ## __VA_ARGS__); \ + msg(SKIP, "Unable to run test\n"); \ + exit(EX_OSERR); \ + } while (0) + +/* Sentinel for ptrace-modified return value */ +#define MODIFIED_BY_PTRACE -9999 + +/* + * Directly invokes the given syscall with nullfd as the first argument + * and the rest zero. Avoids involving glibc wrappers in case they ever + * end up intercepting some system calls for some reason, or modify + * the system call number itself. 
+ */ +static long long probe_syscall(int msb, int lsb) { - bool ok = true; + register long long arg1 asm("rdi") = nullfd; + register long long arg2 asm("rsi") = 0; + register long long arg3 asm("rdx") = 0; + register long long arg4 asm("r10") = 0; + register long long arg5 asm("r8") = 0; + register long long arg6 asm("r9") = 0; + long long nr = ((long long)msb << 32) | (unsigned int)lsb; + long long ret; /* - * Syscalls 512-547 are "x32" syscalls. They are intended to be - * called with the x32 (0x40000000) bit set. Calling them without - * the x32 bit set is nonsense and should not work. + * We pass in an extra copy of the extended system call number + * in %rbx, so we can examine it from the ptrace handler without + * worrying about it being possibly modified. This is to test + * the validity of struct user regs.orig_rax a.k.a. + * struct pt_regs.orig_ax. */ - printf("[RUN]\tChecking syscalls 512-547\n"); - for (int i = 512; i <= 547; i++) - check_enosys(i, &ok); + sh->probing_syscall = true; + asm volatile("syscall" + : "=a" (ret) + : "a" (nr), "b" (nr), + "r" (arg1), "r" (arg2), "r" (arg3), + "r" (arg4), "r" (arg5), "r" (arg6) + : "rcx", "r11", "memory", "cc"); + sh->probing_syscall = false; + + return ret; +} + +static const char *syscall_str(int msb, int start, int end) +{ + static char buf[64]; + const char * const type = (start & X32_BIT) ? "x32" : "x64"; + int lsb = start; /* - * Check that a handful of 64-bit-only syscalls are rejected if the x32 - * bit is set. + * Improve readability by stripping the x32 bit, but round + * toward zero so we don't display -1 as -1073741825. 
*/ - printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n"); - check_enosys(16 | X32_BIT, &ok); /* ioctl */ - check_enosys(19 | X32_BIT, &ok); /* readv */ - check_enosys(20 | X32_BIT, &ok); /* writev */ + if (lsb < 0) + lsb |= X32_BIT; + else + lsb &= ~X32_BIT; + + if (start == end) + snprintf(buf, sizeof buf, "%s syscall %d:%d", + type, msb, lsb); + else + snprintf(buf, sizeof buf, "%s syscalls %d:%d..%d", + type, msb, lsb, lsb + (end-start)); + + return buf; +} + +static unsigned int _check_for(int msb, int start, int end, long long expect, + const char *expect_str) +{ + unsigned int err = 0; + + sh->indent++; + if (start != end) + sh->indent++; + + for (int nr = start; nr <= end; nr++) { + long long ret = probe_syscall(msb, nr); + + if (ret != expect) { + fail("%s returned %lld, but it should have returned %s\n", + syscall_str(msb, nr, nr), + ret, expect_str); + err++; + } + } + + if (start != end) + sh->indent--; + + if (err) { + if (start != end) + fail("%s had %u failure%s\n", + syscall_str(msb, start, end), + err, err == 1 ? "s" : ""); + } else { + ok("%s returned %s as expected\n", + syscall_str(msb, start, end), expect_str); + } + + sh->indent--; + + return err; +} + +#define check_for(msb,start,end,expect) \ + _check_for(msb,start,end,expect,#expect) + +static bool check_zero(int msb, int nr) +{ + return check_for(msb, nr, nr, 0); +} + +static bool check_enosys(int msb, int nr) +{ + return check_for(msb, nr, nr, -ENOSYS); +} + +/* + * Anyone diagnosing a failure will want to know whether the kernel + * supports x32. Tell them. This can also be used to conditionalize + * tests based on existence or nonexistence of x32. 
+ */ +static bool test_x32(void) +{ + long long ret; + pid_t mypid = getpid(); + + run("Checking for x32 by calling x32 getpid()\n"); + ret = probe_syscall(0, SYS_GETPID | X32_BIT); + + sh->indent++; + if (ret == mypid) { + info("x32 is supported\n"); + with_x32 = true; + } else if (ret == -ENOSYS) { + info("x32 is not supported\n"); + with_x32 = false; + } else { + fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, (long long)mypid); + with_x32 = false; + } + sh->indent--; + return with_x32; +} + +static void test_syscalls_common(int msb) +{ + enum ptrace_pass pass = sh->ptrace_pass; + + run("Checking some common syscalls as 64 bit\n"); + check_zero(msb, SYS_READ); + check_zero(msb, SYS_WRITE); + + run("Checking some 64-bit only syscalls as 64 bit\n"); + check_zero(msb, X64_READV); + check_zero(msb, X64_WRITEV); + + run("Checking out of range system calls\n"); + check_for(msb, -64, -2, -ENOSYS); + if (pass >= PTP_FUZZRET) + check_for(msb, -1, -1, MODIFIED_BY_PTRACE); + else + check_for(msb, -1, -1, -ENOSYS); + check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS); + check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS); + check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS); +} +static void test_syscalls_with_x32(int msb) +{ /* - * Check some syscalls with high bits set. + * Syscalls 512-547 are "x32" syscalls. They are + * intended to be called with the x32 (0x40000000) bit + * set. Calling them without the x32 bit set is + * nonsense and should not work. 
*/ - printf("[RUN]\tChecking numbers above 2^32-1\n"); - check_enosys((1UL << 32), &ok); - check_enosys(X32_BIT | (1UL << 32), &ok); + run("Checking x32 syscalls as 64 bit\n"); + check_for(msb, 512, 547, -ENOSYS); - if (!ok) - nerrs++; - else - printf("[OK]\tThey all returned -ENOSYS\n"); + run("Checking some common syscalls as x32\n"); + check_zero(msb, SYS_READ | X32_BIT); + check_zero(msb, SYS_WRITE | X32_BIT); + + run("Checking some x32 syscalls as x32\n"); + check_zero(msb, X32_READV | X32_BIT); + check_zero(msb, X32_WRITEV | X32_BIT); + + run("Checking some 64-bit syscalls as x32\n"); + check_enosys(msb, X64_IOCTL | X32_BIT); + check_enosys(msb, X64_READV | X32_BIT); + check_enosys(msb, X64_WRITEV | X32_BIT); } -int main() +static void test_syscalls_without_x32(int msb) { + run("Checking for absence of x32 system calls\n"); + check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS); +} + +static void test_syscall_numbering(void) +{ + static const int msbs[] = { + 0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX, + INT_MIN, INT_MIN+1 + }; + + sh->indent++; + /* - * Anyone diagnosing a failure will want to know whether the kernel - * supports x32. Tell them. + * The MSB is supposed to be ignored, so we loop over a few + * to test that out. 
*/ - printf("\tChecking for x32..."); - fflush(stdout); - if (syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0) >= 0) { - printf(" supported\n"); - } else if (errno == ENOSYS) { - printf(" not supported\n"); + for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) { + int msb = msbs[i]; + run("Checking system calls with msb = %d (0x%x)\n", + msb, msb); + + sh->indent++; + + test_syscalls_common(msb); + if (with_x32) + test_syscalls_with_x32(msb); + else + test_syscalls_without_x32(msb); + + sh->indent--; + } + + sh->indent--; +} + +static void syscall_numbering_tracee(void) +{ + enum ptrace_pass pass; + + if (ptrace(PTRACE_TRACEME, 0, 0, 0)) { + crit("Failed to request tracing\n"); + return; + } + raise(SIGSTOP); + + for (sh->ptrace_pass = pass = PTP_NOTHING; pass < PTP_DONE; + sh->ptrace_pass = ++pass) { + run("Running tests under ptrace: %s\n", ptrace_pass_name[pass]); + test_syscall_numbering(); + } +} + +static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass) +{ + struct user_regs_struct regs; + + sh->probing_syscall = false; /* Do this on entry only */ + + /* For these, don't even getregs */ + if (pass == PTP_NOTHING || pass == PTP_DONE) + return; + + ptrace(PTRACE_GETREGS, testpid, NULL, &regs); + + if (regs.orig_rax != regs.rbx) { + fail("orig_rax %#llx doesn't match syscall number %#llx\n", + (unsigned long long)regs.orig_rax, + (unsigned long long)regs.rbx); + } + + switch (pass) { + case PTP_GETREGS: + /* Just read, no writeback */ + return; + case PTP_WRITEBACK: + /* Write back the same register state verbatim */ + break; + case PTP_FUZZRET: + regs.rax = MODIFIED_BY_PTRACE; + break; + case PTP_FUZZHIGH: + regs.rax = MODIFIED_BY_PTRACE; + regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL; + break; + case PTP_INTNUM: + regs.rax = MODIFIED_BY_PTRACE; + regs.orig_rax = (int)regs.orig_rax; + break; + default: + crit("invalid ptrace_pass\n"); + break; + } + + ptrace(PTRACE_SETREGS, testpid, NULL, &regs); +} + +static void syscall_numbering_tracer(pid_t 
testpid) +{ + int wstatus; + + do { + pid_t wpid = waitpid(testpid, &wstatus, 0); + if (wpid < 0 && errno != EINTR) + break; + if (wpid != testpid) + continue; + if (!WIFSTOPPED(wstatus)) + break; /* Thread exited? */ + + if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP) + mess_with_syscall(testpid, sh->ptrace_pass); + } while (sh->ptrace_pass != PTP_DONE && + !ptrace(PTRACE_SYSCALL, testpid, NULL, NULL)); + + ptrace(PTRACE_DETACH, testpid, NULL, NULL); + + /* Wait for the child process to terminate */ + while (waitpid(testpid, &wstatus, 0) != testpid || !WIFEXITED(wstatus)) + /* wait some more */; +} + +static void test_traced_syscall_numbering(void) +{ + pid_t testpid; + + /* Launch the test thread; this thread continues as the tracer thread */ + testpid = fork(); + + if (testpid < 0) { + crit("Unable to launch tracer process\n"); + } else if (testpid == 0) { + syscall_numbering_tracee(); + _exit(0); } else { - printf(" confused\n"); + syscall_numbering_tracer(testpid); } +} - test_x32_without_x32_bit(); +int main(void) +{ + unsigned int nerr; - return nerrs ? 1 : 0; + /* + * It is quite likely to get a segfault on a failure, so make + * sure the message gets out by setting stdout to nonbuffered. + */ + setvbuf(stdout, NULL, _IONBF, 0); + + /* + * Harmless file descriptor to work on... + */ + nullfd = open("/dev/null", O_RDWR); + if (nullfd < 0) { + crit("Unable to open /dev/null: %s\n", strerror(errno)); + } + + /* + * Set up a block of shared memory... 
+ */ + sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_SHARED, 0, 0); + if (sh == MAP_FAILED) { + crit("Unable to allocated shared memory block: %s\n", + strerror(errno)); + } + + with_x32 = test_x32(); + + run("Running tests without ptrace...\n"); + test_syscall_numbering(); + + test_traced_syscall_numbering(); + + nerr = sh->nerr; + if (!nerr) { + ok("All system calls succeeded or failed as expected\n"); + return 0; + } else { + fail("A total of %u system call%s had incorrect behavior\n", + nerr, nerr != 1 ? "s" : ""); + return 1; + } } |