diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-10-30 15:40:57 -1000 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-10-30 15:40:57 -1000 |
commit | f0d25b5d0f8ef0ad35f1beff17da5843279d47a1 (patch) | |
tree | 73bb0fea8ed4a96d01517349dfb2588c3ae37e6d /tools | |
parent | 1641b9b04002c22f616a51a164c04b7f679d241f (diff) | |
parent | a1e2b8b36820d8c91275f207e77e91645b7c6836 (diff) |
Merge tag 'x86-mm-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm handling updates from Ingo Molnar:
- Add new NX-stack self-test
- Improve NUMA partial-CFMWS handling
- Fix #VC handler bugs resulting in SEV-SNP boot failures
- Drop the 4MB memory size restriction on minimal NUMA nodes
- Reorganize headers a bit, in preparation to header dependency
reduction efforts
- Misc cleanups & fixes
* tag 'x86-mm-2023-10-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/mm: Drop the 4 MB restriction on minimal NUMA node memory size
selftests/x86/lam: Zero out buffer for readlink()
x86/sev: Drop unneeded #include
x86/sev: Move sev_setup_arch() to mem_encrypt.c
x86/tdx: Replace deprecated strncpy() with strtomem_pad()
selftests/x86/mm: Add new test that userspace stack is in fact NX
x86/sev: Make boot_ghcb_page[] static
x86/boot: Move x86_cache_alignment initialization to correct spot
x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach
x86/sev-es: Allow copy_from_kernel_nofault() in earlier boot
x86_64: Show CR4.PSE on auxiliaries like on BSP
x86/iommu/docs: Update AMD IOMMU specification document URL
x86/sev/docs: Update document URL in amd-memory-encryption.rst
x86/mm: Move arch_memory_failure() and arch_is_platform_page() definitions from <asm/processor.h> to <asm/pgtable.h>
ACPI/NUMA: Apply SRAT proximity domain to entire CFMWS window
x86/numa: Introduce numa_fill_memblks()
Diffstat (limited to 'tools')
-rw-r--r-- | tools/testing/selftests/x86/Makefile | 4 | ||||
-rw-r--r-- | tools/testing/selftests/x86/lam.c | 6 | ||||
-rw-r--r-- | tools/testing/selftests/x86/nx_stack.c | 212 |
3 files changed, 219 insertions, 3 deletions
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 7e8c937627dd..0b872c0a42d2 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -14,6 +14,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap check_initial_reg_state sigreturn iopl ioperm \ test_vsyscall mov_ss_trap \ syscall_arg_fault fsgsbase_restore sigaltstack +TARGETS_C_BOTHBITS += nx_stack TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer @@ -109,3 +110,6 @@ $(OUTPUT)/test_syscall_vdso_32: thunks_32.S # state. $(OUTPUT)/check_initial_reg_state_32: CFLAGS += -Wl,-ereal_start -static $(OUTPUT)/check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static + +$(OUTPUT)/nx_stack_32: CFLAGS += -Wl,-z,noexecstack +$(OUTPUT)/nx_stack_64: CFLAGS += -Wl,-z,noexecstack diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index eb0e46905bf9..8f9b06d9ce03 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -573,7 +573,7 @@ int do_uring(unsigned long lam) char path[PATH_MAX] = {0}; /* get current process path */ - if (readlink("/proc/self/exe", path, PATH_MAX) <= 0) + if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0) return 1; int file_fd = open(path, O_RDONLY); @@ -680,14 +680,14 @@ static int handle_execve(struct testcases *test) perror("Fork failed."); ret = 1; } else if (pid == 0) { - char path[PATH_MAX]; + char path[PATH_MAX] = {0}; /* Set LAM mode in parent process */ if (set_lam(lam) != 0) return 1; /* Get current binary's path and the binary was run by execve */ - if (readlink("/proc/self/exe", path, PATH_MAX) <= 0) + if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0) exit(-1); /* run binary to get LAM mode and return to parent process */ diff --git a/tools/testing/selftests/x86/nx_stack.c b/tools/testing/selftests/x86/nx_stack.c new file mode 100644 index 000000000000..ea4a4e246879 --- /dev/null +++ b/tools/testing/selftests/x86/nx_stack.c @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2023 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Test that userspace stack is NX. Requires linking with -Wl,-z,noexecstack + * because I don't want to bother with PT_GNU_STACK detection. + * + * Fill the stack with INT3's and then try to execute some of them: + * SIGSEGV -- good, SIGTRAP -- bad. + * + * Regular stack is completely overwritten before testing. + * Test doesn't exit SIGSEGV handler after first fault at INT3. + */ +#undef _GNU_SOURCE +#define _GNU_SOURCE +#undef NDEBUG +#include <assert.h> +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <unistd.h> + +#define PAGE_SIZE 4096 + +/* + * This is memset(rsp, 0xcc, -1); but down. + * It will SIGSEGV when bottom of the stack is reached. + * Byte-size access is important! (see rdi tweak in the signal handler). + */ +void make_stack1(void); +asm( +".pushsection .text\n" +".globl make_stack1\n" +".align 16\n" +"make_stack1:\n" + "mov $0xcc, %al\n" +#if defined __amd64__ + "mov %rsp, %rdi\n" + "mov $-1, %rcx\n" +#elif defined __i386__ + "mov %esp, %edi\n" + "mov $-1, %ecx\n" +#else +#error +#endif + "std\n" + "rep stosb\n" + /* unreachable */ + "hlt\n" +".type make_stack1,@function\n" +".size make_stack1,.-make_stack1\n" +".popsection\n" +); + +/* + * memset(p, 0xcc, -1); + * It will SIGSEGV when top of the stack is reached. + */ +void make_stack2(uint64_t p); +asm( +".pushsection .text\n" +".globl make_stack2\n" +".align 16\n" +"make_stack2:\n" + "mov $0xcc, %al\n" +#if defined __amd64__ + "mov $-1, %rcx\n" +#elif defined __i386__ + "mov $-1, %ecx\n" +#else +#error +#endif + "cld\n" + "rep stosb\n" + /* unreachable */ + "hlt\n" +".type make_stack2,@function\n" +".size make_stack2,.-make_stack2\n" +".popsection\n" +); + +static volatile int test_state = 0; +static volatile unsigned long stack_min_addr; + +#if defined __amd64__ +#define RDI REG_RDI +#define RIP REG_RIP +#define RIP_STRING "rip" +#elif defined __i386__ +#define RDI REG_EDI +#define RIP REG_EIP +#define RIP_STRING "eip" +#else +#error +#endif + +static void sigsegv(int _, siginfo_t *__, void *uc_) +{ + /* + * Some Linux versions didn't clear DF before entering signal + * handler. make_stack1() doesn't have a chance to clear DF + * either so we clear it by hand here. + */ + asm volatile ("cld" ::: "memory"); + + ucontext_t *uc = uc_; + + if (test_state == 0) { + /* Stack is faulted and cleared from RSP to the lowest address. */ + stack_min_addr = ++uc->uc_mcontext.gregs[RDI]; + if (1) { + printf("stack min %lx\n", stack_min_addr); + } + uc->uc_mcontext.gregs[RIP] = (uintptr_t)&make_stack2; + test_state = 1; + } else if (test_state == 1) { + /* Stack has been cleared from top to bottom. */ + unsigned long stack_max_addr = uc->uc_mcontext.gregs[RDI]; + if (1) { + printf("stack max %lx\n", stack_max_addr); + } + /* Start faulting pages on stack and see what happens. */ + uc->uc_mcontext.gregs[RIP] = stack_max_addr - PAGE_SIZE; + test_state = 2; + } else if (test_state == 2) { + /* Stack page is NX -- good, test next page. */ + uc->uc_mcontext.gregs[RIP] -= PAGE_SIZE; + if (uc->uc_mcontext.gregs[RIP] == stack_min_addr) { + /* One more SIGSEGV and test ends. */ + test_state = 3; + } + } else { + printf("PASS\tAll stack pages are NX\n"); + _exit(EXIT_SUCCESS); + } +} + +static void sigtrap(int _, siginfo_t *__, void *uc_) +{ + const ucontext_t *uc = uc_; + unsigned long rip = uc->uc_mcontext.gregs[RIP]; + printf("FAIL\texecutable page on the stack: " RIP_STRING " %lx\n", rip); + _exit(EXIT_FAILURE); +} + +int main(void) +{ + { + struct sigaction act = {}; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = &sigsegv; + int rv = sigaction(SIGSEGV, &act, NULL); + assert(rv == 0); + } + { + struct sigaction act = {}; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = &sigtrap; + int rv = sigaction(SIGTRAP, &act, NULL); + assert(rv == 0); + } + { + struct rlimit rlim; + int rv = getrlimit(RLIMIT_STACK, &rlim); + assert(rv == 0); + /* Cap stack at time-honored 8 MiB value. */ + rlim.rlim_max = rlim.rlim_cur; + if (rlim.rlim_max > 8 * 1024 * 1024) { + rlim.rlim_max = 8 * 1024 * 1024; + } + rv = setrlimit(RLIMIT_STACK, &rlim); + assert(rv == 0); + } + { + /* + * We don't know now much stack SIGSEGV handler uses. + * Bump this by 1 page every time someone complains, + * or rewrite it in assembly. + */ + const size_t len = SIGSTKSZ; + void *p = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + assert(p != MAP_FAILED); + stack_t ss = {}; + ss.ss_sp = p; + ss.ss_size = len; + int rv = sigaltstack(&ss, NULL); + assert(rv == 0); + } + make_stack1(); + /* + * Unreachable, but if _this_ INT3 is ever reached, it's a bug somewhere. + * Fold it into main SIGTRAP pathway. + */ + __builtin_trap(); +} |