diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-07-14 20:19:25 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-07-14 20:19:25 -0700 |
commit | b6e6cc1f78c772e952495b7416c9ac9029f9390c (patch) | |
tree | f43d33a19e988dcec55b8ce4597e165deb1459d7 /arch/x86 | |
parent | be522ac7cdcc1b7dd19fa348205363041ab65a98 (diff) | |
parent | 535d0ae39185a266536a1e97ff9a8956d7fbb9df (diff) |
Merge tag 'x86_urgent_for_6.5_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 CFI fixes from Peter Zijlstra:
"Fix kCFI/FineIBT weaknesses
The primary bug Alyssa noticed was that with FineIBT enabled function
prologues have a spurious ENDBR instruction:
__cfi_foo:
endbr64
subl $hash, %r10d
jz 1f
ud2
nop
1:
foo:
endbr64 <--- *sadface*
This means that any indirect call that fails to target the __cfi
symbol and instead targets (the regular old) foo+0, will succeed due
to that second ENDBR.
Fixing this led to the discovery of a single indirect call that was
still doing this: ret_from_fork(). Since that's an assembly stub the
compiler would not generate the proper kCFI indirect call magic and it
would not get patched.
Brian came up with the most comprehensive fix -- convert the thing to
C with only a very thin asm wrapper. This ensures the kernel thread
boostrap is a proper kCFI call.
While discussing all this, Kees noted that kCFI hashes could/should be
poisoned to seal all functions whose address is never taken, further
limiting the valid kCFI targets -- much like we already do for IBT.
So what was a 'simple' observation and fix cascaded into a bunch of
inter-related CFI infrastructure fixes"
* tag 'x86_urgent_for_6.5_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/cfi: Only define poison_cfi() if CONFIG_X86_KERNEL_IBT=y
x86/fineibt: Poison ENDBR at +0
x86: Rewrite ret_from_fork() in C
x86/32: Remove schedule_tail_wrapper()
x86/cfi: Extend ENDBR sealing to kCFI
x86/alternative: Rename apply_ibt_endbr()
x86/cfi: Extend {JMP,CAKK}_NOSPEC comment
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/entry/entry_32.S | 53 | ||||
-rw-r--r-- | arch/x86/entry/entry_64.S | 33 | ||||
-rw-r--r-- | arch/x86/include/asm/alternative.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/ibt.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/nospec-branch.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/switch_to.h | 4 | ||||
-rw-r--r-- | arch/x86/kernel/alternative.c | 71 | ||||
-rw-r--r-- | arch/x86/kernel/module.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 22 |
9 files changed, 119 insertions, 74 deletions
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 91397f58ac30..6e6af42e044a 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -720,26 +720,6 @@ SYM_CODE_END(__switch_to_asm) .popsection /* - * The unwinder expects the last frame on the stack to always be at the same - * offset from the end of the page, which allows it to validate the stack. - * Calling schedule_tail() directly would break that convention because its an - * asmlinkage function so its argument has to be pushed on the stack. This - * wrapper creates a proper "end of stack" frame header before the call. - */ -.pushsection .text, "ax" -SYM_FUNC_START(schedule_tail_wrapper) - FRAME_BEGIN - - pushl %eax - call schedule_tail - popl %eax - - FRAME_END - RET -SYM_FUNC_END(schedule_tail_wrapper) -.popsection - -/* * A newly forked process directly context switches into this address. * * eax: prev task we switched from @@ -747,29 +727,22 @@ SYM_FUNC_END(schedule_tail_wrapper) * edi: kernel thread arg */ .pushsection .text, "ax" -SYM_CODE_START(ret_from_fork) - call schedule_tail_wrapper +SYM_CODE_START(ret_from_fork_asm) + movl %esp, %edx /* regs */ - testl %ebx, %ebx - jnz 1f /* kernel threads are uncommon */ + /* return address for the stack unwinder */ + pushl $.Lsyscall_32_done -2: - /* When we fork, we trace the syscall return in the child, too. */ - movl %esp, %eax - call syscall_exit_to_user_mode - jmp .Lsyscall_32_done + FRAME_BEGIN + /* prev already in EAX */ + movl %ebx, %ecx /* fn */ + pushl %edi /* fn_arg */ + call ret_from_fork + addl $4, %esp + FRAME_END - /* kernel thread */ -1: movl %edi, %eax - CALL_NOSPEC ebx - /* - * A kernel thread is allowed to return here after successfully - * calling kernel_execve(). Exit to userspace to complete the execve() - * syscall. - */ - movl $0, PT_EAX(%esp) - jmp 2b -SYM_CODE_END(ret_from_fork) + RET +SYM_CODE_END(ret_from_fork_asm) .popsection SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f31e286c2977..91f6818884fa 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -284,36 +284,19 @@ SYM_FUNC_END(__switch_to_asm) * r12: kernel thread arg */ .pushsection .text, "ax" - __FUNC_ALIGN -SYM_CODE_START_NOALIGN(ret_from_fork) - UNWIND_HINT_END_OF_STACK +SYM_CODE_START(ret_from_fork_asm) + UNWIND_HINT_REGS ANNOTATE_NOENDBR // copy_thread CALL_DEPTH_ACCOUNT - movq %rax, %rdi - call schedule_tail /* rdi: 'prev' task parameter */ - testq %rbx, %rbx /* from kernel_thread? */ - jnz 1f /* kernel threads are uncommon */ + movq %rax, %rdi /* prev */ + movq %rsp, %rsi /* regs */ + movq %rbx, %rdx /* fn */ + movq %r12, %rcx /* fn_arg */ + call ret_from_fork -2: - UNWIND_HINT_REGS - movq %rsp, %rdi - call syscall_exit_to_user_mode /* returns with IRQs disabled */ jmp swapgs_restore_regs_and_return_to_usermode - -1: - /* kernel thread */ - UNWIND_HINT_END_OF_STACK - movq %r12, %rdi - CALL_NOSPEC rbx - /* - * A kernel thread is allowed to return here after successfully - * calling kernel_execve(). Exit to userspace to complete the execve() - * syscall. - */ - movq $0, RAX(%rsp) - jmp 2b -SYM_CODE_END(ret_from_fork) +SYM_CODE_END(ret_from_fork_asm) .popsection .macro DEBUG_ENTRY_ASSERT_IRQS_OFF diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 6c15a622ad60..9c4da699e11a 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -96,7 +96,7 @@ extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); extern void apply_retpolines(s32 *start, s32 *end); extern void apply_returns(s32 *start, s32 *end); -extern void apply_ibt_endbr(s32 *start, s32 *end); +extern void apply_seal_endbr(s32 *start, s32 *end); extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine, s32 *start_cfi, s32 *end_cfi); diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h index baae6b4fea23..1e59581d500c 100644 --- a/arch/x86/include/asm/ibt.h +++ b/arch/x86/include/asm/ibt.h @@ -34,7 +34,7 @@ /* * Create a dummy function pointer reference to prevent objtool from marking * the function as needing to be "sealed" (i.e. ENDBR converted to NOP by - * apply_ibt_endbr()). + * apply_seal_endbr()). */ #define IBT_NOSEAL(fname) \ ".pushsection .discard.ibt_endbr_noseal\n\t" \ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 55388c9f7601..1a65cf4acb2b 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -234,6 +234,10 @@ * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple * indirect jmp/call which may be susceptible to the Spectre variant 2 * attack. + * + * NOTE: these do not take kCFI into account and are thus not comparable to C + * indirect calls, take care when using. The target of these should be an ENDBR + * instruction irrespective of kCFI. */ .macro JMP_NOSPEC reg:req #ifdef CONFIG_RETPOLINE diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 5c91305d09d2..f42dbf17f52b 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -12,7 +12,9 @@ struct task_struct *__switch_to_asm(struct task_struct *prev, __visible struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); -asmlinkage void ret_from_fork(void); +asmlinkage void ret_from_fork_asm(void); +__visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs, + int (*fn)(void *), void *fn_arg); /* * This is the structure pointed to by thread.sp for an inactive task. The diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 72646d75b6ff..2dcf3a06af09 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -778,6 +778,8 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } #ifdef CONFIG_X86_KERNEL_IBT +static void poison_cfi(void *addr); + static void __init_or_module poison_endbr(void *addr, bool warn) { u32 endbr, poison = gen_endbr_poison(); @@ -802,8 +804,11 @@ static void __init_or_module poison_endbr(void *addr, bool warn) /* * Generated by: objtool --ibt + * + * Seal the functions for indirect calls by clobbering the ENDBR instructions + * and the kCFI hash value. */ -void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) +void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end) { s32 *s; @@ -812,13 +817,13 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) poison_endbr(addr, true); if (IS_ENABLED(CONFIG_FINEIBT)) - poison_endbr(addr - 16, false); + poison_cfi(addr - 16); } } #else -void __init_or_module apply_ibt_endbr(s32 *start, s32 *end) { } +void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { } #endif /* CONFIG_X86_KERNEL_IBT */ @@ -1063,6 +1068,17 @@ static int cfi_rewrite_preamble(s32 *start, s32 *end) return 0; } +static void cfi_rewrite_endbr(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; + + poison_endbr(addr+16, false); + } +} + /* .retpoline_sites */ static int cfi_rand_callers(s32 *start, s32 *end) { @@ -1157,14 +1173,19 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, return; case CFI_FINEIBT: + /* place the FineIBT preamble at func()-16 */ ret = cfi_rewrite_preamble(start_cfi, end_cfi); if (ret) goto err; + /* rewrite the callers to target func()-16 */ ret = cfi_rewrite_callers(start_retpoline, end_retpoline); if (ret) goto err; + /* now that nobody targets func()+0, remove ENDBR there */ + cfi_rewrite_endbr(start_cfi, end_cfi); + if (builtin) pr_info("Using FineIBT CFI\n"); return; @@ -1177,6 +1198,41 @@ err: pr_err("Something went horribly wrong trying to rewrite the CFI implementation.\n"); } +static inline void poison_hash(void *addr) +{ + *(u32 *)addr = 0; +} + +static void poison_cfi(void *addr) +{ + switch (cfi_mode) { + case CFI_FINEIBT: + /* + * __cfi_\func: + * osp nopl (%rax) + * subl $0, %r10d + * jz 1f + * ud2 + * 1: nop + */ + poison_endbr(addr, false); + poison_hash(addr + fineibt_preamble_hash); + break; + + case CFI_KCFI: + /* + * __cfi_\func: + * movl $0, %eax + * .skip 11, 0x90 + */ + poison_hash(addr + 1); + break; + + default: + break; + } +} + #else static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, @@ -1184,6 +1240,10 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, { } +#ifdef CONFIG_X86_KERNEL_IBT +static void poison_cfi(void *addr) { } +#endif + #endif void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, @@ -1565,7 +1625,10 @@ void __init alternative_instructions(void) */ callthunks_patch_builtin_calls(); - apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); + /* + * Seal all functions that do not have their address taken. + */ + apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); #ifdef CONFIG_SMP /* Patch to UP if other cpus not imminent. */ diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index b05f62ee2344..5f71a0cf4399 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -358,7 +358,7 @@ int module_finalize(const Elf_Ehdr *hdr, } if (ibt_endbr) { void *iseg = (void *)ibt_endbr->sh_addr; - apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size); + apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size); } if (locks) { void *lseg = (void *)locks->sh_addr; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ff9b80a0e3e3..72015dba72ab 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -28,6 +28,7 @@ #include <linux/static_call.h> #include <trace/events/power.h> #include <linux/hw_breakpoint.h> +#include <linux/entry-common.h> #include <asm/cpu.h> #include <asm/apic.h> #include <linux/uaccess.h> @@ -134,6 +135,25 @@ static int set_new_tls(struct task_struct *p, unsigned long tls) return do_set_thread_area_64(p, ARCH_SET_FS, tls); } +__visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs, + int (*fn)(void *), void *fn_arg) +{ + schedule_tail(prev); + + /* Is this a kernel thread? */ + if (unlikely(fn)) { + fn(fn_arg); + /* + * A kernel thread is allowed to return here after successfully + * calling kernel_execve(). Exit to userspace to complete the + * execve() syscall. + */ + regs->ax = 0; + } + + syscall_exit_to_user_mode(regs); +} + int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; @@ -149,7 +169,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) frame = &fork_frame->frame; frame->bp = encode_frame_pointer(childregs); - frame->ret_addr = (unsigned long) ret_from_fork; + frame->ret_addr = (unsigned long) ret_from_fork_asm; p->thread.sp = (unsigned long) fork_frame; p->thread.io_bitmap = NULL; p->thread.iopl_warn = 0; |