From 7be4412721aee25e35583a20a896085dc6b99c3e Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 19 Dec 2019 00:11:47 +0100 Subject: x86/insn-eval: Add support for 64-bit kernel mode To support evaluating 64-bit kernel mode instructions: * Replace existing checks for user_64bit_mode() with a new helper that checks whether code is being executed in either 64-bit kernel mode or 64-bit user mode. * Select the GS base depending on whether the instruction is being evaluated in kernel mode. Signed-off-by: Jann Horn Signed-off-by: Borislav Petkov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Dmitry Vyukov Cc: "Gustavo A. R. Silva" Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: kasan-dev@googlegroups.com Cc: Oleg Nesterov Cc: Sean Christopherson Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20191218231150.12139-1-jannh@google.com --- arch/x86/include/asm/ptrace.h | 13 +++++++++++++ arch/x86/lib/insn-eval.c | 26 +++++++++++++++----------- 2 files changed, 28 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 5057a8ed100b..ac45b06941a5 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -159,6 +159,19 @@ static inline bool user_64bit_mode(struct pt_regs *regs) #endif } +/* + * Determine whether the register set came from any context that is running in + * 64-bit mode. + */ +static inline bool any_64bit_mode(struct pt_regs *regs) +{ +#ifdef CONFIG_X86_64 + return !user_mode(regs) || user_64bit_mode(regs); +#else + return false; +#endif +} + #ifdef CONFIG_X86_64 #define current_user_stack_pointer() current_pt_regs()->sp #define compat_user_stack_pointer() current_pt_regs()->sp diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 306c3a0902ba..31600d851fd8 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -155,7 +155,7 @@ static bool check_seg_overrides(struct insn *insn, int regoff) */ static int resolve_default_seg(struct insn *insn, struct pt_regs *regs, int off) { - if (user_64bit_mode(regs)) + if (any_64bit_mode(regs)) return INAT_SEG_REG_IGNORE; /* * Resolve the default segment register as described in Section 3.7.4 @@ -266,7 +266,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff) * which may be invalid at this point. */ if (regoff == offsetof(struct pt_regs, ip)) { - if (user_64bit_mode(regs)) + if (any_64bit_mode(regs)) return INAT_SEG_REG_IGNORE; else return INAT_SEG_REG_CS; @@ -289,7 +289,7 @@ static int resolve_seg_reg(struct insn *insn, struct pt_regs *regs, int regoff) * In long mode, segment override prefixes are ignored, except for * overrides for FS and GS. */ - if (user_64bit_mode(regs)) { + if (any_64bit_mode(regs)) { if (idx != INAT_SEG_REG_FS && idx != INAT_SEG_REG_GS) idx = INAT_SEG_REG_IGNORE; @@ -646,23 +646,27 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx) */ return (unsigned long)(sel << 4); - if (user_64bit_mode(regs)) { + if (any_64bit_mode(regs)) { /* * Only FS or GS will have a base address, the rest of * the segments' bases are forced to 0. */ unsigned long base; - if (seg_reg_idx == INAT_SEG_REG_FS) + if (seg_reg_idx == INAT_SEG_REG_FS) { rdmsrl(MSR_FS_BASE, base); - else if (seg_reg_idx == INAT_SEG_REG_GS) + } else if (seg_reg_idx == INAT_SEG_REG_GS) { /* * swapgs was called at the kernel entry point. Thus, * MSR_KERNEL_GS_BASE will have the user-space GS base. */ - rdmsrl(MSR_KERNEL_GS_BASE, base); - else + if (user_mode(regs)) + rdmsrl(MSR_KERNEL_GS_BASE, base); + else + rdmsrl(MSR_GS_BASE, base); + } else { base = 0; + } return base; } @@ -703,7 +707,7 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx) if (sel < 0) return 0; - if (user_64bit_mode(regs) || v8086_mode(regs)) + if (any_64bit_mode(regs) || v8086_mode(regs)) return -1L; if (!sel) @@ -948,7 +952,7 @@ static int get_eff_addr_modrm(struct insn *insn, struct pt_regs *regs, * following instruction. */ if (*regoff == -EDOM) { - if (user_64bit_mode(regs)) + if (any_64bit_mode(regs)) tmp = regs->ip + insn->length; else tmp = 0; @@ -1250,7 +1254,7 @@ static void __user *get_addr_ref_32(struct insn *insn, struct pt_regs *regs) * After computed, the effective address is treated as an unsigned * quantity. */ - if (!user_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit)) + if (!any_64bit_mode(regs) && ((unsigned int)eff_addr > seg_limit)) goto out; /* -- cgit v1.2.3 From 59c1dcbed5b51cab543be8f47b6d7d9cf107ec94 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 19 Dec 2019 00:11:48 +0100 Subject: x86/traps: Print address on #GP A frequent cause of #GP exceptions are memory accesses to non-canonical addresses. Unlike #PF, #GP doesn't report a fault address in CR2, so the kernel doesn't currently print the fault address for a #GP. Luckily, the necessary infrastructure for decoding x86 instructions and computing the memory address being accessed is already present. Hook it up to the #GP handler so that the address operand of the faulting instruction can be figured out and printed. Distinguish two cases: a) (Part of) the memory range being accessed lies in the non-canonical address range; in this case, it is likely that the decoded address is actually the one that caused the #GP. b) The entire memory range of the decoded operand lies in canonical address space; the #GP may or may not be related in some way to the computed address. Print it, but with hedging language in the message. While it is already possible to compute the faulting address manually by disassembling the opcode dump and evaluating the instruction against the register dump, this should make it slightly easier to identify crashes at a glance. Note that the operand length which comes from the instruction decoder and is used to determine whether the access straddles into non-canonical address space, is currently somewhat unreliable; but it should be good enough, considering that Linux on x86-64 never maps the page directly before the start of the non-canonical range anyway, and therefore the case where a memory range begins in that page and potentially straddles into the non-canonical range should be fairly uncommon. In the case the address is still computed wrongly, it only influences whether the error message claims that the access is canonical. [ bp: Remove ambiguous "we", massage, reflow comments and spacing. ] Signed-off-by: Jann Horn Signed-off-by: Borislav Petkov Reviewed-by: Sean Christopherson Tested-by: Sean Christopherson Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Dmitry Vyukov Cc: "Eric W. Biederman" Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: kasan-dev@googlegroups.com Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20191218231150.12139-2-jannh@google.com --- arch/x86/kernel/traps.c | 72 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 67 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 05da6b5b167b..108ab1ee069f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -56,6 +56,8 @@ #include #include #include +#include +#include #ifdef CONFIG_X86_64 #include @@ -518,10 +520,53 @@ exit_trap: do_trap(X86_TRAP_BR, SIGSEGV, "bounds", regs, error_code, 0, NULL); } -dotraplinkage void -do_general_protection(struct pt_regs *regs, long error_code) +enum kernel_gp_hint { + GP_NO_HINT, + GP_NON_CANONICAL, + GP_CANONICAL +}; + +/* + * When an uncaught #GP occurs, try to determine the memory address accessed by + * the instruction and return that address to the caller. Also, try to figure + * out whether any part of the access to that address was non-canonical. + */ +static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs, + unsigned long *addr) { - const char *desc = "general protection fault"; + u8 insn_buf[MAX_INSN_SIZE]; + struct insn insn; + + if (probe_kernel_read(insn_buf, (void *)regs->ip, MAX_INSN_SIZE)) + return GP_NO_HINT; + + kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE); + insn_get_modrm(&insn); + insn_get_sib(&insn); + + *addr = (unsigned long)insn_get_addr_ref(&insn, regs); + if (*addr == -1UL) + return GP_NO_HINT; + +#ifdef CONFIG_X86_64 + /* + * Check that: + * - the operand is not in the kernel half + * - the last byte of the operand is not in the user canonical half + */ + if (*addr < ~__VIRTUAL_MASK && + *addr + insn.opnd_bytes - 1 > __VIRTUAL_MASK) + return GP_NON_CANONICAL; +#endif + + return GP_CANONICAL; +} + +#define GPFSTR "general protection fault" + +dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) +{ + char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR; struct task_struct *tsk; RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); @@ -540,6 +585,9 @@ do_general_protection(struct pt_regs *regs, long error_code) tsk = current; if (!user_mode(regs)) { + enum kernel_gp_hint hint = GP_NO_HINT; + unsigned long gp_addr; + if (fixup_exception(regs, X86_TRAP_GP, error_code, 0)) return; @@ -556,8 +604,22 @@ do_general_protection(struct pt_regs *regs, long error_code) return; if (notify_die(DIE_GPF, desc, regs, error_code, - X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP) - die(desc, regs, error_code); + X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP) + return; + + if (error_code) + snprintf(desc, sizeof(desc), "segment-related " GPFSTR); + else + hint = get_kernel_gp_address(regs, &gp_addr); + + if (hint != GP_NO_HINT) + snprintf(desc, sizeof(desc), GPFSTR ", %s 0x%lx", + (hint == GP_NON_CANONICAL) ? + "probably for non-canonical address" : + "maybe for address", + gp_addr); + + die(desc, regs, error_code); return; } -- cgit v1.2.3 From aa49f20462c90df4150f33d245cbcfe0d9c80350 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 19 Dec 2019 00:11:49 +0100 Subject: x86/dumpstack: Introduce die_addr() for die() with #GP fault address Split __die() into __die_header() and __die_body(). This allows inserting extra information below the header line that initiates the bug report. Introduce a new function die_addr() that behaves like die(), but is for faults only and uses __die_header() and __die_body() so that a future commit can print extra information after the header line. [ bp: Comment the KASAN-specific usage of gp_addr. ] Signed-off-by: Jann Horn Signed-off-by: Borislav Petkov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Dmitry Vyukov Cc: "Eric W. Biederman" Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: kasan-dev@googlegroups.com Cc: Masami Hiramatsu Cc: "Peter Zijlstra (Intel)" Cc: Sean Christopherson Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20191218231150.12139-3-jannh@google.com --- arch/x86/include/asm/kdebug.h | 1 + arch/x86/kernel/dumpstack.c | 24 +++++++++++++++++++++++- arch/x86/kernel/traps.c | 9 ++++++++- 3 files changed, 32 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 75f1e35e7c15..247ab14c6309 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -33,6 +33,7 @@ enum show_regs_mode { }; extern void die(const char *, struct pt_regs *,long); +void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr); extern int __must_check __die(const char *, struct pt_regs *, long); extern void show_stack_regs(struct pt_regs *regs); extern void __show_regs(struct pt_regs *regs, enum show_regs_mode); diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index e07424e19274..8995bf10c97c 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -365,7 +365,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr) } NOKPROBE_SYMBOL(oops_end); -int __die(const char *str, struct pt_regs *regs, long err) +static void __die_header(const char *str, struct pt_regs *regs, long err) { const char *pr = ""; @@ -384,7 +384,11 @@ int __die(const char *str, struct pt_regs *regs, long err) IS_ENABLED(CONFIG_KASAN) ? " KASAN" : "", IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ? (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : ""); +} +NOKPROBE_SYMBOL(__die_header); +static int __die_body(const char *str, struct pt_regs *regs, long err) +{ show_regs(regs); print_modules(); @@ -394,6 +398,13 @@ int __die(const char *str, struct pt_regs *regs, long err) return 0; } +NOKPROBE_SYMBOL(__die_body); + +int __die(const char *str, struct pt_regs *regs, long err) +{ + __die_header(str, regs, err); + return __die_body(str, regs, err); +} NOKPROBE_SYMBOL(__die); /* @@ -410,6 +421,17 @@ void die(const char *str, struct pt_regs *regs, long err) oops_end(flags, regs, sig); } +void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr) +{ + unsigned long flags = oops_begin(); + int sig = SIGSEGV; + + __die_header(str, regs, err); + if (__die_body(str, regs, err)) + sig = 0; + oops_end(flags, regs, sig); +} + void show_regs(struct pt_regs *regs) { show_regs_print_info(KERN_DEFAULT); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 108ab1ee069f..2afd7d8d4007 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -619,7 +619,14 @@ dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) "maybe for address", gp_addr); - die(desc, regs, error_code); + /* + * KASAN is interested only in the non-canonical case, clear it + * otherwise. + */ + if (hint != GP_NON_CANONICAL) + gp_addr = 0; + + die_addr(desc, regs, error_code, gp_addr); return; } -- cgit v1.2.3 From 2f004eea0fc8f86b45dfc2007add2d4986de8d02 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 19 Dec 2019 00:11:50 +0100 Subject: x86/kasan: Print original address on #GP Make #GP exceptions caused by out-of-bounds KASAN shadow accesses easier to understand by computing the address of the original access and printing that. More details are in the comments in the patch. This turns an error like this: kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault, probably for non-canonical address 0xe017577ddf75b7dd: 0000 [#1] PREEMPT SMP KASAN PTI into this: general protection fault, probably for non-canonical address 0xe017577ddf75b7dd: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: maybe wild-memory-access in range [0x00badbeefbadbee8-0x00badbeefbadbeef] The hook is placed in architecture-independent code, but is currently only wired up to the X86 exception handler because I'm not sufficiently familiar with the address space layout and exception handling mechanisms on other architectures. Signed-off-by: Jann Horn Signed-off-by: Borislav Petkov Reviewed-by: Dmitry Vyukov Cc: Alexander Potapenko Cc: Andrew Morton Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: kasan-dev@googlegroups.com Cc: linux-mm Cc: Peter Zijlstra Cc: Sean Christopherson Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20191218231150.12139-4-jannh@google.com --- arch/x86/kernel/dumpstack.c | 2 ++ arch/x86/mm/kasan_init_64.c | 21 --------------------- include/linux/kasan.h | 6 ++++++ mm/kasan/report.c | 40 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 8995bf10c97c..ae64ec7f752f 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -427,6 +427,8 @@ void die_addr(const char *str, struct pt_regs *regs, long err, long gp_addr) int sig = SIGSEGV; __die_header(str, regs, err); + if (gp_addr) + kasan_non_canonical_hook(gp_addr); if (__die_body(str, regs, err)) sig = 0; oops_end(flags, regs, sig); diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index cf5bc37c90ac..763e71abc0fe 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -288,23 +288,6 @@ static void __init kasan_shallow_populate_pgds(void *start, void *end) } while (pgd++, addr = next, addr != (unsigned long)end); } -#ifdef CONFIG_KASAN_INLINE -static int kasan_die_handler(struct notifier_block *self, - unsigned long val, - void *data) -{ - if (val == DIE_GPF) { - pr_emerg("CONFIG_KASAN_INLINE enabled\n"); - pr_emerg("GPF could be caused by NULL-ptr deref or user memory access\n"); - } - return NOTIFY_OK; -} - -static struct notifier_block kasan_die_notifier = { - .notifier_call = kasan_die_handler, -}; -#endif - void __init kasan_early_init(void) { int i; @@ -341,10 +324,6 @@ void __init kasan_init(void) int i; void *shadow_cpu_entry_begin, *shadow_cpu_entry_end; -#ifdef CONFIG_KASAN_INLINE - register_die_notifier(&kasan_die_notifier); -#endif - memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt)); /* diff --git a/include/linux/kasan.h b/include/linux/kasan.h index e18fe54969e9..5cde9e7c2664 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -228,4 +228,10 @@ static inline void kasan_release_vmalloc(unsigned long start, unsigned long free_region_end) {} #endif +#ifdef CONFIG_KASAN_INLINE +void kasan_non_canonical_hook(unsigned long addr); +#else /* CONFIG_KASAN_INLINE */ +static inline void kasan_non_canonical_hook(unsigned long addr) { } +#endif /* CONFIG_KASAN_INLINE */ + #endif /* LINUX_KASAN_H */ diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 621782100eaa..5ef9f24f566b 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -512,3 +512,43 @@ void __kasan_report(unsigned long addr, size_t size, bool is_write, unsigned lon end_report(&flags); } + +#ifdef CONFIG_KASAN_INLINE +/* + * With CONFIG_KASAN_INLINE, accesses to bogus pointers (outside the high + * canonical half of the address space) cause out-of-bounds shadow memory reads + * before the actual access. For addresses in the low canonical half of the + * address space, as well as most non-canonical addresses, that out-of-bounds + * shadow memory access lands in the non-canonical part of the address space. + * Help the user figure out what the original bogus pointer was. + */ +void kasan_non_canonical_hook(unsigned long addr) +{ + unsigned long orig_addr; + const char *bug_type; + + if (addr < KASAN_SHADOW_OFFSET) + return; + + orig_addr = (addr - KASAN_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT; + /* + * For faults near the shadow address for NULL, we can be fairly certain + * that this is a KASAN shadow memory access. + * For faults that correspond to shadow for low canonical addresses, we + * can still be pretty sure - that shadow region is a fairly narrow + * chunk of the non-canonical address space. + * But faults that look like shadow for non-canonical addresses are a + * really large chunk of the address space. In that case, we still + * print the decoded address, but make it clear that this is not + * necessarily what's actually going on. + */ + if (orig_addr < PAGE_SIZE) + bug_type = "null-ptr-deref"; + else if (orig_addr < TASK_SIZE) + bug_type = "probably user-memory-access"; + else + bug_type = "maybe wild-memory-access"; + pr_alert("KASAN: %s in range [0x%016lx-0x%016lx]\n", bug_type, + orig_addr, orig_addr + KASAN_SHADOW_MASK); +} +#endif -- cgit v1.2.3 From 36209766cede1fe9d39f3d3418d93bbf71ad21c4 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 31 Dec 2019 17:15:35 +0100 Subject: x86/traps: Cleanup do_general_protection() Hoist the user_mode() case up because it is less code and can be dealt with up-front like the other special cases UMIP and vm86. This saves an indentation level for the kernel-mode #GP case and allows to "unfold" the code more so that it is more readable. No functional changes. Signed-off-by: Borislav Petkov Cc: Jann Horn Cc: x86@kernel.org --- arch/x86/kernel/traps.c | 79 +++++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 2afd7d8d4007..ca395ad28b4e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -567,7 +567,10 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs, dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) { char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR; + enum kernel_gp_hint hint = GP_NO_HINT; struct task_struct *tsk; + unsigned long gp_addr; + int ret; RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); cond_local_irq_enable(regs); @@ -584,58 +587,56 @@ dotraplinkage void do_general_protection(struct pt_regs *regs, long error_code) } tsk = current; - if (!user_mode(regs)) { - enum kernel_gp_hint hint = GP_NO_HINT; - unsigned long gp_addr; - - if (fixup_exception(regs, X86_TRAP_GP, error_code, 0)) - return; + if (user_mode(regs)) { tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; - /* - * To be potentially processing a kprobe fault and to - * trust the result from kprobe_running(), we have to - * be non-preemptible. - */ - if (!preemptible() && kprobe_running() && - kprobe_fault_handler(regs, X86_TRAP_GP)) - return; + show_signal(tsk, SIGSEGV, "", desc, regs, error_code); + force_sig(SIGSEGV); - if (notify_die(DIE_GPF, desc, regs, error_code, - X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP) - return; + return; + } - if (error_code) - snprintf(desc, sizeof(desc), "segment-related " GPFSTR); - else - hint = get_kernel_gp_address(regs, &gp_addr); + if (fixup_exception(regs, X86_TRAP_GP, error_code, 0)) + return; - if (hint != GP_NO_HINT) - snprintf(desc, sizeof(desc), GPFSTR ", %s 0x%lx", - (hint == GP_NON_CANONICAL) ? - "probably for non-canonical address" : - "maybe for address", - gp_addr); + tsk->thread.error_code = error_code; + tsk->thread.trap_nr = X86_TRAP_GP; - /* - * KASAN is interested only in the non-canonical case, clear it - * otherwise. - */ - if (hint != GP_NON_CANONICAL) - gp_addr = 0; + /* + * To be potentially processing a kprobe fault and to trust the result + * from kprobe_running(), we have to be non-preemptible. + */ + if (!preemptible() && + kprobe_running() && + kprobe_fault_handler(regs, X86_TRAP_GP)) + return; - die_addr(desc, regs, error_code, gp_addr); + ret = notify_die(DIE_GPF, desc, regs, error_code, X86_TRAP_GP, SIGSEGV); + if (ret == NOTIFY_STOP) return; - } - tsk->thread.error_code = error_code; - tsk->thread.trap_nr = X86_TRAP_GP; + if (error_code) + snprintf(desc, sizeof(desc), "segment-related " GPFSTR); + else + hint = get_kernel_gp_address(regs, &gp_addr); + + if (hint != GP_NO_HINT) + snprintf(desc, sizeof(desc), GPFSTR ", %s 0x%lx", + (hint == GP_NON_CANONICAL) ? "probably for non-canonical address" + : "maybe for address", + gp_addr); + + /* + * KASAN is interested only in the non-canonical case, clear it + * otherwise. + */ + if (hint != GP_NON_CANONICAL) + gp_addr = 0; - show_signal(tsk, SIGSEGV, "", desc, regs, error_code); + die_addr(desc, regs, error_code, gp_addr); - force_sig(SIGSEGV); } NOKPROBE_SYMBOL(do_general_protection); -- cgit v1.2.3 From 248ed51048c40d36728e70914e38bffd7821da57 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 11 Jan 2020 20:54:27 +0800 Subject: x86/nmi: Remove irq_work from the long duration NMI handler First, printk() is NMI-context safe now since the safe printk() has been implemented and it already has an irq_work to make NMI-context safe. Second, this NMI irq_work actually does not work if a NMI handler causes panic by watchdog timeout. It has no chance to run in such case, while the safe printk() will flush its per-cpu buffers before panicking. While at it, repurpose the irq_work callback into a function which concentrates the NMI duration checking and makes the code easier to follow. [ bp: Massage. ] Signed-off-by: Changbin Du Signed-off-by: Borislav Petkov Acked-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20200111125427.15662-1-changbin.du@gmail.com --- arch/x86/include/asm/nmi.h | 1 - arch/x86/kernel/nmi.c | 20 +++++++++----------- 2 files changed, 9 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 75ded1d13d98..9d5d949e662e 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -41,7 +41,6 @@ struct nmiaction { struct list_head list; nmi_handler_t handler; u64 max_duration; - struct irq_work irq_work; unsigned long flags; const char *name; }; diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index e676a9916c49..54c21d6abd5a 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -104,18 +104,22 @@ static int __init nmi_warning_debugfs(void) } fs_initcall(nmi_warning_debugfs); -static void nmi_max_handler(struct irq_work *w) +static void nmi_check_duration(struct nmiaction *action, u64 duration) { - struct nmiaction *a = container_of(w, struct nmiaction, irq_work); + u64 whole_msecs = READ_ONCE(action->max_duration); int remainder_ns, decimal_msecs; - u64 whole_msecs = READ_ONCE(a->max_duration); + + if (duration < nmi_longest_ns || duration < action->max_duration) + return; + + action->max_duration = duration; remainder_ns = do_div(whole_msecs, (1000 * 1000)); decimal_msecs = remainder_ns / 1000; printk_ratelimited(KERN_INFO "INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n", - a->handler, whole_msecs, decimal_msecs); + action->handler, whole_msecs, decimal_msecs); } static int nmi_handle(unsigned int type, struct pt_regs *regs) @@ -142,11 +146,7 @@ static int nmi_handle(unsigned int type, struct pt_regs *regs) delta = sched_clock() - delta; trace_nmi_handler(a->handler, (int)delta, thishandled); - if (delta < nmi_longest_ns || delta < a->max_duration) - continue; - - a->max_duration = delta; - irq_work_queue(&a->irq_work); + nmi_check_duration(a, delta); } rcu_read_unlock(); @@ -164,8 +164,6 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) if (!action->handler) return -EINVAL; - init_irq_work(&action->irq_work, nmi_max_handler); - raw_spin_lock_irqsave(&desc->lock, flags); /* -- cgit v1.2.3