diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2017-06-29 16:13:25 +1000 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2017-06-29 16:13:25 +1000 |
commit | 7cf0526ce2b7867e927d568f26ffca49a8c0ee43 (patch) | |
tree | e481d70ba6900be3c8f0a3bdcba423010cd9d768 | |
parent | 516619b8d112c528ab4f6076dfea4371656976fb (diff) | |
parent | 1a0df77cd27e928ea008fc995da24bc610286d42 (diff) |
Merge branch 'akpm/master'
83 files changed, 1365 insertions, 446 deletions
diff --git a/Documentation/DMA-ISA-LPC.txt b/Documentation/DMA-ISA-LPC.txt index c41331398752..7a065ac4a9d1 100644 --- a/Documentation/DMA-ISA-LPC.txt +++ b/Documentation/DMA-ISA-LPC.txt @@ -42,7 +42,7 @@ requirements you pass the flag GFP_DMA to kmalloc. Unfortunately the memory available for ISA DMA is scarce so unless you allocate the memory during boot-up it's a good idea to also pass -__GFP_REPEAT and __GFP_NOWARN to make the allocator try a bit harder. +__GFP_RETRY_MAYFAIL and __GFP_NOWARN to make the allocator try a bit harder. (This scarcity also means that you should allocate the buffer as early as possible and not release it until the driver is unloaded.) diff --git a/arch/Kconfig b/arch/Kconfig index cae0958a2298..21d0089117fe 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -198,9 +198,6 @@ config HAVE_KPROBES_ON_FTRACE config HAVE_NMI bool -config HAVE_NMI_WATCHDOG - depends on HAVE_NMI - bool # # An arch should select this if it provides all these things: # @@ -226,6 +223,12 @@ config GENERIC_SMP_IDLE_THREAD config GENERIC_IDLE_POLL_SETUP bool +config ARCH_HAS_FORTIFY_SOURCE + bool + help + An architecture should select this when it can successfully + build and run with CONFIG_FORTIFY_SOURCE. + # Select if arch has all set_memory_ro/rw/x/nx() functions in asm/cacheflush.h config ARCH_HAS_SET_MEMORY bool @@ -288,6 +291,28 @@ config HAVE_PERF_EVENTS_NMI subsystem. Also has support for calculating CPU cycle events to determine how many clock cycles in a given period. +config HAVE_HARDLOCKUP_DETECTOR_PERF + bool + depends on HAVE_PERF_EVENTS_NMI + help + The arch chooses to use the generic perf-NMI-based hardlockup + detector. Must define HAVE_PERF_EVENTS_NMI. + +config HAVE_NMI_WATCHDOG + depends on HAVE_NMI + bool + help + The arch provides a low level NMI watchdog. It provides + asm/nmi.h, and defines its own arch_touch_nmi_watchdog(). + +config HAVE_HARDLOCKUP_DETECTOR_ARCH + bool + select HAVE_NMI_WATCHDOG + help + The arch chooses to provide its own hardlockup detector, which is + a superset of the HAVE_NMI_WATCHDOG. It also conforms to config + interfaces and parameters provided by hardlockup detector subsystem. + config HAVE_PERF_REGS bool help diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8addb851ab5e..dfd908630631 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -12,6 +12,7 @@ config ARM64 select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA select ARCH_HAS_KCOV diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h index 2eb714c4639f..d0aa42907569 100644 --- a/arch/arm64/include/asm/string.h +++ b/arch/arm64/include/asm/string.h @@ -63,6 +63,11 @@ extern int memcmp(const void *, const void *, size_t); #define memcpy(dst, src, len) __memcpy(dst, src, len) #define memmove(dst, src, len) __memmove(dst, src, len) #define memset(s, c, n) __memset(s, c, n) + +#ifndef __NO_FORTIFY +#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */ +#endif + #endif #endif diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index adc208c2ae9c..decccffb03ca 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -35,7 +35,7 @@ * Leave enough space between the mmap area and the stack to honour ulimit in * the face of randomisation. */ -#define MIN_GAP (SZ_128M + ((STACK_RND_MASK << PAGE_SHIFT) + 1)) +#define MIN_GAP (SZ_128M) #define MAX_GAP (STACK_TOP/6*5) static int mmap_is_legacy(void) @@ -65,6 +65,11 @@ unsigned long arch_mmap_rnd(void) static unsigned long mmap_base(unsigned long rnd) { unsigned long gap = rlimit(RLIMIT_STACK); + unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap; + + /* Values close to RLIM_INFINITY can overflow. */ + if (gap + pad > gap) + gap += pad; if (gap < MIN_GAP) gap = MIN_GAP; diff --git a/arch/blackfin/include/asm/nmi.h b/arch/blackfin/include/asm/nmi.h index b9caac4fcfd8..107d23705f46 100644 --- a/arch/blackfin/include/asm/nmi.h +++ b/arch/blackfin/include/asm/nmi.h @@ -9,4 +9,6 @@ #include <linux/nmi.h> +extern void arch_touch_nmi_watchdog(void); + #endif diff --git a/arch/blackfin/kernel/nmi.c b/arch/blackfin/kernel/nmi.c index 633c37083e87..1e714329fe8a 100644 --- a/arch/blackfin/kernel/nmi.c +++ b/arch/blackfin/kernel/nmi.c @@ -190,7 +190,7 @@ static int __init init_nmi_wdt(void) } device_initcall(init_nmi_wdt); -void touch_nmi_watchdog(void) +void arch_touch_nmi_watchdog(void) { atomic_set(&nmi_touched[smp_processor_id()], 1); } diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index a1bdb1ea5234..39b9f311c4ef 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -116,7 +116,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { pud_t *pud; - pud = (pud_t *) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, PUD_ORDER); + pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER); if (pud) pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); return pud; diff --git a/arch/mn10300/include/asm/nmi.h b/arch/mn10300/include/asm/nmi.h index f3671cbbc117..b05627597b1b 100644 --- a/arch/mn10300/include/asm/nmi.h +++ b/arch/mn10300/include/asm/nmi.h @@ -11,4 +11,6 @@ #ifndef _ASM_NMI_H #define _ASM_NMI_H +extern void arch_touch_nmi_watchdog(void); + #endif /* _ASM_NMI_H */ diff --git a/arch/mn10300/kernel/mn10300-watchdog-low.S b/arch/mn10300/kernel/mn10300-watchdog-low.S index f2f5c9cfaabd..34f8773de7d0 100644 --- a/arch/mn10300/kernel/mn10300-watchdog-low.S +++ b/arch/mn10300/kernel/mn10300-watchdog-low.S @@ -50,9 +50,9 @@ watchdog_handler: # we can't inline it) # ############################################################################### - .globl touch_nmi_watchdog - .type touch_nmi_watchdog,@function -touch_nmi_watchdog: + .globl arch_touch_nmi_watchdog + .type arch_touch_nmi_watchdog,@function +arch_touch_nmi_watchdog: clr d0 clr d1 mov watchdog_alert_counter, a0 @@ -63,4 +63,4 @@ touch_nmi_watchdog: lne ret [],0 - .size touch_nmi_watchdog,.-touch_nmi_watchdog + .size arch_touch_nmi_watchdog,.-arch_touch_nmi_watchdog diff --git a/arch/mn10300/kernel/mn10300-watchdog.c b/arch/mn10300/kernel/mn10300-watchdog.c index a2d8e6938d67..0d5641beadf5 100644 --- a/arch/mn10300/kernel/mn10300-watchdog.c +++ b/arch/mn10300/kernel/mn10300-watchdog.c @@ -31,7 +31,7 @@ static unsigned int watchdog; static unsigned int watchdog_hz = 1; unsigned int watchdog_alert_counter[NR_CPUS]; -EXPORT_SYMBOL(touch_nmi_watchdog); +EXPORT_SYMBOL(arch_touch_nmi_watchdog); /* * the best way to detect whether a CPU has a 'hard lockup' problem diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d0ee448700de..d556f9557f04 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -82,7 +82,7 @@ config NR_IRQS config NMI_IPI bool - depends on SMP && (DEBUGGER || KEXEC_CORE) + depends on SMP && (DEBUGGER || KEXEC_CORE || HARDLOCKUP_DETECTOR) default y config STACKTRACE_SUPPORT @@ -125,6 +125,7 @@ config PPC select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_DMA_SET_COHERENT_MASK select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE select ARCH_HAS_SG_CHAIN @@ -189,11 +190,13 @@ config PPC select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_MOD_ARCH_SPECIFIC - select HAVE_NMI if PERF_EVENTS + select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) + select HAVE_HARDLOCKUP_DETECTOR_ARCH if (PPC64 && PPC_BOOK3S) select HAVE_OPROFILE select HAVE_OPTPROBES if PPC64 select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI if PPC64 + select HAVE_HARDLOCKUP_DETECTOR_PERF if HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_RCU_TABLE_FREE if SMP diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 20b1485ff1e8..e2329db9d6f4 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -56,7 +56,7 @@ static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm) return (pgd_t *)__get_free_page(pgtable_gfp_flags(mm, PGALLOC_GFP)); #else struct page *page; - page = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_REPEAT), + page = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP | __GFP_RETRY_MAYFAIL), 4); if (!page) return NULL; diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index ff1ccb375e60..6f8e79cd35d8 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -1,4 +1,15 @@ #ifndef _ASM_NMI_H #define _ASM_NMI_H +#ifdef CONFIG_HARDLOCKUP_DETECTOR +extern void arch_touch_nmi_watchdog(void); + +extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, + bool exclude_self); +#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace + +#else +static inline void arch_touch_nmi_watchdog(void) {} +#endif + #endif /* _ASM_NMI_H */ diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index ebddb2111d87..8ea98504f900 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -55,6 +55,8 @@ struct smp_ops_t { int (*cpu_bootable)(unsigned int nr); }; +extern void smp_flush_nmi_ipi(u64 delay_us); +extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us); extern void smp_send_debugger_break(void); extern void start_secondary_resume(void); extern void smp_generic_give_timebase(void); diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 0845eebc5af3..4aa7c147e447 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -38,6 +38,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ signal_64.o ptrace32.o \ paca.o nvram_64.o firmware.o obj-$(CONFIG_VDSO32) += vdso32/ +obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index fb4ffc773082..3ba8c4590c62 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1314,6 +1314,31 @@ EXC_REAL_NONE(0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) #endif +#if defined(CONFIG_HARDLOCKUP_DETECTOR) && defined(CONFIG_HAVE_HARDLOCKUP_DETECTOR_ARCH) + +#define MASKED_DEC_HANDLER_LABEL 3f + +#define MASKED_DEC_HANDLER(_H) \ +3: /* soft-nmi */ \ + std r12,PACA_EXGEN+EX_R12(r13); \ + GET_SCRATCH0(r10); \ + std r10,PACA_EXGEN+EX_R13(r13); \ + EXCEPTION_PROLOG_PSERIES_1(soft_nmi_common, _H) + +EXC_COMMON_BEGIN(soft_nmi_common) + mr r10,r1 + ld r1,PACAEMERGSP(r13) + ld r1,PACA_NMI_EMERG_SP(r13) + subi r1,r1,INT_FRAME_SIZE + EXCEPTION_COMMON_NORET_STACK(PACA_EXGEN, 0x900, + system_reset, soft_nmi_interrupt, + ADD_NVGPRS;ADD_RECONCILE) + b ret_from_except + +#else +#define MASKED_DEC_HANDLER_LABEL 2f /* normal return */ +#define MASKED_DEC_HANDLER(_H) +#endif /* * An interrupt came in while soft-disabled. We set paca->irq_happened, then: @@ -1336,7 +1361,7 @@ masked_##_H##interrupt: \ lis r10,0x7fff; \ ori r10,r10,0xffff; \ mtspr SPRN_DEC,r10; \ - b 2f; \ + b MASKED_DEC_HANDLER_LABEL; \ 1: cmpwi r10,PACA_IRQ_DBELL; \ beq 2f; \ cmpwi r10,PACA_IRQ_HMI; \ @@ -1351,7 +1376,8 @@ masked_##_H##interrupt: \ ld r11,PACA_EXGEN+EX_R11(r13); \ GET_SCRATCH0(r13); \ ##_H##rfid; \ - b . + b .; \ + MASKED_DEC_HANDLER(_H) /* * Real mode exceptions actually use this too, but alternate diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 9ad37f827a97..1086ea37c832 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -25,6 +25,7 @@ #include <linux/kvm_para.h> #include <linux/slab.h> #include <linux/of.h> +#include <linux/nmi.h> /* hardlockup_detector_disable() */ #include <asm/reg.h> #include <asm/sections.h> @@ -718,6 +719,12 @@ static __init void kvm_free_tmp(void) static int __init kvm_guest_init(void) { + /* + * The hardlockup detector is likely to get false positives in + * KVM guests, so disable it by default. + */ + hardlockup_detector_disable(); + if (!kvm_para_available()) goto free_tmp; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index dd8a04f3053a..613f79f03877 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -15,6 +15,9 @@ #undef DEBUG_PROM +/* we cannot use FORTIFY as it brings in new symbols */ +#define __NO_FORTIFY + #include <stdarg.h> #include <linux/kernel.h> #include <linux/string.h> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 4640f6d64f8b..af23d4b576ec 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -751,22 +751,3 @@ unsigned long memory_block_size_bytes(void) struct ppc_pci_io ppc_pci_io; EXPORT_SYMBOL(ppc_pci_io); #endif - -#ifdef CONFIG_HARDLOCKUP_DETECTOR -u64 hw_nmi_get_sample_period(int watchdog_thresh) -{ - return ppc_proc_freq * watchdog_thresh; -} - -/* - * The hardlockup detector breaks PMU event based branches and is likely - * to get false positives in KVM guests, so disable it by default. - */ -static int __init disable_hardlockup_detector(void) -{ - hardlockup_detector_disable(); - - return 0; -} -early_initcall(disable_hardlockup_detector); -#endif diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 72035493c429..e4f82e1b5f67 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -434,13 +434,31 @@ static void do_smp_send_nmi_ipi(int cpu) } } +void smp_flush_nmi_ipi(u64 delay_us) +{ + unsigned long flags; + + nmi_ipi_lock_start(&flags); + while (nmi_ipi_busy_count) { + nmi_ipi_unlock_end(&flags); + udelay(1); + if (delay_us) { + delay_us--; + if (!delay_us) + return; + } + nmi_ipi_lock_start(&flags); + } + nmi_ipi_unlock_end(&flags); +} + /* * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS. * - fn is the target callback function. * - delay_us > 0 is the delay before giving up waiting for targets to * enter the handler, == 0 specifies indefinite delay. */ -static int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) +int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) { unsigned long flags; int me = raw_smp_processor_id(); diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c new file mode 100644 index 000000000000..ae1ed7516eae --- /dev/null +++ b/arch/powerpc/kernel/watchdog.c @@ -0,0 +1,388 @@ +/* + * Watchdog support on powerpc systems. + * + * Copyright 2017, IBM Corporation. + * + * This uses code from arch/sparc/kernel/nmi.c and kernel/watchdog.c + */ +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/cpu.h> +#include <linux/nmi.h> +#include <linux/module.h> +#include <linux/export.h> +#include <linux/kprobes.h> +#include <linux/hardirq.h> +#include <linux/reboot.h> +#include <linux/slab.h> +#include <linux/kdebug.h> +#include <linux/sched/debug.h> +#include <linux/delay.h> +#include <linux/smp.h> + +#include <asm/paca.h> + +/* + * The watchdog has a simple timer that runs on each CPU, once per timer + * period. This is the heartbeat. + * + * Then there are checks to see if the heartbeat has not triggered on a CPU + * for the panic timeout period. Currently the watchdog only supports an + * SMP check, so the heartbeat only turns on when we have 2 or more CPUs. + * + * This is not an NMI watchdog, but Linux uses that name for a generic + * watchdog in some cases, so NMI gets used in some places. + */ + +static cpumask_t wd_cpus_enabled __read_mostly; + +static u64 wd_panic_timeout_tb __read_mostly; /* timebase ticks until panic */ +static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */ + +static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */ + +static DEFINE_PER_CPU(struct timer_list, wd_timer); +static DEFINE_PER_CPU(u64, wd_timer_tb); + +/* + * These are for the SMP checker. CPUs clear their pending bit in their + * heartbeat. If the bitmask becomes empty, the time is noted and the + * bitmask is refilled. + * + * All CPUs clear their bit in the pending mask every timer period. + * Once all have cleared, the time is noted and the bits are reset. + * If the time since all clear was greater than the panic timeout, + * we can panic with the list of stuck CPUs. + * + * This will work best with NMI IPIs for crash code so the stuck CPUs + * can be pulled out to get their backtraces. + */ +static unsigned long __wd_smp_lock; +static cpumask_t wd_smp_cpus_pending; +static cpumask_t wd_smp_cpus_stuck; +static u64 wd_smp_last_reset_tb; + +static inline void wd_smp_lock(unsigned long *flags) +{ + /* + * Avoid locking layers if possible. + * This may be called from low level interrupt handlers at some + * point in future. + */ + local_irq_save(*flags); + while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) + cpu_relax(); +} + +static inline void wd_smp_unlock(unsigned long *flags) +{ + clear_bit_unlock(0, &__wd_smp_lock); + local_irq_restore(*flags); +} + +static void wd_lockup_ipi(struct pt_regs *regs) +{ + pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", raw_smp_processor_id()); + print_modules(); + print_irqtrace_events(current); + if (regs) + show_regs(regs); + else + dump_stack(); + + if (hardlockup_panic) + nmi_panic(regs, "Hard LOCKUP"); +} + +static void set_cpu_stuck(int cpu, u64 tb) +{ + cpumask_set_cpu(cpu, &wd_smp_cpus_stuck); + cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); + if (cpumask_empty(&wd_smp_cpus_pending)) { + wd_smp_last_reset_tb = tb; + cpumask_andnot(&wd_smp_cpus_pending, + &wd_cpus_enabled, + &wd_smp_cpus_stuck); + } +} + +static void watchdog_smp_panic(int cpu, u64 tb) +{ + unsigned long flags; + int c; + + wd_smp_lock(&flags); + /* Double check some things under lock */ + if ((s64)(tb - wd_smp_last_reset_tb) < (s64)wd_smp_panic_timeout_tb) + goto out; + if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) + goto out; + if (cpumask_weight(&wd_smp_cpus_pending) == 0) + goto out; + + pr_emerg("Watchdog CPU:%d detected Hard LOCKUP other CPUS:%*pbl\n", + cpu, cpumask_pr_args(&wd_smp_cpus_pending)); + + /* + * Try to trigger the stuck CPUs. + */ + for_each_cpu(c, &wd_smp_cpus_pending) { + if (c == cpu) + continue; + smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000); + } + smp_flush_nmi_ipi(1000000); + + /* Take the stuck CPU out of the watch group */ + for_each_cpu(c, &wd_smp_cpus_pending) + set_cpu_stuck(c, tb); + +out: + wd_smp_unlock(&flags); + + printk_safe_flush(); + /* + * printk_safe_flush() seems to require another print + * before anything actually goes out to console. + */ + if (sysctl_hardlockup_all_cpu_backtrace) + trigger_allbutself_cpu_backtrace(); + + if (hardlockup_panic) + nmi_panic(NULL, "Hard LOCKUP"); +} + +static void wd_smp_clear_cpu_pending(int cpu, u64 tb) +{ + if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) { + if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) { + unsigned long flags; + + pr_emerg("Watchdog CPU:%d became unstuck\n", cpu); + wd_smp_lock(&flags); + cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck); + wd_smp_unlock(&flags); + } + return; + } + cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); + if (cpumask_empty(&wd_smp_cpus_pending)) { + unsigned long flags; + + wd_smp_lock(&flags); + if (cpumask_empty(&wd_smp_cpus_pending)) { + wd_smp_last_reset_tb = tb; + cpumask_andnot(&wd_smp_cpus_pending, + &wd_cpus_enabled, + &wd_smp_cpus_stuck); + } + wd_smp_unlock(&flags); + } +} + +static void watchdog_timer_interrupt(int cpu) +{ + u64 tb = get_tb(); + + per_cpu(wd_timer_tb, cpu) = tb; + + wd_smp_clear_cpu_pending(cpu, tb); + + if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb) + watchdog_smp_panic(cpu, tb); +} + +void soft_nmi_interrupt(struct pt_regs *regs) +{ + unsigned long flags; + int cpu = raw_smp_processor_id(); + u64 tb; + + if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) + return; + + nmi_enter(); + tb = get_tb(); + if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) { + per_cpu(wd_timer_tb, cpu) = tb; + + wd_smp_lock(&flags); + if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) { + wd_smp_unlock(&flags); + goto out; + } + set_cpu_stuck(cpu, tb); + + pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", cpu); + print_modules(); + print_irqtrace_events(current); + if (regs) + show_regs(regs); + else + dump_stack(); + + wd_smp_unlock(&flags); + + if (sysctl_hardlockup_all_cpu_backtrace) + trigger_allbutself_cpu_backtrace(); + + if (hardlockup_panic) + nmi_panic(regs, "Hard LOCKUP"); + } + if (wd_panic_timeout_tb < 0x7fffffff) + mtspr(SPRN_DEC, wd_panic_timeout_tb); + +out: + nmi_exit(); +} + +static void wd_timer_reset(unsigned int cpu, struct timer_list *t) +{ + t->expires = jiffies + msecs_to_jiffies(wd_timer_period_ms); + if (wd_timer_period_ms > 1000) + t->expires = __round_jiffies_up(t->expires, cpu); + add_timer_on(t, cpu); +} + +static void wd_timer_fn(unsigned long data) +{ + struct timer_list *t = this_cpu_ptr(&wd_timer); + int cpu = smp_processor_id(); + + watchdog_timer_interrupt(cpu); + + wd_timer_reset(cpu, t); +} + +void arch_touch_nmi_watchdog(void) +{ + int cpu = smp_processor_id(); + + watchdog_timer_interrupt(cpu); +} +EXPORT_SYMBOL(arch_touch_nmi_watchdog); + +static void start_watchdog_timer_on(unsigned int cpu) +{ + struct timer_list *t = per_cpu_ptr(&wd_timer, cpu); + + per_cpu(wd_timer_tb, cpu) = get_tb(); + + setup_pinned_timer(t, wd_timer_fn, 0); + wd_timer_reset(cpu, t); +} + +static void stop_watchdog_timer_on(unsigned int cpu) +{ + struct timer_list *t = per_cpu_ptr(&wd_timer, cpu); + + del_timer_sync(t); +} + +static int start_wd_on_cpu(unsigned int cpu) +{ + if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) { + WARN_ON(1); + return 0; + } + + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return 0; + + if (watchdog_suspended) + return 0; + + if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) + return 0; + + cpumask_set_cpu(cpu, &wd_cpus_enabled); + if (cpumask_weight(&wd_cpus_enabled) == 1) { + cpumask_set_cpu(cpu, &wd_smp_cpus_pending); + wd_smp_last_reset_tb = get_tb(); + } + smp_wmb(); + start_watchdog_timer_on(cpu); + + return 0; +} + +static int stop_wd_on_cpu(unsigned int cpu) +{ + if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) { + WARN_ON(1); + return 0; + } + + stop_watchdog_timer_on(cpu); + + cpumask_clear_cpu(cpu, &wd_cpus_enabled); + wd_smp_clear_cpu_pending(cpu, get_tb()); + + return 0; +} + +static void watchdog_calc_timeouts(void) +{ + wd_panic_timeout_tb = watchdog_thresh * ppc_tb_freq; + + /* Have the SMP detector trigger a bit later */ + wd_smp_panic_timeout_tb = wd_panic_timeout_tb * 3 / 2; + + /* 2/5 is the factor that the perf based detector uses */ + wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5; +} + +void watchdog_nmi_reconfigure(void) +{ + int cpu; + + watchdog_calc_timeouts(); + + for_each_cpu(cpu, &wd_cpus_enabled) + stop_wd_on_cpu(cpu); + + for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) + start_wd_on_cpu(cpu); +} + +/* + * This runs after lockup_detector_init() which sets up watchdog_cpumask. + */ +static int __init powerpc_watchdog_init(void) +{ + int err; + + watchdog_calc_timeouts(); + + err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online", + start_wd_on_cpu, stop_wd_on_cpu); + if (err < 0) + pr_warn("Watchdog could not be initialized"); + + return 0; +} +arch_initcall(powerpc_watchdog_init); + +static void handle_backtrace_ipi(struct pt_regs *regs) +{ + nmi_cpu_backtrace(regs); +} + +static void raise_backtrace_ipi(cpumask_t *mask) +{ + unsigned int cpu; + + for_each_cpu(cpu, mask) { + if (cpu == smp_processor_id()) + handle_backtrace_ipi(NULL); + else + smp_send_nmi_ipi(cpu, handle_backtrace_ipi, 1000000); + } +} + +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +{ + nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi); +} diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 710e491206ed..8cb0190e2a73 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -93,7 +93,7 @@ int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order) } if (!hpt) - hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT + hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_RETRY_MAYFAIL |__GFP_NOWARN, order - PAGE_SHIFT); if (!hpt) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index f3917705c686..41cf5ae273cf 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -233,192 +233,192 @@ static long calc_offset(struct fixup_entry *entry, unsigned int *p) static void test_basic_patching(void) { - extern unsigned int ftr_fixup_test1; - extern unsigned int end_ftr_fixup_test1; - extern unsigned int ftr_fixup_test1_orig; - extern unsigned int ftr_fixup_test1_expected; - int size = &end_ftr_fixup_test1 - &ftr_fixup_test1; + extern unsigned int ftr_fixup_test1[]; + extern unsigned int end_ftr_fixup_test1[]; + extern unsigned int ftr_fixup_test1_orig[]; + extern unsigned int ftr_fixup_test1_expected[]; + int size = end_ftr_fixup_test1 - ftr_fixup_test1; fixup.value = fixup.mask = 8; - fixup.start_off = calc_offset(&fixup, &ftr_fixup_test1 + 1); - fixup.end_off = calc_offset(&fixup, &ftr_fixup_test1 + 2); + fixup.start_off = calc_offset(&fixup, ftr_fixup_test1 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_test1 + 2); fixup.alt_start_off = fixup.alt_end_off = 0; /* Sanity check */ - check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0); + check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0); /* Check we don't patch if the value matches */ patch_feature_section(8, &fixup); - check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0); + check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0); /* Check we do patch if the value doesn't match */ patch_feature_section(0, &fixup); - check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0); + check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0); /* Check we do patch if the mask doesn't match */ - memcpy(&ftr_fixup_test1, &ftr_fixup_test1_orig, size); - check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0); + memcpy(ftr_fixup_test1, ftr_fixup_test1_orig, size); + check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0); patch_feature_section(~8, &fixup); - check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0); + check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0); } static void test_alternative_patching(void) { - extern unsigned int ftr_fixup_test2; - extern unsigned int end_ftr_fixup_test2; - extern unsigned int ftr_fixup_test2_orig; - extern unsigned int ftr_fixup_test2_alt; - extern unsigned int ftr_fixup_test2_expected; - int size = &end_ftr_fixup_test2 - &ftr_fixup_test2; + extern unsigned int ftr_fixup_test2[]; + extern unsigned int end_ftr_fixup_test2[]; + extern unsigned int ftr_fixup_test2_orig[]; + extern unsigned int ftr_fixup_test2_alt[]; + extern unsigned int ftr_fixup_test2_expected[]; + int size = end_ftr_fixup_test2 - ftr_fixup_test2; fixup.value = fixup.mask = 0xF; - fixup.start_off = calc_offset(&fixup, &ftr_fixup_test2 + 1); - fixup.end_off = calc_offset(&fixup, &ftr_fixup_test2 + 2); - fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test2_alt); - fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test2_alt + 1); + fixup.start_off = calc_offset(&fixup, ftr_fixup_test2 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_test2 + 2); + fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test2_alt); + fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test2_alt + 1); /* Sanity check */ - check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0); + check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0); /* Check we don't patch if the value matches */ patch_feature_section(0xF, &fixup); - check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0); + check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0); /* Check we do patch if the value doesn't match */ patch_feature_section(0, &fixup); - check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0); + check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0); /* Check we do patch if the mask doesn't match */ - memcpy(&ftr_fixup_test2, &ftr_fixup_test2_orig, size); - check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0); + memcpy(ftr_fixup_test2, ftr_fixup_test2_orig, size); + check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0); patch_feature_section(~0xF, &fixup); - check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0); + check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0); } static void test_alternative_case_too_big(void) { - extern unsigned int ftr_fixup_test3; - extern unsigned int end_ftr_fixup_test3; - extern unsigned int ftr_fixup_test3_orig; - extern unsigned int ftr_fixup_test3_alt; - int size = &end_ftr_fixup_test3 - &ftr_fixup_test3; + extern unsigned int ftr_fixup_test3[]; + extern unsigned int end_ftr_fixup_test3[]; + extern unsigned int ftr_fixup_test3_orig[]; + extern unsigned int ftr_fixup_test3_alt[]; + int size = end_ftr_fixup_test3 - ftr_fixup_test3; fixup.value = fixup.mask = 0xC; - fixup.start_off = calc_offset(&fixup, &ftr_fixup_test3 + 1); - fixup.end_off = calc_offset(&fixup, &ftr_fixup_test3 + 2); - fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test3_alt); - fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test3_alt + 2); + fixup.start_off = calc_offset(&fixup, ftr_fixup_test3 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_test3 + 2); + fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test3_alt); + fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test3_alt + 2); /* Sanity check */ - check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0); + check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0); /* Expect nothing to be patched, and the error returned to us */ check(patch_feature_section(0xF, &fixup) == 1); - check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0); + check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0); check(patch_feature_section(0, &fixup) == 1); - check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0); + check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0); check(patch_feature_section(~0xF, &fixup) == 1); - check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0); + check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0); } static void test_alternative_case_too_small(void) { - extern unsigned int ftr_fixup_test4; - extern unsigned int end_ftr_fixup_test4; - extern unsigned int ftr_fixup_test4_orig; - extern unsigned int ftr_fixup_test4_alt; - extern unsigned int ftr_fixup_test4_expected; - int size = &end_ftr_fixup_test4 - &ftr_fixup_test4; + extern unsigned int ftr_fixup_test4[]; + extern unsigned int end_ftr_fixup_test4[]; + extern unsigned int ftr_fixup_test4_orig[]; + extern unsigned int ftr_fixup_test4_alt[]; + extern unsigned int ftr_fixup_test4_expected[]; + int size = end_ftr_fixup_test4 - ftr_fixup_test4; unsigned long flag; /* Check a high-bit flag */ flag = 1UL << ((sizeof(unsigned long) - 1) * 8); fixup.value = fixup.mask = flag; - fixup.start_off = calc_offset(&fixup, &ftr_fixup_test4 + 1); - fixup.end_off = calc_offset(&fixup, &ftr_fixup_test4 + 5); - fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test4_alt); - fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test4_alt + 2); + fixup.start_off = calc_offset(&fixup, ftr_fixup_test4 + 1); + fixup.end_off = calc_offset(&fixup, ftr_fixup_test4 + 5); + fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test4_alt); + fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test4_alt + 2); /* Sanity check */ - check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0); + check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0); /* Check we don't patch if the value matches */ patch_feature_section(flag, &fixup); - check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0); + check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0); /* Check we do patch if the value doesn't match */ patch_feature_section(0, &fixup); - check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0); + check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0); /* Check we do patch if the mask doesn't match */ - memcpy(&ftr_fixup_test4, &ftr_fixup_test4_orig, size); - check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0); + memcpy(ftr_fixup_test4, ftr_fixup_test4_orig, size); + check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0); patch_feature_section(~flag, &fixup); - check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0); + check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0); } static void test_alternative_case_with_branch(void) { - extern unsigned int ftr_fixup_test5; - extern unsigned int end_ftr_fixup_test5; - extern unsigned int ftr_fixup_test5_expected; - int size = &end_ftr_fixup_test5 - &ftr_fixup_test5; + extern unsigned int ftr_fixup_test5[]; + extern unsigned int end_ftr_fixup_test5[]; + extern unsigned int ftr_fixup_test5_expected[]; + int size = end_ftr_fixup_test5 - ftr_fixup_test5; - check(memcmp(&ftr_fixup_test5, &ftr_fixup_test5_expected, size) == 0); + check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0); } static void test_alternative_case_with_external_branch(void) { - extern unsigned int ftr_fixup_test6; - extern unsigned int end_ftr_fixup_test6; - extern unsigned int ftr_fixup_test6_expected; - int size = &end_ftr_fixup_test6 - &ftr_fixup_test6; + extern unsigned int ftr_fixup_test6[]; + extern unsigned int end_ftr_fixup_test6[]; + extern unsigned int ftr_fixup_test6_expected[]; + int size = end_ftr_fixup_test6 - ftr_fixup_test6; - check(memcmp(&ftr_fixup_test6, &ftr_fixup_test6_expected, size) == 0); + check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0); } static void test_cpu_macros(void) { - extern u8 ftr_fixup_test_FTR_macros; - extern u8 ftr_fixup_test_FTR_macros_expected; - unsigned long size = &ftr_fixup_test_FTR_macros_expected - - &ftr_fixup_test_FTR_macros; + extern u8 ftr_fixup_test_FTR_macros[]; + extern u8 ftr_fixup_test_FTR_macros_expected[]; + unsigned long size = ftr_fixup_test_FTR_macros_expected - + ftr_fixup_test_FTR_macros; /* The fixups have already been done for us during boot */ - check(memcmp(&ftr_fixup_test_FTR_macros, - &ftr_fixup_test_FTR_macros_expected, size) == 0); + check(memcmp(ftr_fixup_test_FTR_macros, + ftr_fixup_test_FTR_macros_expected, size) == 0); } static void test_fw_macros(void) { #ifdef CONFIG_PPC64 - extern u8 ftr_fixup_test_FW_FTR_macros; - extern u8 ftr_fixup_test_FW_FTR_macros_expected; - unsigned long size = &ftr_fixup_test_FW_FTR_macros_expected - - &ftr_fixup_test_FW_FTR_macros; + extern u8 ftr_fixup_test_FW_FTR_macros[]; + extern u8 ftr_fixup_test_FW_FTR_macros_expected[]; + unsigned long size = ftr_fixup_test_FW_FTR_macros_expected - + ftr_fixup_test_FW_FTR_macros; /* The fixups have already been done for us during boot */ - check(memcmp(&ftr_fixup_test_FW_FTR_macros, - &ftr_fixup_test_FW_FTR_macros_expected, size) == 0); + check(memcmp(ftr_fixup_test_FW_FTR_macros, + ftr_fixup_test_FW_FTR_macros_expected, size) == 0); #endif } static void test_lwsync_macros(void) { - extern u8 lwsync_fixup_test; - extern u8 end_lwsync_fixup_test; - extern u8 lwsync_fixup_test_expected_LWSYNC; - extern u8 lwsync_fixup_test_expected_SYNC; - unsigned long size = &end_lwsync_fixup_test - - &lwsync_fixup_test; + extern u8 lwsync_fixup_test[]; + extern u8 end_lwsync_fixup_test[]; + extern u8 lwsync_fixup_test_expected_LWSYNC[]; + extern u8 lwsync_fixup_test_expected_SYNC[]; + unsigned long size = end_lwsync_fixup_test - + lwsync_fixup_test; /* The fixups have already been done for us during boot */ if (cur_cpu_spec->cpu_features & CPU_FTR_LWSYNC) { - check(memcmp(&lwsync_fixup_test, - &lwsync_fixup_test_expected_LWSYNC, size) == 0); + check(memcmp(lwsync_fixup_test, + lwsync_fixup_test_expected_LWSYNC, size) == 0); } else { - check(memcmp(&lwsync_fixup_test, - &lwsync_fixup_test_expected_SYNC, size) == 0); + check(memcmp(lwsync_fixup_test, + lwsync_fixup_test_expected_SYNC, size) == 0); } } diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index 0ee6be4f1ba4..5d78b193fec4 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -34,16 +34,9 @@ /* * Top of mmap area (just below the process stack). * - * Leave at least a ~128 MB hole on 32bit applications. - * - * On 64bit applications we randomise the stack by 1GB so we need to - * space our mmap start address by a further 1GB, otherwise there is a - * chance the mmap area will end up closer to the stack than our ulimit - * requires. + * Leave at least a ~128 MB hole. */ -#define MIN_GAP32 (128*1024*1024) -#define MIN_GAP64 ((128 + 1024)*1024*1024UL) -#define MIN_GAP ((is_32bit_task()) ? MIN_GAP32 : MIN_GAP64) +#define MIN_GAP (128*1024*1024) #define MAX_GAP (TASK_SIZE/6*5) static inline int mmap_is_legacy(void) @@ -71,9 +64,26 @@ unsigned long arch_mmap_rnd(void) return rnd << PAGE_SHIFT; } +static inline unsigned long stack_maxrandom_size(void) +{ + if (!(current->flags & PF_RANDOMIZE)) + return 0; + + /* 8MB for 32bit, 1GB for 64bit */ + if (is_32bit_task()) + return (1<<23); + else + return (1<<30); +} + static inline unsigned long mmap_base(unsigned long rnd) { unsigned long gap = rlimit(RLIMIT_STACK); + unsigned long pad = stack_maxrandom_size() + stack_guard_gap; + + /* Values close to RLIM_INFINITY can overflow. */ + if (gap + pad > gap) + gap += pad; if (gap < MIN_GAP) gap = MIN_GAP; diff --git a/arch/sh/include/asm/bug.h b/arch/sh/include/asm/bug.h index 1b77f068be2b..c9828f785ca0 100644 --- a/arch/sh/include/asm/bug.h +++ b/arch/sh/include/asm/bug.h @@ -48,6 +48,7 @@ do { \ "i" (__FILE__), \ "i" (__LINE__), "i" (0), \ "i" (sizeof(struct bug_entry))); \ + unreachable(); \ } while (0) #define __WARN_FLAGS(flags) \ diff --git a/arch/sparc/include/asm/nmi.h b/arch/sparc/include/asm/nmi.h index 26ad2b2607c6..284eac3ffaf2 100644 --- a/arch/sparc/include/asm/nmi.h +++ b/arch/sparc/include/asm/nmi.h @@ -7,6 +7,7 @@ void nmi_adjust_hz(unsigned int new_hz); extern atomic_t nmi_active; +void arch_touch_nmi_watchdog(void); void start_nmi_watchdog(void *unused); void stop_nmi_watchdog(void *unused); diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index e4b4e790bf89..fa466ce45bc9 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -205,7 +205,7 @@ static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size) handle_size = (sizeof(struct mdesc_handle) - sizeof(struct mdesc_hdr) + mdesc_size); - base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_REPEAT); + base = kmalloc(handle_size + 15, GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!base) return NULL; diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 95e73c63c99d..048ad783ea3f 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -51,7 +51,7 @@ static DEFINE_PER_CPU(unsigned int, last_irq_sum); static DEFINE_PER_CPU(long, alert_counter); static DEFINE_PER_CPU(int, nmi_touch); -void touch_nmi_watchdog(void) +void arch_touch_nmi_watchdog(void) { if (atomic_read(&nmi_active)) { int cpu; @@ -61,10 +61,8 @@ void touch_nmi_watchdog(void) per_cpu(nmi_touch, cpu) = 1; } } - - touch_softlockup_watchdog(); } -EXPORT_SYMBOL(touch_nmi_watchdog); +EXPORT_SYMBOL(arch_touch_nmi_watchdog); static void die_nmi(const char *str, struct pt_regs *regs, int do_panic) { diff --git a/arch/sparc/lib/NG4memset.S b/arch/sparc/lib/NG4memset.S index 7c0c81f18837..c07834f3a6fc 100644 --- a/arch/sparc/lib/NG4memset.S +++ b/arch/sparc/lib/NG4memset.S @@ -13,14 +13,14 @@ .globl NG4memset NG4memset: andcc %o1, 0xff, %o4 - be,pt %icc, 1f + be,pt %xcc, 1f mov %o2, %o1 sllx %o4, 8, %g1 or %g1, %o4, %o2 sllx %o2, 16, %g1 or %g1, %o2, %o2 sllx %o2, 32, %g1 - ba,pt %icc, 1f + ba,pt %xcc, 1f or %g1, %o2, %o4 .size NG4memset,.-NG4memset @@ -29,7 +29,7 @@ NG4memset: NG4bzero: clr %o4 1: cmp %o1, 16 - ble %icc, .Ltiny + ble %xcc, .Ltiny mov %o0, %o3 sub %g0, %o0, %g1 and %g1, 0x7, %g1 @@ -37,7 +37,7 @@ NG4bzero: sub %o1, %g1, %o1 1: stb %o4, [%o0 + 0x00] subcc %g1, 1, %g1 - bne,pt %icc, 1b + bne,pt %xcc, 1b add %o0, 1, %o0 .Laligned8: cmp %o1, 64 + (64 - 8) @@ -48,7 +48,7 @@ NG4bzero: sub %o1, %g1, %o1 1: stx %o4, [%o0 + 0x00] subcc %g1, 8, %g1 - bne,pt %icc, 1b + bne,pt %xcc, 1b add %o0, 0x8, %o0 .Laligned64: andn %o1, 64 - 1, %g1 @@ -58,30 +58,30 @@ NG4bzero: 1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P subcc %g1, 0x40, %g1 stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P - bne,pt %icc, 1b + bne,pt %xcc, 1b add %o0, 0x40, %o0 .Lpostloop: cmp %o1, 8 - bl,pn %icc, .Ltiny + bl,pn %xcc, .Ltiny membar #StoreStore|#StoreLoad .Lmedium: andn %o1, 0x7, %g1 sub %o1, %g1, %o1 1: stx %o4, [%o0 + 0x00] subcc %g1, 0x8, %g1 - bne,pt %icc, 1b + bne,pt %xcc, 1b add %o0, 0x08, %o0 andcc %o1, 0x4, %g1 - be,pt %icc, .Ltiny + be,pt %xcc, .Ltiny sub %o1, %g1, %o1 stw %o4, [%o0 + 0x00] add %o0, 0x4, %o0 .Ltiny: cmp %o1, 0 - be,pn %icc, .Lexit + be,pn %xcc, .Lexit 1: subcc %o1, 1, %o1 stb %o4, [%o0 + 0x00] - bne,pt %icc, 1b + bne,pt %xcc, 1b add %o0, 1, %o0 .Lexit: retl @@ -99,8 +99,8 @@ NG4bzero: stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P - bne,pt %icc, 1b + bne,pt %xcc, 1b add %o0, 0x30, %o0 - ba,a,pt %icc, .Lpostloop + ba,a,pt %xcc, .Lpostloop nop .size NG4bzero,.-NG4bzero diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2543ffc44afe..5c1f5795ed95 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -50,6 +50,7 @@ config X86 select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER + select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_MMIO_FLUSH @@ -160,6 +161,7 @@ config X86 select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI + select HAVE_HARDLOCKUP_DETECTOR_PERF if HAVE_PERF_EVENTS_NMI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index b3c5a5f030ce..43691238a21d 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -409,3 +409,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, debug_putstr("done.\nBooting the kernel.\n"); return output; } + +void fortify_panic(const char *name) +{ + error("detected buffer overflow"); +} diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index 3d3e8353ee5c..e9ee84873de5 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h @@ -142,7 +142,9 @@ static __always_inline void *__constant_memcpy(void *to, const void *from, } #define __HAVE_ARCH_MEMCPY +extern void *memcpy(void *, const void *, size_t); +#ifndef CONFIG_FORTIFY_SOURCE #ifdef CONFIG_X86_USE_3DNOW #include <asm/mmx.h> @@ -195,11 +197,15 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) #endif #endif +#endif /* !CONFIG_FORTIFY_SOURCE */ #define __HAVE_ARCH_MEMMOVE void *memmove(void *dest, const void *src, size_t n); +extern int memcmp(const void *, const void *, size_t); +#ifndef CONFIG_FORTIFY_SOURCE #define memcmp __builtin_memcmp +#endif #define __HAVE_ARCH_MEMCHR extern void *memchr(const void *cs, int c, size_t count); @@ -321,6 +327,8 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern, : __memset_generic((s), (c), (count))) #define __HAVE_ARCH_MEMSET +extern void *memset(void *, int, size_t); +#ifndef CONFIG_FORTIFY_SOURCE #if (__GNUC__ >= 4) #define memset(s, c, count) __builtin_memset(s, c, count) #else @@ -330,6 +338,7 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern, (count)) \ : __memset((s), (c), (count))) #endif +#endif /* !CONFIG_FORTIFY_SOURCE */ /* * find the first occurrence of byte 'c', or 1 past the area if none diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 733bae07fb29..309fe644569f 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -31,6 +31,7 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t extern void *memcpy(void *to, const void *from, size_t len); extern void *__memcpy(void *to, const void *from, size_t len); +#ifndef CONFIG_FORTIFY_SOURCE #ifndef CONFIG_KMEMCHECK #if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4 #define memcpy(dst, src, len) \ @@ -51,6 +52,7 @@ extern void *__memcpy(void *to, const void *from, size_t len); */ #define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len)) #endif +#endif /* !CONFIG_FORTIFY_SOURCE */ #define __HAVE_ARCH_MEMSET void *memset(void *s, int c, size_t n); @@ -77,6 +79,11 @@ int strcmp(const char *cs, const char *ct); #define memcpy(dst, src, len) __memcpy(dst, src, len) #define memmove(dst, src, len) __memmove(dst, src, len) #define memset(s, c, n) __memset(s, c, n) + +#ifndef __NO_FORTIFY +#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */ +#endif + #endif #define __HAVE_ARCH_MEMCPY_MCSAFE 1 diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index c73c9fb281e1..d6f387780849 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -19,7 +19,7 @@ #include <linux/init.h> #include <linux/delay.h> -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF u64 hw_nmi_get_sample_period(int watchdog_thresh) { return (u64)(cpu_khz) * 1000 * watchdog_thresh; diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c index cad12634d6bd..2eab7d0bfedd 100644 --- a/arch/x86/lib/memcpy_32.c +++ b/arch/x86/lib/memcpy_32.c @@ -6,7 +6,7 @@ __visible void *memcpy(void *to, const void *from, size_t n) { -#ifdef CONFIG_X86_USE_3DNOW +#if defined(CONFIG_X86_USE_3DNOW) && !defined(CONFIG_FORTIFY_SOURCE) return __memcpy3d(to, from, n); #else return __memcpy(to, from, n); diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 797295e792b2..229d04a83f85 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -92,13 +92,18 @@ unsigned long arch_mmap_rnd(void) static unsigned long mmap_base(unsigned long rnd, unsigned long task_size) { unsigned long gap = rlimit(RLIMIT_STACK); + unsigned long pad = stack_maxrandom_size(task_size) + stack_guard_gap; unsigned long gap_min, gap_max; + /* Values close to RLIM_INFINITY can overflow. */ + if (gap + pad > gap) + gap += pad; + /* * Top of mmap area (just below the process stack). * Leave an at least ~128 MB hole with possible stack randomization. */ - gap_min = SIZE_128M + stack_maxrandom_size(task_size); + gap_min = SIZE_128M; gap_max = (task_size / 6) * 5; if (gap < gap_min) diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 30f6290109d4..bdc12d426b24 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -7,6 +7,7 @@ generic-y += emergency-restart.h generic-y += errno.h generic-y += exec.h generic-y += extable.h +generic-y += fb.h generic-y += fcntl.h generic-y += hardirq.h generic-y += ioctl.h diff --git a/arch/xtensa/include/asm/fb.h b/arch/xtensa/include/asm/fb.h deleted file mode 100644 index c7df38030992..000000000000 --- a/arch/xtensa/include/asm/fb.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _ASM_FB_H_ -#define _ASM_FB_H_ -#include <linux/fb.h> - -#define fb_pgprotect(...) do {} while (0) - -static inline int fb_is_primary_device(struct fb_info *info) -{ - return 0; -} - -#endif /* _ASM_FB_H_ */ diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index f7425960f6a5..37e24f525162 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -17,6 +17,7 @@ cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) \ cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \ + -D__NO_FORTIFY \ $(call cc-option,-ffreestanding) \ $(call cc-option,-fno-stack-protector) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7dcac3bfb771..969bac8404f1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2434,8 +2434,9 @@ rebuild_st: * again with !__GFP_NORETRY. However, we still * want to fail this allocation rather than * trigger the out-of-memory killer and for - * this we want the future __GFP_MAYFAIL. + * this we want __GFP_RETRY_MAYFAIL. */ + gfp |= __GFP_RETRY_MAYFAIL; } } while (1); diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 23039768f541..be944d5aa9af 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -995,7 +995,9 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, free_rd_atomic_resource(qp, res); rxe_advance_resp_resource(qp); - memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb)); + memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(ack_pkt)); + memset((unsigned char *)SKB_TO_PKT(skb) + sizeof(ack_pkt), 0, + sizeof(skb->cb) - sizeof(ack_pkt)); res->type = RXE_ATOMIC_MASK; res->atomic.skb = skb; diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c index e15a9733fcfd..9668616faf16 100644 --- a/drivers/mmc/host/wbsd.c +++ b/drivers/mmc/host/wbsd.c @@ -1386,7 +1386,7 @@ static void wbsd_request_dma(struct wbsd_host *host, int dma) * order for ISA to be able to DMA to it. */ host->dma_buffer = kmalloc(WBSD_DMA_SIZE, - GFP_NOIO | GFP_DMA | __GFP_REPEAT | __GFP_NOWARN); + GFP_NOIO | GFP_DMA | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!host->dma_buffer) goto free; diff --git a/drivers/s390/char/vmcp.c b/drivers/s390/char/vmcp.c index 65f5a794f26d..98749fa817da 100644 --- a/drivers/s390/char/vmcp.c +++ b/drivers/s390/char/vmcp.c @@ -98,7 +98,7 @@ vmcp_write(struct file *file, const char __user *buff, size_t count, } if (!session->response) session->response = (char *)__get_free_pages(GFP_KERNEL - | __GFP_REPEAT | GFP_DMA, + | __GFP_RETRY_MAYFAIL | GFP_DMA, get_order(session->bufsize)); if (!session->response) { mutex_unlock(&session->mutex); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 28de421e3220..cf166535dd5c 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -252,7 +252,7 @@ int transport_alloc_session_tags(struct se_session *se_sess, int rc; se_sess->sess_cmd_map = kzalloc(tag_num * tag_size, - GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL); if (!se_sess->sess_cmd_map) { se_sess->sess_cmd_map = vzalloc(tag_num * tag_size); if (!se_sess->sess_cmd_map) { diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index e3d7ea1288c6..06d044862e58 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -897,7 +897,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) struct sk_buff **queue; int i; - n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_REPEAT); + n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!n) return -ENOMEM; vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 679f8960db4b..046f6d280af5 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1399,7 +1399,7 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) struct vhost_virtqueue **vqs; int r = -ENOMEM, i; - vs = kzalloc(sizeof(*vs), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + vs = kzalloc(sizeof(*vs), GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL); if (!vs) { vs = vzalloc(sizeof(*vs)); if (!vs) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 3f63e03de8e8..c9de9c41aa97 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -508,7 +508,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file) /* This struct is large and allocation could fail, fall back to vmalloc * if there is no other way. */ - vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_REPEAT); + vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!vsock) return -ENOMEM; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 8b426f83909f..245c430a2e41 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -380,8 +380,8 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) struct page *page = radix_tree_deref_slot_protected(slot, &mapping->tree_lock); if (likely(page) && PageDirty(page)) { - __dec_wb_stat(old_wb, WB_RECLAIMABLE); - __inc_wb_stat(new_wb, WB_RECLAIMABLE); + dec_wb_stat(old_wb, WB_RECLAIMABLE); + inc_wb_stat(new_wb, WB_RECLAIMABLE); } } @@ -391,8 +391,8 @@ static void inode_switch_wbs_work_fn(struct work_struct *work) &mapping->tree_lock); if (likely(page)) { WARN_ON_ONCE(!PageWriteback(page)); - __dec_wb_stat(old_wb, WB_WRITEBACK); - __inc_wb_stat(new_wb, WB_WRITEBACK); + dec_wb_stat(old_wb, WB_WRITEBACK); + inc_wb_stat(new_wb, WB_WRITEBACK); } } diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index d6ea520162b2..4d85992d75b2 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -54,6 +54,16 @@ kmem_flags_convert(xfs_km_flags_t flags) lflags &= ~__GFP_FS; } + /* + * Default page/slab allocator behavior is to retry for ever + * for small allocations. We can override this behavior by using + * __GFP_RETRY_MAYFAIL which will tell the allocator to retry as long + * as it is feasible but rather fail than retry forever for all + * request sizes. + */ + if (flags & KM_MAYFAIL) + lflags |= __GFP_RETRY_MAYFAIL; + if (flags & KM_ZERO) lflags |= __GFP_ZERO; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 334165c911f0..854e1bdd0b2a 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -69,34 +69,14 @@ static inline void __add_wb_stat(struct bdi_writeback *wb, percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH); } -static inline void __inc_wb_stat(struct bdi_writeback *wb, - enum wb_stat_item item) -{ - __add_wb_stat(wb, item, 1); -} - static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { - unsigned long flags; - - local_irq_save(flags); - __inc_wb_stat(wb, item); - local_irq_restore(flags); -} - -static inline void __dec_wb_stat(struct bdi_writeback *wb, - enum wb_stat_item item) -{ - __add_wb_stat(wb, item, -1); + __add_wb_stat(wb, item, 1); } static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) { - unsigned long flags; - - local_irq_save(flags); - __dec_wb_stat(wb, item); - local_irq_restore(flags); + __add_wb_stat(wb, item, -1); } static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item) diff --git a/include/linux/crc-ccitt.h b/include/linux/crc-ccitt.h index f52696a1ff0d..7cd45a4bc224 100644 --- a/include/linux/crc-ccitt.h +++ b/include/linux/crc-ccitt.h @@ -4,12 +4,19 @@ #include <linux/types.h> extern u16 const crc_ccitt_table[256]; +extern u16 const crc_ccitt_false_table[256]; extern u16 crc_ccitt(u16 crc, const u8 *buffer, size_t len); +extern u16 crc_ccitt_false(u16 crc, const u8 *buffer, size_t len); static inline u16 crc_ccitt_byte(u16 crc, const u8 c) { return (crc >> 8) ^ crc_ccitt_table[(crc ^ c) & 0xff]; } +static inline u16 crc_ccitt_false_byte(u16 crc, const u8 c) +{ + return (crc << 8) ^ crc_ccitt_false_table[(crc >> 8) ^ c]; +} + #endif /* _LINUX_CRC_CCITT_H */ diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 4c6656f1fee7..bcfb9f7c46f5 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -25,7 +25,7 @@ struct vm_area_struct; #define ___GFP_FS 0x80u #define ___GFP_COLD 0x100u #define ___GFP_NOWARN 0x200u -#define ___GFP_REPEAT 0x400u +#define ___GFP_RETRY_MAYFAIL 0x400u #define ___GFP_NOFAIL 0x800u #define ___GFP_NORETRY 0x1000u #define ___GFP_MEMALLOC 0x2000u @@ -136,26 +136,56 @@ struct vm_area_struct; * * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. * - * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt - * _might_ fail. This depends upon the particular VM implementation. + * The default allocator behavior depends on the request size. We have a concept + * of so called costly allocations (with order > PAGE_ALLOC_COSTLY_ORDER). + * !costly allocations are too essential to fail so they are implicitly + * non-failing by default (with some exceptions like OOM victims might fail so + * the caller still has to check for failures) while costly requests try to be + * not disruptive and back off even without invoking the OOM killer. + * The following three modifiers might be used to override some of these + * implicit rules + * + * __GFP_NORETRY: The VM implementation will try only very lightweight + * memory direct reclaim to get some memory under memory pressure (thus + * it can sleep). It will avoid disruptive actions like OOM killer. The + * caller must handle the failure which is quite likely to happen under + * heavy memory pressure. The flag is suitable when failure can easily be + * handled at small cost, such as reduced throughput + * + * __GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim + * procedures that have previously failed if there is some indication + * that progress has been made else where. It can wait for other + * tasks to attempt high level approaches to freeing memory such as + * compaction (which removes fragmentation) and page-out. + * There is still a definite limit to the number of retries, but it is + * a larger limit than with __GFP_NORETRY. + * Allocations with this flag may fail, but only when there is + * genuinely little unused memory. While these allocations do not + * directly trigger the OOM killer, their failure indicates that + * the system is likely to need to use the OOM killer soon. The + * caller must handle failure, but can reasonably do so by failing + * a higher-level request, or completing it only in a much less + * efficient manner. + * If the allocation does fail, and the caller is in a position to + * free some non-essential memory, doing so could benefit the system + * as a whole. * * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller - * cannot handle allocation failures. New users should be evaluated carefully - * (and the flag should be used only when there is no reasonable failure - * policy) but it is definitely preferable to use the flag rather than - * opencode endless loop around allocator. - * - * __GFP_NORETRY: The VM implementation must not retry indefinitely and will - * return NULL when direct reclaim and memory compaction have failed to allow - * the allocation to succeed. The OOM killer is not called with the current - * implementation. + * cannot handle allocation failures. The allocation could block + * indefinitely but will never return with failure. Testing for + * failure is pointless. + * New users should be evaluated carefully (and the flag should be + * used only when there is no reasonable failure policy) but it is + * definitely preferable to use the flag rather than opencode endless + * loop around allocator. + * Using this flag for costly allocations is _highly_ discouraged. */ #define __GFP_IO ((__force gfp_t)___GFP_IO) #define __GFP_FS ((__force gfp_t)___GFP_FS) #define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ #define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ #define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) -#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT) +#define __GFP_RETRY_MAYFAIL ((__force gfp_t)___GFP_RETRY_MAYFAIL) #define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) #define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 4634da521238..3e0d405dc842 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -34,7 +34,7 @@ extern char *migrate_reason_names[MR_TYPES]; static inline struct page *new_page_nodemask(struct page *page, int preferred_nid, nodemask_t *nodemask) { - gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE; + gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL; if (PageHuge(page)) return alloc_huge_page_nodemask(page_hstate(compound_head(page)), diff --git a/include/linux/nmi.h b/include/linux/nmi.h index aa3cd0878270..8aa01fd859fb 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -6,18 +6,26 @@ #include <linux/sched.h> #include <asm/irq.h> +#if defined(CONFIG_HAVE_NMI_WATCHDOG) +#include <asm/nmi.h> +#endif #ifdef CONFIG_LOCKUP_DETECTOR +void lockup_detector_init(void); +#else +static inline void lockup_detector_init(void) +{ +} +#endif + +#ifdef CONFIG_SOFTLOCKUP_DETECTOR extern void touch_softlockup_watchdog_sched(void); extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); -extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; -extern unsigned int hardlockup_panic; -void lockup_detector_init(void); +extern int soft_watchdog_enabled; +extern atomic_t watchdog_park_in_progress; #else static inline void touch_softlockup_watchdog_sched(void) { @@ -31,9 +39,6 @@ static inline void touch_softlockup_watchdog_sync(void) static inline void touch_all_softlockup_watchdogs(void) { } -static inline void lockup_detector_init(void) -{ -} #endif #ifdef CONFIG_DETECT_HUNG_TASK @@ -61,6 +66,21 @@ static inline void reset_hung_task_detector(void) #define NMI_WATCHDOG_ENABLED (1 << NMI_WATCHDOG_ENABLED_BIT) #define SOFT_WATCHDOG_ENABLED (1 << SOFT_WATCHDOG_ENABLED_BIT) +#if defined(CONFIG_HARDLOCKUP_DETECTOR) +extern void hardlockup_detector_disable(void); +extern unsigned int hardlockup_panic; +#else +static inline void hardlockup_detector_disable(void) {} +#endif + +#if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF) +extern void arch_touch_nmi_watchdog(void); +#else +#if !defined(CONFIG_HAVE_NMI_WATCHDOG) +static inline void arch_touch_nmi_watchdog(void) {} +#endif +#endif + /** * touch_nmi_watchdog - restart NMI watchdog timeout. * @@ -68,21 +88,11 @@ static inline void reset_hung_task_detector(void) * may be used to reset the timeout - for code which intentionally * disables interrupts for a long time. This call is stateless. */ -#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) -#include <asm/nmi.h> -extern void touch_nmi_watchdog(void); -#else static inline void touch_nmi_watchdog(void) { + arch_touch_nmi_watchdog(); touch_softlockup_watchdog(); } -#endif - -#if defined(CONFIG_HARDLOCKUP_DETECTOR) -extern void hardlockup_detector_disable(void); -#else -static inline void hardlockup_detector_disable(void) {} -#endif /* * Create trigger_all_cpu_backtrace() out of the arch-provided @@ -139,15 +149,18 @@ static inline bool trigger_single_cpu_backtrace(int cpu) } #endif -#ifdef CONFIG_LOCKUP_DETECTOR +#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF u64 hw_nmi_get_sample_period(int watchdog_thresh); +#endif + +#ifdef CONFIG_LOCKUP_DETECTOR extern int nmi_watchdog_enabled; -extern int soft_watchdog_enabled; extern int watchdog_user_enabled; extern int watchdog_thresh; extern unsigned long watchdog_enabled; +extern struct cpumask watchdog_cpumask; extern unsigned long *watchdog_cpumask_bits; -extern atomic_t watchdog_park_in_progress; +extern int __read_mostly watchdog_suspended; #ifdef CONFIG_SMP extern int sysctl_softlockup_all_cpu_backtrace; extern int sysctl_hardlockup_all_cpu_backtrace; diff --git a/include/linux/slab.h b/include/linux/slab.h index 04a7f7993e67..41473df6dfb0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -471,7 +471,8 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) * * %__GFP_NOWARN - If allocation fails, don't issue any warnings. * - * %__GFP_REPEAT - If allocation fails initially, try once more before failing. + * %__GFP_RETRY_MAYFAIL - Try really hard to succeed the allocation but fail + * eventually. * * There are other flags available as well, but these are not intended * for general use, and so are not documented here. For a full list of diff --git a/include/linux/string.h b/include/linux/string.h index 537918f8a98e..2215431b7a04 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -187,4 +187,204 @@ static inline const char *kbasename(const char *path) return tail ? tail + 1 : path; } +#define __FORTIFY_INLINE extern __always_inline __attribute__((gnu_inline)) +#define __RENAME(x) __asm__(#x) + +void fortify_panic(const char *name) __noreturn __cold; +void __read_overflow(void) __compiletime_error("detected read beyond size of object passed as 1st parameter"); +void __read_overflow2(void) __compiletime_error("detected read beyond size of object passed as 2nd parameter"); +void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter"); + +#if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) +__FORTIFY_INLINE char *strcpy(char *p, const char *q) +{ + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (p_size == (size_t)-1 && q_size == (size_t)-1) + return __builtin_strcpy(p, q); + if (strscpy(p, q, p_size < q_size ? p_size : q_size) < 0) + fortify_panic(__func__); + return p; +} + +__FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + if (__builtin_constant_p(size) && p_size < size) + __write_overflow(); + if (p_size < size) + fortify_panic(__func__); + return __builtin_strncpy(p, q, size); +} + +__FORTIFY_INLINE char *strcat(char *p, const char *q) +{ + size_t p_size = __builtin_object_size(p, 0); + if (p_size == (size_t)-1) + return __builtin_strcat(p, q); + if (strlcat(p, q, p_size) >= p_size) + fortify_panic(__func__); + return p; +} + +__FORTIFY_INLINE __kernel_size_t strlen(const char *p) +{ + __kernel_size_t ret; + size_t p_size = __builtin_object_size(p, 0); + if (p_size == (size_t)-1) + return __builtin_strlen(p); + ret = strnlen(p, p_size); + if (p_size <= ret) + fortify_panic(__func__); + return ret; +} + +extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen); +__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen) +{ + size_t p_size = __builtin_object_size(p, 0); + __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size); + if (p_size <= ret && maxlen != ret) + fortify_panic(__func__); + return ret; +} + +/* defined after fortified strlen to reuse it */ +extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy); +__FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size) +{ + size_t ret; + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (p_size == (size_t)-1 && q_size == (size_t)-1) + return __real_strlcpy(p, q, size); + ret = strlen(q); + if (size) { + size_t len = (ret >= size) ? size - 1 : ret; + if (__builtin_constant_p(len) && len >= p_size) + __write_overflow(); + if (len >= p_size) + fortify_panic(__func__); + __builtin_memcpy(p, q, len); + p[len] = '\0'; + } + return ret; +} + +/* defined after fortified strlen and strnlen to reuse them */ +__FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count) +{ + size_t p_len, copy_len; + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (p_size == (size_t)-1 && q_size == (size_t)-1) + return __builtin_strncat(p, q, count); + p_len = strlen(p); + copy_len = strnlen(q, count); + if (p_size < p_len + copy_len + 1) + fortify_panic(__func__); + __builtin_memcpy(p + p_len, q, copy_len); + p[p_len + copy_len] = '\0'; + return p; +} + +__FORTIFY_INLINE void *memset(void *p, int c, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + if (__builtin_constant_p(size) && p_size < size) + __write_overflow(); + if (p_size < size) + fortify_panic(__func__); + return __builtin_memset(p, c, size); +} + +__FORTIFY_INLINE void *memcpy(void *p, const void *q, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (__builtin_constant_p(size)) { + if (p_size < size) + __write_overflow(); + if (q_size < size) + __read_overflow2(); + } + if (p_size < size || q_size < size) + fortify_panic(__func__); + return __builtin_memcpy(p, q, size); +} + +__FORTIFY_INLINE void *memmove(void *p, const void *q, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (__builtin_constant_p(size)) { + if (p_size < size) + __write_overflow(); + if (q_size < size) + __read_overflow2(); + } + if (p_size < size || q_size < size) + fortify_panic(__func__); + return __builtin_memmove(p, q, size); +} + +extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan); +__FORTIFY_INLINE void *memscan(void *p, int c, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + if (__builtin_constant_p(size) && p_size < size) + __read_overflow(); + if (p_size < size) + fortify_panic(__func__); + return __real_memscan(p, c, size); +} + +__FORTIFY_INLINE int memcmp(const void *p, const void *q, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + size_t q_size = __builtin_object_size(q, 0); + if (__builtin_constant_p(size)) { + if (p_size < size) + __read_overflow(); + if (q_size < size) + __read_overflow2(); + } + if (p_size < size || q_size < size) + fortify_panic(__func__); + return __builtin_memcmp(p, q, size); +} + +__FORTIFY_INLINE void *memchr(const void *p, int c, __kernel_size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + if (__builtin_constant_p(size) && p_size < size) + __read_overflow(); + if (p_size < size) + fortify_panic(__func__); + return __builtin_memchr(p, c, size); +} + +void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv); +__FORTIFY_INLINE void *memchr_inv(const void *p, int c, size_t size) +{ + size_t p_size = __builtin_object_size(p, 0); + if (__builtin_constant_p(size) && p_size < size) + __read_overflow(); + if (p_size < size) + fortify_panic(__func__); + return __real_memchr_inv(p, c, size); +} + +extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup); +__FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp) +{ + size_t p_size = __builtin_object_size(p, 0); + if (__builtin_constant_p(size) && p_size < size) + __read_overflow(); + if (p_size < size) + fortify_panic(__func__); + return __real_kmemdup(p, size, gfp); +} +#endif + #endif /* _LINUX_STRING_H_ */ diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 10e3663a75a6..8e50d01c645f 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -34,7 +34,7 @@ {(unsigned long)__GFP_FS, "__GFP_FS"}, \ {(unsigned long)__GFP_COLD, "__GFP_COLD"}, \ {(unsigned long)__GFP_NOWARN, "__GFP_NOWARN"}, \ - {(unsigned long)__GFP_REPEAT, "__GFP_REPEAT"}, \ + {(unsigned long)__GFP_RETRY_MAYFAIL, "__GFP_RETRY_MAYFAIL"}, \ {(unsigned long)__GFP_NOFAIL, "__GFP_NOFAIL"}, \ {(unsigned long)__GFP_NORETRY, "__GFP_NORETRY"}, \ {(unsigned long)__GFP_COMP, "__GFP_COMP"}, \ diff --git a/kernel/Makefile b/kernel/Makefile index 72aa080f91f0..4cb8e8b23c6e 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -82,7 +82,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KGDB) += debug/ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o -obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o +obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index f78f719dae36..5710591a9b6d 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -26,13 +26,6 @@ #include <linux/vmalloc.h> #include "kexec_internal.h" -/* - * Declare these symbols weak so that if architecture provides a purgatory, - * these will be overridden. - */ -char __weak kexec_purgatory[0]; -size_t __weak kexec_purgatory_size = 0; - static int kexec_calculate_store_digests(struct kimage *image); /* Architectures can provide this probe function */ diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h index 799a8a452187..50dfcb039a41 100644 --- a/kernel/kexec_internal.h +++ b/kernel/kexec_internal.h @@ -17,6 +17,8 @@ extern struct mutex kexec_mutex; #ifdef CONFIG_KEXEC_FILE #include <linux/purgatory.h> void kimage_file_post_load_cleanup(struct kimage *image); +extern char kexec_purgatory[]; +extern size_t kexec_purgatory_size; #else /* CONFIG_KEXEC_FILE */ static inline void kimage_file_post_load_cleanup(struct kimage *image) { } #endif /* CONFIG_KEXEC_FILE */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index df9f2a367882..6648fbbb8157 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -901,6 +901,14 @@ static struct ctl_table kern_table[] = { #endif }, { + .procname = "watchdog_cpumask", + .data = &watchdog_cpumask_bits, + .maxlen = NR_CPUS, + .mode = 0644, + .proc_handler = proc_watchdog_cpumask, + }, +#ifdef CONFIG_SOFTLOCKUP_DETECTOR + { .procname = "soft_watchdog", .data = &soft_watchdog_enabled, .maxlen = sizeof (int), @@ -910,13 +918,6 @@ static struct ctl_table kern_table[] = { .extra2 = &one, }, { - .procname = "watchdog_cpumask", - .data = &watchdog_cpumask_bits, - .maxlen = NR_CPUS, - .mode = 0644, - .proc_handler = proc_watchdog_cpumask, - }, - { .procname = "softlockup_panic", .data = &softlockup_panic, .maxlen = sizeof(int), @@ -925,27 +926,29 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one, }, -#ifdef CONFIG_HARDLOCKUP_DETECTOR +#ifdef CONFIG_SMP { - .procname = "hardlockup_panic", - .data = &hardlockup_panic, + .procname = "softlockup_all_cpu_backtrace", + .data = &sysctl_softlockup_all_cpu_backtrace, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &zero, .extra2 = &one, }, +#endif /* CONFIG_SMP */ #endif -#ifdef CONFIG_SMP +#ifdef CONFIG_HARDLOCKUP_DETECTOR { - .procname = "softlockup_all_cpu_backtrace", - .data = &sysctl_softlockup_all_cpu_backtrace, + .procname = "hardlockup_panic", + .data = &hardlockup_panic, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &zero, .extra2 = &one, }, +#ifdef CONFIG_SMP { .procname = "hardlockup_all_cpu_backtrace", .data = &sysctl_hardlockup_all_cpu_backtrace, @@ -957,6 +960,8 @@ static struct ctl_table kern_table[] = { }, #endif /* CONFIG_SMP */ #endif +#endif + #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) { .procname = "unknown_nmi_panic", diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 03e0b69bb5bf..cabe3e9fb620 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -29,15 +29,58 @@ #include <linux/kvm_para.h> #include <linux/kthread.h> +/* Watchdog configuration */ static DEFINE_MUTEX(watchdog_proc_mutex); -#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR) -unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED; +int __read_mostly nmi_watchdog_enabled; + +#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG) +unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED | + NMI_WATCHDOG_ENABLED; #else unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED; #endif -int __read_mostly nmi_watchdog_enabled; + +#ifdef CONFIG_HARDLOCKUP_DETECTOR +/* boot commands */ +/* + * Should we panic when a soft-lockup or hard-lockup occurs: + */ +unsigned int __read_mostly hardlockup_panic = + CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; +/* + * We may not want to enable hard lockup detection by default in all cases, + * for example when running the kernel as a guest on a hypervisor. In these + * cases this function can be called to disable hard lockup detection. This + * function should only be executed once by the boot processor before the + * kernel command line parameters are parsed, because otherwise it is not + * possible to override this in hardlockup_panic_setup(). + */ +void hardlockup_detector_disable(void) +{ + watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; +} + +static int __init hardlockup_panic_setup(char *str) +{ + if (!strncmp(str, "panic", 5)) + hardlockup_panic = 1; + else if (!strncmp(str, "nopanic", 7)) + hardlockup_panic = 0; + else if (!strncmp(str, "0", 1)) + watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; + else if (!strncmp(str, "1", 1)) + watchdog_enabled |= NMI_WATCHDOG_ENABLED; + return 1; +} +__setup("nmi_watchdog=", hardlockup_panic_setup); + +#endif + +#ifdef CONFIG_SOFTLOCKUP_DETECTOR int __read_mostly soft_watchdog_enabled; +#endif + int __read_mostly watchdog_user_enabled; int __read_mostly watchdog_thresh = 10; @@ -45,15 +88,9 @@ int __read_mostly watchdog_thresh = 10; int __read_mostly sysctl_softlockup_all_cpu_backtrace; int __read_mostly sysctl_hardlockup_all_cpu_backtrace; #endif -static struct cpumask watchdog_cpumask __read_mostly; +struct cpumask watchdog_cpumask __read_mostly; unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); -/* Helper for online, unparked cpus. */ -#define for_each_watchdog_cpu(cpu) \ - for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) - -atomic_t watchdog_park_in_progress = ATOMIC_INIT(0); - /* * The 'watchdog_running' variable is set to 1 when the watchdog threads * are registered/started and is set to 0 when the watchdog threads are @@ -72,7 +109,47 @@ static int __read_mostly watchdog_running; * of 'watchdog_running' cannot change while the watchdog is deactivated * temporarily (see related code in 'proc' handlers). */ -static int __read_mostly watchdog_suspended; +int __read_mostly watchdog_suspended; + +/* + * These functions can be overridden if an architecture implements its + * own hardlockup detector. + * + * watchdog_nmi_enable/disable can be implemented to start and stop when + * softlockup watchdog threads start and stop. The arch must select the + * SOFTLOCKUP_DETECTOR Kconfig. + */ +int __weak watchdog_nmi_enable(unsigned int cpu) +{ + return 0; +} +void __weak watchdog_nmi_disable(unsigned int cpu) +{ +} + +/* + * watchdog_nmi_reconfigure can be implemented to be notified after any + * watchdog configuration change. The arch hardlockup watchdog should + * respond to the following variables: + * - nmi_watchdog_enabled + * - watchdog_thresh + * - watchdog_cpumask + * - sysctl_hardlockup_all_cpu_backtrace + * - hardlockup_panic + * - watchdog_suspended + */ +void __weak watchdog_nmi_reconfigure(void) +{ +} + + +#ifdef CONFIG_SOFTLOCKUP_DETECTOR + +/* Helper for online, unparked cpus. */ +#define for_each_watchdog_cpu(cpu) \ + for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask) + +atomic_t watchdog_park_in_progress = ATOMIC_INIT(0); static u64 __read_mostly sample_period; @@ -120,6 +197,7 @@ static int __init softlockup_all_cpu_backtrace_setup(char *str) return 1; } __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup); +#ifdef CONFIG_HARDLOCKUP_DETECTOR static int __init hardlockup_all_cpu_backtrace_setup(char *str) { sysctl_hardlockup_all_cpu_backtrace = @@ -128,6 +206,7 @@ static int __init hardlockup_all_cpu_backtrace_setup(char *str) } __setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup); #endif +#endif /* * Hard-lockup warnings should be triggered after just a few seconds. Soft- @@ -213,18 +292,6 @@ void touch_softlockup_watchdog_sync(void) __this_cpu_write(watchdog_touch_ts, 0); } -/* watchdog detector functions */ -bool is_hardlockup(void) -{ - unsigned long hrint = __this_cpu_read(hrtimer_interrupts); - - if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) - return true; - - __this_cpu_write(hrtimer_interrupts_saved, hrint); - return false; -} - static int is_softlockup(unsigned long touch_ts) { unsigned long now = get_timestamp(); @@ -237,21 +304,21 @@ static int is_softlockup(unsigned long touch_ts) return 0; } -static void watchdog_interrupt_count(void) +/* watchdog detector functions */ +bool is_hardlockup(void) { - __this_cpu_inc(hrtimer_interrupts); -} + unsigned long hrint = __this_cpu_read(hrtimer_interrupts); -/* - * These two functions are mostly architecture specific - * defining them as weak here. - */ -int __weak watchdog_nmi_enable(unsigned int cpu) -{ - return 0; + if (__this_cpu_read(hrtimer_interrupts_saved) == hrint) + return true; + + __this_cpu_write(hrtimer_interrupts_saved, hrint); + return false; } -void __weak watchdog_nmi_disable(unsigned int cpu) + +static void watchdog_interrupt_count(void) { + __this_cpu_inc(hrtimer_interrupts); } static int watchdog_enable_all_cpus(void); @@ -502,57 +569,6 @@ static void watchdog_unpark_threads(void) kthread_unpark(per_cpu(softlockup_watchdog, cpu)); } -/* - * Suspend the hard and soft lockup detector by parking the watchdog threads. - */ -int lockup_detector_suspend(void) -{ - int ret = 0; - - get_online_cpus(); - mutex_lock(&watchdog_proc_mutex); - /* - * Multiple suspend requests can be active in parallel (counted by - * the 'watchdog_suspended' variable). If the watchdog threads are - * running, the first caller takes care that they will be parked. - * The state of 'watchdog_running' cannot change while a suspend - * request is active (see related code in 'proc' handlers). - */ - if (watchdog_running && !watchdog_suspended) - ret = watchdog_park_threads(); - - if (ret == 0) - watchdog_suspended++; - else { - watchdog_disable_all_cpus(); - pr_err("Failed to suspend lockup detectors, disabled\n"); - watchdog_enabled = 0; - } - - mutex_unlock(&watchdog_proc_mutex); - - return ret; -} - -/* - * Resume the hard and soft lockup detector by unparking the watchdog threads. - */ -void lockup_detector_resume(void) -{ - mutex_lock(&watchdog_proc_mutex); - - watchdog_suspended--; - /* - * The watchdog threads are unparked if they were previously running - * and if there is no more active suspend request. - */ - if (watchdog_running && !watchdog_suspended) - watchdog_unpark_threads(); - - mutex_unlock(&watchdog_proc_mutex); - put_online_cpus(); -} - static int update_watchdog_all_cpus(void) { int ret; @@ -605,6 +621,100 @@ static void watchdog_disable_all_cpus(void) } #ifdef CONFIG_SYSCTL +static int watchdog_update_cpus(void) +{ + return smpboot_update_cpumask_percpu_thread( + &watchdog_threads, &watchdog_cpumask); +} +#endif + +#else /* SOFTLOCKUP */ +static int watchdog_park_threads(void) +{ + return 0; +} + +static void watchdog_unpark_threads(void) +{ +} + +static int watchdog_enable_all_cpus(void) +{ + return 0; +} + +static void watchdog_disable_all_cpus(void) +{ +} + +#ifdef CONFIG_SYSCTL +static int watchdog_update_cpus(void) +{ + return 0; +} +#endif + +static void set_sample_period(void) +{ +} +#endif /* SOFTLOCKUP */ + +/* + * Suspend the hard and soft lockup detector by parking the watchdog threads. + */ +int lockup_detector_suspend(void) +{ + int ret = 0; + + get_online_cpus(); + mutex_lock(&watchdog_proc_mutex); + /* + * Multiple suspend requests can be active in parallel (counted by + * the 'watchdog_suspended' variable). If the watchdog threads are + * running, the first caller takes care that they will be parked. + * The state of 'watchdog_running' cannot change while a suspend + * request is active (see related code in 'proc' handlers). + */ + if (watchdog_running && !watchdog_suspended) + ret = watchdog_park_threads(); + + if (ret == 0) + watchdog_suspended++; + else { + watchdog_disable_all_cpus(); + pr_err("Failed to suspend lockup detectors, disabled\n"); + watchdog_enabled = 0; + } + + watchdog_nmi_reconfigure(); + + mutex_unlock(&watchdog_proc_mutex); + + return ret; +} + +/* + * Resume the hard and soft lockup detector by unparking the watchdog threads. + */ +void lockup_detector_resume(void) +{ + mutex_lock(&watchdog_proc_mutex); + + watchdog_suspended--; + /* + * The watchdog threads are unparked if they were previously running + * and if there is no more active suspend request. + */ + if (watchdog_running && !watchdog_suspended) + watchdog_unpark_threads(); + + watchdog_nmi_reconfigure(); + + mutex_unlock(&watchdog_proc_mutex); + put_online_cpus(); +} + +#ifdef CONFIG_SYSCTL /* * Update the run state of the lockup detectors. @@ -625,6 +735,8 @@ static int proc_watchdog_update(void) else watchdog_disable_all_cpus(); + watchdog_nmi_reconfigure(); + return err; } @@ -810,10 +922,11 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, * a temporary cpumask, so we are likely not in a * position to do much else to make things better. */ - if (smpboot_update_cpumask_percpu_thread( - &watchdog_threads, &watchdog_cpumask) != 0) + if (watchdog_update_cpus() != 0) pr_err("cpumask update failed\n"); } + + watchdog_nmi_reconfigure(); } out: mutex_unlock(&watchdog_proc_mutex); diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 54a427d1f344..295a0d84934c 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -22,41 +22,9 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn); static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); -/* boot commands */ -/* - * Should we panic when a soft-lockup or hard-lockup occurs: - */ -unsigned int __read_mostly hardlockup_panic = - CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE; static unsigned long hardlockup_allcpu_dumped; -/* - * We may not want to enable hard lockup detection by default in all cases, - * for example when running the kernel as a guest on a hypervisor. In these - * cases this function can be called to disable hard lockup detection. This - * function should only be executed once by the boot processor before the - * kernel command line parameters are parsed, because otherwise it is not - * possible to override this in hardlockup_panic_setup(). - */ -void hardlockup_detector_disable(void) -{ - watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; -} - -static int __init hardlockup_panic_setup(char *str) -{ - if (!strncmp(str, "panic", 5)) - hardlockup_panic = 1; - else if (!strncmp(str, "nopanic", 7)) - hardlockup_panic = 0; - else if (!strncmp(str, "0", 1)) - watchdog_enabled &= ~NMI_WATCHDOG_ENABLED; - else if (!strncmp(str, "1", 1)) - watchdog_enabled |= NMI_WATCHDOG_ENABLED; - return 1; -} -__setup("nmi_watchdog=", hardlockup_panic_setup); -void touch_nmi_watchdog(void) +void arch_touch_nmi_watchdog(void) { /* * Using __raw here because some code paths have @@ -66,9 +34,8 @@ void touch_nmi_watchdog(void) * going off. */ raw_cpu_write(watchdog_nmi_touch, true); - touch_softlockup_watchdog(); } -EXPORT_SYMBOL(touch_nmi_watchdog); +EXPORT_SYMBOL(arch_touch_nmi_watchdog); static struct perf_event_attr wd_hw_attr = { .type = PERF_TYPE_HARDWARE, diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 4121c7fdd5a8..0e0ecc8bd076 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -778,34 +778,45 @@ config DEBUG_SHIRQ menu "Debug Lockups and Hangs" config LOCKUP_DETECTOR - bool "Detect Hard and Soft Lockups" + bool + +config SOFTLOCKUP_DETECTOR + bool "Detect Soft Lockups" depends on DEBUG_KERNEL && !S390 + select LOCKUP_DETECTOR help Say Y here to enable the kernel to act as a watchdog to detect - hard and soft lockups. + soft lockups. Softlockups are bugs that cause the kernel to loop in kernel mode for more than 20 seconds, without giving other tasks a chance to run. The current stack trace is displayed upon detection and the system will stay locked up. +config HARDLOCKUP_DETECTOR_PERF + bool + select SOFTLOCKUP_DETECTOR + +# +# arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard +# lockup detector rather than the perf based detector. +# +config HARDLOCKUP_DETECTOR + bool "Detect Hard Lockups" + depends on DEBUG_KERNEL && !S390 + depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH + select LOCKUP_DETECTOR + select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF + select HARDLOCKUP_DETECTOR_ARCH if HAVE_HARDLOCKUP_DETECTOR_ARCH + help + Say Y here to enable the kernel to act as a watchdog to detect + hard lockups. + Hardlockups are bugs that cause the CPU to loop in kernel mode for more than 10 seconds, without letting other interrupts have a chance to run. The current stack trace is displayed upon detection and the system will stay locked up. - The overhead should be minimal. A periodic hrtimer runs to - generate interrupts and kick the watchdog task every 4 seconds. - An NMI is generated every 10 seconds or so to check for hardlockups. - - The frequency of hrtimer and NMI events and the soft and hard lockup - thresholds can be controlled through the sysctl watchdog_thresh. - -config HARDLOCKUP_DETECTOR - def_bool y - depends on LOCKUP_DETECTOR && !HAVE_NMI_WATCHDOG - depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI - config BOOTPARAM_HARDLOCKUP_PANIC bool "Panic (Reboot) On Hard Lockups" depends on HARDLOCKUP_DETECTOR @@ -826,7 +837,7 @@ config BOOTPARAM_HARDLOCKUP_PANIC_VALUE config BOOTPARAM_SOFTLOCKUP_PANIC bool "Panic (Reboot) On Soft Lockups" - depends on LOCKUP_DETECTOR + depends on SOFTLOCKUP_DETECTOR help Say Y here to enable the kernel to panic on "soft lockups", which are bugs that cause the kernel to loop in kernel @@ -843,7 +854,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE int - depends on LOCKUP_DETECTOR + depends on SOFTLOCKUP_DETECTOR range 0 1 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC default 1 if BOOTPARAM_SOFTLOCKUP_PANIC @@ -851,7 +862,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE config DETECT_HUNG_TASK bool "Detect Hung Tasks" depends on DEBUG_KERNEL - default LOCKUP_DETECTOR + default SOFTLOCKUP_DETECTOR help Say Y here to enable the kernel to detect "hung tasks", which are bugs that cause the task to be stuck in diff --git a/lib/crc-ccitt.c b/lib/crc-ccitt.c index 7f6dd68d2d09..d873b34039ff 100644 --- a/lib/crc-ccitt.c +++ b/lib/crc-ccitt.c @@ -51,8 +51,49 @@ u16 const crc_ccitt_table[256] = { }; EXPORT_SYMBOL(crc_ccitt_table); +/* + * Similar table to calculate CRC16 variant known as CRC-CCITT-FALSE + * Reflected bits order, does not augment final value. + */ +u16 const crc_ccitt_false_table[256] = { + 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7, + 0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF, + 0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6, + 0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE, + 0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485, + 0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D, + 0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4, + 0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC, + 0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823, + 0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B, + 0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12, + 0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A, + 0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41, + 0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49, + 0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70, + 0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78, + 0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F, + 0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067, + 0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E, + 0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256, + 0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D, + 0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, + 0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C, + 0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634, + 0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB, + 0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3, + 0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A, + 0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92, + 0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9, + 0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1, + 0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8, + 0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0 +}; +EXPORT_SYMBOL(crc_ccitt_false_table); + /** - * crc_ccitt - recompute the CRC for the data buffer + * crc_ccitt - recompute the CRC (CRC-CCITT variant) for the data + * buffer * @crc: previous CRC value * @buffer: data pointer * @len: number of bytes in the buffer @@ -65,5 +106,20 @@ u16 crc_ccitt(u16 crc, u8 const *buffer, size_t len) } EXPORT_SYMBOL(crc_ccitt); +/** + * crc_ccitt_false - recompute the CRC (CRC-CCITT-FALSE variant) + * for the data buffer + * @crc: previous CRC value + * @buffer: data pointer + * @len: number of bytes in the buffer + */ +u16 crc_ccitt_false(u16 crc, u8 const *buffer, size_t len) +{ + while (len--) + crc = crc_ccitt_false_byte(crc, *buffer++); + return crc; +} +EXPORT_SYMBOL(crc_ccitt_false); + MODULE_DESCRIPTION("CRC-CCITT calculations"); MODULE_LICENSE("GPL"); diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 8ee7e5ec21be..3bf4a9984f4c 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -72,6 +72,13 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount) } EXPORT_SYMBOL(percpu_counter_set); +/** + * This function is both preempt and irq safe. The former is due to explicit + * preemption disable. The latter is guaranteed by the fact that the slow path + * is explicitly protected by an irq-safe spinlock whereas the fast patch uses + * this_cpu_add which is irq-safe by definition. Hence there is no need muck + * with irq state before calling this one + */ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { s64 count; diff --git a/lib/string.c b/lib/string.c index 1c1fc9187b05..ebbb99c775bd 100644 --- a/lib/string.c +++ b/lib/string.c @@ -978,3 +978,10 @@ char *strreplace(char *s, char old, char new) return s; } EXPORT_SYMBOL(strreplace); + +void fortify_panic(const char *name) +{ + pr_emerg("detected buffer overflow in %s\n", name); + BUG(); +} +EXPORT_SYMBOL(fortify_panic); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 1e516520433d..bc48ee783dd9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1384,7 +1384,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid) page = __alloc_pages_node(nid, htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE| - __GFP_REPEAT|__GFP_NOWARN, + __GFP_RETRY_MAYFAIL|__GFP_NOWARN, huge_page_order(h)); if (page) { prep_new_huge_page(h, page, nid); @@ -1525,7 +1525,7 @@ static struct page *__hugetlb_alloc_buddy_huge_page(struct hstate *h, { int order = huge_page_order(h); - gfp_mask |= __GFP_COMP|__GFP_REPEAT|__GFP_NOWARN; + gfp_mask |= __GFP_COMP|__GFP_RETRY_MAYFAIL|__GFP_NOWARN; if (nid == NUMA_NO_NODE) nid = numa_mem_id(); return __alloc_pages_nodemask(gfp_mask, order, nid, nmask); diff --git a/mm/internal.h b/mm/internal.h index 0e4f558412fb..24d88f084705 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -23,7 +23,7 @@ * hints such as HIGHMEM usage. */ #define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ - __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\ + __GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_NOFAIL|\ __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\ __GFP_ATOMIC) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 7d8e56214ac0..d911fa5cb2a7 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1078,7 +1078,8 @@ static struct page *new_page(struct page *page, unsigned long start, int **x) /* * if !vma, alloc_page_vma() will use task or system default policy */ - return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); + return alloc_page_vma(GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL, + vma, address); } #else diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 0b60cc7ddac2..96e93b214d31 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -601,7 +601,7 @@ static inline void __wb_writeout_inc(struct bdi_writeback *wb) { struct wb_domain *cgdom; - __inc_wb_stat(wb, WB_WRITTEN); + inc_wb_stat(wb, WB_WRITTEN); wb_domain_writeout_inc(&global_wb_domain, &wb->completions, wb->bdi->max_prop_frac); @@ -2435,8 +2435,8 @@ void account_page_dirtied(struct page *page, struct address_space *mapping) __inc_lruvec_page_state(page, NR_FILE_DIRTY); __inc_zone_page_state(page, NR_ZONE_WRITE_PENDING); __inc_node_page_state(page, NR_DIRTIED); - __inc_wb_stat(wb, WB_RECLAIMABLE); - __inc_wb_stat(wb, WB_DIRTIED); + inc_wb_stat(wb, WB_RECLAIMABLE); + inc_wb_stat(wb, WB_DIRTIED); task_io_account_write(PAGE_SIZE); current->nr_dirtied++; this_cpu_inc(bdp_ratelimits); @@ -2741,7 +2741,7 @@ int test_clear_page_writeback(struct page *page) if (bdi_cap_account_writeback(bdi)) { struct bdi_writeback *wb = inode_to_wb(inode); - __dec_wb_stat(wb, WB_WRITEBACK); + dec_wb_stat(wb, WB_WRITEBACK); __wb_writeout_inc(wb); } } @@ -2786,7 +2786,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write) page_index(page), PAGECACHE_TAG_WRITEBACK); if (bdi_cap_account_writeback(bdi)) - __inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK); + inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK); /* * We can come through here when swapping anonymous diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 35de49c3de8b..2bc48ea53cc9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3284,6 +3284,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, /* The OOM killer will not help higher order allocs */ if (order > PAGE_ALLOC_COSTLY_ORDER) goto out; + /* + * We have already exhausted all our reclaim opportunities without any + * success so it is time to admit defeat. We will skip the OOM killer + * because it is very likely that the caller has a more reasonable + * fallback than shooting a random task. + */ + if (gfp_mask & __GFP_RETRY_MAYFAIL) + goto out; /* The OOM killer does not needlessly kill tasks for lowmem */ if (ac->high_zoneidx < ZONE_NORMAL) goto out; @@ -3413,7 +3421,7 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags, } /* - * !costly requests are much more important than __GFP_REPEAT + * !costly requests are much more important than __GFP_RETRY_MAYFAIL * costly ones because they are de facto nofail and invoke OOM * killer to move on while costly can fail and users are ready * to cope with that. 1/4 retries is rather arbitrary but we @@ -3920,9 +3928,9 @@ retry: /* * Do not retry costly high order allocations unless they are - * __GFP_REPEAT + * __GFP_RETRY_MAYFAIL */ - if (costly_order && !(gfp_mask & __GFP_REPEAT)) + if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL)) goto nopage; if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags, diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index a56c3989f773..c50b1a14d55e 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -56,11 +56,11 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node) if (node_state(node, N_HIGH_MEMORY)) page = alloc_pages_node( - node, GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT, + node, GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL, get_order(size)); else page = alloc_pages( - GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT, + GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL, get_order(size)); if (page) return page_address(page); diff --git a/mm/util.c b/mm/util.c index 26be6407abd7..ee250e2cde34 100644 --- a/mm/util.c +++ b/mm/util.c @@ -339,9 +339,9 @@ EXPORT_SYMBOL(vm_mmap); * Uses kmalloc to get the memory but if the allocation fails then falls back * to the vmalloc allocator. Use kvfree for freeing the memory. * - * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported. __GFP_REPEAT - * is supported only for large (>32kB) allocations, and it should be used only if - * kmalloc is preferable to the vmalloc fallback, due to visible performance drawbacks. + * Reclaim modifiers - __GFP_NORETRY and __GFP_NOFAIL are not supported. + * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is + * preferable to the vmalloc fallback, due to visible performance drawbacks. * * Any use of gfp flags outside of GFP_KERNEL should be consulted with mm people. */ @@ -366,13 +366,7 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node) if (size > PAGE_SIZE) { kmalloc_flags |= __GFP_NOWARN; - /* - * We have to override __GFP_REPEAT by __GFP_NORETRY for !costly - * requests because there is no other way to tell the allocator - * that we want to fail rather than retry endlessly. - */ - if (!(kmalloc_flags & __GFP_REPEAT) || - (size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) + if (!(kmalloc_flags & __GFP_RETRY_MAYFAIL)) kmalloc_flags |= __GFP_NORETRY; } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6016ab079e2b..8698c1c86c4d 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1795,7 +1795,7 @@ fail: * allocator with @gfp_mask flags. Map them into contiguous * kernel virtual space, using a pagetable protection of @prot. * - * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_REPEAT + * Reclaim modifiers in @gfp_mask - __GFP_NORETRY, __GFP_RETRY_MAYFAIL * and __GFP_NOFAIL are not supported * * Any use of gfp flags outside of GFP_KERNEL should be consulted diff --git a/mm/vmscan.c b/mm/vmscan.c index f84cdd3751e1..efc9da21c5e6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2506,18 +2506,18 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat, return false; /* Consider stopping depending on scan and reclaim activity */ - if (sc->gfp_mask & __GFP_REPEAT) { + if (sc->gfp_mask & __GFP_RETRY_MAYFAIL) { /* - * For __GFP_REPEAT allocations, stop reclaiming if the + * For __GFP_RETRY_MAYFAIL allocations, stop reclaiming if the * full LRU list has been scanned and we are still failing * to reclaim pages. This full LRU scan is potentially - * expensive but a __GFP_REPEAT caller really wants to succeed + * expensive but a __GFP_RETRY_MAYFAIL caller really wants to succeed */ if (!nr_reclaimed && !nr_scanned) return false; } else { /* - * For non-__GFP_REPEAT allocations which can presumably + * For non-__GFP_RETRY_MAYFAIL allocations which can presumably * fail without consequence, stop if we failed to reclaim * any pages from the last SWAP_CLUSTER_MAX number of * pages that were scanned. This will return to the diff --git a/net/core/dev.c b/net/core/dev.c index 9be778ba7857..73fb6a28e7de 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7373,7 +7373,7 @@ static int netif_alloc_rx_queues(struct net_device *dev) BUG_ON(count < 1); - rx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT); + rx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!rx) return -ENOMEM; @@ -7413,7 +7413,7 @@ static int netif_alloc_netdev_queues(struct net_device *dev) if (count < 1 || count > 0xffff) return -EINVAL; - tx = kvzalloc(sz, GFP_KERNEL | __GFP_REPEAT); + tx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!tx) return -ENOMEM; @@ -7954,7 +7954,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, /* ensure 32-byte alignment of whole construct */ alloc_size += NETDEV_ALIGN - 1; - p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_REPEAT); + p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!p) return NULL; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f75897a33fa4..2bff10a20bc9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4747,7 +4747,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, gfp_head = gfp_mask; if (gfp_head & __GFP_DIRECT_RECLAIM) - gfp_head |= __GFP_REPEAT; + gfp_head |= __GFP_RETRY_MAYFAIL; *errcode = -ENOBUFS; skb = alloc_skb(header_len, gfp_head); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 1770c1d9b37f..e1648238a9c9 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1003,14 +1003,10 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages) return NULL; - if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) - info = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); - if (!info) { - info = __vmalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, - PAGE_KERNEL); - if (!info) - return NULL; - } + info = kvmalloc(sz, GFP_KERNEL); + if (!info) + return NULL; + memset(info, 0, sizeof(*info)); info->size = size; return info; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 147fde73a0f5..263d16e3219e 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -648,7 +648,7 @@ static int fq_resize(struct Qdisc *sch, u32 log) return 0; /* If XPS was setup, we can allocate memory on right NUMA node */ - array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_REPEAT, + array = kvmalloc_node(sizeof(struct rb_root) << log, GFP_KERNEL | __GFP_RETRY_MAYFAIL, netdev_queue_numa_node_read(sch->dev_queue)); if (!array) return -ENOMEM; diff --git a/security/Kconfig b/security/Kconfig index d540bfe73190..e8e449444e65 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -163,6 +163,13 @@ config HARDENED_USERCOPY_PAGESPAN been removed. This config is intended to be used only while trying to find such users. +config FORTIFY_SOURCE + bool "Harden common str/mem functions against buffer overflows" + depends on ARCH_HAS_FORTIFY_SOURCE + help + Detect overflows of buffers in common string and memory functions + where the compiler can determine and validate the buffer sizes. + config STATIC_USERMODEHELPER bool "Force all usermode helper calls through a single binary" help diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 9409c9464667..c4222ea452e9 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -643,7 +643,7 @@ static const struct { { "__GFP_FS", "F" }, { "__GFP_COLD", "CO" }, { "__GFP_NOWARN", "NWR" }, - { "__GFP_REPEAT", "R" }, + { "__GFP_RETRY_MAYFAIL", "R" }, { "__GFP_NOFAIL", "NF" }, { "__GFP_NORETRY", "NR" }, { "__GFP_COMP", "C" }, |