author:    Ingo Molnar <mingo@kernel.org>  2019-05-16 09:04:48 +0200
committer: Ingo Molnar <mingo@kernel.org>  2019-05-16 09:04:48 +0200
commit:    00f5764dbb040188e5dce2cd9e648360886b045c
tree:      2dc969bb165a27a7cebdd1798b7a697243a790de /arch
parent:    409ca45526a428620d8efb362ccfd4b1e6b80642
parent:    5ac94332248ee017964ba368cdda4ce647e3aba7
Merge branch 'linus' into x86/urgent, to pick up dependent changes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch')
789 files changed, 11213 insertions, 9184 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 5e43fcbad4ca..f11f0698b148 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -245,6 +245,13 @@ config ARCH_HAS_FORTIFY_SOURCE
 	  An architecture should select this when it can successfully
 	  build and run with CONFIG_FORTIFY_SOURCE.
 
+#
+# Select if the arch provides a historic keepinit alias for the retain_initrd
+# command line option
+#
+config ARCH_HAS_KEEPINITRD
+	bool
+
 # Select if arch has all set_memory_ro/rw/x/nx() functions in asm/cacheflush.h
 config ARCH_HAS_SET_MEMORY
 	bool
diff --git a/arch/alpha/include/asm/syscall.h b/arch/alpha/include/asm/syscall.h
index d73a6fcb519c..11c688c1d7ec 100644
--- a/arch/alpha/include/asm/syscall.h
+++ b/arch/alpha/include/asm/syscall.h
@@ -4,7 +4,7 @@
 
 #include <uapi/linux/audit.h>
 
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
 {
 	return AUDIT_ARCH_ALPHA;
 }
diff --git a/arch/alpha/include/uapi/asm/sockios.h b/arch/alpha/include/uapi/asm/sockios.h
index ba287e4b01bf..af92bc27c3be 100644
--- a/arch/alpha/include/uapi/asm/sockios.h
+++ b/arch/alpha/include/uapi/asm/sockios.h
@@ -11,7 +11,7 @@
 #define SIOCSPGRP	_IOW('s', 8, pid_t)
 #define SIOCGPGRP	_IOR('s', 9, pid_t)
 
-#define SIOCGSTAMP	0x8906		/* Get stamp (timeval) */
-#define SIOCGSTAMPNS	0x8907		/* Get stamp (timespec) */
+#define SIOCGSTAMP_OLD	0x8906		/* Get stamp (timeval) */
+#define SIOCGSTAMPNS_OLD 0x8907		/* Get stamp (timespec) */
 
 #endif /* _ASM_ALPHA_SOCKIOS_H */
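The two alpha hunks above are instances of tree-wide changes picked up by this merge: syscall_get_arch() now takes the task explicitly, and the old SIOCGSTAMP/SIOCGSTAMPNS ioctl numbers are renamed to *_OLD as part of the y2038 socket-timestamp work. A minimal sketch of the new syscall_get_arch() calling convention (example_task_abi is a hypothetical helper, not in this diff):

#include <linux/sched.h>
#include <asm/syscall.h>

/* Hypothetical helper: ask which syscall ABI @tsk uses. */
static inline int example_task_abi(struct task_struct *tsk)
{
	/*
	 * The old signature was syscall_get_arch(void) and implicitly
	 * described current; with the task explicit, audit-style
	 * callers can classify another process too.
	 */
	return syscall_get_arch(tsk);
}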
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index 3034d6d936d2..242108439f42 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -249,7 +249,7 @@ static int pci_dac_dma_supported(struct pci_dev *dev, u64 mask)
 		ok = 0;
 
 	/* If both conditions above are met, we are fine. */
-	DBGA("pci_dac_dma_supported %s from %pf\n",
+	DBGA("pci_dac_dma_supported %s from %ps\n",
 	     ok ? "yes" : "no", __builtin_return_address(0));
 
 	return ok;
@@ -281,7 +281,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size,
 	    && paddr + size <= __direct_map_size) {
 		ret = paddr + __direct_map_base;
 
-		DBGA2("pci_map_single: [%p,%zx] -> direct %llx from %pf\n",
+		DBGA2("pci_map_single: [%p,%zx] -> direct %llx from %ps\n",
 		      cpu_addr, size, ret, __builtin_return_address(0));
 
 		return ret;
@@ -292,7 +292,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size,
 	if (dac_allowed) {
 		ret = paddr + alpha_mv.pci_dac_offset;
 
-		DBGA2("pci_map_single: [%p,%zx] -> DAC %llx from %pf\n",
+		DBGA2("pci_map_single: [%p,%zx] -> DAC %llx from %ps\n",
 		      cpu_addr, size, ret, __builtin_return_address(0));
 
 		return ret;
@@ -329,7 +329,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size,
 	ret = arena->dma_base + dma_ofs * PAGE_SIZE;
 	ret += (unsigned long)cpu_addr & ~PAGE_MASK;
 
-	DBGA2("pci_map_single: [%p,%zx] np %ld -> sg %llx from %pf\n",
+	DBGA2("pci_map_single: [%p,%zx] np %ld -> sg %llx from %ps\n",
 	      cpu_addr, size, npages, ret, __builtin_return_address(0));
 
 	return ret;
@@ -396,14 +396,14 @@ static void alpha_pci_unmap_page(struct device *dev, dma_addr_t dma_addr,
 	    && dma_addr < __direct_map_base + __direct_map_size) {
 		/* Nothing to do.  */
 
-		DBGA2("pci_unmap_single: direct [%llx,%zx] from %pf\n",
+		DBGA2("pci_unmap_single: direct [%llx,%zx] from %ps\n",
 		      dma_addr, size, __builtin_return_address(0));
 
 		return;
 	}
 
 	if (dma_addr > 0xffffffff) {
-		DBGA2("pci64_unmap_single: DAC [%llx,%zx] from %pf\n",
+		DBGA2("pci64_unmap_single: DAC [%llx,%zx] from %ps\n",
 		      dma_addr, size, __builtin_return_address(0));
 		return;
 	}
@@ -435,7 +435,7 @@ static void alpha_pci_unmap_page(struct device *dev, dma_addr_t dma_addr,
 
 	spin_unlock_irqrestore(&arena->lock, flags);
 
-	DBGA2("pci_unmap_single: sg [%llx,%zx] np %ld from %pf\n",
+	DBGA2("pci_unmap_single: sg [%llx,%zx] np %ld from %ps\n",
 	      dma_addr, size, npages, __builtin_return_address(0));
 }
@@ -458,7 +458,7 @@ try_again:
 	cpu_addr = (void *)__get_free_pages(gfp | __GFP_ZERO, order);
 	if (! cpu_addr) {
 		printk(KERN_INFO "pci_alloc_consistent: "
-		       "get_free_pages failed from %pf\n",
+		       "get_free_pages failed from %ps\n",
			__builtin_return_address(0));
 		/* ??? Really atomic allocation? Otherwise we could play
 		   with vmalloc and sg if we can't find contiguous memory. */
@@ -477,7 +477,7 @@ try_again:
 		goto try_again;
 	}
 
-	DBGA2("pci_alloc_consistent: %zx -> [%p,%llx] from %pf\n",
+	DBGA2("pci_alloc_consistent: %zx -> [%p,%llx] from %ps\n",
 	      size, cpu_addr, *dma_addrp, __builtin_return_address(0));
 
 	return cpu_addr;
@@ -497,7 +497,7 @@ static void alpha_pci_free_coherent(struct device *dev, size_t size,
 	pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
 	free_pages((unsigned long)cpu_addr, get_order(size));
 
-	DBGA2("pci_free_consistent: [%llx,%zx] from %pf\n",
+	DBGA2("pci_free_consistent: [%llx,%zx] from %ps\n",
 	      dma_addr, size, __builtin_return_address(0));
 }
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index a42fc5c4db89..e2cbec3789e8 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -285,17 +285,3 @@ mem_init(void)
 	memblock_free_all();
 	mem_init_print_info(NULL);
 }
-
-void
-free_initmem(void)
-{
-	free_initmem_default(-1);
-}
-
-#ifdef CONFIG_BLK_DEV_INITRD
-void
-free_initrd_mem(unsigned long start, unsigned long end)
-{
-	free_reserved_area((void *)start, (void *)end, -1, "initrd");
-}
-#endif
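The pci_iommu.c hunks above are mechanical %pf to %ps conversions: the old %pf/%pF printk specifiers (function-pointer symbol lookup via function descriptors) were deprecated this cycle in favour of %ps/%pS, which now handle descriptor ABIs themselves. The replacement idiom, sketched (example_trace_caller is hypothetical):

#include <linux/types.h>
#include <linux/printk.h>

static void example_trace_caller(size_t size)
{
	/* old: pr_debug("mapped %zx from %pf\n", ...); */
	pr_debug("mapped %zx from %ps\n", size,
		 __builtin_return_address(0));
}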
diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig
index f56cc2070c11..b117e6c16d41 100644
--- a/arch/arc/configs/haps_hs_defconfig
+++ b/arch/arc/configs/haps_hs_defconfig
@@ -15,7 +15,6 @@ CONFIG_PERF_EVENTS=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_SLAB=y
 CONFIG_MODULES=y
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig
index b6f2482c7e74..33a787c375e2 100644
--- a/arch/arc/configs/haps_hs_smp_defconfig
+++ b/arch/arc/configs/haps_hs_smp_defconfig
@@ -17,7 +17,6 @@ CONFIG_PERF_EVENTS=y
 CONFIG_SLAB=y
 CONFIG_KPROBES=y
 CONFIG_MODULES=y
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig
index 318e4cd29629..de398c7b10b3 100644
--- a/arch/arc/configs/nsim_700_defconfig
+++ b/arch/arc/configs/nsim_700_defconfig
@@ -18,7 +18,6 @@ CONFIG_PERF_EVENTS=y
 CONFIG_ISA_ARCOMPACT=y
 CONFIG_KPROBES=y
 CONFIG_MODULES=y
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig
index c15807b0e0c1..2dbd34a9ff07 100644
--- a/arch/arc/configs/nsim_hs_defconfig
+++ b/arch/arc/configs/nsim_hs_defconfig
@@ -20,7 +20,6 @@ CONFIG_MODULES=y
 CONFIG_MODULE_FORCE_LOAD=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig
index 65e983fd942b..c7135f1e2583 100644
--- a/arch/arc/configs/nsim_hs_smp_defconfig
+++ b/arch/arc/configs/nsim_hs_smp_defconfig
@@ -18,7 +18,6 @@ CONFIG_MODULES=y
 CONFIG_MODULE_FORCE_LOAD=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index 08c5b99ac341..385a71d3c478 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -18,7 +18,6 @@ CONFIG_PERF_EVENTS=y
 CONFIG_ISA_ARCOMPACT=y
 CONFIG_KPROBES=y
 CONFIG_MODULES=y
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig
index 5b5e26d67955..248a2c3bdc12 100644
--- a/arch/arc/configs/nsimosci_hs_defconfig
+++ b/arch/arc/configs/nsimosci_hs_defconfig
@@ -17,7 +17,6 @@ CONFIG_PERF_EVENTS=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y
 CONFIG_MODULES=y
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig
index 26af9b2f7fcb..1a4bc7b660fb 100644
--- a/arch/arc/configs/nsimosci_hs_smp_defconfig
+++ b/arch/arc/configs/nsimosci_hs_smp_defconfig
@@ -12,7 +12,6 @@ CONFIG_PERF_EVENTS=y
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y
 CONFIG_MODULES=y
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h
index aa2d6da9d187..2b80c184c9c8 100644
--- a/arch/arc/include/asm/elf.h
+++ b/arch/arc/include/asm/elf.h
@@ -10,13 +10,9 @@
 #define __ASM_ARC_ELF_H
 
 #include <linux/types.h>
+#include <linux/elf-em.h>
 #include <uapi/asm/elf.h>
 
-/* These ELF defines belong to uapi but libc elf.h already defines them */
-#define EM_ARCOMPACT	93
-
-#define EM_ARCV2	195	/* ARCv2 Cores */
-
 #define EM_ARC_INUSE	(IS_ENABLED(CONFIG_ISA_ARCOMPACT) ? \
			EM_ARCOMPACT : EM_ARCV2)
diff --git a/arch/arc/include/asm/syscall.h b/arch/arc/include/asm/syscall.h
index c7a4201ed62b..9cac959ca4e8 100644
--- a/arch/arc/include/asm/syscall.h
+++ b/arch/arc/include/asm/syscall.h
@@ -9,6 +9,7 @@
 #ifndef _ASM_ARC_SYSCALL_H
 #define _ASM_ARC_SYSCALL_H 1
 
+#include <uapi/linux/audit.h>
 #include <linux/err.h>
 #include <linux/sched.h>
 #include <asm/unistd.h>
@@ -67,4 +68,14 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
 	}
 }
 
+static inline int
+syscall_get_arch(struct task_struct *task)
+{
+	return IS_ENABLED(CONFIG_ISA_ARCOMPACT)
+		? (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)
+			? AUDIT_ARCH_ARCOMPACTBE : AUDIT_ARCH_ARCOMPACT)
+		: (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)
+			? AUDIT_ARCH_ARCV2BE : AUDIT_ARCH_ARCV2);
+}
+
 #endif
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index e1ab2d7f1d64..02b7a3b20d7c 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -206,18 +206,3 @@ void __init mem_init(void)
 	memblock_free_all();
 	mem_init_print_info(NULL);
 }
-
-/*
- * free_initmem: Free all the __init memory.
- */
-void __ref free_initmem(void)
-{
-	free_initmem_default(-1);
-}
-
-#ifdef CONFIG_BLK_DEV_INITRD
-void __init free_initrd_mem(unsigned long start, unsigned long end)
-{
-	free_reserved_area((void *)start, (void *)end, -1, "initrd");
-}
-#endif
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index dc9855c4a3b4..5fd344bd06b9 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -4,11 +4,11 @@ config ARM
 	default y
 	select ARCH_32BIT_OFF_T
 	select ARCH_CLOCKSOURCE_DATA
-	select ARCH_DISCARD_MEMBLOCK if !HAVE_ARCH_PFN_VALID && !KEXEC
 	select ARCH_HAS_DEBUG_VIRTUAL if MMU
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FORTIFY_SOURCE
+	select ARCH_HAS_KEEPINITRD
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
@@ -21,6 +21,7 @@ config ARM
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAVE_CUSTOM_GPIO_H
 	select ARCH_HAS_GCOV_PROFILE_ALL
+	select ARCH_KEEP_MEMBLOCK if HAVE_ARCH_PFN_VALID || KEXEC
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_NO_SG_CHAIN if !ARM_HAS_SG_CHAIN
 	select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
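The deleted alpha and arc free_initmem()/free_initrd_mem() copies above, together with the new ARCH_HAS_KEEPINITRD option (a legacy "keepinitrd" command-line alias for retain_initrd), come from consolidating this boilerplate into common code. A simplified sketch of the kind of weak fallback the core now provides; this is an assumption about the generic versions, which live outside arch/ and also poison the freed range:

#include <linux/mm.h>

void __weak free_initmem(void)
{
	free_initmem_default(-1);
}

#ifdef CONFIG_BLK_DEV_INITRD
void __weak free_initrd_mem(unsigned long start, unsigned long end)
{
	free_reserved_area((void *)start, (void *)end, -1, "initrd");
}
#endif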
diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi
index b4f2723ecd86..b10ff5877b4c 100644
--- a/arch/arm/boot/dts/ls1021a.dtsi
+++ b/arch/arm/boot/dts/ls1021a.dtsi
@@ -446,6 +446,34 @@
 			status = "disabled";
 		};
 
+		counter0: counter@29d0000 {
+			compatible = "fsl,ftm-quaddec";
+			reg = <0x0 0x29d0000 0x0 0x10000>;
+			big-endian;
+			status = "disabled";
+		};
+
+		counter1: counter@29e0000 {
+			compatible = "fsl,ftm-quaddec";
+			reg = <0x0 0x29e0000 0x0 0x10000>;
+			big-endian;
+			status = "disabled";
+		};
+
+		counter2: counter@29f0000 {
+			compatible = "fsl,ftm-quaddec";
+			reg = <0x0 0x29f0000 0x0 0x10000>;
+			big-endian;
+			status = "disabled";
+		};
+
+		counter3: counter@2a00000 {
+			compatible = "fsl,ftm-quaddec";
+			reg = <0x0 0x2a00000 0x0 0x10000>;
+			big-endian;
+			status = "disabled";
+		};
+
 		gpio0: gpio@2300000 {
 			compatible = "fsl,ls1021a-gpio", "fsl,qoriq-gpio";
 			reg = <0x0 0x2300000 0x0 0x10000>;
diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts
index e21ec929f096..714863f8f261 100644
--- a/arch/arm/boot/dts/omap4-droid4-xt894.dts
+++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts
@@ -214,7 +214,6 @@
 		width-mm = <50>;
 		height-mm = <89>;
 
-		backlight = <&lcd_backlight>;
 
 		panel-timing {
 			clock-frequency = <0>;		/* Calculated by dsi */
@@ -383,20 +382,30 @@
 };
 
 &i2c1 {
-	lm3532@38 {
+	led-controller@38 {
 		compatible = "ti,lm3532";
+		#address-cells = <1>;
+		#size-cells = <0>;
 		reg = <0x38>;
 
 		enable-gpios = <&gpio6 12 GPIO_ACTIVE_HIGH>;
 
-		lcd_backlight: backlight {
-			compatible = "ti,lm3532-backlight";
+		ramp-up-us = <1024>;
+		ramp-down-us = <8193>;
 
-			lcd {
-				led-sources = <0 1 2>;
-				ramp-up-msec = <1>;
-				ramp-down-msec = <0>;
-			};
+		led@0 {
+			reg = <0>;
+			led-sources = <2>;
+			ti,led-mode = <0>;
+			label = ":backlight";
+			linux,default-trigger = "backlight";
+		};
+
+		led@1 {
+			reg = <1>;
+			led-sources = <1>;
+			ti,led-mode = <0>;
+			label = ":kbd_backlight";
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index a024d1e7e74c..8ce3dd2264b1 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -616,6 +616,7 @@
 		dr_mode = "host";
 		phys = <&usbphy2>;
 		phy-names = "usb2-phy";
+		snps,reset-phy-on-wake;
 		status = "disabled";
 	};
 
@@ -904,6 +905,8 @@
 			clocks = <&cru SCLK_OTGPHY0>;
 			clock-names = "phyclk";
 			#clock-cells = <0>;
+			resets = <&cru SRST_USBOTG_PHY>;
+			reset-names = "phy-reset";
 		};
 
 		usbphy1: usb-phy@334 {
@@ -912,6 +915,8 @@
 			clocks = <&cru SCLK_OTGPHY1>;
 			clock-names = "phyclk";
 			#clock-cells = <0>;
+			resets = <&cru SRST_USBHOST0_PHY>;
+			reset-names = "phy-reset";
 		};
 
 		usbphy2: usb-phy@348 {
@@ -920,6 +925,8 @@
 			clocks = <&cru SCLK_OTGPHY2>;
 			clock-names = "phyclk";
 			#clock-cells = <0>;
+			resets = <&cru SRST_USBHOST1_PHY>;
+			reset-names = "phy-reset";
 		};
 	};
 };
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 45412d21aa6b..179ca8757a74 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -32,7 +32,7 @@
 #include <mach/hardware.h>
 #include <asm/mach/irq.h>
 #include <asm/mach-types.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 
 #include <asm/hardware/sa1111.h>
diff --git a/arch/arm/configs/aspeed_g4_defconfig b/arch/arm/configs/aspeed_g4_defconfig
index 1446262921b4..190d6e9d3296 100644
--- a/arch/arm/configs/aspeed_g4_defconfig
+++ b/arch/arm/configs/aspeed_g4_defconfig
@@ -23,7 +23,6 @@ CONFIG_SLAB_FREELIST_RANDOM=y
 CONFIG_JUMP_LABEL=y
 CONFIG_STRICT_KERNEL_RWX=y
 CONFIG_GCC_PLUGINS=y
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEBUG_FS is not set
 # CONFIG_IOSCHED_DEADLINE is not set
@@ -248,7 +247,6 @@ CONFIG_PANIC_TIMEOUT=-1
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_SCHED_STACK_END_CHECK=y
 CONFIG_FUNCTION_TRACER=y
-# CONFIG_TRACING_EVENTS_GPIO is not set
 # CONFIG_RUNTIME_TESTING_MENU is not set
 CONFIG_DEBUG_WX=y
 CONFIG_DEBUG_USER=y
diff --git a/arch/arm/configs/aspeed_g5_defconfig b/arch/arm/configs/aspeed_g5_defconfig
index 02fa3a41add5..407ffb7655a8 100644
--- a/arch/arm/configs/aspeed_g5_defconfig
+++ b/arch/arm/configs/aspeed_g5_defconfig
@@ -23,7 +23,6 @@ CONFIG_SLAB_FREELIST_RANDOM=y
 CONFIG_JUMP_LABEL=y
 CONFIG_STRICT_KERNEL_RWX=y
 CONFIG_GCC_PLUGINS=y
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_BLK_DEBUG_FS is not set
 # CONFIG_IOSCHED_DEADLINE is not set
@@ -248,7 +247,6 @@ CONFIG_PANIC_TIMEOUT=-1
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_SCHED_STACK_END_CHECK=y
 CONFIG_FUNCTION_TRACER=y
-# CONFIG_TRACING_EVENTS_GPIO is not set
 # CONFIG_RUNTIME_TESTING_MENU is not set
 CONFIG_DEBUG_WX=y
 CONFIG_DEBUG_USER=y
diff --git a/arch/arm/configs/at91_dt_defconfig b/arch/arm/configs/at91_dt_defconfig
index e4b1be66b3f5..a88e31449880 100644
--- a/arch/arm/configs/at91_dt_defconfig
+++ b/arch/arm/configs/at91_dt_defconfig
@@ -9,7 +9,6 @@ CONFIG_EMBEDDED=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
@@ -56,7 +55,7 @@ CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_DATAFLASH=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_ATMEL=y
 CONFIG_MTD_UBI=y
 CONFIG_MTD_UBI_GLUEBI=y
diff --git a/arch/arm/configs/clps711x_defconfig b/arch/arm/configs/clps711x_defconfig
index fc105c9178cc..c255dab36bde 100644
--- a/arch/arm/configs/clps711x_defconfig
+++ b/arch/arm/configs/clps711x_defconfig
@@ -6,7 +6,6 @@ CONFIG_RD_LZMA=y
 CONFIG_EMBEDDED=y
 CONFIG_SLOB=y
 CONFIG_JUMP_LABEL=y
-# CONFIG_LBDAF is not set
 CONFIG_PARTITION_ADVANCED=y
 # CONFIG_IOSCHED_CFQ is not set
 CONFIG_ARCH_CLPS711X=y
@@ -36,7 +35,7 @@ CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_CFI_STAA=y
 CONFIG_MTD_PLATRAM=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_GPIO=y
 CONFIG_NETDEVICES=y
 # CONFIG_NET_CADENCE is not set
diff --git a/arch/arm/configs/cm_x2xx_defconfig b/arch/arm/configs/cm_x2xx_defconfig
index fb45b4983d3c..5344434df652 100644
--- a/arch/arm/configs/cm_x2xx_defconfig
+++ b/arch/arm/configs/cm_x2xx_defconfig
@@ -58,7 +58,7 @@ CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_CFI_STAA=y
 CONFIG_MTD_PHYSMAP=y
 CONFIG_MTD_PXA2XX=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_GPIO=m
 CONFIG_MTD_NAND_CM_X270=y
 CONFIG_MTD_NAND_PLATFORM=y
diff --git a/arch/arm/configs/cm_x300_defconfig b/arch/arm/configs/cm_x300_defconfig
index 5e349c625b71..3707a014cbc4 100644
--- a/arch/arm/configs/cm_x300_defconfig
+++ b/arch/arm/configs/cm_x300_defconfig
@@ -48,7 +48,7 @@ CONFIG_LIB80211=m
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_BLOCK=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_MARVELL=y
 CONFIG_MTD_UBI=y
 CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/arm/configs/colibri_pxa270_defconfig b/arch/arm/configs/colibri_pxa270_defconfig
index 8995695fc118..8d484e4d51cc 100644
--- a/arch/arm/configs/colibri_pxa270_defconfig
+++ b/arch/arm/configs/colibri_pxa270_defconfig
@@ -64,7 +64,7 @@ CONFIG_MTD_COMPLEX_MAPPINGS=y
 CONFIG_MTD_PHYSMAP=y
 CONFIG_MTD_PXA2XX=y
 CONFIG_MTD_BLOCK2MTD=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_DISKONCHIP=y
 CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADVANCED=y
 CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADDRESS=0x4000000
diff --git a/arch/arm/configs/corgi_defconfig b/arch/arm/configs/corgi_defconfig
index 09e1672777c9..d99725984947 100644
--- a/arch/arm/configs/corgi_defconfig
+++ b/arch/arm/configs/corgi_defconfig
@@ -87,7 +87,7 @@ CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_ROM=y
 CONFIG_MTD_COMPLEX_MAPPINGS=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_SHARPSL=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_SD=y
diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig
index 207962a656a2..4a8cad4d3707 100644
--- a/arch/arm/configs/davinci_all_defconfig
+++ b/arch/arm/configs/davinci_all_defconfig
@@ -74,7 +74,7 @@ CONFIG_MTD_CFI_INTELEXT=m
 CONFIG_MTD_CFI_AMDSTD=m
 CONFIG_MTD_PHYSMAP=m
 CONFIG_MTD_M25P80=m
-CONFIG_MTD_NAND=m
+CONFIG_MTD_RAW_NAND=m
 CONFIG_MTD_NAND_DAVINCI=m
 CONFIG_MTD_SPI_NOR=m
 CONFIG_MTD_UBI=m
diff --git a/arch/arm/configs/efm32_defconfig b/arch/arm/configs/efm32_defconfig
index ee42158f41ec..10ea92513a69 100644
--- a/arch/arm/configs/efm32_defconfig
+++ b/arch/arm/configs/efm32_defconfig
@@ -11,7 +11,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_EMBEDDED=y
 # CONFIG_VM_EVENT_COUNTERS is not set
 # CONFIG_SLUB_DEBUG is not set
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arm/configs/em_x270_defconfig b/arch/arm/configs/em_x270_defconfig
index 30a67523f860..61228a25ba8d 100644
--- a/arch/arm/configs/em_x270_defconfig
+++ b/arch/arm/configs/em_x270_defconfig
@@ -54,7 +54,7 @@ CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_CFI_STAA=y
 CONFIG_MTD_PHYSMAP=y
 CONFIG_MTD_PXA2XX=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_PLATFORM=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
diff --git a/arch/arm/configs/ep93xx_defconfig b/arch/arm/configs/ep93xx_defconfig
index 78cd73d1c795..14889a785f07 100644
--- a/arch/arm/configs/ep93xx_defconfig
+++ b/arch/arm/configs/ep93xx_defconfig
@@ -63,7 +63,7 @@ CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_CFI_STAA=y
 CONFIG_MTD_ROM=y
 CONFIG_MTD_PHYSMAP=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_BLK_DEV_NBD=y
 CONFIG_EEPROM_LEGACY=y
 CONFIG_SCSI=y
diff --git a/arch/arm/configs/eseries_pxa_defconfig b/arch/arm/configs/eseries_pxa_defconfig
index eabb784cf7da..b85575867d21 100644
--- a/arch/arm/configs/eseries_pxa_defconfig
+++ b/arch/arm/configs/eseries_pxa_defconfig
@@ -43,7 +43,7 @@ CONFIG_MAC80211_RC_PID=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_STANDALONE is not set
 CONFIG_MTD=m
-CONFIG_MTD_NAND=m
+CONFIG_MTD_RAW_NAND=m
 CONFIG_MTD_NAND_TMIO=m
 CONFIG_BLK_DEV_LOOP=m
 # CONFIG_SCSI_PROC_FS is not set
diff --git a/arch/arm/configs/ezx_defconfig b/arch/arm/configs/ezx_defconfig
index 484e51fbd4a6..e3afca5bd9d6 100644
--- a/arch/arm/configs/ezx_defconfig
+++ b/arch/arm/configs/ezx_defconfig
@@ -13,7 +13,6 @@ CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_CFQ is not set
 CONFIG_ARCH_PXA=y
diff --git a/arch/arm/configs/h3600_defconfig b/arch/arm/configs/h3600_defconfig
index ebeca11faa48..175881b7da7c 100644
--- a/arch/arm/configs/h3600_defconfig
+++ b/arch/arm/configs/h3600_defconfig
@@ -4,7 +4,6 @@ CONFIG_HIGH_RES_TIMERS=y
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_MODULES=y
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arm/configs/imote2_defconfig b/arch/arm/configs/imote2_defconfig
index f204017c26b9..9b779e13e05d 100644
--- a/arch/arm/configs/imote2_defconfig
+++ b/arch/arm/configs/imote2_defconfig
@@ -12,7 +12,6 @@ CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_CFQ is not set
 CONFIG_ARCH_PXA=y
diff --git a/arch/arm/configs/imx_v4_v5_defconfig b/arch/arm/configs/imx_v4_v5_defconfig
index b37f8e675e40..f2cf0722e8e1 100644
--- a/arch/arm/configs/imx_v4_v5_defconfig
+++ b/arch/arm/configs/imx_v4_v5_defconfig
@@ -61,7 +61,7 @@ CONFIG_MTD_CFI_GEOMETRY=y
 # CONFIG_MTD_CFI_I2 is not set
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_PHYSMAP=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_MXC=y
 CONFIG_MTD_UBI=y
 CONFIG_EEPROM_AT24=y
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index 50fb01d70b10..8116648a8efd 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -110,7 +110,7 @@ CONFIG_MTD_PHYSMAP_OF=y
 CONFIG_MTD_DATAFLASH=y
 CONFIG_MTD_M25P80=y
 CONFIG_MTD_SST25L=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_GPMI_NAND=y
 CONFIG_MTD_NAND_VF610_NFC=y
 CONFIG_MTD_NAND_MXC=y
diff --git a/arch/arm/configs/ixp4xx_defconfig b/arch/arm/configs/ixp4xx_defconfig
index 8c3c99cd6de9..39ebcce3bc2f 100644
--- a/arch/arm/configs/ixp4xx_defconfig
+++ b/arch/arm/configs/ixp4xx_defconfig
@@ -112,7 +112,7 @@ CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_COMPLEX_MAPPINGS=y
 CONFIG_MTD_IXP4XX=y
-CONFIG_MTD_NAND=m
+CONFIG_MTD_RAW_NAND=m
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=8192
diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig
index 3ded35a07f45..72fee57aad2f 100644
--- a/arch/arm/configs/keystone_defconfig
+++ b/arch/arm/configs/keystone_defconfig
@@ -124,7 +124,7 @@ CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_PLATRAM=y
 CONFIG_MTD_M25P80=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_DAVINCI=y
 CONFIG_MTD_SPI_NOR=y
 CONFIG_MTD_UBI=y
diff --git a/arch/arm/configs/lpc32xx_defconfig b/arch/arm/configs/lpc32xx_defconfig
index e752fb704df0..4b3b2c693c29 100644
--- a/arch/arm/configs/lpc32xx_defconfig
+++ b/arch/arm/configs/lpc32xx_defconfig
@@ -47,7 +47,7 @@ CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_BLOCK=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_SLC_LPC32XX=y
 CONFIG_MTD_NAND_MLC_LPC32XX=y
 CONFIG_MTD_UBI=y
diff --git a/arch/arm/configs/mini2440_defconfig b/arch/arm/configs/mini2440_defconfig
index d95a8059d30b..8b0f7c4c3f09 100644
--- a/arch/arm/configs/mini2440_defconfig
+++ b/arch/arm/configs/mini2440_defconfig
@@ -92,7 +92,7 @@ CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_CFI_STAA=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_ROM=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_S3C2410=y
 CONFIG_MTD_NAND_PLATFORM=y
 CONFIG_MTD_LPDDR=y
diff --git a/arch/arm/configs/mmp2_defconfig b/arch/arm/configs/mmp2_defconfig
index 1eeee7f11d91..94deb0ed0541 100644
--- a/arch/arm/configs/mmp2_defconfig
+++ b/arch/arm/configs/mmp2_defconfig
@@ -28,7 +28,7 @@ CONFIG_IP_PNP=y
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_BLOCK=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_ONENAND=y
 CONFIG_MTD_ONENAND_GENERIC=y
 # CONFIG_BLK_DEV is not set
diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig
index 078228a19339..6a11669fa536 100644
--- a/arch/arm/configs/moxart_defconfig
+++ b/arch/arm/configs/moxart_defconfig
@@ -15,7 +15,6 @@ CONFIG_EMBEDDED=y
 # CONFIG_VM_EVENT_COUNTERS is not set
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 CONFIG_ARCH_MULTI_V4=y
diff --git a/arch/arm/configs/multi_v4t_defconfig b/arch/arm/configs/multi_v4t_defconfig
index 9a6390c172d6..0b42bddfbc82 100644
--- a/arch/arm/configs/multi_v4t_defconfig
+++ b/arch/arm/configs/multi_v4t_defconfig
@@ -5,7 +5,6 @@ CONFIG_BLK_DEV_INITRD=y
 CONFIG_EMBEDDED=y
 CONFIG_SLOB=y
 CONFIG_JUMP_LABEL=y
-# CONFIG_LBDAF is not set
 CONFIG_PARTITION_ADVANCED=y
 # CONFIG_IOSCHED_CFQ is not set
 CONFIG_ARCH_MULTI_V4T=y
@@ -39,7 +38,7 @@ CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_CFI_STAA=y
 CONFIG_MTD_PLATRAM=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_GPIO=y
 # CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
diff --git a/arch/arm/configs/multi_v5_defconfig b/arch/arm/configs/multi_v5_defconfig
index 318b76fa26d1..63b5a8824f0f 100644
--- a/arch/arm/configs/multi_v5_defconfig
+++ b/arch/arm/configs/multi_v5_defconfig
@@ -87,7 +87,7 @@ CONFIG_MTD_CFI_GEOMETRY=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_STAA=y
 CONFIG_MTD_PHYSMAP=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_ATMEL=y
 CONFIG_MTD_NAND_ORION=y
 CONFIG_MTD_SPI_NOR=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index c75051b9392c..b7b1cd00a294 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -184,7 +184,7 @@ CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_M25P80=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_DENALI_DT=y
 CONFIG_MTD_NAND_OMAP2=y
 CONFIG_MTD_NAND_OMAP_BCH=y
diff --git a/arch/arm/configs/mv78xx0_defconfig b/arch/arm/configs/mv78xx0_defconfig
index 0448bd8075ac..e9567513f068 100644
--- a/arch/arm/configs/mv78xx0_defconfig
+++ b/arch/arm/configs/mv78xx0_defconfig
@@ -47,7 +47,7 @@ CONFIG_MTD_CFI_GEOMETRY=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_PHYSMAP=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_ORION=y
 CONFIG_BLK_DEV_LOOP=y
 # CONFIG_SCSI_PROC_FS is not set
diff --git a/arch/arm/configs/mvebu_v5_defconfig b/arch/arm/configs/mvebu_v5_defconfig
index 4b598da0d086..0e5577a31851 100644
--- a/arch/arm/configs/mvebu_v5_defconfig
+++ b/arch/arm/configs/mvebu_v5_defconfig
@@ -77,7 +77,7 @@ CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_STAA=y
 CONFIG_MTD_PHYSMAP=y
 CONFIG_MTD_M25P80=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_ORION=y
 CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/arm/configs/mvebu_v7_defconfig b/arch/arm/configs/mvebu_v7_defconfig
index 55140219ab11..48f7b4277b8d 100644
--- a/arch/arm/configs/mvebu_v7_defconfig
+++ b/arch/arm/configs/mvebu_v7_defconfig
@@ -52,7 +52,7 @@ CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_CFI_STAA=y
 CONFIG_MTD_PHYSMAP_OF=y
 CONFIG_MTD_M25P80=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_MARVELL=y
 CONFIG_MTD_SPI_NOR=y
 CONFIG_MTD_UBI=y
diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig
index 38480596c449..ed570a0d1f2a 100644
--- a/arch/arm/configs/mxs_defconfig
+++ b/arch/arm/configs/mxs_defconfig
@@ -50,7 +50,7 @@ CONFIG_MTD_BLOCK=y
 CONFIG_MTD_DATAFLASH=y
 CONFIG_MTD_M25P80=y
 CONFIG_MTD_SST25L=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_GPMI_NAND=y
 CONFIG_MTD_SPI_NOR=y
 CONFIG_MTD_UBI=y
diff --git a/arch/arm/configs/nhk8815_defconfig b/arch/arm/configs/nhk8815_defconfig
index 5f4c6aaa07f6..cfc094189d09 100644
--- a/arch/arm/configs/nhk8815_defconfig
+++ b/arch/arm/configs/nhk8815_defconfig
@@ -53,8 +53,8 @@ CONFIG_MTD_BLOCK=y
 CONFIG_MTD_ONENAND=y
 CONFIG_MTD_ONENAND_VERIFY_WRITE=y
 CONFIG_MTD_ONENAND_GENERIC=y
-CONFIG_MTD_NAND_ECC_SMC=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_ECC_SW_HAMMING_SMC=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_FSMC=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_CRYPTOLOOP=y
diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig
index cfc00b0961ec..82af77c093f1 100644
--- a/arch/arm/configs/omap1_defconfig
+++ b/arch/arm/configs/omap1_defconfig
@@ -17,7 +17,6 @@ CONFIG_OPROFILE=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
@@ -90,7 +89,7 @@ CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=2
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 3f03ec6d2644..c7bf9c493646 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -143,8 +143,8 @@ CONFIG_MTD_M25P80=m
 CONFIG_MTD_ONENAND=y
 CONFIG_MTD_ONENAND_VERIFY_WRITE=y
 CONFIG_MTD_ONENAND_OMAP2=y
-CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_ECC_BCH=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_MTD_NAND_ECC_SW_BCH=y
 CONFIG_MTD_NAND_OMAP2=y
 CONFIG_MTD_NAND_OMAP_BCH=y
 CONFIG_MTD_SPI_NOR=m
diff --git a/arch/arm/configs/orion5x_defconfig b/arch/arm/configs/orion5x_defconfig
index bf9046331f6e..077e0fde1ff9 100644
--- a/arch/arm/configs/orion5x_defconfig
+++ b/arch/arm/configs/orion5x_defconfig
@@ -70,7 +70,7 @@ CONFIG_MTD_CFI_GEOMETRY=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_PHYSMAP=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_PLATFORM=y
 CONFIG_MTD_NAND_ORION=y
 CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/arm/configs/oxnas_v6_defconfig b/arch/arm/configs/oxnas_v6_defconfig
index f6ba32c9d173..cae0db6b4eaf 100644
--- a/arch/arm/configs/oxnas_v6_defconfig
+++ b/arch/arm/configs/oxnas_v6_defconfig
@@ -50,7 +50,7 @@ CONFIG_SIMPLE_PM_BUS=y
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_BLOCK=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_OXNAS=y
 CONFIG_MTD_UBI=y
 CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/arm/configs/pxa3xx_defconfig b/arch/arm/configs/pxa3xx_defconfig
index 3e0de035ab77..7681eea60127 100644
--- a/arch/arm/configs/pxa3xx_defconfig
+++ b/arch/arm/configs/pxa3xx_defconfig
@@ -31,7 +31,7 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 CONFIG_MTD=y
 CONFIG_MTD_BLOCK=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_MARVELL=y
 CONFIG_MTD_ONENAND=y
 CONFIG_MTD_ONENAND_VERIFY_WRITE=y
diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig
index d4654755b09c..f6d24d762a7f 100644
--- a/arch/arm/configs/pxa_defconfig
+++ b/arch/arm/configs/pxa_defconfig
@@ -185,8 +185,8 @@ CONFIG_MTD_PXA2XX=m
 CONFIG_MTD_M25P80=m
 CONFIG_MTD_BLOCK2MTD=y
 CONFIG_MTD_DOCG3=m
-CONFIG_MTD_NAND=m
-CONFIG_MTD_NAND_ECC_BCH=y
+CONFIG_MTD_RAW_NAND=m
+CONFIG_MTD_NAND_ECC_SW_BCH=y
 CONFIG_MTD_NAND_GPIO=m
 CONFIG_MTD_NAND_DISKONCHIP=m
 CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADVANCED=y
diff --git a/arch/arm/configs/qcom_defconfig b/arch/arm/configs/qcom_defconfig
index bd6440f23493..4c50b5337cf6 100644
--- a/arch/arm/configs/qcom_defconfig
+++ b/arch/arm/configs/qcom_defconfig
@@ -57,7 +57,7 @@ CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_MTD=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_M25P80=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_QCOM=y
 CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/arm/configs/s3c2410_defconfig b/arch/arm/configs/s3c2410_defconfig
index 2afb359f3168..39c648594d93 100644
--- a/arch/arm/configs/s3c2410_defconfig
+++ b/arch/arm/configs/s3c2410_defconfig
@@ -192,7 +192,7 @@ CONFIG_MTD_JEDECPROBE=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_ROM=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_S3C2410=y
 CONFIG_PARPORT=y
 CONFIG_PARPORT_PC=m
diff --git a/arch/arm/configs/s3c6400_defconfig b/arch/arm/configs/s3c6400_defconfig
index 507d7ad7523a..6e2656567da6 100644
--- a/arch/arm/configs/s3c6400_defconfig
+++ b/arch/arm/configs/s3c6400_defconfig
@@ -23,7 +23,7 @@ CONFIG_CMDLINE="console=ttySAC0,115200 root=/dev/ram init=/linuxrc initrd=0x5100
 CONFIG_VFP=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_S3C2410=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
diff --git a/arch/arm/configs/sama5_defconfig b/arch/arm/configs/sama5_defconfig
index b0026f73083d..515cb37eeab6 100644
--- a/arch/arm/configs/sama5_defconfig
+++ b/arch/arm/configs/sama5_defconfig
@@ -66,7 +66,7 @@ CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_M25P80=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_ATMEL=y
 CONFIG_MTD_SPI_NOR=y
 CONFIG_MTD_UBI=y
diff --git a/arch/arm/configs/socfpga_defconfig b/arch/arm/configs/socfpga_defconfig
index 08d1b3e11d68..9d42cfe85f5b 100644
--- a/arch/arm/configs/socfpga_defconfig
+++ b/arch/arm/configs/socfpga_defconfig
@@ -51,7 +51,7 @@ CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_MTD=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_M25P80=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_DENALI_DT=y
 CONFIG_MTD_SPI_NOR=y
 # CONFIG_MTD_SPI_NOR_USE_4K_SECTORS is not set
@@ -106,6 +106,7 @@ CONFIG_SENSORS_LTC2978_REGULATOR=y
 CONFIG_WATCHDOG=y
 CONFIG_DW_WATCHDOG=y
 CONFIG_MFD_ALTERA_A10SR=y
+CONFIG_MFD_ALTERA_SYSMGR=y
 CONFIG_MFD_STMPE=y
 CONFIG_REGULATOR=y
 CONFIG_REGULATOR_FIXED_VOLTAGE=y
diff --git a/arch/arm/configs/spear13xx_defconfig b/arch/arm/configs/spear13xx_defconfig
index 7b36eeb928bb..8ee3679ca8b2 100644
--- a/arch/arm/configs/spear13xx_defconfig
+++ b/arch/arm/configs/spear13xx_defconfig
@@ -32,7 +32,7 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_OF_PARTS=y
 CONFIG_MTD_BLOCK=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_FSMC=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=16384
diff --git a/arch/arm/configs/spear3xx_defconfig b/arch/arm/configs/spear3xx_defconfig
index f1b52fb3461b..ddd73b25f75e 100644
--- a/arch/arm/configs/spear3xx_defconfig
+++ b/arch/arm/configs/spear3xx_defconfig
@@ -17,7 +17,7 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_OF_PARTS=y
 CONFIG_MTD_BLOCK=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_FSMC=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=16384
diff --git a/arch/arm/configs/spear6xx_defconfig b/arch/arm/configs/spear6xx_defconfig
index 124c244d8df1..5b410f0a365b 100644
--- a/arch/arm/configs/spear6xx_defconfig
+++ b/arch/arm/configs/spear6xx_defconfig
@@ -14,7 +14,7 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_OF_PARTS=y
 CONFIG_MTD_BLOCK=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_FSMC=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=16384
diff --git a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig
index 9ea82c118661..f6d2f674517c 100644
--- a/arch/arm/configs/spitz_defconfig
+++ b/arch/arm/configs/spitz_defconfig
@@ -84,7 +84,7 @@ CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_ROM=y
 CONFIG_MTD_COMPLEX_MAPPINGS=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_SHARPSL=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_SD=y
diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig
index 0258ba891376..152321d2893e 100644
--- a/arch/arm/configs/stm32_defconfig
+++ b/arch/arm/configs/stm32_defconfig
@@ -13,7 +13,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_EMBEDDED=y
 # CONFIG_VM_EVENT_COUNTERS is not set
 # CONFIG_SLUB_DEBUG is not set
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arm/configs/tango4_defconfig b/arch/arm/configs/tango4_defconfig
index 68725d4eae45..68eb16e583ac 100644
--- a/arch/arm/configs/tango4_defconfig
+++ b/arch/arm/configs/tango4_defconfig
@@ -39,7 +39,7 @@ CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_MTD=y
 CONFIG_MTD_TESTS=m
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_TANGO=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_SCSI=y
diff --git a/arch/arm/configs/trizeps4_defconfig b/arch/arm/configs/trizeps4_defconfig
index 2b5a224d2da1..ecad22501b48 100644
--- a/arch/arm/configs/trizeps4_defconfig
+++ b/arch/arm/configs/trizeps4_defconfig
@@ -76,7 +76,7 @@ CONFIG_MTD_DOC2001PLUS=y
 CONFIG_MTD_DOCPROBE_ADVANCED=y
 CONFIG_MTD_DOCPROBE_ADDRESS=0x4000000
 CONFIG_MTD_DOCPROBE_HIGH=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_DISKONCHIP=y
 CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADVANCED=y
 CONFIG_MTD_NAND_DISKONCHIP_PROBE_ADDRESS=0x4000000
diff --git a/arch/arm/configs/u300_defconfig b/arch/arm/configs/u300_defconfig
index 36d77406e31b..bedf397c75de 100644
--- a/arch/arm/configs/u300_defconfig
+++ b/arch/arm/configs/u300_defconfig
@@ -9,7 +9,6 @@ CONFIG_EXPERT=y
 # CONFIG_VM_EVENT_COUNTERS is not set
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 CONFIG_PARTITION_ADVANCED=y
 # CONFIG_IOSCHED_CFQ is not set
@@ -27,7 +26,7 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
 CONFIG_MTD_NAND_FSMC=y
 # CONFIG_INPUT_MOUSEDEV is not set
 CONFIG_INPUT_EVDEV=y
diff --git a/arch/arm/configs/vexpress_defconfig b/arch/arm/configs/vexpress_defconfig
index 392ed3b3613c..484d77a7f589 100644
--- a/arch/arm/configs/vexpress_defconfig
+++ b/arch/arm/configs/vexpress_defconfig
@@ -14,7 +14,6 @@ CONFIG_PROFILING=y
 CONFIG_OPROFILE=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
-# CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
index 07e31941dc67..617c2c99ebfb 100644
--- a/arch/arm/crypto/aes-neonbs-glue.c
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -278,6 +278,8 @@ static int __xts_crypt(struct skcipher_request *req,
 	int err;
 
 	err = skcipher_walk_virt(&walk, req, true);
+	if (err)
+		return err;
 
 	crypto_cipher_encrypt_one(ctx->tweak_tfm, walk.iv, walk.iv);
diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c
index 9d6fda81986d..48a89537b828 100644
--- a/arch/arm/crypto/chacha-neon-glue.c
+++ b/arch/arm/crypto/chacha-neon-glue.c
@@ -21,6 +21,7 @@
 
 #include <crypto/algapi.h>
 #include <crypto/chacha.h>
+#include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -93,7 +94,7 @@ static int chacha_neon(struct skcipher_request *req)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
+	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
 		return crypto_chacha_crypt(req);
 
 	return chacha_neon_stream_xor(req, ctx, req->iv);
@@ -107,7 +108,7 @@ static int xchacha_neon(struct skcipher_request *req)
 	u32 state[16];
 	u8 real_iv[16];
 
-	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
+	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
 		return crypto_xchacha_crypt(req);
 
 	crypto_chacha_init(state, ctx, req->iv);
diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c
index cd9e93b46c2d..e712c2a7d387 100644
--- a/arch/arm/crypto/crc32-ce-glue.c
+++ b/arch/arm/crypto/crc32-ce-glue.c
@@ -16,6 +16,7 @@
 #include <linux/string.h>
 
 #include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
 
 #include <asm/hwcap.h>
 #include <asm/neon.h>
@@ -113,7 +114,7 @@ static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
 	u32 *crc = shash_desc_ctx(desc);
 	unsigned int l;
 
-	if (may_use_simd()) {
+	if (crypto_simd_usable()) {
 		if ((u32)data % SCALE_F) {
 			l = min_t(u32, length, SCALE_F - ((u32)data % SCALE_F));
@@ -147,7 +148,7 @@ static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
 	u32 *crc = shash_desc_ctx(desc);
 	unsigned int l;
 
-	if (may_use_simd()) {
+	if (crypto_simd_usable()) {
 		if ((u32)data % SCALE_F) {
 			l = min_t(u32, length, SCALE_F - ((u32)data % SCALE_F));
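The crypto glue hunks here and below all follow one pattern: include <crypto/internal/simd.h> and test crypto_simd_usable() instead of the bare arch helper may_use_simd(), so the crypto self-tests can also simulate no-SIMD conditions. The shape of the pattern, sketched with hypothetical example_* names:

#include <linux/types.h>
#include <crypto/internal/simd.h>
#include <asm/neon.h>

static int example_scalar_update(const u8 *data, unsigned int len);

static int example_update(const u8 *data, unsigned int len)
{
	/* Fall back to scalar code when SIMD must not be touched. */
	if (!crypto_simd_usable())
		return example_scalar_update(data, len);

	kernel_neon_begin();
	/* ... NEON-accelerated body ... */
	kernel_neon_end();
	return 0;
}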
diff --git a/arch/arm/crypto/crct10dif-ce-glue.c b/arch/arm/crypto/crct10dif-ce-glue.c
index 3d6b800b8396..3b24f2872592 100644
--- a/arch/arm/crypto/crct10dif-ce-glue.c
+++ b/arch/arm/crypto/crct10dif-ce-glue.c
@@ -15,6 +15,7 @@
 #include <linux/string.h>
 
 #include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
 
 #include <asm/neon.h>
 #include <asm/simd.h>
@@ -36,7 +37,7 @@ static int crct10dif_update(struct shash_desc *desc, const u8 *data,
 {
 	u16 *crc = shash_desc_ctx(desc);
 
-	if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && may_use_simd()) {
+	if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && crypto_simd_usable()) {
 		kernel_neon_begin();
 		*crc = crc_t10dif_pmull(*crc, data, length);
 		kernel_neon_end();
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index b7d30b6cf49c..39d1ccec1aab 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -14,6 +14,7 @@
 #include <asm/unaligned.h>
 #include <crypto/cryptd.h>
 #include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
 #include <crypto/gf128mul.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
@@ -185,7 +186,6 @@ static int ghash_async_init(struct ahash_request *req)
 	struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
 
 	desc->tfm = child;
-	desc->flags = req->base.flags;
 	return crypto_shash_init(desc);
 }
@@ -196,7 +196,7 @@ static int ghash_async_update(struct ahash_request *req)
 	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
 	struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
 
-	if (!may_use_simd() ||
+	if (!crypto_simd_usable() ||
 	    (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
 		memcpy(cryptd_req, req, sizeof(*req));
 		ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
@@ -214,7 +214,7 @@ static int ghash_async_final(struct ahash_request *req)
 	struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
 	struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
 
-	if (!may_use_simd() ||
+	if (!crypto_simd_usable() ||
 	    (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
 		memcpy(cryptd_req, req, sizeof(*req));
 		ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
@@ -232,7 +232,7 @@ static int ghash_async_digest(struct ahash_request *req)
 	struct ahash_request *cryptd_req = ahash_request_ctx(req);
 	struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
 
-	if (!may_use_simd() ||
+	if (!crypto_simd_usable() ||
 	    (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
 		memcpy(cryptd_req, req, sizeof(*req));
 		ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
@@ -242,7 +242,6 @@ static int ghash_async_digest(struct ahash_request *req)
 		struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
 
 		desc->tfm = child;
-		desc->flags = req->base.flags;
 		return shash_ahash_digest(req, desc);
 	}
 }
@@ -255,7 +254,6 @@ static int ghash_async_import(struct ahash_request *req, const void *in)
 	struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
 
 	desc->tfm = cryptd_ahash_child(ctx->cryptd_tfm);
-	desc->flags = req->base.flags;
 	return crypto_shash_import(desc, in);
 }
diff --git a/arch/arm/crypto/nhpoly1305-neon-glue.c b/arch/arm/crypto/nhpoly1305-neon-glue.c
index 49aae87cb2bc..ae5aefc44a4d 100644
--- a/arch/arm/crypto/nhpoly1305-neon-glue.c
+++ b/arch/arm/crypto/nhpoly1305-neon-glue.c
@@ -9,6 +9,7 @@
 #include <asm/neon.h>
 #include <asm/simd.h>
 #include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
 #include <crypto/nhpoly1305.h>
 #include <linux/module.h>
@@ -25,7 +26,7 @@ static void _nh_neon(const u32 *key, const u8 *message, size_t message_len,
 static int nhpoly1305_neon_update(struct shash_desc *desc,
 				  const u8 *src, unsigned int srclen)
 {
-	if (srclen < 64 || !may_use_simd())
+	if (srclen < 64 || !crypto_simd_usable())
 		return crypto_nhpoly1305_update(desc, src, srclen);
 
 	do {
diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c
index b732522e20f8..4c6c6900853c 100644
--- a/arch/arm/crypto/sha1-ce-glue.c
+++ b/arch/arm/crypto/sha1-ce-glue.c
@@ -9,6 +9,7 @@
 */
 
 #include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
 #include <crypto/sha.h>
 #include <crypto/sha1_base.h>
 #include <linux/cpufeature.h>
@@ -33,7 +34,7 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
 {
 	struct sha1_state *sctx = shash_desc_ctx(desc);
 
-	if (!may_use_simd() ||
+	if (!crypto_simd_usable() ||
 	    (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
 		return sha1_update_arm(desc, data, len);
@@ -47,7 +48,7 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
 static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
 			 unsigned int len, u8 *out)
 {
-	if (!may_use_simd())
+	if (!crypto_simd_usable())
 		return sha1_finup_arm(desc, data, len, out);
 
 	kernel_neon_begin();
diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c
index d15e0ea2c95e..d6c95c213d42 100644
--- a/arch/arm/crypto/sha1_neon_glue.c
+++ b/arch/arm/crypto/sha1_neon_glue.c
@@ -19,6 +19,7 @@
 */
 
 #include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mm.h>
@@ -39,7 +40,7 @@ static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
 {
 	struct sha1_state *sctx = shash_desc_ctx(desc);
 
-	if (!may_use_simd() ||
+	if (!crypto_simd_usable() ||
 	    (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
 		return sha1_update_arm(desc, data, len);
@@ -54,7 +55,7 @@ static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
 static int sha1_neon_finup(struct shash_desc *desc, const u8 *data,
 			   unsigned int len, u8 *out)
 {
-	if (!may_use_simd())
+	if (!crypto_simd_usable())
 		return sha1_finup_arm(desc, data, len, out);
 
 	kernel_neon_begin();
diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c
index 1211a5c129fc..a47a9d4b663e 100644
--- a/arch/arm/crypto/sha2-ce-glue.c
+++ b/arch/arm/crypto/sha2-ce-glue.c
@@ -9,6 +9,7 @@
 */
 
 #include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
 #include <crypto/sha.h>
 #include <crypto/sha256_base.h>
 #include <linux/cpufeature.h>
@@ -34,7 +35,7 @@ static int sha2_ce_update(struct shash_desc *desc, const u8 *data,
 {
 	struct sha256_state *sctx = shash_desc_ctx(desc);
 
-	if (!may_use_simd() ||
+	if (!crypto_simd_usable() ||
 	    (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
 		return crypto_sha256_arm_update(desc, data, len);
@@ -49,7 +50,7 @@ static int sha2_ce_update(struct shash_desc *desc, const u8 *data,
 static int sha2_ce_finup(struct shash_desc *desc, const u8 *data,
 			 unsigned int len, u8 *out)
 {
-	if (!may_use_simd())
+	if (!crypto_simd_usable())
 		return crypto_sha256_arm_finup(desc, data, len, out);
 
 	kernel_neon_begin();
diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c
index 1d82c6cd31a4..f3f6b1624fc3 100644
--- a/arch/arm/crypto/sha256_neon_glue.c
+++ b/arch/arm/crypto/sha256_neon_glue.c
@@ -15,6 +15,7 @@
 */
 
 #include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
 #include <linux/cryptohash.h>
 #include <linux/types.h>
 #include <linux/string.h>
@@ -34,7 +35,7 @@ static int sha256_update(struct shash_desc *desc, const u8 *data,
 {
 	struct sha256_state *sctx = shash_desc_ctx(desc);
 
-	if (!may_use_simd() ||
+	if (!crypto_simd_usable() ||
 	    (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
 		return crypto_sha256_arm_update(desc, data, len);
@@ -49,7 +50,7 @@ static int sha256_update(struct shash_desc *desc, const u8 *data,
 static int sha256_finup(struct shash_desc *desc, const u8 *data,
 			unsigned int len, u8 *out)
 {
-	if (!may_use_simd())
+	if (!crypto_simd_usable())
 		return crypto_sha256_arm_finup(desc, data, len, out);
 
 	kernel_neon_begin();
diff --git a/arch/arm/crypto/sha512-neon-glue.c b/arch/arm/crypto/sha512-neon-glue.c
index 8a5642b41fd6..d33ab59c26c0 100644
--- a/arch/arm/crypto/sha512-neon-glue.c
+++ b/arch/arm/crypto/sha512-neon-glue.c
@@ -9,6 +9,7 @@
 */
 
 #include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
 #include <crypto/sha.h>
 #include <crypto/sha512_base.h>
 #include <linux/crypto.h>
@@ -30,7 +31,7 @@ static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
 {
 	struct sha512_state *sctx = shash_desc_ctx(desc);
 
-	if (!may_use_simd() ||
+	if (!crypto_simd_usable() ||
 	    (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
 		return sha512_arm_update(desc, data, len);
@@ -45,7 +46,7 @@ static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
 static int sha512_neon_finup(struct shash_desc *desc, const u8 *data,
 			     unsigned int len, u8 *out)
 {
-	if (!may_use_simd())
+	if (!crypto_simd_usable())
 		return sha512_arm_finup(desc, data, len, out);
 
 	kernel_neon_begin();
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 41deac2451af..0b2ecc98e086 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -17,7 +17,6 @@ generic-y += seccomp.h
 generic-y += segment.h
 generic-y += serial.h
 generic-y += simd.h
-generic-y += sizes.h
 generic-y += trace_clock.h
 
 generated-y += mach-types.h
diff --git a/arch/arm/include/asm/hardirq.h b/arch/arm/include/asm/hardirq.h
index cba23eaa6072..7a88f160b1fb 100644
--- a/arch/arm/include/asm/hardirq.h
+++ b/arch/arm/include/asm/hardirq.h
@@ -6,6 +6,7 @@
 #include <linux/threads.h>
 #include <asm/irq.h>
 
+/* number of IPIS _not_ including IPI_CPU_BACKTRACE */
 #define NR_IPI	7
 
 typedef struct {
diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
index 080ce70cab12..fd02761ba06c 100644
--- a/arch/arm/include/asm/syscall.h
+++ b/arch/arm/include/asm/syscall.h
@@ -73,7 +73,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
 	memcpy(&regs->ARM_r0 + 1, args, 5 * sizeof(args[0]));
 }
 
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
 {
 	/* ARM tasks don't change audit architectures on the fly. */
 	return AUDIT_ARCH_ARM;
diff --git a/arch/arm/kernel/atags.h b/arch/arm/kernel/atags.h
index 201100226301..067e12edc341 100644
--- a/arch/arm/kernel/atags.h
+++ b/arch/arm/kernel/atags.h
@@ -5,7 +5,7 @@ void convert_to_tag_list(struct tag *tags);
 const struct machine_desc *setup_machine_tags(phys_addr_t __atags_pointer,
 	unsigned int machine_nr);
 #else
-static inline const struct machine_desc *
+static inline const struct machine_desc * __init __noreturn
 setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr)
 {
 	early_print("no ATAGS support: can't continue\n");
diff --git a/arch/arm/kernel/dma-isa.c b/arch/arm/kernel/dma-isa.c
index 84363fe7bad2..10c45cc6b957 100644
--- a/arch/arm/kernel/dma-isa.c
+++ b/arch/arm/kernel/dma-isa.c
@@ -55,6 +55,12 @@ static int isa_get_dma_residue(unsigned int chan, dma_t *dma)
 	return chan < 4 ? count : (count << 1);
 }
 
+static struct device isa_dma_dev = {
+	.init_name		= "fallback device",
+	.coherent_dma_mask	= ~(dma_addr_t)0,
+	.dma_mask		= &isa_dma_dev.coherent_dma_mask,
+};
+
 static void isa_enable_dma(unsigned int chan, dma_t *dma)
 {
 	if (dma->invalid) {
@@ -89,7 +95,7 @@ static void isa_enable_dma(unsigned int chan, dma_t *dma)
 			dma->sg = &dma->buf;
 			dma->sgcount = 1;
 			dma->buf.length = dma->count;
-			dma->buf.dma_address = dma_map_single(NULL,
+			dma->buf.dma_address = dma_map_single(&isa_dma_dev,
 				dma->addr, dma->count,
 				direction);
 		}
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index facd4240ca02..c93fe0f256de 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -70,6 +70,10 @@ enum ipi_msg_type {
 	IPI_CPU_STOP,
 	IPI_IRQ_WORK,
 	IPI_COMPLETION,
+	/*
+	 * CPU_BACKTRACE is special and not included in NR_IPI
+	 * or tracable with trace_ipi_*
+	 */
 	IPI_CPU_BACKTRACE,
 	/*
	 * SGI8-15 can be reserved by secure firmware, and thus may
@@ -797,7 +801,7 @@ core_initcall(register_cpufreq_notifier);
 
 static void raise_nmi(cpumask_t *mask)
 {
-	smp_cross_call(mask, IPI_CPU_BACKTRACE);
+	__smp_cross_call(mask, IPI_CPU_BACKTRACE);
 }
 
 void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 3f5320f46de2..f591026347a5 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -22,7 +22,6 @@ config KVM
 	bool "Kernel-based Virtual Machine (KVM) support"
 	depends on MMU && OF
 	select PREEMPT_NOTIFIERS
-	select ANON_INODES
 	select ARM_GIC
 	select ARM_GIC_V3
 	select ARM_GIC_V3_ITS
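The dma-isa change above is driven by dma_map_single() no longer accepting a NULL device: every mapping needs a struct device to supply the DMA masks, so the ISA DMA code grows the static isa_dma_dev fallback with a fully permissive mask. A sketch of the before/after (example_map is a hypothetical wrapper):

#include <linux/device.h>
#include <linux/dma-mapping.h>

static dma_addr_t example_map(struct device *isa_dev, void *buf,
			      size_t len)
{
	/* old: dma_map_single(NULL, buf, len, DMA_TO_DEVICE); */
	return dma_map_single(isa_dev, buf, len, DMA_TO_DEVICE);
}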
diff --git a/arch/arm/mach-ep93xx/edb93xx.c b/arch/arm/mach-ep93xx/edb93xx.c
index 8e89ec8b6f0f..34e18e9556d9 100644
--- a/arch/arm/mach-ep93xx/edb93xx.c
+++ b/arch/arm/mach-ep93xx/edb93xx.c
@@ -29,6 +29,7 @@
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
 #include <linux/spi/spi.h>
+#include <linux/gpio/machine.h>
 
 #include <sound/cs4271.h>
 
@@ -105,13 +106,16 @@ static struct spi_board_info edb93xx_spi_board_info[] __initdata = {
 	},
 };
 
-static int edb93xx_spi_chipselects[] __initdata = {
-	EP93XX_GPIO_LINE_EGPIO6,
+static struct gpiod_lookup_table edb93xx_spi_cs_gpio_table = {
+	.dev_id = "ep93xx-spi.0",
+	.table = {
+		GPIO_LOOKUP("A", 6, "cs", GPIO_ACTIVE_LOW),
+		{ },
+	},
 };
 
 static struct ep93xx_spi_info edb93xx_spi_info __initdata = {
-	.chipselect	= edb93xx_spi_chipselects,
-	.num_chipselect	= ARRAY_SIZE(edb93xx_spi_chipselects),
+	/* Intentionally left blank */
 };
 
 static void __init edb93xx_register_spi(void)
@@ -123,6 +127,7 @@ static void __init edb93xx_register_spi(void)
 	else if (machine_is_edb9315a())
 		edb93xx_cs4271_data.gpio_nreset = EP93XX_GPIO_LINE_EGPIO14;
 
+	gpiod_add_lookup_table(&edb93xx_spi_cs_gpio_table);
 	ep93xx_register_spi(&edb93xx_spi_info, edb93xx_spi_board_info,
 			    ARRAY_SIZE(edb93xx_spi_board_info));
 }
diff --git a/arch/arm/mach-ep93xx/simone.c b/arch/arm/mach-ep93xx/simone.c
index 80ccb984d521..f0f38c0dba52 100644
--- a/arch/arm/mach-ep93xx/simone.c
+++ b/arch/arm/mach-ep93xx/simone.c
@@ -77,13 +77,15 @@ static struct spi_board_info simone_spi_devices[] __initdata = {
 * low between multi-message command blocks. From v1.4, it uses a GPIO instead.
 * v1.3 parts will still work, since the signal on SFRMOUT is automatic.
 */
-static int simone_spi_chipselects[] __initdata = {
-	EP93XX_GPIO_LINE_EGPIO1,
+static struct gpiod_lookup_table simone_spi_cs_gpio_table = {
+	.dev_id = "ep93xx-spi.0",
+	.table = {
+		GPIO_LOOKUP("A", 1, "cs", GPIO_ACTIVE_LOW),
+		{ },
+	},
 };
 
 static struct ep93xx_spi_info simone_spi_info __initdata = {
-	.chipselect	= simone_spi_chipselects,
-	.num_chipselect	= ARRAY_SIZE(simone_spi_chipselects),
 	.use_dma = 1,
 };
 
@@ -113,6 +115,7 @@ static void __init simone_init_machine(void)
 	ep93xx_register_i2c(simone_i2c_board_info,
 			    ARRAY_SIZE(simone_i2c_board_info));
 	gpiod_add_lookup_table(&simone_mmc_spi_gpio_table);
+	gpiod_add_lookup_table(&simone_spi_cs_gpio_table);
 	ep93xx_register_spi(&simone_spi_info, simone_spi_devices,
 			    ARRAY_SIZE(simone_spi_devices));
 	simone_register_audio();
diff --git a/arch/arm/mach-ep93xx/ts72xx.c b/arch/arm/mach-ep93xx/ts72xx.c
index 85b74ac943f0..a3a20c83c6b8 100644
--- a/arch/arm/mach-ep93xx/ts72xx.c
+++ b/arch/arm/mach-ep93xx/ts72xx.c
@@ -22,6 +22,7 @@
 #include <linux/spi/mmc_spi.h>
 #include <linux/mmc/host.h>
 #include <linux/platform_data/spi-ep93xx.h>
+#include <linux/gpio/machine.h>
 
 #include <mach/gpio-ep93xx.h>
 #include <mach/hardware.h>
@@ -269,13 +270,15 @@ static struct spi_board_info bk3_spi_board_info[] __initdata = {
 * The all work is performed automatically by !SPI_FRAME (SFRM1) and
 * goes through CPLD
 */
-static int bk3_spi_chipselects[] __initdata = {
-	EP93XX_GPIO_LINE_F(3),
+static struct gpiod_lookup_table bk3_spi_cs_gpio_table = {
+	.dev_id = "ep93xx-spi.0",
+	.table = {
+		GPIO_LOOKUP("F", 3, "cs", GPIO_ACTIVE_LOW),
+		{ },
+	},
 };
 
 static struct ep93xx_spi_info bk3_spi_master __initdata = {
-	.chipselect	= bk3_spi_chipselects,
-	.num_chipselect	= ARRAY_SIZE(bk3_spi_chipselects),
 	.use_dma = 1,
 };
 
@@ -316,13 +319,17 @@ static struct spi_board_info ts72xx_spi_devices[] __initdata = {
 	},
 };
 
-static int ts72xx_spi_chipselects[] __initdata = {
-	EP93XX_GPIO_LINE_F(2),		/* DIO_17 */
+static struct gpiod_lookup_table ts72xx_spi_cs_gpio_table = {
+	.dev_id = "ep93xx-spi.0",
+	.table = {
+		/* DIO_17 */
+		GPIO_LOOKUP("F", 2, "cs", GPIO_ACTIVE_LOW),
+		{ },
+	},
 };
 
 static struct ep93xx_spi_info ts72xx_spi_info __initdata = {
-	.chipselect	= ts72xx_spi_chipselects,
-	.num_chipselect	= ARRAY_SIZE(ts72xx_spi_chipselects),
+	/* Intentionally left blank */
 };
 
 static void __init ts72xx_init_machine(void)
@@ -339,6 +346,7 @@ static void __init ts72xx_init_machine(void)
 	if (board_is_ts7300())
 		platform_device_register(&ts73xx_fpga_device);
 #endif
+	gpiod_add_lookup_table(&ts72xx_spi_cs_gpio_table);
 	ep93xx_register_spi(&ts72xx_spi_info, ts72xx_spi_devices,
 			    ARRAY_SIZE(ts72xx_spi_devices));
 }
@@ -398,6 +406,7 @@ static void __init bk3_init_machine(void)
 
 	ep93xx_register_eth(&ts72xx_eth_data, 1);
 
+	gpiod_add_lookup_table(&bk3_spi_cs_gpio_table);
 	ep93xx_register_spi(&bk3_spi_master, bk3_spi_board_info,
 			    ARRAY_SIZE(bk3_spi_board_info));
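The ep93xx board files above stop passing raw chip-select GPIO numbers through ep93xx_spi_info and instead register gpiod_lookup_table entries under the "cs" con_id for dev_id "ep93xx-spi.0". A sketch of how the consuming side can then resolve them generically (example_get_cs is hypothetical; in mainline the SPI core does this lookup when a controller driver opts in to GPIO descriptors):

#include <linux/device.h>
#include <linux/err.h>
#include <linux/gpio/consumer.h>

static struct gpio_desc *example_get_cs(struct device *dev,
					unsigned int idx)
{
	/* Resolves GPIO_LOOKUP_IDX(..., "cs", idx, ...) for this dev. */
	return devm_gpiod_get_index(dev, "cs", idx, GPIOD_OUT_HIGH);
}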
GPIO_LOOKUP_IDX("G", 2, "cs", 2, GPIO_ACTIVE_LOW), + { }, + }, }; static struct ep93xx_spi_info vision_spi_master __initdata = { - .chipselect = vision_spi_chipselects, - .num_chipselect = ARRAY_SIZE(vision_spi_chipselects), .use_dma = 1, }; @@ -295,6 +297,7 @@ static void __init vision_init_machine(void) ep93xx_register_i2c(vision_i2c_info, ARRAY_SIZE(vision_i2c_info)); gpiod_add_lookup_table(&vision_spi_mmc_gpio_table); + gpiod_add_lookup_table(&vision_spi_cs_gpio_table); ep93xx_register_spi(&vision_spi_master, vision_spi_board_info, ARRAY_SIZE(vision_spi_board_info)); vision_register_i2s(); diff --git a/arch/arm/mach-imx/devices/platform-fec.c b/arch/arm/mach-imx/devices/platform-fec.c index b403a4fe2892..605c0af5851d 100644 --- a/arch/arm/mach-imx/devices/platform-fec.c +++ b/arch/arm/mach-imx/devices/platform-fec.c @@ -7,7 +7,7 @@ * Free Software Foundation. */ #include <linux/dma-mapping.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include "../hardware.h" #include "devices-common.h" diff --git a/arch/arm/mach-imx/devices/platform-gpio_keys.c b/arch/arm/mach-imx/devices/platform-gpio_keys.c index 486282539c76..9f0a132ea1bc 100644 --- a/arch/arm/mach-imx/devices/platform-gpio_keys.c +++ b/arch/arm/mach-imx/devices/platform-gpio_keys.c @@ -15,7 +15,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, * Boston, MA 02110-1301, USA. */ -#include <asm/sizes.h> +#include <linux/sizes.h> #include "../hardware.h" #include "devices-common.h" diff --git a/arch/arm/mach-imx/devices/platform-imx2-wdt.c b/arch/arm/mach-imx/devices/platform-imx2-wdt.c index 8c134c8d7500..0c6d3c05fd6d 100644 --- a/arch/arm/mach-imx/devices/platform-imx2-wdt.c +++ b/arch/arm/mach-imx/devices/platform-imx2-wdt.c @@ -6,7 +6,7 @@ * the terms of the GNU General Public License version 2 as published by the * Free Software Foundation. */ -#include <asm/sizes.h> +#include <linux/sizes.h> #include "../hardware.h" #include "devices-common.h" diff --git a/arch/arm/mach-imx/devices/platform-mxc_nand.c b/arch/arm/mach-imx/devices/platform-mxc_nand.c index 676df4920c7b..046e0cc826c1 100644 --- a/arch/arm/mach-imx/devices/platform-mxc_nand.c +++ b/arch/arm/mach-imx/devices/platform-mxc_nand.c @@ -6,7 +6,7 @@ * the terms of the GNU General Public License version 2 as published by the * Free Software Foundation. 
*/ -#include <asm/sizes.h> +#include <linux/sizes.h> #include "../hardware.h" #include "devices-common.h" diff --git a/arch/arm/mach-imx/hardware.h b/arch/arm/mach-imx/hardware.h index 90e10cbd8fd1..b5ca8cebe1d6 100644 --- a/arch/arm/mach-imx/hardware.h +++ b/arch/arm/mach-imx/hardware.h @@ -24,7 +24,7 @@ #include <asm/io.h> #include <soc/imx/revision.h> #endif -#include <asm/sizes.h> +#include <linux/sizes.h> #define addr_in_module(addr, mod) \ ((unsigned long)(addr) - mod ## _BASE_ADDR < mod ## _SIZE) diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c index 87f45b926c78..e67e0b2d4ce0 100644 --- a/arch/arm/mach-imx/pm-imx6.c +++ b/arch/arm/mach-imx/pm-imx6.c @@ -631,7 +631,7 @@ static void imx6_pm_stby_poweroff(void) static int imx6_pm_stby_poweroff_probe(void) { if (pm_power_off) { - pr_warn("%s: pm_power_off already claimed %p %pf!\n", + pr_warn("%s: pm_power_off already claimed %p %ps!\n", __func__, pm_power_off, pm_power_off); return -EBUSY; } diff --git a/arch/arm/mach-integrator/impd1.c b/arch/arm/mach-integrator/impd1.c index 8dfad012dfae..6ddbe153910a 100644 --- a/arch/arm/mach-integrator/impd1.c +++ b/arch/arm/mach-integrator/impd1.c @@ -27,7 +27,7 @@ #include <linux/irqchip/arm-vic.h> #include <linux/gpio/machine.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include "lm.h" #include "impd1.h" diff --git a/arch/arm/mach-iop13xx/pci.c b/arch/arm/mach-iop13xx/pci.c index 070d92ae1b6f..8426ab9e2f5a 100644 --- a/arch/arm/mach-iop13xx/pci.c +++ b/arch/arm/mach-iop13xx/pci.c @@ -24,7 +24,7 @@ #include <linux/export.h> #include <asm/irq.h> #include <mach/hardware.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/signal.h> #include <asm/mach/pci.h> #include "pci.h" diff --git a/arch/arm/mach-iop13xx/tpmi.c b/arch/arm/mach-iop13xx/tpmi.c index 116feb6b261e..d3d8c78e7d10 100644 --- a/arch/arm/mach-iop13xx/tpmi.c +++ b/arch/arm/mach-iop13xx/tpmi.c @@ -23,7 +23,7 @@ #include <linux/dma-mapping.h> #include <linux/io.h> #include <asm/irq.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <mach/irqs.h> /* assumes CONTROLLER_ONLY# is never asserted in the ESSR register */ diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c index 6835b17113e5..a53104bb28f5 100644 --- a/arch/arm/mach-ixp4xx/common-pci.c +++ b/arch/arm/mach-ixp4xx/common-pci.c @@ -31,7 +31,7 @@ #include <asm/cputype.h> #include <asm/irq.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/mach/pci.h> #include <mach/hardware.h> diff --git a/arch/arm/mach-ks8695/include/mach/hardware.h b/arch/arm/mach-ks8695/include/mach/hardware.h index 959c748ee8bb..877629b3d944 100644 --- a/arch/arm/mach-ks8695/include/mach/hardware.h +++ b/arch/arm/mach-ks8695/include/mach/hardware.h @@ -14,7 +14,7 @@ #ifndef __ASM_ARCH_HARDWARE_H #define __ASM_ARCH_HARDWARE_H -#include <asm/sizes.h> +#include <linux/sizes.h> /* * Clocks are derived from MCLK, which is 25MHz diff --git a/arch/arm/mach-mvebu/kirkwood.c b/arch/arm/mach-mvebu/kirkwood.c index 0aa88105d46e..9b5f4d665374 100644 --- a/arch/arm/mach-mvebu/kirkwood.c +++ b/arch/arm/mach-mvebu/kirkwood.c @@ -92,7 +92,8 @@ static void __init kirkwood_dt_eth_fixup(void) continue; /* skip disabled nodes or nodes with valid MAC address*/ - if (!of_device_is_available(pnp) || of_get_mac_address(np)) + if (!of_device_is_available(pnp) || + !IS_ERR(of_get_mac_address(np))) goto eth_fixup_skip; clk = of_clk_get(pnp, 0); diff --git a/arch/arm/mach-omap1/include/mach/hardware.h 
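The kirkwood fixup just above tracks a cross-tree API change in this merge: of_get_mac_address() now returns an ERR_PTR() on failure instead of NULL, so presence tests must use IS_ERR() rather than a plain pointer check. A short sketch of the new contract (the helper and device names are illustrative):

#include <linux/etherdevice.h>
#include <linux/of_net.h>

static void demo_set_mac(struct net_device *ndev, struct device_node *np)
{
	const void *mac = of_get_mac_address(np);

	if (IS_ERR(mac))
		eth_hw_addr_random(ndev);	/* no valid address in DT */
	else
		ether_addr_copy(ndev->dev_addr, mac);
}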
b/arch/arm/mach-omap1/include/mach/hardware.h index 5875a5098d35..e7c8ac7d83e3 100644 --- a/arch/arm/mach-omap1/include/mach/hardware.h +++ b/arch/arm/mach-omap1/include/mach/hardware.h @@ -36,7 +36,7 @@ #ifndef __ASM_ARCH_OMAP_HARDWARE_H #define __ASM_ARCH_OMAP_HARDWARE_H -#include <asm/sizes.h> +#include <linux/sizes.h> #ifndef __ASSEMBLER__ #include <asm/types.h> #include <mach/soc.h> diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c index 42881f21cede..3e0f09cc0028 100644 --- a/arch/arm/mach-omap2/clock.c +++ b/arch/arm/mach-omap2/clock.c @@ -119,6 +119,9 @@ void __init ti_clk_init_features(void) if (cpu_is_omap343x()) features.flags |= TI_CLK_DPLL_HAS_FREQSEL; + if (omap_type() == OMAP2_DEVICE_TYPE_GP) + features.flags |= TI_CLK_DEVICE_TYPE_GP; + /* Idlest value for interface clocks. * 24xx uses 0 to indicate not ready, and 1 to indicate ready. * 34xx reverses this, just to keep us on our toes diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 3a04c73ac03c..baadddf9aad4 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -648,10 +648,10 @@ static struct clockdomain *_get_clkdm(struct omap_hwmod *oh) if (oh->clkdm) { return oh->clkdm; } else if (oh->_clk) { - if (__clk_get_flags(oh->_clk) & CLK_IS_BASIC) + if (!omap2_clk_is_hw_omap(__clk_get_hw(oh->_clk))) return NULL; clk = to_clk_hw_omap(__clk_get_hw(oh->_clk)); - return clk->clkdm; + return clk->clkdm; } return NULL; } diff --git a/arch/arm/mach-omap2/omap_hwmod_2xxx_interconnect_data.c b/arch/arm/mach-omap2/omap_hwmod_2xxx_interconnect_data.c index 9b30b6b471ae..e19f620c4074 100644 --- a/arch/arm/mach-omap2/omap_hwmod_2xxx_interconnect_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_2xxx_interconnect_data.c @@ -11,7 +11,7 @@ * XXX handle crossbar/shared link difference for L3? 
* XXX these should be marked initdata for multi-OMAP kernels */ -#include <asm/sizes.h> +#include <linux/sizes.h> #include "omap_hwmod.h" #include "l3_2xxx.h" diff --git a/arch/arm/mach-prima2/common.c b/arch/arm/mach-prima2/common.c index ffe05c27087e..1607deab5290 100644 --- a/arch/arm/mach-prima2/common.c +++ b/arch/arm/mach-prima2/common.c @@ -8,7 +8,7 @@ #include <linux/init.h> #include <linux/kernel.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> #include <linux/of.h> diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c index 4bcbd3d55b36..1f24e0259f99 100644 --- a/arch/arm/mach-pxa/balloon3.c +++ b/arch/arm/mach-pxa/balloon3.c @@ -35,7 +35,7 @@ #include <asm/setup.h> #include <asm/mach-types.h> #include <asm/irq.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> diff --git a/arch/arm/mach-pxa/colibri-pxa270.c b/arch/arm/mach-pxa/colibri-pxa270.c index e68acdd0cdbb..510625dde3cb 100644 --- a/arch/arm/mach-pxa/colibri-pxa270.c +++ b/arch/arm/mach-pxa/colibri-pxa270.c @@ -24,7 +24,7 @@ #include <asm/mach/arch.h> #include <asm/mach/flash.h> #include <asm/mach-types.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <mach/audio.h> #include "colibri.h" diff --git a/arch/arm/mach-pxa/colibri-pxa300.c b/arch/arm/mach-pxa/colibri-pxa300.c index 6a5558d95d4e..2f635bdc797f 100644 --- a/arch/arm/mach-pxa/colibri-pxa300.c +++ b/arch/arm/mach-pxa/colibri-pxa300.c @@ -18,7 +18,7 @@ #include <linux/interrupt.h> #include <asm/mach-types.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/mach/arch.h> #include <asm/mach/irq.h> diff --git a/arch/arm/mach-pxa/colibri-pxa320.c b/arch/arm/mach-pxa/colibri-pxa320.c index 17067a3039a8..ffcefe6dbc82 100644 --- a/arch/arm/mach-pxa/colibri-pxa320.c +++ b/arch/arm/mach-pxa/colibri-pxa320.c @@ -19,7 +19,7 @@ #include <linux/usb/gpio_vbus.h> #include <asm/mach-types.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/mach/arch.h> #include <asm/mach/irq.h> diff --git a/arch/arm/mach-pxa/colibri-pxa3xx.c b/arch/arm/mach-pxa/colibri-pxa3xx.c index e31a591e949f..0c88e4e417b4 100644 --- a/arch/arm/mach-pxa/colibri-pxa3xx.c +++ b/arch/arm/mach-pxa/colibri-pxa3xx.c @@ -17,7 +17,7 @@ #include <linux/etherdevice.h> #include <asm/mach-types.h> #include <mach/hardware.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/system_info.h> #include <asm/mach/arch.h> #include <asm/mach/irq.h> diff --git a/arch/arm/mach-pxa/gumstix.c b/arch/arm/mach-pxa/gumstix.c index 4764acca5480..eb03283ccdee 100644 --- a/arch/arm/mach-pxa/gumstix.c +++ b/arch/arm/mach-pxa/gumstix.c @@ -33,7 +33,7 @@ #include <asm/mach-types.h> #include <mach/hardware.h> #include <asm/irq.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> diff --git a/arch/arm/mach-pxa/lpd270.c b/arch/arm/mach-pxa/lpd270.c index e9f401b0a432..5c03c4f7b82e 100644 --- a/arch/arm/mach-pxa/lpd270.c +++ b/arch/arm/mach-pxa/lpd270.c @@ -33,7 +33,7 @@ #include <asm/mach-types.h> #include <mach/hardware.h> #include <asm/irq.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c index c1bd0d544981..825939877839 100644 --- a/arch/arm/mach-pxa/lubbock.c +++ b/arch/arm/mach-pxa/lubbock.c @@ -39,7 +39,7 @@ #include <asm/mach-types.h> #include <mach/hardware.h> #include 
<asm/irq.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c index d6e17d407ac0..b3f8592eebe6 100644 --- a/arch/arm/mach-pxa/mainstone.c +++ b/arch/arm/mach-pxa/mainstone.c @@ -40,7 +40,7 @@ #include <asm/mach-types.h> #include <mach/hardware.h> #include <asm/irq.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> diff --git a/arch/arm/mach-pxa/trizeps4.c b/arch/arm/mach-pxa/trizeps4.c index c76f1daecfc9..99a2ee433f1f 100644 --- a/arch/arm/mach-pxa/trizeps4.c +++ b/arch/arm/mach-pxa/trizeps4.c @@ -35,7 +35,7 @@ #include <asm/memory.h> #include <asm/mach-types.h> #include <asm/irq.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/mach/arch.h> #include <asm/mach/map.h> diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c index ab2f89266bbd..c4c25a2f24f6 100644 --- a/arch/arm/mach-pxa/viper.c +++ b/arch/arm/mach-pxa/viper.c @@ -58,7 +58,7 @@ #include <asm/setup.h> #include <asm/mach-types.h> #include <asm/irq.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/system_info.h> #include <asm/mach/arch.h> diff --git a/arch/arm/mach-rpc/dma.c b/arch/arm/mach-rpc/dma.c index fb48f3141fb4..f2703ca17954 100644 --- a/arch/arm/mach-rpc/dma.c +++ b/arch/arm/mach-rpc/dma.c @@ -151,6 +151,12 @@ static void iomd_free_dma(unsigned int chan, dma_t *dma) free_irq(idma->irq, idma); } +static struct device isa_dma_dev = { + .init_name = "fallback device", + .coherent_dma_mask = ~(dma_addr_t)0, + .dma_mask = &isa_dma_dev.coherent_dma_mask, +}; + static void iomd_enable_dma(unsigned int chan, dma_t *dma) { struct iomd_dma *idma = container_of(dma, struct iomd_dma, dma); @@ -168,7 +174,7 @@ static void iomd_enable_dma(unsigned int chan, dma_t *dma) idma->dma.sg = &idma->dma.buf; idma->dma.sgcount = 1; idma->dma.buf.length = idma->dma.count; - idma->dma.buf.dma_address = dma_map_single(NULL, + idma->dma.buf.dma_address = dma_map_single(&isa_dma_dev, idma->dma.addr, idma->dma.count, idma->dma.dma_mode == DMA_MODE_READ ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE); diff --git a/arch/arm/mach-s3c24xx/include/mach/hardware.h b/arch/arm/mach-s3c24xx/include/mach/hardware.h index 1b2975708e3f..f28ac6c78d82 100644 --- a/arch/arm/mach-s3c24xx/include/mach/hardware.h +++ b/arch/arm/mach-s3c24xx/include/mach/hardware.h @@ -15,7 +15,7 @@ extern unsigned int s3c2410_modify_misccr(unsigned int clr, unsigned int chg); #endif /* __ASSEMBLY__ */ -#include <asm/sizes.h> +#include <linux/sizes.h> #include <mach/map.h> #endif /* __ASM_ARCH_HARDWARE_H */ diff --git a/arch/arm/mach-sa1100/include/mach/memory.h b/arch/arm/mach-sa1100/include/mach/memory.h index fa5cf4744992..3b19296f5062 100644 --- a/arch/arm/mach-sa1100/include/mach/memory.h +++ b/arch/arm/mach-sa1100/include/mach/memory.h @@ -8,7 +8,7 @@ #ifndef __ASM_ARCH_MEMORY_H #define __ASM_ARCH_MEMORY_H -#include <asm/sizes.h> +#include <linux/sizes.h> /* * Because of the wide memory address space between physical RAM banks on the diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c index eb60a71cf125..a671e4c994cf 100644 --- a/arch/arm/mach-sa1100/neponset.c +++ b/arch/arm/mach-sa1100/neponset.c @@ -21,7 +21,7 @@ #include <asm/mach-types.h> #include <asm/mach/map.h> #include <asm/hardware/sa1111.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <mach/hardware.h> #include <mach/assabet.h> diff --git a/arch/arm/mach-tegra/iomap.h b/arch/arm/mach-tegra/iomap.h index 9bc291e76887..4af9e92a216f 100644 --- a/arch/arm/mach-tegra/iomap.h +++ b/arch/arm/mach-tegra/iomap.h @@ -20,7 +20,7 @@ #define __MACH_TEGRA_IOMAP_H #include <asm/pgtable.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #define TEGRA_IRAM_BASE 0x40000000 #define TEGRA_IRAM_SIZE SZ_256K diff --git a/arch/arm/mach-tegra/irammap.h b/arch/arm/mach-tegra/irammap.h index e32e1742c9a1..6a7bb887585e 100644 --- a/arch/arm/mach-tegra/irammap.h +++ b/arch/arm/mach-tegra/irammap.h @@ -17,7 +17,7 @@ #ifndef __MACH_TEGRA_IRAMMAP_H #define __MACH_TEGRA_IRAMMAP_H -#include <asm/sizes.h> +#include <linux/sizes.h> /* The first 1K of IRAM is permanently reserved for the CPU reset handler */ #define TEGRA_IRAM_RESET_HANDLER_OFFSET 0 diff --git a/arch/arm/mach-w90x900/include/mach/hardware.h b/arch/arm/mach-w90x900/include/mach/hardware.h index fe3c6265a466..2e6555df538e 100644 --- a/arch/arm/mach-w90x900/include/mach/hardware.h +++ b/arch/arm/mach-w90x900/include/mach/hardware.h @@ -18,7 +18,7 @@ #ifndef __ASM_ARCH_HARDWARE_H #define __ASM_ARCH_HARDWARE_H -#include <asm/sizes.h> +#include <linux/sizes.h> #include <mach/map.h> #endif /* __ASM_ARCH_HARDWARE_H */ diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index b54f8f8def36..e376883ab35b 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -133,7 +133,7 @@ static const char *usermode_action[] = { static int alignment_proc_show(struct seq_file *m, void *v) { seq_printf(m, "User:\t\t%lu\n", ai_user); - seq_printf(m, "System:\t\t%lu (%pF)\n", ai_sys, ai_sys_last_pc); + seq_printf(m, "System:\t\t%lu (%pS)\n", ai_sys, ai_sys_last_pc); seq_printf(m, "Skipped:\t%lu\n", ai_skipped); seq_printf(m, "Half:\t\t%lu\n", ai_half); seq_printf(m, "Word:\t\t%lu\n", ai_word); diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 43f46aa7ef33..0a75058c11f3 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1577,31 +1577,21 @@ static int __arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { - unsigned 
long uaddr = vma->vm_start; - unsigned long usize = vma->vm_end - vma->vm_start; struct page **pages = __iommu_get_pages(cpu_addr, attrs); unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long off = vma->vm_pgoff; + int err; if (!pages) return -ENXIO; - if (off >= nr_pages || (usize >> PAGE_SHIFT) > nr_pages - off) + if (vma->vm_pgoff >= nr_pages) return -ENXIO; - pages += off; - - do { - int ret = vm_insert_page(vma, uaddr, *pages++); - if (ret) { - pr_err("Remapping memory failed: %d\n", ret); - return ret; - } - uaddr += PAGE_SIZE; - usize -= PAGE_SIZE; - } while (usize > 0); + err = vm_map_pages(vma, pages, nr_pages); + if (err) + pr_err("Remapping memory failed: %d\n", err); - return 0; + return err; } static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index c2daabbe0af0..68dcd5f8d7c6 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -695,27 +695,14 @@ void free_initmem(void) } #ifdef CONFIG_BLK_DEV_INITRD - -static int keep_initrd; - void free_initrd_mem(unsigned long start, unsigned long end) { - if (!keep_initrd) { - if (start == initrd_start) - start = round_down(start, PAGE_SIZE); - if (end == initrd_end) - end = round_up(end, PAGE_SIZE); + if (start == initrd_start) + start = round_down(start, PAGE_SIZE); + if (end == initrd_end) + end = round_up(end, PAGE_SIZE); - poison_init_mem((void *)start, PAGE_ALIGN(end) - start); - free_reserved_area((void *)start, (void *)end, -1, "initrd"); - } + poison_init_mem((void *)start, PAGE_ALIGN(end) - start); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); } - -static int __init keepinitrd_setup(char *__unused) -{ - keep_initrd = 1; - return 1; -} - -__setup("keepinitrd", keepinitrd_setup); #endif diff --git a/arch/arm/nwfpe/fpmodule.c b/arch/arm/nwfpe/fpmodule.c index 1365e8650843..ee34c76e6624 100644 --- a/arch/arm/nwfpe/fpmodule.c +++ b/arch/arm/nwfpe/fpmodule.c @@ -147,7 +147,7 @@ void float_raise(signed char flags) #ifdef CONFIG_DEBUG_USER if (flags & debug) printk(KERN_DEBUG - "NWFPE: %s[%d] takes exception %08x at %pf from %08lx\n", + "NWFPE: %s[%d] takes exception %08x at %ps from %08lx\n", current->comm, current->pid, flags, __builtin_return_address(0), GET_USERREG()->ARM_pc); #endif diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c index d4012d6c0dcb..5ca4c5fd627a 100644 --- a/arch/arm/plat-omap/dma.c +++ b/arch/arm/plat-omap/dma.c @@ -1449,7 +1449,6 @@ static void __exit omap_system_dma_exit(void) MODULE_DESCRIPTION("OMAP SYSTEM DMA DRIVER"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("platform:" DRIVER_NAME); MODULE_AUTHOR("Texas Instruments Inc"); /* diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index df350f4e1e7a..69a59a5d1143 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -13,13 +13,15 @@ config ARM64 select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_DMA_COHERENT_TO_PFN select ARCH_HAS_DMA_MMAP_PGPROT + select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL - select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA + select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV + select ARCH_HAS_KEEPINITRD select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SETUP_DMA_OPS @@ -58,6 +60,7 @@ config ARM64 select ARCH_INLINE_SPIN_UNLOCK_BH if !PREEMPT select ARCH_INLINE_SPIN_UNLOCK_IRQ 
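The __arm_iommu_mmap_attrs() rewrite above is a mechanical conversion to the then-new vm_map_pages() helper: it validates the requested length against the page count and honours vma->vm_pgoff internally, so the open-coded vm_insert_page() loop and the manual offset arithmetic both disappear. A hedged sketch of the same idiom in a driver mmap handler (demo_buf and its fields are assumptions):

#include <linux/fs.h>
#include <linux/mm.h>

struct demo_buf {
	struct page **pages;
	unsigned long nr_pages;
};

static int demo_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct demo_buf *buf = file->private_data;

	/* One call replaces the per-page vm_insert_page() loop. */
	return vm_map_pages(vma, buf->pages, buf->nr_pages);
}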
if !PREEMPT select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPT + select ARCH_KEEP_MEMBLOCK select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index a2cec6218211..fe107ce115ef 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -393,7 +393,7 @@ }; sysmgr: sysmgr@ffd12000 { - compatible = "altr,sys-mgr", "syscon"; + compatible = "altr,sys-mgr-s10","altr,sys-mgr"; reg = <0xffd12000 0x228>; }; diff --git a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi index 976d92a94738..43307bad3f0d 100644 --- a/arch/arm64/boot/dts/mediatek/mt2712e.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt2712e.dtsi @@ -819,7 +819,6 @@ #size-cells = <2>; #interrupt-cells = <1>; ranges; - num-lanes = <1>; interrupt-map-mask = <0 0 0 7>; interrupt-map = <0 0 0 1 &pcie_intc0 0>, <0 0 0 2 &pcie_intc0 1>, @@ -840,7 +839,6 @@ #size-cells = <2>; #interrupt-cells = <1>; ranges; - num-lanes = <1>; interrupt-map-mask = <0 0 0 7>; interrupt-map = <0 0 0 1 &pcie_intc1 0>, <0 0 0 2 &pcie_intc1 1>, diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 2d9c39033c1a..74f0a199166d 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -206,7 +206,7 @@ CONFIG_SIMPLE_PM_BUS=y CONFIG_MTD=y CONFIG_MTD_BLOCK=y CONFIG_MTD_M25P80=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_DENALI_DT=y CONFIG_MTD_NAND_MARVELL=y CONFIG_MTD_NAND_QCOM=y @@ -432,6 +432,7 @@ CONFIG_MESON_WATCHDOG=m CONFIG_RENESAS_WDT=y CONFIG_UNIPHIER_WATCHDOG=y CONFIG_BCM2835_WDT=y +CONFIG_MFD_ALTERA_SYSMGR=y CONFIG_MFD_BD9571MWV=y CONFIG_MFD_AXP20X_I2C=y CONFIG_MFD_AXP20X_RSB=y diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c index 036ea77f83bc..cb89c80800b5 100644 --- a/arch/arm64/crypto/aes-ce-ccm-glue.c +++ b/arch/arm64/crypto/aes-ce-ccm-glue.c @@ -14,6 +14,7 @@ #include <crypto/aes.h> #include <crypto/scatterwalk.h> #include <crypto/internal/aead.h> +#include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> #include <linux/module.h> @@ -109,7 +110,7 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen) static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[], u32 abytes, u32 *macp) { - if (may_use_simd()) { + if (crypto_simd_usable()) { kernel_neon_begin(); ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc, num_rounds(key)); @@ -255,7 +256,7 @@ static int ccm_encrypt(struct aead_request *req) err = skcipher_walk_aead_encrypt(&walk, req, false); - if (may_use_simd()) { + if (crypto_simd_usable()) { while (walk.nbytes) { u32 tail = walk.nbytes % AES_BLOCK_SIZE; @@ -313,7 +314,7 @@ static int ccm_decrypt(struct aead_request *req) err = skcipher_walk_aead_decrypt(&walk, req, false); - if (may_use_simd()) { + if (crypto_simd_usable()) { while (walk.nbytes) { u32 tail = walk.nbytes % AES_BLOCK_SIZE; diff --git a/arch/arm64/crypto/aes-ce-glue.c b/arch/arm64/crypto/aes-ce-glue.c index e6b3227bbf57..3213843fcb46 100644 --- a/arch/arm64/crypto/aes-ce-glue.c +++ b/arch/arm64/crypto/aes-ce-glue.c @@ -12,6 +12,7 @@ #include <asm/simd.h> #include <asm/unaligned.h> #include <crypto/aes.h> +#include <crypto/internal/simd.h> #include <linux/cpufeature.h> #include <linux/crypto.h> #include <linux/module.h> @@ -52,7 +53,7 @@ static void aes_cipher_encrypt(struct 
crypto_tfm *tfm, u8 dst[], u8 const src[]) { struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); - if (!may_use_simd()) { + if (!crypto_simd_usable()) { __aes_arm64_encrypt(ctx->key_enc, dst, src, num_rounds(ctx)); return; } @@ -66,7 +67,7 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) { struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm); - if (!may_use_simd()) { + if (!crypto_simd_usable()) { __aes_arm64_decrypt(ctx->key_dec, dst, src, num_rounds(ctx)); return; } diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 1e676625ef33..f0ceb545bd1e 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -405,7 +405,7 @@ static int ctr_encrypt_sync(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm); - if (!may_use_simd()) + if (!crypto_simd_usable()) return aes_ctr_encrypt_fallback(ctx, req); return ctr_encrypt(req); @@ -642,7 +642,7 @@ static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks, { int rounds = 6 + ctx->key_length / 4; - if (may_use_simd()) { + if (crypto_simd_usable()) { kernel_neon_begin(); aes_mac_update(in, ctx->key_enc, rounds, blocks, dg, enc_before, enc_after); @@ -707,7 +707,7 @@ static int cbcmac_final(struct shash_desc *desc, u8 *out) struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); struct mac_desc_ctx *ctx = shash_desc_ctx(desc); - mac_do_update(&tctx->key, NULL, 0, ctx->dg, 1, 0); + mac_do_update(&tctx->key, NULL, 0, ctx->dg, (ctx->len != 0), 0); memcpy(out, ctx->dg, AES_BLOCK_SIZE); diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index bf1b321ff4c1..02b65d9eb947 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -288,7 +288,7 @@ static int ctr_encrypt_sync(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - if (!may_use_simd()) + if (!crypto_simd_usable()) return aes_ctr_encrypt_fallback(&ctx->fallback, req); return ctr_encrypt(req); @@ -304,6 +304,8 @@ static int __xts_crypt(struct skcipher_request *req, int err; err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; kernel_neon_begin(); neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, ctx->key.rounds, 1); diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c index cb054f51c917..82029cda2e77 100644 --- a/arch/arm64/crypto/chacha-neon-glue.c +++ b/arch/arm64/crypto/chacha-neon-glue.c @@ -21,6 +21,7 @@ #include <crypto/algapi.h> #include <crypto/chacha.h> +#include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> #include <linux/kernel.h> #include <linux/module.h> @@ -90,7 +91,7 @@ static int chacha_neon(struct skcipher_request *req) struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd()) + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) return crypto_chacha_crypt(req); return chacha_neon_stream_xor(req, ctx, req->iv); @@ -104,7 +105,7 @@ static int xchacha_neon(struct skcipher_request *req) u32 state[16]; u8 real_iv[16]; - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd()) + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) return crypto_xchacha_crypt(req); crypto_chacha_init(state, ctx, req->iv); diff --git 
a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c index e81d5bd555c0..2e0a7d2eee24 100644 --- a/arch/arm64/crypto/crct10dif-ce-glue.c +++ b/arch/arm64/crypto/crct10dif-ce-glue.c @@ -16,6 +16,7 @@ #include <linux/string.h> #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <asm/neon.h> #include <asm/simd.h> @@ -38,7 +39,7 @@ static int crct10dif_update_pmull_p8(struct shash_desc *desc, const u8 *data, { u16 *crc = shash_desc_ctx(desc); - if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && may_use_simd()) { + if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && crypto_simd_usable()) { kernel_neon_begin(); *crc = crc_t10dif_pmull_p8(*crc, data, length); kernel_neon_end(); @@ -54,7 +55,7 @@ static int crct10dif_update_pmull_p64(struct shash_desc *desc, const u8 *data, { u16 *crc = shash_desc_ctx(desc); - if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && may_use_simd()) { + if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && crypto_simd_usable()) { kernel_neon_begin(); *crc = crc_t10dif_pmull_p64(*crc, data, length); kernel_neon_end(); diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 4e69bb78ea89..b39ed99b06fb 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -17,6 +17,7 @@ #include <crypto/gf128mul.h> #include <crypto/internal/aead.h> #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include <linux/cpufeature.h> @@ -89,7 +90,7 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src, struct ghash_key const *k, const char *head)) { - if (likely(may_use_simd())) { + if (likely(crypto_simd_usable())) { kernel_neon_begin(); simd_update(blocks, dg, src, key, head); kernel_neon_end(); @@ -441,7 +442,7 @@ static int gcm_encrypt(struct aead_request *req) err = skcipher_walk_aead_encrypt(&walk, req, false); - if (likely(may_use_simd() && walk.total >= 2 * AES_BLOCK_SIZE)) { + if (likely(crypto_simd_usable() && walk.total >= 2 * AES_BLOCK_SIZE)) { u32 const *rk = NULL; kernel_neon_begin(); @@ -473,9 +474,11 @@ static int gcm_encrypt(struct aead_request *req) put_unaligned_be32(2, iv + GCM_IV_SIZE); while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { - int blocks = walk.nbytes / AES_BLOCK_SIZE; + const int blocks = + walk.nbytes / (2 * AES_BLOCK_SIZE) * 2; u8 *dst = walk.dst.virt.addr; u8 *src = walk.src.virt.addr; + int remaining = blocks; do { __aes_arm64_encrypt(ctx->aes_key.key_enc, @@ -485,9 +488,9 @@ static int gcm_encrypt(struct aead_request *req) dst += AES_BLOCK_SIZE; src += AES_BLOCK_SIZE; - } while (--blocks > 0); + } while (--remaining > 0); - ghash_do_update(walk.nbytes / AES_BLOCK_SIZE, dg, + ghash_do_update(blocks, dg, walk.dst.virt.addr, &ctx->ghash_key, NULL, pmull_ghash_update_p64); @@ -563,7 +566,7 @@ static int gcm_decrypt(struct aead_request *req) err = skcipher_walk_aead_decrypt(&walk, req, false); - if (likely(may_use_simd() && walk.total >= 2 * AES_BLOCK_SIZE)) { + if (likely(crypto_simd_usable() && walk.total >= 2 * AES_BLOCK_SIZE)) { u32 const *rk = NULL; kernel_neon_begin(); @@ -609,7 +612,7 @@ static int gcm_decrypt(struct aead_request *req) put_unaligned_be32(2, iv + GCM_IV_SIZE); while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) { - int blocks = walk.nbytes / AES_BLOCK_SIZE; + int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2; u8 *dst = walk.dst.virt.addr; u8 *src = walk.src.virt.addr; diff --git a/arch/arm64/crypto/nhpoly1305-neon-glue.c 
b/arch/arm64/crypto/nhpoly1305-neon-glue.c index 38a589044b6c..895d3727c1fb 100644 --- a/arch/arm64/crypto/nhpoly1305-neon-glue.c +++ b/arch/arm64/crypto/nhpoly1305-neon-glue.c @@ -9,6 +9,7 @@ #include <asm/neon.h> #include <asm/simd.h> #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <crypto/nhpoly1305.h> #include <linux/module.h> @@ -25,7 +26,7 @@ static void _nh_neon(const u32 *key, const u8 *message, size_t message_len, static int nhpoly1305_neon_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { - if (srclen < 64 || !may_use_simd()) + if (srclen < 64 || !crypto_simd_usable()) return crypto_nhpoly1305_update(desc, src, srclen); do { diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index 17fac2889f56..eaa7a8258f1c 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -12,6 +12,7 @@ #include <asm/simd.h> #include <asm/unaligned.h> #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <crypto/sha.h> #include <crypto/sha1_base.h> #include <linux/cpufeature.h> @@ -38,7 +39,7 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data, { struct sha1_ce_state *sctx = shash_desc_ctx(desc); - if (!may_use_simd()) + if (!crypto_simd_usable()) return crypto_sha1_update(desc, data, len); sctx->finalize = 0; @@ -56,7 +57,7 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, struct sha1_ce_state *sctx = shash_desc_ctx(desc); bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE); - if (!may_use_simd()) + if (!crypto_simd_usable()) return crypto_sha1_finup(desc, data, len, out); /* @@ -78,7 +79,7 @@ static int sha1_ce_final(struct shash_desc *desc, u8 *out) { struct sha1_ce_state *sctx = shash_desc_ctx(desc); - if (!may_use_simd()) + if (!crypto_simd_usable()) return crypto_sha1_finup(desc, NULL, 0, out); sctx->finalize = 0; diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index 261f5195cab7..a725997e55f2 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -12,6 +12,7 @@ #include <asm/simd.h> #include <asm/unaligned.h> #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <crypto/sha.h> #include <crypto/sha256_base.h> #include <linux/cpufeature.h> @@ -42,7 +43,7 @@ static int sha256_ce_update(struct shash_desc *desc, const u8 *data, { struct sha256_ce_state *sctx = shash_desc_ctx(desc); - if (!may_use_simd()) + if (!crypto_simd_usable()) return sha256_base_do_update(desc, data, len, (sha256_block_fn *)sha256_block_data_order); @@ -61,7 +62,7 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data, struct sha256_ce_state *sctx = shash_desc_ctx(desc); bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE); - if (!may_use_simd()) { + if (!crypto_simd_usable()) { if (len) sha256_base_do_update(desc, data, len, (sha256_block_fn *)sha256_block_data_order); @@ -90,7 +91,7 @@ static int sha256_ce_final(struct shash_desc *desc, u8 *out) { struct sha256_ce_state *sctx = shash_desc_ctx(desc); - if (!may_use_simd()) { + if (!crypto_simd_usable()) { sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_block_data_order); return sha256_base_finish(desc, out); diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c index 0cccdb9cc2c0..e62298740e31 100644 --- a/arch/arm64/crypto/sha256-glue.c +++ b/arch/arm64/crypto/sha256-glue.c @@ -14,6 +14,7 @@ #include <asm/neon.h> #include <asm/simd.h> #include 
<crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <crypto/sha.h> #include <crypto/sha256_base.h> #include <linux/cryptohash.h> @@ -89,7 +90,7 @@ static int sha256_update_neon(struct shash_desc *desc, const u8 *data, { struct sha256_state *sctx = shash_desc_ctx(desc); - if (!may_use_simd()) + if (!crypto_simd_usable()) return sha256_base_do_update(desc, data, len, (sha256_block_fn *)sha256_block_data_order); @@ -119,7 +120,7 @@ static int sha256_update_neon(struct shash_desc *desc, const u8 *data, static int sha256_finup_neon(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - if (!may_use_simd()) { + if (!crypto_simd_usable()) { if (len) sha256_base_do_update(desc, data, len, (sha256_block_fn *)sha256_block_data_order); diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c index a336feac0f59..9a4bbfc45f40 100644 --- a/arch/arm64/crypto/sha3-ce-glue.c +++ b/arch/arm64/crypto/sha3-ce-glue.c @@ -14,6 +14,7 @@ #include <asm/simd.h> #include <asm/unaligned.h> #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <crypto/sha3.h> #include <linux/cpufeature.h> #include <linux/crypto.h> @@ -32,7 +33,7 @@ static int sha3_update(struct shash_desc *desc, const u8 *data, struct sha3_state *sctx = shash_desc_ctx(desc); unsigned int digest_size = crypto_shash_digestsize(desc->tfm); - if (!may_use_simd()) + if (!crypto_simd_usable()) return crypto_sha3_update(desc, data, len); if ((sctx->partial + len) >= sctx->rsiz) { @@ -76,7 +77,7 @@ static int sha3_final(struct shash_desc *desc, u8 *out) __le64 *digest = (__le64 *)out; int i; - if (!may_use_simd()) + if (!crypto_simd_usable()) return crypto_sha3_final(desc, out); sctx->buf[sctx->partial++] = 0x06; diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c index f2c5f28c622a..2369540040aa 100644 --- a/arch/arm64/crypto/sha512-ce-glue.c +++ b/arch/arm64/crypto/sha512-ce-glue.c @@ -13,6 +13,7 @@ #include <asm/simd.h> #include <asm/unaligned.h> #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <crypto/sha.h> #include <crypto/sha512_base.h> #include <linux/cpufeature.h> @@ -31,7 +32,7 @@ asmlinkage void sha512_block_data_order(u64 *digest, u8 const *src, int blocks); static int sha512_ce_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - if (!may_use_simd()) + if (!crypto_simd_usable()) return sha512_base_do_update(desc, data, len, (sha512_block_fn *)sha512_block_data_order); @@ -46,7 +47,7 @@ static int sha512_ce_update(struct shash_desc *desc, const u8 *data, static int sha512_ce_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - if (!may_use_simd()) { + if (!crypto_simd_usable()) { if (len) sha512_base_do_update(desc, data, len, (sha512_block_fn *)sha512_block_data_order); @@ -65,7 +66,7 @@ static int sha512_ce_finup(struct shash_desc *desc, const u8 *data, static int sha512_ce_final(struct shash_desc *desc, u8 *out) { - if (!may_use_simd()) { + if (!crypto_simd_usable()) { sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_block_data_order); return sha512_base_finish(desc, out); diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c index 88938a20d9b2..5d15533799a2 100644 --- a/arch/arm64/crypto/sm3-ce-glue.c +++ b/arch/arm64/crypto/sm3-ce-glue.c @@ -12,6 +12,7 @@ #include <asm/simd.h> #include <asm/unaligned.h> #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <crypto/sm3.h> #include <crypto/sm3_base.h> 
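The long run of arm64 crypto conversions in this region swaps the bare may_use_simd() test for crypto_simd_usable() from <crypto/internal/simd.h>. The crypto wrapper still checks whether kernel-mode NEON may be entered in the current context, but it can additionally be forced off by the crypto self-tests so the scalar fallback paths get exercised. Every converted hook has the same shape; a sketch with hypothetical transform names:

#include <asm/neon.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>

static int demo_update(struct shash_desc *desc, const u8 *data,
		       unsigned int len)
{
	if (!crypto_simd_usable())
		return demo_update_scalar(desc, data, len);	/* assumed fallback */

	kernel_neon_begin();
	demo_transform_neon(shash_desc_ctx(desc), data, len);	/* assumed */
	kernel_neon_end();

	return 0;
}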
#include <linux/cpufeature.h> @@ -28,7 +29,7 @@ asmlinkage void sm3_ce_transform(struct sm3_state *sst, u8 const *src, static int sm3_ce_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - if (!may_use_simd()) + if (!crypto_simd_usable()) return crypto_sm3_update(desc, data, len); kernel_neon_begin(); @@ -40,7 +41,7 @@ static int sm3_ce_update(struct shash_desc *desc, const u8 *data, static int sm3_ce_final(struct shash_desc *desc, u8 *out) { - if (!may_use_simd()) + if (!crypto_simd_usable()) return crypto_sm3_finup(desc, NULL, 0, out); kernel_neon_begin(); @@ -53,7 +54,7 @@ static int sm3_ce_final(struct shash_desc *desc, u8 *out) static int sm3_ce_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - if (!may_use_simd()) + if (!crypto_simd_usable()) return crypto_sm3_finup(desc, data, len, out); kernel_neon_begin(); diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c index 0c4fc223f225..2754c875d39c 100644 --- a/arch/arm64/crypto/sm4-ce-glue.c +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -3,6 +3,7 @@ #include <asm/neon.h> #include <asm/simd.h> #include <crypto/sm4.h> +#include <crypto/internal/simd.h> #include <linux/module.h> #include <linux/cpufeature.h> #include <linux/crypto.h> @@ -20,7 +21,7 @@ static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); - if (!may_use_simd()) { + if (!crypto_simd_usable()) { crypto_sm4_encrypt(tfm, out, in); } else { kernel_neon_begin(); @@ -33,7 +34,7 @@ static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); - if (!may_use_simd()) { + if (!crypto_simd_usable()) { crypto_sm4_decrypt(tfm, out, in); } else { kernel_neon_begin(); diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index eb0df239a759..9e977dedf193 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -20,7 +20,6 @@ generic-y += qspinlock.h generic-y += segment.h generic-y += serial.h generic-y += set_memory.h -generic-y += sizes.h generic-y += switch_to.h generic-y += trace_clock.h generic-y += unaligned.h diff --git a/arch/arm64/include/asm/boot.h b/arch/arm64/include/asm/boot.h index 355e552a9175..c7f67da13cd9 100644 --- a/arch/arm64/include/asm/boot.h +++ b/arch/arm64/include/asm/boot.h @@ -3,7 +3,7 @@ #ifndef __ASM_BOOT_H #define __ASM_BOOT_H -#include <asm/sizes.h> +#include <linux/sizes.h> /* * arm64 requires the DTB to be 8 byte aligned and diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index f210bcf096f7..bc895c869892 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -401,7 +401,7 @@ unsigned long cpu_get_elf_hwcap2(void); #define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name)) /* System capability check for constant caps */ -static inline bool __cpus_have_const_cap(int num) +static __always_inline bool __cpus_have_const_cap(int num) { if (num >= ARM64_NCAPS) return false; @@ -415,7 +415,7 @@ static inline bool cpus_have_cap(unsigned int num) return test_bit(num, cpu_hwcaps); } -static inline bool cpus_have_const_cap(int num) +static __always_inline bool cpus_have_const_cap(int num) { if (static_branch_likely(&arm64_const_caps_ready)) return __cpus_have_const_cap(num); diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index c6a07a3b433e..4aad6382f631 100644 --- 
a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -70,8 +70,4 @@ extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr, #include <asm-generic/hugetlb.h> -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -static inline bool gigantic_page_supported(void) { return true; } -#endif - #endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 9c01f04db64d..ec894de0ed4e 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -277,6 +277,7 @@ __AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000) __AARCH64_INSN_FUNCS(prfm, 0x3FC00000, 0x39800000) __AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) +__AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0xB8200000) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) @@ -394,6 +395,13 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, enum aarch64_insn_register state, enum aarch64_insn_size_type size, enum aarch64_insn_ldst_type type); +u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size); +u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size); u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, enum aarch64_insn_register src, int imm, enum aarch64_insn_variant variant, diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 2cb8248fa2c8..8ffcf5a512bb 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -26,7 +26,7 @@ #include <linux/types.h> #include <asm/bug.h> #include <asm/page-def.h> -#include <asm/sizes.h> +#include <linux/sizes.h> /* * Size of the PCI I/O space. This must remain a power of two so that diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index a179df3674a1..a65167f5cded 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -87,9 +87,9 @@ static inline void syscall_set_arguments(struct task_struct *task, * We don't care about endianness (__AUDIT_ARCH_LE bit) here because * AArch64 has the same system calls both on little- and big- endian. 
*/ -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { - if (is_compat_task()) + if (is_compat_thread(task_thread_info(task))) return AUDIT_ARCH_ARM; return AUDIT_ARCH_AARCH64; diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index eac1d0cc595c..7ff800045434 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -45,7 +45,7 @@ static inline int get_cpu_for_acpi_id(u32 uid) return -EINVAL; } -static int __init acpi_parse_gicc_pxm(struct acpi_subtable_header *header, +static int __init acpi_parse_gicc_pxm(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_srat_gicc_affinity *pa; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 7820a4a688fa..9e2b5882cdeb 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -734,6 +734,46 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, state); } +u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size) +{ + u32 insn = aarch64_insn_get_ldadd_value(); + + switch (size) { + case AARCH64_INSN_SIZE_32: + case AARCH64_INSN_SIZE_64: + break; + default: + pr_err("%s: unimplemented size encoding %d\n", __func__, size); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_encode_ldst_size(size, insn); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, + result); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, + address); + + return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn, + value); +} + +u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size) +{ + /* + * STADD is simply encoded as an alias for LDADD with XZR as + * the destination register. 
+ */ + return aarch64_insn_gen_ldadd(AARCH64_INSN_REG_ZR, address, + value, size); +} + static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type, enum aarch64_insn_prfm_target target, enum aarch64_insn_prfm_policy policy, diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 824de7038967..bb4b3f07761a 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -586,7 +586,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) } static int __init -acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header, +acpi_parse_gic_cpu_interface(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_generic_interrupt *processor; @@ -595,7 +595,7 @@ acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header, if (BAD_MADT_GICC_ENTRY(processor, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); acpi_map_gic_cpu_interface(processor); diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index a3f85624313e..a67121d419a2 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -23,7 +23,6 @@ config KVM depends on OF select MMU_NOTIFIER select PREEMPT_NOTIFIERS - select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 40e2d7e5efcb..d2adffb81b5d 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -48,7 +48,7 @@ #include <asm/numa.h> #include <asm/sections.h> #include <asm/setup.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/tlb.h> #include <asm/alternative.h> @@ -578,24 +578,11 @@ void free_initmem(void) } #ifdef CONFIG_BLK_DEV_INITRD - -static int keep_initrd __initdata; - void __init free_initrd_mem(unsigned long start, unsigned long end) { - if (!keep_initrd) { - free_reserved_area((void *)start, (void *)end, 0, "initrd"); - memblock_free(__virt_to_phys(start), end - start); - } -} - -static int __init keepinitrd_setup(char *__unused) -{ - keep_initrd = 1; - return 1; + free_reserved_area((void *)start, (void *)end, 0, "initrd"); + memblock_free(__virt_to_phys(start), end - start); } - -__setup("keepinitrd", keepinitrd_setup); #endif /* diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index ef82312860ac..a170c6369a68 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -40,7 +40,7 @@ #include <asm/kernel-pgtable.h> #include <asm/sections.h> #include <asm/setup.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/tlb.h> #include <asm/mmu_context.h> #include <asm/ptdump.h> @@ -1065,8 +1065,8 @@ int p4d_free_pud_page(p4d_t *p4d, unsigned long addr) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { int flags = 0; @@ -1077,6 +1077,6 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, size, PAGE_KERNEL, __pgd_pgtable_alloc, flags); return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, - altmap, want_memblock); + restrictions); } #endif diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index 783de51a6c4e..76606e87233f 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -100,11 +100,9 @@ #define A64_STXR(sf, Rt, Rn, Rs) \ A64_LSX(sf, Rt, Rn, Rs, STORE_EX) -/* Prefetch */ -#define A64_PRFM(Rn, type, target, policy) \ - 
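Because STADD is architecturally just LDADD with XZR as the destination, the generator above only has to delegate. The same optimisation is observable from C, and it is what the bpf_jit_comp.c change that follows exploits for BPF_XADD on LSE-capable CPUs: a fetch-add whose result is discarded needs no load-exclusive/store-exclusive retry loop. A small sketch (the codegen claim assumes a compiler targeting ARMv8.1 LSE):

/* gcc -O2 -march=armv8.1-a -S demo.c */
#include <stdatomic.h>

void demo_add(_Atomic unsigned long *p, unsigned long v)
{
	/*
	 * Relaxed ordering, result unused: LSE-aware compilers can emit a
	 * single "stadd x1, [x0]" here, i.e. LDADD with XZR as Rt.
	 */
	atomic_fetch_add_explicit(p, v, memory_order_relaxed);
}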
aarch64_insn_gen_prefetch(Rn, AARCH64_INSN_PRFM_TYPE_##type, \ - AARCH64_INSN_PRFM_TARGET_##target, \ - AARCH64_INSN_PRFM_POLICY_##policy) +/* LSE atomics */ +#define A64_STADD(sf, Rn, Rs) \ + aarch64_insn_gen_stadd(Rn, Rs, A64_SIZE(sf)) /* Add/subtract (immediate) */ #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index aaddc0217e73..df845cee438e 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -365,7 +365,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, const bool is64 = BPF_CLASS(code) == BPF_ALU64 || BPF_CLASS(code) == BPF_JMP; const bool isdw = BPF_SIZE(code) == BPF_DW; - u8 jmp_cond; + u8 jmp_cond, reg; s32 jmp_offset; #define check_imm(bits, imm) do { \ @@ -756,19 +756,28 @@ emit_cond_jmp: break; } break; + /* STX XADD: lock *(u32 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_W: /* STX XADD: lock *(u64 *)(dst + off) += src */ case BPF_STX | BPF_XADD | BPF_DW: - emit_a64_mov_i(1, tmp, off, ctx); - emit(A64_ADD(1, tmp, tmp, dst), ctx); - emit(A64_PRFM(tmp, PST, L1, STRM), ctx); - emit(A64_LDXR(isdw, tmp2, tmp), ctx); - emit(A64_ADD(isdw, tmp2, tmp2, src), ctx); - emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx); - jmp_offset = -3; - check_imm19(jmp_offset); - emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); + if (!off) { + reg = dst; + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_ADD(1, tmp, tmp, dst), ctx); + reg = tmp; + } + if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) { + emit(A64_STADD(isdw, reg, src), ctx); + } else { + emit(A64_LDXR(isdw, tmp2, reg), ctx); + emit(A64_ADD(isdw, tmp2, tmp2, src), ctx); + emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx); + jmp_offset = -3; + check_imm19(jmp_offset); + emit(A64_CBNZ(0, tmp3, jmp_offset), ctx); + } break; default: diff --git a/arch/c6x/include/asm/syscall.h b/arch/c6x/include/asm/syscall.h index 15ba8599858e..5bcdcb651b19 100644 --- a/arch/c6x/include/asm/syscall.h +++ b/arch/c6x/include/asm/syscall.h @@ -11,6 +11,7 @@ #ifndef __ASM_C6X_SYSCALL_H #define __ASM_C6X_SYSCALL_H +#include <uapi/linux/audit.h> #include <linux/err.h> #include <linux/sched.h> @@ -69,4 +70,10 @@ static inline void syscall_set_arguments(struct task_struct *task, regs->a9 = *args; } +static inline int syscall_get_arch(struct task_struct *task) +{ + return IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) + ? 
AUDIT_ARCH_C6XBE : AUDIT_ARCH_C6X; +} + #endif /* __ASM_C6X_SYSCALLS_H */ diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c index fe582c3a1794..573242b160e1 100644 --- a/arch/c6x/mm/init.c +++ b/arch/c6x/mm/init.c @@ -68,15 +68,3 @@ void __init mem_init(void) mem_init_print_info(NULL); } - -#ifdef CONFIG_BLK_DEV_INITRD -void __init free_initrd_mem(unsigned long start, unsigned long end) -{ - free_reserved_area((void *)start, (void *)end, -1, "initrd"); -} -#endif - -void __init free_initmem(void) -{ - free_initmem_default(-1); -} diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig index 6555d1781132..ce0799077f3b 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -1,6 +1,7 @@ config CSKY def_bool y select ARCH_32BIT_OFF_T + select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_USE_BUILTIN_BSWAP @@ -29,15 +30,20 @@ config CSKY select GENERIC_SCHED_CLOCK select GENERIC_SMP_IDLE_THREAD select HAVE_ARCH_TRACEHOOK + select HAVE_ARCH_AUDITSYSCALL + select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FTRACE_MCOUNT_RECORD select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO select HAVE_KERNEL_LZMA select HAVE_PERF_EVENTS - select HAVE_C_RECORDMCOUNT + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS + select HAVE_SYSCALL_TRACEPOINTS select MAY_HAVE_SPARSE_IRQ select MODULES_USE_ELF_RELA if MODULES select OF diff --git a/arch/csky/Makefile b/arch/csky/Makefile index 3607a6e8f66c..6b87f6c22ad6 100644 --- a/arch/csky/Makefile +++ b/arch/csky/Makefile @@ -36,7 +36,7 @@ endif ifneq ($(CSKYABI),) MCPU_STR = $(CPUTYPE)$(FPUEXT)$(VDSPEXT)$(TEEEXT) -KBUILD_CFLAGS += -mcpu=$(MCPU_STR) +KBUILD_CFLAGS += -mcpu=$(CPUTYPE) -Wa,-mcpu=$(MCPU_STR) KBUILD_CFLAGS += -DCSKYCPU_DEF_NAME=\"$(MCPU_STR)\" KBUILD_CFLAGS += -msoft-float -mdiv KBUILD_CFLAGS += -fno-tree-vectorize diff --git a/arch/csky/abiv1/inc/abi/ckmmu.h b/arch/csky/abiv1/inc/abi/ckmmu.h index 3a002017bebe..81f37715c0d2 100644 --- a/arch/csky/abiv1/inc/abi/ckmmu.h +++ b/arch/csky/abiv1/inc/abi/ckmmu.h @@ -40,6 +40,26 @@ static inline void write_mmu_entryhi(int value) cpwcr("cpcr4", value); } +static inline unsigned long read_mmu_msa0(void) +{ + return cprcr("cpcr30"); +} + +static inline void write_mmu_msa0(unsigned long value) +{ + cpwcr("cpcr30", value); +} + +static inline unsigned long read_mmu_msa1(void) +{ + return cprcr("cpcr31"); +} + +static inline void write_mmu_msa1(unsigned long value) +{ + cpwcr("cpcr31", value); +} + /* * TLB operations. 
*/ @@ -65,11 +85,11 @@ static inline void tlb_invalid_indexed(void) static inline void setup_pgd(unsigned long pgd, bool kernel) { - cpwcr("cpcr29", pgd); + cpwcr("cpcr29", pgd | BIT(0)); } static inline unsigned long get_pgd(void) { - return cprcr("cpcr29"); + return cprcr("cpcr29") & ~BIT(0); } #endif /* __ASM_CSKY_CKMMUV1_H */ diff --git a/arch/csky/abiv1/inc/abi/entry.h b/arch/csky/abiv1/inc/abi/entry.h index 3f3faab3d747..7ab78bd0f3b1 100644 --- a/arch/csky/abiv1/inc/abi/entry.h +++ b/arch/csky/abiv1/inc/abi/entry.h @@ -16,9 +16,6 @@ #define LSAVE_A4 40 #define LSAVE_A5 44 -#define EPC_INCREASE 2 -#define EPC_KEEP 0 - .macro USPTOKSP mtcr sp, ss1 mfcr sp, ss0 @@ -29,10 +26,6 @@ mfcr sp, ss1 .endm -.macro INCTRAP rx - addi \rx, EPC_INCREASE -.endm - .macro SAVE_ALL epc_inc mtcr r13, ss2 mfcr r13, epsr @@ -150,11 +143,35 @@ cpwcr \rx, cpcr8 .endm -.macro SETUP_MMU rx - lrw \rx, PHYS_OFFSET | 0xe - cpwcr \rx, cpcr30 - lrw \rx, (PHYS_OFFSET + 0x20000000) | 0xe - cpwcr \rx, cpcr31 +.macro SETUP_MMU + /* Init psr and enable ee */ + lrw r6, DEFAULT_PSR_VALUE + mtcr r6, psr + psrset ee + + /* Select MMU as co-processor */ + cpseti cp15 + + /* + * cpcr30 format: + * 31 - 29 | 28 - 4 | 3 | 2 | 1 | 0 + * BA Reserved C D V + */ + cprcr r6, cpcr30 + lsri r6, 28 + lsli r6, 28 + addi r6, 0xe + cpwcr r6, cpcr30 + + lsri r6, 28 + addi r6, 2 + lsli r6, 28 + addi r6, 0xe + cpwcr r6, cpcr31 .endm +.macro ANDI_R3 rx, imm + lsri \rx, 3 + andi \rx, (\imm >> 3) +.endm #endif /* __ASM_CSKY_ENTRY_H */ diff --git a/arch/csky/abiv1/inc/abi/regdef.h b/arch/csky/abiv1/inc/abi/regdef.h index 876689291b71..104707fbdcc1 100644 --- a/arch/csky/abiv1/inc/abi/regdef.h +++ b/arch/csky/abiv1/inc/abi/regdef.h @@ -5,9 +5,8 @@ #define __ASM_CSKY_REGDEF_H #define syscallid r1 -#define r11_sig r11 - #define regs_syscallid(regs) regs->regs[9] +#define regs_fp(regs) regs->regs[2] /* * PSR format: @@ -23,4 +22,6 @@ #define SYSTRACE_SAVENUM 2 +#define TRAP0_SIZE 2 + #endif /* __ASM_CSKY_REGDEF_H */ diff --git a/arch/csky/abiv2/cacheflush.c b/arch/csky/abiv2/cacheflush.c index d22c95ffc74d..5bb887b275e1 100644 --- a/arch/csky/abiv2/cacheflush.c +++ b/arch/csky/abiv2/cacheflush.c @@ -34,10 +34,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, { unsigned long addr, pfn; struct page *page; - void *va; - - if (!(vma->vm_flags & VM_EXEC)) - return; pfn = pte_pfn(*pte); if (unlikely(!pfn_valid(pfn))) @@ -47,14 +43,9 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, if (page == ZERO_PAGE(0)) return; - va = page_address(page); - addr = (unsigned long) va; - - if (va == NULL && PageHighMem(page)) - addr = (unsigned long) kmap_atomic(page); + addr = (unsigned long) kmap_atomic(page); cache_wbinv_range(addr, addr + PAGE_SIZE); - if (va == NULL && PageHighMem(page)) - kunmap_atomic((void *) addr); + kunmap_atomic((void *) addr); } diff --git a/arch/csky/abiv2/inc/abi/ckmmu.h b/arch/csky/abiv2/inc/abi/ckmmu.h index 97230ad9427c..e4480e6bc3b3 100644 --- a/arch/csky/abiv2/inc/abi/ckmmu.h +++ b/arch/csky/abiv2/inc/abi/ckmmu.h @@ -42,6 +42,26 @@ static inline void write_mmu_entryhi(int value) mtcr("cr<4, 15>", value); } +static inline unsigned long read_mmu_msa0(void) +{ + return mfcr("cr<30, 15>"); +} + +static inline void write_mmu_msa0(unsigned long value) +{ + mtcr("cr<30, 15>", value); +} + +static inline unsigned long read_mmu_msa1(void) +{ + return mfcr("cr<31, 15>"); +} + +static inline void write_mmu_msa1(unsigned long value) +{ + mtcr("cr<31, 15>", value); +} + /* * TLB operations. 
*/ @@ -70,18 +90,16 @@ static inline void tlb_invalid_indexed(void) mtcr("cr<8, 15>", 0x02000000); } -/* setup hardrefil pgd */ -static inline unsigned long get_pgd(void) -{ - return mfcr("cr<29, 15>"); -} - static inline void setup_pgd(unsigned long pgd, bool kernel) { if (kernel) - mtcr("cr<28, 15>", pgd); + mtcr("cr<28, 15>", pgd | BIT(0)); else - mtcr("cr<29, 15>", pgd); + mtcr("cr<29, 15>", pgd | BIT(0)); } +static inline unsigned long get_pgd(void) +{ + return mfcr("cr<29, 15>") & ~BIT(0); +} #endif /* __ASM_CSKY_CKMMUV2_H */ diff --git a/arch/csky/abiv2/inc/abi/entry.h b/arch/csky/abiv2/inc/abi/entry.h index edc5cc04c4de..9897a16b45e5 100644 --- a/arch/csky/abiv2/inc/abi/entry.h +++ b/arch/csky/abiv2/inc/abi/entry.h @@ -14,18 +14,11 @@ #define LSAVE_A2 32 #define LSAVE_A3 36 -#define EPC_INCREASE 4 -#define EPC_KEEP 0 - #define KSPTOUSP #define USPTOKSP #define usp cr<14, 1> -.macro INCTRAP rx - addi \rx, EPC_INCREASE -.endm - .macro SAVE_ALL epc_inc subi sp, 152 stw tls, (sp, 0) @@ -169,10 +162,80 @@ mtcr \rx, cr<8, 15> .endm -.macro SETUP_MMU rx - lrw \rx, PHYS_OFFSET | 0xe - mtcr \rx, cr<30, 15> - lrw \rx, (PHYS_OFFSET + 0x20000000) | 0xe - mtcr \rx, cr<31, 15> +.macro SETUP_MMU + /* Init psr and enable ee */ + lrw r6, DEFAULT_PSR_VALUE + mtcr r6, psr + psrset ee + + /* Invalidate I/Dcache, BTB, BHT */ + movi r6, 7 + lsli r6, 16 + addi r6, (1<<4) | 3 + mtcr r6, cr17 + + /* Invalidate all TLB entries */ + bgeni r6, 26 + mtcr r6, cr<8, 15> /* Set MCIR */ + + /* Check MMU on/off */ + mfcr r6, cr18 + btsti r6, 0 + bt 1f + + /* MMU off: set up a mapping TLB entry */ + movi r6, 0 + mtcr r6, cr<6, 15> /* Set MPR with 4K page size */ + + grs r6, 1f /* Get the current PA from the PC */ + bmaski r7, (PAGE_SHIFT + 1) /* r7 = 0x1fff */ + andn r6, r7 + mtcr r6, cr<4, 15> /* Set MEH */ + + mov r8, r6 + movi r7, 0x00000006 + or r8, r7 + mtcr r8, cr<2, 15> /* Set MEL0 */ + movi r7, 0x00001006 + or r8, r7 + mtcr r8, cr<3, 15> /* Set MEL1 */ + + bgeni r8, 28 + mtcr r8, cr<8, 15> /* Set MCIR to write TLB */ + + br 2f +1: + /* + * MMU on: use the original MSA value from the bootloader + * + * cr<30/31, 15> MSA register format: + * 31 - 29 | 28 - 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 + * BA Reserved SH WA B SO SEC C D V + */ + mfcr r6, cr<30, 15> /* Get MSA0 */ +2: + lsri r6, 28 + lsli r6, 28 + addi r6, 0x1ce + mtcr r6, cr<30, 15> /* Set MSA0 */ + + lsri r6, 28 + addi r6, 2 + lsli r6, 28 + addi r6, 0x1ce + mtcr r6, cr<31, 15> /* Set MSA1 */ + + /* enable MMU */ + mfcr r6, cr18 + bseti r6, 0 + mtcr r6, cr18 + + jmpi 3f /* jump to va */ +3: +.endm + +.macro ANDI_R3 rx, imm + lsri \rx, 3 + andi \rx, (\imm >> 3) .endm #endif /* __ASM_CSKY_ENTRY_H */ diff --git a/arch/csky/abiv2/inc/abi/regdef.h b/arch/csky/abiv2/inc/abi/regdef.h index c72abb781bdc..d7328bbc1ce7 100644 --- a/arch/csky/abiv2/inc/abi/regdef.h +++ b/arch/csky/abiv2/inc/abi/regdef.h @@ -5,9 +5,8 @@ #define __ASM_CSKY_REGDEF_H #define syscallid r7 -#define r11_sig r11 - #define regs_syscallid(regs) regs->regs[3] +#define regs_fp(regs) regs->regs[4] /* * PSR format: @@ -23,4 +22,6 @@ #define SYSTRACE_SAVENUM 5 +#define TRAP0_SIZE 4 + #endif /* __ASM_CSKY_REGDEF_H */ diff --git a/arch/csky/abiv2/mcount.S b/arch/csky/abiv2/mcount.S index c633379956f5..326402e65f9e 100644 --- a/arch/csky/abiv2/mcount.S +++ b/arch/csky/abiv2/mcount.S @@ -61,10 +61,17 @@ addi sp, 16 .endm +.macro nop32_stub + nop32 + nop32 + nop32 +.endm + ENTRY(ftrace_stub) jmp lr END(ftrace_stub) +#ifndef CONFIG_DYNAMIC_FTRACE ENTRY(_mcount) mcount_enter @@ -76,7 +83,7 @@ ENTRY(_mcount) bf skip_ftrace mov a0, lr - subi
a0, MCOUNT_INSN_SIZE + subi a0, 4 ldw a1, (sp, 24) jsr r26 @@ -101,13 +108,41 @@ skip_ftrace: mcount_exit #endif END(_mcount) +#else /* CONFIG_DYNAMIC_FTRACE */ +ENTRY(_mcount) + mov t1, lr + ldw lr, (sp, 0) + addi sp, 4 + jmp t1 +ENDPROC(_mcount) + +ENTRY(ftrace_caller) + mcount_enter + + ldw a0, (sp, 16) + subi a0, 4 + ldw a1, (sp, 24) + + nop +GLOBAL(ftrace_call) + nop32_stub + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + nop +GLOBAL(ftrace_graph_call) + nop32_stub +#endif + + mcount_exit +ENDPROC(ftrace_caller) +#endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) mov a0, sp addi a0, 24 ldw a1, (sp, 16) - subi a1, MCOUNT_INSN_SIZE + subi a1, 4 mov a2, r8 lrw r26, prepare_ftrace_return jsr r26 diff --git a/arch/csky/abiv2/memmove.S b/arch/csky/abiv2/memmove.S index b0c42ecf1889..5721e73ad3d8 100644 --- a/arch/csky/abiv2/memmove.S +++ b/arch/csky/abiv2/memmove.S @@ -35,11 +35,7 @@ ENTRY(memmove) .L_len_larger_16bytes: subi r1, 16 subi r0, 16 -#if defined(__CSKY_VDSPV2__) - vldx.8 vr0, (r1), r19 - PRE_BNEZAD (r18) - vstx.8 vr0, (r0), r19 -#elif defined(__CK860__) +#if defined(__CK860__) ldw r3, (r1, 12) stw r3, (r0, 12) ldw r3, (r1, 8) diff --git a/arch/csky/include/asm/Kbuild b/arch/csky/include/asm/Kbuild index 95f4e550db8a..a9b63efef416 100644 --- a/arch/csky/include/asm/Kbuild +++ b/arch/csky/include/asm/Kbuild @@ -12,7 +12,6 @@ generic-y += dma-mapping.h generic-y += emergency-restart.h generic-y += exec.h generic-y += fb.h -generic-y += ftrace.h generic-y += futex.h generic-y += gpio.h generic-y += hardirq.h diff --git a/arch/csky/include/asm/ftrace.h b/arch/csky/include/asm/ftrace.h index 7547c45312a8..ba35d93ecda2 100644 --- a/arch/csky/include/asm/ftrace.h +++ b/arch/csky/include/asm/ftrace.h @@ -4,10 +4,26 @@ #ifndef __ASM_CSKY_FTRACE_H #define __ASM_CSKY_FTRACE_H -#define MCOUNT_INSN_SIZE 4 +#define MCOUNT_INSN_SIZE 14 #define HAVE_FUNCTION_GRAPH_FP_TEST #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR +#define MCOUNT_ADDR ((unsigned long)_mcount) + +#ifndef __ASSEMBLY__ + +extern void _mcount(unsigned long); + +extern void ftrace_graph_call(void); + +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + return addr; +} + +struct dyn_arch_ftrace { +}; +#endif /* !__ASSEMBLY__ */ #endif /* __ASM_CSKY_FTRACE_H */ diff --git a/arch/csky/include/asm/mmu_context.h b/arch/csky/include/asm/mmu_context.h index b2905c0485a7..734db3a122e1 100644 --- a/arch/csky/include/asm/mmu_context.h +++ b/arch/csky/include/asm/mmu_context.h @@ -14,23 +14,10 @@ #include <linux/sched.h> #include <abi/ckmmu.h> -static inline void tlbmiss_handler_setup_pgd(unsigned long pgd, bool kernel) -{ - pgd -= PAGE_OFFSET; - pgd += PHYS_OFFSET; - pgd |= 1; - setup_pgd(pgd, kernel); -} - #define TLBMISS_HANDLER_SETUP_PGD(pgd) \ - tlbmiss_handler_setup_pgd((unsigned long)pgd, 0) + setup_pgd(__pa(pgd), false) #define TLBMISS_HANDLER_SETUP_PGD_KERNEL(pgd) \ - tlbmiss_handler_setup_pgd((unsigned long)pgd, 1) - -static inline unsigned long tlb_get_pgd(void) -{ - return ((get_pgd() - PHYS_OFFSET) & ~1) + PAGE_OFFSET; -} + setup_pgd(__pa(pgd), true) #define cpu_context(cpu, mm) ((mm)->context.asid[cpu]) #define cpu_asid(cpu, mm) (cpu_context((cpu), (mm)) & ASID_MASK) diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h index 73cf2bd66a13..9738eacefdc7 100644 --- a/arch/csky/include/asm/page.h +++ b/arch/csky/include/asm/page.h @@ -8,7 +8,7 @@ #include <linux/const.h> /* - * PAGE_SHIFT determines the page size + * PAGE_SHIFT determines the page size: 4KB 
*/ #define PAGE_SHIFT 12 #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) @@ -17,12 +17,18 @@ #define THREAD_MASK (~(THREAD_SIZE - 1)) #define THREAD_SHIFT (PAGE_SHIFT + 1) + /* - * NOTE: virtual isn't really correct, actually it should be the offset into the - * memory node, but we have no highmem, so that works for now. - * TODO: implement (fast) pfn<->pgdat_idx conversion functions, this makes lots - * of the shifts unnecessary. + * For C-SKY, "User-space:Kernel-space" is "2GB:2GB", fixed by hardware, and + * there are two segment registers (MSA0 + MSA1) that map a 512MB + 512MB + * physical address region. We use them to map the kernel's 1GB direct-map + * address area; for more than 1GB of memory we use highmem. */ +#define PAGE_OFFSET 0x80000000 +#define SSEG_SIZE 0x20000000 +#define LOWMEM_LIMIT (SSEG_SIZE * 2) + +#define PHYS_OFFSET_OFFSET (CONFIG_RAM_BASE & (SSEG_SIZE - 1)) #ifndef __ASSEMBLY__ @@ -50,9 +56,6 @@ struct page; struct vm_area_struct; -/* - * These are used to make use of C type-checking.. - */ typedef struct { unsigned long pte_low; } pte_t; #define pte_val(x) ((x).pte_low) @@ -69,18 +72,13 @@ typedef struct page *pgtable_t; #define __pgd(x) ((pgd_t) { (x) }) #define __pgprot(x) ((pgprot_t) { (x) }) -#endif /* !__ASSEMBLY__ */ +extern unsigned long va_pa_offset; -#define PHYS_OFFSET (CONFIG_RAM_BASE & ~(LOWMEM_LIMIT - 1)) -#define PHYS_OFFSET_OFFSET (CONFIG_RAM_BASE & (LOWMEM_LIMIT - 1)) -#define ARCH_PFN_OFFSET PFN_DOWN(CONFIG_RAM_BASE) +#define ARCH_PFN_OFFSET PFN_DOWN(va_pa_offset + PHYS_OFFSET_OFFSET) -#define PAGE_OFFSET 0x80000000 -#define LOWMEM_LIMIT 0x40000000 +#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET + va_pa_offset) +#define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET - va_pa_offset)) -#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET + PHYS_OFFSET) -#define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET - \ - PHYS_OFFSET)) #define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0)) #define MAP_NR(x) PFN_DOWN((unsigned long)(x) - PAGE_OFFSET - \ @@ -90,15 +88,10 @@ typedef struct page *pgtable_t; #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -/* - * main RAM and kernel working space are coincident at 0x80000000, but to make - * life more interesting, there's also an uncached virtual shadow at 0xb0000000 - * - these mappings are fixed in the MMU - */ - #define pfn_to_kaddr(x) __va(PFN_PHYS(x)) #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> +#endif /* !__ASSEMBLY__ */ #endif /* __ASM_CSKY_PAGE_H */ diff --git a/arch/csky/include/asm/perf_event.h b/arch/csky/include/asm/perf_event.h index ea8193122294..572093e11001 100644 --- a/arch/csky/include/asm/perf_event.h +++ b/arch/csky/include/asm/perf_event.h @@ -4,4 +4,12 @@ #ifndef __ASM_CSKY_PERF_EVENT_H #define __ASM_CSKY_PERF_EVENT_H +#include <abi/regdef.h> + +#define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->pc = (__ip); \ + regs_fp(regs) = (unsigned long) __builtin_frame_address(0); \ + asm volatile("mov %0, sp\n":"=r"((regs)->usp)); \ +} + #endif /* __ASM_PERF_EVENT_ELF_H */ diff --git a/arch/csky/include/asm/ptrace.h b/arch/csky/include/asm/ptrace.h new file mode 100644 index 000000000000..d0aba7b32417 --- /dev/null +++ b/arch/csky/include/asm/ptrace.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
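With the compile-time PHYS_OFFSET gone, the __pa()/__va() pair above pivots on the runtime va_pa_offset that csky_start() derives from MSA0 later in this patch. A small sketch of the translation, assuming a hypothetical RAM base of 0x40000000:

#include <assert.h>

#define PAGE_OFFSET 0x80000000UL

static unsigned long va_pa_offset;	/* set at boot from MSA0 in the real code */

static unsigned long pa_model(unsigned long va)
{
	return va - PAGE_OFFSET + va_pa_offset;	/* mirrors the new __pa() */
}

static unsigned long va_model(unsigned long pa)
{
	return pa + PAGE_OFFSET - va_pa_offset;	/* mirrors the new __va() */
}

int main(void)
{
	va_pa_offset = 0x40000000UL;		/* hypothetical RAM base */
	assert(pa_model(0x80100000UL) == 0x40100000UL);
	assert(va_model(0x40100000UL) == 0x80100000UL);
	return 0;
}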
+ +#ifndef __ASM_CSKY_PTRACE_H +#define __ASM_CSKY_PTRACE_H + +#include <uapi/asm/ptrace.h> +#include <asm/traps.h> +#include <linux/types.h> + +#ifndef __ASSEMBLY__ + +#define PS_S 0x80000000 /* Supervisor Mode */ + +#define arch_has_single_step() (1) +#define current_pt_regs() \ +({ (struct pt_regs *)((char *)current_thread_info() + THREAD_SIZE) - 1; }) + +#define user_stack_pointer(regs) ((regs)->usp) + +#define user_mode(regs) (!((regs)->sr & PS_S)) +#define instruction_pointer(regs) ((regs)->pc) +#define profile_pc(regs) instruction_pointer(regs) + +static inline bool in_syscall(struct pt_regs const *regs) +{ + return ((regs->sr >> 16) & 0xff) == VEC_TRAP0; +} + +static inline void forget_syscall(struct pt_regs *regs) +{ + regs->sr &= ~(0xff << 16); +} + +static inline unsigned long regs_return_value(struct pt_regs *regs) +{ + return regs->a0; +} + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_CSKY_PTRACE_H */ diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h index bda0a446c63e..f624fa3bbc22 100644 --- a/arch/csky/include/asm/syscall.h +++ b/arch/csky/include/asm/syscall.h @@ -8,6 +8,8 @@ #include <abi/regdef.h> #include <uapi/linux/audit.h> +extern void *sys_call_table[]; + static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { @@ -15,6 +17,13 @@ syscall_get_nr(struct task_struct *task, struct pt_regs *regs) } static inline void +syscall_set_nr(struct task_struct *task, struct pt_regs *regs, + int sysno) +{ + regs_syscallid(regs) = sysno; +} + +static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { regs->a0 = regs->orig_a0; @@ -60,7 +69,7 @@ syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, } static inline int -syscall_get_arch(void) +syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_CSKY; } diff --git a/arch/csky/include/asm/thread_info.h b/arch/csky/include/asm/thread_info.h index 0e9d035d712b..0b546a55a8bf 100644 --- a/arch/csky/include/asm/thread_info.h +++ b/arch/csky/include/asm/thread_info.h @@ -51,29 +51,26 @@ static inline struct thread_info *current_thread_info(void) #endif /* !__ASSEMBLY__ */ -/* entry.S relies on these definitions! 
- * bits 0-5 are tested at every exception exit - */ #define TIF_SIGPENDING 0 /* signal pending */ #define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_SYSCALL_TRACE 5 /* syscall trace active */ -#define TIF_DELAYED_TRACE 14 /* single step a syscall */ +#define TIF_SYSCALL_TRACE 3 /* syscall trace active */ +#define TIF_SYSCALL_TRACEPOINT 4 /* syscall tracepoint instrumentation */ +#define TIF_SYSCALL_AUDIT 5 /* syscall auditing */ #define TIF_POLLING_NRFLAG 16 /* poll_idle() is TIF_NEED_RESCHED */ #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ -#define TIF_FREEZE 19 /* thread is freezing for suspend */ #define TIF_RESTORE_SIGMASK 20 /* restore signal mask in do_signal() */ #define TIF_SECCOMP 21 /* secure computing */ -#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) -#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) -#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) -#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) -#define _TIF_DELAYED_TRACE (1 << TIF_DELAYED_TRACE) -#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) +#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_MEMDIE (1 << TIF_MEMDIE) -#define _TIF_FREEZE (1 << TIF_FREEZE) -#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) -#define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) #endif /* _ASM_CSKY_THREAD_INFO_H */ diff --git a/arch/csky/include/asm/unistd.h b/arch/csky/include/asm/unistd.h index 284487477a61..da7a18295615 100644 --- a/arch/csky/include/asm/unistd.h +++ b/arch/csky/include/asm/unistd.h @@ -2,3 +2,5 @@ // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. #include <uapi/asm/unistd.h> + +#define NR_syscalls (__NR_syscalls) diff --git a/arch/csky/include/uapi/asm/perf_regs.h b/arch/csky/include/uapi/asm/perf_regs.h new file mode 100644 index 000000000000..ee323d818592 --- /dev/null +++ b/arch/csky/include/uapi/asm/perf_regs.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd. 
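The thread_info.h hunk above packs TIF_SYSCALL_TRACE, TIF_SYSCALL_TRACEPOINT, and TIF_SYSCALL_AUDIT into adjacent bits 3-5, which is what lets entry.S test all three at once with the ANDI_R3 macro (shift right by 3, then a small andi immediate, since abiv1's andi cannot encode a wide immediate). A C model of that equivalence, assuming no flag of interest lives below bit 3:

#include <assert.h>

#define TIF_SYSCALL_TRACE      3
#define TIF_SYSCALL_TRACEPOINT 4
#define TIF_SYSCALL_AUDIT      5

#define _TIF_SYSCALL_WORK ((1UL << TIF_SYSCALL_TRACE) | \
			   (1UL << TIF_SYSCALL_TRACEPOINT) | \
			   (1UL << TIF_SYSCALL_AUDIT))

/* What ANDI_R3 rx, imm computes: (rx >> 3) & (imm >> 3). */
static int syscall_work_model(unsigned long flags)
{
	return ((flags >> 3) & (_TIF_SYSCALL_WORK >> 3)) != 0;
}

int main(void)
{
	assert(syscall_work_model(1UL << TIF_SYSCALL_AUDIT));
	assert(!syscall_work_model(1UL << 0));	/* TIF_SIGPENDING is ignored */
	/* Same answer as the straightforward mask test: */
	assert(syscall_work_model(0x28) == !!(0x28 & _TIF_SYSCALL_WORK));
	return 0;
}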
+ +#ifndef _ASM_CSKY_PERF_REGS_H +#define _ASM_CSKY_PERF_REGS_H + +/* Index of struct pt_regs */ +enum perf_event_csky_regs { + PERF_REG_CSKY_TLS, + PERF_REG_CSKY_LR, + PERF_REG_CSKY_PC, + PERF_REG_CSKY_SR, + PERF_REG_CSKY_SP, + PERF_REG_CSKY_ORIG_A0, + PERF_REG_CSKY_A0, + PERF_REG_CSKY_A1, + PERF_REG_CSKY_A2, + PERF_REG_CSKY_A3, + PERF_REG_CSKY_REGS0, + PERF_REG_CSKY_REGS1, + PERF_REG_CSKY_REGS2, + PERF_REG_CSKY_REGS3, + PERF_REG_CSKY_REGS4, + PERF_REG_CSKY_REGS5, + PERF_REG_CSKY_REGS6, + PERF_REG_CSKY_REGS7, + PERF_REG_CSKY_REGS8, + PERF_REG_CSKY_REGS9, +#if defined(__CSKYABIV2__) + PERF_REG_CSKY_EXREGS0, + PERF_REG_CSKY_EXREGS1, + PERF_REG_CSKY_EXREGS2, + PERF_REG_CSKY_EXREGS3, + PERF_REG_CSKY_EXREGS4, + PERF_REG_CSKY_EXREGS5, + PERF_REG_CSKY_EXREGS6, + PERF_REG_CSKY_EXREGS7, + PERF_REG_CSKY_EXREGS8, + PERF_REG_CSKY_EXREGS9, + PERF_REG_CSKY_EXREGS10, + PERF_REG_CSKY_EXREGS11, + PERF_REG_CSKY_EXREGS12, + PERF_REG_CSKY_EXREGS13, + PERF_REG_CSKY_EXREGS14, + PERF_REG_CSKY_HI, + PERF_REG_CSKY_LO, + PERF_REG_CSKY_DCSR, +#endif + PERF_REG_CSKY_MAX, +}; +#endif /* _ASM_CSKY_PERF_REGS_H */ diff --git a/arch/csky/include/uapi/asm/ptrace.h b/arch/csky/include/uapi/asm/ptrace.h index a4eaa8ddf0b1..4e248d5b86ef 100644 --- a/arch/csky/include/uapi/asm/ptrace.h +++ b/arch/csky/include/uapi/asm/ptrace.h @@ -48,20 +48,5 @@ struct user_fp { unsigned long reserved; }; -#ifdef __KERNEL__ - -#define PS_S 0x80000000 /* Supervisor Mode */ - -#define arch_has_single_step() (1) -#define current_pt_regs() \ -({ (struct pt_regs *)((char *)current_thread_info() + THREAD_SIZE) - 1; }) - -#define user_stack_pointer(regs) ((regs)->usp) - -#define user_mode(regs) (!((regs)->sr & PS_S)) -#define instruction_pointer(regs) ((regs)->pc) -#define profile_pc(regs) instruction_pointer(regs) - -#endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ #endif /* _CSKY_PTRACE_H */ diff --git a/arch/csky/kernel/Makefile b/arch/csky/kernel/Makefile index 484e6d3a3647..1624b04bffb5 100644 --- a/arch/csky/kernel/Makefile +++ b/arch/csky/kernel/Makefile @@ -9,6 +9,8 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_CSKY_PMU_V1) += perf_event.o +obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o +obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) diff --git a/arch/csky/kernel/atomic.S b/arch/csky/kernel/atomic.S index d2357c8f85bd..5b84f11485ae 100644 --- a/arch/csky/kernel/atomic.S +++ b/arch/csky/kernel/atomic.S @@ -12,11 +12,10 @@ * If *ptr != oldval && return 1, * else *ptr = newval return 0. 
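The comment above csky_cmpxchg states its contract; stripped of the ldex/stex retry loop and the user/kernel stack switch, the semantics the trap implements reduce to the following (a sketch of the contract only, not the kernel code):

#include <assert.h>

static int cmpxchg_model(int *ptr, int oldval, int newval)
{
	if (*ptr != oldval)
		return 1;	/* compare failed, nothing stored */
	*ptr = newval;
	return 0;		/* stored, as the comment documents */
}

int main(void)
{
	int v = 1;
	assert(cmpxchg_model(&v, 1, 2) == 0 && v == 2);
	assert(cmpxchg_model(&v, 1, 3) == 1 && v == 2);
	return 0;
}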
*/ -#ifdef CONFIG_CPU_HAS_LDSTEX ENTRY(csky_cmpxchg) USPTOKSP mfcr a3, epc - INCTRAP a3 + addi a3, TRAP0_SIZE subi sp, 8 stw a3, (sp, 0) @@ -24,6 +23,7 @@ ENTRY(csky_cmpxchg) stw a3, (sp, 4) psrset ee +#ifdef CONFIG_CPU_HAS_LDSTEX 1: ldex a3, (a2) cmpne a0, a3 @@ -33,27 +33,7 @@ ENTRY(csky_cmpxchg) bez a3, 1b 2: sync.is - mvc a0 - ldw a3, (sp, 0) - mtcr a3, epc - ldw a3, (sp, 4) - mtcr a3, epsr - addi sp, 8 - KSPTOUSP - rte -END(csky_cmpxchg) #else -ENTRY(csky_cmpxchg) - USPTOKSP - mfcr a3, epc - INCTRAP a3 - - subi sp, 8 - stw a3, (sp, 0) - mfcr a3, epsr - stw a3, (sp, 4) - - psrset ee 1: ldw a3, (a2) cmpne a0, a3 @@ -61,6 +41,7 @@ ENTRY(csky_cmpxchg) 2: stw a1, (a2) 3: +#endif mvc a0 ldw a3, (sp, 0) mtcr a3, epc @@ -71,6 +52,7 @@ ENTRY(csky_cmpxchg) rte END(csky_cmpxchg) +#ifndef CONFIG_CPU_HAS_LDSTEX /* * Called from tlbmodified exception */ diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S index 5137ed9062bd..a7e84ccccbd8 100644 --- a/arch/csky/kernel/entry.S +++ b/arch/csky/kernel/entry.S @@ -40,7 +40,8 @@ ENTRY(csky_\name) WR_MCIR a2 #endif bclri r6, 0 - lrw a2, PHYS_OFFSET + lrw a2, va_pa_offset + ld.w a2, (a2, 0) subu r6, a2 bseti r6, 31 @@ -50,7 +51,8 @@ ENTRY(csky_\name) addu r6, a2 ldw r6, (r6) - lrw a2, PHYS_OFFSET + lrw a2, va_pa_offset + ld.w a2, (a2, 0) subu r6, a2 bseti r6, 31 @@ -91,7 +93,7 @@ ENTRY(csky_\name) mfcr a3, ss2 mfcr r6, ss3 mfcr a2, ss4 - SAVE_ALL EPC_KEEP + SAVE_ALL 0 .endm .macro tlbop_end is_write RD_MEH a2 @@ -99,7 +101,6 @@ ENTRY(csky_\name) mov a0, sp movi a1, \is_write jbsr do_page_fault - movi r11_sig, 0 /* r11 = 0, Not a syscall. */ jmpi ret_from_exception .endm @@ -118,7 +119,7 @@ jbsr csky_cmpxchg_fixup tlbop_end 1 ENTRY(csky_systemcall) - SAVE_ALL EPC_INCREASE + SAVE_ALL TRAP0_SIZE psrset ee, ie @@ -136,8 +137,9 @@ ENTRY(csky_systemcall) bmaski r10, THREAD_SHIFT andn r9, r10 ldw r8, (r9, TINFO_FLAGS) - btsti r8, TIF_SYSCALL_TRACE - bt 1f + ANDI_R3 r8, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) + cmpnei r8, 0 + bt csky_syscall_trace #if defined(__CSKYABIV2__) subi sp, 8 stw r5, (sp, 0x4) @@ -150,10 +152,9 @@ ENTRY(csky_systemcall) stw a0, (sp, LSAVE_A0) /* Save return value */ jmpi ret_from_exception -1: - movi a0, 0 /* enter system call */ - mov a1, sp /* sp = pt_regs pointer */ - jbsr syscall_trace +csky_syscall_trace: + mov a0, sp /* sp = pt_regs pointer */ + jbsr syscall_trace_enter /* Prepare args before do system call */ ldw a0, (sp, LSAVE_A0) ldw a1, (sp, LSAVE_A1) @@ -173,9 +174,8 @@ ENTRY(csky_systemcall) #endif stw a0, (sp, LSAVE_A0) /* Save return value */ - movi a0, 1 /* leave system call */ - mov a1, sp /* right now, sp --> pt_regs */ - jbsr syscall_trace + mov a0, sp /* right now, sp --> pt_regs */ + jbsr syscall_trace_exit br ret_from_exception ENTRY(ret_from_kernel_thread) @@ -190,14 +190,11 @@ ENTRY(ret_from_fork) bmaski r10, THREAD_SHIFT andn r9, r10 ldw r8, (r9, TINFO_FLAGS) - movi r11_sig, 1 - btsti r8, TIF_SYSCALL_TRACE - bf 3f - movi a0, 1 - mov a1, sp /* sp = pt_regs pointer */ - jbsr syscall_trace -3: - jbsr ret_from_exception + ANDI_R3 r8, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) + cmpnei r8, 0 + bf ret_from_exception + mov a0, sp /* sp = pt_regs pointer */ + jbsr syscall_trace_exit ret_from_exception: ld syscallid, (sp, LSAVE_PSR) @@ -212,41 +209,30 @@ ret_from_exception: bmaski r10, THREAD_SHIFT andn r9, r10 -resume_userspace: ldw r8, (r9, TINFO_FLAGS) andi r8, (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED) cmpnei r8, 0 bt exit_work -1: 
RESTORE_ALL +1: + RESTORE_ALL exit_work: + lrw syscallid, ret_from_exception + mov lr, syscallid + btsti r8, TIF_NEED_RESCHED bt work_resched - /* If thread_info->flag is empty, RESTORE_ALL */ - cmpnei r8, 0 - bf 1b - mov a1, sp - mov a0, r8 - mov a2, r11_sig /* syscall? */ - btsti r8, TIF_SIGPENDING /* delivering a signal? */ - /* prevent further restarts(set r11 = 0) */ - clrt r11_sig - jbsr do_notify_resume /* do signals */ - br resume_userspace + + mov a0, sp + mov a1, r8 + jmpi do_notify_resume work_resched: - lrw syscallid, ret_from_exception - mov r15, syscallid /* Return address in link */ jmpi schedule -ENTRY(sys_rt_sigreturn) - movi r11_sig, 0 - jmpi do_rt_sigreturn - ENTRY(csky_trap) - SAVE_ALL EPC_KEEP + SAVE_ALL 0 psrset ee - movi r11_sig, 0 /* r11 = 0, Not a syscall. */ mov a0, sp /* Push Stack pointer arg */ jbsr trap_c /* Call C-level trap handler */ jmpi ret_from_exception @@ -261,7 +247,7 @@ ENTRY(csky_get_tls) /* increase epc for continue */ mfcr a0, epc - INCTRAP a0 + addi a0, TRAP0_SIZE mtcr a0, epc /* get current task thread_info with kernel 8K stack */ @@ -278,9 +264,8 @@ ENTRY(csky_get_tls) rte ENTRY(csky_irq) - SAVE_ALL EPC_KEEP + SAVE_ALL 0 psrset ee - movi r11_sig, 0 /* r11 = 0, Not a syscall. */ #ifdef CONFIG_PREEMPT mov r9, sp /* Get current stack pointer */ diff --git a/arch/csky/kernel/ftrace.c b/arch/csky/kernel/ftrace.c index 274c431f1810..44f4880179b7 100644 --- a/arch/csky/kernel/ftrace.c +++ b/arch/csky/kernel/ftrace.c @@ -3,6 +3,137 @@ #include <linux/ftrace.h> #include <linux/uaccess.h> +#include <asm/cacheflush.h> + +#ifdef CONFIG_DYNAMIC_FTRACE + +#define NOP 0x4000 +#define NOP32_HI 0xc400 +#define NOP32_LO 0x4820 +#define PUSH_LR 0x14d0 +#define MOVIH_LINK 0xea3a +#define ORI_LINK 0xef5a +#define JSR_LINK 0xe8fa +#define BSR_LINK 0xe000 + +/* + * Gcc-csky with -pg will insert a stub in the function prologue: + * push lr + * jbsr _mcount + * nop32 + * nop32 + * + * If (callee - current_pc) is less than 64MB, we'll use bsr: + * push lr + * bsr _mcount + * nop32 + * nop32 + * else we'll use (movih + ori + jsr): + * push lr + * movih r26, ... + * ori r26, ... + * jsr r26 + * + * (r26 is our reserved link-reg) + * + */ +static inline void make_jbsr(unsigned long callee, unsigned long pc, + uint16_t *call, bool nolr) +{ + long offset; + + call[0] = nolr ?
NOP : PUSH_LR; + + offset = (long) callee - (long) pc; + + if (unlikely(offset < -67108864 || offset > 67108864)) { + call[1] = MOVIH_LINK; + call[2] = callee >> 16; + call[3] = ORI_LINK; + call[4] = callee & 0xffff; + call[5] = JSR_LINK; + call[6] = 0; + } else { + offset = offset >> 1; + + call[1] = BSR_LINK | + ((uint16_t)((unsigned long) offset >> 16) & 0x3ff); + call[2] = (uint16_t)((unsigned long) offset & 0xffff); + call[3] = call[5] = NOP32_HI; + call[4] = call[6] = NOP32_LO; + } +} + +static uint16_t nops[7] = {NOP, NOP32_HI, NOP32_LO, NOP32_HI, NOP32_LO, + NOP32_HI, NOP32_LO}; +static int ftrace_check_current_nop(unsigned long hook) +{ + uint16_t olds[7]; + unsigned long hook_pos = hook - 2; + + if (probe_kernel_read((void *)olds, (void *)hook_pos, sizeof(nops))) + return -EFAULT; + + if (memcmp((void *)nops, (void *)olds, sizeof(nops))) { + pr_err("%p: nop but get (%04x %04x %04x %04x %04x %04x %04x)\n", + (void *)hook_pos, + olds[0], olds[1], olds[2], olds[3], olds[4], olds[5], + olds[6]); + + return -EINVAL; + } + + return 0; +} + +static int ftrace_modify_code(unsigned long hook, unsigned long target, + bool enable, bool nolr) +{ + uint16_t call[7]; + + unsigned long hook_pos = hook - 2; + int ret = 0; + + make_jbsr(target, hook, call, nolr); + + ret = probe_kernel_write((void *)hook_pos, enable ? call : nops, + sizeof(nops)); + if (ret) + return -EPERM; + + flush_icache_range(hook_pos, hook_pos + MCOUNT_INSN_SIZE); + + return 0; +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + int ret = ftrace_check_current_nop(rec->ip); + + if (ret) + return ret; + + return ftrace_modify_code(rec->ip, addr, true, false); +} + +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + return ftrace_modify_code(rec->ip, addr, false, false); +} + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + int ret = ftrace_modify_code((unsigned long)&ftrace_call, + (unsigned long)func, true, true); + return ret; +} + +int __init ftrace_dyn_arch_init(void) +{ + return 0; +} +#endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, @@ -43,8 +174,21 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, *(unsigned long *)frame_pointer = return_hooker; } } -#endif + +#ifdef CONFIG_DYNAMIC_FTRACE +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_code((unsigned long)&ftrace_graph_call, + (unsigned long)&ftrace_graph_caller, true, true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_code((unsigned long)&ftrace_graph_call, + (unsigned long)&ftrace_graph_caller, false, true); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ /* _mcount is defined in abi's mcount.S */ -extern void _mcount(void); EXPORT_SYMBOL(_mcount); diff --git a/arch/csky/kernel/head.S b/arch/csky/kernel/head.S index 9c4ec473b76b..61989f9241c0 100644 --- a/arch/csky/kernel/head.S +++ b/arch/csky/kernel/head.S @@ -7,16 +7,11 @@ __HEAD ENTRY(_start) - /* set super user mode */ - lrw a3, DEFAULT_PSR_VALUE - mtcr a3, psr - psrset ee - - SETUP_MMU a3 + SETUP_MMU /* set stack point */ - lrw a3, init_thread_union + THREAD_SIZE - mov sp, a3 + lrw r6, init_thread_union + THREAD_SIZE + mov sp, r6 jmpi csky_start END(_start) @@ -24,53 +19,12 @@ END(_start) #ifdef CONFIG_SMP .align 10 ENTRY(_start_smp_secondary) - /* Invalid I/Dcache BTB BHT */ - movi a3, 7 - lsli a3, 16 - addi a3, (1<<4) | 3 - mtcr 
a3, cr17 - - tlbi.alls - - /* setup PAGEMASK */ - movi a3, 0 - mtcr a3, cr<6, 15> - - /* setup MEL0/MEL1 */ - grs a0, _start_smp_pc -_start_smp_pc: - bmaski a1, 13 - andn a0, a1 - movi a1, 0x00000006 - movi a2, 0x00001006 - or a1, a0 - or a2, a0 - mtcr a1, cr<2, 15> - mtcr a2, cr<3, 15> - - /* setup MEH */ - mtcr a0, cr<4, 15> - - /* write TLB */ - bgeni a3, 28 - mtcr a3, cr<8, 15> - - SETUP_MMU a3 - - /* enable MMU */ - movi a3, 1 - mtcr a3, cr18 - - jmpi _goto_mmu_on -_goto_mmu_on: - lrw a3, DEFAULT_PSR_VALUE - mtcr a3, psr - psrset ee + SETUP_MMU /* set stack point */ - lrw a3, secondary_stack - ld.w a3, (a3, 0) - mov sp, a3 + lrw r6, secondary_stack + ld.w r6, (r6, 0) + mov sp, r6 jmpi csky_start_secondary END(_start_smp_secondary) diff --git a/arch/csky/kernel/perf_callchain.c b/arch/csky/kernel/perf_callchain.c new file mode 100644 index 000000000000..e68ff375c8f8 --- /dev/null +++ b/arch/csky/kernel/perf_callchain.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd. + +#include <linux/perf_event.h> +#include <linux/uaccess.h> + +/* Kernel callchain */ +struct stackframe { + unsigned long fp; + unsigned long lr; +}; + +static int unwind_frame_kernel(struct stackframe *frame) +{ + if (kstack_end((void *)frame->fp)) + return -EPERM; + if (frame->fp & 0x3 || frame->fp < TASK_SIZE) + return -EPERM; + + *frame = *(struct stackframe *)frame->fp; + if (__kernel_text_address(frame->lr)) { + int graph = 0; + + frame->lr = ftrace_graph_ret_addr(NULL, &graph, frame->lr, + NULL); + } + return 0; +} + +static void notrace walk_stackframe(struct stackframe *fr, + struct perf_callchain_entry_ctx *entry) +{ + do { + perf_callchain_store(entry, fr->lr); + } while (unwind_frame_kernel(fr) >= 0); +} + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. + */ +static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, + unsigned long fp, unsigned long reg_lr) +{ + struct stackframe buftail; + unsigned long lr = 0; + unsigned long *user_frame_tail = (unsigned long *)fp; + + /* Check accessibility of one struct frame_tail beyond */ + if (!access_ok(user_frame_tail, sizeof(buftail))) + return 0; + if (__copy_from_user_inatomic(&buftail, user_frame_tail, + sizeof(buftail))) + return 0; + + if (reg_lr != 0) + lr = reg_lr; + else + lr = buftail.lr; + + fp = buftail.fp; + perf_callchain_store(entry, lr); + + return fp; +} + +/* + * This will be called when the target is in user mode + * This function will only be called when we use + * "PERF_SAMPLE_CALLCHAIN" in + * kernel/events/core.c:perf_prepare_sample() + * + * How to trigger perf_callchain_[user/kernel] : + * $ perf record -e cpu-clock --call-graph fp ./program + * $ perf report --call-graph + * + * On C-SKY platform, the program being sampled and the C library + * need to be compiled with * -mbacktrace, otherwise the user + * stack will not contain function frame. + */ +void perf_callchain_user(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + unsigned long fp = 0; + + /* C-SKY does not support virtualization. */ + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) + return; + + fp = regs->regs[4]; + perf_callchain_store(entry, regs->pc); + + /* + * While backtrace from leaf function, lr is normally + * not saved inside frame on C-SKY, so get lr from pt_regs + * at the sample point. 
However, lr value can be incorrect if + * lr is used as temp register + */ + fp = user_backtrace(entry, fp, regs->lr); + + while (fp && !(fp & 0x3) && entry->nr < entry->max_stack) + fp = user_backtrace(entry, fp, 0); +} + +void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + struct stackframe fr; + + /* C-SKY does not support virtualization. */ + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + pr_warn("C-SKY does not support perf in guest mode!"); + return; + } + + fr.fp = regs->regs[4]; + fr.lr = regs->lr; + walk_stackframe(&fr, entry); +} diff --git a/arch/csky/kernel/perf_regs.c b/arch/csky/kernel/perf_regs.c new file mode 100644 index 000000000000..eb32838b8210 --- /dev/null +++ b/arch/csky/kernel/perf_regs.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd. + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/bug.h> +#include <asm/perf_regs.h> +#include <asm/ptrace.h> + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE((u32)idx >= PERF_REG_CSKY_MAX)) + return 0; + + return (u64)*((u32 *)regs + idx); +} + +#define REG_RESERVED (~((1ULL << PERF_REG_CSKY_MAX) - 1)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + return PERF_SAMPLE_REGS_ABI_32; +} + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs, + struct pt_regs *regs_user_copy) +{ + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); +} diff --git a/arch/csky/kernel/ptrace.c b/arch/csky/kernel/ptrace.c index f2f12fff36f7..313623a19ecb 100644 --- a/arch/csky/kernel/ptrace.c +++ b/arch/csky/kernel/ptrace.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. +#include <linux/audit.h> #include <linux/elf.h> #include <linux/errno.h> #include <linux/kernel.h> @@ -11,6 +12,7 @@ #include <linux/sched/task_stack.h> #include <linux/signal.h> #include <linux/smp.h> +#include <linux/tracehook.h> #include <linux/uaccess.h> #include <linux/user.h> @@ -22,6 +24,9 @@ #include <abi/regdef.h> +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + /* sets the trace bits. */ #define TRACE_MODE_SI (1 << 14) #define TRACE_MODE_RUN 0 @@ -207,35 +212,27 @@ long arch_ptrace(struct task_struct *child, long request, return ret; } -/* - * If process's system calls is traces, do some corresponding handles in this - * function before entering system call function and after exiting system call - * function. - */ -asmlinkage void syscall_trace(int why, struct pt_regs *regs) +asmlinkage void syscall_trace_enter(struct pt_regs *regs) { - long saved_why; - /* - * Save saved_why, why is used to denote syscall entry/exit; - * why = 0:entry, why = 1: exit - */ - saved_why = regs->regs[SYSTRACE_SAVENUM]; - regs->regs[SYSTRACE_SAVENUM] = why; - - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0)); - - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. 
-brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } + if (test_thread_flag(TIF_SYSCALL_TRACE)) + if (tracehook_report_syscall_entry(regs)) + syscall_set_nr(current, regs, -1); + + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_enter(regs, syscall_get_nr(current, regs)); + + audit_syscall_entry(regs_syscallid(regs), regs->a0, regs->a1, regs->a2, regs->a3); +} + +asmlinkage void syscall_trace_exit(struct pt_regs *regs) +{ + audit_syscall_exit(regs); + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, 0); - regs->regs[SYSTRACE_SAVENUM] = saved_why; + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_exit(regs, syscall_get_return_value(current, regs)); } extern void show_stack(struct task_struct *task, unsigned long *stack); diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c index dff8b89444ec..23ee604aafdb 100644 --- a/arch/csky/kernel/setup.c +++ b/arch/csky/kernel/setup.c @@ -142,18 +142,24 @@ void __init setup_arch(char **cmdline_p) #endif } -asmlinkage __visible void __init csky_start(unsigned int unused, void *param) +unsigned long va_pa_offset; +EXPORT_SYMBOL(va_pa_offset); + +asmlinkage __visible void __init csky_start(unsigned int unused, + void *dtb_start) { /* Clean up bss section */ memset(__bss_start, 0, __bss_stop - __bss_start); + va_pa_offset = read_mmu_msa0() & ~(SSEG_SIZE - 1); + pre_trap_init(); pre_mmu_init(); - if (param == NULL) + if (dtb_start == NULL) early_init_dt_scan(__dtb_start); else - early_init_dt_scan(param); + early_init_dt_scan(dtb_start); start_kernel(); diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index 207a891479d2..04a43cfd4e09 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -1,26 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 -// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. 
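The setup.c hunk above is where va_pa_offset gets its value: only bits 31-29 of MSA0 (the BA field) carry the physical base, so everything below SSEG_SIZE is masked off. A sketch of that mask with a hypothetical bootloader-programmed MSA0:

#include <assert.h>

#define SSEG_SIZE 0x20000000UL

int main(void)
{
	/* Hypothetical MSA0: BA = 0x40000000, low attribute bits (C/D/V...) set. */
	unsigned long msa0 = 0x40000000UL | 0x1ce;

	unsigned long va_pa_offset = msa0 & ~(SSEG_SIZE - 1);	/* as csky_start() computes it */
	assert(va_pa_offset == 0x40000000UL);
	return 0;
}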
-#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/kernel.h> #include <linux/signal.h> +#include <linux/uaccess.h> #include <linux/syscalls.h> -#include <linux/errno.h> -#include <linux/wait.h> -#include <linux/ptrace.h> -#include <linux/unistd.h> -#include <linux/stddef.h> -#include <linux/highuid.h> -#include <linux/personality.h> -#include <linux/tty.h> -#include <linux/binfmts.h> #include <linux/tracehook.h> -#include <linux/freezer.h> -#include <linux/uaccess.h> -#include <asm/setup.h> -#include <asm/pgtable.h> #include <asm/traps.h> #include <asm/ucontext.h> #include <asm/vdso.h> @@ -29,110 +13,117 @@ #ifdef CONFIG_CPU_HAS_FPU #include <abi/fpu.h> - -static int restore_fpu_state(struct sigcontext *sc) +static int restore_fpu_state(struct sigcontext __user *sc) { int err = 0; struct user_fp user_fp; - err = copy_from_user(&user_fp, &sc->sc_user_fp, sizeof(user_fp)); + err = __copy_from_user(&user_fp, &sc->sc_user_fp, sizeof(user_fp)); restore_from_user_fp(&user_fp); return err; } -static int save_fpu_state(struct sigcontext *sc) +static int save_fpu_state(struct sigcontext __user *sc) { struct user_fp user_fp; save_to_user_fp(&user_fp); - return copy_to_user(&sc->sc_user_fp, &user_fp, sizeof(user_fp)); + return __copy_to_user(&sc->sc_user_fp, &user_fp, sizeof(user_fp)); } #else -static inline int restore_fpu_state(struct sigcontext *sc) { return 0; } -static inline int save_fpu_state(struct sigcontext *sc) { return 0; } +#define restore_fpu_state(sigcontext) (0) +#define save_fpu_state(sigcontext) (0) #endif struct rt_sigframe { - int sig; - struct siginfo *pinfo; - void *puc; struct siginfo info; struct ucontext uc; }; -static int -restore_sigframe(struct pt_regs *regs, - struct sigcontext *sc, int *pr2) +static long restore_sigcontext(struct pt_regs *regs, + struct sigcontext __user *sc) { int err = 0; - /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->task->restart_block.fn = do_no_restart_syscall; - - err |= copy_from_user(regs, &sc->sc_pt_regs, sizeof(struct pt_regs)); + /* sc_pt_regs is structured the same as the start of pt_regs */ + err |= __copy_from_user(regs, &sc->sc_pt_regs, sizeof(struct pt_regs)); + /* Restore the floating-point state. 
*/ err |= restore_fpu_state(sc); - *pr2 = regs->a0; return err; } -asmlinkage int -do_rt_sigreturn(void) +SYSCALL_DEFINE0(rt_sigreturn) { - sigset_t set; - int a0; struct pt_regs *regs = current_pt_regs(); - struct rt_sigframe *frame = (struct rt_sigframe *)(regs->usp); + struct rt_sigframe __user *frame; + struct task_struct *task; + sigset_t set; + + /* Always make any pending restarted system calls return -EINTR */ + current->restart_block.fn = do_no_restart_syscall; + + frame = (struct rt_sigframe __user *)regs->usp; if (!access_ok(frame, sizeof(*frame))) goto badframe; + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; - sigdelsetmask(&set, (sigmask(SIGKILL) | sigmask(SIGSTOP))); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + set_current_blocked(&set); - if (restore_sigframe(regs, &frame->uc.uc_mcontext, &a0)) + if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) goto badframe; - return a0; + if (restore_altstack(&frame->uc.uc_stack)) + goto badframe; + + return regs->a0; badframe: - force_sig(SIGSEGV, current); + task = current; + force_sig(SIGSEGV, task); return 0; } -static int setup_sigframe(struct sigcontext *sc, struct pt_regs *regs) +static int setup_sigcontext(struct rt_sigframe __user *frame, + struct pt_regs *regs) { + struct sigcontext __user *sc = &frame->uc.uc_mcontext; int err = 0; - err |= copy_to_user(&sc->sc_pt_regs, regs, sizeof(struct pt_regs)); + err |= __copy_to_user(&sc->sc_pt_regs, regs, sizeof(struct pt_regs)); err |= save_fpu_state(sc); return err; } -static inline void * -get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) +static inline void __user *get_sigframe(struct ksignal *ksig, + struct pt_regs *regs, size_t framesize) { - unsigned long usp; + unsigned long sp; + /* Default to using normal stack */ + sp = regs->usp; + + /* + * If we are on the alternate signal stack and would overflow it, don't. + * Return an always-bogus address instead so we will die with SIGSEGV. + */ + if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize))) + return (void __user __force *)(-1UL); - /* Default to using normal stack. */ - usp = regs->usp; + /* This is the X/Open sanctioned signal stack switching. */ + sp = sigsp(sp, ksig) - framesize; - /* This is the X/Open sanctioned signal stack switching. */ - if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(usp)) { - if (!on_sig_stack(usp)) - usp = current->sas_ss_sp + current->sas_ss_size; - } - return (void *)((usp - frame_size) & -8UL); + /* Align the stack frame. */ + sp &= -8UL; + + return (void __user *)sp; } static int @@ -140,205 +131,128 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { struct rt_sigframe *frame; int err = 0; - struct csky_vdso *vdso = current->mm->context.vdso; - frame = get_sigframe(&ksig->ka, regs, sizeof(*frame)); - if (!frame) - return 1; + frame = get_sigframe(ksig, regs, sizeof(*frame)); + if (!access_ok(frame, sizeof(*frame))) + return -EFAULT; - err |= __put_user(ksig->sig, &frame->sig); - err |= __put_user(&frame->info, &frame->pinfo); - err |= __put_user(&frame->uc, &frame->puc); err |= copy_siginfo_to_user(&frame->info, &ksig->info); - /* Create the ucontext. */ + /* Create the ucontext. 
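get_sigframe() above reserves framesize bytes below the chosen stack pointer and then rounds down with sp &= -8UL. The arithmetic in isolation, with hypothetical numbers:

#include <assert.h>

int main(void)
{
	unsigned long usp = 0x7ffff00dUL;	/* hypothetical, misaligned user sp */
	unsigned long framesize = 0x2c4;	/* illustrative sizeof(struct rt_sigframe) */

	unsigned long sp = (usp - framesize) & -8UL;

	assert((sp & 7) == 0);			/* 8-byte aligned */
	assert(sp <= usp - framesize);		/* frame sits wholly below the old sp */
	return 0;
}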
*/ err |= __put_user(0, &frame->uc.uc_flags); - err |= __put_user(0, &frame->uc.uc_link); - err |= __put_user((void *)current->sas_ss_sp, - &frame->uc.uc_stack.ss_sp); - err |= __put_user(sas_ss_flags(regs->usp), - &frame->uc.uc_stack.ss_flags); - err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); - err |= setup_sigframe(&frame->uc.uc_mcontext, regs); - err |= copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); - + err |= __put_user(NULL, &frame->uc.uc_link); + err |= __save_altstack(&frame->uc.uc_stack, regs->usp); + err |= setup_sigcontext(frame, regs); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); if (err) - goto give_sigsegv; + return -EFAULT; - /* Set up registers for signal handler */ - regs->usp = (unsigned long)frame; - regs->pc = (unsigned long)ksig->ka.sa.sa_handler; - regs->lr = (unsigned long)vdso->rt_signal_retcode; + /* Set up to return from userspace. */ + regs->lr = (unsigned long)(vdso->rt_signal_retcode); -adjust_stack: - regs->a0 = ksig->sig; /* first arg is signo */ - regs->a1 = (unsigned long)(&(frame->info)); - regs->a2 = (unsigned long)(&(frame->uc)); - return err; + /* + * Set up registers for signal handler. + * Registers that we don't modify keep the value they had from + * user-space at the time we took the signal. + * We always pass siginfo and mcontext, regardless of SA_SIGINFO, + * since some things rely on this (e.g. glibc's debug/segfault.c). + */ + regs->pc = (unsigned long)ksig->ka.sa.sa_handler; + regs->usp = (unsigned long)frame; + regs->a0 = ksig->sig; /* a0: signal number */ + regs->a1 = (unsigned long)(&(frame->info)); /* a1: siginfo pointer */ + regs->a2 = (unsigned long)(&(frame->uc)); /* a2: ucontext pointer */ -give_sigsegv: - if (ksig->sig == SIGSEGV) - ksig->ka.sa.sa_handler = SIG_DFL; - force_sig(SIGSEGV, current); - goto adjust_stack; + return 0; } -/* - * OK, we're invoking a handler - */ -static int -handle_signal(struct ksignal *ksig, struct pt_regs *regs) +static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) { - int ret; sigset_t *oldset = sigmask_to_save(); + int ret; - /* - * set up the stack frame, regardless of SA_SIGINFO, - * and pass info anyway. - */ - ret = setup_rt_frame(ksig, oldset, regs); + /* Are we from a system call? */ + if (in_syscall(regs)) { + /* Avoid additional syscall restarting via ret_from_exception */ + forget_syscall(regs); + + /* If so, check system call restarting.. */ + switch (regs->a0) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + regs->a0 = -EINTR; + break; - if (ret != 0) { - force_sigsegv(ksig->sig, current); - return ret; + case -ERESTARTSYS: + if (!(ksig->ka.sa.sa_flags & SA_RESTART)) { + regs->a0 = -EINTR; + break; + } + /* fallthrough */ + case -ERESTARTNOINTR: + regs->a0 = regs->orig_a0; + regs->pc -= TRAP0_SIZE; + break; + } } - /* Block the signal if we were successful. */ - spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked, ¤t->blocked, &ksig->ka.sa.sa_mask); - if (!(ksig->ka.sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked, ksig->sig); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + /* Set up the stack frame */ + ret = setup_rt_frame(ksig, oldset, regs); - return 0; + signal_setup_done(ret, ksig, 0); } -/* - * Note that 'init' is a special process: it doesn't get signals it doesn't - * want to handle. Thus you cannot kill init even with a SIGKILL even by - * mistake. 
- * - * Note that we go through the signals twice: once to check the signals - * that the kernel can handle, and then we build all the user-level signal - * handling stack-frames in one go after that. - */ -static void do_signal(struct pt_regs *regs, int syscall) +static void do_signal(struct pt_regs *regs) { - unsigned int retval = 0, continue_addr = 0, restart_addr = 0; struct ksignal ksig; - /* - * We want the common case to go fast, which - * is why we may in certain cases get here from - * kernel mode. Just return without doing anything - * if so. - */ - if (!user_mode(regs)) + if (get_signal(&ksig)) { + /* Actually deliver the signal */ + handle_signal(&ksig, regs); return; + } - /* - * If we were from a system call, check for system call restarting... - */ - if (syscall) { - continue_addr = regs->pc; -#if defined(__CSKYABIV2__) - restart_addr = continue_addr - 4; -#else - restart_addr = continue_addr - 2; -#endif - retval = regs->a0; + /* Did we come from a system call? */ + if (in_syscall(regs)) { + /* Avoid additional syscall restarting via ret_from_exception */ + forget_syscall(regs); - /* - * Prepare for system call restart. We do this here so that a - * debugger will see the already changed. - */ - switch (retval) { + /* Restart the system call - no handlers present */ + switch (regs->a0) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: regs->a0 = regs->orig_a0; - regs->pc = restart_addr; + regs->pc -= TRAP0_SIZE; break; case -ERESTART_RESTARTBLOCK: - regs->a0 = -EINTR; + regs->a0 = regs->orig_a0; + regs_syscallid(regs) = __NR_restart_syscall; + regs->pc -= TRAP0_SIZE; break; } } - if (try_to_freeze()) - goto no_signal; - /* - * Get the signal to deliver. When running under ptrace, at this - * point the debugger may change all our registers ... + * If there is no signal to deliver, we just put the saved + * sigmask back. */ - if (get_signal(&ksig)) { - /* - * Depending on the signal settings we may need to revert the - * decision to restart the system call. But skip this if a - * debugger has chosen to restart at a different PC. - */ - if (regs->pc == restart_addr) { - if (retval == -ERESTARTNOHAND || - (retval == -ERESTARTSYS && - !(ksig.ka.sa.sa_flags & SA_RESTART))) { - regs->a0 = -EINTR; - regs->pc = continue_addr; - } - } - - /* Whee! Actually deliver the signal. */ - if (handle_signal(&ksig, regs) == 0) { - /* - * A signal was successfully delivered; the saved - * sigmask will have been stored in the signal frame, - * and will be restored by sigreturn, so we can simply - * clear the TIF_RESTORE_SIGMASK flag. - */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - clear_thread_flag(TIF_RESTORE_SIGMASK); - } - return; - } - -no_signal: - if (syscall) { - /* - * Handle restarting a different system call. As above, - * if a debugger has chosen to restart at a different PC, - * ignore the restart. - */ - if (retval == -ERESTART_RESTARTBLOCK - && regs->pc == continue_addr) { -#if defined(__CSKYABIV2__) - regs->regs[3] = __NR_restart_syscall; - regs->pc -= 4; -#else - regs->regs[9] = __NR_restart_syscall; - regs->pc -= 2; -#endif - } - - /* - * If there's no signal to deliver, we just put the saved - * sigmask back. 
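The restart cases above all reduce to the same two register edits: put the saved first argument back in a0 and rewind pc over the trap 0 instruction so the syscall re-executes. A compact model (the struct and function names are stand-ins; 513 is the kernel-internal ERESTARTNOINTR value):

#include <assert.h>

#define TRAP0_SIZE     4	/* abiv2; abiv1 uses 2 */
#define ERESTARTNOINTR 513	/* kernel-internal restart code */

struct regs_model { unsigned long pc; long a0, orig_a0; };

static void restart_model(struct regs_model *r)
{
	r->a0 = r->orig_a0;	/* undo the -ERESTART* return value */
	r->pc -= TRAP0_SIZE;	/* back up onto the trap 0 instruction */
}

int main(void)
{
	struct regs_model r = { .pc = 0x8000, .a0 = -ERESTARTNOINTR, .orig_a0 = 42 };
	restart_model(&r);
	assert(r.pc == 0x8000 - TRAP0_SIZE && r.a0 == 42);
	return 0;
}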
- */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) { - clear_thread_flag(TIF_RESTORE_SIGMASK); - sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); - } - } + restore_saved_sigmask(); } -asmlinkage void -do_notify_resume(unsigned int thread_flags, struct pt_regs *regs, int syscall) +/* + * notification of userspace execution resumption + * - triggered by the _TIF_WORK_MASK flags + */ +asmlinkage void do_notify_resume(struct pt_regs *regs, + unsigned long thread_info_flags) { - if (thread_flags & _TIF_SIGPENDING) - do_signal(regs, syscall); + /* Handle pending signal delivery */ + if (thread_info_flags & _TIF_SIGPENDING) + do_signal(regs); - if (thread_flags & _TIF_NOTIFY_RESUME) { + if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); } diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c index d6f4b66b93e2..18041f46ded1 100644 --- a/arch/csky/mm/fault.c +++ b/arch/csky/mm/fault.c @@ -15,9 +15,9 @@ #include <linux/smp.h> #include <linux/version.h> #include <linux/vt_kern.h> -#include <linux/kernel.h> #include <linux/extable.h> #include <linux/uaccess.h> +#include <linux/perf_event.h> #include <asm/hardirq.h> #include <asm/mmu_context.h> @@ -82,7 +82,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, unsigned long pgd_base; - pgd_base = tlb_get_pgd(); + pgd_base = (unsigned long)__va(get_pgd()); pgd = (pgd_t *)pgd_base + offset; pgd_k = init_mm.pgd + offset; @@ -107,6 +107,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, return; } #endif + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); /* * If we're in an interrupt or have no user * context, we must not take the fault.. @@ -154,10 +156,15 @@ good_area: goto bad_area; BUG(); } - if (fault & VM_FAULT_MAJOR) + if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - else + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, + address); + } else { tsk->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, + address); + } up_read(&mm->mmap_sem); return; diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index 123d8f54be4a..f2e22058e488 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -42,7 +42,6 @@ generic-y += scatterlist.h generic-y += sections.h generic-y += serial.h generic-y += shmparam.h -generic-y += sizes.h generic-y += spinlock.h generic-y += timex.h generic-y += tlbflush.h diff --git a/arch/h8300/include/asm/syscall.h b/arch/h8300/include/asm/syscall.h index ddd483c6ca95..01666b8bb263 100644 --- a/arch/h8300/include/asm/syscall.h +++ b/arch/h8300/include/asm/syscall.h @@ -8,6 +8,7 @@ #include <linux/linkage.h> #include <linux/types.h> #include <linux/ptrace.h> +#include <uapi/linux/audit.h> static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) @@ -27,6 +28,11 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, *args = regs->er6; } +static inline int +syscall_get_arch(struct task_struct *task) +{ + return AUDIT_ARCH_H8300; +} /* Misc syscall related bits */ diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c index 0f04a5e9aa4f..1eab16b1a0bc 100644 --- a/arch/h8300/mm/init.c +++ b/arch/h8300/mm/init.c @@ -102,17 +102,3 @@ void __init mem_init(void) mem_init_print_info(NULL); } - - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - free_reserved_area((void *)start, (void *)end, -1, "initrd"); -} -#endif - -void -free_initmem(void) -{ - 
free_initmem_default(-1); -} diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 3e54a53208d5..b7d404bbaa0f 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -22,7 +22,6 @@ config HEXAGON select GENERIC_IRQ_SHOW select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK - select ARCH_DISCARD_MEMBLOCK select NEED_SG_DMA_LENGTH select NO_IOPORT_MAP select GENERIC_IOMAP diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 6234a303d2a3..4a3d72f76ea2 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -32,7 +32,6 @@ generic-y += sections.h generic-y += segment.h generic-y += serial.h generic-y += shmparam.h -generic-y += sizes.h generic-y += topology.h generic-y += trace_clock.h generic-y += unaligned.h diff --git a/arch/hexagon/include/asm/elf.h b/arch/hexagon/include/asm/elf.h index 80311e7b8ca6..d10fbd54ae51 100644 --- a/arch/hexagon/include/asm/elf.h +++ b/arch/hexagon/include/asm/elf.h @@ -23,11 +23,7 @@ #include <asm/ptrace.h> #include <asm/user.h> - -/* - * This should really be in linux/elf-em.h. - */ -#define EM_HEXAGON 164 /* QUALCOMM Hexagon */ +#include <linux/elf-em.h> struct elf32_hdr; diff --git a/arch/hexagon/include/asm/syscall.h b/arch/hexagon/include/asm/syscall.h index ae3a1e24fabd..dab26a71f577 100644 --- a/arch/hexagon/include/asm/syscall.h +++ b/arch/hexagon/include/asm/syscall.h @@ -21,6 +21,8 @@ #ifndef _ASM_HEXAGON_SYSCALL_H #define _ASM_HEXAGON_SYSCALL_H +#include <uapi/linux/audit.h> + typedef long (*syscall_fn)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); @@ -41,4 +43,10 @@ static inline void syscall_get_arguments(struct task_struct *task, { memcpy(args, &(®s->r00)[0], 6 * sizeof(args[0])); } + +static inline int syscall_get_arch(struct task_struct *task) +{ + return AUDIT_ARCH_HEXAGON; +} + #endif diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c index 1719ede9e9bd..41cf34243ea1 100644 --- a/arch/hexagon/mm/init.c +++ b/arch/hexagon/mm/init.c @@ -85,16 +85,6 @@ void __init mem_init(void) } /* - * free_initmem - frees memory used by stuff declared with __init - * - * Todo: free pages between __init_begin and __init_end; possibly - * some devtree related stuff as well. - */ -void __ref free_initmem(void) -{ -} - -/* * free_initrd_mem - frees... initrd memory. 
* @start - start of init memory * @end - end of init memory diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 73a26f04644e..7468d8e50467 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -33,7 +33,6 @@ config IA64 select ARCH_HAS_DMA_COHERENT_TO_PFN if SWIOTLB select ARCH_HAS_SYNC_DMA_FOR_CPU if SWIOTLB select VIRT_TO_BUS - select ARCH_DISCARD_MEMBLOCK select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h index 0d9e7fab4a79..da108cd45174 100644 --- a/arch/ia64/include/asm/syscall.h +++ b/arch/ia64/include/asm/syscall.h @@ -74,7 +74,7 @@ static inline void syscall_set_arguments(struct task_struct *task, ia64_syscall_get_set_arguments(task, regs, args, 1); } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_IA64; } diff --git a/arch/ia64/include/uapi/asm/sockios.h b/arch/ia64/include/uapi/asm/sockios.h deleted file mode 100644 index f27a12f95d20..000000000000 --- a/arch/ia64/include/uapi/asm/sockios.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_SOCKIOS_H -#define _ASM_IA64_SOCKIOS_H - -/* - * Socket-level I/O control calls. - * - * Based on <asm-i386/sockios.h>. - * - * Modified 1998, 1999 - * David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co - */ -#define FIOSETOWN 0x8901 -#define SIOCSPGRP 0x8902 -#define FIOGETOWN 0x8903 -#define SIOCGPGRP 0x8904 -#define SIOCATMARK 0x8905 -#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ -#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ - -#endif /* _ASM_IA64_SOCKIOS_H */ diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 41eb281709da..1435e7a1a8cd 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -177,7 +177,7 @@ struct acpi_table_madt *acpi_madt __initdata; static u8 has_8259; static int __init -acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, +acpi_parse_lapic_addr_ovr(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_local_apic_override *lapic; @@ -195,7 +195,7 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, } static int __init -acpi_parse_lsapic(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_lsapic(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_local_sapic *lsapic; @@ -216,7 +216,7 @@ acpi_parse_lsapic(struct acpi_subtable_header * header, const unsigned long end) } static int __init -acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_local_apic_nmi *lacpi_nmi; @@ -230,7 +230,7 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e } static int __init -acpi_parse_iosapic(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_iosapic(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_io_sapic *iosapic; @@ -245,7 +245,7 @@ acpi_parse_iosapic(struct acpi_subtable_header * header, const unsigned long end static unsigned int __initdata acpi_madt_rev; static int __init -acpi_parse_plat_int_src(struct acpi_subtable_header * header, +acpi_parse_plat_int_src(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_interrupt_source *plintsrc; @@ -329,7 +329,7 @@ unsigned int 
get_cpei_target_cpu(void) } static int __init -acpi_parse_int_src_ovr(struct acpi_subtable_header * header, +acpi_parse_int_src_ovr(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_interrupt_override *p; @@ -350,7 +350,7 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, } static int __init -acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_nmi_src(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_nmi_source *nmi_src; diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index e49200e31750..d28e29103bdb 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -666,14 +666,14 @@ mem_init (void) } #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, restrictions); if (ret) printk("%s: Problem encountered in __add_pages() as ret=%d\n", __func__, ret); @@ -682,20 +682,15 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) +void arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct zone *zone; - int ret; zone = page_zone(pfn_to_page(start_pfn)); - ret = __remove_pages(zone, start_pfn, nr_pages, altmap); - if (ret) - pr_warn("%s: Problem encountered in __remove_pages() as" - " ret=%d\n", __func__, ret); - - return ret; + __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif #endif diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index fe5cc2da6d10..218e037ef901 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -26,7 +26,6 @@ config M68K select MODULES_USE_ELF_RELA select OLD_SIGSUSPEND3 select OLD_SIGACTION - select ARCH_DISCARD_MEMBLOCK select MMU_GATHER_NO_RANGE if MMU config CPU_BIG_ENDIAN diff --git a/arch/m68k/configs/amcore_defconfig b/arch/m68k/configs/amcore_defconfig index 0857cdbfde0c..d5e683dd885d 100644 --- a/arch/m68k/configs/amcore_defconfig +++ b/arch/m68k/configs/amcore_defconfig @@ -12,7 +12,6 @@ CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set -# CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_CFQ is not set # CONFIG_MMU is not set diff --git a/arch/m68k/configs/m5475evb_defconfig b/arch/m68k/configs/m5475evb_defconfig index 4f4ccd13c11b..434bd3750966 100644 --- a/arch/m68k/configs/m5475evb_defconfig +++ b/arch/m68k/configs/m5475evb_defconfig @@ -11,7 +11,6 @@ CONFIG_SYSCTL_SYSCALL=y # CONFIG_AIO is not set CONFIG_EMBEDDED=y CONFIG_MODULES=y -# CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set diff --git a/arch/m68k/configs/stmark2_defconfig b/arch/m68k/configs/stmark2_defconfig index 69f23c7b0497..27fa9465d19d 100644 --- a/arch/m68k/configs/stmark2_defconfig +++ b/arch/m68k/configs/stmark2_defconfig @@ -17,7 +17,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is 
not set -# CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set CONFIG_BLK_CMDLINE_PARSER=y # CONFIG_MMU is not set diff --git a/arch/m68k/include/asm/syscall.h b/arch/m68k/include/asm/syscall.h new file mode 100644 index 000000000000..465ac039be09 --- /dev/null +++ b/arch/m68k/include/asm/syscall.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_M68K_SYSCALL_H +#define _ASM_M68K_SYSCALL_H + +#include <uapi/linux/audit.h> + +static inline int syscall_get_arch(struct task_struct *task) +{ + return AUDIT_ARCH_M68K; +} + +#endif /* _ASM_M68K_SYSCALL_H */ diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c index 8868a4c9adae..778cacb7d57b 100644 --- a/arch/m68k/mm/init.c +++ b/arch/m68k/mm/init.c @@ -147,10 +147,3 @@ void __init mem_init(void) init_pointer_tables(); mem_init_print_info(NULL); } - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - free_reserved_area((void *)start, (void *)end, -1, "initrd"); -} -#endif diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h index 833d3a53dab3..3a6924f3cbde 100644 --- a/arch/microblaze/include/asm/syscall.h +++ b/arch/microblaze/include/asm/syscall.h @@ -105,7 +105,7 @@ static inline void syscall_set_arguments(struct task_struct *task, asmlinkage unsigned long do_syscall_trace_enter(struct pt_regs *regs); asmlinkage void do_syscall_trace_leave(struct pt_regs *regs); -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_MICROBLAZE; } diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 7e97d44f6538..a015a951c8b7 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -186,18 +186,6 @@ void __init setup_memory(void) paging_init(); } -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - free_reserved_area((void *)start, (void *)end, -1, "initrd"); -} -#endif - -void free_initmem(void) -{ - free_initmem_default(-1); -} - void __init mem_init(void) { high_memory = (void *)__va(memory_start + lowmem_size - 1); diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index c2ce1e42b888..8fe54fda31dc 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -75,7 +75,7 @@ static void __iomem *__ioremap(phys_addr_t addr, unsigned long size, p >= memory_start && p < virt_to_phys(high_memory) && !(p >= __virt_to_phys((phys_addr_t)__bss_stop) && p < __virt_to_phys((phys_addr_t)__bss_stop))) { - pr_warn("__ioremap(): phys addr "PTE_FMT" is RAM lr %pf\n", + pr_warn("__ioremap(): phys addr "PTE_FMT" is RAM lr %ps\n", (unsigned long)p, __builtin_return_address(0)); return NULL; } diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index b9c48b27162d..677e5bfeff47 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -5,7 +5,6 @@ config MIPS select ARCH_32BIT_OFF_T if !64BIT select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT select ARCH_CLOCKSOURCE_DATA - select ARCH_DISCARD_MEMBLOCK select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UBSAN_SANITIZE_ALL @@ -44,8 +43,7 @@ config MIPS select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES && 64BIT - select HAVE_CBPF_JIT if (!64BIT && !CPU_MICROMIPS) - select HAVE_EBPF_JIT if (64BIT && !CPU_MICROMIPS) + select HAVE_EBPF_JIT if (!CPU_MICROMIPS) select HAVE_CONTEXT_TRACKING select 
HAVE_COPY_THREAD_TLS select HAVE_C_RECORDMCOUNT @@ -276,7 +274,7 @@ config BCM47XX select BCM47XX_SPROM select BCM47XX_SSB if !BCM47XX_BCMA help - Support for BCM47XX based boards + Support for BCM47XX based boards config BCM63XX bool "Broadcom BCM63XX based boards" @@ -295,7 +293,7 @@ config BCM63XX select MIPS_L1_CACHE_SHIFT_4 select CLKDEV_LOOKUP help - Support for BCM63XX based boards + Support for BCM63XX based boards config MIPS_COBALT bool "Cobalt Server" @@ -374,10 +372,10 @@ config MACH_JAZZ select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_100HZ help - This a family of machines based on the MIPS R4030 chipset which was - used by several vendors to build RISC/os and Windows NT workstations. - Members include the Acer PICA, MIPS Magnum 4000, MIPS Millennium and - Olivetti M700-10 workstations. + This a family of machines based on the MIPS R4030 chipset which was + used by several vendors to build RISC/os and Windows NT workstations. + Members include the Acer PICA, MIPS Magnum 4000, MIPS Millennium and + Olivetti M700-10 workstations. config MACH_INGENIC bool "Ingenic SoC based machines" @@ -573,14 +571,14 @@ config NXP_STB220 bool "NXP STB220 board" select SOC_PNX833X help - Support for NXP Semiconductors STB220 Development Board. + Support for NXP Semiconductors STB220 Development Board. config NXP_STB225 bool "NXP 225 board" select SOC_PNX833X select SOC_PNX8335 help - Support for NXP Semiconductors STB225 Development Board. + Support for NXP Semiconductors STB225 Development Board. config PMC_MSP bool "PMC-Sierra MSP chipsets" @@ -722,9 +720,9 @@ config SGI_IP28 select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN select MIPS_L1_CACHE_SHIFT_7 - help - This is the SGI Indigo2 with R10000 processor. To compile a Linux - kernel that runs on these, say Y here. + help + This is the SGI Indigo2 with R10000 processor. To compile a Linux + kernel that runs on these, say Y here. config SGI_IP32 bool "SGI IP32 (O2)" @@ -1168,9 +1166,9 @@ config HOLES_IN_ZONE config SYS_SUPPORTS_RELOCATABLE bool help - Selected if the platform supports relocating the kernel. - The platform must provide plat_get_fdt() if it selects CONFIG_USE_OF - to allow access to command line and entropy sources. + Selected if the platform supports relocating the kernel. + The platform must provide plat_get_fdt() if it selects CONFIG_USE_OF + to allow access to command line and entropy sources. config MIPS_CBPF_JIT def_bool y @@ -2113,8 +2111,8 @@ config MIPS_PGD_C0_CONTEXT # Set to y for ptrace access to watch registers. # config HARDWARE_WATCHPOINTS - bool - default y if CPU_MIPSR1 || CPU_MIPSR2 || CPU_MIPSR6 + bool + default y if CPU_MIPSR1 || CPU_MIPSR2 || CPU_MIPSR6 menu "Kernel type" @@ -2178,10 +2176,10 @@ config PAGE_SIZE_4KB bool "4kB" depends on !CPU_LOONGSON2 && !CPU_LOONGSON3 help - This option select the standard 4kB Linux page size. On some - R3000-family processors this is the only available page size. Using - 4kB page size will minimize memory consumption and is therefore - recommended for low memory systems. + This option select the standard 4kB Linux page size. On some + R3000-family processors this is the only available page size. Using + 4kB page size will minimize memory consumption and is therefore + recommended for low memory systems. 
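A change threaded through this whole merge is syscall_get_arch() gaining the audited task as a parameter (hexagon, ia64, m68k and microblaze above; mips below). A minimal sketch of what that enables; the helper name here is hypothetical, not something this merge adds:

	#include <asm/syscall.h>

	/* hypothetical: classify the syscall ABI of a ptrace-stopped tracee */
	static int audit_classify_tracee(struct task_struct *tracee)
	{
		/*
		 * On mips the result is AUDIT_ARCH_MIPS plus the 64-bit/n32
		 * bits derived from the *tracee's* TIF_32BIT_* flags, not
		 * from whichever task happens to run the audit code.
		 */
		return syscall_get_arch(tracee);
	}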
config PAGE_SIZE_8KB bool "8kB" @@ -2474,7 +2472,6 @@ config SB1_PASS_2_1_WORKAROUNDS depends on CPU_SB1 && CPU_SB1_PASS_2 default y - choice prompt "SmartMIPS or microMIPS ASE support" @@ -2682,16 +2679,16 @@ config RANDOMIZE_BASE bool "Randomize the address of the kernel image" depends on RELOCATABLE ---help--- - Randomizes the physical and virtual address at which the - kernel image is loaded, as a security feature that - deters exploit attempts relying on knowledge of the location - of kernel internals. + Randomizes the physical and virtual address at which the + kernel image is loaded, as a security feature that + deters exploit attempts relying on knowledge of the location + of kernel internals. - Entropy is generated using any coprocessor 0 registers available. + Entropy is generated using any coprocessor 0 registers available. - The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET. + The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET. - If unsure, say N. + If unsure, say N. config RANDOMIZE_BASE_MAX_OFFSET hex "Maximum kASLR offset" if EXPERT @@ -2821,7 +2818,7 @@ choice prompt "Timer frequency" default HZ_250 help - Allows the configuration of the timer frequency. + Allows the configuration of the timer frequency. config HZ_24 bool "24 HZ" if SYS_SUPPORTS_24HZ || SYS_SUPPORTS_ARBIT_HZ @@ -3121,10 +3118,10 @@ config ARCH_MMAP_RND_BITS_MAX default 15 config ARCH_MMAP_RND_COMPAT_BITS_MIN - default 8 + default 8 config ARCH_MMAP_RND_COMPAT_BITS_MAX - default 15 + default 15 config I8253 bool diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c index d129475fd40d..a95a894aceaf 100644 --- a/arch/mips/alchemy/common/clock.c +++ b/arch/mips/alchemy/common/clock.c @@ -160,7 +160,7 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name, id.name = ALCHEMY_CPU_CLK; id.parent_names = &parent_name; id.num_parents = 1; - id.flags = CLK_IS_BASIC; + id.flags = 0; id.ops = &alchemy_clkops_cpu; h->init = &id; diff --git a/arch/mips/bcm47xx/Kconfig b/arch/mips/bcm47xx/Kconfig index 29471038d817..6889f74e06f5 100644 --- a/arch/mips/bcm47xx/Kconfig +++ b/arch/mips/bcm47xx/Kconfig @@ -15,9 +15,9 @@ config BCM47XX_SSB select SSB_DRIVER_GPIO default y help - Add support for old Broadcom BCM47xx boards with Sonics Silicon Backplane support. + Add support for old Broadcom BCM47xx boards with Sonics Silicon Backplane support. - This will generate an image with support for SSB and MIPS32 R1 instruction set. + This will generate an image with support for SSB and MIPS32 R1 instruction set. config BCM47XX_BCMA bool "BCMA Support for Broadcom BCM47XX" @@ -31,8 +31,8 @@ config BCM47XX_BCMA select BCMA_DRIVER_GPIO default y help - Add support for new Broadcom BCM47xx boards with Broadcom specific Advanced Microcontroller Bus. + Add support for new Broadcom BCM47xx boards with Broadcom specific Advanced Microcontroller Bus. - This will generate an image with support for BCMA and MIPS32 R2 instruction set. + This will generate an image with support for BCMA and MIPS32 R2 instruction set. 
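The alchemy clock hunk above replaces CLK_IS_BASIC with 0, as that flag is being retired from the common clk framework. A sketch of the resulting init data, with the field values taken from the hunk and the struct layout assumed from <linux/clk-provider.h>:

	#include <linux/clk-provider.h>

	static const char *parent_name;	/* provided by the caller */

	static struct clk_init_data id = {
		.name		= ALCHEMY_CPU_CLK,
		.parent_names	= &parent_name,
		.num_parents	= 1,
		.flags		= 0,	/* formerly CLK_IS_BASIC */
		.ops		= &alchemy_clkops_cpu,
	};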
endif diff --git a/arch/mips/bcm63xx/boards/Kconfig b/arch/mips/bcm63xx/boards/Kconfig index f60d96610ace..492c3bd005d5 100644 --- a/arch/mips/bcm63xx/boards/Kconfig +++ b/arch/mips/bcm63xx/boards/Kconfig @@ -5,7 +5,7 @@ choice default BOARD_BCM963XX config BOARD_BCM963XX - bool "Generic Broadcom 963xx boards" + bool "Generic Broadcom 963xx boards" select SSB endchoice diff --git a/arch/mips/configs/ar7_defconfig b/arch/mips/configs/ar7_defconfig index 9fbfb6e5c7d2..c83fdf649327 100644 --- a/arch/mips/configs/ar7_defconfig +++ b/arch/mips/configs/ar7_defconfig @@ -18,7 +18,6 @@ CONFIG_KEXEC=y # CONFIG_SECCOMP is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -# CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_BSD_DISKLABEL=y diff --git a/arch/mips/configs/bcm47xx_defconfig b/arch/mips/configs/bcm47xx_defconfig index 249f5285e343..91ce75edbfb4 100644 --- a/arch/mips/configs/bcm47xx_defconfig +++ b/arch/mips/configs/bcm47xx_defconfig @@ -41,7 +41,7 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_PHYSMAP=y CONFIG_MTD_BCM47XXSFLASH=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_BCM47XXNFLASH=y CONFIG_NETDEVICES=y CONFIG_B44=y diff --git a/arch/mips/configs/ci20_defconfig b/arch/mips/configs/ci20_defconfig index 412800d5d7e0..50bebce28500 100644 --- a/arch/mips/configs/ci20_defconfig +++ b/arch/mips/configs/ci20_defconfig @@ -51,7 +51,7 @@ CONFIG_DEVTMPFS=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=32 CONFIG_MTD=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_JZ4780=y CONFIG_MTD_UBI=y CONFIG_MTD_UBI_FASTMAP=y diff --git a/arch/mips/configs/db1xxx_defconfig b/arch/mips/configs/db1xxx_defconfig index 34633b7611cb..bc9b6ae046b2 100644 --- a/arch/mips/configs/db1xxx_defconfig +++ b/arch/mips/configs/db1xxx_defconfig @@ -95,8 +95,8 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP=y CONFIG_MTD_M25P80=y CONFIG_MTD_SST25L=y -CONFIG_MTD_NAND=y -CONFIG_MTD_NAND_ECC_BCH=y +CONFIG_MTD_RAW_NAND=y +CONFIG_MTD_NAND_ECC_SW_BCH=y CONFIG_MTD_NAND_AU1550=y CONFIG_MTD_NAND_PLATFORM=y CONFIG_MTD_SPI_NOR=y diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig index 0c86ed86266a..30a6eafdb1d0 100644 --- a/arch/mips/configs/decstation_defconfig +++ b/arch/mips/configs/decstation_defconfig @@ -17,7 +17,6 @@ CONFIG_TC=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_SRCVERSION_ALL=y -# CONFIG_LBDAF is not set CONFIG_PARTITION_ADVANCED=y CONFIG_OSF_PARTITION=y # CONFIG_EFI_PARTITION is not set diff --git a/arch/mips/configs/decstation_r4k_defconfig b/arch/mips/configs/decstation_r4k_defconfig index 0e54ab2680ce..e2b58dbf4aa9 100644 --- a/arch/mips/configs/decstation_r4k_defconfig +++ b/arch/mips/configs/decstation_r4k_defconfig @@ -16,7 +16,6 @@ CONFIG_TC=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_SRCVERSION_ALL=y -# CONFIG_LBDAF is not set CONFIG_PARTITION_ADVANCED=y CONFIG_OSF_PARTITION=y # CONFIG_EFI_PARTITION is not set diff --git a/arch/mips/configs/generic/board-ni169445.config b/arch/mips/configs/generic/board-ni169445.config index f72223b366ca..1ed0d3e8715e 100644 --- a/arch/mips/configs/generic/board-ni169445.config +++ b/arch/mips/configs/generic/board-ni169445.config @@ -15,9 +15,9 @@ CONFIG_MTD=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_NAND_ECC=y -CONFIG_MTD_NAND_ECC_BCH=y -CONFIG_MTD_NAND=y +CONFIG_MTD_NAND_ECC_SW_HAMMING=y +CONFIG_MTD_NAND_ECC_SW_BCH=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_GPIO=y CONFIG_MTD_NAND_IDS=y diff --git 
a/arch/mips/configs/generic/board-ocelot.config b/arch/mips/configs/generic/board-ocelot.config index 184eb65a6ba7..1134fbb99fc2 100644 --- a/arch/mips/configs/generic/board-ocelot.config +++ b/arch/mips/configs/generic/board-ocelot.config @@ -10,7 +10,7 @@ CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y CONFIG_MTD_M25P80=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_PLATFORM=y CONFIG_MTD_SPI_NOR=y CONFIG_MTD_UBI=y diff --git a/arch/mips/configs/generic_defconfig b/arch/mips/configs/generic_defconfig index 5d80521e5d5a..714169e411cf 100644 --- a/arch/mips/configs/generic_defconfig +++ b/arch/mips/configs/generic_defconfig @@ -26,6 +26,7 @@ CONFIG_MIPS_CPS=y CONFIG_HIGHMEM=y CONFIG_NR_CPUS=16 CONFIG_MIPS_O32_FP64_SUPPORT=y +CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_TRIM_UNUSED_KSYMS=y diff --git a/arch/mips/configs/loongson1b_defconfig b/arch/mips/configs/loongson1b_defconfig index b064d68a5424..3d390a7494d6 100644 --- a/arch/mips/configs/loongson1b_defconfig +++ b/arch/mips/configs/loongson1b_defconfig @@ -19,7 +19,6 @@ CONFIG_MACH_LOONGSON32=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y -# CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_NET=y @@ -42,7 +41,7 @@ CONFIG_DEVTMPFS_MOUNT=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=y CONFIG_SCSI=m diff --git a/arch/mips/configs/loongson1c_defconfig b/arch/mips/configs/loongson1c_defconfig index 5d76559b56cd..247d56e94c0a 100644 --- a/arch/mips/configs/loongson1c_defconfig +++ b/arch/mips/configs/loongson1c_defconfig @@ -20,7 +20,6 @@ CONFIG_LOONGSON1_LS1C=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y -# CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_NET=y @@ -43,7 +42,7 @@ CONFIG_DEVTMPFS_MOUNT=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=y CONFIG_SCSI=m diff --git a/arch/mips/configs/qi_lb60_defconfig b/arch/mips/configs/qi_lb60_defconfig index 7671fe6a8042..1a0677d04982 100644 --- a/arch/mips/configs/qi_lb60_defconfig +++ b/arch/mips/configs/qi_lb60_defconfig @@ -44,7 +44,7 @@ CONFIG_TCP_CONG_WESTWOOD=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y CONFIG_MTD_BLOCK=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_JZ4740=y CONFIG_MTD_UBI=y CONFIG_NETDEVICES=y diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig index 7befe05fd813..50632a3103dd 100644 --- a/arch/mips/configs/rb532_defconfig +++ b/arch/mips/configs/rb532_defconfig @@ -19,7 +19,6 @@ CONFIG_PCI=y # CONFIG_PCI_QUIRKS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -# CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_MAC_PARTITION=y @@ -110,7 +109,7 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y CONFIG_MTD_BLOCK=y CONFIG_MTD_BLOCK2MTD=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_PLATFORM=y CONFIG_ATA=y # CONFIG_ATA_VERBOSE_ERROR is not set diff --git a/arch/mips/configs/rbtx49xx_defconfig b/arch/mips/configs/rbtx49xx_defconfig index 50a2c9ad583f..5e389db35fa7 100644 --- a/arch/mips/configs/rbtx49xx_defconfig +++ b/arch/mips/configs/rbtx49xx_defconfig @@ -17,7 +17,6 @@ CONFIG_TOSHIBA_RBTX4938_MPLEX_KEEP=y CONFIG_PCI=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -# 
CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set CONFIG_NET=y CONFIG_PACKET=y @@ -40,7 +39,7 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_PHYSMAP=y CONFIG_MTD_RBTX4939=y -CONFIG_MTD_NAND=m +CONFIG_MTD_RAW_NAND=m CONFIG_MTD_NAND_TXX9NDFMC=m CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/mips/configs/xway_defconfig b/arch/mips/configs/xway_defconfig index 2bb02ea9fb4e..203db83c3ee9 100644 --- a/arch/mips/configs/xway_defconfig +++ b/arch/mips/configs/xway_defconfig @@ -81,7 +81,7 @@ CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_PHYSMAP=y CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_LANTIQ=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_XWAY=y CONFIG_EEPROM_93CX6=m CONFIG_SCSI=y diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index 830c93a010c3..9a466dde9b96 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -482,7 +482,7 @@ static inline void __clear_bit_unlock(unsigned long nr, volatile unsigned long * * Return the bit position (0..63) of the most significant 1 bit in a word * Returns -1 if no 1 bit exists */ -static inline unsigned long __fls(unsigned long word) +static __always_inline unsigned long __fls(unsigned long word) { int num; @@ -548,7 +548,7 @@ static inline unsigned long __fls(unsigned long word) * Returns 0..SZLONG-1 * Undefined if no bit exists, so code should check against 0 first. */ -static inline unsigned long __ffs(unsigned long word) +static __always_inline unsigned long __ffs(unsigned long word) { return __fls(word & -word); } diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h index a301a8f4bc66..235bc2f52113 100644 --- a/arch/mips/include/asm/bootinfo.h +++ b/arch/mips/include/asm/bootinfo.h @@ -92,6 +92,7 @@ extern unsigned long mips_machtype; #define BOOT_MEM_ROM_DATA 2 #define BOOT_MEM_RESERVED 3 #define BOOT_MEM_INIT_RAM 4 +#define BOOT_MEM_NOMAP 5 /* * A memory map that's built upon what was determined diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index e4456e450f94..3185fd3220ec 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -11,6 +11,7 @@ #ifndef __ASSEMBLY__ #include <linux/types.h> +#include <asm/isa-rev.h> #define JUMP_LABEL_NOP_SIZE 4 @@ -21,15 +22,20 @@ #endif #ifdef CONFIG_CPU_MICROMIPS -#define B_INSN "b32" +# define B_INSN "b32" +# define J_INSN "j32" +#elif MIPS_ISA_REV >= 6 +# define B_INSN "bc" +# define J_INSN "bc" #else -#define B_INSN "b" +# define B_INSN "b" +# define J_INSN "j" #endif static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("1:\t" B_INSN " 2f\n\t" - "2:\tnop\n\t" + "2:\t.insn\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" ".popsection\n\t" @@ -42,8 +48,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { - asm_volatile_goto("1:\tj %l[l_yes]\n\t" - "nop\n\t" + asm_volatile_goto("1:\t" J_INSN " %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" ".popsection\n\t" diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index a2b4748655df..acf80ae0a430 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -141,14 +141,14 @@ extern const unsigned long sys_call_table[]; extern const unsigned long sys32_call_table[]; extern const unsigned long sysn32_call_table[]; -static inline int 
syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { int arch = AUDIT_ARCH_MIPS; #ifdef CONFIG_64BIT - if (!test_thread_flag(TIF_32BIT_REGS)) { + if (!test_tsk_thread_flag(task, TIF_32BIT_REGS)) { arch |= __AUDIT_ARCH_64BIT; /* N32 sets only TIF_32BIT_ADDR */ - if (test_thread_flag(TIF_32BIT_ADDR)) + if (test_tsk_thread_flag(task, TIF_32BIT_ADDR)) arch |= __AUDIT_ARCH_CONVENTION_MIPS64_N32; } #endif diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h index b1990dd75f27..f7effca791a5 100644 --- a/arch/mips/include/asm/uasm.h +++ b/arch/mips/include/asm/uasm.h @@ -86,14 +86,18 @@ Ip_u2u1(_ctcmsa); Ip_u2u1s3(_daddiu); Ip_u3u1u2(_daddu); Ip_u1u2(_ddivu); +Ip_u3u1u2(_ddivu_r6); Ip_u1(_di); Ip_u2u1msbu3(_dins); Ip_u2u1msbu3(_dinsm); Ip_u2u1msbu3(_dinsu); Ip_u1u2(_divu); +Ip_u3u1u2(_divu_r6); Ip_u1u2u3(_dmfc0); +Ip_u3u1u2(_dmodu); Ip_u1u2u3(_dmtc0); Ip_u1u2(_dmultu); +Ip_u3u1u2(_dmulu); Ip_u2u1u3(_drotr); Ip_u2u1u3(_drotr32); Ip_u2u1(_dsbh); @@ -131,6 +135,7 @@ Ip_u1u2u3(_mfc0); Ip_u1u2u3(_mfhc0); Ip_u1(_mfhi); Ip_u1(_mflo); +Ip_u3u1u2(_modu); Ip_u3u1u2(_movn); Ip_u3u1u2(_movz); Ip_u1u2u3(_mtc0); @@ -139,6 +144,7 @@ Ip_u1(_mthi); Ip_u1(_mtlo); Ip_u3u1u2(_mul); Ip_u1u2(_multu); +Ip_u3u1u2(_mulu); Ip_u3u1u2(_nor); Ip_u3u1u2(_or); Ip_u2u1u3(_ori); @@ -149,6 +155,8 @@ Ip_u2s3u1(_sb); Ip_u2s3u1(_sc); Ip_u2s3u1(_scd); Ip_u2s3u1(_sd); +Ip_u3u1u2(_seleqz); +Ip_u3u1u2(_selnez); Ip_u2s3u1(_sh); Ip_u2u1u3(_sll); Ip_u3u2u1(_sllv); diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h index 40fbb5dd66df..eaa3a80affdf 100644 --- a/arch/mips/include/uapi/asm/inst.h +++ b/arch/mips/include/uapi/asm/inst.h @@ -55,9 +55,9 @@ enum spec_op { spec3_unused_op, spec4_unused_op, slt_op, sltu_op, dadd_op, daddu_op, dsub_op, dsubu_op, tge_op, tgeu_op, tlt_op, tltu_op, - teq_op, spec5_unused_op, tne_op, spec6_unused_op, - dsll_op, spec7_unused_op, dsrl_op, dsra_op, - dsll32_op, spec8_unused_op, dsrl32_op, dsra32_op + teq_op, seleqz_op, tne_op, selnez_op, + dsll_op, spec5_unused_op, dsrl_op, dsra_op, + dsll32_op, spec6_unused_op, dsrl32_op, dsra32_op }; /* diff --git a/arch/mips/include/uapi/asm/sockios.h b/arch/mips/include/uapi/asm/sockios.h index 5b40a88593fa..66f60234f290 100644 --- a/arch/mips/include/uapi/asm/sockios.h +++ b/arch/mips/include/uapi/asm/sockios.h @@ -21,7 +21,7 @@ #define SIOCSPGRP _IOW('s', 8, pid_t) #define SIOCGPGRP _IOR('s', 9, pid_t) -#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ -#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ +#define SIOCGSTAMP_OLD 0x8906 /* Get stamp (timeval) */ +#define SIOCGSTAMPNS_OLD 0x8907 /* Get stamp (timespec) */ #endif /* _ASM_SOCKIOS_H */ diff --git a/arch/mips/kernel/cpu-bugs64.c b/arch/mips/kernel/cpu-bugs64.c index bada74af7641..c04b97aace4a 100644 --- a/arch/mips/kernel/cpu-bugs64.c +++ b/arch/mips/kernel/cpu-bugs64.c @@ -42,8 +42,8 @@ static inline void align_mod(const int align, const int mod) : "n"(align), "n"(mod)); } -static inline void mult_sh_align_mod(long *v1, long *v2, long *w, - const int align, const int mod) +static __always_inline void mult_sh_align_mod(long *v1, long *v2, long *w, + const int align, const int mod) { unsigned long flags; int m1, m2; diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S index d7de8adcfcc8..5469d43b6966 100644 --- a/arch/mips/kernel/entry.S +++ b/arch/mips/kernel/entry.S @@ -58,15 +58,14 @@ resume_kernel: local_irq_disable lw t0, TI_PRE_COUNT($28) bnez t0, restore_all -need_resched: LONG_L t0, TI_FLAGS($28) andi 
t1, t0, _TIF_NEED_RESCHED beqz t1, restore_all LONG_L t0, PT_STATUS(sp) # Interrupts off? andi t0, 1 beqz t0, restore_all - jal preempt_schedule_irq - b need_resched + PTR_LA ra, restore_all + j preempt_schedule_irq #endif FEXPORT(ret_from_kernel_thread) diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c index ab943927f97a..662c8db9f45b 100644 --- a/arch/mips/kernel/jump_label.c +++ b/arch/mips/kernel/jump_label.c @@ -40,18 +40,38 @@ void arch_jump_label_transform(struct jump_entry *e, { union mips_instruction *insn_p; union mips_instruction insn; + long offset; insn_p = (union mips_instruction *)msk_isa16_mode(e->code); - /* Jump only works within an aligned region its delay slot is in. */ - BUG_ON((e->target & ~J_RANGE_MASK) != ((e->code + 4) & ~J_RANGE_MASK)); - /* Target must have the right alignment and ISA must be preserved. */ BUG_ON((e->target & J_ALIGN_MASK) != J_ISA_BIT); if (type == JUMP_LABEL_JMP) { - insn.j_format.opcode = J_ISA_BIT ? mm_j32_op : j_op; - insn.j_format.target = e->target >> J_RANGE_SHIFT; + if (!IS_ENABLED(CONFIG_CPU_MICROMIPS) && MIPS_ISA_REV >= 6) { + offset = e->target - ((unsigned long)insn_p + 4); + offset >>= 2; + + /* + * The branch offset must fit in the instruction's 26 + * bit field. + */ + WARN_ON((offset >= BIT(25)) || + (offset < -(long)BIT(25))); + + insn.j_format.opcode = bc6_op; + insn.j_format.target = offset; + } else { + /* + * Jump only works within an aligned region its delay + * slot is in. + */ + WARN_ON((e->target & ~J_RANGE_MASK) != + ((e->code + 4) & ~J_RANGE_MASK)); + + insn.j_format.opcode = J_ISA_BIT ? mm_j32_op : j_op; + insn.j_format.target = e->target >> J_RANGE_SHIFT; + } } else { insn.word = 0; /* nop */ } diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c index 93b8e0b4332f..28bf01961bb2 100644 --- a/arch/mips/kernel/prom.c +++ b/arch/mips/kernel/prom.c @@ -41,13 +41,27 @@ char *mips_get_machine_name(void) #ifdef CONFIG_USE_OF void __init early_init_dt_add_memory_arch(u64 base, u64 size) { - return add_memory_region(base, size, BOOT_MEM_RAM); + if (base >= PHYS_ADDR_MAX) { + pr_warn("Trying to add an invalid memory region, skipped\n"); + return; + } + + /* Truncate the passed memory region instead of type casting */ + if (base + size - 1 >= PHYS_ADDR_MAX || base + size < base) { + pr_warn("Truncate memory region %llx @ %llx to size %llx\n", + size, base, PHYS_ADDR_MAX - base); + size = PHYS_ADDR_MAX - base; + } + + add_memory_region(base, size, BOOT_MEM_RAM); } int __init early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size, bool nomap) { - add_memory_region(base, size, BOOT_MEM_RESERVED); + add_memory_region(base, size, + nomap ? 
BOOT_MEM_NOMAP : BOOT_MEM_RESERVED); + return 0; } diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 3a62f80958e1..414b6e9c900b 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -1418,7 +1418,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) unsigned long args[6]; sd.nr = syscall; - sd.arch = syscall_get_arch(); + sd.arch = syscall_get_arch(current); syscall_get_arguments(current, regs, args); for (i = 0; i < 6; i++) sd.args[i] = args[i]; diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 8d1dc6c71173..ab349d2381c3 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -27,6 +27,7 @@ #include <linux/dma-contiguous.h> #include <linux/decompress/generic.h> #include <linux/of_fdt.h> +#include <linux/of_reserved_mem.h> #include <asm/addrspace.h> #include <asm/bootinfo.h> @@ -178,6 +179,7 @@ static bool __init __maybe_unused memory_region_available(phys_addr_t start, in_ram = true; break; case BOOT_MEM_RESERVED: + case BOOT_MEM_NOMAP: if ((start >= start_ && start < end_) || (start < start_ && start + size >= start_)) free = false; @@ -213,6 +215,9 @@ static void __init print_memory_map(void) case BOOT_MEM_RESERVED: printk(KERN_CONT "(reserved)\n"); break; + case BOOT_MEM_NOMAP: + printk(KERN_CONT "(nomap)\n"); + break; default: printk(KERN_CONT "type %lu\n", boot_mem_map.map[i].type); break; @@ -371,7 +376,6 @@ static void __init bootmem_init(void) static void __init bootmem_init(void) { - unsigned long reserved_end; phys_addr_t ramstart = PHYS_ADDR_MAX; int i; @@ -382,10 +386,10 @@ static void __init bootmem_init(void) * will reserve the area used for the initrd. */ init_initrd(); - reserved_end = (unsigned long) PFN_UP(__pa_symbol(&_end)); - memblock_reserve(PHYS_OFFSET, - (reserved_end << PAGE_SHIFT) - PHYS_OFFSET); + /* Reserve memory occupied by kernel. */ + memblock_reserve(__pa_symbol(&_text), + __pa_symbol(&_end) - __pa_symbol(&_text)); /* * max_low_pfn is not a number of pages. The number of pages @@ -394,10 +398,7 @@ static void __init bootmem_init(void) min_low_pfn = ~0UL; max_low_pfn = 0; - /* - * Find the highest page frame number we have available - * and the lowest used RAM address - */ + /* Find the highest and lowest page frame numbers we have available. */ for (i = 0; i < boot_mem_map.nr_map; i++) { unsigned long start, end; @@ -427,13 +428,6 @@ static void __init bootmem_init(void) max_low_pfn = end; if (start < min_low_pfn) min_low_pfn = start; - if (end <= reserved_end) - continue; -#ifdef CONFIG_BLK_DEV_INITRD - /* Skip zones before initrd and initrd itself */ - if (initrd_end && end <= (unsigned long)PFN_UP(__pa(initrd_end))) - continue; -#endif } if (min_low_pfn >= max_low_pfn) @@ -474,6 +468,7 @@ static void __init bootmem_init(void) max_low_pfn = PFN_DOWN(HIGHMEM_START); } + /* Install all valid RAM ranges to the memblock memory region */ for (i = 0; i < boot_mem_map.nr_map; i++) { unsigned long start, end; @@ -481,98 +476,38 @@ static void __init bootmem_init(void) end = PFN_DOWN(boot_mem_map.map[i].addr + boot_mem_map.map[i].size); - if (start <= min_low_pfn) + if (start < min_low_pfn) start = min_low_pfn; - if (start >= end) - continue; - #ifndef CONFIG_HIGHMEM + /* Ignore highmem regions if highmem is unsupported */ if (end > max_low_pfn) end = max_low_pfn; - - /* - * ... finally, is the area going away? 
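A worked example of the clipping early_init_dt_add_memory_arch() now performs, assuming a 32-bit phys_addr_t so that PHYS_ADDR_MAX is 0xffffffff:

	u64 base = 0xf0000000, size = 0x20000000;	/* DT range crossing 4 GiB */

	/*
	 * base + size - 1 is 0x10fffffff, which is >= PHYS_ADDR_MAX, so
	 * instead of letting a narrowing cast wrap the region it gets
	 * truncated:
	 */
	size = PHYS_ADDR_MAX - base;			/* 0x0fffffff */
	add_memory_region(base, size, BOOT_MEM_RAM);

A range whose base already sits at or beyond PHYS_ADDR_MAX is skipped outright, and reserved ranges marked nomap are now recorded as BOOT_MEM_NOMAP so that bootmem_init() can memblock_remove() them instead of merely reserving them.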
- */ +#endif if (end <= start) continue; -#endif memblock_add_node(PFN_PHYS(start), PFN_PHYS(end - start), 0); - } - - /* - * Register fully available low RAM pages with the bootmem allocator. - */ - for (i = 0; i < boot_mem_map.nr_map; i++) { - unsigned long start, end, size; - start = PFN_UP(boot_mem_map.map[i].addr); - end = PFN_DOWN(boot_mem_map.map[i].addr - + boot_mem_map.map[i].size); - - /* - * Reserve usable memory. - */ + /* Reserve any memory except the ordinary RAM ranges. */ switch (boot_mem_map.map[i].type) { case BOOT_MEM_RAM: break; - case BOOT_MEM_INIT_RAM: - memory_present(0, start, end); - continue; - default: - /* Not usable memory */ - if (start > min_low_pfn && end < max_low_pfn) - memblock_reserve(boot_mem_map.map[i].addr, - boot_mem_map.map[i].size); - + case BOOT_MEM_NOMAP: /* Discard the range from the system. */ + memblock_remove(PFN_PHYS(start), PFN_PHYS(end - start)); continue; + default: /* Reserve the rest of the memory types at boot time */ + memblock_reserve(PFN_PHYS(start), PFN_PHYS(end - start)); + break; } /* - * We are rounding up the start address of usable memory - * and at the end of the usable range downwards. + * In any case the added to the memblock memory regions + * (highmem/lowmem, available/reserved, etc) are considered + * as present, so inform sparsemem about them. */ - if (start >= max_low_pfn) - continue; - if (start < reserved_end) - start = reserved_end; - if (end > max_low_pfn) - end = max_low_pfn; - - /* - * ... finally, is the area going away? - */ - if (end <= start) - continue; - size = end - start; - - /* Register lowmem ranges */ memory_present(0, start, end); } -#ifdef CONFIG_RELOCATABLE - /* - * The kernel reserves all memory below its _end symbol as bootmem, - * but the kernel may now be at a much higher address. The memory - * between the original and new locations may be returned to the system. - */ - if (__pa_symbol(_text) > __pa_symbol(VMLINUX_LOAD_ADDRESS)) { - unsigned long offset; - extern void show_kernel_relocation(const char *level); - - offset = __pa_symbol(_text) - __pa_symbol(VMLINUX_LOAD_ADDRESS); - memblock_free(__pa_symbol(VMLINUX_LOAD_ADDRESS), offset); - -#if defined(CONFIG_DEBUG_KERNEL) && defined(CONFIG_DEBUG_INFO) - /* - * This information is necessary when debugging the kernel - * But is a security vulnerability otherwise! - */ - show_kernel_relocation(KERN_INFO); -#endif - } -#endif - /* * Reserve initrd memory if needed. */ @@ -781,7 +716,6 @@ static void __init request_crashkernel(struct resource *res) */ static void __init arch_mem_init(char **cmdline_p) { - struct memblock_region *reg; extern void plat_mem_setup(void); /* @@ -809,6 +743,9 @@ static void __init arch_mem_init(char **cmdline_p) arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT, PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT, BOOT_MEM_INIT_RAM); + arch_mem_addpart(PFN_DOWN(__pa_symbol(&__bss_start)) << PAGE_SHIFT, + PFN_UP(__pa_symbol(&__bss_stop)) << PAGE_SHIFT, + BOOT_MEM_RAM); pr_info("Determined physical RAM map:\n"); print_memory_map(); @@ -884,13 +821,16 @@ static void __init arch_mem_init(char **cmdline_p) plat_swiotlb_setup(); dma_contiguous_reserve(PFN_PHYS(max_low_pfn)); - /* Tell bootmem about cma reserved memblock section */ - for_each_memblock(reserved, reg) - if (reg->size != 0) - memblock_reserve(reg->base, reg->size); - reserve_bootmem_region(__pa_symbol(&__nosave_begin), - __pa_symbol(&__nosave_end)); /* Reserve for hibernation */ + /* Reserve for hibernation. 
*/ + memblock_reserve(__pa_symbol(&__nosave_begin), + __pa_symbol(&__nosave_end) - __pa_symbol(&__nosave_begin)); + + fdt_init_reserved_mem(); + + memblock_dump_all(); + + early_memtest(PFN_PHYS(min_low_pfn), PFN_PHYS(max_low_pfn)); } static void __init resource_init(void) @@ -935,6 +875,7 @@ static void __init resource_init(void) res->flags |= IORESOURCE_SYSRAM; break; case BOOT_MEM_RESERVED: + case BOOT_MEM_NOMAP: default: res->name = "reserved"; } diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 98ca55d62201..c52766a5b85f 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2151,7 +2151,7 @@ static void configure_hwrena(void) static void configure_exception_vector(void) { - if (cpu_has_veic || cpu_has_vint) { + if (cpu_has_mips_r2_r6) { unsigned long sr = set_c0_status(ST0_BEV); /* If available, use WG to set top bits of EBASE */ if (cpu_has_ebase_wg) { @@ -2163,6 +2163,8 @@ static void configure_exception_vector(void) } write_c0_ebase(ebase); write_c0_status(sr); + } + if (cpu_has_veic || cpu_has_vint) { /* Setting vector spacing enables EI/VI mode */ change_c0_intctl(0x3e0, VECTORSPACING); } @@ -2193,22 +2195,6 @@ void per_cpu_trap_init(bool is_boot_cpu) * o read IntCtl.IPFDC to determine the fast debug channel interrupt */ if (cpu_has_mips_r2_r6) { - /* - * We shouldn't trust a secondary core has a sane EBASE register - * so use the one calculated by the boot CPU. - */ - if (!is_boot_cpu) { - /* If available, use WG to set top bits of EBASE */ - if (cpu_has_ebase_wg) { -#ifdef CONFIG_64BIT - write_c0_ebase_64(ebase | MIPS_EBASE_WG); -#else - write_c0_ebase(ebase | MIPS_EBASE_WG); -#endif - } - write_c0_ebase(ebase); - } - cp0_compare_irq_shift = CAUSEB_TI - CAUSEB_IP; cp0_compare_irq = (read_c0_intctl() >> INTCTLB_IPTI) & 7; cp0_perfcount_irq = (read_c0_intctl() >> INTCTLB_IPPCI) & 7; @@ -2284,19 +2270,27 @@ void __init trap_init(void) extern char except_vec3_generic; extern char except_vec4; extern char except_vec3_r4000; - unsigned long i; + unsigned long i, vec_size; + phys_addr_t ebase_pa; check_wait(); - if (cpu_has_veic || cpu_has_vint) { - unsigned long size = 0x200 + VECTORSPACING*64; - phys_addr_t ebase_pa; + if (!cpu_has_mips_r2_r6) { + ebase = CAC_BASE; + ebase_pa = virt_to_phys((void *)ebase); + vec_size = 0x400; - ebase = (unsigned long) - memblock_alloc(size, 1 << fls(size)); - if (!ebase) + memblock_reserve(ebase_pa, vec_size); + } else { + if (cpu_has_veic || cpu_has_vint) + vec_size = 0x200 + VECTORSPACING*64; + else + vec_size = PAGE_SIZE; + + ebase_pa = memblock_phys_alloc(vec_size, 1 << fls(vec_size)); + if (!ebase_pa) panic("%s: Failed to allocate %lu bytes align=0x%x\n", - __func__, size, 1 << fls(size)); + __func__, vec_size, 1 << fls(vec_size)); /* * Try to ensure ebase resides in KSeg0 if possible. @@ -2309,23 +2303,10 @@ void __init trap_init(void) * EVA is special though as it allows segments to be rearranged * and to become uncached during cache error handling. 
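A note on the allocation above: the requested alignment is 1 << fls(vec_size), the smallest power of two strictly greater than vec_size. For example, with vectored interrupts and a 0x100 vector spacing (an assumption; VECTORSPACING's value is not shown in this hunk):

	vec_size = 0x200 + 0x100 * 64;	/* 0x4200 */
	align    = 1 << fls(vec_size);	/* fls(0x4200) is 15, so 0x8000 */

so the exception base lands on a 32 KiB boundary and, where possible, is addressed through KSeg0.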
*/ - ebase_pa = __pa(ebase); if (!IS_ENABLED(CONFIG_EVA) && !WARN_ON(ebase_pa >= 0x20000000)) ebase = CKSEG0ADDR(ebase_pa); - } else { - ebase = CAC_BASE; - - if (cpu_has_mips_r2_r6) { - if (cpu_has_ebase_wg) { -#ifdef CONFIG_64BIT - ebase = (read_c0_ebase_64() & ~0xfff); -#else - ebase = (read_c0_ebase() & ~0xfff); -#endif - } else { - ebase += (read_c0_ebase() & 0x3ffff000); - } - } + else + ebase = (unsigned long)phys_to_virt(ebase_pa); } if (cpu_has_mmips) { @@ -2459,7 +2440,7 @@ void __init trap_init(void) else set_handler(0x080, &except_vec3_generic, 0x80); - local_flush_icache_range(ebase, ebase + 0x400); + local_flush_icache_range(ebase, ebase + vec_size); sort_extable(__start___dbe_table, __stop___dbe_table); diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig index 4528bc9c3cb1..eac25aef21e0 100644 --- a/arch/mips/kvm/Kconfig +++ b/arch/mips/kvm/Kconfig @@ -21,7 +21,6 @@ config KVM depends on MIPS_FP_SUPPORT select EXPORT_UASM select PREEMPT_NOTIFIERS - select ANON_INODES select KVM_GENERIC_DIRTYLOG_READ_PROTECT select HAVE_KVM_VCPU_ASYNC_IOCTL select KVM_MMIO diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 0074427b04fb..e5de6bac8197 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1141,9 +1141,7 @@ enum emulation_result kvm_mips_emul_tlbwr(struct kvm_vcpu *vcpu) unsigned long pc = vcpu->arch.pc; int index; - get_random_bytes(&index, sizeof(index)); - index &= (KVM_MIPS_GUEST_TLB_SIZE - 1); - + index = prandom_u32_max(KVM_MIPS_GUEST_TLB_SIZE); tlb = &vcpu->arch.guest_tlb[index]; kvm_mips_invalidate_guest_tlb(vcpu, tlb); diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c index 0d14e0d8eacf..4c2b4483683c 100644 --- a/arch/mips/mm/gup.c +++ b/arch/mips/mm/gup.c @@ -235,7 +235,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, * get_user_pages_fast() - pin user pages in memory * @start: starting user address * @nr_pages: number of pages from start to pin - * @write: whether pages will be written to + * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * @@ -247,8 +247,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, * requested. If nr_pages is 0 or negative, returns 0. If no pages * were pinned, returns -errno. */ -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) +int get_user_pages_fast(unsigned long start, int nr_pages, + unsigned int gup_flags, struct page **pages) { struct mm_struct *mm = current->mm; unsigned long addr, len, end; @@ -273,7 +273,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, next = pgd_addr_end(addr, end); if (pgd_none(pgd)) goto slow; - if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE, + pages, &nr)) goto slow; } while (pgdp++, addr = next, addr != end); local_irq_enable(); @@ -289,7 +290,7 @@ slow_irqon: pages += nr; ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT, - pages, write ? 
FOLL_WRITE : 0); + pages, gup_flags); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index bbb196ad5f26..8a038b30d3c4 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -504,14 +504,6 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end) printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); } -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, - "initrd"); -} -#endif - void (*free_init_pages_eva)(void *begin, void *end) = NULL; void __ref free_initmem(void) diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c index 6abe40fc413d..7154a1d99aad 100644 --- a/arch/mips/mm/uasm-mips.c +++ b/arch/mips/mm/uasm-mips.c @@ -76,14 +76,22 @@ static const struct insn insn_table[insn_invalid] = { [insn_daddiu] = {M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, [insn_daddu] = {M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD}, [insn_ddivu] = {M(spec_op, 0, 0, 0, 0, ddivu_op), RS | RT}, + [insn_ddivu_r6] = {M(spec_op, 0, 0, 0, ddivu_ddivu6_op, ddivu_op), + RS | RT | RD}, [insn_di] = {M(cop0_op, mfmc0_op, 0, 12, 0, 0), RT}, [insn_dins] = {M(spec3_op, 0, 0, 0, 0, dins_op), RS | RT | RD | RE}, [insn_dinsm] = {M(spec3_op, 0, 0, 0, 0, dinsm_op), RS | RT | RD | RE}, [insn_dinsu] = {M(spec3_op, 0, 0, 0, 0, dinsu_op), RS | RT | RD | RE}, [insn_divu] = {M(spec_op, 0, 0, 0, 0, divu_op), RS | RT}, + [insn_divu_r6] = {M(spec_op, 0, 0, 0, divu_divu6_op, divu_op), + RS | RT | RD}, [insn_dmfc0] = {M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET}, + [insn_dmodu] = {M(spec_op, 0, 0, 0, ddivu_dmodu_op, ddivu_op), + RS | RT | RD}, [insn_dmtc0] = {M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET}, [insn_dmultu] = {M(spec_op, 0, 0, 0, 0, dmultu_op), RS | RT}, + [insn_dmulu] = {M(spec_op, 0, 0, 0, dmult_dmul_op, dmultu_op), + RS | RT | RD}, [insn_drotr] = {M(spec_op, 1, 0, 0, 0, dsrl_op), RT | RD | RE}, [insn_drotr32] = {M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE}, [insn_dsbh] = {M(spec3_op, 0, 0, 0, dsbh_op, dbshfl_op), RT | RD}, @@ -132,12 +140,16 @@ static const struct insn insn_table[insn_invalid] = { [insn_mfhc0] = {M(cop0_op, mfhc0_op, 0, 0, 0, 0), RT | RD | SET}, [insn_mfhi] = {M(spec_op, 0, 0, 0, 0, mfhi_op), RD}, [insn_mflo] = {M(spec_op, 0, 0, 0, 0, mflo_op), RD}, + [insn_modu] = {M(spec_op, 0, 0, 0, divu_modu_op, divu_op), + RS | RT | RD}, [insn_movn] = {M(spec_op, 0, 0, 0, 0, movn_op), RS | RT | RD}, [insn_movz] = {M(spec_op, 0, 0, 0, 0, movz_op), RS | RT | RD}, [insn_mtc0] = {M(cop0_op, mtc_op, 0, 0, 0, 0), RT | RD | SET}, [insn_mthc0] = {M(cop0_op, mthc0_op, 0, 0, 0, 0), RT | RD | SET}, [insn_mthi] = {M(spec_op, 0, 0, 0, 0, mthi_op), RS}, [insn_mtlo] = {M(spec_op, 0, 0, 0, 0, mtlo_op), RS}, + [insn_mulu] = {M(spec_op, 0, 0, 0, multu_mulu_op, multu_op), + RS | RT | RD}, #ifndef CONFIG_CPU_MIPSR6 [insn_mul] = {M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD}, #else @@ -163,6 +175,8 @@ static const struct insn insn_table[insn_invalid] = { [insn_scd] = {M6(spec3_op, 0, 0, 0, scd6_op), RS | RT | SIMM9}, #endif [insn_sd] = {M(sd_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_seleqz] = {M(spec_op, 0, 0, 0, 0, seleqz_op), RS | RT | RD}, + [insn_selnez] = {M(spec_op, 0, 0, 0, 0, selnez_op), RS | RT | RD}, [insn_sh] = {M(sh_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, [insn_sll] = {M(spec_op, 0, 0, 0, 0, sll_op), RT | RD | RE}, [insn_sllv] = {M(spec_op, 0, 0, 0, 0, sllv_op), RS | RT | 
RD}, diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c index 45b6264ff308..c56f129c9a4b 100644 --- a/arch/mips/mm/uasm.c +++ b/arch/mips/mm/uasm.c @@ -50,21 +50,22 @@ enum opcode { insn_beq, insn_beql, insn_bgez, insn_bgezl, insn_bgtz, insn_blez, insn_bltz, insn_bltzl, insn_bne, insn_break, insn_cache, insn_cfc1, insn_cfcmsa, insn_ctc1, insn_ctcmsa, insn_daddiu, insn_daddu, insn_ddivu, - insn_di, insn_dins, insn_dinsm, insn_dinsu, insn_divu, insn_dmfc0, - insn_dmtc0, insn_dmultu, insn_drotr, insn_drotr32, insn_dsbh, insn_dshd, - insn_dsll, insn_dsll32, insn_dsllv, insn_dsra, insn_dsra32, insn_dsrav, - insn_dsrl, insn_dsrl32, insn_dsrlv, insn_dsubu, insn_eret, insn_ext, - insn_ins, insn_j, insn_jal, insn_jalr, insn_jr, insn_lb, insn_lbu, - insn_ld, insn_lddir, insn_ldpte, insn_ldx, insn_lh, insn_lhu, - insn_ll, insn_lld, insn_lui, insn_lw, insn_lwu, insn_lwx, insn_mfc0, - insn_mfhc0, insn_mfhi, insn_mflo, insn_movn, insn_movz, insn_mtc0, - insn_mthc0, insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_nor, - insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sb, - insn_sc, insn_scd, insn_sd, insn_sh, insn_sll, insn_sllv, - insn_slt, insn_slti, insn_sltiu, insn_sltu, insn_sra, insn_srav, - insn_srl, insn_srlv, insn_subu, insn_sw, insn_sync, insn_syscall, - insn_tlbp, insn_tlbr, insn_tlbwi, insn_tlbwr, insn_wait, insn_wsbh, - insn_xor, insn_xori, insn_yield, + insn_ddivu_r6, insn_di, insn_dins, insn_dinsm, insn_dinsu, insn_divu, + insn_divu_r6, insn_dmfc0, insn_dmodu, insn_dmtc0, insn_dmultu, + insn_dmulu, insn_drotr, insn_drotr32, insn_dsbh, insn_dshd, insn_dsll, + insn_dsll32, insn_dsllv, insn_dsra, insn_dsra32, insn_dsrav, insn_dsrl, + insn_dsrl32, insn_dsrlv, insn_dsubu, insn_eret, insn_ext, insn_ins, + insn_j, insn_jal, insn_jalr, insn_jr, insn_lb, insn_lbu, insn_ld, + insn_lddir, insn_ldpte, insn_ldx, insn_lh, insn_lhu, insn_ll, insn_lld, + insn_lui, insn_lw, insn_lwu, insn_lwx, insn_mfc0, insn_mfhc0, insn_mfhi, + insn_mflo, insn_modu, insn_movn, insn_movz, insn_mtc0, insn_mthc0, + insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_mulu, insn_nor, + insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sb, insn_sc, + insn_scd, insn_seleqz, insn_selnez, insn_sd, insn_sh, insn_sll, + insn_sllv, insn_slt, insn_slti, insn_sltiu, insn_sltu, insn_sra, + insn_srav, insn_srl, insn_srlv, insn_subu, insn_sw, insn_sync, + insn_syscall, insn_tlbp, insn_tlbr, insn_tlbwi, insn_tlbwr, insn_wait, + insn_wsbh, insn_xor, insn_xori, insn_yield, insn_invalid /* insn_invalid must be last */ }; @@ -287,13 +288,17 @@ I_u2u1(_cfcmsa) I_u1u2(_ctc1) I_u2u1(_ctcmsa) I_u1u2(_ddivu) +I_u3u1u2(_ddivu_r6) I_u1u2u3(_dmfc0) +I_u3u1u2(_dmodu) I_u1u2u3(_dmtc0) I_u1u2(_dmultu) +I_u3u1u2(_dmulu) I_u2u1s3(_daddiu) I_u3u1u2(_daddu) I_u1(_di); I_u1u2(_divu) +I_u3u1u2(_divu_r6) I_u2u1(_dsbh); I_u2u1(_dshd); I_u2u1u3(_dsll) @@ -327,6 +332,7 @@ I_u2s3u1(_lw) I_u2s3u1(_lwu) I_u1u2u3(_mfc0) I_u1u2u3(_mfhc0) +I_u3u1u2(_modu) I_u3u1u2(_movn) I_u3u1u2(_movz) I_u1(_mfhi) @@ -337,6 +343,7 @@ I_u1(_mthi) I_u1(_mtlo) I_u3u1u2(_mul) I_u1u2(_multu) +I_u3u1u2(_mulu) I_u3u1u2(_nor) I_u3u1u2(_or) I_u2u1u3(_ori) @@ -345,6 +352,8 @@ I_u2s3u1(_sb) I_u2s3u1(_sc) I_u2s3u1(_scd) I_u2s3u1(_sd) +I_u3u1u2(_seleqz) +I_u3u1u2(_selnez) I_u2s3u1(_sh) I_u2u1u3(_sll) I_u3u2u1(_sllv) diff --git a/arch/mips/net/Makefile b/arch/mips/net/Makefile index 47d678416715..72a78462f872 100644 --- a/arch/mips/net/Makefile +++ b/arch/mips/net/Makefile @@ -1,4 +1,3 @@ # MIPS networking code -obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o 
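The new table entries above give uasm the MIPS R6 three-operand multiply/divide forms (mulu, divu_r6, modu and their doubleword variants) plus seleqz/selnez. A sketch of how an emitter might choose between the R6 form and the classic HI/LO sequence; the register variables are placeholders, and the operand order (destination first) follows the Ip_u3u1u2() declarations:

	u32 *p = buf;

	if (cpu_has_mips_r6) {
		uasm_i_divu_r6(&p, rd, rs, rt);	/* rd = rs / rt */
		uasm_i_modu(&p, rr, rs, rt);	/* rr = rs % rt */
	} else {
		uasm_i_divu(&p, rs, rt);	/* LO = rs / rt, HI = rs % rt */
		uasm_i_mflo(&p, rd);
		uasm_i_mfhi(&p, rr);
	}

These entries pair with the HAVE_EBPF_JIT consolidation above, which retires the classic BPF JIT (deleted below) in favour of the eBPF JIT everywhere except microMIPS.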
obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c deleted file mode 100644 index 3a0e34f4e615..000000000000 --- a/arch/mips/net/bpf_jit.c +++ /dev/null @@ -1,1270 +0,0 @@ -/* - * Just-In-Time compiler for BPF filters on MIPS - * - * Copyright (c) 2014 Imagination Technologies Ltd. - * Author: Markos Chandras <markos.chandras@imgtec.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; version 2 of the License. - */ - -#include <linux/bitops.h> -#include <linux/compiler.h> -#include <linux/errno.h> -#include <linux/filter.h> -#include <linux/if_vlan.h> -#include <linux/moduleloader.h> -#include <linux/netdevice.h> -#include <linux/string.h> -#include <linux/slab.h> -#include <linux/types.h> -#include <asm/asm.h> -#include <asm/bitops.h> -#include <asm/cacheflush.h> -#include <asm/cpu-features.h> -#include <asm/uasm.h> - -#include "bpf_jit.h" - -/* ABI - * r_skb_hl SKB header length - * r_data SKB data pointer - * r_off Offset - * r_A BPF register A - * r_X BPF register X - * r_skb *skb - * r_M *scratch memory - * r_skb_len SKB length - * - * On entry (*bpf_func)(*skb, *filter) - * a0 = MIPS_R_A0 = skb; - * a1 = MIPS_R_A1 = filter; - * - * Stack - * ... - * M[15] - * M[14] - * M[13] - * ... - * M[0] <-- r_M - * saved reg k-1 - * saved reg k-2 - * ... - * saved reg 0 <-- r_sp - * <no argument area> - * - * Packet layout - * - * <--------------------- len ------------------------> - * <--skb-len(r_skb_hl)-->< ----- skb->data_len ------> - * ---------------------------------------------------- - * | skb->data | - * ---------------------------------------------------- - */ - -#define ptr typeof(unsigned long) - -#define SCRATCH_OFF(k) (4 * (k)) - -/* JIT flags */ -#define SEEN_CALL (1 << BPF_MEMWORDS) -#define SEEN_SREG_SFT (BPF_MEMWORDS + 1) -#define SEEN_SREG_BASE (1 << SEEN_SREG_SFT) -#define SEEN_SREG(x) (SEEN_SREG_BASE << (x)) -#define SEEN_OFF SEEN_SREG(2) -#define SEEN_A SEEN_SREG(3) -#define SEEN_X SEEN_SREG(4) -#define SEEN_SKB SEEN_SREG(5) -#define SEEN_MEM SEEN_SREG(6) -/* SEEN_SK_DATA also implies skb_hl an skb_len */ -#define SEEN_SKB_DATA (SEEN_SREG(7) | SEEN_SREG(1) | SEEN_SREG(0)) - -/* Arguments used by JIT */ -#define ARGS_USED_BY_JIT 2 /* only applicable to 64-bit */ - -#define SBIT(x) (1 << (x)) /* Signed version of BIT() */ - -/** - * struct jit_ctx - JIT context - * @skf: The sk_filter - * @prologue_bytes: Number of bytes for prologue - * @idx: Instruction index - * @flags: JIT flags - * @offsets: Instruction offsets - * @target: Memory location for the compiled filter - */ -struct jit_ctx { - const struct bpf_prog *skf; - unsigned int prologue_bytes; - u32 idx; - u32 flags; - u32 *offsets; - u32 *target; -}; - - -static inline int optimize_div(u32 *k) -{ - /* power of 2 divides can be implemented with right shift */ - if (!(*k & (*k-1))) { - *k = ilog2(*k); - return 1; - } - - return 0; -} - -static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx); - -/* Simply emit the instruction if the JIT memory space has been allocated */ -#define emit_instr(ctx, func, ...) \ -do { \ - if ((ctx)->target != NULL) { \ - u32 *p = &(ctx)->target[ctx->idx]; \ - uasm_i_##func(&p, ##__VA_ARGS__); \ - } \ - (ctx)->idx++; \ -} while (0) - -/* - * Similar to emit_instr but it must be used when we need to emit - * 32-bit or 64-bit instructions - */ -#define emit_long_instr(ctx, func, ...) 
\ -do { \ - if ((ctx)->target != NULL) { \ - u32 *p = &(ctx)->target[ctx->idx]; \ - UASM_i_##func(&p, ##__VA_ARGS__); \ - } \ - (ctx)->idx++; \ -} while (0) - -/* Determine if immediate is within the 16-bit signed range */ -static inline bool is_range16(s32 imm) -{ - return !(imm >= SBIT(15) || imm < -SBIT(15)); -} - -static inline void emit_addu(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, addu, dst, src1, src2); -} - -static inline void emit_nop(struct jit_ctx *ctx) -{ - emit_instr(ctx, nop); -} - -/* Load a u32 immediate to a register */ -static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx) -{ - if (ctx->target != NULL) { - /* addiu can only handle s16 */ - if (!is_range16(imm)) { - u32 *p = &ctx->target[ctx->idx]; - uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16); - p = &ctx->target[ctx->idx + 1]; - uasm_i_ori(&p, dst, r_tmp_imm, imm & 0xffff); - } else { - u32 *p = &ctx->target[ctx->idx]; - uasm_i_addiu(&p, dst, r_zero, imm); - } - } - ctx->idx++; - - if (!is_range16(imm)) - ctx->idx++; -} - -static inline void emit_or(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, or, dst, src1, src2); -} - -static inline void emit_ori(unsigned int dst, unsigned src, u32 imm, - struct jit_ctx *ctx) -{ - if (imm >= BIT(16)) { - emit_load_imm(r_tmp, imm, ctx); - emit_or(dst, src, r_tmp, ctx); - } else { - emit_instr(ctx, ori, dst, src, imm); - } -} - -static inline void emit_daddiu(unsigned int dst, unsigned int src, - int imm, struct jit_ctx *ctx) -{ - /* - * Only used for stack, so the imm is relatively small - * and it fits in 15-bits - */ - emit_instr(ctx, daddiu, dst, src, imm); -} - -static inline void emit_addiu(unsigned int dst, unsigned int src, - u32 imm, struct jit_ctx *ctx) -{ - if (!is_range16(imm)) { - emit_load_imm(r_tmp, imm, ctx); - emit_addu(dst, r_tmp, src, ctx); - } else { - emit_instr(ctx, addiu, dst, src, imm); - } -} - -static inline void emit_and(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, and, dst, src1, src2); -} - -static inline void emit_andi(unsigned int dst, unsigned int src, - u32 imm, struct jit_ctx *ctx) -{ - /* If imm does not fit in u16 then load it to register */ - if (imm >= BIT(16)) { - emit_load_imm(r_tmp, imm, ctx); - emit_and(dst, src, r_tmp, ctx); - } else { - emit_instr(ctx, andi, dst, src, imm); - } -} - -static inline void emit_xor(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, xor, dst, src1, src2); -} - -static inline void emit_xori(ptr dst, ptr src, u32 imm, struct jit_ctx *ctx) -{ - /* If imm does not fit in u16 then load it to register */ - if (imm >= BIT(16)) { - emit_load_imm(r_tmp, imm, ctx); - emit_xor(dst, src, r_tmp, ctx); - } else { - emit_instr(ctx, xori, dst, src, imm); - } -} - -static inline void emit_stack_offset(int offset, struct jit_ctx *ctx) -{ - emit_long_instr(ctx, ADDIU, r_sp, r_sp, offset); -} - -static inline void emit_subu(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, subu, dst, src1, src2); -} - -static inline void emit_neg(unsigned int reg, struct jit_ctx *ctx) -{ - emit_subu(reg, r_zero, reg, ctx); -} - -static inline void emit_sllv(unsigned int dst, unsigned int src, - unsigned int sa, struct jit_ctx *ctx) -{ - emit_instr(ctx, sllv, dst, src, sa); -} - -static inline void emit_sll(unsigned int dst, unsigned int src, - unsigned int sa, 
struct jit_ctx *ctx) -{ - /* sa is 5-bits long */ - if (sa >= BIT(5)) - /* Shifting >= 32 results in zero */ - emit_jit_reg_move(dst, r_zero, ctx); - else - emit_instr(ctx, sll, dst, src, sa); -} - -static inline void emit_srlv(unsigned int dst, unsigned int src, - unsigned int sa, struct jit_ctx *ctx) -{ - emit_instr(ctx, srlv, dst, src, sa); -} - -static inline void emit_srl(unsigned int dst, unsigned int src, - unsigned int sa, struct jit_ctx *ctx) -{ - /* sa is 5-bits long */ - if (sa >= BIT(5)) - /* Shifting >= 32 results in zero */ - emit_jit_reg_move(dst, r_zero, ctx); - else - emit_instr(ctx, srl, dst, src, sa); -} - -static inline void emit_slt(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, slt, dst, src1, src2); -} - -static inline void emit_sltu(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, sltu, dst, src1, src2); -} - -static inline void emit_sltiu(unsigned dst, unsigned int src, - unsigned int imm, struct jit_ctx *ctx) -{ - /* 16 bit immediate */ - if (!is_range16((s32)imm)) { - emit_load_imm(r_tmp, imm, ctx); - emit_sltu(dst, src, r_tmp, ctx); - } else { - emit_instr(ctx, sltiu, dst, src, imm); - } - -} - -/* Store register on the stack */ -static inline void emit_store_stack_reg(ptr reg, ptr base, - unsigned int offset, - struct jit_ctx *ctx) -{ - emit_long_instr(ctx, SW, reg, offset, base); -} - -static inline void emit_store(ptr reg, ptr base, unsigned int offset, - struct jit_ctx *ctx) -{ - emit_instr(ctx, sw, reg, offset, base); -} - -static inline void emit_load_stack_reg(ptr reg, ptr base, - unsigned int offset, - struct jit_ctx *ctx) -{ - emit_long_instr(ctx, LW, reg, offset, base); -} - -static inline void emit_load(unsigned int reg, unsigned int base, - unsigned int offset, struct jit_ctx *ctx) -{ - emit_instr(ctx, lw, reg, offset, base); -} - -static inline void emit_load_byte(unsigned int reg, unsigned int base, - unsigned int offset, struct jit_ctx *ctx) -{ - emit_instr(ctx, lb, reg, offset, base); -} - -static inline void emit_half_load(unsigned int reg, unsigned int base, - unsigned int offset, struct jit_ctx *ctx) -{ - emit_instr(ctx, lh, reg, offset, base); -} - -static inline void emit_half_load_unsigned(unsigned int reg, unsigned int base, - unsigned int offset, struct jit_ctx *ctx) -{ - emit_instr(ctx, lhu, reg, offset, base); -} - -static inline void emit_mul(unsigned int dst, unsigned int src1, - unsigned int src2, struct jit_ctx *ctx) -{ - emit_instr(ctx, mul, dst, src1, src2); -} - -static inline void emit_div(unsigned int dst, unsigned int src, - struct jit_ctx *ctx) -{ - if (ctx->target != NULL) { - u32 *p = &ctx->target[ctx->idx]; - uasm_i_divu(&p, dst, src); - p = &ctx->target[ctx->idx + 1]; - uasm_i_mflo(&p, dst); - } - ctx->idx += 2; /* 2 insts */ -} - -static inline void emit_mod(unsigned int dst, unsigned int src, - struct jit_ctx *ctx) -{ - if (ctx->target != NULL) { - u32 *p = &ctx->target[ctx->idx]; - uasm_i_divu(&p, dst, src); - p = &ctx->target[ctx->idx + 1]; - uasm_i_mfhi(&p, dst); - } - ctx->idx += 2; /* 2 insts */ -} - -static inline void emit_dsll(unsigned int dst, unsigned int src, - unsigned int sa, struct jit_ctx *ctx) -{ - emit_instr(ctx, dsll, dst, src, sa); -} - -static inline void emit_dsrl32(unsigned int dst, unsigned int src, - unsigned int sa, struct jit_ctx *ctx) -{ - emit_instr(ctx, dsrl32, dst, src, sa); -} - -static inline void emit_wsbh(unsigned int dst, unsigned int src, - struct jit_ctx *ctx) -{ - emit_instr(ctx, 
wsbh, dst, src); -} - -/* load pointer to register */ -static inline void emit_load_ptr(unsigned int dst, unsigned int src, - int imm, struct jit_ctx *ctx) -{ - /* src contains the base addr of the 32/64-pointer */ - emit_long_instr(ctx, LW, dst, imm, src); -} - -/* load a function pointer to register */ -static inline void emit_load_func(unsigned int reg, ptr imm, - struct jit_ctx *ctx) -{ - if (IS_ENABLED(CONFIG_64BIT)) { - /* At this point imm is always 64-bit */ - emit_load_imm(r_tmp, (u64)imm >> 32, ctx); - emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */ - emit_ori(r_tmp, r_tmp_imm, (imm >> 16) & 0xffff, ctx); - emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */ - emit_ori(reg, r_tmp_imm, imm & 0xffff, ctx); - } else { - emit_load_imm(reg, imm, ctx); - } -} - -/* Move to real MIPS register */ -static inline void emit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx) -{ - emit_long_instr(ctx, ADDU, dst, src, r_zero); -} - -/* Move to JIT (32-bit) register */ -static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx) -{ - emit_addu(dst, src, r_zero, ctx); -} - -/* Compute the immediate value for PC-relative branches. */ -static inline u32 b_imm(unsigned int tgt, struct jit_ctx *ctx) -{ - if (ctx->target == NULL) - return 0; - - /* - * We want a pc-relative branch. We only do forward branches - * so tgt is always after pc. tgt is the instruction offset - * we want to jump to. - - * Branch on MIPS: - * I: target_offset <- sign_extend(offset) - * I+1: PC += target_offset (delay slot) - * - * ctx->idx currently points to the branch instruction - * but the offset is added to the delay slot so we need - * to subtract 4. - */ - return ctx->offsets[tgt] - - (ctx->idx * 4 - ctx->prologue_bytes) - 4; -} - -static inline void emit_bcond(int cond, unsigned int reg1, unsigned int reg2, - unsigned int imm, struct jit_ctx *ctx) -{ - if (ctx->target != NULL) { - u32 *p = &ctx->target[ctx->idx]; - - switch (cond) { - case MIPS_COND_EQ: - uasm_i_beq(&p, reg1, reg2, imm); - break; - case MIPS_COND_NE: - uasm_i_bne(&p, reg1, reg2, imm); - break; - case MIPS_COND_ALL: - uasm_i_b(&p, imm); - break; - default: - pr_warn("%s: Unhandled branch conditional: %d\n", - __func__, cond); - } - } - ctx->idx++; -} - -static inline void emit_b(unsigned int imm, struct jit_ctx *ctx) -{ - emit_bcond(MIPS_COND_ALL, r_zero, r_zero, imm, ctx); -} - -static inline void emit_jalr(unsigned int link, unsigned int reg, - struct jit_ctx *ctx) -{ - emit_instr(ctx, jalr, link, reg); -} - -static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx) -{ - emit_instr(ctx, jr, reg); -} - -static inline u16 align_sp(unsigned int num) -{ - /* Double word alignment for 32-bit, quadword for 64-bit */ - unsigned int align = IS_ENABLED(CONFIG_64BIT) ? 
16 : 8; - num = (num + (align - 1)) & -align; - return num; -} - -static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset) -{ - int i = 0, real_off = 0; - u32 sflags, tmp_flags; - - /* Adjust the stack pointer */ - if (offset) - emit_stack_offset(-align_sp(offset), ctx); - - tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; - /* sflags is essentially a bitmap */ - while (tmp_flags) { - if ((sflags >> i) & 0x1) { - emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off, - ctx); - real_off += SZREG; - } - i++; - tmp_flags >>= 1; - } - - /* save return address */ - if (ctx->flags & SEEN_CALL) { - emit_store_stack_reg(r_ra, r_sp, real_off, ctx); - real_off += SZREG; - } - - /* Setup r_M leaving the alignment gap if necessary */ - if (ctx->flags & SEEN_MEM) { - if (real_off % (SZREG * 2)) - real_off += SZREG; - emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off); - } -} - -static void restore_bpf_jit_regs(struct jit_ctx *ctx, - unsigned int offset) -{ - int i, real_off = 0; - u32 sflags, tmp_flags; - - tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT; - /* sflags is a bitmap */ - i = 0; - while (tmp_flags) { - if ((sflags >> i) & 0x1) { - emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off, - ctx); - real_off += SZREG; - } - i++; - tmp_flags >>= 1; - } - - /* restore return address */ - if (ctx->flags & SEEN_CALL) - emit_load_stack_reg(r_ra, r_sp, real_off, ctx); - - /* Restore the sp and discard the scratch memory */ - if (offset) - emit_stack_offset(align_sp(offset), ctx); -} - -static unsigned int get_stack_depth(struct jit_ctx *ctx) -{ - int sp_off = 0; - - - /* How many s* regs do we need to preserve? */ - sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * SZREG; - - if (ctx->flags & SEEN_MEM) - sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */ - - if (ctx->flags & SEEN_CALL) - sp_off += SZREG; /* Space for our ra register */ - - return sp_off; -} - -static void build_prologue(struct jit_ctx *ctx) -{ - int sp_off; - - /* Calculate the total offset for the stack pointer */ - sp_off = get_stack_depth(ctx); - save_bpf_jit_regs(ctx, sp_off); - - if (ctx->flags & SEEN_SKB) - emit_reg_move(r_skb, MIPS_R_A0, ctx); - - if (ctx->flags & SEEN_SKB_DATA) { - /* Load packet length */ - emit_load(r_skb_len, r_skb, offsetof(struct sk_buff, len), - ctx); - emit_load(r_tmp, r_skb, offsetof(struct sk_buff, data_len), - ctx); - /* Load the data pointer */ - emit_load_ptr(r_skb_data, r_skb, - offsetof(struct sk_buff, data), ctx); - /* Load the header length */ - emit_subu(r_skb_hl, r_skb_len, r_tmp, ctx); - } - - if (ctx->flags & SEEN_X) - emit_jit_reg_move(r_X, r_zero, ctx); - - /* - * Do not leak kernel data to userspace, we only need to clear - * r_A if it is ever used. In fact if it is never used, we - * will not save/restore it, so clearing it in this case would - * corrupt the state of the caller. - */ - if (bpf_needs_clear_a(&ctx->skf->insns[0]) && - (ctx->flags & SEEN_A)) - emit_jit_reg_move(r_A, r_zero, ctx); -} - -static void build_epilogue(struct jit_ctx *ctx) -{ - unsigned int sp_off; - - /* Calculate the total offset for the stack pointer */ - - sp_off = get_stack_depth(ctx); - restore_bpf_jit_regs(ctx, sp_off); - - /* Return */ - emit_jr(r_ra, ctx); - emit_nop(ctx); -} - -#define CHOOSE_LOAD_FUNC(K, func) \ - ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? 
func##_negative : func) : \ - func##_positive) - -static int build_body(struct jit_ctx *ctx) -{ - const struct bpf_prog *prog = ctx->skf; - const struct sock_filter *inst; - unsigned int i, off, condt; - u32 k, b_off __maybe_unused; - u8 (*sk_load_func)(unsigned long *skb, int offset); - - for (i = 0; i < prog->len; i++) { - u16 code; - - inst = &(prog->insns[i]); - pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n", - __func__, inst->code, inst->jt, inst->jf, inst->k); - k = inst->k; - code = bpf_anc_helper(inst); - - if (ctx->target == NULL) - ctx->offsets[i] = ctx->idx * 4; - - switch (code) { - case BPF_LD | BPF_IMM: - /* A <- k ==> li r_A, k */ - ctx->flags |= SEEN_A; - emit_load_imm(r_A, k, ctx); - break; - case BPF_LD | BPF_W | BPF_LEN: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); - /* A <- len ==> lw r_A, offset(skb) */ - ctx->flags |= SEEN_SKB | SEEN_A; - off = offsetof(struct sk_buff, len); - emit_load(r_A, r_skb, off, ctx); - break; - case BPF_LD | BPF_MEM: - /* A <- M[k] ==> lw r_A, offset(M) */ - ctx->flags |= SEEN_MEM | SEEN_A; - emit_load(r_A, r_M, SCRATCH_OFF(k), ctx); - break; - case BPF_LD | BPF_W | BPF_ABS: - /* A <- P[k:4] */ - sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_word); - goto load; - case BPF_LD | BPF_H | BPF_ABS: - /* A <- P[k:2] */ - sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_half); - goto load; - case BPF_LD | BPF_B | BPF_ABS: - /* A <- P[k:1] */ - sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_byte); -load: - emit_load_imm(r_off, k, ctx); -load_common: - ctx->flags |= SEEN_CALL | SEEN_OFF | - SEEN_SKB | SEEN_A | SEEN_SKB_DATA; - - emit_load_func(r_s0, (ptr)sk_load_func, ctx); - emit_reg_move(MIPS_R_A0, r_skb, ctx); - emit_jalr(MIPS_R_RA, r_s0, ctx); - /* Load second argument to delay slot */ - emit_reg_move(MIPS_R_A1, r_off, ctx); - /* Check the error value */ - emit_bcond(MIPS_COND_EQ, r_ret, 0, b_imm(i + 1, ctx), - ctx); - /* Load return register on DS for failures */ - emit_reg_move(r_ret, r_zero, ctx); - /* Return with error */ - emit_b(b_imm(prog->len, ctx), ctx); - emit_nop(ctx); - break; - case BPF_LD | BPF_W | BPF_IND: - /* A <- P[X + k:4] */ - sk_load_func = sk_load_word; - goto load_ind; - case BPF_LD | BPF_H | BPF_IND: - /* A <- P[X + k:2] */ - sk_load_func = sk_load_half; - goto load_ind; - case BPF_LD | BPF_B | BPF_IND: - /* A <- P[X + k:1] */ - sk_load_func = sk_load_byte; -load_ind: - ctx->flags |= SEEN_OFF | SEEN_X; - emit_addiu(r_off, r_X, k, ctx); - goto load_common; - case BPF_LDX | BPF_IMM: - /* X <- k */ - ctx->flags |= SEEN_X; - emit_load_imm(r_X, k, ctx); - break; - case BPF_LDX | BPF_MEM: - /* X <- M[k] */ - ctx->flags |= SEEN_X | SEEN_MEM; - emit_load(r_X, r_M, SCRATCH_OFF(k), ctx); - break; - case BPF_LDX | BPF_W | BPF_LEN: - /* X <- len */ - ctx->flags |= SEEN_X | SEEN_SKB; - off = offsetof(struct sk_buff, len); - emit_load(r_X, r_skb, off, ctx); - break; - case BPF_LDX | BPF_B | BPF_MSH: - /* X <- 4 * (P[k:1] & 0xf) */ - ctx->flags |= SEEN_X | SEEN_CALL | SEEN_SKB; - /* Load offset to a1 */ - emit_load_func(r_s0, (ptr)sk_load_byte, ctx); - /* - * This may emit two instructions so it may not fit - * in the delay slot. So use a0 in the delay slot. 
- */ - emit_load_imm(MIPS_R_A1, k, ctx); - emit_jalr(MIPS_R_RA, r_s0, ctx); - emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */ - /* Check the error value */ - emit_bcond(MIPS_COND_NE, r_ret, 0, - b_imm(prog->len, ctx), ctx); - emit_reg_move(r_ret, r_zero, ctx); - /* We are good */ - /* X <- P[k:1] & 0xf */ - emit_andi(r_X, r_A, 0xf, ctx); - /* X << 2 */ - emit_b(b_imm(i + 1, ctx), ctx); - emit_sll(r_X, r_X, 2, ctx); /* delay slot */ - break; - case BPF_ST: - /* M[k] <- A */ - ctx->flags |= SEEN_MEM | SEEN_A; - emit_store(r_A, r_M, SCRATCH_OFF(k), ctx); - break; - case BPF_STX: - /* M[k] <- X */ - ctx->flags |= SEEN_MEM | SEEN_X; - emit_store(r_X, r_M, SCRATCH_OFF(k), ctx); - break; - case BPF_ALU | BPF_ADD | BPF_K: - /* A += K */ - ctx->flags |= SEEN_A; - emit_addiu(r_A, r_A, k, ctx); - break; - case BPF_ALU | BPF_ADD | BPF_X: - /* A += X */ - ctx->flags |= SEEN_A | SEEN_X; - emit_addu(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_SUB | BPF_K: - /* A -= K */ - ctx->flags |= SEEN_A; - emit_addiu(r_A, r_A, -k, ctx); - break; - case BPF_ALU | BPF_SUB | BPF_X: - /* A -= X */ - ctx->flags |= SEEN_A | SEEN_X; - emit_subu(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_MUL | BPF_K: - /* A *= K */ - /* Load K to scratch register before MUL */ - ctx->flags |= SEEN_A; - emit_load_imm(r_s0, k, ctx); - emit_mul(r_A, r_A, r_s0, ctx); - break; - case BPF_ALU | BPF_MUL | BPF_X: - /* A *= X */ - ctx->flags |= SEEN_A | SEEN_X; - emit_mul(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_DIV | BPF_K: - /* A /= k */ - if (k == 1) - break; - if (optimize_div(&k)) { - ctx->flags |= SEEN_A; - emit_srl(r_A, r_A, k, ctx); - break; - } - ctx->flags |= SEEN_A; - emit_load_imm(r_s0, k, ctx); - emit_div(r_A, r_s0, ctx); - break; - case BPF_ALU | BPF_MOD | BPF_K: - /* A %= k */ - if (k == 1) { - ctx->flags |= SEEN_A; - emit_jit_reg_move(r_A, r_zero, ctx); - } else { - ctx->flags |= SEEN_A; - emit_load_imm(r_s0, k, ctx); - emit_mod(r_A, r_s0, ctx); - } - break; - case BPF_ALU | BPF_DIV | BPF_X: - /* A /= X */ - ctx->flags |= SEEN_X | SEEN_A; - /* Check if r_X is zero */ - emit_bcond(MIPS_COND_EQ, r_X, r_zero, - b_imm(prog->len, ctx), ctx); - emit_load_imm(r_ret, 0, ctx); /* delay slot */ - emit_div(r_A, r_X, ctx); - break; - case BPF_ALU | BPF_MOD | BPF_X: - /* A %= X */ - ctx->flags |= SEEN_X | SEEN_A; - /* Check if r_X is zero */ - emit_bcond(MIPS_COND_EQ, r_X, r_zero, - b_imm(prog->len, ctx), ctx); - emit_load_imm(r_ret, 0, ctx); /* delay slot */ - emit_mod(r_A, r_X, ctx); - break; - case BPF_ALU | BPF_OR | BPF_K: - /* A |= K */ - ctx->flags |= SEEN_A; - emit_ori(r_A, r_A, k, ctx); - break; - case BPF_ALU | BPF_OR | BPF_X: - /* A |= X */ - ctx->flags |= SEEN_A; - emit_ori(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_XOR | BPF_K: - /* A ^= k */ - ctx->flags |= SEEN_A; - emit_xori(r_A, r_A, k, ctx); - break; - case BPF_ANC | SKF_AD_ALU_XOR_X: - case BPF_ALU | BPF_XOR | BPF_X: - /* A ^= X */ - ctx->flags |= SEEN_A; - emit_xor(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_AND | BPF_K: - /* A &= K */ - ctx->flags |= SEEN_A; - emit_andi(r_A, r_A, k, ctx); - break; - case BPF_ALU | BPF_AND | BPF_X: - /* A &= X */ - ctx->flags |= SEEN_A | SEEN_X; - emit_and(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_LSH | BPF_K: - /* A <<= K */ - ctx->flags |= SEEN_A; - emit_sll(r_A, r_A, k, ctx); - break; - case BPF_ALU | BPF_LSH | BPF_X: - /* A <<= X */ - ctx->flags |= SEEN_A | SEEN_X; - emit_sllv(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_RSH | BPF_K: - /* A >>= K */ - ctx->flags |= SEEN_A; - 
emit_srl(r_A, r_A, k, ctx); - break; - case BPF_ALU | BPF_RSH | BPF_X: - ctx->flags |= SEEN_A | SEEN_X; - emit_srlv(r_A, r_A, r_X, ctx); - break; - case BPF_ALU | BPF_NEG: - /* A = -A */ - ctx->flags |= SEEN_A; - emit_neg(r_A, ctx); - break; - case BPF_JMP | BPF_JA: - /* pc += K */ - emit_b(b_imm(i + k + 1, ctx), ctx); - emit_nop(ctx); - break; - case BPF_JMP | BPF_JEQ | BPF_K: - /* pc += ( A == K ) ? pc->jt : pc->jf */ - condt = MIPS_COND_EQ | MIPS_COND_K; - goto jmp_cmp; - case BPF_JMP | BPF_JEQ | BPF_X: - ctx->flags |= SEEN_X; - /* pc += ( A == X ) ? pc->jt : pc->jf */ - condt = MIPS_COND_EQ | MIPS_COND_X; - goto jmp_cmp; - case BPF_JMP | BPF_JGE | BPF_K: - /* pc += ( A >= K ) ? pc->jt : pc->jf */ - condt = MIPS_COND_GE | MIPS_COND_K; - goto jmp_cmp; - case BPF_JMP | BPF_JGE | BPF_X: - ctx->flags |= SEEN_X; - /* pc += ( A >= X ) ? pc->jt : pc->jf */ - condt = MIPS_COND_GE | MIPS_COND_X; - goto jmp_cmp; - case BPF_JMP | BPF_JGT | BPF_K: - /* pc += ( A > K ) ? pc->jt : pc->jf */ - condt = MIPS_COND_GT | MIPS_COND_K; - goto jmp_cmp; - case BPF_JMP | BPF_JGT | BPF_X: - ctx->flags |= SEEN_X; - /* pc += ( A > X ) ? pc->jt : pc->jf */ - condt = MIPS_COND_GT | MIPS_COND_X; -jmp_cmp: - /* Greater or Equal */ - if ((condt & MIPS_COND_GE) || - (condt & MIPS_COND_GT)) { - if (condt & MIPS_COND_K) { /* K */ - ctx->flags |= SEEN_A; - emit_sltiu(r_s0, r_A, k, ctx); - } else { /* X */ - ctx->flags |= SEEN_A | - SEEN_X; - emit_sltu(r_s0, r_A, r_X, ctx); - } - /* A < (K|X) ? r_scratch = 1 */ - b_off = b_imm(i + inst->jf + 1, ctx); - emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, - ctx); - emit_nop(ctx); - /* A > (K|X) ? scratch = 0 */ - if (condt & MIPS_COND_GT) { - /* Checking for equality */ - ctx->flags |= SEEN_A | SEEN_X; - if (condt & MIPS_COND_K) - emit_load_imm(r_s0, k, ctx); - else - emit_jit_reg_move(r_s0, r_X, - ctx); - b_off = b_imm(i + inst->jf + 1, ctx); - emit_bcond(MIPS_COND_EQ, r_A, r_s0, - b_off, ctx); - emit_nop(ctx); - /* Finally, A > K|X */ - b_off = b_imm(i + inst->jt + 1, ctx); - emit_b(b_off, ctx); - emit_nop(ctx); - } else { - /* A >= (K|X) so jump */ - b_off = b_imm(i + inst->jt + 1, ctx); - emit_b(b_off, ctx); - emit_nop(ctx); - } - } else { - /* A == K|X */ - if (condt & MIPS_COND_K) { /* K */ - ctx->flags |= SEEN_A; - emit_load_imm(r_s0, k, ctx); - /* jump true */ - b_off = b_imm(i + inst->jt + 1, ctx); - emit_bcond(MIPS_COND_EQ, r_A, r_s0, - b_off, ctx); - emit_nop(ctx); - /* jump false */ - b_off = b_imm(i + inst->jf + 1, - ctx); - emit_bcond(MIPS_COND_NE, r_A, r_s0, - b_off, ctx); - emit_nop(ctx); - } else { /* X */ - /* jump true */ - ctx->flags |= SEEN_A | SEEN_X; - b_off = b_imm(i + inst->jt + 1, - ctx); - emit_bcond(MIPS_COND_EQ, r_A, r_X, - b_off, ctx); - emit_nop(ctx); - /* jump false */ - b_off = b_imm(i + inst->jf + 1, ctx); - emit_bcond(MIPS_COND_NE, r_A, r_X, - b_off, ctx); - emit_nop(ctx); - } - } - break; - case BPF_JMP | BPF_JSET | BPF_K: - ctx->flags |= SEEN_A; - /* pc += (A & K) ? pc -> jt : pc -> jf */ - emit_load_imm(r_s1, k, ctx); - emit_and(r_s0, r_A, r_s1, ctx); - /* jump true */ - b_off = b_imm(i + inst->jt + 1, ctx); - emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx); - emit_nop(ctx); - /* jump false */ - b_off = b_imm(i + inst->jf + 1, ctx); - emit_b(b_off, ctx); - emit_nop(ctx); - break; - case BPF_JMP | BPF_JSET | BPF_X: - ctx->flags |= SEEN_X | SEEN_A; - /* pc += (A & X) ? 
pc -> jt : pc -> jf */ - emit_and(r_s0, r_A, r_X, ctx); - /* jump true */ - b_off = b_imm(i + inst->jt + 1, ctx); - emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx); - emit_nop(ctx); - /* jump false */ - b_off = b_imm(i + inst->jf + 1, ctx); - emit_b(b_off, ctx); - emit_nop(ctx); - break; - case BPF_RET | BPF_A: - ctx->flags |= SEEN_A; - if (i != prog->len - 1) - /* - * If this is not the last instruction - * then jump to the epilogue - */ - emit_b(b_imm(prog->len, ctx), ctx); - emit_reg_move(r_ret, r_A, ctx); /* delay slot */ - break; - case BPF_RET | BPF_K: - /* - * It can emit two instructions so it does not fit in - * the delay slot. - */ - emit_load_imm(r_ret, k, ctx); - if (i != prog->len - 1) { - /* - * If this is not the last instruction - * then jump to the epilogue - */ - emit_b(b_imm(prog->len, ctx), ctx); - emit_nop(ctx); - } - break; - case BPF_MISC | BPF_TAX: - /* X = A */ - ctx->flags |= SEEN_X | SEEN_A; - emit_jit_reg_move(r_X, r_A, ctx); - break; - case BPF_MISC | BPF_TXA: - /* A = X */ - ctx->flags |= SEEN_A | SEEN_X; - emit_jit_reg_move(r_A, r_X, ctx); - break; - /* AUX */ - case BPF_ANC | SKF_AD_PROTOCOL: - /* A = ntohs(skb->protocol) */ - ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A; - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, - protocol) != 2); - off = offsetof(struct sk_buff, protocol); - emit_half_load(r_A, r_skb, off, ctx); -#ifdef CONFIG_CPU_LITTLE_ENDIAN - /* This needs little endian fixup */ - if (cpu_has_wsbh) { - /* R2 and later have the wsbh instruction */ - emit_wsbh(r_A, r_A, ctx); - } else { - /* Get first byte */ - emit_andi(r_tmp_imm, r_A, 0xff, ctx); - /* Shift it */ - emit_sll(r_tmp, r_tmp_imm, 8, ctx); - /* Get second byte */ - emit_srl(r_tmp_imm, r_A, 8, ctx); - emit_andi(r_tmp_imm, r_tmp_imm, 0xff, ctx); - /* Put everything together in r_A */ - emit_or(r_A, r_tmp, r_tmp_imm, ctx); - } -#endif - break; - case BPF_ANC | SKF_AD_CPU: - ctx->flags |= SEEN_A | SEEN_OFF; - /* A = current_thread_info()->cpu */ - BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, - cpu) != 4); - off = offsetof(struct thread_info, cpu); - /* $28/gp points to the thread_info struct */ - emit_load(r_A, 28, off, ctx); - break; - case BPF_ANC | SKF_AD_IFINDEX: - /* A = skb->dev->ifindex */ - case BPF_ANC | SKF_AD_HATYPE: - /* A = skb->dev->type */ - ctx->flags |= SEEN_SKB | SEEN_A; - off = offsetof(struct sk_buff, dev); - /* Load *dev pointer */ - emit_load_ptr(r_s0, r_skb, off, ctx); - /* error (0) in the delay slot */ - emit_bcond(MIPS_COND_EQ, r_s0, r_zero, - b_imm(prog->len, ctx), ctx); - emit_reg_move(r_ret, r_zero, ctx); - if (code == (BPF_ANC | SKF_AD_IFINDEX)) { - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); - off = offsetof(struct net_device, ifindex); - emit_load(r_A, r_s0, off, ctx); - } else { /* code == (BPF_ANC | SKF_AD_HATYPE) */ - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); - off = offsetof(struct net_device, type); - emit_half_load_unsigned(r_A, r_s0, off, ctx); - } - break; - case BPF_ANC | SKF_AD_MARK: - ctx->flags |= SEEN_SKB | SEEN_A; - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); - off = offsetof(struct sk_buff, mark); - emit_load(r_A, r_skb, off, ctx); - break; - case BPF_ANC | SKF_AD_RXHASH: - ctx->flags |= SEEN_SKB | SEEN_A; - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); - off = offsetof(struct sk_buff, hash); - emit_load(r_A, r_skb, off, ctx); - break; - case BPF_ANC | SKF_AD_VLAN_TAG: - ctx->flags |= SEEN_SKB | SEEN_A; - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, - vlan_tci) != 2); - off = offsetof(struct 
sk_buff, vlan_tci); - emit_half_load_unsigned(r_A, r_skb, off, ctx); - break; - case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: - ctx->flags |= SEEN_SKB | SEEN_A; - emit_load_byte(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET(), ctx); - if (PKT_VLAN_PRESENT_BIT) - emit_srl(r_A, r_A, PKT_VLAN_PRESENT_BIT, ctx); - if (PKT_VLAN_PRESENT_BIT < 7) - emit_andi(r_A, r_A, 1, ctx); - break; - case BPF_ANC | SKF_AD_PKTTYPE: - ctx->flags |= SEEN_SKB; - - emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx); - /* Keep only the last 3 bits */ - emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx); -#ifdef __BIG_ENDIAN_BITFIELD - /* Get the actual packet type to the lower 3 bits */ - emit_srl(r_A, r_A, 5, ctx); -#endif - break; - case BPF_ANC | SKF_AD_QUEUE: - ctx->flags |= SEEN_SKB | SEEN_A; - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, - queue_mapping) != 2); - BUILD_BUG_ON(offsetof(struct sk_buff, - queue_mapping) > 0xff); - off = offsetof(struct sk_buff, queue_mapping); - emit_half_load_unsigned(r_A, r_skb, off, ctx); - break; - default: - pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__, - inst->code); - return -1; - } - } - - /* compute offsets only during the first pass */ - if (ctx->target == NULL) - ctx->offsets[i] = ctx->idx * 4; - - return 0; -} - -void bpf_jit_compile(struct bpf_prog *fp) -{ - struct jit_ctx ctx; - unsigned int alloc_size, tmp_idx; - - if (!bpf_jit_enable) - return; - - memset(&ctx, 0, sizeof(ctx)); - - ctx.offsets = kcalloc(fp->len + 1, sizeof(*ctx.offsets), GFP_KERNEL); - if (ctx.offsets == NULL) - return; - - ctx.skf = fp; - - if (build_body(&ctx)) - goto out; - - tmp_idx = ctx.idx; - build_prologue(&ctx); - ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4; - /* just to complete the ctx.idx count */ - build_epilogue(&ctx); - - alloc_size = 4 * ctx.idx; - ctx.target = module_alloc(alloc_size); - if (ctx.target == NULL) - goto out; - - /* Clean it */ - memset(ctx.target, 0, alloc_size); - - ctx.idx = 0; - - /* Generate the actual JIT code */ - build_prologue(&ctx); - build_body(&ctx); - build_epilogue(&ctx); - - /* Update the icache */ - flush_icache_range((ptr)ctx.target, (ptr)(ctx.target + ctx.idx)); - - if (bpf_jit_enable > 1) - /* Dump JIT code */ - bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); - - fp->bpf_func = (void *)ctx.target; - fp->jited = 1; - -out: - kfree(ctx.offsets); -} - -void bpf_jit_free(struct bpf_prog *fp) -{ - if (fp->jited) - module_memfree(fp->bpf_func); - - bpf_prog_unlock_free(fp); -} diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S deleted file mode 100644 index 57154c5883b6..000000000000 --- a/arch/mips/net/bpf_jit_asm.S +++ /dev/null @@ -1,285 +0,0 @@ -/* - * bpf_jit_asm.S: Packet/header access helper functions for MIPS/MIPS64 BPF - * compiler. - * - * Copyright (C) 2015 Imagination Technologies Ltd. - * Author: Markos Chandras <markos.chandras@imgtec.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; version 2 of the License. - */ - -#include <asm/asm.h> -#include <asm/isa-rev.h> -#include <asm/regdef.h> -#include "bpf_jit.h" - -/* ABI - * - * r_skb_hl skb header length - * r_skb_data skb data - * r_off(a1) offset register - * r_A BPF register A - * r_X BPF register X - * r_skb(a0) *skb - * r_M *scratch memory - * r_skb_len skb length - * r_s0 Scratch register 0 - * r_s1 Scratch register 1 - * - * On entry: - * a0: *skb - * a1: offset (imm or imm + X) - * - * All non-BPF-ABI registers are free for use. 
On return, we only - * care about r_ret. The BPF-ABI registers are assumed to remain - * unmodified during the entire filter operation. - */ - -#define skb a0 -#define offset a1 -#define SKF_LL_OFF (-0x200000) /* Can't include linux/filter.h in assembly */ - - /* We know better :) so prevent assembler reordering etc */ - .set noreorder - -#define is_offset_negative(TYPE) \ - /* If offset is negative we have more work to do */ \ - slti t0, offset, 0; \ - bgtz t0, bpf_slow_path_##TYPE##_neg; \ - /* Be careful what follows in DS. */ - -#define is_offset_in_header(SIZE, TYPE) \ - /* Reading from header? */ \ - addiu $r_s0, $r_skb_hl, -SIZE; \ - slt t0, $r_s0, offset; \ - bgtz t0, bpf_slow_path_##TYPE; \ - -LEAF(sk_load_word) - is_offset_negative(word) -FEXPORT(sk_load_word_positive) - is_offset_in_header(4, word) - /* Offset within header boundaries */ - PTR_ADDU t1, $r_skb_data, offset - .set reorder - lw $r_A, 0(t1) - .set noreorder -#ifdef CONFIG_CPU_LITTLE_ENDIAN -# if MIPS_ISA_REV >= 2 - wsbh t0, $r_A - rotr $r_A, t0, 16 -# else - sll t0, $r_A, 24 - srl t1, $r_A, 24 - srl t2, $r_A, 8 - or t0, t0, t1 - andi t2, t2, 0xff00 - andi t1, $r_A, 0xff00 - or t0, t0, t2 - sll t1, t1, 8 - or $r_A, t0, t1 -# endif -#endif - jr $r_ra - move $r_ret, zero - END(sk_load_word) - -LEAF(sk_load_half) - is_offset_negative(half) -FEXPORT(sk_load_half_positive) - is_offset_in_header(2, half) - /* Offset within header boundaries */ - PTR_ADDU t1, $r_skb_data, offset - lhu $r_A, 0(t1) -#ifdef CONFIG_CPU_LITTLE_ENDIAN -# if MIPS_ISA_REV >= 2 - wsbh $r_A, $r_A -# else - sll t0, $r_A, 8 - srl t1, $r_A, 8 - andi t0, t0, 0xff00 - or $r_A, t0, t1 -# endif -#endif - jr $r_ra - move $r_ret, zero - END(sk_load_half) - -LEAF(sk_load_byte) - is_offset_negative(byte) -FEXPORT(sk_load_byte_positive) - is_offset_in_header(1, byte) - /* Offset within header boundaries */ - PTR_ADDU t1, $r_skb_data, offset - lbu $r_A, 0(t1) - jr $r_ra - move $r_ret, zero - END(sk_load_byte) - -/* - * call skb_copy_bits: - * (prototype in linux/skbuff.h) - * - * int skb_copy_bits(sk_buff *skb, int offset, void *to, int len) - * - * o32 mandates we leave 4 spaces for argument registers in case - * the callee needs to use them. Even though we don't care about - * the argument registers ourselves, we need to allocate that space - * to remain ABI compliant since the callee may want to use that space. - * We also allocate 2 more spaces for $r_ra and our return register (*to). - * - * n64 is a bit different. The *caller* will allocate the space to preserve - * the arguments. So in 64-bit kernels, we allocate the 4-arg space for no - * good reason but it does not matter that much really. - * - * (void *to) is returned in r_s0 - * - */ -#ifdef CONFIG_CPU_LITTLE_ENDIAN -#define DS_OFFSET(SIZE) (4 * SZREG) -#else -#define DS_OFFSET(SIZE) ((4 * SZREG) + (4 - SIZE)) -#endif -#define bpf_slow_path_common(SIZE) \ - /* Quick check. Are we within reasonable boundaries? */ \ - LONG_ADDIU $r_s1, $r_skb_len, -SIZE; \ - sltu $r_s0, offset, $r_s1; \ - beqz $r_s0, fault; \ - /* Load 4th argument in DS */ \ - LONG_ADDIU a3, zero, SIZE; \ - PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \ - PTR_LA t0, skb_copy_bits; \ - PTR_S $r_ra, (5 * SZREG)($r_sp); \ - /* Assign low slot to a2 */ \ - PTR_ADDIU a2, $r_sp, DS_OFFSET(SIZE); \ - jalr t0; \ - /* Reset our destination slot (DS but it's ok) */ \ - INT_S zero, (4 * SZREG)($r_sp); \ - /* \ - * skb_copy_bits returns 0 on success and -EFAULT \ - * on error. Our data live in a2. 
Do not bother with \ - * our data if an error has been returned. \ - */ \ - /* Restore our frame */ \ - PTR_L $r_ra, (5 * SZREG)($r_sp); \ - INT_L $r_s0, (4 * SZREG)($r_sp); \ - bltz v0, fault; \ - PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \ - move $r_ret, zero; \ - -NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp) - bpf_slow_path_common(4) -#ifdef CONFIG_CPU_LITTLE_ENDIAN -# if MIPS_ISA_REV >= 2 - wsbh t0, $r_s0 - jr $r_ra - rotr $r_A, t0, 16 -# else - sll t0, $r_s0, 24 - srl t1, $r_s0, 24 - srl t2, $r_s0, 8 - or t0, t0, t1 - andi t2, t2, 0xff00 - andi t1, $r_s0, 0xff00 - or t0, t0, t2 - sll t1, t1, 8 - jr $r_ra - or $r_A, t0, t1 -# endif -#else - jr $r_ra - move $r_A, $r_s0 -#endif - - END(bpf_slow_path_word) - -NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp) - bpf_slow_path_common(2) -#ifdef CONFIG_CPU_LITTLE_ENDIAN -# if MIPS_ISA_REV >= 2 - jr $r_ra - wsbh $r_A, $r_s0 -# else - sll t0, $r_s0, 8 - andi t1, $r_s0, 0xff00 - andi t0, t0, 0xff00 - srl t1, t1, 8 - jr $r_ra - or $r_A, t0, t1 -# endif -#else - jr $r_ra - move $r_A, $r_s0 -#endif - - END(bpf_slow_path_half) - -NESTED(bpf_slow_path_byte, (6 * SZREG), $r_sp) - bpf_slow_path_common(1) - jr $r_ra - move $r_A, $r_s0 - - END(bpf_slow_path_byte) - -/* - * Negative entry points - */ - .macro bpf_is_end_of_data - li t0, SKF_LL_OFF - /* Reading link layer data? */ - slt t1, offset, t0 - bgtz t1, fault - /* Be careful what follows in DS. */ - .endm -/* - * call bpf_internal_load_pointer_neg_helper: - * (prototype in linux/filter.h) - * - * void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, - * int k, unsigned int size) - * - * see above (bpf_slow_path_common) for ABI restrictions - */ -#define bpf_negative_common(SIZE) \ - PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \ - PTR_LA t0, bpf_internal_load_pointer_neg_helper; \ - PTR_S $r_ra, (5 * SZREG)($r_sp); \ - jalr t0; \ - li a2, SIZE; \ - PTR_L $r_ra, (5 * SZREG)($r_sp); \ - /* Check return pointer */ \ - beqz v0, fault; \ - PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \ - /* Preserve our pointer */ \ - move $r_s0, v0; \ - /* Set return value */ \ - move $r_ret, zero; \ - -bpf_slow_path_word_neg: - bpf_is_end_of_data -NESTED(sk_load_word_negative, (6 * SZREG), $r_sp) - bpf_negative_common(4) - jr $r_ra - lw $r_A, 0($r_s0) - END(sk_load_word_negative) - -bpf_slow_path_half_neg: - bpf_is_end_of_data -NESTED(sk_load_half_negative, (6 * SZREG), $r_sp) - bpf_negative_common(2) - jr $r_ra - lhu $r_A, 0($r_s0) - END(sk_load_half_negative) - -bpf_slow_path_byte_neg: - bpf_is_end_of_data -NESTED(sk_load_byte_negative, (6 * SZREG), $r_sp) - bpf_negative_common(1) - jr $r_ra - lbu $r_A, 0($r_s0) - END(sk_load_byte_negative) - -fault: - jr $r_ra - addiu $r_ret, zero, 1 diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 98bf0c222b5f..dfd5a4b1b779 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -22,6 +22,7 @@ #include <asm/byteorder.h> #include <asm/cacheflush.h> #include <asm/cpu-features.h> +#include <asm/isa-rev.h> #include <asm/uasm.h> /* Registers used by JIT */ @@ -125,15 +126,21 @@ static enum reg_val_type get_reg_val_type(const struct jit_ctx *ctx, } /* Simply emit the instruction if the JIT memory space has been allocated */ -#define emit_instr(ctx, func, ...) \ -do { \ - if ((ctx)->target != NULL) { \ - u32 *p = &(ctx)->target[ctx->idx]; \ - uasm_i_##func(&p, ##__VA_ARGS__); \ - } \ - (ctx)->idx++; \ +#define emit_instr_long(ctx, func64, func32, ...) 
\ +do { \ + if ((ctx)->target != NULL) { \ + u32 *p = &(ctx)->target[ctx->idx]; \ + if (IS_ENABLED(CONFIG_64BIT)) \ + uasm_i_##func64(&p, ##__VA_ARGS__); \ + else \ + uasm_i_##func32(&p, ##__VA_ARGS__); \ + } \ + (ctx)->idx++; \ } while (0) +#define emit_instr(ctx, func, ...) \ + emit_instr_long(ctx, func, func, ##__VA_ARGS__) + static unsigned int j_target(struct jit_ctx *ctx, int target_idx) { unsigned long target_va, base_va; @@ -274,17 +281,17 @@ static int gen_int_prologue(struct jit_ctx *ctx) * If RA we are doing a function call and may need * extra 8-byte tmp area. */ - stack_adjust += 16; + stack_adjust += 2 * sizeof(long); if (ctx->flags & EBPF_SAVE_S0) - stack_adjust += 8; + stack_adjust += sizeof(long); if (ctx->flags & EBPF_SAVE_S1) - stack_adjust += 8; + stack_adjust += sizeof(long); if (ctx->flags & EBPF_SAVE_S2) - stack_adjust += 8; + stack_adjust += sizeof(long); if (ctx->flags & EBPF_SAVE_S3) - stack_adjust += 8; + stack_adjust += sizeof(long); if (ctx->flags & EBPF_SAVE_S4) - stack_adjust += 8; + stack_adjust += sizeof(long); BUILD_BUG_ON(MAX_BPF_STACK & 7); locals_size = (ctx->flags & EBPF_SEEN_FP) ? MAX_BPF_STACK : 0; @@ -298,41 +305,49 @@ static int gen_int_prologue(struct jit_ctx *ctx) * On tail call we skip this instruction, and the TCC is * passed in $v1 from the caller. */ - emit_instr(ctx, daddiu, MIPS_R_V1, MIPS_R_ZERO, MAX_TAIL_CALL_CNT); + emit_instr(ctx, addiu, MIPS_R_V1, MIPS_R_ZERO, MAX_TAIL_CALL_CNT); if (stack_adjust) - emit_instr(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, -stack_adjust); + emit_instr_long(ctx, daddiu, addiu, + MIPS_R_SP, MIPS_R_SP, -stack_adjust); else return 0; - store_offset = stack_adjust - 8; + store_offset = stack_adjust - sizeof(long); if (ctx->flags & EBPF_SAVE_RA) { - emit_instr(ctx, sd, MIPS_R_RA, store_offset, MIPS_R_SP); - store_offset -= 8; + emit_instr_long(ctx, sd, sw, + MIPS_R_RA, store_offset, MIPS_R_SP); + store_offset -= sizeof(long); } if (ctx->flags & EBPF_SAVE_S0) { - emit_instr(ctx, sd, MIPS_R_S0, store_offset, MIPS_R_SP); - store_offset -= 8; + emit_instr_long(ctx, sd, sw, + MIPS_R_S0, store_offset, MIPS_R_SP); + store_offset -= sizeof(long); } if (ctx->flags & EBPF_SAVE_S1) { - emit_instr(ctx, sd, MIPS_R_S1, store_offset, MIPS_R_SP); - store_offset -= 8; + emit_instr_long(ctx, sd, sw, + MIPS_R_S1, store_offset, MIPS_R_SP); + store_offset -= sizeof(long); } if (ctx->flags & EBPF_SAVE_S2) { - emit_instr(ctx, sd, MIPS_R_S2, store_offset, MIPS_R_SP); - store_offset -= 8; + emit_instr_long(ctx, sd, sw, + MIPS_R_S2, store_offset, MIPS_R_SP); + store_offset -= sizeof(long); } if (ctx->flags & EBPF_SAVE_S3) { - emit_instr(ctx, sd, MIPS_R_S3, store_offset, MIPS_R_SP); - store_offset -= 8; + emit_instr_long(ctx, sd, sw, + MIPS_R_S3, store_offset, MIPS_R_SP); + store_offset -= sizeof(long); } if (ctx->flags & EBPF_SAVE_S4) { - emit_instr(ctx, sd, MIPS_R_S4, store_offset, MIPS_R_SP); - store_offset -= 8; + emit_instr_long(ctx, sd, sw, + MIPS_R_S4, store_offset, MIPS_R_SP); + store_offset -= sizeof(long); } if ((ctx->flags & EBPF_SEEN_TC) && !(ctx->flags & EBPF_TCC_IN_V1)) - emit_instr(ctx, daddu, MIPS_R_S4, MIPS_R_V1, MIPS_R_ZERO); + emit_instr_long(ctx, daddu, addu, + MIPS_R_S4, MIPS_R_V1, MIPS_R_ZERO); return 0; } @@ -341,7 +356,7 @@ static int build_int_epilogue(struct jit_ctx *ctx, int dest_reg) { const struct bpf_prog *prog = ctx->skf; int stack_adjust = ctx->stack_size; - int store_offset = stack_adjust - 8; + int store_offset = stack_adjust - sizeof(long); enum reg_val_type td; int r0 = MIPS_R_V0; @@ -353,33 +368,40 @@ static 
int build_int_epilogue(struct jit_ctx *ctx, int dest_reg) } if (ctx->flags & EBPF_SAVE_RA) { - emit_instr(ctx, ld, MIPS_R_RA, store_offset, MIPS_R_SP); - store_offset -= 8; + emit_instr_long(ctx, ld, lw, + MIPS_R_RA, store_offset, MIPS_R_SP); + store_offset -= sizeof(long); } if (ctx->flags & EBPF_SAVE_S0) { - emit_instr(ctx, ld, MIPS_R_S0, store_offset, MIPS_R_SP); - store_offset -= 8; + emit_instr_long(ctx, ld, lw, + MIPS_R_S0, store_offset, MIPS_R_SP); + store_offset -= sizeof(long); } if (ctx->flags & EBPF_SAVE_S1) { - emit_instr(ctx, ld, MIPS_R_S1, store_offset, MIPS_R_SP); - store_offset -= 8; + emit_instr_long(ctx, ld, lw, + MIPS_R_S1, store_offset, MIPS_R_SP); + store_offset -= sizeof(long); } if (ctx->flags & EBPF_SAVE_S2) { - emit_instr(ctx, ld, MIPS_R_S2, store_offset, MIPS_R_SP); - store_offset -= 8; + emit_instr_long(ctx, ld, lw, + MIPS_R_S2, store_offset, MIPS_R_SP); + store_offset -= sizeof(long); } if (ctx->flags & EBPF_SAVE_S3) { - emit_instr(ctx, ld, MIPS_R_S3, store_offset, MIPS_R_SP); - store_offset -= 8; + emit_instr_long(ctx, ld, lw, + MIPS_R_S3, store_offset, MIPS_R_SP); + store_offset -= sizeof(long); } if (ctx->flags & EBPF_SAVE_S4) { - emit_instr(ctx, ld, MIPS_R_S4, store_offset, MIPS_R_SP); - store_offset -= 8; + emit_instr_long(ctx, ld, lw, + MIPS_R_S4, store_offset, MIPS_R_SP); + store_offset -= sizeof(long); } emit_instr(ctx, jr, dest_reg); if (stack_adjust) - emit_instr(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, stack_adjust); + emit_instr_long(ctx, daddiu, addiu, + MIPS_R_SP, MIPS_R_SP, stack_adjust); else emit_instr(ctx, nop); @@ -646,6 +668,10 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, s64 t64s; int bpf_op = BPF_OP(insn->code); + if (IS_ENABLED(CONFIG_32BIT) && ((BPF_CLASS(insn->code) == BPF_ALU64) + || (bpf_op == BPF_DW))) + return -EINVAL; + switch (insn->code) { case BPF_ALU64 | BPF_ADD | BPF_K: /* ALU64_IMM */ case BPF_ALU64 | BPF_SUB | BPF_K: /* ALU64_IMM */ @@ -678,8 +704,12 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, if (insn->imm == 1) /* Mult by 1 is a nop */ break; gen_imm_to_reg(insn, MIPS_R_AT, ctx); - emit_instr(ctx, dmultu, MIPS_R_AT, dst); - emit_instr(ctx, mflo, dst); + if (MIPS_ISA_REV >= 6) { + emit_instr(ctx, dmulu, dst, dst, MIPS_R_AT); + } else { + emit_instr(ctx, dmultu, MIPS_R_AT, dst); + emit_instr(ctx, mflo, dst); + } break; case BPF_ALU64 | BPF_NEG | BPF_K: /* ALU64_IMM */ dst = ebpf_to_mips_reg(ctx, insn, dst_reg); @@ -701,8 +731,12 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, if (insn->imm == 1) /* Mult by 1 is a nop */ break; gen_imm_to_reg(insn, MIPS_R_AT, ctx); - emit_instr(ctx, multu, dst, MIPS_R_AT); - emit_instr(ctx, mflo, dst); + if (MIPS_ISA_REV >= 6) { + emit_instr(ctx, mulu, dst, dst, MIPS_R_AT); + } else { + emit_instr(ctx, multu, dst, MIPS_R_AT); + emit_instr(ctx, mflo, dst); + } break; case BPF_ALU | BPF_NEG | BPF_K: /* ALU_IMM */ dst = ebpf_to_mips_reg(ctx, insn, dst_reg); @@ -733,6 +767,13 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, break; } gen_imm_to_reg(insn, MIPS_R_AT, ctx); + if (MIPS_ISA_REV >= 6) { + if (bpf_op == BPF_DIV) + emit_instr(ctx, divu_r6, dst, dst, MIPS_R_AT); + else + emit_instr(ctx, modu, dst, dst, MIPS_R_AT); + break; + } emit_instr(ctx, divu, dst, MIPS_R_AT); if (bpf_op == BPF_DIV) emit_instr(ctx, mflo, dst); @@ -755,6 +796,13 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, break; } gen_imm_to_reg(insn, MIPS_R_AT, ctx); + if 
(MIPS_ISA_REV >= 6) { + if (bpf_op == BPF_DIV) + emit_instr(ctx, ddivu_r6, dst, dst, MIPS_R_AT); + else + emit_instr(ctx, modu, dst, dst, MIPS_R_AT); + break; + } emit_instr(ctx, ddivu, dst, MIPS_R_AT); if (bpf_op == BPF_DIV) emit_instr(ctx, mflo, dst); @@ -820,11 +868,23 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, emit_instr(ctx, and, dst, dst, src); break; case BPF_MUL: - emit_instr(ctx, dmultu, dst, src); - emit_instr(ctx, mflo, dst); + if (MIPS_ISA_REV >= 6) { + emit_instr(ctx, dmulu, dst, dst, src); + } else { + emit_instr(ctx, dmultu, dst, src); + emit_instr(ctx, mflo, dst); + } break; case BPF_DIV: case BPF_MOD: + if (MIPS_ISA_REV >= 6) { + if (bpf_op == BPF_DIV) + emit_instr(ctx, ddivu_r6, + dst, dst, src); + else + emit_instr(ctx, modu, dst, dst, src); + break; + } emit_instr(ctx, ddivu, dst, src); if (bpf_op == BPF_DIV) emit_instr(ctx, mflo, dst); @@ -904,6 +964,13 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, break; case BPF_DIV: case BPF_MOD: + if (MIPS_ISA_REV >= 6) { + if (bpf_op == BPF_DIV) + emit_instr(ctx, divu_r6, dst, dst, src); + else + emit_instr(ctx, modu, dst, dst, src); + break; + } emit_instr(ctx, divu, dst, src); if (bpf_op == BPF_DIV) emit_instr(ctx, mflo, dst); @@ -1007,8 +1074,15 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, emit_instr(ctx, dsubu, MIPS_R_T8, dst, src); emit_instr(ctx, sltu, MIPS_R_AT, dst, src); /* SP known to be non-zero, movz becomes boolean not */ - emit_instr(ctx, movz, MIPS_R_T9, MIPS_R_SP, MIPS_R_T8); - emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_ZERO, MIPS_R_T8); + if (MIPS_ISA_REV >= 6) { + emit_instr(ctx, seleqz, MIPS_R_T9, + MIPS_R_SP, MIPS_R_T8); + } else { + emit_instr(ctx, movz, MIPS_R_T9, + MIPS_R_SP, MIPS_R_T8); + emit_instr(ctx, movn, MIPS_R_T9, + MIPS_R_ZERO, MIPS_R_T8); + } emit_instr(ctx, or, MIPS_R_AT, MIPS_R_T9, MIPS_R_AT); cmp_eq = bpf_op == BPF_JGT; dst = MIPS_R_AT; @@ -1235,7 +1309,7 @@ jeq_common: case BPF_JMP | BPF_CALL: ctx->flags |= EBPF_SAVE_RA; - t64s = (s64)insn->imm + (s64)__bpf_call_base; + t64s = (s64)insn->imm + (long)__bpf_call_base; emit_const_to_reg(ctx, MIPS_R_T9, (u64)t64s); emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9); /* delay slot */ @@ -1367,6 +1441,17 @@ jeq_common: if (src < 0) return src; if (BPF_MODE(insn->code) == BPF_XADD) { + /* + * If mem_off does not fit within the 9 bit ll/sc + * instruction immediate field, use a temp reg. + */ + if (MIPS_ISA_REV >= 6 && + (mem_off >= BIT(8) || mem_off < -BIT(8))) { + emit_instr(ctx, daddiu, MIPS_R_T6, + dst, mem_off); + mem_off = 0; + dst = MIPS_R_T6; + } switch (BPF_SIZE(insn->code)) { case BPF_W: if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) { @@ -1721,7 +1806,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) unsigned int image_size; u8 *image_ptr; - if (!prog->jit_requested || !cpu_has_mips64r2) + if (!prog->jit_requested || MIPS_ISA_REV < 2) return prog; tmp = bpf_jit_blind_constants(prog); diff --git a/arch/mips/pic32/Kconfig b/arch/mips/pic32/Kconfig index e284e89183cc..7acbb50c1dcd 100644 --- a/arch/mips/pic32/Kconfig +++ b/arch/mips/pic32/Kconfig @@ -39,12 +39,12 @@ choice Select the devicetree. 
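A note on the ebpf_jit.c hunks above: every MIPS_ISA_REV >= 6 branch exists because R6 removed the HI/LO register pair, so the legacy two-step sequences (dmultu/multu plus mflo, ddivu/divu plus mflo or mfhi) are replaced by single three-operand instructions (dmulu/mulu, ddivu_r6/divu_r6, modu). A minimal stand-alone sketch of that dispatch follows; the emit helpers here are illustrative printf stubs, not the kernel's uasm emitters:

/* Illustrative only: mirrors the R6-vs-legacy selection in ebpf_jit.c. */
#include <stdio.h>

#define MIPS_ISA_REV 6	/* assumption: build-time ISA revision, cf. <asm/isa-rev.h> */

static void emit(const char *insn)
{
	printf("\t%s\n", insn);	/* stands in for the uasm_i_*() emitters */
}

/* 64-bit multiply: dst *= src */
static void emit_mul64(void)
{
	if (MIPS_ISA_REV >= 6) {
		emit("dmulu  dst, dst, src");	/* R6: result written directly */
	} else {
		emit("dmultu dst, src");	/* result lands in HI/LO */
		emit("mflo   dst");		/* copy the low half back */
	}
}

/* 64-bit unsigned divide or modulo: dst /= src or dst %= src */
static void emit_divmod64(int is_mod)
{
	if (MIPS_ISA_REV >= 6) {
		emit(is_mod ? "modu   dst, dst, src"
			    : "ddivu  dst, dst, src");	/* the ddivu_r6 form */
		return;
	}
	emit("ddivu  dst, src");	/* quotient -> LO, remainder -> HI */
	emit(is_mod ? "mfhi   dst" : "mflo   dst");
}

int main(void)
{
	emit_mul64();
	emit_divmod64(0);
	emit_divmod64(1);
	return 0;
}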
config DTB_PIC32_NONE - bool "None" + bool "None" config DTB_PIC32_MZDA_SK - bool "PIC32MZDA Starter Kit" - depends on PIC32MZDA - select BUILTIN_DTB + bool "PIC32MZDA Starter Kit" + depends on PIC32MZDA + select BUILTIN_DTB endchoice diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 0ede4deb8181..7221df24cb23 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -46,9 +46,7 @@ endif VDSO_LDFLAGS := \ -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 \ $(addprefix -Wl$(comma),$(filter -E%,$(KBUILD_CFLAGS))) \ - -nostdlib -shared \ - $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \ - $(call cc-ldoption, -Wl$(comma)--build-id) + -nostdlib -shared -Wl,--hash-style=sysv -Wl,--build-id GCOV_PROFILE := n UBSAN_SANITIZE := n diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild index 688b6ed26227..f67a327777b5 100644 --- a/arch/nds32/include/asm/Kbuild +++ b/arch/nds32/include/asm/Kbuild @@ -39,7 +39,6 @@ generic-y += preempt.h generic-y += sections.h generic-y += segment.h generic-y += serial.h -generic-y += sizes.h generic-y += switch_to.h generic-y += timex.h generic-y += topology.h diff --git a/arch/nds32/include/asm/elf.h b/arch/nds32/include/asm/elf.h index 95f3ea253e4c..02250626b9f0 100644 --- a/arch/nds32/include/asm/elf.h +++ b/arch/nds32/include/asm/elf.h @@ -10,14 +10,13 @@ #include <asm/ptrace.h> #include <asm/fpu.h> +#include <linux/elf-em.h> typedef unsigned long elf_greg_t; typedef unsigned long elf_freg_t[3]; extern unsigned int elf_hwcap; -#define EM_NDS32 167 - #define R_NDS32_NONE 0 #define R_NDS32_16_RELA 19 #define R_NDS32_32_RELA 20 diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h index 9f52db930c00..ee59c1f9e4fc 100644 --- a/arch/nds32/include/asm/pgtable.h +++ b/arch/nds32/include/asm/pgtable.h @@ -6,7 +6,7 @@ #define __PAGETABLE_PMD_FOLDED 1 #include <asm-generic/4level-fixup.h> -#include <asm-generic/sizes.h> +#include <linux/sizes.h> #include <asm/memory.h> #include <asm/nds32.h> diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h index 671ebd357496..174b8571d362 100644 --- a/arch/nds32/include/asm/syscall.h +++ b/arch/nds32/include/asm/syscall.h @@ -5,6 +5,7 @@ #ifndef _ASM_NDS32_SYSCALL_H #define _ASM_NDS32_SYSCALL_H 1 +#include <uapi/linux/audit.h> #include <linux/err.h> struct task_struct; struct pt_regs; @@ -145,4 +146,12 @@ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, memcpy(®s->uregs[0] + 1, args, 5 * sizeof(args[0])); } + +static inline int +syscall_get_arch(struct task_struct *task) +{ + return IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) + ? 
AUDIT_ARCH_NDS32BE : AUDIT_ARCH_NDS32; +} + #endif /* _ASM_NDS32_SYSCALL_H */ diff --git a/arch/nds32/kernel/head.S b/arch/nds32/kernel/head.S index db64b78b1232..fcefb62606ca 100644 --- a/arch/nds32/kernel/head.S +++ b/arch/nds32/kernel/head.S @@ -7,7 +7,7 @@ #include <asm/asm-offsets.h> #include <asm/page.h> #include <asm/pgtable.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/thread_info.h> #ifdef CONFIG_CPU_BIG_ENDIAN diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c index 1d03633f89a9..1a4ab1b7525f 100644 --- a/arch/nds32/mm/init.c +++ b/arch/nds32/mm/init.c @@ -252,18 +252,6 @@ void __init mem_init(void) return; } -void free_initmem(void) -{ - free_initmem_default(-1); -} - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - free_reserved_area((void *)start, (void *)end, -1, "initrd"); -} -#endif - void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) { diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index ea37394ff3ea..26a9c760a98b 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -23,7 +23,6 @@ config NIOS2 select SPARSE_IRQ select USB_ARCH_HAS_HCD if USB_SUPPORT select CPU_NO_EFFICIENT_FFS - select ARCH_DISCARD_MEMBLOCK select MMU_GATHER_NO_RANGE if MMU config GENERIC_CSUM diff --git a/arch/nios2/include/asm/syscall.h b/arch/nios2/include/asm/syscall.h index d7624ed06efb..c4f3f8b86f28 100644 --- a/arch/nios2/include/asm/syscall.h +++ b/arch/nios2/include/asm/syscall.h @@ -17,6 +17,7 @@ #ifndef __ASM_NIOS2_SYSCALL_H__ #define __ASM_NIOS2_SYSCALL_H__ +#include <uapi/linux/audit.h> #include <linux/err.h> #include <linux/sched.h> @@ -79,4 +80,9 @@ static inline void syscall_set_arguments(struct task_struct *task, regs->r9 = *args; } +static inline int syscall_get_arch(struct task_struct *task) +{ + return AUDIT_ARCH_NIOS2; +} + #endif diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c index 16cea5776b87..2c609c2516b2 100644 --- a/arch/nios2/mm/init.c +++ b/arch/nios2/mm/init.c @@ -82,18 +82,6 @@ void __init mmu_init(void) flush_tlb_all(); } -#ifdef CONFIG_BLK_DEV_INITRD -void __init free_initrd_mem(unsigned long start, unsigned long end) -{ - free_reserved_area((void *)start, (void *)end, -1, "initrd"); -} -#endif - -void __ref free_initmem(void) -{ - free_initmem_default(-1); -} - #define __page_aligned(order) __aligned(PAGE_SIZE << (order)) pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned(PGD_ORDER); pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned(PTE_ORDER); diff --git a/arch/openrisc/include/asm/syscall.h b/arch/openrisc/include/asm/syscall.h index b4ff07c1baed..61de227f53a1 100644 --- a/arch/openrisc/include/asm/syscall.h +++ b/arch/openrisc/include/asm/syscall.h @@ -68,7 +68,7 @@ syscall_set_arguments(struct task_struct *task, struct pt_regs *regs, memcpy(®s->gpr[3], args, 6 * sizeof(args[0])); } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_OPENRISC; } diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index caeb4184e8a6..abe87e54e231 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -223,15 +223,3 @@ void __init mem_init(void) mem_init_done = 1; return; } - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - free_reserved_area((void *)start, (void *)end, -1, "initrd"); -} -#endif - -void free_initmem(void) -{ - free_initmem_default(-1); -} diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 
f1ed8ddfe486..09407ed1aacd 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -36,6 +36,7 @@ config PARISC select GENERIC_STRNCPY_FROM_USER select SYSCTL_ARCH_UNALIGN_ALLOW select SYSCTL_EXCEPTION_TRACE + select ARCH_DISCARD_MEMBLOCK select HAVE_MOD_ARCH_SPECIFIC select VIRT_TO_BUS select MODULES_USE_ELF_RELA @@ -44,6 +45,8 @@ config PARISC select HAVE_DEBUG_STACKOVERFLOW select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HASH + select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_REGS_AND_STACK_ACCESS_API @@ -54,6 +57,9 @@ config PARISC select CPU_NO_EFFICIENT_FFS select NEED_DMA_MAP_STATE select NEED_SG_DMA_LENGTH + select HAVE_ARCH_KGDB + select HAVE_KPROBES + select HAVE_KRETPROBES help The PA-RISC microprocessor is designed by Hewlett-Packard and used @@ -305,21 +311,16 @@ config ARCH_SELECT_MEMORY_MODEL def_bool y depends on 64BIT -config ARCH_DISCONTIGMEM_ENABLE +config ARCH_SPARSEMEM_ENABLE def_bool y depends on 64BIT config ARCH_FLATMEM_ENABLE def_bool y -config ARCH_DISCONTIGMEM_DEFAULT +config ARCH_SPARSEMEM_DEFAULT def_bool y - depends on ARCH_DISCONTIGMEM_ENABLE - -config NODES_SHIFT - int - default "3" - depends on NEED_MULTIPLE_NODES + depends on ARCH_SPARSEMEM_ENABLE source "kernel/Kconfig.hz" diff --git a/arch/parisc/boot/compressed/head.S b/arch/parisc/boot/compressed/head.S index 5aba20fa48aa..e8b798fd0cf0 100644 --- a/arch/parisc/boot/compressed/head.S +++ b/arch/parisc/boot/compressed/head.S @@ -22,7 +22,7 @@ __HEAD ENTRY(startup) - .level LEVEL + .level PA_ASM_LEVEL #define PSW_W_SM 0x200 #define PSW_W_BIT 36 @@ -63,7 +63,7 @@ $bss_loop: load32 BOOTADDR(decompress_kernel),%r3 #ifdef CONFIG_64BIT - .level LEVEL + .level PA_ASM_LEVEL ssm PSW_W_SM, %r0 /* set W-bit */ depdi 0, 31, 32, %r3 #endif @@ -72,7 +72,7 @@ $bss_loop: startup_continue: #ifdef CONFIG_64BIT - .level LEVEL + .level PA_ASM_LEVEL rsm PSW_W_SM, %r0 /* clear W-bit */ #endif diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c index 2556bb181813..2d395998f524 100644 --- a/arch/parisc/boot/compressed/misc.c +++ b/arch/parisc/boot/compressed/misc.c @@ -145,14 +145,13 @@ static int putchar(int c) void __noreturn error(char *x) { - puts("\n\n"); - puts(x); - puts("\n\n -- System halted"); + if (x) puts(x); + puts("\n -- System halted\n"); while (1) /* wait forever */ ; } -static int print_hex(unsigned long num) +static int print_num(unsigned long num, int base) { const char hex[] = "0123456789abcdef"; char str[40]; @@ -160,12 +159,14 @@ static int print_hex(unsigned long num) str[i--] = '\0'; do { - str[i--] = hex[num & 0x0f]; - num >>= 4; + str[i--] = hex[num % base]; + num = num / base; } while (num); - str[i--] = 'x'; - str[i] = '0'; + if (base == 16) { + str[i--] = 'x'; + str[i] = '0'; + } else i++; puts(&str[i]); return 0; @@ -187,8 +188,9 @@ put: if (fmt[++i] == '%') goto put; + print_num(va_arg(args, unsigned long), + fmt[i] == 'x' ? 
16:10); ++i; - print_hex(va_arg(args, unsigned long)); } va_end(args); @@ -327,8 +329,15 @@ unsigned long decompress_kernel(unsigned int started_wide, free_mem_end_ptr = rd_start; #endif - if (free_mem_ptr >= free_mem_end_ptr) - error("Kernel too big for machine."); + if (free_mem_ptr >= free_mem_end_ptr) { + int free_ram; + free_ram = (free_mem_ptr >> 20) + 1; + if (free_ram < 32) + free_ram = 32; + printf("\nKernel requires at least %d MB RAM.\n", + free_ram); + error(NULL); + } #ifdef DEBUG printf("\n"); diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index 37ae4b57c001..a8f9bbef0975 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -14,7 +14,6 @@ CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y -# CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set CONFIG_PA7100LC=y CONFIG_SMP=y diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index b8c7db777144..ed2d8cc94909 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -10,7 +10,6 @@ generic-y += hw_irq.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h -generic-y += kprobes.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index c17ec0ee6e7c..d85738a7bbe6 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -61,14 +61,14 @@ #define LDCW ldcw,co #define BL b,l # ifdef CONFIG_64BIT -# define LEVEL 2.0w +# define PA_ASM_LEVEL 2.0w # else -# define LEVEL 2.0 +# define PA_ASM_LEVEL 2.0 # endif #else #define LDCW ldcw #define BL bl -#define LEVEL 1.1 +#define PA_ASM_LEVEL 1.1 #endif #ifdef __ASSEMBLY__ diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index 006fb939cac8..73ca89a47f49 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -24,9 +24,6 @@ #define __read_mostly __attribute__((__section__(".data..read_mostly"))) -/* Read-only memory is marked before mark_rodata_ro() is called. */ -#define __ro_after_init __read_mostly - void parisc_cache_init(void); /* initializes cache-flushing */ void disable_sr_hashing_asm(int); /* low level support for above */ void disable_sr_hashing(void); /* turns off space register hashing */ @@ -44,22 +41,22 @@ void parisc_setup_cache_timing(void); #define pdtlb(addr) asm volatile("pdtlb 0(%%sr1,%0)" \ ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \ - : : "r" (addr)) + : : "r" (addr) : "memory") #define pitlb(addr) asm volatile("pitlb 0(%%sr1,%0)" \ ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \ ALTERNATIVE(ALT_COND_NO_SPLIT_TLB, INSN_NOP) \ - : : "r" (addr)) + : : "r" (addr) : "memory") #define pdtlb_kernel(addr) asm volatile("pdtlb 0(%0)" \ ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \ - : : "r" (addr)) + : : "r" (addr) : "memory") #define asm_io_fdc(addr) asm volatile("fdc %%r0(%0)" \ ALTERNATIVE(ALT_COND_NO_DCACHE, INSN_NOP) \ ALTERNATIVE(ALT_COND_NO_IOC_FDC, INSN_NOP) \ - : : "r" (addr)) + : : "r" (addr) : "memory") #define asm_io_sync() asm volatile("sync" \ ALTERNATIVE(ALT_COND_NO_DCACHE, INSN_NOP) \ - ALTERNATIVE(ALT_COND_NO_IOC_FDC, INSN_NOP) :: ) + ALTERNATIVE(ALT_COND_NO_IOC_FDC, INSN_NOP) :::"memory") #endif /* ! 
__ASSEMBLY__ */ diff --git a/arch/parisc/include/asm/fixmap.h b/arch/parisc/include/asm/fixmap.h index f7c3a0905de4..288da73d4cc0 100644 --- a/arch/parisc/include/asm/fixmap.h +++ b/arch/parisc/include/asm/fixmap.h @@ -15,17 +15,34 @@ * from areas congruently mapped with user space. It is 8MB large * and must be 16MB aligned */ #define TMPALIAS_MAP_START ((__PAGE_OFFSET) - 16*1024*1024) + +#define FIXMAP_SIZE (FIX_BITMAP_COUNT << PAGE_SHIFT) +#define FIXMAP_START (TMPALIAS_MAP_START - FIXMAP_SIZE) /* This is the kernel area for all maps (vmalloc, dma etc.) most * usually, it extends up to TMPALIAS_MAP_START. Virtual addresses * 0..GATEWAY_PAGE_SIZE are reserved for the gateway page */ #define KERNEL_MAP_START (GATEWAY_PAGE_SIZE) -#define KERNEL_MAP_END (TMPALIAS_MAP_START) +#define KERNEL_MAP_END (FIXMAP_START) #ifndef __ASSEMBLY__ + + +enum fixed_addresses { + /* Support writing RO kernel text via kprobes, jump labels, etc. */ + FIX_TEXT_POKE0, + FIX_BITMAP_COUNT +}; + extern void *parisc_vmalloc_start; #define PCXL_DMA_MAP_SIZE (8*1024*1024) #define VMALLOC_START ((unsigned long)parisc_vmalloc_start) #define VMALLOC_END (KERNEL_MAP_END) + +#define __fix_to_virt(_x) (FIXMAP_START + ((_x) << PAGE_SHIFT)) + +void set_fixmap(enum fixed_addresses idx, phys_addr_t phys); +void clear_fixmap(enum fixed_addresses idx); + #endif /*__ASSEMBLY__*/ #endif /*_ASM_FIXMAP_H*/ diff --git a/arch/parisc/include/asm/hardware.h b/arch/parisc/include/asm/hardware.h index d6e1ed145031..9d3d7737c58b 100644 --- a/arch/parisc/include/asm/hardware.h +++ b/arch/parisc/include/asm/hardware.h @@ -120,7 +120,7 @@ extern void get_pci_node_path(struct pci_dev *dev, struct hardware_path *path); extern void init_parisc_bus(void); extern struct device *hwpath_to_device(struct hardware_path *modpath); extern void device_to_hwpath(struct device *dev, struct hardware_path *path); - +extern int machine_has_merced_bus(void); /* inventory.c: */ extern void do_memory_inventory(void); diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h new file mode 100644 index 000000000000..7efb1aa2f7f8 --- /dev/null +++ b/arch/parisc/include/asm/jump_label.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_PARISC_JUMP_LABEL_H +#define _ASM_PARISC_JUMP_LABEL_H + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> +#include <asm/assembly.h> + +#define JUMP_LABEL_NOP_SIZE 4 + +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".word 1b - ., %l[l_yes] - .\n\t" + __stringify(ASM_ULONG_INSN) " %c0 - .\n\t" + ".popsection\n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\n\t" + "b,n %l[l_yes]\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".word 1b - ., %l[l_yes] - .\n\t" + __stringify(ASM_ULONG_INSN) " %c0 - .\n\t" + ".popsection\n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +#endif /* __ASSEMBLY__ */ +#endif diff --git a/arch/parisc/include/asm/kgdb.h b/arch/parisc/include/asm/kgdb.h new file mode 100644 index 000000000000..f23e7f8f13a5 --- /dev/null +++ b/arch/parisc/include/asm/kgdb.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * PA-RISC KGDB support + * + * Copyright (c) 2019 Sven Schnelle <svens@stackframe.org> + * + */ + +#ifndef 
__PARISC_KGDB_H__ +#define __PARISC_KGDB_H__ + +#define BREAK_INSTR_SIZE 4 +#define PARISC_KGDB_COMPILED_BREAK_INSN 0x3ffc01f +#define PARISC_KGDB_BREAK_INSN 0x3ffa01f + + +#define NUMREGBYTES sizeof(struct parisc_gdb_regs) +#define BUFMAX 4096 + +#define CACHE_FLUSH_IS_SAFE 1 + +#ifndef __ASSEMBLY__ + +static inline void arch_kgdb_breakpoint(void) +{ + asm(".word %0" : : "i"(PARISC_KGDB_COMPILED_BREAK_INSN) : "memory"); +} + +struct parisc_gdb_regs { + unsigned long gpr[32]; + unsigned long sar; + unsigned long iaoq_f; + unsigned long iasq_f; + unsigned long iaoq_b; + unsigned long iasq_b; + unsigned long eiem; + unsigned long iir; + unsigned long isr; + unsigned long ior; + unsigned long ipsw; + unsigned long __unused0; + unsigned long sr4; + unsigned long sr0; + unsigned long sr1; + unsigned long sr2; + unsigned long sr3; + unsigned long sr5; + unsigned long sr6; + unsigned long sr7; + unsigned long cr0; + unsigned long pid1; + unsigned long pid2; + unsigned long scrccr; + unsigned long pid3; + unsigned long pid4; + unsigned long cr24; + unsigned long cr25; + unsigned long cr26; + unsigned long cr27; + unsigned long cr28; + unsigned long cr29; + unsigned long cr30; + + u64 fr[32]; +}; + +#endif +#endif diff --git a/arch/parisc/include/asm/kprobes.h b/arch/parisc/include/asm/kprobes.h new file mode 100644 index 000000000000..e09cf2deeafe --- /dev/null +++ b/arch/parisc/include/asm/kprobes.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/parisc/include/asm/kprobes.h + * + * PA-RISC kprobes implementation + * + * Copyright (c) 2019 Sven Schnelle <svens@stackframe.org> + */ + +#ifndef _PARISC_KPROBES_H +#define _PARISC_KPROBES_H + +#ifdef CONFIG_KPROBES + +#include <asm-generic/kprobes.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/notifier.h> + +#define PARISC_KPROBES_BREAK_INSN 0x3ff801f +#define __ARCH_WANT_KPROBES_INSN_SLOT +#define MAX_INSN_SIZE 1 + +typedef u32 kprobe_opcode_t; +struct kprobe; + +void arch_remove_kprobe(struct kprobe *p); + +#define flush_insn_slot(p) \ + flush_icache_range((unsigned long)&(p)->ainsn.insn[0], \ + (unsigned long)&(p)->ainsn.insn[0] + \ + sizeof(kprobe_opcode_t)) + +#define kretprobe_blacklist_size 0 + +struct arch_specific_insn { + kprobe_opcode_t *insn; +}; + +struct prev_kprobe { + struct kprobe *kp; + unsigned long status; +}; + +struct kprobe_ctlblk { + unsigned int kprobe_status; + struct prev_kprobe prev_kprobe; + unsigned long iaoq[2]; +}; + +int __kprobes parisc_kprobe_break_handler(struct pt_regs *regs); +int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs); + +#endif /* CONFIG_KPROBES */ +#endif /* _PARISC_KPROBES_H */ diff --git a/arch/parisc/include/asm/mmzone.h b/arch/parisc/include/asm/mmzone.h index fafa3893fd70..8d390406d862 100644 --- a/arch/parisc/include/asm/mmzone.h +++ b/arch/parisc/include/asm/mmzone.h @@ -2,62 +2,6 @@ #ifndef _PARISC_MMZONE_H #define _PARISC_MMZONE_H -#define MAX_PHYSMEM_RANGES 8 /* Fix the size for now (current known max is 3) */ +#define MAX_PHYSMEM_RANGES 4 /* Fix the size for now (current known max is 3) */ -#ifdef CONFIG_DISCONTIGMEM - -extern int npmem_ranges; - -struct node_map_data { - pg_data_t pg_data; -}; - -extern struct node_map_data node_data[]; - -#define NODE_DATA(nid) (&node_data[nid].pg_data) - -/* We have these possible memory map layouts: - * Astro: 0-3.75, 67.75-68, 4-64 - * zx1: 0-1, 257-260, 4-256 - * Stretch (N-class): 0-2, 4-32, 34-xxx - */ - -/* Since each 1GB can only belong to one region (node), we can create - * an index table 
for pfn to nid lookup; each entry in pfnnid_map - * represents 1GB, and contains the node that the memory belongs to. */ - -#define PFNNID_SHIFT (30 - PAGE_SHIFT) -#define PFNNID_MAP_MAX 512 /* support 512GB */ -extern signed char pfnnid_map[PFNNID_MAP_MAX]; - -#ifndef CONFIG_64BIT -#define pfn_is_io(pfn) ((pfn & (0xf0000000UL >> PAGE_SHIFT)) == (0xf0000000UL >> PAGE_SHIFT)) -#else -/* io can be 0xf0f0f0f0f0xxxxxx or 0xfffffffff0000000 */ -#define pfn_is_io(pfn) ((pfn & (0xf000000000000000UL >> PAGE_SHIFT)) == (0xf000000000000000UL >> PAGE_SHIFT)) -#endif - -static inline int pfn_to_nid(unsigned long pfn) -{ - unsigned int i; - - if (unlikely(pfn_is_io(pfn))) - return 0; - - i = pfn >> PFNNID_SHIFT; - BUG_ON(i >= ARRAY_SIZE(pfnnid_map)); - - return pfnnid_map[i]; -} - -static inline int pfn_valid(int pfn) -{ - int nid = pfn_to_nid(pfn); - - if (nid >= 0) - return (pfn < node_end_pfn(nid)); - return 0; -} - -#endif #endif /* _PARISC_MMZONE_H */ diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h index b77f49ce6220..93caf17ac5e2 100644 --- a/arch/parisc/include/asm/page.h +++ b/arch/parisc/include/asm/page.h @@ -147,9 +147,9 @@ extern int npmem_ranges; #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM #define pfn_valid(pfn) ((pfn) < max_mapnr) -#endif /* CONFIG_DISCONTIGMEM */ +#endif #ifdef CONFIG_HUGETLB_PAGE #define HPAGE_SHIFT PMD_SHIFT /* fixed for transparent huge pages */ diff --git a/arch/parisc/include/asm/patch.h b/arch/parisc/include/asm/patch.h new file mode 100644 index 000000000000..685b58a13968 --- /dev/null +++ b/arch/parisc/include/asm/patch.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _PARISC_KERNEL_PATCH_H +#define _PARISC_KERNEL_PATCH_H + +/* stop machine and patch kernel text */ +void patch_text(void *addr, unsigned int insn); + +/* patch kernel text with machine already stopped (e.g. in kgdb) */ +void __patch_text(void *addr, unsigned int insn); + +#endif diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index d05c678c77c4..ea75cc966dae 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -41,6 +41,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) __pgd_val_set(*pgd, PxD_FLAG_ATTACHED); #endif } + spin_lock_init(pgd_spinlock(actual_pgd)); return actual_pgd; } diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index c7bb74e22436..a39b079e73f2 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -17,7 +17,7 @@ #include <asm/processor.h> #include <asm/cache.h> -extern spinlock_t pa_tlb_lock; +static inline spinlock_t *pgd_spinlock(pgd_t *); /* * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel @@ -34,16 +34,46 @@ extern spinlock_t pa_tlb_lock; */ #define kern_addr_valid(addr) (1) -/* Purge data and instruction TLB entries. Must be called holding - * the pa_tlb_lock. The TLB purge instructions are slow on SMP - * machines since the purge must be broadcast to all CPUs. +/* This is for the serialization of PxTLB broadcasts. At least on the N class + * systems, only one PxTLB inter processor broadcast can be active at any one + * time on the Merced bus. + + * PTE updates are protected by locks in the PMD. 
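
The pgtable.h hunk above splits the old single pa_tlb_lock into two separate concerns: a per-pagetable spinlock (pgd_spinlock()) guarding PTE updates, and a global flush lock that is taken only when pa_serialize_tlb_flushes is set, i.e. on Merced-bus machines. As a minimal sketch of the new scheme (example_set_pte() is a hypothetical caller; pgd_spinlock(), purge_tlb_entries() and the purge_tlb_start/end macros are the ones defined in this hunk, and this mirrors the set_pte_at() definition below):

static void example_set_pte(struct mm_struct *mm, unsigned long addr,
                            pte_t *ptep, pte_t pteval)
{
        unsigned long flags;

        /* per-pagetable lock: serializes updates to this PGD's PTEs */
        spin_lock_irqsave(pgd_spinlock(mm->pgd), flags);
        set_pte(ptep, pteval);
        /* purge_tlb_entries() brackets the purge with purge_tlb_start()/
         * purge_tlb_end(), which only take the global pa_tlb_flush_lock
         * when pa_serialize_tlb_flushes is set */
        purge_tlb_entries(mm, addr);
        spin_unlock_irqrestore(pgd_spinlock(mm->pgd), flags);
}
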
+ */ +extern spinlock_t pa_tlb_flush_lock; +extern spinlock_t pa_swapper_pg_lock; +#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) +extern int pa_serialize_tlb_flushes; +#else +#define pa_serialize_tlb_flushes (0) +#endif + +#define purge_tlb_start(flags) do { \ + if (pa_serialize_tlb_flushes) \ + spin_lock_irqsave(&pa_tlb_flush_lock, flags); \ + else \ + local_irq_save(flags); \ + } while (0) +#define purge_tlb_end(flags) do { \ + if (pa_serialize_tlb_flushes) \ + spin_unlock_irqrestore(&pa_tlb_flush_lock, flags); \ + else \ + local_irq_restore(flags); \ + } while (0) + +/* Purge data and instruction TLB entries. The TLB purge instructions + * are slow on SMP machines since the purge must be broadcast to all CPUs. */ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) { + unsigned long flags; + + purge_tlb_start(flags); mtsp(mm->context, 1); pdtlb(addr); pitlb(addr); + purge_tlb_end(flags); } /* Certain architectures need to do special things when PTEs @@ -59,11 +89,11 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) do { \ pte_t old_pte; \ unsigned long flags; \ - spin_lock_irqsave(&pa_tlb_lock, flags); \ + spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags);\ old_pte = *ptep; \ set_pte(ptep, pteval); \ purge_tlb_entries(mm, addr); \ - spin_unlock_irqrestore(&pa_tlb_lock, flags); \ + spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags);\ } while (0) #endif /* !__ASSEMBLY__ */ @@ -88,10 +118,10 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) #if CONFIG_PGTABLE_LEVELS == 3 #define PGD_ORDER 1 /* Number of pages per pgd */ #define PMD_ORDER 1 /* Number of pages per pmd */ -#define PGD_ALLOC_ORDER 2 /* first pgd contains pmd */ +#define PGD_ALLOC_ORDER (2 + 1) /* first pgd contains pmd */ #else #define PGD_ORDER 1 /* Number of pages per pgd */ -#define PGD_ALLOC_ORDER PGD_ORDER +#define PGD_ALLOC_ORDER (PGD_ORDER + 1) #endif /* Definitions for 3rd level (we use PLD here for Page Lower directory @@ -459,6 +489,15 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *); #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +static inline spinlock_t *pgd_spinlock(pgd_t *pgd) +{ + if (unlikely(pgd == swapper_pg_dir)) + return &pa_swapper_pg_lock; + return (spinlock_t *)((char *)pgd + (PAGE_SIZE << (PGD_ALLOC_ORDER - 1))); +} + + static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pte_t pte; @@ -467,15 +506,15 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned if (!pte_young(*ptep)) return 0; - spin_lock_irqsave(&pa_tlb_lock, flags); + spin_lock_irqsave(pgd_spinlock(vma->vm_mm->pgd), flags); pte = *ptep; if (!pte_young(pte)) { - spin_unlock_irqrestore(&pa_tlb_lock, flags); + spin_unlock_irqrestore(pgd_spinlock(vma->vm_mm->pgd), flags); return 0; } set_pte(ptep, pte_mkold(pte)); purge_tlb_entries(vma->vm_mm, addr); - spin_unlock_irqrestore(&pa_tlb_lock, flags); + spin_unlock_irqrestore(pgd_spinlock(vma->vm_mm->pgd), flags); return 1; } @@ -485,11 +524,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t old_pte; unsigned long flags; - spin_lock_irqsave(&pa_tlb_lock, flags); + spin_lock_irqsave(pgd_spinlock(mm->pgd), flags); old_pte = *ptep; set_pte(ptep, __pte(0)); purge_tlb_entries(mm, addr); - spin_unlock_irqrestore(&pa_tlb_lock, flags); + spin_unlock_irqrestore(pgd_spinlock(mm->pgd), 
flags); return old_pte; } @@ -497,10 +536,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { unsigned long flags; - spin_lock_irqsave(&pa_tlb_lock, flags); + spin_lock_irqsave(pgd_spinlock(mm->pgd), flags); set_pte(ptep, pte_wrprotect(*ptep)); purge_tlb_entries(mm, addr); - spin_unlock_irqrestore(&pa_tlb_lock, flags); + spin_unlock_irqrestore(pgd_spinlock(mm->pgd), flags); } #define pte_same(A,B) (pte_val(A) == pte_val(B)) diff --git a/arch/parisc/include/asm/ptrace.h b/arch/parisc/include/asm/ptrace.h index 9ff033d261ab..143fb2a89dd8 100644 --- a/arch/parisc/include/asm/ptrace.h +++ b/arch/parisc/include/asm/ptrace.h @@ -37,4 +37,17 @@ extern int regs_query_register_offset(const char *name); extern const char *regs_query_register_name(unsigned int offset); #define MAX_REG_OFFSET (offsetof(struct pt_regs, ipsw)) +#define kernel_stack_pointer(regs) ((regs)->gr[30]) + +static inline unsigned long regs_get_register(struct pt_regs *regs, + unsigned int offset) +{ + if (unlikely(offset > MAX_REG_OFFSET)) + return 0; + return *(unsigned long *)((unsigned long)regs + offset); +} + +unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n); +int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr); + #endif diff --git a/arch/parisc/include/asm/sparsemem.h b/arch/parisc/include/asm/sparsemem.h new file mode 100644 index 000000000000..b5c3a79045b4 --- /dev/null +++ b/arch/parisc/include/asm/sparsemem.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_PARISC_SPARSEMEM_H +#define ASM_PARISC_SPARSEMEM_H + +/* We have these possible memory map layouts: + * Astro: 0-3.75, 67.75-68, 4-64 + * zx1: 0-1, 257-260, 4-256 + * Stretch (N-class): 0-2, 4-32, 34-xxx + */ + +#define MAX_PHYSMEM_BITS 39 /* 512 GB */ +#define SECTION_SIZE_BITS 27 /* 128 MB */ + +#endif diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index 8a63515f03bf..197d2247e4db 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -37,7 +37,11 @@ static inline void arch_spin_unlock(arch_spinlock_t *x) volatile unsigned int *a; a = __ldcw_align(x); +#ifdef CONFIG_SMP + (void) __ldcw(a); +#else mb(); +#endif *a = 1; } diff --git a/arch/parisc/include/asm/syscall.h b/arch/parisc/include/asm/syscall.h index 62a6d477fae0..80757e43cf2c 100644 --- a/arch/parisc/include/asm/syscall.h +++ b/arch/parisc/include/asm/syscall.h @@ -48,11 +48,11 @@ static inline void syscall_rollback(struct task_struct *task, /* do nothing */ } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { int arch = AUDIT_ARCH_PARISC; #ifdef CONFIG_64BIT - if (!is_compat_task()) + if (!__is_compat_task(task)) arch = AUDIT_ARCH_PARISC64; #endif return arch; diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h index 6804374efa66..c5ded01d45be 100644 --- a/arch/parisc/include/asm/tlbflush.h +++ b/arch/parisc/include/asm/tlbflush.h @@ -8,21 +8,6 @@ #include <linux/sched.h> #include <asm/mmu_context.h> - -/* This is for the serialisation of PxTLB broadcasts. At least on the - * N class systems, only one PxTLB inter processor broadcast can be - * active at any one time on the Merced bus. This tlb purge - * synchronisation is fairly lightweight and harmless so we activate - * it on all systems not just the N class. 
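
One subtle change in the spinlock.h hunk above (mirrored by the LDCW barriers that replace sync in the LWS compare-and-swap paths of syscall.S further down): on PA-RISC a plain store does not order prior memory accesses, so an ldcw on the lock word is used as the release barrier on SMP instead of mb(). A hedged sketch of the idiom, with hypothetical example_lock()/example_unlock() names (the real code is arch_spin_unlock() above and the existing parisc lock loop):

static inline void example_lock(arch_spinlock_t *x)
{
        volatile unsigned int *a = __ldcw_align(x);

        while (__ldcw(a) == 0)          /* ldcw returns 0 while locked */
                while (*a == 0)
                        cpu_relax();
}

static inline void example_unlock(arch_spinlock_t *x)
{
        volatile unsigned int *a = __ldcw_align(x);

#ifdef CONFIG_SMP
        /* ldcw acts as the barrier: it drains outstanding accesses
         * before the releasing store below */
        (void) __ldcw(a);
#else
        mb();
#endif
        *a = 1;                         /* release the lock */
}
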
- - * It is also used to ensure PTE updates are atomic and consistent - * with the TLB. - */ -extern spinlock_t pa_tlb_lock; - -#define purge_tlb_start(flags) spin_lock_irqsave(&pa_tlb_lock, flags) -#define purge_tlb_end(flags) spin_unlock_irqrestore(&pa_tlb_lock, flags) - extern void flush_tlb_all(void); extern void flush_tlb_all_local(void *); @@ -79,13 +64,6 @@ static inline void flush_tlb_mm(struct mm_struct *mm) static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - unsigned long flags, sid; - - sid = vma->vm_mm->context; - purge_tlb_start(flags); - mtsp(sid, 1); - pdtlb(addr); - pitlb(addr); - purge_tlb_end(flags); + purge_tlb_entries(vma->vm_mm, addr); } #endif diff --git a/arch/parisc/include/uapi/asm/sockios.h b/arch/parisc/include/uapi/asm/sockios.h deleted file mode 100644 index 66a3ba64d53f..000000000000 --- a/arch/parisc/include/uapi/asm/sockios.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef __ARCH_PARISC_SOCKIOS__ -#define __ARCH_PARISC_SOCKIOS__ - -/* Socket-level I/O control calls. */ -#define FIOSETOWN 0x8901 -#define SIOCSPGRP 0x8902 -#define FIOGETOWN 0x8903 -#define SIOCGPGRP 0x8904 -#define SIOCATMARK 0x8905 -#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ -#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ - -#endif diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index 8e5f1ab65c68..fc0df5c44468 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -9,7 +9,8 @@ obj-y := cache.o pacache.o setup.o pdt.o traps.o time.o irq.o \ pa7300lc.o syscall.o entry.o sys_parisc.o firmware.o \ ptrace.o hardware.o inventory.o drivers.o alternative.o \ signal.o hpmc.o real2.o parisc_ksyms.o unaligned.o \ - process.o processor.o pdc_cons.o pdc_chassis.o unwind.o + process.o processor.o pdc_cons.o pdc_chassis.o unwind.o \ + patch.o ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities @@ -32,3 +33,6 @@ obj-$(CONFIG_64BIT) += perf.o perf_asm.o $(obj64-y) obj-$(CONFIG_PARISC_CPU_TOPOLOGY) += topology.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o +obj-$(CONFIG_JUMP_LABEL) += jump_label.o +obj-$(CONFIG_KGDB) += kgdb.o +obj-$(CONFIG_KPROBES) += kprobes.o diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 804880efa11e..a82b3eaa5398 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -29,9 +29,9 @@ #include <asm/sections.h> #include <asm/shmparam.h> -int split_tlb __read_mostly; -int dcache_stride __read_mostly; -int icache_stride __read_mostly; +int split_tlb __ro_after_init; +int dcache_stride __ro_after_init; +int icache_stride __ro_after_init; EXPORT_SYMBOL(dcache_stride); void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); @@ -40,16 +40,23 @@ void purge_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr); -/* On some machines (e.g. ones with the Merced bus), there can be +/* On some machines (i.e., ones with the Merced bus), there can be * only a single PxTLB broadcast at a time; this must be guaranteed - * by software. We put a spinlock around all TLB flushes to - * ensure this. + * by software. We need a spinlock around all TLB flushes to ensure + * this. */ -DEFINE_SPINLOCK(pa_tlb_lock); +DEFINE_SPINLOCK(pa_tlb_flush_lock); -struct pdc_cache_info cache_info __read_mostly; +/* Swapper page setup lock. 
*/ +DEFINE_SPINLOCK(pa_swapper_pg_lock); + +#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) +int pa_serialize_tlb_flushes __ro_after_init; +#endif + +struct pdc_cache_info cache_info __ro_after_init; #ifndef CONFIG_PA20 -static struct pdc_btlb_info btlb_info __read_mostly; +static struct pdc_btlb_info btlb_info __ro_after_init; #endif #ifdef CONFIG_SMP @@ -374,10 +381,10 @@ EXPORT_SYMBOL(flush_data_cache_local); EXPORT_SYMBOL(flush_kernel_icache_range_asm); #define FLUSH_THRESHOLD 0x80000 /* 0.5MB */ -static unsigned long parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD; +static unsigned long parisc_cache_flush_threshold __ro_after_init = FLUSH_THRESHOLD; #define FLUSH_TLB_THRESHOLD (16*1024) /* 16 KiB minimum TLB threshold */ -static unsigned long parisc_tlb_flush_threshold __read_mostly = FLUSH_TLB_THRESHOLD; +static unsigned long parisc_tlb_flush_threshold __ro_after_init = FLUSH_TLB_THRESHOLD; void __init parisc_setup_cache_timing(void) { diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 5eb979d04b90..00a181f1ecc6 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -38,9 +38,10 @@ #include <asm/io.h> #include <asm/pdc.h> #include <asm/parisc-device.h> +#include <asm/ropes.h> /* See comments in include/asm-parisc/pci.h */ -const struct dma_map_ops *hppa_dma_ops __read_mostly; +const struct dma_map_ops *hppa_dma_ops __ro_after_init; EXPORT_SYMBOL(hppa_dma_ops); static struct device root = { @@ -257,6 +258,30 @@ static struct parisc_device *find_device_by_addr(unsigned long hpa) return ret ? d.dev : NULL; } +static int __init is_IKE_device(struct device *dev, void *data) +{ + struct parisc_device *pdev = to_parisc_device(dev); + + if (!check_dev(dev)) + return 0; + if (pdev->id.hw_type != HPHW_BCPORT) + return 0; + if (IS_IKE(pdev) || + (pdev->id.hversion == REO_MERCED_PORT) || + (pdev->id.hversion == REOG_MERCED_PORT)) { + return 1; + } + return 0; +} + +int __init machine_has_merced_bus(void) +{ + int ret; + + ret = for_each_padev(is_IKE_device, NULL); + return ret ? 1 : 0; +} + /** * find_pa_parent_type - Find a parent of a specific type * @dev: The device to start searching from diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index d5eb19efa65b..a1fc04570ade 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -50,12 +50,8 @@ .import pa_tlb_lock,data .macro load_pa_tlb_lock reg -#if __PA_LDCW_ALIGNMENT > 4 - load32 PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg - depi 0,31,__PA_LDCW_ALIGN_ORDER, \reg -#else - load32 PA(pa_tlb_lock), \reg -#endif + mfctl %cr25,\reg + addil L%(PAGE_SIZE << (PGD_ALLOC_ORDER - 1)),\reg .endm /* space_to_prot macro creates a prot id from a space id */ @@ -471,8 +467,9 @@ nop LDREG 0(\ptp),\pte bb,<,n \pte,_PAGE_PRESENT_BIT,3f + LDCW 0(\tmp),\tmp1 b \fault - stw,ma \spc,0(\tmp) + stw \spc,0(\tmp) 99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) #endif 2: LDREG 0(\ptp),\pte @@ -481,20 +478,22 @@ .endm /* Release pa_tlb_lock lock without reloading lock address. */ - .macro tlb_unlock0 spc,tmp + .macro tlb_unlock0 spc,tmp,tmp1 #ifdef CONFIG_SMP 98: or,COND(=) %r0,\spc,%r0 - stw,ma \spc,0(\tmp) + LDCW 0(\tmp),\tmp1 + or,COND(=) %r0,\spc,%r0 + stw \spc,0(\tmp) 99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) #endif .endm /* Release pa_tlb_lock lock. 
*/ - .macro tlb_unlock1 spc,tmp + .macro tlb_unlock1 spc,tmp,tmp1 #ifdef CONFIG_SMP 98: load_pa_tlb_lock \tmp 99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) - tlb_unlock0 \spc,\tmp + tlb_unlock0 \spc,\tmp,\tmp1 #endif .endm @@ -1177,7 +1176,7 @@ dtlb_miss_20w: idtlbt pte,prot - tlb_unlock1 spc,t0 + tlb_unlock1 spc,t0,t1 rfir nop @@ -1203,7 +1202,7 @@ nadtlb_miss_20w: idtlbt pte,prot - tlb_unlock1 spc,t0 + tlb_unlock1 spc,t0,t1 rfir nop @@ -1237,7 +1236,7 @@ dtlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - tlb_unlock1 spc,t0 + tlb_unlock1 spc,t0,t1 rfir nop @@ -1270,7 +1269,7 @@ nadtlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - tlb_unlock1 spc,t0 + tlb_unlock1 spc,t0,t1 rfir nop @@ -1299,7 +1298,7 @@ dtlb_miss_20: idtlbt pte,prot - tlb_unlock1 spc,t0 + tlb_unlock1 spc,t0,t1 rfir nop @@ -1327,7 +1326,7 @@ nadtlb_miss_20: idtlbt pte,prot - tlb_unlock1 spc,t0 + tlb_unlock1 spc,t0,t1 rfir nop @@ -1434,7 +1433,7 @@ itlb_miss_20w: iitlbt pte,prot - tlb_unlock1 spc,t0 + tlb_unlock1 spc,t0,t1 rfir nop @@ -1458,7 +1457,7 @@ naitlb_miss_20w: iitlbt pte,prot - tlb_unlock1 spc,t0 + tlb_unlock1 spc,t0,t1 rfir nop @@ -1492,7 +1491,7 @@ itlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - tlb_unlock1 spc,t0 + tlb_unlock1 spc,t0,t1 rfir nop @@ -1516,7 +1515,7 @@ naitlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - tlb_unlock1 spc,t0 + tlb_unlock1 spc,t0,t1 rfir nop @@ -1546,7 +1545,7 @@ itlb_miss_20: iitlbt pte,prot - tlb_unlock1 spc,t0 + tlb_unlock1 spc,t0,t1 rfir nop @@ -1566,7 +1565,7 @@ naitlb_miss_20: iitlbt pte,prot - tlb_unlock1 spc,t0 + tlb_unlock1 spc,t0,t1 rfir nop @@ -1596,7 +1595,7 @@ dbit_trap_20w: idtlbt pte,prot - tlb_unlock0 spc,t0 + tlb_unlock0 spc,t0,t1 rfir nop #else @@ -1622,7 +1621,7 @@ dbit_trap_11: mtsp t1, %sr1 /* Restore sr1 */ - tlb_unlock0 spc,t0 + tlb_unlock0 spc,t0,t1 rfir nop @@ -1642,7 +1641,7 @@ dbit_trap_20: idtlbt pte,prot - tlb_unlock0 spc,t0 + tlb_unlock0 spc,t0,t1 rfir nop #endif diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 7a17551ea31e..f01e102bbfa2 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -87,7 +87,7 @@ extern unsigned long pdc_result2[NUM_PDC_RESULT]; /* Firmware needs to be initially set to narrow to determine the * actual firmware width. */ -int parisc_narrow_firmware __read_mostly = 1; +int parisc_narrow_firmware __ro_after_init = 1; #endif /* On most currently-supported platforms, IODC I/O calls are 32-bit calls diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index fbb4e43fda05..951a339369dd 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -22,7 +22,7 @@ #include <linux/linkage.h> #include <linux/init.h> - .level LEVEL + .level PA_ASM_LEVEL __INITDATA ENTRY(boot_args) @@ -258,7 +258,7 @@ stext_pdc_ret: ldo R%PA(fault_vector_11)(%r10),%r10 $is_pa20: - .level LEVEL /* restore 1.1 || 2.0w */ + .level PA_ASM_LEVEL /* restore 1.1 || 2.0w */ #endif /*!CONFIG_64BIT*/ load32 PA(fault_vector_20),%r10 @@ -329,6 +329,19 @@ smp_slave_stext: mtsp %r0,%sr6 mtsp %r0,%sr7 +#ifdef CONFIG_64BIT + /* + * Enable Wide mode early, in case the task_struct for the idle + * task in smp_init_current_idle_task was allocated above 4GB. 
+ */ +1: mfia %rp /* clear upper part of pcoq */ + ldo 2f-1b(%rp),%rp + depdi 0,31,32,%rp + bv (%rp) + ssm PSW_SM_W,%r0 +2: +#endif + /* Initialize the SP - monarch sets up smp_init_current_idle_task */ load32 PA(smp_init_current_idle_task),%sp LDREG 0(%sp),%sp /* load task address */ @@ -363,7 +376,7 @@ smp_slave_stext: ENDPROC(parisc_kernel_start) #ifndef CONFIG_64BIT - .section .data..read_mostly + .section .data..ro_after_init .align 4 .export $global$,data diff --git a/arch/parisc/kernel/inventory.c b/arch/parisc/kernel/inventory.c index 35d05fdd7483..3f4a91c0b805 100644 --- a/arch/parisc/kernel/inventory.c +++ b/arch/parisc/kernel/inventory.c @@ -31,6 +31,7 @@ #include <asm/processor.h> #include <asm/page.h> #include <asm/parisc-device.h> +#include <asm/tlbflush.h> /* ** Debug options @@ -38,12 +39,12 @@ */ #undef DEBUG_PAT -int pdc_type __read_mostly = PDC_TYPE_ILLEGAL; +int pdc_type __ro_after_init = PDC_TYPE_ILLEGAL; /* cell number and location (PAT firmware only) */ -unsigned long parisc_cell_num __read_mostly; -unsigned long parisc_cell_loc __read_mostly; -unsigned long parisc_pat_pdc_cap __read_mostly; +unsigned long parisc_cell_num __ro_after_init; +unsigned long parisc_cell_loc __ro_after_init; +unsigned long parisc_pat_pdc_cap __ro_after_init; void __init setup_pdc(void) @@ -638,4 +639,10 @@ void __init do_device_inventory(void) } printk(KERN_INFO "Found devices:\n"); print_parisc_devices(); + +#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) + pa_serialize_tlb_flushes = machine_has_merced_bus(); + if (pa_serialize_tlb_flushes) + pr_info("Merced bus found: Enable PxTLB serialization.\n"); +#endif } diff --git a/arch/parisc/kernel/jump_label.c b/arch/parisc/kernel/jump_label.c new file mode 100644 index 000000000000..d2f3cb12e282 --- /dev/null +++ b/arch/parisc/kernel/jump_label.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2019 Helge Deller <deller@gmx.de> + * + * Based on arch/arm64/kernel/jump_label.c + */ +#include <linux/kernel.h> +#include <linux/jump_label.h> +#include <linux/bug.h> +#include <asm/alternative.h> +#include <asm/patch.h> + +static inline int reassemble_17(int as17) +{ + return (((as17 & 0x10000) >> 16) | + ((as17 & 0x0f800) << 5) | + ((as17 & 0x00400) >> 8) | + ((as17 & 0x003ff) << 3)); +} + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + void *addr = (void *)jump_entry_code(entry); + u32 insn; + + if (type == JUMP_LABEL_JMP) { + void *target = (void *)jump_entry_target(entry); + int distance = target - addr; + /* + * Encode the PA1.1 "b,n" instruction with a 17-bit + * displacement. In case we hit the BUG(), we could use + * another branch instruction with a 22-bit displacement on + * 64-bit CPUs instead. But this seems sufficient for now. + */ + distance -= 8; + BUG_ON(distance > 262143 || distance < -262144); + insn = 0xe8000002 | reassemble_17(distance >> 2); + } else { + insn = INSN_NOP; + } + + patch_text(addr, insn); +} + +void arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) +{ + /* + * We use the architected NOP in arch_static_branch, so there's no + * need to patch an identical NOP over the top of it here. The core + * will call arch_jump_label_transform from a module notifier if the + * NOP needs to be replaced by a branch. 
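
To make the encoding in arch_jump_label_transform() above concrete: PA-RISC scatters the 17-bit word displacement of a branch across several instruction fields, which is what reassemble_17() produces. A self-contained user-space sketch of the same computation (encode_bn() is a hypothetical name; 0xe8000002 is the "b,n" opcode template used above, and the range check matches the BUG_ON):

#include <stdint.h>

static int reassemble_17(int as17)
{
        return (((as17 & 0x10000) >> 16) |
                ((as17 & 0x0f800) << 5) |
                ((as17 & 0x00400) >> 8) |
                ((as17 & 0x003ff) << 3));
}

static uint32_t encode_bn(char *addr, char *target)
{
        int distance = target - addr - 8;   /* offsets are taken from iaoq + 8 */

        /* must fit the signed 17-bit word displacement */
        if (distance > 262143 || distance < -262144)
                return 0;                   /* the kernel BUG()s here */
        return 0xe8000002 | reassemble_17(distance >> 2);
}
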
+ */ +} diff --git a/arch/parisc/kernel/kgdb.c b/arch/parisc/kernel/kgdb.c new file mode 100644 index 000000000000..664278db9b97 --- /dev/null +++ b/arch/parisc/kernel/kgdb.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PA-RISC KGDB support + * + * Copyright (c) 2019 Sven Schnelle <svens@stackframe.org> + * + */ + +#include <linux/kgdb.h> +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/notifier.h> +#include <linux/kdebug.h> +#include <linux/uaccess.h> +#include <asm/ptrace.h> +#include <asm/traps.h> +#include <asm/processor.h> +#include <asm/patch.h> +#include <asm/cacheflush.h> + +const struct kgdb_arch arch_kgdb_ops = { + .gdb_bpt_instr = { 0x03, 0xff, 0xa0, 0x1f } +}; + +static int __kgdb_notify(struct die_args *args, unsigned long cmd) +{ + struct pt_regs *regs = args->regs; + + if (kgdb_handle_exception(1, args->signr, cmd, regs)) + return NOTIFY_DONE; + return NOTIFY_STOP; +} + +static int kgdb_notify(struct notifier_block *self, + unsigned long cmd, void *ptr) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = __kgdb_notify(ptr, cmd); + local_irq_restore(flags); + + return ret; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_notify, + .priority = -INT_MAX, +}; + +int kgdb_arch_init(void) +{ + return register_die_notifier(&kgdb_notifier); +} + +void kgdb_arch_exit(void) +{ + unregister_die_notifier(&kgdb_notifier); +} + +void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + struct parisc_gdb_regs *gr = (struct parisc_gdb_regs *)gdb_regs; + + memset(gr, 0, sizeof(struct parisc_gdb_regs)); + + memcpy(gr->gpr, regs->gr, sizeof(gr->gpr)); + memcpy(gr->fr, regs->fr, sizeof(gr->fr)); + + gr->sr0 = regs->sr[0]; + gr->sr1 = regs->sr[1]; + gr->sr2 = regs->sr[2]; + gr->sr3 = regs->sr[3]; + gr->sr4 = regs->sr[4]; + gr->sr5 = regs->sr[5]; + gr->sr6 = regs->sr[6]; + gr->sr7 = regs->sr[7]; + + gr->sar = regs->sar; + gr->iir = regs->iir; + gr->isr = regs->isr; + gr->ior = regs->ior; + gr->ipsw = regs->ipsw; + gr->cr27 = regs->cr27; + + gr->iaoq_f = regs->iaoq[0]; + gr->iasq_f = regs->iasq[0]; + + gr->iaoq_b = regs->iaoq[1]; + gr->iasq_b = regs->iasq[1]; +} + +void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) +{ + struct parisc_gdb_regs *gr = (struct parisc_gdb_regs *)gdb_regs; + + + memcpy(regs->gr, gr->gpr, sizeof(regs->gr)); + memcpy(regs->fr, gr->fr, sizeof(regs->fr)); + + regs->sr[0] = gr->sr0; + regs->sr[1] = gr->sr1; + regs->sr[2] = gr->sr2; + regs->sr[3] = gr->sr3; + regs->sr[4] = gr->sr4; + regs->sr[5] = gr->sr5; + regs->sr[6] = gr->sr6; + regs->sr[7] = gr->sr7; + + regs->sar = gr->sar; + regs->iir = gr->iir; + regs->isr = gr->isr; + regs->ior = gr->ior; + regs->ipsw = gr->ipsw; + regs->cr27 = gr->cr27; + + regs->iaoq[0] = gr->iaoq_f; + regs->iasq[0] = gr->iasq_f; + + regs->iaoq[1] = gr->iaoq_b; + regs->iasq[1] = gr->iasq_b; +} + +void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, + struct task_struct *task) +{ + struct pt_regs *regs = task_pt_regs(task); + unsigned long gr30, iaoq; + + gr30 = regs->gr[30]; + iaoq = regs->iaoq[0]; + + regs->gr[30] = regs->ksp; + regs->iaoq[0] = regs->kpc; + pt_regs_to_gdb_regs(gdb_regs, regs); + + regs->gr[30] = gr30; + regs->iaoq[0] = iaoq; + +} + +static void step_instruction_queue(struct pt_regs *regs) +{ + regs->iaoq[0] = regs->iaoq[1]; + regs->iaoq[1] += 4; +} + +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) +{ + regs->iaoq[0] = ip; + regs->iaoq[1] = ip + 4; +} + +int 
kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) +{ + int ret = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + BREAK_INSTR_SIZE); + if (ret) + return ret; + + __patch_text((void *)bpt->bpt_addr, + *(unsigned int *)&arch_kgdb_ops.gdb_bpt_instr); + return ret; +} + +int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) +{ + __patch_text((void *)bpt->bpt_addr, *(unsigned int *)&bpt->saved_instr); + return 0; +} + +int kgdb_arch_handle_exception(int trap, int signo, + int err_code, char *inbuf, char *outbuf, + struct pt_regs *regs) +{ + unsigned long addr; + char *p = inbuf + 1; + + switch (inbuf[0]) { + case 'D': + case 'c': + case 'k': + kgdb_contthread = NULL; + kgdb_single_step = 0; + + if (kgdb_hex2long(&p, &addr)) + kgdb_arch_set_pc(regs, addr); + else if (trap == 9 && regs->iir == + PARISC_KGDB_COMPILED_BREAK_INSN) + step_instruction_queue(regs); + return 0; + case 's': + kgdb_single_step = 1; + if (kgdb_hex2long(&p, &addr)) { + kgdb_arch_set_pc(regs, addr); + } else if (trap == 9 && regs->iir == + PARISC_KGDB_COMPILED_BREAK_INSN) { + step_instruction_queue(regs); + mtctl(-1, 0); + } else { + mtctl(0, 0); + } + regs->gr[0] |= PSW_R; + return 0; + + } + return -1; +} diff --git a/arch/parisc/kernel/kprobes.c b/arch/parisc/kernel/kprobes.c new file mode 100644 index 000000000000..d58960b33bda --- /dev/null +++ b/arch/parisc/kernel/kprobes.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/parisc/kernel/kprobes.c + * + * PA-RISC kprobes implementation + * + * Copyright (c) 2019 Sven Schnelle <svens@stackframe.org> + */ + +#include <linux/types.h> +#include <linux/kprobes.h> +#include <linux/slab.h> +#include <asm/cacheflush.h> +#include <asm/patch.h> + +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + if ((unsigned long)p->addr & 3UL) + return -EINVAL; + + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) + return -ENOMEM; + + memcpy(p->ainsn.insn, p->addr, + MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + p->opcode = *p->addr; + flush_insn_slot(p); + return 0; +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + if (!p->ainsn.insn) + return; + + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; +} + +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + patch_text(p->addr, PARISC_KPROBES_BREAK_INSN); +} + +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + patch_text(p->addr, p->opcode); +} + +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; +} + +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); + kcb->kprobe_status = kcb->prev_kprobe.status; +} + +static inline void __kprobes set_current_kprobe(struct kprobe *p) +{ + __this_cpu_write(current_kprobe, p); +} + +static void __kprobes setup_singlestep(struct kprobe *p, + struct kprobe_ctlblk *kcb, struct pt_regs *regs) +{ + kcb->iaoq[0] = regs->iaoq[0]; + kcb->iaoq[1] = regs->iaoq[1]; + regs->iaoq[0] = (unsigned long)p->ainsn.insn; + mtctl(0, 0); + regs->gr[0] |= PSW_R; +} + +int __kprobes parisc_kprobe_break_handler(struct pt_regs *regs) +{ + struct kprobe *p; + struct kprobe_ctlblk *kcb; + + preempt_disable(); + + kcb = get_kprobe_ctlblk(); + p = get_kprobe((unsigned long *)regs->iaoq[0]); + + if (!p) { + preempt_enable_no_resched(); + return 0; + } + + if 
(kprobe_running()) {
+ /*
+ * We have reentered the kprobe handler: another kprobe was hit
+ * while within the handler, so we save the original kprobes and
+ * single-step on the instruction of the new probe without
+ * calling any user handlers, to avoid recursive kprobes.
+ */
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p);
+ kprobes_inc_nmissed_count(p);
+ setup_singlestep(p, kcb, regs);
+ kcb->kprobe_status = KPROBE_REENTER;
+ return 1;
+ }
+
+ set_current_kprobe(p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+ /* If we have no pre-handler or it returned 0, we continue with
+ * normal processing. If we have a pre-handler and it returned
+ * non-zero, which means the user handler set up registers to exit
+ * to another instruction, we must skip the single stepping.
+ */
+
+ if (!p->pre_handler || !p->pre_handler(p, regs)) {
+ setup_singlestep(p, kcb, regs);
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ } else {
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ }
+ return 1;
+}
+
+int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ struct kprobe *p = kprobe_running();
+
+ if (regs->iaoq[0] != (unsigned long)p->ainsn.insn+4)
+ return 0;
+
+ /* restore the original saved kprobe variables and continue */
+ if (kcb->kprobe_status == KPROBE_REENTER) {
+ restore_previous_kprobe(kcb);
+ return 1;
+ }
+
+ /* For absolute branch instructions we can copy iaoq_b. For relative
+ * branch instructions we need to calculate the new address based on
+ * the difference between iaoq_f and iaoq_b. We cannot use iaoq_b
+ * without modification because it's based on our ainsn.insn address.
+ */
+
+ if (p->post_handler)
+ p->post_handler(p, regs, 0);
+
+ switch (regs->iir >> 26) {
+ case 0x38: /* BE */
+ case 0x39: /* BE,L */
+ case 0x3a: /* BV */
+ case 0x3b: /* BVE */
+ /* for absolute branches, regs->iaoq[1] already has the
+ * right address
+ */
+ regs->iaoq[0] = kcb->iaoq[1];
+ break;
+ default:
+ regs->iaoq[1] = kcb->iaoq[0];
+ regs->iaoq[1] += (regs->iaoq[1] - regs->iaoq[0]) + 4;
+ regs->iaoq[0] = kcb->iaoq[1];
+ break;
+ }
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ reset_current_kprobe();
+ return 1;
+}
+
+static inline void kretprobe_trampoline(void)
+{
+ asm volatile("nop");
+ asm volatile("nop");
+}
+
+static int __kprobes trampoline_probe_handler(struct kprobe *p,
+ struct pt_regs *regs);
+
+static struct kprobe trampoline_p = {
+ .pre_handler = trampoline_probe_handler
+};
+
+static int __kprobes trampoline_probe_handler(struct kprobe *p,
+ struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head, empty_rp;
+ struct hlist_node *tmp;
+ unsigned long flags, orig_ret_address = 0;
+ unsigned long trampoline_address = (unsigned long)trampoline_p.addr;
+ kprobe_opcode_t *correct_ret_addr = NULL;
+
+ INIT_HLIST_HEAD(&empty_rp);
+ kretprobe_hash_lock(current, &head, &flags);
+
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because multiple functions in the call path have
+ * a return probe installed on them, and/or more than one return
+ * probe was registered for a target function.
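
With arch_prepare_kprobe()/arch_arm_kprobe() above wired into the generic kprobes core, a probe is used on parisc exactly like on other architectures. A minimal usage sketch (the probed symbol and the module boilerplate are illustrative, not part of this patch; arming goes through patch_text() as shown in arch_arm_kprobe()):

#include <linux/kprobes.h>
#include <linux/module.h>

static int example_pre(struct kprobe *p, struct pt_regs *regs)
{
        /* on parisc the probed address shows up in iaoq[0] */
        pr_info("kprobe hit: %px, iaoq0=%lx\n", p->addr, regs->iaoq[0]);
        return 0;       /* 0: let the core single-step the saved insn */
}

static struct kprobe example_kp = {
        .symbol_name = "_do_fork",      /* illustrative target */
        .pre_handler = example_pre,
};

static int __init example_init(void)
{
        return register_kprobe(&example_kp);
}

static void __exit example_exit(void)
{
        unregister_kprobe(&example_kp);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
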
+ * + * We can handle this because: + * - instances are always inserted at the head of the list + * - when multiple return probes are registered for the same + * function, the first instance's ret_addr will point to the + * real return address, and all the rest will point to + * kretprobe_trampoline + */ + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + if (ri->rp && ri->rp->handler) { + __this_cpu_write(current_kprobe, &ri->rp->kp); + get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; + ri->ret_addr = correct_ret_addr; + ri->rp->handler(ri, regs); + __this_cpu_write(current_kprobe, NULL); + } + + recycle_rp_inst(ri, &empty_rp); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_hash_unlock(current, &flags); + + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { + hlist_del(&ri->hlist); + kfree(ri); + } + instruction_pointer_set(regs, orig_ret_address); + return 1; +} + +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *)regs->gr[2]; + + /* Replace the return addr with trampoline addr. */ + regs->gr[2] = (unsigned long)trampoline_p.addr; +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + return p->addr == trampoline_p.addr; +} +bool arch_kprobe_on_func_entry(unsigned long offset) +{ + return !offset; +} + +int __init arch_init_kprobes(void) +{ + trampoline_p.addr = (kprobe_opcode_t *) + dereference_function_descriptor(kretprobe_trampoline); + return register_kprobe(&trampoline_p); +} diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index 187f032c9dd8..4e4e8eb25874 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -311,39 +311,6 @@ fdsync: nop ENDPROC_CFI(flush_data_cache_local) -/* Macros to serialize TLB purge operations on SMP. */ - - .macro tlb_lock la,flags,tmp -#ifdef CONFIG_SMP -98: -#if __PA_LDCW_ALIGNMENT > 4 - load32 pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la - depi 0,31,__PA_LDCW_ALIGN_ORDER, \la -#else - load32 pa_tlb_lock, \la -#endif - rsm PSW_SM_I,\flags -1: LDCW 0(\la),\tmp - cmpib,<>,n 0,\tmp,3f -2: ldw 0(\la),\tmp - cmpb,<> %r0,\tmp,1b - nop - b,n 2b -3: -99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) -#endif - .endm - - .macro tlb_unlock la,flags,tmp -#ifdef CONFIG_SMP -98: ldi 1,\tmp - sync - stw \tmp,0(\la) - mtsm \flags -99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) -#endif - .endm - /* Clear page using kernel mapping. 
*/ ENTRY_CFI(clear_page_asm) @@ -601,10 +568,8 @@ ENTRY_CFI(copy_user_page_asm) pdtlb,l %r0(%r28) pdtlb,l %r0(%r29) #else - tlb_lock %r20,%r21,%r22 0: pdtlb %r0(%r28) 1: pdtlb %r0(%r29) - tlb_unlock %r20,%r21,%r22 ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB) ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB) #endif @@ -743,9 +708,7 @@ ENTRY_CFI(clear_user_page_asm) #ifdef CONFIG_PA20 pdtlb,l %r0(%r28) #else - tlb_lock %r20,%r21,%r22 0: pdtlb %r0(%r28) - tlb_unlock %r20,%r21,%r22 ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB) #endif @@ -821,9 +784,7 @@ ENTRY_CFI(flush_dcache_page_asm) #ifdef CONFIG_PA20 pdtlb,l %r0(%r28) #else - tlb_lock %r20,%r21,%r22 0: pdtlb %r0(%r28) - tlb_unlock %r20,%r21,%r22 ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB) #endif @@ -882,9 +843,7 @@ ENTRY_CFI(purge_dcache_page_asm) #ifdef CONFIG_PA20 pdtlb,l %r0(%r28) #else - tlb_lock %r20,%r21,%r22 0: pdtlb %r0(%r28) - tlb_unlock %r20,%r21,%r22 ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB) #endif @@ -948,10 +907,8 @@ ENTRY_CFI(flush_icache_page_asm) 1: pitlb,l %r0(%sr4,%r28) ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP) #else - tlb_lock %r20,%r21,%r22 0: pdtlb %r0(%r28) 1: pitlb %r0(%sr4,%r28) - tlb_unlock %r20,%r21,%r22 ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB) ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB) ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP) diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 7baa2265d439..174213b1716e 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -138,12 +138,6 @@ extern void $$dyncall(void); EXPORT_SYMBOL($$dyncall); #endif -#ifdef CONFIG_DISCONTIGMEM -#include <asm/mmzone.h> -EXPORT_SYMBOL(node_data); -EXPORT_SYMBOL(pfnnid_map); -#endif - #ifdef CONFIG_FUNCTION_TRACER extern void _mcount(void); EXPORT_SYMBOL(_mcount); diff --git a/arch/parisc/kernel/patch.c b/arch/parisc/kernel/patch.c new file mode 100644 index 000000000000..cdcd981278b3 --- /dev/null +++ b/arch/parisc/kernel/patch.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 + /* + * functions to patch RO kernel text during runtime + * + * Copyright (c) 2019 Sven Schnelle <svens@stackframe.org> + */ + +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/kprobes.h> +#include <linux/mm.h> +#include <linux/stop_machine.h> + +#include <asm/cacheflush.h> +#include <asm/fixmap.h> +#include <asm/patch.h> + +struct patch { + void *addr; + unsigned int insn; +}; + +static void __kprobes *patch_map(void *addr, int fixmap) +{ + unsigned long uintaddr = (uintptr_t) addr; + bool module = !core_kernel_text(uintaddr); + struct page *page; + + if (module && IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) + page = vmalloc_to_page(addr); + else if (!module && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) + page = virt_to_page(addr); + else + return addr; + + set_fixmap(fixmap, page_to_phys(page)); + + return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK)); +} + +static void __kprobes patch_unmap(int fixmap) +{ + clear_fixmap(fixmap); +} + +void __kprobes __patch_text(void *addr, unsigned int insn) +{ + void *waddr = addr; + int size; + + waddr = patch_map(addr, FIX_TEXT_POKE0); + *(u32 *)waddr = insn; + size = sizeof(u32); + flush_kernel_vmap_range(waddr, size); + patch_unmap(FIX_TEXT_POKE0); + flush_icache_range((uintptr_t)(addr), + (uintptr_t)(addr) + size); +} + +static int __kprobes patch_text_stop_machine(void *data) +{ + struct patch *patch = data; + + __patch_text(patch->addr, patch->insn); + 
+ return 0; +} + +void __kprobes patch_text(void *addr, unsigned int insn) +{ + struct patch patch = { + .addr = addr, + .insn = insn, + }; + + stop_machine_cpuslocked(patch_text_stop_machine, &patch, NULL); +} diff --git a/arch/parisc/kernel/pci.c b/arch/parisc/kernel/pci.c index ae684ac6efb6..bc41ca243cfe 100644 --- a/arch/parisc/kernel/pci.c +++ b/arch/parisc/kernel/pci.c @@ -45,14 +45,14 @@ * #define pci_post_reset_delay 50 */ -struct pci_port_ops *pci_port __read_mostly; -struct pci_bios_ops *pci_bios __read_mostly; +struct pci_port_ops *pci_port __ro_after_init; +struct pci_bios_ops *pci_bios __ro_after_init; -static int pci_hba_count __read_mostly; +static int pci_hba_count __ro_after_init; /* parisc_pci_hba used by pci_port->in/out() ops to lookup bus data. */ #define PCI_HBA_MAX 32 -static struct pci_hba_data *parisc_pci_hba[PCI_HBA_MAX] __read_mostly; +static struct pci_hba_data *parisc_pci_hba[PCI_HBA_MAX] __ro_after_init; /******************************************************************** diff --git a/arch/parisc/kernel/perf_images.h b/arch/parisc/kernel/perf_images.h index 7fef9644df47..c108fee989d9 100644 --- a/arch/parisc/kernel/perf_images.h +++ b/arch/parisc/kernel/perf_images.h @@ -25,7 +25,7 @@ #define PCXU_IMAGE_SIZE 584 -static uint32_t onyx_images[][PCXU_IMAGE_SIZE/sizeof(uint32_t)] __read_mostly = { +static uint32_t onyx_images[][PCXU_IMAGE_SIZE/sizeof(uint32_t)] __ro_after_init = { /* * CPI: * @@ -2093,7 +2093,7 @@ static uint32_t onyx_images[][PCXU_IMAGE_SIZE/sizeof(uint32_t)] __read_mostly = }; #define PCXW_IMAGE_SIZE 576 -static uint32_t cuda_images[][PCXW_IMAGE_SIZE/sizeof(uint32_t)] __read_mostly = { +static uint32_t cuda_images[][PCXW_IMAGE_SIZE/sizeof(uint32_t)] __ro_after_init = { /* * CPI: FROM CPI.IDF (Image 0) * diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 841db71958cd..89e4f4497ffb 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -192,7 +192,8 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r) * QEMU idle the host too. */ -int running_on_qemu __read_mostly; +int running_on_qemu __ro_after_init; +EXPORT_SYMBOL(running_on_qemu); void __cpuidle arch_cpu_idle_dead(void) { diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 7f4d042856b5..e715871cd4ac 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -43,10 +43,10 @@ #include <asm/irq.h> /* for struct irq_region */ #include <asm/parisc-device.h> -struct system_cpuinfo_parisc boot_cpu_data __read_mostly; +struct system_cpuinfo_parisc boot_cpu_data __ro_after_init; EXPORT_SYMBOL(boot_cpu_data); #ifdef CONFIG_PA8X00 -int _parisc_requires_coherency __read_mostly; +int _parisc_requires_coherency __ro_after_init; EXPORT_SYMBOL(_parisc_requires_coherency); #endif @@ -305,7 +305,8 @@ void __init collect_boot_cpu_data(void) if (pdc_model_platform_info(orig_prod_num, current_prod_num, serial_no) == PDC_OK) { printk(KERN_INFO "product %s, original product %s, S/N: %s\n", - current_prod_num, orig_prod_num, serial_no); + current_prod_num[0] ? 
current_prod_num : "n/a", + orig_prod_num, serial_no); add_device_randomness(orig_prod_num, strlen(orig_prod_num)); add_device_randomness(current_prod_num, strlen(current_prod_num)); add_device_randomness(serial_no, strlen(serial_no)); diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 0964c236e3e5..a3d2fb4e6dd2 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -789,3 +789,38 @@ const char *regs_query_register_name(unsigned int offset) return roff->name; return NULL; } + +/** + * regs_within_kernel_stack() - check the address in the stack + * @regs: pt_regs which contains kernel stack pointer. + * @addr: address which is checked. + * + * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). + * If @addr is within the kernel stack, it returns true. If not, returns false. + */ +int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) +{ + return ((addr & ~(THREAD_SIZE - 1)) == + (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specified by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + + addr -= n; + + if (!regs_within_kernel_stack(regs, (unsigned long)addr)) + return 0; + + return *addr; +} diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index d908058d05c1..e05cb2a5c16d 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -343,6 +343,12 @@ static int __init parisc_init(void) boot_cpu_data.cpu_hz / 1000000, boot_cpu_data.cpu_hz % 1000000 ); +#if defined(CONFIG_64BIT) && defined(CONFIG_SMP) + /* Don't serialize TLB flushes if we run on one CPU only. */ + if (num_online_cpus() == 1) + pa_serialize_tlb_flushes = 0; +#endif + apply_alternatives_all(); parisc_setup_cache_timing(); diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 376ea0d1b275..4407ac4c1d84 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -86,7 +86,8 @@ static unsigned long mmap_upper_limit(struct rlimit *rlim_stack) stack_base = STACK_SIZE_MAX; /* Add space for stack randomization. */ - stack_base += (STACK_RND_MASK << PAGE_SHIFT); + if (current->flags & PF_RANDOMIZE) + stack_base += (STACK_RND_MASK << PAGE_SHIFT); return PAGE_ALIGN(STACK_TOP - stack_base); } diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 4f77bd9be66b..97ac707c6bff 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -48,7 +48,7 @@ registers). 
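
Returning to the ptrace.c hunk above: the containment test in regs_within_kernel_stack() reduces to "both addresses fall into the same THREAD_SIZE-aligned block", which is what makes regs_get_kernel_stack_nth() safe to call with an arbitrary n. A small user-space sketch of that mask arithmetic (the THREAD_SIZE value and the addresses are illustrative):

#include <stdio.h>

#define THREAD_SIZE (16 * 1024UL)       /* illustrative value */

static int within_stack(unsigned long sp, unsigned long addr)
{
        /* same test as regs_within_kernel_stack() */
        return (addr & ~(THREAD_SIZE - 1)) == (sp & ~(THREAD_SIZE - 1));
}

int main(void)
{
        unsigned long sp = 0x12345678UL;

        printf("%d\n", within_stack(sp, 0x12347ffcUL)); /* 1: same 16 KiB block */
        printf("%d\n", within_stack(sp, 0x12348000UL)); /* 0: next block */
        return 0;
}
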
*/ #define KILL_INSN break 0,0 - .level LEVEL + .level PA_ASM_LEVEL .text @@ -640,7 +640,10 @@ cas_action: sub,<> %r28, %r25, %r0 2: stw %r24, 0(%r26) /* Free lock */ - sync +#ifdef CONFIG_SMP +98: LDCW 0(%sr2,%r20), %r1 /* Barrier */ +99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) +#endif stw %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG /* Clear thread register indicator */ @@ -655,7 +658,10 @@ cas_action: 3: /* Error occurred on load or store */ /* Free lock */ - sync +#ifdef CONFIG_SMP +98: LDCW 0(%sr2,%r20), %r1 /* Barrier */ +99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) +#endif stw %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG stw %r0, 4(%sr2,%r20) @@ -857,7 +863,10 @@ cas2_action: cas2_end: /* Free lock */ - sync +#ifdef CONFIG_SMP +98: LDCW 0(%sr2,%r20), %r1 /* Barrier */ +99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) +#endif stw %r20, 0(%sr2,%r20) /* Enable interrupts */ ssm PSW_SM_I, %r0 @@ -868,7 +877,10 @@ cas2_end: 22: /* Error occurred on load or store */ /* Free lock */ - sync +#ifdef CONFIG_SMP +98: LDCW 0(%sr2,%r20), %r1 /* Barrier */ +99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) +#endif stw %r20, 0(%sr2,%r20) ssm PSW_SM_I, %r0 ldo 1(%r0),%r28 diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index a1e772f909cb..04508158815c 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -40,7 +40,7 @@ #include <linux/timex.h> -static unsigned long clocktick __read_mostly; /* timer cycles per tick */ +static unsigned long clocktick __ro_after_init; /* timer cycles per tick */ /* * We keep time on PA-RISC Linux by using the Interval Timer which is diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 7e1ccafadf57..096e319adeb3 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -42,6 +42,8 @@ #include <asm/unwind.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> +#include <linux/kgdb.h> +#include <linux/kprobes.h> #include "../math-emu/math-emu.h" /* for handle_fpe() */ @@ -293,6 +295,22 @@ static void handle_break(struct pt_regs *regs) (tt == BUG_TRAP_TYPE_NONE) ? 
9 : 0); } +#ifdef CONFIG_KPROBES + if (unlikely(iir == PARISC_KPROBES_BREAK_INSN)) { + parisc_kprobe_break_handler(regs); + return; + } + +#endif + +#ifdef CONFIG_KGDB + if (unlikely(iir == PARISC_KGDB_COMPILED_BREAK_INSN || + iir == PARISC_KGDB_BREAK_INSN)) { + kgdb_handle_exception(9, SIGTRAP, 0, regs); + return; + } +#endif + if (unlikely(iir != GDB_BREAK_INSN)) parisc_printk_ratelimited(0, regs, KERN_DEBUG "break %d,%d: pid=%d command='%s'\n", @@ -518,6 +536,19 @@ void notrace handle_interruption(int code, struct pt_regs *regs) case 3: /* Recovery counter trap */ regs->gr[0] &= ~PSW_R; + +#ifdef CONFIG_KPROBES + if (parisc_kprobe_ss_handler(regs)) + return; +#endif + +#ifdef CONFIG_KGDB + if (kgdb_single_step) { + kgdb_handle_exception(0, SIGTRAP, 0, regs); + return; + } +#endif + if (user_space(regs)) handle_gdb_break(regs, TRAP_TRACE); /* else this must be the start of a syscall - just let it run */ diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index 2d14f17838d2..87ae476d1c4f 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -40,7 +40,7 @@ static DEFINE_SPINLOCK(unwind_lock); * we can call unwind_init as early in the bootup process as * possible (before the slab allocator is initialized) */ -static struct unwind_table kernel_unwind_table __read_mostly; +static struct unwind_table kernel_unwind_table __ro_after_init; static LIST_HEAD(unwind_tables); static inline const struct unwind_table_entry * diff --git a/arch/parisc/mm/Makefile b/arch/parisc/mm/Makefile index 134393de69d2..20e39b043a60 100644 --- a/arch/parisc/mm/Makefile +++ b/arch/parisc/mm/Makefile @@ -2,5 +2,5 @@ # Makefile for arch/parisc/mm # -obj-y := init.o fault.o ioremap.o +obj-y := init.o fault.o ioremap.o fixmap.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o diff --git a/arch/parisc/mm/fixmap.c b/arch/parisc/mm/fixmap.c new file mode 100644 index 000000000000..c8d41b54fb19 --- /dev/null +++ b/arch/parisc/mm/fixmap.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fixmaps for parisc + * + * Copyright (c) 2019 Sven Schnelle <svens@stackframe.org> + */ + +#include <linux/kprobes.h> +#include <linux/mm.h> +#include <asm/cacheflush.h> +#include <asm/fixmap.h> + +void set_fixmap(enum fixed_addresses idx, phys_addr_t phys) +{ + unsigned long vaddr = __fix_to_virt(idx); + pgd_t *pgd = pgd_offset_k(vaddr); + pmd_t *pmd = pmd_offset(pgd, vaddr); + pte_t *pte; + + if (pmd_none(*pmd)) + pmd = pmd_alloc(NULL, pgd, vaddr); + + pte = pte_offset_kernel(pmd, vaddr); + if (pte_none(*pte)) + pte = pte_alloc_kernel(pmd, vaddr); + + set_pte_at(&init_mm, vaddr, pte, __mk_pte(phys, PAGE_KERNEL_RWX)); + flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE); +} + +void clear_fixmap(enum fixed_addresses idx) +{ + unsigned long vaddr = __fix_to_virt(idx); + pgd_t *pgd = pgd_offset_k(vaddr); + pmd_t *pmd = pmd_offset(pgd, vaddr); + pte_t *pte = pte_offset_kernel(pmd, vaddr); + + pte_clear(&init_mm, vaddr, pte); + + flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE); +} diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c index d77479ae3af2..d578809e55cf 100644 --- a/arch/parisc/mm/hugetlbpage.c +++ b/arch/parisc/mm/hugetlbpage.c @@ -139,9 +139,9 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, { unsigned long flags; - purge_tlb_start(flags); + spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags); __set_huge_pte_at(mm, addr, ptep, entry); - purge_tlb_end(flags); + spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags); } @@ -151,10 +151,10 @@ pte_t 
huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, unsigned long flags; pte_t entry; - purge_tlb_start(flags); + spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags); entry = *ptep; __set_huge_pte_at(mm, addr, ptep, __pte(0)); - purge_tlb_end(flags); + spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags); return entry; } @@ -166,10 +166,10 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long flags; pte_t old_pte; - purge_tlb_start(flags); + spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags); old_pte = *ptep; __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); - purge_tlb_end(flags); + spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags); } int huge_ptep_set_access_flags(struct vm_area_struct *vma, @@ -178,13 +178,14 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, { unsigned long flags; int changed; + struct mm_struct *mm = vma->vm_mm; - purge_tlb_start(flags); + spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags); changed = !pte_same(*ptep, pte); if (changed) { - __set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + __set_huge_pte_at(mm, addr, ptep, pte); } - purge_tlb_end(flags); + spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags); return changed; } diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index d0b166256f1a..ddca8287d43b 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -32,6 +32,7 @@ #include <asm/mmzone.h> #include <asm/sections.h> #include <asm/msgbuf.h> +#include <asm/sparsemem.h> extern int data_start; extern void parisc_kernel_start(void); /* Kernel entry point in head.S */ @@ -48,11 +49,6 @@ pmd_t pmd0[PTRS_PER_PMD] __attribute__ ((__section__ (".data..vm0.pmd"), aligned pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__ ((__section__ (".data..vm0.pgd"), aligned(PAGE_SIZE))); pte_t pg0[PT_INITIAL * PTRS_PER_PTE] __attribute__ ((__section__ (".data..vm0.pte"), aligned(PAGE_SIZE))); -#ifdef CONFIG_DISCONTIGMEM -struct node_map_data node_data[MAX_NUMNODES] __read_mostly; -signed char pfnnid_map[PFNNID_MAP_MAX] __read_mostly; -#endif - static struct resource data_resource = { .name = "Kernel data", .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, @@ -70,17 +66,17 @@ static struct resource pdcdata_resource = { .flags = IORESOURCE_BUSY | IORESOURCE_MEM, }; -static struct resource sysram_resources[MAX_PHYSMEM_RANGES] __read_mostly; +static struct resource sysram_resources[MAX_PHYSMEM_RANGES] __ro_after_init; /* The following array is initialized from the firmware specific * information retrieved in kernel/inventory.c. 
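
Taken together with patch.c earlier in this merge, the set_fixmap()/clear_fixmap() routines above complete the read-only-text story: to patch a protected kernel instruction, its page is temporarily aliased writable at the FIX_TEXT_POKE0 slot. A condensed sketch of that flow for core-kernel text (example_poke() is a hypothetical name; the real sequence is __patch_text() plus patch_map()/patch_unmap()):

static void example_poke(void *addr, u32 insn)
{
        uintptr_t off = (uintptr_t)addr & ~PAGE_MASK;
        void *waddr;

        /* map the RO page writable at the reserved fixmap slot */
        set_fixmap(FIX_TEXT_POKE0, page_to_phys(virt_to_page(addr)));
        waddr = (void *)(__fix_to_virt(FIX_TEXT_POKE0) + off);

        *(u32 *)waddr = insn;                   /* write via the alias */
        flush_kernel_vmap_range(waddr, sizeof(u32));
        clear_fixmap(FIX_TEXT_POKE0);           /* drop the alias */

        /* make the new instruction visible to the I-cache */
        flush_icache_range((uintptr_t)addr, (uintptr_t)addr + sizeof(u32));
}
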
*/ -physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __read_mostly; -int npmem_ranges __read_mostly; +physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __initdata; +int npmem_ranges __initdata; #ifdef CONFIG_64BIT -#define MAX_MEM (~0UL) +#define MAX_MEM (1UL << MAX_PHYSMEM_BITS) #else /* !CONFIG_64BIT */ #define MAX_MEM (3584U*1024U*1024U) #endif /* !CONFIG_64BIT */ @@ -119,7 +115,7 @@ static void __init mem_limit_func(void) static void __init setup_bootmem(void) { unsigned long mem_max; -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM physmem_range_t pmem_holes[MAX_PHYSMEM_RANGES - 1]; int npmem_holes; #endif @@ -137,23 +133,20 @@ static void __init setup_bootmem(void) int j; for (j = i; j > 0; j--) { - unsigned long tmp; + physmem_range_t tmp; if (pmem_ranges[j-1].start_pfn < pmem_ranges[j].start_pfn) { break; } - tmp = pmem_ranges[j-1].start_pfn; - pmem_ranges[j-1].start_pfn = pmem_ranges[j].start_pfn; - pmem_ranges[j].start_pfn = tmp; - tmp = pmem_ranges[j-1].pages; - pmem_ranges[j-1].pages = pmem_ranges[j].pages; - pmem_ranges[j].pages = tmp; + tmp = pmem_ranges[j-1]; + pmem_ranges[j-1] = pmem_ranges[j]; + pmem_ranges[j] = tmp; } } -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM /* * Throw out ranges that are too far apart (controlled by * MAX_GAP). @@ -165,7 +158,7 @@ static void __init setup_bootmem(void) pmem_ranges[i-1].pages) > MAX_GAP) { npmem_ranges = i; printk("Large gap in memory detected (%ld pages). " - "Consider turning on CONFIG_DISCONTIGMEM\n", + "Consider turning on CONFIG_SPARSEMEM\n", pmem_ranges[i].start_pfn - (pmem_ranges[i-1].start_pfn + pmem_ranges[i-1].pages)); @@ -230,9 +223,8 @@ static void __init setup_bootmem(void) printk(KERN_INFO "Total Memory: %ld MB\n",mem_max >> 20); -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM /* Merge the ranges, keeping track of the holes */ - { unsigned long end_pfn; unsigned long hole_pages; @@ -255,18 +247,6 @@ static void __init setup_bootmem(void) } #endif -#ifdef CONFIG_DISCONTIGMEM - for (i = 0; i < MAX_PHYSMEM_RANGES; i++) { - memset(NODE_DATA(i), 0, sizeof(pg_data_t)); - } - memset(pfnnid_map, 0xff, sizeof(pfnnid_map)); - - for (i = 0; i < npmem_ranges; i++) { - node_set_state(i, N_NORMAL_MEMORY); - node_set_online(i); - } -#endif - /* * Initialize and free the full range of memory in each range. */ @@ -314,7 +294,7 @@ static void __init setup_bootmem(void) memblock_reserve(__pa(KERNEL_BINARY_TEXT_START), (unsigned long)(_end - KERNEL_BINARY_TEXT_START)); -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_SPARSEMEM /* reserve the holes */ @@ -360,19 +340,13 @@ static void __init setup_bootmem(void) /* Initialize Page Deallocation Table (PDT) and check for bad memory. 
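The sort cleanup in setup_bootmem() above is worth a second look: the old code swapped start_pfn and pages separately, while the new code swaps the whole physmem_range_t in one assignment per element, which is shorter and immune to someone later adding a field and forgetting to swap it. A standalone version of the resulting insertion sort (physmem_range_t is re-declared locally here; the real type lives in the parisc headers):

```c
#include <stdio.h>

typedef struct {
    unsigned long start_pfn;
    unsigned long pages;
} physmem_range_t;

static void sort_ranges(physmem_range_t *r, int n)
{
    for (int i = 1; i < n; i++) {
        for (int j = i; j > 0; j--) {
            physmem_range_t tmp;

            if (r[j - 1].start_pfn < r[j].start_pfn)
                break;
            tmp = r[j - 1];        /* whole-struct swap, as in the patch */
            r[j - 1] = r[j];
            r[j] = tmp;
        }
    }
}

int main(void)
{
    physmem_range_t r[] = { { 4096, 16 }, { 0, 32 }, { 8192, 8 } };

    sort_ranges(r, 3);
    for (int i = 0; i < 3; i++)
        printf("range %d: start_pfn=%lu pages=%lu\n",
               i, r[i].start_pfn, r[i].pages);
    return 0;
}
```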
*/ pdc_pdt_init(); -} -static int __init parisc_text_address(unsigned long vaddr) -{ - static unsigned long head_ptr __initdata; - - if (!head_ptr) - head_ptr = PAGE_MASK & (unsigned long) - dereference_function_descriptor(&parisc_kernel_start); - - return core_kernel_text(vaddr) || vaddr == head_ptr; + memblock_allow_resize(); + memblock_dump_all(); } +static bool kernel_set_to_readonly; + static void __init map_pages(unsigned long start_vaddr, unsigned long start_paddr, unsigned long size, pgprot_t pgprot, int force) @@ -389,10 +363,11 @@ static void __init map_pages(unsigned long start_vaddr, unsigned long vaddr; unsigned long ro_start; unsigned long ro_end; - unsigned long kernel_end; + unsigned long kernel_start, kernel_end; ro_start = __pa((unsigned long)_text); ro_end = __pa((unsigned long)&data_start); + kernel_start = __pa((unsigned long)&__init_begin); kernel_end = __pa((unsigned long)&_end); end_paddr = start_paddr + size; @@ -455,26 +430,30 @@ static void __init map_pages(unsigned long start_vaddr, pg_table = (pte_t *) __va(pg_table) + start_pte; for (tmp2 = start_pte; tmp2 < PTRS_PER_PTE; tmp2++, pg_table++) { pte_t pte; - - if (force) - pte = __mk_pte(address, pgprot); - else if (parisc_text_address(vaddr)) { - pte = __mk_pte(address, PAGE_KERNEL_EXEC); - if (address >= ro_start && address < kernel_end) - pte = pte_mkhuge(pte); + pgprot_t prot; + bool huge = false; + + if (force) { + prot = pgprot; + } else if (address < kernel_start || address >= kernel_end) { + /* outside kernel memory */ + prot = PAGE_KERNEL; + } else if (!kernel_set_to_readonly) { + /* still initializing, allow writing to RO memory */ + prot = PAGE_KERNEL_RWX; + huge = true; + } else if (address >= ro_start) { + /* Code (ro) and Data areas */ + prot = (address < ro_end) ? + PAGE_KERNEL_EXEC : PAGE_KERNEL; + huge = true; + } else { + prot = PAGE_KERNEL; } - else -#if defined(CONFIG_PARISC_PAGE_SIZE_4KB) - if (address >= ro_start && address < ro_end) { - pte = __mk_pte(address, PAGE_KERNEL_EXEC); + + pte = __mk_pte(address, prot); + if (huge) pte = pte_mkhuge(pte); - } else -#endif - { - pte = __mk_pte(address, pgprot); - if (address >= ro_start && address < kernel_end) - pte = pte_mkhuge(pte); - } if (address >= end_paddr) break; @@ -495,7 +474,7 @@ static void __init map_pages(unsigned long start_vaddr, void __init set_kernel_text_rw(int enable_read_write) { - unsigned long start = (unsigned long) _text; + unsigned long start = (unsigned long) __init_begin; unsigned long end = (unsigned long) &data_start; map_pages(start, __pa(start), end-start, @@ -510,6 +489,12 @@ void __ref free_initmem(void) { unsigned long init_begin = (unsigned long)__init_begin; unsigned long init_end = (unsigned long)__init_end; + unsigned long kernel_end = (unsigned long)&_end; + + /* Remap kernel text and data, but do not touch init section yet. */ + kernel_set_to_readonly = true; + map_pages(init_end, __pa(init_end), kernel_end - init_end, + PAGE_KERNEL, 0); /* The init text pages are marked R-X. We have to * flush the icache and mark them RW- @@ -526,7 +511,7 @@ void __ref free_initmem(void) PAGE_KERNEL, 1); /* force the kernel to see the new TLB entries */ - __flush_tlb_range(0, init_begin, init_end); + __flush_tlb_range(0, init_begin, kernel_end); /* finally dump all the instructions which were cached, since the * pages are no-longer executable */ @@ -544,8 +529,9 @@ void mark_rodata_ro(void) { /* rodata memory was already mapped with KERNEL_RO access rights by pagetable_init() and map_pages(). 
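The rewritten inner loop of map_pages() collapses the old ifdef maze into one decision ladder keyed on the new kernel_set_to_readonly flag. Extracted here as a pure function for illustration only; the PAGE_* values are mock enums, the boundaries are parameters (the kernel derives them from linker symbols such as _text, __init_begin and _end), and the force case, which simply uses the caller's pgprot, is omitted:

```c
#include <stdbool.h>
#include <stdio.h>

enum prot { PAGE_KERNEL, PAGE_KERNEL_RWX, PAGE_KERNEL_EXEC };

struct prot_choice { enum prot prot; bool huge; };

static struct prot_choice choose_prot(unsigned long address,
                                      unsigned long kernel_start,
                                      unsigned long kernel_end,
                                      unsigned long ro_start,
                                      unsigned long ro_end,
                                      bool kernel_set_to_readonly)
{
    struct prot_choice c = { PAGE_KERNEL, false };

    if (address < kernel_start || address >= kernel_end)
        return c;                       /* outside kernel memory: plain RW */

    if (!kernel_set_to_readonly) {
        /* still booting: keep the kernel writable so init code can
         * patch it, but already use huge mappings */
        c.prot = PAGE_KERNEL_RWX;
        c.huge = true;
        return c;
    }
    if (address >= ro_start) {
        /* after free_initmem(): text is RX, data is RW, both huge */
        c.prot = (address < ro_end) ? PAGE_KERNEL_EXEC : PAGE_KERNEL;
        c.huge = true;
        return c;
    }
    return c;                           /* below ro_start: RW data */
}

int main(void)
{
    /* hypothetical boundaries, purely for the demo */
    struct prot_choice c = choose_prot(0x5000, 0x4000, 0x9000,
                                       0x5000, 0x7000, true);
    printf("prot=%d huge=%d\n", c.prot, c.huge); /* PAGE_KERNEL_EXEC, huge */
    return 0;
}
```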
No need to do additional stuff here */ - printk (KERN_INFO "Write protecting the kernel read-only data: %luk\n", - (unsigned long)(__end_rodata - __start_rodata) >> 10); + unsigned long roai_size = __end_ro_after_init - __start_ro_after_init; + + pr_info("Write protected read-only-after-init data: %luk\n", roai_size >> 10); } #endif @@ -571,11 +557,11 @@ void mark_rodata_ro(void) #define SET_MAP_OFFSET(x) ((void *)(((unsigned long)(x) + VM_MAP_OFFSET) \ & ~(VM_MAP_OFFSET-1))) -void *parisc_vmalloc_start __read_mostly; +void *parisc_vmalloc_start __ro_after_init; EXPORT_SYMBOL(parisc_vmalloc_start); #ifdef CONFIG_PA11 -unsigned long pcxl_dma_start __read_mostly; +unsigned long pcxl_dma_start __ro_after_init; #endif void __init mem_init(void) @@ -622,15 +608,19 @@ void __init mem_init(void) * But keep code for debugging purposes. */ printk("virtual kernel memory layout:\n" - " vmalloc : 0x%px - 0x%px (%4ld MB)\n" - " memory : 0x%px - 0x%px (%4ld MB)\n" - " .init : 0x%px - 0x%px (%4ld kB)\n" - " .data : 0x%px - 0x%px (%4ld kB)\n" - " .text : 0x%px - 0x%px (%4ld kB)\n", + " vmalloc : 0x%px - 0x%px (%4ld MB)\n" + " fixmap : 0x%px - 0x%px (%4ld kB)\n" + " memory : 0x%px - 0x%px (%4ld MB)\n" + " .init : 0x%px - 0x%px (%4ld kB)\n" + " .data : 0x%px - 0x%px (%4ld kB)\n" + " .text : 0x%px - 0x%px (%4ld kB)\n", (void*)VMALLOC_START, (void*)VMALLOC_END, (VMALLOC_END - VMALLOC_START) >> 20, + (void *)FIXMAP_START, (void *)(FIXMAP_START + FIXMAP_SIZE), + (unsigned long)(FIXMAP_SIZE / 1024), + __va(0), high_memory, ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20, @@ -645,7 +635,7 @@ void __init mem_init(void) #endif } -unsigned long *empty_zero_page __read_mostly; +unsigned long *empty_zero_page __ro_after_init; EXPORT_SYMBOL(empty_zero_page); /* @@ -709,37 +699,46 @@ static void __init gateway_init(void) PAGE_SIZE, PAGE_GATEWAY, 1); } -void __init paging_init(void) +static void __init parisc_bootmem_free(void) { + unsigned long zones_size[MAX_NR_ZONES] = { 0, }; + unsigned long holes_size[MAX_NR_ZONES] = { 0, }; + unsigned long mem_start_pfn = ~0UL, mem_end_pfn = 0, mem_size_pfn = 0; int i; + for (i = 0; i < npmem_ranges; i++) { + unsigned long start = pmem_ranges[i].start_pfn; + unsigned long size = pmem_ranges[i].pages; + unsigned long end = start + size; + + if (mem_start_pfn > start) + mem_start_pfn = start; + if (mem_end_pfn < end) + mem_end_pfn = end; + mem_size_pfn += size; + } + + zones_size[0] = mem_end_pfn - mem_start_pfn; + holes_size[0] = zones_size[0] - mem_size_pfn; + + free_area_init_node(0, zones_size, mem_start_pfn, holes_size); +} + +void __init paging_init(void) +{ setup_bootmem(); pagetable_init(); gateway_init(); flush_cache_all_local(); /* start with known state */ flush_tlb_all_local(NULL); - for (i = 0; i < npmem_ranges; i++) { - unsigned long zones_size[MAX_NR_ZONES] = { 0, }; - - zones_size[ZONE_NORMAL] = pmem_ranges[i].pages; - -#ifdef CONFIG_DISCONTIGMEM - /* Need to initialize the pfnnid_map before we can initialize - the zone */ - { - int j; - for (j = (pmem_ranges[i].start_pfn >> PFNNID_SHIFT); - j <= ((pmem_ranges[i].start_pfn + pmem_ranges[i].pages) >> PFNNID_SHIFT); - j++) { - pfnnid_map[j] = i; - } - } -#endif - - free_area_init_node(i, zones_size, - pmem_ranges[i].start_pfn, NULL); - } + /* + * Mark all memblocks as present for sparsemem using + * memory_present() and then initialize sparsemem. 
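The new parisc_bootmem_free() above feeds free_area_init_node() a single node whose zone spans the lowest to the highest PFN, with the gaps between ranges accounted as holes. Re-running that arithmetic standalone on a hypothetical two-range layout:

```c
#include <stdio.h>

typedef struct {
    unsigned long start_pfn;
    unsigned long pages;
} physmem_range_t;

int main(void)
{
    /* hypothetical: two ranges with a hole in between */
    physmem_range_t pmem_ranges[] = { { 0, 0x1000 }, { 0x2000, 0x1000 } };
    int npmem_ranges = 2;
    unsigned long mem_start_pfn = ~0UL, mem_end_pfn = 0, mem_size_pfn = 0;

    for (int i = 0; i < npmem_ranges; i++) {
        unsigned long start = pmem_ranges[i].start_pfn;
        unsigned long end = start + pmem_ranges[i].pages;

        if (mem_start_pfn > start)
            mem_start_pfn = start;
        if (mem_end_pfn < end)
            mem_end_pfn = end;
        mem_size_pfn += pmem_ranges[i].pages;
    }

    /* zone spans the full range; holes are span minus present pages */
    unsigned long span = mem_end_pfn - mem_start_pfn;   /* 0x3000 */
    unsigned long holes = span - mem_size_pfn;          /* 0x1000 */
    printf("span=%#lx holes=%#lx\n", span, holes);
    return 0;
}
```

With 0x1000 pages missing between the two ranges, the zone span is 0x3000 PFNs of which 0x1000 are holes, which is exactly what the two accumulators in the patch compute.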
+ */ + memblocks_present(); + sparse_init(); + parisc_bootmem_free(); } #ifdef CONFIG_PA20 @@ -921,10 +920,3 @@ void flush_tlb_all(void) spin_unlock(&sid_lock); } #endif - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - free_reserved_area((void *)start, (void *)end, -1, "initrd"); -} -#endif diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index fa7219ffeadc..8c1c636308c8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -137,6 +137,7 @@ config PPC select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAS_ZONE_DEVICE if PPC_BOOK3S_64 select ARCH_HAVE_NMI_SAFE_CMPXCHG + select ARCH_KEEP_MEMBLOCK select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX @@ -167,6 +168,7 @@ config PPC select GENERIC_TIME_VSYSCALL select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KASAN if PPC32 select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT @@ -375,7 +377,6 @@ config ZONE_DMA config PGTABLE_LEVELS int default 2 if !PPC64 - default 3 if PPC_64K_PAGES && !PPC_BOOK3S_64 default 4 source "arch/powerpc/sysdev/Kconfig" @@ -391,7 +392,7 @@ source "kernel/Kconfig.hz" config HUGETLB_PAGE_SIZE_VARIABLE bool - depends on HUGETLB_PAGE + depends on HUGETLB_PAGE && PPC_BOOK3S_64 default y config MATH_EMULATION @@ -832,9 +833,9 @@ config CMDLINE_BOOL bool "Default bootloader kernel arguments" config CMDLINE - string "Initial kernel command string" - depends on CMDLINE_BOOL - default "console=ttyS0,9600 console=tty0 root=/dev/sda2" + string "Initial kernel command string" if CMDLINE_BOOL + default "console=ttyS0,9600 console=tty0 root=/dev/sda2" if CMDLINE_BOOL + default "" help On some platforms, there is currently no way for the boot loader to pass arguments to the kernel. For these platforms, you can supply diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 4e00cb0a5464..c59920920ddc 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -117,6 +117,14 @@ config XMON_DISASSEMBLY to say Y here, unless you're building for a memory-constrained system. +config XMON_DEFAULT_RO_MODE + bool "Restrict xmon to read-only operations by default" + depends on XMON + default y + help + Operate xmon in read-only mode. The cmdline options 'xmon=rw' and + 'xmon=ro' override this default. + config DEBUGGER bool depends on KGDB || XMON @@ -361,8 +369,32 @@ config PPC_PTDUMP If you are unsure, say N. +config PPC_DEBUG_WX + bool "Warn on W+X mappings at boot" + depends on PPC_PTDUMP + help + Generate a warning if any W+X mappings are found at boot. + + This is useful for discovering cases where the kernel is leaving + W+X mappings after applying NX, as such mappings are a security risk. + + Note that even if the check fails, your kernel is possibly + still fine, as W+X mappings are not a security hole in + themselves, what they do is that they make the exploitation + of other unfixed kernel bugs easier. + + There is no runtime or memory usage effect of this option + once the kernel has booted up - it's a one time check. + + If in doubt, say "Y". + config PPC_FAST_ENDIAN_SWITCH bool "Deprecated fast endian-switch syscall" depends on DEBUG_KERNEL && PPC_BOOK3S_64 help If you're unsure what this is, say N. 
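The PPC_DEBUG_WX help text above describes a boot-time scan built on PPC_PTDUMP: walk the finished page tables and warn about any range that is simultaneously writable and executable. The whole check reduces to a one-line predicate over the mapping's permission bits; a toy version with made-up flag values (powerpc encodes its PTE bits quite differently, so this is conceptual only):

```c
#include <stdbool.h>
#include <stdio.h>

#define PTE_WRITE 0x1   /* hypothetical flag bits, not powerpc's */
#define PTE_EXEC  0x2

static bool pte_is_wx(unsigned long flags)
{
    return (flags & (PTE_WRITE | PTE_EXEC)) == (PTE_WRITE | PTE_EXEC);
}

int main(void)
{
    unsigned long mappings[] = { PTE_WRITE, PTE_EXEC, PTE_WRITE | PTE_EXEC };
    int wx = 0;

    for (int i = 0; i < 3; i++)
        if (pte_is_wx(mappings[i]))
            wx++;
    if (wx)
        fprintf(stderr, "Warning: %d W+X mapping(s) found\n", wx);
    return 0;
}
```

As the help text notes, a hit is not itself exploitable; it just removes one hurdle for exploiting some other bug, which is why the check runs once at boot and costs nothing afterwards.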
+ +config KASAN_SHADOW_OFFSET + hex + depends on KASAN + default 0xe0000000 diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 7de49889bd5d..258ea6b2f2e7 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -34,11 +34,10 @@ ifdef CONFIG_PPC_BOOK3S_32 KBUILD_CFLAGS += -mcpu=powerpc endif -ifeq ($(CROSS_COMPILE),) -KBUILD_DEFCONFIG := $(shell uname -m)_defconfig -else -KBUILD_DEFCONFIG := ppc64_defconfig -endif +# If we're on a ppc/ppc64/ppc64le machine use that defconfig, otherwise just use +# ppc64_defconfig because we have nothing better to go on. +uname := $(shell uname -m) +KBUILD_DEFCONFIG := $(if $(filter ppc%,$(uname)),$(uname),ppc64)_defconfig ifdef CONFIG_PPC64 new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi) @@ -367,6 +366,10 @@ ppc32_allmodconfig: $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/book3s_32.config \ -f $(srctree)/Makefile allmodconfig +PHONY += ppc_defconfig +ppc_defconfig: + $(call merge_into_defconfig,book3s_32.config,) + PHONY += ppc64le_allmodconfig ppc64le_allmodconfig: $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/le.config \ @@ -406,7 +409,9 @@ vdso_install: ifdef CONFIG_PPC64 $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@ endif +ifdef CONFIG_VDSO32 $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@ +endif archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/powerpc/boot/addnote.c b/arch/powerpc/boot/addnote.c index 9d9f6f334d3c..3da3e2b1b51b 100644 --- a/arch/powerpc/boot/addnote.c +++ b/arch/powerpc/boot/addnote.c @@ -223,7 +223,11 @@ main(int ac, char **av) PUT_16(E_PHNUM, np + 2); /* write back */ - lseek(fd, (long) 0, SEEK_SET); + i = lseek(fd, (long) 0, SEEK_SET); + if (i < 0) { + perror("lseek"); + exit(1); + } i = write(fd, buf, n); if (i < 0) { perror("write"); diff --git a/arch/powerpc/boot/dts/fsl/b4qds.dtsi b/arch/powerpc/boot/dts/fsl/b4qds.dtsi index 999efd3bc167..05be919f3545 100644 --- a/arch/powerpc/boot/dts/fsl/b4qds.dtsi +++ b/arch/powerpc/boot/dts/fsl/b4qds.dtsi @@ -40,6 +40,7 @@ interrupt-parent = <&mpic>; aliases { + crypto = &crypto; phy_sgmii_10 = &phy_sgmii_10; phy_sgmii_11 = &phy_sgmii_11; phy_sgmii_1c = &phy_sgmii_1c; diff --git a/arch/powerpc/configs/40x/kilauea_defconfig b/arch/powerpc/configs/40x/kilauea_defconfig index b5cc7426c21f..3da091f651d6 100644 --- a/arch/powerpc/configs/40x/kilauea_defconfig +++ b/arch/powerpc/configs/40x/kilauea_defconfig @@ -33,7 +33,7 @@ CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_NDFC=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 diff --git a/arch/powerpc/configs/40x/obs600_defconfig b/arch/powerpc/configs/40x/obs600_defconfig index aac06d2ad01a..38d3d7769a2f 100644 --- a/arch/powerpc/configs/40x/obs600_defconfig +++ b/arch/powerpc/configs/40x/obs600_defconfig @@ -33,7 +33,7 @@ CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_NDFC=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 diff --git a/arch/powerpc/configs/44x/canyonlands_defconfig b/arch/powerpc/configs/44x/canyonlands_defconfig index c8e6f048a122..d427cee027a6 100644 --- a/arch/powerpc/configs/44x/canyonlands_defconfig +++ b/arch/powerpc/configs/44x/canyonlands_defconfig @@ -32,7 +32,7 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y 
CONFIG_MTD_NAND_NDFC=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 diff --git a/arch/powerpc/configs/44x/eiger_defconfig b/arch/powerpc/configs/44x/eiger_defconfig index f6dc23fef683..f593258806ad 100644 --- a/arch/powerpc/configs/44x/eiger_defconfig +++ b/arch/powerpc/configs/44x/eiger_defconfig @@ -33,7 +33,7 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_NDFC=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 diff --git a/arch/powerpc/configs/44x/sequoia_defconfig b/arch/powerpc/configs/44x/sequoia_defconfig index 1e04122912f3..f34fee9464e5 100644 --- a/arch/powerpc/configs/44x/sequoia_defconfig +++ b/arch/powerpc/configs/44x/sequoia_defconfig @@ -33,7 +33,7 @@ CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_NDFC=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig index 6c02f53271cd..6ae88d4879bf 100644 --- a/arch/powerpc/configs/44x/warp_defconfig +++ b/arch/powerpc/configs/44x/warp_defconfig @@ -34,7 +34,7 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_NDFC=y CONFIG_MTD_UBI=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig index 1f69f4edf074..9dffb2e7f735 100644 --- a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig @@ -31,7 +31,7 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig index 797fc3ffddee..a42232732c6d 100644 --- a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig @@ -31,7 +31,7 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 diff --git a/arch/powerpc/configs/85xx-hw.config b/arch/powerpc/configs/85xx-hw.config index c03d0fb16665..9575a38c9155 100644 --- a/arch/powerpc/configs/85xx-hw.config +++ b/arch/powerpc/configs/85xx-hw.config @@ -71,7 +71,7 @@ CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_M25P80=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_MTD_NAND_FSL_IFC=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_PHYSMAP=y CONFIG_MTD_PLATRAM=y diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig index dd98f43b2fb8..d70b60314dad 100644 --- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig +++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig @@ -73,7 +73,7 @@ CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_CRYPTOLOOP=m diff --git a/arch/powerpc/configs/85xx/socrates_defconfig b/arch/powerpc/configs/85xx/socrates_defconfig index 6106fadbbd8b..7037a6d8018c 100644 --- a/arch/powerpc/configs/85xx/socrates_defconfig +++ b/arch/powerpc/configs/85xx/socrates_defconfig @@ -31,7 
+31,7 @@ CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_SOCRATES=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/85xx/tqm8548_defconfig b/arch/powerpc/configs/85xx/tqm8548_defconfig index 2697e4e8a761..1c63cbdc3211 100644 --- a/arch/powerpc/configs/85xx/tqm8548_defconfig +++ b/arch/powerpc/configs/85xx/tqm8548_defconfig @@ -35,8 +35,8 @@ CONFIG_MTD=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_NAND_ECC_SMC=y -CONFIG_MTD_NAND=y +CONFIG_MTD_NAND_ECC_SW_HAMMING_SMC=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_FSL_UPM=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig index 6531139a8a8d..78f5beb2928c 100644 --- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig +++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig @@ -65,7 +65,7 @@ CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_CFI_STAA=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_MTD_NAND_FSL_UPM=y CONFIG_BLK_DEV_LOOP=y diff --git a/arch/powerpc/configs/86xx-hw.config b/arch/powerpc/configs/86xx-hw.config index d3dd6b8865c0..151164cf8cb3 100644 --- a/arch/powerpc/configs/86xx-hw.config +++ b/arch/powerpc/configs/86xx-hw.config @@ -47,7 +47,7 @@ CONFIG_MTD_CFI=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_NAND_FSL_ELBC=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_PHYSMAP_OF=y CONFIG_NETDEVICES=y CONFIG_NET_TULIP=y diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig index e4bfb1101c0e..e4bf8aa87e60 100644 --- a/arch/powerpc/configs/mpc512x_defconfig +++ b/arch/powerpc/configs/mpc512x_defconfig @@ -46,7 +46,7 @@ CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_ROM=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_MPC5121_NFC=y CONFIG_MTD_UBI=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig index d1b82035d35f..005d00020fb9 100644 --- a/arch/powerpc/configs/mpc83xx_defconfig +++ b/arch/powerpc/configs/mpc83xx_defconfig @@ -46,7 +46,7 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 6daa56f8895c..c0423b2cf7c0 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -51,7 +51,7 @@ CONFIG_MTD=y CONFIG_MTD_BLOCK=y CONFIG_MTD_SLRAM=y CONFIG_MTD_PHRAM=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_PASEMI=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig index 66dd6bf45cde..db48039e0b11 100644 --- a/arch/powerpc/configs/ppc44x_defconfig +++ b/arch/powerpc/configs/ppc44x_defconfig @@ -44,7 +44,7 @@ CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_NAND=m +CONFIG_MTD_RAW_NAND=m CONFIG_MTD_NAND_NDFC=m CONFIG_MTD_UBI=m CONFIG_MTD_UBI_GLUEBI=m diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index ea79c519863d..62e12f61a3b2 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ 
b/arch/powerpc/configs/pseries_defconfig @@ -217,6 +217,7 @@ CONFIG_USB_MON=m CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_HCD_PPC_OF is not set CONFIG_USB_OHCI_HCD=y +CONFIG_USB_XHCI_HCD=y CONFIG_USB_STORAGE=m CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=m diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 1bcd468ab422..a887616e35a2 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -163,6 +163,8 @@ CONFIG_S2IO=m CONFIG_MLX4_EN=m # CONFIG_MLX4_CORE_GEN2 is not set CONFIG_MLX5_CORE=m +CONFIG_MLX5_CORE_EN=y +# CONFIG_MLX5_EN_RXNFC is not set # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROSEMI is not set CONFIG_MYRI10GE=m diff --git a/arch/powerpc/crypto/crc-vpmsum_test.c b/arch/powerpc/crypto/crc-vpmsum_test.c index 0153a9c6f4af..98ea4f4d3dde 100644 --- a/arch/powerpc/crypto/crc-vpmsum_test.c +++ b/arch/powerpc/crypto/crc-vpmsum_test.c @@ -78,16 +78,12 @@ static int __init crc_test_init(void) pr_info("crc-vpmsum_test begins, %lu iterations\n", iterations); for (i=0; i<iterations; i++) { - size_t len, offset; + size_t offset = prandom_u32_max(16); + size_t len = prandom_u32_max(MAX_CRC_LENGTH); - get_random_bytes(data, MAX_CRC_LENGTH); - get_random_bytes(&len, sizeof(len)); - get_random_bytes(&offset, sizeof(offset)); - - len %= MAX_CRC_LENGTH; - offset &= 15; if (len <= offset) continue; + prandom_bytes(data, len); len -= offset; crypto_shash_update(crct10dif_shash, data+offset, len); diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c index fd1d6c83f0c0..c4fa242dd652 100644 --- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c +++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c @@ -1,10 +1,12 @@ #include <linux/crc32.h> #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <linux/init.h> #include <linux/module.h> #include <linux/string.h> #include <linux/kernel.h> #include <linux/cpufeature.h> +#include <asm/simd.h> #include <asm/switch_to.h> #define CHKSUM_BLOCK_SIZE 1 @@ -22,7 +24,7 @@ static u32 crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len) unsigned int prealign; unsigned int tail; - if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || in_interrupt()) + if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || !crypto_simd_usable()) return __crc32c_le(crc, p, len); if ((unsigned long)p & VMX_ALIGN_MASK) { diff --git a/arch/powerpc/crypto/crct10dif-vpmsum_glue.c b/arch/powerpc/crypto/crct10dif-vpmsum_glue.c index 02ea277863d1..e27ff16573b5 100644 --- a/arch/powerpc/crypto/crct10dif-vpmsum_glue.c +++ b/arch/powerpc/crypto/crct10dif-vpmsum_glue.c @@ -12,11 +12,13 @@ #include <linux/crc-t10dif.h> #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <linux/init.h> #include <linux/module.h> #include <linux/string.h> #include <linux/kernel.h> #include <linux/cpufeature.h> +#include <asm/simd.h> #include <asm/switch_to.h> #define VMX_ALIGN 16 @@ -32,7 +34,7 @@ static u16 crct10dif_vpmsum(u16 crci, unsigned char const *p, size_t len) unsigned int tail; u32 crc = crci; - if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || in_interrupt()) + if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || !crypto_simd_usable()) return crc_t10dif_generic(crc, p, len); if ((unsigned long)p & VMX_ALIGN_MASK) { diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 36bda391e549..b9f6e72bf4e5 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -10,3 +10,4 @@ generic-y += mcs_spinlock.h generic-y 
+= preempt.h generic-y += vtime.h generic-y += msi.h +generic-y += simd.h diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h new file mode 100644 index 000000000000..677e9babef80 --- /dev/null +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -0,0 +1,145 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BOOK3S_32_KUP_H +#define _ASM_POWERPC_BOOK3S_32_KUP_H + +#include <asm/book3s/32/mmu-hash.h> + +#ifdef __ASSEMBLY__ + +.macro kuep_update_sr gpr1, gpr2 /* NEVER use r0 as gpr2 due to addis */ +101: mtsrin \gpr1, \gpr2 + addi \gpr1, \gpr1, 0x111 /* next VSID */ + rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */ + addis \gpr2, \gpr2, 0x1000 /* address of next segment */ + bdnz 101b + isync +.endm + +.macro kuep_lock gpr1, gpr2 +#ifdef CONFIG_PPC_KUEP + li \gpr1, NUM_USER_SEGMENTS + li \gpr2, 0 + mtctr \gpr1 + mfsrin \gpr1, \gpr2 + oris \gpr1, \gpr1, SR_NX@h /* set Nx */ + kuep_update_sr \gpr1, \gpr2 +#endif +.endm + +.macro kuep_unlock gpr1, gpr2 +#ifdef CONFIG_PPC_KUEP + li \gpr1, NUM_USER_SEGMENTS + li \gpr2, 0 + mtctr \gpr1 + mfsrin \gpr1, \gpr2 + rlwinm \gpr1, \gpr1, 0, ~SR_NX /* Clear Nx */ + kuep_update_sr \gpr1, \gpr2 +#endif +.endm + +#ifdef CONFIG_PPC_KUAP + +.macro kuap_update_sr gpr1, gpr2, gpr3 /* NEVER use r0 as gpr2 due to addis */ +101: mtsrin \gpr1, \gpr2 + addi \gpr1, \gpr1, 0x111 /* next VSID */ + rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */ + addis \gpr2, \gpr2, 0x1000 /* address of next segment */ + cmplw \gpr2, \gpr3 + blt- 101b + isync +.endm + +.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 + lwz \gpr2, KUAP(\thread) + rlwinm. \gpr3, \gpr2, 28, 0xf0000000 + stw \gpr2, STACK_REGS_KUAP(\sp) + beq+ 102f + li \gpr1, 0 + stw \gpr1, KUAP(\thread) + mfsrin \gpr1, \gpr2 + oris \gpr1, \gpr1, SR_KS@h /* set Ks */ + kuap_update_sr \gpr1, \gpr2, \gpr3 +102: +.endm + +.macro kuap_restore sp, current, gpr1, gpr2, gpr3 + lwz \gpr2, STACK_REGS_KUAP(\sp) + rlwinm. 
\gpr3, \gpr2, 28, 0xf0000000 + stw \gpr2, THREAD + KUAP(\current) + beq+ 102f + mfsrin \gpr1, \gpr2 + rlwinm \gpr1, \gpr1, 0, ~SR_KS /* Clear Ks */ + kuap_update_sr \gpr1, \gpr2, \gpr3 +102: +.endm + +.macro kuap_check current, gpr +#ifdef CONFIG_PPC_KUAP_DEBUG + lwz \gpr2, KUAP(thread) +999: twnei \gpr, 0 + EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) +#endif +.endm + +#endif /* CONFIG_PPC_KUAP */ + +#else /* !__ASSEMBLY__ */ + +#ifdef CONFIG_PPC_KUAP + +#include <linux/sched.h> + +static inline void kuap_update_sr(u32 sr, u32 addr, u32 end) +{ + barrier(); /* make sure thread.kuap is updated before playing with SRs */ + while (addr < end) { + mtsrin(sr, addr); + sr += 0x111; /* next VSID */ + sr &= 0xf0ffffff; /* clear VSID overflow */ + addr += 0x10000000; /* address of next segment */ + } + isync(); /* Context sync required after mtsrin() */ +} + +static inline void allow_user_access(void __user *to, const void __user *from, u32 size) +{ + u32 addr, end; + + if (__builtin_constant_p(to) && to == NULL) + return; + + addr = (__force u32)to; + + if (!addr || addr >= TASK_SIZE || !size) + return; + + end = min(addr + size, TASK_SIZE); + current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf); + kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */ +} + +static inline void prevent_user_access(void __user *to, const void __user *from, u32 size) +{ + u32 addr = (__force u32)to; + u32 end = min(addr + size, TASK_SIZE); + + if (!addr || addr >= TASK_SIZE || !size) + return; + + current->thread.kuap = 0; + kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */ +} + +static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +{ + if (!is_write) + return false; + + return WARN(!regs->kuap, "Bug: write fault blocked by segment registers !"); +} + +#endif /* CONFIG_PPC_KUAP */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_BOOK3S_32_KUP_H */ diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index 5cb588395fdc..2e277ca0170f 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -10,8 +10,6 @@ * BATs */ -#include <asm/page.h> - /* Block size masks */ #define BL_128K 0x000 #define BL_256K 0x001 @@ -49,8 +47,6 @@ struct ppc_bat { u32 batu; u32 batl; }; - -typedef pte_t *pgtable_t; #endif /* !__ASSEMBLY__ */ /* @@ -63,6 +59,11 @@ typedef pte_t *pgtable_t; #define PP_RWRW 2 /* Supervisor read/write, User read/write */ #define PP_RXRX 3 /* Supervisor read, User read */ +/* Values for Segment Registers */ +#define SR_NX 0x10000000 /* No Execute */ +#define SR_KP 0x20000000 /* User key */ +#define SR_KS 0x40000000 /* Supervisor key */ + #ifndef __ASSEMBLY__ /* diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index 3633502e102c..998317702630 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -5,28 +5,6 @@ #include <linux/threads.h> #include <linux/slab.h> -/* - * Functions that deal with pagetables that could be at any level of - * the table need to be passed an "index_size" so they know how to - * handle allocation. For PTE pages (which are linked to a struct - * page for now, and drawn from the main get_free_pages() pool), the - * allocation size will be (2^index_size * sizeof(pointer)) and - * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). 
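Both halves of the 32-bit KUAP code above, the assembly entry/exit macros and the C helpers, implement the same contract: user memory is accessible only between a matched allow_user_access()/prevent_user_access() pair, and the thread's kuap word records which window is open so a fault handler can tell a legitimate access from a blocked one. A mock-up of how a uaccess primitive is expected to bracket the actual copy (copy_to_user_mock is hypothetical, and the segment-register manipulation is replaced by a flag so this compiles anywhere):

```c
#include <stdbool.h>
#include <string.h>

static bool user_access_allowed;   /* stands in for the SR_KS key bits */

static void allow_user_access(void *to, const void *from, unsigned long size)
{
    (void)to; (void)from; (void)size;
    user_access_allowed = true;    /* real code: clear SR_KS over the range */
}

static void prevent_user_access(void *to, const void *from, unsigned long size)
{
    (void)to; (void)from; (void)size;
    user_access_allowed = false;   /* real code: set SR_KS again */
}

static long copy_to_user_mock(void *to, const void *from, unsigned long n)
{
    allow_user_access(to, from, n);
    memcpy(to, from, n);           /* the window is open only here */
    prevent_user_access(to, from, n);
    return 0;
}

int main(void)
{
    char dst[4];
    const char src[4] = "ok";

    copy_to_user_mock(dst, src, sizeof(dst));
    return user_access_allowed ? 1 : 0;   /* window must be closed again */
}
```

On real hardware the open and close steps are the kuap_update_sr() loops above, which clear and set SR_KS across every segment register covering the access.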
- * - * The maximum index size needs to be big enough to allow any - * pagetable sizes we need, but small enough to fit in the low bits of - * any page table pointer. In other words all pagetables, even tiny - * ones, must be aligned to allow at least enough low 0 bits to - * contain this value. This value is also used as a mask, so it must - * be one less than a power of two. - */ -#define MAX_PGTABLE_INDEX_SIZE 0xf - -extern void __bad_pte(pmd_t *pmd); - -extern struct kmem_cache *pgtable_cache[]; -#define PGT_CACHE(shift) pgtable_cache[shift] - static inline pgd_t *pgd_alloc(struct mm_struct *mm) { return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), @@ -59,24 +37,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, *pmdp = __pmd(__pa(pte_page) | _PMD_PRESENT); } -#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd)) - -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); -extern pgtable_t pte_alloc_one(struct mm_struct *mm); -void pte_frag_destroy(void *pte_frag); -pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel); -void pte_fragment_free(unsigned long *table, int kernel); - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - pte_fragment_free((unsigned long *)pte, 1); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) -{ - pte_fragment_free((unsigned long *)ptepage, 0); -} - static inline void pgtable_free(void *table, unsigned index_size) { if (!index_size) { @@ -87,7 +47,6 @@ static inline void pgtable_free(void *table, unsigned index_size) } } -#define check_pgt_cache() do { } while (0) #define get_hugepd_cache_index(x) (x) #ifdef CONFIG_SMP diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index aa8406b8f7ba..838de59f6754 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -134,15 +134,24 @@ static inline bool pte_user(pte_t pte) #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) + +#ifndef __ASSEMBLY__ + +int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); + +#endif /* !__ASSEMBLY__ */ + /* * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary * value (for now) on others, from where we can start layout kernel * virtual space that goes below PKMAP and FIXMAP */ +#include <asm/fixmap.h> + #ifdef CONFIG_HIGHMEM #define KVIRT_TOP PKMAP_BASE #else -#define KVIRT_TOP (0xfe000000UL) /* for now, could be FIXMAP_BASE ? 
*/ +#define KVIRT_TOP FIXADDR_START #endif /* @@ -373,8 +382,6 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); - /* Generic accessors to PTE bits */ static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);} static inline int pte_read(pte_t pte) { return 1; } diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index cf5ba5254299..8fd8599c9395 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -2,10 +2,10 @@ #ifndef _ASM_POWERPC_BOOK3S_64_HASH_4K_H #define _ASM_POWERPC_BOOK3S_64_HASH_4K_H -#define H_PTE_INDEX_SIZE 9 -#define H_PMD_INDEX_SIZE 7 -#define H_PUD_INDEX_SIZE 9 -#define H_PGD_INDEX_SIZE 9 +#define H_PTE_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 4KB = 2MB +#define H_PMD_INDEX_SIZE 7 // size: 8B << 7 = 1KB, maps: 2^7 x 2MB = 256MB +#define H_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 256MB = 128GB +#define H_PGD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 128GB = 64TB /* * Each context is 512TB. But on 4k we restrict our max TASK size to 64TB @@ -13,6 +13,21 @@ */ #define MAX_EA_BITS_PER_CONTEXT 46 +#define REGION_SHIFT (MAX_EA_BITS_PER_CONTEXT - 2) + +/* + * Our page table limit us to 64TB. Hence for the kernel mapping, + * each MAP area is limited to 16 TB. + * The four map areas are: linear mapping, vmap, IO and vmemmap + */ +#define H_KERN_MAP_SIZE (ASM_CONST(1) << REGION_SHIFT) + +/* + * Define the address range of the kernel non-linear virtual area + * 16TB + */ +#define H_KERN_VIRT_START ASM_CONST(0xc000100000000000) + #ifndef __ASSEMBLY__ #define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE) #define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index f82ee8a3b561..d1d9177d9ebd 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -2,16 +2,29 @@ #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H -#define H_PTE_INDEX_SIZE 8 -#define H_PMD_INDEX_SIZE 10 -#define H_PUD_INDEX_SIZE 10 -#define H_PGD_INDEX_SIZE 8 +#define H_PTE_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 64KB = 16MB +#define H_PMD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16MB = 16GB +#define H_PUD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16GB = 16TB +#define H_PGD_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 16TB = 4PB + /* * Each context is 512TB size. SLB miss for first context/default context * is handled in the hotpath. */ #define MAX_EA_BITS_PER_CONTEXT 49 +#define REGION_SHIFT MAX_EA_BITS_PER_CONTEXT + +/* + * We use one context for each MAP area. 
+ */ +#define H_KERN_MAP_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT) + +/* + * Define the address range of the kernel non-linear virtual area + * 2PB + */ +#define H_KERN_VIRT_START ASM_CONST(0xc008000000000000) /* * 64k aligned address free up few of the lower bits of RPN for us diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 54b7af6cd27f..1d1183048cfd 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -29,6 +29,10 @@ #define H_PGTABLE_EADDR_SIZE (H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + \ H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT) #define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE) +/* + * Top 2 bits are ignored in page table walk. + */ +#define EA_MASK (~(0xcUL << 60)) /* * We store the slot details in the second half of page table. @@ -42,59 +46,63 @@ #endif /* - * Define the address range of the kernel non-linear virtual area. In contrast - * to the linear mapping, this is managed using the kernel page tables and then - * inserted into the hash page table to actually take effect, similarly to user - * mappings. + * +------------------------------+ + * | | + * | | + * | | + * +------------------------------+ Kernel virtual map end (0xc00e000000000000) + * | | + * | | + * | 512TB/16TB of vmemmap | + * | | + * | | + * +------------------------------+ Kernel vmemmap start + * | | + * | 512TB/16TB of IO map | + * | | + * +------------------------------+ Kernel IO map start + * | | + * | 512TB/16TB of vmap | + * | | + * +------------------------------+ Kernel virt start (0xc008000000000000) + * | | + * | | + * | | + * +------------------------------+ Kernel linear (0xc.....) */ -#define H_KERN_VIRT_START ASM_CONST(0xD000000000000000) -/* - * Allow virtual mapping of one context size. 
- * 512TB for 64K page size - * 64TB for 4K page size - */ -#define H_KERN_VIRT_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT) +#define H_VMALLOC_START H_KERN_VIRT_START +#define H_VMALLOC_SIZE H_KERN_MAP_SIZE +#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE) -/* - * 8TB IO mapping size - */ -#define H_KERN_IO_SIZE ASM_CONST(0x80000000000) /* 8T */ +#define H_KERN_IO_START H_VMALLOC_END +#define H_KERN_IO_SIZE H_KERN_MAP_SIZE +#define H_KERN_IO_END (H_KERN_IO_START + H_KERN_IO_SIZE) -/* - * The vmalloc space starts at the beginning of the kernel non-linear virtual - * region, and occupies 504T (64K) or 56T (4K) - */ -#define H_VMALLOC_START H_KERN_VIRT_START -#define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE - H_KERN_IO_SIZE) -#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE) +#define H_VMEMMAP_START H_KERN_IO_END +#define H_VMEMMAP_SIZE H_KERN_MAP_SIZE +#define H_VMEMMAP_END (H_VMEMMAP_START + H_VMEMMAP_SIZE) -#define H_KERN_IO_START H_VMALLOC_END +#define NON_LINEAR_REGION_ID(ea) ((((unsigned long)ea - H_KERN_VIRT_START) >> REGION_SHIFT) + 2) /* * Region IDs */ -#define REGION_SHIFT 60UL -#define REGION_MASK (0xfUL << REGION_SHIFT) -#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) - -#define VMALLOC_REGION_ID (REGION_ID(H_VMALLOC_START)) -#define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET)) -#define VMEMMAP_REGION_ID (0xfUL) /* Server only */ -#define USER_REGION_ID (0UL) +#define USER_REGION_ID 0 +#define LINEAR_MAP_REGION_ID 1 +#define VMALLOC_REGION_ID NON_LINEAR_REGION_ID(H_VMALLOC_START) +#define IO_REGION_ID NON_LINEAR_REGION_ID(H_KERN_IO_START) +#define VMEMMAP_REGION_ID NON_LINEAR_REGION_ID(H_VMEMMAP_START) /* * Defines the address of the vmemap area, in its own region on * hash table CPUs. */ -#define H_VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT) - #ifdef CONFIG_PPC_MM_SLICES #define HAVE_ARCH_UNMAPPED_AREA #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN #endif /* CONFIG_PPC_MM_SLICES */ - /* PTEIDX nibble */ #define _PTEIDX_SECONDARY 0x8 #define _PTEIDX_GROUP_IX 0x7 @@ -103,6 +111,25 @@ #define H_PUD_BAD_BITS (PMD_TABLE_SIZE-1) #ifndef __ASSEMBLY__ +static inline int get_region_id(unsigned long ea) +{ + int region_id; + int id = (ea >> 60UL); + + if (id == 0) + return USER_REGION_ID; + + if (ea < H_KERN_VIRT_START) + return LINEAR_MAP_REGION_ID; + + VM_BUG_ON(id != 0xc); + BUILD_BUG_ON(NON_LINEAR_REGION_ID(H_VMALLOC_START) != 2); + + region_id = NON_LINEAR_REGION_ID(ea); + VM_BUG_ON(region_id > VMEMMAP_REGION_ID); + return region_id; +} + #define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS) #define hash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS) static inline int hash__pgd_bad(pgd_t pgd) diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index ec2a55a553c7..12e150e615b7 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -36,8 +36,8 @@ static inline int hstate_get_psize(struct hstate *hstate) } } -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -static inline bool gigantic_page_supported(void) +#define __HAVE_ARCH_GIGANTIC_PAGE_RUNTIME_SUPPORTED +static inline bool gigantic_page_runtime_supported(void) { /* * We used gigantic page reservation with hypervisor assist in some case. 
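The new layout above is easiest to sanity-check numerically. With 64K pages, REGION_SHIFT equals MAX_EA_BITS_PER_CONTEXT (49) and the three non-linear areas start at H_KERN_VIRT_START spaced one H_KERN_MAP_SIZE apart, so NON_LINEAR_REGION_ID() should yield 2, 3 and 4 for vmalloc, IO and vmemmap. That lines up with kernel contexts 5, 6 and 7 in the comment once region_id + MAX_KERNEL_CTX_CNT - 1 is applied, assuming MAX_KERNEL_CTX_CNT is 4 (that constant is defined outside this diff). A standalone check using the constants from the hunks:

```c
#include <stdio.h>

#define REGION_SHIFT        49
#define H_KERN_VIRT_START   0xc008000000000000UL
#define H_KERN_MAP_SIZE     (1UL << REGION_SHIFT)

#define NON_LINEAR_REGION_ID(ea) \
    ((((unsigned long)(ea) - H_KERN_VIRT_START) >> REGION_SHIFT) + 2)

int main(void)
{
    unsigned long vmalloc = H_KERN_VIRT_START;            /* 0xc008... */
    unsigned long io      = vmalloc + H_KERN_MAP_SIZE;    /* 0xc00a... */
    unsigned long vmemmap = io + H_KERN_MAP_SIZE;         /* 0xc00c... */

    /* expected: 2 (VMALLOC), 3 (IO), 4 (VMEMMAP) */
    printf("%lu %lu %lu\n", NON_LINEAR_REGION_ID(vmalloc),
           NON_LINEAR_REGION_ID(io), NON_LINEAR_REGION_ID(vmemmap));
    return 0;
}
```

The computed starts also match the address ranges in the rewritten comment block: 0xc008..., 0xc00a... and 0xc00c... for contexts 5 through 7.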
@@ -49,7 +49,6 @@ static inline bool gigantic_page_supported(void) return true; } -#endif /* hugepd entry valid bit */ #define HUGEPD_VAL_BITS (0x8000000000000000UL) @@ -62,4 +61,76 @@ extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t old_pte, pte_t new_pte); +/* + * This should work for other subarchs too. But right now we use the + * new format only for 64bit book3s + */ +static inline pte_t *hugepd_page(hugepd_t hpd) +{ + BUG_ON(!hugepd_ok(hpd)); + /* + * We have only four bits to encode, MMU page size + */ + BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf); + return __va(hpd_val(hpd) & HUGEPD_ADDR_MASK); +} + +static inline unsigned int hugepd_mmu_psize(hugepd_t hpd) +{ + return (hpd_val(hpd) & HUGEPD_SHIFT_MASK) >> 2; +} + +static inline unsigned int hugepd_shift(hugepd_t hpd) +{ + return mmu_psize_to_shift(hugepd_mmu_psize(hpd)); +} +static inline void flush_hugetlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ + if (radix_enabled()) + return radix__flush_hugetlb_page(vma, vmaddr); +} + +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, + unsigned int pdshift) +{ + unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd); + + return hugepd_page(hpd) + idx; +} + +static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift) +{ + *hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS | (shift_to_mmu_psize(pshift) << 2)); +} + +void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); + +static inline int check_and_get_huge_psize(int shift) +{ + int mmu_psize; + + if (shift > SLICE_HIGH_SHIFT) + return -EINVAL; + + mmu_psize = shift_to_mmu_psize(shift); + + /* + * We need to make sure that for different page sizes reported by + * firmware we only add hugetlb support for page sizes that can be + * supported by linux page table layout. 
+ * For now we have + * Radix: 2M and 1G + * Hash: 16M and 16G + */ + if (radix_enabled()) { + if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G) + return -EINVAL; + } else { + if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G) + return -EINVAL; + } + return mmu_psize; +} + #endif diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h new file mode 100644 index 000000000000..f254de956d6a --- /dev/null +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H +#define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H + +#include <linux/const.h> + +#define AMR_KUAP_BLOCK_READ UL(0x4000000000000000) +#define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000) +#define AMR_KUAP_BLOCKED (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE) +#define AMR_KUAP_SHIFT 62 + +#ifdef __ASSEMBLY__ + +.macro kuap_restore_amr gpr +#ifdef CONFIG_PPC_KUAP + BEGIN_MMU_FTR_SECTION_NESTED(67) + ld \gpr, STACK_REGS_KUAP(r1) + mtspr SPRN_AMR, \gpr + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) +#endif +.endm + +.macro kuap_check_amr gpr1, gpr2 +#ifdef CONFIG_PPC_KUAP_DEBUG + BEGIN_MMU_FTR_SECTION_NESTED(67) + mfspr \gpr1, SPRN_AMR + li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT) + sldi \gpr2, \gpr2, AMR_KUAP_SHIFT +999: tdne \gpr1, \gpr2 + EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) +#endif +.endm + +.macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr +#ifdef CONFIG_PPC_KUAP + BEGIN_MMU_FTR_SECTION_NESTED(67) + .ifnb \msr_pr_cr + bne \msr_pr_cr, 99f + .endif + mfspr \gpr1, SPRN_AMR + std \gpr1, STACK_REGS_KUAP(r1) + li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT) + sldi \gpr2, \gpr2, AMR_KUAP_SHIFT + cmpd \use_cr, \gpr1, \gpr2 + beq \use_cr, 99f + // We don't isync here because we very recently entered via rfid + mtspr SPRN_AMR, \gpr2 + isync +99: + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) +#endif +.endm + +#else /* !__ASSEMBLY__ */ + +#ifdef CONFIG_PPC_KUAP + +#include <asm/reg.h> + +/* + * We support individually allowing read or write, but we don't support nesting + * because that would require an expensive read/modify write of the AMR. + */ + +static inline void set_kuap(unsigned long value) +{ + if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP)) + return; + + /* + * ISA v3.0B says we need a CSI (Context Synchronising Instruction) both + * before and after the move to AMR. See table 6 on page 1134. + */ + isync(); + mtspr(SPRN_AMR, value); + isync(); +} + +static inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size) +{ + // This is written so we can resolve to a single case at build time + if (__builtin_constant_p(to) && to == NULL) + set_kuap(AMR_KUAP_BLOCK_WRITE); + else if (__builtin_constant_p(from) && from == NULL) + set_kuap(AMR_KUAP_BLOCK_READ); + else + set_kuap(0); +} + +static inline void prevent_user_access(void __user *to, const void __user *from, + unsigned long size) +{ + set_kuap(AMR_KUAP_BLOCKED); +} + +static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +{ + return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && + (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), + "Bug: %s fault blocked by AMR!", is_write ? 
"Write" : "Read"); +} +#endif /* CONFIG_PPC_KUAP */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */ diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index a28a28079edb..1e4705516a54 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -588,7 +588,8 @@ extern void slb_set_size(u16 size); #endif #define MAX_VMALLOC_CTX_CNT 1 -#define MAX_MEMMAP_CTX_CNT 1 +#define MAX_IO_CTX_CNT 1 +#define MAX_VMEMMAP_CTX_CNT 1 /* * 256MB segment @@ -601,13 +602,10 @@ extern void slb_set_size(u16 size); * would give a protovsid of 0x1fffffffff. That will result in a VSID 0 * because of the modulo operation in vsid scramble. * - * We add one extra context to MIN_USER_CONTEXT so that we can map kernel - * context easily. The +1 is to map the unused 0xe region mapping. */ #define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2) #define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \ - MAX_MEMMAP_CTX_CNT + 2) - + MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT) /* * For platforms that support on 65bit VA we limit the context bits */ @@ -657,8 +655,8 @@ extern void slb_set_size(u16 size); /* 4 bits per slice and we have one slice per 1TB */ #define SLICE_ARRAY_SIZE (H_PGTABLE_RANGE >> 41) -#define TASK_SLICE_ARRAY_SZ(x) ((x)->context.slb_addr_limit >> 41) - +#define LOW_SLICE_ARRAY_SZ (BITS_PER_LONG / BITS_PER_BYTE) +#define TASK_SLICE_ARRAY_SZ(x) ((x)->hash_context->slb_addr_limit >> 41) #ifndef __ASSEMBLY__ #ifdef CONFIG_PPC_SUBPAGE_PROT @@ -687,12 +685,41 @@ struct subpage_prot_table { #define SBP_L3_SHIFT (SBP_L2_SHIFT + SBP_L2_BITS) extern void subpage_prot_free(struct mm_struct *mm); -extern void subpage_prot_init_new_context(struct mm_struct *mm); #else static inline void subpage_prot_free(struct mm_struct *mm) {} -static inline void subpage_prot_init_new_context(struct mm_struct *mm) { } #endif /* CONFIG_PPC_SUBPAGE_PROT */ +/* + * One bit per slice. We have lower slices which cover 256MB segments + * upto 4G range. That gets us 16 low slices. For the rest we track slices + * in 1TB size. + */ +struct slice_mask { + u64 low_slices; + DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH); +}; + +struct hash_mm_context { + u16 user_psize; /* page size index */ + + /* SLB page size encodings*/ + unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ]; + unsigned char high_slices_psize[SLICE_ARRAY_SIZE]; + unsigned long slb_addr_limit; +#ifdef CONFIG_PPC_64K_PAGES + struct slice_mask mask_64k; +#endif + struct slice_mask mask_4k; +#ifdef CONFIG_HUGETLB_PAGE + struct slice_mask mask_16m; + struct slice_mask mask_16g; +#endif + +#ifdef CONFIG_PPC_SUBPAGE_PROT + struct subpage_prot_table *spt; +#endif /* CONFIG_PPC_SUBPAGE_PROT */ +}; + #if 0 /* * The code below is equivalent to this function for arguments @@ -747,7 +774,7 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, /* * Bad address. We return VSID 0 for that */ - if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE) + if ((ea & EA_MASK) >= H_PGTABLE_RANGE) return 0; if (!mmu_has_feature(MMU_FTR_68_BIT_VA)) @@ -774,28 +801,29 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, * 0x00002 - [ 0xc002000000000000 - 0xc003ffffffffffff] * 0x00003 - [ 0xc004000000000000 - 0xc005ffffffffffff] * 0x00004 - [ 0xc006000000000000 - 0xc007ffffffffffff] - - * 0x00005 - [ 0xd000000000000000 - 0xd001ffffffffffff ] - * 0x00006 - Not used - Can map 0xe000000000000000 range. 
- * 0x00007 - [ 0xf000000000000000 - 0xf001ffffffffffff ] * - * So we can compute the context from the region (top nibble) by - * subtracting 11, or 0xc - 1. + * vmap, IO, vmemap + * + * 0x00005 - [ 0xc008000000000000 - 0xc009ffffffffffff] + * 0x00006 - [ 0xc00a000000000000 - 0xc00bffffffffffff] + * 0x00007 - [ 0xc00c000000000000 - 0xc00dffffffffffff] + * */ static inline unsigned long get_kernel_context(unsigned long ea) { - unsigned long region_id = REGION_ID(ea); + unsigned long region_id = get_region_id(ea); unsigned long ctx; /* - * For linear mapping we do support multiple context + * Depending on Kernel config, kernel region can have one context + * or more. */ - if (region_id == KERNEL_REGION_ID) { + if (region_id == LINEAR_MAP_REGION_ID) { /* * We already verified ea to be not beyond the addr limit. */ - ctx = 1 + ((ea & ~REGION_MASK) >> MAX_EA_BITS_PER_CONTEXT); + ctx = 1 + ((ea & EA_MASK) >> MAX_EA_BITS_PER_CONTEXT); } else - ctx = (region_id - 0xc) + MAX_KERNEL_CTX_CNT; + ctx = region_id + MAX_KERNEL_CTX_CNT - 1; return ctx; } diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 1ceee000c18d..74d24201fc4f 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -25,15 +25,22 @@ struct mmu_psize_def { }; }; extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; +#endif /* __ASSEMBLY__ */ /* - * For BOOK3s 64 with 4k and 64K linux page size - * we want to use pointers, because the page table - * actually store pfn + * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS + * if we increase SECTIONS_WIDTH we will not store node details in page->flags and + * page_to_nid does a page->section->node lookup + * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce + * memory requirements with large number of sections. + * 51 bits is the max physical real address on POWER9 */ -typedef pte_t *pgtable_t; - -#endif /* __ASSEMBLY__ */ +#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \ + defined(CONFIG_PPC_64K_PAGES) +#define MAX_PHYSMEM_BITS 51 +#else +#define MAX_PHYSMEM_BITS 46 +#endif /* 64-bit classic hash table MMU */ #include <asm/book3s/64/mmu-hash.h> @@ -89,16 +96,6 @@ struct spinlock; /* Maximum possible number of NPUs in a system. */ #define NV_MAX_NPUS 8 -/* - * One bit per slice. We have lower slices which cover 256MB segments - * upto 4G range. That gets us 16 low slices. For the rest we track slices - * in 1TB size. 
- */ -struct slice_mask { - u64 low_slices; - DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH); -}; - typedef struct { union { /* @@ -112,7 +109,6 @@ typedef struct { mm_context_id_t id; mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE]; }; - u16 user_psize; /* page size index */ /* Number of bits in the mm_cpumask */ atomic_t active_cpus; @@ -122,27 +118,9 @@ typedef struct { /* NPU NMMU context */ struct npu_context *npu_context; + struct hash_mm_context *hash_context; -#ifdef CONFIG_PPC_MM_SLICES - /* SLB page size encodings*/ - unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE]; - unsigned char high_slices_psize[SLICE_ARRAY_SIZE]; - unsigned long slb_addr_limit; -# ifdef CONFIG_PPC_64K_PAGES - struct slice_mask mask_64k; -# endif - struct slice_mask mask_4k; -# ifdef CONFIG_HUGETLB_PAGE - struct slice_mask mask_16m; - struct slice_mask mask_16g; -# endif -#else - u16 sllp; /* SLB page size encoding */ -#endif unsigned long vdso_base; -#ifdef CONFIG_PPC_SUBPAGE_PROT - struct subpage_prot_table spt; -#endif /* CONFIG_PPC_SUBPAGE_PROT */ /* * pagetable fragment support */ @@ -163,6 +141,60 @@ typedef struct { #endif } mm_context_t; +static inline u16 mm_ctx_user_psize(mm_context_t *ctx) +{ + return ctx->hash_context->user_psize; +} + +static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize) +{ + ctx->hash_context->user_psize = user_psize; +} + +static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx) +{ + return ctx->hash_context->low_slices_psize; +} + +static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx) +{ + return ctx->hash_context->high_slices_psize; +} + +static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx) +{ + return ctx->hash_context->slb_addr_limit; +} + +static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit) +{ + ctx->hash_context->slb_addr_limit = limit; +} + +static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize) +{ +#ifdef CONFIG_PPC_64K_PAGES + if (psize == MMU_PAGE_64K) + return &ctx->hash_context->mask_64k; +#endif +#ifdef CONFIG_HUGETLB_PAGE + if (psize == MMU_PAGE_16M) + return &ctx->hash_context->mask_16m; + if (psize == MMU_PAGE_16G) + return &ctx->hash_context->mask_16g; +#endif + BUG_ON(psize != MMU_PAGE_4K); + + return &ctx->hash_context->mask_4k; +} + +#ifdef CONFIG_PPC_SUBPAGE_PROT +static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx) +{ + return ctx->hash_context->spt; +} +#endif + /* * The current system page and segment sizes */ diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index 138bc2ecc0c4..d45e4449619f 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -19,29 +19,7 @@ struct vmemmap_backing { }; extern struct vmemmap_backing *vmemmap_list; -/* - * Functions that deal with pagetables that could be at any level of - * the table need to be passed an "index_size" so they know how to - * handle allocation. For PTE pages (which are linked to a struct - * page for now, and drawn from the main get_free_pages() pool), the - * allocation size will be (2^index_size * sizeof(pointer)) and - * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). - * - * The maximum index size needs to be big enough to allow any - * pagetable sizes we need, but small enough to fit in the low bits of - * any page table pointer. 
In other words all pagetables, even tiny - * ones, must be aligned to allow at least enough low 0 bits to - * contain this value. This value is also used as a mask, so it must - * be one less than a power of two. - */ -#define MAX_PGTABLE_INDEX_SIZE 0xf - -extern struct kmem_cache *pgtable_cache[]; -#define PGT_CACHE(shift) pgtable_cache[shift] - -extern pte_t *pte_fragment_alloc(struct mm_struct *, int); extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long); -extern void pte_fragment_free(unsigned long *, int); extern void pmd_fragment_free(unsigned long *); extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); #ifdef CONFIG_SMP @@ -81,6 +59,9 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), pgtable_gfp_flags(mm, GFP_KERNEL)); + if (unlikely(!pgd)) + return pgd; + /* * Don't scan the PGD for pointers, it contains references to PUDs but * those references are not full pointers and so can't be recognised by @@ -185,31 +166,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, *pmd = __pmd(__pgtable_ptr_val(pte_page) | PMD_VAL_BITS); } -static inline pgtable_t pmd_pgtable(pmd_t pmd) -{ - return (pgtable_t)pmd_page_vaddr(pmd); -} - -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - return (pte_t *)pte_fragment_alloc(mm, 1); -} - -static inline pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - return (pgtable_t)pte_fragment_alloc(mm, 0); -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - pte_fragment_free((unsigned long *)pte, 1); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) -{ - pte_fragment_free((unsigned long *)ptepage, 0); -} - static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { @@ -221,8 +177,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, pgtable_free_tlb(tlb, table, PTE_INDEX); } -#define check_pgt_cache() do { } while (0) - extern atomic_long_t direct_pages_count[MMU_PAGE_COUNT]; static inline void update_page_count(int psize, long count) { diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 581f91be9dd4..7dede2e34b70 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -277,9 +277,11 @@ extern unsigned long __vmalloc_end; extern unsigned long __kernel_virt_start; extern unsigned long __kernel_virt_size; extern unsigned long __kernel_io_start; +extern unsigned long __kernel_io_end; #define KERN_VIRT_START __kernel_virt_start -#define KERN_VIRT_SIZE __kernel_virt_size #define KERN_IO_START __kernel_io_start +#define KERN_IO_END __kernel_io_end + extern struct page *vmemmap; extern unsigned long ioremap_bot; extern unsigned long pci_io_base; @@ -296,8 +298,7 @@ extern unsigned long pci_io_base; #include <asm/barrier.h> /* - * The second half of the kernel virtual space is used for IO mappings, - * it's itself carved into the PIO region (ISA and PHB IO space) and + * IO space itself carved into the PIO region (ISA and PHB IO space) and * the ioremap space * * ISA_IO_BASE = KERN_IO_START, 64K reserved area @@ -310,7 +311,7 @@ extern unsigned long pci_io_base; #define PHB_IO_BASE (ISA_IO_END) #define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE) #define IOREMAP_BASE (PHB_IO_END) -#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE) +#define IOREMAP_END (KERN_IO_END) /* Advertise special mapping type for AGP */ 
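[Editorial sketch] With KERN_VIRT_SIZE gone, the IO region is now delimited by the explicit KERN_IO_START/KERN_IO_END pair and carved into the ISA, PHB and ioremap windows the comment above describes. The sketch below walks that carve-up using the radix bounds from this series; FULL_IO_SIZE does not appear in this hunk, so its value here is an assumed placeholder, and only the 64K ISA reservation and the ISA -> PHB -> ioremap ordering come from the header.

#include <stdio.h>

#define KERN_IO_START 0xc00a000000000000UL /* radix IO region, per the layout comment */
#define KERN_IO_END   0xc00c000000000000UL
#define FULL_IO_SIZE  0x80000000UL         /* assumption, not taken from this hunk */

int main(void)
{
	unsigned long isa_io_base  = KERN_IO_START;
	unsigned long isa_io_end   = isa_io_base + 0x10000;      /* 64K reserved ISA area */
	unsigned long phb_io_base  = isa_io_end;
	unsigned long phb_io_end   = KERN_IO_START + FULL_IO_SIZE;
	unsigned long ioremap_base = phb_io_end;
	unsigned long ioremap_end  = KERN_IO_END;                /* was KERN_VIRT_START + KERN_VIRT_SIZE */

	printf("ISA:     %#lx - %#lx\n", isa_io_base, isa_io_end);
	printf("PHB:     %#lx - %#lx\n", phb_io_base, phb_io_end);
	printf("ioremap: %#lx - %#lx\n", ioremap_base, ioremap_end);
	return 0;
}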
#define HAVE_PAGE_AGP @@ -992,7 +993,8 @@ extern struct page *pgd_page(pgd_t pgd); (((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr)) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_unmap(pte) do { } while(0) + +static inline void pte_unmap(pte_t *pte) { } /* to find an entry in a kernel page-table-directory */ /* This now only contains the vmalloc pages */ diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h index 863c3e8286fb..d5f5ab73dc7f 100644 --- a/arch/powerpc/include/asm/book3s/64/radix-4k.h +++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h @@ -5,10 +5,11 @@ /* * For 4K page size supported index is 13/9/9/9 */ -#define RADIX_PTE_INDEX_SIZE 9 /* 2MB huge page */ -#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */ -#define RADIX_PUD_INDEX_SIZE 9 -#define RADIX_PGD_INDEX_SIZE 13 +#define RADIX_PTE_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 4K = 2MB +#define RADIX_PMD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 2MB = 1GB +#define RADIX_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 1GB = 512GB +#define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB + /* * One fragment per per page */ diff --git a/arch/powerpc/include/asm/book3s/64/radix-64k.h b/arch/powerpc/include/asm/book3s/64/radix-64k.h index ccb78ca9d0c5..54e33828b0fb 100644 --- a/arch/powerpc/include/asm/book3s/64/radix-64k.h +++ b/arch/powerpc/include/asm/book3s/64/radix-64k.h @@ -5,10 +5,10 @@ /* * For 64K page size supported index is 13/9/9/5 */ -#define RADIX_PTE_INDEX_SIZE 5 /* 2MB huge page */ -#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */ -#define RADIX_PUD_INDEX_SIZE 9 -#define RADIX_PGD_INDEX_SIZE 13 +#define RADIX_PTE_INDEX_SIZE 5 // size: 8B << 5 = 256B, maps 2^5 x 64K = 2MB +#define RADIX_PMD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 2MB = 1GB +#define RADIX_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 1GB = 512GB +#define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB /* * We use a 256 byte PTE page fragment in radix diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 5ab134eeed20..574eca33f893 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -72,19 +72,17 @@ * | | * | | * | | - * +------------------------------+ Kernel IO map end (0xc010000000000000) + * +------------------------------+ Kernel vmemmap end (0xc010000000000000) * | | + * | 512TB | * | | - * | 1/2 of virtual map | + * +------------------------------+ Kernel IO map end/vmemap start * | | + * | 512TB | * | | - * +------------------------------+ Kernel IO map start + * +------------------------------+ Kernel vmap end/ IO map start * | | - * | 1/4 of virtual map | - * | | - * +------------------------------+ Kernel vmemap start - * | | - * | 1/4 of virtual map | + * | 512TB | * | | * +------------------------------+ Kernel virt start (0xc008000000000000) * | | @@ -93,24 +91,24 @@ * +------------------------------+ Kernel linear (0xc.....) */ -#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000) -#define RADIX_KERN_VIRT_SIZE ASM_CONST(0x0008000000000000) - +#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000) /* - * The vmalloc space starts at the beginning of that region, and - * occupies a quarter of it on radix config. - * (we keep a quarter for the virtual memmap) + * 49 = MAX_EA_BITS_PER_CONTEXT (hash specific). 
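[Editorial sketch] The new comments on the RADIX_*_INDEX_SIZE constants spell out the geometry: a level with index size n is a table of 2^n eight-byte entries, and each level multiplies the mapped range by 2^n. A tiny program reproducing those numbers for the 4K geometry (13/9/9/9); the 64K case changes only the PTE level to 5 and the base page to 64K.

#include <stdio.h>

int main(void)
{
	int index_size[4]  = { 9, 9, 9, 13 };            /* PTE, PMD, PUD, PGD (4K geometry) */
	const char *lvl[4] = { "PTE", "PMD", "PUD", "PGD" };
	unsigned long coverage = 1UL << 12;              /* base page: 4K */

	for (int i = 0; i < 4; i++) {
		unsigned long table_bytes = 8UL << index_size[i];
		coverage <<= index_size[i];
		/* Prints 4KB/2MB, 4KB/1GB, 4KB/512GB, 64KB/4PB, matching the comments. */
		printf("%s: table %6lu bytes, cumulative reach %#lx bytes\n",
		       lvl[i], table_bytes, coverage);
	}
	return 0;
}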
To make sure we pick + * the same value as hash. */ +#define RADIX_KERN_MAP_SIZE (1UL << 49) + #define RADIX_VMALLOC_START RADIX_KERN_VIRT_START -#define RADIX_VMALLOC_SIZE (RADIX_KERN_VIRT_SIZE >> 2) +#define RADIX_VMALLOC_SIZE RADIX_KERN_MAP_SIZE #define RADIX_VMALLOC_END (RADIX_VMALLOC_START + RADIX_VMALLOC_SIZE) -/* - * Defines the address of the vmemap area, in its own region on - * hash table CPUs. - */ -#define RADIX_VMEMMAP_BASE (RADIX_VMALLOC_END) -#define RADIX_KERN_IO_START (RADIX_KERN_VIRT_START + (RADIX_KERN_VIRT_SIZE >> 1)) +#define RADIX_KERN_IO_START RADIX_VMALLOC_END +#define RADIX_KERN_IO_SIZE RADIX_KERN_MAP_SIZE +#define RADIX_KERN_IO_END (RADIX_KERN_IO_START + RADIX_KERN_IO_SIZE) + +#define RADIX_VMEMMAP_START RADIX_KERN_IO_END +#define RADIX_VMEMMAP_SIZE RADIX_KERN_MAP_SIZE +#define RADIX_VMEMMAP_END (RADIX_VMEMMAP_START + RADIX_VMEMMAP_SIZE) #ifndef __ASSEMBLY__ #define RADIX_PTE_TABLE_SIZE (sizeof(pte_t) << RADIX_PTE_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h index db0dedab65ee..f0d3194ba41b 100644 --- a/arch/powerpc/include/asm/book3s/64/slice.h +++ b/arch/powerpc/include/asm/book3s/64/slice.h @@ -2,8 +2,6 @@ #ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H #define _ASM_POWERPC_BOOK3S_64_SLICE_H -#ifdef CONFIG_PPC_MM_SLICES - #define SLICE_LOW_SHIFT 28 #define SLICE_LOW_TOP (0x100000000ul) #define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) @@ -13,15 +11,6 @@ #define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT) #define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT) -#else /* CONFIG_PPC_MM_SLICES */ - -#define get_slice_psize(mm, addr) ((mm)->context.user_psize) -#define slice_set_user_psize(mm, psize) \ -do { \ - (mm)->context.user_psize = (psize); \ - (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \ -} while (0) - -#endif /* CONFIG_PPC_MM_SLICES */ +#define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW_USER64 #endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */ diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 43e5f31fe64d..9844b3ded187 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -27,10 +27,11 @@ * the THREAD_WINKLE_BITS are set, which indicate which threads have not * yet woken from the winkle state. 
*/ -#define PNV_CORE_IDLE_LOCK_BIT 0x10000000 +#define NR_PNV_CORE_IDLE_LOCK_BIT 28 +#define PNV_CORE_IDLE_LOCK_BIT (1ULL << NR_PNV_CORE_IDLE_LOCK_BIT) +#define PNV_CORE_IDLE_WINKLE_COUNT_SHIFT 16 #define PNV_CORE_IDLE_WINKLE_COUNT 0x00010000 -#define PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT 0x00080000 #define PNV_CORE_IDLE_WINKLE_COUNT_BITS 0x000F0000 #define PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT 8 #define PNV_CORE_IDLE_THREAD_WINKLE_BITS 0x0000FF00 @@ -68,16 +69,6 @@ #define ERR_DEEP_STATE_ESL_MISMATCH -2 #ifndef __ASSEMBLY__ -/* Additional SPRs that need to be saved/restored during stop */ -struct stop_sprs { - u64 pid; - u64 ldbar; - u64 fscr; - u64 hfscr; - u64 mmcr1; - u64 mmcr2; - u64 mmcra; -}; #define PNV_IDLE_NAME_LEN 16 struct pnv_idle_states_t { @@ -92,10 +83,6 @@ struct pnv_idle_states_t { extern struct pnv_idle_states_t *pnv_idle_states; extern int nr_pnv_idle_states; -extern u32 pnv_fastsleep_workaround_at_entry[]; -extern u32 pnv_fastsleep_workaround_at_exit[]; - -extern u64 pnv_first_deep_stop_state; unsigned long pnv_cpu_offline(unsigned int cpu); int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags); diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h index 7c1d8e74b25d..7f3279b014db 100644 --- a/arch/powerpc/include/asm/drmem.h +++ b/arch/powerpc/include/asm/drmem.h @@ -17,6 +17,9 @@ struct drmem_lmb { u32 drc_index; u32 aa_index; u32 flags; +#ifdef CONFIG_MEMORY_HOTPLUG + int nid; +#endif }; struct drmem_lmb_info { @@ -104,4 +107,22 @@ static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb) lmb->aa_index = 0xffffffff; } +#ifdef CONFIG_MEMORY_HOTPLUG +static inline void lmb_set_nid(struct drmem_lmb *lmb) +{ + lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr); +} +static inline void lmb_clear_nid(struct drmem_lmb *lmb) +{ + lmb->nid = -1; +} +#else +static inline void lmb_set_nid(struct drmem_lmb *lmb) +{ +} +static inline void lmb_clear_nid(struct drmem_lmb *lmb) +{ +} +#endif + #endif /* _ASM_POWERPC_LMB_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 937bb630093f..bef4e05a6823 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -497,6 +497,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) RESTORE_CTR(r1, area); \ b bad_stack; \ 3: EXCEPTION_PROLOG_COMMON_1(); \ + kuap_save_amr_and_lock r9, r10, cr1, cr0; \ beq 4f; /* if from kernel mode */ \ ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \ SAVE_PPR(area, r9); \ @@ -691,6 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL) */ #define EXCEPTION_COMMON_NORET_STACK(area, trap, label, hdlr, additions) \ EXCEPTION_PROLOG_COMMON_1(); \ + kuap_save_amr_and_lock r9, r10, cr1; \ EXCEPTION_PROLOG_COMMON_2(area); \ EXCEPTION_PROLOG_COMMON_3(trap); \ /* Volatile regs are potentially clobbered here */ \ diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index 188776befaf9..e2099c0a15c3 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -219,5 +219,6 @@ extern void fadump_cleanup(void); static inline int is_fadump_active(void) { return 0; } static inline int should_fadump_crash(void) { return 0; } static inline void crash_fadump(struct pt_regs *regs, const char *str) { } +static inline void fadump_cleanup(void) { } #endif #endif diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 40a6c9261a6b..f6fc31f8baff 100644 --- 
a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -100,6 +100,9 @@ label##5: \ #define END_MMU_FTR_SECTION(msk, val) \ END_MMU_FTR_SECTION_NESTED(msk, val, 97) +#define END_MMU_FTR_SECTION_NESTED_IFSET(msk, label) \ + END_MMU_FTR_SECTION_NESTED((msk), (msk), label) + #define END_MMU_FTR_SECTION_IFSET(msk) END_MMU_FTR_SECTION((msk), (msk)) #define END_MMU_FTR_SECTION_IFCLR(msk) END_MMU_FTR_SECTION((msk), 0) diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index b9fbed84ddca..0cfc365d814b 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -22,7 +22,12 @@ #include <asm/kmap_types.h> #endif +#ifdef CONFIG_KASAN +#include <asm/kasan.h> +#define FIXADDR_TOP (KASAN_SHADOW_START - PAGE_SIZE) +#else #define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE)) +#endif /* * Here we define all the compile-time 'special' virtual diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index 88b38b37c21b..3a6aa57b9d90 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -35,6 +35,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, { int oldval = 0, ret; + allow_write_to_user(uaddr, sizeof(*uaddr)); pagefault_disable(); switch (op) { @@ -62,6 +63,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, if (!ret) *oval = oldval; + prevent_write_to_user(uaddr, sizeof(*uaddr)); return ret; } @@ -75,6 +77,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(uaddr, sizeof(u32))) return -EFAULT; + allow_write_to_user(uaddr, sizeof(*uaddr)); __asm__ __volatile__ ( PPC_ATOMIC_ENTRY_BARRIER "1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\ @@ -95,6 +98,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "cc", "memory"); *uval = prev; + prevent_write_to_user(uaddr, sizeof(*uaddr)); return ret; } diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 8d40565ad0c3..20a101046cff 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -6,82 +6,16 @@ #include <asm/page.h> #ifdef CONFIG_PPC_BOOK3S_64 - #include <asm/book3s/64/hugetlb.h> -/* - * This should work for other subarchs too. 
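[Editorial sketch] The futex hunk above shows the discipline this series enforces wherever user memory is touched by hand: open a KUAP window with allow_write_to_user() immediately before the access, and close it with prevent_write_to_user() on every path out. Below is a userspace model of that bracketing; the function names mirror asm/kup.h (added later in this patch), but the flag is only a stand-in for the real AMR/MD_AP register state, so this is illustrative, not the kernel implementation.

#include <assert.h>
#include <stdio.h>

static int kuap_window_open;

static void allow_write_to_user(void)   { kuap_window_open = 1; }
static void prevent_write_to_user(void) { kuap_window_open = 0; }

static void put_user_word(unsigned int *uaddr, unsigned int val)
{
	/* bad_kuap_fault() analogue: a write outside the window is a bug */
	assert(kuap_window_open && "fault blocked by AP register");
	*uaddr = val;
}

int main(void)
{
	unsigned int word = 0;

	allow_write_to_user();
	put_user_word(&word, 42); /* permitted: the window is open */
	prevent_write_to_user();
	printf("word = %u\n", word);
	return 0;
}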
But right now we use the - * new format only for 64bit book3s - */ -static inline pte_t *hugepd_page(hugepd_t hpd) -{ - BUG_ON(!hugepd_ok(hpd)); - /* - * We have only four bits to encode, MMU page size - */ - BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf); - return __va(hpd_val(hpd) & HUGEPD_ADDR_MASK); -} - -static inline unsigned int hugepd_mmu_psize(hugepd_t hpd) -{ - return (hpd_val(hpd) & HUGEPD_SHIFT_MASK) >> 2; -} - -static inline unsigned int hugepd_shift(hugepd_t hpd) -{ - return mmu_psize_to_shift(hugepd_mmu_psize(hpd)); -} -static inline void flush_hugetlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) -{ - if (radix_enabled()) - return radix__flush_hugetlb_page(vma, vmaddr); -} - -#else - -static inline pte_t *hugepd_page(hugepd_t hpd) -{ - BUG_ON(!hugepd_ok(hpd)); -#ifdef CONFIG_PPC_8xx - return (pte_t *)__va(hpd_val(hpd) & ~HUGEPD_SHIFT_MASK); -#else - return (pte_t *)((hpd_val(hpd) & - ~HUGEPD_SHIFT_MASK) | PD_HUGE); -#endif -} - -static inline unsigned int hugepd_shift(hugepd_t hpd) -{ -#ifdef CONFIG_PPC_8xx - return ((hpd_val(hpd) & _PMD_PAGE_MASK) >> 1) + 17; -#else - return hpd_val(hpd) & HUGEPD_SHIFT_MASK; -#endif -} - +#elif defined(CONFIG_PPC_FSL_BOOK3E) +#include <asm/nohash/hugetlb-book3e.h> +#elif defined(CONFIG_PPC_8xx) +#include <asm/nohash/32/hugetlb-8xx.h> #endif /* CONFIG_PPC_BOOK3S_64 */ +extern bool hugetlb_disabled; -static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, - unsigned pdshift) -{ - /* - * On FSL BookE, we have multiple higher-level table entries that - * point to the same hugepte. Just use the first one since they're all - * identical. So for that case, idx=0. - */ - unsigned long idx = 0; - - pte_t *dir = hugepd_page(hpd); -#ifdef CONFIG_PPC_8xx - idx = (addr & ((1UL << pdshift) - 1)) >> PAGE_SHIFT; -#elif !defined(CONFIG_PPC_FSL_BOOK3E) - idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd); -#endif - - return dir + idx; -} +void hugetlbpage_init_default(void); void flush_dcache_icache_hugepage(struct page *page); @@ -99,15 +33,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte); -#ifdef CONFIG_PPC_8xx -static inline void flush_hugetlb_page(struct vm_area_struct *vma, - unsigned long vmaddr) -{ - flush_tlb_page(vma, vmaddr); -} -#else -void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -#endif #define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index ece4dc89c90b..0fe8c1e46bbc 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -90,10 +90,18 @@ static inline void hw_breakpoint_disable(void) extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs); int hw_breakpoint_handler(struct die_args *args); +extern int set_dawr(struct arch_hw_breakpoint *brk); +extern bool dawr_force_enable; +static inline bool dawr_enabled(void) +{ + return dawr_force_enable; +} + #else /* CONFIG_HAVE_HW_BREAKPOINT */ static inline void hw_breakpoint_disable(void) { } static inline void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) { } +static inline bool dawr_enabled(void) { return false; } #endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif /* __KERNEL__ */ #endif /* _PPC_BOOK3S_64_HW_BREAKPOINT_H */ diff --git a/arch/powerpc/include/asm/imc-pmu.h 
b/arch/powerpc/include/asm/imc-pmu.h index 69f516ecb2fd..7c2ef0e42661 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -33,6 +33,7 @@ */ #define THREAD_IMC_LDBAR_MASK 0x0003ffffffffe000ULL #define THREAD_IMC_ENABLE 0x8000000000000000ULL +#define TRACE_IMC_ENABLE 0x4000000000000000ULL /* * For debugfs interface for imc-mode and imc-command @@ -59,6 +60,34 @@ struct imc_events { char *scale; }; +/* + * Trace IMC hardware updates a 64bytes record on + * Core Performance Monitoring Counter (CPMC) + * overflow. Here is the layout for the trace imc record + * + * DW 0 : Timebase + * DW 1 : Program Counter + * DW 2 : PIDR information + * DW 3 : CPMC1 + * DW 4 : CPMC2 + * DW 5 : CPMC3 + * Dw 6 : CPMC4 + * DW 7 : Timebase + * ..... + * + * The following is the data structure to hold trace imc data. + */ +struct trace_imc_data { + u64 tb1; + u64 ip; + u64 val; + u64 cpmc1; + u64 cpmc2; + u64 cpmc3; + u64 cpmc4; + u64 tb2; +}; + /* Event attribute array index */ #define IMC_FORMAT_ATTR 0 #define IMC_EVENT_ATTR 1 @@ -69,6 +98,13 @@ struct imc_events { #define IMC_EVENT_OFFSET_MASK 0xffffffffULL /* + * Macro to mask bits 0:21 of first double word(which is the timebase) to + * compare with 8th double word (timebase) of trace imc record data. + */ +#define IMC_TRACE_RECORD_TB1_MASK 0x3ffffffffffULL + + +/* * Device tree parser code detects IMC pmu support and * registers new IMC pmus. This structure will hold the * pmu functions, events, counter memory information @@ -113,6 +149,7 @@ struct imc_pmu_ref { enum { IMC_TYPE_THREAD = 0x1, + IMC_TYPE_TRACE = 0x2, IMC_TYPE_CORE = 0x4, IMC_TYPE_CHIP = 0x10, }; @@ -123,6 +160,8 @@ enum { #define IMC_DOMAIN_NEST 1 #define IMC_DOMAIN_CORE 2 #define IMC_DOMAIN_THREAD 3 +/* For trace-imc the domain is still thread but it operates in trace-mode */ +#define IMC_DOMAIN_TRACE 4 extern int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id); diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h new file mode 100644 index 000000000000..296e51c2f066 --- /dev/null +++ b/arch/powerpc/include/asm/kasan.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_KASAN_H +#define __ASM_KASAN_H + +#ifdef CONFIG_KASAN +#define _GLOBAL_KASAN(fn) _GLOBAL(__##fn) +#define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(__##fn) +#define EXPORT_SYMBOL_KASAN(fn) EXPORT_SYMBOL(__##fn) +#else +#define _GLOBAL_KASAN(fn) _GLOBAL(fn) +#define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(fn) +#define EXPORT_SYMBOL_KASAN(fn) +#endif + +#ifndef __ASSEMBLY__ + +#include <asm/page.h> + +#define KASAN_SHADOW_SCALE_SHIFT 3 + +#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \ + (PAGE_OFFSET >> KASAN_SHADOW_SCALE_SHIFT)) + +#define KASAN_SHADOW_OFFSET ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET) + +#define KASAN_SHADOW_END 0UL + +#define KASAN_SHADOW_SIZE (KASAN_SHADOW_END - KASAN_SHADOW_START) + +#ifdef CONFIG_KASAN +void kasan_early_init(void); +void kasan_mmu_init(void); +void kasan_init(void); +#else +static inline void kasan_init(void) { } +static inline void kasan_mmu_init(void) { } +#endif + +#endif /* __ASSEMBLY */ +#endif diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h new file mode 100644 index 000000000000..5b5e39643a27 --- /dev/null +++ b/arch/powerpc/include/asm/kup.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_KUP_H_ +#define _ASM_POWERPC_KUP_H_ + +#ifdef CONFIG_PPC64 +#include <asm/book3s/64/kup-radix.h> +#endif +#ifdef CONFIG_PPC_8xx +#include 
<asm/nohash/32/kup-8xx.h> +#endif +#ifdef CONFIG_PPC_BOOK3S_32 +#include <asm/book3s/32/kup.h> +#endif + +#ifdef __ASSEMBLY__ +#ifndef CONFIG_PPC_KUAP +.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 +.endm + +.macro kuap_restore sp, current, gpr1, gpr2, gpr3 +.endm + +.macro kuap_check current, gpr +.endm + +#endif + +#else /* !__ASSEMBLY__ */ + +#include <asm/pgtable.h> + +void setup_kup(void); + +#ifdef CONFIG_PPC_KUEP +void setup_kuep(bool disabled); +#else +static inline void setup_kuep(bool disabled) { } +#endif /* CONFIG_PPC_KUEP */ + +#ifdef CONFIG_PPC_KUAP +void setup_kuap(bool disabled); +#else +static inline void setup_kuap(bool disabled) { } +static inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size) { } +static inline void prevent_user_access(void __user *to, const void __user *from, + unsigned long size) { } +static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) { return false; } +#endif /* CONFIG_PPC_KUAP */ + +static inline void allow_read_from_user(const void __user *from, unsigned long size) +{ + allow_user_access(NULL, from, size); +} + +static inline void allow_write_to_user(void __user *to, unsigned long size) +{ + allow_user_access(to, NULL, size); +} + +static inline void prevent_read_from_user(const void __user *from, unsigned long size) +{ + prevent_user_access(NULL, from, size); +} + +static inline void prevent_write_to_user(void __user *to, unsigned long size) +{ + prevent_user_access(to, NULL, size); +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_KUP_H_ */ diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 17996bc9382b..23247a132ce8 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -31,7 +31,7 @@ enum MCE_Version { enum MCE_Severity { MCE_SEV_NO_ERROR = 0, MCE_SEV_WARNING = 1, - MCE_SEV_ERROR_SYNC = 2, + MCE_SEV_SEVERE = 2, MCE_SEV_FATAL = 3, }; @@ -56,6 +56,14 @@ enum MCE_ErrorType { MCE_ERROR_TYPE_LINK = 7, }; +enum MCE_ErrorClass { + MCE_ECLASS_UNKNOWN = 0, + MCE_ECLASS_HARDWARE, + MCE_ECLASS_HARD_INDETERMINATE, + MCE_ECLASS_SOFTWARE, + MCE_ECLASS_SOFT_INDETERMINATE, +}; + enum MCE_UeErrorType { MCE_UE_ERROR_INDETERMINATE = 0, MCE_UE_ERROR_IFETCH = 1, @@ -110,73 +118,75 @@ enum MCE_LinkErrorType { }; struct machine_check_event { - enum MCE_Version version:8; /* 0x00 */ - uint8_t in_use; /* 0x01 */ - enum MCE_Severity severity:8; /* 0x02 */ - enum MCE_Initiator initiator:8; /* 0x03 */ - enum MCE_ErrorType error_type:8; /* 0x04 */ - enum MCE_Disposition disposition:8; /* 0x05 */ - uint8_t reserved_1[2]; /* 0x06 */ - uint64_t gpr3; /* 0x08 */ - uint64_t srr0; /* 0x10 */ - uint64_t srr1; /* 0x18 */ - union { /* 0x20 */ + enum MCE_Version version:8; + u8 in_use; + enum MCE_Severity severity:8; + enum MCE_Initiator initiator:8; + enum MCE_ErrorType error_type:8; + enum MCE_ErrorClass error_class:8; + enum MCE_Disposition disposition:8; + bool sync_error; + u16 cpu; + u64 gpr3; + u64 srr0; + u64 srr1; + union { struct { enum MCE_UeErrorType ue_error_type:8; - uint8_t effective_address_provided; - uint8_t physical_address_provided; - uint8_t reserved_1[5]; - uint64_t effective_address; - uint64_t physical_address; - uint8_t reserved_2[8]; + u8 effective_address_provided; + u8 physical_address_provided; + u8 reserved_1[5]; + u64 effective_address; + u64 physical_address; + u8 reserved_2[8]; } ue_error; struct { enum MCE_SlbErrorType slb_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t 
effective_address; - uint8_t reserved_2[16]; + u8 effective_address_provided; + u8 reserved_1[6]; + u64 effective_address; + u8 reserved_2[16]; } slb_error; struct { enum MCE_EratErrorType erat_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; + u8 effective_address_provided; + u8 reserved_1[6]; + u64 effective_address; + u8 reserved_2[16]; } erat_error; struct { enum MCE_TlbErrorType tlb_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; + u8 effective_address_provided; + u8 reserved_1[6]; + u64 effective_address; + u8 reserved_2[16]; } tlb_error; struct { enum MCE_UserErrorType user_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; + u8 effective_address_provided; + u8 reserved_1[6]; + u64 effective_address; + u8 reserved_2[16]; } user_error; struct { enum MCE_RaErrorType ra_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; + u8 effective_address_provided; + u8 reserved_1[6]; + u64 effective_address; + u8 reserved_2[16]; } ra_error; struct { enum MCE_LinkErrorType link_error_type:8; - uint8_t effective_address_provided; - uint8_t reserved_1[6]; - uint64_t effective_address; - uint8_t reserved_2[16]; + u8 effective_address_provided; + u8 reserved_1[6]; + u64 effective_address; + u8 reserved_2[16]; } link_error; } u; }; @@ -194,6 +204,8 @@ struct mce_error_info { } u; enum MCE_Severity severity:8; enum MCE_Initiator initiator:8; + enum MCE_ErrorClass error_class:8; + bool sync_error; }; #define MAX_MC_EVT 100 @@ -210,6 +222,7 @@ extern void release_mce_event(void); extern void machine_check_queue_event(void); extern void machine_check_print_event_info(struct machine_check_event *evt, bool user_mode, bool in_guest); +unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr); #ifdef CONFIG_PPC_BOOK3S_64 void flush_and_reload_slb(void); #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 8ddd4a91bdc1..ba94ce8c22d7 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -107,6 +107,11 @@ */ #define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000) +/* + * Supports KUAP (key 0 controlling userspace addresses) on radix + */ +#define MMU_FTR_RADIX_KUAP ASM_CONST(0x80000000) + /* MMU feature bit sets for various CPUs */ #define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \ MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2 @@ -124,6 +129,9 @@ #ifndef __ASSEMBLY__ #include <linux/bug.h> #include <asm/cputable.h> +#include <asm/page.h> + +typedef pte_t *pgtable_t; #ifdef CONFIG_PPC_FSL_BOOK3E #include <asm/percpu.h> @@ -164,7 +172,10 @@ enum { #endif #ifdef CONFIG_PPC_RADIX_MMU MMU_FTR_TYPE_RADIX | -#endif +#ifdef CONFIG_PPC_KUAP + MMU_FTR_RADIX_KUAP | +#endif /* CONFIG_PPC_KUAP */ +#endif /* CONFIG_PPC_RADIX_MMU */ 0, }; @@ -341,21 +352,6 @@ static inline bool strict_kernel_rwx_enabled(void) */ #define MMU_PAGE_COUNT 16 -/* - * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS - * if we increase SECTIONS_WIDTH we will not store node details in page->flags and - * page_to_nid does a page->section->node lookup - * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce - * memory requirements with large number of sections. 
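[Editorial sketch] This MAX_PHYSMEM_BITS block is being moved out of the generic mmu.h into book3s/64/mmu.h (see the earlier hunk), while book3e now carries its own 44-bit limit. A compile-time demonstration of the selection logic; the three CONFIG_ defines are an assumed .config for the demo, and dropping any one of them falls back to 46 bits.

#include <stdio.h>

#define CONFIG_SPARSEMEM_VMEMMAP 1 /* assumed .config for the demo */
#define CONFIG_SPARSEMEM_EXTREME 1
#define CONFIG_PPC_64K_PAGES     1

#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \
	defined(CONFIG_PPC_64K_PAGES)
#define MAX_PHYSMEM_BITS 51 /* max physical real address on POWER9 */
#else
#define MAX_PHYSMEM_BITS 46
#endif

int main(void)
{
	printf("MAX_PHYSMEM_BITS = %d -> %lu TB of addressable real memory\n",
	       MAX_PHYSMEM_BITS, (1UL << MAX_PHYSMEM_BITS) >> 40);
	return 0;
}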
- * 51 bits is the max physical real address on POWER9 - */ -#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \ - defined (CONFIG_PPC_64K_PAGES) -#define MAX_PHYSMEM_BITS 51 -#elif defined(CONFIG_PPC64) -#define MAX_PHYSMEM_BITS 46 -#endif - #ifdef CONFIG_PPC_BOOK3S_64 #include <asm/book3s/64/mmu.h> #else /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 6ee8195a2ffb..611204e588b9 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -52,6 +52,7 @@ static inline bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa, { return false; } +static inline void mm_iommu_init(struct mm_struct *mm) { } #endif extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm); extern void set_context(unsigned long id, pgd_t *pgd); @@ -228,13 +229,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, #endif } -#ifdef CONFIG_PPC_BOOK3E_64 -static inline void arch_exit_mmap(struct mm_struct *mm) -{ -} -#else extern void arch_exit_mmap(struct mm_struct *mm); -#endif static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma, diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h new file mode 100644 index 000000000000..a46616937d20 --- /dev/null +++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H +#define _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H + +#define PAGE_SHIFT_8M 23 + +static inline pte_t *hugepd_page(hugepd_t hpd) +{ + BUG_ON(!hugepd_ok(hpd)); + + return (pte_t *)__va(hpd_val(hpd) & ~HUGEPD_SHIFT_MASK); +} + +static inline unsigned int hugepd_shift(hugepd_t hpd) +{ + return ((hpd_val(hpd) & _PMD_PAGE_MASK) >> 1) + 17; +} + +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, + unsigned int pdshift) +{ + unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> PAGE_SHIFT; + + return hugepd_page(hpd) + idx; +} + +static inline void flush_hugetlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ + flush_tlb_page(vma, vmaddr); +} + +static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift) +{ + *hpdp = __hugepd(__pa(new) | _PMD_USER | _PMD_PRESENT | + (pshift == PAGE_SHIFT_8M ? 
_PMD_PAGE_8M : _PMD_PAGE_512K)); +} + +static inline int check_and_get_huge_psize(int shift) +{ + return shift_to_mmu_psize(shift); +} + +#endif /* _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H */ diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h new file mode 100644 index 000000000000..1c3133b5f86a --- /dev/null +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_KUP_8XX_H_ +#define _ASM_POWERPC_KUP_8XX_H_ + +#include <asm/bug.h> + +#ifdef CONFIG_PPC_KUAP + +#ifdef __ASSEMBLY__ + +.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3 + lis \gpr2, MD_APG_KUAP@h /* only APG0 and APG1 are used */ + mfspr \gpr1, SPRN_MD_AP + mtspr SPRN_MD_AP, \gpr2 + stw \gpr1, STACK_REGS_KUAP(\sp) +.endm + +.macro kuap_restore sp, current, gpr1, gpr2, gpr3 + lwz \gpr1, STACK_REGS_KUAP(\sp) + mtspr SPRN_MD_AP, \gpr1 +.endm + +.macro kuap_check current, gpr +#ifdef CONFIG_PPC_KUAP_DEBUG + mfspr \gpr, SPRN_MD_AP + rlwinm \gpr, \gpr, 16, 0xffff +999: twnei \gpr, MD_APG_KUAP@h + EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) +#endif +.endm + +#else /* !__ASSEMBLY__ */ + +#include <asm/reg.h> + +static inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size) +{ + mtspr(SPRN_MD_AP, MD_APG_INIT); +} + +static inline void prevent_user_access(void __user *to, const void __user *from, + unsigned long size) +{ + mtspr(SPRN_MD_AP, MD_APG_KUAP); +} + +static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) +{ + return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000), + "Bug: fault blocked by AP register !"); +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* CONFIG_PPC_KUAP */ + +#endif /* _ASM_POWERPC_KUP_8XX_H_ */ diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 0a1a3fc54e54..76af5b0cb16e 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -35,11 +35,18 @@ * Then we use the APG to say whether accesses are according to Page rules or * "all Supervisor" rules (Access to all) * Therefore, we define 2 APG groups. lsb is _PMD_USER - * 0 => No user => 01 (all accesses performed according to page definition) + * 0 => Kernel => 01 (all accesses performed according to page definition) * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * We define all 16 groups so that all other bits of APG can take any value + * 2-16 => NA => 11 (all accesses performed as user iaw page definition) + */ +#define MI_APG_INIT 0x4fffffff + +/* + * 0 => Kernel => 01 (all accesses performed according to page definition) + * 1 => User => 10 (all accesses performed according to swaped page definition) + * 2-16 => NA => 11 (all accesses performed as user iaw page definition) */ -#define MI_APG_INIT 0x44444444 +#define MI_APG_KUEP 0x6fffffff /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in @@ -108,11 +115,18 @@ * Then we use the APG to say whether accesses are according to Page rules or * "all Supervisor" rules (Access to all) * Therefore, we define 2 APG groups. 
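[Editorial sketch] The APG values above pack one two-bit access-group field per group, sixteen groups into a 32-bit SPR: 01 for the kernel group, 00 (or 10 once KUAP swaps the rules) for the user group, and 11 for the unused groups. A short decoder that prints the fields and confirms the 0x4fffffff/0x6fffffff encodings:

#include <stdio.h>

/* Decode an 8xx APG register into its 16 two-bit access groups. Group 0 is
 * the kernel group, group 1 the user group, groups 2-15 are unused (11). */
static void dump_apg(const char *name, unsigned int apg)
{
	printf("%-12s 0x%08x:", name, apg);
	for (int g = 0; g < 16; g++)
		printf(" %u%u", (apg >> (31 - 2 * g)) & 1, (apg >> (30 - 2 * g)) & 1);
	printf("\n");
}

int main(void)
{
	dump_apg("MD_APG_INIT", 0x4fffffffu); /* group 0 = 01, group 1 = 00 */
	dump_apg("MD_APG_KUAP", 0x6fffffffu); /* group 0 = 01, group 1 = 10 */
	return 0;
}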
lsb is _PMD_USER - * 0 => No user => 01 (all accesses performed according to page definition) + * 0 => Kernel => 01 (all accesses performed according to page definition) * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * We define all 16 groups so that all other bits of APG can take any value + * 2-16 => NA => 11 (all accesses performed as user iaw page definition) + */ +#define MD_APG_INIT 0x4fffffff + +/* + * 0 => No user => 01 (all accesses performed according to page definition) + * 1 => User => 10 (all accesses performed according to swaped page definition) + * 2-16 => NA => 11 (all accesses performed as user iaw page definition) */ -#define MD_APG_INIT 0x44444444 +#define MD_APG_KUAP 0x6fffffff /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MD_RPN is written, bits in @@ -167,9 +181,26 @@ #ifdef CONFIG_PPC_MM_SLICES #include <asm/nohash/32/slice.h> #define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1)) +#define LOW_SLICE_ARRAY_SZ SLICE_ARRAY_SIZE #endif +#if defined(CONFIG_PPC_4K_PAGES) +#define mmu_virtual_psize MMU_PAGE_4K +#elif defined(CONFIG_PPC_16K_PAGES) +#define mmu_virtual_psize MMU_PAGE_16K +#define PTE_FRAG_NR 4 +#define PTE_FRAG_SIZE_SHIFT 12 +#define PTE_FRAG_SIZE (1UL << 12) +#else +#error "Unsupported PAGE_SIZE" +#endif + +#define mmu_linear_psize MMU_PAGE_8M + #ifndef __ASSEMBLY__ + +#include <linux/mmdebug.h> + struct slice_mask { u64 low_slices; DECLARE_BITMAP(high_slices, 0); @@ -185,14 +216,56 @@ typedef struct { unsigned char high_slices_psize[0]; unsigned long slb_addr_limit; struct slice_mask mask_base_psize; /* 4k or 16k */ -# ifdef CONFIG_HUGETLB_PAGE struct slice_mask mask_512k; struct slice_mask mask_8m; -# endif #endif void *pte_frag; } mm_context_t; +#ifdef CONFIG_PPC_MM_SLICES +static inline u16 mm_ctx_user_psize(mm_context_t *ctx) +{ + return ctx->user_psize; +} + +static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize) +{ + ctx->user_psize = user_psize; +} + +static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx) +{ + return ctx->low_slices_psize; +} + +static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx) +{ + return ctx->high_slices_psize; +} + +static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx) +{ + return ctx->slb_addr_limit; +} + +static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit) +{ + ctx->slb_addr_limit = limit; +} + +static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize) +{ + if (psize == MMU_PAGE_512K) + return &ctx->mask_512k; + if (psize == MMU_PAGE_8M) + return &ctx->mask_8m; + + BUG_ON(psize != mmu_virtual_psize); + + return &ctx->mask_base_psize; +} +#endif /* CONFIG_PPC_MM_SLICE */ + #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000) #define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE)) @@ -242,17 +315,4 @@ extern s32 patch__itlbmiss_perf, patch__dtlbmiss_perf; #endif /* !__ASSEMBLY__ */ -#if defined(CONFIG_PPC_4K_PAGES) -#define mmu_virtual_psize MMU_PAGE_4K -#elif defined(CONFIG_PPC_16K_PAGES) -#define mmu_virtual_psize MMU_PAGE_16K -#define PTE_FRAG_NR 4 -#define PTE_FRAG_SIZE_SHIFT 12 -#define PTE_FRAG_SIZE (1UL << 12) -#else -#error "Unsupported PAGE_SIZE" -#endif - -#define mmu_linear_psize MMU_PAGE_8M - #endif /* _ASM_POWERPC_MMU_8XX_H_ */ diff --git a/arch/powerpc/include/asm/nohash/32/mmu.h b/arch/powerpc/include/asm/nohash/32/mmu.h deleted file mode 100644 index 7d94a36d57d2..000000000000 --- 
a/arch/powerpc/include/asm/nohash/32/mmu.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_NOHASH_32_MMU_H_ -#define _ASM_POWERPC_NOHASH_32_MMU_H_ - -#include <asm/page.h> - -#if defined(CONFIG_40x) -/* 40x-style software loaded TLB */ -#include <asm/nohash/32/mmu-40x.h> -#elif defined(CONFIG_44x) -/* 44x-style software loaded TLB */ -#include <asm/nohash/32/mmu-44x.h> -#elif defined(CONFIG_PPC_BOOK3E_MMU) -/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */ -#include <asm/nohash/mmu-book3e.h> -#elif defined (CONFIG_PPC_8xx) -/* Motorola/Freescale 8xx software loaded TLB */ -#include <asm/nohash/32/mmu-8xx.h> -#endif - -#ifndef __ASSEMBLY__ -typedef pte_t *pgtable_t; -#endif - -#endif /* _ASM_POWERPC_NOHASH_32_MMU_H_ */ diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h index bd186e85b4f7..11eac371e7e0 100644 --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h @@ -6,39 +6,6 @@ #include <linux/slab.h> /* - * Functions that deal with pagetables that could be at any level of - * the table need to be passed an "index_size" so they know how to - * handle allocation. For PTE pages (which are linked to a struct - * page for now, and drawn from the main get_free_pages() pool), the - * allocation size will be (2^index_size * sizeof(pointer)) and - * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). - * - * The maximum index size needs to be big enough to allow any - * pagetable sizes we need, but small enough to fit in the low bits of - * any page table pointer. In other words all pagetables, even tiny - * ones, must be aligned to allow at least enough low 0 bits to - * contain this value. This value is also used as a mask, so it must - * be one less than a power of two. - */ -#define MAX_PGTABLE_INDEX_SIZE 0xf - -extern void __bad_pte(pmd_t *pmd); - -extern struct kmem_cache *pgtable_cache[]; -#define PGT_CACHE(shift) pgtable_cache[shift] - -static inline pgd_t *pgd_alloc(struct mm_struct *mm) -{ - return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), - pgtable_gfp_flags(mm, GFP_KERNEL)); -} - -static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ - kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); -} - -/* * We don't have any real pmd's, and this code never triggers because * the pgd will always be present.. 
*/ @@ -47,96 +14,22 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) #define __pmd_free_tlb(tlb,x,a) do { } while (0) /* #define pgd_populate(mm, pmd, pte) BUG() */ -#ifndef CONFIG_BOOKE - -static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, - pte_t *pte) -{ - *pmdp = __pmd(__pa(pte) | _PMD_PRESENT); -} - -static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, - pgtable_t pte_page) -{ - *pmdp = __pmd(__pa(pte_page) | _PMD_USER | _PMD_PRESENT); -} - -#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd)) -#else - static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp, pte_t *pte) { - *pmdp = __pmd((unsigned long)pte | _PMD_PRESENT); + if (IS_ENABLED(CONFIG_BOOKE)) + *pmdp = __pmd((unsigned long)pte | _PMD_PRESENT); + else + *pmdp = __pmd(__pa(pte) | _PMD_PRESENT); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pte_page) { - *pmdp = __pmd((unsigned long)pte_page | _PMD_PRESENT); + if (IS_ENABLED(CONFIG_BOOKE)) + *pmdp = __pmd((unsigned long)pte_page | _PMD_PRESENT); + else + *pmdp = __pmd(__pa(pte_page) | _PMD_USER | _PMD_PRESENT); } -#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd)) -#endif - -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); -extern pgtable_t pte_alloc_one(struct mm_struct *mm); -void pte_frag_destroy(void *pte_frag); -pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel); -void pte_fragment_free(unsigned long *table, int kernel); - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - pte_fragment_free((unsigned long *)pte, 1); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) -{ - pte_fragment_free((unsigned long *)ptepage, 0); -} - -static inline void pgtable_free(void *table, unsigned index_size) -{ - if (!index_size) { - pte_fragment_free((unsigned long *)table, 0); - } else { - BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE); - kmem_cache_free(PGT_CACHE(index_size), table); - } -} - -#define check_pgt_cache() do { } while (0) -#define get_hugepd_cache_index(x) (x) - -#ifdef CONFIG_SMP -static inline void pgtable_free_tlb(struct mmu_gather *tlb, - void *table, int shift) -{ - unsigned long pgf = (unsigned long)table; - BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); - pgf |= shift; - tlb_remove_table(tlb, (void *)pgf); -} - -static inline void __tlb_remove_table(void *_table) -{ - void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE); - unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE; - - pgtable_free(table, shift); -} -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, - void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif - -static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, - unsigned long address) -{ - tlb_flush_pgtable(tlb, address); - pgtable_free_tlb(tlb, table, 0); -} #endif /* _ASM_POWERPC_PGALLOC_32_H */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index bed433358260..0284f8f5305f 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -64,15 +64,24 @@ extern int icache_44x_need_flush; #define pgd_ERROR(e) \ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) +#ifndef __ASSEMBLY__ + +int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); + +#endif /* !__ASSEMBLY__ */ + + /* * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary * value (for now) on others, 
from where we can start layout kernel * virtual space that goes below PKMAP and FIXMAP */ +#include <asm/fixmap.h> + #ifdef CONFIG_HIGHMEM #define KVIRT_TOP PKMAP_BASE #else -#define KVIRT_TOP (0xfe000000UL) /* for now, could be FIXMAP_BASE ? */ +#define KVIRT_TOP FIXADDR_START #endif /* @@ -379,8 +388,6 @@ static inline int pte_young(pte_t pte) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); - #endif /* !__ASSEMBLY__ */ #endif /* __ASM_POWERPC_NOHASH_32_PGTABLE_H */ diff --git a/arch/powerpc/include/asm/nohash/32/slice.h b/arch/powerpc/include/asm/nohash/32/slice.h index 777d62e40ac0..39eb0154ae2d 100644 --- a/arch/powerpc/include/asm/nohash/32/slice.h +++ b/arch/powerpc/include/asm/nohash/32/slice.h @@ -13,6 +13,8 @@ #define SLICE_NUM_HIGH 0ul #define GET_HIGH_SLICE_INDEX(addr) (addr & 0) +#define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW + #endif /* CONFIG_PPC_MM_SLICES */ #endif /* _ASM_POWERPC_NOHASH_32_SLICE_H */ diff --git a/arch/powerpc/include/asm/nohash/64/mmu.h b/arch/powerpc/include/asm/nohash/64/mmu.h deleted file mode 100644 index e6585480dfc4..000000000000 --- a/arch/powerpc/include/asm/nohash/64/mmu.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_NOHASH_64_MMU_H_ -#define _ASM_POWERPC_NOHASH_64_MMU_H_ - -/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */ -#include <asm/nohash/mmu-book3e.h> - -#ifndef __ASSEMBLY__ -typedef struct page *pgtable_t; -#endif - -#endif /* _ASM_POWERPC_NOHASH_64_MMU_H_ */ diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index 66d086f85bd5..62321cd12da9 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -18,37 +18,6 @@ struct vmemmap_backing { }; extern struct vmemmap_backing *vmemmap_list; -/* - * Functions that deal with pagetables that could be at any level of - * the table need to be passed an "index_size" so they know how to - * handle allocation. For PTE pages (which are linked to a struct - * page for now, and drawn from the main get_free_pages() pool), the - * allocation size will be (2^index_size * sizeof(pointer)) and - * allocations are drawn from the kmem_cache in PGT_CACHE(index_size). - * - * The maximum index size needs to be big enough to allow any - * pagetable sizes we need, but small enough to fit in the low bits of - * any page table pointer. In other words all pagetables, even tiny - * ones, must be aligned to allow at least enough low 0 bits to - * contain this value. This value is also used as a mask, so it must - * be one less than a power of two. 
- */ -#define MAX_PGTABLE_INDEX_SIZE 0xf - -extern struct kmem_cache *pgtable_cache[]; -#define PGT_CACHE(shift) pgtable_cache[shift] - -static inline pgd_t *pgd_alloc(struct mm_struct *mm) -{ - return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), - pgtable_gfp_flags(mm, GFP_KERNEL)); -} - -static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ - kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); -} - #define pgd_populate(MM, PGD, PUD) pgd_set(PGD, (unsigned long)PUD) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) @@ -76,11 +45,9 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page) { - pmd_set(pmd, (unsigned long)page_address(pte_page)); + pmd_set(pmd, (unsigned long)pte_page); } -#define pmd_pgtable(pmd) pmd_page(pmd) - static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX), @@ -92,91 +59,9 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd); } - -static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); -} - -static inline pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - struct page *page; - pte_t *pte; - - pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT); - if (!pte) - return NULL; - page = virt_to_page(pte); - if (!pgtable_page_ctor(page)) { - __free_page(page); - return NULL; - } - return page; -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long)pte); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) -{ - pgtable_page_dtor(ptepage); - __free_page(ptepage); -} - -static inline void pgtable_free(void *table, int shift) -{ - if (!shift) { - pgtable_page_dtor(virt_to_page(table)); - free_page((unsigned long)table); - } else { - BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); - kmem_cache_free(PGT_CACHE(shift), table); - } -} - -#define get_hugepd_cache_index(x) (x) -#ifdef CONFIG_SMP -static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) -{ - unsigned long pgf = (unsigned long)table; - - BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); - pgf |= shift; - tlb_remove_table(tlb, (void *)pgf); -} - -static inline void __tlb_remove_table(void *_table) -{ - void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE); - unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE; - - pgtable_free(table, shift); -} - -#else -static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) -{ - pgtable_free(table, shift); -} -#endif - -static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, - unsigned long address) -{ - tlb_flush_pgtable(tlb, address); - pgtable_free_tlb(tlb, page_address(table), 0); -} - #define __pmd_free_tlb(tlb, pmd, addr) \ pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX) -#ifndef CONFIG_PPC_64K_PAGES #define __pud_free_tlb(tlb, pud, addr) \ pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE) -#endif /* CONFIG_PPC_64K_PAGES */ - -#define check_pgt_cache() do { } while (0) - #endif /* _ASM_POWERPC_PGALLOC_64_H */ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index e77ed9761632..b9f66cf15c31 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -10,10 +10,6 @@ 
#include <asm/barrier.h> #include <asm/asm-const.h> -#ifdef CONFIG_PPC_64K_PAGES -#error "Page size not supported" -#endif - #define FIRST_USER_ADDRESS 0UL /* @@ -23,11 +19,7 @@ PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) #define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE) -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define PMD_CACHE_INDEX (PMD_INDEX_SIZE + 1) -#else #define PMD_CACHE_INDEX PMD_INDEX_SIZE -#endif #define PUD_CACHE_INDEX PUD_INDEX_SIZE /* @@ -73,7 +65,6 @@ #define VMALLOC_REGION_ID (REGION_ID(VMALLOC_START)) #define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET)) -#define VMEMMAP_REGION_ID (0xfUL) /* Server only */ #define USER_REGION_ID (0UL) /* @@ -205,7 +196,8 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val) (((pte_t *) pmd_page_vaddr(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) -#define pte_unmap(pte) do { } while(0) + +static inline void pte_unmap(pte_t *pte) { } /* to find an entry in a kernel page-table-directory */ /* This now only contains the vmalloc pages */ diff --git a/arch/powerpc/include/asm/nohash/64/slice.h b/arch/powerpc/include/asm/nohash/64/slice.h deleted file mode 100644 index ad0d6e3cc1c5..000000000000 --- a/arch/powerpc/include/asm/nohash/64/slice.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_POWERPC_NOHASH_64_SLICE_H -#define _ASM_POWERPC_NOHASH_64_SLICE_H - -#ifdef CONFIG_PPC_64K_PAGES -#define get_slice_psize(mm, addr) MMU_PAGE_64K -#else /* CONFIG_PPC_64K_PAGES */ -#define get_slice_psize(mm, addr) MMU_PAGE_4K -#endif /* !CONFIG_PPC_64K_PAGES */ -#define slice_set_user_psize(mm, psize) do { BUG(); } while (0) - -#endif /* _ASM_POWERPC_NOHASH_64_SLICE_H */ diff --git a/arch/powerpc/include/asm/nohash/hugetlb-book3e.h b/arch/powerpc/include/asm/nohash/hugetlb-book3e.h new file mode 100644 index 000000000000..ecd8694cb229 --- /dev/null +++ b/arch/powerpc/include/asm/nohash/hugetlb-book3e.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H +#define _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H + +static inline pte_t *hugepd_page(hugepd_t hpd) +{ + if (WARN_ON(!hugepd_ok(hpd))) + return NULL; + + return (pte_t *)((hpd_val(hpd) & ~HUGEPD_SHIFT_MASK) | PD_HUGE); +} + +static inline unsigned int hugepd_shift(hugepd_t hpd) +{ + return hpd_val(hpd) & HUGEPD_SHIFT_MASK; +} + +static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, + unsigned int pdshift) +{ + /* + * On FSL BookE, we have multiple higher-level table entries that + * point to the same hugepte. Just use the first one since they're all + * identical. So for that case, idx=0. 
+ */ + return hugepd_page(hpd); +} + +void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); + +static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift) +{ + /* We use the old format for PPC_FSL_BOOK3E */ + *hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift); +} + +static inline int check_and_get_huge_psize(int shift) +{ + if (shift & 1) /* Not a power of 4 */ + return -EINVAL; + + return shift_to_mmu_psize(shift); +} + +#endif /* _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H */ diff --git a/arch/powerpc/include/asm/nohash/mmu-book3e.h b/arch/powerpc/include/asm/nohash/mmu-book3e.h index e20072972e35..4c9777d256fb 100644 --- a/arch/powerpc/include/asm/nohash/mmu-book3e.h +++ b/arch/powerpc/include/asm/nohash/mmu-book3e.h @@ -306,6 +306,8 @@ extern int book3e_htw_mode; #define mmu_cleanup_all NULL +#define MAX_PHYSMEM_BITS 44 + #endif #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/mmu.h b/arch/powerpc/include/asm/nohash/mmu.h index a037cb1efb57..edc793e5f08f 100644 --- a/arch/powerpc/include/asm/nohash/mmu.h +++ b/arch/powerpc/include/asm/nohash/mmu.h @@ -2,10 +2,18 @@ #ifndef _ASM_POWERPC_NOHASH_MMU_H_ #define _ASM_POWERPC_NOHASH_MMU_H_ -#ifdef CONFIG_PPC64 -#include <asm/nohash/64/mmu.h> -#else -#include <asm/nohash/32/mmu.h> +#if defined(CONFIG_40x) +/* 40x-style software loaded TLB */ +#include <asm/nohash/32/mmu-40x.h> +#elif defined(CONFIG_44x) +/* 44x-style software loaded TLB */ +#include <asm/nohash/32/mmu-44x.h> +#elif defined(CONFIG_PPC_BOOK3E_MMU) +/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */ +#include <asm/nohash/mmu-book3e.h> +#elif defined (CONFIG_PPC_8xx) +/* Motorola/Freescale 8xx software loaded TLB */ +#include <asm/nohash/32/mmu-8xx.h> #endif #endif /* _ASM_POWERPC_NOHASH_MMU_H_ */ diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h index 0634f2949438..332b13b4ecdb 100644 --- a/arch/powerpc/include/asm/nohash/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/pgalloc.h @@ -3,6 +3,7 @@ #define _ASM_POWERPC_NOHASH_PGALLOC_H #include <linux/mm.h> +#include <linux/slab.h> extern void tlb_remove_table(struct mmu_gather *tlb, void *table); #ifdef CONFIG_PPC64 @@ -16,9 +17,64 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb, } #endif /* !CONFIG_PPC_BOOK3E */ +static inline pgd_t *pgd_alloc(struct mm_struct *mm) +{ + return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), + pgtable_gfp_flags(mm, GFP_KERNEL)); +} + +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); +} + #ifdef CONFIG_PPC64 #include <asm/nohash/64/pgalloc.h> #else #include <asm/nohash/32/pgalloc.h> #endif + +static inline void pgtable_free(void *table, int shift) +{ + if (!shift) { + pte_fragment_free((unsigned long *)table, 0); + } else { + BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); + kmem_cache_free(PGT_CACHE(shift), table); + } +} + +#define get_hugepd_cache_index(x) (x) + +#ifdef CONFIG_SMP +static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) +{ + unsigned long pgf = (unsigned long)table; + + BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); + pgf |= shift; + tlb_remove_table(tlb, (void *)pgf); +} + +static inline void __tlb_remove_table(void *_table) +{ + void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE); + unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE; + + pgtable_free(table, shift); +} + +#else +static inline void pgtable_free_tlb(struct 
mmu_gather *tlb, void *table, int shift) +{ + pgtable_free(table, shift); +} +#endif + +static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, + unsigned long address) +{ + tlb_flush_pgtable(tlb, address); + pgtable_free_tlb(tlb, table, 0); +} #endif /* _ASM_POWERPC_NOHASH_PGALLOC_H */ diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h index dd40d200f274..813918f40765 100644 --- a/arch/powerpc/include/asm/nohash/pte-book3e.h +++ b/arch/powerpc/include/asm/nohash/pte-book3e.h @@ -60,13 +60,8 @@ #define _PAGE_SPECIAL _PAGE_SW0 /* Base page size */ -#ifdef CONFIG_PPC_64K_PAGES -#define _PAGE_PSIZE _PAGE_PSIZE_64K -#define PTE_RPN_SHIFT (28) -#else #define _PAGE_PSIZE _PAGE_PSIZE_4K #define PTE_RPN_SHIFT (24) -#endif #define PTE_WIMGE_SHIFT (19) #define PTE_BAP_SHIFT (2) diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 870fb7b239ea..e1577cfa7186 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -186,8 +186,8 @@ #define OPAL_XIVE_FREE_IRQ 140 #define OPAL_XIVE_SYNC 141 #define OPAL_XIVE_DUMP 142 -#define OPAL_XIVE_RESERVED3 143 -#define OPAL_XIVE_RESERVED4 144 +#define OPAL_XIVE_GET_QUEUE_STATE 143 +#define OPAL_XIVE_SET_QUEUE_STATE 144 #define OPAL_SIGNAL_SYSTEM_RESET 145 #define OPAL_NPU_INIT_CONTEXT 146 #define OPAL_NPU_DESTROY_CONTEXT 147 @@ -209,8 +209,10 @@ #define OPAL_SENSOR_GROUP_ENABLE 163 #define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164 #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165 +#define OPAL_HANDLE_HMI2 166 #define OPAL_NX_COPROC_INIT 167 -#define OPAL_LAST 167 +#define OPAL_XIVE_GET_VP_STATE 170 +#define OPAL_LAST 170 #define QUIESCE_HOLD 1 /* Spin all calls at entry */ #define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */ @@ -634,6 +636,15 @@ struct OpalHMIEvent { } u; }; +/* OPAL_HANDLE_HMI2 out_flags */ +enum { + OPAL_HMI_FLAGS_TB_RESYNC = (1ull << 0), /* Timebase has been resynced */ + OPAL_HMI_FLAGS_DEC_LOST = (1ull << 1), /* DEC lost, needs to be reprogrammed */ + OPAL_HMI_FLAGS_HDEC_LOST = (1ull << 2), /* HDEC lost, needs to be reprogrammed */ + OPAL_HMI_FLAGS_TOD_TB_FAIL = (1ull << 3), /* TOD/TB recovery failed. 
*/ + OPAL_HMI_FLAGS_NEW_EVENT = (1ull << 63), /* An event has been created */ +}; + enum { OPAL_P7IOC_DIAG_TYPE_NONE = 0, OPAL_P7IOC_DIAG_TYPE_RGC = 1, @@ -1118,6 +1129,7 @@ enum { enum { OPAL_IMC_COUNTERS_NEST = 1, OPAL_IMC_COUNTERS_CORE = 2, + OPAL_IMC_COUNTERS_TRACE = 3, }; diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index a55b01c90bb1..4cc37e708bc7 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -203,6 +203,7 @@ int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer, int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data); int64_t opal_sensor_read_u64(u32 sensor_hndl, int token, __be64 *sensor_data); int64_t opal_handle_hmi(void); +int64_t opal_handle_hmi2(__be64 *out_flags); int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end); int64_t opal_unregister_dump_region(uint32_t id); int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val); @@ -279,6 +280,13 @@ int64_t opal_xive_allocate_irq(uint32_t chip_id); int64_t opal_xive_free_irq(uint32_t girq); int64_t opal_xive_sync(uint32_t type, uint32_t id); int64_t opal_xive_dump(uint32_t type, uint32_t id); +int64_t opal_xive_get_queue_state(uint64_t vp, uint32_t prio, + __be32 *out_qtoggle, + __be32 *out_qindex); +int64_t opal_xive_set_queue_state(uint64_t vp, uint32_t prio, + uint32_t qtoggle, + uint32_t qindex); +int64_t opal_xive_get_vp_state(uint64_t vp, __be64 *out_w01); int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target, uint64_t desc, uint16_t pe_number); @@ -352,6 +360,7 @@ int opal_power_control_init(void); extern int opal_machine_check(struct pt_regs *regs); extern bool opal_mce_check_early_recovery(struct pt_regs *regs); extern int opal_hmi_exception_early(struct pt_regs *regs); +extern int opal_hmi_exception_early2(struct pt_regs *regs); extern int opal_handle_hmi_exception(struct pt_regs *regs); extern void opal_shutdown(void); diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 134e912d403f..62f27e0aef7c 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -174,7 +174,6 @@ struct paca_struct { u8 irq_soft_mask; /* mask for irq soft masking */ u8 irq_happened; /* irq happened while soft-disabled */ u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ - u8 nap_state_lost; /* NV GPR values lost in power7_idle */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE u8 pmcregs_in_use; /* pseries puts this in lppaca */ #endif @@ -184,23 +183,28 @@ struct paca_struct { #endif #ifdef CONFIG_PPC_POWERNV - /* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */ - u32 *core_idle_state_ptr; - u8 thread_idle_state; /* PNV_THREAD_RUNNING/NAP/SLEEP */ - /* Mask to indicate thread id in core */ - u8 thread_mask; - /* Mask to denote subcore sibling threads */ - u8 subcore_sibling_mask; - /* Flag to request this thread not to stop */ - atomic_t dont_stop; - /* The PSSCR value that the kernel requested before going to stop */ - u64 requested_psscr; - - /* - * Save area for additional SPRs that need to be - * saved/restored during cpuidle stop. 
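
opal_handle_hmi2() reports what the HMI cost via the out_flags bitmask defined a few hunks up. A hedged sketch of how a powernv caller might decode it — this is kernel-context pseudocode, not the actual handler, with each recovery action reduced to one line:

    #include <linux/printk.h>
    #include <asm/opal.h>
    #include <asm/time.h>

    static void hmi_flags_sketch(void)
    {
            __be64 out = 0;
            u64 flags;

            if (opal_handle_hmi2(&out) != OPAL_SUCCESS)
                    return;

            flags = be64_to_cpu(out);          /* OPAL fills it big-endian */

            if (flags & OPAL_HMI_FLAGS_DEC_LOST)
                    set_dec(decrementer_max);  /* reprogram the decrementer */
            if (flags & OPAL_HMI_FLAGS_TOD_TB_FAIL)
                    tb_invalid = true;         /* see the asm/time.h hunk below */
    }

The tb_invalid flag referenced here is the one this same series adds to asm/time.h further down.
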
- */ - struct stop_sprs stop_sprs; + /* PowerNV idle fields */ + /* PNV_CORE_IDLE_* bits, all siblings work on thread 0 paca */ + unsigned long idle_state; + union { + /* P7/P8 specific fields */ + struct { + /* PNV_THREAD_RUNNING/NAP/SLEEP */ + u8 thread_idle_state; + /* Mask to denote subcore sibling threads */ + u8 subcore_sibling_mask; + }; + + /* P9 specific fields */ + struct { +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* The PSSCR value that the kernel requested before going to stop */ + u64 requested_psscr; + /* Flag to request this thread not to stop */ + atomic_t dont_stop; +#endif + }; + }; #endif #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index ed870468ef6f..dbc8c0679480 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -28,11 +28,15 @@ #define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) #ifndef __ASSEMBLY__ -#ifdef CONFIG_HUGETLB_PAGE -extern bool hugetlb_disabled; -extern unsigned int HPAGE_SHIFT; -#else +#ifndef CONFIG_HUGETLB_PAGE #define HPAGE_SHIFT PAGE_SHIFT +#elif defined(CONFIG_PPC_BOOK3S_64) +extern unsigned int hpage_shift; +#define HPAGE_SHIFT hpage_shift +#elif defined(CONFIG_PPC_8xx) +#define HPAGE_SHIFT 19 /* 512k pages */ +#elif defined(CONFIG_PPC_FSL_BOOK3E) +#define HPAGE_SHIFT 22 /* 4M pages */ #endif #define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) @@ -132,18 +136,7 @@ static inline bool pfn_valid(unsigned long pfn) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) -#ifdef CONFIG_PPC_BOOK3S_64 -/* - * On hash the vmalloc and other regions alias to the kernel region when passed - * through __pa(), which virt_to_pfn() uses. That means virt_addr_valid() can - * return true for some vmalloc addresses, which is incorrect. So explicitly - * check that the address is in the kernel region. - */ -#define virt_addr_valid(kaddr) (REGION_ID(kaddr) == KERNEL_REGION_ID && \ - pfn_valid(virt_to_pfn(kaddr))) -#else #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr)) -#endif /* * On Book-E parts we need __va to parse the device tree and we can't diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h index e11f03007b57..2b2c60a1a66d 100644 --- a/arch/powerpc/include/asm/pgalloc.h +++ b/arch/powerpc/include/asm/pgalloc.h @@ -20,10 +20,61 @@ static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp) #define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) +pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel); + +static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) +{ + return (pte_t *)pte_fragment_alloc(mm, 1); +} + +static inline pgtable_t pte_alloc_one(struct mm_struct *mm) +{ + return (pgtable_t)pte_fragment_alloc(mm, 0); +} + +void pte_frag_destroy(void *pte_frag); +void pte_fragment_free(unsigned long *table, int kernel); + +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) +{ + pte_fragment_free((unsigned long *)pte, 1); +} + +static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage) +{ + pte_fragment_free((unsigned long *)ptepage, 0); +} + +/* + * Functions that deal with pagetables that could be at any level of + * the table need to be passed an "index_size" so they know how to + * handle allocation. For PTE pages, the allocation size will be + * (2^index_size * sizeof(pointer)) and allocations are drawn from + * the kmem_cache in PGT_CACHE(index_size). 
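
The page.h hunk above turns HPAGE_SHIFT into a per-platform constant: a runtime hpage_shift on Book3S-64, 19 on 8xx (512k pages), 22 on FSL Book3E (4M pages), with HPAGE_SIZE and HPAGE_MASK falling out of the shift. A small standalone check of that arithmetic:

    #include <stdio.h>

    int main(void)
    {
            /* 8xx uses 512k huge pages (shift 19), FSL Book3E 4M (shift 22) */
            unsigned int shifts[] = { 19, 22 };

            for (int i = 0; i < 2; i++) {
                    unsigned long size = 1UL << shifts[i];
                    unsigned long mask = ~(size - 1);

                    printf("shift=%u size=%luk mask=0x%lx\n",
                           shifts[i], size >> 10, mask);
            }
            return 0;
    }
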
+ * + * The maximum index size needs to be big enough to allow any + * pagetable sizes we need, but small enough to fit in the low bits of + * any page table pointer. In other words all pagetables, even tiny + * ones, must be aligned to allow at least enough low 0 bits to + * contain this value. This value is also used as a mask, so it must + * be one less than a power of two. + */ +#define MAX_PGTABLE_INDEX_SIZE 0xf + +extern struct kmem_cache *pgtable_cache[]; +#define PGT_CACHE(shift) pgtable_cache[shift] + +static inline void check_pgt_cache(void) { } + #ifdef CONFIG_PPC_BOOK3S #include <asm/book3s/pgalloc.h> #else #include <asm/nohash/pgalloc.h> #endif +static inline pgtable_t pmd_pgtable(pmd_t pmd) +{ + return (pgtable_t)pmd_page_vaddr(pmd); +} + #endif /* _ASM_POWERPC_PGALLOC_H */ diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h index a89c67b62680..b169bbf95fcb 100644 --- a/arch/powerpc/include/asm/pgtable-be-types.h +++ b/arch/powerpc/include/asm/pgtable-be-types.h @@ -33,11 +33,7 @@ static inline __be64 pmd_raw(pmd_t x) return x.pmd; } -/* - * 64 bit hash always use 4 level table. Everybody else use 4 level - * only for 4K page size. - */ -#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES) +/* 64 bit always use 4 level table. */ typedef struct { __be64 pud; } pud_t; #define __pud(x) ((pud_t) { cpu_to_be64(x) }) #define __pud_raw(x) ((pud_t) { (x) }) @@ -51,7 +47,6 @@ static inline __be64 pud_raw(pud_t x) return x.pud; } -#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */ #endif /* CONFIG_PPC64 */ /* PGD level */ @@ -77,7 +72,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; * With hash config 64k pages additionally define a bigger "real PTE" type that * gathers the "second half" part of the PTE for pseudo 64k pages */ -#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64) +#ifdef CONFIG_PPC_64K_PAGES typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; #else typedef struct { pte_t pte; } real_pte_t; diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h index 3b0edf041b2e..d11b4c61d686 100644 --- a/arch/powerpc/include/asm/pgtable-types.h +++ b/arch/powerpc/include/asm/pgtable-types.h @@ -23,18 +23,13 @@ static inline unsigned long pmd_val(pmd_t x) return x.pmd; } -/* - * 64 bit hash always use 4 level table. Everybody else use 4 level - * only for 4K page size. - */ -#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES) +/* 64 bit always use 4 level table. 
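
pgtable_free_tlb() in the nohash pgalloc hunk above smuggles the table's index size through tlb_remove_table() in the low bits of the pointer itself, which is why the comment insists MAX_PGTABLE_INDEX_SIZE be both a mask (one less than a power of two) and smaller than the guaranteed table alignment. The same trick in a standalone userspace sketch:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define INDEX_MASK 0xfUL        /* mirrors MAX_PGTABLE_INDEX_SIZE */

    static void *tag(void *table, unsigned long shift)
    {
            assert(((uintptr_t)table & INDEX_MASK) == 0); /* alignment is load-bearing */
            assert(shift <= INDEX_MASK);
            return (void *)((uintptr_t)table | shift);
    }

    static void untag(void *tagged, void **table, unsigned long *shift)
    {
            *shift = (uintptr_t)tagged & INDEX_MASK;
            *table = (void *)((uintptr_t)tagged & ~INDEX_MASK);
    }

    int main(void)
    {
            void *table = aligned_alloc(16, 128); /* 16-byte aligned: 4 free bits */
            void *slot = tag(table, 9);           /* pretend index size 9 */
            void *t;
            unsigned long shift;

            untag(slot, &t, &shift);
            printf("table=%p shift=%lu\n", t, shift);
            free(t);
            return 0;
    }

__tlb_remove_table() above is exactly untag() followed by pgtable_free().
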
*/ typedef struct { unsigned long pud; } pud_t; #define __pud(x) ((pud_t) { (x) }) static inline unsigned long pud_val(pud_t x) { return x.pud; } -#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */ #endif /* CONFIG_PPC64 */ /* PGD level */ @@ -54,7 +49,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; * With hash config 64k pages additionally define a bigger "real PTE" type that * gathers the "second half" part of the PTE for pseudo 64k pages */ -#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64) +#ifdef CONFIG_PPC_64K_PAGES typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; #else typedef struct { pte_t pte; } real_pte_t; diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 505550fb2935..3f53be60fb01 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -89,9 +89,6 @@ extern void paging_init(void); */ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *); -extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, - unsigned long end, int write, - struct page **pages, int *nr); #ifndef CONFIG_TRANSPARENT_HUGEPAGE #define pmd_large(pmd) 0 #endif @@ -108,6 +105,12 @@ void mark_initmem_nx(void); static inline void mark_initmem_nx(void) { } #endif +#ifdef CONFIG_PPC_DEBUG_WX +void ptdump_check_wx(void); +#else +static inline void ptdump_check_wx(void) { } +#endif + /* * When used, PTE_FRAG_NR is defined in subarch pgtable.h * so we are sure it is included when arriving here. diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 3351bcf42f2d..706ac5df546f 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -164,6 +164,9 @@ struct thread_struct { unsigned long rtas_sp; /* stack pointer for when in RTAS */ #endif #endif +#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) + unsigned long kuap; /* opened segments for user access */ +#endif /* Debug Registers */ struct debug_reg debug; struct thread_fp_state fp_state; @@ -411,14 +414,17 @@ static inline unsigned long get_clean_sp(unsigned long sp, int is_32) } #endif +/* asm stubs */ +extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val); +extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val); +extern unsigned long isa206_idle_insn_mayloss(unsigned long type); + extern unsigned long cpuidle_disable; enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF}; extern int powersave_nap; /* set if nap mode can be used in idle loop */ -extern unsigned long power7_idle_insn(unsigned long type); /* PNV_THREAD_NAP/etc*/ + extern void power7_idle_type(unsigned long type); -extern unsigned long power9_idle_stop(unsigned long psscr_val); -extern unsigned long power9_offline_stop(unsigned long psscr_val); extern void power9_idle_type(unsigned long stop_psscr_val, unsigned long stop_psscr_mask); diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 64271e562fed..6f047730e642 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -52,10 +52,17 @@ struct pt_regs }; }; + union { + struct { #ifdef CONFIG_PPC64 - unsigned long ppr; - unsigned long __pad; /* Maintain 16 byte interrupt stack alignment */ + unsigned long ppr; +#endif +#ifdef CONFIG_PPC_KUAP + unsigned long kuap; #endif + }; + unsigned long __pad[2]; /* Maintain 16 byte interrupt stack alignment */ + }; }; #endif diff --git 
a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c5b2aff0ce8e..10caa145f98b 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -168,6 +168,7 @@ #define PSSCR_ESL 0x00200000 /* Enable State Loss */ #define PSSCR_SD 0x00400000 /* Status Disable */ #define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */ +#define PSSCR_PLS_SHIFT 60 #define PSSCR_GUEST_VIS 0xf0000000000003ffUL /* Guest-visible PSSCR fields */ #define PSSCR_FAKE_SUSPEND 0x00000400 /* Fake-suspend bit (P9 DD2.2) */ #define PSSCR_FAKE_SUSPEND_LG 10 /* Fake-suspend bit position */ @@ -758,10 +759,9 @@ #define SRR1_WAKERESET 0x00100000 /* System reset */ #define SRR1_WAKEHDBELL 0x000c0000 /* Hypervisor doorbell on P8 */ #define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask [46:47] */ -#define SRR1_WS_DEEPEST 0x00030000 /* Some resources not maintained, - * may not be recoverable */ -#define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */ -#define SRR1_WS_DEEP 0x00010000 /* All resources maintained */ +#define SRR1_WS_HVLOSS 0x00030000 /* HV resources not maintained */ +#define SRR1_WS_GPRLOSS 0x00020000 /* GPRs not maintained */ +#define SRR1_WS_NOLOSS 0x00010000 /* All resources maintained */ #define SRR1_PROGTM 0x00200000 /* TM Bad Thing */ #define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */ #define SRR1_PROGILL 0x00080000 /* Illegal instruction */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index eb2a33d5df26..e382bd6ede84 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -41,7 +41,7 @@ #if defined(CONFIG_PPC_BOOK3E_64) #define MSR_64BIT MSR_CM -#define MSR_ (MSR_ME | MSR_CE) +#define MSR_ (MSR_ME | MSR_RI | MSR_CE) #define MSR_KERNEL (MSR_ | MSR_64BIT) #define MSR_USER32 (MSR_ | MSR_PR | MSR_EE) #define MSR_USER64 (MSR_USER32 | MSR_64BIT) diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h index 44816cbc4198..c6f466f4c241 100644 --- a/arch/powerpc/include/asm/slice.h +++ b/arch/powerpc/include/asm/slice.h @@ -4,9 +4,7 @@ #ifdef CONFIG_PPC_BOOK3S_64 #include <asm/book3s/64/slice.h> -#elif defined(CONFIG_PPC64) -#include <asm/nohash/64/slice.h> -#elif defined(CONFIG_PPC_MMU_NOHASH) +#elif defined(CONFIG_PPC_MMU_NOHASH_32) #include <asm/nohash/32/slice.h> #endif @@ -38,6 +36,11 @@ void slice_setup_new_exec(void); static inline void slice_init_new_context_exec(struct mm_struct *mm) {} +static inline unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) +{ + return 0; +} + #endif /* CONFIG_PPC_MM_SLICES */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index 68da49320592..3192d454a733 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -17,9 +17,9 @@ extern int create_section_mapping(unsigned long start, unsigned long end, int ni extern int remove_section_mapping(unsigned long start, unsigned long end); #ifdef CONFIG_PPC_BOOK3S_64 -extern void resize_hpt_for_hotplug(unsigned long new_mem_size); +extern int resize_hpt_for_hotplug(unsigned long new_mem_size); #else -static inline void resize_hpt_for_hotplug(unsigned long new_mem_size) { } +static inline int resize_hpt_for_hotplug(unsigned long new_mem_size) { return 0; } #endif #ifdef CONFIG_NUMA diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index 1647de15a31e..9bf6dffb4090 100644 --- 
a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -4,14 +4,17 @@ #ifdef __KERNEL__ +#ifndef CONFIG_KASAN #define __HAVE_ARCH_STRNCPY #define __HAVE_ARCH_STRNCMP +#define __HAVE_ARCH_MEMCHR +#define __HAVE_ARCH_MEMCMP +#define __HAVE_ARCH_MEMSET16 +#endif + #define __HAVE_ARCH_MEMSET #define __HAVE_ARCH_MEMCPY #define __HAVE_ARCH_MEMMOVE -#define __HAVE_ARCH_MEMCMP -#define __HAVE_ARCH_MEMCHR -#define __HAVE_ARCH_MEMSET16 #define __HAVE_ARCH_MEMCPY_FLUSHCACHE extern char * strcpy(char *,const char *); @@ -27,7 +30,27 @@ extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); extern void * memcpy_flushcache(void *,const void *,__kernel_size_t); +void *__memset(void *s, int c, __kernel_size_t count); +void *__memcpy(void *to, const void *from, __kernel_size_t n); +void *__memmove(void *to, const void *from, __kernel_size_t n); + +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) +/* + * For files that are not instrumented (e.g. mm/slub.c) we + * should use not instrumented version of mem* functions. + */ +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memmove(dst, src, len) __memmove(dst, src, len) +#define memset(s, c, n) __memset(s, c, n) + +#ifndef __NO_FORTIFY +#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */ +#endif + +#endif + #ifdef CONFIG_PPC64 +#ifndef CONFIG_KASAN #define __HAVE_ARCH_MEMSET32 #define __HAVE_ARCH_MEMSET64 @@ -49,8 +72,11 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n) { return __memset64(p, v, n * 8); } +#endif #else +#ifndef CONFIG_KASAN #define __HAVE_ARCH_STRLEN +#endif extern void *memset16(uint16_t *, uint16_t, __kernel_size_t); #endif diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index 1243045bad2d..a048fed0722f 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -94,9 +94,15 @@ static inline void syscall_set_arguments(struct task_struct *task, regs->orig_gpr3 = args[0]; } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { - int arch = is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64; + int arch; + + if (IS_ENABLED(CONFIG_PPC64) && !test_tsk_thread_flag(task, TIF_32BIT)) + arch = AUDIT_ARCH_PPC64; + else + arch = AUDIT_ARCH_PPC; + #ifdef __LITTLE_ENDIAN__ arch |= __AUDIT_ARCH_LE; #endif diff --git a/arch/powerpc/include/asm/task_size_64.h b/arch/powerpc/include/asm/task_size_64.h index eab4779f6b84..c993482237ed 100644 --- a/arch/powerpc/include/asm/task_size_64.h +++ b/arch/powerpc/include/asm/task_size_64.h @@ -20,7 +20,7 @@ /* * For now 512TB is only supported with book3s and 64K linux page size. 
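
The string.h hunk above is the standard KASAN dance: files that must not be instrumented (built with KASAN_SANITIZE_foo.o := n, so __SANITIZE_ADDRESS__ is undefined) get memcpy/memmove/memset redefined to the raw __mem* implementations, and FORTIFY_SOURCE is switched off because it would route back through __builtin_memcpy. A userspace sketch of the redirection pattern, with NO_INSTRUMENT as an invented stand-in for !defined(__SANITIZE_ADDRESS__):

    #include <stdio.h>
    #include <string.h>

    /* raw implementation, never instrumented */
    static void *raw_memcpy(void *dst, const void *src, size_t n)
    {
            char *d = dst;
            const char *s = src;

            while (n--)
                    *d++ = *s++;
            return dst;
    }

    #ifdef NO_INSTRUMENT    /* stands in for !defined(__SANITIZE_ADDRESS__) */
    #define memcpy(dst, src, len) raw_memcpy(dst, src, len)
    #endif

    int main(void)
    {
            char buf[4];

            memcpy(buf, "hi!", 4);  /* raw_memcpy under -DNO_INSTRUMENT */
            puts(buf);
            return 0;
    }
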
*/ -#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_64K_PAGES) +#ifdef CONFIG_PPC_64K_PAGES /* * Max value currently used: */ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 54bf7e68a7e1..57e968413d1e 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -36,6 +36,8 @@ extern unsigned long ppc_proc_freq; extern unsigned long ppc_tb_freq; #define DEFAULT_TB_FREQ 125000000UL +extern bool tb_invalid; + struct div_result { u64 result_high; u64 result_low; diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index 58ef8c43a89d..08cd60cd70b7 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -54,6 +54,22 @@ DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit, TP_ARGS(regs) ); +#ifdef CONFIG_PPC_DOORBELL +DEFINE_EVENT(ppc64_interrupt_class, doorbell_entry, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs) +); + +DEFINE_EVENT(ppc64_interrupt_class, doorbell_exit, + + TP_PROTO(struct pt_regs *regs), + + TP_ARGS(regs) +); +#endif + #ifdef CONFIG_PPC_PSERIES extern int hcall_tracepoint_regfunc(void); extern void hcall_tracepoint_unregfunc(void); diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 4d6d905e9138..76f34346b642 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -6,6 +6,7 @@ #include <asm/processor.h> #include <asm/page.h> #include <asm/extable.h> +#include <asm/kup.h> /* * The fs value determines whether argument validity checking should be @@ -140,6 +141,7 @@ extern long __put_user_bad(void); #define __put_user_size(x, ptr, size, retval) \ do { \ retval = 0; \ + allow_write_to_user(ptr, size); \ switch (size) { \ case 1: __put_user_asm(x, ptr, retval, "stb"); break; \ case 2: __put_user_asm(x, ptr, retval, "sth"); break; \ @@ -147,6 +149,7 @@ do { \ case 8: __put_user_asm2(x, ptr, retval); break; \ default: __put_user_bad(); \ } \ + prevent_write_to_user(ptr, size); \ } while (0) #define __put_user_nocheck(x, ptr, size) \ @@ -239,6 +242,7 @@ do { \ __chk_user_ptr(ptr); \ if (size > sizeof(x)) \ (x) = __get_user_bad(); \ + allow_read_from_user(ptr, size); \ switch (size) { \ case 1: __get_user_asm(x, ptr, retval, "lbz"); break; \ case 2: __get_user_asm(x, ptr, retval, "lhz"); break; \ @@ -246,6 +250,7 @@ do { \ case 8: __get_user_asm2(x, ptr, retval); break; \ default: (x) = __get_user_bad(); \ } \ + prevent_read_from_user(ptr, size); \ } while (0) /* @@ -305,15 +310,21 @@ extern unsigned long __copy_tofrom_user(void __user *to, static inline unsigned long raw_copy_in_user(void __user *to, const void __user *from, unsigned long n) { - return __copy_tofrom_user(to, from, n); + unsigned long ret; + + allow_user_access(to, from, n); + ret = __copy_tofrom_user(to, from, n); + prevent_user_access(to, from, n); + return ret; } #endif /* __powerpc64__ */ static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { + unsigned long ret; if (__builtin_constant_p(n) && (n <= 8)) { - unsigned long ret = 1; + ret = 1; switch (n) { case 1: @@ -338,14 +349,18 @@ static inline unsigned long raw_copy_from_user(void *to, } barrier_nospec(); - return __copy_tofrom_user((__force void __user *)to, from, n); + allow_read_from_user(from, n); + ret = __copy_tofrom_user((__force void __user *)to, from, n); + prevent_read_from_user(from, n); + return ret; } static inline unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned 
long n) { + unsigned long ret; if (__builtin_constant_p(n) && (n <= 8)) { - unsigned long ret = 1; + ret = 1; switch (n) { case 1: @@ -365,17 +380,24 @@ static inline unsigned long raw_copy_to_user(void __user *to, return 0; } - return __copy_tofrom_user(to, (__force const void __user *)from, n); + allow_write_to_user(to, n); + ret = __copy_tofrom_user(to, (__force const void __user *)from, n); + prevent_write_to_user(to, n); + return ret; } extern unsigned long __clear_user(void __user *addr, unsigned long size); static inline unsigned long clear_user(void __user *addr, unsigned long size) { + unsigned long ret = size; might_fault(); - if (likely(access_ok(addr, size))) - return __clear_user(addr, size); - return size; + if (likely(access_ok(addr, size))) { + allow_write_to_user(addr, size); + ret = __clear_user(addr, size); + prevent_write_to_user(addr, size); + } + return ret; } extern long strncpy_from_user(char *dst, const char __user *src, long count); diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 3c704f5dd3ae..b579a943407b 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -109,12 +109,26 @@ extern int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio, extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio); extern void xive_native_sync_source(u32 hw_irq); +extern void xive_native_sync_queue(u32 hw_irq); extern bool is_xive_irq(struct irq_chip *chip); extern int xive_native_enable_vp(u32 vp_id, bool single_escalation); extern int xive_native_disable_vp(u32 vp_id); extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id); extern bool xive_native_has_single_escalation(void); +extern int xive_native_get_queue_info(u32 vp_id, uint32_t prio, + u64 *out_qpage, + u64 *out_qsize, + u64 *out_qeoi_page, + u32 *out_escalate_irq, + u64 *out_qflags); + +extern int xive_native_get_queue_state(u32 vp_id, uint32_t prio, u32 *qtoggle, + u32 *qindex); +extern int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle, + u32 qindex); +extern int xive_native_get_vp_state(u32 vp_id, u64 *out_state); + #else static inline bool xive_enabled(void) { return false; } diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index cddadccf551d..0ea6c4aa3a20 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -31,6 +31,18 @@ CFLAGS_REMOVE_btext.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_prom.o = $(CC_FLAGS_FTRACE) endif +KASAN_SANITIZE_early_32.o := n +KASAN_SANITIZE_cputable.o := n +KASAN_SANITIZE_prom_init.o := n +KASAN_SANITIZE_btext.o := n + +ifdef CONFIG_KASAN +CFLAGS_early_32.o += -DDISABLE_BRANCH_PROFILING +CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING +CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING +CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING +endif + obj-y := cputable.o ptrace.o syscalls.o \ irq.o align.o signal_32.o pmc.o vdso.o \ process.o systbl.o idle.o \ @@ -93,7 +105,7 @@ extra-y += vmlinux.lds obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o -obj-$(CONFIG_PPC32) += entry_32.o setup_32.o +obj-$(CONFIG_PPC32) += entry_32.o setup_32.o early_32.o obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_BOOTX_TEXT) += btext.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 86a61e5f8285..8e02444e9d3d 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -147,6 +147,9 @@ int main(void) #if defined(CONFIG_KVM) && 
defined(CONFIG_BOOKE) OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu); #endif +#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) + OFFSET(KUAP, thread_struct, kuap); +#endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM OFFSET(PACATMSCRATCH, paca_struct, tm_scratch); @@ -268,7 +271,6 @@ int main(void) OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime); OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime); OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save); - OFFSET(PACA_NAPSTATELOST, paca_struct, nap_state_lost); OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso); #else /* CONFIG_PPC64 */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE @@ -332,6 +334,10 @@ int main(void) STACK_PT_REGS_OFFSET(_PPR, ppr); #endif /* CONFIG_PPC64 */ +#ifdef CONFIG_PPC_KUAP + STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap); +#endif + #if defined(CONFIG_PPC32) #if defined(CONFIG_BOOKE) || defined(CONFIG_40x) DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE); @@ -766,23 +772,6 @@ int main(void) OFFSET(VCPU_TIMING_LAST_ENTER_TBL, kvm_vcpu, arch.timing_last_enter.tv32.tbl); #endif -#ifdef CONFIG_PPC_POWERNV - OFFSET(PACA_CORE_IDLE_STATE_PTR, paca_struct, core_idle_state_ptr); - OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state); - OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask); - OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask); - OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr); - OFFSET(PACA_DONT_STOP, paca_struct, dont_stop); -#define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f) - STOP_SPR(STOP_PID, pid); - STOP_SPR(STOP_LDBAR, ldbar); - STOP_SPR(STOP_FSCR, fscr); - STOP_SPR(STOP_HFSCR, hfscr); - STOP_SPR(STOP_MMCR1, mmcr1); - STOP_SPR(STOP_MMCR2, mmcr2); - STOP_SPR(STOP_MMCRA, mmcra); -#endif - DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE); diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 53102764fd2f..f2ed3ef4b129 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -759,23 +759,22 @@ static void cacheinfo_create_index_dir(struct cache *cache, int index, index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL); if (!index_dir) - goto err; + return; index_dir->cache = cache; rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type, cache_dir->kobj, "index%d", index); - if (rc) - goto err; + if (rc) { + kobject_put(&index_dir->kobj); + kfree(index_dir); + return; + } index_dir->next = cache_dir->index; cache_dir->index = index_dir; cacheinfo_create_index_opt_attrs(index_dir); - - return; -err: - kfree(index_dir); } static void cacheinfo_sysfs_populate(unsigned int cpu_id, diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 1eab54bc6ee9..cd12f362b61f 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2147,7 +2147,11 @@ void __init set_cur_cpu_spec(struct cpu_spec *s) struct cpu_spec *t = &the_cpu_spec; t = PTRRELOC(t); - *t = *s; + /* + * use memcpy() instead of *t = *s so that GCC replaces it + * by __memcpy() when KASAN is active + */ + memcpy(t, s, sizeof(*t)); *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec; } @@ -2161,8 +2165,11 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, t = PTRRELOC(t); old = *t; - /* Copy everything, then do fixups */ - *t = *s; + /* + * Copy everything, then do fixups. 
Use memcpy() instead of *t = *s + * so that GCC replaces it by __memcpy() when KASAN is active + */ + memcpy(t, s, sizeof(*t)); /* * If we are overriding a previous value derived from the real diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index b6fe883b1016..5ec3b3835925 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -18,6 +18,7 @@ #include <asm/dbell.h> #include <asm/irq_regs.h> #include <asm/kvm_ppc.h> +#include <asm/trace.h> #ifdef CONFIG_SMP @@ -81,6 +82,7 @@ void doorbell_exception(struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); irq_enter(); + trace_doorbell_entry(regs); ppc_msgsync(); @@ -91,6 +93,7 @@ void doorbell_exception(struct pt_regs *regs) smp_ipi_demux_relaxed(); /* already performed the barrier */ + trace_doorbell_exit(regs); irq_exit(); set_irq_regs(old_regs); } diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c new file mode 100644 index 000000000000..3482118ffe76 --- /dev/null +++ b/arch/powerpc/kernel/early_32.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Early init before relocation + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <asm/setup.h> +#include <asm/sections.h> +#include <asm/asm-prototypes.h> + +/* + * We're called here very early in the boot. + * + * Note that the kernel may be running at an address which is different + * from the address that it was linked at, so we must use RELOC/PTRRELOC + * to access static data (including strings). -- paulus + */ +notrace unsigned long __init early_init(unsigned long dt_ptr) +{ + unsigned long offset = reloc_offset(); + + /* First zero the BSS */ + memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start); + + /* + * Identify the CPU type and fix up code sections + * that depend on which cpu we have. + */ + identify_cpu(offset, mfspr(SPRN_PVR)); + + apply_feature_fixups(); + + return KERNELBASE + offset; +} diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index b61cfd29c76f..c18f3490a77e 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -36,15 +36,10 @@ #include <asm/asm-405.h> #include <asm/feature-fixups.h> #include <asm/barrier.h> +#include <asm/kup.h> +#include <asm/bug.h> -/* - * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE. - */ -#if MSR_KERNEL >= 0x10000 -#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l -#else -#define LOAD_MSR_KERNEL(r, x) li r,(x) -#endif +#include "head_32.h" /* * Align to 4k in order to ensure that all functions modyfing srr0/srr1 @@ -150,8 +145,8 @@ transfer_to_handler: stw r12,_CTR(r11) stw r2,_XER(r11) mfspr r12,SPRN_SPRG_THREAD - addi r2,r12,-THREAD beq 2f /* if from user, fix up THREAD.regs */ + addi r2, r12, -THREAD addi r11,r1,STACK_FRAME_OVERHEAD stw r11,PT_REGS(r12) #if defined(CONFIG_40x) || defined(CONFIG_BOOKE) @@ -161,6 +156,9 @@ transfer_to_handler: andis. 
r12,r12,DBCR0_IDM@h #endif ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) +#ifdef CONFIG_PPC_BOOK3S_32 + kuep_lock r11, r12 +#endif #if defined(CONFIG_40x) || defined(CONFIG_BOOKE) beq+ 3f /* From user and task is ptraced - load up global dbcr0 */ @@ -186,6 +184,8 @@ transfer_to_handler: 2: /* if from kernel, check interrupted DOZE/NAP mode and * check for stack overflow */ + kuap_save_and_lock r11, r12, r9, r2, r0 + addi r2, r12, -THREAD lwz r9,KSP_LIMIT(r12) cmplw r1,r9 /* if r1 <= ksp_limit */ ble- stack_ovf /* then the kernel stack overflowed */ @@ -207,26 +207,43 @@ transfer_to_handler_cont: mtspr SPRN_NRI, r0 #endif #ifdef CONFIG_TRACE_IRQFLAGS + /* + * When tracing IRQ state (lockdep) we enable the MMU before we call + * the IRQ tracing functions as they might access vmalloc space or + * perform IOs for console output. + * + * To speed up the syscall path where interrupts stay on, let's check + * first if we are changing the MSR value at all. + */ + tophys(r12, r1) + lwz r12,_MSR(r12) + andi. r12,r12,MSR_EE + bne 1f + + /* MSR isn't changing, just transition directly */ +#endif + mtspr SPRN_SRR0,r11 + mtspr SPRN_SRR1,r10 + mtlr r9 + SYNC + RFI /* jump to handler, enable MMU */ + +#ifdef CONFIG_TRACE_IRQFLAGS +1: /* MSR is changing, re-enable MMU so we can notify lockdep. We need to + * keep interrupts disabled at this point otherwise we might risk + * taking an interrupt before we tell lockdep they are enabled. + */ lis r12,reenable_mmu@h ori r12,r12,reenable_mmu@l + LOAD_MSR_KERNEL(r0, MSR_KERNEL) mtspr SPRN_SRR0,r12 - mtspr SPRN_SRR1,r10 + mtspr SPRN_SRR1,r0 SYNC RFI -reenable_mmu: /* re-enable mmu so we can */ - mfmsr r10 - lwz r12,_MSR(r1) - xor r10,r10,r12 - andi. r10,r10,MSR_EE /* Did EE change? */ - beq 1f +reenable_mmu: /* - * The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1. - * If from user mode there is only one stack frame on the stack, and - * accessing CALLER_ADDR1 will cause oops. So we need create a dummy - * stack frame to make trace_hardirqs_off happy. - * - * This is handy because we also need to save a bunch of GPRs, + * We save a bunch of GPRs, * r3 can be different from GPR3(r1) at this point, r9 and r11 * contains the old MSR and handler address respectively, * r4 & r5 can contain page fault arguments that need to be passed @@ -234,14 +251,19 @@ reenable_mmu: /* re-enable mmu so we can */ * they aren't useful past this point (aren't syscall arguments), * the rest is restored from the exception frame. 
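
The reworked transfer_to_handler_cont above only takes the slow "tell lockdep" path when MSR_EE is actually transitioning; the removed reenable_mmu prologue made the equivalent decision with an xor ("Did EE change?"). The bit-transition test, rendered in C with the classic 32-bit MSR values:

    #include <stdio.h>

    #define MSR_EE 0x8000UL         /* external interrupt enable (32-bit MSR) */

    static int ee_changes(unsigned long cur_msr, unsigned long new_msr)
    {
            /* xor keeps only the differing bits; the mask selects EE */
            return !!((cur_msr ^ new_msr) & MSR_EE);
    }

    int main(void)
    {
            printf("%d\n", ee_changes(0x1032, 0x9032)); /* 1: EE being enabled */
            printf("%d\n", ee_changes(0x9032, 0x9032)); /* 0: fast path, plain RFI */
            return 0;
    }
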
*/ + stwu r1,-32(r1) stw r9,8(r1) stw r11,12(r1) stw r3,16(r1) stw r4,20(r1) stw r5,24(r1) - bl trace_hardirqs_off - lwz r5,24(r1) + + /* If we are disabling interrupts (normal case), simply log it with + * lockdep + */ +1: bl trace_hardirqs_off +2: lwz r5,24(r1) lwz r4,20(r1) lwz r3,16(r1) lwz r11,12(r1) @@ -251,15 +273,9 @@ reenable_mmu: /* re-enable mmu so we can */ lwz r6,GPR6(r1) lwz r7,GPR7(r1) lwz r8,GPR8(r1) -1: mtctr r11 + mtctr r11 mtlr r9 bctr /* jump to handler */ -#else /* CONFIG_TRACE_IRQFLAGS */ - mtspr SPRN_SRR0,r11 - mtspr SPRN_SRR1,r10 - mtlr r9 - SYNC - RFI /* jump to handler, enable MMU */ #endif /* CONFIG_TRACE_IRQFLAGS */ #if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500) @@ -272,6 +288,7 @@ reenable_mmu: /* re-enable mmu so we can */ lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */ rlwinm r9,r9,0,~MSR_EE lwz r12,_LINK(r11) /* and return to address in LR */ + kuap_restore r11, r2, r3, r4, r5 b fast_exception_return #endif @@ -301,6 +318,33 @@ stack_ovf: SYNC RFI +#ifdef CONFIG_TRACE_IRQFLAGS +trace_syscall_entry_irq_off: + /* + * Syscall shouldn't happen while interrupts are disabled, + * so let's do a warning here. + */ +0: trap + EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING + bl trace_hardirqs_on + + /* Now enable for real */ + LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE) + mtmsr r10 + + REST_GPR(0, r1) + REST_4GPRS(3, r1) + REST_2GPRS(7, r1) + b DoSyscall +#endif /* CONFIG_TRACE_IRQFLAGS */ + + .globl transfer_to_syscall +transfer_to_syscall: +#ifdef CONFIG_TRACE_IRQFLAGS + andi. r12,r9,MSR_EE + beq- trace_syscall_entry_irq_off +#endif /* CONFIG_TRACE_IRQFLAGS */ + /* * Handle a system call. */ @@ -312,33 +356,14 @@ _GLOBAL(DoSyscall) stw r3,ORIG_GPR3(r1) li r12,0 stw r12,RESULT(r1) - lwz r11,_CCR(r1) /* Clear SO bit in CR */ - rlwinm r11,r11,0,4,2 - stw r11,_CCR(r1) #ifdef CONFIG_TRACE_IRQFLAGS - /* Return from syscalls can (and generally will) hard enable - * interrupts. You aren't supposed to call a syscall with - * interrupts disabled in the first place. However, to ensure - * that we get it right vs. lockdep if it happens, we force - * that hard enable here with appropriate tracing if we see - * that we have been called with interrupts off - */ + /* Make sure interrupts are enabled */ mfmsr r11 andi. r12,r11,MSR_EE - bne+ 1f - /* We came in with interrupts disabled, we enable them now */ - bl trace_hardirqs_on - mfmsr r11 - lwz r0,GPR0(r1) - lwz r3,GPR3(r1) - lwz r4,GPR4(r1) - ori r11,r11,MSR_EE - lwz r5,GPR5(r1) - lwz r6,GPR6(r1) - lwz r7,GPR7(r1) - lwz r8,GPR8(r1) - mtmsr r11 -1: + /* We came in with interrupts disabled, we WARN and mark them enabled + * for lockdep now */ +0: tweqi r12, 0 + EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING #endif /* CONFIG_TRACE_IRQFLAGS */ lwz r11,TI_FLAGS(r2) andi. r11,r11,_TIF_SYSCALL_DOTRACE @@ -392,8 +417,7 @@ syscall_exit_cont: lwz r8,_MSR(r1) #ifdef CONFIG_TRACE_IRQFLAGS /* If we are going to return from the syscall with interrupts - * off, we trace that here. It shouldn't happen though but we - * want to catch the bugger if it does right ? + * off, we trace that here. It shouldn't normally happen. */ andi. r10,r8,MSR_EE bne+ 1f @@ -422,12 +446,11 @@ BEGIN_FTR_SECTION lwarx r7,0,r1 END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) stwcx. r0,0,r1 /* to clear the reservation */ -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - andi. 
r4,r8,MSR_PR - beq 3f ACCOUNT_CPU_USER_EXIT(r2, r5, r7) -3: +#ifdef CONFIG_PPC_BOOK3S_32 + kuep_unlock r5, r7 #endif + kuap_check r2, r4 lwz r4,_LINK(r1) lwz r5,_CCR(r1) mtlr r4 @@ -678,6 +701,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE) stw r10,_CCR(r1) stw r1,KSP(r3) /* Set old stack pointer */ + kuap_check r2, r4 #ifdef CONFIG_SMP /* We need a sync somewhere here to make sure that if the * previous task gets rescheduled on another CPU, it sees all @@ -820,6 +844,9 @@ restore_user: bnel- load_dbcr0 #endif ACCOUNT_CPU_USER_EXIT(r2, r10, r11) +#ifdef CONFIG_PPC_BOOK3S_32 + kuep_unlock r10, r11 +#endif b restore @@ -866,12 +893,12 @@ resume_kernel: /* check current_thread_info->preempt_count */ lwz r0,TI_PREEMPT(r2) cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ - bne restore + bne restore_kuap andi. r8,r8,_TIF_NEED_RESCHED - beq+ restore + beq+ restore_kuap lwz r3,_MSR(r1) andi. r0,r3,MSR_EE /* interrupts off? */ - beq restore /* don't schedule if so */ + beq restore_kuap /* don't schedule if so */ #ifdef CONFIG_TRACE_IRQFLAGS /* Lockdep thinks irqs are enabled, we need to call * preempt_schedule_irq with IRQs off, so we inform lockdep @@ -879,10 +906,7 @@ resume_kernel: */ bl trace_hardirqs_off #endif -1: bl preempt_schedule_irq - lwz r3,TI_FLAGS(r2) - andi. r0,r3,_TIF_NEED_RESCHED - bne- 1b + bl preempt_schedule_irq #ifdef CONFIG_TRACE_IRQFLAGS /* And now, to properly rebalance the above, we tell lockdep they * are being turned back on, which will happen when we return @@ -890,6 +914,8 @@ resume_kernel: bl trace_hardirqs_on #endif #endif /* CONFIG_PREEMPT */ +restore_kuap: + kuap_restore r1, r2, r9, r10, r0 /* interrupts are hard-disabled at this point */ restore: @@ -913,28 +939,14 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) * off in this assembly code while peeking at TI_FLAGS() and such. However * we need to inform it if the exception turned interrupts off, and we * are about to trun them back on. - * - * The problem here sadly is that we don't know whether the exceptions was - * one that turned interrupts off or not. So we always tell lockdep about - * turning them on here when we go back to wherever we came from with EE - * on, even if that may meen some redudant calls being tracked. Maybe later - * we could encode what the exception did somewhere or test the exception - * type in the pt_regs but that sounds overkill */ andi. r10,r9,MSR_EE beq 1f - /* - * Since the ftrace irqsoff latency trace checks CALLER_ADDR1, - * which is the stack frame here, we need to force a stack frame - * in case we came from user space. - */ stwu r1,-32(r1) mflr r0 stw r0,4(r1) - stwu r1,-32(r1) bl trace_hardirqs_on - lwz r1,0(r1) - lwz r1,0(r1) + addi r1, r1, 32 lwz r9,_MSR(r1) 1: #endif /* CONFIG_TRACE_IRQFLAGS */ @@ -1197,6 +1209,7 @@ load_dbcr0: .section .bss .align 4 + .global global_dbcr0 global_dbcr0: .space 8*NR_CPUS .previous @@ -1207,9 +1220,10 @@ do_work: /* r10 contains MSR_KERNEL here */ beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ - /* Note: We don't need to inform lockdep that we are enabling - * interrupts here. 
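
Both this 32-bit resume_kernel hunk and the 64-bit one below drop the caller-side retry loop around preempt_schedule_irq. That is safe because the scheduler core already re-tests need_resched() itself; paraphrasing the body of preempt_schedule_irq() in kernel/sched/core.c:

    /* paraphrase of preempt_schedule_irq(), kernel/sched/core.c */
    do {
            preempt_disable();
            local_irq_enable();
            __schedule(true);       /* preemption-triggered reschedule */
            local_irq_disable();
            sched_preempt_enable_no_resched();
    } while (need_resched());
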
As far as it knows, they are already enabled - */ +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_on + mfmsr r10 +#endif ori r10,r10,MSR_EE SYNC MTMSRD(r10) /* hard-enable interrupts */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 15c67d2c0534..d978af78bf2a 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -46,6 +46,7 @@ #include <asm/exception-64e.h> #endif #include <asm/feature-fixups.h> +#include <asm/kup.h> /* * System calls. @@ -120,6 +121,9 @@ END_BTB_FLUSH_SECTION addi r9,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) std r11,-16(r9) /* "regshere" marker */ + + kuap_check_amr r10, r11 + #if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR) BEGIN_FW_FTR_SECTION beq 33f @@ -275,6 +279,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) andi. r6,r8,MSR_PR ld r4,_LINK(r1) + kuap_check_amr r10, r11 + #ifdef CONFIG_PPC_BOOK3S /* * Clear MSR_RI, MSR_EE is already and remains disabled. We could do @@ -296,6 +302,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) std r8, PACATMSCRATCH(r13) #endif + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ ld r2,GPR2(r1) ld r1,GPR1(r1) @@ -306,8 +316,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) RFI_TO_USER b . /* prevent speculative execution */ - /* exit to kernel */ -1: ld r2,GPR2(r1) +1: /* exit to kernel */ + kuap_restore_amr r2 + + ld r2,GPR2(r1) ld r1,GPR1(r1) mtlr r4 mtcr r5 @@ -594,6 +606,8 @@ _GLOBAL(_switch) std r23,_CCR(r1) std r1,KSP(r3) /* Set old stack pointer */ + kuap_check_amr r9, r10 + FLUSH_COUNT_CACHE /* @@ -851,13 +865,7 @@ resume_kernel: * sure we are soft-disabled first and reconcile irq state. */ RECONCILE_IRQ_STATE(r3,r4) -1: bl preempt_schedule_irq - - /* Re-test flags and eventually loop */ - ld r9, PACA_THREAD_INFO(r13) - ld r4,TI_FLAGS(r9) - andi. r0,r4,_TIF_NEED_RESCHED - bne 1b + bl preempt_schedule_irq /* * arch_local_irq_restore() from preempt_schedule_irq above may @@ -942,6 +950,8 @@ fast_exception_return: ld r4,_XER(r1) mtspr SPRN_XER,r4 + kuap_check_amr r5, r6 + REST_8GPRS(5, r1) andi. r0,r3,MSR_RI @@ -974,6 +984,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ACCOUNT_CPU_USER_EXIT(r13, r2, r4) REST_GPR(13, r1) + /* + * We don't need to restore AMR on the way back to userspace for KUAP. + * The value of AMR only matters while we're in the kernel. + */ mtspr SPRN_SRR1,r3 ld r2,_CCR(r1) @@ -1006,6 +1020,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) ld r0,GPR0(r1) ld r2,GPR2(r1) ld r3,GPR3(r1) + + kuap_restore_amr r4 + ld r4,GPR4(r1) ld r1,GPR1(r1) RFI_TO_KERNEL diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 9481a117e242..6b86055e5251 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -19,6 +19,7 @@ #include <asm/cpuidle.h> #include <asm/head-64.h> #include <asm/feature-fixups.h> +#include <asm/kup.h> /* * There are a few constraints to be concerned with. @@ -120,7 +121,9 @@ EXC_VIRT_NONE(0x4000, 0x100) mfspr r10,SPRN_SRR1 ; \ rlwinm. 
r10,r10,47-31,30,31 ; \ beq- 1f ; \ - cmpwi cr3,r10,2 ; \ + cmpwi cr1,r10,2 ; \ + mfspr r3,SPRN_SRR1 ; \ + bltlr cr1 ; /* no state loss, return to idle caller */ \ BRANCH_TO_C000(r10, system_reset_idle_common) ; \ 1: \ KVMTEST_PR(n) ; \ @@ -144,8 +147,11 @@ TRAMP_KVM(PACA_EXNMI, 0x100) #ifdef CONFIG_PPC_P7_NAP EXC_COMMON_BEGIN(system_reset_idle_common) - mfspr r12,SPRN_SRR1 - b pnv_powersave_wakeup + /* + * This must be a direct branch (without linker branch stub) because + * we can not use TOC at this point as r2 may not be restored yet. + */ + b idle_return_gpr_loss #endif /* @@ -309,6 +315,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early) mfspr r11,SPRN_DSISR /* Save DSISR */ std r11,_DSISR(r1) std r9,_CCR(r1) /* Save CR in stackframe */ + kuap_save_amr_and_lock r9, r10, cr1 /* Save r9 through r13 from EXMC save area to stack frame. */ EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) mfmsr r11 /* get MSR value */ @@ -427,17 +434,17 @@ EXC_COMMON_BEGIN(machine_check_idle_common) * Then decrement MCE nesting after finishing with the stack. */ ld r3,_MSR(r1) + ld r4,_LINK(r1) lhz r11,PACA_IN_MCE(r13) subi r11,r11,1 sth r11,PACA_IN_MCE(r13) - /* Turn off the RI bit because SRR1 is used by idle wakeup code. */ - /* Recoverability could be improved by reducing the use of SRR1. */ - li r11,0 - mtmsrd r11,1 - - b pnv_powersave_wakeup_mce + mtlr r4 + rlwinm r10,r3,47-31,30,31 + cmpwi cr1,r10,2 + bltlr cr1 /* no state loss, return to idle caller */ + b idle_return_gpr_loss #endif /* * Handle machine check early in real mode. We come here with @@ -1109,6 +1116,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early) mfspr r11,SPRN_HSRR0 /* Save HSRR0 */ mfspr r12,SPRN_HSRR1 /* Save HSRR1 */ EXCEPTION_PROLOG_COMMON_1() + /* We don't touch AMR here, we never go to virtual mode */ EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN) EXCEPTION_PROLOG_COMMON_3(0xe60) addi r3,r1,STACK_FRAME_OVERHEAD diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 45a8d0be1c96..25f063f56ec5 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -36,6 +36,7 @@ #include <linux/sysfs.h> #include <linux/slab.h> #include <linux/cma.h> +#include <linux/hugetlb.h> #include <asm/debugfs.h> #include <asm/page.h> diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 529dcc21c3f9..cecd57e1d046 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -63,6 +63,7 @@ _GLOBAL(load_fp_state) REST_32FPVSRS(0, R4, R3) blr EXPORT_SYMBOL(load_fp_state) +_ASM_NOKPROBE_SYMBOL(load_fp_state); /* used by restore_math */ /* * Store FP state into memory, including FPSCR diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index e25b615e9f9e..755fab9641d6 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -37,6 +37,8 @@ #include <asm/export.h> #include <asm/feature-fixups.h> +#include "head_32.h" + /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ #define LOAD_BAT(n, reg, RA, RB) \ /* see the comment for clear_bats() -- Cort */ \ @@ -160,6 +162,10 @@ __after_mmu_off: bl flush_tlbs bl initial_bats + bl load_segment_registers +#ifdef CONFIG_KASAN + bl early_hash_table +#endif #if defined(CONFIG_BOOTX_TEXT) bl setup_disp_bat #endif @@ -205,7 +211,7 @@ __after_mmu_off: */ turn_on_mmu: mfmsr r0 - ori r0,r0,MSR_DR|MSR_IR + ori r0,r0,MSR_DR|MSR_IR|MSR_RI mtspr SPRN_SRR1,r0 lis r0,start_here@h ori r0,r0,start_here@l @@ -242,103 +248,6 @@ __secondary_hold_spinloop: __secondary_hold_acknowledge: .long -1 -/* - * Exception entry code. 
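
The system-reset and machine-check idle-wakeup paths above both extract SRR1[46:47] (the `rlwinm rX,rY,47-31,30,31` rotate) and return straight to the idle caller when the field is below 2, i.e. when no GPRs were lost. The same decode in C, using the field values implied by the SRR1_WS_* masks in the reg.h hunk earlier:

    #include <stdio.h>

    #define SRR1_WAKESTATE  0x00030000UL    /* powersave exit field, SRR1[46:47] */
    #define SRR1_WS_NOLOSS  1               /* all resources maintained */
    #define SRR1_WS_GPRLOSS 2               /* GPRs not maintained */
    #define SRR1_WS_HVLOSS  3               /* HV resources not maintained */

    int main(void)
    {
            unsigned long srr1 = 0x00020000UL;             /* sample wake value */
            unsigned int ws = (srr1 & SRR1_WAKESTATE) >> 16;

            if (ws < SRR1_WS_GPRLOSS)
                    puts("no state loss: return to idle caller directly");
            else
                    puts("state loss: restore GPRs via idle_return_gpr_loss");
            return 0;
    }
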
This code runs with address translation - * turned off, i.e. using physical addresses. - * We assume sprg3 has the physical address of the current - * task's thread_struct. - */ -#define EXCEPTION_PROLOG \ - mtspr SPRN_SPRG_SCRATCH0,r10; \ - mtspr SPRN_SPRG_SCRATCH1,r11; \ - mfcr r10; \ - EXCEPTION_PROLOG_1; \ - EXCEPTION_PROLOG_2 - -#define EXCEPTION_PROLOG_1 \ - mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ - andi. r11,r11,MSR_PR; \ - tophys(r11,r1); /* use tophys(r1) if kernel */ \ - beq 1f; \ - mfspr r11,SPRN_SPRG_THREAD; \ - lwz r11,TASK_STACK-THREAD(r11); \ - addi r11,r11,THREAD_SIZE; \ - tophys(r11,r11); \ -1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */ - - -#define EXCEPTION_PROLOG_2 \ - stw r10,_CCR(r11); /* save registers */ \ - stw r12,GPR12(r11); \ - stw r9,GPR9(r11); \ - mfspr r10,SPRN_SPRG_SCRATCH0; \ - stw r10,GPR10(r11); \ - mfspr r12,SPRN_SPRG_SCRATCH1; \ - stw r12,GPR11(r11); \ - mflr r10; \ - stw r10,_LINK(r11); \ - mfspr r12,SPRN_SRR0; \ - mfspr r9,SPRN_SRR1; \ - stw r1,GPR1(r11); \ - stw r1,0(r11); \ - tovirt(r1,r11); /* set new kernel sp */ \ - li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \ - MTMSRD(r10); /* (except for mach check in rtas) */ \ - stw r0,GPR0(r11); \ - lis r10,STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */ \ - addi r10,r10,STACK_FRAME_REGS_MARKER@l; \ - stw r10,8(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) - -/* - * Note: code which follows this uses cr0.eq (set if from kernel), - * r11, r12 (SRR0), and r9 (SRR1). - * - * Note2: once we have set r1 we are in a position to take exceptions - * again, and we could thus set MSR:RI at that point. - */ - -/* - * Exception vectors. - */ -#define EXCEPTION(n, label, hdlr, xfer) \ - . = n; \ - DO_KVM n; \ -label: \ - EXCEPTION_PROLOG; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - xfer(n, hdlr) - -#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \ - li r10,trap; \ - stw r10,_TRAP(r11); \ - li r10,MSR_KERNEL; \ - copyee(r10, r9); \ - bl tfer; \ -i##n: \ - .long hdlr; \ - .long ret - -#define COPY_EE(d, s) rlwimi d,s,0,16,16 -#define NOCOPY(d, s) - -#define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ - ret_from_except) - -#define EXC_XFER_EE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_EE_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \ - ret_from_except) - /* System reset */ /* core99 pmac starts the seconary here by changing the vector, and putting it back to what it was (unknown_exception) when done. */ @@ -387,7 +296,11 @@ DataAccess: EXCEPTION_PROLOG mfspr r10,SPRN_DSISR stw r10,_DSISR(r11) +#ifdef CONFIG_PPC_KUAP + andis. r0,r10,(DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h +#else andis. 
r0,r10,(DSISR_BAD_FAULT_32S|DSISR_DABRMATCH)@h +#endif bne 1f /* if not, try to put a PTE */ mfspr r4,SPRN_DAR /* into the hash table */ rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */ @@ -428,7 +341,7 @@ Alignment: mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, alignment_exception) + EXC_XFER_STD(0x600, alignment_exception) /* Program check exception */ EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) @@ -449,24 +362,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) bl load_up_fpu /* if from user, just load it up */ b fast_exception_return 1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception) + EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception) /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD) /* System call */ . = 0xc00 DO_KVM 0xc00 SystemCall: - EXCEPTION_PROLOG - EXC_XFER_EE_LITE(0xc00, DoSyscall) + SYSCALL_ENTRY 0xc00 /* Single step - not used on 601 */ EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD) /* * The Altivec unavailable trap is at 0x0f20. Foo. @@ -522,9 +434,9 @@ InstructionTLBMiss: andc. r1,r1,r0 /* check access & ~permission */ bne- InstructionAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ - ori r1, r1, 0xe05 /* clear out reserved bits */ - andc r1, r0, r1 /* PP = user? 2 : 0 */ + rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ + ori r1, r1, 0xe06 /* clear out reserved bits */ + andc r1, r0, r1 /* PP = user? 1 : 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) @@ -590,11 +502,11 @@ DataLoadTLBMiss: * we would need to update the pte atomically with lwarx/stwcx. */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */ + rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */ rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */ ori r1,r1,0xe04 /* clear out reserved bits */ - andc r1,r0,r1 /* PP = user? rw? 2: 3: 0 */ + andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) @@ -670,9 +582,9 @@ DataStoreTLBMiss: * we would need to update the pte atomically with lwarx/stwcx. */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */ - li r1,0xe05 /* clear out reserved bits & PP lsb */ - andc r1,r0,r1 /* PP = user? 2: 0 */ + rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */ + li r1,0xe06 /* clear out reserved bits & PP msb */ + andc r1,r0,r1 /* PP = user? 
1: 0 */ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) @@ -698,35 +610,35 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) #define altivec_assist_exception unknown_exception #endif - EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_EE) - EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_EE) + EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD) + EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD) EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE) - EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_STD) + EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_STD) + 
EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_STD) . = 0x3000 @@ -738,7 +650,7 @@ AltiVecUnavailable: b fast_exception_return #endif /* CONFIG_ALTIVEC */ 1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception) + EXC_XFER_LITE(0xf20, altivec_unavailable_exception) PerformanceMonitor: EXCEPTION_PROLOG @@ -880,11 +792,24 @@ _ENTRY(__restore_cpu_setup) blr #endif /* !defined(CONFIG_PPC_BOOK3S_32) */ - /* * Load stuff into the MMU. Intended to be called with * IR=0 and DR=0. */ +#ifdef CONFIG_KASAN +early_hash_table: + sync /* Force all PTE updates to finish */ + isync + tlbia /* Clear all TLB entries */ + sync /* wait for tlbia/tlbie to finish */ + TLBSYNC /* ... on all CPUs */ + /* Load the SDR1 register (hash table base & size) */ + lis r6, early_hash - PAGE_OFFSET@h + ori r6, r6, 3 /* 256kB table */ + mtspr SPRN_SDR1, r6 + blr +#endif + load_up_mmu: sync /* Force all PTE updates to finish */ isync @@ -896,14 +821,6 @@ load_up_mmu: tophys(r6,r6) lwz r6,_SDR1@l(r6) mtspr SPRN_SDR1,r6 - li r0,16 /* load up segment register values */ - mtctr r0 /* for context 0 */ - lis r3,0x2000 /* Ku = 1, VSID = 0 */ - li r4,0 -3: mtsrin r3,r4 - addi r3,r3,0x111 /* increment VSID */ - addis r4,r4,0x1000 /* address of next segment */ - bdnz 3b /* Load the BAT registers with the values set up by MMU_init. MMU_init takes care of whether we're on a 601 or not. */ @@ -925,6 +842,32 @@ BEGIN_MMU_FTR_SECTION END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) blr +load_segment_registers: + li r0, NUM_USER_SEGMENTS /* load up user segment register values */ + mtctr r0 /* for context 0 */ + li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ +#ifdef CONFIG_PPC_KUEP + oris r3, r3, SR_NX@h /* Set Nx */ +#endif +#ifdef CONFIG_PPC_KUAP + oris r3, r3, SR_KS@h /* Set Ks */ +#endif + li r4, 0 +3: mtsrin r3, r4 + addi r3, r3, 0x111 /* increment VSID */ + addis r4, r4, 0x1000 /* address of next segment */ + bdnz 3b + li r0, 16 - NUM_USER_SEGMENTS /* load up kernel segment registers */ + mtctr r0 /* for context 0 */ + rlwinm r3, r3, 0, ~SR_NX /* Nx = 0 */ + rlwinm r3, r3, 0, ~SR_KS /* Ks = 0 */ + oris r3, r3, SR_KP@h /* Kp = 1 */ +3: mtsrin r3, r4 + addi r3, r3, 0x111 /* increment VSID */ + addis r4, r4, 0x1000 /* address of next segment */ + bdnz 3b + blr + /* * This is where the main kernel code starts. */ @@ -950,11 +893,17 @@ start_here: * Do early platform-specific initialization, * and set up the MMU. 
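A note on the segment-register values built by load_segment_registers and the switch_mmu_context hunk above: user segments get Nx (no-execute) under CONFIG_PPC_KUEP and Ks (the supervisor-state key) under CONFIG_PPC_KUAP, while kernel segments carry Kp only. A minimal C sketch of that layout follows; the numeric SR_* mask values and the helper name are assumptions for illustration (the asm relies only on the SR_NX/SR_KS/SR_KP symbols).

	/* Hypothetical model of a book3s/32 segment register word (T=0). */
	#define SR_KS	0x40000000u	/* supervisor-state key (KUAP) */
	#define SR_KP	0x20000000u	/* problem-state key */
	#define SR_NX	0x10000000u	/* no-execute (KUEP) */

	static unsigned int make_segment_reg(unsigned int vsid, int user)
	{
		unsigned int sr = vsid & 0x00ffffffu;	/* low 24 bits: VSID */

		if (user) {	/* segments 0 .. NUM_USER_SEGMENTS-1 */
			if (IS_ENABLED(CONFIG_PPC_KUEP))
				sr |= SR_NX;	/* kernel may not execute user pages */
			if (IS_ENABLED(CONFIG_PPC_KUAP))
				sr |= SR_KS;	/* kernel accesses are key-checked */
		} else {	/* kernel segments: Kp = 1, Ks = 0, Nx = 0 */
			sr |= SR_KP;
		}
		return sr;
	}

Each mtsrin iteration in the loops above then adds 0x111 to the VSID and 0x10000000 to the effective address, i.e. one 256MB segment per pass.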
*/ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif li r3,0 mr r4,r31 bl machine_init bl __save_cpu_setup bl MMU_init +BEGIN_MMU_FTR_SECTION + bl MMU_init_hw_patch +END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) /* * Go back to running unmapped so we can load up new values @@ -1006,7 +955,12 @@ _ENTRY(switch_mmu_context) blt- 4f mulli r3,r3,897 /* multiply context by skew factor */ rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */ - addis r3,r3,0x6000 /* Set Ks, Ku bits */ +#ifdef CONFIG_PPC_KUEP + oris r3, r3, SR_NX@h /* Set Nx */ +#endif +#ifdef CONFIG_PPC_KUAP + oris r3, r3, SR_KS@h /* Set Ks */ +#endif li r0,NUM_USER_SEGMENTS mtctr r0 diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h new file mode 100644 index 000000000000..4a692553651f --- /dev/null +++ b/arch/powerpc/kernel/head_32.h @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __HEAD_32_H__ +#define __HEAD_32_H__ + +#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */ + +/* + * MSR_KERNEL is > 0x8000 on 4xx/Book-E since it includes MSR_CE. + */ +.macro __LOAD_MSR_KERNEL r, x +.if \x >= 0x8000 + lis \r, (\x)@h + ori \r, \r, (\x)@l +.else + li \r, (\x) +.endif +.endm +#define LOAD_MSR_KERNEL(r, x) __LOAD_MSR_KERNEL r, x + +/* + * Exception entry code. This code runs with address translation + * turned off, i.e. using physical addresses. + * We assume sprg3 has the physical address of the current + * task's thread_struct. + */ + +.macro EXCEPTION_PROLOG + mtspr SPRN_SPRG_SCRATCH0,r10 + mtspr SPRN_SPRG_SCRATCH1,r11 + mfcr r10 + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 +.endm + +.macro EXCEPTION_PROLOG_1 + mfspr r11,SPRN_SRR1 /* check whether user or kernel */ + andi. r11,r11,MSR_PR + tophys(r11,r1) /* use tophys(r1) if kernel */ + beq 1f + mfspr r11,SPRN_SPRG_THREAD + lwz r11,TASK_STACK-THREAD(r11) + addi r11,r11,THREAD_SIZE + tophys(r11,r11) +1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */ +.endm + +.macro EXCEPTION_PROLOG_2 + stw r10,_CCR(r11) /* save registers */ + stw r12,GPR12(r11) + stw r9,GPR9(r11) + mfspr r10,SPRN_SPRG_SCRATCH0 + stw r10,GPR10(r11) + mfspr r12,SPRN_SPRG_SCRATCH1 + stw r12,GPR11(r11) + mflr r10 + stw r10,_LINK(r11) + mfspr r12,SPRN_SRR0 + mfspr r9,SPRN_SRR1 + stw r1,GPR1(r11) + stw r1,0(r11) + tovirt(r1,r11) /* set new kernel sp */ +#ifdef CONFIG_40x + rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ +#else + li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */ + MTMSRD(r10) /* (except for mach check in rtas) */ +#endif + stw r0,GPR0(r11) + lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + addi r10,r10,STACK_FRAME_REGS_MARKER@l + stw r10,8(r11) + SAVE_4GPRS(3, r11) + SAVE_2GPRS(7, r11) +.endm + +.macro SYSCALL_ENTRY trapno + mfspr r12,SPRN_SPRG_THREAD + mfcr r10 + lwz r11,TASK_STACK-THREAD(r12) + mflr r9 + addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE + rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */ + tophys(r11,r11) + stw r10,_CCR(r11) /* save registers */ + mfspr r10,SPRN_SRR0 + stw r9,_LINK(r11) + mfspr r9,SPRN_SRR1 + stw r1,GPR1(r11) + stw r1,0(r11) + tovirt(r1,r11) /* set new kernel sp */ + stw r10,_NIP(r11) +#ifdef CONFIG_40x + rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?)
*/ +#else + LOAD_MSR_KERNEL(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */ + MTMSRD(r10) /* (except for mach check in rtas) */ +#endif + lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + stw r2,GPR2(r11) + addi r10,r10,STACK_FRAME_REGS_MARKER@l + stw r9,_MSR(r11) + li r2, \trapno + 1 + stw r10,8(r11) + stw r2,_TRAP(r11) + SAVE_GPR(0, r11) + SAVE_4GPRS(3, r11) + SAVE_2GPRS(7, r11) + addi r11,r1,STACK_FRAME_OVERHEAD + addi r2,r12,-THREAD + stw r11,PT_REGS(r12) +#if defined(CONFIG_40x) + /* Check to see if the dbcr0 register is set up to debug. Use the + internal debug mode bit to do this. */ + lwz r12,THREAD_DBCR0(r12) + andis. r12,r12,DBCR0_IDM@h +#endif + ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) +#if defined(CONFIG_40x) + beq+ 3f + /* From user and task is ptraced - load up global dbcr0 */ + li r12,-1 /* clear all pending debug events */ + mtspr SPRN_DBSR,r12 + lis r11,global_dbcr0@ha + tophys(r11,r11) + addi r11,r11,global_dbcr0@l + lwz r12,0(r11) + mtspr SPRN_DBCR0,r12 + lwz r12,4(r11) + addi r12,r12,-1 + stw r12,4(r11) +#endif + +3: + tovirt(r2, r2) /* set r2 to current */ + lis r11, transfer_to_syscall@h + ori r11, r11, transfer_to_syscall@l +#ifdef CONFIG_TRACE_IRQFLAGS + /* + * If MSR is changing we need to keep interrupts disabled at this point + * otherwise we might risk taking an interrupt before we tell lockdep + * they are enabled. + */ + LOAD_MSR_KERNEL(r10, MSR_KERNEL) + rlwimi r10, r9, 0, MSR_EE +#else + LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE) +#endif +#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS) + mtspr SPRN_NRI, r0 +#endif + mtspr SPRN_SRR1,r10 + mtspr SPRN_SRR0,r11 + SYNC + RFI /* jump to handler, enable MMU */ +.endm + +/* + * Note: code which follows this uses cr0.eq (set if from kernel), + * r11, r12 (SRR0), and r9 (SRR1). + * + * Note2: once we have set r1 we are in a position to take exceptions + * again, and we could thus set MSR:RI at that point. + */ + +/* + * Exception vectors. + */ +#ifdef CONFIG_PPC_BOOK3S +#define START_EXCEPTION(n, label) \ + . = n; \ + DO_KVM n; \ +label: + +#else +#define START_EXCEPTION(n, label) \ + . = n; \ +label: + +#endif + +#define EXCEPTION(n, label, hdlr, xfer) \ + START_EXCEPTION(n, label) \ + EXCEPTION_PROLOG; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + xfer(n, hdlr) + +#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ + li r10,trap; \ + stw r10,_TRAP(r11); \ + LOAD_MSR_KERNEL(r10, msr); \ + bl tfer; \ + .long hdlr; \ + .long ret + +#define EXC_XFER_STD(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \ + ret_from_except) + +#endif /* __HEAD_32_H__ */ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index a9c934f2319b..cf54b784100d 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -44,6 +44,8 @@ #include <asm/export.h> #include <asm/asm-405.h> +#include "head_32.h" + /* As with the other PowerPC ports, it is expected that when code * execution begins here, the following registers contain valid, yet * optional, information: @@ -99,46 +101,6 @@ _ENTRY(saved_ksp_limit) .space 4 /* - * Exception vector entry code. This code runs with address translation - * turned off (i.e. using physical addresses). We assume SPRG_THREAD has - * the physical address of the current task thread_struct. 
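One detail of the EXC_XFER_STD/EXC_XFER_LITE pair defined just above is easy to miss: the LITE variant stores n+1, not n, as the trap number, and that odd low bit is what later tells generic code the non-volatile GPRs were not saved by transfer_to_handler. A small C illustration of how such a frame is recognised; this mirrors the 32-bit FULL_REGS() test in asm/ptrace.h, but the helper name here is made up:

	#include <asm/ptrace.h>

	/* trap == n   : EXC_XFER_STD went through transfer_to_handler_full
	 * trap == n+1 : EXC_XFER_LITE took the light path, NVGPRs not saved */
	static inline int frame_has_full_regs(struct pt_regs *regs)
	{
		return (regs->trap & 1) == 0;
	}

SYSCALL_ENTRY keeps the same convention with its li r2, \trapno + 1.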
- * Note that we have to have decremented r1 before we write to any fields - * of the exception frame, since a critical interrupt could occur at any - * time, and it will write to the area immediately below the current r1. - */ -#define NORMAL_EXCEPTION_PROLOG \ - mtspr SPRN_SPRG_SCRATCH0,r10; /* save two registers to work with */\ - mtspr SPRN_SPRG_SCRATCH1,r11; \ - mtspr SPRN_SPRG_SCRATCH2,r1; \ - mfcr r10; /* save CR in r10 for now */\ - mfspr r11,SPRN_SRR1; /* check whether user or kernel */\ - andi. r11,r11,MSR_PR; \ - beq 1f; \ - mfspr r1,SPRN_SPRG_THREAD; /* if from user, start at top of */\ - lwz r1,TASK_STACK-THREAD(r1); /* this thread's kernel stack */\ - addi r1,r1,THREAD_SIZE; \ -1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\ - tophys(r11,r1); \ - stw r10,_CCR(r11); /* save various registers */\ - stw r12,GPR12(r11); \ - stw r9,GPR9(r11); \ - mfspr r10,SPRN_SPRG_SCRATCH0; \ - stw r10,GPR10(r11); \ - mfspr r12,SPRN_SPRG_SCRATCH1; \ - stw r12,GPR11(r11); \ - mflr r10; \ - stw r10,_LINK(r11); \ - mfspr r10,SPRN_SPRG_SCRATCH2; \ - mfspr r12,SPRN_SRR0; \ - stw r10,GPR1(r11); \ - mfspr r9,SPRN_SRR1; \ - stw r10,0(r11); \ - rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ - stw r0,GPR0(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) - -/* * Exception prolog for critical exceptions. This is a little different * from the normal exception prolog above since a critical exception * can potentially occur at any point during normal exception processing. @@ -177,6 +139,9 @@ _ENTRY(saved_ksp_limit) tovirt(r1,r11); \ rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ stw r0,GPR0(r11); \ + lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\ + addi r10, r10, STACK_FRAME_REGS_MARKER@l; \ + stw r10, 8(r11); \ SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) @@ -196,53 +161,12 @@ _ENTRY(saved_ksp_limit) /* * Exception vectors. */ -#define START_EXCEPTION(n, label) \ - . = n; \ -label: - -#define EXCEPTION(n, label, hdlr, xfer) \ - START_EXCEPTION(n, label); \ - NORMAL_EXCEPTION_PROLOG; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - xfer(n, hdlr) - #define CRITICAL_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(n, label); \ CRITICAL_EXCEPTION_PROLOG; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - NOCOPY, crit_transfer_to_handler, \ - ret_from_crit_exc) - -#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \ - li r10,trap; \ - stw r10,_TRAP(r11); \ - lis r10,msr@h; \ - ori r10,r10,msr@l; \ - copyee(r10, r9); \ - bl tfer; \ - .long hdlr; \ - .long ret - -#define COPY_EE(d, s) rlwimi d,s,0,16,16 -#define NOCOPY(d, s) - -#define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ - ret_from_except) - -#define EXC_XFER_EE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_EE_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \ - ret_from_except) - + crit_transfer_to_handler, ret_from_crit_exc) /* * 0x0100 - Critical Interrupt Exception @@ -393,7 +317,7 @@ label: * This is caused by a fetch from non-execute or guarded pages. 
*/ START_EXCEPTION(0x0400, InstructionAccess) - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG mr r4,r12 /* Pass SRR0 as arg2 */ li r5,0 /* Pass zero as arg3 */ EXC_XFER_LITE(0x400, handle_page_fault) @@ -403,33 +327,32 @@ label: /* 0x0600 - Alignment Exception */ START_EXCEPTION(0x0600, Alignment) - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */ stw r4,_DEAR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, alignment_exception) + EXC_XFER_STD(0x600, alignment_exception) /* 0x0700 - Program Exception */ START_EXCEPTION(0x0700, ProgramCheck) - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG mfspr r4,SPRN_ESR /* Grab the ESR and save it */ stw r4,_ESR(r11) addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_STD(0x700, program_check_exception) - EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_STD) /* 0x0C00 - System Call Exception */ START_EXCEPTION(0x0C00, SystemCall) - NORMAL_EXCEPTION_PROLOG - EXC_XFER_EE_LITE(0xc00, DoSyscall) + SYSCALL_ENTRY 0xc00 - EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD) /* 0x1000 - Programmable Interval Timer (PIT) Exception */ . 
= 0x1000 @@ -646,25 +569,25 @@ label: mfspr r10, SPRN_SPRG_SCRATCH0 b InstructionAccess - EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD) #ifdef CONFIG_IBM405_ERR51 /* 405GP errata 51 */ START_EXCEPTION(0x1700, Trap_17) b DTLBMiss #else - EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD) #endif - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_STD) /* Check for a single step debug exception while in an exception * handler before state has been saved. This is to catch the case @@ -726,11 +649,11 @@ label: addi r3,r1,STACK_FRAME_OVERHEAD EXC_XFER_TEMPLATE(DebugException, 0x2002, \ (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - NOCOPY, crit_transfer_to_handler, ret_from_crit_exc) + crit_transfer_to_handler, ret_from_crit_exc) /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */ Decrementer: - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG lis r0,TSR_PIS@h mtspr SPRN_TSR,r0 /* Clear the PIT exception */ addi r3,r1,STACK_FRAME_OVERHEAD @@ -738,9 +661,9 @@ Decrementer: /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */ FITException: - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG addi r3,r1,STACK_FRAME_OVERHEAD; - EXC_XFER_EE(0x1010, unknown_exception) + EXC_XFER_STD(0x1010, unknown_exception) /* Watchdog Timer (WDT) Exception. (from 0x1020) */ WDTException: @@ -748,15 +671,14 @@ WDTException: addi r3,r1,STACK_FRAME_OVERHEAD; EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), - NOCOPY, crit_transfer_to_handler, - ret_from_crit_exc) + crit_transfer_to_handler, ret_from_crit_exc) /* * The other Data TLB exceptions bail out to this point * if they can't resolve the lightweight TLB fault. */ DataAccess: - NORMAL_EXCEPTION_PROLOG + EXCEPTION_PROLOG mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ stw r5,_ESR(r11) mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ @@ -848,6 +770,9 @@ start_here: /* * Decide what sort of machine this is and initialize the MMU. 
*/ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif li r3,0 mr r4,r31 bl machine_init diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 37117ab11584..f15fba58c744 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -203,6 +203,9 @@ _ENTRY(_start); /* * Decide what sort of machine this is and initialize the MMU. */ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif li r3,0 mr r4,r31 bl machine_init @@ -278,16 +281,15 @@ interrupt_base: FP_UNAVAILABLE_EXCEPTION #else EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \ - FloatingPointUnavailable, unknown_exception, EXC_XFER_EE) + FloatingPointUnavailable, unknown_exception, EXC_XFER_STD) #endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) - NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL) - EXC_XFER_EE_LITE(0x0c00, DoSyscall) + SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL /* Auxiliary Processor Unavailable Interrupt */ EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \ - AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE) + AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_STD) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION @@ -295,7 +297,7 @@ interrupt_base: /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \ - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) /* Watchdog Timer Interrupt */ /* TODO: Add watchdog support */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 3fad8d499767..5321a11c2835 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -968,7 +968,9 @@ start_here_multiplatform: /* Restore parameters passed from prom_init/kexec */ mr r3,r31 - bl early_setup /* also sets r13 and SPRG_PACA */ + LOAD_REG_ADDR(r12, DOTSYM(early_setup)) + mtctr r12 + bctrl /* also sets r13 and SPRG_PACA */ LOAD_REG_ADDR(r3, start_here_common) ld r4,PACAKMSR(r13) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 03c73b4c6435..885be7f3d29a 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -33,6 +33,8 @@ #include <asm/export.h> #include <asm/code-patching-asm.h> +#include "head_32.h" + #if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 /* By simply checking Address >= 0x80000000, we know if its a kernel address */ #define SIMPLE_KERNEL_ADDRESS 1 @@ -123,102 +125,6 @@ instruction_counter: .space 4 #endif -/* - * Exception entry code. This code runs with address translation - * turned off, i.e. using physical addresses. - * We assume sprg3 has the physical address of the current - * task's thread_struct. - */ -#define EXCEPTION_PROLOG \ - mtspr SPRN_SPRG_SCRATCH0, r10; \ - mtspr SPRN_SPRG_SCRATCH1, r11; \ - mfcr r10; \ - EXCEPTION_PROLOG_1; \ - EXCEPTION_PROLOG_2 - -#define EXCEPTION_PROLOG_1 \ - mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ - andi. r11,r11,MSR_PR; \ - tophys(r11,r1); /* use tophys(r1) if kernel */ \ - beq 1f; \ - mfspr r11,SPRN_SPRG_THREAD; \ - lwz r11,TASK_STACK-THREAD(r11); \ - addi r11,r11,THREAD_SIZE; \ - tophys(r11,r11); \ -1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. 
frame */ - - -#define EXCEPTION_PROLOG_2 \ - stw r10,_CCR(r11); /* save registers */ \ - stw r12,GPR12(r11); \ - stw r9,GPR9(r11); \ - mfspr r10,SPRN_SPRG_SCRATCH0; \ - stw r10,GPR10(r11); \ - mfspr r12,SPRN_SPRG_SCRATCH1; \ - stw r12,GPR11(r11); \ - mflr r10; \ - stw r10,_LINK(r11); \ - mfspr r12,SPRN_SRR0; \ - mfspr r9,SPRN_SRR1; \ - stw r1,GPR1(r11); \ - stw r1,0(r11); \ - tovirt(r1,r11); /* set new kernel sp */ \ - li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \ - mtmsr r10; \ - stw r0,GPR0(r11); \ - lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */ \ - addi r10, r10, STACK_FRAME_REGS_MARKER@l; \ - stw r10, 8(r11); \ - SAVE_4GPRS(3, r11); \ - SAVE_2GPRS(7, r11) - -/* - * Note: code which follows this uses cr0.eq (set if from kernel), - * r11, r12 (SRR0), and r9 (SRR1). - * - * Note2: once we have set r1 we are in a position to take exceptions - * again, and we could thus set MSR:RI at that point. - */ - -/* - * Exception vectors. - */ -#define EXCEPTION(n, label, hdlr, xfer) \ - . = n; \ -label: \ - EXCEPTION_PROLOG; \ - addi r3,r1,STACK_FRAME_OVERHEAD; \ - xfer(n, hdlr) - -#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \ - li r10,trap; \ - stw r10,_TRAP(r11); \ - li r10,MSR_KERNEL; \ - copyee(r10, r9); \ - bl tfer; \ -i##n: \ - .long hdlr; \ - .long ret - -#define COPY_EE(d, s) rlwimi d,s,0,16,16 -#define NOCOPY(d, s) - -#define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ - ret_from_except) - -#define EXC_XFER_EE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_EE_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \ - ret_from_except) - /* System reset */ EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD) @@ -261,7 +167,7 @@ Alignment: mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE(0x600, alignment_exception) + EXC_XFER_STD(0x600, alignment_exception) /* Program check exception */ EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD) @@ -273,19 +179,18 @@ Alignment: /* Decrementer */ EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) - EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD) /* System call */ . = 0xc00 SystemCall: - EXCEPTION_PROLOG - EXC_XFER_EE_LITE(0xc00, DoSyscall) + SYSCALL_ENTRY 0xc00 /* Single step - not used on 601 */ EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD) - EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE) - EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_EE) + EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD) + EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_STD) /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. 
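All of the SYSCALL_ENTRY conversions in these files share the MSR handling visible in head_32.h: with CONFIG_TRACE_IRQFLAGS the interrupted context's MSR_EE bit is carried over, so no interrupt can arrive before lockdep has been told interrupts are enabled. In C terms the rlwimi computes roughly the following (the function name is illustrative only):

	static unsigned long syscall_entry_msr(unsigned long srr1)
	{
		if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS))
			/* keep the caller's EE until lockdep is updated */
			return (MSR_KERNEL & ~MSR_EE) | (srr1 & MSR_EE);
		return MSR_KERNEL | MSR_EE;
	}

The RFI that follows then switches to that MSR and jumps to transfer_to_syscall in one step.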
@@ -615,13 +520,13 @@ DARFixed:/* Return from dcbx instruction bug workaround */ /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXC_XFER_LITE(0x300, handle_page_fault) - EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD) /* On the MPC8xx, these next four traps are used for development * support of breakpoints and such. Someday I will get around to @@ -643,7 +548,7 @@ DataBreakpoint: mfspr r4,SPRN_BAR stw r4,_DAR(r11) mfspr r5,SPRN_DSISR - EXC_XFER_EE(0x1c00, do_break) + EXC_XFER_STD(0x1c00, do_break) 11: mtcr r10 mfspr r10, SPRN_SPRG_SCRATCH0 @@ -663,10 +568,10 @@ InstructionBreakpoint: mfspr r10, SPRN_SPRG_SCRATCH0 rfi #else - EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD) #endif - EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE) - EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD) + EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD) . = 0x2000 @@ -853,6 +758,9 @@ start_here: /* * Decide what sort of machine this is and initialize the MMU. */ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif li r3,0 mr r4,r31 bl machine_init diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 1b22a8dea399..bfeb469e8106 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -6,6 +6,8 @@ #include <asm/kvm_asm.h> #include <asm/kvm_booke_hv_asm.h> +#ifdef __ASSEMBLY__ + /* * Macros used for common Book-e exception handling */ @@ -81,6 +83,101 @@ END_BTB_FLUSH_SECTION SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) +.macro SYSCALL_ENTRY trapno intno + mfspr r10, SPRN_SPRG_THREAD +#ifdef CONFIG_KVM_BOOKE_HV +BEGIN_FTR_SECTION + mtspr SPRN_SPRG_WSCRATCH0, r10 + stw r11, THREAD_NORMSAVE(0)(r10) + stw r13, THREAD_NORMSAVE(2)(r10) + mfcr r13 /* save CR in r13 for now */ + mfspr r11, SPRN_SRR1 + mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */ + bf 3, 1975f + b kvmppc_handler_BOOKE_INTERRUPT_\intno\()_SPRN_SRR1 +1975: + mr r12, r13 + lwz r13, THREAD_NORMSAVE(2)(r10) +FTR_SECTION_ELSE +#endif + mfcr r12 +#ifdef CONFIG_KVM_BOOKE_HV +ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) +#endif + BOOKE_CLEAR_BTB(r11) + lwz r11, TASK_STACK - THREAD(r10) + rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */ + ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE) + stw r12, _CCR(r11) /* save various registers */ + mflr r12 + stw r12,_LINK(r11) + mfspr r12,SPRN_SRR0 + stw r1, GPR1(r11) + mfspr r9,SPRN_SRR1 + stw r1, 0(r11) + mr r1, r11 + stw r12,_NIP(r11) + rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) 
*/ + lis r12, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ + stw r2,GPR2(r11) + addi r12, r12, STACK_FRAME_REGS_MARKER@l + stw r9,_MSR(r11) + li r2, \trapno + 1 + stw r12, 8(r11) + stw r2,_TRAP(r11) + SAVE_GPR(0, r11) + SAVE_4GPRS(3, r11) + SAVE_2GPRS(7, r11) + + addi r11,r1,STACK_FRAME_OVERHEAD + addi r2,r10,-THREAD + stw r11,PT_REGS(r10) + /* Check to see if the dbcr0 register is set up to debug. Use the + internal debug mode bit to do this. */ + lwz r12,THREAD_DBCR0(r10) + andis. r12,r12,DBCR0_IDM@h + ACCOUNT_CPU_USER_ENTRY(r2, r11, r12) + beq+ 3f + /* From user and task is ptraced - load up global dbcr0 */ + li r12,-1 /* clear all pending debug events */ + mtspr SPRN_DBSR,r12 + lis r11,global_dbcr0@ha + tophys(r11,r11) + addi r11,r11,global_dbcr0@l +#ifdef CONFIG_SMP + lwz r9,TASK_CPU(r2) + slwi r9,r9,3 + add r11,r11,r9 +#endif + lwz r12,0(r11) + mtspr SPRN_DBCR0,r12 + lwz r12,4(r11) + addi r12,r12,-1 + stw r12,4(r11) + +3: + tovirt(r2, r2) /* set r2 to current */ + lis r11, transfer_to_syscall@h + ori r11, r11, transfer_to_syscall@l +#ifdef CONFIG_TRACE_IRQFLAGS + /* + * If MSR is changing we need to keep interrupts disabled at this point + * otherwise we might risk taking an interrupt before we tell lockdep + * they are enabled. + */ + lis r10, MSR_KERNEL@h + ori r10, r10, MSR_KERNEL@l + rlwimi r10, r9, 0, MSR_EE +#else + lis r10, (MSR_KERNEL | MSR_EE)@h + ori r10, r10, (MSR_KERNEL | MSR_EE)@l +#endif + mtspr SPRN_SRR1,r10 + mtspr SPRN_SRR0,r11 + SYNC + RFI /* jump to handler, enable MMU */ +.endm + /* To handle the additional exception priority levels on 40x and Book-E * processors we allocate a stack per additional priority level. * @@ -217,8 +314,7 @@ label: CRITICAL_EXCEPTION_PROLOG(intno); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - NOCOPY, crit_transfer_to_handler, \ - ret_from_crit_exc) + crit_transfer_to_handler, ret_from_crit_exc) #define MCHECK_EXCEPTION(n, label, hdlr) \ START_EXCEPTION(label); \ @@ -227,36 +323,23 @@ label: stw r5,_ESR(r11); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ - NOCOPY, mcheck_transfer_to_handler, \ - ret_from_mcheck_exc) + mcheck_transfer_to_handler, ret_from_mcheck_exc) -#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \ +#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \ li r10,trap; \ stw r10,_TRAP(r11); \ lis r10,msr@h; \ ori r10,r10,msr@l; \ - copyee(r10, r9); \ bl tfer; \ .long hdlr; \ .long ret -#define COPY_EE(d, s) rlwimi d,s,0,16,16 -#define NOCOPY(d, s) - #define EXC_XFER_STD(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \ ret_from_except_full) #define EXC_XFER_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ - ret_from_except) - -#define EXC_XFER_EE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \ - ret_from_except_full) - -#define EXC_XFER_EE_LITE(n, hdlr) \ - EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \ ret_from_except) /* Check for a single step debug exception while in an exception @@ -323,7 +406,7 @@ label: /* continue normal handling for a debug exception... 
*/ \ 2: mfspr r4,SPRN_DBSR; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc) + EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), debug_transfer_to_handler, ret_from_debug_exc) #define DEBUG_CRIT_EXCEPTION \ START_EXCEPTION(DebugCrit); \ @@ -376,7 +459,7 @@ label: /* continue normal handling for a critical exception... */ \ 2: mfspr r4,SPRN_DBSR; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, crit_transfer_to_handler, ret_from_crit_exc) + EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc) #define DATA_STORAGE_EXCEPTION \ START_EXCEPTION(DataStorage) \ @@ -401,7 +484,7 @@ label: mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \ stw r4,_DEAR(r11); \ addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_EE(0x0600, alignment_exception) + EXC_XFER_STD(0x0600, alignment_exception) #define PROGRAM_EXCEPTION \ START_EXCEPTION(Program) \ @@ -426,9 +509,9 @@ label: bl load_up_fpu; /* if from user, just load it up */ \ b fast_exception_return; \ 1: addi r3,r1,STACK_FRAME_OVERHEAD; \ - EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception) + EXC_XFER_STD(0x800, kernel_fp_unavailable_exception) -#ifndef __ASSEMBLY__ +#else /* __ASSEMBLY__ */ struct exception_regs { unsigned long mas0; unsigned long mas1; diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 32332e24e421..6621f230cc37 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -268,6 +268,9 @@ set_ivor: /* * Decide what sort of machine this is and initialize the MMU. 
*/ +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif mr r3,r30 mr r4,r31 bl machine_init @@ -380,7 +383,7 @@ interrupt_base: EXC_XFER_LITE(0x0300, handle_page_fault) 1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE_LITE(0x0300, CacheLockingException) + EXC_XFER_LITE(0x0300, CacheLockingException) /* Instruction Storage Interrupt */ INSTRUCTION_STORAGE_EXCEPTION @@ -401,21 +404,20 @@ interrupt_base: #ifdef CONFIG_E200 /* E200 treats 'normal' floating point instructions as FP Unavail exception */ EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ - program_check_exception, EXC_XFER_EE) + program_check_exception, EXC_XFER_STD) #else EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \ - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) #endif #endif /* System Call Interrupt */ START_EXCEPTION(SystemCall) - NORMAL_EXCEPTION_PROLOG(SYSCALL) - EXC_XFER_EE_LITE(0x0c00, DoSyscall) + SYSCALL_ENTRY 0xc00 SYSCALL /* Auxiliary Processor Unavailable Interrupt */ EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \ - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) /* Decrementer Interrupt */ DECREMENTER_EXCEPTION @@ -423,7 +425,7 @@ interrupt_base: /* Fixed Internal Timer Interrupt */ /* TODO: Add FIT support */ EXCEPTION(0x3100, FIT, FixedIntervalTimer, \ - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) /* Watchdog Timer Interrupt */ #ifdef CONFIG_BOOKE_WDT @@ -633,25 +635,25 @@ END_BTB_FLUSH_SECTION bl load_up_spe b fast_exception_return 1: addi r3,r1,STACK_FRAME_OVERHEAD - EXC_XFER_EE_LITE(0x2010, KernelSPE) + EXC_XFER_LITE(0x2010, KernelSPE) #elif defined(CONFIG_SPE_POSSIBLE) EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \ - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) #endif /* CONFIG_SPE_POSSIBLE */ /* SPE Floating Point Data */ #ifdef CONFIG_SPE EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData, - SPEFloatingPointException, EXC_XFER_EE) + SPEFloatingPointException, EXC_XFER_STD) /* SPE Floating Point Round */ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ - SPEFloatingPointRoundException, EXC_XFER_EE) + SPEFloatingPointRoundException, EXC_XFER_STD) #elif defined(CONFIG_SPE_POSSIBLE) EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \ - unknown_exception, EXC_XFER_EE) + unknown_exception, EXC_XFER_STD) #endif /* CONFIG_SPE_POSSIBLE */ @@ -674,10 +676,10 @@ END_BTB_FLUSH_SECTION unknown_exception) /* Hypercall */ - EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_EE) + EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_STD) /* Embedded Hypervisor Privilege */ - EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_EE) + EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_STD) interrupt_end: diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index fec8a6773119..da307dd93ee3 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -29,11 +29,15 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/smp.h> +#include <linux/debugfs.h> +#include <linux/init.h> #include <asm/hw_breakpoint.h> #include <asm/processor.h> #include <asm/sstep.h> #include <asm/debug.h> +#include <asm/debugfs.h> +#include <asm/hvcall.h> #include <linux/uaccess.h> /* @@ -174,7 +178,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, if 
(!ppc_breakpoint_available()) return -ENODEV; length_max = 8; /* DABR */ - if (cpu_has_feature(CPU_FTR_DAWR)) { + if (dawr_enabled()) { length_max = 512 ; /* 64 doublewords */ /* DAWR region can't cross 512 boundary */ if ((attr->bp_addr >> 9) != @@ -376,3 +380,59 @@ void hw_breakpoint_pmu_read(struct perf_event *bp) { /* TODO */ } + +bool dawr_force_enable; +EXPORT_SYMBOL_GPL(dawr_force_enable); + +static ssize_t dawr_write_file_bool(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct arch_hw_breakpoint null_brk = {0, 0, 0}; + size_t rc; + + /* Send error to user if the hypervisor won't allow us to write DAWR */ + if ((!dawr_force_enable) && + (firmware_has_feature(FW_FEATURE_LPAR)) && + (set_dawr(&null_brk) != H_SUCCESS)) + return -1; + + rc = debugfs_write_file_bool(file, user_buf, count, ppos); + if (rc) + return rc; + + /* If we are clearing, make sure all CPUs have the DAWR cleared */ + if (!dawr_force_enable) + smp_call_function((smp_call_func_t)set_dawr, &null_brk, 0); + + return rc; +} + +static const struct file_operations dawr_enable_fops = { + .read = debugfs_read_file_bool, + .write = dawr_write_file_bool, + .open = simple_open, + .llseek = default_llseek, +}; + +static int __init dawr_force_setup(void) +{ + dawr_force_enable = false; + + if (cpu_has_feature(CPU_FTR_DAWR)) { + /* Don't setup sysfs file for user control on P8 */ + dawr_force_enable = true; + return 0; + } + + if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) { + /* Turn DAWR off by default, but allow admin to turn it on */ + dawr_force_enable = false; + debugfs_create_file_unsafe("dawr_enable_dangerous", 0600, + powerpc_debugfs_root, + &dawr_force_enable, + &dawr_enable_fops); + } + return 0; +} +arch_initcall(dawr_force_setup); diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 7f5ac2e8581b..2dfbd5d5b932 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -1,956 +1,188 @@ /* - * This file contains idle entry/exit functions for POWER7, - * POWER8 and POWER9 CPUs. + * Copyright 2018, IBM Corporation. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. + * + * This file contains general idle entry/exit functions to save + * and restore stack and NVGPRs which allows C code to call idle + * states that lose GPRs, and it will return transparently with + * SRR1 wakeup reason return value. + * + * The platform / CPU caller must ensure SPRs and any other non-GPR + * state is saved and restored correctly, handle KVM, interrupts, etc. */ -#include <linux/threads.h> -#include <asm/processor.h> -#include <asm/page.h> -#include <asm/cputable.h> -#include <asm/thread_info.h> #include <asm/ppc_asm.h> #include <asm/asm-offsets.h> #include <asm/ppc-opcode.h> -#include <asm/hw_irq.h> -#include <asm/kvm_book3s_asm.h> -#include <asm/opal.h> #include <asm/cpuidle.h> -#include <asm/exception-64s.h> -#include <asm/book3s/64/mmu-hash.h> -#include <asm/mmu.h> -#include <asm/asm-compat.h> -#include <asm/feature-fixups.h> - -#undef DEBUG - -/* - * Use unused space in the interrupt stack to save and restore - * registers for winkle support.
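On the hw_breakpoint.c hunk above: dawr_enabled() is the new gate that replaces the bare CPU-feature test, and the debugfs file wires it to dawr_force_enable. A hedged sketch of what the helper presumably reduces to (the real definition lives in a header elsewhere in this series, so treat this as an assumption):

	extern bool dawr_force_enable;

	static inline bool dawr_enabled(void)
	{
		return dawr_force_enable;
	}

With debugfs mounted in the usual place, the switch appears as /sys/kernel/debug/powerpc/dawr_enable_dangerous; writing 1 force-enables the DAWR on POWER9 at the administrator's own risk, and writing 0 also clears the DAWR on the other CPUs via the smp_call_function() seen in dawr_write_file_bool().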
- */ -#define _MMCR0 GPR0 -#define _SDR1 GPR3 -#define _PTCR GPR3 -#define _RPR GPR4 -#define _SPURR GPR5 -#define _PURR GPR6 -#define _TSCR GPR7 -#define _DSCR GPR8 -#define _AMOR GPR9 -#define _WORT GPR10 -#define _WORC GPR11 -#define _LPCR GPR12 - -#define PSSCR_EC_ESL_MASK_SHIFTED (PSSCR_EC | PSSCR_ESL) >> 16 - .text - -/* - * Used by threads before entering deep idle states. Saves SPRs - * in interrupt stack frame - */ -save_sprs_to_stack: - /* - * Note all register i.e per-core, per-subcore or per-thread is saved - * here since any thread in the core might wake up first - */ -BEGIN_FTR_SECTION - /* - * Note - SDR1 is dropped in Power ISA v3. Hence not restoring - * SDR1 here - */ - mfspr r3,SPRN_PTCR - std r3,_PTCR(r1) - mfspr r3,SPRN_LPCR - std r3,_LPCR(r1) -FTR_SECTION_ELSE - mfspr r3,SPRN_SDR1 - std r3,_SDR1(r1) -ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) - mfspr r3,SPRN_RPR - std r3,_RPR(r1) - mfspr r3,SPRN_SPURR - std r3,_SPURR(r1) - mfspr r3,SPRN_PURR - std r3,_PURR(r1) - mfspr r3,SPRN_TSCR - std r3,_TSCR(r1) - mfspr r3,SPRN_DSCR - std r3,_DSCR(r1) - mfspr r3,SPRN_AMOR - std r3,_AMOR(r1) - mfspr r3,SPRN_WORT - std r3,_WORT(r1) - mfspr r3,SPRN_WORC - std r3,_WORC(r1) /* - * On POWER9, there are idle states such as stop4, invoked via cpuidle, - * that lose hypervisor resources. In such cases, we need to save - * additional SPRs before entering those idle states so that they can - * be restored to their older values on wakeup from the idle state. + * Desired PSSCR in r3 * - * On POWER8, the only such deep idle state is winkle which is used - * only in the context of CPU-Hotplug, where these additional SPRs are - * reinitiazed to a sane value. Hence there is no need to save/restore - * these SPRs. + * No state will be lost regardless of wakeup mechanism (interrupt or NIA). + * + * An EC=0 type wakeup will return with a value of 0. SRESET wakeup (which can + * happen with xscom SRESET and possibly MCE) may clobber volatiles except LR, + * and must blr, to return to caller with r3 set according to caller's expected + * return code (for Book3S/64 that is SRR1). */ -BEGIN_FTR_SECTION - blr -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) - -power9_save_additional_sprs: - mfspr r3, SPRN_PID - mfspr r4, SPRN_LDBAR - std r3, STOP_PID(r13) - std r4, STOP_LDBAR(r13) - - mfspr r3, SPRN_FSCR - mfspr r4, SPRN_HFSCR - std r3, STOP_FSCR(r13) - std r4, STOP_HFSCR(r13) - - mfspr r3, SPRN_MMCRA - mfspr r4, SPRN_MMCR0 - std r3, STOP_MMCRA(r13) - std r4, _MMCR0(r1) - - mfspr r3, SPRN_MMCR1 - mfspr r4, SPRN_MMCR2 - std r3, STOP_MMCR1(r13) - std r4, STOP_MMCR2(r13) - blr - -power9_restore_additional_sprs: - ld r3,_LPCR(r1) - ld r4, STOP_PID(r13) - mtspr SPRN_LPCR,r3 - mtspr SPRN_PID, r4 - - ld r3, STOP_LDBAR(r13) - ld r4, STOP_FSCR(r13) - mtspr SPRN_LDBAR, r3 - mtspr SPRN_FSCR, r4 - - ld r3, STOP_HFSCR(r13) - ld r4, STOP_MMCRA(r13) - mtspr SPRN_HFSCR, r3 - mtspr SPRN_MMCRA, r4 - - ld r3, _MMCR0(r1) - ld r4, STOP_MMCR1(r13) - mtspr SPRN_MMCR0, r3 - mtspr SPRN_MMCR1, r4 - - ld r3, STOP_MMCR2(r13) - ld r4, PACA_SPRG_VDSO(r13) - mtspr SPRN_MMCR2, r3 - mtspr SPRN_SPRG3, r4 +_GLOBAL(isa300_idle_stop_noloss) + mtspr SPRN_PSSCR,r3 + PPC_STOP + li r3,0 blr /* - * Used by threads when the lock bit of core_idle_state is set. - * Threads will spin in HMT_LOW until the lock bit is cleared. - * r14 - pointer to core_idle_state - * r15 - used to load contents of core_idle_state - * r9 - used as a temporary variable + * Desired PSSCR in r3 + * + * GPRs may be lost, so they are saved here. Wakeup is by interrupt only. 
+ The SRESET wakeup returns to this function's caller by calling + idle_return_gpr_loss with r3 set to desired return value. + * + * A wakeup without GPR loss may alternatively be handled as in + * isa300_idle_stop_noloss and blr directly, as an optimisation. + * + * The caller is responsible for saving/restoring SPRs, MSR, timebase, + * etc. */ - -core_idle_lock_held: - HMT_LOW -3: lwz r15,0(r14) - andis. r15,r15,PNV_CORE_IDLE_LOCK_BIT@h - bne 3b - HMT_MEDIUM - lwarx r15,0,r14 - andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h - bne- core_idle_lock_held - blr +_GLOBAL(isa300_idle_stop_mayloss) + mtspr SPRN_PSSCR,r3 + std r1,PACAR1(r13) + mflr r4 + mfcr r5 + /* use stack red zone rather than a new frame for saving regs */ + std r2,-8*0(r1) + std r14,-8*1(r1) + std r15,-8*2(r1) + std r16,-8*3(r1) + std r17,-8*4(r1) + std r18,-8*5(r1) + std r19,-8*6(r1) + std r20,-8*7(r1) + std r21,-8*8(r1) + std r22,-8*9(r1) + std r23,-8*10(r1) + std r24,-8*11(r1) + std r25,-8*12(r1) + std r26,-8*13(r1) + std r27,-8*14(r1) + std r28,-8*15(r1) + std r29,-8*16(r1) + std r30,-8*17(r1) + std r31,-8*18(r1) + std r4,-8*19(r1) + std r5,-8*20(r1) + /* 168 bytes */ + PPC_STOP + b . /* catch bugs */ /* - * Pass requested state in r3: - * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8 - * - Requested PSSCR value in POWER9 + * Desired return value in r3 + * + * The idle wakeup SRESET interrupt can call this after calling + * to return to the idle sleep function caller with r3 as the return code. * - * Address of idle handler to branch to in realmode in r4 + * This must not be used if idle was entered via a _noloss function (use + * a simple blr instead). */ -pnv_powersave_common: - /* Use r3 to pass state nap/sleep/winkle */ - /* NAP is a state loss, we create a regs frame on the - * stack, fill it up with the state we care about and - * stick a pointer to it in PACAR1. We really only - * need to save PC, some CR bits and the NV GPRs, - * but for now an interrupt frame will do. - */ - mtctr r4 - - mflr r0 - std r0,16(r1) - stdu r1,-INT_FRAME_SIZE(r1) - std r0,_LINK(r1) - std r0,_NIP(r1) - - /* We haven't lost state ... yet */ - li r0,0 - stb r0,PACA_NAPSTATELOST(r13) - - /* Continue saving state */ - SAVE_GPR(2, r1) - SAVE_NVGPRS(r1) - mfcr r5 - std r5,_CCR(r1) - std r1,PACAR1(r13) - -BEGIN_FTR_SECTION - /* - * POWER9 does not require real mode to stop, and presently does not - * set hwthread_state for KVM (threads don't share MMU context), so - * we can remain in virtual mode for this. - */ - bctr -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - /* - * POWER8 - * Go to real mode to do the nap, as required by the architecture. - * Also, we need to be in real mode before setting hwthread_state, - * because as soon as we do that, another thread can switch - * the MMU context to the guest. - */ - LOAD_REG_IMMEDIATE(r7, MSR_IDLE) - mtmsrd r7,0 - bctr +_GLOBAL(idle_return_gpr_loss) + ld r1,PACAR1(r13) + ld r4,-8*19(r1) + ld r5,-8*20(r1) + mtlr r4 + mtcr r5 + /* + * KVM nap requires r2 to be saved, rather than just restoring it + * from PACATOC. This could be avoided for that less common case + * if KVM saved its r2.
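A note on the red-zone trick flagged by the comment in isa300_idle_stop_mayloss above: the 64-bit ELF ABI reserves a 288-byte red zone below the stack pointer that can be used without allocating a new frame, and the 21 doublewords saved here (r2, r14-r31, plus the LR and CR copies, the "168 bytes" of the comment) fit inside it. Expressed as a compile-time check, for illustration only:

	#define IDLE_REDZONE_SAVE_BYTES	(21 * 8)	/* = 168 */

	_Static_assert(IDLE_REDZONE_SAVE_BYTES <= 288,
		       "saved GPRs must stay inside the ELF ABI stack red zone");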
+ */ + ld r2,-8*0(r1) + ld r14,-8*1(r1) + ld r15,-8*2(r1) + ld r16,-8*3(r1) + ld r17,-8*4(r1) + ld r18,-8*5(r1) + ld r19,-8*6(r1) + ld r20,-8*7(r1) + ld r21,-8*8(r1) + ld r22,-8*9(r1) + ld r23,-8*10(r1) + ld r24,-8*11(r1) + ld r25,-8*12(r1) + ld r26,-8*13(r1) + ld r27,-8*14(r1) + ld r28,-8*15(r1) + ld r29,-8*16(r1) + ld r30,-8*17(r1) + ld r31,-8*18(r1) + blr /* * This is the sequence required to execute idle instructions, as * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0. + * + * The 0(r1) slot is used to save r2 in isa206, so use that here. */ #define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ - std r0,0(r1); \ + std r2,0(r1); \ ptesync; \ - ld r0,0(r1); \ -236: cmpd cr0,r0,r0; \ + ld r2,0(r1); \ +236: cmpd cr0,r2,r2; \ bne 236b; \ - IDLE_INST; - - - .globl pnv_enter_arch207_idle_mode -pnv_enter_arch207_idle_mode: -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - /* Tell KVM we're entering idle */ - li r4,KVM_HWTHREAD_IN_IDLE - /******************************************************/ - /* N O T E W E L L ! ! ! N O T E W E L L */ - /* The following store to HSTATE_HWTHREAD_STATE(r13) */ - /* MUST occur in real mode, i.e. with the MMU off, */ - /* and the MMU must stay off until we clear this flag */ - /* and test HSTATE_HWTHREAD_REQ(r13) in */ - /* pnv_powersave_wakeup in this file. */ - /* The reason is that another thread can switch the */ - /* MMU to a guest context whenever this flag is set */ - /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */ - /* that would potentially cause this thread to start */ - /* executing instructions from guest memory in */ - /* hypervisor mode, leading to a host crash or data */ - /* corruption, or worse. */ - /******************************************************/ - stb r4,HSTATE_HWTHREAD_STATE(r13) -#endif - stb r3,PACA_THREAD_IDLE_STATE(r13) - cmpwi cr3,r3,PNV_THREAD_SLEEP - bge cr3,2f - IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) - /* No return */ -2: - /* Sleep or winkle */ - lbz r7,PACA_THREAD_MASK(r13) - ld r14,PACA_CORE_IDLE_STATE_PTR(r13) - li r5,0 - beq cr3,3f - lis r5,PNV_CORE_IDLE_WINKLE_COUNT@h -3: -lwarx_loop1: - lwarx r15,0,r14 - - andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h - bnel- core_idle_lock_held - - add r15,r15,r5 /* Add if winkle */ - andc r15,r15,r7 /* Clear thread bit */ - - andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS - -/* - * If cr0 = 0, then current thread is the last thread of the core entering - * sleep. Last thread needs to execute the hardware bug workaround code if - * required by the platform. - * Make the workaround call unconditionally here. The below branch call is - * patched out when the idle states are discovered if the platform does not - * require it. - */ -.global pnv_fastsleep_workaround_at_entry -pnv_fastsleep_workaround_at_entry: - beq fastsleep_workaround_at_entry - - stwcx. r15,0,r14 - bne- lwarx_loop1 - isync - -common_enter: /* common code for all the threads entering sleep or winkle */ - bgt cr3,enter_winkle - IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) - -fastsleep_workaround_at_entry: - oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h - stwcx. r15,0,r14 - bne- lwarx_loop1 - isync - - /* Fast sleep workaround */ - li r3,1 - li r4,1 - bl opal_config_cpu_idle_state - - /* Unlock */ - xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h - lwsync - stw r15,0(r14) - b common_enter - -enter_winkle: - bl save_sprs_to_stack - - IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE) - -/* - * r3 - PSSCR value corresponding to the requested stop state. 
- */ -power_enter_stop: -/* - * Check if we are executing the lite variant with ESL=EC=0 - */ - andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED - clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */ - bne .Lhandle_esl_ec_set - PPC_STOP - li r3,0 /* Since we didn't lose state, return 0 */ - std r3, PACA_REQ_PSSCR(r13) - - /* - * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so - * it can determine if the wakeup reason is an HMI in - * CHECK_HMI_INTERRUPT. - * - * However, when we wakeup with ESL=0, SRR1 will not contain the wakeup - * reason, so there is no point setting r12 to SRR1. - * - * Further, we clear r12 here, so that we don't accidentally enter the - * HMI in pnv_wakeup_noloss() if the value of r12[42:45] == WAKE_HMI. - */ - li r12, 0 - b pnv_wakeup_noloss - -.Lhandle_esl_ec_set: -BEGIN_FTR_SECTION - /* - * POWER9 DD2.0 or earlier can incorrectly set PMAO when waking up after - * a state-loss idle. Saving and restoring MMCR0 over idle is a - * workaround. - */ - mfspr r4,SPRN_MMCR0 - std r4,_MMCR0(r1) -END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1) + IDLE_INST; \ + b . /* catch bugs */ /* - * Check if the requested state is a deep idle state. - */ - LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) - ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) - cmpd r3,r4 - bge .Lhandle_deep_stop - PPC_STOP /* Does not return (system reset interrupt) */ - -.Lhandle_deep_stop: -/* - * Entering deep idle state. - * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to - * stack and enter stop - */ - lbz r7,PACA_THREAD_MASK(r13) - ld r14,PACA_CORE_IDLE_STATE_PTR(r13) - -lwarx_loop_stop: - lwarx r15,0,r14 - andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h - bnel- core_idle_lock_held - andc r15,r15,r7 /* Clear thread bit */ - - stwcx. r15,0,r14 - bne- lwarx_loop_stop - isync - - bl save_sprs_to_stack - - PPC_STOP /* Does not return (system reset interrupt) */ - -/* - * Entered with MSR[EE]=0 and no soft-masked interrupts pending. - * r3 contains desired idle state (PNV_THREAD_NAP/SLEEP/WINKLE). - */ -_GLOBAL(power7_idle_insn) - /* Now check if user or arch enabled NAP mode */ - LOAD_REG_ADDR(r4, pnv_enter_arch207_idle_mode) - b pnv_powersave_common - -#define CHECK_HMI_INTERRUPT \ -BEGIN_FTR_SECTION_NESTED(66); \ - rlwinm r0,r12,45-31,0xf; /* extract wake reason field (P8) */ \ -FTR_SECTION_ELSE_NESTED(66); \ - rlwinm r0,r12,45-31,0xe; /* P7 wake reason field is 3 bits */ \ -ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \ - cmpwi r0,0xa; /* Hypervisor maintenance ? */ \ - bne+ 20f; \ - /* Invoke opal call to handle hmi */ \ - ld r2,PACATOC(r13); \ - ld r1,PACAR1(r13); \ - std r3,ORIG_GPR3(r1); /* Save original r3 */ \ - li r3,0; /* NULL argument */ \ - bl hmi_exception_realmode; \ - nop; \ - ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \ -20: nop; - -/* - * Entered with MSR[EE]=0 and no soft-masked interrupts pending. - * r3 contains desired PSSCR register value. + * Desired instruction type in r3 * - * Offline (CPU unplug) case also must notify KVM that the CPU is - * idle. - */ -_GLOBAL(power9_offline_stop) -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - /* - * Tell KVM we're entering idle. - * This does not have to be done in real mode because the P9 MMU - * is independent per-thread. Some steppings share radix/hash mode - * between threads, but in that case KVM has a barrier sync in real - * mode before and after switching between radix and hash. 
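These deletions are the point of the rewrite: the SPR save/restore, the KVM hwthread handshake and the PACA_REQ_PSSCR bookkeeping move out of assembly, and the remaining isa300/isa206 helpers only preserve GPRs. The C side of the split then looks roughly like the sketch below, modelled on the powernv idle driver this series reworks; treat the exact signature as an assumption:

	/* Hedged sketch of the new C-side stop entry. */
	static unsigned long power9_idle_stop(unsigned long psscr)
	{
		unsigned long srr1;

		/* ... save any SPRs the requested stop level may lose ... */
		srr1 = isa300_idle_stop_mayloss(psscr);	/* returns SRR1 on wakeup */
		/* ... restore SPRs according to the reported state loss ... */
		return srr1;
	}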
- */ - li r4,KVM_HWTHREAD_IN_IDLE - stb r4,HSTATE_HWTHREAD_STATE(r13) -#endif - /* fall through */ - -_GLOBAL(power9_idle_stop) - std r3, PACA_REQ_PSSCR(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -BEGIN_FTR_SECTION - sync - lwz r5, PACA_DONT_STOP(r13) - cmpwi r5, 0 - bne 1f -END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG) -#endif - mtspr SPRN_PSSCR,r3 - LOAD_REG_ADDR(r4,power_enter_stop) - b pnv_powersave_common - /* No return */ -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -1: - /* - * We get here when TM / thread reconfiguration bug workaround - * code wants to get the CPU into SMT4 mode, and therefore - * we are being asked not to stop. - */ - li r3, 0 - std r3, PACA_REQ_PSSCR(r13) - blr /* return 0 for wakeup cause / SRR1 value */ -#endif - -/* - * Called from machine check handler for powersave wakeups. - * Low level machine check processing has already been done. Now just - * go through the wake up path to get everything in order. + * GPRs may be lost, so they are saved here. Wakeup is by interrupt only. + * The SRESET wakeup returns to this function's caller by calling + * idle_return_gpr_loss with r3 set to desired return value. * - * r3 - The original SRR1 value. - * Original SRR[01] have been clobbered. - * MSR_RI is clear. - */ -.global pnv_powersave_wakeup_mce -pnv_powersave_wakeup_mce: - /* Set cr3 for pnv_powersave_wakeup */ - rlwinm r11,r3,47-31,30,31 - cmpwi cr3,r11,2 - - /* - * Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake - * reason into r12, which allows reuse of the system reset wakeup - * code without being mistaken for another type of wakeup. - */ - oris r12,r3,SRR1_WAKEMCE_RESVD@h - - b pnv_powersave_wakeup - -/* - * Called from reset vector for powersave wakeups. - * cr3 - set to gt if waking up with partial/complete hypervisor state loss - * r12 - SRR1 - */ -.global pnv_powersave_wakeup -pnv_powersave_wakeup: - ld r2, PACATOC(r13) - -BEGIN_FTR_SECTION - bl pnv_restore_hyp_resource_arch300 -FTR_SECTION_ELSE - bl pnv_restore_hyp_resource_arch207 -ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) - - li r0,PNV_THREAD_RUNNING - stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */ - - mr r3,r12 - -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - lbz r0,HSTATE_HWTHREAD_STATE(r13) - cmpwi r0,KVM_HWTHREAD_IN_KERNEL - beq 0f - li r0,KVM_HWTHREAD_IN_KERNEL - stb r0,HSTATE_HWTHREAD_STATE(r13) - /* Order setting hwthread_state vs. testing hwthread_req */ - sync -0: lbz r0,HSTATE_HWTHREAD_REQ(r13) - cmpwi r0,0 - beq 1f - b kvm_start_guest -1: -#endif - - /* Return SRR1 from power7_nap() */ - blt cr3,pnv_wakeup_noloss - b pnv_wakeup_loss - -/* - * Check whether we have woken up with hypervisor state loss. - * If yes, restore hypervisor state and return back to link. + * A wakeup without GPR loss may alteratively be handled as in + * isa300_idle_stop_noloss and blr directly, as an optimisation. * - * cr3 - set to gt if waking up with partial/complete hypervisor state loss - */ -pnv_restore_hyp_resource_arch300: - /* - * Workaround for POWER9, if we lost resources, the ERAT - * might have been mixed up and needs flushing. We also need - * to reload MMCR0 (see comment above). We also need to set - * then clear bit 60 in MMCRA to ensure the PMU starts running. - */ - blt cr3,1f -BEGIN_FTR_SECTION - PPC_INVALIDATE_ERAT - ld r1,PACAR1(r13) - ld r4,_MMCR0(r1) - mtspr SPRN_MMCR0,r4 -END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1) - mfspr r4,SPRN_MMCRA - ori r4,r4,(1 << (63-60)) - mtspr SPRN_MMCRA,r4 - xori r4,r4,(1 << (63-60)) - mtspr SPRN_MMCRA,r4 -1: - /* - * POWER ISA 3. 
Use PSSCR to determine if we - * are waking up from deep idle state - */ - LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) - ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) - - /* - * 0-3 bits correspond to Power-Saving Level Status - * which indicates the idle state we are waking up from - */ - mfspr r5, SPRN_PSSCR - rldicl r5,r5,4,60 - li r0, 0 /* clear requested_psscr to say we're awake */ - std r0, PACA_REQ_PSSCR(r13) - cmpd cr4,r5,r4 - bge cr4,pnv_wakeup_tb_loss /* returns to caller */ - - blr /* Waking up without hypervisor state loss. */ - -/* Same calling convention as arch300 */ -pnv_restore_hyp_resource_arch207: - /* - * POWER ISA 2.07 or less. - * Check if we slept with sleep or winkle. - */ - lbz r4,PACA_THREAD_IDLE_STATE(r13) - cmpwi cr2,r4,PNV_THREAD_NAP - bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */ - - /* - * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking - * up from nap. At this stage CR3 shouldn't contains 'gt' since that - * indicates we are waking with hypervisor state loss from nap. - */ - bgt cr3,. - - blr /* Waking up without hypervisor state loss */ - -/* - * Called if waking up from idle state which can cause either partial or - * complete hyp state loss. - * In POWER8, called if waking up from fastsleep or winkle - * In POWER9, called if waking up from stop state >= pnv_first_deep_stop_state - * - * r13 - PACA - * cr3 - gt if waking up with partial/complete hypervisor state loss - * - * If ISA300: - * cr4 - gt or eq if waking up from complete hypervisor state loss. + * The caller is responsible for saving/restoring SPRs, MSR, timebase, + * etc. * - * If ISA207: - * r4 - PACA_THREAD_IDLE_STATE + * This must be called in real-mode (MSR_IDLE). */ -pnv_wakeup_tb_loss: - ld r1,PACAR1(r13) - /* - * Before entering any idle state, the NVGPRs are saved in the stack. - * If there was a state loss, or PACA_NAPSTATELOST was set, then the - * NVGPRs are restored. If we are here, it is likely that state is lost, - * but not guaranteed -- neither ISA207 nor ISA300 tests to reach - * here are the same as the test to restore NVGPRS: - * PACA_THREAD_IDLE_STATE test for ISA207, PSSCR test for ISA300, - * and SRR1 test for restoring NVGPRs. - * - * We are about to clobber NVGPRs now, so set NAPSTATELOST to - * guarantee they will always be restored. This might be tightened - * with careful reading of specs (particularly for ISA300) but this - * is already a slow wakeup path and it's simpler to be safe. - */ - li r0,1 - stb r0,PACA_NAPSTATELOST(r13) - - /* - * - * Save SRR1 and LR in NVGPRs as they might be clobbered in - * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required - * to determine the wakeup reason if we branch to kvm_start_guest. LR - * is required to return back to reset vector after hypervisor state - * restore is complete. - */ - mr r19,r12 - mr r18,r4 - mflr r17 -BEGIN_FTR_SECTION - CHECK_HMI_INTERRUPT -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - - ld r14,PACA_CORE_IDLE_STATE_PTR(r13) - lbz r7,PACA_THREAD_MASK(r13) - - /* - * Take the core lock to synchronize against other threads. - * - * Lock bit is set in one of the 2 cases- - * a. In the sleep/winkle enter path, the last thread is executing - * fastsleep workaround code. - * b. In the wake up path, another thread is executing fastsleep - * workaround undo code or resyncing timebase or restoring context - * In either case loop until the lock bit is cleared. - */ -1: - lwarx r15,0,r14 - andis. 
r9,r15,PNV_CORE_IDLE_LOCK_BIT@h - bnel- core_idle_lock_held - oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h - stwcx. r15,0,r14 - bne- 1b - isync - - andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS - cmpwi cr2,r9,0 - - /* - * At this stage - * cr2 - eq if first thread to wakeup in core - * cr3- gt if waking up with partial/complete hypervisor state loss - * ISA300: - * cr4 - gt or eq if waking up from complete hypervisor state loss. - */ - -BEGIN_FTR_SECTION - /* - * Were we in winkle? - * If yes, check if all threads were in winkle, decrement our - * winkle count, set all thread winkle bits if all were in winkle. - * Check if our thread has a winkle bit set, and set cr4 accordingly - * (to match ISA300, above). Pseudo-code for core idle state - * transitions for ISA207 is as follows (everything happens atomically - * due to store conditional and/or lock bit): - * - * nap_idle() { } - * nap_wake() { } - * - * sleep_idle() - * { - * core_idle_state &= ~thread_in_core - * } - * - * sleep_wake() - * { - * bool first_in_core, first_in_subcore; - * - * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0; - * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0; - * - * core_idle_state |= thread_in_core; - * } - * - * winkle_idle() - * { - * core_idle_state &= ~thread_in_core; - * core_idle_state += 1 << WINKLE_COUNT_SHIFT; - * } - * - * winkle_wake() - * { - * bool first_in_core, first_in_subcore, winkle_state_lost; - * - * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0; - * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0; - * - * core_idle_state |= thread_in_core; - * - * if ((core_idle_state & WINKLE_MASK) == (8 << WINKLE_COUNT_SIHFT)) - * core_idle_state |= THREAD_WINKLE_BITS; - * core_idle_state -= 1 << WINKLE_COUNT_SHIFT; - * - * winkle_state_lost = core_idle_state & - * (thread_in_core << WINKLE_THREAD_SHIFT); - * core_idle_state &= ~(thread_in_core << WINKLE_THREAD_SHIFT); - * } - * - */ - cmpwi r18,PNV_THREAD_WINKLE +_GLOBAL(isa206_idle_insn_mayloss) + std r1,PACAR1(r13) + mflr r4 + mfcr r5 + /* use stack red zone rather than a new frame for saving regs */ + std r2,-8*0(r1) + std r14,-8*1(r1) + std r15,-8*2(r1) + std r16,-8*3(r1) + std r17,-8*4(r1) + std r18,-8*5(r1) + std r19,-8*6(r1) + std r20,-8*7(r1) + std r21,-8*8(r1) + std r22,-8*9(r1) + std r23,-8*10(r1) + std r24,-8*11(r1) + std r25,-8*12(r1) + std r26,-8*13(r1) + std r27,-8*14(r1) + std r28,-8*15(r1) + std r29,-8*16(r1) + std r30,-8*17(r1) + std r31,-8*18(r1) + std r4,-8*19(r1) + std r5,-8*20(r1) + cmpwi r3,PNV_THREAD_NAP + bne 1f + IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) +1: cmpwi r3,PNV_THREAD_SLEEP bne 2f - andis. r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h - subis r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h - beq 2f - ori r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */ -2: - /* Shift thread bit to winkle mask, then test if this thread is set, - * and remove it from the winkle bits */ - slwi r8,r7,8 - and r8,r8,r15 - andc r15,r15,r8 - cmpwi cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */ - - lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) - and r4,r4,r15 - cmpwi r4,0 /* Check if first in subcore */ - - or r15,r15,r7 /* Set thread bit */ - beq first_thread_in_subcore -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) - - or r15,r15,r7 /* Set thread bit */ - beq cr2,first_thread_in_core - - /* Not first thread in core or subcore to wake up */ - b clear_lock - -first_thread_in_subcore: - /* - * If waking up from sleep, subcore state is not lost. 
Hence - * skip subcore state restore - */ - blt cr4,subcore_state_restored - - /* Restore per-subcore state */ - ld r4,_SDR1(r1) - mtspr SPRN_SDR1,r4 - - ld r4,_RPR(r1) - mtspr SPRN_RPR,r4 - ld r4,_AMOR(r1) - mtspr SPRN_AMOR,r4 - -subcore_state_restored: - /* - * Check if the thread is also the first thread in the core. If not, - * skip to clear_lock. - */ - bne cr2,clear_lock - -first_thread_in_core: - - /* - * First thread in the core waking up from any state which can cause - * partial or complete hypervisor state loss. It needs to - * call the fastsleep workaround code if the platform requires it. - * Call it unconditionally here. The below branch instruction will - * be patched out if the platform does not have fastsleep or does not - * require the workaround. Patching will be performed during the - * discovery of idle-states. - */ -.global pnv_fastsleep_workaround_at_exit -pnv_fastsleep_workaround_at_exit: - b fastsleep_workaround_at_exit - -timebase_resync: - /* - * Use cr3 which indicates that we are waking up with atleast partial - * hypervisor state loss to determine if TIMEBASE RESYNC is needed. - */ - ble cr3,.Ltb_resynced - /* Time base re-sync */ - bl opal_resync_timebase; - /* - * If waking up from sleep (POWER8), per core state - * is not lost, skip to clear_lock. - */ -.Ltb_resynced: - blt cr4,clear_lock - - /* - * First thread in the core to wake up and its waking up with - * complete hypervisor state loss. Restore per core hypervisor - * state. - */ -BEGIN_FTR_SECTION - ld r4,_PTCR(r1) - mtspr SPRN_PTCR,r4 - ld r4,_RPR(r1) - mtspr SPRN_RPR,r4 - ld r4,_AMOR(r1) - mtspr SPRN_AMOR,r4 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - - ld r4,_TSCR(r1) - mtspr SPRN_TSCR,r4 - ld r4,_WORC(r1) - mtspr SPRN_WORC,r4 - -clear_lock: - xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h - lwsync - stw r15,0(r14) - -common_exit: - /* - * Common to all threads. - * - * If waking up from sleep, hypervisor state is not lost. Hence - * skip hypervisor state restore. - */ - blt cr4,hypervisor_state_restored - - /* Waking up from winkle */ - -BEGIN_MMU_FTR_SECTION - b no_segments -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) - /* Restore SLB from PACA */ - ld r8,PACA_SLBSHADOWPTR(r13) - - .rept SLB_NUM_BOLTED - li r3, SLBSHADOW_SAVEAREA - LDX_BE r5, r8, r3 - addi r3, r3, 8 - LDX_BE r6, r8, r3 - andis. r7,r5,SLB_ESID_V@h - beq 1f - slbmte r6,r5 -1: addi r8,r8,16 - .endr -no_segments: - - /* Restore per thread state */ - - ld r4,_SPURR(r1) - mtspr SPRN_SPURR,r4 - ld r4,_PURR(r1) - mtspr SPRN_PURR,r4 - ld r4,_DSCR(r1) - mtspr SPRN_DSCR,r4 - ld r4,_WORT(r1) - mtspr SPRN_WORT,r4 - - /* Call cur_cpu_spec->cpu_restore() */ - LOAD_REG_ADDR(r4, cur_cpu_spec) - ld r4,0(r4) - ld r12,CPU_SPEC_RESTORE(r4) -#ifdef PPC64_ELF_ABI_v1 - ld r12,0(r12) -#endif - mtctr r12 - bctrl - -/* - * On POWER9, we can come here on wakeup from a cpuidle stop state. - * Hence restore the additional SPRs to the saved value. - * - * On POWER8, we come here only on winkle. Since winkle is used - * only in the case of CPU-Hotplug, we don't need to restore - * the additional SPRs. - */ -BEGIN_FTR_SECTION - bl power9_restore_additional_sprs -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) -hypervisor_state_restored: - - mr r12,r19 - mtlr r17 - blr /* return to pnv_powersave_wakeup */ - -fastsleep_workaround_at_exit: - li r3,1 - li r4,0 - bl opal_config_cpu_idle_state - b timebase_resync - -/* - * R3 here contains the value that will be returned to the caller - * of power7_nap. - * R12 contains SRR1 for CHECK_HMI_INTERRUPT. 
- */ -.global pnv_wakeup_loss -pnv_wakeup_loss: - ld r1,PACAR1(r13) -BEGIN_FTR_SECTION - CHECK_HMI_INTERRUPT -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - REST_NVGPRS(r1) - REST_GPR(2, r1) - ld r4,PACAKMSR(r13) - ld r5,_LINK(r1) - ld r6,_CCR(r1) - addi r1,r1,INT_FRAME_SIZE - mtlr r5 - mtcr r6 - mtmsrd r4 - blr + IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) +2: IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE) -/* - * R3 here contains the value that will be returned to the caller - * of power7_nap. - * R12 contains SRR1 for CHECK_HMI_INTERRUPT. - */ -pnv_wakeup_noloss: - lbz r0,PACA_NAPSTATELOST(r13) - cmpwi r0,0 - bne pnv_wakeup_loss - ld r1,PACAR1(r13) -BEGIN_FTR_SECTION - CHECK_HMI_INTERRUPT -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - ld r4,PACAKMSR(r13) - ld r5,_NIP(r1) - ld r6,_CCR(r1) - addi r1,r1,INT_FRAME_SIZE - mtlr r5 - mtcr r6 - mtmsrd r4 - blr diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 8a936723c791..ada901af4950 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -81,10 +81,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); -int __irq_offset_value; - #ifdef CONFIG_PPC32 -EXPORT_SYMBOL(__irq_offset_value); atomic_t ppc_n_lost_interrupts; #ifdef CONFIG_TAU_INT @@ -261,16 +258,9 @@ notrace void arch_local_irq_restore(unsigned long mask) */ irq_happened = get_irq_happened(); if (!irq_happened) { - /* - * FIXME. Here we'd like to be able to do: - * - * #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG - * WARN_ON(!(mfmsr() & MSR_EE)); - * #endif - * - * But currently it hits in a few paths, we should fix those and - * enable the warning. - */ +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON(!(mfmsr() & MSR_EE)); +#endif return; } diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index b5fec1f9751a..4581377cfc98 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -112,6 +112,7 @@ void save_mce_event(struct pt_regs *regs, long handled, mce->srr1 = regs->msr; mce->gpr3 = regs->gpr[3]; mce->in_use = 1; + mce->cpu = get_paca()->paca_index; /* Mark it recovered if we have handled it and MSR(RI=1). */ if (handled && (regs->msr & MSR_RI)) @@ -121,6 +122,8 @@ void save_mce_event(struct pt_regs *regs, long handled, mce->initiator = mce_err->initiator; mce->severity = mce_err->severity; + mce->sync_error = mce_err->sync_error; + mce->error_class = mce_err->error_class; /* * Populate the mce error_type and type-specific error_type. 
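The save_mce_event() hunk above records the reporting CPU in each saved event and splits what used to be the single MCE_SEV_ERROR_SYNC severity into two independent fields: sync_error (was the interrupt synchronous?) and error_class (hardware vs. software attribution, indexed into the mc_error_class[] strings added in the next hunk). A minimal standalone sketch of the new encoding follows; only the field and constant names are taken from this diff, the simplified struct layout and enum values are assumed for illustration and are not the kernel's:

    #include <stdbool.h>
    #include <stdio.h>

    /* Assumed simplified mirror of the event fields this patch adds. */
    enum mce_sev { MCE_SEV_NO_ERROR, MCE_SEV_WARNING, MCE_SEV_SEVERE, MCE_SEV_FATAL };
    enum mce_eclass { MCE_ECLASS_UNKNOWN, MCE_ECLASS_HARDWARE,
                      MCE_ECLASS_HARD_INDETERMINATE, MCE_ECLASS_SOFTWARE,
                      MCE_ECLASS_SOFT_INDETERMINATE };

    struct mce_event_demo {
            int cpu;                        /* from get_paca()->paca_index */
            enum mce_sev severity;
            enum mce_eclass error_class;
            bool sync_error;                /* replaces the old MCE_SEV_ERROR_SYNC encoding */
    };

    int main(void)
    {
            /* e.g. a recoverable SLB multi-hit: still synchronous, now only a warning */
            struct mce_event_demo evt = {
                    .cpu = 3,
                    .severity = MCE_SEV_WARNING,
                    .error_class = MCE_ECLASS_SOFT_INDETERMINATE,
                    .sync_error = true,
            };

            printf("MCE: CPU%d: severity=%d class=%d %s\n", evt.cpu,
                   evt.severity, evt.error_class,
                   evt.sync_error ? "[sync]" : "[async]");
            return 0;
    }

The payoff of the split shows in the mce_power.c tables later in this patch: SLB/TLB/ERAT multi-hits keep sync_error = true but drop to MCE_SEV_WARNING, while the asynchronous store errors remain MCE_SEV_FATAL with sync_error = false.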
@@ -310,7 +313,11 @@ static void machine_check_process_queued_event(struct irq_work *work) void machine_check_print_event_info(struct machine_check_event *evt, bool user_mode, bool in_guest) { - const char *level, *sevstr, *subtype; + const char *level, *sevstr, *subtype, *err_type; + uint64_t ea = 0, pa = 0; + int n = 0; + char dar_str[50]; + char pa_str[50]; static const char *mc_ue_types[] = { "Indeterminate", "Instruction fetch", @@ -357,6 +364,13 @@ void machine_check_print_event_info(struct machine_check_event *evt, "Store (timeout)", "Page table walk Load/Store (timeout)", }; + static const char *mc_error_class[] = { + "Unknown", + "Hardware error", + "Probable Hardware error (some chance of software cause)", + "Software error", + "Probable Software error (some chance of hardware cause)", + }; /* Print things out */ if (evt->version != MCE_V1) { @@ -371,9 +385,9 @@ void machine_check_print_event_info(struct machine_check_event *evt, break; case MCE_SEV_WARNING: level = KERN_WARNING; - sevstr = ""; + sevstr = "Warning"; break; - case MCE_SEV_ERROR_SYNC: + case MCE_SEV_SEVERE: level = KERN_ERR; sevstr = "Severe"; break; @@ -384,101 +398,107 @@ void machine_check_print_event_info(struct machine_check_event *evt, break; } - printk("%s%s Machine check interrupt [%s]\n", level, sevstr, - evt->disposition == MCE_DISPOSITION_RECOVERED ? - "Recovered" : "Not recovered"); - - if (in_guest) { - printk("%s Guest NIP: %016llx\n", level, evt->srr0); - } else if (user_mode) { - printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level, - evt->srr0, current->pid, current->comm); - } else { - printk("%s NIP [%016llx]: %pS\n", level, evt->srr0, - (void *)evt->srr0); - } - - printk("%s Initiator: %s\n", level, - evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown"); switch (evt->error_type) { case MCE_ERROR_TYPE_UE: + err_type = "UE"; subtype = evt->u.ue_error.ue_error_type < ARRAY_SIZE(mc_ue_types) ? mc_ue_types[evt->u.ue_error.ue_error_type] : "Unknown"; - printk("%s Error type: UE [%s]\n", level, subtype); if (evt->u.ue_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.ue_error.effective_address); + ea = evt->u.ue_error.effective_address; if (evt->u.ue_error.physical_address_provided) - printk("%s Physical address: %016llx\n", - level, evt->u.ue_error.physical_address); + pa = evt->u.ue_error.physical_address; break; case MCE_ERROR_TYPE_SLB: + err_type = "SLB"; subtype = evt->u.slb_error.slb_error_type < ARRAY_SIZE(mc_slb_types) ? mc_slb_types[evt->u.slb_error.slb_error_type] : "Unknown"; - printk("%s Error type: SLB [%s]\n", level, subtype); if (evt->u.slb_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.slb_error.effective_address); + ea = evt->u.slb_error.effective_address; break; case MCE_ERROR_TYPE_ERAT: + err_type = "ERAT"; subtype = evt->u.erat_error.erat_error_type < ARRAY_SIZE(mc_erat_types) ? mc_erat_types[evt->u.erat_error.erat_error_type] : "Unknown"; - printk("%s Error type: ERAT [%s]\n", level, subtype); if (evt->u.erat_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.erat_error.effective_address); + ea = evt->u.erat_error.effective_address; break; case MCE_ERROR_TYPE_TLB: + err_type = "TLB"; subtype = evt->u.tlb_error.tlb_error_type < ARRAY_SIZE(mc_tlb_types) ? 
mc_tlb_types[evt->u.tlb_error.tlb_error_type] : "Unknown"; - printk("%s Error type: TLB [%s]\n", level, subtype); if (evt->u.tlb_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.tlb_error.effective_address); + ea = evt->u.tlb_error.effective_address; break; case MCE_ERROR_TYPE_USER: + err_type = "User"; subtype = evt->u.user_error.user_error_type < ARRAY_SIZE(mc_user_types) ? mc_user_types[evt->u.user_error.user_error_type] : "Unknown"; - printk("%s Error type: User [%s]\n", level, subtype); if (evt->u.user_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.user_error.effective_address); + ea = evt->u.user_error.effective_address; break; case MCE_ERROR_TYPE_RA: + err_type = "Real address"; subtype = evt->u.ra_error.ra_error_type < ARRAY_SIZE(mc_ra_types) ? mc_ra_types[evt->u.ra_error.ra_error_type] : "Unknown"; - printk("%s Error type: Real address [%s]\n", level, subtype); if (evt->u.ra_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.ra_error.effective_address); + ea = evt->u.ra_error.effective_address; break; case MCE_ERROR_TYPE_LINK: + err_type = "Link"; subtype = evt->u.link_error.link_error_type < ARRAY_SIZE(mc_link_types) ? mc_link_types[evt->u.link_error.link_error_type] : "Unknown"; - printk("%s Error type: Link [%s]\n", level, subtype); if (evt->u.link_error.effective_address_provided) - printk("%s Effective address: %016llx\n", - level, evt->u.link_error.effective_address); + ea = evt->u.link_error.effective_address; break; default: case MCE_ERROR_TYPE_UNKNOWN: - printk("%s Error type: Unknown\n", level); + err_type = "Unknown"; + subtype = ""; break; } + + dar_str[0] = pa_str[0] = '\0'; + if (ea && evt->srr0 != ea) { + /* Load/Store address */ + n = sprintf(dar_str, "DAR: %016llx ", ea); + if (pa) + sprintf(dar_str + n, "paddr: %016llx ", pa); + } else if (pa) { + sprintf(pa_str, " paddr: %016llx", pa); + } + + printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n", + level, evt->cpu, sevstr, in_guest ? "Guest" : "Host", + err_type, subtype, dar_str, + evt->disposition == MCE_DISPOSITION_RECOVERED ? + "Recovered" : "Not recovered"); + + if (in_guest || user_mode) { + printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n", + level, evt->cpu, current->pid, current->comm, + in_guest ? "Guest " : "", evt->srr0, pa_str); + } else { + printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n", + level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str); + } + + subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ? + mc_error_class[evt->error_class] : "Unknown"; + printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype); } EXPORT_SYMBOL_GPL(machine_check_print_event_info); diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index 6b800eec31f2..b5e876efe864 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -36,7 +36,7 @@ * Convert an address related to an mm to a PFN. NOTE: we are in real * mode, we could potentially race with page table updates. 
*/ -static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) +unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr) { pte_t *ptep; unsigned long flags; @@ -131,213 +131,232 @@ struct mce_ierror_table { bool nip_valid; /* nip is a valid indicator of faulting address */ unsigned int error_type; unsigned int error_subtype; + unsigned int error_class; unsigned int initiator; unsigned int severity; + bool sync_error; }; static const struct mce_ierror_table mce_p7_ierror_table[] = { { 0x00000000001c0000, 0x0000000000040000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000001c0000, 0x0000000000080000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000001c0000, 0x00000000000c0000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000001c0000, 0x0000000000100000, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000001c0000, 0x0000000000140000, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000001c0000, 0x0000000000180000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000001c0000, 0x00000000001c0000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, 0, 0, 0, 0, 0 } }; + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, 0, 0, 0, 0, 0, 0 } }; static const struct mce_ierror_table mce_p8_ierror_table[] = { { 0x00000000081c0000, 0x0000000000040000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000000080000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000000c0000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000100000, true, - MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000140000, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, 
}, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000180000, true, MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000001c0000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008000000, true, - MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008040000, true, MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, 0, 0, 0, 0, 0 } }; + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, 0, 0, 0, 0, 0, 0 } }; static const struct mce_ierror_table mce_p9_ierror_table[] = { { 0x00000000081c0000, 0x0000000000040000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000000080000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000000c0000, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000100000, true, - MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000140000, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000000081c0000, 0x0000000000180000, true, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000001c0000, true, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008000000, true, - MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008040000, true, MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x00000000080c0000, true, - 
MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008100000, true, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000000081c0000, 0x0000000008140000, false, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, - MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */ + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */ { 0x00000000081c0000, 0x0000000008180000, false, MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */ + MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */ { 0x00000000081c0000, 0x00000000081c0000, true, MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, -{ 0, 0, 0, 0, 0, 0 } }; +{ 0, 0, 0, 0, 0, 0, 0 } }; struct mce_derror_table { unsigned long dsisr_value; bool dar_valid; /* dar is a valid indicator of faulting address */ unsigned int error_type; unsigned int error_subtype; + unsigned int error_class; unsigned int initiator; unsigned int severity; + bool sync_error; }; static const struct mce_derror_table mce_p7_derror_table[] = { { 0x00008000, false, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00004000, true, MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000800, true, - MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000400, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000080, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000100, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000040, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, false, 0, 0, 0, 0 } }; + MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, +{ 0, false, 0, 0, 0, 0, 0 } }; static const struct mce_derror_table mce_p8_derror_table[] = { { 0x00008000, false, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 
0x00004000, true, MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00002000, true, - MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00001000, true, MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000800, true, - MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000400, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000200, true, MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000080, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000100, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, false, 0, 0, 0, 0 } }; + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, false, 0, 0, 0, 0, 0 } }; static const struct mce_derror_table mce_p9_derror_table[] = { { 0x00008000, false, - MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00004000, true, MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00002000, true, - MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00001000, true, MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000800, true, - MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000400, true, - MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000200, false, - MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000080, true, MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */ - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + 
MCE_ECLASS_SOFT_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_WARNING, true }, { 0x00000100, true, - MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000040, true, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000020, false, MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000010, false, MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, + MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, { 0x00000008, false, - MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, - MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, }, -{ 0, false, 0, 0, 0, 0 } }; + MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE, + MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true }, +{ 0, false, 0, 0, 0, 0, 0 } }; static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr, uint64_t *phys_addr) @@ -404,6 +423,7 @@ static int mce_handle_ierror(struct pt_regs *regs, /* now fill in mce_error_info */ mce_err->error_type = table[i].error_type; + mce_err->error_class = table[i].error_class; switch (table[i].error_type) { case MCE_ERROR_TYPE_UE: mce_err->u.ue_error_type = table[i].error_subtype; @@ -427,11 +447,12 @@ static int mce_handle_ierror(struct pt_regs *regs, mce_err->u.link_error_type = table[i].error_subtype; break; } + mce_err->sync_error = table[i].sync_error; mce_err->severity = table[i].severity; mce_err->initiator = table[i].initiator; if (table[i].nip_valid) { *addr = regs->nip; - if (mce_err->severity == MCE_SEV_ERROR_SYNC && + if (mce_err->sync_error && table[i].error_type == MCE_ERROR_TYPE_UE) { unsigned long pfn; @@ -448,8 +469,10 @@ static int mce_handle_ierror(struct pt_regs *regs, } mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN; - mce_err->severity = MCE_SEV_ERROR_SYNC; + mce_err->error_class = MCE_ECLASS_UNKNOWN; + mce_err->severity = MCE_SEV_SEVERE; mce_err->initiator = MCE_INITIATOR_CPU; + mce_err->sync_error = true; return 0; } @@ -496,6 +519,7 @@ static int mce_handle_derror(struct pt_regs *regs, /* now fill in mce_error_info */ mce_err->error_type = table[i].error_type; + mce_err->error_class = table[i].error_class; switch (table[i].error_type) { case MCE_ERROR_TYPE_UE: mce_err->u.ue_error_type = table[i].error_subtype; @@ -519,11 +543,12 @@ static int mce_handle_derror(struct pt_regs *regs, mce_err->u.link_error_type = table[i].error_subtype; break; } + mce_err->sync_error = table[i].sync_error; mce_err->severity = table[i].severity; mce_err->initiator = table[i].initiator; if (table[i].dar_valid) *addr = regs->dar; - else if (mce_err->severity == MCE_SEV_ERROR_SYNC && + else if (mce_err->sync_error && table[i].error_type == MCE_ERROR_TYPE_UE) { /* * We do a maximum of 4 nested MCE calls, see @@ -539,8 +564,10 @@ static int mce_handle_derror(struct pt_regs *regs, return handled; mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN; - mce_err->severity = MCE_SEV_ERROR_SYNC; + mce_err->error_class = MCE_ECLASS_UNKNOWN; + mce_err->severity = MCE_SEV_SEVERE; mce_err->initiator = MCE_INITIATOR_CPU; + mce_err->sync_error = true; return 0; } diff --git a/arch/powerpc/kernel/paca.c 
b/arch/powerpc/kernel/paca.c index e7382abee868..9cc91d03ab62 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -267,12 +267,12 @@ void copy_mm_to_paca(struct mm_struct *mm) get_paca()->mm_ctx_id = context->id; #ifdef CONFIG_PPC_MM_SLICES - VM_BUG_ON(!mm->context.slb_addr_limit); - get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit; - memcpy(&get_paca()->mm_ctx_low_slices_psize, - &context->low_slices_psize, sizeof(context->low_slices_psize)); - memcpy(&get_paca()->mm_ctx_high_slices_psize, - &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm)); + VM_BUG_ON(!mm_ctx_slb_addr_limit(context)); + get_paca()->mm_ctx_slb_addr_limit = mm_ctx_slb_addr_limit(context); + memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context), + LOW_SLICE_ARRAY_SZ); + memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context), + TASK_SLICE_ARRAY_SZ(context)); #else /* CONFIG_PPC_MM_SLICES */ get_paca()->mm_ctx_user_psize = context->user_psize; get_paca()->mm_ctx_sllp = context->sllp; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index dd9e0d5386ee..87da40129927 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -67,6 +67,7 @@ #include <asm/cpu_has_feature.h> #include <asm/asm-prototypes.h> #include <asm/stacktrace.h> +#include <asm/hw_breakpoint.h> #include <linux/kprobes.h> #include <linux/kdebug.h> @@ -133,7 +134,8 @@ static int __init enable_strict_msr_control(char *str) } early_param("ppc_strict_facility_enable", enable_strict_msr_control); -unsigned long msr_check_and_set(unsigned long bits) +/* notrace because it's called by restore_math */ +unsigned long notrace msr_check_and_set(unsigned long bits) { unsigned long oldmsr = mfmsr(); unsigned long newmsr; @@ -152,7 +154,8 @@ unsigned long msr_check_and_set(unsigned long bits) } EXPORT_SYMBOL_GPL(msr_check_and_set); -void __msr_check_and_clear(unsigned long bits) +/* notrace because it's called by restore_math */ +void notrace __msr_check_and_clear(unsigned long bits) { unsigned long oldmsr = mfmsr(); unsigned long newmsr; @@ -525,7 +528,17 @@ void giveup_all(struct task_struct *tsk) } EXPORT_SYMBOL(giveup_all); -void restore_math(struct pt_regs *regs) +/* + * The exception exit path calls restore_math() with interrupts hard disabled + * but the soft irq state not "reconciled". ftrace code that calls + * local_irq_save/restore causes warnings. + * + * Rather than complicate the exit path, just don't trace restore_math. This + * could be done by having ftrace entry code check for this un-reconciled + * condition where MSR[EE]=0 and PACA_IRQ_HARD_DIS is not set, and + * temporarily fix it up for the duration of the ftrace call. 
+ */ +void notrace restore_math(struct pt_regs *regs) { unsigned long msr; @@ -784,7 +797,7 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk) return __set_dabr(dabr, dabrx); } -static inline int set_dawr(struct arch_hw_breakpoint *brk) +int set_dawr(struct arch_hw_breakpoint *brk) { unsigned long dawr, dawrx, mrd; @@ -816,7 +829,7 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk) { memcpy(this_cpu_ptr(¤t_brk), brk, sizeof(*brk)); - if (cpu_has_feature(CPU_FTR_DAWR)) + if (dawr_enabled()) // Power8 or later set_dawr(brk); else if (!cpu_has_feature(CPU_FTR_ARCH_207S)) @@ -830,8 +843,8 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk) /* Check if we have DAWR or DABR hardware */ bool ppc_breakpoint_available(void) { - if (cpu_has_feature(CPU_FTR_DAWR)) - return true; /* POWER8 DAWR */ + if (dawr_enabled()) + return true; /* POWER8 DAWR or POWER9 forced DAWR */ if (cpu_has_feature(CPU_FTR_ARCH_207S)) return false; /* POWER9 with DAWR disabled */ /* DABR: Everything but POWER8 and POWER9 */ @@ -1151,11 +1164,6 @@ static inline void restore_sprs(struct thread_struct *old_thread, thread_pkey_regs_restore(new_thread, old_thread); } -#ifdef CONFIG_PPC_BOOK3S_64 -#define CP_SIZE 128 -static const u8 dummy_copy_buffer[CP_SIZE] __attribute__((aligned(CP_SIZE))); -#endif - struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *new) { @@ -1729,7 +1737,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */ #ifdef CONFIG_PPC_BOOK3S_64 - preload_new_slb_context(start, sp); + if (!radix_enabled()) + preload_new_slb_context(start, sp); #endif #endif diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index f33ff4163a51..00682b8df330 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -154,10 +154,8 @@ static struct prom_t __prombss prom; static unsigned long __prombss prom_entry; -#define PROM_SCRATCH_SIZE 256 - static char __prombss of_stdout_device[256]; -static char __prombss prom_scratch[PROM_SCRATCH_SIZE]; +static char __prombss prom_scratch[256]; static unsigned long __prombss dt_header_start; static unsigned long __prombss dt_struct_start, dt_struct_end; @@ -224,6 +222,135 @@ static bool __prombss rtas_has_query_cpu_stopped; #define PHANDLE_VALID(p) ((p) != 0 && (p) != PROM_ERROR) #define IHANDLE_VALID(i) ((i) != 0 && (i) != PROM_ERROR) +/* Copied from lib/string.c and lib/kstrtox.c */ + +static int __init prom_strcmp(const char *cs, const char *ct) +{ + unsigned char c1, c2; + + while (1) { + c1 = *cs++; + c2 = *ct++; + if (c1 != c2) + return c1 < c2 ? -1 : 1; + if (!c1) + break; + } + return 0; +} + +static char __init *prom_strcpy(char *dest, const char *src) +{ + char *tmp = dest; + + while ((*dest++ = *src++) != '\0') + /* nothing */; + return tmp; +} + +static int __init prom_strncmp(const char *cs, const char *ct, size_t count) +{ + unsigned char c1, c2; + + while (count) { + c1 = *cs++; + c2 = *ct++; + if (c1 != c2) + return c1 < c2 ? 
-1 : 1; + if (!c1) + break; + count--; + } + return 0; +} + +static size_t __init prom_strlen(const char *s) +{ + const char *sc; + + for (sc = s; *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} + +static int __init prom_memcmp(const void *cs, const void *ct, size_t count) +{ + const unsigned char *su1, *su2; + int res = 0; + + for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) + if ((res = *su1 - *su2) != 0) + break; + return res; +} + +static char __init *prom_strstr(const char *s1, const char *s2) +{ + size_t l1, l2; + + l2 = prom_strlen(s2); + if (!l2) + return (char *)s1; + l1 = prom_strlen(s1); + while (l1 >= l2) { + l1--; + if (!prom_memcmp(s1, s2, l2)) + return (char *)s1; + s1++; + } + return NULL; +} + +static size_t __init prom_strlcpy(char *dest, const char *src, size_t size) +{ + size_t ret = prom_strlen(src); + + if (size) { + size_t len = (ret >= size) ? size - 1 : ret; + memcpy(dest, src, len); + dest[len] = '\0'; + } + return ret; +} + +#ifdef CONFIG_PPC_PSERIES +static int __init prom_strtobool(const char *s, bool *res) +{ + if (!s) + return -EINVAL; + + switch (s[0]) { + case 'y': + case 'Y': + case '1': + *res = true; + return 0; + case 'n': + case 'N': + case '0': + *res = false; + return 0; + case 'o': + case 'O': + switch (s[1]) { + case 'n': + case 'N': + *res = true; + return 0; + case 'f': + case 'F': + *res = false; + return 0; + default: + break; + } + default: + break; + } + + return -EINVAL; +} +#endif /* This is the one and *ONLY* place where we actually call open * firmware. @@ -501,14 +628,14 @@ static int __init prom_next_node(phandle *nodep) } } -static inline int prom_getprop(phandle node, const char *pname, - void *value, size_t valuelen) +static inline int __init prom_getprop(phandle node, const char *pname, + void *value, size_t valuelen) { return call_prom("getprop", 4, 1, node, ADDR(pname), (u32)(unsigned long) value, (u32) valuelen); } -static inline int prom_getproplen(phandle node, const char *pname) +static inline int __init prom_getproplen(phandle node, const char *pname) { return call_prom("getproplen", 2, 1, node, ADDR(pname)); } @@ -555,7 +682,7 @@ static int __init prom_setprop(phandle node, const char *nodename, add_string(&p, tohex((u32)(unsigned long) value)); add_string(&p, tohex(valuelen)); add_string(&p, tohex(ADDR(pname))); - add_string(&p, tohex(strlen(pname))); + add_string(&p, tohex(prom_strlen(pname))); add_string(&p, "property"); *p = 0; return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd); @@ -631,33 +758,30 @@ static void __init early_cmdline_parse(void) const char *opt; char *p; - int l __maybe_unused = 0; + int l = 0; prom_cmd_line[0] = 0; p = prom_cmd_line; if ((long)prom.chosen > 0) l = prom_getprop(prom.chosen, "bootargs", p, COMMAND_LINE_SIZE-1); -#ifdef CONFIG_CMDLINE - if (l <= 0 || p[0] == '\0') /* dbl check */ - strlcpy(prom_cmd_line, - CONFIG_CMDLINE, sizeof(prom_cmd_line)); -#endif /* CONFIG_CMDLINE */ + if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && (l <= 0 || p[0] == '\0')) /* dbl check */ + prom_strlcpy(prom_cmd_line, CONFIG_CMDLINE, sizeof(prom_cmd_line)); prom_printf("command line: %s\n", prom_cmd_line); #ifdef CONFIG_PPC64 - opt = strstr(prom_cmd_line, "iommu="); + opt = prom_strstr(prom_cmd_line, "iommu="); if (opt) { prom_printf("iommu opt is: %s\n", opt); opt += 6; while (*opt && *opt == ' ') opt++; - if (!strncmp(opt, "off", 3)) + if (!prom_strncmp(opt, "off", 3)) prom_iommu_off = 1; - else if (!strncmp(opt, "force", 5)) + else if (!prom_strncmp(opt, "force", 5)) prom_iommu_force_on = 1; 
} #endif - opt = strstr(prom_cmd_line, "mem="); + opt = prom_strstr(prom_cmd_line, "mem="); if (opt) { opt += 4; prom_memory_limit = prom_memparse(opt, (const char **)&opt); @@ -669,13 +793,13 @@ static void __init early_cmdline_parse(void) #ifdef CONFIG_PPC_PSERIES prom_radix_disable = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT); - opt = strstr(prom_cmd_line, "disable_radix"); + opt = prom_strstr(prom_cmd_line, "disable_radix"); if (opt) { opt += 13; if (*opt && *opt == '=') { bool val; - if (kstrtobool(++opt, &val)) + if (prom_strtobool(++opt, &val)) prom_radix_disable = false; else prom_radix_disable = val; @@ -1028,7 +1152,7 @@ static int __init prom_count_smt_threads(void) type[0] = 0; prom_getprop(node, "device_type", type, sizeof(type)); - if (strcmp(type, "cpu")) + if (prom_strcmp(type, "cpu")) continue; /* * There is an entry for each smt thread, each entry being @@ -1138,8 +1262,14 @@ static void __init prom_check_platform_support(void) int prop_len = prom_getproplen(prom.chosen, "ibm,arch-vec-5-platform-support"); - /* First copy the architecture vec template */ - ibm_architecture_vec = ibm_architecture_vec_template; + /* + * First copy the architecture vec template + * + * use memcpy() instead of *vec = *vec_template so that GCC replaces it + * by __memcpy() when KASAN is active + */ + memcpy(&ibm_architecture_vec, &ibm_architecture_vec_template, + sizeof(ibm_architecture_vec)); if (prop_len > 1) { int i; @@ -1475,7 +1605,7 @@ static void __init prom_init_mem(void) */ prom_getprop(node, "name", type, sizeof(type)); } - if (strcmp(type, "memory")) + if (prom_strcmp(type, "memory")) continue; plen = prom_getprop(node, "reg", regbuf, sizeof(regbuf)); @@ -1487,8 +1617,8 @@ static void __init prom_init_mem(void) endp = p + (plen / sizeof(cell_t)); #ifdef DEBUG_PROM - memset(path, 0, PROM_SCRATCH_SIZE); - call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); + memset(path, 0, sizeof(prom_scratch)); + call_prom("package-to-path", 3, 1, node, path, sizeof(prom_scratch) - 1); prom_debug(" node %s :\n", path); #endif /* DEBUG_PROM */ @@ -1756,19 +1886,19 @@ static void __init prom_initialize_tce_table(void) prom_getprop(node, "device_type", type, sizeof(type)); prom_getprop(node, "model", model, sizeof(model)); - if ((type[0] == 0) || (strstr(type, "pci") == NULL)) + if ((type[0] == 0) || (prom_strstr(type, "pci") == NULL)) continue; /* Keep the old logic intact to avoid regression. 
*/ if (compatible[0] != 0) { - if ((strstr(compatible, "python") == NULL) && - (strstr(compatible, "Speedwagon") == NULL) && - (strstr(compatible, "Winnipeg") == NULL)) + if ((prom_strstr(compatible, "python") == NULL) && + (prom_strstr(compatible, "Speedwagon") == NULL) && + (prom_strstr(compatible, "Winnipeg") == NULL)) continue; } else if (model[0] != 0) { - if ((strstr(model, "ython") == NULL) && - (strstr(model, "peedwagon") == NULL) && - (strstr(model, "innipeg") == NULL)) + if ((prom_strstr(model, "ython") == NULL) && + (prom_strstr(model, "peedwagon") == NULL) && + (prom_strstr(model, "innipeg") == NULL)) continue; } @@ -1796,10 +1926,10 @@ static void __init prom_initialize_tce_table(void) local_alloc_bottom = base; /* It seems OF doesn't null-terminate the path :-( */ - memset(path, 0, PROM_SCRATCH_SIZE); + memset(path, 0, sizeof(prom_scratch)); /* Call OF to setup the TCE hardware */ if (call_prom("package-to-path", 3, 1, node, - path, PROM_SCRATCH_SIZE-1) == PROM_ERROR) { + path, sizeof(prom_scratch) - 1) == PROM_ERROR) { prom_printf("package-to-path failed\n"); } @@ -1917,12 +2047,12 @@ static void __init prom_hold_cpus(void) type[0] = 0; prom_getprop(node, "device_type", type, sizeof(type)); - if (strcmp(type, "cpu") != 0) + if (prom_strcmp(type, "cpu") != 0) continue; /* Skip non-configured cpus. */ if (prom_getprop(node, "status", type, sizeof(type)) > 0) - if (strcmp(type, "okay") != 0) + if (prom_strcmp(type, "okay") != 0) continue; reg = cpu_to_be32(-1); /* make sparse happy */ @@ -1998,9 +2128,9 @@ static void __init prom_find_mmu(void) return; version[sizeof(version) - 1] = 0; /* XXX might need to add other versions here */ - if (strcmp(version, "Open Firmware, 1.0.5") == 0) + if (prom_strcmp(version, "Open Firmware, 1.0.5") == 0) of_workarounds = OF_WA_CLAIM; - else if (strncmp(version, "FirmWorks,3.", 12) == 0) { + else if (prom_strncmp(version, "FirmWorks,3.", 12) == 0) { of_workarounds = OF_WA_CLAIM | OF_WA_LONGTRAIL; call_prom("interpret", 1, 1, "dev /memory 0 to allow-reclaim"); } else @@ -2033,7 +2163,7 @@ static void __init prom_init_stdout(void) call_prom("instance-to-path", 3, 1, prom.stdout, path, 255); prom_printf("OF stdout device is: %s\n", of_stdout_device); prom_setprop(prom.chosen, "/chosen", "linux,stdout-path", - path, strlen(path) + 1); + path, prom_strlen(path) + 1); /* instance-to-package fails on PA-Semi */ stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout); @@ -2043,7 +2173,7 @@ static void __init prom_init_stdout(void) /* If it's a display, note it */ memset(type, 0, sizeof(type)); prom_getprop(stdout_node, "device_type", type, sizeof(type)); - if (strcmp(type, "display") == 0) + if (prom_strcmp(type, "display") == 0) prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0); } } @@ -2064,19 +2194,19 @@ static int __init prom_find_machine_type(void) compat[len] = 0; while (i < len) { char *p = &compat[i]; - int sl = strlen(p); + int sl = prom_strlen(p); if (sl == 0) break; - if (strstr(p, "Power Macintosh") || - strstr(p, "MacRISC")) + if (prom_strstr(p, "Power Macintosh") || + prom_strstr(p, "MacRISC")) return PLATFORM_POWERMAC; #ifdef CONFIG_PPC64 /* We must make sure we don't detect the IBM Cell * blades as pSeries due to some firmware issues, * so we do it here. 
*/ - if (strstr(p, "IBM,CBEA") || - strstr(p, "IBM,CPBW-1.0")) + if (prom_strstr(p, "IBM,CBEA") || + prom_strstr(p, "IBM,CPBW-1.0")) return PLATFORM_GENERIC; #endif /* CONFIG_PPC64 */ i += sl + 1; @@ -2093,7 +2223,7 @@ static int __init prom_find_machine_type(void) compat, sizeof(compat)-1); if (len <= 0) return PLATFORM_GENERIC; - if (strcmp(compat, "chrp")) + if (prom_strcmp(compat, "chrp")) return PLATFORM_GENERIC; /* Default to pSeries. We need to know if we are running LPAR */ @@ -2155,19 +2285,19 @@ static void __init prom_check_displays(void) for (node = 0; prom_next_node(&node); ) { memset(type, 0, sizeof(type)); prom_getprop(node, "device_type", type, sizeof(type)); - if (strcmp(type, "display") != 0) + if (prom_strcmp(type, "display") != 0) continue; /* It seems OF doesn't null-terminate the path :-( */ path = prom_scratch; - memset(path, 0, PROM_SCRATCH_SIZE); + memset(path, 0, sizeof(prom_scratch)); /* * leave some room at the end of the path for appending extra * arguments */ if (call_prom("package-to-path", 3, 1, node, path, - PROM_SCRATCH_SIZE-10) == PROM_ERROR) + sizeof(prom_scratch) - 10) == PROM_ERROR) continue; prom_printf("found display : %s, opening... ", path); @@ -2259,9 +2389,9 @@ static unsigned long __init dt_find_string(char *str) s = os = (char *)dt_string_start; s += 4; while (s < (char *)dt_string_end) { - if (strcmp(s, str) == 0) + if (prom_strcmp(s, str) == 0) return s - os; - s += strlen(s) + 1; + s += prom_strlen(s) + 1; } return 0; } @@ -2294,7 +2424,7 @@ static void __init scan_dt_build_strings(phandle node, } /* skip "name" */ - if (strcmp(namep, "name") == 0) { + if (prom_strcmp(namep, "name") == 0) { *mem_start = (unsigned long)namep; prev_name = "name"; continue; @@ -2306,7 +2436,7 @@ static void __init scan_dt_build_strings(phandle node, namep = sstart + soff; } else { /* Trim off some if we can */ - *mem_start = (unsigned long)namep + strlen(namep) + 1; + *mem_start = (unsigned long)namep + prom_strlen(namep) + 1; dt_string_end = *mem_start; } prev_name = namep; @@ -2363,8 +2493,8 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, /* get it again for debugging */ path = prom_scratch; - memset(path, 0, PROM_SCRATCH_SIZE); - call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); + memset(path, 0, sizeof(prom_scratch)); + call_prom("package-to-path", 3, 1, node, path, sizeof(prom_scratch) - 1); /* get and store all properties */ prev_name = ""; @@ -2375,7 +2505,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, break; /* skip "name" */ - if (strcmp(pname, "name") == 0) { + if (prom_strcmp(pname, "name") == 0) { prev_name = "name"; continue; } @@ -2406,7 +2536,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, call_prom("getprop", 4, 1, node, pname, valp, l); *mem_start = _ALIGN(*mem_start, 4); - if (!strcmp(pname, "phandle")) + if (!prom_strcmp(pname, "phandle")) has_phandle = 1; } @@ -2476,8 +2606,8 @@ static void __init flatten_device_tree(void) /* Add "phandle" in there, we'll need it */ namep = make_room(&mem_start, &mem_end, 16, 1); - strcpy(namep, "phandle"); - mem_start = (unsigned long)namep + strlen(namep) + 1; + prom_strcpy(namep, "phandle"); + mem_start = (unsigned long)namep + prom_strlen(namep) + 1; /* Build string array */ prom_printf("Building dt strings...\n"); @@ -2799,7 +2929,7 @@ static void __init fixup_device_tree_efika(void) rv = prom_getprop(node, "model", prop, sizeof(prop)); if (rv == PROM_ERROR) return; - if 
(strcmp(prop, "EFIKA5K2")) + if (prom_strcmp(prop, "EFIKA5K2")) return; prom_printf("Applying EFIKA device tree fixups\n"); @@ -2807,13 +2937,13 @@ static void __init fixup_device_tree_efika(void) /* Claiming to be 'chrp' is death */ node = call_prom("finddevice", 1, 1, ADDR("/")); rv = prom_getprop(node, "device_type", prop, sizeof(prop)); - if (rv != PROM_ERROR && (strcmp(prop, "chrp") == 0)) + if (rv != PROM_ERROR && (prom_strcmp(prop, "chrp") == 0)) prom_setprop(node, "/", "device_type", "efika", sizeof("efika")); /* CODEGEN,description is exposed in /proc/cpuinfo so fix that too */ rv = prom_getprop(node, "CODEGEN,description", prop, sizeof(prop)); - if (rv != PROM_ERROR && (strstr(prop, "CHRP"))) + if (rv != PROM_ERROR && (prom_strstr(prop, "CHRP"))) prom_setprop(node, "/", "CODEGEN,description", "Efika 5200B PowerPC System", sizeof("Efika 5200B PowerPC System")); diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 667df97d2595..4cac45cb5de5 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -16,10 +16,18 @@ # If you really need to reference something from prom_init.o add # it to the list below: +grep "^CONFIG_KASAN=y$" .config >/dev/null +if [ $? -eq 0 ] +then + MEM_FUNCS="__memcpy __memset" +else + MEM_FUNCS="memcpy memset" +fi + WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush -_end enter_prom memcpy memset reloc_offset __secondary_hold +_end enter_prom $MEM_FUNCS reloc_offset __secondary_hold __secondary_hold_acknowledge __secondary_hold_spinloop __start -strcmp strcpy strlcpy strlen strncmp strstr kstrtobool logo_linux_clut224 +logo_linux_clut224 reloc_got2 kernstart_addr memstart_addr linux_banner _stext __prom_init_toc_start __prom_init_toc_end btext_setup_display TOC." 
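prom_init() runs before the kernel is relocated, so the prom_init.c changes above give it self-contained prom_str* helpers instead of the normal string routines, and prom_init_check.sh now whitelists only __memcpy/__memset when KASAN instrumentation is enabled. As an illustration of what the new prom_strtobool() accepts when parsing disable_radix= in early_cmdline_parse(), here is a small standalone harness; the parsing logic is copied from the function added above, while the harness around it is invented for demonstration:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Same accept/reject logic as the prom_strtobool() added above. */
    static int strtobool_demo(const char *s, bool *res)
    {
            if (!s)
                    return -EINVAL;

            switch (s[0]) {
            case 'y': case 'Y': case '1':
                    *res = true;
                    return 0;
            case 'n': case 'N': case '0':
                    *res = false;
                    return 0;
            case 'o': case 'O':
                    switch (s[1]) {
                    case 'n': case 'N':
                            *res = true;
                            return 0;
                    case 'f': case 'F':
                            *res = false;
                            return 0;
                    default:
                            break;
                    }
                    /* falls through to reject, exactly like the original */
            default:
                    break;
            }
            return -EINVAL;
    }

    int main(void)
    {
            const char *inputs[] = { "y", "on", "Off", "0", "maybe", "" };
            bool v;

            for (unsigned int i = 0; i < sizeof(inputs) / sizeof(inputs[0]); i++) {
                    int rc = strtobool_demo(inputs[i], &v);
                    printf("%-6s -> %s\n", inputs[i],
                           rc ? "-EINVAL" : (v ? "true" : "false"));
            }
            return 0;
    }

Anything outside y/Y/1, n/N/0 and the on/off forms is rejected with -EINVAL; in that case the disable_radix= handler shown earlier clears prom_radix_disable rather than guessing at the value.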
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index d9ac7d94656e..684b0b315c32 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -43,6 +43,7 @@ #include <asm/tm.h> #include <asm/asm-prototypes.h> #include <asm/debug.h> +#include <asm/hw_breakpoint.h> #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> @@ -3088,7 +3089,7 @@ long arch_ptrace(struct task_struct *child, long request, dbginfo.sizeof_condition = 0; #ifdef CONFIG_HAVE_HW_BREAKPOINT dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE; - if (cpu_has_feature(CPU_FTR_DAWR)) + if (dawr_enabled()) dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR; #else dbginfo.features = 0; diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 70568ccbd9fd..e1c9cf079503 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -104,6 +104,14 @@ static __init int barrier_nospec_debugfs_init(void) return 0; } device_initcall(barrier_nospec_debugfs_init); + +static __init int security_feature_debugfs_init(void) +{ + debugfs_create_x64("security_features", 0400, powerpc_debugfs_root, + (u64 *)&powerpc_security_features); + return 0; +} +device_initcall(security_feature_debugfs_init); #endif /* CONFIG_DEBUG_FS */ #ifdef CONFIG_PPC_FSL_BOOK3E diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 2e5dfb6e0823..aad9f5df6ab6 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -67,6 +67,7 @@ #include <asm/livepatch.h> #include <asm/mmu_context.h> #include <asm/cpu_has_feature.h> +#include <asm/kasan.h> #include "setup.h" @@ -133,13 +134,11 @@ int crashing_cpu = -1; /* also used by kexec */ void machine_shutdown(void) { -#ifdef CONFIG_FA_DUMP /* * if fadump is active, cleanup the fadump registration before we * shutdown. 
*/ fadump_cleanup(); -#endif if (ppc_md.machine_shutdown) ppc_md.machine_shutdown(); @@ -200,14 +199,15 @@ static void show_cpuinfo_summary(struct seq_file *m) { struct device_node *root; const char *model = NULL; -#if defined(CONFIG_SMP) && defined(CONFIG_PPC32) unsigned long bogosum = 0; int i; - for_each_online_cpu(i) - bogosum += loops_per_jiffy; - seq_printf(m, "total bogomips\t: %lu.%02lu\n", - bogosum/(500000/HZ), bogosum/(5000/HZ) % 100); -#endif /* CONFIG_SMP && CONFIG_PPC32 */ + + if (IS_ENABLED(CONFIG_SMP) && IS_ENABLED(CONFIG_PPC32)) { + for_each_online_cpu(i) + bogosum += loops_per_jiffy; + seq_printf(m, "total bogomips\t: %lu.%02lu\n", + bogosum / (500000 / HZ), bogosum / (5000 / HZ) % 100); + } seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq); if (ppc_md.name) seq_printf(m, "platform\t: %s\n", ppc_md.name); @@ -221,11 +221,10 @@ static void show_cpuinfo_summary(struct seq_file *m) if (ppc_md.show_cpuinfo != NULL) ppc_md.show_cpuinfo(m); -#ifdef CONFIG_PPC32 /* Display the amount of memory */ - seq_printf(m, "Memory\t\t: %d MB\n", - (unsigned int)(total_memory / (1024 * 1024))); -#endif + if (IS_ENABLED(CONFIG_PPC32)) + seq_printf(m, "Memory\t\t: %d MB\n", + (unsigned int)(total_memory / (1024 * 1024))); } static int show_cpuinfo(struct seq_file *m, void *v) @@ -252,26 +251,24 @@ static int show_cpuinfo(struct seq_file *m, void *v) else seq_printf(m, "unknown (%08x)", pvr); -#ifdef CONFIG_ALTIVEC if (cpu_has_feature(CPU_FTR_ALTIVEC)) seq_printf(m, ", altivec supported"); -#endif /* CONFIG_ALTIVEC */ seq_printf(m, "\n"); #ifdef CONFIG_TAU - if (cur_cpu_spec->cpu_features & CPU_FTR_TAU) { -#ifdef CONFIG_TAU_AVERAGE - /* more straightforward, but potentially misleading */ - seq_printf(m, "temperature \t: %u C (uncalibrated)\n", - cpu_temp(cpu_id)); -#else - /* show the actual temp sensor range */ - u32 temp; - temp = cpu_temp_both(cpu_id); - seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n", - temp & 0xff, temp >> 16); -#endif + if (cpu_has_feature(CPU_FTR_TAU)) { + if (IS_ENABLED(CONFIG_TAU_AVERAGE)) { + /* more straightforward, but potentially misleading */ + seq_printf(m, "temperature \t: %u C (uncalibrated)\n", + cpu_temp(cpu_id)); + } else { + /* show the actual temp sensor range */ + u32 temp; + temp = cpu_temp_both(cpu_id); + seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n", + temp & 0xff, temp >> 16); + } } #endif /* CONFIG_TAU */ @@ -335,11 +332,10 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "revision\t: %hd.%hd (pvr %04x %04x)\n", maj, min, PVR_VER(pvr), PVR_REV(pvr)); -#ifdef CONFIG_PPC32 - seq_printf(m, "bogomips\t: %lu.%02lu\n", - loops_per_jiffy / (500000/HZ), - (loops_per_jiffy / (5000/HZ)) % 100); -#endif + if (IS_ENABLED(CONFIG_PPC32)) + seq_printf(m, "bogomips\t: %lu.%02lu\n", loops_per_jiffy / (500000 / HZ), + (loops_per_jiffy / (5000 / HZ)) % 100); + seq_printf(m, "\n"); /* If this is the last cpu, print the summary */ @@ -401,8 +397,8 @@ void __init check_for_initrd(void) #ifdef CONFIG_SMP -int threads_per_core, threads_per_subcore, threads_shift; -cpumask_t threads_core_mask; +int threads_per_core, threads_per_subcore, threads_shift __read_mostly; +cpumask_t threads_core_mask __read_mostly; EXPORT_SYMBOL_GPL(threads_per_core); EXPORT_SYMBOL_GPL(threads_per_subcore); EXPORT_SYMBOL_GPL(threads_shift); @@ -740,23 +736,19 @@ void __init setup_panic(void) * BUG() in that case. 
*/ -#ifdef CONFIG_NOT_COHERENT_CACHE -#define KERNEL_COHERENCY 0 -#else -#define KERNEL_COHERENCY 1 -#endif +#define KERNEL_COHERENCY (!IS_ENABLED(CONFIG_NOT_COHERENT_CACHE)) static int __init check_cache_coherency(void) { struct device_node *np; const void *prop; - int devtree_coherency; + bool devtree_coherency; np = of_find_node_by_path("/"); prop = of_get_property(np, "coherency-off", NULL); of_node_put(np); - devtree_coherency = prop ? 0 : 1; + devtree_coherency = prop ? false : true; if (devtree_coherency != KERNEL_COHERENCY) { printk(KERN_ERR @@ -799,12 +791,6 @@ void arch_setup_pdev_archdata(struct platform_device *pdev) static __init void print_system_info(void) { pr_info("-----------------------------------------------------\n"); -#ifdef CONFIG_PPC_BOOK3S_64 - pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); -#endif -#ifdef CONFIG_PPC_BOOK3S_32 - pr_info("Hash_size = 0x%lx\n", Hash_size); -#endif pr_info("phys_mem_size = 0x%llx\n", (unsigned long long)memblock_phys_mem_size()); @@ -826,18 +812,7 @@ static __init void print_system_info(void) pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features); #endif -#ifdef CONFIG_PPC_BOOK3S_64 - if (htab_address) - pr_info("htab_address = 0x%p\n", htab_address); - if (htab_hash_mask) - pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask); -#endif -#ifdef CONFIG_PPC_BOOK3S_32 - if (Hash) - pr_info("Hash = 0x%p\n", Hash); - if (Hash_mask) - pr_info("Hash_mask = 0x%lx\n", Hash_mask); -#endif + print_system_hash_info(); if (PHYSICAL_START > 0) pr_info("physical_start = 0x%llx\n", @@ -868,6 +843,8 @@ static void smp_setup_pacas(void) */ void __init setup_arch(char **cmdline_p) { + kasan_init(); + *cmdline_p = boot_command_line; /* Set a half-reasonable default so udelay does something sensible */ @@ -947,20 +924,7 @@ void __init setup_arch(char **cmdline_p) init_mm.end_data = (unsigned long) _edata; init_mm.brk = klimit; -#ifdef CONFIG_PPC_MM_SLICES -#ifdef CONFIG_PPC64 - if (!radix_enabled()) - init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64; -#elif defined(CONFIG_PPC_8xx) - init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW; -#else -#error "context.addr_limit not initialized." -#endif -#endif - -#ifdef CONFIG_SPAPR_TCE_IOMMU mm_iommu_init(&init_mm); -#endif irqstack_early_init(); exc_lvl_early_init(); emergency_stack_init(); @@ -969,9 +933,9 @@ void __init setup_arch(char **cmdline_p) early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); -#ifdef CONFIG_DUMMY_CONSOLE - conswitchp = &dummy_con; -#endif + if (IS_ENABLED(CONFIG_DUMMY_CONSOLE)) + conswitchp = &dummy_con; + if (ppc_md.setup_arch) ppc_md.setup_arch(); @@ -983,10 +947,8 @@ void __init setup_arch(char **cmdline_p) /* Initialize the MMU context management stuff. */ mmu_context_init(); -#ifdef CONFIG_PPC64 /* Interrupt code needs to be 64K-aligned. */ - if ((unsigned long)_stext & 0xffff) + if (IS_ENABLED(CONFIG_PPC64) && (unsigned long)_stext & 0xffff) panic("Kernelbase not 64K-aligned (0x%lx)!\n", (unsigned long)_stext); -#endif } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 4a65e08a6042..3fb9f64f88fd 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -64,34 +64,6 @@ EXPORT_SYMBOL(DMA_MODE_READ); EXPORT_SYMBOL(DMA_MODE_WRITE); /* - * We're called here very early in the boot. - * - * Note that the kernel may be running at an address which is different - * from the address that it was linked at, so we must use RELOC/PTRRELOC - * to access static data (including strings). 
-- paulus - */ -notrace unsigned long __init early_init(unsigned long dt_ptr) -{ - unsigned long offset = reloc_offset(); - - /* First zero the BSS -- use memset_io, some platforms don't have - * caches on yet */ - memset_io((void __iomem *)PTRRELOC(&__bss_start), 0, - __bss_stop - __bss_start); - - /* - * Identify the CPU type and fix up code sections - * that depend on which cpu we have. - */ - identify_cpu(offset, mfspr(SPRN_PVR)); - - apply_feature_fixups(); - - return KERNELBASE + offset; -} - - -/* * This is run before start_kernel(), the kernel has been relocated * and we are running with enough of the MMU enabled to have our * proper kernel virtual addresses diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 4f49e1a3594c..a400854a5036 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -68,6 +68,7 @@ #include <asm/cputhreads.h> #include <asm/hw_irq.h> #include <asm/feature-fixups.h> +#include <asm/kup.h> #include "setup.h" @@ -331,6 +332,12 @@ void __init early_setup(unsigned long dt_ptr) */ configure_exceptions(); + /* + * Configure Kernel Userspace Protection. This needs to happen before + * feature fixups for platforms that implement this using features. + */ + setup_kup(); + /* Apply all the dynamic patching */ apply_feature_fixups(); setup_feature_keys(); @@ -383,6 +390,9 @@ void early_setup_secondary(void) /* Initialize the hash table or TLB handling */ early_init_mmu_secondary(); + /* Perform any KUP setup that is per-cpu */ + setup_kup(); + /* * At this point, we can let interrupts switch to virtual mode * (the MMU has been setup), so adjust the MSR in the PACA to diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 6794466f6420..06c299ef6132 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -565,7 +565,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk, preempt_disable(); /* pull in MSR TS bits from user context */ - regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK); + regs->msr |= msr & MSR_TS_MASK; /* * Ensure that TM is enabled in regs->msr before we leave the signal @@ -745,6 +745,31 @@ SYSCALL_DEFINE0(rt_sigreturn) if (MSR_TM_SUSPENDED(mfmsr())) tm_reclaim_current(0); + /* + * Disable MSR[TS] bit also, so, if there is an exception in the + * code below (as a page fault in copy_ckvsx_to_user()), it does + * not recheckpoint this task if there was a context switch inside + * the exception. + * + * A major page fault can indirectly call schedule(). A reschedule + * process in the middle of an exception can have a side effect + * (Changing the CPU MSR[TS] state), since schedule() is called + * with the CPU MSR[TS] disable and returns with MSR[TS]=Suspended + * (switch_to() calls tm_recheckpoint() for the 'new' process). In + * this case, the process continues to be the same in the CPU, but + * the CPU state just changed. + * + * This can cause a TM Bad Thing, since the MSR in the stack will + * have the MSR[TS]=0, and this is what will be used to RFID. + * + * Clearing MSR[TS] state here will avoid a recheckpoint if there + * is any process reschedule in kernel space. The MSR[TS] state + * does not need to be saved also, since it will be replaced with + * the MSR[TS] that came from user context later, at + * restore_tm_sigcontexts. 
+ */ + regs->msr &= ~MSR_TS_MASK; + if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR])) goto badframe; if (MSR_TM_ACTIVE(msr)) { diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index e8e93c2c7d03..7a1708875d27 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -610,7 +610,7 @@ SYSFS_PMCSETUP(pa6t_pmc2, SPRN_PA6T_PMC2); SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3); SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4); SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5); -#ifdef CONFIG_DEBUG_KERNEL +#ifdef CONFIG_DEBUG_MISC SYSFS_SPRSETUP(hid0, SPRN_HID0); SYSFS_SPRSETUP(hid1, SPRN_HID1); SYSFS_SPRSETUP(hid4, SPRN_HID4); @@ -639,7 +639,7 @@ SYSFS_SPRSETUP(tsr0, SPRN_PA6T_TSR0); SYSFS_SPRSETUP(tsr1, SPRN_PA6T_TSR1); SYSFS_SPRSETUP(tsr2, SPRN_PA6T_TSR2); SYSFS_SPRSETUP(tsr3, SPRN_PA6T_TSR3); -#endif /* CONFIG_DEBUG_KERNEL */ +#endif /* CONFIG_DEBUG_MISC */ #endif /* HAS_PPC_PMC_PA6T */ #ifdef HAS_PPC_PMC_IBM @@ -680,7 +680,7 @@ static struct device_attribute pa6t_attrs[] = { __ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3), __ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4), __ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5), -#ifdef CONFIG_DEBUG_KERNEL +#ifdef CONFIG_DEBUG_MISC __ATTR(hid0, 0600, show_hid0, store_hid0), __ATTR(hid1, 0600, show_hid1, store_hid1), __ATTR(hid4, 0600, show_hid4, store_hid4), @@ -709,7 +709,7 @@ static struct device_attribute pa6t_attrs[] = { __ATTR(tsr1, 0600, show_tsr1, store_tsr1), __ATTR(tsr2, 0600, show_tsr2, store_tsr2), __ATTR(tsr3, 0600, show_tsr3, store_tsr3), -#endif /* CONFIG_DEBUG_KERNEL */ +#endif /* CONFIG_DEBUG_MISC */ }; #endif /* HAS_PPC_PMC_PA6T */ #endif /* HAS_PPC_PMC_CLASSIC */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index bc0503ef9c9c..325d60633dfa 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -43,7 +43,6 @@ #include <linux/timex.h> #include <linux/kernel_stat.h> #include <linux/time.h> -#include <linux/clockchips.h> #include <linux/init.h> #include <linux/profile.h> #include <linux/cpu.h> @@ -151,6 +150,8 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq); unsigned long ppc_tb_freq; EXPORT_SYMBOL_GPL(ppc_tb_freq); +bool tb_invalid; + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* * Factor for converting from cputime_t (timebase ticks) to @@ -460,6 +461,13 @@ void __delay(unsigned long loops) diff += 1000000000; spin_cpu_relax(); } while (diff < loops); + } else if (tb_invalid) { + /* + * TB is in error state and isn't ticking anymore. + * HMI handler was unable to recover from TB error. + * Return immediately, so that kernel won't get stuck here. 
+ */ + spin_cpu_relax(); } else { start = get_tbl(); while (get_tbl() - start < loops) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 1fd45a8650e1..665f294725cb 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -2088,6 +2088,10 @@ void SPEFloatingPointException(struct pt_regs *regs) int code = FPE_FLTUNK; int err; + /* We restore the interrupt state now */ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); + flush_spe_to_thread(current); spefscr = current->thread.spefscr; @@ -2133,6 +2137,10 @@ void SPEFloatingPointRoundException(struct pt_regs *regs) extern int speround_handler(struct pt_regs *regs); int err; + /* We restore the interrupt state now */ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); + preempt_disable(); if (regs->msr & MSR_SPE) giveup_spe(current); diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index ce199f6e4256..06f54d947057 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -26,9 +26,8 @@ GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n -ccflags-y := -shared -fno-common -fno-builtin -ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ - $(call cc-ldoption, -Wl$(comma)--hash-style=both) +ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ + -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both asflags-y := -D__VDSO32__ -s obj-y += vdso32_wrapper.o diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 28e7d112aa2f..32ebb3522ea1 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -12,9 +12,8 @@ GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n -ccflags-y := -shared -fno-common -fno-builtin -ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ - $(call cc-ldoption, -Wl$(comma)--hash-style=both) +ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ + -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both asflags-y := -D__VDSO64__ -s obj-y += vdso64_wrapper.o diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 21165da0052d..8eb867dbad5f 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -21,6 +21,7 @@ _GLOBAL(load_vr_state) REST_32VRS(0,r4,r3) blr EXPORT_SYMBOL(load_vr_state) +_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */ /* * Store VMX state into memory, including VSCR. 
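
The watchdog.c diff that follows converts the per-CPU heartbeat from a pinned timer_list into a pinned hrtimer whose callback re-arms itself with hrtimer_forward_now() and returns HRTIMER_RESTART. The core pattern, lifted out of the patch into a standalone sketch (the demo_* names are hypothetical; the hrtimer calls are the same ones the diff uses):

	#include <linux/hrtimer.h>
	#include <linux/ktime.h>

	static struct hrtimer demo_timer;           /* hypothetical */
	static unsigned long demo_period_ms = 1000; /* hypothetical */

	static enum hrtimer_restart demo_timer_fn(struct hrtimer *t)
	{
		/* ... periodic work goes here ... */

		/* Advance the expiry by one period past "now" and re-arm. */
		hrtimer_forward_now(t, ms_to_ktime(demo_period_ms));
		return HRTIMER_RESTART;
	}

	static void demo_timer_start(void)
	{
		hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		demo_timer.function = demo_timer_fn;
		hrtimer_start(&demo_timer, ms_to_ktime(demo_period_ms),
			      HRTIMER_MODE_REL_PINNED);
	}

Returning HRTIMER_NORESTART lets the callback stop itself, which is how the patched watchdog_timer_fn() bails out when NMI_WATCHDOG_ENABLED is clear or the CPU has left watchdog_cpumask. Note also that start_watchdog()/stop_watchdog() are now invoked via smp_call_function_single(), so hrtimer_init()/hrtimer_cancel() always run on the CPU that owns the per-CPU timer.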
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 3c6ab22a0c4e..af3c15a1d41e 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -77,7 +77,7 @@ static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */ static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */ -static DEFINE_PER_CPU(struct timer_list, wd_timer); +static DEFINE_PER_CPU(struct hrtimer, wd_hrtimer); static DEFINE_PER_CPU(u64, wd_timer_tb); /* SMP checker bits */ @@ -293,21 +293,21 @@ out: nmi_exit(); } -static void wd_timer_reset(unsigned int cpu, struct timer_list *t) -{ - t->expires = jiffies + msecs_to_jiffies(wd_timer_period_ms); - if (wd_timer_period_ms > 1000) - t->expires = __round_jiffies_up(t->expires, cpu); - add_timer_on(t, cpu); -} - -static void wd_timer_fn(struct timer_list *t) +static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { int cpu = smp_processor_id(); + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return HRTIMER_NORESTART; + + if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) + return HRTIMER_NORESTART; + watchdog_timer_interrupt(cpu); - wd_timer_reset(cpu, t); + hrtimer_forward_now(hrtimer, ms_to_ktime(wd_timer_period_ms)); + + return HRTIMER_RESTART; } void arch_touch_nmi_watchdog(void) @@ -323,37 +323,22 @@ void arch_touch_nmi_watchdog(void) } EXPORT_SYMBOL(arch_touch_nmi_watchdog); -static void start_watchdog_timer_on(unsigned int cpu) -{ - struct timer_list *t = per_cpu_ptr(&wd_timer, cpu); - - per_cpu(wd_timer_tb, cpu) = get_tb(); - - timer_setup(t, wd_timer_fn, TIMER_PINNED); - wd_timer_reset(cpu, t); -} - -static void stop_watchdog_timer_on(unsigned int cpu) -{ - struct timer_list *t = per_cpu_ptr(&wd_timer, cpu); - - del_timer_sync(t); -} - -static int start_wd_on_cpu(unsigned int cpu) +static void start_watchdog(void *arg) { + struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer); + int cpu = smp_processor_id(); unsigned long flags; if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) { WARN_ON(1); - return 0; + return; } if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) - return 0; + return; if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) - return 0; + return; wd_smp_lock(&flags); cpumask_set_cpu(cpu, &wd_cpus_enabled); @@ -363,27 +348,40 @@ static int start_wd_on_cpu(unsigned int cpu) } wd_smp_unlock(&flags); - start_watchdog_timer_on(cpu); + *this_cpu_ptr(&wd_timer_tb) = get_tb(); - return 0; + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer->function = watchdog_timer_fn; + hrtimer_start(hrtimer, ms_to_ktime(wd_timer_period_ms), + HRTIMER_MODE_REL_PINNED); } -static int stop_wd_on_cpu(unsigned int cpu) +static int start_watchdog_on_cpu(unsigned int cpu) { + return smp_call_function_single(cpu, start_watchdog, NULL, true); +} + +static void stop_watchdog(void *arg) +{ + struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer); + int cpu = smp_processor_id(); unsigned long flags; if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) - return 0; /* Can happen in CPU unplug case */ + return; /* Can happen in CPU unplug case */ - stop_watchdog_timer_on(cpu); + hrtimer_cancel(hrtimer); wd_smp_lock(&flags); cpumask_clear_cpu(cpu, &wd_cpus_enabled); wd_smp_unlock(&flags); wd_smp_clear_cpu_pending(cpu, get_tb()); +} - return 0; +static int stop_watchdog_on_cpu(unsigned int cpu) +{ + return smp_call_function_single(cpu, stop_watchdog, NULL, true); } static void watchdog_calc_timeouts(void) @@ -402,7 +400,7 @@ void watchdog_nmi_stop(void) int cpu; for_each_cpu(cpu, &wd_cpus_enabled) - 
stop_wd_on_cpu(cpu); + stop_watchdog_on_cpu(cpu); } void watchdog_nmi_start(void) @@ -411,7 +409,7 @@ void watchdog_nmi_start(void) watchdog_calc_timeouts(); for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) - start_wd_on_cpu(cpu); + start_watchdog_on_cpu(cpu); } /* @@ -423,7 +421,8 @@ int __init watchdog_nmi_probe(void) err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online", - start_wd_on_cpu, stop_wd_on_cpu); + start_watchdog_on_cpu, + stop_watchdog_on_cpu); if (err < 0) { pr_warn("could not be initialized"); return err; diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index bfdde04e4905..f53997a8ca62 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -20,7 +20,6 @@ if VIRTUALIZATION config KVM bool select PREEMPT_NOTIFIERS - select ANON_INODES select HAVE_KVM_EVENTFD select HAVE_KVM_VCPU_ASYNC_IOCTL select SRCU diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index be7bc070eae5..ab3d484c5e2e 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -600,7 +600,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* If writing != 0, then the HPTE must allow writing, if we get here */ write_ok = writing; hva = gfn_to_hva_memslot(memslot, gfn); - npages = get_user_pages_fast(hva, 1, writing, pages); + npages = get_user_pages_fast(hva, 1, writing ? FOLL_WRITE : 0, pages); if (npages < 1) { /* Check if it's an I/O mapping */ down_read(¤t->mm->mmap_sem); @@ -1193,7 +1193,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) goto err; hva = gfn_to_hva_memslot(memslot, gfn); - npages = get_user_pages_fast(hva, 1, 1, pages); + npages = get_user_pages_fast(hva, 1, FOLL_WRITE, pages); if (npages < 1) goto err; page = pages[0]; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index b2b29d4f9842..7bdcd4d7a9f0 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -74,6 +74,7 @@ #include <asm/opal.h> #include <asm/xics.h> #include <asm/xive.h> +#include <asm/hw_breakpoint.h> #include "book3s.h" @@ -3374,7 +3375,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, mtspr(SPRN_PURR, vcpu->arch.purr); mtspr(SPRN_SPURR, vcpu->arch.spurr); - if (cpu_has_feature(CPU_FTR_DAWR)) { + if (dawr_enabled()) { mtspr(SPRN_DAWR, vcpu->arch.dawr); mtspr(SPRN_DAWRX, vcpu->arch.dawrx); } diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 3b9662a4207e..085509148d95 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -822,7 +822,7 @@ static inline void this_cpu_inc_rm(unsigned int __percpu *addr) raddr = per_cpu_ptr(addr, cpu); l = (unsigned long)raddr; - if (REGION_ID(l) == VMALLOC_REGION_ID) { + if (get_region_id(l) == VMALLOC_REGION_ID) { l = vmalloc_to_phys(raddr); raddr = (unsigned int *)l; } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 3a5e719ef032..dd014308f065 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -35,6 +35,7 @@ #include <asm/thread_info.h> #include <asm/asm-compat.h> #include <asm/feature-fixups.h> +#include <asm/cpuidle.h> /* Sign-extend HDEC if not on POWER9 */ #define EXTEND_HDEC(reg) \ @@ -45,6 +46,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) /* Values in HSTATE_NAPPING(r13) */ #define 
NAPPING_CEDE 1 #define NAPPING_NOVCPU 2 +#define NAPPING_UNSPLIT 3 /* Stack frame offsets for kvmppc_hv_entry */ #define SFS 208 @@ -290,17 +292,19 @@ kvm_novcpu_exit: b kvmhv_switch_to_host /* - * We come in here when wakened from nap mode. - * Relocation is off and most register values are lost. - * r13 points to the PACA. + * We come in here when wakened from Linux offline idle code. + * Relocation is off * r3 contains the SRR1 wakeup value, SRR1 is trashed. */ - .globl kvm_start_guest -kvm_start_guest: - /* Set runlatch bit the minute you wake up from nap */ - mfspr r0, SPRN_CTRLF - ori r0, r0, 1 - mtspr SPRN_CTRLT, r0 +_GLOBAL(idle_kvm_start_guest) + ld r4,PACAEMERGSP(r13) + mfcr r5 + mflr r0 + std r1,0(r4) + std r5,8(r4) + std r0,16(r4) + subi r1,r4,STACK_FRAME_OVERHEAD + SAVE_NVGPRS(r1) /* * Could avoid this and pass it through in r3. For now, @@ -308,27 +312,23 @@ kvm_start_guest: */ mtspr SPRN_SRR1,r3 - ld r2,PACATOC(r13) - li r0,0 stb r0,PACA_FTRACE_ENABLED(r13) li r0,KVM_HWTHREAD_IN_KVM stb r0,HSTATE_HWTHREAD_STATE(r13) - /* NV GPR values from power7_idle() will no longer be valid */ - li r0,1 - stb r0,PACA_NAPSTATELOST(r13) - - /* were we napping due to cede? */ + /* kvm cede / napping does not come through here */ lbz r0,HSTATE_NAPPING(r13) - cmpwi r0,NAPPING_CEDE - beq kvm_end_cede - cmpwi r0,NAPPING_NOVCPU - beq kvm_novcpu_wakeup + twnei r0,0 - ld r1,PACAEMERGSP(r13) - subi r1,r1,STACK_FRAME_OVERHEAD + b 1f + +kvm_unsplit_wakeup: + li r0, 0 + stb r0, HSTATE_NAPPING(r13) + +1: /* * We weren't napping due to cede, so this must be a secondary @@ -437,19 +437,25 @@ kvm_no_guest: lbz r3, HSTATE_HWTHREAD_REQ(r13) cmpwi r3, 0 bne 54f -/* - * We jump to pnv_wakeup_loss, which will return to the caller - * of power7_nap in the powernv cpu offline loop. The value we - * put in r3 becomes the return value for power7_nap. pnv_wakeup_loss - * requires SRR1 in r12. - */ + + /* + * Jump to idle_return_gpr_loss, which returns to the + * idle_kvm_start_guest caller. + */ li r3, LPCR_PECE0 mfspr r4, SPRN_LPCR rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 mtspr SPRN_LPCR, r4 - li r3, 0 - mfspr r12,SPRN_SRR1 - b pnv_wakeup_loss + /* set up r3 for return */ + mfspr r3,SPRN_SRR1 + REST_NVGPRS(r1) + addi r1, r1, STACK_FRAME_OVERHEAD + ld r0, 16(r1) + ld r5, 8(r1) + ld r1, 0(r1) + mtlr r0 + mtcr r5 + blr 53: HMT_LOW ld r5, HSTATE_KVM_VCORE(r13) @@ -534,6 +540,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) lbz r0, KVM_SPLIT_DO_NAP(r3) cmpwi r0, 0 beq 57f + li r3, NAPPING_UNSPLIT + stb r3, HSTATE_NAPPING(r13) li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4 mfspr r5, SPRN_LPCR rlwimi r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1) @@ -822,18 +830,21 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) mtspr SPRN_IAMR, r5 mtspr SPRN_PSPB, r6 mtspr SPRN_FSCR, r7 - ld r5, VCPU_DAWR(r4) - ld r6, VCPU_DAWRX(r4) - ld r7, VCPU_CIABR(r4) - ld r8, VCPU_TAR(r4) /* * Handle broken DAWR case by not writing it. This means we * can still store the DAWR register for migration. 
*/ -BEGIN_FTR_SECTION + LOAD_REG_ADDR(r5, dawr_force_enable) + lbz r5, 0(r5) + cmpdi r5, 0 + beq 1f + ld r5, VCPU_DAWR(r4) + ld r6, VCPU_DAWRX(r4) mtspr SPRN_DAWR, r5 mtspr SPRN_DAWRX, r6 -END_FTR_SECTION_IFSET(CPU_FTR_DAWR) +1: + ld r7, VCPU_CIABR(r4) + ld r8, VCPU_TAR(r4) mtspr SPRN_CIABR, r7 mtspr SPRN_TAR, r8 ld r5, VCPU_IC(r4) @@ -2513,11 +2524,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) blr 2: -BEGIN_FTR_SECTION - /* POWER9 with disabled DAWR */ + LOAD_REG_ADDR(r11, dawr_force_enable) + lbz r11, 0(r11) + cmpdi r11, 0 li r3, H_HARDWARE - blr -END_FTR_SECTION_IFCLR(CPU_FTR_DAWR) + beqlr /* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */ rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW rlwimi r5, r4, 2, DAWRX_WT @@ -2654,6 +2665,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) lis r3, LPCR_PECEDP@h /* Do wake on privileged doorbell */ + /* Go back to host stack */ + ld r1, HSTATE_HOST_R1(r13) + /* * Take a nap until a decrementer or external or doobell interrupt * occurs, with PECE1 and PECE0 set in LPCR. @@ -2682,26 +2696,42 @@ BEGIN_FTR_SECTION * requested level = 0 (just stop dispatching) */ lis r3, (PSSCR_EC | PSSCR_ESL)@h - mtspr SPRN_PSSCR, r3 /* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */ li r4, LPCR_PECE_HVEE@higher sldi r4, r4, 32 or r5, r5, r4 -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) +FTR_SECTION_ELSE + li r3, PNV_THREAD_NAP +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) mtspr SPRN_LPCR,r5 isync - li r0, 0 - std r0, HSTATE_SCRATCH0(r13) - ptesync - ld r0, HSTATE_SCRATCH0(r13) -1: cmpd r0, r0 - bne 1b + BEGIN_FTR_SECTION - nap + bl isa300_idle_stop_mayloss FTR_SECTION_ELSE - PPC_STOP -ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) - b . + bl isa206_idle_insn_mayloss +ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) + + mfspr r0, SPRN_CTRLF + ori r0, r0, 1 + mtspr SPRN_CTRLT, r0 + + mtspr SPRN_SRR1, r3 + + li r0, 0 + stb r0, PACA_FTRACE_ENABLED(r13) + + li r0, KVM_HWTHREAD_IN_KVM + stb r0, HSTATE_HWTHREAD_STATE(r13) + + lbz r0, HSTATE_NAPPING(r13) + cmpwi r0, NAPPING_CEDE + beq kvm_end_cede + cmpwi r0, NAPPING_NOVCPU + beq kvm_novcpu_wakeup + cmpwi r0, NAPPING_UNSPLIT + beq kvm_unsplit_wakeup + twi 31,0,0 /* Nap state must not be zero */ 33: mr r4, r3 li r3, 0 @@ -2709,12 +2739,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) b 34f kvm_end_cede: + /* Woken by external or decrementer interrupt */ + /* get vcpu pointer */ ld r4, HSTATE_KVM_VCPU(r13) - /* Woken by external or decrementer interrupt */ - ld r1, HSTATE_HOST_R1(r13) - #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING addi r3, r4, VCPU_TB_RMINTR bl kvmhv_accumulate_time diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c index 24296f4cadc6..e0af53fd78c5 100644 --- a/arch/powerpc/kvm/e500_mmu.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -783,7 +783,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, if (!pages) return -ENOMEM; - ret = get_user_pages_fast(cfg->array, num_pages, 1, pages); + ret = get_user_pages_fast(cfg->array, num_pages, FOLL_WRITE, pages); if (ret < 0) goto free_pages; diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 79396e184bca..c55f9c27bf79 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -8,9 +8,22 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE) -obj-y += string.o alloc.o code-patching.o feature-fixups.o +KASAN_SANITIZE_code-patching.o := n +KASAN_SANITIZE_feature-fixups.o := n -obj-$(CONFIG_PPC32) += div64.o copy_32.o 
crtsavres.o strlen_32.o +ifdef CONFIG_KASAN +CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING +CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING +endif + +obj-y += alloc.o code-patching.o feature-fixups.o + +ifndef CONFIG_KASAN +obj-y += string.o memcmp_$(BITS).o +obj-$(CONFIG_PPC32) += strlen_32.o +endif + +obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o @@ -34,7 +47,7 @@ obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \ test_emulate_step_exec_instr.o obj-y += checksum_$(BITS).o checksum_wrappers.o \ - string_$(BITS).o memcmp_$(BITS).o + string_$(BITS).o obj-y += sstep.o ldstfp.o quad.o obj64-y += quad.o diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c index 890d4ddd91d6..bb9307ce2440 100644 --- a/arch/powerpc/lib/checksum_wrappers.c +++ b/arch/powerpc/lib/checksum_wrappers.c @@ -29,6 +29,7 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst, unsigned int csum; might_sleep(); + allow_read_from_user(src, len); *err_ptr = 0; @@ -60,6 +61,7 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst, } out: + prevent_read_from_user(src, len); return (__force __wsum)csum; } EXPORT_SYMBOL(csum_and_copy_from_user); @@ -70,6 +72,7 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, unsigned int csum; might_sleep(); + allow_write_to_user(dst, len); *err_ptr = 0; @@ -97,6 +100,7 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, } out: + prevent_write_to_user(dst, len); return (__force __wsum)csum; } EXPORT_SYMBOL(csum_and_copy_to_user); diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 506413a2c25e..90c9d4a1e36f 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -15,7 +15,6 @@ #include <linux/cpuhotplug.h> #include <linux/slab.h> #include <linux/uaccess.h> -#include <linux/kprobes.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> @@ -26,9 +25,9 @@ static int __patch_instruction(unsigned int *exec_addr, unsigned int instr, unsigned int *patch_addr) { - int err; + int err = 0; - __put_user_size(instr, patch_addr, 4, err); + __put_user_asm(instr, patch_addr, err, "stw"); if (err) return err; diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index ba66846fe973..d5642481fb98 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -14,6 +14,7 @@ #include <asm/ppc_asm.h> #include <asm/export.h> #include <asm/code-patching-asm.h> +#include <asm/kasan.h> #define COPY_16_BYTES \ lwz r7,4(r4); \ @@ -68,6 +69,7 @@ CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT CACHELINE_MASK = (L1_CACHE_BYTES-1) +#ifndef CONFIG_KASAN _GLOBAL(memset16) rlwinm. r0 ,r5, 31, 1, 31 addi r6, r3, -4 @@ -81,6 +83,7 @@ _GLOBAL(memset16) sth r4, 4(r6) blr EXPORT_SYMBOL(memset16) +#endif /* * Use dcbz on the complete cache lines in the destination @@ -91,7 +94,7 @@ EXPORT_SYMBOL(memset16) * We therefore skip the optimised bloc that uses dcbz. This jump is * replaced by a nop once cache is active. This is done in machine_init() */ -_GLOBAL(memset) +_GLOBAL_KASAN(memset) cmplwi 0,r5,4 blt 7f @@ -151,6 +154,7 @@ _GLOBAL(memset) bdnz 9b blr EXPORT_SYMBOL(memset) +EXPORT_SYMBOL_KASAN(memset) /* * This version uses dcbz on the complete cache lines in the @@ -163,12 +167,12 @@ EXPORT_SYMBOL(memset) * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is * replaced by a nop once cache is active. 
This is done in machine_init() */ -_GLOBAL(memmove) +_GLOBAL_KASAN(memmove) cmplw 0,r3,r4 bgt backwards_memcpy /* fall through */ -_GLOBAL(memcpy) +_GLOBAL_KASAN(memcpy) 1: b generic_memcpy patch_site 1b, patch__memcpy_nocache @@ -244,6 +248,8 @@ _GLOBAL(memcpy) 65: blr EXPORT_SYMBOL(memcpy) EXPORT_SYMBOL(memmove) +EXPORT_SYMBOL_KASAN(memcpy) +EXPORT_SYMBOL_KASAN(memmove) generic_memcpy: srwi. r7,r5,3 diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S index 3c3be02f33b7..7f6bd031c306 100644 --- a/arch/powerpc/lib/mem_64.S +++ b/arch/powerpc/lib/mem_64.S @@ -12,7 +12,9 @@ #include <asm/errno.h> #include <asm/ppc_asm.h> #include <asm/export.h> +#include <asm/kasan.h> +#ifndef CONFIG_KASAN _GLOBAL(__memset16) rlwimi r4,r4,16,0,15 /* fall through */ @@ -29,8 +31,9 @@ _GLOBAL(__memset64) EXPORT_SYMBOL(__memset16) EXPORT_SYMBOL(__memset32) EXPORT_SYMBOL(__memset64) +#endif -_GLOBAL(memset) +_GLOBAL_KASAN(memset) neg r0,r3 rlwimi r4,r4,8,16,23 andi. r0,r0,7 /* # bytes to be 8-byte aligned */ @@ -96,8 +99,9 @@ _GLOBAL(memset) stb r4,0(r6) blr EXPORT_SYMBOL(memset) +EXPORT_SYMBOL_KASAN(memset) -_GLOBAL_TOC(memmove) +_GLOBAL_TOC_KASAN(memmove) cmplw 0,r3,r4 bgt backwards_memcpy b memcpy @@ -139,3 +143,4 @@ _GLOBAL(backwards_memcpy) mtctr r7 b 1b EXPORT_SYMBOL(memmove) +EXPORT_SYMBOL_KASAN(memmove) diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S index 273ea67e60a1..25c3772c1dfb 100644 --- a/arch/powerpc/lib/memcpy_64.S +++ b/arch/powerpc/lib/memcpy_64.S @@ -11,6 +11,7 @@ #include <asm/export.h> #include <asm/asm-compat.h> #include <asm/feature-fixups.h> +#include <asm/kasan.h> #ifndef SELFTEST_CASE /* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */ @@ -18,7 +19,7 @@ #endif .align 7 -_GLOBAL_TOC(memcpy) +_GLOBAL_TOC_KASAN(memcpy) BEGIN_FTR_SECTION #ifdef __LITTLE_ENDIAN__ cmpdi cr7,r5,0 @@ -230,3 +231,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD) blr #endif EXPORT_SYMBOL(memcpy) +EXPORT_SYMBOL_KASAN(memcpy) diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 3c1bd9fa23cd..0f499db315d6 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -5,53 +5,18 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE) - obj-y := fault.o mem.o pgtable.o mmap.o \ init_$(BITS).o pgtable_$(BITS).o \ + pgtable-frag.o \ init-common.o mmu_context.o drmem.o -obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ - tlb_nohash_low.o -obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o -hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o -obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o -obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb.o \ - $(hash64-y) mmu_context_book3s64.o \ - pgtable-book3s64.o pgtable-frag.o -obj-$(CONFIG_PPC32) += pgtable-frag.o -obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o -obj-$(CONFIG_PPC_BOOK3S_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o -obj-$(CONFIG_PPC_BOOK3S) += tlb_hash$(BITS).o -ifdef CONFIG_PPC_BOOK3S_64 -obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o -obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o -endif -obj-$(CONFIG_40x) += 40x_mmu.o -obj-$(CONFIG_44x) += 44x_mmu.o -obj-$(CONFIG_PPC_8xx) += 8xx_mmu.o -obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o +obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/ +obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/ +obj-$(CONFIG_PPC_BOOK3S_64) += book3s64/ obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o -obj-$(CONFIG_PPC_SPLPAR) += vphn.o obj-$(CONFIG_PPC_MM_SLICES) += slice.o -obj-y += hugetlbpage.o -ifdef 
CONFIG_HUGETLB_PAGE -obj-$(CONFIG_PPC_BOOK3S_64) += hugetlbpage-hash64.o -obj-$(CONFIG_PPC_RADIX_MMU) += hugetlbpage-radix.o -obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o -endif -obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o -obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o -obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o obj-$(CONFIG_PPC_PTDUMP) += ptdump/ -obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o - -# Disable kcov instrumentation on sensitive code -# This is necessary for booting with kcov enabled on book3e machines -KCOV_INSTRUMENT_tlb_nohash.o := n -KCOV_INSTRUMENT_fsl_booke_mmu.o := n - -# Instrumenting the SLB fault path can lead to duplicate SLB entries -KCOV_INSTRUMENT_slb.o := n +obj-$(CONFIG_KASAN) += kasan/ diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile new file mode 100644 index 000000000000..1732eaa740a9 --- /dev/null +++ b/arch/powerpc/mm/book3s32/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 + +KASAN_SANITIZE_mmu.o := n + +ifdef CONFIG_KASAN +CFLAGS_mmu.o += -DDISABLE_BRANCH_PROFILING +endif + +obj-y += mmu.o hash_low.o mmu_context.o tlb.o diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/book3s32/hash_low.S index a6c491f18a04..e27792d0b744 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -309,13 +309,13 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64) _GLOBAL(create_hpte) /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ - rlwinm r8,r5,32-10,31,31 /* _PAGE_RW -> PP lsb */ - rlwinm r0,r5,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ + rlwinm r8,r5,32-9,30,30 /* _PAGE_RW -> PP msb */ + rlwinm r0,r5,32-6,30,30 /* _PAGE_DIRTY -> PP msb */ and r8,r8,r0 /* writable if _RW & _DIRTY */ rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */ rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */ ori r8,r8,0xe04 /* clear out reserved bits */ - andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */ + andc r8,r5,r8 /* PP = user? (rw&dirty? 
1: 3): 0 */ BEGIN_FTR_SECTION rlwinm r8,r8,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/book3s32/mmu.c index 5d9c3ff728c9..fc073cb2c517 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -34,11 +34,12 @@ #include <asm/code-patching.h> #include <asm/sections.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> -struct hash_pte *Hash, *Hash_end; -unsigned long Hash_size, Hash_mask; +struct hash_pte *Hash; +static unsigned long Hash_size, Hash_mask; unsigned long _SDR1; +static unsigned int hash_mb, hash_mb2; struct ppc_bat BATS[8][2]; /* 8 pairs of IBAT, DBAT */ @@ -318,7 +319,6 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, */ void __init MMU_init_hw(void) { - unsigned int hmask, mb, mb2; unsigned int n_hpteg, lg_n_hpteg; if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) @@ -355,26 +355,34 @@ void __init MMU_init_hw(void) __func__, Hash_size, Hash_size); _SDR1 = __pa(Hash) | SDR1_LOW_BITS; - Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size); + pr_info("Total memory = %lldMB; using %ldkB for hash table\n", + (unsigned long long)(total_memory >> 20), Hash_size >> 10); - printk("Total memory = %lldMB; using %ldkB for hash table (at %p)\n", - (unsigned long long)(total_memory >> 20), Hash_size >> 10, Hash); + Hash_mask = n_hpteg - 1; + hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg; + if (lg_n_hpteg > 16) + hash_mb2 = 16 - LG_HPTEG_SIZE; +} + +void __init MMU_init_hw_patch(void) +{ + unsigned int hmask = Hash_mask >> (16 - LG_HPTEG_SIZE); + + if (ppc_md.progress) + ppc_md.progress("hash:patch", 0x345); + if (ppc_md.progress) + ppc_md.progress("hash:done", 0x205); + + /* WARNING: Make sure nothing can trigger a KASAN check past this point */ /* * Patch up the instructions in hashtable.S:create_hpte */ - if ( ppc_md.progress ) ppc_md.progress("hash:patch", 0x345); - Hash_mask = n_hpteg - 1; - hmask = Hash_mask >> (16 - LG_HPTEG_SIZE); - mb2 = mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg; - if (lg_n_hpteg > 16) - mb2 = 16 - LG_HPTEG_SIZE; - modify_instruction_site(&patch__hash_page_A0, 0xffff, ((unsigned int)Hash - PAGE_OFFSET) >> 16); - modify_instruction_site(&patch__hash_page_A1, 0x7c0, mb << 6); - modify_instruction_site(&patch__hash_page_A2, 0x7c0, mb2 << 6); + modify_instruction_site(&patch__hash_page_A1, 0x7c0, hash_mb << 6); + modify_instruction_site(&patch__hash_page_A2, 0x7c0, hash_mb2 << 6); modify_instruction_site(&patch__hash_page_B, 0xffff, hmask); modify_instruction_site(&patch__hash_page_C, 0xffff, hmask); @@ -383,11 +391,9 @@ void __init MMU_init_hw(void) */ modify_instruction_site(&patch__flush_hash_A0, 0xffff, ((unsigned int)Hash - PAGE_OFFSET) >> 16); - modify_instruction_site(&patch__flush_hash_A1, 0x7c0, mb << 6); - modify_instruction_site(&patch__flush_hash_A2, 0x7c0, mb2 << 6); + modify_instruction_site(&patch__flush_hash_A1, 0x7c0, hash_mb << 6); + modify_instruction_site(&patch__flush_hash_A2, 0x7c0, hash_mb2 << 6); modify_instruction_site(&patch__flush_hash_B, 0xffff, hmask); - - if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205); } void setup_initial_memory_limit(phys_addr_t first_memblock_base, @@ -404,3 +410,33 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, else /* Anything else has 256M mapped */ memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000)); } + +void __init print_system_hash_info(void) +{ + pr_info("Hash_size = 0x%lx\n", Hash_size); + if 
(Hash_mask) + pr_info("Hash_mask = 0x%lx\n", Hash_mask); +} + +#ifdef CONFIG_PPC_KUEP +void __init setup_kuep(bool disabled) +{ + pr_info("Activating Kernel Userspace Execution Prevention\n"); + + if (cpu_has_feature(CPU_FTR_601)) + pr_warn("KUEP is not working on powerpc 601 (No NX bit in Seg Regs)\n"); + + if (disabled) + pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n"); +} +#endif + +#ifdef CONFIG_PPC_KUAP +void __init setup_kuap(bool disabled) +{ + pr_info("Activating Kernel Userspace Access Protection\n"); + + if (disabled) + pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n"); +} +#endif diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/book3s32/mmu_context.c index 921c1e33e941..921c1e33e941 100644 --- a/arch/powerpc/mm/mmu_context_hash32.c +++ b/arch/powerpc/mm/book3s32/mmu_context.c diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/book3s32/tlb.c index cf8472cf3d59..8d56f0417f87 100644 --- a/arch/powerpc/mm/tlb_hash32.c +++ b/arch/powerpc/mm/book3s32/tlb.c @@ -32,7 +32,7 @@ #include <asm/tlbflush.h> #include <asm/tlb.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> /* * Called when unmapping pages to flush entries from the TLB/hash table. diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile new file mode 100644 index 000000000000..974b4fc19f4f --- /dev/null +++ b/arch/powerpc/mm/book3s64/Makefile @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0 + +ccflags-y := $(NO_MINIMAL_TOC) + +CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE) + +obj-y += hash_pgtable.o hash_utils.o slb.o \ + mmu_context.o pgtable.o hash_tlb.o +obj-$(CONFIG_PPC_NATIVE) += hash_native.o +obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o +obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o +obj-$(CONFIG_PPC_64K_PAGES) += hash_64k.o +obj-$(CONFIG_PPC_SPLPAR) += vphn.o +obj-$(CONFIG_HUGETLB_PAGE) += hash_hugetlbpage.o +ifdef CONFIG_HUGETLB_PAGE +obj-$(CONFIG_PPC_RADIX_MMU) += radix_hugetlbpage.o +endif +obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o +obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o +obj-$(CONFIG_SPAPR_TCE_IOMMU) += iommu_api.o +obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o + +# Instrumenting the SLB fault path can lead to duplicate SLB entries +KCOV_INSTRUMENT_slb.o := n diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c index 6fa6765a10eb..22e787123cdf 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/book3s64/hash_4k.c @@ -1,6 +1,6 @@ /* * Copyright IBM Corporation, 2015 - * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU Lesser General Public License diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c index 3afa253d7f52..7084ce2951e6 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/book3s64/hash_64k.c @@ -1,6 +1,6 @@ /* * Copyright IBM Corporation, 2015 - * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU Lesser General Public License diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/book3s64/hash_hugepage.c index dfbc3b32f09b..440823797de7 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/book3s64/hash_hugepage.c @@ -1,6 +1,6 @@ /* * 
Copyright IBM Corporation, 2013 - * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2.1 of the GNU Lesser General Public License diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c index b0d9209d9a86..eefa89c6117b 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c @@ -15,6 +15,9 @@ #include <asm/cacheflush.h> #include <asm/machdep.h> +unsigned int hpage_shift; +EXPORT_SYMBOL(hpage_shift); + extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long pa, unsigned long rlags, unsigned long vflags, int psize, int ssize); @@ -34,7 +37,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, /* Search the Linux page table for a match with va */ vpn = hpt_vpn(ea, vsid, ssize); - /* At this point, we have a pte (old_pte) which can be used to build + /* + * At this point, we have a pte (old_pte) which can be used to build * or update an HPTE. There are 2 cases: * * 1. There is a valid (present) pte with no associated HPTE (this is @@ -55,8 +59,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, if (unlikely(!check_pte_access(access, old_pte))) return 1; - /* Try to lock the PTE, add ACCESSED and DIRTY if it was - * a write access */ + /* + * Try to lock the PTE, add ACCESSED and DIRTY if it was + * a write access + */ new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED; if (access & _PAGE_WRITE) new_pte |= _PAGE_DIRTY; @@ -74,8 +80,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, rpte = __real_pte(__pte(old_pte), ptep, offset); if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) - /* No CPU has hugepages but lacks no execute, so we - * don't need to worry about that case */ + /* + * No CPU has hugepages but lacks no execute, so we + * don't need to worry about that case + */ rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); /* Check if pte already has an hpte (case 2) */ @@ -145,3 +153,16 @@ void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr old_pte, pte); set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } + +void hugetlbpage_init_default(void) +{ + /* Set default large page size. 
Currently, we pick 16M or 1M + * depending on what is available + */ + if (mmu_psize_defs[MMU_PAGE_16M].shift) + hpage_shift = mmu_psize_defs[MMU_PAGE_16M].shift; + else if (mmu_psize_defs[MMU_PAGE_1M].shift) + hpage_shift = mmu_psize_defs[MMU_PAGE_1M].shift; + else if (mmu_psize_defs[MMU_PAGE_2M].shift) + hpage_shift = mmu_psize_defs[MMU_PAGE_2M].shift; +} diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/book3s64/hash_native.c index aaa28fd918fe..aaa28fd918fe 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/book3s64/hash_native.c diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/book3s64/hash_pgtable.c index c08d49046a96..1fd025dba4a3 100644 --- a/arch/powerpc/mm/pgtable-hash64.c +++ b/arch/powerpc/mm/book3s64/hash_pgtable.c @@ -19,7 +19,7 @@ #include <asm/mmu.h> #include <asm/tlb.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> #define CREATE_TRACE_POINTS #include <trace/events/thp.h> @@ -112,9 +112,16 @@ int __meminit hash__vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys) { - int rc = htab_bolt_mapping(start, start + page_size, phys, - pgprot_val(PAGE_KERNEL), - mmu_vmemmap_psize, mmu_kernel_ssize); + int rc; + + if ((start + page_size) >= H_VMEMMAP_END) { + pr_warn("Outside the supported range\n"); + return -1; + } + + rc = htab_bolt_mapping(start, start + page_size, phys, + pgprot_val(PAGE_KERNEL), + mmu_vmemmap_psize, mmu_kernel_ssize); if (rc < 0) { int rc2 = htab_remove_mapping(start, start + page_size, mmu_vmemmap_psize, diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/book3s64/hash_tlb.c index 87d71dd25441..d4f0101447b1 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/book3s64/hash_tlb.c @@ -55,7 +55,8 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, i = batch->index; - /* Get page size (maybe move back to caller). + /* + * Get page size (maybe move back to caller). * * NOTE: when using special 64K mappings in 4K environment like * for SPEs, we obtain the page size from the slice, which thus @@ -77,10 +78,12 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, #endif } else { psize = pte_pagesize_index(mm, addr, pte); - /* Mask the address for the standard page size. If we + /* + * Mask the address for the standard page size. If we * have a 64k page kernel, but the hardware does not * support 64k pages, this might be different from the - * hardware page size encoded in the slice table. */ + * hardware page size encoded in the slice table. + */ addr &= PAGE_MASK; offset = PTRS_PER_PTE; } @@ -161,7 +164,8 @@ void hash__tlb_flush(struct mmu_gather *tlb) { struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch); - /* If there's a TLB batch pending, then we must flush it because the + /* + * If there's a TLB batch pending, then we must flush it because the * pages are going to be freed and we really don't want to have a CPU * access a freed page because it has a stale TLB */ @@ -201,7 +205,8 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, BUG_ON(!mm->pgd); - /* Note: Normally, we should only ever use a batch within a + /* + * Note: Normally, we should only ever use a batch within a * PTE locked section. 
This violates the rule, but will work * since we don't actually modify the PTEs, we just flush the * hash while leaving the PTEs intact (including their reference @@ -238,7 +243,8 @@ void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr) unsigned long flags; addr = _ALIGN_DOWN(addr, PMD_SIZE); - /* Note: Normally, we should only ever use a batch within a + /* + * Note: Normally, we should only ever use a batch within a * PTE locked section. This violates the rule, but will work * since we don't actually modify the PTEs, we just flush the * hash while leaving the PTEs intact (including their reference diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/book3s64/hash_utils.c index 0a4f939a8161..919a861a8ec0 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -37,6 +37,7 @@ #include <linux/context_tracking.h> #include <linux/libfdt.h> #include <linux/pkeys.h> +#include <linux/hugetlb.h> #include <asm/debugfs.h> #include <asm/processor.h> @@ -65,6 +66,8 @@ #include <asm/pte-walk.h> #include <asm/asm-prototypes.h> +#include <mm/mmu_decl.h> + #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) #else @@ -128,7 +131,8 @@ static DEFINE_SPINLOCK(linear_map_hash_lock); struct mmu_hash_ops mmu_hash_ops; EXPORT_SYMBOL(mmu_hash_ops); -/* There are definitions of page sizes arrays to be used when none +/* + * These are definitions of page sizes arrays to be used when none * is provided by the firmware. */ @@ -145,7 +149,8 @@ static struct mmu_psize_def mmu_psize_defaults[] = { }, }; -/* POWER4, GPUL, POWER5 +/* + * POWER4, GPUL, POWER5 * * Support for 16Mb large pages */ @@ -479,7 +484,8 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, } #ifdef CONFIG_HUGETLB_PAGE -/* Scan for 16G memory blocks that have been set aside for huge pages +/* + * Scan for 16G memory blocks that have been set aside for huge pages * and reserve those blocks for 16G huge pages. */ static int __init htab_dt_scan_hugepage_blocks(unsigned long node, @@ -496,8 +502,10 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node, if (type == NULL || strcmp(type, "memory") != 0) return 0; - /* This property is the log base 2 of the number of virtual pages that - * will represent this memory block. */ + /* + * This property is the log base 2 of the number of virtual pages that + * will represent this memory block. + */ page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL); if (page_count_prop == NULL) return 0; @@ -673,7 +681,8 @@ static void __init htab_init_page_sizes(void) #endif /* CONFIG_PPC_64K_PAGES */ #ifdef CONFIG_SPARSEMEM_VMEMMAP - /* We try to use 16M pages for vmemmap if that is supported + /* + * We try to use 16M pages for vmemmap if that is supported * and we have at least 1G of RAM at boot */ if (mmu_psize_defs[MMU_PAGE_16M].shift && @@ -742,7 +751,8 @@ unsigned htab_shift_for_mem_size(unsigned long mem_size) static unsigned long __init htab_get_table_size(void) { - /* If hash size isn't already provided by the platform, we try to + /* + * If hash size isn't already provided by the platform, we try to * retrieve it from the device-tree. 
If it's not there neither, we * calculate it now based on the total RAM size */ @@ -755,12 +765,12 @@ static unsigned long __init htab_get_table_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG -void resize_hpt_for_hotplug(unsigned long new_mem_size) +int resize_hpt_for_hotplug(unsigned long new_mem_size) { unsigned target_hpt_shift; if (!mmu_hash_ops.resize_hpt) - return; + return 0; target_hpt_shift = htab_shift_for_mem_size(new_mem_size); @@ -772,23 +782,25 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size) * reduce unless the target shift is at least 2 below the * current shift */ - if ((target_hpt_shift > ppc64_pft_size) - || (target_hpt_shift < (ppc64_pft_size - 1))) { - int rc; - - rc = mmu_hash_ops.resize_hpt(target_hpt_shift); - if (rc && (rc != -ENODEV)) - printk(KERN_WARNING - "Unable to resize hash page table to target order %d: %d\n", - target_hpt_shift, rc); - } + if (target_hpt_shift > ppc64_pft_size || + target_hpt_shift < ppc64_pft_size - 1) + return mmu_hash_ops.resize_hpt(target_hpt_shift); + + return 0; } int hash__create_section_mapping(unsigned long start, unsigned long end, int nid) { - int rc = htab_bolt_mapping(start, end, __pa(start), - pgprot_val(PAGE_KERNEL), mmu_linear_psize, - mmu_kernel_ssize); + int rc; + + if (end >= H_VMALLOC_START) { + pr_warn("Outside the supported range\n"); + return -1; + } + + rc = htab_bolt_mapping(start, end, __pa(start), + pgprot_val(PAGE_KERNEL), mmu_linear_psize, + mmu_kernel_ssize); if (rc < 0) { int rc2 = htab_remove_mapping(start, end, mmu_linear_psize, @@ -929,6 +941,11 @@ static void __init htab_initialize(void) DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", base, size, prot); + if ((base + size) >= H_VMALLOC_START) { + pr_warn("Outside the supported range\n"); + continue; + } + BUG_ON(htab_bolt_mapping(base, base + size, __pa(base), prot, mmu_linear_psize, mmu_kernel_ssize)); } @@ -968,6 +985,7 @@ void __init hash__early_init_devtree(void) htab_scan_page_sizes(); } +struct hash_mm_context init_hash_mm_context; void __init hash__early_init_mmu(void) { #ifndef CONFIG_PPC_64K_PAGES @@ -1013,11 +1031,11 @@ void __init hash__early_init_mmu(void) __pgd_val_bits = HASH_PGD_VAL_BITS; __kernel_virt_start = H_KERN_VIRT_START; - __kernel_virt_size = H_KERN_VIRT_SIZE; __vmalloc_start = H_VMALLOC_START; __vmalloc_end = H_VMALLOC_END; __kernel_io_start = H_KERN_IO_START; - vmemmap = (struct page *)H_VMEMMAP_BASE; + __kernel_io_end = H_KERN_IO_END; + vmemmap = (struct page *)H_VMEMMAP_START; ioremap_bot = IOREMAP_BASE; #ifdef CONFIG_PCI @@ -1035,12 +1053,16 @@ void __init hash__early_init_mmu(void) if (!mmu_hash_ops.hpte_insert) panic("hash__early_init_mmu: No MMU hash ops defined!\n"); - /* Initialize the MMU Hash table and create the linear mapping + /* + * Initialize the MMU Hash table and create the linear mapping * of memory. Has to be done before SLB initialization as this is * currently where the page size encoding is obtained. 
*/ htab_initialize(); + init_mm.context.hash_context = &init_hash_mm_context; + mm_ctx_set_slb_addr_limit(&init_mm.context, SLB_ADDR_LIMIT_DEFAULT); + pr_info("Initializing hash mmu with SLB\n"); /* Initialize SLB management */ slb_initialize(); @@ -1147,10 +1169,13 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) */ static int subpage_protection(struct mm_struct *mm, unsigned long ea) { - struct subpage_prot_table *spt = &mm->context.spt; + struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context); u32 spp = 0; u32 **sbpm, *sbpp; + if (!spt) + return 0; + if (ea >= spt->maxaddr) return 0; if (ea < 0x100000000UL) { @@ -1214,7 +1239,8 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, } } -/* Result code is: +/* + * Result code is: * 0 - handled * 1 - normal page fault * -1 - critical hash insertion error @@ -1238,7 +1264,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, trace_hash_fault(ea, access, trap); /* Get region & vsid */ - switch (REGION_ID(ea)) { + switch (get_region_id(ea)) { case USER_REGION_ID: user_region = 1; if (! mm) { @@ -1252,15 +1278,19 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, break; case VMALLOC_REGION_ID: vsid = get_kernel_vsid(ea, mmu_kernel_ssize); - if (ea < VMALLOC_END) - psize = mmu_vmalloc_psize; - else - psize = mmu_io_psize; + psize = mmu_vmalloc_psize; + ssize = mmu_kernel_ssize; + break; + + case IO_REGION_ID: + vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + psize = mmu_io_psize; ssize = mmu_kernel_ssize; break; default: - /* Not a valid range - * Send the problem up to do_page_fault + /* + * Not a valid range + * Send the problem up to do_page_fault() */ rc = 1; goto bail; @@ -1285,7 +1315,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, flags |= HPTE_LOCAL_UPDATE; #ifndef CONFIG_PPC_64K_PAGES - /* If we use 4K pages and our psize is not 4K, then we might + /* + * If we use 4K pages and our psize is not 4K, then we might * be hitting a special driver mapping, and need to align the * address before we fetch the PTE. 
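
hash_page_mm() above now dispatches on get_region_id() with a dedicated IO_REGION_ID case, replacing the old sub-test of ea against VMALLOC_END inside the VMALLOC case. A toy classifier showing just that shape; the real region ids come from the top bits of the effective address, and these linear boundary constants are purely illustrative:

enum region { USER_REGION, VMALLOC_REGION, IO_REGION, INVALID_REGION };

#define VMALLOC_START   0x1000UL        /* made-up stand-in boundaries */
#define IO_START        0x2000UL
#define IO_END          0x3000UL

static enum region classify(unsigned long ea)
{
        if (ea < VMALLOC_START)
                return USER_REGION;
        if (ea < IO_START)
                return VMALLOC_REGION;  /* was: VMALLOC && ea < VMALLOC_END  */
        if (ea < IO_END)
                return IO_REGION;       /* was: VMALLOC && ea >= VMALLOC_END */
        return INVALID_REGION;
}

int main(void)
{
        return classify(0x1500UL) == VMALLOC_REGION ? 0 : 1;
}
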
* @@ -1307,7 +1338,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, /* Add _PAGE_PRESENT to the required access perm */ access |= _PAGE_PRESENT; - /* Pre-check access permissions (will be re-checked atomically + /* + * Pre-check access permissions (will be re-checked atomically * in __hash_page_XX but this pre-check is a fast path */ if (!check_pte_access(access, pte_val(*ptep))) { @@ -1354,7 +1386,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, psize = MMU_PAGE_4K; } - /* If this PTE is non-cacheable and we have restrictions on + /* + * If this PTE is non-cacheable and we have restrictions on * using non cacheable large pages, then we switch to 4k */ if (mmu_ci_restrictions && psize == MMU_PAGE_64K && pte_ci(*ptep)) { @@ -1395,7 +1428,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, flags, ssize, spp); } - /* Dump some info in case of hash insertion failure, they should + /* + * Dump some info in case of hash insertion failure, they should * never happen so it is really useful to know if/when they do */ if (rc == -1) @@ -1421,7 +1455,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, unsigned long flags = 0; struct mm_struct *mm = current->mm; - if (REGION_ID(ea) == VMALLOC_REGION_ID) + if ((get_region_id(ea) == VMALLOC_REGION_ID) || + (get_region_id(ea) == IO_REGION_ID)) mm = &init_mm; if (dsisr & DSISR_NOHPTE) @@ -1437,8 +1472,9 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap, unsigned long access = _PAGE_PRESENT | _PAGE_READ; unsigned long flags = 0; struct mm_struct *mm = current->mm; + unsigned int region_id = get_region_id(ea); - if (REGION_ID(ea) == VMALLOC_REGION_ID) + if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID)) mm = &init_mm; if (dsisr & DSISR_NOHPTE) @@ -1455,7 +1491,7 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap, * 2) user space access kernel space. */ access |= _PAGE_PRIVILEGED; - if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID)) + if ((msr & MSR_PR) || (region_id == USER_REGION_ID)) access &= ~_PAGE_PRIVILEGED; if (trap == 0x400) @@ -1470,7 +1506,7 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea) int psize = get_slice_psize(mm, ea); /* We only prefault standard pages for now */ - if (unlikely(psize != mm->context.user_psize)) + if (unlikely(psize != mm_ctx_user_psize(&mm->context))) return false; /* @@ -1499,7 +1535,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, int rc, ssize, update_flags = 0; unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? 
_PAGE_EXEC : 0); - BUG_ON(REGION_ID(ea) != USER_REGION_ID); + BUG_ON(get_region_id(ea) != USER_REGION_ID); if (!should_hash_preload(mm, ea)) return; @@ -1549,7 +1585,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, /* Hash it in */ #ifdef CONFIG_PPC_64K_PAGES - if (mm->context.user_psize == MMU_PAGE_64K) + if (mm_ctx_user_psize(&mm->context) == MMU_PAGE_64K) rc = __hash_page_64K(ea, access, vsid, ptep, trap, update_flags, ssize); else @@ -1562,8 +1598,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, */ if (rc == -1) hash_failure_debug(ea, access, vsid, trap, ssize, - mm->context.user_psize, - mm->context.user_psize, + mm_ctx_user_psize(&mm->context), + mm_ctx_user_psize(&mm->context), pte_val(*ptep)); out_exit: local_irq_restore(flags); @@ -1634,7 +1670,8 @@ unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift, return gslot; } -/* WARNING: This is called from hash_low_64.S, if you change this prototype, +/* + * WARNING: This is called from hash_low_64.S, if you change this prototype, * do not forget to update the assembly call site ! */ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, @@ -1855,7 +1892,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) { - /* We don't currently support the first MEMBLOCK not mapping 0 + /* + * We don't currently support the first MEMBLOCK not mapping 0 * physical on those processors */ BUG_ON(first_memblock_base != 0); @@ -1909,3 +1947,14 @@ static int __init hash64_debugfs(void) } machine_device_initcall(pseries, hash64_debugfs); #endif /* CONFIG_DEBUG_FS */ + +void __init print_system_hash_info(void) +{ + pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); + + if (htab_hash_mask) + pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask); + pr_info("kernel vmalloc start = 0x%lx\n", KERN_VIRT_START); + pr_info("kernel IO start = 0x%lx\n", KERN_IO_START); + pr_info("kernel vmemmap start = 0x%lx\n", (unsigned long)vmemmap); +} diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/book3s64/iommu_api.c index 8330f135294f..5c521f3924a5 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c @@ -141,8 +141,9 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua, for (entry = 0; entry < entries; entry += chunk) { unsigned long n = min(entries - entry, chunk); - ret = get_user_pages_longterm(ua + (entry << PAGE_SHIFT), n, - FOLL_WRITE, mem->hpages + entry, NULL); + ret = get_user_pages(ua + (entry << PAGE_SHIFT), n, + FOLL_WRITE | FOLL_LONGTERM, + mem->hpages + entry, NULL); if (ret == n) { pinned += n; continue; diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/book3s64/mmu_context.c index f720c5cc0b5e..cb2b08635508 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -63,6 +63,13 @@ static int hash__init_new_context(struct mm_struct *mm) if (index < 0) return index; + mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context), + GFP_KERNEL); + if (!mm->context.hash_context) { + ida_free(&mmu_context_ida, index); + return -ENOMEM; + } + /* * The old code would re-promote on fork, we don't do that when using * slices as it could cause problem promoting slices that have been @@ -77,10 +84,26 @@ static int hash__init_new_context(struct mm_struct *mm) * We should not be calling init_new_context() on init_mm. 
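
With the hash context hived off into its own allocation in hash__init_new_context() above, every failure path after the context id is taken has to give the id back (and, on fork, free the half-built context too), as the hunk does with ida_free()/kfree(). A compact userspace sketch of the same allocate-copy-unwind shape, with stand-in types and a toy id allocator:

#include <stdlib.h>
#include <string.h>

struct hash_ctx { int user_psize; void *spt; };

static int next_id = 1;
static int alloc_id(void) { return next_id++; }
static void free_id(int id) { (void)id; /* toy allocator: nothing to undo */ }

/* parent == NULL models init_mm (zero-fill); otherwise a fork copy. */
static int init_new_context(struct hash_ctx **out, const struct hash_ctx *parent)
{
        int id = alloc_id();
        struct hash_ctx *ctx = malloc(sizeof(*ctx));

        if (!ctx) {
                free_id(id);                    /* unwind the id we took */
                return -1;
        }
        if (parent)
                *ctx = *parent;                 /* fork: inherit details  */
        else
                memset(ctx, 0, sizeof(*ctx));   /* first context: zeroed  */
        *out = ctx;
        return id;
}

int main(void)
{
        struct hash_ctx *ctx;
        return init_new_context(&ctx, NULL) > 0 ? 0 : 1;
}
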
Hence a * check against 0 is OK. */ - if (mm->context.id == 0) + if (mm->context.id == 0) { + memset(mm->context.hash_context, 0, sizeof(struct hash_mm_context)); slice_init_new_context_exec(mm); + } else { + /* This is fork. Copy hash_context details from current->mm */ + memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context)); +#ifdef CONFIG_PPC_SUBPAGE_PROT + /* inherit subpage prot detalis if we have one. */ + if (current->mm->context.hash_context->spt) { + mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table), + GFP_KERNEL); + if (!mm->context.hash_context->spt) { + ida_free(&mmu_context_ida, index); + kfree(mm->context.hash_context); + return -ENOMEM; + } + } +#endif - subpage_prot_init_new_context(mm); + } pkey_mm_init(mm); return index; @@ -118,6 +141,7 @@ static int radix__init_new_context(struct mm_struct *mm) asm volatile("ptesync;isync" : : : "memory"); mm->context.npu_context = NULL; + mm->context.hash_context = NULL; return index; } @@ -162,6 +186,7 @@ static void destroy_contexts(mm_context_t *ctx) if (context_id) ida_free(&mmu_context_ida, context_id); } + kfree(ctx->hash_context); } static void pmd_frag_destroy(void *pmd_frag) diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/book3s64/pgtable.c index a4341aba0af4..16bda049187a 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -17,7 +17,7 @@ #include <asm/trace.h> #include <asm/powernv.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> #include <trace/events/thp.h> unsigned long __pmd_frag_nr; diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 587807763737..ae7fca40e5b3 100644 --- a/arch/powerpc/mm/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -7,6 +7,7 @@ #include <asm/mman.h> #include <asm/mmu_context.h> +#include <asm/mmu.h> #include <asm/setup.h> #include <linux/pkeys.h> #include <linux/of_device.h> diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c index cab06331c0c0..cab06331c0c0 100644 --- a/arch/powerpc/mm/hugetlbpage-radix.c +++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 154472a28c77..c9bcf428dd2b 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -29,6 +29,7 @@ #include <asm/powernv.h> #include <asm/sections.h> #include <asm/trace.h> +#include <asm/uaccess.h> #include <trace/events/thp.h> @@ -135,6 +136,10 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa, */ BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE); +#ifdef CONFIG_PPC_64K_PAGES + BUILD_BUG_ON(RADIX_KERN_MAP_SIZE != (1UL << MAX_EA_BITS_PER_CONTEXT)); +#endif + if (unlikely(!slab_is_available())) return early_map_kernel_page(ea, pa, flags, map_page_size, nid, region_start, region_end); @@ -334,6 +339,12 @@ void __init radix_init_pgtable(void) * page tables will be allocated within the range. No * need or a node (which we don't have yet). 
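
radix_init_pgtable() here gains a guard (the check just below) that skips any memblock reaching past RADIX_VMALLOC_START, since the linear mapping must stay inside its own region. A small sketch of that guard under an assumed boundary value:

#include <stdio.h>

#define VMALLOC_START   0x10000UL       /* assumed region boundary */

struct memblock_reg { unsigned long base, size; };

static int mappable(const struct memblock_reg *reg)
{
        if (reg->base + reg->size >= VMALLOC_START) {
                fprintf(stderr, "Outside the supported range\n");
                return 0;       /* skip: would spill into vmalloc space */
        }
        return 1;
}

int main(void)
{
        struct memblock_reg reg = { 0x0, 0x8000UL };
        return mappable(&reg) ? 0 : 1;
}
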
*/ + + if ((reg->base + reg->size) >= RADIX_VMALLOC_START) { + pr_warn("Outside the supported range\n"); + continue; + } + WARN_ON(create_physical_mapping(reg->base, reg->base + reg->size, -1)); @@ -531,8 +542,15 @@ static void radix_init_amor(void) mtspr(SPRN_AMOR, (3ul << 62)); } -static void radix_init_iamr(void) +#ifdef CONFIG_PPC_KUEP +void setup_kuep(bool disabled) { + if (disabled || !early_radix_enabled()) + return; + + if (smp_processor_id() == boot_cpuid) + pr_info("Activating Kernel Userspace Execution Prevention\n"); + /* * Radix always uses key0 of the IAMR to determine if an access is * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction @@ -540,6 +558,25 @@ static void radix_init_iamr(void) */ mtspr(SPRN_IAMR, (1ul << 62)); } +#endif + +#ifdef CONFIG_PPC_KUAP +void setup_kuap(bool disabled) +{ + if (disabled || !early_radix_enabled()) + return; + + if (smp_processor_id() == boot_cpuid) { + pr_info("Activating Kernel Userspace Access Prevention\n"); + cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP; + } + + /* Make sure userspace can't change the AMR */ + mtspr(SPRN_UAMOR, 0); + mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); + isync(); +} +#endif void __init radix__early_init_mmu(void) { @@ -574,11 +611,11 @@ void __init radix__early_init_mmu(void) __pgd_val_bits = RADIX_PGD_VAL_BITS; __kernel_virt_start = RADIX_KERN_VIRT_START; - __kernel_virt_size = RADIX_KERN_VIRT_SIZE; __vmalloc_start = RADIX_VMALLOC_START; __vmalloc_end = RADIX_VMALLOC_END; __kernel_io_start = RADIX_KERN_IO_START; - vmemmap = (struct page *)RADIX_VMEMMAP_BASE; + __kernel_io_end = RADIX_KERN_IO_END; + vmemmap = (struct page *)RADIX_VMEMMAP_START; ioremap_bot = IOREMAP_BASE; #ifdef CONFIG_PCI @@ -601,7 +638,6 @@ void __init radix__early_init_mmu(void) memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); - radix_init_iamr(); radix_init_pgtable(); /* Switch to the guard PID before turning on MMU */ radix__switch_mmu_context(NULL, &init_mm); @@ -623,7 +659,6 @@ void radix__early_init_mmu_secondary(void) __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); radix_init_amor(); } - radix_init_iamr(); radix__switch_mmu_context(NULL, &init_mm); if (cpu_has_feature(CPU_FTR_HVMODE)) @@ -646,7 +681,8 @@ void radix__mmu_cleanup_all(void) void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) { - /* We don't currently support the first MEMBLOCK not mapping 0 + /* + * We don't currently support the first MEMBLOCK not mapping 0 * physical on those processors */ BUG_ON(first_memblock_base != 0); @@ -866,6 +902,11 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end) int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid) { + if (end >= RADIX_VMALLOC_START) { + pr_warn("Outside the supported range\n"); + return -1; + } + return create_physical_mapping(start, end, nid); } @@ -893,6 +934,11 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start, int nid = early_pfn_to_nid(phys >> PAGE_SHIFT); int ret; + if ((start + page_size) >= RADIX_VMEMMAP_END) { + pr_warn("Outside the supported range\n"); + return -1; + } + ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid); BUG_ON(ret); @@ -958,45 +1004,44 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) { - struct list_head *lh = (struct list_head *) pgtable; + struct list_head *lh = (struct list_head *) pgtable; - 
assert_spin_locked(pmd_lockptr(mm, pmdp)); + assert_spin_locked(pmd_lockptr(mm, pmdp)); - /* FIFO */ - if (!pmd_huge_pte(mm, pmdp)) - INIT_LIST_HEAD(lh); - else - list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); - pmd_huge_pte(mm, pmdp) = pgtable; + /* FIFO */ + if (!pmd_huge_pte(mm, pmdp)) + INIT_LIST_HEAD(lh); + else + list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); + pmd_huge_pte(mm, pmdp) = pgtable; } pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { - pte_t *ptep; - pgtable_t pgtable; - struct list_head *lh; - - assert_spin_locked(pmd_lockptr(mm, pmdp)); - - /* FIFO */ - pgtable = pmd_huge_pte(mm, pmdp); - lh = (struct list_head *) pgtable; - if (list_empty(lh)) - pmd_huge_pte(mm, pmdp) = NULL; - else { - pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; - list_del(lh); - } - ptep = (pte_t *) pgtable; - *ptep = __pte(0); - ptep++; - *ptep = __pte(0); - return pgtable; -} + pte_t *ptep; + pgtable_t pgtable; + struct list_head *lh; + assert_spin_locked(pmd_lockptr(mm, pmdp)); + + /* FIFO */ + pgtable = pmd_huge_pte(mm, pmdp); + lh = (struct list_head *) pgtable; + if (list_empty(lh)) + pmd_huge_pte(mm, pmdp) = NULL; + else { + pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; + list_del(lh); + } + ptep = (pte_t *) pgtable; + *ptep = __pte(0); + ptep++; + *ptep = __pte(0); + return pgtable; +} pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm, - unsigned long addr, pmd_t *pmdp) + unsigned long addr, pmd_t *pmdp) { pmd_t old_pmd; unsigned long old; diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 6a23b9ebd2a1..4d841369399f 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -90,7 +90,7 @@ void radix__tlbiel_all(unsigned int action) asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); } -static inline void __tlbiel_pid(unsigned long pid, int set, +static __always_inline void __tlbiel_pid(unsigned long pid, int set, unsigned long ric) { unsigned long rb,rs,prs,r; @@ -106,7 +106,7 @@ static inline void __tlbiel_pid(unsigned long pid, int set, trace_tlbie(0, 1, rb, rs, ric, prs, r); } -static inline void __tlbie_pid(unsigned long pid, unsigned long ric) +static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric) { unsigned long rb,rs,prs,r; @@ -120,7 +120,7 @@ static inline void __tlbie_pid(unsigned long pid, unsigned long ric) trace_tlbie(0, 0, rb, rs, ric, prs, r); } -static inline void __tlbiel_lpid(unsigned long lpid, int set, +static __always_inline void __tlbiel_lpid(unsigned long lpid, int set, unsigned long ric) { unsigned long rb,rs,prs,r; @@ -136,7 +136,7 @@ static inline void __tlbiel_lpid(unsigned long lpid, int set, trace_tlbie(lpid, 1, rb, rs, ric, prs, r); } -static inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) +static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) { unsigned long rb,rs,prs,r; @@ -928,7 +928,7 @@ void radix__tlb_flush(struct mmu_gather *tlb) tlb->need_flush_all = 0; } -static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, +static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, unsigned long end, int psize, bool also_pwc) { diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/book3s64/slb.c index 5986df48359b..c22742218bd3 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -554,7 +554,8 @@ void slb_initialize(void) asm volatile("isync; slbia; isync":::"memory"); 
create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX); - /* For the boot cpu, we're running on the stack in init_thread_union, + /* + * For the boot cpu, we're running on the stack in init_thread_union, * which is in the first segment of the linear mapping, and also * get_paca()->kstack hasn't been initialized yet. * For secondary cpus, we need to bolt the kernel stack entry now. @@ -691,10 +692,10 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id) unsigned long flags; int ssize; - if (id == KERNEL_REGION_ID) { + if (id == LINEAR_MAP_REGION_ID) { /* We only support upto MAX_PHYSMEM_BITS */ - if ((ea & ~REGION_MASK) > (1UL << MAX_PHYSMEM_BITS)) + if ((ea & EA_MASK) > (1UL << MAX_PHYSMEM_BITS)) return -EFAULT; flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp; @@ -702,20 +703,25 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id) #ifdef CONFIG_SPARSEMEM_VMEMMAP } else if (id == VMEMMAP_REGION_ID) { - if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT)) + if (ea >= H_VMEMMAP_END) return -EFAULT; flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp; #endif } else if (id == VMALLOC_REGION_ID) { - if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT)) + if (ea >= H_VMALLOC_END) return -EFAULT; - if (ea < H_VMALLOC_END) - flags = local_paca->vmalloc_sllp; - else - flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp; + flags = local_paca->vmalloc_sllp; + + } else if (id == IO_REGION_ID) { + + if (ea >= H_KERN_IO_END) + return -EFAULT; + + flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp; + } else { return -EFAULT; } @@ -725,6 +731,7 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id) ssize = MMU_SEGSIZE_256M; context = get_kernel_context(ea); + return slb_insert_entry(ea, context, flags, ssize, true); } @@ -739,7 +746,7 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea) * consider this as bad access if we take a SLB miss * on an address above addr limit. */ - if (ea >= mm->context.slb_addr_limit) + if (ea >= mm_ctx_slb_addr_limit(&mm->context)) return -EFAULT; context = get_user_context(&mm->context, ea); @@ -761,7 +768,7 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea) long do_slb_fault(struct pt_regs *regs, unsigned long ea) { - unsigned long id = REGION_ID(ea); + unsigned long id = get_region_id(ea); /* IRQs are not reconciled here, so can't check irqs_disabled */ VM_WARN_ON(mfmsr() & MSR_EE); @@ -784,7 +791,7 @@ long do_slb_fault(struct pt_regs *regs, unsigned long ea) * first class kernel code. But for performance it's probably nicer * if they go via fast_exception_return too. */ - if (id >= KERNEL_REGION_ID) { + if (id >= LINEAR_MAP_REGION_ID) { long err; #ifdef CONFIG_DEBUG_VM /* Catch recursive kernel SLB faults. 
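
slb_allocate_kernel() above now gives each kernel region its own explicit end check (H_VMEMMAP_END, H_VMALLOC_END, H_KERN_IO_END) instead of masking the address with REGION_MASK. A hedged sketch of that bounds-then-flags shape; the limits and flag values are placeholders, not the real SLB encodings:

#include <errno.h>

#define VMEMMAP_END     0x4000UL        /* stand-ins for the H_*_END limits */
#define VMALLOC_END     0x5000UL
#define IO_END          0x6000UL

/* Returns SLB flags for a kernel ea, or -EFAULT past the region end. */
static long kernel_region_flags(int id, unsigned long ea)
{
        switch (id) {
        case 1: return ea < VMEMMAP_END ? 0x10 : -EFAULT; /* vmemmap */
        case 2: return ea < VMALLOC_END ? 0x20 : -EFAULT; /* vmalloc */
        case 3: return ea < IO_END      ? 0x30 : -EFAULT; /* io      */
        default: return -EFAULT;
        }
}

int main(void)
{
        return kernel_region_flags(2, 0x4800UL) == 0x20 ? 0 : 1;
}
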
*/ diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c index 5e4178790dee..473dd430e306 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/book3s64/subpage_prot.c @@ -25,10 +25,13 @@ */ void subpage_prot_free(struct mm_struct *mm) { - struct subpage_prot_table *spt = &mm->context.spt; + struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context); unsigned long i, j, addr; u32 **p; + if (!spt) + return; + for (i = 0; i < 4; ++i) { if (spt->low_prot[i]) { free_page((unsigned long)spt->low_prot[i]); @@ -48,13 +51,7 @@ void subpage_prot_free(struct mm_struct *mm) free_page((unsigned long)p); } spt->maxaddr = 0; -} - -void subpage_prot_init_new_context(struct mm_struct *mm) -{ - struct subpage_prot_table *spt = &mm->context.spt; - - memset(spt, 0, sizeof(*spt)); + kfree(spt); } static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, @@ -93,13 +90,18 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, static void subpage_prot_clear(unsigned long addr, unsigned long len) { struct mm_struct *mm = current->mm; - struct subpage_prot_table *spt = &mm->context.spt; + struct subpage_prot_table *spt; u32 **spm, *spp; unsigned long i; size_t nw; unsigned long next, limit; down_write(&mm->mmap_sem); + + spt = mm_ctx_subpage_prot(&mm->context); + if (!spt) + goto err_out; + limit = addr + len; if (limit > spt->maxaddr) limit = spt->maxaddr; @@ -127,6 +129,8 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len) /* now flush any existing HPTEs for the range */ hpte_flush_range(mm, addr, nw); } + +err_out: up_write(&mm->mmap_sem); } @@ -189,7 +193,7 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr, unsigned long, len, u32 __user *, map) { struct mm_struct *mm = current->mm; - struct subpage_prot_table *spt = &mm->context.spt; + struct subpage_prot_table *spt; u32 **spm, *spp; unsigned long i; size_t nw; @@ -218,6 +222,21 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr, return -EFAULT; down_write(&mm->mmap_sem); + + spt = mm_ctx_subpage_prot(&mm->context); + if (!spt) { + /* + * Allocate subpage prot table if not already done. 
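
The subpage_prot syscall in this hunk now allocates the protection table on first use, and does so while holding mmap_sem (as the comment being added notes) so two racing callers cannot both install one. A sketch of lazy-init-under-a-lock, with a pthread mutex standing in for mmap_sem:

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static void *table;

static void *get_table(void)
{
        pthread_mutex_lock(&lock);
        if (!table)
                table = calloc(1, 4096);        /* first user allocates */
        pthread_mutex_unlock(&lock);
        return table;   /* may still be NULL on allocation failure */
}

int main(void)
{
        return get_table() ? 0 : 1;
}
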
+ * Do this with mmap_sem held + */ + spt = kzalloc(sizeof(struct subpage_prot_table), GFP_KERNEL); + if (!spt) { + err = -ENOMEM; + goto out; + } + mm->context.hash_context->spt = spt; + } + subpage_mark_vma_nohuge(mm, addr, len); for (limit = addr + len; addr < limit; addr = next) { next = pmd_addr_end(addr, limit); diff --git a/arch/powerpc/mm/vphn.c b/arch/powerpc/mm/book3s64/vphn.c index f83044faac23..0ee7734afb50 100644 --- a/arch/powerpc/mm/vphn.c +++ b/arch/powerpc/mm/book3s64/vphn.c @@ -42,7 +42,8 @@ int vphn_unpack_associativity(const long *packed, __be32 *unpacked) u16 new = be16_to_cpup(field++); if (is_32bit) { - /* Let's concatenate the 16 bits of this field to the + /* + * Let's concatenate the 16 bits of this field to the * 15 lower bits of the previous field */ unpacked[++nr_assoc_doms] = @@ -56,7 +57,8 @@ int vphn_unpack_associativity(const long *packed, __be32 *unpacked) unpacked[++nr_assoc_doms] = cpu_to_be32(new & VPHN_FIELD_MASK); } else { - /* Data is in the lower 15 bits of this field + /* + * Data is in the lower 15 bits of this field * concatenated with the next 16 bit field */ last = new; diff --git a/arch/powerpc/mm/vphn.h b/arch/powerpc/mm/book3s64/vphn.h index f9ffdb3942fc..f0b93c2dd578 100644 --- a/arch/powerpc/mm/vphn.h +++ b/arch/powerpc/mm/book3s64/vphn.h @@ -2,8 +2,7 @@ #ifndef _ARCH_POWERPC_MM_VPHN_H_ #define _ARCH_POWERPC_MM_VPHN_H_ -/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. - */ +/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */ #define VPHN_REGISTER_COUNT 6 /* diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index c8da352e8686..f137286740cb 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -105,7 +105,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) u64 vsid, vsidkey; int psize, ssize; - switch (REGION_ID(ea)) { + switch (get_region_id(ea)) { case USER_REGION_ID: pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea); if (mm == NULL) @@ -117,16 +117,20 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) break; case VMALLOC_REGION_ID: pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea); - if (ea < VMALLOC_END) - psize = mmu_vmalloc_psize; - else - psize = mmu_io_psize; + psize = mmu_vmalloc_psize; ssize = mmu_kernel_ssize; vsid = get_kernel_vsid(ea, mmu_kernel_ssize); vsidkey = SLB_VSID_KERNEL; break; - case KERNEL_REGION_ID: - pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea); + case IO_REGION_ID: + pr_devel("%s: 0x%llx -- IO_REGION_ID\n", __func__, ea); + psize = mmu_io_psize; + ssize = mmu_kernel_ssize; + vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + vsidkey = SLB_VSID_KERNEL; + break; + case LINEAR_MAP_REGION_ID: + pr_devel("%s: 0x%llx -- LINEAR_MAP_REGION_ID\n", __func__, ea); psize = mmu_linear_psize; ssize = mmu_kernel_ssize; vsid = get_kernel_vsid(ea, mmu_kernel_ssize); diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index b5d2658c26af..2f6154b76328 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -36,7 +36,7 @@ #include <asm/tlbflush.h> #include <asm/dma.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> /* * This address range defaults to a value that is safe for all diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c index 3f1803672c9b..641891df2046 100644 --- a/arch/powerpc/mm/drmem.c +++ b/arch/powerpc/mm/drmem.c @@ -366,8 +366,10 @@ static void __init 
init_drmem_v1_lmbs(const __be32 *prop) if (!drmem_info->lmbs) return; - for_each_drmem_lmb(lmb) + for_each_drmem_lmb(lmb) { read_drconf_v1_cell(lmb, &prop); + lmb_set_nid(lmb); + } } static void __init init_drmem_v2_lmbs(const __be32 *prop) @@ -412,6 +414,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop) lmb->aa_index = dr_cell.aa_index; lmb->flags = dr_cell.flags; + + lmb_set_nid(lmb); } } } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 887f11bcf330..b5d3578d9f65 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -44,6 +44,7 @@ #include <asm/mmu_context.h> #include <asm/siginfo.h> #include <asm/debug.h> +#include <asm/kup.h> static inline bool notify_page_fault(struct pt_regs *regs) { @@ -223,19 +224,46 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, } /* Is this a bad kernel fault ? */ -static bool bad_kernel_fault(bool is_exec, unsigned long error_code, - unsigned long address) +static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, + unsigned long address, bool is_write) { + int is_exec = TRAP(regs) == 0x400; + /* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */ if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT | DSISR_PROTFAULT))) { - printk_ratelimited(KERN_CRIT "kernel tried to execute" - " exec-protected page (%lx) -" - "exploit attempt? (uid: %d)\n", - address, from_kuid(&init_user_ns, - current_uid())); + pr_crit_ratelimited("kernel tried to execute %s page (%lx) - exploit attempt? (uid: %d)\n", + address >= TASK_SIZE ? "exec-protected" : "user", + address, + from_kuid(&init_user_ns, current_uid())); + + // Kernel exec fault is always bad + return true; } - return is_exec || (address >= TASK_SIZE); + + if (!is_exec && address < TASK_SIZE && (error_code & DSISR_PROTFAULT) && + !search_exception_tables(regs->nip)) { + pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n", + address, + from_kuid(&init_user_ns, current_uid())); + } + + // Kernel fault on kernel address is bad + if (address >= TASK_SIZE) + return true; + + // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad + if (!search_exception_tables(regs->nip)) + return true; + + // Read/write fault in a valid region (the exception table search passed + // above), but blocked by KUAP is bad, it can never succeed. + if (bad_kuap_fault(regs, is_write)) + return true; + + // What's left? Kernel fault on user in well defined regions (extable + // matched), and allowed by KUAP in the faulting context. + return false; } static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, @@ -455,9 +483,10 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, /* * The kernel should never take an execute fault nor should it - * take a page fault to a kernel address. 
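
The rewritten bad_kernel_fault() above turns one opaque boolean expression into an ordered ladder: a kernel exec fault is always bad, a fault on a kernel address is bad, a user access from outside the exception tables is bad, and even an extable-covered access is bad if KUAP blocked it. A condensed userspace sketch of that ladder, with plain booleans standing in for the kernel's helpers:

#include <stdbool.h>
#include <stdio.h>

static bool bad_kernel_fault(bool is_exec, bool kernel_addr,
                             bool in_extable, bool kuap_blocked)
{
        if (is_exec)
                return true;    /* kernel exec fault: always bad       */
        if (kernel_addr)
                return true;    /* kernel fault on a kernel address    */
        if (!in_extable)
                return true;    /* user access outside copy helpers    */
        return kuap_blocked;    /* blocked by KUAP: can never succeed  */
}

int main(void)
{
        /* extable-covered user access that KUAP allows: the only good case */
        printf("%d\n", bad_kernel_fault(false, false, true, false));
        return 0;
}
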
+ * take a page fault to a kernel address or a page fault to a user + * address outside of dedicated places */ - if (unlikely(!is_user && bad_kernel_fault(is_exec, error_code, address))) + if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) return SIGSEGV; /* diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c index 82a0e37557a5..320c1672b2ae 100644 --- a/arch/powerpc/mm/highmem.c +++ b/arch/powerpc/mm/highmem.c @@ -43,9 +43,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) type = kmap_atomic_idx_push(); idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); -#ifdef CONFIG_DEBUG_HIGHMEM - BUG_ON(!pte_none(*(kmap_pte-idx))); -#endif + WARN_ON(IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !pte_none(*(kmap_pte - idx))); __set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot), 1); local_flush_tlb_page(NULL, vaddr); @@ -56,7 +54,6 @@ EXPORT_SYMBOL(kmap_atomic_prot); void __kunmap_atomic(void *kvaddr) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - int type __maybe_unused; if (vaddr < __fix_to_virt(FIX_KMAP_END)) { pagefault_enable(); @@ -64,14 +61,12 @@ void __kunmap_atomic(void *kvaddr) return; } - type = kmap_atomic_idx(); - -#ifdef CONFIG_DEBUG_HIGHMEM - { + if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM)) { + int type = kmap_atomic_idx(); unsigned int idx; idx = type + KM_TYPE_NR * smp_processor_id(); - BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); + WARN_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); /* * force other mappings to Oops if they'll try to access @@ -80,7 +75,6 @@ void __kunmap_atomic(void *kvaddr) pte_clear(&init_mm, vaddr, kmap_pte-idx); local_flush_tlb_page(NULL, vaddr); } -#endif kmap_atomic_idx_pop(); pagefault_enable(); diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 9e732bb2c84a..c5c9ff2d7afc 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -26,20 +26,8 @@ #include <asm/hugetlb.h> #include <asm/pte-walk.h> - -#ifdef CONFIG_HUGETLB_PAGE - -#define PAGE_SHIFT_64K 16 -#define PAGE_SHIFT_512K 19 -#define PAGE_SHIFT_8M 23 -#define PAGE_SHIFT_16M 24 -#define PAGE_SHIFT_16G 34 - bool hugetlb_disabled = false; -unsigned int HPAGE_SHIFT; -EXPORT_SYMBOL(HPAGE_SHIFT); - #define hugepd_none(hpd) (hpd_val(hpd) == 0) #define PTE_T_ORDER (__builtin_ffs(sizeof(pte_t)) - __builtin_ffs(sizeof(void *))) @@ -98,19 +86,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, for (i = 0; i < num_hugepd; i++, hpdp++) { if (unlikely(!hugepd_none(*hpdp))) break; - else { -#ifdef CONFIG_PPC_BOOK3S_64 - *hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS | - (shift_to_mmu_psize(pshift) << 2)); -#elif defined(CONFIG_PPC_8xx) - *hpdp = __hugepd(__pa(new) | _PMD_USER | - (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M : - _PMD_PAGE_512K) | _PMD_PRESENT); -#else - /* We use the old format for PPC_FSL_BOOK3E */ - *hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift); -#endif - } + hugepd_populate(hpdp, new, pshift); } /* If we bailed from the for loop early, an error occurred, clean up */ if (i < num_hugepd) { @@ -250,7 +226,7 @@ int __init alloc_bootmem_huge_page(struct hstate *h) return __alloc_bootmem_huge_page(h); } -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) +#ifndef CONFIG_PPC_BOOK3S_64 #define HUGEPD_FREELIST_SIZE \ ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t)) @@ -542,23 +518,6 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, return (__boundary - 1 < end - 1) ? 
__boundary : end; } -int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift, - unsigned long end, int write, struct page **pages, int *nr) -{ - pte_t *ptep; - unsigned long sz = 1UL << hugepd_shift(hugepd); - unsigned long next; - - ptep = hugepte_offset(hugepd, addr, pdshift); - do { - next = hugepte_addr_end(addr, end, sz); - if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr)) - return 0; - } while (ptep++, addr = next, addr != end); - - return 1; -} - #ifdef CONFIG_PPC_MM_SLICES unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, @@ -578,24 +537,15 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) { -#ifdef CONFIG_PPC_MM_SLICES /* With radix we don't use slice, so derive it from vma*/ - if (!radix_enabled()) { + if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled()) { unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); return 1UL << mmu_psize_to_shift(psize); } -#endif return vma_kernel_pagesize(vma); } -static inline bool is_power_of_4(unsigned long x) -{ - if (is_power_of_2(x)) - return (__ilog2(x) % 2) ? false : true; - return false; -} - static int __init add_huge_page_size(unsigned long long size) { int shift = __ffs(size); @@ -603,37 +553,13 @@ static int __init add_huge_page_size(unsigned long long size) /* Check that it is a page size supported by the hardware and * that it fits within pagetable and slice limits. */ - if (size <= PAGE_SIZE) - return -EINVAL; -#if defined(CONFIG_PPC_FSL_BOOK3E) - if (!is_power_of_4(size)) + if (size <= PAGE_SIZE || !is_power_of_2(size)) return -EINVAL; -#elif !defined(CONFIG_PPC_8xx) - if (!is_power_of_2(size) || (shift > SLICE_HIGH_SHIFT)) - return -EINVAL; -#endif - if ((mmu_psize = shift_to_mmu_psize(shift)) < 0) + mmu_psize = check_and_get_huge_psize(size); + if (mmu_psize < 0) return -EINVAL; -#ifdef CONFIG_PPC_BOOK3S_64 - /* - * We need to make sure that for different page sizes reported by - * firmware we only add hugetlb support for page sizes that can be - * supported by linux page table layout. 
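
add_huge_page_size() above collapses the per-platform #ifdef checks (including the FSL is_power_of_4() special case) into one common gate, size larger than the base page and a power of two, before check_and_get_huge_psize() does the MMU-specific vetting. A sketch of that gate, assuming a 64K base page purely for illustration:

#include <stdbool.h>

#define PAGE_SIZE       (64UL * 1024)   /* assumed base page size */

static bool huge_size_plausible(unsigned long long size)
{
        /* must exceed the base page and be a power of two */
        return size > PAGE_SIZE && (size & (size - 1)) == 0;
}

int main(void)
{
        return huge_size_plausible(16ULL << 20) ? 0 : 1;        /* 16M: ok */
}
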
- * For now we have - * Radix: 2M and 1G - * Hash: 16M and 16G - */ - if (radix_enabled()) { - if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G) - return -EINVAL; - } else { - if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G) - return -EINVAL; - } -#endif - BUG_ON(mmu_psize_defs[mmu_psize].shift != shift); /* Return if huge page size has already been setup */ @@ -669,10 +595,10 @@ static int __init hugetlbpage_init(void) return 0; } -#if !defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_PPC_8xx) - if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE)) + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled() && + !mmu_has_feature(MMU_FTR_16M_PAGE)) return -ENODEV; -#endif + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { unsigned shift; unsigned pdshift; @@ -710,29 +636,13 @@ static int __init hugetlbpage_init(void) pgtable_cache_add(PTE_INDEX_SIZE); else if (pdshift > shift) pgtable_cache_add(pdshift - shift); -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) - else + else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || IS_ENABLED(CONFIG_PPC_8xx)) pgtable_cache_add(PTE_T_ORDER); -#endif } -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) - /* Default hpage size = 4M on FSL_BOOK3E and 512k on 8xx */ - if (mmu_psize_defs[MMU_PAGE_4M].shift) - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift; - else if (mmu_psize_defs[MMU_PAGE_512K].shift) - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_512K].shift; -#else - /* Set default large page size. Currently, we pick 16M or 1M - * depending on what is available - */ - if (mmu_psize_defs[MMU_PAGE_16M].shift) - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift; - else if (mmu_psize_defs[MMU_PAGE_1M].shift) - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift; - else if (mmu_psize_defs[MMU_PAGE_2M].shift) - HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift; -#endif + if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE)) + hugetlbpage_init_default(); + return 0; } @@ -756,113 +666,8 @@ void flush_dcache_icache_hugepage(struct page *page) } } -#endif /* CONFIG_HUGETLB_PAGE */ - -/* - * We have 4 cases for pgds and pmds: - * (1) invalid (all zeroes) - * (2) pointer to next table, as normal; bottom 6 bits == 0 - * (3) leaf pte for huge page _PAGE_PTE set - * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table - * - * So long as we atomically load page table pointers we are safe against teardown, - * we can follow the address down to the the page and take a ref on it. - * This function need to be called with interrupts disabled. We use this variant - * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED - */ -pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, - bool *is_thp, unsigned *hpage_shift) -{ - pgd_t pgd, *pgdp; - pud_t pud, *pudp; - pmd_t pmd, *pmdp; - pte_t *ret_pte; - hugepd_t *hpdp = NULL; - unsigned pdshift = PGDIR_SHIFT; - - if (hpage_shift) - *hpage_shift = 0; - - if (is_thp) - *is_thp = false; - - pgdp = pgdir + pgd_index(ea); - pgd = READ_ONCE(*pgdp); - /* - * Always operate on the local stack value. This make sure the - * value don't get updated by a parallel THP split/collapse, - * page fault or a page unmap. The return pte_t * is still not - * stable. So should be checked there for above conditions. 
- */ - if (pgd_none(pgd)) - return NULL; - else if (pgd_huge(pgd)) { - ret_pte = (pte_t *) pgdp; - goto out; - } else if (is_hugepd(__hugepd(pgd_val(pgd)))) - hpdp = (hugepd_t *)&pgd; - else { - /* - * Even if we end up with an unmap, the pgtable will not - * be freed, because we do an rcu free and here we are - * irq disabled - */ - pdshift = PUD_SHIFT; - pudp = pud_offset(&pgd, ea); - pud = READ_ONCE(*pudp); - - if (pud_none(pud)) - return NULL; - else if (pud_huge(pud)) { - ret_pte = (pte_t *) pudp; - goto out; - } else if (is_hugepd(__hugepd(pud_val(pud)))) - hpdp = (hugepd_t *)&pud; - else { - pdshift = PMD_SHIFT; - pmdp = pmd_offset(&pud, ea); - pmd = READ_ONCE(*pmdp); - /* - * A hugepage collapse is captured by pmd_none, because - * it mark the pmd none and do a hpte invalidate. - */ - if (pmd_none(pmd)) - return NULL; - - if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) { - if (is_thp) - *is_thp = true; - ret_pte = (pte_t *) pmdp; - goto out; - } - /* - * pmd_large check below will handle the swap pmd pte - * we need to do both the check because they are config - * dependent. - */ - if (pmd_huge(pmd) || pmd_large(pmd)) { - ret_pte = (pte_t *) pmdp; - goto out; - } else if (is_hugepd(__hugepd(pmd_val(pmd)))) - hpdp = (hugepd_t *)&pmd; - else - return pte_offset_kernel(&pmd, ea); - } - } - if (!hpdp) - return NULL; - - ret_pte = hugepte_offset(*hpdp, ea, pdshift); - pdshift = hugepd_shift(*hpdp); -out: - if (hpage_shift) - *hpage_shift = pdshift; - return ret_pte; -} -EXPORT_SYMBOL_GPL(__find_linux_pte); - -int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, - unsigned long end, int write, struct page **pages, int *nr) +static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, + unsigned long end, int write, struct page **pages, int *nr) { unsigned long pte_end; struct page *head, *page; @@ -908,3 +713,20 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, return 1; } + +int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift, + unsigned long end, int write, struct page **pages, int *nr) +{ + pte_t *ptep; + unsigned long sz = 1UL << hugepd_shift(hugepd); + unsigned long next; + + ptep = hugepte_offset(hugepd, addr, pdshift); + do { + next = hugepte_addr_end(addr, end, sz); + if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr)) + return 0; + } while (ptep++, addr = next, addr != end); + + return 1; +} diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 1e6910eb70ed..3bcae9e5e954 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -24,6 +24,32 @@ #include <linux/string.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> +#include <asm/kup.h> + +static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP); +static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP); + +static int __init parse_nosmep(char *p) +{ + disable_kuep = true; + pr_warn("Disabling Kernel Userspace Execution Prevention\n"); + return 0; +} +early_param("nosmep", parse_nosmep); + +static int __init parse_nosmap(char *p) +{ + disable_kuap = true; + pr_warn("Disabling Kernel Userspace Access Protection\n"); + return 0; +} +early_param("nosmap", parse_nosmap); + +void __ref setup_kup(void) +{ + setup_kuep(disable_kuep); + setup_kuap(disable_kuap); +} #define CTOR(shift) static void ctor_##shift(void *addr) \ { \ diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 41a3513cadc9..c3121b6c8cbd 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -45,8 +45,10 @@ 
#include <asm/tlb.h> #include <asm/sections.h> #include <asm/hugetlb.h> +#include <asm/kup.h> +#include <asm/kasan.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> #if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL) /* The amount of lowmem must be within 0xF0000000 - KERNELBASE. */ @@ -178,6 +180,10 @@ void __init MMU_init(void) btext_unmap(); #endif + kasan_mmu_init(); + + setup_kup(); + /* Shortly after that, the entire linear mapping will be available */ memblock_set_current_limit(lowmem_end_addr); } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index a4c155af1597..45b02fa11cd8 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -66,7 +66,7 @@ #include <asm/iommu.h> #include <asm/vdso.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> phys_addr_t memstart_addr = ~0; EXPORT_SYMBOL_GPL(memstart_addr); diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile new file mode 100644 index 000000000000..6577897673dd --- /dev/null +++ b/arch/powerpc/mm/kasan/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +KASAN_SANITIZE := n + +obj-$(CONFIG_PPC32) += kasan_init_32.o diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c new file mode 100644 index 000000000000..0d62be3cba47 --- /dev/null +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define DISABLE_BRANCH_PROFILING + +#include <linux/kasan.h> +#include <linux/printk.h> +#include <linux/memblock.h> +#include <linux/sched/task.h> +#include <linux/vmalloc.h> +#include <asm/pgalloc.h> +#include <asm/code-patching.h> +#include <mm/mmu_decl.h> + +static void kasan_populate_pte(pte_t *ptep, pgprot_t prot) +{ + unsigned long va = (unsigned long)kasan_early_shadow_page; + phys_addr_t pa = __pa(kasan_early_shadow_page); + int i; + + for (i = 0; i < PTRS_PER_PTE; i++, ptep++) + __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0); +} + +static int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end) +{ + pmd_t *pmd; + unsigned long k_cur, k_next; + + pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start); + + for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) { + pte_t *new; + + k_next = pgd_addr_end(k_cur, k_end); + if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte) + continue; + + new = pte_alloc_one_kernel(&init_mm); + + if (!new) + return -ENOMEM; + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) + kasan_populate_pte(new, PAGE_READONLY); + else + kasan_populate_pte(new, PAGE_KERNEL_RO); + pmd_populate_kernel(&init_mm, pmd, new); + } + return 0; +} + +static void __ref *kasan_get_one_page(void) +{ + if (slab_is_available()) + return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + + return memblock_alloc(PAGE_SIZE, PAGE_SIZE); +} + +static int __ref kasan_init_region(void *start, size_t size) +{ + unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start); + unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size); + unsigned long k_cur; + int ret; + void *block = NULL; + + ret = kasan_init_shadow_page_tables(k_start, k_end); + if (ret) + return ret; + + if (!slab_is_available()) + block = memblock_alloc(k_end - k_start, PAGE_SIZE); + + for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE) { + pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); + void *va = block ? 
block + k_cur - k_start : kasan_get_one_page(); + pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); + + if (!va) + return -ENOMEM; + + __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0); + } + flush_tlb_kernel_range(k_start, k_end); + return 0; +} + +static void __init kasan_remap_early_shadow_ro(void) +{ + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) + kasan_populate_pte(kasan_early_shadow_pte, PAGE_READONLY); + else + kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL_RO); + + flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END); +} + +void __init kasan_mmu_init(void) +{ + int ret; + struct memblock_region *reg; + + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) { + ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END); + + if (ret) + panic("kasan: kasan_init_shadow_page_tables() failed"); + } + + for_each_memblock(memory, reg) { + phys_addr_t base = reg->base; + phys_addr_t top = min(base + reg->size, total_lowmem); + + if (base >= top) + continue; + + ret = kasan_init_region(__va(base), top - base); + if (ret) + panic("kasan: kasan_init_region() failed"); + } +} + +void __init kasan_init(void) +{ + kasan_remap_early_shadow_ro(); + + clear_page(kasan_early_shadow_page); + + /* At this point kasan is fully initialized. Enable error messages */ + init_task.kasan_depth = 0; + pr_info("KASAN init done\n"); +} + +#ifdef CONFIG_MODULES +void *module_alloc(unsigned long size) +{ + void *base = vmalloc_exec(size); + + if (!base) + return NULL; + + if (!kasan_init_region(base, size)) + return base; + + vfree(base); + + return NULL; +} +#endif + +#ifdef CONFIG_PPC_BOOK3S_32 +u8 __initdata early_hash[256 << 10] __aligned(256 << 10) = {0}; + +static void __init kasan_early_hash_table(void) +{ + modify_instruction_site(&patch__hash_page_A0, 0xffff, __pa(early_hash) >> 16); + modify_instruction_site(&patch__flush_hash_A0, 0xffff, __pa(early_hash) >> 16); + + Hash = (struct hash_pte *)early_hash; +} +#else +static void __init kasan_early_hash_table(void) {} +#endif + +void __init kasan_early_init(void) +{ + unsigned long addr = KASAN_SHADOW_START; + unsigned long end = KASAN_SHADOW_END; + unsigned long next; + pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(addr), addr), addr); + + BUILD_BUG_ON(KASAN_SHADOW_START & ~PGDIR_MASK); + + kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL); + + do { + next = pgd_addr_end(addr, end); + pmd_populate_kernel(&init_mm, pmd, kasan_early_shadow_pte); + } while (pmd++, addr = next, addr != end); + + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) + kasan_early_hash_table(); +} diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index f6787f90e158..e885fe2aafcc 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -54,7 +54,7 @@ #include <asm/swiotlb.h> #include <asm/rtas.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> #ifndef CPU_FTR_COHERENT_ICACHE #define CPU_FTR_COHERENT_ICACHE 0 /* XXX for now */ @@ -109,8 +109,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; } -int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int __ref arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -127,12 +127,12 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap * } flush_inval_dcache_range(start, start + size); - return __add_pages(nid, 
start_pfn, nr_pages, altmap, want_memblock); + return __add_pages(nid, start_pfn, nr_pages, restrictions); } #ifdef CONFIG_MEMORY_HOTREMOVE -int __meminit arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void __ref arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -147,23 +147,21 @@ int __meminit arch_remove_memory(int nid, u64 start, u64 size, if (altmap) page += vmem_altmap_offset(altmap); - ret = __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); - if (ret) - return ret; + __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); /* Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(start); flush_inval_dcache_range(start, start + size); ret = remove_section_mapping(start, start + size); + WARN_ON_ONCE(ret); /* Ensure all vmalloc mappings are flushed in case they also * hit that section of memory */ vm_unmap_aliases(); - resize_hpt_for_hotplug(memblock_phys_mem_size()); - - return ret; + if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC) + pr_warn("Hash collision while resizing HPT\n"); } #endif #endif /* CONFIG_MEMORY_HOTPLUG */ @@ -309,6 +307,10 @@ void __init mem_init(void) mem_init_print_info(NULL); #ifdef CONFIG_PPC32 pr_info("Kernel virtual memory layout:\n"); +#ifdef CONFIG_KASAN + pr_info(" * 0x%08lx..0x%08lx : kasan shadow mem\n", + KASAN_SHADOW_START, KASAN_SHADOW_END); +#endif pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP); #ifdef CONFIG_HIGHMEM pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n", @@ -333,13 +335,6 @@ void free_initmem(void) free_initmem_default(POISON_FREE_INITMEM); } -#ifdef CONFIG_BLK_DEV_INITRD -void __init free_initrd_mem(unsigned long start, unsigned long end) -{ - free_reserved_area((void *)start, (void *)end, -1, "initrd"); -} -#endif - /* * This is called when a page has been modified by the kernel. * It just marks the page as not i-cache clean. 
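
The new 32-bit KASAN support whose shadow range is printed in the layout above tracks each 8 bytes of memory with one shadow byte; kasan_init_32.c wires PTEs for exactly that region. A sketch of the underlying address arithmetic; the scale shift is KASAN's standard 1:8 ratio, but the offset here is an assumed value, not powerpc's real KASAN_SHADOW_OFFSET:

#define KASAN_SHADOW_SCALE_SHIFT        3               /* 1 shadow byte per 8 bytes */
#define KASAN_SHADOW_OFFSET             0xe0000000UL    /* assumed value */

static unsigned long mem_to_shadow(unsigned long addr)
{
        return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;
}

int main(void)
{
        /* neighbouring 8-byte granules map to consecutive shadow bytes */
        return mem_to_shadow(8) - mem_to_shadow(0) == 1 ? 0 : 1;
}
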
We do the i-cache diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index bb52320b7369..6b049d82b98a 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -98,7 +98,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, switch_mmu_context(prev, next, tsk); } -#ifdef CONFIG_PPC32 +#ifndef CONFIG_PPC_BOOK3S_64 void arch_exit_mmap(struct mm_struct *mm) { void *frag = pte_frag_get(&mm->context); diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 74ff61dabcb1..7bac0aa2026a 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -83,6 +83,8 @@ static inline void _tlbivax_bcast(unsigned long address, unsigned int pid, } #endif +static inline void print_system_hash_info(void) {} + #else /* CONFIG_PPC_MMU_NOHASH */ extern void hash_preload(struct mm_struct *mm, unsigned long ea, @@ -92,6 +94,8 @@ extern void hash_preload(struct mm_struct *mm, unsigned long ea, extern void _tlbie(unsigned long address); extern void _tlbia(void); +void print_system_hash_info(void); + #endif /* CONFIG_PPC_MMU_NOHASH */ #ifdef CONFIG_PPC32 @@ -104,8 +108,8 @@ extern int __map_without_bats; extern unsigned int rtas_data, rtas_size; struct hash_pte; -extern struct hash_pte *Hash, *Hash_end; -extern unsigned long Hash_size, Hash_mask; +extern struct hash_pte *Hash; +extern u8 early_hash[]; #endif /* CONFIG_PPC32 */ @@ -130,6 +134,7 @@ extern void wii_memory_fixups(void); */ #ifdef CONFIG_PPC32 extern void MMU_init_hw(void); +void MMU_init_hw_patch(void); unsigned long mmu_mapin_ram(unsigned long base, unsigned long top); #endif diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/nohash/40x.c index b9cf6f8764b0..460459b6f53e 100644 --- a/arch/powerpc/mm/40x_mmu.c +++ b/arch/powerpc/mm/nohash/40x.c @@ -49,7 +49,7 @@ #include <asm/machdep.h> #include <asm/setup.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> extern int __map_without_ltlbs; /* diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/nohash/44x.c index aad127acdbaa..c07983ebc02e 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/nohash/44x.c @@ -31,7 +31,7 @@ #include <asm/cacheflush.h> #include <asm/code-patching.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> /* Used by the 44x TLB replacement exception handler. * Just needed it declared someplace. 
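
mmu_decl.h above pairs a real print_system_hash_info() declaration for hash MMUs with an empty static inline for the nohash case, so callers never need an #ifdef of their own. The same idiom in miniature, with NOHASH standing in for CONFIG_PPC_MMU_NOHASH:

#include <stdio.h>

#ifdef NOHASH
static inline void print_system_hash_info(void) { }     /* no hash: no-op */
#else
static void print_system_hash_info(void)
{
        printf("hash MMU layout...\n"); /* real one prints htab details */
}
#endif

int main(void)
{
        print_system_hash_info();       /* compiles the same either way */
        return 0;
}
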
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/nohash/8xx.c index fe1f6443d57f..70d55b615b62 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -17,7 +17,7 @@ #include <asm/fixmap.h> #include <asm/code-patching.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> #define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT) @@ -213,3 +213,27 @@ void flush_instruction_cache(void) mtspr(SPRN_IC_CST, IDC_INVALL); isync(); } + +#ifdef CONFIG_PPC_KUEP +void __init setup_kuep(bool disabled) +{ + if (disabled) + return; + + pr_info("Activating Kernel Userspace Execution Prevention\n"); + + mtspr(SPRN_MI_AP, MI_APG_KUEP); +} +#endif + +#ifdef CONFIG_PPC_KUAP +void __init setup_kuap(bool disabled) +{ + pr_info("Activating Kernel Userspace Access Protection\n"); + + if (disabled) + pr_warn("KUAP cannot be disabled yet on 8xx when compiled in\n"); + + mtspr(SPRN_MD_AP, MD_APG_KUAP); +} +#endif diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile new file mode 100644 index 000000000000..33b6f6f29d3f --- /dev/null +++ b/arch/powerpc/mm/nohash/Makefile @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0 + +ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) + +obj-y += mmu_context.o tlb.o tlb_low.o +obj-$(CONFIG_PPC_BOOK3E_64) += tlb_low_64e.o book3e_pgtable.o +obj-$(CONFIG_40x) += 40x.o +obj-$(CONFIG_44x) += 44x.o +obj-$(CONFIG_PPC_8xx) += 8xx.o +obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke.o +ifdef CONFIG_HUGETLB_PAGE +obj-$(CONFIG_PPC_FSL_BOOK3E) += book3e_hugetlbpage.o +endif + +# Disable kcov instrumentation on sensitive code +# This is necessary for booting with kcov enabled on book3e machines +KCOV_INSTRUMENT_tlb.o := n +KCOV_INSTRUMENT_fsl_booke.o := n diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c index f84ec46cdb26..61915f4d3c7f 100644 --- a/arch/powerpc/mm/hugetlbpage-book3e.c +++ b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c @@ -11,8 +11,9 @@ #include <asm/mmu.h> -#ifdef CONFIG_PPC_FSL_BOOK3E #ifdef CONFIG_PPC64 +#include <asm/paca.h> + static inline int tlb1_next(void) { struct paca_struct *paca = get_paca(); @@ -29,33 +30,6 @@ static inline int tlb1_next(void) tcd->esel_next = next; return this; } -#else -static inline int tlb1_next(void) -{ - int index, ncams; - - ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; - - index = this_cpu_read(next_tlbcam_idx); - - /* Just round-robin the entries and wrap when we hit the end */ - if (unlikely(index == ncams - 1)) - __this_cpu_write(next_tlbcam_idx, tlbcam_index); - else - __this_cpu_inc(next_tlbcam_idx); - - return index; -} -#endif /* !PPC64 */ -#endif /* FSL */ - -static inline int mmu_get_tsize(int psize) -{ - return mmu_psize_defs[psize].enc; -} - -#if defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_PPC64) -#include <asm/paca.h> static inline void book3e_tlb_lock(void) { @@ -98,6 +72,23 @@ static inline void book3e_tlb_unlock(void) paca->tcd_ptr->lock = 0; } #else +static inline int tlb1_next(void) +{ + int index, ncams; + + ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; + + index = this_cpu_read(next_tlbcam_idx); + + /* Just round-robin the entries and wrap when we hit the end */ + if (unlikely(index == ncams - 1)) + __this_cpu_write(next_tlbcam_idx, tlbcam_index); + else + __this_cpu_inc(next_tlbcam_idx); + + return index; +} + static inline void book3e_tlb_lock(void) { } @@ -139,10 +130,7 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, unsigned long psize, tsize, shift; unsigned long flags; struct mm_struct *mm; - -#ifdef 
CONFIG_PPC_FSL_BOOK3E int index; -#endif if (unlikely(is_kernel_addr(ea))) return; @@ -166,11 +154,9 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, return; } -#ifdef CONFIG_PPC_FSL_BOOK3E /* We have to use the CAM(TLB1) on FSL parts for hugepages */ index = tlb1_next(); mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1)); -#endif mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize); mas2 = ea & ~((1UL << shift) - 1); diff --git a/arch/powerpc/mm/pgtable-book3e.c b/arch/powerpc/mm/nohash/book3e_pgtable.c index 1032ef7aaf62..75e9e2c35fe2 100644 --- a/arch/powerpc/mm/pgtable-book3e.c +++ b/arch/powerpc/mm/nohash/book3e_pgtable.c @@ -15,7 +15,7 @@ #include <asm/tlb.h> #include <asm/dma.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> #ifdef CONFIG_SPARSEMEM_VMEMMAP /* @@ -55,7 +55,7 @@ void vmemmap_remove_mapping(unsigned long start, #endif #endif /* CONFIG_SPARSEMEM_VMEMMAP */ -static __ref void *early_alloc_pgtable(unsigned long size) +static void __init *early_alloc_pgtable(unsigned long size) { void *ptr; @@ -74,7 +74,7 @@ static __ref void *early_alloc_pgtable(unsigned long size) * map_kernel_page adds an entry to the ioremap page table * and adds an entry to the HPT, possibly bolting it */ -int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) +int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) { pgd_t *pgdp; pud_t *pudp; @@ -98,20 +98,17 @@ int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot) #ifndef __PAGETABLE_PUD_FOLDED if (pgd_none(*pgdp)) { pudp = early_alloc_pgtable(PUD_TABLE_SIZE); - BUG_ON(pudp == NULL); pgd_populate(&init_mm, pgdp, pudp); } #endif /* !__PAGETABLE_PUD_FOLDED */ pudp = pud_offset(pgdp, ea); if (pud_none(*pudp)) { pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); - BUG_ON(pmdp == NULL); pud_populate(&init_mm, pudp, pmdp); } pmdp = pmd_offset(pudp, ea); if (!pmd_present(*pmdp)) { ptep = early_alloc_pgtable(PAGE_SIZE); - BUG_ON(ptep == NULL); pmd_populate_kernel(&init_mm, pmdp, ptep); } ptep = pte_offset_kernel(pmdp, ea); diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/nohash/fsl_booke.c index 210cbc1faf63..71a1a36751dd 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/nohash/fsl_booke.c @@ -54,7 +54,7 @@ #include <asm/setup.h> #include <asm/paca.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> unsigned int tlbcam_index; diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/nohash/mmu_context.c index 1945c5f19f5e..ae4505d5b4b8 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/nohash/mmu_context.c @@ -52,7 +52,7 @@ #include <asm/mmu_context.h> #include <asm/tlbflush.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> /* * The MPC8xx has only 16 contexts. We rotate through them on each task switch. diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/nohash/tlb.c index ac23dc1c6535..24f88efb05bf 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -46,7 +46,7 @@ #include <asm/hugetlb.h> #include <asm/paca.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> /* * This struct lists the sw-supported page sizes. 
The hardware MMU may support @@ -433,11 +433,7 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address) unsigned long rid = (address & rmask) | 0x1000000000000000ul; unsigned long vpte = address & ~rmask; -#ifdef CONFIG_PPC_64K_PAGES - vpte = (vpte >> (PAGE_SHIFT - 4)) & ~0xfffful; -#else vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful; -#endif vpte |= rid; __flush_tlb_page(tlb->mm, vpte, tsize, 0); } @@ -625,21 +621,12 @@ static void early_init_this_mmu(void) case PPC_HTW_IBM: mas4 |= MAS4_INDD; -#ifdef CONFIG_PPC_64K_PAGES - mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT; - mmu_pte_psize = MMU_PAGE_256M; -#else mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT; mmu_pte_psize = MMU_PAGE_1M; -#endif break; case PPC_HTW_NONE: -#ifdef CONFIG_PPC_64K_PAGES - mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT; -#else mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT; -#endif mmu_pte_psize = mmu_virtual_psize; break; } @@ -800,5 +787,9 @@ void __init early_init_mmu(void) #ifdef CONFIG_PPC_47x early_init_mmu_47x(); #endif + +#ifdef CONFIG_PPC_MM_SLICES + mm_ctx_set_slb_addr_limit(&init_mm.context, SLB_ADDR_LIMIT_DEFAULT); +#endif } #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/nohash/tlb_low.S index e066a658acac..e066a658acac 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/nohash/tlb_low.S diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index 9ed90064f542..58959ce15415 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -24,11 +24,7 @@ #include <asm/kvm_booke_hv_asm.h> #include <asm/feature-fixups.h> -#ifdef CONFIG_PPC_64K_PAGES -#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE+1) -#else #define VPTE_PMD_SHIFT (PTE_INDEX_SIZE) -#endif #define VPTE_PUD_SHIFT (VPTE_PMD_SHIFT + PMD_INDEX_SIZE) #define VPTE_PGD_SHIFT (VPTE_PUD_SHIFT + PUD_INDEX_SIZE) #define VPTE_INDEX_SIZE (VPTE_PGD_SHIFT + PGD_INDEX_SIZE) @@ -167,13 +163,11 @@ MMU_FTR_SECTION_ELSE ldx r14,r14,r15 /* grab pgd entry */ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV) -#ifndef CONFIG_PPC_64K_PAGES rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3 clrrdi r15,r15,3 cmpdi cr0,r14,0 bge tlb_miss_fault_bolted /* Bad pgd entry or hugepage; bail */ ldx r14,r14,r15 /* grab pud entry */ -#endif /* CONFIG_PPC_64K_PAGES */ rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3 clrrdi r15,r15,3 @@ -682,18 +676,7 @@ normal_tlb_miss: * order to handle the weird page table format used by linux */ ori r10,r15,0x1 -#ifdef CONFIG_PPC_64K_PAGES - /* For the top bits, 16 bytes per PTE */ - rldicl r14,r16,64-(PAGE_SHIFT-4),PAGE_SHIFT-4+4 - /* Now create the bottom bits as 0 in position 0x8000 and - * the rest calculated for 8 bytes per PTE - */ - rldicl r15,r16,64-(PAGE_SHIFT-3),64-15 - /* Insert the bottom bits in */ - rlwimi r14,r15,0,16,31 -#else rldicl r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4 -#endif sldi r15,r10,60 clrrdi r14,r14,3 or r10,r15,r14 @@ -732,11 +715,7 @@ finish_normal_tlb_miss: /* Check page size, if not standard, update MAS1 */ rldicl r11,r14,64-8,64-8 -#ifdef CONFIG_PPC_64K_PAGES - cmpldi cr0,r11,BOOK3E_PAGESZ_64K -#else cmpldi cr0,r11,BOOK3E_PAGESZ_4K -#endif beq- 1f mfspr r11,SPRN_MAS1 rlwimi r11,r14,31,21,24 @@ -857,14 +836,12 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) cmpdi cr0,r15,0 bge virt_page_table_tlb_miss_fault -#ifndef CONFIG_PPC_64K_PAGES /* Get to PUD entry */ rldicl r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 cmpdi cr0,r15,0 bge
virt_page_table_tlb_miss_fault -#endif /* CONFIG_PPC_64K_PAGES */ /* Get to PMD entry */ rldicl r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3 @@ -1106,14 +1083,12 @@ htw_tlb_miss: cmpdi cr0,r15,0 bge htw_tlb_miss_fault -#ifndef CONFIG_PPC_64K_PAGES /* Get to PUD entry */ rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 cmpdi cr0,r15,0 bge htw_tlb_miss_fault -#endif /* CONFIG_PPC_64K_PAGES */ /* Get to PMD entry */ rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3 @@ -1132,9 +1107,7 @@ htw_tlb_miss: * 4K page we need to extract a bit from the virtual address and * insert it into the "PA52" bit of the RPN. */ -#ifndef CONFIG_PPC_64K_PAGES rlwimi r15,r16,32-9,20,20 -#endif /* Now we build the MAS: * * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG @@ -1144,11 +1117,7 @@ htw_tlb_miss: * MAS 2 : Use defaults * MAS 3+7 : Needs to be done */ -#ifdef CONFIG_PPC_64K_PAGES - ori r10,r15,(BOOK3E_PAGESZ_64K << MAS3_SPSIZE_SHIFT) -#else ori r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT) -#endif BEGIN_MMU_FTR_SECTION srdi r16,r10,32 diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index f976676004ad..57e64273cb33 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -32,7 +32,6 @@ #include <asm/sparsemem.h> #include <asm/prom.h> #include <asm/smp.h> -#include <asm/cputhreads.h> #include <asm/topology.h> #include <asm/firmware.h> #include <asm/paca.h> @@ -908,16 +907,22 @@ static int __init early_numa(char *p) } early_param("numa", early_numa); -static bool topology_updates_enabled = true; +/* + * The platform can inform us through one of several mechanisms + * (post-migration device tree updates, PRRN or VPHN) that the NUMA + * assignment of a resource has changed. This controls whether we act + * on that. Disabled by default. 
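With the default flipped to off, every path must now opt in explicitly. Condensing the scattered hunks that follow into one view (a sketch; the command-line key is assumed to be topology_updates, inferred from the handler name, since the early_param() registration sits outside the quoted context):

/* boot:   topology_updates=on  -> early_topology_updates() sets the flag */
/* procfs: writing "on"/"off"   -> topology_write() sets/clears it        */
int start_topology_update(void)
{
        if (!topology_updates_enabled)
                return 0;       /* default: leave PRRN/VPHN quiescent */
        /* ... firmware feature checks and update scheduling, as below ... */
        return 0;
}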
+ */ +static bool topology_updates_enabled; static int __init early_topology_updates(char *p) { if (!p) return 0; - if (!strcmp(p, "off")) { - pr_info("Disabling topology updates\n"); - topology_updates_enabled = false; + if (!strcmp(p, "on")) { + pr_warn("Caution: enabling topology updates\n"); + topology_updates_enabled = true; } return 0; @@ -1063,7 +1068,7 @@ u64 memory_hotplug_max(void) /* Virtual Processor Home Node (VPHN) support */ #ifdef CONFIG_PPC_SPLPAR -#include "vphn.h" +#include "book3s64/vphn.h" struct topology_update_data { struct topology_update_data *next; @@ -1498,6 +1503,9 @@ int start_topology_update(void) { int rc = 0; + if (!topology_updates_enabled) + return 0; + if (firmware_has_feature(FW_FEATURE_PRRN)) { if (!prrn_enabled) { prrn_enabled = 1; @@ -1531,6 +1539,9 @@ int stop_topology_update(void) { int rc = 0; + if (!topology_updates_enabled) + return 0; + if (prrn_enabled) { prrn_enabled = 0; #ifdef CONFIG_SMP @@ -1588,11 +1599,13 @@ static ssize_t topology_write(struct file *file, const char __user *buf, kbuf[read_len] = '\0'; - if (!strncmp(kbuf, "on", 2)) + if (!strncmp(kbuf, "on", 2)) { + topology_updates_enabled = true; start_topology_update(); - else if (!strncmp(kbuf, "off", 3)) + } else if (!strncmp(kbuf, "off", 3)) { stop_topology_update(); - else + topology_updates_enabled = false; + } else return -EINVAL; return count; @@ -1607,9 +1620,7 @@ static const struct file_operations topology_ops = { static int topology_update_init(void) { - /* Do not poll for changes if disabled at boot */ - if (topology_updates_enabled) - start_topology_update(); + start_topology_update(); if (vphn_enabled) topology_schedule_update(); diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index d3d61d29b4f1..db4a6253df92 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -30,6 +30,7 @@ #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/tlb.h> +#include <asm/hugetlb.h> static inline int is_exec_fault(void) { @@ -299,3 +300,116 @@ unsigned long vmalloc_to_phys(void *va) return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va); } EXPORT_SYMBOL_GPL(vmalloc_to_phys); + +/* + * We have 4 cases for pgds and pmds: + * (1) invalid (all zeroes) + * (2) pointer to next table, as normal; bottom 6 bits == 0 + * (3) leaf pte for huge page _PAGE_PTE set + * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table + * + * So long as we atomically load page table pointers we are safe against teardown, + * we can follow the address down to the page and take a ref on it. + * This function needs to be called with interrupts disabled. We use this variant + * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED + */ +pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea, + bool *is_thp, unsigned *hpage_shift) +{ + pgd_t pgd, *pgdp; + pud_t pud, *pudp; + pmd_t pmd, *pmdp; + pte_t *ret_pte; + hugepd_t *hpdp = NULL; + unsigned pdshift = PGDIR_SHIFT; + + if (hpage_shift) + *hpage_shift = 0; + + if (is_thp) + *is_thp = false; + + pgdp = pgdir + pgd_index(ea); + pgd = READ_ONCE(*pgdp); + /* + * Always operate on the local stack value. This makes sure the + * value doesn't get updated by a parallel THP split/collapse, + * page fault or a page unmap. The return pte_t * is still not + * stable. So it should be checked there for the above conditions.
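The rule that comment describes is worth isolating: each level is snapshotted into a local with READ_ONCE() and only the local copy is ever tested or dereferenced, so a parallel THP split/collapse or unmap cannot change the value mid-walk. One level of the walk, reduced to the pattern (a sketch; error handling and the huge-page cases are elided):

pgdp = pgdir + pgd_index(ea);
pgd = READ_ONCE(*pgdp);         /* one atomic snapshot of the entry */
if (pgd_none(pgd))
        return NULL;            /* raced with teardown: bail out */
pudp = pud_offset(&pgd, ea);    /* descend via the copy, never via *pgdp */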
+ */ + if (pgd_none(pgd)) + return NULL; + + if (pgd_huge(pgd)) { + ret_pte = (pte_t *)pgdp; + goto out; + } + if (is_hugepd(__hugepd(pgd_val(pgd)))) { + hpdp = (hugepd_t *)&pgd; + goto out_huge; + } + + /* + * Even if we end up with an unmap, the pgtable will not + * be freed, because we do an rcu free and here we are + * irq disabled + */ + pdshift = PUD_SHIFT; + pudp = pud_offset(&pgd, ea); + pud = READ_ONCE(*pudp); + + if (pud_none(pud)) + return NULL; + + if (pud_huge(pud)) { + ret_pte = (pte_t *)pudp; + goto out; + } + if (is_hugepd(__hugepd(pud_val(pud)))) { + hpdp = (hugepd_t *)&pud; + goto out_huge; + } + pdshift = PMD_SHIFT; + pmdp = pmd_offset(&pud, ea); + pmd = READ_ONCE(*pmdp); + /* + * A hugepage collapse is captured by pmd_none, because + * it marks the pmd none and does a hpte invalidate. + */ + if (pmd_none(pmd)) + return NULL; + + if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) { + if (is_thp) + *is_thp = true; + ret_pte = (pte_t *)pmdp; + goto out; + } + /* + * pmd_large check below will handle the swap pmd pte + * we need to do both checks because they are config + * dependent. + */ + if (pmd_huge(pmd) || pmd_large(pmd)) { + ret_pte = (pte_t *)pmdp; + goto out; + } + if (is_hugepd(__hugepd(pmd_val(pmd)))) { + hpdp = (hugepd_t *)&pmd; + goto out_huge; + } + + return pte_offset_kernel(&pmd, ea); + +out_huge: + if (!hpdp) + return NULL; + + ret_pte = hugepte_offset(*hpdp, ea, pdshift); + pdshift = hugepd_shift(*hpdp); +out: + if (hpage_shift) + *hpage_shift = pdshift; + return ret_pte; +} +EXPORT_SYMBOL_GPL(__find_linux_pte); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 6e56a6240bfa..16ada373b32b 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -36,26 +36,13 @@ #include <asm/setup.h> #include <asm/sections.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> unsigned long ioremap_bot; EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ extern char etext[], _stext[], _sinittext[], _einittext[]; -__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm) -{ - if (!slab_is_available()) - return memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE); - - return (pte_t *)pte_fragment_alloc(mm, 1); -} - -pgtable_t pte_alloc_one(struct mm_struct *mm) -{ - return (pgtable_t)pte_fragment_alloc(mm, 0); -} - void __iomem * ioremap(phys_addr_t addr, unsigned long size) { @@ -205,7 +192,29 @@ void iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(iounmap); -int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) +static void __init *early_alloc_pgtable(unsigned long size) +{ + void *ptr = memblock_alloc(size, size); + + if (!ptr) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, size, size); + + return ptr; +} + +static pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va) +{ + if (pmd_none(*pmdp)) { + pte_t *ptep = early_alloc_pgtable(PTE_FRAG_SIZE); + + pmd_populate_kernel(&init_mm, pmdp, ptep); + } + return pte_offset_kernel(pmdp, va); +} + + +int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) { pmd_t *pd; pte_t *pg; @@ -214,7 +223,10 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot) /* Use upper 10 bits of VA to index the first level map */ pd = pmd_offset(pud_offset(pgd_offset_k(va), va), va); /* Use middle 10 bits of VA to index the second-level map */ - pg = pte_alloc_kernel(pd, va); + if (likely(slab_is_available())) + pg = pte_alloc_kernel(pd, va); + else + pg = early_pte_alloc_kernel(pd, va); if (pg != 0) { err = 0; /* The PTE should
never be already set nor present in the @@ -384,6 +396,9 @@ void mark_rodata_ro(void) PFN_DOWN((unsigned long)__start_rodata); change_page_attr(page, numpages, PAGE_KERNEL_RO); + + // mark_initmem_nx() should have already run by now + ptdump_check_wx(); } #endif diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index fb1375c07e8c..d2d976ff8a0e 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -52,7 +52,7 @@ #include <asm/firmware.h> #include <asm/dma.h> -#include "mmu_decl.h" +#include <mm/mmu_decl.h> #ifdef CONFIG_PPC_BOOK3S_64 @@ -90,14 +90,13 @@ unsigned long __pgd_val_bits; EXPORT_SYMBOL(__pgd_val_bits); unsigned long __kernel_virt_start; EXPORT_SYMBOL(__kernel_virt_start); -unsigned long __kernel_virt_size; -EXPORT_SYMBOL(__kernel_virt_size); unsigned long __vmalloc_start; EXPORT_SYMBOL(__vmalloc_start); unsigned long __vmalloc_end; EXPORT_SYMBOL(__vmalloc_end); unsigned long __kernel_io_start; EXPORT_SYMBOL(__kernel_io_start); +unsigned long __kernel_io_end; struct page *vmemmap; EXPORT_SYMBOL(vmemmap); unsigned long __pte_frag_nr; @@ -121,6 +120,11 @@ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_ if (pgprot_val(prot) & H_PAGE_4K_PFN) return NULL; + if ((ea + size) >= (void *)IOREMAP_END) { + pr_warn("Outside the supported range\n"); + return NULL; + } + WARN_ON(pa & ~PAGE_MASK); WARN_ON(((unsigned long)ea) & ~PAGE_MASK); WARN_ON(size & ~PAGE_MASK); @@ -328,6 +332,9 @@ void mark_rodata_ro(void) radix__mark_rodata_ro(); else hash__mark_rodata_ro(); + + // mark_initmem_nx() should have already run by now + ptdump_check_wx(); } void mark_initmem_nx(void) diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c index b430e4e08af6..b9bda0105841 100644 --- a/arch/powerpc/mm/ptdump/hashpagetable.c +++ b/arch/powerpc/mm/ptdump/hashpagetable.c @@ -500,7 +500,7 @@ static void populate_markers(void) address_markers[7].start_address = IOREMAP_BASE; address_markers[8].start_address = IOREMAP_END; #ifdef CONFIG_PPC_BOOK3S_64 - address_markers[9].start_address = H_VMEMMAP_BASE; + address_markers[9].start_address = H_VMEMMAP_START; #else address_markers[9].start_address = VMEMMAP_BASE; #endif diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 37138428ab55..646876d9da64 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -31,7 +31,7 @@ #include "ptdump.h" #ifdef CONFIG_PPC32 -#define KERN_VIRT_START 0 +#define KERN_VIRT_START PAGE_OFFSET #endif /* @@ -68,6 +68,8 @@ struct pg_state { unsigned long last_pa; unsigned int level; u64 current_flags; + bool check_wx; + unsigned long wx_pages; }; struct addr_marker { @@ -101,9 +103,25 @@ static struct addr_marker address_markers[] = { { 0, "Fixmap start" }, { 0, "Fixmap end" }, #endif +#ifdef CONFIG_KASAN + { 0, "kasan shadow mem start" }, + { 0, "kasan shadow mem end" }, +#endif { -1, NULL }, }; +#define pt_dump_seq_printf(m, fmt, args...) 
\ +({ \ + if (m) \ + seq_printf(m, fmt, ##args); \ +}) + +#define pt_dump_seq_putc(m, c) \ +({ \ + if (m) \ + seq_putc(m, c); \ +}) + static void dump_flag_info(struct pg_state *st, const struct flag_info *flag, u64 pte, int num) { @@ -121,19 +139,19 @@ static void dump_flag_info(struct pg_state *st, const struct flag_info val = pte & flag->val; if (flag->shift) val = val >> flag->shift; - seq_printf(st->seq, " %s:%llx", flag->set, val); + pt_dump_seq_printf(st->seq, " %s:%llx", flag->set, val); } else { if ((pte & flag->mask) == flag->val) s = flag->set; else s = flag->clear; if (s) - seq_printf(st->seq, " %s", s); + pt_dump_seq_printf(st->seq, " %s", s); } st->current_flags &= ~flag->mask; } if (st->current_flags != 0) - seq_printf(st->seq, " unknown flags:%llx", st->current_flags); + pt_dump_seq_printf(st->seq, " unknown flags:%llx", st->current_flags); } static void dump_addr(struct pg_state *st, unsigned long addr) @@ -148,12 +166,12 @@ static void dump_addr(struct pg_state *st, unsigned long addr) #define REG "0x%08lx" #endif - seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1); + pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1); if (st->start_pa == st->last_pa && st->start_address + PAGE_SIZE != addr) { - seq_printf(st->seq, "[" REG "]", st->start_pa); + pt_dump_seq_printf(st->seq, "[" REG "]", st->start_pa); delta = PAGE_SIZE >> 10; } else { - seq_printf(st->seq, " " REG " ", st->start_pa); + pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa); delta = (addr - st->start_address) >> 10; } /* Work out what appropriate unit to use */ @@ -161,10 +179,24 @@ static void dump_addr(struct pg_state *st, unsigned long addr) delta >>= 10; unit++; } - seq_printf(st->seq, "%9lu%c", delta, *unit); + pt_dump_seq_printf(st->seq, "%9lu%c", delta, *unit); } +static void note_prot_wx(struct pg_state *st, unsigned long addr) +{ + if (!st->check_wx) + return; + + if (!((st->current_flags & pgprot_val(PAGE_KERNEL_X)) == pgprot_val(PAGE_KERNEL_X))) + return; + + WARN_ONCE(1, "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n", + (void *)st->start_address, (void *)st->start_address); + + st->wx_pages += (addr - st->start_address) / PAGE_SIZE; +} + static void note_page(struct pg_state *st, unsigned long addr, unsigned int level, u64 val) { @@ -178,7 +210,7 @@ static void note_page(struct pg_state *st, unsigned long addr, st->start_address = addr; st->start_pa = pa; st->last_pa = pa; - seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); /* * Dump the section of virtual memory when: * - the PTE flags from one entry to the next differs. 
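These wrappers are what let the dumper double as a silent checker: with st->seq == NULL every pt_dump_seq_printf()/pt_dump_seq_putc() collapses to a no-op, so the same page-table walk can run without producing output. That is exactly how ptdump_check_wx(), added further down, drives it (shape of the call, fields abridged):

struct pg_state st = {
        .seq      = NULL,               /* suppress all printing */
        .marker   = address_markers,
        .check_wx = true,               /* tally W+X pages instead */
};
walk_pagetables(&st);
/* st.wx_pages now holds the count of writable+executable pages */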
@@ -194,6 +226,7 @@ static void note_page(struct pg_state *st, unsigned long addr, /* Check the PTE flags */ if (st->current_flags) { + note_prot_wx(st, addr); dump_addr(st, addr); /* Dump all the flags */ @@ -202,7 +235,7 @@ static void note_page(struct pg_state *st, unsigned long addr, st->current_flags, pg_level[st->level].num); - seq_putc(st->seq, '\n'); + pt_dump_seq_putc(st->seq, '\n'); } /* @@ -211,7 +244,7 @@ static void note_page(struct pg_state *st, unsigned long addr, */ while (addr >= st->marker[1].start_address) { st->marker++; - seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); } st->start_address = addr; st->start_pa = pa; @@ -303,8 +336,9 @@ static void populate_markers(void) address_markers[i++].start_address = PHB_IO_END; address_markers[i++].start_address = IOREMAP_BASE; address_markers[i++].start_address = IOREMAP_END; + /* What is the ifdef about? */ #ifdef CONFIG_PPC_BOOK3S_64 - address_markers[i++].start_address = H_VMEMMAP_BASE; + address_markers[i++].start_address = H_VMEMMAP_START; #else address_markers[i++].start_address = VMEMMAP_BASE; #endif @@ -322,6 +356,10 @@ static void populate_markers(void) #endif address_markers[i++].start_address = FIXADDR_START; address_markers[i++].start_address = FIXADDR_TOP; +#ifdef CONFIG_KASAN + address_markers[i++].start_address = KASAN_SHADOW_START; + address_markers[i++].start_address = KASAN_SHADOW_END; +#endif #endif /* CONFIG_PPC64 */ } @@ -366,6 +404,30 @@ static void build_pgtable_complete_mask(void) pg_level[i].mask |= pg_level[i].flag[j].mask; } +#ifdef CONFIG_PPC_DEBUG_WX +void ptdump_check_wx(void) +{ + struct pg_state st = { + .seq = NULL, + .marker = address_markers, + .check_wx = true, + }; + + if (radix_enabled()) + st.start_address = PAGE_OFFSET; + else + st.start_address = KERN_VIRT_START; + + walk_pagetables(&st); + + if (st.wx_pages) + pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", + st.wx_pages); + else + pr_info("Checked W+X mappings: passed, no W+X pages found\n"); +} +#endif + static int ptdump_init(void) { struct dentry *debugfs_file; diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index aec91dbcdc0b..97fbf7b54422 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -101,7 +101,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr, { struct vm_area_struct *vma; - if ((mm->context.slb_addr_limit - len) < addr) + if ((mm_ctx_slb_addr_limit(&mm->context) - len) < addr) return 0; vma = find_vma(mm, addr); return (!vma || (addr + len) <= vm_start_gap(vma)); @@ -118,13 +118,11 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice) unsigned long start = slice << SLICE_HIGH_SHIFT; unsigned long end = start + (1ul << SLICE_HIGH_SHIFT); -#ifdef CONFIG_PPC64 /* Hack, so that each address is controlled by exactly one * of the high or low area bitmaps, the first high area starts * at 4GB, not 0 */ if (start == 0) - start = SLICE_LOW_TOP; -#endif + start = (unsigned long)SLICE_LOW_TOP; return !slice_area_is_free(mm, start, end - start); } @@ -150,40 +148,6 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret, __set_bit(i, ret->high_slices); } -#ifdef CONFIG_PPC_BOOK3S_64 -static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize) -{ -#ifdef CONFIG_PPC_64K_PAGES - if (psize == MMU_PAGE_64K) - return &mm->context.mask_64k; -#endif - if (psize == MMU_PAGE_4K) - return &mm->context.mask_4k; -#ifdef
CONFIG_HUGETLB_PAGE - if (psize == MMU_PAGE_16M) - return &mm->context.mask_16m; - if (psize == MMU_PAGE_16G) - return &mm->context.mask_16g; -#endif - BUG(); -} -#elif defined(CONFIG_PPC_8xx) -static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize) -{ - if (psize == mmu_virtual_psize) - return &mm->context.mask_base_psize; -#ifdef CONFIG_HUGETLB_PAGE - if (psize == MMU_PAGE_512K) - return &mm->context.mask_512k; - if (psize == MMU_PAGE_8M) - return &mm->context.mask_8m; -#endif - BUG(); -} -#else -#error "Must define the slice masks for page sizes supported by the platform" -#endif - static bool slice_check_range_fits(struct mm_struct *mm, const struct slice_mask *available, unsigned long start, unsigned long len) @@ -246,14 +210,14 @@ static void slice_convert(struct mm_struct *mm, slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize); slice_print_mask(" mask", mask); - psize_mask = slice_mask_for_size(mm, psize); + psize_mask = slice_mask_for_size(&mm->context, psize); /* We need to use a spinlock here to protect against * concurrent 64k -> 4k demotion ... */ spin_lock_irqsave(&slice_convert_lock, flags); - lpsizes = mm->context.low_slices_psize; + lpsizes = mm_ctx_low_slices(&mm->context); for (i = 0; i < SLICE_NUM_LOW; i++) { if (!(mask->low_slices & (1u << i))) continue; @@ -263,7 +227,7 @@ static void slice_convert(struct mm_struct *mm, /* Update the slice_mask */ old_psize = (lpsizes[index] >> (mask_index * 4)) & 0xf; - old_mask = slice_mask_for_size(mm, old_psize); + old_mask = slice_mask_for_size(&mm->context, old_psize); old_mask->low_slices &= ~(1u << i); psize_mask->low_slices |= 1u << i; @@ -272,8 +236,8 @@ static void slice_convert(struct mm_struct *mm, (((unsigned long)psize) << (mask_index * 4)); } - hpsizes = mm->context.high_slices_psize; - for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) { + hpsizes = mm_ctx_high_slices(&mm->context); + for (i = 0; i < GET_HIGH_SLICE_INDEX(mm_ctx_slb_addr_limit(&mm->context)); i++) { if (!test_bit(i, mask->high_slices)) continue; @@ -282,7 +246,7 @@ static void slice_convert(struct mm_struct *mm, /* Update the slice_mask */ old_psize = (hpsizes[index] >> (mask_index * 4)) & 0xf; - old_mask = slice_mask_for_size(mm, old_psize); + old_mask = slice_mask_for_size(&mm->context, old_psize); __clear_bit(i, old_mask->high_slices); __set_bit(i, psize_mask->high_slices); @@ -292,8 +256,8 @@ static void slice_convert(struct mm_struct *mm, } slice_dbg(" lsps=%lx, hsps=%lx\n", - (unsigned long)mm->context.low_slices_psize, - (unsigned long)mm->context.high_slices_psize); + (unsigned long)mm_ctx_low_slices(&mm->context), + (unsigned long)mm_ctx_high_slices(&mm->context)); spin_unlock_irqrestore(&slice_convert_lock, flags); @@ -393,7 +357,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, * DEFAULT_MAP_WINDOW we should apply this. */ if (high_limit > DEFAULT_MAP_WINDOW) - addr += mm->context.slb_addr_limit - DEFAULT_MAP_WINDOW; + addr += mm_ctx_slb_addr_limit(&mm->context) - DEFAULT_MAP_WINDOW; while (addr > min_addr) { info.high_limit = addr; @@ -505,20 +469,20 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, return -ENOMEM; } - if (high_limit > mm->context.slb_addr_limit) { + if (high_limit > mm_ctx_slb_addr_limit(&mm->context)) { /* * Increasing the slb_addr_limit does not require * slice mask cache to be recalculated because it should * be already initialised beyond the old address limit. 
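The conversion running through slice.c is mechanical but purposeful: every direct mm->context field access becomes an mm_ctx_*() accessor, presumably so the book3s64 and nohash/8xx mm_context_t layouts can diverge behind one API. The before/after shape, using the address-limit field as the example (a sketch):

/* before: slice.c hard-codes the field layout */
if (high_limit > mm->context.slb_addr_limit)
        mm->context.slb_addr_limit = high_limit;

/* after: the context header decides where the field lives */
if (high_limit > mm_ctx_slb_addr_limit(&mm->context))
        mm_ctx_set_slb_addr_limit(&mm->context, high_limit);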
*/ - mm->context.slb_addr_limit = high_limit; + mm_ctx_set_slb_addr_limit(&mm->context, high_limit); on_each_cpu(slice_flush_segments, mm, 1); } /* Sanity checks */ BUG_ON(mm->task_size == 0); - BUG_ON(mm->context.slb_addr_limit == 0); + BUG_ON(mm_ctx_slb_addr_limit(&mm->context) == 0); VM_BUG_ON(radix_enabled()); slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize); @@ -538,7 +502,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, /* First make up a "good" mask of slices that have the right size * already */ - maskp = slice_mask_for_size(mm, psize); + maskp = slice_mask_for_size(&mm->context, psize); /* * Here "good" means slices that are already the right page size, @@ -565,7 +529,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, * a pointer to good mask for the next code to use. */ if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) { - compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K); + compat_maskp = slice_mask_for_size(&mm->context, MMU_PAGE_4K); if (fixed) slice_or_mask(&good_mask, maskp, compat_maskp); else @@ -642,14 +606,13 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, newaddr = slice_find_area(mm, len, &potential_mask, psize, topdown, high_limit); -#ifdef CONFIG_PPC_64K_PAGES - if (newaddr == -ENOMEM && psize == MMU_PAGE_64K) { + if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && newaddr == -ENOMEM && + psize == MMU_PAGE_64K) { /* retry the search with 4k-page slices included */ slice_or_mask(&potential_mask, &potential_mask, compat_maskp); newaddr = slice_find_area(mm, len, &potential_mask, psize, topdown, high_limit); } -#endif if (newaddr == -ENOMEM) return -ENOMEM; @@ -696,7 +659,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long flags) { return slice_get_unmapped_area(addr, len, flags, - current->mm->context.user_psize, 0); + mm_ctx_user_psize(¤t->mm->context), 0); } unsigned long arch_get_unmapped_area_topdown(struct file *filp, @@ -706,7 +669,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, const unsigned long flags) { return slice_get_unmapped_area(addr0, len, flags, - current->mm->context.user_psize, 1); + mm_ctx_user_psize(¤t->mm->context), 1); } unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) @@ -717,10 +680,10 @@ unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr) VM_BUG_ON(radix_enabled()); if (slice_addr_is_low(addr)) { - psizes = mm->context.low_slices_psize; + psizes = mm_ctx_low_slices(&mm->context); index = GET_LOW_SLICE_INDEX(addr); } else { - psizes = mm->context.high_slices_psize; + psizes = mm_ctx_high_slices(&mm->context); index = GET_HIGH_SLICE_INDEX(addr); } mask_index = index & 0x1; @@ -741,27 +704,22 @@ void slice_init_new_context_exec(struct mm_struct *mm) * case of fork it is just inherited from the mm being * duplicated. */ -#ifdef CONFIG_PPC64 - mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64; -#else - mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW; -#endif - - mm->context.user_psize = psize; + mm_ctx_set_slb_addr_limit(&mm->context, SLB_ADDR_LIMIT_DEFAULT); + mm_ctx_set_user_psize(&mm->context, psize); /* * Set all slice psizes to the default. 
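The memsets just below rely on the psize arrays packing two 4-bit page-size indices per byte, which is why they write only half as many bytes as there are slices and double the index into both nibbles. Spelled out (a sketch, matching the (lpsizes[index] >> (mask_index * 4)) & 0xf reads in slice_convert() above):

u8 packed = (psize << 4) | psize;               /* same psize in both nibbles */
memset(lpsizes, packed, SLICE_NUM_LOW >> 1);    /* two slices per byte */

/* reading slice i back out again: */
int index = i >> 1, mask_index = i & 0x1;
unsigned int cur = (lpsizes[index] >> (mask_index * 4)) & 0xf;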
*/ - lpsizes = mm->context.low_slices_psize; + lpsizes = mm_ctx_low_slices(&mm->context); memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1); - hpsizes = mm->context.high_slices_psize; + hpsizes = mm_ctx_high_slices(&mm->context); memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1); /* * Slice mask cache starts zeroed, fill the default size cache. */ - mask = slice_mask_for_size(mm, psize); + mask = slice_mask_for_size(&mm->context, psize); mask->low_slices = ~0UL; if (SLICE_NUM_HIGH) bitmap_fill(mask->high_slices, SLICE_NUM_HIGH); @@ -777,7 +735,7 @@ void slice_setup_new_exec(void) if (!is_32bit_task()) return; - mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW; + mm_ctx_set_slb_addr_limit(&mm->context, DEFAULT_MAP_WINDOW); } #endif @@ -816,22 +774,21 @@ int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len) { const struct slice_mask *maskp; - unsigned int psize = mm->context.user_psize; + unsigned int psize = mm_ctx_user_psize(&mm->context); VM_BUG_ON(radix_enabled()); - maskp = slice_mask_for_size(mm, psize); -#ifdef CONFIG_PPC_64K_PAGES + maskp = slice_mask_for_size(&mm->context, psize); + /* We need to account for 4k slices too */ - if (psize == MMU_PAGE_64K) { + if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) { const struct slice_mask *compat_maskp; struct slice_mask available; - compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K); + compat_maskp = slice_mask_for_size(&mm->context, MMU_PAGE_4K); slice_or_mask(&available, maskp, compat_maskp); return !slice_check_range_fits(mm, &available, addr, len); } -#endif return !slice_check_range_fits(mm, maskp, addr, len); } diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index ab26df5bacb9..c155dcbb8691 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -5,7 +5,8 @@ obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o \ - isa207-common.o power8-pmu.o power9-pmu.o + isa207-common.o power8-pmu.o power9-pmu.o \ + generic-compat-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index b0723002a396..a66fb9c01c9e 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -22,6 +22,10 @@ #include <asm/ptrace.h> #include <asm/code-patching.h> +#ifdef CONFIG_PPC64 +#include "internal.h" +#endif + #define BHRB_MAX_ENTRIES 32 #define BHRB_TARGET 0x0000000000000002 #define BHRB_PREDICTION 0x0000000000000001 @@ -2294,3 +2298,27 @@ int register_power_pmu(struct power_pmu *pmu) power_pmu_prepare_cpu, NULL); return 0; } + +#ifdef CONFIG_PPC64 +static int __init init_ppc64_pmu(void) +{ + /* run through all the pmu drivers one at a time */ + if (!init_power5_pmu()) + return 0; + else if (!init_power5p_pmu()) + return 0; + else if (!init_power6_pmu()) + return 0; + else if (!init_power7_pmu()) + return 0; + else if (!init_power8_pmu()) + return 0; + else if (!init_power9_pmu()) + return 0; + else if (!init_ppc970_pmu()) + return 0; + else + return init_generic_compat_pmu(); +} +early_initcall(init_ppc64_pmu); +#endif diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c new file mode 100644 index 000000000000..5e5a54d5588e --- /dev/null +++ b/arch/powerpc/perf/generic-compat-pmu.c @@ -0,0 +1,234 @@ +// 
SPDX-License-Identifier: GPL-2.0+ +// +// Copyright 2019 Madhavan Srinivasan, IBM Corporation. + +#define pr_fmt(fmt) "generic-compat-pmu: " fmt + +#include "isa207-common.h" + +/* + * Raw event encoding: + * + * 60 56 52 48 44 40 36 32 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * + * 28 24 20 16 12 8 4 0 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | + * [ pmc ] [unit ] [ ] m [ pmcxsel ] + * | | + * | *- mark + * | + * | + * *- combine + * + * Below uses IBM bit numbering. + * + * MMCR1[x:y] = unit (PMCxUNIT) + * MMCR1[24] = pmc1combine[0] + * MMCR1[25] = pmc1combine[1] + * MMCR1[26] = pmc2combine[0] + * MMCR1[27] = pmc2combine[1] + * MMCR1[28] = pmc3combine[0] + * MMCR1[29] = pmc3combine[1] + * MMCR1[30] = pmc4combine[0] + * MMCR1[31] = pmc4combine[1] + * + */ + +/* + * Some power9 event codes. + */ +#define EVENT(_name, _code) _name = _code, + +enum { +EVENT(PM_CYC, 0x0001e) +EVENT(PM_INST_CMPL, 0x00002) +}; + +#undef EVENT + +GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC); +GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL); + +static struct attribute *generic_compat_events_attr[] = { + GENERIC_EVENT_PTR(PM_CYC), + GENERIC_EVENT_PTR(PM_INST_CMPL), + NULL +}; + +static struct attribute_group generic_compat_pmu_events_group = { + .name = "events", + .attrs = generic_compat_events_attr, +}; + +PMU_FORMAT_ATTR(event, "config:0-19"); +PMU_FORMAT_ATTR(pmcxsel, "config:0-7"); +PMU_FORMAT_ATTR(mark, "config:8"); +PMU_FORMAT_ATTR(combine, "config:10-11"); +PMU_FORMAT_ATTR(unit, "config:12-15"); +PMU_FORMAT_ATTR(pmc, "config:16-19"); + +static struct attribute *generic_compat_pmu_format_attr[] = { + &format_attr_event.attr, + &format_attr_pmcxsel.attr, + &format_attr_mark.attr, + &format_attr_combine.attr, + &format_attr_unit.attr, + &format_attr_pmc.attr, + NULL, +}; + +static struct attribute_group generic_compat_pmu_format_group = { + .name = "format", + .attrs = generic_compat_pmu_format_attr, +}; + +static const struct attribute_group *generic_compat_pmu_attr_groups[] = { + &generic_compat_pmu_format_group, + &generic_compat_pmu_events_group, + NULL, +}; + +static int compat_generic_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC, + [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL, +}; + +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. 
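The table that follows is indexed by cache, operation and result, and the 0/-1 convention matters to callers: the core powerpc perf code rejects nonsensical combinations and reports unsupported ones. Roughly the shape of the consumer's lookup (a sketch, not code from this patch):

int code = generic_compat_cache_events[C(L1D)][C(OP_READ)][C(RESULT_MISS)];
if (code == -1)
        return -EINVAL;         /* combination makes no sense */
if (code == 0)
        return -EOPNOTSUPP;     /* meaningful, but not counted by this PMU */
/* otherwise 'code' is the raw event code to program */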
+ */ +static int generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(L1I) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(NODE) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +#undef C + +static struct power_pmu generic_compat_pmu = { + .name = "GENERIC_COMPAT", + .n_counter = MAX_PMU_COUNTERS, + .add_fields = ISA207_ADD_FIELDS, + .test_adder = ISA207_TEST_ADDER, + .compute_mmcr = isa207_compute_mmcr, + .get_constraint = isa207_get_constraint, + .disable_pmc = isa207_disable_pmc, + .flags = PPMU_HAS_SIER | PPMU_ARCH_207S, + .n_generic = ARRAY_SIZE(compat_generic_events), + .generic_events = compat_generic_events, + .cache_events = &generic_compat_cache_events, + .attr_groups = generic_compat_pmu_attr_groups, +}; + +int init_generic_compat_pmu(void) +{ + int rc = 0; + + rc = register_power_pmu(&generic_compat_pmu); + if (rc) + return rc; + + /* Tell userspace that EBB is supported */ + cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB; + + return 0; +} diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index b1c37cc3fa98..31fa753e2eb2 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -43,12 +43,17 @@ static DEFINE_PER_CPU(u64 *, thread_imc_mem); static struct imc_pmu *thread_imc_pmu; static int thread_imc_mem_size; +/* Trace IMC data structures */ +static DEFINE_PER_CPU(u64 *, trace_imc_mem); +static struct imc_pmu_ref *trace_imc_refc; +static int trace_imc_mem_size; + static struct imc_pmu *imc_event_to_pmu(struct perf_event *event) { return container_of(event->pmu, struct imc_pmu, pmu); } -PMU_FORMAT_ATTR(event, "config:0-40"); +PMU_FORMAT_ATTR(event, "config:0-61"); PMU_FORMAT_ATTR(offset, "config:0-31"); PMU_FORMAT_ATTR(rvalue, "config:32"); PMU_FORMAT_ATTR(mode, 
"config:33-40"); @@ -65,6 +70,25 @@ static struct attribute_group imc_format_group = { .attrs = imc_format_attrs, }; +/* Format attribute for imc trace-mode */ +PMU_FORMAT_ATTR(cpmc_reserved, "config:0-19"); +PMU_FORMAT_ATTR(cpmc_event, "config:20-27"); +PMU_FORMAT_ATTR(cpmc_samplesel, "config:28-29"); +PMU_FORMAT_ATTR(cpmc_load, "config:30-61"); +static struct attribute *trace_imc_format_attrs[] = { + &format_attr_event.attr, + &format_attr_cpmc_reserved.attr, + &format_attr_cpmc_event.attr, + &format_attr_cpmc_samplesel.attr, + &format_attr_cpmc_load.attr, + NULL, +}; + +static struct attribute_group trace_imc_format_group = { +.name = "format", +.attrs = trace_imc_format_attrs, +}; + /* Get the cpumask printed to a buffer "buf" */ static ssize_t imc_pmu_cpumask_get_attr(struct device *dev, struct device_attribute *attr, @@ -487,6 +511,11 @@ static int nest_imc_event_init(struct perf_event *event) * Get the base memory addresss for this cpu. */ chip_id = cpu_to_chip_id(event->cpu); + + /* Return, if chip_id is not valid */ + if (chip_id < 0) + return -ENODEV; + pcni = pmu->mem_info; do { if (pcni->id == chip_id) { @@ -494,7 +523,7 @@ static int nest_imc_event_init(struct perf_event *event) break; } pcni++; - } while (pcni); + } while (pcni->vbase != 0); if (!flag) return -ENODEV; @@ -788,8 +817,11 @@ static int core_imc_event_init(struct perf_event *event) } /* - * Allocates a page of memory for each of the online cpus, and write the - * physical base address of that page to the LDBAR for that cpu. + * Allocates a page of memory for each of the online cpus, and load + * LDBAR with 0. + * The physical base address of the page allocated for a cpu will be + * written to the LDBAR for that cpu, when the thread-imc event + * is added. * * LDBAR Register Layout: * @@ -807,7 +839,7 @@ static int core_imc_event_init(struct perf_event *event) */ static int thread_imc_mem_alloc(int cpu_id, int size) { - u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, cpu_id); + u64 *local_mem = per_cpu(thread_imc_mem, cpu_id); int nid = cpu_to_node(cpu_id); if (!local_mem) { @@ -824,9 +856,7 @@ static int thread_imc_mem_alloc(int cpu_id, int size) per_cpu(thread_imc_mem, cpu_id) = local_mem; } - ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE; - - mtspr(SPRN_LDBAR, ldbar_value); + mtspr(SPRN_LDBAR, 0); return 0; } @@ -858,6 +888,9 @@ static int thread_imc_event_init(struct perf_event *event) if (event->attr.type != event->pmu->type) return -ENOENT; + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + /* Sampling not supported */ if (event->hw.sample_period) return -EINVAL; @@ -977,6 +1010,7 @@ static int thread_imc_event_add(struct perf_event *event, int flags) { int core_id; struct imc_pmu_ref *ref; + u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, smp_processor_id()); if (flags & PERF_EF_START) imc_event_start(event, flags); @@ -985,6 +1019,9 @@ static int thread_imc_event_add(struct perf_event *event, int flags) return -EINVAL; core_id = smp_processor_id() / threads_per_core; + ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE; + mtspr(SPRN_LDBAR, ldbar_value); + /* * imc pmus are enabled only when it is used. * See if this is triggered for the first time. @@ -1016,11 +1053,7 @@ static void thread_imc_event_del(struct perf_event *event, int flags) int core_id; struct imc_pmu_ref *ref; - /* - * Take a snapshot and calculate the delta and update - * the event counter values. 
- */ - imc_event_update(event); + mtspr(SPRN_LDBAR, 0); core_id = smp_processor_id() / threads_per_core; ref = &core_imc_refc[core_id]; @@ -1039,6 +1072,240 @@ static void thread_imc_event_del(struct perf_event *event, int flags) ref->refc = 0; } mutex_unlock(&ref->lock); + /* + * Take a snapshot and calculate the delta and update + * the event counter values. + */ + imc_event_update(event); +} + +/* + * Allocate a page of memory for each cpu, and load LDBAR with 0. + */ +static int trace_imc_mem_alloc(int cpu_id, int size) +{ + u64 *local_mem = per_cpu(trace_imc_mem, cpu_id); + int phys_id = cpu_to_node(cpu_id), rc = 0; + int core_id = (cpu_id / threads_per_core); + + if (!local_mem) { + local_mem = page_address(alloc_pages_node(phys_id, + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE | + __GFP_NOWARN, get_order(size))); + if (!local_mem) + return -ENOMEM; + per_cpu(trace_imc_mem, cpu_id) = local_mem; + + /* Initialise the counters for trace mode */ + rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_TRACE, __pa((void *)local_mem), + get_hard_smp_processor_id(cpu_id)); + if (rc) { + pr_info("IMC:opal init failed for trace imc\n"); + return rc; + } + } + + /* Init the mutex, if not already */ + trace_imc_refc[core_id].id = core_id; + mutex_init(&trace_imc_refc[core_id].lock); + + mtspr(SPRN_LDBAR, 0); + return 0; +} + +static int ppc_trace_imc_cpu_online(unsigned int cpu) +{ + return trace_imc_mem_alloc(cpu, trace_imc_mem_size); +} + +static int ppc_trace_imc_cpu_offline(unsigned int cpu) +{ + mtspr(SPRN_LDBAR, 0); + return 0; +} + +static int trace_imc_cpu_init(void) +{ + return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE, + "perf/powerpc/imc_trace:online", + ppc_trace_imc_cpu_online, + ppc_trace_imc_cpu_offline); +} + +static u64 get_trace_imc_event_base_addr(void) +{ + return (u64)per_cpu(trace_imc_mem, smp_processor_id()); +} + +/* + * Function to parse trace-imc data obtained + * and to prepare the perf sample. 
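Each trace record carries the timebase twice, and the parser accepts a record only if time moved forward and the two copies agree, which filters out both stale and partially written entries. The two checks, condensed from trace_imc_prepare_sample() below:

u64 tb1 = be64_to_cpu(READ_ONCE(mem->tb1));

if (tb1 <= *prev_tb)
        return -EINVAL;         /* stale: timebase did not advance */
*prev_tb = tb1;

if ((tb1 & IMC_TRACE_RECORD_TB1_MASK) != be64_to_cpu(READ_ONCE(mem->tb2)))
        return -EINVAL;         /* torn: the two timebase copies disagree */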
+ */ +static int trace_imc_prepare_sample(struct trace_imc_data *mem, + struct perf_sample_data *data, + u64 *prev_tb, + struct perf_event_header *header, + struct perf_event *event) +{ + /* Sanity checks for a valid record */ + if (be64_to_cpu(READ_ONCE(mem->tb1)) > *prev_tb) + *prev_tb = be64_to_cpu(READ_ONCE(mem->tb1)); + else + return -EINVAL; + + if ((be64_to_cpu(READ_ONCE(mem->tb1)) & IMC_TRACE_RECORD_TB1_MASK) != + be64_to_cpu(READ_ONCE(mem->tb2))) + return -EINVAL; + + /* Prepare perf sample */ + data->ip = be64_to_cpu(READ_ONCE(mem->ip)); + data->period = event->hw.last_period; + + header->type = PERF_RECORD_SAMPLE; + header->size = sizeof(*header) + event->header_size; + header->misc = 0; + + if (is_kernel_addr(data->ip)) + header->misc |= PERF_RECORD_MISC_KERNEL; + else + header->misc |= PERF_RECORD_MISC_USER; + + perf_event_header__init_id(header, data, event); + + return 0; +} + +static void dump_trace_imc_data(struct perf_event *event) +{ + struct trace_imc_data *mem; + int i, ret; + u64 prev_tb = 0; + + mem = (struct trace_imc_data *)get_trace_imc_event_base_addr(); + for (i = 0; i < (trace_imc_mem_size / sizeof(struct trace_imc_data)); + i++, mem++) { + struct perf_sample_data data; + struct perf_event_header header; + + ret = trace_imc_prepare_sample(mem, &data, &prev_tb, &header, event); + if (ret) /* Exit, if not a valid record */ + break; + else { + /* If this is a valid record, create the sample */ + struct perf_output_handle handle; + + if (perf_output_begin(&handle, event, header.size)) + return; + + perf_output_sample(&handle, &header, &data, event); + perf_output_end(&handle); + } + } +} + +static int trace_imc_event_add(struct perf_event *event, int flags) +{ + int core_id = smp_processor_id() / threads_per_core; + struct imc_pmu_ref *ref = NULL; + u64 local_mem, ldbar_value; + + /* Set trace-imc bit in ldbar and load ldbar with per-thread memory address */ + local_mem = get_trace_imc_event_base_addr(); + ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE; + + if (core_imc_refc) + ref = &core_imc_refc[core_id]; + if (!ref) { + /* If core-imc is not enabled, use trace-imc reference count */ + if (trace_imc_refc) + ref = &trace_imc_refc[core_id]; + if (!ref) + return -EINVAL; + } + mtspr(SPRN_LDBAR, ldbar_value); + mutex_lock(&ref->lock); + if (ref->refc == 0) { + if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE, + get_hard_smp_processor_id(smp_processor_id()))) { + mutex_unlock(&ref->lock); + pr_err("trace-imc: Unable to start the counters for core %d\n", core_id); + mtspr(SPRN_LDBAR, 0); + return -EINVAL; + } + } + ++ref->refc; + mutex_unlock(&ref->lock); + + return 0; +} + +static void trace_imc_event_read(struct perf_event *event) +{ + return; +} + +static void trace_imc_event_stop(struct perf_event *event, int flags) +{ + u64 local_mem = get_trace_imc_event_base_addr(); + dump_trace_imc_data(event); + memset((void *)local_mem, 0, sizeof(u64)); +} + +static void trace_imc_event_start(struct perf_event *event, int flags) +{ + return; +} + +static void trace_imc_event_del(struct perf_event *event, int flags) +{ + int core_id = smp_processor_id() / threads_per_core; + struct imc_pmu_ref *ref = NULL; + + if (core_imc_refc) + ref = &core_imc_refc[core_id]; + if (!ref) { + /* If core-imc is not enabled, use trace-imc reference count */ + if (trace_imc_refc) + ref = &trace_imc_refc[core_id]; + if (!ref) + return; + } + mtspr(SPRN_LDBAR, 0); + mutex_lock(&ref->lock); + ref->refc--; + if (ref->refc == 0) { + if 
(opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE, + get_hard_smp_processor_id(smp_processor_id()))) { + mutex_unlock(&ref->lock); + pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id); + return; + } + } else if (ref->refc < 0) { + ref->refc = 0; + } + mutex_unlock(&ref->lock); + trace_imc_event_stop(event, flags); +} + +static int trace_imc_event_init(struct perf_event *event) +{ + struct task_struct *target; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + /* Return if this is a counting event */ + if (event->attr.sample_period == 0) + return -ENOENT; + + event->hw.idx = -1; + target = event->hw.target; + + event->pmu->task_ctx_nr = perf_hw_context; + return 0; } /* update_pmu_ops : Populate the appropriate operations for "pmu" */ @@ -1071,6 +1338,14 @@ static int update_pmu_ops(struct imc_pmu *pmu) pmu->pmu.cancel_txn = thread_imc_pmu_cancel_txn; pmu->pmu.commit_txn = thread_imc_pmu_commit_txn; break; + case IMC_DOMAIN_TRACE: + pmu->pmu.event_init = trace_imc_event_init; + pmu->pmu.add = trace_imc_event_add; + pmu->pmu.del = trace_imc_event_del; + pmu->pmu.start = trace_imc_event_start; + pmu->pmu.stop = trace_imc_event_stop; + pmu->pmu.read = trace_imc_event_read; + pmu->attr_groups[IMC_FORMAT_ATTR] = &trace_imc_format_group; default: break; } @@ -1163,6 +1438,18 @@ static void cleanup_all_thread_imc_memory(void) } } +static void cleanup_all_trace_imc_memory(void) +{ + int i, order = get_order(trace_imc_mem_size); + + for_each_online_cpu(i) { + if (per_cpu(trace_imc_mem, i)) + free_pages((u64)per_cpu(trace_imc_mem, i), order); + + } + kfree(trace_imc_refc); +} + /* Function to free the attr_groups which are dynamically allocated */ static void imc_common_mem_free(struct imc_pmu *pmu_ptr) { @@ -1204,6 +1491,11 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE); cleanup_all_thread_imc_memory(); } + + if (pmu_ptr->domain == IMC_DOMAIN_TRACE) { + cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE); + cleanup_all_trace_imc_memory(); + } } /* @@ -1286,6 +1578,27 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent, thread_imc_pmu = pmu_ptr; break; + case IMC_DOMAIN_TRACE: + /* Update the pmu name */ + pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc"); + if (!pmu_ptr->pmu.name) + return -ENOMEM; + + nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core); + trace_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref), + GFP_KERNEL); + if (!trace_imc_refc) + return -ENOMEM; + + trace_imc_mem_size = pmu_ptr->counter_mem_size; + for_each_online_cpu(cpu) { + res = trace_imc_mem_alloc(cpu, trace_imc_mem_size); + if (res) { + cleanup_all_trace_imc_memory(); + goto err; + } + } + break; default: return -EINVAL; } @@ -1359,6 +1672,14 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id } break; + case IMC_DOMAIN_TRACE: + ret = trace_imc_cpu_init(); + if (ret) { + cleanup_all_trace_imc_memory(); + goto err_free_mem; + } + + break; default: return -EINVAL; /* Unknown domain */ } diff --git a/arch/powerpc/perf/internal.h b/arch/powerpc/perf/internal.h new file mode 100644 index 000000000000..f755c64da137 --- /dev/null +++ b/arch/powerpc/perf/internal.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// Copyright 2019 Madhavan Srinivasan, IBM Corporation.
+ +extern int init_ppc970_pmu(void); +extern int init_power5_pmu(void); +extern int init_power5p_pmu(void); +extern int init_power6_pmu(void); +extern int init_power7_pmu(void); +extern int init_power8_pmu(void); +extern int init_power9_pmu(void); +extern int init_generic_compat_pmu(void); diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c index 0526dac66007..9aa803504cb2 100644 --- a/arch/powerpc/perf/power5+-pmu.c +++ b/arch/powerpc/perf/power5+-pmu.c @@ -677,7 +677,7 @@ static struct power_pmu power5p_pmu = { .cache_events = &power5p_cache_events, }; -static int __init init_power5p_pmu(void) +int init_power5p_pmu(void) { if (!cur_cpu_spec->oprofile_cpu_type || (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+") @@ -686,5 +686,3 @@ static int __init init_power5p_pmu(void) return register_power_pmu(&power5p_pmu); } - -early_initcall(init_power5p_pmu); diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c index 4dc99f9f7962..30cb13d081a9 100644 --- a/arch/powerpc/perf/power5-pmu.c +++ b/arch/powerpc/perf/power5-pmu.c @@ -618,7 +618,7 @@ static struct power_pmu power5_pmu = { .flags = PPMU_HAS_SSLOT, }; -static int __init init_power5_pmu(void) +int init_power5_pmu(void) { if (!cur_cpu_spec->oprofile_cpu_type || strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5")) @@ -626,5 +626,3 @@ static int __init init_power5_pmu(void) return register_power_pmu(&power5_pmu); } - -early_initcall(init_power5_pmu); diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c index 9c9d646b68a1..80ec48632cfe 100644 --- a/arch/powerpc/perf/power6-pmu.c +++ b/arch/powerpc/perf/power6-pmu.c @@ -540,7 +540,7 @@ static struct power_pmu power6_pmu = { .cache_events = &power6_cache_events, }; -static int __init init_power6_pmu(void) +int init_power6_pmu(void) { if (!cur_cpu_spec->oprofile_cpu_type || strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6")) @@ -548,5 +548,3 @@ static int __init init_power6_pmu(void) return register_power_pmu(&power6_pmu); } - -early_initcall(init_power6_pmu); diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 6dbae9884ec4..bb6efd5d2530 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -445,7 +445,7 @@ static struct power_pmu power7_pmu = { .cache_events = &power7_cache_events, }; -static int __init init_power7_pmu(void) +int init_power7_pmu(void) { if (!cur_cpu_spec->oprofile_cpu_type || strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7")) @@ -456,5 +456,3 @@ static int __init init_power7_pmu(void) return register_power_pmu(&power7_pmu); } - -early_initcall(init_power7_pmu); diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index d12a2db26353..bcc3409a06de 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -379,7 +379,7 @@ static struct power_pmu power8_pmu = { .bhrb_nr = 32, }; -static int __init init_power8_pmu(void) +int init_power8_pmu(void) { int rc; @@ -399,4 +399,3 @@ static int __init init_power8_pmu(void) return 0; } -early_initcall(init_power8_pmu); diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h index 063c9d9f2516..6b1dc9a83ede 100644 --- a/arch/powerpc/perf/power9-events-list.h +++ b/arch/powerpc/perf/power9-events-list.h @@ -63,8 +63,6 @@ EVENT(PM_RUN_CYC_ALT, 0x200f4) /* Instruction Dispatched */ EVENT(PM_INST_DISP, 0x200f2) EVENT(PM_INST_DISP_ALT, 0x300f2) -/* Alternate Branch event code */ -EVENT(PM_BR_CMPL_ALT, 
0x10012) /* Branch events that are not strongly biased */ EVENT(PM_BR_2PATH, 0x20036) /* Alternate branch events that are not strongly biased */ diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 030544e35959..3a31ac6f4805 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -437,7 +437,7 @@ static struct power_pmu power9_pmu = { .bhrb_nr = 32, }; -static int __init init_power9_pmu(void) +int init_power9_pmu(void) { int rc = 0; unsigned int pvr = mfspr(SPRN_PVR); @@ -467,4 +467,3 @@ static int __init init_power9_pmu(void) return 0; } -early_initcall(init_power9_pmu); diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c index 8b6a8a36fa38..1d3370914022 100644 --- a/arch/powerpc/perf/ppc970-pmu.c +++ b/arch/powerpc/perf/ppc970-pmu.c @@ -490,7 +490,7 @@ static struct power_pmu ppc970_pmu = { .flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING, }; -static int __init init_ppc970_pmu(void) +int init_ppc970_pmu(void) { if (!cur_cpu_spec->oprofile_cpu_type || (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970") @@ -499,5 +499,3 @@ static int __init init_ppc970_pmu(void) return register_power_pmu(&ppc970_pmu); } - -early_initcall(init_ppc970_pmu); diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c index b3097fe6441b..af265ae40a61 100644 --- a/arch/powerpc/platforms/512x/clock-commonclk.c +++ b/arch/powerpc/platforms/512x/clock-commonclk.c @@ -239,6 +239,7 @@ static inline struct clk *mpc512x_clk_divider( const char *name, const char *parent_name, u8 clkflags, u32 __iomem *reg, u8 pos, u8 len, int divflags) { + divflags |= CLK_DIVIDER_BIG_ENDIAN; return clk_register_divider(NULL, name, parent_name, clkflags, reg, pos, len, divflags, &clklock); } @@ -250,7 +251,7 @@ static inline struct clk *mpc512x_clk_divtable( { u8 divflags; - divflags = 0; + divflags = CLK_DIVIDER_BIG_ENDIAN; return clk_register_divider_table(NULL, name, parent_name, 0, reg, pos, len, divflags, divtab, &clklock); @@ -261,10 +262,12 @@ static inline struct clk *mpc512x_clk_gated( u32 __iomem *reg, u8 pos) { int clkflags; + u8 gateflags; clkflags = CLK_SET_RATE_PARENT; + gateflags = CLK_GATE_BIG_ENDIAN; return clk_register_gate(NULL, name, parent_name, clkflags, - reg, pos, 0, &clklock); + reg, pos, gateflags, &clklock); } static inline struct clk *mpc512x_clk_muxed(const char *name, @@ -275,7 +278,7 @@ static inline struct clk *mpc512x_clk_muxed(const char *name, u8 muxflags; clkflags = CLK_SET_RATE_PARENT; - muxflags = 0; + muxflags = CLK_MUX_BIG_ENDIAN; return clk_register_mux(NULL, name, parent_names, parent_count, clkflags, reg, pos, len, muxflags, &clklock); diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 17cf249b18ee..3cb2f07ce8eb 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -628,7 +628,7 @@ static int mpc52xx_wdt_open(struct inode *inode, struct file *file) } file->private_data = mpc52xx_gpt_wdt; - return nonseekable_open(inode, file); + return stream_open(inode, file); } static int mpc52xx_wdt_release(struct inode *inode, struct file *file) diff --git a/arch/powerpc/platforms/83xx/usb.c b/arch/powerpc/platforms/83xx/usb.c index 5c31d8292d3b..e7c2c3fb011a 100644 --- a/arch/powerpc/platforms/83xx/usb.c +++ b/arch/powerpc/platforms/83xx/usb.c @@ -221,8 +221,10 @@ int mpc837x_usb_cfg(void) int ret = 0; np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr"); - if (!np ||
!of_device_is_available(np)) + if (!np || !of_device_is_available(np)) { + of_node_put(np); return -ENODEV; + } prop = of_get_property(np, "phy_type", NULL); if (!prop || (strcmp(prop, "ulpi") && strcmp(prop, "serial"))) { diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c index 8d5a25d43ef3..e9617d35fd1f 100644 --- a/arch/powerpc/platforms/8xx/pic.c +++ b/arch/powerpc/platforms/8xx/pic.c @@ -153,10 +153,9 @@ int mpc8xx_pic_init(void) if (mpc8xx_pic_host == NULL) { printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n"); ret = -ENOMEM; - goto out; } - return 0; + ret = 0; out: of_node_put(np); return ret; diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 50cd09b4e05d..2794235e9d3e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -25,6 +25,8 @@ config PPC_BOOK3S_32 bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx" select PPC_FPU select PPC_HAVE_PMU_SUPPORT + select PPC_HAVE_KUEP + select PPC_HAVE_KUAP config PPC_85xx bool "Freescale 85xx" @@ -34,6 +36,9 @@ config PPC_8xx bool "Freescale 8xx" select FSL_SOC select SYS_SUPPORTS_HUGETLBFS + select PPC_HAVE_KUEP + select PPC_HAVE_KUAP + select PPC_MM_SLICES if HUGETLB_PAGE config 40x bool "AMCC 40x" @@ -75,6 +80,7 @@ config PPC_BOOK3S_64 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_SUPPORTS_NUMA_BALANCING select IRQ_WORK + select PPC_MM_SLICES config PPC_BOOK3E_64 bool "Embedded processors" @@ -325,7 +331,9 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK config PPC_RADIX_MMU bool "Radix MMU Support" depends on PPC_BOOK3S_64 && HUGETLB_PAGE - select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA + select ARCH_HAS_GIGANTIC_PAGE + select PPC_HAVE_KUEP + select PPC_HAVE_KUAP default y help Enable support for the Power ISA 3.0 Radix style MMU. Currently this @@ -345,6 +353,37 @@ config PPC_RADIX_MMU_DEFAULT If you're unsure, say Y. +config PPC_HAVE_KUEP + bool + +config PPC_KUEP + bool "Kernel Userspace Execution Prevention" + depends on PPC_HAVE_KUEP + default y + help + Enable support for Kernel Userspace Execution Prevention (KUEP) + + If you're unsure, say Y. + +config PPC_HAVE_KUAP + bool + +config PPC_KUAP + bool "Kernel Userspace Access Protection" + depends on PPC_HAVE_KUAP + default y + help + Enable support for Kernel Userspace Access Protection (KUAP) + + If you're unsure, say Y. + +config PPC_KUAP_DEBUG + bool "Extra debugging for Kernel Userspace Access Protection" + depends on PPC_HAVE_KUAP && (PPC_RADIX_MMU || PPC_32) + help + Add extra debugging for Kernel Userspace Access Protection (KUAP) + If you're unsure, say N. + config ARCH_ENABLE_HUGEPAGE_MIGRATION def_bool y depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION @@ -354,14 +393,16 @@ config PPC_MMU_NOHASH def_bool y depends on !PPC_BOOK3S +config PPC_MMU_NOHASH_32 + def_bool y + depends on PPC_MMU_NOHASH && PPC32 + config PPC_BOOK3E_MMU def_bool y depends on FSL_BOOKE || PPC_BOOK3E config PPC_MM_SLICES bool - default y if PPC_BOOK3S_64 - default y if PPC_8xx && HUGETLB_PAGE config PPC_HAVE_PMU_SUPPORT bool diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 7f12c7b78c0f..6646f152d57b 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -194,7 +194,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) * faults need to be deferred to process context. 
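[Editor's note] The PPC_KUAP option introduced in the Kconfig hunk above works by keeping user memory inaccessible to the kernel except inside explicit allow/prevent brackets placed around the user accessors. The standalone model below illustrates only that bracketing idea; the flag and helper names mirror the powerpc helpers in spirit but are stand-ins, not the real implementation:

/* Illustrative model of the KUAP bracketing idea: user memory is
 * only touchable between an explicit "allow" and "prevent" pair,
 * so a stray kernel dereference of a user pointer is refused.
 * This standalone version just toggles a flag. */
#include <stdbool.h>
#include <stdio.h>

static bool user_access_allowed;

static void allow_user_access(void)   { user_access_allowed = true; }
static void prevent_user_access(void) { user_access_allowed = false; }

static int copy_to_user_model(char *dst, const char *src, unsigned long n)
{
	unsigned long i;

	if (!user_access_allowed)
		return -1;	/* would be a KUAP fault in the kernel */
	for (i = 0; i < n; i++)
		dst[i] = src[i];
	return 0;
}

int main(void)
{
	char ubuf[8];

	allow_user_access();
	copy_to_user_model(ubuf, "hi", 3);
	prevent_user_access();

	/* Outside the bracket the same copy is refused. */
	if (copy_to_user_model(ubuf, "no", 3) != 0)
		puts("blocked outside allow/prevent window");
	return 0;
}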
*/ if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) && - (REGION_ID(ea) != USER_REGION_ID)) { + (get_region_id(ea) != USER_REGION_ID)) { spin_unlock(&spu->register_lock); ret = hash_page(ea, @@ -224,7 +224,7 @@ static void __spu_kernel_slb(void *addr, struct copro_slb *slb) unsigned long ea = (unsigned long)addr; u64 llp; - if (REGION_ID(ea) == KERNEL_REGION_ID) + if (get_region_id(ea) == LINEAR_MAP_REGION_ID) llp = mmu_psize_defs[mmu_linear_psize].sllp; else llp = mmu_psize_defs[mmu_virtual_psize].sllp; diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 48c2477e7e2a..bfb9ca99ac05 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -588,7 +588,7 @@ static int spufs_pipe_open(struct inode *inode, struct file *file) struct spufs_inode_info *i = SPUFS_I(inode); file->private_data = i->i_ctx; - return nonseekable_open(inode, file); + return stream_open(inode, file); } /* diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index db329d4bf1c3..c1a75216050a 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -71,17 +71,11 @@ spufs_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void spufs_i_callback(struct rcu_head *head) +static void spufs_free_inode(struct inode *inode) { - struct inode *inode = container_of(head, struct inode, i_rcu); kmem_cache_free(spufs_inode_cache, SPUFS_I(inode)); } -static void spufs_destroy_inode(struct inode *inode) -{ - call_rcu(&inode->i_rcu, spufs_i_callback); -} - static void spufs_init_once(void *p) { @@ -739,7 +733,7 @@ spufs_fill_super(struct super_block *sb, void *data, int silent) struct spufs_sb_info *info; static const struct super_operations s_ops = { .alloc_inode = spufs_alloc_inode, - .destroy_inode = spufs_destroy_inode, + .free_inode = spufs_free_inode, .statfs = simple_statfs, .evict_inode = spufs_evict_inode, .show_options = spufs_show_options, diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index 0409714e8070..829bf3697dc9 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -44,7 +44,8 @@ #define HOLLY_PCI_CFG_PHYS 0x7c000000 -int holly_exclude_device(struct pci_controller *hose, u_char bus, u_char devfn) +static int holly_exclude_device(struct pci_controller *hose, u_char bus, + u_char devfn) { if (bus == 0 && PCI_SLOT(devfn) == 0) return PCIBIOS_DEVICE_NOT_FOUND; @@ -187,13 +188,13 @@ static void __init holly_init_IRQ(void) tsi108_write_reg(TSI108_MPIC_OFFSET + 0x30c, 0); } -void holly_show_cpuinfo(struct seq_file *m) +static void holly_show_cpuinfo(struct seq_file *m) { seq_printf(m, "vendor\t\t: IBM\n"); seq_printf(m, "machine\t\t: PPC750 GX/CL\n"); } -void __noreturn holly_restart(char *cmd) +static void __noreturn holly_restart(char *cmd) { __be32 __iomem *ocn_bar1 = NULL; unsigned long bar; @@ -233,18 +234,6 @@ void __noreturn holly_restart(char *cmd) for (;;) ; } -void holly_power_off(void) -{ - local_irq_disable(); - /* No way to shut power off with software */ - for (;;) ; -} - -void holly_halt(void) -{ - holly_power_off(); -} - /* * Called very early, device-tree isn't unflattened */ diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile index 20ebf35d7913..f4247ade71ca 100644 --- a/arch/powerpc/platforms/powermac/Makefile +++ b/arch/powerpc/platforms/powermac/Makefile @@ -2,6 
+2,12 @@ CFLAGS_bootx_init.o += -fPIC CFLAGS_bootx_init.o += $(call cc-option, -fno-stack-protector) +KASAN_SANITIZE_bootx_init.o := n + +ifdef CONFIG_KASAN +CFLAGS_bootx_init.o += -DDISABLE_BRANCH_PROFILING +endif + ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code CFLAGS_REMOVE_bootx_init.o = $(CC_FLAGS_FTRACE) diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index e52f9b06dd9c..c9133f7908ca 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -16,6 +16,7 @@ #include <linux/device.h> #include <linux/cpu.h> +#include <asm/asm-prototypes.h> #include <asm/firmware.h> #include <asm/machdep.h> #include <asm/opal.h> @@ -48,10 +49,10 @@ static u64 pnv_default_stop_mask; static bool default_stop_found; /* - * First deep stop state. Used to figure out when to save/restore - * hypervisor context. + * First stop state levels when SPR and TB loss can occur. */ -u64 pnv_first_deep_stop_state = MAX_STOP_STATE; +static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1; +static u64 pnv_first_spr_loss_level = MAX_STOP_STATE + 1; /* * psscr value and mask of the deepest stop idle state. @@ -62,6 +63,8 @@ static u64 pnv_deepest_stop_psscr_mask; static u64 pnv_deepest_stop_flag; static bool deepest_stop_found; +static unsigned long power7_offline_type; + static int pnv_save_sprs_for_deep_states(void) { int cpu; @@ -72,12 +75,12 @@ static int pnv_save_sprs_for_deep_states(void) * all cpus at boot. Get these reg values of current cpu and use the * same across all cpus. */ - uint64_t lpcr_val = mfspr(SPRN_LPCR); - uint64_t hid0_val = mfspr(SPRN_HID0); - uint64_t hid1_val = mfspr(SPRN_HID1); - uint64_t hid4_val = mfspr(SPRN_HID4); - uint64_t hid5_val = mfspr(SPRN_HID5); - uint64_t hmeer_val = mfspr(SPRN_HMEER); + uint64_t lpcr_val = mfspr(SPRN_LPCR); + uint64_t hid0_val = mfspr(SPRN_HID0); + uint64_t hid1_val = mfspr(SPRN_HID1); + uint64_t hid4_val = mfspr(SPRN_HID4); + uint64_t hid5_val = mfspr(SPRN_HID5); + uint64_t hmeer_val = mfspr(SPRN_HMEER); uint64_t msr_val = MSR_IDLE; uint64_t psscr_val = pnv_deepest_stop_psscr_val; @@ -137,89 +140,6 @@ static int pnv_save_sprs_for_deep_states(void) return 0; } -static void pnv_alloc_idle_core_states(void) -{ - int i, j; - int nr_cores = cpu_nr_cores(); - u32 *core_idle_state; - - /* - * core_idle_state - The lower 8 bits track the idle state of - * each thread of the core. - * - * The most significant bit is the lock bit. - * - * Initially all the bits corresponding to threads_per_core - * are set. They are cleared when the thread enters deep idle - * state like sleep and winkle/stop. - * - * Initially the lock bit is cleared. The lock bit has 2 - * purposes: - * a. While the first thread in the core waking up from - * idle is restoring core state, it prevents other - * threads in the core from switching to process - * context. - * b. While the last thread in the core is saving the - * core state, it prevents a different thread from - * waking up. 
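[Editor's note] pnv_save_sprs_for_deep_states() above samples each SPR once on the boot CPU and hands the same values to firmware for every thread, on the stated assumption that these registers are identical system-wide. A standalone model of that capture-once/apply-everywhere pattern; the slw_set_reg() stand-in mimics the OPAL stop-api call only in shape, and all names are illustrative:

/* Model of capture-once/apply-everywhere SPR programming: values
 * are sampled a single time, then replayed for every CPU. */
#include <stdio.h>

struct spr_val {
	const char *name;
	unsigned long val;
};

static int slw_set_reg(int cpu, const char *name, unsigned long val)
{
	printf("cpu %d: %s = %#lx\n", cpu, name, val);
	return 0;	/* firmware accepted the value */
}

int main(void)
{
	/* sampled once at boot; assumed identical across the system */
	struct spr_val sprs[] = {
		{ "LPCR", 0x1234 },
		{ "HID0", 0x5678 },
	};
	int ncpus = 2, nsprs = (int)(sizeof(sprs) / sizeof(sprs[0]));
	int cpu, i;

	for (cpu = 0; cpu < ncpus; cpu++)
		for (i = 0; i < nsprs; i++)
			if (slw_set_reg(cpu, sprs[i].name, sprs[i].val))
				return 1;
	return 0;
}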
- */ - for (i = 0; i < nr_cores; i++) { - int first_cpu = i * threads_per_core; - int node = cpu_to_node(first_cpu); - size_t paca_ptr_array_size; - - core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node); - *core_idle_state = (1 << threads_per_core) - 1; - paca_ptr_array_size = (threads_per_core * - sizeof(struct paca_struct *)); - - for (j = 0; j < threads_per_core; j++) { - int cpu = first_cpu + j; - - paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state; - paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING; - paca_ptrs[cpu]->thread_mask = 1 << j; - } - } - - update_subcore_sibling_mask(); - - if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) { - int rc = pnv_save_sprs_for_deep_states(); - - if (likely(!rc)) - return; - - /* - * The stop-api is unable to restore hypervisor - * resources on wakeup from platform idle states which - * lose full context. So disable such states. - */ - supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT; - pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n"); - pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n"); - - if (cpu_has_feature(CPU_FTR_ARCH_300) && - (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) { - /* - * Use the default stop state for CPU-Hotplug - * if available. - */ - if (default_stop_found) { - pnv_deepest_stop_psscr_val = - pnv_default_stop_val; - pnv_deepest_stop_psscr_mask = - pnv_default_stop_mask; - pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n", - pnv_deepest_stop_psscr_val); - } else { /* Fallback to snooze loop for CPU-Hotplug */ - deepest_stop_found = false; - pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n"); - } - } - } -} - u32 pnv_get_supported_cpuidle_states(void) { return supported_cpuidle_states; @@ -238,6 +158,9 @@ static void pnv_fastsleep_workaround_apply(void *info) *err = 1; } +static bool power7_fastsleep_workaround_entry = true; +static bool power7_fastsleep_workaround_exit = true; + /* * Used to store fastsleep workaround state * 0 - Workaround applied/undone at fastsleep entry/exit path (Default) @@ -269,21 +192,15 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, * fastsleep_workaround_applyonce = 1 implies * fastsleep workaround needs to be left in 'applied' state on all * the cores. Do this by- - * 1. Patching out the call to 'undo' workaround in fastsleep exit path - * 2. Sending ipi to all the cores which have at least one online thread - * 3. Patching out the call to 'apply' workaround in fastsleep entry - * path + * 1. Disable the 'undo' workaround in fastsleep exit path + * 2. Send IPIs to all the cores which have at least one online thread + * 3. Disable the 'apply' workaround in fastsleep entry path + * There is no need to send ipi to cores which have all threads * offlined, as last thread of the core entering fastsleep or deeper * state would have applied workaround.
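[Editor's note] With the idle code now in C, the fastsleep workaround is controlled by the two booleans introduced above instead of patching out instructions at the entry/exit call sites (the patch_instruction() calls removed just below); "apply once" simply clears both flags. A compilable model of that control flow, with illustrative names and an opal_config() stand-in for the firmware call:

/* Model of the flag-based fastsleep workaround control: instead of
 * patching call sites out at runtime, the entry/exit paths test a
 * boolean. */
#include <stdbool.h>
#include <stdio.h>

static bool workaround_entry = true;
static bool workaround_exit = true;

static void opal_config(const char *what) { printf("apply: %s\n", what); }

static void fastsleep_enter(void)
{
	if (workaround_entry)
		opal_config("entry workaround");
	puts("...sleep...");
}

static void fastsleep_exit(void)
{
	if (workaround_exit)
		opal_config("exit (undo) workaround");
}

static void apply_workaround_once(void)
{
	/* leave the workaround permanently applied */
	workaround_exit = false;
	opal_config("entry workaround on all cores");
	workaround_entry = false;
}

int main(void)
{
	fastsleep_enter(); fastsleep_exit();
	apply_workaround_once();
	fastsleep_enter(); fastsleep_exit();	/* no further firmware calls */
	return 0;
}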
*/ - err = patch_instruction( - (unsigned int *)pnv_fastsleep_workaround_at_exit, - PPC_INST_NOP); - if (err) { - pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit"); - goto fail; - } + power7_fastsleep_workaround_exit = false; get_online_cpus(); primary_thread_mask = cpu_online_cores_map(); @@ -296,13 +213,7 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev, goto fail; } - err = patch_instruction( - (unsigned int *)pnv_fastsleep_workaround_at_entry, - PPC_INST_NOP); - if (err) { - pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry"); - goto fail; - } + power7_fastsleep_workaround_entry = false; fastsleep_workaround_applyonce = 1; @@ -315,27 +226,346 @@ static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600, show_fastsleep_workaround_applyonce, store_fastsleep_workaround_applyonce); -static unsigned long __power7_idle_type(unsigned long type) +static inline void atomic_start_thread_idle(void) { + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + int thread_nr = cpu_thread_in_core(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + + clear_bit(thread_nr, state); +} + +static inline void atomic_stop_thread_idle(void) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + int thread_nr = cpu_thread_in_core(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + + set_bit(thread_nr, state); +} + +static inline void atomic_lock_thread_idle(void) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + + while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state))) + barrier(); +} + +static inline void atomic_unlock_and_stop_thread_idle(void) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long thread = 1UL << cpu_thread_in_core(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + u64 s = READ_ONCE(*state); + u64 new, tmp; + + BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT)); + BUG_ON(s & thread); + +again: + new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT; + tmp = cmpxchg(state, s, new); + if (unlikely(tmp != s)) { + s = tmp; + goto again; + } +} + +static inline void atomic_unlock_thread_idle(void) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + + BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state)); + clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state); +} + +/* P7 and P8 */ +struct p7_sprs { + /* per core */ + u64 tscr; + u64 worc; + + /* per subcore */ + u64 sdr1; + u64 rpr; + + /* per thread */ + u64 lpcr; + u64 hfscr; + u64 fscr; + u64 purr; + u64 spurr; + u64 dscr; + u64 wort; + + /* per thread SPRs that get lost in shallow states */ + u64 amr; + u64 iamr; + u64 amor; + u64 uamor; +}; + +static unsigned long power7_idle_insn(unsigned long type) +{ + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + unsigned long thread = 1UL << cpu_thread_in_core(cpu); + unsigned long core_thread_mask = (1UL << threads_per_core) - 1; unsigned long srr1; + bool full_winkle; + struct p7_sprs sprs = {}; /* avoid false use-uninitialised */ + bool sprs_saved = false; + int rc; - if (!prep_irq_for_idle_irqsoff()) - return 0; + if (unlikely(type != PNV_THREAD_NAP)) { + 
atomic_lock_thread_idle(); + + BUG_ON(!(*state & thread)); + *state &= ~thread; + + if (power7_fastsleep_workaround_entry) { + if ((*state & core_thread_mask) == 0) { + rc = opal_config_cpu_idle_state( + OPAL_CONFIG_IDLE_FASTSLEEP, + OPAL_CONFIG_IDLE_APPLY); + BUG_ON(rc); + } + } + + if (type == PNV_THREAD_WINKLE) { + sprs.tscr = mfspr(SPRN_TSCR); + sprs.worc = mfspr(SPRN_WORC); + + sprs.sdr1 = mfspr(SPRN_SDR1); + sprs.rpr = mfspr(SPRN_RPR); + + sprs.lpcr = mfspr(SPRN_LPCR); + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + sprs.hfscr = mfspr(SPRN_HFSCR); + sprs.fscr = mfspr(SPRN_FSCR); + } + sprs.purr = mfspr(SPRN_PURR); + sprs.spurr = mfspr(SPRN_SPURR); + sprs.dscr = mfspr(SPRN_DSCR); + sprs.wort = mfspr(SPRN_WORT); + + sprs_saved = true; + + /* + * Increment winkle counter and set all winkle bits if + * all threads are winkling. This allows wakeup side to + * distinguish between fast sleep and winkle state + * loss. Fast sleep still has to resync the timebase so + * this may not be a really big win. + */ + *state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; + if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) + >> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT + == threads_per_core) + *state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS; + WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); + } + + atomic_unlock_thread_idle(); + } + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + sprs.amr = mfspr(SPRN_AMR); + sprs.iamr = mfspr(SPRN_IAMR); + sprs.amor = mfspr(SPRN_AMOR); + sprs.uamor = mfspr(SPRN_UAMOR); + } + + local_paca->thread_idle_state = type; + srr1 = isa206_idle_insn_mayloss(type); /* go idle */ + local_paca->thread_idle_state = PNV_THREAD_RUNNING; + + WARN_ON_ONCE(!srr1); + WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) { + /* + * We don't need an isync after the mtsprs here because + * the upcoming mtmsrd is execution synchronizing. 
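[Editor's note] The winkle accounting above packs a per-thread run mask and a winkle counter into one per-core word; the counter is what lets the wakeup path tell a fully winkled core (full state loss) from fast sleep. A minimal model of that packing follows; the shift and field width are illustrative, not the kernel's layout, and the real code updates the word under the core lock bit:

/* Model of the packed per-core idle word: low bits are a per-thread
 * "running" mask, a counter above it tracks how many threads entered
 * winkle. */
#include <stdio.h>

#define THREADS_PER_CORE	8
#define THREAD_MASK		((1u << THREADS_PER_CORE) - 1)
#define WINKLE_COUNT_SHIFT	8
#define WINKLE_COUNT_MASK	(0xfu << WINKLE_COUNT_SHIFT)

static unsigned int state = THREAD_MASK;	/* all threads running */

static void enter_winkle(int thread)
{
	state &= ~(1u << thread);		/* thread no longer running */
	state += 1u << WINKLE_COUNT_SHIFT;	/* one more winkler */
}

static int whole_core_winkled(void)
{
	unsigned int count = (state & WINKLE_COUNT_MASK) >> WINKLE_COUNT_SHIFT;

	return count == THREADS_PER_CORE;
}

int main(void)
{
	int t;

	for (t = 0; t < THREADS_PER_CORE; t++)
		enter_winkle(t);
	printf("full core winkle (state loss): %d\n", whole_core_winkled());
	return 0;
}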
+ */ + mtspr(SPRN_AMR, sprs.amr); + mtspr(SPRN_IAMR, sprs.iamr); + mtspr(SPRN_AMOR, sprs.amor); + mtspr(SPRN_UAMOR, sprs.uamor); + } + } + + if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) + hmi_exception_realmode(NULL); + + if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) { + if (unlikely(type != PNV_THREAD_NAP)) { + atomic_lock_thread_idle(); + if (type == PNV_THREAD_WINKLE) { + WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); + *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; + *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT); + } + atomic_unlock_and_stop_thread_idle(); + } + return srr1; + } + + /* HV state loss */ + BUG_ON(type == PNV_THREAD_NAP); + + atomic_lock_thread_idle(); + + full_winkle = false; + if (type == PNV_THREAD_WINKLE) { + WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0); + *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT; + if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) { + *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT); + full_winkle = true; + BUG_ON(!sprs_saved); + } + } + + WARN_ON(*state & thread); + + if ((*state & core_thread_mask) != 0) + goto core_woken; + + /* Per-core SPRs */ + if (full_winkle) { + mtspr(SPRN_TSCR, sprs.tscr); + mtspr(SPRN_WORC, sprs.worc); + } + + if (power7_fastsleep_workaround_exit) { + rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP, + OPAL_CONFIG_IDLE_UNDO); + BUG_ON(rc); + } + + /* TB */ + if (opal_resync_timebase() != OPAL_SUCCESS) + BUG(); + +core_woken: + if (!full_winkle) + goto subcore_woken; + + if ((*state & local_paca->subcore_sibling_mask) != 0) + goto subcore_woken; + + /* Per-subcore SPRs */ + mtspr(SPRN_SDR1, sprs.sdr1); + mtspr(SPRN_RPR, sprs.rpr); + +subcore_woken: + /* + * isync after restoring shared SPRs and before unlocking. Unlock + * only contains hwsync which does not necessarily do the right + * thing for SPRs. + */ + isync(); + atomic_unlock_and_stop_thread_idle(); + + /* Fast sleep does not lose SPRs */ + if (!full_winkle) + return srr1; + + /* Per-thread SPRs */ + mtspr(SPRN_LPCR, sprs.lpcr); + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + mtspr(SPRN_HFSCR, sprs.hfscr); + mtspr(SPRN_FSCR, sprs.fscr); + } + mtspr(SPRN_PURR, sprs.purr); + mtspr(SPRN_SPURR, sprs.spurr); + mtspr(SPRN_DSCR, sprs.dscr); + mtspr(SPRN_WORT, sprs.wort); + + mtspr(SPRN_SPRG3, local_paca->sprg_vdso); + + /* + * The SLB has to be restored here, but it sometimes still + * contains entries, so the __ variant must be used to prevent + * multi hits. + */ + __slb_restore_bolted_realmode(); + + return srr1; +} + +extern unsigned long idle_kvm_start_guest(unsigned long srr1); + +#ifdef CONFIG_HOTPLUG_CPU +static unsigned long power7_offline(void) +{ + unsigned long srr1; + + mtmsr(MSR_IDLE); + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Tell KVM we're entering idle. */ + /******************************************************/ + /* N O T E W E L L ! ! ! N O T E W E L L */ + /* The following store to HSTATE_HWTHREAD_STATE(r13) */ + /* MUST occur in real mode, i.e. with the MMU off, */ + /* and the MMU must stay off until we clear this flag */ + /* and test HSTATE_HWTHREAD_REQ(r13) in */ + /* pnv_powersave_wakeup in this file. 
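[Editor's note] On the wakeup side above, the first thread out of the core restores the shared per-core (and per-subcore) SPRs before its siblings proceed; later wakers only restore their private state. A simplified model of the first-waker check follows; the real code performs it under the core lock bit with an isync before release, which this model omits:

/* Model of the wakeup ordering: a thread that finds no sibling
 * already marked awake restores the shared state itself; later
 * wakers only restore their private state. Illustrative names. */
#include <stdio.h>

#define THREADS_PER_CORE 4

static unsigned int awake_mask;	/* bit set once a thread is back up */

static void wake_thread(int thread)
{
	if (awake_mask == 0)
		printf("thread %d: restoring per-core SPRs\n", thread);
	awake_mask |= 1u << thread;
	printf("thread %d: restoring per-thread SPRs\n", thread);
}

int main(void)
{
	int t;

	for (t = 0; t < THREADS_PER_CORE; t++)
		wake_thread(t);
	return 0;
}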
*/ + /* The reason is that another thread can switch the */ + /* MMU to a guest context whenever this flag is set */ + /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */ + /* that would potentially cause this thread to start */ + /* executing instructions from guest memory in */ + /* hypervisor mode, leading to a host crash or data */ + /* corruption, or worse. */ + /******************************************************/ + local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE; +#endif __ppc64_runlatch_off(); - srr1 = power7_idle_insn(type); + srr1 = power7_idle_insn(power7_offline_type); __ppc64_runlatch_on(); - fini_irq_for_idle_irqsoff(); +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL; + /* Order setting hwthread_state vs. testing hwthread_req */ + smp_mb(); + if (local_paca->kvm_hstate.hwthread_req) + srr1 = idle_kvm_start_guest(srr1); +#endif + + mtmsr(MSR_KERNEL); return srr1; } +#endif void power7_idle_type(unsigned long type) { unsigned long srr1; - srr1 = __power7_idle_type(type); + if (!prep_irq_for_idle_irqsoff()) + return; + + mtmsr(MSR_IDLE); + __ppc64_runlatch_off(); + srr1 = power7_idle_insn(type); + __ppc64_runlatch_on(); + mtmsr(MSR_KERNEL); + + fini_irq_for_idle_irqsoff(); irq_set_pending_from_srr1(srr1); } @@ -347,33 +577,292 @@ void power7_idle(void) power7_idle_type(PNV_THREAD_NAP); } -static unsigned long __power9_idle_type(unsigned long stop_psscr_val, - unsigned long stop_psscr_mask) +struct p9_sprs { + /* per core */ + u64 ptcr; + u64 rpr; + u64 tscr; + u64 ldbar; + + /* per thread */ + u64 lpcr; + u64 hfscr; + u64 fscr; + u64 pid; + u64 purr; + u64 spurr; + u64 dscr; + u64 wort; + + u64 mmcra; + u32 mmcr0; + u32 mmcr1; + u64 mmcr2; + + /* per thread SPRs that get lost in shallow states */ + u64 amr; + u64 iamr; + u64 amor; + u64 uamor; +}; + +static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on) { - unsigned long psscr; + int cpu = raw_smp_processor_id(); + int first = cpu_first_thread_sibling(cpu); + unsigned long *state = &paca_ptrs[first]->idle_state; + unsigned long core_thread_mask = (1UL << threads_per_core) - 1; unsigned long srr1; + unsigned long pls; + unsigned long mmcr0 = 0; + struct p9_sprs sprs = {}; /* avoid false used-uninitialised */ + bool sprs_saved = false; - if (!prep_irq_for_idle_irqsoff()) - return 0; + if (!(psscr & (PSSCR_EC|PSSCR_ESL))) { + /* EC=ESL=0 case */ + + BUG_ON(!mmu_on); + + /* + * Wake synchronously. SRESET via xscom may still cause + * a 0x100 powersave wakeup with SRR1 reason! + */ + srr1 = isa300_idle_stop_noloss(psscr); /* go idle */ + if (likely(!srr1)) + return 0; + + /* + * Registers not saved, can't recover! + * This would be a hardware bug + */ + BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS); + + goto out; + } + + /* EC=ESL=1 case */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) { + local_paca->requested_psscr = psscr; + /* order setting requested_psscr vs testing dont_stop */ + smp_mb(); + if (atomic_read(&local_paca->dont_stop)) { + local_paca->requested_psscr = 0; + return 0; + } + } +#endif + + if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) { + /* + * POWER9 DD2 can incorrectly set PMAO when waking up + * after a state-loss idle. Saving and restoring MMCR0 + * over idle is a workaround. 
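[Editor's note] power9_idle_stop() first splits on PSSCR[EC,ESL]: with both clear the stop cannot lose state and nothing is saved; otherwise SPRs are saved once the requested level reaches the first level known to lose them. A condensed sketch of that decision; the bit positions and level values here are illustrative, not the architected PSSCR layout:

/* Condensed model of the power9_idle_stop() entry decision. */
#include <stdbool.h>
#include <stdio.h>

#define PSSCR_EC	(1u << 20)
#define PSSCR_ESL	(1u << 21)
#define PSSCR_RL_MASK	0xfu

static unsigned int first_spr_loss_level = 4;

static void idle_stop(unsigned int psscr)
{
	bool may_lose = psscr & (PSSCR_EC | PSSCR_ESL);
	unsigned int rl = psscr & PSSCR_RL_MASK;

	if (!may_lose) {
		puts("EC=ESL=0 stop: no save/restore needed");
		return;
	}
	if (rl >= first_spr_loss_level)
		puts("deep stop: save SPRs before stopping");
	else
		puts("EC/ESL stop below SPR-loss level: SPRs kept");
}

int main(void)
{
	idle_stop(2);				/* EC=ESL=0 */
	idle_stop(PSSCR_EC | PSSCR_ESL | 2);	/* shallow, nothing saved */
	idle_stop(PSSCR_EC | PSSCR_ESL | 5);	/* SPR loss possible */
	return 0;
}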
+ */ + mmcr0 = mfspr(SPRN_MMCR0); + } + if ((psscr & PSSCR_RL_MASK) >= pnv_first_spr_loss_level) { + sprs.lpcr = mfspr(SPRN_LPCR); + sprs.hfscr = mfspr(SPRN_HFSCR); + sprs.fscr = mfspr(SPRN_FSCR); + sprs.pid = mfspr(SPRN_PID); + sprs.purr = mfspr(SPRN_PURR); + sprs.spurr = mfspr(SPRN_SPURR); + sprs.dscr = mfspr(SPRN_DSCR); + sprs.wort = mfspr(SPRN_WORT); + + sprs.mmcra = mfspr(SPRN_MMCRA); + sprs.mmcr0 = mfspr(SPRN_MMCR0); + sprs.mmcr1 = mfspr(SPRN_MMCR1); + sprs.mmcr2 = mfspr(SPRN_MMCR2); + + sprs.ptcr = mfspr(SPRN_PTCR); + sprs.rpr = mfspr(SPRN_RPR); + sprs.tscr = mfspr(SPRN_TSCR); + sprs.ldbar = mfspr(SPRN_LDBAR); + + sprs_saved = true; + + atomic_start_thread_idle(); + } + + sprs.amr = mfspr(SPRN_AMR); + sprs.iamr = mfspr(SPRN_IAMR); + sprs.amor = mfspr(SPRN_AMOR); + sprs.uamor = mfspr(SPRN_UAMOR); + + srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */ + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + local_paca->requested_psscr = 0; +#endif psscr = mfspr(SPRN_PSSCR); - psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val; + WARN_ON_ONCE(!srr1); + WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR)); + + if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) { + unsigned long mmcra; + + /* + * We don't need an isync after the mtsprs here because the + * upcoming mtmsrd is execution synchronizing. + */ + mtspr(SPRN_AMR, sprs.amr); + mtspr(SPRN_IAMR, sprs.iamr); + mtspr(SPRN_AMOR, sprs.amor); + mtspr(SPRN_UAMOR, sprs.uamor); + + /* + * Workaround for POWER9 DD2.0, if we lost resources, the ERAT + * might have been corrupted and needs flushing. We also need + * to reload MMCR0 (see mmcr0 comment above). + */ + if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) { + asm volatile(PPC_INVALIDATE_ERAT); + mtspr(SPRN_MMCR0, mmcr0); + } + + /* + * DD2.2 and earlier need to set then clear bit 60 in MMCRA + * to ensure the PMU starts running. + */ + mmcra = mfspr(SPRN_MMCRA); + mmcra |= PPC_BIT(60); + mtspr(SPRN_MMCRA, mmcra); + mmcra &= ~PPC_BIT(60); + mtspr(SPRN_MMCRA, mmcra); + } + + if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI)) + hmi_exception_realmode(NULL); + + /* + * On POWER9, SRR1 bits do not match exactly as expected. + * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so + * just always test PSSCR for SPR/TB state loss. + */ + pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT; + if (likely(pls < pnv_first_spr_loss_level)) { + if (sprs_saved) + atomic_stop_thread_idle(); + goto out; + } + + /* HV state loss */ + BUG_ON(!sprs_saved); + + atomic_lock_thread_idle(); + + if ((*state & core_thread_mask) != 0) + goto core_woken; + + /* Per-core SPRs */ + mtspr(SPRN_PTCR, sprs.ptcr); + mtspr(SPRN_RPR, sprs.rpr); + mtspr(SPRN_TSCR, sprs.tscr); + mtspr(SPRN_LDBAR, sprs.ldbar); + + if (pls >= pnv_first_tb_loss_level) { + /* TB loss */ + if (opal_resync_timebase() != OPAL_SUCCESS) + BUG(); + } + + /* + * isync after restoring shared SPRs and before unlocking. Unlock + * only contains hwsync which does not necessarily do the right + * thing for SPRs. 
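[Editor's note] After wakeup the code keys the restore work off the power-loss state (PLS) field read back from PSSCR rather than SRR1 alone: below the SPR-loss level nothing is restored, at or above it SPRs are restored, and at or above the TB-loss level the timebase is resynced as well. A sketch of that tiered check, with illustrative level values:

/* Sketch of the tiered wakeup restore driven by the power-loss
 * state (PLS) reported in PSSCR. */
#include <stdio.h>

static unsigned int first_spr_loss_level = 4;
static unsigned int first_tb_loss_level = 8;

static void wakeup_restore(unsigned int pls)
{
	if (pls < first_spr_loss_level) {
		printf("pls=%u: nothing lost\n", pls);
		return;
	}
	printf("pls=%u: restore SPRs\n", pls);
	if (pls >= first_tb_loss_level)
		printf("pls=%u: resync timebase\n", pls);
}

int main(void)
{
	wakeup_restore(2);
	wakeup_restore(5);
	wakeup_restore(9);
	return 0;
}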
+ */ + isync(); + +core_woken: + atomic_unlock_and_stop_thread_idle(); + + /* Per-thread SPRs */ + mtspr(SPRN_LPCR, sprs.lpcr); + mtspr(SPRN_HFSCR, sprs.hfscr); + mtspr(SPRN_FSCR, sprs.fscr); + mtspr(SPRN_PID, sprs.pid); + mtspr(SPRN_PURR, sprs.purr); + mtspr(SPRN_SPURR, sprs.spurr); + mtspr(SPRN_DSCR, sprs.dscr); + mtspr(SPRN_WORT, sprs.wort); + + mtspr(SPRN_MMCRA, sprs.mmcra); + mtspr(SPRN_MMCR0, sprs.mmcr0); + mtspr(SPRN_MMCR1, sprs.mmcr1); + mtspr(SPRN_MMCR2, sprs.mmcr2); + + mtspr(SPRN_SPRG3, local_paca->sprg_vdso); + + if (!radix_enabled()) + __slb_restore_bolted_realmode(); + +out: + if (mmu_on) + mtmsr(MSR_KERNEL); + + return srr1; +} + +#ifdef CONFIG_HOTPLUG_CPU +static unsigned long power9_offline_stop(unsigned long psscr) +{ + unsigned long srr1; + +#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE __ppc64_runlatch_off(); - srr1 = power9_idle_stop(psscr); + srr1 = power9_idle_stop(psscr, true); __ppc64_runlatch_on(); +#else + /* + * Tell KVM we're entering idle. + * This does not have to be done in real mode because the P9 MMU + * is independent per-thread. Some steppings share radix/hash mode + * between threads, but in that case KVM has a barrier sync in real + * mode before and after switching between radix and hash. + * + * kvm_start_guest must still be called in real mode though, hence + * the false argument. + */ + local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE; - fini_irq_for_idle_irqsoff(); + __ppc64_runlatch_off(); + srr1 = power9_idle_stop(psscr, false); + __ppc64_runlatch_on(); + + local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL; + /* Order setting hwthread_state vs. testing hwthread_req */ + smp_mb(); + if (local_paca->kvm_hstate.hwthread_req) + srr1 = idle_kvm_start_guest(srr1); + mtmsr(MSR_KERNEL); +#endif return srr1; } +#endif void power9_idle_type(unsigned long stop_psscr_val, unsigned long stop_psscr_mask) { + unsigned long psscr; unsigned long srr1; - srr1 = __power9_idle_type(stop_psscr_val, stop_psscr_mask); + if (!prep_irq_for_idle_irqsoff()) + return; + + psscr = mfspr(SPRN_PSSCR); + psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val; + + __ppc64_runlatch_off(); + srr1 = power9_idle_stop(psscr, true); + __ppc64_runlatch_on(); + + fini_irq_for_idle_irqsoff(); + irq_set_pending_from_srr1(srr1); } @@ -409,7 +898,7 @@ void pnv_power9_force_smt4_catch(void) atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop); } /* order setting dont_stop vs testing requested_psscr */ - mb(); + smp_mb(); for (thr = 0; thr < threads_per_core; ++thr) { if (!paca_ptrs[cpu0+thr]->requested_psscr) ++awake_threads; @@ -481,7 +970,6 @@ void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val) unsigned long pnv_cpu_offline(unsigned int cpu) { unsigned long srr1; - u32 idle_states = pnv_get_supported_cpuidle_states(); __ppc64_runlatch_off(); @@ -492,15 +980,8 @@ unsigned long pnv_cpu_offline(unsigned int cpu) psscr = (psscr & ~pnv_deepest_stop_psscr_mask) | pnv_deepest_stop_psscr_val; srr1 = power9_offline_stop(psscr); - - } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) && - (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) { - srr1 = power7_idle_insn(PNV_THREAD_WINKLE); - } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) || - (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { - srr1 = power7_idle_insn(PNV_THREAD_SLEEP); - } else if (idle_states & OPAL_PM_NAP_ENABLED) { - srr1 = power7_idle_insn(PNV_THREAD_NAP); + } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) { + srr1 = power7_offline(); } else { /* This is the fallback method. 
We emulate snooze */ while (!generic_check_cpu_restart(cpu)) { @@ -596,33 +1077,44 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags) * @dt_idle_states: Number of idle state entries * Returns 0 on success */ -static int __init pnv_power9_idle_init(void) +static void __init pnv_power9_idle_init(void) { u64 max_residency_ns = 0; int i; /* - * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask}, - * and the pnv_default_stop_{val,mask}. - * - * pnv_first_deep_stop_state should be set to the first stop - * level to cause hypervisor state loss. - * * pnv_deepest_stop_{val,mask} should be set to values corresponding to * the deepest stop state. * * pnv_default_stop_{val,mask} should be set to values corresponding to - * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state. + * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state. */ - pnv_first_deep_stop_state = MAX_STOP_STATE; + pnv_first_tb_loss_level = MAX_STOP_STATE + 1; + pnv_first_spr_loss_level = MAX_STOP_STATE + 1; for (i = 0; i < nr_pnv_idle_states; i++) { int err; struct pnv_idle_states_t *state = &pnv_idle_states[i]; u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK; + if ((state->flags & OPAL_PM_TIMEBASE_STOP) && + (pnv_first_tb_loss_level > psscr_rl)) + pnv_first_tb_loss_level = psscr_rl; + if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) && - pnv_first_deep_stop_state > psscr_rl) - pnv_first_deep_stop_state = psscr_rl; + (pnv_first_spr_loss_level > psscr_rl)) + pnv_first_spr_loss_level = psscr_rl; + + /* + * The idle code does not deal with TB loss occurring + * in a shallower state than SPR loss, so force it to + * behave like SPRs are lost if TB is lost. POWER9 would + * never encounter this, but a POWER8 core would if it + * implemented the stop instruction. So this is for forward + * compatibility. + */ + if ((state->flags & OPAL_PM_TIMEBASE_STOP) && + (pnv_first_spr_loss_level > psscr_rl)) + pnv_first_spr_loss_level = psscr_rl; err = validate_psscr_val_mask(&state->psscr_val, &state->psscr_mask, @@ -647,6 +1139,7 @@ static int __init pnv_power9_idle_init(void) pnv_default_stop_val = state->psscr_val; pnv_default_stop_mask = state->psscr_mask; default_stop_found = true; + WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT); } } @@ -666,10 +1159,40 @@ static int __init pnv_power9_idle_init(void) pnv_deepest_stop_psscr_mask); } - pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n", - pnv_first_deep_stop_state); + pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n", + pnv_first_spr_loss_level); - return 0; + pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n", + pnv_first_tb_loss_level); +} + +static void __init pnv_disable_deep_states(void) +{ + /* + * The stop-api is unable to restore hypervisor + * resources on wakeup from platform idle states which + * lose full context. So disable such states. + */ + supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT; + pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n"); + pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n"); + + if (cpu_has_feature(CPU_FTR_ARCH_300) && + (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) { + /* + * Use the default stop state for CPU-Hotplug + * if available.
+ */ + if (default_stop_found) { + pnv_deepest_stop_psscr_val = pnv_default_stop_val; + pnv_deepest_stop_psscr_mask = pnv_default_stop_mask; + pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n", + pnv_deepest_stop_psscr_val); + } else { /* Fallback to snooze loop for CPU-Hotplug */ + deepest_stop_found = false; + pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n"); + } + } } /* @@ -684,10 +1207,8 @@ static void __init pnv_probe_idle_states(void) return; } - if (cpu_has_feature(CPU_FTR_ARCH_300)) { - if (pnv_power9_idle_init()) - return; - } + if (cpu_has_feature(CPU_FTR_ARCH_300)) + pnv_power9_idle_init(); for (i = 0; i < nr_pnv_idle_states; i++) supported_cpuidle_states |= pnv_idle_states[i].flags; @@ -807,11 +1328,33 @@ out: static int __init pnv_init_idle_states(void) { + int cpu; int rc = 0; - supported_cpuidle_states = 0; + + /* Set up PACA fields */ + for_each_present_cpu(cpu) { + struct paca_struct *p = paca_ptrs[cpu]; + + p->idle_state = 0; + if (cpu == cpu_first_thread_sibling(cpu)) + p->idle_state = (1 << threads_per_core) - 1; + + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + /* P7/P8 nap */ + p->thread_idle_state = PNV_THREAD_RUNNING; + } else { + /* P9 stop */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + p->requested_psscr = 0; + atomic_set(&p->dont_stop, 0); +#endif + } + } /* In case we error out nr_pnv_idle_states will be zero */ nr_pnv_idle_states = 0; + supported_cpuidle_states = 0; + if (cpuidle_disable != IDLE_NO_OVERRIDE) goto out; rc = pnv_parse_cpuidle_dt(); @@ -819,27 +1362,40 @@ static int __init pnv_init_idle_states(void) return rc; pnv_probe_idle_states(); - if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { - patch_instruction( - (unsigned int *)pnv_fastsleep_workaround_at_entry, - PPC_INST_NOP); - patch_instruction( - (unsigned int *)pnv_fastsleep_workaround_at_exit, - PPC_INST_NOP); - } else { - /* - * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that - * workaround is needed to use fastsleep. Provide sysfs - * control to choose how this workaround has to be applied. - */ - device_create_file(cpu_subsys.dev_root, + if (!cpu_has_feature(CPU_FTR_ARCH_300)) { + if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) { + power7_fastsleep_workaround_entry = false; + power7_fastsleep_workaround_exit = false; + } else { + /* + * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that + * workaround is needed to use fastsleep. Provide sysfs + * control to choose how this workaround has to be + * applied. 
+ */ + device_create_file(cpu_subsys.dev_root, &dev_attr_fastsleep_workaround_applyonce); - } + } + + update_subcore_sibling_mask(); + + if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) { + ppc_md.power_save = power7_idle; + power7_offline_type = PNV_THREAD_NAP; + } - pnv_alloc_idle_core_states(); + if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) && + (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)) + power7_offline_type = PNV_THREAD_WINKLE; + else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) || + (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) + power7_offline_type = PNV_THREAD_SLEEP; + } - if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) - ppc_md.power_save = power7_idle; + if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) { + if (pnv_save_sprs_for_deep_states()) + pnv_disable_deep_states(); + } out: return 0; diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index dc23d9d2a7d9..495550432f3d 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -1213,9 +1213,8 @@ int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid, * Currently we only support radix and non-zero LPCR only makes sense * for hash tables so skiboot expects the LPCR parameter to be a zero. */ - ret = opal_npu_map_lpar(nphb->opal_id, - PCI_DEVID(gpdev->bus->number, gpdev->devfn), lparid, - 0 /* LPCR bits */); + ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), lparid, + 0 /* LPCR bits */); if (ret) { dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret); return ret; @@ -1224,7 +1223,7 @@ int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid, dev_dbg(&gpdev->dev, "init context opalid=%llu msr=%lx\n", nphb->opal_id, msr); ret = opal_npu_init_context(nphb->opal_id, 0/*__unused*/, msr, - PCI_DEVID(gpdev->bus->number, gpdev->devfn)); + pci_dev_id(gpdev)); if (ret < 0) dev_err(&gpdev->dev, "Failed to init context: %d\n", ret); else @@ -1258,7 +1257,7 @@ int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev) dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n", nphb->opal_id); ret = opal_npu_destroy_context(nphb->opal_id, 0/*__unused*/, - PCI_DEVID(gpdev->bus->number, gpdev->devfn)); + pci_dev_id(gpdev)); if (ret < 0) { dev_err(&gpdev->dev, "Failed to destroy context: %d\n", ret); return ret; @@ -1266,9 +1265,8 @@ int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev) /* Set LPID to 0 anyway, just to be safe */ dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=0\n", nphb->opal_id); - ret = opal_npu_map_lpar(nphb->opal_id, - PCI_DEVID(gpdev->bus->number, gpdev->devfn), 0 /*LPID*/, - 0 /* LPCR bits */); + ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), 0 /*LPID*/, + 0 /* LPCR bits */); if (ret) dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret); diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c index daad8c45c8e7..36c8fa3647a2 100644 --- a/arch/powerpc/platforms/powernv/opal-call.c +++ b/arch/powerpc/platforms/powernv/opal-call.c @@ -121,6 +121,8 @@ static int64_t opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3, #define OPAL_CALL(name, opcode) \ int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \ + int64_t a4, int64_t a5, int64_t a6, int64_t a7); \ +int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \ int64_t a4, int64_t a5, int64_t a6, int64_t a7) \ { \ return opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode); \ @@ -218,6 +220,7 @@ 
OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ); OPAL_CALL(opal_get_param, OPAL_GET_PARAM); OPAL_CALL(opal_set_param, OPAL_SET_PARAM); OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); +OPAL_CALL(opal_handle_hmi2, OPAL_HANDLE_HMI2); OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE); OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG); OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION); @@ -260,6 +263,9 @@ OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO); OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO); OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC); OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP); +OPAL_CALL(opal_xive_get_queue_state, OPAL_XIVE_GET_QUEUE_STATE); +OPAL_CALL(opal_xive_set_queue_state, OPAL_XIVE_SET_QUEUE_STATE); +OPAL_CALL(opal_xive_get_vp_state, OPAL_XIVE_GET_VP_STATE); OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET); OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT); OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT); diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 58a07948c76e..3e497b91d210 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -127,7 +127,7 @@ static int imc_get_mem_addr_nest(struct device_node *node, nr_chips)) goto error; - pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(*pmu_ptr->mem_info), + pmu_ptr->mem_info = kcalloc(nr_chips + 1, sizeof(*pmu_ptr->mem_info), GFP_KERNEL); if (!pmu_ptr->mem_info) goto error; @@ -284,6 +284,9 @@ static int opal_imc_counters_probe(struct platform_device *pdev) case IMC_TYPE_THREAD: domain = IMC_DOMAIN_THREAD; break; + case IMC_TYPE_TRACE: + domain = IMC_DOMAIN_TRACE; + break; default: pr_warn("IMC Unknown Device type \n"); domain = -1; diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 2b0eca104f86..f2b063b027f0 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -505,7 +505,7 @@ static int opal_recover_mce(struct pt_regs *regs, recovered = 0; } - if (!recovered && evt->severity == MCE_SEV_ERROR_SYNC) { + if (!recovered && evt->sync_error) { /* * Try to kill processes if we get a synchronous machine check * (e.g., one caused by execution of this instruction). This @@ -614,6 +614,27 @@ int opal_hmi_exception_early(struct pt_regs *regs) return 0; } +int opal_hmi_exception_early2(struct pt_regs *regs) +{ + s64 rc; + __be64 out_flags; + + /* + * call opal hmi handler. + * Check 64-bit flag mask to find out if an event was generated, + * and whether TB is still valid or not etc. + */ + rc = opal_handle_hmi2(&out_flags); + if (rc != OPAL_SUCCESS) + return 0; + + if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_NEW_EVENT) + local_paca->hmi_event_available = 1; + if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_TOD_TB_FAIL) + tb_invalid = true; + return 1; +} + /* HMI exception handler called in virtual mode during check_irq_replay. 
*/ int opal_handle_hmi_exception(struct pt_regs *regs) { diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 3ead4c237ed0..126602b4e399 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -847,11 +847,11 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); if (rc) - pe_warn(pe, "OPAL error %ld remove self from PELTV\n", rc); + pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc); rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, bcomp, dcomp, fcomp, OPAL_UNMAP_PE); if (rc) - pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc); + pe_err(pe, "OPAL error %lld trying to setup PELT table\n", rc); pe->pbus = NULL; pe->pdev = NULL; @@ -1174,11 +1174,12 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all) pe->rid = bus->busn_res.start << 8; if (all) - pe_info(pe, "Secondary bus %d..%d associated with PE#%x\n", - bus->busn_res.start, bus->busn_res.end, pe->pe_number); + pe_info(pe, "Secondary bus %pad..%pad associated with PE#%x\n", + &bus->busn_res.start, &bus->busn_res.end, + pe->pe_number); else - pe_info(pe, "Secondary bus %d associated with PE#%x\n", - bus->busn_res.start, pe->pe_number); + pe_info(pe, "Secondary bus %pad associated with PE#%x\n", + &bus->busn_res.start, pe->pe_number); if (pnv_ioda_configure_pe(phb, pe)) { /* XXX What do we do here ? */ @@ -1448,7 +1449,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe tbl = pe->table_group.tables[0]; rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); if (rc) - pe_warn(pe, "OPAL error %ld release DMA window\n", rc); + pe_warn(pe, "OPAL error %lld release DMA window\n", rc); pnv_pci_ioda2_set_bypass(pe, false); if (pe->table_group.group) { @@ -1836,7 +1837,7 @@ static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev, struct pnv_ioda_pe *pe; if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) - return -ENODEV; + return false; pe = &phb->ioda.pe_array[pdn->pe_number]; if (pe->tce_bypass_enabled) { @@ -1859,7 +1860,7 @@ static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev, /* Configure the bypass mode */ s64 rc = pnv_pci_ioda_dma_64bit_bypass(pe); if (rc) - return rc; + return false; /* 4GB offset bypasses 32-bit space */ pdev->dev.archdata.dma_offset = (1ULL << 32); return true; @@ -2286,8 +2287,8 @@ found: __pa(addr) + tce32_segsz * i, tce32_segsz, IOMMU_PAGE_SIZE_4K); if (rc) { - pe_err(pe, " Failed to configure 32-bit TCE table," - " err %ld\n", rc); + pe_err(pe, " Failed to configure 32-bit TCE table, err %lld\n", + rc); goto fail; } } @@ -2332,9 +2333,9 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; const __u64 win_size = tbl->it_size << tbl->it_page_shift; - pe_info(pe, "Setting up window#%d %llx..%llx pg=%x\n", num, - start_addr, start_addr + win_size - 1, - IOMMU_PAGE_SIZE(tbl)); + pe_info(pe, "Setting up window#%d %llx..%llx pg=%lx\n", + num, start_addr, start_addr + win_size - 1, + IOMMU_PAGE_SIZE(tbl)); /* * Map TCE table through TVT. 
The TVE index is the PE number @@ -2348,7 +2349,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, size << 3, IOMMU_PAGE_SIZE(tbl)); if (rc) { - pe_err(pe, "Failed to configure TCE table, err %ld\n", rc); + pe_err(pe, "Failed to configure TCE table, err %lld\n", rc); return rc; } @@ -3450,7 +3451,7 @@ static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe) #ifdef CONFIG_IOMMU_API rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0); if (rc) - pe_warn(pe, "OPAL error %ld release DMA window\n", rc); + pe_warn(pe, "OPAL error %lld release DMA window\n", rc); #endif pnv_pci_ioda2_set_bypass(pe, false); @@ -3484,7 +3485,7 @@ static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe, phb->ioda.reserved_pe_idx, win, 0, idx); if (rc != OPAL_SUCCESS) - pe_warn(pe, "Error %ld unmapping (%d) segment#%d\n", + pe_warn(pe, "Error %lld unmapping (%d) segment#%d\n", rc, win, idx); map[idx] = IODA_INVALID_PE; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 8e36da379252..be26ab3d99e0 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -2,6 +2,7 @@ #ifndef __POWERNV_PCI_H #define __POWERNV_PCI_H +#include <linux/compiler.h> /* for __printf */ #include <linux/iommu.h> #include <asm/iommu.h> #include <asm/msi_bitmap.h> @@ -204,6 +205,7 @@ extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift, __u64 window_size, __u32 levels); extern int pnv_eeh_post_init(void); +__printf(3, 4) extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...); #define pe_err(pe, fmt, ...) \ diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 14befee4b3f1..3cf40f689aac 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -401,7 +401,10 @@ static void __init pnv_setup_machdep_opal(void) /* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */ ppc_md.machine_check_exception = opal_machine_check; ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; - ppc_md.hmi_exception_early = opal_hmi_exception_early; + if (opal_check_token(OPAL_HANDLE_HMI2)) + ppc_md.hmi_exception_early = opal_hmi_exception_early2; + else + ppc_md.hmi_exception_early = opal_hmi_exception_early; ppc_md.handle_hmi_exception = opal_handle_hmi_exception; } diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c index 45563004feda..1d7a9fd30dd1 100644 --- a/arch/powerpc/platforms/powernv/subcore.c +++ b/arch/powerpc/platforms/powernv/subcore.c @@ -183,7 +183,7 @@ static void unsplit_core(void) cpu = smp_processor_id(); if (cpu_thread_in_core(cpu) != 0) { while (mfspr(SPRN_HID0) & mask) - power7_idle_insn(PNV_THREAD_NAP); + power7_idle_type(PNV_THREAD_NAP); per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT; return; diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index d291b618a559..47087832f8b2 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -379,7 +379,7 @@ static int dlpar_add_lmb(struct drmem_lmb *); static int dlpar_remove_lmb(struct drmem_lmb *lmb) { unsigned long block_sz; - int nid, rc; + int rc; if (!lmb_is_removable(lmb)) return -EINVAL; @@ -389,14 +389,14 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb) return rc; block_sz = pseries_memory_block_size(); - nid = 
memory_add_physaddr_to_nid(lmb->base_addr); - __remove_memory(nid, lmb->base_addr, block_sz); + __remove_memory(lmb->nid, lmb->base_addr, block_sz); /* Update memory regions for memory remove */ memblock_remove(lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); + lmb_clear_nid(lmb); lmb->flags &= ~DRCONF_MEM_ASSIGNED; return 0; @@ -653,7 +653,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index) static int dlpar_add_lmb(struct drmem_lmb *lmb) { unsigned long block_sz; - int nid, rc; + int rc; if (lmb->flags & DRCONF_MEM_ASSIGNED) return -EINVAL; @@ -664,13 +664,11 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) return rc; } + lmb_set_nid(lmb); block_sz = memory_block_size_bytes(); - /* Find the node id for this address */ - nid = memory_add_physaddr_to_nid(lmb->base_addr); - /* Add the memory */ - rc = __add_memory(nid, lmb->base_addr, block_sz); + rc = __add_memory(lmb->nid, lmb->base_addr, block_sz); if (rc) { invalidate_lmb_associativity_index(lmb); return rc; @@ -678,8 +676,9 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb) rc = dlpar_online_lmb(lmb); if (rc) { - __remove_memory(nid, lmb->base_addr, block_sz); + __remove_memory(lmb->nid, lmb->base_addr, block_sz); invalidate_lmb_associativity_index(lmb); + lmb_clear_nid(lmb); } else { lmb->flags |= DRCONF_MEM_ASSIGNED; } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 36eb1ddbac69..03bbb299320e 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -105,7 +105,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, unsigned long attrs) { u64 proto_tce; - __be64 *tcep, *tces; + __be64 *tcep; u64 rpn; proto_tce = TCE_PCI_READ; // Read allowed @@ -113,7 +113,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, if (direction != DMA_TO_DEVICE) proto_tce |= TCE_PCI_WRITE; - tces = tcep = ((__be64 *)tbl->it_base) + index; + tcep = ((__be64 *)tbl->it_base) + index; while (npages--) { /* can't move this out since we might cross MEMBLOCK boundary */ @@ -129,9 +129,9 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index, static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) { - __be64 *tcep, *tces; + __be64 *tcep; - tces = tcep = ((__be64 *)tbl->it_base) + index; + tcep = ((__be64 *)tbl->it_base) + index; while (npages--) *(tcep++) = 0; @@ -945,7 +945,7 @@ static phys_addr_t ddw_memory_hotplug_max(void) for_each_node_by_type(memory, "memory") { unsigned long start, size; - int ranges, n_mem_addr_cells, n_mem_size_cells, len; + int n_mem_addr_cells, n_mem_size_cells, len; const __be32 *memcell_buf; memcell_buf = of_get_property(memory, "reg", &len); @@ -955,9 +955,6 @@ static phys_addr_t ddw_memory_hotplug_max(void) n_mem_addr_cells = of_n_addr_cells(memory); n_mem_size_cells = of_n_size_cells(memory); - /* ranges in cell */ - ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); - start = of_read_number(memcell_buf, n_mem_addr_cells); memcell_buf += n_mem_addr_cells; size = of_read_number(memcell_buf, n_mem_size_cells); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index f2a9f0adc2d3..1034ef1fe2b4 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -901,8 +901,10 @@ static int pseries_lpar_resize_hpt(unsigned long shift) break; case H_PARAMETER: + pr_warn("Invalid argument from H_RESIZE_HPT_PREPARE\n"); return -EINVAL; case H_RESOURCE: + 
pr_warn("Operation not permitted from H_RESIZE_HPT_PREPARE\n"); return -EPERM; default: pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc); @@ -918,7 +920,6 @@ static int pseries_lpar_resize_hpt(unsigned long shift) if (rc != 0) { switch (state.commit_rc) { case H_PTEG_FULL: - pr_warn("Hash collision while resizing HPT\n"); return -ENOSPC; default: diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c index 27f0a915c8a9..f860a897a9e0 100644 --- a/arch/powerpc/platforms/pseries/pmem.c +++ b/arch/powerpc/platforms/pseries/pmem.c @@ -106,7 +106,7 @@ static ssize_t pmem_drc_remove_node(u32 drc_index) int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog) { - u32 count, drc_index; + u32 drc_index; int rc; /* slim chance, but we might get a hotplug event while booting */ @@ -123,7 +123,6 @@ int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog) return -EINVAL; } - count = hp_elog->_drc_u.drc_count; drc_index = hp_elog->_drc_u.drc_index; lock_device_hotplug(); diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 452dcfd7e5dd..c97d15352f9f 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -539,44 +539,44 @@ static void pseries_print_mce_info(struct pt_regs *regs, int disposition = rtas_error_disposition(errp); static const char * const initiators[] = { - "Unknown", - "CPU", - "PCI", - "ISA", - "Memory", - "Power Mgmt", + [0] = "Unknown", + [1] = "CPU", + [2] = "PCI", + [3] = "ISA", + [4] = "Memory", + [5] = "Power Mgmt", }; static const char * const mc_err_types[] = { - "UE", - "SLB", - "ERAT", - "Unknown", - "TLB", - "D-Cache", - "Unknown", - "I-Cache", + [0] = "UE", + [1] = "SLB", + [2] = "ERAT", + [3] = "Unknown", + [4] = "TLB", + [5] = "D-Cache", + [6] = "Unknown", + [7] = "I-Cache", }; static const char * const mc_ue_types[] = { - "Indeterminate", - "Instruction fetch", - "Page table walk ifetch", - "Load/Store", - "Page table walk Load/Store", + [0] = "Indeterminate", + [1] = "Instruction fetch", + [2] = "Page table walk ifetch", + [3] = "Load/Store", + [4] = "Page table walk Load/Store", }; /* SLB sub errors valid values are 0x0, 0x1, 0x2 */ static const char * const mc_slb_types[] = { - "Parity", - "Multihit", - "Indeterminate", + [0] = "Parity", + [1] = "Multihit", + [2] = "Indeterminate", }; /* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */ static const char * const mc_soft_types[] = { - "Unknown", - "Parity", - "Multihit", - "Indeterminate", + [0] = "Unknown", + [1] = "Parity", + [2] = "Multihit", + [3] = "Indeterminate", }; if (!rtas_error_extended(errp)) { @@ -707,6 +707,87 @@ out: return disposition; } +#ifdef CONFIG_MEMORY_FAILURE + +static DEFINE_PER_CPU(int, rtas_ue_count); +static DEFINE_PER_CPU(unsigned long, rtas_ue_paddr[MAX_MC_EVT]); + +#define UE_EFFECTIVE_ADDR_PROVIDED 0x40 +#define UE_LOGICAL_ADDR_PROVIDED 0x20 + + +static void pseries_hwpoison_work_fn(struct work_struct *work) +{ + unsigned long paddr; + int index; + + while (__this_cpu_read(rtas_ue_count) > 0) { + index = __this_cpu_read(rtas_ue_count) - 1; + paddr = __this_cpu_read(rtas_ue_paddr[index]); + memory_failure(paddr >> PAGE_SHIFT, 0); + __this_cpu_dec(rtas_ue_count); + } +} + +static DECLARE_WORK(hwpoison_work, pseries_hwpoison_work_fn); + +static void queue_ue_paddr(unsigned long paddr) +{ + int index; + + index = __this_cpu_inc_return(rtas_ue_count) - 1; + if (index >= MAX_MC_EVT) { + __this_cpu_dec(rtas_ue_count); + return; + } + 
this_cpu_write(rtas_ue_paddr[index], paddr); + schedule_work(&hwpoison_work); +} + +static void pseries_do_memory_failure(struct pt_regs *regs, + struct pseries_mc_errorlog *mce_log) +{ + unsigned long paddr; + + if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) { + paddr = be64_to_cpu(mce_log->logical_address); + } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) { + unsigned long pfn; + + pfn = addr_to_pfn(regs, + be64_to_cpu(mce_log->effective_address)); + if (pfn == ULONG_MAX) + return; + paddr = pfn << PAGE_SHIFT; + } else { + return; + } + queue_ue_paddr(paddr); +} + +static void pseries_process_ue(struct pt_regs *regs, + struct rtas_error_log *errp) +{ + struct pseries_errorlog *pseries_log; + struct pseries_mc_errorlog *mce_log; + + if (!rtas_error_extended(errp)) + return; + + pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE); + if (!pseries_log) + return; + + mce_log = (struct pseries_mc_errorlog *)pseries_log->data; + + if (mce_log->error_type == MC_ERROR_TYPE_UE) + pseries_do_memory_failure(regs, mce_log); +} +#else +static inline void pseries_process_ue(struct pt_regs *regs, + struct rtas_error_log *errp) { } +#endif /*CONFIG_MEMORY_FAILURE */ + /* * Process MCE rtas errlog event. */ @@ -765,6 +846,8 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err) recovered = 1; } + pseries_process_ue(regs, err); + /* Queue irq work to log this rtas event later. */ irq_work_queue(&mce_errlog_process_work); diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile index 4314ba5baf43..7c6d8b14f440 100644 --- a/arch/powerpc/purgatory/Makefile +++ b/arch/powerpc/purgatory/Makefile @@ -1,4 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 + +KASAN_SANITIZE := n + targets += trampoline.o purgatory.ro kexec-purgatory.c LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined diff --git a/arch/powerpc/sysdev/tsi108_dev.c b/arch/powerpc/sysdev/tsi108_dev.c index 1f1af12f23e2..026619c9a8cb 100644 --- a/arch/powerpc/sysdev/tsi108_dev.c +++ b/arch/powerpc/sysdev/tsi108_dev.c @@ -18,6 +18,7 @@ #include <linux/irq.h> #include <linux/export.h> #include <linux/device.h> +#include <linux/etherdevice.h> #include <linux/platform_device.h> #include <linux/of_net.h> #include <asm/tsi108.h> @@ -105,8 +106,8 @@ static int __init tsi108_eth_of_init(void) } mac_addr = of_get_mac_address(np); - if (mac_addr) - memcpy(tsi_eth_data.mac_addr, mac_addr, 6); + if (!IS_ERR(mac_addr)) + ether_addr_copy(tsi_eth_data.mac_addr, mac_addr); ph = of_get_property(np, "mdio-handle", NULL); mdio = of_find_node_by_phandle(*ph); diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 1ca127d052a6..0c037e933e55 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -437,6 +437,12 @@ void xive_native_sync_source(u32 hw_irq) } EXPORT_SYMBOL_GPL(xive_native_sync_source); +void xive_native_sync_queue(u32 hw_irq) +{ + opal_xive_sync(XIVE_SYNC_QUEUE, hw_irq); +} +EXPORT_SYMBOL_GPL(xive_native_sync_queue); + static const struct xive_ops xive_native_ops = { .populate_irq_data = xive_native_populate_irq_data, .configure_irq = xive_native_configure_irq, @@ -711,3 +717,96 @@ bool xive_native_has_single_escalation(void) return xive_has_single_esc; } EXPORT_SYMBOL_GPL(xive_native_has_single_escalation); + +int xive_native_get_queue_info(u32 vp_id, u32 prio, + u64 *out_qpage, + u64 *out_qsize, + u64 *out_qeoi_page, + u32 *out_escalate_irq, + u64 *out_qflags) +{ + __be64 qpage; + __be64 qsize; + __be64 
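
The CONFIG_MEMORY_FAILURE code above cannot call memory_failure() directly: the machine-check path runs in a context that must not sleep, so UE physical addresses are pushed into a fixed-size per-CPU array and the actual page poisoning happens later from a workqueue. A single-threaded sketch of that bounded push/drain shape, with the per-CPU accessors and schedule_work() replaced by plain code (PAGE_SHIFT of 16 assumed, as with 64K pages on ppc64):

#include <stdio.h>

#define MAX_MC_EVT	100
#define PAGE_SHIFT	16	/* assumed 64K pages */

static unsigned long ue_paddr[MAX_MC_EVT];
static int ue_count;

/* Machine-check side: record and return; never sleep here. */
static void queue_ue_paddr(unsigned long paddr)
{
	if (ue_count >= MAX_MC_EVT)
		return;		/* drop on overflow, as the kernel code does */
	ue_paddr[ue_count++] = paddr;
}

/* Deferred-work side: safe to do the expensive handling. */
static void hwpoison_work_fn(void)
{
	while (ue_count > 0)
		printf("memory_failure(pfn=0x%lx)\n",
		       ue_paddr[--ue_count] >> PAGE_SHIFT);
}

int main(void)
{
	queue_ue_paddr(0x10000);
	queue_ue_paddr(0x20000);
	hwpoison_work_fn();	/* runs from a workqueue in the kernel */
	return 0;
}
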
qeoi_page; + __be32 escalate_irq; + __be64 qflags; + s64 rc; + + rc = opal_xive_get_queue_info(vp_id, prio, &qpage, &qsize, + &qeoi_page, &escalate_irq, &qflags); + if (rc) { + pr_err("OPAL failed to get queue info for VCPU %d/%d : %lld\n", + vp_id, prio, rc); + return -EIO; + } + + if (out_qpage) + *out_qpage = be64_to_cpu(qpage); + if (out_qsize) + *out_qsize = be32_to_cpu(qsize); + if (out_qeoi_page) + *out_qeoi_page = be64_to_cpu(qeoi_page); + if (out_escalate_irq) + *out_escalate_irq = be32_to_cpu(escalate_irq); + if (out_qflags) + *out_qflags = be64_to_cpu(qflags); + + return 0; +} +EXPORT_SYMBOL_GPL(xive_native_get_queue_info); + +int xive_native_get_queue_state(u32 vp_id, u32 prio, u32 *qtoggle, u32 *qindex) +{ + __be32 opal_qtoggle; + __be32 opal_qindex; + s64 rc; + + rc = opal_xive_get_queue_state(vp_id, prio, &opal_qtoggle, + &opal_qindex); + if (rc) { + pr_err("OPAL failed to get queue state for VCPU %d/%d : %lld\n", + vp_id, prio, rc); + return -EIO; + } + + if (qtoggle) + *qtoggle = be32_to_cpu(opal_qtoggle); + if (qindex) + *qindex = be32_to_cpu(opal_qindex); + + return 0; +} +EXPORT_SYMBOL_GPL(xive_native_get_queue_state); + +int xive_native_set_queue_state(u32 vp_id, u32 prio, u32 qtoggle, u32 qindex) +{ + s64 rc; + + rc = opal_xive_set_queue_state(vp_id, prio, qtoggle, qindex); + if (rc) { + pr_err("OPAL failed to set queue state for VCPU %d/%d : %lld\n", + vp_id, prio, rc); + return -EIO; + } + + return 0; +} +EXPORT_SYMBOL_GPL(xive_native_set_queue_state); + +int xive_native_get_vp_state(u32 vp_id, u64 *out_state) +{ + __be64 state; + s64 rc; + + rc = opal_xive_get_vp_state(vp_id, &state); + if (rc) { + pr_err("OPAL failed to get vp state for VCPU %d : %lld\n", + vp_id, rc); + return -EIO; + } + + if (out_state) + *out_state = be64_to_cpu(state); + return 0; +} +EXPORT_SYMBOL_GPL(xive_native_get_vp_state); diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index 3050f9323254..f142570ad860 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -7,6 +7,7 @@ subdir-ccflags-y := $(call cc-disable-warning, builtin-requires-header) GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n +KASAN_SANITIZE := n # Disable ftrace for the entire directory ORIG_CFLAGS := $(KBUILD_CFLAGS) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 13c6a47e6150..1b0149b2bb6c 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -80,6 +80,7 @@ static int set_indicator_token = RTAS_UNKNOWN_SERVICE; #endif static unsigned long in_xmon __read_mostly = 0; static int xmon_on = IS_ENABLED(CONFIG_XMON_DEFAULT); +static bool xmon_is_ro = IS_ENABLED(CONFIG_XMON_DEFAULT_RO_MODE); static unsigned long adrs; static int size = 1; @@ -202,6 +203,8 @@ static void dump_tlb_book3e(void); #define GETWORD(v) (((v)[0] << 24) + ((v)[1] << 16) + ((v)[2] << 8) + (v)[3]) #endif +static const char *xmon_ro_msg = "Operation disabled: xmon in read-only mode\n"; + static char *help_string = "\ Commands:\n\ b show breakpoints\n\ @@ -989,6 +992,10 @@ cmds(struct pt_regs *excp) memlocate(); break; case 'z': + if (xmon_is_ro) { + printf(xmon_ro_msg); + break; + } memzcan(); break; case 'i': @@ -1042,6 +1049,10 @@ cmds(struct pt_regs *excp) set_lpp_cmd(); break; case 'b': + if (xmon_is_ro) { + printf(xmon_ro_msg); + break; + } bpt_cmds(); break; case 'C': @@ -1055,6 +1066,10 @@ cmds(struct pt_regs *excp) bootcmds(); break; case 'p': + if (xmon_is_ro) { + printf(xmon_ro_msg); + break; + } proccall(); break; case 'P': @@ -1777,6 +1792,11 @@ 
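
All of the new xive_native_get_*() exports above follow the same two conventions: every output parameter is optional (callers pass NULL for fields they do not need), and OPAL's big-endian results are byte-swapped only at the point of storing them. A userspace sketch of that shape, with be64_to_cpu() emulated via __builtin_bswap64 on little-endian hosts and a canned value standing in for the OPAL call:

#include <stdint.h>
#include <stdio.h>

#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define be64_to_cpu(x) (x)
#else
#define be64_to_cpu(x) __builtin_bswap64(x)
#endif

static int get_vp_state(uint32_t vp_id, uint64_t *out_state)
{
	uint64_t be_state = 0x0102030405060708ULL;	/* pretend OPAL result */

	/* swap only when the caller actually wants the value */
	if (out_state)
		*out_state = be64_to_cpu(be_state);
	return 0;
}

int main(void)
{
	uint64_t state;

	get_vp_state(0, &state);
	printf("vp state: 0x%016llx\n", (unsigned long long)state);
	get_vp_state(0, NULL);	/* caller ignores the value */
	return 0;
}
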
read_spr(int n, unsigned long *vp) static void write_spr(int n, unsigned long val) { + if (xmon_is_ro) { + printf(xmon_ro_msg); + return; + } + if (setjmp(bus_error_jmp) == 0) { catch_spr_faults = 1; sync(); @@ -2016,6 +2036,12 @@ mwrite(unsigned long adrs, void *buf, int size) char *p, *q; n = 0; + + if (xmon_is_ro) { + printf(xmon_ro_msg); + return n; + } + if (setjmp(bus_error_jmp) == 0) { catch_memory_errors = 1; sync(); @@ -2434,7 +2460,6 @@ static void dump_one_paca(int cpu) DUMP(p, mmiowb_state.mmiowb_pending, "%#-*x"); #endif DUMP(p, irq_work_pending, "%#-*x"); - DUMP(p, nap_state_lost, "%#-*x"); DUMP(p, sprg_vdso, "%#-*llx"); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -2442,19 +2467,16 @@ static void dump_one_paca(int cpu) #endif #ifdef CONFIG_PPC_POWERNV - DUMP(p, core_idle_state_ptr, "%-*px"); - DUMP(p, thread_idle_state, "%#-*x"); - DUMP(p, thread_mask, "%#-*x"); - DUMP(p, subcore_sibling_mask, "%#-*x"); - DUMP(p, requested_psscr, "%#-*llx"); - DUMP(p, stop_sprs.pid, "%#-*llx"); - DUMP(p, stop_sprs.ldbar, "%#-*llx"); - DUMP(p, stop_sprs.fscr, "%#-*llx"); - DUMP(p, stop_sprs.hfscr, "%#-*llx"); - DUMP(p, stop_sprs.mmcr1, "%#-*llx"); - DUMP(p, stop_sprs.mmcr2, "%#-*llx"); - DUMP(p, stop_sprs.mmcra, "%#-*llx"); - DUMP(p, dont_stop.counter, "%#-*x"); + DUMP(p, idle_state, "%#-*lx"); + if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) { + DUMP(p, thread_idle_state, "%#-*x"); + DUMP(p, subcore_sibling_mask, "%#-*x"); + } else { +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + DUMP(p, requested_psscr, "%#-*llx"); + DUMP(p, dont_stop.counter, "%#-*x"); +#endif + } #endif DUMP(p, accounting.utime, "%#-*lx"); @@ -2887,9 +2909,17 @@ memops(int cmd) scanhex((void *)&mcount); switch( cmd ){ case 'm': + if (xmon_is_ro) { + printf(xmon_ro_msg); + break; + } memmove((void *)mdest, (void *)msrc, mcount); break; case 's': + if (xmon_is_ro) { + printf(xmon_ro_msg); + break; + } memset((void *)mdest, mval, mcount); break; case 'd': @@ -3799,6 +3829,14 @@ static int __init early_parse_xmon(char *p) } else if (strncmp(p, "on", 2) == 0) { xmon_init(1); xmon_on = 1; + } else if (strncmp(p, "rw", 2) == 0) { + xmon_init(1); + xmon_on = 1; + xmon_is_ro = false; + } else if (strncmp(p, "ro", 2) == 0) { + xmon_init(1); + xmon_on = 1; + xmon_is_ro = true; + } else if (strncmp(p, "off", 3) == 0) xmon_on = 0; else diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index a3d5273ded7c..0f2fe1794c8f 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -88,7 +88,7 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(&regs->a1, args, 5 * sizeof(regs->a1)); } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { #ifdef CONFIG_64BIT return AUDIT_ARCH_RISCV64; diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index bc7b77e34d09..8bf6f9c2d48c 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -66,11 +66,6 @@ void __init mem_init(void) mem_init_print_info(NULL); } -void free_initmem(void) -{ - free_initmem_default(0); -} - #ifdef CONFIG_BLK_DEV_INITRD static void __init setup_initrd(void) { diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 07485582d027..109243fdb6ec 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -63,7 +63,7 @@ config S390 select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL - select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA + select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV select 
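
The xmon changes above are mechanical but worth spelling out: a single xmon_is_ro flag, defaulting to CONFIG_XMON_DEFAULT_RO_MODE and settable with xmon=ro / xmon=rw on the command line, is checked in front of every command that mutates system state (breakpoints, memory and SPR writes, memzcan, proccall); read-only inspection commands are untouched. The guard reduced to a standalone sketch (the command function is invented for illustration; the flag default and message string follow the hunk):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool xmon_is_ro = true;	/* as if CONFIG_XMON_DEFAULT_RO_MODE=y */
static const char *xmon_ro_msg = "Operation disabled: xmon in read-only mode\n";

static void parse_xmon_opt(const char *p)
{
	if (strncmp(p, "rw", 2) == 0)
		xmon_is_ro = false;
	else if (strncmp(p, "ro", 2) == 0)
		xmon_is_ro = true;
}

/* A state-changing command: refuses to run in read-only mode. */
static void cmd_memzcan(void)
{
	if (xmon_is_ro) {
		fputs(xmon_ro_msg, stdout);
		return;
	}
	puts("zeroing memory region...");
}

int main(void)
{
	parse_xmon_opt("ro");
	cmd_memzcan();		/* refused */
	parse_xmon_opt("rw");
	cmd_memzcan();		/* allowed */
	return 0;
}
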
ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_MEMORY @@ -100,6 +100,7 @@ config S390 select ARCH_INLINE_WRITE_UNLOCK_BH select ARCH_INLINE_WRITE_UNLOCK_IRQ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE + select ARCH_KEEP_MEMBLOCK select ARCH_SAVE_PAGE_KEYS if HIBERNATION select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 0d15383d0ff1..1f9ab24dc048 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -224,24 +224,11 @@ static int des3_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len) { struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm); + int err; - if (!(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && - crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2], - DES_KEY_SIZE)) && - (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { - tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY; - return -EINVAL; - } - - /* in fips mode, ensure k1 != k2 and k2 != k3 and k1 != k3 */ - if (fips_enabled && - !(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && - crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2], - DES_KEY_SIZE) && - crypto_memneq(key, &key[DES_KEY_SIZE * 2], DES_KEY_SIZE))) { - tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY; - return -EINVAL; - } + err = __des3_verify_key(&tfm->crt_flags, key); + if (unlikely(err)) + return err; memcpy(ctx->key, key, key_len); return 0; diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index 3cc52e37b4b2..f316de40e51b 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -202,7 +202,7 @@ static inline int __cpacf_check_opcode(unsigned int opcode) } } -static inline int cpacf_query(unsigned int opcode, cpacf_mask_t *mask) +static __always_inline int cpacf_query(unsigned int opcode, cpacf_mask_t *mask) { if (__cpacf_check_opcode(opcode)) { __cpacf_query(opcode, mask); diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 2d1afa58a4b6..bb59dd964590 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -116,7 +116,9 @@ static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) return pte_modify(pte, newprot); } -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -static inline bool gigantic_page_supported(void) { return true; } -#endif +static inline bool gigantic_page_runtime_supported(void) +{ + return true; +} + #endif /* _ASM_S390_HUGETLB_H */ diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index ab3407aa4fd8..f073292e9fdb 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -79,10 +79,10 @@ static inline void syscall_set_arguments(struct task_struct *task, regs->orig_gpr2 = args[0]; } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { #ifdef CONFIG_COMPAT - if (test_tsk_thread_flag(current, TIF_31BIT)) + if (test_tsk_thread_flag(task, TIF_31BIT)) return AUDIT_ARCH_S390; #endif return AUDIT_ARCH_S390X; diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 767453faacfc..1816ee48eadd 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -21,7 +21,6 @@ config KVM prompt "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM select PREEMPT_NOTIFIERS - select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_VCPU_ASYNC_IOCTL select HAVE_KVM_EVENTFD diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 37503ae62486..1fd706f6206c 100644 --- 
a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2376,7 +2376,7 @@ static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr) ret = -EFAULT; goto out; } - ret = get_user_pages_fast(map->addr, 1, 1, &map->page); + ret = get_user_pages_fast(map->addr, 1, FOLL_WRITE, &map->page); if (ret < 0) goto out; BUG_ON(ret != 1); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 7cf48eefec8f..14d1eae9fe43 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -157,14 +157,6 @@ void free_initmem(void) free_initmem_default(POISON_FREE_INITMEM); } -#ifdef CONFIG_BLK_DEV_INITRD -void __init free_initrd_mem(unsigned long start, unsigned long end) -{ - free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, - "initrd"); -} -#endif - unsigned long memory_block_size_bytes(void) { /* @@ -227,8 +219,8 @@ device_initcall(s390_cma_mem_init); #endif /* CONFIG_CMA */ -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = PFN_DOWN(start); unsigned long size_pages = PFN_DOWN(size); @@ -238,21 +230,22 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, if (rc) return rc; - rc = __add_pages(nid, start_pfn, size_pages, altmap, want_memblock); + rc = __add_pages(nid, start_pfn, size_pages, restrictions); if (rc) vmem_remove_mapping(start, size); return rc; } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) +void arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { /* * There is no hardware or firmware interface which could trigger a * hot memory remove on s390. So there is nothing that needs to be * implemented. 
*/ - return -EBUSY; + BUG(); } #endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 0be08d586d40..b77f512bb176 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -10,7 +10,6 @@ config SUPERH select DMA_DECLARE_COHERENT select HAVE_IDE if HAS_IOPORT_MAP select HAVE_MEMBLOCK_NODE_MAP - select ARCH_DISCARD_MEMBLOCK select HAVE_OPROFILE select HAVE_ARCH_TRACEHOOK select HAVE_PERF_EVENTS @@ -53,6 +52,7 @@ config SUPERH select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_NMI select NEED_SG_DMA_LENGTH + select ARCH_HAS_GIGANTIC_PAGE help The SuperH is a RISC processor targeted for use in embedded systems diff --git a/arch/sh/boards/board-apsh4a3a.c b/arch/sh/boards/board-apsh4a3a.c index 346eda7a2ef6..abf19a947df3 100644 --- a/arch/sh/boards/board-apsh4a3a.c +++ b/arch/sh/boards/board-apsh4a3a.c @@ -16,7 +16,7 @@ #include <linux/irq.h> #include <linux/clk.h> #include <asm/machvec.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/clock.h> static struct mtd_partition nor_flash_partitions[] = { diff --git a/arch/sh/boards/board-apsh4ad0a.c b/arch/sh/boards/board-apsh4ad0a.c index 4efa9c571f64..fa031a16c9b5 100644 --- a/arch/sh/boards/board-apsh4ad0a.c +++ b/arch/sh/boards/board-apsh4ad0a.c @@ -15,7 +15,7 @@ #include <linux/irq.h> #include <linux/clk.h> #include <asm/machvec.h> -#include <asm/sizes.h> +#include <linux/sizes.h> /* Dummy supplies, where voltage doesn't matter */ static struct regulator_consumer_supply dummy_supplies[] = { diff --git a/arch/sh/boards/board-edosk7705.c b/arch/sh/boards/board-edosk7705.c index 67a8803eb3f9..0de7d603da2d 100644 --- a/arch/sh/boards/board-edosk7705.c +++ b/arch/sh/boards/board-edosk7705.c @@ -16,7 +16,7 @@ #include <linux/smc91x.h> #include <linux/sh_intc.h> #include <asm/machvec.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #define SMC_IOBASE 0xA2000000 #define SMC_IO_OFFSET 0x300 diff --git a/arch/sh/boards/board-edosk7760.c b/arch/sh/boards/board-edosk7760.c index 0fbe91cba67a..7569d85c5ff5 100644 --- a/arch/sh/boards/board-edosk7760.c +++ b/arch/sh/boards/board-edosk7760.c @@ -18,7 +18,7 @@ #include <asm/addrspace.h> #include <asm/delay.h> #include <asm/i2c-sh7760.h> -#include <asm/sizes.h> +#include <linux/sizes.h> /* Bus state controller registers for CS4 area */ #define BSC_CS4BCR 0xA4FD0010 diff --git a/arch/sh/boards/board-espt.c b/arch/sh/boards/board-espt.c index f478fee3b48a..6e784b5cf5a0 100644 --- a/arch/sh/boards/board-espt.c +++ b/arch/sh/boards/board-espt.c @@ -13,7 +13,7 @@ #include <linux/sh_eth.h> #include <linux/sh_intc.h> #include <asm/machvec.h> -#include <asm/sizes.h> +#include <linux/sizes.h> /* NOR Flash */ static struct mtd_partition espt_nor_flash_partitions[] = { diff --git a/arch/sh/boards/board-urquell.c b/arch/sh/boards/board-urquell.c index 799af57c0b81..dad2b3b40735 100644 --- a/arch/sh/boards/board-urquell.c +++ b/arch/sh/boards/board-urquell.c @@ -21,7 +21,7 @@ #include <mach/urquell.h> #include <cpu/sh7786.h> #include <asm/heartbeat.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/smp-ops.h> /* diff --git a/arch/sh/boards/mach-dreamcast/irq.c b/arch/sh/boards/mach-dreamcast/irq.c index a929f764ae04..cc06e4cdb4cd 100644 --- a/arch/sh/boards/mach-dreamcast/irq.c +++ b/arch/sh/boards/mach-dreamcast/irq.c @@ -10,7 +10,6 @@ */ #include <linux/irq.h> #include <linux/io.h> -#include <linux/irq.h> #include <linux/export.h> #include <linux/err.h> #include <mach/sysasic.h> diff --git a/arch/sh/boards/mach-ecovec24/setup.c 
b/arch/sh/boards/mach-ecovec24/setup.c index 34e5414c5563..f402aa741bf3 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -806,7 +806,6 @@ static struct spi_board_info spi_bus[] = { .platform_data = &mmc_spi_info, .max_speed_hz = 5000000, .mode = SPI_MODE_0, - .controller_data = (void *) GPIO_PTM4, }, }; @@ -838,6 +837,14 @@ static struct platform_device msiof0_device = { .resource = msiof0_resources, }; +static struct gpiod_lookup_table msiof_gpio_table = { + .dev_id = "spi_sh_msiof.0", + .table = { + GPIO_LOOKUP("sh7724_pfc", GPIO_PTM4, "cs", GPIO_ACTIVE_HIGH), + { }, + }, +}; + #endif /* FSI */ @@ -1296,12 +1303,11 @@ static int __init arch_setup(void) gpio_request(GPIO_FN_MSIOF0_TXD, NULL); gpio_request(GPIO_FN_MSIOF0_RXD, NULL); gpio_request(GPIO_FN_MSIOF0_TSCK, NULL); - gpio_request(GPIO_PTM4, NULL); /* software CS control of TSYNC pin */ - gpio_direction_output(GPIO_PTM4, 1); /* active low CS */ gpio_request(GPIO_PTB6, NULL); /* 3.3V power control */ gpio_direction_output(GPIO_PTB6, 0); /* disable power by default */ gpiod_add_lookup_table(&mmc_spi_gpio_table); + gpiod_add_lookup_table(&msiof_gpio_table); spi_register_board_info(spi_bus, ARRAY_SIZE(spi_bus)); #endif diff --git a/arch/sh/boards/mach-microdev/setup.c b/arch/sh/boards/mach-microdev/setup.c index 706b48f797be..f4a777fe2d01 100644 --- a/arch/sh/boards/mach-microdev/setup.c +++ b/arch/sh/boards/mach-microdev/setup.c @@ -15,7 +15,7 @@ #include <mach/microdev.h> #include <asm/io.h> #include <asm/machvec.h> -#include <asm/sizes.h> +#include <linux/sizes.h> static struct resource smc91x_resources[] = { [0] = { diff --git a/arch/sh/boards/mach-sdk7786/fpga.c b/arch/sh/boards/mach-sdk7786/fpga.c index 6d2a3d381c2a..895576ff8376 100644 --- a/arch/sh/boards/mach-sdk7786/fpga.c +++ b/arch/sh/boards/mach-sdk7786/fpga.c @@ -8,7 +8,7 @@ #include <linux/io.h> #include <linux/bcd.h> #include <mach/fpga.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #define FPGA_REGS_OFFSET 0x03fff800 #define FPGA_REGS_SIZE 0x490 diff --git a/arch/sh/boards/mach-sdk7786/setup.c b/arch/sh/boards/mach-sdk7786/setup.c index 65721c3a482c..d183026dbeb1 100644 --- a/arch/sh/boards/mach-sdk7786/setup.c +++ b/arch/sh/boards/mach-sdk7786/setup.c @@ -19,7 +19,7 @@ #include <mach/irq.h> #include <asm/machvec.h> #include <asm/heartbeat.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/clock.h> #include <asm/reboot.h> #include <asm/smp-ops.h> diff --git a/arch/sh/boards/mach-sdk7786/sram.c b/arch/sh/boards/mach-sdk7786/sram.c index d76cdb7ede39..7c6ca976f332 100644 --- a/arch/sh/boards/mach-sdk7786/sram.c +++ b/arch/sh/boards/mach-sdk7786/sram.c @@ -13,7 +13,7 @@ #include <linux/string.h> #include <mach/fpga.h> #include <asm/sram.h> -#include <asm/sizes.h> +#include <linux/sizes.h> static int __init fpga_sram_init(void) { diff --git a/arch/sh/boards/mach-se/7343/irq.c b/arch/sh/boards/mach-se/7343/irq.c index 39a3175e72b2..1aedbfe32654 100644 --- a/arch/sh/boards/mach-se/7343/irq.c +++ b/arch/sh/boards/mach-se/7343/irq.c @@ -16,7 +16,7 @@ #include <linux/interrupt.h> #include <linux/irqdomain.h> #include <linux/io.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <mach-se/mach/se7343.h> #define PA_CPLD_BASE_ADDR 0x11400000 diff --git a/arch/sh/boards/mach-se/7722/irq.c b/arch/sh/boards/mach-se/7722/irq.c index f6e3009edd4e..6d34592767f8 100644 --- a/arch/sh/boards/mach-se/7722/irq.c +++ b/arch/sh/boards/mach-se/7722/irq.c @@ -14,7 +14,7 @@ #include <linux/irqdomain.h> #include 
<linux/io.h> #include <linux/err.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <mach-se/mach/se7722.h> #define IRQ01_BASE_ADDR 0x11800000 diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig index 72b72e50a92e..0ef3f1f9de5c 100644 --- a/arch/sh/configs/ap325rxa_defconfig +++ b/arch/sh/configs/ap325rxa_defconfig @@ -35,7 +35,7 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_SH_FLCTL=y CONFIG_MTD_UBI=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig index 825c641726c4..d0d9ebc7165b 100644 --- a/arch/sh/configs/apsh4ad0a_defconfig +++ b/arch/sh/configs/apsh4ad0a_defconfig @@ -19,7 +19,6 @@ CONFIG_SLAB=y CONFIG_PROFILING=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -# CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set CONFIG_CFQ_GROUP_IOSCHED=y CONFIG_CPU_SUBTYPE_SH7786=y diff --git a/arch/sh/configs/ecovec24-romimage_defconfig b/arch/sh/configs/ecovec24-romimage_defconfig index 0c5dfccbfe37..bdb61d1d0127 100644 --- a/arch/sh/configs/ecovec24-romimage_defconfig +++ b/arch/sh/configs/ecovec24-romimage_defconfig @@ -7,7 +7,6 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y # CONFIG_KALLSYMS is not set CONFIG_SLAB=y -# CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set CONFIG_CPU_SUBTYPE_SH7724=y CONFIG_MEMORY_SIZE=0x10000000 diff --git a/arch/sh/configs/ecovec24_defconfig b/arch/sh/configs/ecovec24_defconfig index 3568310c2c2f..ba67e3752938 100644 --- a/arch/sh/configs/ecovec24_defconfig +++ b/arch/sh/configs/ecovec24_defconfig @@ -38,7 +38,7 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_UBI=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=4 diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig index e04f21be0756..121a75d65fb4 100644 --- a/arch/sh/configs/migor_defconfig +++ b/arch/sh/configs/migor_defconfig @@ -34,7 +34,7 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_PLATFORM=y CONFIG_BLK_DEV_RAM=y CONFIG_SCSI=y diff --git a/arch/sh/configs/rsk7264_defconfig b/arch/sh/configs/rsk7264_defconfig index 2b9b731fc86b..ad003ee469ea 100644 --- a/arch/sh/configs/rsk7264_defconfig +++ b/arch/sh/configs/rsk7264_defconfig @@ -16,7 +16,6 @@ CONFIG_PERF_COUNTERS=y CONFIG_SLAB=y CONFIG_MMAP_ALLOW_UNINITIALIZED=y CONFIG_PROFILING=y -# CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y # CONFIG_IOSCHED_DEADLINE is not set diff --git a/arch/sh/configs/rsk7269_defconfig b/arch/sh/configs/rsk7269_defconfig index d041f7bcb84c..27fc01d58cf8 100644 --- a/arch/sh/configs/rsk7269_defconfig +++ b/arch/sh/configs/rsk7269_defconfig @@ -3,7 +3,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set CONFIG_SLAB=y -# CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index d16e9334cd98..5209889765ad 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -108,7 +108,7 @@ CONFIG_MTD_ROM=m CONFIG_MTD_ABSENT=m CONFIG_MTD_PLATRAM=y CONFIG_MTD_PHRAM=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_PLATFORM=y CONFIG_MTD_NAND_SH_FLCTL=m 
CONFIG_MTD_UBI=y diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig index aedb3a2d9a10..9f6d46d58554 100644 --- a/arch/sh/configs/se7724_defconfig +++ b/arch/sh/configs/se7724_defconfig @@ -37,7 +37,7 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP=y -CONFIG_MTD_NAND=y +CONFIG_MTD_RAW_NAND=y CONFIG_MTD_UBI=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=4 diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig index 2ddf5ca7094e..a89ccc15af23 100644 --- a/arch/sh/configs/sh7785lcr_32bit_defconfig +++ b/arch/sh/configs/sh7785lcr_32bit_defconfig @@ -11,7 +11,6 @@ CONFIG_PROFILING=y CONFIG_GCOV_KERNEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -# CONFIG_LBDAF is not set # CONFIG_BLK_DEV_BSG is not set CONFIG_CPU_SUBTYPE_SH7785=y CONFIG_MEMORY_START=0x40000000 diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index ceb48e9b70f4..822fa9e96f74 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -155,7 +155,7 @@ CONFIG_INFTL=m CONFIG_RFD_FTL=m CONFIG_MTD_CFI=m CONFIG_MTD_JEDECPROBE=m -CONFIG_MTD_NAND=m +CONFIG_MTD_RAW_NAND=m CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_RAM=y diff --git a/arch/sh/drivers/pci/pci-sh7751.c b/arch/sh/drivers/pci/pci-sh7751.c index 1b9e5caac389..11ed21c2e9bb 100644 --- a/arch/sh/drivers/pci/pci-sh7751.c +++ b/arch/sh/drivers/pci/pci-sh7751.c @@ -14,7 +14,7 @@ #include <linux/io.h> #include "pci-sh4.h" #include <asm/addrspace.h> -#include <asm/sizes.h> +#include <linux/sizes.h> static int __init __area_sdram_check(struct pci_channel *chan, unsigned int area) diff --git a/arch/sh/drivers/pci/pci-sh7780.c b/arch/sh/drivers/pci/pci-sh7780.c index 3fd0f392a0ee..287b3a68570c 100644 --- a/arch/sh/drivers/pci/pci-sh7780.c +++ b/arch/sh/drivers/pci/pci-sh7780.c @@ -16,7 +16,7 @@ #include <linux/log2.h> #include "pci-sh4.h" #include <asm/mmu.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #if defined(CONFIG_CPU_BIG_ENDIAN) # define PCICR_ENDIANNESS SH4_PCICR_BSWP diff --git a/arch/sh/drivers/pci/pcie-sh7786.c b/arch/sh/drivers/pci/pcie-sh7786.c index a58b77cea295..e0b568aaa701 100644 --- a/arch/sh/drivers/pci/pcie-sh7786.c +++ b/arch/sh/drivers/pci/pcie-sh7786.c @@ -18,7 +18,7 @@ #include <linux/sh_intc.h> #include <cpu/sh7786.h> #include "pcie-sh7786.h" -#include <asm/sizes.h> +#include <linux/sizes.h> struct sh7786_pcie_port { struct pci_channel *hose; diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 73fff39a0122..51a54df22c11 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -18,6 +18,5 @@ generic-y += parport.h generic-y += percpu.h generic-y += preempt.h generic-y += serial.h -generic-y += sizes.h generic-y += trace_clock.h generic-y += xor.h diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h index 8c9d7e5e5dcc..0b5b8e75edac 100644 --- a/arch/sh/include/asm/syscall_32.h +++ b/arch/sh/include/asm/syscall_32.h @@ -72,7 +72,7 @@ static inline void syscall_set_arguments(struct task_struct *task, regs->regs[4] = args[0]; } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { int arch = AUDIT_ARCH_SH; diff --git a/arch/sh/include/asm/syscall_64.h b/arch/sh/include/asm/syscall_64.h index 22fad97da066..72efcbc76f91 100644 --- a/arch/sh/include/asm/syscall_64.h +++ b/arch/sh/include/asm/syscall_64.h @@ -59,7 +59,7 @@ static inline void 
syscall_set_arguments(struct task_struct *task, memcpy(&regs->regs[2], args, 6 * sizeof(args[0])); } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { int arch = AUDIT_ARCH_SH; diff --git a/arch/sh/include/cpu-sh4/cpu/sh7786.h b/arch/sh/include/cpu-sh4/cpu/sh7786.h index 8f9bfbf3cdb1..d6cce65b4871 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7786.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7786.h @@ -132,7 +132,7 @@ enum { static inline u32 sh7786_mm_sel(void) { - return __raw_readl(0xFC400020) & 0x7; + return __raw_readl((const volatile void __iomem *)0xFC400020) & 0x7; } #endif /* __CPU_SH7786_H__ */ diff --git a/arch/sh/include/uapi/asm/sockios.h b/arch/sh/include/uapi/asm/sockios.h index 17313d2c3527..ef18a668456d 100644 --- a/arch/sh/include/uapi/asm/sockios.h +++ b/arch/sh/include/uapi/asm/sockios.h @@ -10,6 +10,7 @@ #define SIOCSPGRP _IOW('s', 8, pid_t) #define SIOCGPGRP _IOR('s', 9, pid_t) -#define SIOCGSTAMP _IOR('s', 100, struct timeval) /* Get stamp (timeval) */ -#define SIOCGSTAMPNS _IOR('s', 101, struct timespec) /* Get stamp (timespec) */ +#define SIOCGSTAMP_OLD _IOR('s', 100, struct timeval) /* Get stamp (timeval) */ +#define SIOCGSTAMPNS_OLD _IOR('s', 101, struct timespec) /* Get stamp (timespec) */ + #endif /* __ASM_SH_SOCKIOS_H */ diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c index 3e27f6d1f1ec..277c882f7489 100644 --- a/arch/sh/mm/gup.c +++ b/arch/sh/mm/gup.c @@ -204,7 +204,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, * get_user_pages_fast() - pin user pages in memory * @start: starting user address * @nr_pages: number of pages from start to pin - * @write: whether pages will be written to + * @gup_flags: flags modifying pin behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. * @@ -216,8 +216,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, * requested. If nr_pages is 0 or negative, returns 0. If no pages * were pinned, returns -errno. */ -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) +int get_user_pages_fast(unsigned long start, int nr_pages, + unsigned int gup_flags, struct page **pages) { struct mm_struct *mm = current->mm; unsigned long addr, len, end; @@ -241,7 +241,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, next = pgd_addr_end(addr, end); if (pgd_none(pgd)) goto slow; - if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE, + pages, &nr)) goto slow; } while (pgdp++, addr = next, addr != end); local_irq_enable(); @@ -261,7 +262,7 @@ slow_irqon: ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT, pages, - write ? 
FOLL_WRITE : 0); + gup_flags); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 70621324db41..5aeb4d7099a1 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -26,7 +26,7 @@ #include <asm/sections.h> #include <asm/setup.h> #include <asm/cache.h> -#include <asm/sizes.h> +#include <linux/sizes.h> pgd_t swapper_pg_dir[PTRS_PER_PGD]; @@ -403,28 +403,16 @@ void __init mem_init(void) mem_init_done = 1; } -void free_initmem(void) -{ - free_initmem_default(-1); -} - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - free_reserved_area((void *)start, (void *)end, -1, "initrd"); -} -#endif - #ifdef CONFIG_MEMORY_HOTPLUG -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; int ret; /* We only have ZONE_NORMAL, so this is easy.. */ - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, restrictions); if (unlikely(ret)) printk("%s: Failed, __add_pages() == %d\n", __func__, ret); @@ -441,20 +429,15 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) +void arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; struct zone *zone; - int ret; zone = page_zone(pfn_to_page(start_pfn)); - ret = __remove_pages(zone, start_pfn, nr_pages, altmap); - if (unlikely(ret)) - pr_warn("%s: Failed, __remove_pages() == %d\n", __func__, - ret); - - return ret; + __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/sh/mm/pmb.c b/arch/sh/mm/pmb.c index 7b2cc490ebb7..a53a040d0054 100644 --- a/arch/sh/mm/pmb.c +++ b/arch/sh/mm/pmb.c @@ -24,7 +24,7 @@ #include <linux/spinlock.h> #include <linux/vmalloc.h> #include <asm/cacheflush.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <linux/uaccess.h> #include <asm/pgtable.h> #include <asm/page.h> diff --git a/arch/sh/mm/uncached.c b/arch/sh/mm/uncached.c index 010010bf205a..bd1585e8efed 100644 --- a/arch/sh/mm/uncached.c +++ b/arch/sh/mm/uncached.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/init.h> #include <linux/module.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/page.h> #include <asm/addrspace.h> diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index f6421c9ce5d3..7c93f3121ee6 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -92,6 +92,7 @@ config SPARC64 select ARCH_CLOCKSOURCE_DATA select ARCH_HAS_PTE_SPECIAL select PCI_DOMAINS if PCI + select ARCH_HAS_GIGANTIC_PAGE config ARCH_DEFCONFIG string diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c index 4884315daff4..453a4cf5492a 100644 --- a/arch/sparc/crypto/des_glue.c +++ b/arch/sparc/crypto/des_glue.c @@ -201,18 +201,15 @@ static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { struct des3_ede_sparc64_ctx *dctx = crypto_tfm_ctx(tfm); - const u32 *K = (const u32 *)key; u32 *flags = &tfm->crt_flags; u64 k1[DES_EXPKEY_WORDS / 2]; u64 k2[DES_EXPKEY_WORDS / 2]; u64 k3[DES_EXPKEY_WORDS / 2]; + int err; - if 
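
This sh/mm/gup.c hunk is one instance of the tree-wide get_user_pages_fast() signature change (the sparc copy follows below): the bare int write argument becomes an unsigned int gup_flags bitmask, the fast path keeps testing only FOLL_WRITE, and the slow path now receives the caller's flags unmodified instead of a reconstructed write ? FOLL_WRITE : 0. A toy stand-in showing why the bitmask form composes better; pin_pages() and both flag values are invented for the sketch, only FOLL_WRITE's role mirrors the kernel:

#include <stdio.h>

#define FOLL_WRITE	0x01	/* invented value for the sketch */
#define FOLL_LONGTERM	0x100	/* a second flag the old bool could not express */

static int pin_pages(unsigned long start, int nr, unsigned int gup_flags)
{
	printf("pin %d page(s) at 0x%lx flags 0x%x%s\n", nr, start, gup_flags,
	       (gup_flags & FOLL_WRITE) ? " (writable)" : "");
	return nr;
}

int main(void)
{
	pin_pages(0x1000, 1, FOLL_WRITE);		/* was: write = 1 */
	pin_pages(0x2000, 4, 0);			/* was: write = 0 */
	pin_pages(0x3000, 2, FOLL_WRITE | FOLL_LONGTERM);
	return 0;
}
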
(unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) || - !((K[2] ^ K[4]) | (K[3] ^ K[5]))) && - (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) { - *flags |= CRYPTO_TFM_RES_WEAK_KEY; - return -EINVAL; - } + err = __des3_verify_key(flags, key); + if (unlikely(err)) + return err; des_sparc64_key_expand((const u32 *)key, k1); key += DES_KEY_SIZE; diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 1393a8ac596b..22500c3be7a9 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -231,36 +231,6 @@ extern unsigned long _PAGE_ALL_SZ_BITS; extern struct page *mem_map_zero; #define ZERO_PAGE(vaddr) (mem_map_zero) -/* This macro must be updated when the size of struct page grows above 80 - * or reduces below 64. - * The idea that compiler optimizes out switch() statement, and only - * leaves clrx instructions - */ -#define mm_zero_struct_page(pp) do { \ - unsigned long *_pp = (void *)(pp); \ - \ - /* Check that struct page is either 64, 72, or 80 bytes */ \ - BUILD_BUG_ON(sizeof(struct page) & 7); \ - BUILD_BUG_ON(sizeof(struct page) < 64); \ - BUILD_BUG_ON(sizeof(struct page) > 80); \ - \ - switch (sizeof(struct page)) { \ - case 80: \ - _pp[9] = 0; /* fallthrough */ \ - case 72: \ - _pp[8] = 0; /* fallthrough */ \ - default: \ - _pp[7] = 0; \ - _pp[6] = 0; \ - _pp[5] = 0; \ - _pp[4] = 0; \ - _pp[3] = 0; \ - _pp[2] = 0; \ - _pp[1] = 0; \ - _pp[0] = 0; \ - } \ -} while (0) - /* PFNs are real physical page numbers. However, mem_map only begins to record * per-page information starting at pfn_base. This is to handle systems where * the first physical page in the machine is at some huge physical address, diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h index 4d075434e816..62a5a78804c4 100644 --- a/arch/sparc/include/asm/syscall.h +++ b/arch/sparc/include/asm/syscall.h @@ -127,10 +127,11 @@ static inline void syscall_set_arguments(struct task_struct *task, regs->u_regs[UREG_I0 + i] = args[i]; } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { #if defined(CONFIG_SPARC64) && defined(CONFIG_COMPAT) - return in_compat_syscall() ? AUDIT_ARCH_SPARC : AUDIT_ARCH_SPARC64; + return test_tsk_thread_flag(task, TIF_32BIT) + ? AUDIT_ARCH_SPARC : AUDIT_ARCH_SPARC64; #elif defined(CONFIG_SPARC64) return AUDIT_ARCH_SPARC64; #else diff --git a/arch/sparc/include/uapi/asm/sockios.h b/arch/sparc/include/uapi/asm/sockios.h deleted file mode 100644 index 18a3ec14a847..000000000000 --- a/arch/sparc/include/uapi/asm/sockios.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_SPARC_SOCKIOS_H -#define _ASM_SPARC_SOCKIOS_H - -/* Socket-level I/O control calls. 
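
Both this sparc des_glue.c hunk and the s390 des_s390.c hunk earlier replace two slightly different open-coded weak-key tests with one call to __des3_verify_key(). The property being checked is that adjacent keys differ (K1 != K2 and K2 != K3; in FIPS mode the kernel helper additionally requires K1 != K3), since repeated halves collapse EDE to single DES. The core invariant, sketched without the crypto-API flag plumbing; des3_key_ok() is not the kernel function:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define DES_KEY_SIZE 8

/* Reject 3DES keys whose adjacent halves repeat. */
static bool des3_key_ok(const unsigned char *key)
{
	return memcmp(key, key + DES_KEY_SIZE, DES_KEY_SIZE) != 0 &&
	       memcmp(key + DES_KEY_SIZE, key + 2 * DES_KEY_SIZE,
		      DES_KEY_SIZE) != 0;
}

int main(void)
{
	unsigned char k[3 * DES_KEY_SIZE];

	memset(k, 'a', DES_KEY_SIZE);
	memset(k + DES_KEY_SIZE, 'b', DES_KEY_SIZE);
	memset(k + 2 * DES_KEY_SIZE, 'c', DES_KEY_SIZE);
	printf("distinct halves: %s\n", des3_key_ok(k) ? "ok" : "weak");

	memcpy(k + DES_KEY_SIZE, k, DES_KEY_SIZE);	/* force K1 == K2 */
	printf("repeated halves: %s\n", des3_key_ok(k) ? "ok" : "weak");
	return 0;
}
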
*/ -#define FIOSETOWN 0x8901 -#define SIOCSPGRP 0x8902 -#define FIOGETOWN 0x8903 -#define SIOCGPGRP 0x8904 -#define SIOCATMARK 0x8905 -#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ -#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ - -#endif /* !(_ASM_SPARC_SOCKIOS_H) */ - diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c index d1d52822603d..1cb62bfeaa1f 100644 --- a/arch/sparc/kernel/cpumap.c +++ b/arch/sparc/kernel/cpumap.c @@ -194,8 +194,7 @@ static struct cpuinfo_tree *build_cpuinfo_tree(void) n = enumerate_cpuinfo_nodes(tmp_level); - new_tree = kzalloc(sizeof(struct cpuinfo_tree) + - (sizeof(struct cpuinfo_node) * n), GFP_ATOMIC); + new_tree = kzalloc(struct_size(new_tree, nodes, n), GFP_ATOMIC); if (!new_tree) return NULL; diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index f87265afb175..cad08ccce625 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -876,7 +876,7 @@ void ldom_power_off(void) static void ds_conn_reset(struct ds_info *dp) { - printk(KERN_ERR "ds-%llu: ds_conn_reset() from %pf\n", + printk(KERN_ERR "ds-%llu: ds_conn_reset() from %ps\n", dp->id, __builtin_return_address(0)); } diff --git a/arch/sparc/kernel/uprobes.c b/arch/sparc/kernel/uprobes.c index d852ae56ddc1..c44bf5b85de8 100644 --- a/arch/sparc/kernel/uprobes.c +++ b/arch/sparc/kernel/uprobes.c @@ -29,7 +29,6 @@ #include <linux/kdebug.h> #include <asm/cacheflush.h> -#include <linux/uaccess.h> /* Compute the address of the breakpoint instruction and return it. * diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index aee6dba83d0e..1e770a517d4a 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -245,8 +245,8 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, return nr; } -int get_user_pages_fast(unsigned long start, int nr_pages, int write, - struct page **pages) +int get_user_pages_fast(unsigned long start, int nr_pages, + unsigned int gup_flags, struct page **pages) { struct mm_struct *mm = current->mm; unsigned long addr, len, end; @@ -303,7 +303,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, next = pgd_addr_end(addr, end); if (pgd_none(pgd)) goto slow; - if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) + if (!gup_pud_range(pgd, addr, next, gup_flags & FOLL_WRITE, + pages, &nr)) goto slow; } while (pgdp++, addr = next, addr != end); @@ -324,7 +325,7 @@ slow: ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT, pages, - write ? 
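
The cpumap.c change above swaps an open-coded sizeof(struct cpuinfo_tree) + n * sizeof(struct cpuinfo_node) for struct_size(new_tree, nodes, n). Besides being shorter, the kernel helper ties the element size to the flexible-array member itself and saturates rather than wraps on multiplication overflow. A userspace approximation without the overflow handling (types trimmed to the bare shape):

#include <stdio.h>
#include <stdlib.h>

struct cpuinfo_node { int id; };

struct cpuinfo_tree {
	int total_nodes;
	struct cpuinfo_node nodes[];	/* flexible array member */
};

/* Simplified struct_size(): the kernel version also guards overflow. */
#define struct_size(p, member, n) \
	(sizeof(*(p)) + sizeof((p)->member[0]) * (size_t)(n))

int main(void)
{
	int n = 8;
	struct cpuinfo_tree *t = calloc(1, struct_size(t, nodes, n));

	if (!t)
		return 1;
	t->total_nodes = n;
	printf("allocated %zu bytes for %d nodes\n",
	       struct_size(t, nodes, n), n);
	free(t);
	return 0;
}
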
FOLL_WRITE : 0); + gup_flags); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index a8ff29821bdb..046ab116cc8c 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -294,19 +294,6 @@ void __init mem_init(void) mem_init_print_info(NULL); } -void free_initmem (void) -{ - free_initmem_default(POISON_FREE_INITMEM); -} - -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, - "initrd"); -} -#endif - void sparc_flush_page_to_ram(struct page *page) { unsigned long vaddr = (unsigned long)page_address(page); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index f2d70ff7a284..4b099dd7a767 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2269,19 +2269,6 @@ static unsigned long last_valid_pfn; static void sun4u_pgprot_init(void); static void sun4v_pgprot_init(void); -static phys_addr_t __init available_memory(void) -{ - phys_addr_t available = 0ULL; - phys_addr_t pa_start, pa_end; - u64 i; - - for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &pa_start, - &pa_end, NULL) - available = available + (pa_end - pa_start); - - return available; -} - #define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U) #define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V) #define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U) @@ -2295,33 +2282,8 @@ static phys_addr_t __init available_memory(void) */ static void __init reduce_memory(phys_addr_t limit_ram) { - phys_addr_t avail_ram = available_memory(); - phys_addr_t pa_start, pa_end; - u64 i; - - if (limit_ram >= avail_ram) - return; - - for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &pa_start, - &pa_end, NULL) { - phys_addr_t region_size = pa_end - pa_start; - phys_addr_t clip_start = pa_start; - - avail_ram = avail_ram - region_size; - /* Are we consuming too much? 
*/ - if (avail_ram < limit_ram) { - phys_addr_t give_back = limit_ram - avail_ram; - - region_size = region_size - give_back; - clip_start = clip_start + give_back; - } - - memblock_remove(clip_start, region_size); - - if (avail_ram <= limit_ram) - break; - i = 0UL; - } + limit_ram += memblock_reserved_size(); + memblock_enforce_memory_limit(limit_ram); } void __init paging_init(void) @@ -2610,14 +2572,6 @@ void free_initmem(void) } } -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, - "initrd"); -} -#endif - pgprot_t PAGE_KERNEL __read_mostly; EXPORT_SYMBOL(PAGE_KERNEL); diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index e8d5d73ca40d..71ac353032b6 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -175,16 +175,37 @@ static void iommu_flush_iotlb(iopte_t *iopte, unsigned int niopte) } } -static u32 iommu_get_one(struct device *dev, struct page *page, int npages) +static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t len, bool per_page_flush) { struct iommu_struct *iommu = dev->archdata.iommu; - int ioptex; - iopte_t *iopte, *iopte0; + phys_addr_t paddr = page_to_phys(page) + offset; + unsigned long off = paddr & ~PAGE_MASK; + unsigned long npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long pfn = __phys_to_pfn(paddr); unsigned int busa, busa0; - int i; + iopte_t *iopte, *iopte0; + int ioptex, i; + + /* XXX So what is maxphys for us and how do drivers know it? */ + if (!len || len > 256 * 1024) + return DMA_MAPPING_ERROR; + + /* + * We expect unmapped highmem pages to be not in the cache. + * XXX Is this a good assumption? + * XXX What if someone else unmaps it here and races us? + */ + if (per_page_flush && !PageHighMem(page)) { + unsigned long vaddr, p; + + vaddr = (unsigned long)page_address(page) + offset; + for (p = vaddr & PAGE_MASK; p < vaddr + len; p += PAGE_SIZE) + flush_page_for_dma(p); + } /* page color = pfn of page */ - ioptex = bit_map_string_get(&iommu->usemap, npages, page_to_pfn(page)); + ioptex = bit_map_string_get(&iommu->usemap, npages, pfn); if (ioptex < 0) panic("iommu out"); busa0 = iommu->start + (ioptex << PAGE_SHIFT); @@ -193,29 +214,15 @@ static u32 iommu_get_one(struct device *dev, struct page *page, int npages) busa = busa0; iopte = iopte0; for (i = 0; i < npages; i++) { - iopte_val(*iopte) = MKIOPTE(page_to_pfn(page), IOPERM); + iopte_val(*iopte) = MKIOPTE(pfn, IOPERM); iommu_invalidate_page(iommu->regs, busa); busa += PAGE_SIZE; iopte++; - page++; + pfn++; } iommu_flush_iotlb(iopte0, npages); - - return busa0; -} - -static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t len) -{ - void *vaddr = page_address(page) + offset; - unsigned long off = (unsigned long)vaddr & ~PAGE_MASK; - unsigned long npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - - /* XXX So what is maxphys for us and how do drivers know it? 
*/ - if (!len || len > 256 * 1024) - return DMA_MAPPING_ERROR; - return iommu_get_one(dev, virt_to_page(vaddr), npages) + off; + return busa0 + off; } static dma_addr_t sbus_iommu_map_page_gflush(struct device *dev, @@ -223,81 +230,58 @@ static dma_addr_t sbus_iommu_map_page_gflush(struct device *dev, enum dma_data_direction dir, unsigned long attrs) { flush_page_for_dma(0); - return __sbus_iommu_map_page(dev, page, offset, len); + return __sbus_iommu_map_page(dev, page, offset, len, false); } static dma_addr_t sbus_iommu_map_page_pflush(struct device *dev, struct page *page, unsigned long offset, size_t len, enum dma_data_direction dir, unsigned long attrs) { - void *vaddr = page_address(page) + offset; - unsigned long p = ((unsigned long)vaddr) & PAGE_MASK; - - while (p < (unsigned long)vaddr + len) { - flush_page_for_dma(p); - p += PAGE_SIZE; - } - - return __sbus_iommu_map_page(dev, page, offset, len); + return __sbus_iommu_map_page(dev, page, offset, len, true); } -static int sbus_iommu_map_sg_gflush(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) +static int __sbus_iommu_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs, + bool per_page_flush) { struct scatterlist *sg; - int i, n; - - flush_page_for_dma(0); + int j; - for_each_sg(sgl, sg, nents, i) { - n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; - sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset; + for_each_sg(sgl, sg, nents, j) { + sg->dma_address =__sbus_iommu_map_page(dev, sg_page(sg), + sg->offset, sg->length, per_page_flush); + if (sg->dma_address == DMA_MAPPING_ERROR) + return 0; sg->dma_length = sg->length; } return nents; } -static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl, +static int sbus_iommu_map_sg_gflush(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) { - unsigned long page, oldpage = 0; - struct scatterlist *sg; - int i, j, n; - - for_each_sg(sgl, sg, nents, j) { - n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; - - /* - * We expect unmapped highmem pages to be not in the cache. - * XXX Is this a good assumption? - * XXX What if someone else unmaps it here and races us? - */ - if ((page = (unsigned long) page_address(sg_page(sg))) != 0) { - for (i = 0; i < n; i++) { - if (page != oldpage) { /* Already flushed? 
*/ - flush_page_for_dma(page); - oldpage = page; - } - page += PAGE_SIZE; - } - } - - sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset; - sg->dma_length = sg->length; - } + flush_page_for_dma(0); + return __sbus_iommu_map_sg(dev, sgl, nents, dir, attrs, false); +} - return nents; +static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + return __sbus_iommu_map_sg(dev, sgl, nents, dir, attrs, true); } -static void iommu_release_one(struct device *dev, u32 busa, int npages) +static void sbus_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr, + size_t len, enum dma_data_direction dir, unsigned long attrs) { struct iommu_struct *iommu = dev->archdata.iommu; - int ioptex; - int i; + unsigned int busa = dma_addr & PAGE_MASK; + unsigned long off = dma_addr & ~PAGE_MASK; + unsigned int npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT; + unsigned int ioptex = (busa - iommu->start) >> PAGE_SHIFT; + unsigned int i; BUG_ON(busa < iommu->start); - ioptex = (busa - iommu->start) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { iopte_val(iommu->page_table[ioptex + i]) = 0; iommu_invalidate_page(iommu->regs, busa); @@ -306,25 +290,15 @@ static void iommu_release_one(struct device *dev, u32 busa, int npages) bit_map_clear(&iommu->usemap, ioptex, npages); } -static void sbus_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr, - size_t len, enum dma_data_direction dir, unsigned long attrs) -{ - unsigned long off = dma_addr & ~PAGE_MASK; - int npages; - - npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT; - iommu_release_one(dev, dma_addr & PAGE_MASK, npages); -} - static void sbus_iommu_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents, enum dma_data_direction dir, unsigned long attrs) { struct scatterlist *sg; - int i, n; + int i; for_each_sg(sgl, sg, nents, i) { - n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT; - iommu_release_one(dev, sg->dma_address & PAGE_MASK, n); + sbus_iommu_unmap_page(dev, sg->dma_address, sg->length, dir, + attrs); sg->dma_address = 0x21212121; } } diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile index 74e97f77e23b..83c4b463cb3d 100644 --- a/arch/sparc/vdso/Makefile +++ b/arch/sparc/vdso/Makefile @@ -68,7 +68,7 @@ CFLAGS_REMOVE_vdso-note.o = -pg CFLAGS_REMOVE_vclock_gettime.o = -pg $(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg +$(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) diff --git a/arch/um/Kconfig b/arch/um/Kconfig index ec9711d068b7..6b6eb938fcc1 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -80,46 +80,46 @@ config LD_SCRIPT_DYN bool default y depends on !LD_SCRIPT_STATIC - select MODULE_REL_CRCS if MODVERSIONS + select MODULE_REL_CRCS if MODVERSIONS config HOSTFS tristate "Host filesystem" help - While the User-Mode Linux port uses its own root file system for - booting and normal file access, this module lets the UML user - access files stored on the host. It does not require any - network connection between the Host and UML. An example use of - this might be: + While the User-Mode Linux port uses its own root file system for + booting and normal file access, this module lets the UML user + access files stored on the host. It does not require any + network connection between the Host and UML. 
An example use of + this might be: - mount none /tmp/fromhost -t hostfs -o /tmp/umlshare + mount none /tmp/fromhost -t hostfs -o /tmp/umlshare - where /tmp/fromhost is an empty directory inside UML and - /tmp/umlshare is a directory on the host with files the UML user - wishes to access. + where /tmp/fromhost is an empty directory inside UML and + /tmp/umlshare is a directory on the host with files the UML user + wishes to access. - For more information, see - <http://user-mode-linux.sourceforge.net/hostfs.html>. + For more information, see + <http://user-mode-linux.sourceforge.net/hostfs.html>. - If you'd like to be able to work with files stored on the host, - say Y or M here; otherwise say N. + If you'd like to be able to work with files stored on the host, + say Y or M here; otherwise say N. config MCONSOLE bool "Management console" depends on PROC_FS default y help - The user mode linux management console is a low-level interface to - the kernel, somewhat like the i386 SysRq interface. Since there is - a full-blown operating system running under every user mode linux - instance, there is much greater flexibility possible than with the - SysRq mechanism. + The user mode linux management console is a low-level interface to + the kernel, somewhat like the i386 SysRq interface. Since there is + a full-blown operating system running under every user mode linux + instance, there is much greater flexibility possible than with the + SysRq mechanism. - If you answer 'Y' to this option, to use this feature, you need the - mconsole client (called uml_mconsole) which is present in CVS in - 2.4.5-9um and later (path /tools/mconsole), and is also in the - distribution RPM package in 2.4.6 and later. + If you answer 'Y' to this option, to use this feature, you need the + mconsole client (called uml_mconsole) which is present in CVS in + 2.4.5-9um and later (path /tools/mconsole), and is also in the + distribution RPM package in 2.4.6 and later. - It is safe to say 'Y' here. + It is safe to say 'Y' here. config MAGIC_SYSRQ bool "Magic SysRq key" @@ -142,13 +142,17 @@ config MAGIC_SYSRQ config KERNEL_STACK_ORDER int "Kernel stack size order" - default 1 if 64BIT - range 1 10 if 64BIT - default 0 if !64BIT + default 2 if 64BIT + range 2 10 if 64BIT + default 1 if !64BIT help This option determines the size of UML kernel stacks. They will be 1 << order pages. The default is OK unless you're running Valgrind on UML, in which case, set this to 3. + It is possible to reduce the stack to 1 for 64BIT and 0 for 32BIT on + older (pre-2017) CPUs. It is not recommended on newer CPUs due to the + increase in the size of the state which needs to be saved when handling + signals. config MMAPPER tristate "iomem emulation driver" diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig index 2b1aaf7755aa..2638e46f50cc 100644 --- a/arch/um/drivers/Kconfig +++ b/arch/um/drivers/Kconfig @@ -11,58 +11,58 @@ config STDERR_CONSOLE config SSL bool "Virtual serial line" help - The User-Mode Linux environment allows you to create virtual serial - lines on the UML that are usually made to show up on the host as - ttys or ptys. + The User-Mode Linux environment allows you to create virtual serial + lines on the UML that are usually made to show up on the host as + ttys or ptys. - See <http://user-mode-linux.sourceforge.net/old/input.html> for more - information and command line examples of how to use this facility. 
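
One functional change hides in the reindented UML Kconfig text above: KERNEL_STACK_ORDER's default rises from 1 to 2 on 64-bit (and 0 to 1 on 32-bit). Since the help text defines the stack as 1 << order pages, the default 64-bit UML kernel stack doubles from 8 KiB to 16 KiB with 4 KiB host pages, making room for the larger signal-handling state on newer CPUs. The arithmetic, spelled out (4 KiB host PAGE_SIZE is an assumption):

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096;	/* assumed host PAGE_SIZE */

	for (int order = 0; order <= 2; order++)
		printf("order %d -> %lu KiB kernel stack\n",
		       order, (1UL << order) * page_size / 1024);
	return 0;
}
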
+ See <http://user-mode-linux.sourceforge.net/old/input.html> for more + information and command line examples of how to use this facility. - Unless you have a specific reason for disabling this, say Y. + Unless you have a specific reason for disabling this, say Y. config NULL_CHAN bool "null channel support" help - This option enables support for attaching UML consoles and serial - lines to a device similar to /dev/null. Data written to it disappears - and there is never any data to be read. + This option enables support for attaching UML consoles and serial + lines to a device similar to /dev/null. Data written to it disappears + and there is never any data to be read. config PORT_CHAN bool "port channel support" help - This option enables support for attaching UML consoles and serial - lines to host portals. They may be accessed with 'telnet <host> - <port number>'. Any number of consoles and serial lines may be - attached to a single portal, although what UML device you get when - you telnet to that portal will be unpredictable. - It is safe to say 'Y' here. + This option enables support for attaching UML consoles and serial + lines to host portals. They may be accessed with 'telnet <host> + <port number>'. Any number of consoles and serial lines may be + attached to a single portal, although what UML device you get when + you telnet to that portal will be unpredictable. + It is safe to say 'Y' here. config PTY_CHAN bool "pty channel support" help - This option enables support for attaching UML consoles and serial - lines to host pseudo-terminals. Access to both traditional - pseudo-terminals (/dev/pty*) and pts pseudo-terminals are controlled - with this option. The assignment of UML devices to host devices - will be announced in the kernel message log. - It is safe to say 'Y' here. + This option enables support for attaching UML consoles and serial + lines to host pseudo-terminals. Access to both traditional + pseudo-terminals (/dev/pty*) and pts pseudo-terminals are controlled + with this option. The assignment of UML devices to host devices + will be announced in the kernel message log. + It is safe to say 'Y' here. config TTY_CHAN bool "tty channel support" help - This option enables support for attaching UML consoles and serial - lines to host terminals. Access to both virtual consoles - (/dev/tty*) and the slave side of pseudo-terminals (/dev/ttyp* and - /dev/pts/*) are controlled by this option. - It is safe to say 'Y' here. + This option enables support for attaching UML consoles and serial + lines to host terminals. Access to both virtual consoles + (/dev/tty*) and the slave side of pseudo-terminals (/dev/ttyp* and + /dev/pts/*) are controlled by this option. + It is safe to say 'Y' here. config XTERM_CHAN bool "xterm channel support" help - This option enables support for attaching UML consoles and serial - lines to xterms. Each UML device so assigned will be brought up in - its own xterm. - It is safe to say 'Y' here. + This option enables support for attaching UML consoles and serial + lines to xterms. Each UML device so assigned will be brought up in + its own xterm. + It is safe to say 'Y' here. config NOCONFIG_CHAN bool @@ -72,43 +72,43 @@ config CON_ZERO_CHAN string "Default main console channel initialization" default "fd:0,fd:1" help - This is the string describing the channel to which the main console - will be attached by default. This value can be overridden from the - command line. 
The default value is "fd:0,fd:1", which attaches the - main console to stdin and stdout. - It is safe to leave this unchanged. + This is the string describing the channel to which the main console + will be attached by default. This value can be overridden from the + command line. The default value is "fd:0,fd:1", which attaches the + main console to stdin and stdout. + It is safe to leave this unchanged. config CON_CHAN string "Default console channel initialization" default "xterm" help - This is the string describing the channel to which all consoles - except the main console will be attached by default. This value can - be overridden from the command line. The default value is "xterm", - which brings them up in xterms. - It is safe to leave this unchanged, although you may wish to change - this if you expect the UML that you build to be run in environments - which don't have X or xterm available. + This is the string describing the channel to which all consoles + except the main console will be attached by default. This value can + be overridden from the command line. The default value is "xterm", + which brings them up in xterms. + It is safe to leave this unchanged, although you may wish to change + this if you expect the UML that you build to be run in environments + which don't have X or xterm available. config SSL_CHAN string "Default serial line channel initialization" default "pty" help - This is the string describing the channel to which the serial lines - will be attached by default. This value can be overridden from the - command line. The default value is "pty", which attaches them to - traditional pseudo-terminals. - It is safe to leave this unchanged, although you may wish to change - this if you expect the UML that you build to be run in environments - which don't have a set of /dev/pty* devices. + This is the string describing the channel to which the serial lines + will be attached by default. This value can be overridden from the + command line. The default value is "pty", which attaches them to + traditional pseudo-terminals. + It is safe to leave this unchanged, although you may wish to change + this if you expect the UML that you build to be run in environments + which don't have a set of /dev/pty* devices. config UML_SOUND tristate "Sound support" help - This option enables UML sound support. If enabled, it will pull in - soundcore and the UML hostaudio relay, which acts as a intermediary - between the host's dsp and mixer devices and the UML sound system. - It is safe to say 'Y' here. + This option enables UML sound support. If enabled, it will pull in + soundcore and the UML hostaudio relay, which acts as a intermediary + between the host's dsp and mixer devices and the UML sound system. + It is safe to say 'Y' here. config SOUND tristate @@ -131,107 +131,107 @@ menu "UML Network Devices" config UML_NET bool "Virtual network device" help - While the User-Mode port cannot directly talk to any physical - hardware devices, this choice and the following transport options - provide one or more virtual network devices through which the UML - kernels can talk to each other, the host, and with the host's help, - machines on the outside world. + While the User-Mode port cannot directly talk to any physical + hardware devices, this choice and the following transport options + provide one or more virtual network devices through which the UML + kernels can talk to each other, the host, and with the host's help, + machines on the outside world. 
- For more information, including explanations of the networking and - sample configurations, see - <http://user-mode-linux.sourceforge.net/old/networking.html>. + For more information, including explanations of the networking and + sample configurations, see + <http://user-mode-linux.sourceforge.net/old/networking.html>. - If you'd like to be able to enable networking in the User-Mode - linux environment, say Y; otherwise say N. Note that you must - enable at least one of the following transport options to actually - make use of UML networking. + If you'd like to be able to enable networking in the User-Mode + linux environment, say Y; otherwise say N. Note that you must + enable at least one of the following transport options to actually + make use of UML networking. config UML_NET_ETHERTAP bool "Ethertap transport" depends on UML_NET help - The Ethertap User-Mode Linux network transport allows a single - running UML to exchange packets with its host over one of the - host's Ethertap devices, such as /dev/tap0. Additional running - UMLs can use additional Ethertap devices, one per running UML. - While the UML believes it's on a (multi-device, broadcast) virtual - Ethernet network, it's in fact communicating over a point-to-point - link with the host. - - To use this, your host kernel must have support for Ethertap - devices. Also, if your host kernel is 2.4.x, it must have - CONFIG_NETLINK_DEV configured as Y or M. - - For more information, see - <http://user-mode-linux.sourceforge.net/old/networking.html> That site - has examples of the UML command line to use to enable Ethertap - networking. - - If you'd like to set up an IP network with the host and/or the - outside world, say Y to this, the Daemon Transport and/or the - Slip Transport. You'll need at least one of them, but may choose - more than one without conflict. If you don't need UML networking, - say N. + The Ethertap User-Mode Linux network transport allows a single + running UML to exchange packets with its host over one of the + host's Ethertap devices, such as /dev/tap0. Additional running + UMLs can use additional Ethertap devices, one per running UML. + While the UML believes it's on a (multi-device, broadcast) virtual + Ethernet network, it's in fact communicating over a point-to-point + link with the host. + + To use this, your host kernel must have support for Ethertap + devices. Also, if your host kernel is 2.4.x, it must have + CONFIG_NETLINK_DEV configured as Y or M. + + For more information, see + <http://user-mode-linux.sourceforge.net/old/networking.html>. That site + has examples of the UML command line to use to enable Ethertap + networking. + + If you'd like to set up an IP network with the host and/or the + outside world, say Y to this, the Daemon Transport and/or the + Slip Transport. You'll need at least one of them, but may choose + more than one without conflict. If you don't need UML networking, + say N. config UML_NET_TUNTAP bool "TUN/TAP transport" depends on UML_NET help - The UML TUN/TAP network transport allows a UML instance to exchange - packets with the host over a TUN/TAP device. This option will only - work with a 2.4 host, unless you've applied the TUN/TAP patch to - your 2.2 host kernel. + The UML TUN/TAP network transport allows a UML instance to exchange + packets with the host over a TUN/TAP device. This option will only + work with a 2.4 host, unless you've applied the TUN/TAP patch to + your 2.2 host kernel.
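(A hedged sketch of the TUN/TAP case just described, patterned on the slirp startup example later in this file; the tap device name and the root_fs image are illustrative assumptions, not part of this patch:

    ./linux ubd0=root_fs eth0=tuntap,tap0

This would attach the UML's eth0 to an already-configured host tap device named tap0.)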
- To use this transport, your host kernel must have support for TUN/TAP - devices, either built-in or as a module. + To use this transport, your host kernel must have support for TUN/TAP + devices, either built-in or as a module. config UML_NET_SLIP bool "SLIP transport" depends on UML_NET help - The slip User-Mode Linux network transport allows a running UML to - network with its host over a point-to-point link. Unlike Ethertap, - which can carry any Ethernet frame (and hence even non-IP packets), - the slip transport can only carry IP packets. - - To use this, your host must support slip devices. - - For more information, see - <http://user-mode-linux.sourceforge.net/old/networking.html>. - has examples of the UML command line to use to enable slip - networking, and details of a few quirks with it. - - The Ethertap Transport is preferred over slip because of its - limitations. If you prefer slip, however, say Y here. Otherwise - choose the Multicast transport (to network multiple UMLs on - multiple hosts), Ethertap (to network with the host and the - outside world), and/or the Daemon transport (to network multiple - UMLs on a single host). You may choose more than one without - conflict. If you don't need UML networking, say N. + The slip User-Mode Linux network transport allows a running UML to + network with its host over a point-to-point link. Unlike Ethertap, + which can carry any Ethernet frame (and hence even non-IP packets), + the slip transport can only carry IP packets. + + To use this, your host must support slip devices. + + For more information, see + <http://user-mode-linux.sourceforge.net/old/networking.html>, which + has examples of the UML command line to use to enable slip + networking, and details of a few quirks with it. + + The Ethertap Transport is preferred over slip because of slip's + limitations. If you prefer slip, however, say Y here. Otherwise + choose the Multicast transport (to network multiple UMLs on + multiple hosts), Ethertap (to network with the host and the + outside world), and/or the Daemon transport (to network multiple + UMLs on a single host). You may choose more than one without + conflict. If you don't need UML networking, say N. config UML_NET_DAEMON bool "Daemon transport" depends on UML_NET help - This User-Mode Linux network transport allows one or more running - UMLs on a single host to communicate with each other, but not to - the host. - - To use this form of networking, you'll need to run the UML - networking daemon on the host. - - For more information, see - <http://user-mode-linux.sourceforge.net/old/networking.html> That site - has examples of the UML command line to use to enable Daemon - networking. - - If you'd like to set up a network with other UMLs on a single host, - say Y. If you need a network between UMLs on multiple physical - hosts, choose the Multicast Transport. To set up a network with - the host and/or other IP machines, say Y to the Ethertap or Slip - transports. You'll need at least one of them, but may choose - more than one without conflict. If you don't need UML networking, - say N. + This User-Mode Linux network transport allows one or more running + UMLs on a single host to communicate with each other, but not to + the host. + + To use this form of networking, you'll need to run the UML + networking daemon on the host. + + For more information, see + <http://user-mode-linux.sourceforge.net/old/networking.html>. That site + has examples of the UML command line to use to enable Daemon + networking.
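(A similar hedged sketch for the daemon transport described above; the socket type and control-socket path are assumptions based on common UML defaults, not part of this patch:

    ./linux ubd0=root_fs eth0=daemon,,unix,/tmp/uml.ctl

Each cooperating UML on the host would point its device string at the same control socket served by the host-side networking daemon.)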
+ + If you'd like to set up a network with other UMLs on a single host, + say Y. If you need a network between UMLs on multiple physical + hosts, choose the Multicast Transport. To set up a network with + the host and/or other IP machines, say Y to the Ethertap or Slip + transports. You'll need at least one of them, but may choose + more than one without conflict. If you don't need UML networking, + say N. config UML_NET_VECTOR bool "Vector I/O high performance network devices" @@ -270,26 +270,26 @@ config UML_NET_MCAST bool "Multicast transport" depends on UML_NET help - This Multicast User-Mode Linux network transport allows multiple - UMLs (even ones running on different host machines!) to talk to - each other over a virtual ethernet network. However, it requires - at least one UML with one of the other transports to act as a - bridge if any of them need to be able to talk to their hosts or any - other IP machines. - - To use this, your host kernel(s) must support IP Multicasting. - - For more information, see - <http://user-mode-linux.sourceforge.net/old/networking.html> That site - has examples of the UML command line to use to enable Multicast - networking, and notes about the security of this approach. - - If you need UMLs on multiple physical hosts to communicate as if - they shared an Ethernet network, say Y. If you need to communicate - with other IP machines, make sure you select one of the other - transports (possibly in addition to Multicast; they're not - exclusive). If you don't need to network UMLs say N to each of - the transports. + This Multicast User-Mode Linux network transport allows multiple + UMLs (even ones running on different host machines!) to talk to + each other over a virtual ethernet network. However, it requires + at least one UML with one of the other transports to act as a + bridge if any of them need to be able to talk to their hosts or any + other IP machines. + + To use this, your host kernel(s) must support IP Multicasting. + + For more information, see + <http://user-mode-linux.sourceforge.net/old/networking.html>. That site + has examples of the UML command line to use to enable Multicast + networking, and notes about the security of this approach. + + If you need UMLs on multiple physical hosts to communicate as if + they shared an Ethernet network, say Y. If you need to communicate + with other IP machines, make sure you select one of the other + transports (possibly in addition to Multicast; they're not + exclusive). If you don't need to network UMLs, say N to each of + the transports. config UML_NET_PCAP bool "pcap transport" depends on UML_NET @@ -300,9 +300,9 @@ config UML_NET_PCAP UML act as a network monitor for the host. You must have libcap installed in order to build the pcap transport into UML. - For more information, see - <http://user-mode-linux.sourceforge.net/old/networking.html> That site - has examples of the UML command line to use to enable this option. + For more information, see + <http://user-mode-linux.sourceforge.net/old/networking.html>. That site + has examples of the UML command line to use to enable this option. If you intend to use UML as a network monitor for the host, say Y here. Otherwise, say N. @@ -311,27 +311,27 @@ config UML_NET_SLIRP bool "SLiRP transport" depends on UML_NET help - The SLiRP User-Mode Linux network transport allows a running UML - to network by invoking a program that can handle SLIP encapsulated - packets.
This is commonly (but not limited to) the application - known as SLiRP, a program that can re-socket IP packets back onto - the host on which it is run. Only IP packets are supported, - unlike other network transports that can handle all Ethernet - frames. In general, slirp allows the UML the same IP connectivity - to the outside world that the host user is permitted, and unlike - other transports, SLiRP works without the need of root level - privleges, setuid binaries, or SLIP devices on the host. This - also means not every type of connection is possible, but most - situations can be accommodated with carefully crafted slirp - commands that can be passed along as part of the network device's - setup string. The effect of this transport on the UML is similar - that of a host behind a firewall that masquerades all network - connections passing through it (but is less secure). - - To use this you should first have slirp compiled somewhere - accessible on the host, and have read its documentation. If you - don't need UML networking, say N. - - Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp" + The SLiRP User-Mode Linux network transport allows a running UML + to network by invoking a program that can handle SLIP encapsulated + packets. This is commonly (but not limited to) the application + known as SLiRP, a program that can re-socket IP packets back onto + the host on which it is run. Only IP packets are supported, + unlike other network transports that can handle all Ethernet + frames. In general, slirp allows the UML the same IP connectivity + to the outside world that the host user is permitted, and unlike + other transports, SLiRP works without the need of root level + privileges, setuid binaries, or SLIP devices on the host. This + also means not every type of connection is possible, but most + situations can be accommodated with carefully crafted slirp + commands that can be passed along as part of the network device's + setup string. The effect of this transport on the UML is similar + to that of a host behind a firewall that masquerades all network + connections passing through it (but is less secure). + + To use this you should first have slirp compiled somewhere + accessible on the host, and have read its documentation. If you + don't need UML networking, say N.
+ + Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp" endmenu diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c index 6d381279b362..000cb69ba0bc 100644 --- a/arch/um/drivers/harddog_kern.c +++ b/arch/um/drivers/harddog_kern.c @@ -85,7 +85,7 @@ static int harddog_open(struct inode *inode, struct file *file) timer_alive = 1; spin_unlock(&lock); mutex_unlock(&harddog_mutex); - return nonseekable_open(inode, file); + return stream_open(inode, file); err: spin_unlock(&lock); mutex_unlock(&harddog_mutex); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index aca09be2373e..33c1cd6a12ac 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -276,14 +276,14 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out) str++; if(!strcmp(str, "sync")){ global_openflags = of_sync(global_openflags); - goto out1; + return err; } err = -EINVAL; major = simple_strtoul(str, &end, 0); if((*end != '\0') || (end == str)){ *error_out = "Didn't parse major number"; - goto out1; + return err; } mutex_lock(&ubd_lock); diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 596e7056f376..e190e4ca52e1 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -1043,7 +1043,7 @@ static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev) vector_send(vp->tx_queue); return NETDEV_TX_OK; } - if (skb->xmit_more) { + if (netdev_xmit_more()) { mod_timer(&vp->tl, vp->coalesce); return NETDEV_TX_OK; } diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 9c04562310b3..b377df76cc28 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -263,7 +263,12 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval) *pteptr = pte_mknewpage(*pteptr); if(pte_present(*pteptr)) *pteptr = pte_mknewprot(*pteptr); } -#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) + +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *pteptr, pte_t pteval) +{ + set_pte(pteptr, pteval); +} #define __HAVE_ARCH_PTE_SAME static inline int pte_same(pte_t pte_a, pte_t pte_b) diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index f4874b7ec503..598d7b3d9355 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -479,7 +479,7 @@ void __init init_IRQ(void) irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq); - for (i = 1; i < NR_IRQS; i++) + for (i = 1; i < LAST_IRQ; i++) irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq); /* Initialize EPOLL Loop */ os_setup_epoll(); diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 99aa11bf53d1..a9c9a94c096f 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -188,13 +188,6 @@ void free_initmem(void) { } -#ifdef CONFIG_BLK_DEV_INITRD -void free_initrd_mem(unsigned long start, unsigned long end) -{ - free_reserved_area((void *)start, (void *)end, -1, "initrd"); -} -#endif - /* Allocate and free page tables. 
*/ pgd_t *pgd_alloc(struct mm_struct *mm) diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c index 7f06fdbc7ee1..bd3cb694322c 100644 --- a/arch/um/kernel/skas/uaccess.c +++ b/arch/um/kernel/skas/uaccess.c @@ -59,7 +59,6 @@ static pte_t *maybe_map(unsigned long virt, int is_write) static int do_op_one_page(unsigned long addr, int len, int is_write, int (*op)(unsigned long addr, int len, void *arg), void *arg) { - jmp_buf buf; struct page *page; pte_t *pte; int n; diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index 6b995e870d55..05585eef11d9 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -20,7 +20,7 @@ static void _print_addr(void *data, unsigned long address, int reliable) { - pr_info(" [<%08lx>] %s%pF\n", address, reliable ? "" : "? ", + pr_info(" [<%08lx>] %s%pS\n", address, reliable ? "" : "? ", (void *)address); } diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index 052de4c8acb2..0c572a48158e 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -56,7 +56,7 @@ static int itimer_one_shot(struct clock_event_device *evt) static struct clock_event_device timer_clockevent = { .name = "posix-timer", .rating = 250, - .cpumask = cpu_all_mask, + .cpumask = cpu_possible_mask, .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_state_shutdown = itimer_shutdown, diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index bf0acb8aad8b..75b10235d369 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -31,29 +31,23 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) { - struct uml_pt_regs *r; + struct uml_pt_regs r; int save_errno = errno; - r = uml_kmalloc(sizeof(struct uml_pt_regs), UM_GFP_ATOMIC); - if (!r) - panic("out of memory"); - - r->is_user = 0; + r.is_user = 0; if (sig == SIGSEGV) { /* For segfaults, we want the data from the sigcontext. 
*/ - get_regs_from_mc(r, mc); - GET_FAULTINFO_FROM_MC(r->faultinfo, mc); + get_regs_from_mc(&r, mc); + GET_FAULTINFO_FROM_MC(r.faultinfo, mc); } /* enable signals if sig isn't IRQ signal */ if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGALRM)) unblock_signals(); - (*sig_info[sig])(sig, si, r); + (*sig_info[sig])(sig, si, &r); errno = save_errno; - - free(r); } /* @@ -91,17 +85,11 @@ void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) static void timer_real_alarm_handler(mcontext_t *mc) { - struct uml_pt_regs *regs; - - regs = uml_kmalloc(sizeof(struct uml_pt_regs), UM_GFP_ATOMIC); - if (!regs) - panic("out of memory"); + struct uml_pt_regs regs; if (mc != NULL) - get_regs_from_mc(regs, mc); - timer_handler(SIGALRM, NULL, regs); - - free(regs); + get_regs_from_mc(®s, mc); + timer_handler(SIGALRM, NULL, ®s); } void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc) diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c index 998fbb445458..e261656fe9d7 100644 --- a/arch/um/os-Linux/umid.c +++ b/arch/um/os-Linux/umid.c @@ -135,12 +135,18 @@ out: */ static inline int is_umdir_used(char *dir) { - char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; - char pid[sizeof("nnnnn\0")], *end; + char pid[sizeof("nnnnn\0")], *end, *file; int dead, fd, p, n, err; + size_t filelen; - n = snprintf(file, sizeof(file), "%s/pid", dir); - if (n >= sizeof(file)) { + err = asprintf(&file, "%s/pid", dir); + if (err < 0) + return 0; + + filelen = strlen(file); + + n = snprintf(file, filelen, "%s/pid", dir); + if (n >= filelen) { printk(UM_KERN_ERR "is_umdir_used - pid filename too long\n"); err = -E2BIG; goto out; @@ -185,6 +191,7 @@ static inline int is_umdir_used(char *dir) out_close: close(fd); out: + free(file); return 0; } @@ -210,18 +217,21 @@ static int umdir_take_if_dead(char *dir) static void __init create_pid_file(void) { - char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; - char pid[sizeof("nnnnn\0")]; + char pid[sizeof("nnnnn\0")], *file; int fd, n; - if (umid_file_name("pid", file, sizeof(file))) + file = malloc(strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")); + if (!file) return; + if (umid_file_name("pid", file, sizeof(file))) + goto out; + fd = open(file, O_RDWR | O_CREAT | O_EXCL, 0644); if (fd < 0) { printk(UM_KERN_ERR "Open of machine pid file \"%s\" failed: " "%s\n", file, strerror(errno)); - return; + goto out; } snprintf(pid, sizeof(pid), "%d\n", getpid()); @@ -231,6 +241,8 @@ static void __init create_pid_file(void) errno); close(fd); +out: + free(file); } int __init set_umid(char *name) @@ -385,13 +397,19 @@ __uml_setup("uml_dir=", set_uml_dir, static void remove_umid_dir(void) { - char dir[strlen(uml_dir) + UMID_LEN + 1], err; + char *dir, err; + + dir = malloc(strlen(uml_dir) + UMID_LEN + 1); + if (!dir) + return; sprintf(dir, "%s%s", uml_dir, umid); err = remove_files_and_dir(dir); if (err) os_warn("%s - remove_files_and_dir failed with err = %d\n", __func__, err); + + free(dir); } __uml_exitcall(remove_umid_dir); diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index 2445dfcf6444..41fe944005f8 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -3,6 +3,7 @@ config UNICORE32 def_bool y select ARCH_32BIT_OFF_T select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_KEEPINITRD select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select HAVE_KERNEL_GZIP @@ -190,7 +191,6 @@ config I2C_EEPROM_AT24 config LCD_BACKLIGHT tristate "LCD Backlight support" - select BACKLIGHT_LCD_SUPPORT select BACKLIGHT_PWM 
endmenu diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index b301a0b3c0b2..c93dc6478cb2 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -31,7 +31,6 @@ generic-y += sections.h generic-y += segment.h generic-y += serial.h generic-y += shmparam.h -generic-y += sizes.h generic-y += syscalls.h generic-y += topology.h generic-y += trace_clock.h diff --git a/arch/unicore32/include/asm/elf.h b/arch/unicore32/include/asm/elf.h index 829042d07722..ae66dc1be49e 100644 --- a/arch/unicore32/include/asm/elf.h +++ b/arch/unicore32/include/asm/elf.h @@ -19,6 +19,7 @@ * ELF register definitions.. */ #include <asm/ptrace.h> +#include <linux/elf-em.h> typedef unsigned long elf_greg_t; typedef unsigned long elf_freg_t[3]; @@ -28,8 +29,6 @@ typedef elf_greg_t elf_gregset_t[ELF_NGREG]; typedef struct fp_state elf_fpregset_t; -#define EM_UNICORE 110 - #define R_UNICORE_NONE 0 #define R_UNICORE_PC24 1 #define R_UNICORE_ABS32 2 diff --git a/arch/unicore32/include/asm/memory.h b/arch/unicore32/include/asm/memory.h index 66bb9f6525c0..46cf27efbb7e 100644 --- a/arch/unicore32/include/asm/memory.h +++ b/arch/unicore32/include/asm/memory.h @@ -16,7 +16,7 @@ #include <linux/compiler.h> #include <linux/const.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <mach/memory.h> /* diff --git a/arch/unicore32/include/asm/syscall.h b/arch/unicore32/include/asm/syscall.h new file mode 100644 index 000000000000..607961797fff --- /dev/null +++ b/arch/unicore32/include/asm/syscall.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_UNICORE_SYSCALL_H +#define _ASM_UNICORE_SYSCALL_H + +#include <uapi/linux/audit.h> + +static inline int syscall_get_arch(struct task_struct *task) +{ + return AUDIT_ARCH_UNICORE; +} + +#endif /* _ASM_UNICORE_SYSCALL_H */ diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c index 74b6a2e29809..c994cdf14119 100644 --- a/arch/unicore32/mm/init.c +++ b/arch/unicore32/mm/init.c @@ -23,7 +23,7 @@ #include <asm/sections.h> #include <asm/setup.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/tlb.h> #include <asm/memblock.h> #include <mach/map.h> @@ -287,27 +287,3 @@ void __init mem_init(void) sysctl_overcommit_memory = OVERCOMMIT_ALWAYS; } } - -void free_initmem(void) -{ - free_initmem_default(-1); -} - -#ifdef CONFIG_BLK_DEV_INITRD - -static int keep_initrd; - -void free_initrd_mem(unsigned long start, unsigned long end) -{ - if (!keep_initrd) - free_reserved_area((void *)start, (void *)end, -1, "initrd"); -} - -static int __init keepinitrd_setup(char *__unused) -{ - keep_initrd = 1; - return 1; -} - -__setup("keepinitrd", keepinitrd_setup); -#endif diff --git a/arch/unicore32/mm/ioremap.c b/arch/unicore32/mm/ioremap.c index bf012b2b71a9..b69cb18ce8b1 100644 --- a/arch/unicore32/mm/ioremap.c +++ b/arch/unicore32/mm/ioremap.c @@ -34,7 +34,7 @@ #include <asm/mmu_context.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <mach/map.h> #include "mm.h" diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c index aa2060beb408..f0ae623b305f 100644 --- a/arch/unicore32/mm/mmu.c +++ b/arch/unicore32/mm/mmu.c @@ -22,7 +22,7 @@ #include <asm/cputype.h> #include <asm/sections.h> #include <asm/setup.h> -#include <asm/sizes.h> +#include <linux/sizes.h> #include <asm/tlb.h> #include <asm/memblock.h> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index de071d7e67b6..6bc9dd6e7534 100644 --- a/arch/x86/Kconfig +++ 
b/arch/x86/Kconfig @@ -22,7 +22,7 @@ config X86_64 def_bool y depends on 64BIT # Options that are inherently 64-bit kernel only: - select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA + select ARCH_HAS_GIGANTIC_PAGE select ARCH_SUPPORTS_INT128 select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_SOFT_DIRTY @@ -44,11 +44,9 @@ config X86 # select ACPI_LEGACY_TABLES_LOOKUP if ACPI select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI - select ANON_INODES select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_DATA select ARCH_CLOCKSOURCE_INIT - select ARCH_DISCARD_MEMBLOCK select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED @@ -304,9 +302,6 @@ config ZONE_DMA32 config AUDIT_ARCH def_bool y if X86_64 -config ARCH_SUPPORTS_OPTIMIZED_INLINING - def_bool y - config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 15d0fbe27872..f730680dc818 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -266,20 +266,6 @@ config CPA_DEBUG ---help--- Do change_page_attr() self-tests every 30 seconds. -config OPTIMIZE_INLINING - bool "Allow gcc to uninline functions marked 'inline'" - ---help--- - This option determines if the kernel forces gcc to inline the functions - developers have marked 'inline'. Doing so takes away freedom from gcc to - do what it thinks is best, which is desirable for the gcc 3.x series of - compilers. The gcc 4.x series have a rewritten inlining algorithm and - enabling this option will generate a smaller kernel there. Hopefully - this algorithm is so good that allowing gcc 4.x and above to make the - decision will become the default in the future. Until then this option - is there to test gcc for this. - - If unsure, say N. - config DEBUG_ENTRY bool "Debug low-level entry code" depends on DEBUG_KERNEL diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c index 3ea71b871813..bdeee1b830be 100644 --- a/arch/x86/crypto/aegis128-aesni-glue.c +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -11,8 +11,8 @@ * any later version. 
*/ -#include <crypto/cryptd.h> #include <crypto/internal/aead.h> +#include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include <linux/module.h> @@ -242,131 +242,35 @@ static void crypto_aegis128_aesni_exit_tfm(struct crypto_aead *aead) { } -static int cryptd_aegis128_aesni_setkey(struct crypto_aead *aead, - const u8 *key, unsigned int keylen) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); -} - -static int cryptd_aegis128_aesni_setauthsize(struct crypto_aead *aead, - unsigned int authsize) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); -} - -static int cryptd_aegis128_aesni_encrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_encrypt(req); -} - -static int cryptd_aegis128_aesni_decrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_decrypt(req); -} - -static int cryptd_aegis128_aesni_init_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_tfm = cryptd_alloc_aead("__aegis128-aesni", CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - return 0; -} - -static void cryptd_aegis128_aesni_exit_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} - -static struct aead_alg crypto_aegis128_aesni_alg[] = { - { - .setkey = crypto_aegis128_aesni_setkey, - .setauthsize = crypto_aegis128_aesni_setauthsize, - .encrypt = crypto_aegis128_aesni_encrypt, - .decrypt = crypto_aegis128_aesni_decrypt, - .init = crypto_aegis128_aesni_init_tfm, - .exit = crypto_aegis128_aesni_exit_tfm, - - .ivsize = AEGIS128_NONCE_SIZE, - .maxauthsize = AEGIS128_MAX_AUTH_SIZE, - .chunksize = AEGIS128_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aegis_ctx) + - __alignof__(struct aegis_ctx), - .cra_alignmask = 0, - - .cra_name = "__aegis128", - .cra_driver_name = "__aegis128-aesni", - - .cra_module = THIS_MODULE, - } - }, { - .setkey = cryptd_aegis128_aesni_setkey, - .setauthsize = cryptd_aegis128_aesni_setauthsize, - .encrypt = cryptd_aegis128_aesni_encrypt, - .decrypt = cryptd_aegis128_aesni_decrypt, - .init = cryptd_aegis128_aesni_init_tfm, - .exit = cryptd_aegis128_aesni_exit_tfm, - - .ivsize = AEGIS128_NONCE_SIZE, - .maxauthsize = AEGIS128_MAX_AUTH_SIZE, - .chunksize = AEGIS128_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cryptd_aead *), - .cra_alignmask = 0, - - 
.cra_priority = 400, - - .cra_name = "aegis128", - .cra_driver_name = "aegis128-aesni", - - .cra_module = THIS_MODULE, - } +static struct aead_alg crypto_aegis128_aesni_alg = { + .setkey = crypto_aegis128_aesni_setkey, + .setauthsize = crypto_aegis128_aesni_setauthsize, + .encrypt = crypto_aegis128_aesni_encrypt, + .decrypt = crypto_aegis128_aesni_decrypt, + .init = crypto_aegis128_aesni_init_tfm, + .exit = crypto_aegis128_aesni_exit_tfm, + + .ivsize = AEGIS128_NONCE_SIZE, + .maxauthsize = AEGIS128_MAX_AUTH_SIZE, + .chunksize = AEGIS128_BLOCK_SIZE, + + .base = { + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aegis_ctx) + + __alignof__(struct aegis_ctx), + .cra_alignmask = 0, + .cra_priority = 400, + + .cra_name = "__aegis128", + .cra_driver_name = "__aegis128-aesni", + + .cra_module = THIS_MODULE, } }; +static struct simd_aead_alg *simd_alg; + static int __init crypto_aegis128_aesni_module_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2) || @@ -374,14 +278,13 @@ static int __init crypto_aegis128_aesni_module_init(void) !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; - return crypto_register_aeads(crypto_aegis128_aesni_alg, - ARRAY_SIZE(crypto_aegis128_aesni_alg)); + return simd_register_aeads_compat(&crypto_aegis128_aesni_alg, 1, + &simd_alg); } static void __exit crypto_aegis128_aesni_module_exit(void) { - crypto_unregister_aeads(crypto_aegis128_aesni_alg, - ARRAY_SIZE(crypto_aegis128_aesni_alg)); + simd_unregister_aeads(&crypto_aegis128_aesni_alg, 1, &simd_alg); } module_init(crypto_aegis128_aesni_module_init); diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c index 1b1b39c66c5e..80d917f7e467 100644 --- a/arch/x86/crypto/aegis128l-aesni-glue.c +++ b/arch/x86/crypto/aegis128l-aesni-glue.c @@ -11,8 +11,8 @@ * any later version. 
*/ -#include <crypto/cryptd.h> #include <crypto/internal/aead.h> +#include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include <linux/module.h> @@ -242,131 +242,35 @@ static void crypto_aegis128l_aesni_exit_tfm(struct crypto_aead *aead) { } -static int cryptd_aegis128l_aesni_setkey(struct crypto_aead *aead, - const u8 *key, unsigned int keylen) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); -} - -static int cryptd_aegis128l_aesni_setauthsize(struct crypto_aead *aead, - unsigned int authsize) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); -} - -static int cryptd_aegis128l_aesni_encrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_encrypt(req); -} - -static int cryptd_aegis128l_aesni_decrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_decrypt(req); -} - -static int cryptd_aegis128l_aesni_init_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_tfm = cryptd_alloc_aead("__aegis128l-aesni", CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - return 0; -} - -static void cryptd_aegis128l_aesni_exit_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} - -static struct aead_alg crypto_aegis128l_aesni_alg[] = { - { - .setkey = crypto_aegis128l_aesni_setkey, - .setauthsize = crypto_aegis128l_aesni_setauthsize, - .encrypt = crypto_aegis128l_aesni_encrypt, - .decrypt = crypto_aegis128l_aesni_decrypt, - .init = crypto_aegis128l_aesni_init_tfm, - .exit = crypto_aegis128l_aesni_exit_tfm, - - .ivsize = AEGIS128L_NONCE_SIZE, - .maxauthsize = AEGIS128L_MAX_AUTH_SIZE, - .chunksize = AEGIS128L_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aegis_ctx) + - __alignof__(struct aegis_ctx), - .cra_alignmask = 0, - - .cra_name = "__aegis128l", - .cra_driver_name = "__aegis128l-aesni", - - .cra_module = THIS_MODULE, - } - }, { - .setkey = cryptd_aegis128l_aesni_setkey, - .setauthsize = cryptd_aegis128l_aesni_setauthsize, - .encrypt = cryptd_aegis128l_aesni_encrypt, - .decrypt = cryptd_aegis128l_aesni_decrypt, - .init = cryptd_aegis128l_aesni_init_tfm, - .exit = cryptd_aegis128l_aesni_exit_tfm, - - .ivsize = AEGIS128L_NONCE_SIZE, - .maxauthsize = AEGIS128L_MAX_AUTH_SIZE, - .chunksize = AEGIS128L_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cryptd_aead *), - 
.cra_alignmask = 0, - - .cra_priority = 400, - - .cra_name = "aegis128l", - .cra_driver_name = "aegis128l-aesni", - - .cra_module = THIS_MODULE, - } +static struct aead_alg crypto_aegis128l_aesni_alg = { + .setkey = crypto_aegis128l_aesni_setkey, + .setauthsize = crypto_aegis128l_aesni_setauthsize, + .encrypt = crypto_aegis128l_aesni_encrypt, + .decrypt = crypto_aegis128l_aesni_decrypt, + .init = crypto_aegis128l_aesni_init_tfm, + .exit = crypto_aegis128l_aesni_exit_tfm, + + .ivsize = AEGIS128L_NONCE_SIZE, + .maxauthsize = AEGIS128L_MAX_AUTH_SIZE, + .chunksize = AEGIS128L_BLOCK_SIZE, + + .base = { + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aegis_ctx) + + __alignof__(struct aegis_ctx), + .cra_alignmask = 0, + .cra_priority = 400, + + .cra_name = "__aegis128l", + .cra_driver_name = "__aegis128l-aesni", + + .cra_module = THIS_MODULE, } }; +static struct simd_aead_alg *simd_alg; + static int __init crypto_aegis128l_aesni_module_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2) || @@ -374,14 +278,13 @@ static int __init crypto_aegis128l_aesni_module_init(void) !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; - return crypto_register_aeads(crypto_aegis128l_aesni_alg, - ARRAY_SIZE(crypto_aegis128l_aesni_alg)); + return simd_register_aeads_compat(&crypto_aegis128l_aesni_alg, 1, + &simd_alg); } static void __exit crypto_aegis128l_aesni_module_exit(void) { - crypto_unregister_aeads(crypto_aegis128l_aesni_alg, - ARRAY_SIZE(crypto_aegis128l_aesni_alg)); + simd_unregister_aeads(&crypto_aegis128l_aesni_alg, 1, &simd_alg); } module_init(crypto_aegis128l_aesni_module_init); diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c index 6227ca3220a0..716eecb66bd5 100644 --- a/arch/x86/crypto/aegis256-aesni-glue.c +++ b/arch/x86/crypto/aegis256-aesni-glue.c @@ -11,8 +11,8 @@ * any later version. 
*/ -#include <crypto/cryptd.h> #include <crypto/internal/aead.h> +#include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include <linux/module.h> @@ -242,131 +242,35 @@ static void crypto_aegis256_aesni_exit_tfm(struct crypto_aead *aead) { } -static int cryptd_aegis256_aesni_setkey(struct crypto_aead *aead, - const u8 *key, unsigned int keylen) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); -} - -static int cryptd_aegis256_aesni_setauthsize(struct crypto_aead *aead, - unsigned int authsize) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); -} - -static int cryptd_aegis256_aesni_encrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_encrypt(req); -} - -static int cryptd_aegis256_aesni_decrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_decrypt(req); -} - -static int cryptd_aegis256_aesni_init_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_tfm = cryptd_alloc_aead("__aegis256-aesni", CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - return 0; -} - -static void cryptd_aegis256_aesni_exit_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} - -static struct aead_alg crypto_aegis256_aesni_alg[] = { - { - .setkey = crypto_aegis256_aesni_setkey, - .setauthsize = crypto_aegis256_aesni_setauthsize, - .encrypt = crypto_aegis256_aesni_encrypt, - .decrypt = crypto_aegis256_aesni_decrypt, - .init = crypto_aegis256_aesni_init_tfm, - .exit = crypto_aegis256_aesni_exit_tfm, - - .ivsize = AEGIS256_NONCE_SIZE, - .maxauthsize = AEGIS256_MAX_AUTH_SIZE, - .chunksize = AEGIS256_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_INTERNAL, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct aegis_ctx) + - __alignof__(struct aegis_ctx), - .cra_alignmask = 0, - - .cra_name = "__aegis256", - .cra_driver_name = "__aegis256-aesni", - - .cra_module = THIS_MODULE, - } - }, { - .setkey = cryptd_aegis256_aesni_setkey, - .setauthsize = cryptd_aegis256_aesni_setauthsize, - .encrypt = cryptd_aegis256_aesni_encrypt, - .decrypt = cryptd_aegis256_aesni_decrypt, - .init = cryptd_aegis256_aesni_init_tfm, - .exit = cryptd_aegis256_aesni_exit_tfm, - - .ivsize = AEGIS256_NONCE_SIZE, - .maxauthsize = AEGIS256_MAX_AUTH_SIZE, - .chunksize = AEGIS256_BLOCK_SIZE, - - .base = { - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cryptd_aead *), - .cra_alignmask = 0, - - 
.cra_priority = 400, - - .cra_name = "aegis256", - .cra_driver_name = "aegis256-aesni", - - .cra_module = THIS_MODULE, - } +static struct aead_alg crypto_aegis256_aesni_alg = { + .setkey = crypto_aegis256_aesni_setkey, + .setauthsize = crypto_aegis256_aesni_setauthsize, + .encrypt = crypto_aegis256_aesni_encrypt, + .decrypt = crypto_aegis256_aesni_decrypt, + .init = crypto_aegis256_aesni_init_tfm, + .exit = crypto_aegis256_aesni_exit_tfm, + + .ivsize = AEGIS256_NONCE_SIZE, + .maxauthsize = AEGIS256_MAX_AUTH_SIZE, + .chunksize = AEGIS256_BLOCK_SIZE, + + .base = { + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aegis_ctx) + + __alignof__(struct aegis_ctx), + .cra_alignmask = 0, + .cra_priority = 400, + + .cra_name = "__aegis256", + .cra_driver_name = "__aegis256-aesni", + + .cra_module = THIS_MODULE, } }; +static struct simd_aead_alg *simd_alg; + static int __init crypto_aegis256_aesni_module_init(void) { if (!boot_cpu_has(X86_FEATURE_XMM2) || @@ -374,14 +278,13 @@ static int __init crypto_aegis256_aesni_module_init(void) !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; - return crypto_register_aeads(crypto_aegis256_aesni_alg, - ARRAY_SIZE(crypto_aegis256_aesni_alg)); + return simd_register_aeads_compat(&crypto_aegis256_aesni_alg, 1, + &simd_alg); } static void __exit crypto_aegis256_aesni_module_exit(void) { - crypto_unregister_aeads(crypto_aegis256_aesni_alg, - ARRAY_SIZE(crypto_aegis256_aesni_alg)); + simd_unregister_aeads(&crypto_aegis256_aesni_alg, 1, &simd_alg); } module_init(crypto_aegis256_aesni_module_init); diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 1e3d2102033a..21c246799aa5 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -25,14 +25,13 @@ #include <linux/err.h> #include <crypto/algapi.h> #include <crypto/aes.h> -#include <crypto/cryptd.h> #include <crypto/ctr.h> #include <crypto/b128ops.h> #include <crypto/gcm.h> #include <crypto/xts.h> #include <asm/cpu_device_id.h> -#include <asm/fpu/api.h> #include <asm/crypto/aes.h> +#include <asm/simd.h> #include <crypto/scatterwalk.h> #include <crypto/internal/aead.h> #include <crypto/internal/simd.h> @@ -333,7 +332,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx, return -EINVAL; } - if (!irq_fpu_usable()) + if (!crypto_simd_usable()) err = crypto_aes_expand_key(ctx, in_key, key_len); else { kernel_fpu_begin(); @@ -354,7 +353,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - if (!irq_fpu_usable()) + if (!crypto_simd_usable()) crypto_aes_encrypt_x86(ctx, dst, src); else { kernel_fpu_begin(); @@ -367,7 +366,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); - if (!irq_fpu_usable()) + if (!crypto_simd_usable()) crypto_aes_decrypt_x86(ctx, dst, src); else { kernel_fpu_begin(); @@ -643,29 +642,6 @@ static int xts_decrypt(struct skcipher_request *req) aes_ctx(ctx->raw_crypt_ctx)); } -static int rfc4106_init(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", - CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - return 0; -} - -static void 
rfc4106_exit(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} - static int rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) { @@ -710,15 +686,8 @@ static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key, rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); } -static int gcmaes_wrapper_set_key(struct crypto_aead *parent, const u8 *key, - unsigned int key_len) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(parent); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setkey(&cryptd_tfm->base, key, key_len); -} - +/* This is the Integrity Check Value (aka the authentication tag) length and can + * be 8, 12 or 16 bytes long. */ static int common_rfc4106_set_authsize(struct crypto_aead *aead, unsigned int authsize) { @@ -734,17 +703,6 @@ static int common_rfc4106_set_authsize(struct crypto_aead *aead, return 0; } -/* This is the Integrity Check Value (aka the authentication tag length and can - * be 8, 12 or 16 bytes long. */ -static int gcmaes_wrapper_set_authsize(struct crypto_aead *parent, - unsigned int authsize) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(parent); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); -} - static int generic_gcmaes_set_authsize(struct crypto_aead *tfm, unsigned int authsize) { @@ -964,38 +922,6 @@ static int helper_rfc4106_decrypt(struct aead_request *req) return gcmaes_decrypt(req, req->assoclen - 8, ctx->hash_subkey, iv, aes_ctx); } - -static int gcmaes_wrapper_encrypt(struct aead_request *req) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(tfm); - struct cryptd_aead *cryptd_tfm = *ctx; - - tfm = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - tfm = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, tfm); - - return crypto_aead_encrypt(req); -} - -static int gcmaes_wrapper_decrypt(struct aead_request *req) -{ - struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(tfm); - struct cryptd_aead *cryptd_tfm = *ctx; - - tfm = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - tfm = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, tfm); - - return crypto_aead_decrypt(req); -} #endif static struct crypto_alg aesni_algs[] = { { @@ -1148,31 +1074,7 @@ static int generic_gcmaes_decrypt(struct aead_request *req) aes_ctx); } -static int generic_gcmaes_init(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_tfm = cryptd_alloc_aead("__driver-generic-gcm-aes-aesni", - CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - - return 0; -} - -static void generic_gcmaes_exit(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} - -static struct aead_alg aesni_aead_algs[] = { { +static struct aead_alg aesni_aeads[] = { { .setkey = common_rfc4106_set_key, .setauthsize = common_rfc4106_set_authsize, .encrypt = helper_rfc4106_encrypt, @@ -1180,8 +1082,9 @@ static struct aead_alg aesni_aead_algs[] = { { .ivsize = GCM_RFC4106_IV_SIZE, .maxauthsize = 16, .base = { - .cra_name = "__gcm-aes-aesni", - .cra_driver_name = 
"__driver-gcm-aes-aesni", + .cra_name = "__rfc4106(gcm(aes))", + .cra_driver_name = "__rfc4106-gcm-aesni", + .cra_priority = 400, .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx), @@ -1189,24 +1092,6 @@ static struct aead_alg aesni_aead_algs[] = { { .cra_module = THIS_MODULE, }, }, { - .init = rfc4106_init, - .exit = rfc4106_exit, - .setkey = gcmaes_wrapper_set_key, - .setauthsize = gcmaes_wrapper_set_authsize, - .encrypt = gcmaes_wrapper_encrypt, - .decrypt = gcmaes_wrapper_decrypt, - .ivsize = GCM_RFC4106_IV_SIZE, - .maxauthsize = 16, - .base = { - .cra_name = "rfc4106(gcm(aes))", - .cra_driver_name = "rfc4106-gcm-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cryptd_aead *), - .cra_module = THIS_MODULE, - }, -}, { .setkey = generic_gcmaes_set_key, .setauthsize = generic_gcmaes_set_authsize, .encrypt = generic_gcmaes_encrypt, @@ -1214,38 +1099,21 @@ static struct aead_alg aesni_aead_algs[] = { { .ivsize = GCM_AES_IV_SIZE, .maxauthsize = 16, .base = { - .cra_name = "__generic-gcm-aes-aesni", - .cra_driver_name = "__driver-generic-gcm-aes-aesni", - .cra_priority = 0, + .cra_name = "__gcm(aes)", + .cra_driver_name = "__generic-gcm-aesni", + .cra_priority = 400, .cra_flags = CRYPTO_ALG_INTERNAL, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct generic_gcmaes_ctx), .cra_alignmask = AESNI_ALIGN - 1, .cra_module = THIS_MODULE, }, -}, { - .init = generic_gcmaes_init, - .exit = generic_gcmaes_exit, - .setkey = gcmaes_wrapper_set_key, - .setauthsize = gcmaes_wrapper_set_authsize, - .encrypt = gcmaes_wrapper_encrypt, - .decrypt = gcmaes_wrapper_decrypt, - .ivsize = GCM_AES_IV_SIZE, - .maxauthsize = 16, - .base = { - .cra_name = "gcm(aes)", - .cra_driver_name = "generic-gcm-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct cryptd_aead *), - .cra_module = THIS_MODULE, - }, } }; #else -static struct aead_alg aesni_aead_algs[0]; +static struct aead_alg aesni_aeads[0]; #endif +static struct simd_aead_alg *aesni_simd_aeads[ARRAY_SIZE(aesni_aeads)]; static const struct x86_cpu_id aesni_cpu_id[] = { X86_FEATURE_MATCH(X86_FEATURE_AES), @@ -1253,23 +1121,9 @@ static const struct x86_cpu_id aesni_cpu_id[] = { }; MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); -static void aesni_free_simds(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) && - aesni_simd_skciphers[i]; i++) - simd_skcipher_free(aesni_simd_skciphers[i]); -} - static int __init aesni_init(void) { - struct simd_skcipher_alg *simd; - const char *basename; - const char *algname; - const char *drvname; int err; - int i; if (!x86_match_cpu(aesni_cpu_id)) return -ENODEV; @@ -1304,36 +1158,22 @@ static int __init aesni_init(void) if (err) return err; - err = crypto_register_skciphers(aesni_skciphers, - ARRAY_SIZE(aesni_skciphers)); + err = simd_register_skciphers_compat(aesni_skciphers, + ARRAY_SIZE(aesni_skciphers), + aesni_simd_skciphers); if (err) goto unregister_algs; - err = crypto_register_aeads(aesni_aead_algs, - ARRAY_SIZE(aesni_aead_algs)); + err = simd_register_aeads_compat(aesni_aeads, ARRAY_SIZE(aesni_aeads), + aesni_simd_aeads); if (err) goto unregister_skciphers; - for (i = 0; i < ARRAY_SIZE(aesni_skciphers); i++) { - algname = aesni_skciphers[i].base.cra_name + 2; - drvname = aesni_skciphers[i].base.cra_driver_name + 2; - basename = aesni_skciphers[i].base.cra_driver_name; - simd = simd_skcipher_create_compat(algname, drvname, 
basename); - err = PTR_ERR(simd); - if (IS_ERR(simd)) - goto unregister_simds; - - aesni_simd_skciphers[i] = simd; - } - return 0; -unregister_simds: - aesni_free_simds(); - crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs)); unregister_skciphers: - crypto_unregister_skciphers(aesni_skciphers, - ARRAY_SIZE(aesni_skciphers)); + simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers), + aesni_simd_skciphers); unregister_algs: crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); return err; @@ -1341,10 +1181,10 @@ unregister_algs: static void __exit aesni_exit(void) { - aesni_free_simds(); - crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs)); - crypto_unregister_skciphers(aesni_skciphers, - ARRAY_SIZE(aesni_skciphers)); + simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads), + aesni_simd_aeads); + simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers), + aesni_simd_skciphers); crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs)); } diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c index 45c1c4143176..4967ad620775 100644 --- a/arch/x86/crypto/chacha_glue.c +++ b/arch/x86/crypto/chacha_glue.c @@ -12,10 +12,10 @@ #include <crypto/algapi.h> #include <crypto/chacha.h> +#include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> #include <linux/kernel.h> #include <linux/module.h> -#include <asm/fpu/api.h> #include <asm/simd.h> #define CHACHA_STATE_ALIGN 16 @@ -170,7 +170,7 @@ static int chacha_simd(struct skcipher_request *req) struct skcipher_walk walk; int err; - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !irq_fpu_usable()) + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) return crypto_chacha_crypt(req); err = skcipher_walk_virt(&walk, req, true); @@ -193,7 +193,7 @@ static int xchacha_simd(struct skcipher_request *req) u8 real_iv[16]; int err; - if (req->cryptlen <= CHACHA_BLOCK_SIZE || !irq_fpu_usable()) + if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable()) return crypto_xchacha_crypt(req); err = skcipher_walk_virt(&walk, req, true); diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c index c8d9cdacbf10..cb4ab6645106 100644 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -32,10 +32,11 @@ #include <linux/kernel.h> #include <linux/crc32.h> #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <asm/cpufeatures.h> #include <asm/cpu_device_id.h> -#include <asm/fpu/api.h> +#include <asm/simd.h> #define CHKSUM_BLOCK_SIZE 1 #define CHKSUM_DIGEST_SIZE 4 @@ -54,7 +55,7 @@ static u32 __attribute__((pure)) unsigned int iremainder; unsigned int prealign; - if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !irq_fpu_usable()) + if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !crypto_simd_usable()) return crc32_le(crc, p, len); if ((long)p & SCALE_F_MASK) { diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index 5773e1161072..a58fe217c856 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -29,10 +29,11 @@ #include <linux/string.h> #include <linux/kernel.h> #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <asm/cpufeatures.h> #include <asm/cpu_device_id.h> -#include <asm/fpu/internal.h> +#include <asm/simd.h> #define CHKSUM_BLOCK_SIZE 1 #define CHKSUM_DIGEST_SIZE 4 @@ -177,7 +178,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data, * use 
faster PCL version if datasize is large enough to * overcome kernel fpu state save/restore overhead */ - if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) { + if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { kernel_fpu_begin(); *crcp = crc_pcl(data, len, *crcp); kernel_fpu_end(); @@ -189,7 +190,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data, static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len, u8 *out) { - if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) { + if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { kernel_fpu_begin(); *(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp)); kernel_fpu_end(); diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c index 0e785c0b2354..3c81e15b0873 100644 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c @@ -26,12 +26,13 @@ #include <linux/module.h> #include <linux/crc-t10dif.h> #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <linux/init.h> #include <linux/string.h> #include <linux/kernel.h> -#include <asm/fpu/api.h> #include <asm/cpufeatures.h> #include <asm/cpu_device_id.h> +#include <asm/simd.h> asmlinkage u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len); @@ -53,7 +54,7 @@ static int chksum_update(struct shash_desc *desc, const u8 *data, { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - if (length >= 16 && irq_fpu_usable()) { + if (length >= 16 && crypto_simd_usable()) { kernel_fpu_begin(); ctx->crc = crc_t10dif_pcl(ctx->crc, data, length); kernel_fpu_end(); @@ -70,15 +71,14 @@ static int chksum_final(struct shash_desc *desc, u8 *out) return 0; } -static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len, - u8 *out) +static int __chksum_finup(__u16 crc, const u8 *data, unsigned int len, u8 *out) { - if (len >= 16 && irq_fpu_usable()) { + if (len >= 16 && crypto_simd_usable()) { kernel_fpu_begin(); - *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len); + *(__u16 *)out = crc_t10dif_pcl(crc, data, len); kernel_fpu_end(); } else - *(__u16 *)out = crc_t10dif_generic(*crcp, data, len); + *(__u16 *)out = crc_t10dif_generic(crc, data, len); return 0; } @@ -87,15 +87,13 @@ static int chksum_finup(struct shash_desc *desc, const u8 *data, { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - return __chksum_finup(&ctx->crc, data, len, out); + return __chksum_finup(ctx->crc, data, len, out); } static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out) { - struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - - return __chksum_finup(&ctx->crc, data, length, out); + return __chksum_finup(0, data, length, out); } static struct shash_alg alg = { diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 3582ae885ee1..e3f3e6fd9d65 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -19,8 +19,9 @@ #include <crypto/cryptd.h> #include <crypto/gf128mul.h> #include <crypto/internal/hash.h> -#include <asm/fpu/api.h> +#include <crypto/internal/simd.h> #include <asm/cpu_device_id.h> +#include <asm/simd.h> #define GHASH_BLOCK_SIZE 16 #define GHASH_DIGEST_SIZE 16 @@ -171,7 +172,6 @@ static int ghash_async_init(struct ahash_request *req) struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); desc->tfm = child; - desc->flags = req->base.flags; return crypto_shash_init(desc); } @@ -182,7 +182,7 @@ static int 
ghash_async_update(struct ahash_request *req) struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!irq_fpu_usable() || + if (!crypto_simd_usable() || (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); @@ -200,7 +200,7 @@ static int ghash_async_final(struct ahash_request *req) struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!irq_fpu_usable() || + if (!crypto_simd_usable() || (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); @@ -241,7 +241,7 @@ static int ghash_async_digest(struct ahash_request *req) struct ahash_request *cryptd_req = ahash_request_ctx(req); struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm; - if (!irq_fpu_usable() || + if (!crypto_simd_usable() || (in_atomic() && cryptd_ahash_queued(cryptd_tfm))) { memcpy(cryptd_req, req, sizeof(*req)); ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base); @@ -251,7 +251,6 @@ static int ghash_async_digest(struct ahash_request *req) struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm); desc->tfm = child; - desc->flags = req->base.flags; return shash_ahash_digest(req, desc); } } diff --git a/arch/x86/crypto/morus1280-avx2-glue.c b/arch/x86/crypto/morus1280-avx2-glue.c index 6634907d6ccd..679627a2a824 100644 --- a/arch/x86/crypto/morus1280-avx2-glue.c +++ b/arch/x86/crypto/morus1280-avx2-glue.c @@ -12,6 +12,7 @@ */ #include <crypto/internal/aead.h> +#include <crypto/internal/simd.h> #include <crypto/morus1280_glue.h> #include <linux/module.h> #include <asm/fpu/api.h> @@ -35,7 +36,9 @@ asmlinkage void crypto_morus1280_avx2_dec_tail(void *state, const void *src, asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor, u64 assoclen, u64 cryptlen); -MORUS1280_DECLARE_ALGS(avx2, "morus1280-avx2", 400); +MORUS1280_DECLARE_ALG(avx2, "morus1280-avx2", 400); + +static struct simd_aead_alg *simd_alg; static int __init crypto_morus1280_avx2_module_init(void) { @@ -44,14 +47,13 @@ static int __init crypto_morus1280_avx2_module_init(void) !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) return -ENODEV; - return crypto_register_aeads(crypto_morus1280_avx2_algs, - ARRAY_SIZE(crypto_morus1280_avx2_algs)); + return simd_register_aeads_compat(&crypto_morus1280_avx2_alg, 1, + &simd_alg); } static void __exit crypto_morus1280_avx2_module_exit(void) { - crypto_unregister_aeads(crypto_morus1280_avx2_algs, - ARRAY_SIZE(crypto_morus1280_avx2_algs)); + simd_unregister_aeads(&crypto_morus1280_avx2_alg, 1, &simd_alg); } module_init(crypto_morus1280_avx2_module_init); diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c index f40244eaf14d..c35c0638d0bb 100644 --- a/arch/x86/crypto/morus1280-sse2-glue.c +++ b/arch/x86/crypto/morus1280-sse2-glue.c @@ -12,6 +12,7 @@ */ #include <crypto/internal/aead.h> +#include <crypto/internal/simd.h> #include <crypto/morus1280_glue.h> #include <linux/module.h> #include <asm/fpu/api.h> @@ -35,7 +36,9 @@ asmlinkage void crypto_morus1280_sse2_dec_tail(void *state, const void *src, asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor, u64 assoclen, u64 cryptlen); -MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350); +MORUS1280_DECLARE_ALG(sse2, "morus1280-sse2", 350); + +static struct simd_aead_alg *simd_alg; static int __init 
crypto_morus1280_sse2_module_init(void) { @@ -43,14 +46,13 @@ static int __init crypto_morus1280_sse2_module_init(void) !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; - return crypto_register_aeads(crypto_morus1280_sse2_algs, - ARRAY_SIZE(crypto_morus1280_sse2_algs)); + return simd_register_aeads_compat(&crypto_morus1280_sse2_alg, 1, + &simd_alg); } static void __exit crypto_morus1280_sse2_module_exit(void) { - crypto_unregister_aeads(crypto_morus1280_sse2_algs, - ARRAY_SIZE(crypto_morus1280_sse2_algs)); + simd_unregister_aeads(&crypto_morus1280_sse2_alg, 1, &simd_alg); } module_init(crypto_morus1280_sse2_module_init); diff --git a/arch/x86/crypto/morus1280_glue.c b/arch/x86/crypto/morus1280_glue.c index 7e600f8bcdad..30fc1bd98ec3 100644 --- a/arch/x86/crypto/morus1280_glue.c +++ b/arch/x86/crypto/morus1280_glue.c @@ -11,7 +11,6 @@ * any later version. */ -#include <crypto/cryptd.h> #include <crypto/internal/aead.h> #include <crypto/internal/skcipher.h> #include <crypto/morus1280_glue.h> @@ -205,90 +204,6 @@ void crypto_morus1280_glue_init_ops(struct crypto_aead *aead, } EXPORT_SYMBOL_GPL(crypto_morus1280_glue_init_ops); -int cryptd_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key, - unsigned int keylen) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); -} -EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setkey); - -int cryptd_morus1280_glue_setauthsize(struct crypto_aead *aead, - unsigned int authsize) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); -} -EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setauthsize); - -int cryptd_morus1280_glue_encrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_encrypt(req); -} -EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_encrypt); - -int cryptd_morus1280_glue_decrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_decrypt(req); -} -EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_decrypt); - -int cryptd_morus1280_glue_init_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - const char *name = crypto_aead_alg(aead)->base.cra_driver_name; - char internal_name[CRYPTO_MAX_ALG_NAME]; - - if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name) - >= CRYPTO_MAX_ALG_NAME) - return -ENAMETOOLONG; - - cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - return 0; -} -EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_init_tfm); - -void cryptd_morus1280_glue_exit_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = 
crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} -EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_exit_tfm); - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); MODULE_DESCRIPTION("MORUS-1280 AEAD mode -- glue for x86 optimizations"); diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c index 9afaf8f8565a..32da56b3bdad 100644 --- a/arch/x86/crypto/morus640-sse2-glue.c +++ b/arch/x86/crypto/morus640-sse2-glue.c @@ -12,6 +12,7 @@ */ #include <crypto/internal/aead.h> +#include <crypto/internal/simd.h> #include <crypto/morus640_glue.h> #include <linux/module.h> #include <asm/fpu/api.h> @@ -35,7 +36,9 @@ asmlinkage void crypto_morus640_sse2_dec_tail(void *state, const void *src, asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor, u64 assoclen, u64 cryptlen); -MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400); +MORUS640_DECLARE_ALG(sse2, "morus640-sse2", 400); + +static struct simd_aead_alg *simd_alg; static int __init crypto_morus640_sse2_module_init(void) { @@ -43,14 +46,13 @@ static int __init crypto_morus640_sse2_module_init(void) !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; - return crypto_register_aeads(crypto_morus640_sse2_algs, - ARRAY_SIZE(crypto_morus640_sse2_algs)); + return simd_register_aeads_compat(&crypto_morus640_sse2_alg, 1, + &simd_alg); } static void __exit crypto_morus640_sse2_module_exit(void) { - crypto_unregister_aeads(crypto_morus640_sse2_algs, - ARRAY_SIZE(crypto_morus640_sse2_algs)); + simd_unregister_aeads(&crypto_morus640_sse2_alg, 1, &simd_alg); } module_init(crypto_morus640_sse2_module_init); diff --git a/arch/x86/crypto/morus640_glue.c b/arch/x86/crypto/morus640_glue.c index cb3a81732016..1dea33d84426 100644 --- a/arch/x86/crypto/morus640_glue.c +++ b/arch/x86/crypto/morus640_glue.c @@ -11,7 +11,6 @@ * any later version. 
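All of the per-algorithm cryptd plumbing deleted in these glue files — the setkey/setauthsize/encrypt/decrypt forwarders, the irq_fpu_usable() checks, and the "__%s" internal-name lookup in init_tfm — now lives once in the shared crypto SIMD helpers. A minimal sketch of the replacement registration pattern; my_internal_aead, my_simd_alg and the module hooks are hypothetical names, not symbols from this patch:

#include <crypto/internal/aead.h>
#include <crypto/internal/simd.h>
#include <linux/module.h>

/* Synchronous implementation; usable only where the FPU is available,
 * hence CRYPTO_ALG_INTERNAL and the "__" name prefix. */
static struct aead_alg my_internal_aead = {
        /* .setkey/.setauthsize/.encrypt/.decrypt: FPU-only helpers,
         * omitted here (hypothetical). */
        .base = {
                .cra_name        = "__gcm(example)",
                .cra_driver_name = "__gcm-example-simd",
                .cra_priority    = 400,
                .cra_flags       = CRYPTO_ALG_INTERNAL,
                .cra_blocksize   = 1,
                .cra_module      = THIS_MODULE,
        },
};

static struct simd_aead_alg *my_simd_alg;

static int __init my_mod_init(void)
{
        /* Registers the internal algorithm plus an async "gcm(example)"
         * wrapper that defers to cryptd when SIMD is not usable. */
        return simd_register_aeads_compat(&my_internal_aead, 1, &my_simd_alg);
}

static void __exit my_mod_exit(void)
{
        simd_unregister_aeads(&my_internal_aead, 1, &my_simd_alg);
}

module_init(my_mod_init);
module_exit(my_mod_exit);
MODULE_LICENSE("GPL");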
*/ -#include <crypto/cryptd.h> #include <crypto/internal/aead.h> #include <crypto/internal/skcipher.h> #include <crypto/morus640_glue.h> @@ -200,90 +199,6 @@ void crypto_morus640_glue_init_ops(struct crypto_aead *aead, } EXPORT_SYMBOL_GPL(crypto_morus640_glue_init_ops); -int cryptd_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key, - unsigned int keylen) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); -} -EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setkey); - -int cryptd_morus640_glue_setauthsize(struct crypto_aead *aead, - unsigned int authsize) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); -} -EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setauthsize); - -int cryptd_morus640_glue_encrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_encrypt(req); -} -EXPORT_SYMBOL_GPL(cryptd_morus640_glue_encrypt); - -int cryptd_morus640_glue_decrypt(struct aead_request *req) -{ - struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - struct cryptd_aead *cryptd_tfm = *ctx; - - aead = &cryptd_tfm->base; - if (irq_fpu_usable() && (!in_atomic() || - !cryptd_aead_queued(cryptd_tfm))) - aead = cryptd_aead_child(cryptd_tfm); - - aead_request_set_tfm(req, aead); - - return crypto_aead_decrypt(req); -} -EXPORT_SYMBOL_GPL(cryptd_morus640_glue_decrypt); - -int cryptd_morus640_glue_init_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead *cryptd_tfm; - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - const char *name = crypto_aead_alg(aead)->base.cra_driver_name; - char internal_name[CRYPTO_MAX_ALG_NAME]; - - if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name) - >= CRYPTO_MAX_ALG_NAME) - return -ENAMETOOLONG; - - cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL, - CRYPTO_ALG_INTERNAL); - if (IS_ERR(cryptd_tfm)) - return PTR_ERR(cryptd_tfm); - - *ctx = cryptd_tfm; - crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); - return 0; -} -EXPORT_SYMBOL_GPL(cryptd_morus640_glue_init_tfm); - -void cryptd_morus640_glue_exit_tfm(struct crypto_aead *aead) -{ - struct cryptd_aead **ctx = crypto_aead_ctx(aead); - - cryptd_free_aead(*ctx); -} -EXPORT_SYMBOL_GPL(cryptd_morus640_glue_exit_tfm); - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); MODULE_DESCRIPTION("MORUS-640 AEAD mode -- glue for x86 optimizations"); diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c index 20d815ea4b6a..f7567cbd35b6 100644 --- a/arch/x86/crypto/nhpoly1305-avx2-glue.c +++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c @@ -7,9 +7,10 @@ */ #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <crypto/nhpoly1305.h> #include <linux/module.h> -#include <asm/fpu/api.h> +#include <asm/simd.h> asmlinkage void nh_avx2(const u32 *key, const u8 *message, size_t message_len, u8 hash[NH_HASH_BYTES]); @@ -24,7 +25,7 @@ static void _nh_avx2(const u32 *key, const u8 *message, size_t message_len, static int 
nhpoly1305_avx2_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { - if (srclen < 64 || !irq_fpu_usable()) + if (srclen < 64 || !crypto_simd_usable()) return crypto_nhpoly1305_update(desc, src, srclen); do { diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c index ed68d164ce14..a661ede3b5cf 100644 --- a/arch/x86/crypto/nhpoly1305-sse2-glue.c +++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c @@ -7,9 +7,10 @@ */ #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <crypto/nhpoly1305.h> #include <linux/module.h> -#include <asm/fpu/api.h> +#include <asm/simd.h> asmlinkage void nh_sse2(const u32 *key, const u8 *message, size_t message_len, u8 hash[NH_HASH_BYTES]); @@ -24,7 +25,7 @@ static void _nh_sse2(const u32 *key, const u8 *message, size_t message_len, static int nhpoly1305_sse2_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) { - if (srclen < 64 || !irq_fpu_usable()) + if (srclen < 64 || !crypto_simd_usable()) return crypto_nhpoly1305_update(desc, src, srclen); do { diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index 88cc01506c84..6eb65b237b3c 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -11,11 +11,11 @@ #include <crypto/algapi.h> #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <crypto/poly1305.h> #include <linux/crypto.h> #include <linux/kernel.h> #include <linux/module.h> -#include <asm/fpu/api.h> #include <asm/simd.h> struct poly1305_simd_desc_ctx { @@ -126,7 +126,7 @@ static int poly1305_simd_update(struct shash_desc *desc, unsigned int bytes; /* kernel_fpu_begin/end is costly, use fallback for small updates */ - if (srclen <= 288 || !may_use_simd()) + if (srclen <= 288 || !crypto_simd_usable()) return crypto_poly1305_update(desc, src, srclen); kernel_fpu_begin(); diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 7391c7de72c7..42f177afc33a 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -22,6 +22,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> @@ -29,7 +30,7 @@ #include <linux/types.h> #include <crypto/sha.h> #include <crypto/sha1_base.h> -#include <asm/fpu/api.h> +#include <asm/simd.h> typedef void (sha1_transform_fn)(u32 *digest, const char *data, unsigned int rounds); @@ -39,7 +40,7 @@ static int sha1_update(struct shash_desc *desc, const u8 *data, { struct sha1_state *sctx = shash_desc_ctx(desc); - if (!irq_fpu_usable() || + if (!crypto_simd_usable() || (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE) return crypto_sha1_update(desc, data, len); @@ -57,7 +58,7 @@ static int sha1_update(struct shash_desc *desc, const u8 *data, static int sha1_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out, sha1_transform_fn *sha1_xform) { - if (!irq_fpu_usable()) + if (!crypto_simd_usable()) return crypto_sha1_finup(desc, data, len, out); kernel_fpu_begin(); diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 773a873d2b28..73867da3cbee 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -30,6 +30,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <linux/init.h> #include <linux/module.h> 
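Every one of the hash conversions in this series (crc32, crc32c, crct10dif, ghash, nhpoly1305, poly1305, sha1/sha256/sha512) follows the same shape: take the vectorized path only when the input is large enough to amortize the FPU save/restore and crypto_simd_usable() permits touching the FPU, and fall back to the generic C implementation otherwise. A condensed sketch of that shape; MY_BREAKEVEN, my_generic_update() and my_asm_transform() are illustrative stand-ins rather than kernel symbols:

#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <asm/fpu/api.h>
#include <asm/simd.h>

static int my_update(struct shash_desc *desc, const u8 *data, unsigned int len)
{
        /* Generic fallback: input too small to pay for XSAVE/XRSTOR, or a
         * context (hard IRQ, nested kernel-mode FPU use) in which
         * crypto_simd_usable() forbids SIMD. */
        if (len < MY_BREAKEVEN || !crypto_simd_usable())
                return my_generic_update(desc, data, len);

        kernel_fpu_begin();
        my_asm_transform(shash_desc_ctx(desc), data, len);
        kernel_fpu_end();
        return 0;
}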
#include <linux/mm.h> @@ -37,8 +38,8 @@ #include <linux/types.h> #include <crypto/sha.h> #include <crypto/sha256_base.h> -#include <asm/fpu/api.h> #include <linux/string.h> +#include <asm/simd.h> asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, u64 rounds); @@ -49,7 +50,7 @@ static int sha256_update(struct shash_desc *desc, const u8 *data, { struct sha256_state *sctx = shash_desc_ctx(desc); - if (!irq_fpu_usable() || + if (!crypto_simd_usable() || (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE) return crypto_sha256_update(desc, data, len); @@ -67,7 +68,7 @@ static int sha256_update(struct shash_desc *desc, const u8 *data, static int sha256_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out, sha256_transform_fn *sha256_xform) { - if (!irq_fpu_usable()) + if (!crypto_simd_usable()) return crypto_sha256_finup(desc, data, len, out); kernel_fpu_begin(); diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index f1b811b60ba6..458356a3f124 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -28,16 +28,16 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/cryptohash.h> +#include <linux/string.h> #include <linux/types.h> #include <crypto/sha.h> #include <crypto/sha512_base.h> -#include <asm/fpu/api.h> - -#include <linux/string.h> +#include <asm/simd.h> asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data, u64 rounds); @@ -49,7 +49,7 @@ static int sha512_update(struct shash_desc *desc, const u8 *data, { struct sha512_state *sctx = shash_desc_ctx(desc); - if (!irq_fpu_usable() || + if (!crypto_simd_usable() || (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE) return crypto_sha512_update(desc, data, len); @@ -67,7 +67,7 @@ static int sha512_update(struct shash_desc *desc, const u8 *data, static int sha512_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out, sha512_transform_fn *sha512_xform) { - if (!irq_fpu_usable()) + if (!crypto_simd_usable()) return crypto_sha512_finup(desc, data, len, out); kernel_fpu_begin(); diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 7bc105f47d21..a986b3c8294c 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -25,12 +25,14 @@ #include <linux/uprobes.h> #include <linux/livepatch.h> #include <linux/syscalls.h> +#include <linux/uaccess.h> #include <asm/desc.h> #include <asm/traps.h> #include <asm/vdso.h> -#include <linux/uaccess.h> #include <asm/cpufeature.h> +#include <asm/fpu/api.h> +#include <asm/nospec-branch.h> #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> @@ -196,6 +198,13 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) exit_to_usermode_loop(regs, cached_flags); + /* Reload ti->flags; we may have rescheduled above. */ + cached_flags = READ_ONCE(ti->flags); + + fpregs_assert_state_consistent(); + if (unlikely(cached_flags & _TIF_NEED_FPU_LOAD)) + switch_fpu_return(); + #ifdef CONFIG_COMPAT /* * Compat syscalls set TS_COMPAT. 
Make sure we clear it before @@ -212,6 +221,8 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) #endif user_enter_irqoff(); + + mds_user_clear_cpu_buffers(); } #define SYSCALL_EXIT_WORK_FLAGS \ diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 1f9607ed087c..4cd5f982b1e5 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -398,7 +398,12 @@ 384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl 385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents 386 i386 rseq sys_rseq __ia32_sys_rseq -# don't use numbers 387 through 392, add new calls at the end +387 i386 open_tree sys_open_tree __ia32_sys_open_tree +388 i386 move_mount sys_move_mount __ia32_sys_move_mount +389 i386 fsopen sys_fsopen __ia32_sys_fsopen +390 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig +391 i386 fsmount sys_fsmount __ia32_sys_fsmount +392 i386 fspick sys_fspick __ia32_sys_fspick 393 i386 semget sys_semget __ia32_sys_semget 394 i386 semctl sys_semctl __ia32_compat_sys_semctl 395 i386 shmget sys_shmget __ia32_sys_shmget diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 92ee0b4378d4..64ca0d06259a 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -343,6 +343,12 @@ 332 common statx __x64_sys_statx 333 common io_pgetevents __x64_sys_io_pgetevents 334 common rseq __x64_sys_rseq +335 common open_tree __x64_sys_open_tree +336 common move_mount __x64_sys_move_mount +337 common fsopen __x64_sys_fsopen +338 common fsconfig __x64_sys_fsconfig +339 common fsmount __x64_sys_fsmount +340 common fspick __x64_sys_fspick # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal __x64_sys_pidfd_send_signal diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 7cdd7b13bbda..890a3fb5706f 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -23,7 +23,7 @@ #include <linux/device.h> #include <linux/coredump.h> -#include <asm-generic/sizes.h> +#include <linux/sizes.h> #include <asm/perf_event.h> #include "../perf_event.h" diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 4d5fcd47ab75..629d1ee05599 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -221,8 +221,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size, void __user **fpstate) { - struct fpu *fpu = ¤t->thread.fpu; - unsigned long sp; + unsigned long sp, fx_aligned, math_size; /* Default to using normal stack */ sp = regs->sp; @@ -236,15 +235,11 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, ksig->ka.sa.sa_restorer) sp = (unsigned long) ksig->ka.sa.sa_restorer; - if (fpu->initialized) { - unsigned long fx_aligned, math_size; - - sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size); - *fpstate = (struct _fpstate_32 __user *) sp; - if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned, - math_size) < 0) - return (void __user *) -1L; - } + sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size); + *fpstate = (struct _fpstate_32 __user *) sp; + if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned, + math_size) < 0) + return (void __user *) -1L; sp -= frame_size; /* Align the stack pointer according to the i386 ABI, diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h index 981ff9479648..75f27ee2c263 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -344,6 +344,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ @@ -382,5 +383,7 @@ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ #define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ +#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index ce4d176b3d13..6b15a24930e0 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -13,14 +13,7 @@ #include <asm/swiotlb.h> #include <linux/dma-contiguous.h> -#ifdef CONFIG_ISA -# define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) -#else -# define ISA_DMA_BIT_MASK DMA_BIT_MASK(32) -#endif - extern int iommu_merge; -extern struct device x86_dma_fallback_dev; extern int panic_on_overflow; extern const struct dma_map_ops *dma_ops; @@ -30,7 +23,4 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return dma_ops; } -bool arch_dma_alloc_attrs(struct device **dev); -#define arch_dma_alloc_attrs arch_dma_alloc_attrs - #endif diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index b56d504af654..b774c52e5411 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -10,6 +10,7 @@ #ifndef _ASM_X86_FPU_API_H #define _ASM_X86_FPU_API_H +#include <linux/bottom_half.h> /* * Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It @@ -21,6 +22,36 @@ extern void kernel_fpu_begin(void); extern void kernel_fpu_end(void); extern bool irq_fpu_usable(void); +extern void fpregs_mark_activate(void); + +/* + * Use fpregs_lock() while editing CPU's FPU registers or fpu->state. + * A context switch will (and softirq might) save CPU's FPU registers to + * fpu->state and set TIF_NEED_FPU_LOAD leaving CPU's FPU registers in + * a random state. + */ +static inline void fpregs_lock(void) +{ + preempt_disable(); + local_bh_disable(); +} + +static inline void fpregs_unlock(void) +{ + local_bh_enable(); + preempt_enable(); +} + +#ifdef CONFIG_X86_DEBUG_FPU +extern void fpregs_assert_state_consistent(void); +#else +static inline void fpregs_assert_state_consistent(void) { } +#endif + +/* + * Load the task FPU state before returning to userspace. + */ +extern void switch_fpu_return(void); /* * Query the presence of one or more xfeatures. Works on any legacy CPU as well. 
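The fpregs_lock()/fpregs_unlock() pair introduced above is the synchronization primitive that makes deferred restoring safe: while the lock is held, neither preemption nor a softirq can save or repurpose the CPU's FPU registers, so fpu->state and the live registers can be read or edited consistently. A hedged sketch of the intended call pattern — example_edit_fpu_state() and modify_state() are illustrative, not functions from this patch:

static void example_edit_fpu_state(void)
{
        struct fpu *fpu = &current->thread.fpu;

        fpregs_lock();          /* preempt_disable() + local_bh_disable() */
        if (test_thread_flag(TIF_NEED_FPU_LOAD)) {
                /* Registers are not live; the memory image is the
                 * authoritative copy and may be edited directly. */
                modify_state(&fpu->state);
        } else {
                /* Registers are live: flush them to memory first, then
                 * force a reload on the way back to user space. */
                copy_fpregs_to_fpstate(fpu);
                modify_state(&fpu->state);
                set_thread_flag(TIF_NEED_FPU_LOAD);
        }
        fpregs_unlock();
}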
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 745a19d34f23..9e27fa05a7ae 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -14,6 +14,7 @@ #include <linux/compat.h> #include <linux/sched.h> #include <linux/slab.h> +#include <linux/mm.h> #include <asm/user.h> #include <asm/fpu/api.h> @@ -24,14 +25,12 @@ /* * High level FPU state handling functions: */ -extern void fpu__initialize(struct fpu *fpu); extern void fpu__prepare_read(struct fpu *fpu); extern void fpu__prepare_write(struct fpu *fpu); extern void fpu__save(struct fpu *fpu); -extern void fpu__restore(struct fpu *fpu); extern int fpu__restore_sig(void __user *buf, int ia32_frame); extern void fpu__drop(struct fpu *fpu); -extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu); +extern int fpu__copy(struct task_struct *dst, struct task_struct *src); extern void fpu__clear(struct fpu *fpu); extern int fpu__exception_code(struct fpu *fpu, int trap_nr); extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate); @@ -122,6 +121,21 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu); err; \ }) +#define kernel_insn_err(insn, output, input...) \ +({ \ + int err; \ + asm volatile("1:" #insn "\n\t" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err), output \ + : "0"(0), input); \ + err; \ +}) + #define kernel_insn(insn, output, input...) \ asm volatile("1:" #insn "\n\t" \ "2:\n" \ @@ -150,6 +164,14 @@ static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } +static inline int copy_kernel_to_fxregs_err(struct fxregs_state *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else + return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) { if (IS_ENABLED(CONFIG_X86_32)) @@ -163,6 +185,11 @@ static inline void copy_kernel_to_fregs(struct fregs_state *fx) kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } +static inline int copy_kernel_to_fregs_err(struct fregs_state *fx) +{ + return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + static inline int copy_user_to_fregs(struct fregs_state __user *fx) { return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); @@ -363,6 +390,21 @@ static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) } /* + * Restore xstate from kernel space xsave area, return an error code instead of + * an exception. + */ +static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + int err; + + XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); + + return err; +} + +/* * These must be called with preempt disabled. Returns * 'true' if the FPU state is still intact and we can * keep registers active. @@ -487,6 +529,25 @@ static inline void fpregs_activate(struct fpu *fpu) } /* + * Internal helper, do not use directly. Use switch_fpu_return() instead. 
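The kernel_insn_err() macro added here differs from kernel_insn() only in that a fault during the restore instruction returns -1 through the exception table instead of crashing, so callers can restore from a kernel-side buffer and handle failure gracefully. Hand-expanded for FRSTOR, the generated pattern looks roughly like this (an illustrative expansion, not a new API):

static inline int example_frstor_err(struct fregs_state *fx)
{
        int err;

        asm volatile("1: frstor %[fx]\n\t"
                     "2:\n"
                     ".section .fixup,\"ax\"\n"
                     "3: movl $-1, %[err]\n"
                     "   jmp 2b\n"
                     ".previous\n"
                     _ASM_EXTABLE(1b, 3b)       /* a fault at 1: jumps to 3: */
                     : [err] "=r" (err), "=m" (*fx)
                     : "0" (0), [fx] "m" (*fx));
        return err;     /* 0 on success, -1 if FRSTOR faulted */
}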
+ */ +static inline void __fpregs_load_activate(void) +{ + struct fpu *fpu = &current->thread.fpu; + int cpu = smp_processor_id(); + + if (WARN_ON_ONCE(current->mm == NULL)) + return; + + if (!fpregs_state_valid(fpu, cpu)) { + copy_kernel_to_fpregs(&fpu->state); + fpregs_activate(fpu); + fpu->last_cpu = cpu; + } + clear_thread_flag(TIF_NEED_FPU_LOAD); +} + +/* * FPU state switching for scheduling. * * This is a two-stage process: @@ -494,12 +555,23 @@ static inline void fpregs_activate(struct fpu *fpu) * - switch_fpu_prepare() saves the old state. * This is done within the context of the old process. * - * - switch_fpu_finish() restores the new state as - * necessary. + * - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state + * will get loaded on return to userspace, or when the kernel needs it. + * + * If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers + * are saved in the current thread's FPU register state. + * + * If TIF_NEED_FPU_LOAD is set then CPU's FPU registers may not + * hold current()'s FPU registers. It is required to load the + * registers before returning to userland or using the content + * otherwise. + * + * The FPU context is only stored/restored for a user task and + * ->mm is used to distinguish between kernel and user threads. */ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) { + if (static_cpu_has(X86_FEATURE_FPU) && current->mm) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else @@ -507,8 +579,7 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) /* But leave fpu_fpregs_owner_ctx! */ trace_x86_fpu_regs_deactivated(old_fpu); - } else - old_fpu->last_cpu = -1; + } } /* @@ -516,36 +587,32 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) */ /* - * Set up the userspace FPU context for the new task, if the task - * has used the FPU. + * Load PKRU from the FPU context if available. Delay loading of the + * complete FPU state until the return to userland. */ -static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu) +static inline void switch_fpu_finish(struct fpu *new_fpu) { - bool preload = static_cpu_has(X86_FEATURE_FPU) && - new_fpu->initialized; + u32 pkru_val = init_pkru_value; + struct pkru_state *pk; - if (preload) { - if (!fpregs_state_valid(new_fpu, cpu)) - copy_kernel_to_fpregs(&new_fpu->state); - fpregs_activate(new_fpu); - } -} + if (!static_cpu_has(X86_FEATURE_FPU)) + return; -/* - * Needs to be preemption-safe. - * - * NOTE! user_fpu_begin() must be used only immediately before restoring - * the save state. It does not do any saving/restoring on its own. In - * lazy FPU mode, it is just an optimization to avoid a #NM exception, - * the task can lose the FPU right after preempt_enable(). - */ -static inline void user_fpu_begin(void) -{ - struct fpu *fpu = &current->thread.fpu; + set_thread_flag(TIF_NEED_FPU_LOAD); + + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) + return; - preempt_disable(); - fpregs_activate(fpu); - preempt_enable(); + /* + * PKRU state is switched eagerly because it needs to be valid before we + * return to userland e.g. for a copy_to_user() operation.
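Taken together, the two stages described above reduce the context switch to at most one register save, with the reload deferred until the next transition to user mode. A sketch of how a task-switch path is expected to use the pair (an illustrative skeleton, not the verbatim __switch_to()):

struct task_struct *example_switch_to(struct task_struct *prev_p,
                                      struct task_struct *next_p)
{
        struct fpu *prev_fpu = &prev_p->thread.fpu;
        struct fpu *next_fpu = &next_p->thread.fpu;
        int cpu = smp_processor_id();

        /* Stage 1: snapshot the outgoing user task's registers. */
        switch_fpu_prepare(prev_fpu, cpu);

        /* ... stacks, segments and per-CPU state switch here ... */

        /* Stage 2: load PKRU eagerly, defer the rest by setting
         * TIF_NEED_FPU_LOAD for the return-to-user path. */
        switch_fpu_finish(next_fpu);

        return prev_p;
}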
+ */ + if (current->mm) { + pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU); + if (pk) + pkru_val = pk->pkru; + } + __write_pkru(pkru_val); } /* diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h index 44bbc39a57b3..7fb516b6893a 100644 --- a/arch/x86/include/asm/fpu/signal.h +++ b/arch/x86/include/asm/fpu/signal.h @@ -22,7 +22,7 @@ int ia32_setup_frame(int sig, struct ksignal *ksig, extern void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk); -extern void convert_to_fxsr(struct task_struct *tsk, +extern void convert_to_fxsr(struct fxregs_state *fxsave, const struct user_i387_ia32_struct *env); unsigned long diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 2e32e178e064..f098f6cab94b 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -294,15 +294,6 @@ struct fpu { unsigned int last_cpu; /* - * @initialized: - * - * This flag indicates whether this context is initialized: if the task - * is not running then we can restore from this context, if the task - * is running then we should save into this context. - */ - unsigned char initialized; - - /* * @avx512_timestamp: * * Records the timestamp of AVX512 use during last context switch. diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 48581988d78c..7e42b285c856 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -2,9 +2,11 @@ #ifndef __ASM_X86_XSAVE_H #define __ASM_X86_XSAVE_H +#include <linux/uaccess.h> #include <linux/types.h> + #include <asm/processor.h> -#include <linux/uaccess.h> +#include <asm/user.h> /* Bit 63 of XCR0 is reserved for future expansion */ #define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63))) @@ -46,8 +48,8 @@ extern void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask); void fpu__xstate_clear_all_cpu_caps(void); -void *get_xsave_addr(struct xregs_state *xsave, int xstate); -const void *get_xsave_field_ptr(int xstate_field); +void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr); +const void *get_xsave_field_ptr(int xfeature_nr); int using_compacted_format(void); int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h index 7469d321f072..f65cfb48cfdd 100644 --- a/arch/x86/include/asm/hugetlb.h +++ b/arch/x86/include/asm/hugetlb.h @@ -17,8 +17,4 @@ static inline void arch_clear_hugepage_flags(struct page *page) { } -#ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE -static inline bool gigantic_page_supported(void) { return true; } -#endif - #endif /* _ASM_X86_HUGETLB_H */ diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 2bdbbbcfa393..cdf44aa9a501 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: GPL-2.0 */ /* * This file contains definitions from Hyper-V Hypervisor Top-Level Functional diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 058e40fed167..8a0e56e1dcc9 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -6,6 +6,8 @@ #ifndef __ASSEMBLY__ +#include <asm/nospec-branch.h> + /* 
Provide __cpuidle; we can't safely include <linux/cpu.h> */ #define __cpuidle __attribute__((__section__(".cpuidle.text"))) @@ -54,11 +56,13 @@ static inline void native_irq_enable(void) static inline __cpuidle void native_safe_halt(void) { + mds_idle_clear_cpu_buffers(); asm volatile("sti; hlt": : :"memory"); } static inline __cpuidle void native_halt(void) { + mds_idle_clear_cpu_buffers(); asm volatile("hlt": : :"memory"); } diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1378518cf63f..88dd202c8b00 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_MSR_INDEX_H #define _ASM_X86_MSR_INDEX_H +#include <linux/bits.h> + /* * CPU model specific register (MSR) numbers. * @@ -40,14 +42,14 @@ /* Intel MSRs. Some also available on other CPUs */ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ -#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ +#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ -#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ +#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ -#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ -#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ +#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ #define MSR_PPIN_CTL 0x0000004e #define MSR_PPIN 0x0000004f @@ -69,20 +71,25 @@ #define MSR_MTRRcap 0x000000fe #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a -#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ -#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ -#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH (1 << 3) /* Skip L1D flush on vmentry */ -#define ARCH_CAP_SSB_NO (1 << 4) /* - * Not susceptible to Speculative Store Bypass - * attack, so no Speculative Store Bypass - * control required. - */ +#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ +#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ +#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ +#define ARCH_CAP_SSB_NO BIT(4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Speculative Store Bypass + * control required. + */ +#define ARCH_CAP_MDS_NO BIT(5) /* + * Not susceptible to + * Microarchitectural Data + * Sampling (MDS) vulnerabilities. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b -#define L1D_FLUSH (1 << 0) /* - * Writeback and invalidate the - * L1 data cache. - */ +#define L1D_FLUSH BIT(0) /* + * Writeback and invalidate the + * L1 data cache. 
+ */ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 39a2fb29378a..eb0f80ce8524 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -6,6 +6,7 @@ #include <linux/sched/idle.h> #include <asm/cpufeature.h> +#include <asm/nospec-branch.h> #define MWAIT_SUBSTATE_MASK 0xf #define MWAIT_CSTATE_MASK 0xf @@ -40,6 +41,8 @@ static inline void __monitorx(const void *eax, unsigned long ecx, static inline void __mwait(unsigned long eax, unsigned long ecx) { + mds_idle_clear_cpu_buffers(); + /* "mwait %eax, %ecx;" */ asm volatile(".byte 0x0f, 0x01, 0xc9;" :: "a" (eax), "c" (ecx)); @@ -74,6 +77,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) static inline void __mwaitx(unsigned long eax, unsigned long ebx, unsigned long ecx) { + /* No MDS buffer clear as this is AMD/HYGON only */ + /* "mwaitx %eax, %ebx, %ecx;" */ asm volatile(".byte 0x0f, 0x01, 0xfb;" :: "a" (eax), "b" (ebx), "c" (ecx)); @@ -81,6 +86,8 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, static inline void __sti_mwait(unsigned long eax, unsigned long ecx) { + mds_idle_clear_cpu_buffers(); + trace_hardirqs_on(); /* "mwait %eax, %ecx;" */ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index daf25b60c9e3..109f974f9835 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -308,6 +308,56 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); +DECLARE_STATIC_KEY_FALSE(mds_user_clear); +DECLARE_STATIC_KEY_FALSE(mds_idle_clear); + +#include <asm/segment.h> + +/** + * mds_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * This uses the otherwise unused and obsolete VERW instruction in + * combination with microcode which triggers a CPU buffer flush when the + * instruction is executed. + */ +static inline void mds_clear_cpu_buffers(void) +{ + static const u16 ds = __KERNEL_DS; + + /* + * Has to be the memory-operand variant because only that + * guarantees the CPU buffer flush functionality according to + * documentation. The register-operand variant does not. + * Works with any segment selector, but a valid writable + * data segment is the fastest variant. + * + * "cc" clobber is required because VERW modifies ZF. 
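The two static keys split the clearing policy: mds_user_clear guards the return-to-user path, while mds_idle_clear guards the halt/mwait entry points patched in the hunks above. The idle key is meant to be toggled with SMT state, since idling with stale buffer contents only matters when a sibling thread can sample them. An illustrative toggle, not the verbatim update logic:

        /* On an SMT topology change, e.g. driven from arch_smt_update(): */
        if (sched_smt_active())
                static_branch_enable(&mds_idle_clear);
        else
                static_branch_disable(&mds_idle_clear);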
+ */ + asm volatile("verw %[ds]" : : [ds] "m" (ds) : "cc"); +} + +/** + * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled + */ +static inline void mds_user_clear_cpu_buffers(void) +{ + if (static_branch_likely(&mds_user_clear)) + mds_clear_cpu_buffers(); +} + +/** + * mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled + */ +static inline void mds_idle_clear_cpu_buffers(void) +{ + if (static_branch_likely(&mds_idle_clear)) + mds_clear_cpu_buffers(); +} + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 3a221942f805..5e0509b41986 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -23,6 +23,8 @@ #ifndef __ASSEMBLY__ #include <asm/x86_init.h> +#include <asm/fpu/xstate.h> +#include <asm/fpu/api.h> extern pgd_t early_top_pgt[PTRS_PER_PGD]; int __init __early_make_pgtable(unsigned long address, pmdval_t pmd); @@ -127,14 +129,29 @@ static inline int pte_dirty(pte_t pte) static inline u32 read_pkru(void) { if (boot_cpu_has(X86_FEATURE_OSPKE)) - return __read_pkru(); + return rdpkru(); return 0; } static inline void write_pkru(u32 pkru) { - if (boot_cpu_has(X86_FEATURE_OSPKE)) - __write_pkru(pkru); + struct pkru_state *pk; + + if (!boot_cpu_has(X86_FEATURE_OSPKE)) + return; + + pk = get_xsave_addr(¤t->thread.fpu.state.xsave, XFEATURE_PKRU); + + /* + * The PKRU value in xstate needs to be in sync with the value that is + * written to the CPU. The FPU restore on return to userland would + * otherwise load the previous value again. + */ + fpregs_lock(); + if (pk) + pk->pkru = pkru; + __write_pkru(pkru); + fpregs_unlock(); } static inline int pte_young(pte_t pte) @@ -1358,6 +1375,12 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) #define PKRU_WD_BIT 0x2 #define PKRU_BITS_PER_PKEY 2 +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS +extern u32 init_pkru_value; +#else +#define init_pkru_value 0 +#endif + static inline bool __pkru_allows_read(u32 pkru, u16 pkey) { int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 7e99ef67bff0..c34a35c78618 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -978,4 +978,10 @@ enum l1tf_mitigations { extern enum l1tf_mitigations l1tf_mitigation; +enum mds_mitigations { + MDS_MITIGATION_OFF, + MDS_MITIGATION_FULL, + MDS_MITIGATION_VMWERV, +}; + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 43c029cdc3fe..0a3c4cab39db 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -92,7 +92,7 @@ static inline void native_write_cr8(unsigned long val) #endif #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -static inline u32 __read_pkru(void) +static inline u32 rdpkru(void) { u32 ecx = 0; u32 edx, pkru; @@ -107,7 +107,7 @@ static inline u32 __read_pkru(void) return pkru; } -static inline void __write_pkru(u32 pkru) +static inline void wrpkru(u32 pkru) { u32 ecx = 0, edx = 0; @@ -118,8 +118,21 @@ static inline void __write_pkru(u32 pkru) asm volatile(".byte 0x0f,0x01,0xef\n\t" : : "a" (pkru), "c"(ecx), "d"(edx)); } + +static inline void __write_pkru(u32 pkru) +{ + /* + * WRPKRU is relatively expensive compared to RDPKRU. + * Avoid WRPKRU when it would not change the value. 
+ */ + if (pkru == rdpkru()) + return; + + wrpkru(pkru); +} + #else -static inline u32 __read_pkru(void) +static inline u32 rdpkru(void) { return 0; } diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 4c305471ec33..b05ad16174e5 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -105,7 +105,7 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(&regs->bx + i, args, n * sizeof(args[0])); } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_I386; } @@ -160,10 +160,12 @@ static inline void syscall_set_arguments(struct task_struct *task, } } -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { /* x32 tasks should be considered AUDIT_ARCH_X86_64. */ - return in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; + return (IS_ENABLED(CONFIG_IA32_EMULATION) && + task->thread_info.status & TS_COMPAT) + ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; } #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e0eccbcb8447..f9453536f9bb 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -88,6 +88,7 @@ struct thread_info { #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_PATCH_PENDING 13 /* pending live patching update */ +#define TIF_NEED_FPU_LOAD 14 /* load FPU on return to userspace */ #define TIF_NOCPUID 15 /* CPUID is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* IA32 compatibility process */ @@ -117,6 +118,7 @@ struct thread_info { #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) +#define _TIF_NEED_FPU_LOAD (1 << TIF_NEED_FPU_LOAD) #define _TIF_NOCPUID (1 << TIF_NOCPUID) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h index e0e6d7f21399..6b1e87194809 100644 --- a/arch/x86/include/asm/trace/exceptions.h +++ b/arch/x86/include/asm/trace/exceptions.h @@ -30,7 +30,7 @@ DECLARE_EVENT_CLASS(x86_exceptions, __entry->error_code = error_code; ), - TP_printk("address=%pf ip=%pf error_code=0x%lx", + TP_printk("address=%ps ip=%ps error_code=0x%lx", (void *)__entry->address, (void *)__entry->ip, __entry->error_code) ); diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h index 069c04be1507..879b77792f94 100644 --- a/arch/x86/include/asm/trace/fpu.h +++ b/arch/x86/include/asm/trace/fpu.h @@ -13,22 +13,22 @@ DECLARE_EVENT_CLASS(x86_fpu, TP_STRUCT__entry( __field(struct fpu *, fpu) - __field(bool, initialized) + __field(bool, load_fpu) __field(u64, xfeatures) __field(u64, xcomp_bv) ), TP_fast_assign( __entry->fpu = fpu; - __entry->initialized = fpu->initialized; + __entry->load_fpu = test_thread_flag(TIF_NEED_FPU_LOAD); if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { __entry->xfeatures = fpu->state.xsave.header.xfeatures; __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; } ), - TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx", + TP_printk("x86/fpu: %p load: %d xfeatures: %llx xcomp_bv: %llx", __entry->fpu, - __entry->initialized, + __entry->load_fpu, __entry->xfeatures, __entry->xcomp_bv
) @@ -64,11 +64,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated, TP_ARGS(fpu) ); -DEFINE_EVENT(x86_fpu, x86_fpu_activate_state, - TP_PROTO(struct fpu *fpu), - TP_ARGS(fpu) -); - DEFINE_EVENT(x86_fpu, x86_fpu_init_state, TP_PROTO(struct fpu *fpu), TP_ARGS(fpu) diff --git a/arch/x86/include/uapi/asm/sockios.h b/arch/x86/include/uapi/asm/sockios.h deleted file mode 100644 index def6d4746ee7..000000000000 --- a/arch/x86/include/uapi/asm/sockios.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/sockios.h> diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8dcbf6890714..9fc92e4539d8 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -197,7 +197,7 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) } static int __init -acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) +acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_local_x2apic *processor = NULL; #ifdef CONFIG_X86_X2APIC @@ -210,7 +210,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) if (BAD_MADT_ENTRY(processor, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); #ifdef CONFIG_X86_X2APIC apic_id = processor->local_apic_id; @@ -242,7 +242,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end) } static int __init -acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_local_apic *processor = NULL; @@ -251,7 +251,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) if (BAD_MADT_ENTRY(processor, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); /* Ignore invalid ID */ if (processor->id == 0xff) @@ -272,7 +272,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end) } static int __init -acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) +acpi_parse_sapic(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_local_sapic *processor = NULL; @@ -281,7 +281,7 @@ acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) if (BAD_MADT_ENTRY(processor, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */ processor->processor_id, /* ACPI ID */ @@ -291,7 +291,7 @@ acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end) } static int __init -acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, +acpi_parse_lapic_addr_ovr(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_local_apic_override *lapic_addr_ovr = NULL; @@ -301,7 +301,7 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, if (BAD_MADT_ENTRY(lapic_addr_ovr, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); acpi_lapic_addr = lapic_addr_ovr->address; @@ -309,7 +309,7 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header, } static int __init -acpi_parse_x2apic_nmi(struct acpi_subtable_header *header, +acpi_parse_x2apic_nmi(union acpi_subtable_headers *header, const unsigned long end) { struct acpi_madt_local_x2apic_nmi *x2apic_nmi = NULL; @@ 
-319,7 +319,7 @@ acpi_parse_x2apic_nmi(struct acpi_subtable_header *header, if (BAD_MADT_ENTRY(x2apic_nmi, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); if (x2apic_nmi->lint != 1) printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); @@ -328,7 +328,7 @@ acpi_parse_x2apic_nmi(struct acpi_subtable_header *header, } static int __init -acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_local_apic_nmi *lapic_nmi = NULL; @@ -337,7 +337,7 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e if (BAD_MADT_ENTRY(lapic_nmi, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); if (lapic_nmi->lint != 1) printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n"); @@ -449,7 +449,7 @@ static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity, } static int __init -acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_ioapic(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_io_apic *ioapic = NULL; struct ioapic_domain_cfg cfg = { @@ -462,7 +462,7 @@ acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) if (BAD_MADT_ENTRY(ioapic, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); /* Statically assign IRQ numbers for IOAPICs hosting legacy IRQs */ if (ioapic->global_irq_base < nr_legacy_irqs()) @@ -508,7 +508,7 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, } static int __init -acpi_parse_int_src_ovr(struct acpi_subtable_header * header, +acpi_parse_int_src_ovr(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_interrupt_override *intsrc = NULL; @@ -518,7 +518,7 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, if (BAD_MADT_ENTRY(intsrc, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { acpi_sci_ioapic_setup(intsrc->source_irq, @@ -550,7 +550,7 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, } static int __init -acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end) +acpi_parse_nmi_src(union acpi_subtable_headers * header, const unsigned long end) { struct acpi_madt_nmi_source *nmi_src = NULL; @@ -559,7 +559,7 @@ acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end if (BAD_MADT_ENTRY(nmi_src, end)) return -EINVAL; - acpi_table_print_madt_entry(header); + acpi_table_print_madt_entry(&header->common); /* TBD: Support nimsrc entries? 
*/ diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 2c0aa34af69c..bf7f13ea3c64 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -233,9 +233,6 @@ static dma_addr_t gart_map_page(struct device *dev, struct page *page, unsigned long bus; phys_addr_t paddr = page_to_phys(page) + offset; - if (!dev) - dev = &x86_dma_fallback_dev; - if (!need_iommu(dev, paddr, size)) return paddr; @@ -392,9 +389,6 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, if (nents == 0) return 0; - if (!dev) - dev = &x86_dma_fallback_dev; - out = 0; start = 0; start_sg = sg; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 29630393f300..03b4cc0ec3a7 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -37,6 +37,7 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); +static void __init mds_select_mitigation(void); /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ u64 x86_spec_ctrl_base; @@ -63,6 +64,13 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); /* Control unconditional IBPB in switch_mm() */ DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); +/* Control MDS CPU buffer clear before returning to user space */ +DEFINE_STATIC_KEY_FALSE(mds_user_clear); +EXPORT_SYMBOL_GPL(mds_user_clear); +/* Control MDS CPU buffer clear before idling (halt, mwait) */ +DEFINE_STATIC_KEY_FALSE(mds_idle_clear); +EXPORT_SYMBOL_GPL(mds_idle_clear); + void __init check_bugs(void) { identify_boot_cpu(); @@ -101,6 +109,10 @@ void __init check_bugs(void) l1tf_select_mitigation(); + mds_select_mitigation(); + + arch_smt_update(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. 
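/*
 * Illustrative sketch, not part of this series: mds_user_clear and
 * mds_idle_clear added above are static keys, so the buffer-clear hook
 * costs a single patched-out NOP while the mitigation is disabled. The
 * gating pattern, reduced to a standalone analogue with hypothetical
 * names (example_clear_key, example_exit_to_user):
 */
#include <linux/jump_label.h>
#include <asm/nospec-branch.h>

DEFINE_STATIC_KEY_FALSE(example_clear_key);

static __always_inline void example_exit_to_user(void)
{
	/*
	 * Compiles to a NOP until static_branch_enable(&example_clear_key)
	 * live-patches it into a jump to the clearing sequence.
	 */
	if (static_branch_unlikely(&example_clear_key))
		mds_clear_cpu_buffers();	/* VERW-based clear */
}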
@@ -207,6 +219,61 @@ static void x86_amd_ssb_disable(void) } #undef pr_fmt +#define pr_fmt(fmt) "MDS: " fmt + +/* Default mitigation for MDS-affected CPUs */ +static enum mds_mitigations mds_mitigation __ro_after_init = MDS_MITIGATION_FULL; +static bool mds_nosmt __ro_after_init = false; + +static const char * const mds_strings[] = { + [MDS_MITIGATION_OFF] = "Vulnerable", + [MDS_MITIGATION_FULL] = "Mitigation: Clear CPU buffers", + [MDS_MITIGATION_VMWERV] = "Vulnerable: Clear CPU buffers attempted, no microcode", +}; + +static void __init mds_select_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) { + mds_mitigation = MDS_MITIGATION_OFF; + return; + } + + if (mds_mitigation == MDS_MITIGATION_FULL) { + if (!boot_cpu_has(X86_FEATURE_MD_CLEAR)) + mds_mitigation = MDS_MITIGATION_VMWERV; + + static_branch_enable(&mds_user_clear); + + if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) && + (mds_nosmt || cpu_mitigations_auto_nosmt())) + cpu_smt_disable(false); + } + + pr_info("%s\n", mds_strings[mds_mitigation]); +} + +static int __init mds_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_MDS)) + return 0; + + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + mds_mitigation = MDS_MITIGATION_OFF; + else if (!strcmp(str, "full")) + mds_mitigation = MDS_MITIGATION_FULL; + else if (!strcmp(str, "full,nosmt")) { + mds_mitigation = MDS_MITIGATION_FULL; + mds_nosmt = true; + } + + return 0; +} +early_param("mds", mds_cmdline); + +#undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = @@ -575,9 +642,6 @@ specv2_set_mode: /* Set up IBPB and STIBP depending on the general spectre V2 command */ spectre_v2_user_select_mitigation(cmd); - - /* Enable STIBP if appropriate */ - arch_smt_update(); } static void update_stibp_msr(void * __unused) @@ -611,6 +675,31 @@ static void update_indir_branch_cond(void) static_branch_disable(&switch_to_cond_stibp); } +#undef pr_fmt +#define pr_fmt(fmt) fmt + +/* Update the static key controlling the MDS CPU buffer clear in idle */ +static void update_mds_branch_idle(void) +{ + /* + * Enable the idle clearing if SMT is active on CPUs which are + * affected only by MSBDS and not any other MDS variant. + * + * The other variants cannot be mitigated when SMT is enabled, so + * clearing the buffers on idle just to prevent the Store Buffer + * repartitioning leak would be a window dressing exercise. + */ + if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) + return; + + if (sched_smt_active()) + static_branch_enable(&mds_idle_clear); + else + static_branch_disable(&mds_idle_clear); +} + +#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" + void arch_smt_update(void) { /* Enhanced IBRS implies STIBP. No update required. 
*/ @@ -632,6 +721,17 @@ void arch_smt_update(void) break; } + switch (mds_mitigation) { + case MDS_MITIGATION_FULL: + case MDS_MITIGATION_VMWERV: + if (sched_smt_active() && !boot_cpu_has(X86_BUG_MSBDS_ONLY)) + pr_warn_once(MDS_MSG_SMT); + update_mds_branch_idle(); + break; + case MDS_MITIGATION_OFF: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -1043,7 +1143,7 @@ static void __init l1tf_select_mitigation(void) pr_info("You may make it effective by booting the kernel with mem=%llu parameter.\n", half_pa); pr_info("However, doing so will make a part of your RAM unusable.\n"); - pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html might help you decide.\n"); + pr_info("Reading https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html might help you decide.\n"); return; } @@ -1076,6 +1176,7 @@ static int __init l1tf_cmdline(char *str) early_param("l1tf", l1tf_cmdline); #undef pr_fmt +#define pr_fmt(fmt) fmt #ifdef CONFIG_SYSFS @@ -1114,6 +1215,23 @@ static ssize_t l1tf_show_state(char *buf) } #endif +static ssize_t mds_show_state(char *buf) +{ + if (!hypervisor_is_type(X86_HYPER_NATIVE)) { + return sprintf(buf, "%s; SMT Host state unknown\n", + mds_strings[mds_mitigation]); + } + + if (boot_cpu_has(X86_BUG_MSBDS_ONLY)) { + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + (mds_mitigation == MDS_MITIGATION_OFF ? "vulnerable" : + sched_smt_active() ? "mitigated" : "disabled")); + } + + return sprintf(buf, "%s; SMT %s\n", mds_strings[mds_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); +} + static char *stibp_state(void) { if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) @@ -1180,6 +1298,10 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr if (boot_cpu_has(X86_FEATURE_L1TF_PTEINV)) return l1tf_show_state(buf); break; + + case X86_BUG_MDS: + return mds_show_state(buf); + default: break; } @@ -1211,4 +1333,9 @@ ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *b { return cpu_show_common(dev, attr, buf, X86_BUG_L1TF); } + +ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_MDS); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 37640544e12f..d7f55ad2dfb1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -372,6 +372,8 @@ static bool pku_disabled; static __always_inline void setup_pku(struct cpuinfo_x86 *c) { + struct pkru_state *pk; + /* check the boot processor, plus compile options for PKU: */ if (!cpu_feature_enabled(X86_FEATURE_PKU)) return; @@ -382,6 +384,9 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c) return; cr4_set_bits(X86_CR4_PKE); + pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU); + if (pk) + pk->pkru = init_pkru_value; /* * Seting X86_CR4_PKE will cause the X86_FEATURE_OSPKE * cpuid bit to be set. 
We need to ensure that we @@ -935,61 +940,77 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #endif } -static const __initconst struct x86_cpu_id cpu_no_speculation[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_TABLET, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL_MID, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SALTWELL_MID, X86_FEATURE_ANY }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_BONNELL, X86_FEATURE_ANY }, - { X86_VENDOR_CENTAUR, 5 }, - { X86_VENDOR_INTEL, 5 }, - { X86_VENDOR_NSC, 5 }, - { X86_VENDOR_ANY, 4 }, - {} -}; +#define NO_SPECULATION BIT(0) +#define NO_MELTDOWN BIT(1) +#define NO_SSB BIT(2) +#define NO_L1TF BIT(3) +#define NO_MDS BIT(4) +#define MSBDS_ONLY BIT(5) -static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { - { X86_VENDOR_AMD }, - { X86_VENDOR_HYGON }, - {} -}; +#define VULNWL(_vendor, _family, _model, _whitelist) \ + { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } -/* Only list CPUs which speculate but are non susceptible to SSB */ -static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, - { X86_VENDOR_AMD, 0x12, }, - { X86_VENDOR_AMD, 0x11, }, - { X86_VENDOR_AMD, 0x10, }, - { X86_VENDOR_AMD, 0xf, }, - {} -}; +#define VULNWL_INTEL(model, whitelist) \ + VULNWL(INTEL, 6, INTEL_FAM6_##model, whitelist) + +#define VULNWL_AMD(family, whitelist) \ + VULNWL(AMD, family, X86_MODEL_ANY, whitelist) + +#define VULNWL_HYGON(family, whitelist) \ + VULNWL(HYGON, family, X86_MODEL_ANY, whitelist) + +static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { + VULNWL(ANY, 4, X86_MODEL_ANY, NO_SPECULATION), + VULNWL(CENTAUR, 5, X86_MODEL_ANY, NO_SPECULATION), + VULNWL(INTEL, 5, X86_MODEL_ANY, NO_SPECULATION), + VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), + + /* Intel Family 6 */ + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION), + VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION), + VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION), + VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), + VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), -static const __initconst struct x86_cpu_id cpu_no_l1tf[] = { - /* in addition to cpu_no_speculation */ - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_X }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT_MID }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT_MID }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_X }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_GOLDMONT_PLUS }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, - { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, + VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY), + VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | 
NO_L1TF | MSBDS_ONLY), + + VULNWL_INTEL(CORE_YONAH, NO_SSB), + + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY), + + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF), + + /* AMD Family 0xf - 0x12 */ + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS), + + /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS), {} }; +static bool __init cpu_matches(unsigned long which) +{ + const struct x86_cpu_id *m = x86_match_cpu(cpu_vuln_whitelist); + + return m && !!(m->driver_data & which); +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; - if (x86_match_cpu(cpu_no_speculation)) + if (cpu_matches(NO_SPECULATION)) return; setup_force_cpu_bug(X86_BUG_SPECTRE_V1); @@ -998,15 +1019,20 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); - if (!x86_match_cpu(cpu_no_spec_store_bypass) && - !(ia32_cap & ARCH_CAP_SSB_NO) && + if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); if (ia32_cap & ARCH_CAP_IBRS_ALL) setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); - if (x86_match_cpu(cpu_no_meltdown)) + if (!cpu_matches(NO_MDS) && !(ia32_cap & ARCH_CAP_MDS_NO)) { + setup_force_cpu_bug(X86_BUG_MDS); + if (cpu_matches(MSBDS_ONLY)) + setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); + } + + if (cpu_matches(NO_MELTDOWN)) return; /* Rogue Data Cache Load? No! */ @@ -1015,7 +1041,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - if (x86_match_cpu(cpu_no_l1tf)) + if (cpu_matches(NO_L1TF)) return; setup_force_cpu_bug(X86_BUG_L1TF); diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 8a4a7823451a..c321f4f513f9 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -428,7 +428,7 @@ static int do_microcode_update(const void __user *buf, size_t size) static int microcode_open(struct inode *inode, struct file *file) { - return capable(CAP_SYS_RAWIO) ? nonseekable_open(inode, file) : -EPERM; + return capable(CAP_SYS_RAWIO) ? 
stream_open(inode, file) : -EPERM; } static ssize_t microcode_write(struct file *file, const char __user *buf, diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 50d5848bf22e..6c4f01540833 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -525,7 +525,8 @@ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_I945G_IDS(&gen3_early_ops), INTEL_I945GM_IDS(&gen3_early_ops), INTEL_VLV_IDS(&gen6_early_ops), - INTEL_PINEVIEW_IDS(&gen3_early_ops), + INTEL_PINEVIEW_G_IDS(&gen3_early_ops), + INTEL_PINEVIEW_M_IDS(&gen3_early_ops), INTEL_I965G_IDS(&gen3_early_ops), INTEL_G33_IDS(&gen3_early_ops), INTEL_I965GM_IDS(&gen3_early_ops), @@ -547,6 +548,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_GLK_IDS(&gen9_early_ops), INTEL_CNL_IDS(&gen9_early_ops), INTEL_ICL_11_IDS(&gen11_early_ops), + INTEL_EHL_IDS(&gen11_early_ops), }; struct resource intel_graphics_stolen_res __ro_after_init = DEFINE_RES_MEM(0, 0); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 2e5003fef51a..ce243f76bdb7 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -101,24 +101,21 @@ static void __kernel_fpu_begin(void) kernel_fpu_disable(); - if (fpu->initialized) { - /* - * Ignore return value -- we don't care if reg state - * is clobbered. - */ - copy_fpregs_to_fpstate(fpu); - } else { - __cpu_invalidate_fpregs_state(); + if (current->mm) { + if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { + set_thread_flag(TIF_NEED_FPU_LOAD); + /* + * Ignore return value -- we don't care if reg state + * is clobbered. + */ + copy_fpregs_to_fpstate(fpu); + } } + __cpu_invalidate_fpregs_state(); } static void __kernel_fpu_end(void) { - struct fpu *fpu = ¤t->thread.fpu; - - if (fpu->initialized) - copy_kernel_to_fpregs(&fpu->state); - kernel_fpu_enable(); } @@ -145,15 +142,17 @@ void fpu__save(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); - preempt_disable(); + fpregs_lock(); trace_x86_fpu_before_save(fpu); - if (fpu->initialized) { + + if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { if (!copy_fpregs_to_fpstate(fpu)) { copy_kernel_to_fpregs(&fpu->state); } } + trace_x86_fpu_after_save(fpu); - preempt_enable(); + fpregs_unlock(); } EXPORT_SYMBOL_GPL(fpu__save); @@ -186,11 +185,14 @@ void fpstate_init(union fpregs_state *state) } EXPORT_SYMBOL_GPL(fpstate_init); -int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) +int fpu__copy(struct task_struct *dst, struct task_struct *src) { + struct fpu *dst_fpu = &dst->thread.fpu; + struct fpu *src_fpu = &src->thread.fpu; + dst_fpu->last_cpu = -1; - if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU)) + if (!static_cpu_has(X86_FEATURE_FPU)) return 0; WARN_ON_FPU(src_fpu != ¤t->thread.fpu); @@ -202,16 +204,23 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size); /* - * Save current FPU registers directly into the child - * FPU context, without any memory-to-memory copying. + * If the FPU registers are not current just memcpy() the state. + * Otherwise save current FPU registers directly into the child's FPU + * context, without any memory-to-memory copying. * * ( The function 'fails' in the FNSAVE case, which destroys - * register contents so we have to copy them back. ) + * register contents so we have to load them back. 
) */ - if (!copy_fpregs_to_fpstate(dst_fpu)) { - memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size); - copy_kernel_to_fpregs(&src_fpu->state); - } + fpregs_lock(); + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size); + + else if (!copy_fpregs_to_fpstate(dst_fpu)) + copy_kernel_to_fpregs(&dst_fpu->state); + + fpregs_unlock(); + + set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD); trace_x86_fpu_copy_src(src_fpu); trace_x86_fpu_copy_dst(dst_fpu); @@ -223,20 +232,14 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) * Activate the current task's in-memory FPU context, * if it has not been used before: */ -void fpu__initialize(struct fpu *fpu) +static void fpu__initialize(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); - if (!fpu->initialized) { - fpstate_init(&fpu->state); - trace_x86_fpu_init_state(fpu); - - trace_x86_fpu_activate_state(fpu); - /* Safe to do for the current task: */ - fpu->initialized = 1; - } + set_thread_flag(TIF_NEED_FPU_LOAD); + fpstate_init(&fpu->state); + trace_x86_fpu_init_state(fpu); } -EXPORT_SYMBOL_GPL(fpu__initialize); /* * This function must be called before we read a task's fpstate. @@ -248,32 +251,20 @@ EXPORT_SYMBOL_GPL(fpu__initialize); * * - or it's called for stopped tasks (ptrace), in which case the * registers were already saved by the context-switch code when - * the task scheduled out - we only have to initialize the registers - * if they've never been initialized. + * the task scheduled out. * * If the task has used the FPU before then save it. */ void fpu__prepare_read(struct fpu *fpu) { - if (fpu == ¤t->thread.fpu) { + if (fpu == ¤t->thread.fpu) fpu__save(fpu); - } else { - if (!fpu->initialized) { - fpstate_init(&fpu->state); - trace_x86_fpu_init_state(fpu); - - trace_x86_fpu_activate_state(fpu); - /* Safe to do for current and for stopped child tasks: */ - fpu->initialized = 1; - } - } } /* * This function must be called before we write a task's fpstate. * - * If the task has used the FPU before then invalidate any cached FPU registers. - * If the task has not used the FPU before then initialize its fpstate. + * Invalidate any cached FPU registers. * * After this function call, after registers in the fpstate are * modified and the child task has woken up, the child task will @@ -290,44 +281,11 @@ void fpu__prepare_write(struct fpu *fpu) */ WARN_ON_FPU(fpu == ¤t->thread.fpu); - if (fpu->initialized) { - /* Invalidate any cached state: */ - __fpu_invalidate_fpregs_state(fpu); - } else { - fpstate_init(&fpu->state); - trace_x86_fpu_init_state(fpu); - - trace_x86_fpu_activate_state(fpu); - /* Safe to do for stopped child tasks: */ - fpu->initialized = 1; - } + /* Invalidate any cached state: */ + __fpu_invalidate_fpregs_state(fpu); } /* - * 'fpu__restore()' is called to copy FPU registers from - * the FPU fpstate to the live hw registers and to activate - * access to the hardware registers, so that FPU instructions - * can be used afterwards. - * - * Must be called with kernel preemption disabled (for example - * with local interrupts disabled, as it is in the case of - * do_device_not_available()). 
- */ -void fpu__restore(struct fpu *fpu) -{ - fpu__initialize(fpu); - - /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ - kernel_fpu_disable(); - trace_x86_fpu_before_restore(fpu); - fpregs_activate(fpu); - copy_kernel_to_fpregs(&fpu->state); - trace_x86_fpu_after_restore(fpu); - kernel_fpu_enable(); -} -EXPORT_SYMBOL_GPL(fpu__restore); - -/* * Drops current FPU state: deactivates the fpregs and * the fpstate. NOTE: it still leaves previous contents * in the fpregs in the eager-FPU case. @@ -341,17 +299,13 @@ void fpu__drop(struct fpu *fpu) preempt_disable(); if (fpu == ¤t->thread.fpu) { - if (fpu->initialized) { - /* Ignore delayed exceptions from user space */ - asm volatile("1: fwait\n" - "2:\n" - _ASM_EXTABLE(1b, 2b)); - fpregs_deactivate(fpu); - } + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" + "2:\n" + _ASM_EXTABLE(1b, 2b)); + fpregs_deactivate(fpu); } - fpu->initialized = 0; - trace_x86_fpu_dropped(fpu); preempt_enable(); @@ -363,6 +317,8 @@ void fpu__drop(struct fpu *fpu) */ static inline void copy_init_fpstate_to_fpregs(void) { + fpregs_lock(); + if (use_xsave()) copy_kernel_to_xregs(&init_fpstate.xsave, -1); else if (static_cpu_has(X86_FEATURE_FXSR)) @@ -372,6 +328,9 @@ static inline void copy_init_fpstate_to_fpregs(void) if (boot_cpu_has(X86_FEATURE_OSPKE)) copy_init_pkru_to_fpregs(); + + fpregs_mark_activate(); + fpregs_unlock(); } /* @@ -389,16 +348,52 @@ void fpu__clear(struct fpu *fpu) /* * Make sure fpstate is cleared and initialized. */ - if (static_cpu_has(X86_FEATURE_FPU)) { - preempt_disable(); - fpu__initialize(fpu); - user_fpu_begin(); + fpu__initialize(fpu); + if (static_cpu_has(X86_FEATURE_FPU)) copy_init_fpstate_to_fpregs(); - preempt_enable(); - } } /* + * Load FPU context before returning to userspace. + */ +void switch_fpu_return(void) +{ + if (!static_cpu_has(X86_FEATURE_FPU)) + return; + + __fpregs_load_activate(); +} +EXPORT_SYMBOL_GPL(switch_fpu_return); + +#ifdef CONFIG_X86_DEBUG_FPU +/* + * If current FPU state according to its tracking (loaded FPU context on this + * CPU) is not valid then we must have TIF_NEED_FPU_LOAD set so the context is + * loaded on return to userland. + */ +void fpregs_assert_state_consistent(void) +{ + struct fpu *fpu = ¤t->thread.fpu; + + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + return; + + WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id())); +} +EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent); +#endif + +void fpregs_mark_activate(void) +{ + struct fpu *fpu = ¤t->thread.fpu; + + fpregs_activate(fpu); + fpu->last_cpu = smp_processor_id(); + clear_thread_flag(TIF_NEED_FPU_LOAD); +} +EXPORT_SYMBOL_GPL(fpregs_mark_activate); + +/* * x87 math exception handling: */ diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 6abd83572b01..20d8fa7124c7 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -239,8 +239,6 @@ static void __init fpu__init_system_ctx_switch(void) WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; - - WARN_ON_FPU(current->thread.fpu.initialized); } /* diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index bc02f5144b95..d652b939ccfb 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -15,16 +15,12 @@ */ int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset) { - struct fpu *target_fpu = &target->thread.fpu; - - return target_fpu->initialized ? 
regset->n : 0; + return regset->n; } int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset) { - struct fpu *target_fpu = &target->thread.fpu; - - if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized) + if (boot_cpu_has(X86_FEATURE_FXSR)) return regset->n; else return 0; @@ -269,11 +265,10 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) memcpy(&to[i], &from[i], sizeof(to[0])); } -void convert_to_fxsr(struct task_struct *tsk, +void convert_to_fxsr(struct fxregs_state *fxsave, const struct user_i387_ia32_struct *env) { - struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave; struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; int i; @@ -350,7 +345,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1); if (!ret) - convert_to_fxsr(target, &env); + convert_to_fxsr(&target->thread.fpu.state.fxsave, &env); /* * update the header bit in the xsave header, indicating the @@ -371,16 +366,9 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu) { struct task_struct *tsk = current; - struct fpu *fpu = &tsk->thread.fpu; - int fpvalid; - - fpvalid = fpu->initialized; - if (fpvalid) - fpvalid = !fpregs_get(tsk, NULL, - 0, sizeof(struct user_i387_ia32_struct), - ufpu, NULL); - return fpvalid; + return !fpregs_get(tsk, NULL, 0, sizeof(struct user_i387_ia32_struct), + ufpu, NULL); } EXPORT_SYMBOL(dump_fpu); diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index f6a1d299627c..5a8d118bc423 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -92,13 +92,13 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) return err; err |= __put_user(FP_XSTATE_MAGIC2, - (__u32 *)(buf + fpu_user_xstate_size)); + (__u32 __user *)(buf + fpu_user_xstate_size)); /* * Read the xfeatures which we copied (directly from the cpu or * from the state in task struct) to the user buffers. */ - err |= __get_user(xfeatures, (__u32 *)&x->header.xfeatures); + err |= __get_user(xfeatures, (__u32 __user *)&x->header.xfeatures); /* * For legacy compatible, we always set FP/SSE bits in the bit @@ -113,7 +113,7 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) */ xfeatures |= XFEATURE_MASK_FPSSE; - err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures); + err |= __put_user(xfeatures, (__u32 __user *)&x->header.xfeatures); return err; } @@ -144,9 +144,10 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) * buf == buf_fx for 64-bit frames and 32-bit fsave frame. * buf != buf_fx for 32-bit frames with fxstate. * - * If the fpu, extended register state is live, save the state directly - * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise, - * copy the thread's fpu state to the user frame starting at 'buf_fx'. + * Try to save it directly to the user frame with disabled page fault handler. + * If this fails then do the slow path where the FPU state is first saved to + * task's fpu->state and then copy it to the user frame pointed to by the + * aligned pointer 'buf_fx'. * * If this is a 32-bit frame with fxstate, put a fsave header before * the aligned state at 'buf_fx'. 
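/*
 * Illustrative sketch, not part of this patch: the hunk below uses the
 * "try with page faults disabled, fault the pages in, retry" idiom.
 * example_save_to_user() is a hypothetical stand-in for the register
 * save step; the real code saves via copy_fpregs_to_sigframe() and
 * resolves faults with get_user_pages_unlocked(), as sketched here.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/uaccess.h>

static int example_save_to_user(void __user *ubuf, const void *src,
				unsigned int size)
{
	int nr_pages = DIV_ROUND_UP(offset_in_page(ubuf) + size, PAGE_SIZE);

retry:
	/* Attempt the copy atomically; a fault makes it fail instead of sleep. */
	pagefault_disable();
	if (!__copy_to_user_inatomic(ubuf, src, size)) {
		pagefault_enable();
		return 0;
	}
	pagefault_enable();

	/* Fault the destination in with the handler enabled, then try again. */
	if (get_user_pages_unlocked((unsigned long)ubuf, nr_pages, NULL,
				    FOLL_WRITE) == nr_pages)
		goto retry;
	return -EFAULT;
}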
@@ -156,10 +157,9 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) */ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) { - struct fpu *fpu = ¤t->thread.fpu; - struct xregs_state *xsave = &fpu->state.xsave; struct task_struct *tsk = current; int ia32_fxstate = (buf != buf_fx); + int ret; ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) || IS_ENABLED(CONFIG_IA32_EMULATION)); @@ -172,28 +172,34 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) sizeof(struct user_i387_ia32_struct), NULL, (struct _fpstate_32 __user *) buf) ? -1 : 1; - if (fpu->initialized || using_compacted_format()) { - /* Save the live register state to the user directly. */ - if (copy_fpregs_to_sigframe(buf_fx)) - return -1; - /* Update the thread's fxstate to save the fsave header. */ - if (ia32_fxstate) - copy_fxregs_to_kernel(fpu); - } else { - /* - * It is a *bug* if kernel uses compacted-format for xsave - * area and we copy it out directly to a signal frame. It - * should have been handled above by saving the registers - * directly. - */ - if (boot_cpu_has(X86_FEATURE_XSAVES)) { - WARN_ONCE(1, "x86/fpu: saving compacted-format xsave area to a signal frame!\n"); - return -1; - } - - fpstate_sanitize_xstate(fpu); - if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size)) - return -1; +retry: + /* + * Load the FPU registers if they are not valid for the current task. + * With a valid FPU state we can attempt to save the state directly to + * userland's stack frame which will likely succeed. If it does not, + * resolve the fault in the user memory and try again. + */ + fpregs_lock(); + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + __fpregs_load_activate(); + + pagefault_disable(); + ret = copy_fpregs_to_sigframe(buf_fx); + pagefault_enable(); + fpregs_unlock(); + + if (ret) { + int aligned_size; + int nr_pages; + + aligned_size = offset_in_page(buf_fx) + fpu_user_xstate_size; + nr_pages = DIV_ROUND_UP(aligned_size, PAGE_SIZE); + + ret = get_user_pages_unlocked((unsigned long)buf_fx, nr_pages, + NULL, FOLL_WRITE); + if (ret == nr_pages) + goto retry; + return -EFAULT; } /* Save the fsave header for the 32-bit frames. */ @@ -207,11 +213,11 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) } static inline void -sanitize_restored_xstate(struct task_struct *tsk, +sanitize_restored_xstate(union fpregs_state *state, struct user_i387_ia32_struct *ia32_env, u64 xfeatures, int fx_only) { - struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; + struct xregs_state *xsave = &state->xsave; struct xstate_header *header = &xsave->header; if (use_xsave()) { @@ -238,17 +244,18 @@ sanitize_restored_xstate(struct task_struct *tsk, */ xsave->i387.mxcsr &= mxcsr_feature_mask; - convert_to_fxsr(tsk, ia32_env); + if (ia32_env) + convert_to_fxsr(&state->fxsave, ia32_env); } } /* * Restore the extended state if present. Otherwise, restore the FP/SSE state. 
*/ -static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) +static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) { if (use_xsave()) { - if ((unsigned long)buf % 64 || fx_only) { + if (fx_only) { u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE; copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); return copy_user_to_fxregs(buf); @@ -266,12 +273,15 @@ static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) { + struct user_i387_ia32_struct *envp = NULL; + int state_size = fpu_kernel_xstate_size; int ia32_fxstate = (buf != buf_fx); struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; - int state_size = fpu_kernel_xstate_size; + struct user_i387_ia32_struct env; u64 xfeatures = 0; int fx_only = 0; + int ret = 0; ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) || IS_ENABLED(CONFIG_IA32_EMULATION)); @@ -284,8 +294,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) if (!access_ok(buf, size)) return -EACCES; - fpu__initialize(fpu); - if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_set(current, NULL, 0, sizeof(struct user_i387_ia32_struct), @@ -308,61 +316,101 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) } } + /* + * The current state of the FPU registers does not matter. By setting + * TIF_NEED_FPU_LOAD unconditionally it is ensured that our xstate + * is not modified on context switch and that the xstate is considered + * to be loaded again on return to userland (overriding last_cpu avoids + * the optimisation). + */ + set_thread_flag(TIF_NEED_FPU_LOAD); + __fpu_invalidate_fpregs_state(fpu); + + if ((unsigned long)buf_fx % 64) + fx_only = 1; + /* + * For 32-bit frames with fxstate, copy the fxstate so it can be + * reconstructed later. + */ if (ia32_fxstate) { + ret = __copy_from_user(&env, buf, sizeof(env)); + if (ret) + goto err_out; + envp = &env; + } else { /* - * For 32-bit frames with fxstate, copy the user state to the - * thread's fpu state, reconstruct fxstate from the fsave - * header. Validate and sanitize the copied state. + * Attempt to restore the FPU registers directly from user + * memory. For that to succeed, the user access cannot cause + * page faults. If it does, fall back to the slow path below, + * going through the kernel buffer with the pagefault + * handler enabled. */ - struct user_i387_ia32_struct env; - int err = 0; + fpregs_lock(); + pagefault_disable(); + ret = copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only); + pagefault_enable(); + if (!ret) { + fpregs_mark_activate(); + fpregs_unlock(); + return 0; + } + fpregs_unlock(); + } - /* - * Drop the current fpu which clears fpu->initialized. This ensures - * that any context-switch during the copy of the new state, - * avoids the intermediate state from getting restored/saved. - * Thus avoiding the new restored state from getting corrupted. - * We will be ready to restore/save the state only after - * fpu->initialized is again set. 
- */ - fpu__drop(fpu); + + if (use_xsave() && !fx_only) { + u64 init_bv = xfeatures_mask & ~xfeatures; if (using_compacted_format()) { - err = copy_user_to_xstate(&fpu->state.xsave, buf_fx); + ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx); } else { - err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); + ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); - if (!err && state_size > offsetof(struct xregs_state, header)) - err = validate_xstate_header(&fpu->state.xsave.header); + if (!ret && state_size > offsetof(struct xregs_state, header)) + ret = validate_xstate_header(&fpu->state.xsave.header); } + if (ret) + goto err_out; - if (err || __copy_from_user(&env, buf, sizeof(env))) { - fpstate_init(&fpu->state); - trace_x86_fpu_init_state(fpu); - err = -1; - } else { - sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); + sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only); + + fpregs_lock(); + if (unlikely(init_bv)) + copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + ret = copy_kernel_to_xregs_err(&fpu->state.xsave, xfeatures); + + } else if (use_fxsr()) { + ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size); + if (ret) { + ret = -EFAULT; + goto err_out; } - local_bh_disable(); - fpu->initialized = 1; - fpu__restore(fpu); - local_bh_enable(); + sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only); - return err; - } else { - /* - * For 64-bit frames and 32-bit fsave frames, restore the user - * state to the registers directly (with exceptions handled). - */ - user_fpu_begin(); - if (copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only)) { - fpu__clear(fpu); - return -1; + fpregs_lock(); + if (use_xsave()) { + u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE; + copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); } + + ret = copy_kernel_to_fxregs_err(&fpu->state.fxsave); + } else { + ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size); + if (ret) + goto err_out; + + fpregs_lock(); + ret = copy_kernel_to_fregs_err(&fpu->state.fsave); } + if (!ret) + fpregs_mark_activate(); + fpregs_unlock(); - return 0; +err_out: + if (ret) + fpu__clear(fpu); + return ret; } static inline int xstate_sigframe_size(void) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index d7432c2b1051..9c459fd1d38e 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -805,20 +805,18 @@ void fpu__resume_cpu(void) } /* - * Given an xstate feature mask, calculate where in the xsave + * Given an xstate feature nr, calculate where in the xsave * buffer the state is. Callers should ensure that the buffer * is valid. */ -static void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask) +static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr) { - int feature_nr = fls64(xstate_feature_mask) - 1; - - if (!xfeature_enabled(feature_nr)) { + if (!xfeature_enabled(xfeature_nr)) { WARN_ON_FPU(1); return NULL; } - return (void *)xsave + xstate_comp_offsets[feature_nr]; + return (void *)xsave + xstate_comp_offsets[xfeature_nr]; } /* * Given the xsave area and a state inside, this function returns the @@ -832,13 +830,13 @@ static void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask * * Inputs: * xstate: the thread's storage area for all FPU data - * xstate_feature: state which is defined in xsave.h (e.g. - * XFEATURE_MASK_FP, XFEATURE_MASK_SSE, etc...) + * xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP, + * XFEATURE_SSE, etc...) 
* Output: * address of the state in the xsave area, or NULL if the * field is not present in the xsave buffer. */ -void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) +void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) { /* * Do we even *have* xsave state? @@ -851,11 +849,11 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) * have not enabled. Remember that pcntxt_mask is * what we write to the XCR0 register. */ - WARN_ONCE(!(xfeatures_mask & xstate_feature), + WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)), "get of unsupported state"); /* * This assumes the last 'xsave*' instruction to - * have requested that 'xstate_feature' be saved. + * have requested that 'xfeature_nr' be saved. * If it did not, we might be seeing and old value * of the field in the buffer. * @@ -864,10 +862,10 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) * or because the "init optimization" caused it * to not be saved. */ - if (!(xsave->header.xfeatures & xstate_feature)) + if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr))) return NULL; - return __raw_xsave_addr(xsave, xstate_feature); + return __raw_xsave_addr(xsave, xfeature_nr); } EXPORT_SYMBOL_GPL(get_xsave_addr); @@ -882,25 +880,23 @@ EXPORT_SYMBOL_GPL(get_xsave_addr); * Note that this only works on the current task. * * Inputs: - * @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP, - * XFEATURE_MASK_SSE, etc...) + * @xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP, + * XFEATURE_SSE, etc...) * Output: * address of the state in the xsave area or NULL if the state * is not present or is in its 'init state'. */ -const void *get_xsave_field_ptr(int xsave_state) +const void *get_xsave_field_ptr(int xfeature_nr) { struct fpu *fpu = ¤t->thread.fpu; - if (!fpu->initialized) - return NULL; /* * fpu__save() takes the CPU's xstate registers * and saves them off to the 'fpu memory buffer. 
*/ fpu__save(fpu); - return get_xsave_addr(&fpu->state.xsave, xsave_state); + return get_xsave_addr(&fpu->state.xsave, xfeature_nr); } #ifdef CONFIG_ARCH_HAS_PKEYS @@ -1016,7 +1012,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of * Copy only in-use xstates: */ if ((header.xfeatures >> i) & 1) { - void *src = __raw_xsave_addr(xsave, 1 << i); + void *src = __raw_xsave_addr(xsave, i); offset = xstate_offsets[i]; size = xstate_sizes[i]; @@ -1102,7 +1098,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i * Copy only in-use xstates: */ if ((header.xfeatures >> i) & 1) { - void *src = __raw_xsave_addr(xsave, 1 << i); + void *src = __raw_xsave_addr(xsave, i); offset = xstate_offsets[i]; size = xstate_sizes[i]; @@ -1159,7 +1155,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) u64 mask = ((u64)1 << i); if (hdr.xfeatures & mask) { - void *dst = __raw_xsave_addr(xsave, 1 << i); + void *dst = __raw_xsave_addr(xsave, i); offset = xstate_offsets[i]; size = xstate_sizes[i]; @@ -1213,7 +1209,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) u64 mask = ((u64)1 << i); if (hdr.xfeatures & mask) { - void *dst = __raw_xsave_addr(xsave, 1 << i); + void *dst = __raw_xsave_addr(xsave, i); offset = xstate_offsets[i]; size = xstate_sizes[i]; diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c index e47cd9390ab4..85de790583f9 100644 --- a/arch/x86/kernel/ima_arch.c +++ b/arch/x86/kernel/ima_arch.c @@ -3,6 +3,7 @@ * Copyright (C) 2018 IBM Corporation */ #include <linux/efi.h> +#include <linux/module.h> #include <linux/ima.h> extern struct boot_params boot_params; @@ -64,12 +65,19 @@ static const char * const sb_arch_rules[] = { "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig", #endif /* CONFIG_KEXEC_VERIFY_SIG */ "measure func=KEXEC_KERNEL_CHECK", +#if !IS_ENABLED(CONFIG_MODULE_SIG) + "appraise func=MODULE_CHECK appraise_type=imasig", +#endif + "measure func=MODULE_CHECK", NULL }; const char * const *arch_get_ima_policy(void) { - if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) + if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) { + if (IS_ENABLED(CONFIG_MODULE_SIG)) + set_module_sig_enforced(); return sb_arch_rules; + } return NULL; } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 3755d0310026..05b09896cfaf 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -35,6 +35,7 @@ #include <asm/x86_init.h> #include <asm/reboot.h> #include <asm/cache.h> +#include <asm/nospec-branch.h> #define CREATE_TRACE_POINTS #include <trace/events/nmi.h> @@ -551,6 +552,9 @@ nmi_restart: write_cr2(this_cpu_read(nmi_cr2)); if (this_cpu_dec_return(nmi_state)) goto nmi_restart; + + if (user_mode(regs)) + mds_user_clear_cpu_buffers(); } NOKPROBE_SYMBOL(do_nmi); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index d460998ae828..dcd272dbd0a9 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -51,14 +51,6 @@ int iommu_pass_through __read_mostly; extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; -/* Dummy device used for NULL arguments (normally ISA). 
*/ -struct device x86_dma_fallback_dev = { - .init_name = "fallback device", - .coherent_dma_mask = ISA_DMA_BIT_MASK, - .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, -}; -EXPORT_SYMBOL(x86_dma_fallback_dev); - void __init pci_iommu_alloc(void) { struct iommu_table_entry *p; @@ -77,18 +69,6 @@ void __init pci_iommu_alloc(void) } } -bool arch_dma_alloc_attrs(struct device **dev) -{ - if (!*dev) - *dev = &x86_dma_fallback_dev; - - if (!is_device_dma_capable(*dev)) - return false; - return true; - -} -EXPORT_SYMBOL(arch_dma_alloc_attrs); - /* * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel * parameter documentation. diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index d1d312d012a6..75fea0d48c0e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -101,7 +101,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) dst->thread.vm86 = NULL; #endif - return fpu__copy(&dst->thread.fpu, &src->thread.fpu); + return fpu__copy(dst, src); } /* diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 70933193878c..2399e910d109 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -241,7 +241,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ - switch_fpu_prepare(prev_fpu, cpu); + if (!test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_prepare(prev_fpu, cpu); /* * Save away %gs. No need to save %fs, as it was saved on the @@ -274,9 +275,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so - * the GDT and LDT are properly updated, and must be - * done before fpu__restore(), so the TS bit is up - * to date. + * the GDT and LDT are properly updated. */ arch_end_context_switch(next_p); @@ -297,10 +296,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (prev->gs | next->gs) lazy_load_gs(next->gs); - switch_fpu_finish(next_fpu, cpu); - this_cpu_write(current_task, next_p); + switch_fpu_finish(next_fpu); + /* Load the Intel cache allocation PQR MSR. */ resctrl_sched_in(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 844a28b29967..f8e1af380cdf 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -521,7 +521,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(irq_count) != -1); - switch_fpu_prepare(prev_fpu, cpu); + if (!test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_prepare(prev_fpu, cpu); /* We must save %fs and %gs before load_TLS() because * %fs and %gs may be cleared by load_TLS(). @@ -539,9 +540,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Leave lazy mode, flushing any hypercalls made here. This * must be done after loading TLS entries in the GDT but before - * loading segments that might reference them, and and it must - * be done before fpu__restore(), so the TS bit is up to - * date. + * loading segments that might reference them. */ arch_end_context_switch(next_p); @@ -569,14 +568,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) x86_fsgsbase_load(prev, next); - switch_fpu_finish(next_fpu, cpu); - /* * Switch the PDA and FPU contexts. 
*/ this_cpu_write(current_task, next_p); this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); + switch_fpu_finish(next_fpu); + /* Reload sp0. */ update_task_stack(next_p); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index dff90fb6a9af..364813cea647 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -205,7 +205,7 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, put_user_ex(regs->ss, &sc->ss); #endif /* CONFIG_X86_32 */ - put_user_ex(fpstate, &sc->fpstate); + put_user_ex(fpstate, (unsigned long __user *)&sc->fpstate); /* non-iBCS2 extensions.. */ put_user_ex(mask, &sc->oldmask); @@ -245,7 +245,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, unsigned long sp = regs->sp; unsigned long buf_fx = 0; int onsigstack = on_sig_stack(sp); - struct fpu *fpu = ¤t->thread.fpu; + int ret; /* redzone */ if (IS_ENABLED(CONFIG_X86_64)) @@ -264,11 +264,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, sp = (unsigned long) ka->sa.sa_restorer; } - if (fpu->initialized) { - sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32), - &buf_fx, &math_size); - *fpstate = (void __user *)sp; - } + sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32), + &buf_fx, &math_size); + *fpstate = (void __user *)sp; sp = align_sigframe(sp - frame_size); @@ -280,8 +278,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, return (void __user *)-1L; /* save i387 and extended state */ - if (fpu->initialized && - copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0) + ret = copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size); + if (ret < 0) return (void __user *)-1L; return (void __user *)sp; @@ -574,7 +572,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, restorer = NULL; err |= -EFAULT; } - put_user_ex(restorer, &frame->pretcode); + put_user_ex(restorer, (unsigned long __user *)&frame->pretcode); } put_user_catch(err); err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, @@ -765,8 +763,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) /* * Ensure the signal handler starts with the new fpu state. */ - if (fpu->initialized) - fpu__clear(fpu); + fpu__clear(fpu); } signal_setup_done(failed, ksig, stepping); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d26f9e9c3d83..7de466eb960b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -58,6 +58,7 @@ #include <asm/alternative.h> #include <asm/fpu/xstate.h> #include <asm/trace/mpx.h> +#include <asm/nospec-branch.h> #include <asm/mpx.h> #include <asm/vm86.h> #include <asm/umip.h> @@ -367,6 +368,13 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) regs->ip = (unsigned long)general_protection; regs->sp = (unsigned long)&gpregs->orig_ax; + /* + * This situation can be triggered by userspace via + * modify_ldt(2) and the return does not take the regular + * user space exit, so a CPU buffer clear is required when + * MDS mitigation is enabled. + */ + mds_user_clear_cpu_buffers(); return; } #endif @@ -456,7 +464,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) * which is all zeros which indicates MPX was not * responsible for the exception. 
*/ - bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); + bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR); if (!bndcsr) goto exit_trap; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 72fa955f4a15..fc042419e670 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -27,7 +27,6 @@ config KVM depends on X86_LOCAL_APIC select PREEMPT_NOTIFIERS select MMU_NOTIFIER - select ANON_INODES select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD select IRQ_BYPASS_MANAGER diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index fd3951638ae4..bbbe611f0c49 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -410,7 +410,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | - F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP); + F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | + F(MD_CLEAR); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6bdca39829bc..08715034e315 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -140,7 +140,7 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, pt_element_t *table; struct page *page; - npages = get_user_pages_fast((unsigned long)ptep_user, 1, 1, &page); + npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page); /* Check if the user is doing something meaningless. */ if (unlikely(npages != 1)) return -EFAULT; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 406b558abfef..6b92eaf4a3b1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1805,7 +1805,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, return NULL; /* Pin the user virtual address. */ - npinned = get_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages); + npinned = get_user_pages_fast(uaddr, npages, FOLL_WRITE, pages); if (npinned != npages) { pr_err("SEV: Failure locking %lu pages.\n", npages); goto err; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 0c955bb286ff..e1fa935a545f 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6431,8 +6431,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) */ x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); + /* L1D Flush includes CPU buffer clear to mitigate MDS */ if (static_branch_unlikely(&vmx_l1d_should_flush)) vmx_l1d_flush(vcpu); + else if (static_branch_unlikely(&mds_user_clear)) + mds_clear_cpu_buffers(); if (vcpu->arch.cr2 != read_cr2()) write_cr2(vcpu->arch.cr2); @@ -6500,7 +6503,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) */ if (static_cpu_has(X86_FEATURE_PKU) && kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) { - vcpu->arch.pkru = __read_pkru(); + vcpu->arch.pkru = rdpkru(); if (vcpu->arch.pkru != vmx->host_pkru) __write_pkru(vmx->host_pkru); } @@ -6668,8 +6671,8 @@ free_partial_vcpu: return ERR_PTR(err); } -#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" -#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/l1tf.html for details.\n" +#define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. 
See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" +#define L1TF_MSG_L1D "L1TF CPU bug present and virtualization mitigation disabled, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n" static int vmx_vm_init(struct kvm *kvm) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b5edc8e3ce1d..d75bb97b983c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3681,15 +3681,15 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) */ valid = xstate_bv & ~XFEATURE_MASK_FPSSE; while (valid) { - u64 feature = valid & -valid; - int index = fls64(feature) - 1; - void *src = get_xsave_addr(xsave, feature); + u64 xfeature_mask = valid & -valid; + int xfeature_nr = fls64(xfeature_mask) - 1; + void *src = get_xsave_addr(xsave, xfeature_nr); if (src) { u32 size, offset, ecx, edx; - cpuid_count(XSTATE_CPUID, index, + cpuid_count(XSTATE_CPUID, xfeature_nr, &size, &offset, &ecx, &edx); - if (feature == XFEATURE_MASK_PKRU) + if (xfeature_nr == XFEATURE_PKRU) memcpy(dest + offset, &vcpu->arch.pkru, sizeof(vcpu->arch.pkru)); else @@ -3697,7 +3697,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) } - valid -= feature; + valid -= xfeature_mask; } } @@ -3724,22 +3724,22 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) */ valid = xstate_bv & ~XFEATURE_MASK_FPSSE; while (valid) { - u64 feature = valid & -valid; - int index = fls64(feature) - 1; - void *dest = get_xsave_addr(xsave, feature); + u64 xfeature_mask = valid & -valid; + int xfeature_nr = fls64(xfeature_mask) - 1; + void *dest = get_xsave_addr(xsave, xfeature_nr); if (dest) { u32 size, offset, ecx, edx; - cpuid_count(XSTATE_CPUID, index, + cpuid_count(XSTATE_CPUID, xfeature_nr, &size, &offset, &ecx, &edx); - if (feature == XFEATURE_MASK_PKRU) + if (xfeature_nr == XFEATURE_PKRU) memcpy(&vcpu->arch.pkru, src + offset, sizeof(vcpu->arch.pkru)); else memcpy(dest, src + offset, size); } - valid -= feature; + valid -= xfeature_mask; } } @@ -7899,6 +7899,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) wait_lapic_expire(vcpu); guest_enter_irqoff(); + fpregs_assert_state_consistent(); + if (test_thread_flag(TIF_NEED_FPU_LOAD)) + switch_fpu_return(); + if (unlikely(vcpu->arch.switch_db_regs)) { set_debugreg(0, 7); set_debugreg(vcpu->arch.eff_db[0], 0); @@ -8157,22 +8161,30 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) /* Swap (qemu) user FPU context for the guest FPU context. */ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { - preempt_disable(); + fpregs_lock(); + copy_fpregs_to_fpstate(¤t->thread.fpu); /* PKRU is separately restored in kvm_x86_ops->run. */ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state, ~XFEATURE_MASK_PKRU); - preempt_enable(); + + fpregs_mark_activate(); + fpregs_unlock(); + trace_kvm_fpu(1); } /* When vcpu_run ends, restore user space FPU context. 
*/ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) {
- preempt_disable();
+ fpregs_lock();
+
copy_fpregs_to_fpstate(vcpu->arch.guest_fpu); copy_kernel_to_fpregs(&current->thread.fpu.state);
- preempt_enable();
+
+ fpregs_mark_activate();
+ fpregs_unlock();
+
++vcpu->stat.fpu_reload; trace_kvm_fpu(0); }
@@ -8870,11 +8882,11 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (init_event) kvm_put_guest_fpu(vcpu); mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
- XFEATURE_MASK_BNDREGS);
+ XFEATURE_BNDREGS);
if (mpx_state_buffer) memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state)); mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
- XFEATURE_MASK_BNDCSR);
+ XFEATURE_BNDCSR);
if (mpx_state_buffer) memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr)); if (init_event)
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 9e2ba7e667f6..a873da6b46d6 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c
@@ -113,9 +113,6 @@ void math_emulate(struct math_emu_info *info) unsigned long code_base = 0; unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ struct desc_struct code_descriptor;
- struct fpu *fpu = &current->thread.fpu;
-
- fpu__initialize(fpu);
#ifdef RE_ENTRANT_CHECKING if (emulating) {
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 3c4568f8fb28..b0a2de8d2f9e 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c
@@ -145,7 +145,7 @@ __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup unsigned long error_code, unsigned long fault_addr) {
- if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n",
+ if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, regs->ip, (void *)regs->ip)) show_stack_regs(regs);
@@ -162,7 +162,7 @@ __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup unsigned long error_code, unsigned long fault_addr) {
- if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n",
+ if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax, regs->ip, (void *)regs->ip)) show_stack_regs(regs);
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 92e4c4b85bba..fab095362c50 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c
@@ -203,7 +203,7 @@ static __init int setup_hugepagesz(char *opt) } __setup("hugepagesz=", setup_hugepagesz);
-#if (defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || defined(CONFIG_CMA)
+#ifdef CONFIG_CONTIG_ALLOC
static __init int gigantic_pages_init(void) { /* With compaction or CMA we can allocate gigantic pages at runtime */
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 85c94f9a87f8..075e568098f2 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c
@@ -850,24 +850,25 @@ void __init mem_init(void) } #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions)
{ unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT;
- return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
+ return __add_pages(nid, start_pfn,
nr_pages, restrictions); } #ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) +void arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct zone *zone; zone = page_zone(pfn_to_page(start_pfn)); - return __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5cd125bd2a85..62fc457f3849 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -841,11 +841,11 @@ static void update_end_of_memory_vars(u64 start, u64 size) } int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, - struct vmem_altmap *altmap, bool want_memblock) + struct mhp_restrictions *restrictions) { int ret; - ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + ret = __add_pages(nid, start_pfn, nr_pages, restrictions); WARN_ON_ONCE(ret); /* update max_pfn, max_low_pfn and high_memory */ @@ -855,15 +855,15 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, return ret; } -int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_restrictions *restrictions) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; init_memory_mapping(start, start + size); - return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); + return add_pages(nid, start_pfn, nr_pages, restrictions); } #define PAGE_INUSE 0xFD @@ -1205,24 +1205,20 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end) remove_pagetable(start, end, true, NULL); } -int __ref arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void __ref arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct page *page = pfn_to_page(start_pfn); struct zone *zone; - int ret; /* With altmap the first mapped page is offset from @start */ if (altmap) page += vmem_altmap_offset(altmap); zone = page_zone(page); - ret = __remove_pages(zone, start_pfn, nr_pages, altmap); - WARN_ON_ONCE(ret); + __remove_pages(zone, start_pfn, nr_pages, altmap); kernel_physical_mapping_remove(start, start + size); - - return ret; } #endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index c805db6236b4..59726aaf4671 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -142,7 +142,7 @@ int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs) goto err_out; } /* get bndregs field from current task's xsave area */ - bndregs = get_xsave_field_ptr(XFEATURE_MASK_BNDREGS); + bndregs = get_xsave_field_ptr(XFEATURE_BNDREGS); if (!bndregs) { err = -EINVAL; goto err_out; @@ -190,7 +190,7 @@ static __user void *mpx_get_bounds_dir(void) * The bounds directory pointer is stored in a register * only accessible if we first do an xsave. 
*/ - bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); + bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR); if (!bndcsr) return MPX_INVALID_BOUNDS_DIR; @@ -376,7 +376,7 @@ static int do_mpx_bt_fault(void) const struct mpx_bndcsr *bndcsr; struct mm_struct *mm = current->mm; - bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR); + bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR); if (!bndcsr) return -EINVAL; /* diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index 047a77f6a10c..1dcfc91c8f0c 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -18,6 +18,7 @@ #include <asm/cpufeature.h> /* boot_cpu_has, ... */ #include <asm/mmu_context.h> /* vma_pkey() */ +#include <asm/fpu/internal.h> /* init_fpstate */ int __execute_only_pkey(struct mm_struct *mm) { @@ -39,17 +40,12 @@ int __execute_only_pkey(struct mm_struct *mm) * dance to set PKRU if we do not need to. Check it * first and assume that if the execute-only pkey is * write-disabled that we do not have to set it - * ourselves. We need preempt off so that nobody - * can make fpregs inactive. + * ourselves. */ - preempt_disable(); if (!need_to_set_mm_pkey && - current->thread.fpu.initialized && !__pkru_allows_read(read_pkru(), execute_only_pkey)) { - preempt_enable(); return execute_only_pkey; } - preempt_enable(); /* * Set up PKRU so that it denies access for everything @@ -131,7 +127,6 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey * in the process's lifetime will not accidentally get access * to data which is pkey-protected later on. */ -static u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) | PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) | PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) | @@ -148,13 +143,6 @@ void copy_init_pkru_to_fpregs(void) { u32 init_pkru_value_snapshot = READ_ONCE(init_pkru_value); /* - * Any write to PKRU takes it out of the XSAVE 'init - * state' which increases context switch cost. Avoid - * writing 0 when PKRU was already 0. - */ - if (!init_pkru_value_snapshot && !read_pkru()) - return; - /* * Override the PKRU state that came from 'init_fpstate' * with the baseline from the process. */ @@ -174,6 +162,7 @@ static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf, static ssize_t init_pkru_write_file(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { + struct pkru_state *pk; char buf[32]; ssize_t len; u32 new_init_pkru; @@ -196,6 +185,10 @@ static ssize_t init_pkru_write_file(struct file *file, return -EINVAL; WRITE_ONCE(init_pkru_value, new_init_pkru); + pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU); + if (!pk) + return -EINVAL; + pk->pkru = new_init_pkru; return count; } diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 0d9cdffce6ac..b29e82f190c7 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -117,6 +117,8 @@ static bool is_simm32(s64 value) #define IA32_JLE 0x7E #define IA32_JG 0x7F +#define COND_JMP_OPCODE_INVALID (0xFF) + /* * Map eBPF registers to IA32 32bit registers or stack scratch space. 
* @@ -698,19 +700,12 @@ static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog) STACK_VAR(dst_hi)); } - /* xor ecx,ecx */ - EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX)); - /* sub dreg_lo,ecx */ - EMIT2(0x2B, add_2reg(0xC0, dreg_lo, IA32_ECX)); - /* mov dreg_lo,ecx */ - EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX)); - - /* xor ecx,ecx */ - EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX)); - /* sbb dreg_hi,ecx */ - EMIT2(0x19, add_2reg(0xC0, dreg_hi, IA32_ECX)); - /* mov dreg_hi,ecx */ - EMIT2(0x89, add_2reg(0xC0, dreg_hi, IA32_ECX)); + /* neg dreg_lo */ + EMIT2(0xF7, add_1reg(0xD8, dreg_lo)); + /* adc dreg_hi,0x0 */ + EMIT3(0x83, add_1reg(0xD0, dreg_hi), 0x00); + /* neg dreg_hi */ + EMIT2(0xF7, add_1reg(0xD8, dreg_hi)); if (dstk) { /* mov dword ptr [ebp+off],dreg_lo */ @@ -1613,6 +1608,75 @@ static inline void emit_push_r64(const u8 src[], u8 **pprog) *pprog = prog; } +static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo) +{ + u8 jmp_cond; + + /* Convert BPF opcode to x86 */ + switch (op) { + case BPF_JEQ: + jmp_cond = IA32_JE; + break; + case BPF_JSET: + case BPF_JNE: + jmp_cond = IA32_JNE; + break; + case BPF_JGT: + /* GT is unsigned '>', JA in x86 */ + jmp_cond = IA32_JA; + break; + case BPF_JLT: + /* LT is unsigned '<', JB in x86 */ + jmp_cond = IA32_JB; + break; + case BPF_JGE: + /* GE is unsigned '>=', JAE in x86 */ + jmp_cond = IA32_JAE; + break; + case BPF_JLE: + /* LE is unsigned '<=', JBE in x86 */ + jmp_cond = IA32_JBE; + break; + case BPF_JSGT: + if (!is_cmp_lo) + /* Signed '>', GT in x86 */ + jmp_cond = IA32_JG; + else + /* GT is unsigned '>', JA in x86 */ + jmp_cond = IA32_JA; + break; + case BPF_JSLT: + if (!is_cmp_lo) + /* Signed '<', LT in x86 */ + jmp_cond = IA32_JL; + else + /* LT is unsigned '<', JB in x86 */ + jmp_cond = IA32_JB; + break; + case BPF_JSGE: + if (!is_cmp_lo) + /* Signed '>=', GE in x86 */ + jmp_cond = IA32_JGE; + else + /* GE is unsigned '>=', JAE in x86 */ + jmp_cond = IA32_JAE; + break; + case BPF_JSLE: + if (!is_cmp_lo) + /* Signed '<=', LE in x86 */ + jmp_cond = IA32_JLE; + else + /* LE is unsigned '<=', JBE in x86 */ + jmp_cond = IA32_JBE; + break; + default: /* to silence GCC warning */ + jmp_cond = COND_JMP_OPCODE_INVALID; + break; + } + + return jmp_cond; +} + static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { @@ -2069,10 +2133,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_JMP | BPF_JLT | BPF_X: case BPF_JMP | BPF_JGE | BPF_X: case BPF_JMP | BPF_JLE | BPF_X: - case BPF_JMP | BPF_JSGT | BPF_X: - case BPF_JMP | BPF_JSLE | BPF_X: - case BPF_JMP | BPF_JSLT | BPF_X: - case BPF_JMP | BPF_JSGE | BPF_X: case BPF_JMP32 | BPF_JEQ | BPF_X: case BPF_JMP32 | BPF_JNE | BPF_X: case BPF_JMP32 | BPF_JGT | BPF_X: @@ -2118,6 +2178,40 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); goto emit_cond_jmp; } + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: { + u8 dreg_lo = dstk ? IA32_EAX : dst_lo; + u8 dreg_hi = dstk ? IA32_EDX : dst_hi; + u8 sreg_lo = sstk ? IA32_ECX : src_lo; + u8 sreg_hi = sstk ? 
IA32_EBX : src_hi; + + if (dstk) { + EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), + STACK_VAR(dst_lo)); + EMIT3(0x8B, + add_2reg(0x40, IA32_EBP, + IA32_EDX), + STACK_VAR(dst_hi)); + } + + if (sstk) { + EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), + STACK_VAR(src_lo)); + EMIT3(0x8B, + add_2reg(0x40, IA32_EBP, + IA32_EBX), + STACK_VAR(src_hi)); + } + + /* cmp dreg_hi,sreg_hi */ + EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); + EMIT2(IA32_JNE, 10); + /* cmp dreg_lo,sreg_lo */ + EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); + goto emit_cond_jmp_signed; + } case BPF_JMP | BPF_JSET | BPF_X: case BPF_JMP32 | BPF_JSET | BPF_X: { bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP; @@ -2194,10 +2288,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_JMP | BPF_JLT | BPF_K: case BPF_JMP | BPF_JGE | BPF_K: case BPF_JMP | BPF_JLE | BPF_K: - case BPF_JMP | BPF_JSGT | BPF_K: - case BPF_JMP | BPF_JSLE | BPF_K: - case BPF_JMP | BPF_JSLT | BPF_K: - case BPF_JMP | BPF_JSGE | BPF_K: case BPF_JMP32 | BPF_JEQ | BPF_K: case BPF_JMP32 | BPF_JNE | BPF_K: case BPF_JMP32 | BPF_JGT | BPF_K: @@ -2238,50 +2328,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, /* cmp dreg_lo,sreg_lo */ EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); -emit_cond_jmp: /* Convert BPF opcode to x86 */ - switch (BPF_OP(code)) { - case BPF_JEQ: - jmp_cond = IA32_JE; - break; - case BPF_JSET: - case BPF_JNE: - jmp_cond = IA32_JNE; - break; - case BPF_JGT: - /* GT is unsigned '>', JA in x86 */ - jmp_cond = IA32_JA; - break; - case BPF_JLT: - /* LT is unsigned '<', JB in x86 */ - jmp_cond = IA32_JB; - break; - case BPF_JGE: - /* GE is unsigned '>=', JAE in x86 */ - jmp_cond = IA32_JAE; - break; - case BPF_JLE: - /* LE is unsigned '<=', JBE in x86 */ - jmp_cond = IA32_JBE; - break; - case BPF_JSGT: - /* Signed '>', GT in x86 */ - jmp_cond = IA32_JG; - break; - case BPF_JSLT: - /* Signed '<', LT in x86 */ - jmp_cond = IA32_JL; - break; - case BPF_JSGE: - /* Signed '>=', GE in x86 */ - jmp_cond = IA32_JGE; - break; - case BPF_JSLE: - /* Signed '<=', LE in x86 */ - jmp_cond = IA32_JLE; - break; - default: /* to silence GCC warning */ +emit_cond_jmp: jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false); + if (jmp_cond == COND_JMP_OPCODE_INVALID) return -EFAULT; - } jmp_offset = addrs[i + insn->off] - addrs[i]; if (is_imm8(jmp_offset)) { EMIT2(jmp_cond, jmp_offset); @@ -2291,7 +2340,66 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */ pr_err("cond_jmp gen bug %llx\n", jmp_offset); return -EFAULT; } + break; + } + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: { + u8 dreg_lo = dstk ? IA32_EAX : dst_lo; + u8 dreg_hi = dstk ? IA32_EDX : dst_hi; + u8 sreg_lo = IA32_ECX; + u8 sreg_hi = IA32_EBX; + u32 hi; + if (dstk) { + EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), + STACK_VAR(dst_lo)); + EMIT3(0x8B, + add_2reg(0x40, IA32_EBP, + IA32_EDX), + STACK_VAR(dst_hi)); + } + + /* mov ecx,imm32 */ + EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32); + hi = imm32 & (1 << 31) ? (u32)~0 : 0; + /* mov ebx,imm32 */ + EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi); + /* cmp dreg_hi,sreg_hi */ + EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi)); + EMIT2(IA32_JNE, 10); + /* cmp dreg_lo,sreg_lo */ + EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo)); + + /* + * For simplicity of branch offset computation, + * let's use fixed jump coding here. 
+ */ +emit_cond_jmp_signed: /* Check the condition for low 32-bit comparison */ + jmp_cond = get_cond_jmp_opcode(BPF_OP(code), true); + if (jmp_cond == COND_JMP_OPCODE_INVALID) + return -EFAULT; + jmp_offset = addrs[i + insn->off] - addrs[i] + 8; + if (is_simm32(jmp_offset)) { + EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset); + } else { + pr_err("cond_jmp gen bug %llx\n", jmp_offset); + return -EFAULT; + } + EMIT2(0xEB, 6); + + /* Check the condition for high 32-bit comparison */ + jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false); + if (jmp_cond == COND_JMP_OPCODE_INVALID) + return -EFAULT; + jmp_offset = addrs[i + insn->off] - addrs[i]; + if (is_simm32(jmp_offset)) { + EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset); + } else { + pr_err("cond_jmp gen bug %llx\n", jmp_offset); + return -EFAULT; + } break; } case BPF_JMP | BPF_JA: diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 52e55108404e..d3a73f9335e1 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -1119,6 +1119,8 @@ static const struct dmi_system_id pciirq_dmi_table[] __initconst = { void __init pcibios_irq_init(void) { + struct irq_routing_table *rtable = NULL; + DBG(KERN_DEBUG "PCI: IRQ init\n"); if (raw_pci_ops == NULL) @@ -1129,8 +1131,10 @@ void __init pcibios_irq_init(void) pirq_table = pirq_find_routing_table(); #ifdef CONFIG_PCI_BIOS - if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN)) + if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN)) { pirq_table = pcibios_get_irq_routing_table(); + rtable = pirq_table; + } #endif if (pirq_table) { pirq_peer_trick(); @@ -1145,8 +1149,10 @@ void __init pcibios_irq_init(void) * If we're using the I/O APIC, avoid using the PCI IRQ * routing table */ - if (io_apic_assign_pci_irqs) + if (io_apic_assign_pci_irqs) { + kfree(rtable); pirq_table = NULL; + } } x86_init.pci.fixup_irqs(); diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c index bcddf09b5aa3..4845b8c7be7f 100644 --- a/arch/x86/power/hibernate.c +++ b/arch/x86/power/hibernate.c @@ -90,7 +90,6 @@ static int get_e820_md5(struct e820_table *table, void *buf) } desc->tfm = tfm; - desc->flags = 0; size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry) * table->nr_entries; diff --git a/arch/x86/um/asm/syscall.h b/arch/x86/um/asm/syscall.h index ef898af102d1..56a2f0913e3c 100644 --- a/arch/x86/um/asm/syscall.h +++ b/arch/x86/um/asm/syscall.h @@ -9,7 +9,7 @@ typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { #ifdef CONFIG_X86_32 return AUDIT_ARCH_I386; diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 0766a08bdf45..07054572297f 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -105,7 +105,7 @@ void xen_mc_flush(void) for (i = 0; i < b->mcidx; i++) { if (b->entries[i].result < 0) { #if MC_DEBUG - pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pF\n", + pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pS\n", i + 1, b->debug[i].op, b->debug[i].args[0], diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 35c8d91e6106..6ec1b75eabc5 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -253,12 +253,26 @@ config MEMMAP_CACHEATTR region: bits 0..3 -- for addresses 0x00000000..0x1fffffff, bits 4..7 -- for addresses 0x20000000..0x3fffffff, and so on. - Cache attribute values are specific for the MMU type, so e.g. 
- for region protection MMUs: 2 is cache bypass, 4 is WB cached, - 1 is WT cached, f is illegal. For ful MMU: bit 0 makes it executable, - bit 1 makes it writable, bits 2..3 meaning is 0: cache bypass, - 1: WB cache, 2: WT cache, 3: special (c and e are illegal, f is - reserved).
+ Cache attribute values are specific for the MMU type.
+ For region protection MMUs:
+ 1: WT cached,
+ 2: cache bypass,
+ 4: WB cached,
+ f: illegal.
+ For full MMU:
+ bit 0: executable,
+ bit 1: writable,
+ bits 2..3:
+ 0: cache bypass,
+ 1: WB cache,
+ 2: WT cache,
+ 3: special (c and e are illegal, f is reserved).
+ For MPU:
+ 0: illegal,
+ 1: WB cache,
+ 2: WB, no-write-allocate cache,
+ 3: WT cache,
+ 4: cache bypass.
config KSEG_PADDR hex "Physical address of the KSEG mapping"
diff --git a/arch/xtensa/boot/boot-redboot/bootstrap.S b/arch/xtensa/boot/boot-redboot/bootstrap.S index bbf3b4b080cd..48ba5a232d94 100644 --- a/arch/xtensa/boot/boot-redboot/bootstrap.S +++ b/arch/xtensa/boot/boot-redboot/bootstrap.S
@@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */
-#include <variant/core.h>
+#include <asm/core.h>
#include <asm/regs.h> #include <asm/asmmacro.h> #include <asm/cacheasm.h>
diff --git a/arch/xtensa/include/asm/asmmacro.h b/arch/xtensa/include/asm/asmmacro.h index 7f2ae5872151..8308a9c3abb2 100644 --- a/arch/xtensa/include/asm/asmmacro.h +++ b/arch/xtensa/include/asm/asmmacro.h
@@ -11,7 +11,7 @@ #ifndef _XTENSA_ASMMACRO_H #define _XTENSA_ASMMACRO_H
-#include <variant/core.h>
+#include <asm/core.h>
/* * Some little helpers for loops. Use zero-overhead-loops
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index 7de0149e1cf7..7b00d26f472e 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h
@@ -15,8 +15,6 @@ #include <linux/stringify.h> #include <linux/types.h>
-
-#ifdef __KERNEL__
#include <asm/processor.h> #include <asm/cmpxchg.h> #include <asm/barrier.h>
@@ -58,7 +56,67 @@ */ #define atomic_set(v,i) WRITE_ONCE((v)->counter, (i))
-#if XCHAL_HAVE_S32C1I
+#if XCHAL_HAVE_EXCLUSIVE
+#define ATOMIC_OP(op) \
+static inline void atomic_##op(int i, atomic_t *v) \
+{ \
+ unsigned long tmp; \
+ int result; \
+ \
+ __asm__ __volatile__( \
+ "1: l32ex %1, %3\n" \
+ " " #op " %0, %1, %2\n" \
+ " s32ex %0, %3\n" \
+ " getex %0\n" \
+ " beqz %0, 1b\n" \
+ : "=&a" (result), "=&a" (tmp) \
+ : "a" (i), "a" (v) \
+ : "memory" \
+ ); \
+} \
+
+#define ATOMIC_OP_RETURN(op) \
+static inline int atomic_##op##_return(int i, atomic_t *v) \
+{ \
+ unsigned long tmp; \
+ int result; \
+ \
+ __asm__ __volatile__( \
+ "1: l32ex %1, %3\n" \
+ " " #op " %0, %1, %2\n" \
+ " s32ex %0, %3\n" \
+ " getex %0\n" \
+ " beqz %0, 1b\n" \
+ " " #op " %0, %1, %2\n" \
+ : "=&a" (result), "=&a" (tmp) \
+ : "a" (i), "a" (v) \
+ : "memory" \
+ ); \
+ \
+ return result; \
+}
+
+#define ATOMIC_FETCH_OP(op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned long tmp; \
+ int result; \
+ \
+ __asm__ __volatile__( \
+ "1: l32ex %1, %3\n" \
+ " " #op " %0, %1, %2\n" \
+ " s32ex %0, %3\n" \
+ " getex %0\n" \
+ " beqz %0, 1b\n" \
+ : "=&a" (result), "=&a" (tmp) \
+ : "a" (i), "a" (v) \
+ : "memory" \
+ ); \
+ \
+ return tmp; \
+}
+
+#elif XCHAL_HAVE_S32C1I
#define ATOMIC_OP(op) \ static inline void atomic_##op(int i, atomic_t * v) \ { \
@@ -200,6 +258,4 @@ ATOMIC_OPS(xor) #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n))) #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-#endif /* __KERNEL__ */
-
#endif /* _XTENSA_ATOMIC_H */ diff
--git a/arch/xtensa/include/asm/barrier.h b/arch/xtensa/include/asm/barrier.h index 956596e4d437..d6f8d4ddc2bc 100644 --- a/arch/xtensa/include/asm/barrier.h +++ b/arch/xtensa/include/asm/barrier.h @@ -9,12 +9,16 @@ #ifndef _XTENSA_SYSTEM_H #define _XTENSA_SYSTEM_H +#include <asm/core.h> + #define mb() ({ __asm__ __volatile__("memw" : : : "memory"); }) #define rmb() barrier() #define wmb() mb() +#if XCHAL_HAVE_S32C1I #define __smp_mb__before_atomic() barrier() #define __smp_mb__after_atomic() barrier() +#endif #include <asm-generic/barrier.h> diff --git a/arch/xtensa/include/asm/bitops.h b/arch/xtensa/include/asm/bitops.h index d3490189792b..aeb15f4c755b 100644 --- a/arch/xtensa/include/asm/bitops.h +++ b/arch/xtensa/include/asm/bitops.h @@ -13,8 +13,6 @@ #ifndef _XTENSA_BITOPS_H #define _XTENSA_BITOPS_H -#ifdef __KERNEL__ - #ifndef _LINUX_BITOPS_H #error only <linux/bitops.h> can be included directly #endif @@ -98,7 +96,126 @@ static inline unsigned long __fls(unsigned long word) #include <asm-generic/bitops/fls64.h> -#if XCHAL_HAVE_S32C1I +#if XCHAL_HAVE_EXCLUSIVE + +static inline void set_bit(unsigned int bit, volatile unsigned long *p) +{ + unsigned long tmp; + unsigned long mask = 1UL << (bit & 31); + + p += bit >> 5; + + __asm__ __volatile__( + "1: l32ex %0, %2\n" + " or %0, %0, %1\n" + " s32ex %0, %2\n" + " getex %0\n" + " beqz %0, 1b\n" + : "=&a" (tmp) + : "a" (mask), "a" (p) + : "memory"); +} + +static inline void clear_bit(unsigned int bit, volatile unsigned long *p) +{ + unsigned long tmp; + unsigned long mask = 1UL << (bit & 31); + + p += bit >> 5; + + __asm__ __volatile__( + "1: l32ex %0, %2\n" + " and %0, %0, %1\n" + " s32ex %0, %2\n" + " getex %0\n" + " beqz %0, 1b\n" + : "=&a" (tmp) + : "a" (~mask), "a" (p) + : "memory"); +} + +static inline void change_bit(unsigned int bit, volatile unsigned long *p) +{ + unsigned long tmp; + unsigned long mask = 1UL << (bit & 31); + + p += bit >> 5; + + __asm__ __volatile__( + "1: l32ex %0, %2\n" + " xor %0, %0, %1\n" + " s32ex %0, %2\n" + " getex %0\n" + " beqz %0, 1b\n" + : "=&a" (tmp) + : "a" (~mask), "a" (p) + : "memory"); +} + +static inline int +test_and_set_bit(unsigned int bit, volatile unsigned long *p) +{ + unsigned long tmp, value; + unsigned long mask = 1UL << (bit & 31); + + p += bit >> 5; + + __asm__ __volatile__( + "1: l32ex %1, %3\n" + " or %0, %1, %2\n" + " s32ex %0, %3\n" + " getex %0\n" + " beqz %0, 1b\n" + : "=&a" (tmp), "=&a" (value) + : "a" (mask), "a" (p) + : "memory"); + + return value & mask; +} + +static inline int +test_and_clear_bit(unsigned int bit, volatile unsigned long *p) +{ + unsigned long tmp, value; + unsigned long mask = 1UL << (bit & 31); + + p += bit >> 5; + + __asm__ __volatile__( + "1: l32ex %1, %3\n" + " and %0, %1, %2\n" + " s32ex %0, %3\n" + " getex %0\n" + " beqz %0, 1b\n" + : "=&a" (tmp), "=&a" (value) + : "a" (~mask), "a" (p) + : "memory"); + + return value & mask; +} + +static inline int +test_and_change_bit(unsigned int bit, volatile unsigned long *p) +{ + unsigned long tmp, value; + unsigned long mask = 1UL << (bit & 31); + + p += bit >> 5; + + __asm__ __volatile__( + "1: l32ex %1, %3\n" + " xor %0, %1, %2\n" + " s32ex %0, %3\n" + " getex %0\n" + " beqz %0, 1b\n" + : "=&a" (tmp), "=&a" (value) + : "a" (mask), "a" (p) + : "memory"); + + return value & mask; +} + +#elif XCHAL_HAVE_S32C1I static inline void set_bit(unsigned int bit, volatile unsigned long *p) { @@ -232,6 +349,4 @@ test_and_change_bit(unsigned int bit, volatile unsigned long *p) #include <asm-generic/bitops/lock.h> #include 
<asm-generic/bitops/sched.h> -#endif /* __KERNEL__ */ - #endif /* _XTENSA_BITOPS_H */ diff --git a/arch/xtensa/include/asm/cache.h b/arch/xtensa/include/asm/cache.h index d2fd932fdb4d..b21fd133ff62 100644 --- a/arch/xtensa/include/asm/cache.h +++ b/arch/xtensa/include/asm/cache.h @@ -11,7 +11,7 @@ #ifndef _XTENSA_CACHE_H #define _XTENSA_CACHE_H -#include <variant/core.h> +#include <asm/core.h> #define L1_CACHE_SHIFT XCHAL_DCACHE_LINEWIDTH #define L1_CACHE_BYTES XCHAL_DCACHE_LINESIZE diff --git a/arch/xtensa/include/asm/checksum.h b/arch/xtensa/include/asm/checksum.h index f302ef57973a..8b687176ad72 100644 --- a/arch/xtensa/include/asm/checksum.h +++ b/arch/xtensa/include/asm/checksum.h @@ -13,7 +13,7 @@ #include <linux/in6.h> #include <linux/uaccess.h> -#include <variant/core.h> +#include <asm/core.h> /* * computes the checksum of a memory block at buff, length len, diff --git a/arch/xtensa/include/asm/cmpxchg.h b/arch/xtensa/include/asm/cmpxchg.h index 22a10c715c1f..7ccc5cbf441b 100644 --- a/arch/xtensa/include/asm/cmpxchg.h +++ b/arch/xtensa/include/asm/cmpxchg.h @@ -23,7 +23,24 @@ static inline unsigned long __cmpxchg_u32(volatile int *p, int old, int new) { -#if XCHAL_HAVE_S32C1I +#if XCHAL_HAVE_EXCLUSIVE + unsigned long tmp, result; + + __asm__ __volatile__( + "1: l32ex %0, %3\n" + " bne %0, %4, 2f\n" + " mov %1, %2\n" + " s32ex %1, %3\n" + " getex %1\n" + " beqz %1, 1b\n" + "2:\n" + : "=&a" (result), "=&a" (tmp) + : "a" (new), "a" (p), "a" (old) + : "memory" + ); + + return result; +#elif XCHAL_HAVE_S32C1I __asm__ __volatile__( " wsr %2, scompare1\n" " s32c1i %0, %1, 0\n" @@ -108,7 +125,22 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, static inline unsigned long xchg_u32(volatile int * m, unsigned long val) { -#if XCHAL_HAVE_S32C1I +#if XCHAL_HAVE_EXCLUSIVE + unsigned long tmp, result; + + __asm__ __volatile__( + "1: l32ex %0, %3\n" + " mov %1, %2\n" + " s32ex %1, %3\n" + " getex %1\n" + " beqz %1, 1b\n" + : "=&a" (result), "=&a" (tmp) + : "a" (val), "a" (m) + : "memory" + ); + + return result; +#elif XCHAL_HAVE_S32C1I unsigned long tmp, result; __asm__ __volatile__( "1: l32i %1, %2, 0\n" diff --git a/arch/xtensa/include/asm/coprocessor.h b/arch/xtensa/include/asm/coprocessor.h index 6712929a27c9..0fbe2a740b8d 100644 --- a/arch/xtensa/include/asm/coprocessor.h +++ b/arch/xtensa/include/asm/coprocessor.h @@ -12,8 +12,8 @@ #ifndef _XTENSA_COPROCESSOR_H #define _XTENSA_COPROCESSOR_H -#include <variant/core.h> #include <variant/tie.h> +#include <asm/core.h> #include <asm/types.h> #ifdef __ASSEMBLY__ diff --git a/arch/xtensa/include/asm/core.h b/arch/xtensa/include/asm/core.h new file mode 100644 index 000000000000..5b4acb7d1c07 --- /dev/null +++ b/arch/xtensa/include/asm/core.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 Cadence Design Systems Inc. 
*/ + +#ifndef _ASM_XTENSA_CORE_H +#define _ASM_XTENSA_CORE_H + +#include <variant/core.h> + +#ifndef XCHAL_HAVE_EXCLUSIVE +#define XCHAL_HAVE_EXCLUSIVE 0 +#endif + +#ifndef XCHAL_HAVE_MPU +#define XCHAL_HAVE_MPU 0 +#endif + +#ifndef XCHAL_SPANNING_WAY +#define XCHAL_SPANNING_WAY 0 +#endif + +#endif diff --git a/arch/xtensa/include/asm/futex.h b/arch/xtensa/include/asm/futex.h index 505d09eff184..9538b0f7953c 100644 --- a/arch/xtensa/include/asm/futex.h +++ b/arch/xtensa/include/asm/futex.h @@ -15,65 +15,88 @@ #ifndef _ASM_XTENSA_FUTEX_H #define _ASM_XTENSA_FUTEX_H -#ifdef __KERNEL__ - #include <linux/futex.h> #include <linux/uaccess.h> #include <linux/errno.h> -#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ +#if XCHAL_HAVE_EXCLUSIVE +#define __futex_atomic_op(insn, ret, old, uaddr, arg) \ + __asm__ __volatile( \ + "1: l32ex %[oldval], %[addr]\n" \ + insn "\n" \ + "2: s32ex %[newval], %[addr]\n" \ + " getex %[newval]\n" \ + " beqz %[newval], 1b\n" \ + " movi %[newval], 0\n" \ + "3:\n" \ + " .section .fixup,\"ax\"\n" \ + " .align 4\n" \ + " .literal_position\n" \ + "5: movi %[oldval], 3b\n" \ + " movi %[newval], %[fault]\n" \ + " jx %[oldval]\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .long 1b, 5b, 2b, 5b\n" \ + " .previous\n" \ + : [oldval] "=&r" (old), [newval] "=&r" (ret) \ + : [addr] "r" (uaddr), [oparg] "r" (arg), \ + [fault] "I" (-EFAULT) \ + : "memory") +#elif XCHAL_HAVE_S32C1I +#define __futex_atomic_op(insn, ret, old, uaddr, arg) \ __asm__ __volatile( \ - "1: l32i %0, %2, 0\n" \ + "1: l32i %[oldval], %[addr], 0\n" \ insn "\n" \ - " wsr %0, scompare1\n" \ - "2: s32c1i %1, %2, 0\n" \ - " bne %1, %0, 1b\n" \ - " movi %1, 0\n" \ + " wsr %[oldval], scompare1\n" \ + "2: s32c1i %[newval], %[addr], 0\n" \ + " bne %[newval], %[oldval], 1b\n" \ + " movi %[newval], 0\n" \ "3:\n" \ " .section .fixup,\"ax\"\n" \ " .align 4\n" \ " .literal_position\n" \ - "5: movi %0, 3b\n" \ - " movi %1, %3\n" \ - " jx %0\n" \ + "5: movi %[oldval], 3b\n" \ + " movi %[newval], %[fault]\n" \ + " jx %[oldval]\n" \ " .previous\n" \ " .section __ex_table,\"a\"\n" \ - " .long 1b,5b,2b,5b\n" \ + " .long 1b, 5b, 2b, 5b\n" \ " .previous\n" \ - : "=&r" (oldval), "=&r" (ret) \ - : "r" (uaddr), "I" (-EFAULT), "r" (oparg) \ + : [oldval] "=&r" (old), [newval] "=&r" (ret) \ + : [addr] "r" (uaddr), [oparg] "r" (arg), \ + [fault] "I" (-EFAULT) \ : "memory") +#endif static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) { +#if XCHAL_HAVE_S32C1I || XCHAL_HAVE_EXCLUSIVE int oldval = 0, ret; -#if !XCHAL_HAVE_S32C1I - return -ENOSYS; -#endif - pagefault_disable(); switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("mov %1, %4", ret, oldval, uaddr, oparg); + __futex_atomic_op("mov %[newval], %[oparg]", + ret, oldval, uaddr, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op("add %1, %0, %4", ret, oldval, uaddr, - oparg); + __futex_atomic_op("add %[newval], %[oldval], %[oparg]", + ret, oldval, uaddr, oparg); break; case FUTEX_OP_OR: - __futex_atomic_op("or %1, %0, %4", ret, oldval, uaddr, - oparg); + __futex_atomic_op("or %[newval], %[oldval], %[oparg]", + ret, oldval, uaddr, oparg); break; case FUTEX_OP_ANDN: - __futex_atomic_op("and %1, %0, %4", ret, oldval, uaddr, - ~oparg); + __futex_atomic_op("and %[newval], %[oldval], %[oparg]", + ret, oldval, uaddr, ~oparg); break; case FUTEX_OP_XOR: - __futex_atomic_op("xor %1, %0, %4", ret, oldval, uaddr, - oparg); + __futex_atomic_op("xor %[newval], %[oldval], %[oparg]", + ret, oldval, uaddr, oparg); break; 
default: ret = -ENOSYS; @@ -85,43 +108,60 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, *oval = oldval; return ret; +#else + return -ENOSYS; +#endif } static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { +#if XCHAL_HAVE_S32C1I || XCHAL_HAVE_EXCLUSIVE + unsigned long tmp; int ret = 0; if (!access_ok(uaddr, sizeof(u32))) return -EFAULT; -#if !XCHAL_HAVE_S32C1I - return -ENOSYS; -#endif - __asm__ __volatile__ ( " # futex_atomic_cmpxchg_inatomic\n" - " wsr %5, scompare1\n" - "1: s32c1i %1, %4, 0\n" - " s32i %1, %6, 0\n" +#if XCHAL_HAVE_EXCLUSIVE + "1: l32ex %[tmp], %[addr]\n" + " s32i %[tmp], %[uval], 0\n" + " bne %[tmp], %[oldval], 2f\n" + " mov %[tmp], %[newval]\n" + "3: s32ex %[tmp], %[addr]\n" + " getex %[tmp]\n" + " beqz %[tmp], 1b\n" +#elif XCHAL_HAVE_S32C1I + " wsr %[oldval], scompare1\n" + "1: s32c1i %[newval], %[addr], 0\n" + " s32i %[newval], %[uval], 0\n" +#endif "2:\n" " .section .fixup,\"ax\"\n" " .align 4\n" " .literal_position\n" - "4: movi %1, 2b\n" - " movi %0, %7\n" - " jx %1\n" + "4: movi %[tmp], 2b\n" + " movi %[ret], %[fault]\n" + " jx %[tmp]\n" " .previous\n" " .section __ex_table,\"a\"\n" - " .long 1b,4b\n" + " .long 1b, 4b\n" +#if XCHAL_HAVE_EXCLUSIVE + " .long 3b, 4b\n" +#endif " .previous\n" - : "+r" (ret), "+r" (newval), "+m" (*uaddr), "+m" (*uval) - : "r" (uaddr), "r" (oldval), "r" (uval), "I" (-EFAULT) + : [ret] "+r" (ret), [newval] "+r" (newval), [tmp] "=&r" (tmp) + : [addr] "r" (uaddr), [oldval] "r" (oldval), [uval] "r" (uval), + [fault] "I" (-EFAULT) : "memory"); return ret; +#else + return -ENOSYS; +#endif } -#endif /* __KERNEL__ */ #endif /* _ASM_XTENSA_FUTEX_H */ diff --git a/arch/xtensa/include/asm/initialize_mmu.h b/arch/xtensa/include/asm/initialize_mmu.h index 10e9852b2fb4..323d05789159 100644 --- a/arch/xtensa/include/asm/initialize_mmu.h +++ b/arch/xtensa/include/asm/initialize_mmu.h @@ -33,10 +33,6 @@ #define CA_WRITEBACK (0x4) #endif -#ifndef XCHAL_SPANNING_WAY -#define XCHAL_SPANNING_WAY 0 -#endif - #ifdef __ASSEMBLY__ #define XTENSA_HWVERSION_RC_2009_0 230000 @@ -181,11 +177,42 @@ .macro initialize_cacheattr -#if !defined(CONFIG_MMU) && XCHAL_HAVE_TLBS +#if !defined(CONFIG_MMU) && (XCHAL_HAVE_TLBS || XCHAL_HAVE_MPU) #if CONFIG_MEMMAP_CACHEATTR == 0x22222222 && XCHAL_HAVE_PTP_MMU #error Default MEMMAP_CACHEATTR of 0x22222222 does not work with full MMU. 
#endif +#if XCHAL_HAVE_MPU + .data + .align 4 +.Lattribute_table: + .long 0x000000, 0x1fff00, 0x1ddf00, 0x1eef00 + .long 0x006600, 0x000000, 0x000000, 0x000000 + .long 0x000000, 0x000000, 0x000000, 0x000000 + .long 0x000000, 0x000000, 0x000000, 0x000000 + .previous + + movi a3, .Lattribute_table + movi a4, CONFIG_MEMMAP_CACHEATTR + movi a5, 1 + movi a6, XCHAL_MPU_ENTRIES + movi a10, 0x20000000 + movi a11, -1 +1: + sub a5, a5, a10 + extui a8, a4, 28, 4 + beq a8, a11, 2f + addi a6, a6, -1 + mov a11, a8 +2: + addx4 a9, a8, a3 + l32i a9, a9, 0 + or a9, a9, a6 + wptlb a9, a5 + slli a4, a4, 4 + bgeu a5, a10, 1b + +#else movi a5, XCHAL_SPANNING_WAY movi a6, ~_PAGE_ATTRIB_MASK movi a4, CONFIG_MEMMAP_CACHEATTR @@ -208,6 +235,7 @@ isync #endif +#endif .endm diff --git a/arch/xtensa/include/asm/io.h b/arch/xtensa/include/asm/io.h index acc5bb2cf1c7..da3e783f896b 100644 --- a/arch/xtensa/include/asm/io.h +++ b/arch/xtensa/include/asm/io.h @@ -11,7 +11,6 @@ #ifndef _XTENSA_IO_H #define _XTENSA_IO_H -#ifdef __KERNEL__ #include <asm/byteorder.h> #include <asm/page.h> #include <asm/vectors.h> @@ -78,8 +77,6 @@ static inline void iounmap(volatile void __iomem *addr) #endif /* CONFIG_MMU */ -#endif /* __KERNEL__ */ - #include <asm-generic/io.h> #endif /* _XTENSA_IO_H */ diff --git a/arch/xtensa/include/asm/irq.h b/arch/xtensa/include/asm/irq.h index 6c6ed23e0c79..0f71a51dab25 100644 --- a/arch/xtensa/include/asm/irq.h +++ b/arch/xtensa/include/asm/irq.h @@ -12,7 +12,7 @@ #define _XTENSA_IRQ_H #include <linux/init.h> -#include <variant/core.h> +#include <asm/core.h> #ifdef CONFIG_PLATFORM_NR_IRQS # define PLATFORM_NR_IRQS CONFIG_PLATFORM_NR_IRQS diff --git a/arch/xtensa/include/asm/irqflags.h b/arch/xtensa/include/asm/irqflags.h index 9b5e8526afe5..12890681587b 100644 --- a/arch/xtensa/include/asm/irqflags.h +++ b/arch/xtensa/include/asm/irqflags.h @@ -27,7 +27,7 @@ static inline unsigned long arch_local_irq_save(void) { unsigned long flags; #if XTENSA_FAKE_NMI -#if defined(CONFIG_DEBUG_KERNEL) && (LOCKLEVEL | TOPLEVEL) >= XCHAL_DEBUGLEVEL +#if defined(CONFIG_DEBUG_MISC) && (LOCKLEVEL | TOPLEVEL) >= XCHAL_DEBUGLEVEL unsigned long tmp; asm volatile("rsr %0, ps\t\n" diff --git a/arch/xtensa/include/asm/pci-bridge.h b/arch/xtensa/include/asm/pci-bridge.h index 0b68c76ec1e6..405526912d9a 100644 --- a/arch/xtensa/include/asm/pci-bridge.h +++ b/arch/xtensa/include/asm/pci-bridge.h @@ -11,8 +11,6 @@ #ifndef _XTENSA_PCI_BRIDGE_H #define _XTENSA_PCI_BRIDGE_H -#ifdef __KERNEL__ - struct device_node; struct pci_controller; @@ -84,5 +82,4 @@ int early_write_config_byte(struct pci_controller*, int, int, int, u8); int early_write_config_word(struct pci_controller*, int, int, int, u16); int early_write_config_dword(struct pci_controller*, int, int, int, u32); -#endif /* __KERNEL__ */ #endif /* _XTENSA_PCI_BRIDGE_H */ diff --git a/arch/xtensa/include/asm/pci.h b/arch/xtensa/include/asm/pci.h index 883024054b05..8e2b48a268db 100644 --- a/arch/xtensa/include/asm/pci.h +++ b/arch/xtensa/include/asm/pci.h @@ -11,8 +11,6 @@ #ifndef _XTENSA_PCI_H #define _XTENSA_PCI_H -#ifdef __KERNEL__ - /* Can be used to override the logic in pci_scan_bus for skipping * already-configured bus numbers - to be used for buggy BIOSes * or architectures with incomplete PCI setup by the loader @@ -45,8 +43,6 @@ #define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 #define arch_can_pci_mmap_io() 1 -#endif /* __KERNEL__ */ - /* Generic PCI */ #include <asm-generic/pci.h> diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h index 
b3b388ff2f01..368284c972e7 100644 --- a/arch/xtensa/include/asm/pgalloc.h +++ b/arch/xtensa/include/asm/pgalloc.h @@ -11,8 +11,6 @@ #ifndef _XTENSA_PGALLOC_H #define _XTENSA_PGALLOC_H -#ifdef __KERNEL__ - #include <linux/highmem.h> #include <linux/slab.h> @@ -79,5 +77,4 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte) } #define pmd_pgtable(pmd) pmd_page(pmd) -#endif /* __KERNEL__ */ #endif /* _XTENSA_PGALLOC_H */ diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index 0c14018d1c26..19f6b54e358b 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -10,7 +10,7 @@ #ifndef _XTENSA_PROCESSOR_H #define _XTENSA_PROCESSOR_H -#include <variant/core.h> +#include <asm/core.h> #include <linux/compiler.h> #include <linux/stringify.h> diff --git a/arch/xtensa/include/asm/ptrace.h b/arch/xtensa/include/asm/ptrace.h index 62a58d2567e9..b109416dc07e 100644 --- a/arch/xtensa/include/asm/ptrace.h +++ b/arch/xtensa/include/asm/ptrace.h @@ -80,7 +80,7 @@ struct pt_regs { unsigned long areg[16]; }; -#include <variant/core.h> +#include <asm/core.h> # define arch_has_single_step() (1) # define task_pt_regs(tsk) ((struct pt_regs*) \ diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h index 91dc06d58060..359ab40e935a 100644 --- a/arch/xtensa/include/asm/syscall.h +++ b/arch/xtensa/include/asm/syscall.h @@ -14,7 +14,7 @@ #include <asm/ptrace.h> #include <uapi/linux/audit.h> -static inline int syscall_get_arch(void) +static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_XTENSA; } diff --git a/arch/xtensa/include/asm/vectors.h b/arch/xtensa/include/asm/vectors.h index 7111280c8842..79fe3007919e 100644 --- a/arch/xtensa/include/asm/vectors.h +++ b/arch/xtensa/include/asm/vectors.h @@ -18,7 +18,7 @@ #ifndef _XTENSA_VECTORS_H #define _XTENSA_VECTORS_H -#include <variant/core.h> +#include <asm/core.h> #include <asm/kmem_layout.h> #if XCHAL_HAVE_PTP_MMU diff --git a/arch/xtensa/include/uapi/asm/sockios.h b/arch/xtensa/include/uapi/asm/sockios.h index fb8ac3607189..1a1f58f4b75a 100644 --- a/arch/xtensa/include/uapi/asm/sockios.h +++ b/arch/xtensa/include/uapi/asm/sockios.h @@ -26,7 +26,7 @@ #define SIOCSPGRP _IOW('s', 8, pid_t) #define SIOCGPGRP _IOR('s', 9, pid_t) -#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ -#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ +#define SIOCGSTAMP_OLD 0x8906 /* Get stamp (timeval) */ +#define SIOCGSTAMPNS_OLD 0x8907 /* Get stamp (timespec) */ #endif /* _XTENSA_SOCKIOS_H */ diff --git a/arch/xtensa/kernel/hw_breakpoint.c b/arch/xtensa/kernel/hw_breakpoint.c index 4f20416061fb..285fb2942b06 100644 --- a/arch/xtensa/kernel/hw_breakpoint.c +++ b/arch/xtensa/kernel/hw_breakpoint.c @@ -12,7 +12,7 @@ #include <linux/log2.h> #include <linux/percpu.h> #include <linux/perf_event.h> -#include <variant/core.h> +#include <asm/core.h> /* Breakpoint currently in use for each IBREAKA. */ static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[XCHAL_NUM_IBREAK]); diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 4ec6fbb696bf..c0ec24349421 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -651,6 +651,9 @@ c_show(struct seq_file *f, void *slot) #if XCHAL_HAVE_S32C1I "s32c1i " #endif +#if XCHAL_HAVE_EXCLUSIVE + "exclusive " +#endif "\n"); /* Registers. 
*/ diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index 3699d6d3e479..83b244ce61ee 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -126,7 +126,7 @@ void secondary_start_kernel(void) init_mmu(); -#ifdef CONFIG_DEBUG_KERNEL +#ifdef CONFIG_DEBUG_MISC if (boot_secondary_processors == 0) { pr_debug("%s: boot_secondary_processors:%d; Hanging cpu:%d\n", __func__, boot_secondary_processors, cpu); diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index b80a430453b1..943f10639a93 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -18,8 +18,8 @@ #include <asm/page.h> #include <asm/thread_info.h> +#include <asm/core.h> #include <asm/vectors.h> -#include <variant/core.h> OUTPUT_ARCH(xtensa) ENTRY(_start) diff --git a/arch/xtensa/lib/checksum.S b/arch/xtensa/lib/checksum.S index 528fe0dd9339..d82c20c1fb7a 100644 --- a/arch/xtensa/lib/checksum.S +++ b/arch/xtensa/lib/checksum.S @@ -16,8 +16,8 @@ #include <linux/errno.h> #include <linux/linkage.h> -#include <variant/core.h> #include <asm/asmmacro.h> +#include <asm/core.h> /* * computes a partial checksum, e.g. for TCP/UDP fragments diff --git a/arch/xtensa/lib/memcopy.S b/arch/xtensa/lib/memcopy.S index c0f6981719d6..efecfd7ed8cc 100644 --- a/arch/xtensa/lib/memcopy.S +++ b/arch/xtensa/lib/memcopy.S @@ -10,8 +10,8 @@ */ #include <linux/linkage.h> -#include <variant/core.h> #include <asm/asmmacro.h> +#include <asm/core.h> /* * void *memcpy(void *dst, const void *src, size_t len); diff --git a/arch/xtensa/lib/memset.S b/arch/xtensa/lib/memset.S index 276747dec300..8632eacbdc80 100644 --- a/arch/xtensa/lib/memset.S +++ b/arch/xtensa/lib/memset.S @@ -12,8 +12,8 @@ */ #include <linux/linkage.h> -#include <variant/core.h> #include <asm/asmmacro.h> +#include <asm/core.h> /* * void *memset(void *dst, int c, size_t length) diff --git a/arch/xtensa/lib/strncpy_user.S b/arch/xtensa/lib/strncpy_user.S index 5fce16b67dca..c4c6c8578d59 100644 --- a/arch/xtensa/lib/strncpy_user.S +++ b/arch/xtensa/lib/strncpy_user.S @@ -13,8 +13,8 @@ #include <linux/errno.h> #include <linux/linkage.h> -#include <variant/core.h> #include <asm/asmmacro.h> +#include <asm/core.h> /* * char *__strncpy_user(char *dst, const char *src, size_t len) diff --git a/arch/xtensa/lib/strnlen_user.S b/arch/xtensa/lib/strnlen_user.S index 0b956ce7f386..1f2ca2bb2ab3 100644 --- a/arch/xtensa/lib/strnlen_user.S +++ b/arch/xtensa/lib/strnlen_user.S @@ -12,8 +12,8 @@ */ #include <linux/linkage.h> -#include <variant/core.h> #include <asm/asmmacro.h> +#include <asm/core.h> /* * size_t __strnlen_user(const char *s, size_t len) diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S index 64ab1971324f..228607e30bc2 100644 --- a/arch/xtensa/lib/usercopy.S +++ b/arch/xtensa/lib/usercopy.S @@ -54,8 +54,8 @@ */ #include <linux/linkage.h> -#include <variant/core.h> #include <asm/asmmacro.h> +#include <asm/core.h> .text ENTRY(__xtensa_copy_user) diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index d49861099684..b51746f2b80b 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -216,11 +216,6 @@ void free_initrd_mem(unsigned long start, unsigned long end) } #endif -void free_initmem(void) -{ - free_initmem_default(-1); -} - static void __init parse_memmap_one(char *p) { char *oldp; diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index 026211e7ab09..f9cd45860bee 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ 
b/arch/xtensa/platforms/iss/simdisk.c
@@ -297,8 +297,7 @@ out_alloc_disk: blk_cleanup_queue(dev->queue); dev->queue = NULL; out_alloc_queue:
- simc_close(dev->fd);
- return -EIO;
+ return -ENOMEM;
} static int __init simdisk_init(void)
diff --git a/arch/xtensa/platforms/xt2000/include/platform/hardware.h b/arch/xtensa/platforms/xt2000/include/platform/hardware.h index 8e5e0d6a81ec..9f213f573330 100644 --- a/arch/xtensa/platforms/xt2000/include/platform/hardware.h +++ b/arch/xtensa/platforms/xt2000/include/platform/hardware.h
@@ -15,7 +15,7 @@ #ifndef _XTENSA_XT2000_HARDWARE_H #define _XTENSA_XT2000_HARDWARE_H
-#include <variant/core.h>
+#include <asm/core.h>
/* * On-board components.
diff --git a/arch/xtensa/platforms/xt2000/include/platform/serial.h b/arch/xtensa/platforms/xt2000/include/platform/serial.h index 7226cf732b47..cde804827626 100644 --- a/arch/xtensa/platforms/xt2000/include/platform/serial.h +++ b/arch/xtensa/platforms/xt2000/include/platform/serial.h
@@ -11,7 +11,7 @@ #ifndef _XTENSA_XT2000_SERIAL_H #define _XTENSA_XT2000_SERIAL_H
-#include <variant/core.h>
+#include <asm/core.h>
#include <asm/io.h> /* National-Semi PC16552D DUART: */
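
A note on the bpf_jit_comp32.c hunks above, since two of them are easier to follow with the intended arithmetic written out in plain C. The rewritten emit_ia32_neg64() relies on the identity that a two's-complement 64-bit negate can be built from 32-bit halves with the neg/adc/neg sequence it emits. A minimal sketch of that identity, illustrative only and not kernel code (the helper name is made up):

#include <stdint.h>

/* Two's-complement negate of hi:lo, mirroring
 * "neg dreg_lo; adc dreg_hi,0x0; neg dreg_hi". */
static void neg64_sketch(uint32_t *lo, uint32_t *hi)
{
	uint32_t carry = (*lo != 0);	/* CF left behind by "neg dreg_lo" */

	*lo = -*lo;			/* neg dreg_lo */
	*hi += carry;			/* adc dreg_hi, 0x0 */
	*hi = -*hi;			/* neg dreg_hi */
}

For example, negating hi:lo = 0x00000000:0x00000001 yields 0xFFFFFFFF:0xFFFFFFFF, as a 64-bit neg would.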
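The new signed 64-bit jump cases (BPF_JSGT/JSLT/JSGE/JSLE) are likewise lowered as a signed compare on the high 32-bit words plus, when the high words are equal, an unsigned compare on the low words; that is what the is_cmp_lo argument of get_cond_jmp_opcode() selects. A sketch of the comparison the generated code performs, using BPF_JSGT as the example (illustrative only; the helper name is made up):

#include <stdbool.h>
#include <stdint.h>

/* Semantics of the JITed BPF_JSGT (signed 64-bit '>') on 32-bit halves. */
static bool jsgt64_sketch(int64_t dst, int64_t src)
{
	int32_t dst_hi = (int32_t)(dst >> 32);
	int32_t src_hi = (int32_t)(src >> 32);

	if (dst_hi != src_hi)			/* cmp dreg_hi,sreg_hi; jne */
		return dst_hi > src_hi;		/* signed jump: IA32_JG */
	return (uint32_t)dst > (uint32_t)src;	/* unsigned jump: IA32_JA */
}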
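On the xtensa side, every new XCHAL_HAVE_EXCLUSIVE path in atomic.h, bitops.h, cmpxchg.h and futex.h follows the same load-exclusive/store-exclusive retry shape: l32ex loads the word and marks it for exclusive access, s32ex attempts the store, getex retrieves whether the store succeeded, and beqz loops on failure. A C-level sketch of that loop for atomic add; load_exclusive() and store_exclusive() are hypothetical stand-ins for the instruction sequence, not real intrinsics:

#include <stdbool.h>

/* Hypothetical intrinsics standing in for l32ex and s32ex + getex. */
extern int load_exclusive(volatile int *p);		/* l32ex */
extern bool store_exclusive(volatile int *p, int val);	/* s32ex; getex */

static void atomic_add_sketch(int i, volatile int *counter)
{
	int old;

	do {
		old = load_exclusive(counter);		/* 1: l32ex %1, %3 */
	} while (!store_exclusive(counter, old + i));	/* add; s32ex; getex; beqz 1b */
}

The loop retries whenever another agent touched the word between the exclusive load and store, which is the same forward-progress contract the existing s32c1i compare-and-swap paths provide.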