diff options
Diffstat (limited to 'arch/arm64')
71 files changed, 1424 insertions, 830 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index bc8d3702f4ed..dde7e9f28d0c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -153,7 +153,6 @@ config ARM64 select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT - select HAVE_ARCH_PFN_VALID select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP_FILTER @@ -191,6 +190,7 @@ config ARM64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUTEX_CMPXCHG if FUTEX select MMU_GATHER_RCU_TABLE_FREE @@ -665,6 +665,121 @@ config ARM64_ERRATUM_1508412 If unsure, say Y. +config ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + bool + +config ARM64_ERRATUM_2119858 + bool "Cortex-A710: 2119858: workaround TRBE overwriting trace data in FILL mode" + default y + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + depends on CORESIGHT_TRBE + select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + help + This option adds the workaround for ARM Cortex-A710 erratum 2119858. + + Affected Cortex-A710 cores could overwrite up to 3 cache lines of trace + data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in + the event of a WRAP event. + + Work around the issue by always making sure we move the TRBPTR_EL1 by + 256 bytes before enabling the buffer and filling the first 256 bytes of + the buffer with ETM ignore packets upon disabling. + + If unsure, say Y. + +config ARM64_ERRATUM_2139208 + bool "Neoverse-N2: 2139208: workaround TRBE overwriting trace data in FILL mode" + default y + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + depends on CORESIGHT_TRBE + select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + help + This option adds the workaround for ARM Neoverse-N2 erratum 2139208. + + Affected Neoverse-N2 cores could overwrite up to 3 cache lines of trace + data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in + the event of a WRAP event. + + Work around the issue by always making sure we move the TRBPTR_EL1 by + 256 bytes before enabling the buffer and filling the first 256 bytes of + the buffer with ETM ignore packets upon disabling. + + If unsure, say Y. + +config ARM64_WORKAROUND_TSB_FLUSH_FAILURE + bool + +config ARM64_ERRATUM_2054223 + bool "Cortex-A710: 2054223: workaround TSB instruction failing to flush trace" + default y + select ARM64_WORKAROUND_TSB_FLUSH_FAILURE + help + Enable workaround for ARM Cortex-A710 erratum 2054223 + + Affected cores may fail to flush the trace data on a TSB instruction, when + the PE is in trace prohibited state. This will cause losing a few bytes + of the trace cached. + + Workaround is to issue two TSB consecutively on affected cores. + + If unsure, say Y. + +config ARM64_ERRATUM_2067961 + bool "Neoverse-N2: 2067961: workaround TSB instruction failing to flush trace" + default y + select ARM64_WORKAROUND_TSB_FLUSH_FAILURE + help + Enable workaround for ARM Neoverse-N2 erratum 2067961 + + Affected cores may fail to flush the trace data on a TSB instruction, when + the PE is in trace prohibited state. This will cause losing a few bytes + of the trace cached. + + Workaround is to issue two TSB consecutively on affected cores. + + If unsure, say Y. + +config ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE + bool + +config ARM64_ERRATUM_2253138 + bool "Neoverse-N2: 2253138: workaround TRBE writing to address out-of-range" + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + depends on CORESIGHT_TRBE + default y + select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE + help + This option adds the workaround for ARM Neoverse-N2 erratum 2253138. + + Affected Neoverse-N2 cores might write to an out-of-range address, not reserved + for TRBE. Under some conditions, the TRBE might generate a write to the next + virtually addressed page following the last page of the TRBE address space + (i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base. + + Work around this in the driver by always making sure that there is a + page beyond the TRBLIMITR_EL1.LIMIT, within the space allowed for the TRBE. + + If unsure, say Y. + +config ARM64_ERRATUM_2224489 + bool "Cortex-A710: 2224489: workaround TRBE writing to address out-of-range" + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + depends on CORESIGHT_TRBE + default y + select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE + help + This option adds the workaround for ARM Cortex-A710 erratum 2224489. + + Affected Cortex-A710 cores might write to an out-of-range address, not reserved + for TRBE. Under some conditions, the TRBE might generate a write to the next + virtually addressed page following the last page of the TRBE address space + (i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base. + + Work around this in the driver by always making sure that there is a + page beyond the TRBLIMITR_EL1.LIMIT, within the space allowed for the TRBE. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y @@ -1143,7 +1258,7 @@ config CRASH_DUMP config TRANS_TABLE def_bool y - depends on HIBERNATION + depends on HIBERNATION || KEXEC_CORE config XEN_DOM0 def_bool y @@ -1272,7 +1387,8 @@ config KUSER_HELPERS config COMPAT_VDSO bool "Enable vDSO for 32-bit applications" - depends on !CPU_BIG_ENDIAN && "$(CROSS_COMPILE_COMPAT)" != "" + depends on !CPU_BIG_ENDIAN + depends on (CC_IS_CLANG && LD_IS_LLD) || "$(CROSS_COMPILE_COMPAT)" != "" select GENERIC_COMPAT_VDSO default y help diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 519ac1f7f859..af1fafbe7e1d 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -64,14 +64,26 @@ DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *, static inline notrace u64 arch_timer_read_cntpct_el0(void) { - isb(); - return read_sysreg(cntpct_el0); + u64 cnt; + + asm volatile(ALTERNATIVE("isb\n mrs %0, cntpct_el0", + "nop\n" __mrs_s("%0", SYS_CNTPCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (cnt)); + + return cnt; } static inline notrace u64 arch_timer_read_cntvct_el0(void) { - isb(); - return read_sysreg(cntvct_el0); + u64 cnt; + + asm volatile(ALTERNATIVE("isb\n mrs %0, cntvct_el0", + "nop\n" __mrs_s("%0", SYS_CNTVCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (cnt)); + + return cnt; } #define arch_timer_reg_read_stable(reg) \ @@ -174,8 +186,10 @@ static __always_inline u64 __arch_counter_get_cntpct(void) { u64 cnt; - isb(); - cnt = read_sysreg(cntpct_el0); + asm volatile(ALTERNATIVE("isb\n mrs %0, cntpct_el0", + "nop\n" __mrs_s("%0", SYS_CNTPCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (cnt)); arch_counter_enforce_ordering(cnt); return cnt; } @@ -193,8 +207,10 @@ static __always_inline u64 __arch_counter_get_cntvct(void) { u64 cnt; - isb(); - cnt = read_sysreg(cntvct_el0); + asm volatile(ALTERNATIVE("isb\n mrs %0, cntvct_el0", + "nop\n" __mrs_s("%0", SYS_CNTVCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (cnt)); arch_counter_enforce_ordering(cnt); return cnt; } diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h new file mode 100644 index 000000000000..c39f2437e08e --- /dev/null +++ b/arch/arm64/include/asm/asm-extable.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_ASM_EXTABLE_H +#define __ASM_ASM_EXTABLE_H + +#define EX_TYPE_NONE 0 +#define EX_TYPE_FIXUP 1 +#define EX_TYPE_BPF 2 +#define EX_TYPE_UACCESS_ERR_ZERO 3 +#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4 + +#ifdef __ASSEMBLY__ + +#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ + .pushsection __ex_table, "a"; \ + .align 2; \ + .long ((insn) - .); \ + .long ((fixup) - .); \ + .short (type); \ + .short (data); \ + .popsection; + +/* + * Create an exception table entry for `insn`, which will branch to `fixup` + * when an unhandled fault is taken. + */ + .macro _asm_extable, insn, fixup + __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0) + .endm + +/* + * Create an exception table entry for `insn` if `fixup` is provided. Otherwise + * do nothing. + */ + .macro _cond_extable, insn, fixup + .ifnc \fixup, + _asm_extable \insn, \fixup + .endif + .endm + +#else /* __ASSEMBLY__ */ + +#include <linux/bits.h> +#include <linux/stringify.h> + +#include <asm/gpr-num.h> + +#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ + ".pushsection __ex_table, \"a\"\n" \ + ".align 2\n" \ + ".long ((" insn ") - .)\n" \ + ".long ((" fixup ") - .)\n" \ + ".short (" type ")\n" \ + ".short (" data ")\n" \ + ".popsection\n" + +#define _ASM_EXTABLE(insn, fixup) \ + __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0") + +#define EX_DATA_REG_ERR_SHIFT 0 +#define EX_DATA_REG_ERR GENMASK(4, 0) +#define EX_DATA_REG_ZERO_SHIFT 5 +#define EX_DATA_REG_ZERO GENMASK(9, 5) + +#define EX_DATA_REG(reg, gpr) \ + "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")" + +#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \ + __DEFINE_ASM_GPR_NUMS \ + __ASM_EXTABLE_RAW(#insn, #fixup, \ + __stringify(EX_TYPE_UACCESS_ERR_ZERO), \ + "(" \ + EX_DATA_REG(ERR, err) " | " \ + EX_DATA_REG(ZERO, zero) \ + ")") + +#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr) + +#define EX_DATA_REG_DATA_SHIFT 0 +#define EX_DATA_REG_DATA GENMASK(4, 0) +#define EX_DATA_REG_ADDR_SHIFT 5 +#define EX_DATA_REG_ADDR GENMASK(9, 5) + +#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \ + __DEFINE_ASM_GPR_NUMS \ + __ASM_EXTABLE_RAW(#insn, #fixup, \ + __stringify(EX_TYPE_LOAD_UNALIGNED_ZEROPAD), \ + "(" \ + EX_DATA_REG(DATA, data) " | " \ + EX_DATA_REG(ADDR, addr) \ + ")") + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index ccedf548dac9..0557af834e03 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -3,10 +3,11 @@ #define __ASM_ASM_UACCESS_H #include <asm/alternative-macros.h> +#include <asm/asm-extable.h> +#include <asm/assembler.h> #include <asm/kernel-pgtable.h> #include <asm/mmu.h> #include <asm/sysreg.h> -#include <asm/assembler.h> /* * User access enabling/disabling macros. @@ -58,6 +59,10 @@ alternative_else_nop_endif .endm #endif +#define USER(l, x...) \ +9999: x; \ + _asm_extable 9999b, l + /* * Generate the assembly for LDTR/STTR with exception table entries. * This is complicated as there is no post-increment or pair versions of the diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index bfa58409a4d4..136d13f3d6e9 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -14,9 +14,10 @@ #include <asm-generic/export.h> -#include <asm/asm-offsets.h> #include <asm/alternative.h> #include <asm/asm-bug.h> +#include <asm/asm-extable.h> +#include <asm/asm-offsets.h> #include <asm/cpufeature.h> #include <asm/cputype.h> #include <asm/debug-monitors.h> @@ -130,32 +131,6 @@ alternative_endif .endm /* - * Create an exception table entry for `insn`, which will branch to `fixup` - * when an unhandled fault is taken. - */ - .macro _asm_extable, insn, fixup - .pushsection __ex_table, "a" - .align 3 - .long (\insn - .), (\fixup - .) - .popsection - .endm - -/* - * Create an exception table entry for `insn` if `fixup` is provided. Otherwise - * do nothing. - */ - .macro _cond_extable, insn, fixup - .ifnc \fixup, - _asm_extable \insn, \fixup - .endif - .endm - - -#define USER(l, x...) \ -9999: x; \ - _asm_extable 9999b, l - -/* * Register aliases. */ lr .req x30 // link register @@ -405,19 +380,19 @@ alternative_endif /* * Macro to perform a data cache maintenance for the interval - * [start, end) + * [start, end) with dcache line size explicitly provided. * * op: operation passed to dc instruction * domain: domain used in dsb instruciton * start: starting virtual address of the region * end: end virtual address of the region + * linesz: dcache line size * fixup: optional label to branch to on user fault - * Corrupts: start, end, tmp1, tmp2 + * Corrupts: start, end, tmp */ - .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup - dcache_line_size \tmp1, \tmp2 - sub \tmp2, \tmp1, #1 - bic \start, \start, \tmp2 + .macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup + sub \tmp, \linesz, #1 + bic \start, \start, \tmp .Ldcache_op\@: .ifc \op, cvau __dcache_op_workaround_clean_cache \op, \start @@ -436,7 +411,7 @@ alternative_endif .endif .endif .endif - add \start, \start, \tmp1 + add \start, \start, \linesz cmp \start, \end b.lo .Ldcache_op\@ dsb \domain @@ -445,6 +420,22 @@ alternative_endif .endm /* + * Macro to perform a data cache maintenance for the interval + * [start, end) + * + * op: operation passed to dc instruction + * domain: domain used in dsb instruciton + * start: starting virtual address of the region + * end: end virtual address of the region + * fixup: optional label to branch to on user fault + * Corrupts: start, end, tmp1, tmp2 + */ + .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup + dcache_line_size \tmp1, \tmp2 + dcache_by_myline_op \op, \domain, \start, \end, \tmp1, \tmp2, \fixup + .endm + +/* * Macro to perform an instruction cache maintenance for the interval * [start, end) * @@ -468,6 +459,25 @@ alternative_endif .endm /* + * To prevent the possibility of old and new partial table walks being visible + * in the tlb, switch the ttbr to a zero page when we invalidate the old + * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i + * Even switching to our copied tables will cause a changed output address at + * each stage of the walk. + */ + .macro break_before_make_ttbr_switch zero_page, page_table, tmp, tmp2 + phys_to_ttbr \tmp, \zero_page + msr ttbr1_el1, \tmp + isb + tlbi vmalle1 + dsb nsh + phys_to_ttbr \tmp, \page_table + offset_ttbr1 \tmp, \tmp2 + msr ttbr1_el1, \tmp + isb + .endm + +/* * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present */ .macro reset_pmuserenr_el0, tmpreg diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 451e11e5fd23..1c5a00598458 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -23,7 +23,7 @@ #define dsb(opt) asm volatile("dsb " #opt : : : "memory") #define psb_csync() asm volatile("hint #17" : : : "memory") -#define tsb_csync() asm volatile("hint #18" : : : "memory") +#define __tsb_csync() asm volatile("hint #18" : : : "memory") #define csdb() asm volatile("hint #20" : : : "memory") #ifdef CONFIG_ARM64_PSEUDO_NMI @@ -46,6 +46,20 @@ #define dma_rmb() dmb(oshld) #define dma_wmb() dmb(oshst) + +#define tsb_csync() \ + do { \ + /* \ + * CPUs affected by Arm Erratum 2054223 or 2067961 needs \ + * another TSB to ensure the trace is flushed. The barriers \ + * don't have to be strictly back to back, as long as the \ + * CPU is in trace prohibited state. \ + */ \ + if (cpus_have_final_cap(ARM64_WORKAROUND_TSB_FLUSH_FAILURE)) \ + __tsb_csync(); \ + __tsb_csync(); \ + } while (0) + /* * Generate a mask for array_index__nospec() that is ~0UL when 0 <= idx < sz * and 0 otherwise. diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 6231e1f0abe7..19b8441aa8f2 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -73,6 +73,8 @@ #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_CORTEX_A710 0xD47 +#define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define APM_CPU_PART_POTENZA 0x000 @@ -113,6 +115,8 @@ #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) +#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 29f97eb3dad4..a305ce256090 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -227,6 +227,9 @@ #define ESR_ELx_SYS64_ISS_SYS_CNTVCT (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 2, 14, 0) | \ ESR_ELx_SYS64_ISS_DIR_READ) +#define ESR_ELx_SYS64_ISS_SYS_CNTVCTSS (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 6, 14, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + #define ESR_ELx_SYS64_ISS_SYS_CNTFRQ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \ ESR_ELx_SYS64_ISS_DIR_READ) @@ -317,6 +320,9 @@ #define ESR_ELx_CP15_64_ISS_SYS_CNTVCT (ESR_ELx_CP15_64_ISS_SYS_VAL(1, 14) | \ ESR_ELx_CP15_64_ISS_DIR_READ) +#define ESR_ELx_CP15_64_ISS_SYS_CNTVCTSS (ESR_ELx_CP15_64_ISS_SYS_VAL(9, 14) | \ + ESR_ELx_CP15_64_ISS_DIR_READ) + #define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\ ESR_ELx_CP15_32_ISS_DIR_READ) diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h index b15eb4a3e6b2..8b300dd28def 100644 --- a/arch/arm64/include/asm/extable.h +++ b/arch/arm64/include/asm/extable.h @@ -18,10 +18,21 @@ struct exception_table_entry { int insn, fixup; + short type, data; }; #define ARCH_HAS_RELATIVE_EXTABLE +#define swap_ex_entry_fixup(a, b, tmp, delta) \ +do { \ + (a)->fixup = (b)->fixup + (delta); \ + (b)->fixup = (tmp).fixup - (delta); \ + (a)->type = (b)->type; \ + (b)->type = (tmp).type; \ + (a)->data = (b)->data; \ + (b)->data = (tmp).data; \ +} while (0) + static inline bool in_bpf_jit(struct pt_regs *regs) { if (!IS_ENABLED(CONFIG_BPF_JIT)) @@ -32,16 +43,16 @@ static inline bool in_bpf_jit(struct pt_regs *regs) } #ifdef CONFIG_BPF_JIT -int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, - struct pt_regs *regs); +bool ex_handler_bpf(const struct exception_table_entry *ex, + struct pt_regs *regs); #else /* !CONFIG_BPF_JIT */ static inline -int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, - struct pt_regs *regs) +bool ex_handler_bpf(const struct exception_table_entry *ex, + struct pt_regs *regs) { - return 0; + return false; } #endif /* !CONFIG_BPF_JIT */ -extern int fixup_exception(struct pt_regs *regs); +bool fixup_exception(struct pt_regs *regs); #endif diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 9a62884183e5..dbb4b30a5648 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -62,15 +62,13 @@ static inline size_t sve_ffr_offset(int vl) static inline void *sve_pffr(struct thread_struct *thread) { - return (char *)thread->sve_state + sve_ffr_offset(thread->sve_vl); + return (char *)thread->sve_state + sve_ffr_offset(thread_get_sve_vl(thread)); } -extern void sve_save_state(void *state, u32 *pfpsr); +extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr); extern void sve_load_state(void const *state, u32 const *pfpsr, - unsigned long vq_minus_1); -extern void sve_flush_live(unsigned long vq_minus_1); -extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state, - unsigned long vq_minus_1); + int restore_ffr); +extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); extern void sve_set_vq(unsigned long vq_minus_1); @@ -79,10 +77,6 @@ extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern u64 read_zcr_features(void); -extern int __ro_after_init sve_max_vl; -extern int __ro_after_init sve_max_virtualisable_vl; -extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX); - /* * Helpers to translate bit indices in sve_vq_map to VQ values (and * vice versa). This allows find_next_bit() to be used to find the @@ -98,15 +92,29 @@ static inline unsigned int __bit_to_vq(unsigned int bit) return SVE_VQ_MAX - bit; } -/* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */ -static inline bool sve_vq_available(unsigned int vq) -{ - return test_bit(__vq_to_bit(vq), sve_vq_map); -} -#ifdef CONFIG_ARM64_SVE +struct vl_info { + enum vec_type type; + const char *name; /* For display purposes */ -extern size_t sve_state_size(struct task_struct const *task); + /* Minimum supported vector length across all CPUs */ + int min_vl; + + /* Maximum supported vector length across all CPUs */ + int max_vl; + int max_virtualisable_vl; + + /* + * Set of available vector lengths, + * where length vq encoded as bit __vq_to_bit(vq): + */ + DECLARE_BITMAP(vq_map, SVE_VQ_MAX); + + /* Set of vector lengths present on at least one cpu: */ + DECLARE_BITMAP(vq_partial_map, SVE_VQ_MAX); +}; + +#ifdef CONFIG_ARM64_SVE extern void sve_alloc(struct task_struct *task); extern void fpsimd_release_task(struct task_struct *task); @@ -143,11 +151,63 @@ static inline void sve_user_enable(void) * Probing and setup functions. * Calls to these functions must be serialised with one another. */ -extern void __init sve_init_vq_map(void); -extern void sve_update_vq_map(void); -extern int sve_verify_vq_map(void); +enum vec_type; + +extern void __init vec_init_vq_map(enum vec_type type); +extern void vec_update_vq_map(enum vec_type type); +extern int vec_verify_vq_map(enum vec_type type); extern void __init sve_setup(void); +extern __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX]; + +static inline void write_vl(enum vec_type type, u64 val) +{ + u64 tmp; + + switch (type) { +#ifdef CONFIG_ARM64_SVE + case ARM64_VEC_SVE: + tmp = read_sysreg_s(SYS_ZCR_EL1) & ~ZCR_ELx_LEN_MASK; + write_sysreg_s(tmp | val, SYS_ZCR_EL1); + break; +#endif + default: + WARN_ON_ONCE(1); + break; + } +} + +static inline int vec_max_vl(enum vec_type type) +{ + return vl_info[type].max_vl; +} + +static inline int vec_max_virtualisable_vl(enum vec_type type) +{ + return vl_info[type].max_virtualisable_vl; +} + +static inline int sve_max_vl(void) +{ + return vec_max_vl(ARM64_VEC_SVE); +} + +static inline int sve_max_virtualisable_vl(void) +{ + return vec_max_virtualisable_vl(ARM64_VEC_SVE); +} + +/* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */ +static inline bool vq_available(enum vec_type type, unsigned int vq) +{ + return test_bit(__vq_to_bit(vq), vl_info[type].vq_map); +} + +static inline bool sve_vq_available(unsigned int vq) +{ + return vq_available(ARM64_VEC_SVE, vq); +} + #else /* ! CONFIG_ARM64_SVE */ static inline void sve_alloc(struct task_struct *task) { } @@ -155,6 +215,11 @@ static inline void fpsimd_release_task(struct task_struct *task) { } static inline void sve_sync_to_fpsimd(struct task_struct *task) { } static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { } +static inline int sve_max_virtualisable_vl(void) +{ + return 0; +} + static inline int sve_set_current_vl(unsigned long arg) { return -EINVAL; @@ -165,14 +230,21 @@ static inline int sve_get_current_vl(void) return -EINVAL; } +static inline int sve_max_vl(void) +{ + return -EINVAL; +} + +static inline bool sve_vq_available(unsigned int vq) { return false; } + static inline void sve_user_disable(void) { BUILD_BUG(); } static inline void sve_user_enable(void) { BUILD_BUG(); } #define sve_cond_update_zcr_vq(val, reg) do { } while (0) -static inline void sve_init_vq_map(void) { } -static inline void sve_update_vq_map(void) { } -static inline int sve_verify_vq_map(void) { return 0; } +static inline void vec_init_vq_map(enum vec_type t) { } +static inline void vec_update_vq_map(enum vec_type t) { } +static inline int vec_verify_vq_map(enum vec_type t) { return 0; } static inline void sve_setup(void) { } #endif /* ! CONFIG_ARM64_SVE */ diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 00a2c0b69c2b..2509d7dde55a 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -217,28 +217,36 @@ .macro sve_flush_z _for n, 0, 31, _sve_flush_z \n .endm -.macro sve_flush_p_ffr +.macro sve_flush_p _for n, 0, 15, _sve_pfalse \n +.endm +.macro sve_flush_ffr _sve_wrffr 0 .endm -.macro sve_save nxbase, xpfpsr, nxtmp +.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34 _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16 + cbz \save_ffr, 921f _sve_rdffr 0 _sve_str_p 0, \nxbase _sve_ldr_p 0, \nxbase, -16 - + b 922f +921: + str xzr, [x\nxbase] // Zero out FFR +922: mrs x\nxtmp, fpsr str w\nxtmp, [\xpfpsr] mrs x\nxtmp, fpcr str w\nxtmp, [\xpfpsr, #4] .endm -.macro __sve_load nxbase, xpfpsr, nxtmp +.macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34 + cbz \restore_ffr, 921f _sve_ldr_p 0, \nxbase _sve_wrffr 0 +921: _for n, 0, 15, _sve_ldr_p \n, \nxbase, \n - 16 ldr w\nxtmp, [\xpfpsr] @@ -246,8 +254,3 @@ ldr w\nxtmp, [\xpfpsr, #4] msr fpcr, x\nxtmp .endm - -.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2 - sve_load_vq \xvqminus1, x\nxtmp, \xtmp2 - __sve_load \nxbase, \xpfpsr, \nxtmp -.endm diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index 91fa4baa1a93..347b0cc68f07 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -15,7 +15,7 @@ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS #define ARCH_SUPPORTS_FTRACE_OPS 1 #else -#define MCOUNT_ADDR ((unsigned long)_mcount) +#define MCOUNT_ADDR ((unsigned long)function_nocfi(_mcount)) #endif /* The BL at the callsite's adjusted rec->ip */ diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 8e41faa37c69..bc06691d2062 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -25,19 +25,14 @@ do { \ " cbz %w0, 3f\n" \ " sub %w4, %w4, %w0\n" \ " cbnz %w4, 1b\n" \ -" mov %w0, %w7\n" \ +" mov %w0, %w6\n" \ "3:\n" \ " dmb ish\n" \ -" .pushsection .fixup,\"ax\"\n" \ -" .align 2\n" \ -"4: mov %w0, %w6\n" \ -" b 3b\n" \ -" .popsection\n" \ - _ASM_EXTABLE(1b, 4b) \ - _ASM_EXTABLE(2b, 4b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w0) \ + _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w0) \ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp), \ "+r" (loops) \ - : "r" (oparg), "Ir" (-EFAULT), "Ir" (-EAGAIN) \ + : "r" (oparg), "Ir" (-EAGAIN) \ : "memory"); \ uaccess_disable_privileged(); \ } while (0) @@ -105,18 +100,14 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, " cbz %w3, 3f\n" " sub %w4, %w4, %w3\n" " cbnz %w4, 1b\n" -" mov %w0, %w8\n" +" mov %w0, %w7\n" "3:\n" " dmb ish\n" "4:\n" -" .pushsection .fixup,\"ax\"\n" -"5: mov %w0, %w7\n" -" b 4b\n" -" .popsection\n" - _ASM_EXTABLE(1b, 5b) - _ASM_EXTABLE(2b, 5b) + _ASM_EXTABLE_UACCESS_ERR(1b, 4b, %w0) + _ASM_EXTABLE_UACCESS_ERR(2b, 4b, %w0) : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops) - : "r" (oldval), "r" (newval), "Ir" (-EFAULT), "Ir" (-EAGAIN) + : "r" (oldval), "r" (newval), "Ir" (-EAGAIN) : "memory"); uaccess_disable_privileged(); diff --git a/arch/arm64/include/asm/gpr-num.h b/arch/arm64/include/asm/gpr-num.h new file mode 100644 index 000000000000..05da4a7c5788 --- /dev/null +++ b/arch/arm64/include/asm/gpr-num.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_GPR_NUM_H +#define __ASM_GPR_NUM_H + +#ifdef __ASSEMBLY__ + + .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 + .equ .L__gpr_num_x\num, \num + .equ .L__gpr_num_w\num, \num + .endr + .equ .L__gpr_num_xzr, 31 + .equ .L__gpr_num_wzr, 31 + +#else /* __ASSEMBLY__ */ + +#define __DEFINE_ASM_GPR_NUMS \ +" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ +" .equ .L__gpr_num_x\\num, \\num\n" \ +" .equ .L__gpr_num_w\\num, \\num\n" \ +" .endr\n" \ +" .equ .L__gpr_num_xzr, 31\n" \ +" .equ .L__gpr_num_wzr, 31\n" + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_GPR_NUM_H */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 8c129db8232a..b100e0055eab 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -105,6 +105,7 @@ #define KERNEL_HWCAP_RNG __khwcap2_feature(RNG) #define KERNEL_HWCAP_BTI __khwcap2_feature(BTI) #define KERNEL_HWCAP_MTE __khwcap2_feature(MTE) +#define KERNEL_HWCAP_ECV __khwcap2_feature(ECV) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 00dbcc71aeb2..9839bfc163d7 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -90,12 +90,24 @@ static inline void crash_prepare_suspend(void) {} static inline void crash_post_resume(void) {} #endif +#if defined(CONFIG_KEXEC_CORE) +void cpu_soft_restart(unsigned long el2_switch, unsigned long entry, + unsigned long arg0, unsigned long arg1, + unsigned long arg2); +#endif + #define ARCH_HAS_KIMAGE_ARCH struct kimage_arch { void *dtb; phys_addr_t dtb_mem; phys_addr_t kern_reloc; + phys_addr_t el2_vectors; + phys_addr_t ttbr0; + phys_addr_t ttbr1; + phys_addr_t zero_page; + unsigned long phys_offset; + unsigned long t0sz; }; #ifdef CONFIG_KEXEC_FILE diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index e86045ac43ba..6486b1db268e 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -263,9 +263,10 @@ extern u64 __kvm_get_mdcr_el2(void); /* * KVM extable for unexpected exceptions. - * In the same format _asm_extable, but output to a different section so that - * it can be mapped to EL2. The KVM version is not sorted. The caller must - * ensure: + * Create a struct kvm_exception_table_entry output to a section that can be + * mapped by EL2. The table is not sorted. + * + * The caller must ensure: * x18 has the hypervisor value to allow any Shadow-Call-Stack instrumented * code to write to it, and that SPSR_EL2 and ELR_EL2 are restored by the fixup. */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index f1745a843414..1b9a1e242612 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -243,6 +243,7 @@ static inline const void *__tag_set(const void *addr, u8 tag) #ifdef CONFIG_KASAN_HW_TAGS #define arch_enable_tagging_sync() mte_enable_kernel_sync() #define arch_enable_tagging_async() mte_enable_kernel_async() +#define arch_enable_tagging_asymm() mte_enable_kernel_asymm() #define arch_force_async_tag_fault() mte_check_tfsr_exit() #define arch_get_random_tag() mte_get_random_tag() #define arch_get_mem_tag(addr) mte_get_mem_tag(addr) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index f4ba93d4ffeb..6770667b34a3 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -116,6 +116,30 @@ static inline void cpu_install_idmap(void) } /* + * Load our new page tables. A strict BBM approach requires that we ensure that + * TLBs are free of any entries that may overlap with the global mappings we are + * about to install. + * + * For a real hibernate/resume/kexec cycle TTBR0 currently points to a zero + * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI runtime + * services), while for a userspace-driven test_resume cycle it points to + * userspace page tables (and we must point it at a zero page ourselves). + * + * We change T0SZ as part of installing the idmap. This is undone by + * cpu_uninstall_idmap() in __cpu_suspend_exit(). + */ +static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz) +{ + cpu_set_reserved_ttbr0(); + local_flush_tlb_all(); + __cpu_set_tcr_t0sz(t0sz); + + /* avoid cpu_switch_mm() and its SW-PAN and CNP interactions */ + write_sysreg(ttbr0, ttbr0_el1); + isb(); +} + +/* * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, * avoiding the possibility of conflicting TLB entries being allocated. */ diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h index 22420e1f8c03..478b9bcf69ad 100644 --- a/arch/arm64/include/asm/mte-kasan.h +++ b/arch/arm64/include/asm/mte-kasan.h @@ -130,6 +130,7 @@ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag, void mte_enable_kernel_sync(void); void mte_enable_kernel_async(void); +void mte_enable_kernel_asymm(void); #else /* CONFIG_ARM64_MTE */ @@ -161,6 +162,10 @@ static inline void mte_enable_kernel_async(void) { } +static inline void mte_enable_kernel_asymm(void) +{ +} + #endif /* CONFIG_ARM64_MTE */ #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 02511650cffe..075539f5f1c8 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -88,11 +88,11 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child, #ifdef CONFIG_KASAN_HW_TAGS /* Whether the MTE asynchronous mode is enabled. */ -DECLARE_STATIC_KEY_FALSE(mte_async_mode); +DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode); -static inline bool system_uses_mte_async_mode(void) +static inline bool system_uses_mte_async_or_asymm_mode(void) { - return static_branch_unlikely(&mte_async_mode); + return static_branch_unlikely(&mte_async_or_asymm_mode); } void mte_check_tfsr_el1(void); @@ -121,7 +121,7 @@ static inline void mte_check_tfsr_exit(void) mte_check_tfsr_el1(); } #else -static inline bool system_uses_mte_async_mode(void) +static inline bool system_uses_mte_async_or_asymm_mode(void) { return false; } diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index f98c91bbd7c1..993a27ea6f54 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -41,7 +41,6 @@ void tag_clear_highpage(struct page *to); typedef struct page *pgtable_t; -int pfn_valid(unsigned long pfn); int pfn_is_map_memory(unsigned long pfn); #include <asm/memory.h> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index dfa76afa0ccf..84fbb52b4224 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1022,6 +1022,11 @@ static inline pgprot_t arch_filter_pgprot(pgprot_t prot) return PAGE_READONLY_EXEC; } +static inline bool pud_sect_supported(void) +{ + return PAGE_SIZE == SZ_4K; +} + #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 55ca034238ea..6f41b65f9962 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -115,6 +115,11 @@ struct debug_info { #endif }; +enum vec_type { + ARM64_VEC_SVE = 0, + ARM64_VEC_MAX, +}; + struct cpu_context { unsigned long x19; unsigned long x20; @@ -147,8 +152,8 @@ struct thread_struct { unsigned int fpsimd_cpu; void *sve_state; /* SVE registers, if any */ - unsigned int sve_vl; /* SVE vector length */ - unsigned int sve_vl_onexec; /* SVE vl after next exec */ + unsigned int vl[ARM64_VEC_MAX]; /* vector length */ + unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */ unsigned long fault_address; /* fault info */ unsigned long fault_code; /* ESR_EL1 value */ struct debug_info debug; /* debugging */ @@ -164,6 +169,46 @@ struct thread_struct { u64 sctlr_user; }; +static inline unsigned int thread_get_vl(struct thread_struct *thread, + enum vec_type type) +{ + return thread->vl[type]; +} + +static inline unsigned int thread_get_sve_vl(struct thread_struct *thread) +{ + return thread_get_vl(thread, ARM64_VEC_SVE); +} + +unsigned int task_get_vl(const struct task_struct *task, enum vec_type type); +void task_set_vl(struct task_struct *task, enum vec_type type, + unsigned long vl); +void task_set_vl_onexec(struct task_struct *task, enum vec_type type, + unsigned long vl); +unsigned int task_get_vl_onexec(const struct task_struct *task, + enum vec_type type); + +static inline unsigned int task_get_sve_vl(const struct task_struct *task) +{ + return task_get_vl(task, ARM64_VEC_SVE); +} + +static inline void task_set_sve_vl(struct task_struct *task, unsigned long vl) +{ + task_set_vl(task, ARM64_VEC_SVE, vl); +} + +static inline unsigned int task_get_sve_vl_onexec(const struct task_struct *task) +{ + return task_get_vl_onexec(task, ARM64_VEC_SVE); +} + +static inline void task_set_sve_vl_onexec(struct task_struct *task, + unsigned long vl) +{ + task_set_vl_onexec(task, ARM64_VEC_SVE, vl); +} + #define SCTLR_USER_MASK \ (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | SCTLR_ELx_ENDA | SCTLR_ELx_ENDB | \ SCTLR_EL1_TCF0_MASK) diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index e4ad9db53af1..152cb35bf9df 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -21,5 +21,6 @@ extern char __exittext_begin[], __exittext_end[]; extern char __irqentry_text_start[], __irqentry_text_end[]; extern char __mmuoff_data_start[], __mmuoff_data_end[]; extern char __entry_tramp_text_start[], __entry_tramp_text_end[]; +extern char __relocate_new_kernel_start[], __relocate_new_kernel_end[]; #endif /* __ASM_SECTIONS_H */ diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h index d3320618ed14..6437df661700 100644 --- a/arch/arm64/include/asm/setup.h +++ b/arch/arm64/include/asm/setup.h @@ -8,4 +8,10 @@ void *get_early_fdt_ptr(void); void early_fdt_map(u64 dt_phys); +/* + * These two variables are used in the head.S file. + */ +extern phys_addr_t __fdt_pointer __initdata; +extern u64 __cacheline_aligned boot_args[4]; + #endif diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b268082d67ed..027dbe004df4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -13,6 +13,8 @@ #include <linux/stringify.h> #include <linux/kasan-tags.h> +#include <asm/gpr-num.h> + /* * ARMv8 ARM reserves the following encoding for system registers: * (Ref: ARMv8 ARM, Section: "System instruction class encoding overview", @@ -507,6 +509,9 @@ #define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) +#define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5) +#define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6) + #define SYS_CNTP_TVAL_EL0 sys_reg(3, 3, 14, 2, 0) #define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) #define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2) @@ -621,6 +626,7 @@ #define SCTLR_ELx_TCF_NONE (UL(0x0) << SCTLR_ELx_TCF_SHIFT) #define SCTLR_ELx_TCF_SYNC (UL(0x1) << SCTLR_ELx_TCF_SHIFT) #define SCTLR_ELx_TCF_ASYNC (UL(0x2) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_TCF_ASYMM (UL(0x3) << SCTLR_ELx_TCF_SHIFT) #define SCTLR_ELx_TCF_MASK (UL(0x3) << SCTLR_ELx_TCF_SHIFT) #define SCTLR_ELx_ENIA_SHIFT 31 @@ -666,6 +672,7 @@ #define SCTLR_EL1_TCF0_NONE (UL(0x0) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_TCF0_SYNC (UL(0x1) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_TCF0_ASYNC (UL(0x2) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_ASYMM (UL(0x3) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_TCF0_MASK (UL(0x3) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_BT1 (BIT(36)) @@ -807,6 +814,7 @@ #define ID_AA64PFR1_MTE_NI 0x0 #define ID_AA64PFR1_MTE_EL0 0x1 #define ID_AA64PFR1_MTE 0x2 +#define ID_AA64PFR1_MTE_ASYMM 0x3 /* id_aa64zfr0 */ #define ID_AA64ZFR0_F64MM_SHIFT 56 @@ -1192,17 +1200,12 @@ #ifdef __ASSEMBLY__ - .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 - .equ .L__reg_num_x\num, \num - .endr - .equ .L__reg_num_xzr, 31 - .macro mrs_s, rt, sreg - __emit_inst(0xd5200000|(\sreg)|(.L__reg_num_\rt)) + __emit_inst(0xd5200000|(\sreg)|(.L__gpr_num_\rt)) .endm .macro msr_s, sreg, rt - __emit_inst(0xd5000000|(\sreg)|(.L__reg_num_\rt)) + __emit_inst(0xd5000000|(\sreg)|(.L__gpr_num_\rt)) .endm #else @@ -1211,22 +1214,16 @@ #include <linux/types.h> #include <asm/alternative.h> -#define __DEFINE_MRS_MSR_S_REGNUM \ -" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ -" .equ .L__reg_num_x\\num, \\num\n" \ -" .endr\n" \ -" .equ .L__reg_num_xzr, 31\n" - #define DEFINE_MRS_S \ - __DEFINE_MRS_MSR_S_REGNUM \ + __DEFINE_ASM_GPR_NUMS \ " .macro mrs_s, rt, sreg\n" \ - __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) \ + __emit_inst(0xd5200000|(\\sreg)|(.L__gpr_num_\\rt)) \ " .endm\n" #define DEFINE_MSR_S \ - __DEFINE_MRS_MSR_S_REGNUM \ + __DEFINE_ASM_GPR_NUMS \ " .macro msr_s, sreg, rt\n" \ - __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) \ + __emit_inst(0xd5000000|(\\sreg)|(.L__gpr_num_\\rt)) \ " .endm\n" #define UNDEFINE_MRS_S \ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 6623c99f0984..d5c8ac81ce11 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -78,7 +78,7 @@ int arch_dup_task_struct(struct task_struct *dst, #define TIF_SINGLESTEP 21 #define TIF_32BIT 22 /* 32bit process */ #define TIF_SVE 23 /* Scalable Vector Extension in use */ -#define TIF_SVE_VL_INHERIT 24 /* Inherit sve_vl_onexec across exec */ +#define TIF_SVE_VL_INHERIT 24 /* Inherit SVE vl_onexec across exec */ #define TIF_SSBD 25 /* Wants SSB mitigation */ #define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */ diff --git a/arch/arm64/include/asm/trans_pgd.h b/arch/arm64/include/asm/trans_pgd.h index 5d08e5adf3d5..033d400a4ea4 100644 --- a/arch/arm64/include/asm/trans_pgd.h +++ b/arch/arm64/include/asm/trans_pgd.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (c) 2020, Microsoft Corporation. - * Pavel Tatashin <pasha.tatashin@soleen.com> + * Copyright (c) 2021, Microsoft Corporation. + * Pasha Tatashin <pasha.tatashin@soleen.com> */ #ifndef _ASM_TRANS_TABLE_H @@ -15,7 +15,7 @@ /* * trans_alloc_page * - Allocator that should return exactly one zeroed page, if this - * allocator fails, trans_pgd_create_copy() and trans_pgd_map_page() + * allocator fails, trans_pgd_create_copy() and trans_pgd_idmap_page() * return -ENOMEM error. * * trans_alloc_arg @@ -30,10 +30,12 @@ struct trans_pgd_info { int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **trans_pgd, unsigned long start, unsigned long end); -int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd, - void *page, unsigned long dst_addr, pgprot_t pgprot); - int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0, unsigned long *t0sz, void *page); +int trans_pgd_copy_el2_vectors(struct trans_pgd_info *info, + phys_addr_t *el2_vectors); + +extern char trans_pgd_stub_vectors[]; + #endif /* _ASM_TRANS_TABLE_H */ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 190b494e22ab..6e2e0b7031ab 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -18,6 +18,7 @@ #include <linux/kasan-checks.h> #include <linux/string.h> +#include <asm/asm-extable.h> #include <asm/cpufeature.h> #include <asm/mmu.h> #include <asm/mte.h> @@ -70,12 +71,6 @@ static inline unsigned long __range_ok(const void __user *addr, unsigned long si #define access_ok(addr, size) __range_ok(addr, size) -#define _ASM_EXTABLE(from, to) \ - " .pushsection __ex_table, \"a\"\n" \ - " .align 3\n" \ - " .long (" #from " - .), (" #to " - .)\n" \ - " .popsection\n" - /* * User access enabling/disabling. */ @@ -196,13 +191,13 @@ static inline void __uaccess_enable_tco(void) */ static inline void __uaccess_disable_tco_async(void) { - if (system_uses_mte_async_mode()) + if (system_uses_mte_async_or_asymm_mode()) __uaccess_disable_tco(); } static inline void __uaccess_enable_tco_async(void) { - if (system_uses_mte_async_mode()) + if (system_uses_mte_async_or_asymm_mode()) __uaccess_enable_tco(); } @@ -260,15 +255,9 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) asm volatile( \ "1: " load " " reg "1, [%2]\n" \ "2:\n" \ - " .section .fixup, \"ax\"\n" \ - " .align 2\n" \ - "3: mov %w0, %3\n" \ - " mov %1, #0\n" \ - " b 2b\n" \ - " .previous\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \ : "+r" (err), "=&r" (x) \ - : "r" (addr), "i" (-EFAULT)) + : "r" (addr)) #define __raw_get_mem(ldr, x, ptr, err) \ do { \ @@ -337,14 +326,9 @@ do { \ asm volatile( \ "1: " store " " reg "1, [%2]\n" \ "2:\n" \ - " .section .fixup,\"ax\"\n" \ - " .align 2\n" \ - "3: mov %w0, %3\n" \ - " b 2b\n" \ - " .previous\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0) \ : "+r" (err) \ - : "r" (x), "r" (addr), "i" (-EFAULT)) + : "r" (x), "r" (addr)) #define __raw_put_mem(str, x, ptr, err) \ do { \ diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h index 3fd8fd6d8fc2..3ac35f4a667c 100644 --- a/arch/arm64/include/asm/vdso/compat_barrier.h +++ b/arch/arm64/include/asm/vdso/compat_barrier.h @@ -20,16 +20,9 @@ #define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory") -#if __LINUX_ARM_ARCH__ >= 8 && defined(CONFIG_AS_DMB_ISHLD) #define aarch32_smp_mb() dmb(ish) #define aarch32_smp_rmb() dmb(ishld) #define aarch32_smp_wmb() dmb(ishst) -#else -#define aarch32_smp_mb() dmb(ish) -#define aarch32_smp_rmb() aarch32_smp_mb() -#define aarch32_smp_wmb() dmb(ishst) -#endif - #undef smp_mb #undef smp_rmb diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 7379f35ae2c6..3c8af033a997 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -67,6 +67,8 @@ */ extern u32 __boot_cpu_mode[2]; +#define ARM64_VECTOR_TABLE_LEN SZ_2K + void __hyp_set_vectors(phys_addr_t phys_vector_base); void __hyp_reset_vectors(void); @@ -128,6 +130,11 @@ static __always_inline bool is_protected_kvm_enabled(void) return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE); } +static inline bool is_hyp_nvhe(void) +{ + return is_hyp_mode_available() && !is_kernel_in_hyp_mode(); +} + #endif /* __ASSEMBLY__ */ #endif /* ! __ASM__VIRT_H */ diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h index 7a22aeea9bb5..b9185503feae 100644 --- a/arch/arm64/include/asm/vmalloc.h +++ b/arch/arm64/include/asm/vmalloc.h @@ -2,6 +2,7 @@ #define _ASM_ARM64_VMALLOC_H #include <asm/page.h> +#include <asm/pgtable.h> #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP @@ -9,10 +10,9 @@ static inline bool arch_vmap_pud_supported(pgprot_t prot) { /* - * Only 4k granule supports level 1 block mappings. * SW table walks can't handle removal of intermediate entries. */ - return IS_ENABLED(CONFIG_ARM64_4K_PAGES) && + return pud_sect_supported() && !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); } diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index 2dcb104c645b..1c8e4f2490bf 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -53,29 +53,16 @@ static inline unsigned long find_zero(unsigned long mask) */ static inline unsigned long load_unaligned_zeropad(const void *addr) { - unsigned long ret, tmp; + unsigned long ret; __uaccess_enable_tco_async(); /* Load word from unaligned pointer addr */ asm( - "1: ldr %0, %3\n" + "1: ldr %0, %2\n" "2:\n" - " .pushsection .fixup,\"ax\"\n" - " .align 2\n" - "3: bic %1, %2, #0x7\n" - " ldr %0, [%1]\n" - " and %1, %2, #0x7\n" - " lsl %1, %1, #0x3\n" -#ifndef __AARCH64EB__ - " lsr %0, %0, %1\n" -#else - " lsl %0, %0, %1\n" -#endif - " b 2b\n" - " .popsection\n" - _ASM_EXTABLE(1b, 3b) - : "=&r" (ret), "=&r" (tmp) + _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1) + : "=&r" (ret) : "r" (addr), "Q" (*(unsigned long *)addr)); __uaccess_disable_tco_async(); diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index b8f41aa234ee..7b23b16f21ce 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -75,5 +75,6 @@ #define HWCAP2_RNG (1 << 16) #define HWCAP2_BTI (1 << 17) #define HWCAP2_MTE (1 << 18) +#define HWCAP2_ECV (1 << 19) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 0e86e8b9cedd..6875a16b09d2 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -279,7 +279,7 @@ static void __init register_insn_emulation_sysctl(void) do { \ uaccess_enable_privileged(); \ __asm__ __volatile__( \ - " mov %w3, %w7\n" \ + " mov %w3, %w6\n" \ "0: ldxr"B" %w2, [%4]\n" \ "1: stxr"B" %w0, %w1, [%4]\n" \ " cbz %w0, 2f\n" \ @@ -290,16 +290,10 @@ do { \ "2:\n" \ " mov %w1, %w2\n" \ "3:\n" \ - " .pushsection .fixup,\"ax\"\n" \ - " .align 2\n" \ - "4: mov %w0, %w6\n" \ - " b 3b\n" \ - " .popsection" \ - _ASM_EXTABLE(0b, 4b) \ - _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE_UACCESS_ERR(0b, 3b, %w0) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w0) \ : "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2) \ : "r" ((unsigned long)addr), "i" (-EAGAIN), \ - "i" (-EFAULT), \ "i" (__SWP_LL_SC_LOOPS) \ : "memory"); \ uaccess_disable_privileged(); \ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 551427ae8cc5..2124357c2075 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -9,6 +9,7 @@ #include <linux/arm_sdei.h> #include <linux/sched.h> +#include <linux/kexec.h> #include <linux/mm.h> #include <linux/dma-mapping.h> #include <linux/kvm_host.h> @@ -171,5 +172,15 @@ int main(void) #endif BLANK(); #endif +#ifdef CONFIG_KEXEC_CORE + DEFINE(KIMAGE_ARCH_DTB_MEM, offsetof(struct kimage, arch.dtb_mem)); + DEFINE(KIMAGE_ARCH_EL2_VECTORS, offsetof(struct kimage, arch.el2_vectors)); + DEFINE(KIMAGE_ARCH_ZERO_PAGE, offsetof(struct kimage, arch.zero_page)); + DEFINE(KIMAGE_ARCH_PHYS_OFFSET, offsetof(struct kimage, arch.phys_offset)); + DEFINE(KIMAGE_ARCH_TTBR1, offsetof(struct kimage, arch.ttbr1)); + DEFINE(KIMAGE_HEAD, offsetof(struct kimage, head)); + DEFINE(KIMAGE_START, offsetof(struct kimage, start)); + BLANK(); +#endif return 0; } diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index d47ff63a5b66..48a8af97faa9 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -16,8 +16,7 @@ .pushsection .idmap.text, "awx" /* - * __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for - * cpu_soft_restart. + * cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) * * @el2_switch: Flag to indicate a switch to EL2 is needed. * @entry: Location to jump to for soft reset. @@ -29,7 +28,7 @@ * branch to what would be the reset vector. It must be executed with the * flat identity mapping. */ -SYM_CODE_START(__cpu_soft_restart) +SYM_CODE_START(cpu_soft_restart) mov_q x12, INIT_SCTLR_EL1_MMU_OFF pre_disable_mmu_workaround /* @@ -48,6 +47,6 @@ SYM_CODE_START(__cpu_soft_restart) mov x1, x3 // arg1 mov x2, x4 // arg2 br x8 -SYM_CODE_END(__cpu_soft_restart) +SYM_CODE_END(cpu_soft_restart) .popsection diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h deleted file mode 100644 index 9a7b1262ef17..000000000000 --- a/arch/arm64/kernel/cpu-reset.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * CPU reset routines - * - * Copyright (C) 2015 Huawei Futurewei Technologies. - */ - -#ifndef _ARM64_CPU_RESET_H -#define _ARM64_CPU_RESET_H - -#include <asm/virt.h> - -void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry, - unsigned long arg0, unsigned long arg1, unsigned long arg2); - -static inline void __noreturn __nocfi cpu_soft_restart(unsigned long entry, - unsigned long arg0, - unsigned long arg1, - unsigned long arg2) -{ - typeof(__cpu_soft_restart) *restart; - - unsigned long el2_switch = !is_kernel_in_hyp_mode() && - is_hyp_mode_available(); - restart = (void *)__pa_symbol(function_nocfi(__cpu_soft_restart)); - - cpu_install_idmap(); - restart(el2_switch, entry, arg0, arg1, arg2); - unreachable(); -} - -#endif diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index e2c20c036442..9e1c1aef9ebd 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -340,6 +340,42 @@ static const struct midr_range erratum_1463225[] = { }; #endif +#ifdef CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE +static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { +#ifdef CONFIG_ARM64_ERRATUM_2139208 + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2119858 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), +#endif + {}, +}; +#endif /* CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE */ + +#ifdef CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE +static const struct midr_range tsb_flush_fail_cpus[] = { +#ifdef CONFIG_ARM64_ERRATUM_2067961 + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2054223 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), +#endif + {}, +}; +#endif /* CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE */ + +#ifdef CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE +static struct midr_range trbe_write_out_of_range_cpus[] = { +#ifdef CONFIG_ARM64_ERRATUM_2253138 + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2224489 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), +#endif + {}, +}; +#endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */ + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -534,6 +570,34 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), }, #endif +#ifdef CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + { + /* + * The erratum work around is handled within the TRBE + * driver and can be applied per-cpu. So, we can allow + * a late CPU to come online with this erratum. + */ + .desc = "ARM erratum 2119858 or 2139208", + .capability = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + CAP_MIDR_RANGE_LIST(trbe_overwrite_fill_mode_cpus), + }, +#endif +#ifdef CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE + { + .desc = "ARM erratum 2067961 or 2054223", + .capability = ARM64_WORKAROUND_TSB_FLUSH_FAILURE, + ERRATA_MIDR_RANGE_LIST(tsb_flush_fail_cpus), + }, +#endif +#ifdef CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE + { + .desc = "ARM erratum 2253138 or 2224489", + .capability = ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus), + }, +#endif { } }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6ec7036ef7e1..ecbdff795f5e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -279,7 +279,7 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EXS_SHIFT, 4, 0), /* @@ -941,7 +941,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) { init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr); - sve_init_vq_map(); + vec_init_vq_map(ARM64_VEC_SVE); } if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) @@ -1175,7 +1175,7 @@ void update_cpu_features(int cpu, /* Probe vector lengths, unless we already gave up on SVE */ if (id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) && !system_capabilities_finalized()) - sve_update_vq_map(); + vec_update_vq_map(ARM64_VEC_SVE); } /* @@ -1930,6 +1930,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .min_field_value = 1, }, + { + .desc = "Enhanced Counter Virtualization", + .capability = ARM64_HAS_ECV, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64MMFR0_EL1, + .field_pos = ID_AA64MMFR0_ECV_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = 1, + }, #ifdef CONFIG_ARM64_PAN { .desc = "Privileged Access Never", @@ -2321,6 +2331,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .cpu_enable = cpu_enable_mte, }, + { + .desc = "Asymmetric MTE Tag Check Fault", + .capability = ARM64_MTE_ASYMM, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .field_pos = ID_AA64PFR1_MTE_SHIFT, + .min_field_value = ID_AA64PFR1_MTE_ASYMM, + .sign = FTR_UNSIGNED, + }, #endif /* CONFIG_ARM64_MTE */ { .desc = "RCpc load-acquire (LDAPR)", @@ -2451,6 +2471,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #ifdef CONFIG_ARM64_MTE HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), #endif /* CONFIG_ARM64_MTE */ + HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), {}, }; @@ -2739,7 +2760,7 @@ static void verify_sve_features(void) unsigned int safe_len = safe_zcr & ZCR_ELx_LEN_MASK; unsigned int len = zcr & ZCR_ELx_LEN_MASK; - if (len < safe_len || sve_verify_vq_map()) { + if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SVE)) { pr_crit("CPU%d: SVE: vector length support mismatch\n", smp_processor_id()); cpu_die_early(); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 87731fea5e41..6e27b759056a 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -94,6 +94,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_RNG] = "rng", [KERNEL_HWCAP_BTI] = "bti", [KERNEL_HWCAP_MTE] = "mte", + [KERNEL_HWCAP_ECV] = "ecv", }; #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 196e921f61de..dc242e269f9a 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -38,9 +38,10 @@ SYM_FUNC_END(fpsimd_load_state) * * x0 - pointer to buffer for state * x1 - pointer to storage for FPSR + * x2 - Save FFR if non-zero */ SYM_FUNC_START(sve_save_state) - sve_save 0, x1, 2 + sve_save 0, x1, x2, 3 ret SYM_FUNC_END(sve_save_state) @@ -49,10 +50,10 @@ SYM_FUNC_END(sve_save_state) * * x0 - pointer to buffer for state * x1 - pointer to storage for FPSR - * x2 - VQ-1 + * x2 - Restore FFR if non-zero */ SYM_FUNC_START(sve_load_state) - sve_load 0, x1, x2, 3, x4 + sve_load 0, x1, x2, 4 ret SYM_FUNC_END(sve_load_state) @@ -67,34 +68,21 @@ SYM_FUNC_START(sve_set_vq) SYM_FUNC_END(sve_set_vq) /* - * Load SVE state from FPSIMD state. - * - * x0 = pointer to struct fpsimd_state - * x1 = VQ - 1 - * - * Each SVE vector will be loaded with the first 128-bits taken from FPSIMD - * and the rest zeroed. All the other SVE registers will be zeroed. - */ -SYM_FUNC_START(sve_load_from_fpsimd_state) - sve_load_vq x1, x2, x3 - fpsimd_restore x0, 8 - sve_flush_p_ffr - ret -SYM_FUNC_END(sve_load_from_fpsimd_state) - -/* * Zero all SVE registers but the first 128-bits of each vector * * VQ must already be configured by caller, any further updates of VQ * will need to ensure that the register state remains valid. * - * x0 = VQ - 1 + * x0 = include FFR? + * x1 = VQ - 1 */ SYM_FUNC_START(sve_flush_live) - cbz x0, 1f // A VQ-1 of 0 is 128 bits so no extra Z state + cbz x1, 1f // A VQ-1 of 0 is 128 bits so no extra Z state sve_flush_z -1: sve_flush_p_ffr - ret +1: sve_flush_p + tbz x0, #0, 2f + sve_flush_ffr +2: ret SYM_FUNC_END(sve_flush_live) #endif /* CONFIG_ARM64_SVE */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index bc6d5a970a13..2f69ae43941d 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -168,9 +168,9 @@ alternative_else_nop_endif .macro mte_set_kernel_gcr, tmp, tmp2 #ifdef CONFIG_KASAN_HW_TAGS -alternative_if_not ARM64_MTE +alternative_cb kasan_hw_tags_enable b 1f -alternative_else_nop_endif +alternative_cb_end mov \tmp, KERNEL_GCR_EL1 msr_s SYS_GCR_EL1, \tmp 1: @@ -178,10 +178,10 @@ alternative_else_nop_endif .endm .macro mte_set_user_gcr, tsk, tmp, tmp2 -#ifdef CONFIG_ARM64_MTE -alternative_if_not ARM64_MTE +#ifdef CONFIG_KASAN_HW_TAGS +alternative_cb kasan_hw_tags_enable b 1f -alternative_else_nop_endif +alternative_cb_end ldr \tmp, [\tsk, #THREAD_MTE_CTRL] mte_set_gcr \tmp, \tmp2 diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index ff4962750b3d..fa244c426f61 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -121,40 +121,62 @@ struct fpsimd_last_state_struct { static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); -/* Default VL for tasks that don't set it explicitly: */ -static int __sve_default_vl = -1; +__ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = { +#ifdef CONFIG_ARM64_SVE + [ARM64_VEC_SVE] = { + .type = ARM64_VEC_SVE, + .name = "SVE", + .min_vl = SVE_VL_MIN, + .max_vl = SVE_VL_MIN, + .max_virtualisable_vl = SVE_VL_MIN, + }, +#endif +}; -static int get_sve_default_vl(void) +static unsigned int vec_vl_inherit_flag(enum vec_type type) { - return READ_ONCE(__sve_default_vl); + switch (type) { + case ARM64_VEC_SVE: + return TIF_SVE_VL_INHERIT; + default: + WARN_ON_ONCE(1); + return 0; + } +} + +struct vl_config { + int __default_vl; /* Default VL for tasks */ +}; + +static struct vl_config vl_config[ARM64_VEC_MAX]; + +static inline int get_default_vl(enum vec_type type) +{ + return READ_ONCE(vl_config[type].__default_vl); } #ifdef CONFIG_ARM64_SVE -static void set_sve_default_vl(int val) +static inline int get_sve_default_vl(void) { - WRITE_ONCE(__sve_default_vl, val); + return get_default_vl(ARM64_VEC_SVE); } -/* Maximum supported vector length across all CPUs (initially poisoned) */ -int __ro_after_init sve_max_vl = SVE_VL_MIN; -int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN; +static inline void set_default_vl(enum vec_type type, int val) +{ + WRITE_ONCE(vl_config[type].__default_vl, val); +} -/* - * Set of available vector lengths, - * where length vq encoded as bit __vq_to_bit(vq): - */ -__ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX); -/* Set of vector lengths present on at least one cpu: */ -static __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX); +static inline void set_sve_default_vl(int val) +{ + set_default_vl(ARM64_VEC_SVE, val); +} static void __percpu *efi_sve_state; #else /* ! CONFIG_ARM64_SVE */ /* Dummy declaration for code that will be optimised out: */ -extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX); -extern __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX); extern void __percpu *efi_sve_state; #endif /* ! CONFIG_ARM64_SVE */ @@ -228,6 +250,29 @@ static void sve_free(struct task_struct *task) __sve_free(task); } +unsigned int task_get_vl(const struct task_struct *task, enum vec_type type) +{ + return task->thread.vl[type]; +} + +void task_set_vl(struct task_struct *task, enum vec_type type, + unsigned long vl) +{ + task->thread.vl[type] = vl; +} + +unsigned int task_get_vl_onexec(const struct task_struct *task, + enum vec_type type) +{ + return task->thread.vl_onexec[type]; +} + +void task_set_vl_onexec(struct task_struct *task, enum vec_type type, + unsigned long vl) +{ + task->thread.vl_onexec[type] = vl; +} + /* * TIF_SVE controls whether a task can use SVE without trapping while * in userspace, and also the way a task's FPSIMD/SVE state is stored @@ -287,12 +332,13 @@ static void task_fpsimd_load(void) WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); - if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) + if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) { + sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1); sve_load_state(sve_pffr(¤t->thread), - ¤t->thread.uw.fpsimd_state.fpsr, - sve_vq_from_vl(current->thread.sve_vl) - 1); - else + ¤t->thread.uw.fpsimd_state.fpsr, true); + } else { fpsimd_load_state(¤t->thread.uw.fpsimd_state); + } } /* @@ -308,24 +354,26 @@ static void fpsimd_save(void) WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); - if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { - if (IS_ENABLED(CONFIG_ARM64_SVE) && - test_thread_flag(TIF_SVE)) { - if (WARN_ON(sve_get_vl() != last->sve_vl)) { - /* - * Can't save the user regs, so current would - * re-enter user with corrupt state. - * There's no way to recover, so kill it: - */ - force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); - return; - } - - sve_save_state((char *)last->sve_state + - sve_ffr_offset(last->sve_vl), - &last->st->fpsr); - } else - fpsimd_save_state(last->st); + if (test_thread_flag(TIF_FOREIGN_FPSTATE)) + return; + + if (IS_ENABLED(CONFIG_ARM64_SVE) && + test_thread_flag(TIF_SVE)) { + if (WARN_ON(sve_get_vl() != last->sve_vl)) { + /* + * Can't save the user regs, so current would + * re-enter user with corrupt state. + * There's no way to recover, so kill it: + */ + force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); + return; + } + + sve_save_state((char *)last->sve_state + + sve_ffr_offset(last->sve_vl), + &last->st->fpsr, true); + } else { + fpsimd_save_state(last->st); } } @@ -335,21 +383,23 @@ static void fpsimd_save(void) * If things go wrong there's a bug somewhere, but try to fall back to a * safe choice. */ -static unsigned int find_supported_vector_length(unsigned int vl) +static unsigned int find_supported_vector_length(enum vec_type type, + unsigned int vl) { + struct vl_info *info = &vl_info[type]; int bit; - int max_vl = sve_max_vl; + int max_vl = info->max_vl; if (WARN_ON(!sve_vl_valid(vl))) - vl = SVE_VL_MIN; + vl = info->min_vl; if (WARN_ON(!sve_vl_valid(max_vl))) - max_vl = SVE_VL_MIN; + max_vl = info->min_vl; if (vl > max_vl) vl = max_vl; - bit = find_next_bit(sve_vq_map, SVE_VQ_MAX, + bit = find_next_bit(info->vq_map, SVE_VQ_MAX, __vq_to_bit(sve_vq_from_vl(vl))); return sve_vl_from_vq(__bit_to_vq(bit)); } @@ -359,6 +409,7 @@ static unsigned int find_supported_vector_length(unsigned int vl) static int sve_proc_do_default_vl(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { + struct vl_info *info = &vl_info[ARM64_VEC_SVE]; int ret; int vl = get_sve_default_vl(); struct ctl_table tmp_table = { @@ -372,12 +423,12 @@ static int sve_proc_do_default_vl(struct ctl_table *table, int write, /* Writing -1 has the special meaning "set to max": */ if (vl == -1) - vl = sve_max_vl; + vl = info->max_vl; if (!sve_vl_valid(vl)) return -EINVAL; - set_sve_default_vl(find_supported_vector_length(vl)); + set_sve_default_vl(find_supported_vector_length(ARM64_VEC_SVE, vl)); return 0; } @@ -456,7 +507,7 @@ static void fpsimd_to_sve(struct task_struct *task) if (!system_supports_sve()) return; - vq = sve_vq_from_vl(task->thread.sve_vl); + vq = sve_vq_from_vl(task_get_sve_vl(task)); __fpsimd_to_sve(sst, fst, vq); } @@ -482,7 +533,7 @@ static void sve_to_fpsimd(struct task_struct *task) if (!system_supports_sve()) return; - vq = sve_vq_from_vl(task->thread.sve_vl); + vq = sve_vq_from_vl(task_get_sve_vl(task)); for (i = 0; i < SVE_NUM_ZREGS; ++i) { p = (__uint128_t const *)ZREG(sst, vq, i); fst->vregs[i] = arm64_le128_to_cpu(*p); @@ -495,9 +546,9 @@ static void sve_to_fpsimd(struct task_struct *task) * Return how many bytes of memory are required to store the full SVE * state for task, given task's currently configured vector length. */ -size_t sve_state_size(struct task_struct const *task) +static size_t sve_state_size(struct task_struct const *task) { - return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task->thread.sve_vl)); + return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task_get_sve_vl(task))); } /* @@ -572,7 +623,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) if (!test_tsk_thread_flag(task, TIF_SVE)) return; - vq = sve_vq_from_vl(task->thread.sve_vl); + vq = sve_vq_from_vl(task_get_sve_vl(task)); memset(sst, 0, SVE_SIG_REGS_SIZE(vq)); __fpsimd_to_sve(sst, fst, vq); @@ -596,20 +647,20 @@ int sve_set_vector_length(struct task_struct *task, if (vl > SVE_VL_ARCH_MAX) vl = SVE_VL_ARCH_MAX; - vl = find_supported_vector_length(vl); + vl = find_supported_vector_length(ARM64_VEC_SVE, vl); if (flags & (PR_SVE_VL_INHERIT | PR_SVE_SET_VL_ONEXEC)) - task->thread.sve_vl_onexec = vl; + task_set_sve_vl_onexec(task, vl); else /* Reset VL to system default on next exec: */ - task->thread.sve_vl_onexec = 0; + task_set_sve_vl_onexec(task, 0); /* Only actually set the VL if not deferred: */ if (flags & PR_SVE_SET_VL_ONEXEC) goto out; - if (vl == task->thread.sve_vl) + if (vl == task_get_sve_vl(task)) goto out; /* @@ -636,7 +687,7 @@ int sve_set_vector_length(struct task_struct *task, */ sve_free(task); - task->thread.sve_vl = vl; + task_set_sve_vl(task, vl); out: update_tsk_thread_flag(task, TIF_SVE_VL_INHERIT, @@ -656,9 +707,9 @@ static int sve_prctl_status(unsigned long flags) int ret; if (flags & PR_SVE_SET_VL_ONEXEC) - ret = current->thread.sve_vl_onexec; + ret = task_get_sve_vl_onexec(current); else - ret = current->thread.sve_vl; + ret = task_get_sve_vl(current); if (test_thread_flag(TIF_SVE_VL_INHERIT)) ret |= PR_SVE_VL_INHERIT; @@ -694,18 +745,15 @@ int sve_get_current_vl(void) return sve_prctl_status(0); } -static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX)) +static void vec_probe_vqs(struct vl_info *info, + DECLARE_BITMAP(map, SVE_VQ_MAX)) { unsigned int vq, vl; - unsigned long zcr; bitmap_zero(map, SVE_VQ_MAX); - zcr = ZCR_ELx_LEN_MASK; - zcr = read_sysreg_s(SYS_ZCR_EL1) & ~zcr; - for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) { - write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1); /* self-syncing */ + write_vl(info->type, vq - 1); /* self-syncing */ vl = sve_get_vl(); vq = sve_vq_from_vl(vl); /* skip intervening lengths */ set_bit(__vq_to_bit(vq), map); @@ -716,10 +764,11 @@ static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX)) * Initialise the set of known supported VQs for the boot CPU. * This is called during kernel boot, before secondary CPUs are brought up. */ -void __init sve_init_vq_map(void) +void __init vec_init_vq_map(enum vec_type type) { - sve_probe_vqs(sve_vq_map); - bitmap_copy(sve_vq_partial_map, sve_vq_map, SVE_VQ_MAX); + struct vl_info *info = &vl_info[type]; + vec_probe_vqs(info, info->vq_map); + bitmap_copy(info->vq_partial_map, info->vq_map, SVE_VQ_MAX); } /* @@ -727,30 +776,33 @@ void __init sve_init_vq_map(void) * those not supported by the current CPU. * This function is called during the bring-up of early secondary CPUs only. */ -void sve_update_vq_map(void) +void vec_update_vq_map(enum vec_type type) { + struct vl_info *info = &vl_info[type]; DECLARE_BITMAP(tmp_map, SVE_VQ_MAX); - sve_probe_vqs(tmp_map); - bitmap_and(sve_vq_map, sve_vq_map, tmp_map, SVE_VQ_MAX); - bitmap_or(sve_vq_partial_map, sve_vq_partial_map, tmp_map, SVE_VQ_MAX); + vec_probe_vqs(info, tmp_map); + bitmap_and(info->vq_map, info->vq_map, tmp_map, SVE_VQ_MAX); + bitmap_or(info->vq_partial_map, info->vq_partial_map, tmp_map, + SVE_VQ_MAX); } /* * Check whether the current CPU supports all VQs in the committed set. * This function is called during the bring-up of late secondary CPUs only. */ -int sve_verify_vq_map(void) +int vec_verify_vq_map(enum vec_type type) { + struct vl_info *info = &vl_info[type]; DECLARE_BITMAP(tmp_map, SVE_VQ_MAX); unsigned long b; - sve_probe_vqs(tmp_map); + vec_probe_vqs(info, tmp_map); bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX); - if (bitmap_intersects(tmp_map, sve_vq_map, SVE_VQ_MAX)) { - pr_warn("SVE: cpu%d: Required vector length(s) missing\n", - smp_processor_id()); + if (bitmap_intersects(tmp_map, info->vq_map, SVE_VQ_MAX)) { + pr_warn("%s: cpu%d: Required vector length(s) missing\n", + info->name, smp_processor_id()); return -EINVAL; } @@ -766,7 +818,7 @@ int sve_verify_vq_map(void) /* Recover the set of supported VQs: */ bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX); /* Find VQs supported that are not globally supported: */ - bitmap_andnot(tmp_map, tmp_map, sve_vq_map, SVE_VQ_MAX); + bitmap_andnot(tmp_map, tmp_map, info->vq_map, SVE_VQ_MAX); /* Find the lowest such VQ, if any: */ b = find_last_bit(tmp_map, SVE_VQ_MAX); @@ -777,9 +829,9 @@ int sve_verify_vq_map(void) * Mismatches above sve_max_virtualisable_vl are fine, since * no guest is allowed to configure ZCR_EL2.LEN to exceed this: */ - if (sve_vl_from_vq(__bit_to_vq(b)) <= sve_max_virtualisable_vl) { - pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n", - smp_processor_id()); + if (sve_vl_from_vq(__bit_to_vq(b)) <= info->max_virtualisable_vl) { + pr_warn("%s: cpu%d: Unsupported vector length(s) present\n", + info->name, smp_processor_id()); return -EINVAL; } @@ -788,6 +840,8 @@ int sve_verify_vq_map(void) static void __init sve_efi_setup(void) { + struct vl_info *info = &vl_info[ARM64_VEC_SVE]; + if (!IS_ENABLED(CONFIG_EFI)) return; @@ -796,11 +850,11 @@ static void __init sve_efi_setup(void) * This is evidence of a crippled system and we are returning void, * so no attempt is made to handle this situation here. */ - if (!sve_vl_valid(sve_max_vl)) + if (!sve_vl_valid(info->max_vl)) goto fail; efi_sve_state = __alloc_percpu( - SVE_SIG_REGS_SIZE(sve_vq_from_vl(sve_max_vl)), SVE_VQ_BYTES); + SVE_SIG_REGS_SIZE(sve_vq_from_vl(info->max_vl)), SVE_VQ_BYTES); if (!efi_sve_state) goto fail; @@ -849,6 +903,7 @@ u64 read_zcr_features(void) void __init sve_setup(void) { + struct vl_info *info = &vl_info[ARM64_VEC_SVE]; u64 zcr; DECLARE_BITMAP(tmp_map, SVE_VQ_MAX); unsigned long b; @@ -861,49 +916,52 @@ void __init sve_setup(void) * so sve_vq_map must have at least SVE_VQ_MIN set. * If something went wrong, at least try to patch it up: */ - if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map))) - set_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map); + if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map))) + set_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map); zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1); - sve_max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1); + info->max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1); /* * Sanity-check that the max VL we determined through CPU features * corresponds properly to sve_vq_map. If not, do our best: */ - if (WARN_ON(sve_max_vl != find_supported_vector_length(sve_max_vl))) - sve_max_vl = find_supported_vector_length(sve_max_vl); + if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SVE, + info->max_vl))) + info->max_vl = find_supported_vector_length(ARM64_VEC_SVE, + info->max_vl); /* * For the default VL, pick the maximum supported value <= 64. * VL == 64 is guaranteed not to grow the signal frame. */ - set_sve_default_vl(find_supported_vector_length(64)); + set_sve_default_vl(find_supported_vector_length(ARM64_VEC_SVE, 64)); - bitmap_andnot(tmp_map, sve_vq_partial_map, sve_vq_map, + bitmap_andnot(tmp_map, info->vq_partial_map, info->vq_map, SVE_VQ_MAX); b = find_last_bit(tmp_map, SVE_VQ_MAX); if (b >= SVE_VQ_MAX) /* No non-virtualisable VLs found */ - sve_max_virtualisable_vl = SVE_VQ_MAX; + info->max_virtualisable_vl = SVE_VQ_MAX; else if (WARN_ON(b == SVE_VQ_MAX - 1)) /* No virtualisable VLs? This is architecturally forbidden. */ - sve_max_virtualisable_vl = SVE_VQ_MIN; + info->max_virtualisable_vl = SVE_VQ_MIN; else /* b + 1 < SVE_VQ_MAX */ - sve_max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1)); + info->max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1)); - if (sve_max_virtualisable_vl > sve_max_vl) - sve_max_virtualisable_vl = sve_max_vl; + if (info->max_virtualisable_vl > info->max_vl) + info->max_virtualisable_vl = info->max_vl; - pr_info("SVE: maximum available vector length %u bytes per vector\n", - sve_max_vl); - pr_info("SVE: default vector length %u bytes per vector\n", - get_sve_default_vl()); + pr_info("%s: maximum available vector length %u bytes per vector\n", + info->name, info->max_vl); + pr_info("%s: default vector length %u bytes per vector\n", + info->name, get_sve_default_vl()); /* KVM decides whether to support mismatched systems. Just warn here: */ - if (sve_max_virtualisable_vl < sve_max_vl) - pr_warn("SVE: unvirtualisable vector lengths present\n"); + if (sve_max_virtualisable_vl() < sve_max_vl()) + pr_warn("%s: unvirtualisable vector lengths present\n", + info->name); sve_efi_setup(); } @@ -958,9 +1016,9 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) */ if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { unsigned long vq_minus_one = - sve_vq_from_vl(current->thread.sve_vl) - 1; + sve_vq_from_vl(task_get_sve_vl(current)) - 1; sve_set_vq(vq_minus_one); - sve_flush_live(vq_minus_one); + sve_flush_live(true, vq_minus_one); fpsimd_bind_task_to_cpu(); } else { fpsimd_to_sve(current); @@ -1030,10 +1088,43 @@ void fpsimd_thread_switch(struct task_struct *next) __put_cpu_fpsimd_context(); } -void fpsimd_flush_thread(void) +static void fpsimd_flush_thread_vl(enum vec_type type) { int vl, supported_vl; + /* + * Reset the task vector length as required. This is where we + * ensure that all user tasks have a valid vector length + * configured: no kernel task can become a user task without + * an exec and hence a call to this function. By the time the + * first call to this function is made, all early hardware + * probing is complete, so __sve_default_vl should be valid. + * If a bug causes this to go wrong, we make some noise and + * try to fudge thread.sve_vl to a safe value here. + */ + vl = task_get_vl_onexec(current, type); + if (!vl) + vl = get_default_vl(type); + + if (WARN_ON(!sve_vl_valid(vl))) + vl = SVE_VL_MIN; + + supported_vl = find_supported_vector_length(type, vl); + if (WARN_ON(supported_vl != vl)) + vl = supported_vl; + + task_set_vl(current, type, vl); + + /* + * If the task is not set to inherit, ensure that the vector + * length will be reset by a subsequent exec: + */ + if (!test_thread_flag(vec_vl_inherit_flag(type))) + task_set_vl_onexec(current, type, 0); +} + +void fpsimd_flush_thread(void) +{ if (!system_supports_fpsimd()) return; @@ -1046,36 +1137,7 @@ void fpsimd_flush_thread(void) if (system_supports_sve()) { clear_thread_flag(TIF_SVE); sve_free(current); - - /* - * Reset the task vector length as required. - * This is where we ensure that all user tasks have a valid - * vector length configured: no kernel task can become a user - * task without an exec and hence a call to this function. - * By the time the first call to this function is made, all - * early hardware probing is complete, so __sve_default_vl - * should be valid. - * If a bug causes this to go wrong, we make some noise and - * try to fudge thread.sve_vl to a safe value here. - */ - vl = current->thread.sve_vl_onexec ? - current->thread.sve_vl_onexec : get_sve_default_vl(); - - if (WARN_ON(!sve_vl_valid(vl))) - vl = SVE_VL_MIN; - - supported_vl = find_supported_vector_length(vl); - if (WARN_ON(supported_vl != vl)) - vl = supported_vl; - - current->thread.sve_vl = vl; - - /* - * If the task is not set to inherit, ensure that the vector - * length will be reset by a subsequent exec: - */ - if (!test_thread_flag(TIF_SVE_VL_INHERIT)) - current->thread.sve_vl_onexec = 0; + fpsimd_flush_thread_vl(ARM64_VEC_SVE); } put_cpu_fpsimd_context(); @@ -1120,7 +1182,7 @@ static void fpsimd_bind_task_to_cpu(void) WARN_ON(!system_supports_fpsimd()); last->st = ¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; - last->sve_vl = current->thread.sve_vl; + last->sve_vl = task_get_sve_vl(current); current->thread.fpsimd_cpu = smp_processor_id(); if (system_supports_sve()) { @@ -1353,8 +1415,9 @@ void __efi_fpsimd_begin(void) __this_cpu_write(efi_sve_state_used, true); - sve_save_state(sve_state + sve_ffr_offset(sve_max_vl), - &this_cpu_ptr(&efi_fpsimd_state)->fpsr); + sve_save_state(sve_state + sve_ffr_offset(sve_max_vl()), + &this_cpu_ptr(&efi_fpsimd_state)->fpsr, + true); } else { fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state)); } @@ -1378,9 +1441,10 @@ void __efi_fpsimd_end(void) likely(__this_cpu_read(efi_sve_state_used))) { char const *sve_state = this_cpu_ptr(efi_sve_state); - sve_load_state(sve_state + sve_ffr_offset(sve_max_vl), + sve_set_vq(sve_vq_from_vl(sve_get_vl()) - 1); + sve_load_state(sve_state + sve_ffr_offset(sve_max_vl()), &this_cpu_ptr(&efi_fpsimd_state)->fpsr, - sve_vq_from_vl(sve_get_vl()) - 1); + true); __this_cpu_write(efi_sve_state_used, false); } else { diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 81c0186a5e32..0e1d9c3c6a93 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -16,26 +16,6 @@ #include <asm/virt.h> /* - * To prevent the possibility of old and new partial table walks being visible - * in the tlb, switch the ttbr to a zero page when we invalidate the old - * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i - * Even switching to our copied tables will cause a changed output address at - * each stage of the walk. - */ -.macro break_before_make_ttbr_switch zero_page, page_table, tmp, tmp2 - phys_to_ttbr \tmp, \zero_page - msr ttbr1_el1, \tmp - isb - tlbi vmalle1 - dsb nsh - phys_to_ttbr \tmp, \page_table - offset_ttbr1 \tmp, \tmp2 - msr ttbr1_el1, \tmp - isb -.endm - - -/* * Resume from hibernate * * Loads temporary page tables then restores the memory image. @@ -112,56 +92,4 @@ alternative_insn "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE hvc #0 3: ret SYM_CODE_END(swsusp_arch_suspend_exit) - -/* - * Restore the hyp stub. - * This must be done before the hibernate page is unmapped by _cpu_resume(), - * but happens before any of the hyp-stub's code is cleaned to PoC. - * - * x24: The physical address of __hyp_stub_vectors - */ -SYM_CODE_START_LOCAL(el1_sync) - msr vbar_el2, x24 - eret -SYM_CODE_END(el1_sync) - -.macro invalid_vector label -SYM_CODE_START_LOCAL(\label) - b \label -SYM_CODE_END(\label) -.endm - - invalid_vector el2_sync_invalid - invalid_vector el2_irq_invalid - invalid_vector el2_fiq_invalid - invalid_vector el2_error_invalid - invalid_vector el1_sync_invalid - invalid_vector el1_irq_invalid - invalid_vector el1_fiq_invalid - invalid_vector el1_error_invalid - -/* el2 vectors - switch el2 here while we restore the memory image. */ - .align 11 -SYM_CODE_START(hibernate_el2_vectors) - ventry el2_sync_invalid // Synchronous EL2t - ventry el2_irq_invalid // IRQ EL2t - ventry el2_fiq_invalid // FIQ EL2t - ventry el2_error_invalid // Error EL2t - - ventry el2_sync_invalid // Synchronous EL2h - ventry el2_irq_invalid // IRQ EL2h - ventry el2_fiq_invalid // FIQ EL2h - ventry el2_error_invalid // Error EL2h - - ventry el1_sync // Synchronous 64-bit EL1 - ventry el1_irq_invalid // IRQ 64-bit EL1 - ventry el1_fiq_invalid // FIQ 64-bit EL1 - ventry el1_error_invalid // Error 64-bit EL1 - - ventry el1_sync_invalid // Synchronous 32-bit EL1 - ventry el1_irq_invalid // IRQ 32-bit EL1 - ventry el1_fiq_invalid // FIQ 32-bit EL1 - ventry el1_error_invalid // Error 32-bit EL1 -SYM_CODE_END(hibernate_el2_vectors) - .popsection diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 46a0b4d6e251..2758f75d6809 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -49,10 +49,7 @@ extern int in_suspend; /* Do we need to reset el2? */ -#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) - -/* temporary el2 vectors in the __hibernate_exit_text section. */ -extern char hibernate_el2_vectors[]; +#define el2_reset_needed() (is_hyp_nvhe()) /* hyp-stub vectors, used to restore el2 during resume from hibernate. */ extern char __hyp_stub_vectors[]; @@ -215,26 +212,7 @@ static int create_safe_exec_page(void *src_start, size_t length, if (rc) return rc; - /* - * Load our new page tables. A strict BBM approach requires that we - * ensure that TLBs are free of any entries that may overlap with the - * global mappings we are about to install. - * - * For a real hibernate/resume cycle TTBR0 currently points to a zero - * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI - * runtime services), while for a userspace-driven test_resume cycle it - * points to userspace page tables (and we must point it at a zero page - * ourselves). - * - * We change T0SZ as part of installing the idmap. This is undone by - * cpu_uninstall_idmap() in __cpu_suspend_exit(). - */ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - __cpu_set_tcr_t0sz(t0sz); - write_sysreg(trans_ttbr0, ttbr0_el1); - isb(); - + cpu_install_ttbr0(trans_ttbr0, t0sz); *phys_dst_addr = virt_to_phys(page); return 0; @@ -434,6 +412,7 @@ int swsusp_arch_resume(void) void *zero_page; size_t exit_size; pgd_t *tmp_pg_dir; + phys_addr_t el2_vectors; void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *, void *, phys_addr_t, phys_addr_t); struct trans_pgd_info trans_info = { @@ -461,6 +440,14 @@ int swsusp_arch_resume(void) return -ENOMEM; } + if (el2_reset_needed()) { + rc = trans_pgd_copy_el2_vectors(&trans_info, &el2_vectors); + if (rc) { + pr_err("Failed to setup el2 vectors\n"); + return rc; + } + } + exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start; /* * Copy swsusp_arch_suspend_exit() to a safe page. This will generate @@ -474,25 +461,13 @@ int swsusp_arch_resume(void) } /* - * The hibernate exit text contains a set of el2 vectors, that will - * be executed at el2 with the mmu off in order to reload hyp-stub. - */ - dcache_clean_inval_poc((unsigned long)hibernate_exit, - (unsigned long)hibernate_exit + exit_size); - - /* * KASLR will cause the el2 vectors to be in a different location in * the resumed kernel. Load hibernate's temporary copy into el2. * * We can skip this step if we booted at EL1, or are running with VHE. */ - if (el2_reset_needed()) { - phys_addr_t el2_vectors = (phys_addr_t)hibernate_exit; - el2_vectors += hibernate_el2_vectors - - __hibernate_exit_text_start; /* offset */ - + if (el2_reset_needed()) __hyp_set_vectors(el2_vectors); - } hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1, resume_hdr.reenter_kernel, restore_pblist, diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 213d56c14f60..1038494135c8 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -21,12 +21,8 @@ #include <asm/mmu.h> #include <asm/mmu_context.h> #include <asm/page.h> - -#include "cpu-reset.h" - -/* Global variables for the arm64_relocate_new_kernel routine. */ -extern const unsigned char arm64_relocate_new_kernel[]; -extern const unsigned long arm64_relocate_new_kernel_size; +#include <asm/sections.h> +#include <asm/trans_pgd.h> /** * kexec_image_info - For debugging output. @@ -43,7 +39,9 @@ static void _kexec_image_info(const char *func, int line, pr_debug(" start: %lx\n", kimage->start); pr_debug(" head: %lx\n", kimage->head); pr_debug(" nr_segments: %lu\n", kimage->nr_segments); + pr_debug(" dtb_mem: %pa\n", &kimage->arch.dtb_mem); pr_debug(" kern_reloc: %pa\n", &kimage->arch.kern_reloc); + pr_debug(" el2_vectors: %pa\n", &kimage->arch.el2_vectors); for (i = 0; i < kimage->nr_segments; i++) { pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", @@ -60,29 +58,6 @@ void machine_kexec_cleanup(struct kimage *kimage) /* Empty routine needed to avoid build errors. */ } -int machine_kexec_post_load(struct kimage *kimage) -{ - void *reloc_code = page_to_virt(kimage->control_code_page); - - memcpy(reloc_code, arm64_relocate_new_kernel, - arm64_relocate_new_kernel_size); - kimage->arch.kern_reloc = __pa(reloc_code); - kexec_image_info(kimage); - - /* - * For execution with the MMU off, reloc_code needs to be cleaned to the - * PoC and invalidated from the I-cache. - */ - dcache_clean_inval_poc((unsigned long)reloc_code, - (unsigned long)reloc_code + - arm64_relocate_new_kernel_size); - icache_inval_pou((uintptr_t)reloc_code, - (uintptr_t)reloc_code + - arm64_relocate_new_kernel_size); - - return 0; -} - /** * machine_kexec_prepare - Prepare for a kexec reboot. * @@ -101,45 +76,6 @@ int machine_kexec_prepare(struct kimage *kimage) } /** - * kexec_list_flush - Helper to flush the kimage list and source pages to PoC. - */ -static void kexec_list_flush(struct kimage *kimage) -{ - kimage_entry_t *entry; - - for (entry = &kimage->head; ; entry++) { - unsigned int flag; - unsigned long addr; - - /* flush the list entries. */ - dcache_clean_inval_poc((unsigned long)entry, - (unsigned long)entry + - sizeof(kimage_entry_t)); - - flag = *entry & IND_FLAGS; - if (flag == IND_DONE) - break; - - addr = (unsigned long)phys_to_virt(*entry & PAGE_MASK); - - switch (flag) { - case IND_INDIRECTION: - /* Set entry point just before the new list page. */ - entry = (kimage_entry_t *)addr - 1; - break; - case IND_SOURCE: - /* flush the source pages. */ - dcache_clean_inval_poc(addr, addr + PAGE_SIZE); - break; - case IND_DESTINATION: - break; - default: - BUG(); - } - } -} - -/** * kexec_segment_flush - Helper to flush the kimage segments to PoC. */ static void kexec_segment_flush(const struct kimage *kimage) @@ -163,6 +99,75 @@ static void kexec_segment_flush(const struct kimage *kimage) } } +/* Allocates pages for kexec page table */ +static void *kexec_page_alloc(void *arg) +{ + struct kimage *kimage = (struct kimage *)arg; + struct page *page = kimage_alloc_control_pages(kimage, 0); + + if (!page) + return NULL; + + memset(page_address(page), 0, PAGE_SIZE); + + return page_address(page); +} + +int machine_kexec_post_load(struct kimage *kimage) +{ + int rc; + pgd_t *trans_pgd; + void *reloc_code = page_to_virt(kimage->control_code_page); + long reloc_size; + struct trans_pgd_info info = { + .trans_alloc_page = kexec_page_alloc, + .trans_alloc_arg = kimage, + }; + + /* If in place, relocation is not used, only flush next kernel */ + if (kimage->head & IND_DONE) { + kexec_segment_flush(kimage); + kexec_image_info(kimage); + return 0; + } + + kimage->arch.el2_vectors = 0; + if (is_hyp_nvhe()) { + rc = trans_pgd_copy_el2_vectors(&info, + &kimage->arch.el2_vectors); + if (rc) + return rc; + } + + /* Create a copy of the linear map */ + trans_pgd = kexec_page_alloc(kimage); + if (!trans_pgd) + return -ENOMEM; + rc = trans_pgd_create_copy(&info, &trans_pgd, PAGE_OFFSET, PAGE_END); + if (rc) + return rc; + kimage->arch.ttbr1 = __pa(trans_pgd); + kimage->arch.zero_page = __pa(empty_zero_page); + + reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start; + memcpy(reloc_code, __relocate_new_kernel_start, reloc_size); + kimage->arch.kern_reloc = __pa(reloc_code); + rc = trans_pgd_idmap_page(&info, &kimage->arch.ttbr0, + &kimage->arch.t0sz, reloc_code); + if (rc) + return rc; + kimage->arch.phys_offset = virt_to_phys(kimage) - (long)kimage; + + /* Flush the reloc_code in preparation for its execution. */ + dcache_clean_inval_poc((unsigned long)reloc_code, + (unsigned long)reloc_code + reloc_size); + icache_inval_pou((uintptr_t)reloc_code, + (uintptr_t)reloc_code + reloc_size); + kexec_image_info(kimage); + + return 0; +} + /** * machine_kexec - Do the kexec reboot. * @@ -180,31 +185,35 @@ void machine_kexec(struct kimage *kimage) WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()), "Some CPUs may be stale, kdump will be unreliable.\n"); - /* Flush the kimage list and its buffers. */ - kexec_list_flush(kimage); - - /* Flush the new image if already in place. */ - if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE)) - kexec_segment_flush(kimage); - pr_info("Bye!\n"); local_daif_mask(); /* - * cpu_soft_restart will shutdown the MMU, disable data caches, then - * transfer control to the kern_reloc which contains a copy of - * the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel - * uses physical addressing to relocate the new image to its final - * position and transfers control to the image entry point when the - * relocation is complete. + * Both restart and kernel_reloc will shutdown the MMU, disable data + * caches. However, restart will start new kernel or purgatory directly, + * kernel_reloc contains the body of arm64_relocate_new_kernel * In kexec case, kimage->start points to purgatory assuming that * kernel entry and dtb address are embedded in purgatory by * userspace (kexec-tools). * In kexec_file case, the kernel starts directly without purgatory. */ - cpu_soft_restart(kimage->arch.kern_reloc, kimage->head, kimage->start, - kimage->arch.dtb_mem); + if (kimage->head & IND_DONE) { + typeof(cpu_soft_restart) *restart; + + cpu_install_idmap(); + restart = (void *)__pa_symbol(function_nocfi(cpu_soft_restart)); + restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem, + 0, 0); + } else { + void (*kernel_reloc)(struct kimage *kimage); + + if (is_hyp_nvhe()) + __hyp_set_vectors(kimage->arch.el2_vectors); + cpu_install_ttbr0(kimage->arch.ttbr0, kimage->arch.t0sz); + kernel_reloc = (void *)kimage->arch.kern_reloc; + kernel_reloc(kimage); + } BUG(); /* Should never get here. */ } @@ -261,8 +270,6 @@ void arch_kexec_protect_crashkres(void) { int i; - kexec_segment_flush(kexec_crash_image); - for (i = 0; i < kexec_crash_image->nr_segments; i++) set_memory_valid( __phys_to_virt(kexec_crash_image->segment[i].mem), diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index e5e801bc5312..f418ebc65f95 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -26,9 +26,12 @@ static DEFINE_PER_CPU_READ_MOSTLY(u64, mte_tcf_preferred); #ifdef CONFIG_KASAN_HW_TAGS -/* Whether the MTE asynchronous mode is enabled. */ -DEFINE_STATIC_KEY_FALSE(mte_async_mode); -EXPORT_SYMBOL_GPL(mte_async_mode); +/* + * The asynchronous and asymmetric MTE modes have the same behavior for + * store operations. This flag is set when either of these modes is enabled. + */ +DEFINE_STATIC_KEY_FALSE(mte_async_or_asymm_mode); +EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode); #endif static void mte_sync_page_tags(struct page *page, pte_t old_pte, @@ -116,7 +119,7 @@ void mte_enable_kernel_sync(void) * Make sure we enter this function when no PE has set * async mode previously. */ - WARN_ONCE(system_uses_mte_async_mode(), + WARN_ONCE(system_uses_mte_async_or_asymm_mode(), "MTE async mode enabled system wide!"); __mte_enable_kernel("synchronous", SCTLR_ELx_TCF_SYNC); @@ -134,8 +137,34 @@ void mte_enable_kernel_async(void) * mode in between sync and async, this strategy needs * to be reviewed. */ - if (!system_uses_mte_async_mode()) - static_branch_enable(&mte_async_mode); + if (!system_uses_mte_async_or_asymm_mode()) + static_branch_enable(&mte_async_or_asymm_mode); +} + +void mte_enable_kernel_asymm(void) +{ + if (cpus_have_cap(ARM64_MTE_ASYMM)) { + __mte_enable_kernel("asymmetric", SCTLR_ELx_TCF_ASYMM); + + /* + * MTE asymm mode behaves as async mode for store + * operations. The mode is set system wide by the + * first PE that executes this function. + * + * Note: If in future KASAN acquires a runtime switching + * mode in between sync and async, this strategy needs + * to be reviewed. + */ + if (!system_uses_mte_async_or_asymm_mode()) + static_branch_enable(&mte_async_or_asymm_mode); + } else { + /* + * If the CPU does not support MTE asymmetric mode the + * kernel falls back on synchronous mode which is the + * default for kasan=on. + */ + mte_enable_kernel_sync(); + } } #endif @@ -179,6 +208,30 @@ static void mte_update_sctlr_user(struct task_struct *task) task->thread.sctlr_user = sctlr; } +static void mte_update_gcr_excl(struct task_struct *task) +{ + /* + * SYS_GCR_EL1 will be set to current->thread.mte_ctrl value by + * mte_set_user_gcr() in kernel_exit, but only if KASAN is enabled. + */ + if (kasan_hw_tags_enabled()) + return; + + write_sysreg_s( + ((task->thread.mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) & + SYS_GCR_EL1_EXCL_MASK) | SYS_GCR_EL1_RRND, + SYS_GCR_EL1); +} + +void __init kasan_hw_tags_enable(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst) +{ + BUG_ON(nr_inst != 1); /* Branch -> NOP */ + + if (kasan_hw_tags_enabled()) + *updptr = cpu_to_le32(aarch64_insn_gen_nop()); +} + void mte_thread_init_user(void) { if (!system_supports_mte()) @@ -198,6 +251,7 @@ void mte_thread_switch(struct task_struct *next) return; mte_update_sctlr_user(next); + mte_update_gcr_excl(next); /* * Check if an async tag exception occurred at EL1. @@ -243,6 +297,7 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg) if (task == current) { preempt_disable(); mte_update_sctlr_user(task); + mte_update_gcr_excl(task); update_sctlr_el1(task->thread.sctlr_user); preempt_enable(); } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index e26196a33cf4..88a9034fb9b5 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -725,10 +725,10 @@ static void sve_init_header_from_task(struct user_sve_header *header, if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) header->flags |= SVE_PT_VL_INHERIT; - header->vl = target->thread.sve_vl; + header->vl = task_get_sve_vl(target); vq = sve_vq_from_vl(header->vl); - header->max_vl = sve_max_vl; + header->max_vl = sve_max_vl(); header->size = SVE_PT_SIZE(vq, header->flags); header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl), SVE_PT_REGS_SVE); @@ -820,7 +820,7 @@ static int sve_set(struct task_struct *target, goto out; /* Actual VL set may be less than the user asked for: */ - vq = sve_vq_from_vl(target->thread.sve_vl); + vq = sve_vq_from_vl(task_get_sve_vl(target)); /* Registers: FPSIMD-only case */ diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index b78ea5de97a4..f0a3df9e18a3 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -4,6 +4,8 @@ * * Copyright (C) Linaro. * Copyright (C) Huawei Futurewei Technologies. + * Copyright (C) 2021, Microsoft Corporation. + * Pasha Tatashin <pasha.tatashin@soleen.com> */ #include <linux/kexec.h> @@ -13,7 +15,16 @@ #include <asm/kexec.h> #include <asm/page.h> #include <asm/sysreg.h> +#include <asm/virt.h> +.macro turn_off_mmu tmp1, tmp2 + mov_q \tmp1, INIT_SCTLR_EL1_MMU_OFF + pre_disable_mmu_workaround + msr sctlr_el1, \tmp1 + isb +.endm + +.section ".kexec_relocate.text", "ax" /* * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it. * @@ -27,33 +38,24 @@ */ SYM_CODE_START(arm64_relocate_new_kernel) /* Setup the list loop variables. */ - mov x18, x2 /* x18 = dtb address */ - mov x17, x1 /* x17 = kimage_start */ - mov x16, x0 /* x16 = kimage_head */ - mov x14, xzr /* x14 = entry ptr */ - mov x13, xzr /* x13 = copy dest */ - /* Check if the new image needs relocation. */ - tbnz x16, IND_DONE_BIT, .Ldone + ldr x18, [x0, #KIMAGE_ARCH_ZERO_PAGE] /* x18 = zero page for BBM */ + ldr x17, [x0, #KIMAGE_ARCH_TTBR1] /* x17 = linear map copy */ + ldr x16, [x0, #KIMAGE_HEAD] /* x16 = kimage_head */ + ldr x22, [x0, #KIMAGE_ARCH_PHYS_OFFSET] /* x22 phys_offset */ raw_dcache_line_size x15, x1 /* x15 = dcache line size */ + break_before_make_ttbr_switch x18, x17, x1, x2 /* set linear map */ .Lloop: and x12, x16, PAGE_MASK /* x12 = addr */ - + sub x12, x12, x22 /* Convert x12 to virt */ /* Test the entry flags. */ .Ltest_source: tbz x16, IND_SOURCE_BIT, .Ltest_indirection /* Invalidate dest page to PoC. */ - mov x2, x13 - add x20, x2, #PAGE_SIZE - sub x1, x15, #1 - bic x2, x2, x1 -2: dc ivac, x2 - add x2, x2, x15 - cmp x2, x20 - b.lo 2b - dsb sy - + mov x19, x13 copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8 + add x1, x19, #PAGE_SIZE + dcache_by_myline_op civac, sy, x19, x1, x15, x20 b .Lnext .Ltest_indirection: tbz x16, IND_INDIRECTION_BIT, .Ltest_destination @@ -65,31 +67,26 @@ SYM_CODE_START(arm64_relocate_new_kernel) .Lnext: ldr x16, [x14], #8 /* entry = *ptr++ */ tbz x16, IND_DONE_BIT, .Lloop /* while (!(entry & DONE)) */ -.Ldone: /* wait for writes from copy_page to finish */ dsb nsh ic iallu dsb nsh isb + ldr x4, [x0, #KIMAGE_START] /* relocation start */ + ldr x1, [x0, #KIMAGE_ARCH_EL2_VECTORS] /* relocation start */ + ldr x0, [x0, #KIMAGE_ARCH_DTB_MEM] /* dtb address */ + turn_off_mmu x12, x13 /* Start new image. */ - mov x0, x18 - mov x1, xzr + cbz x1, .Lel1 + mov x1, x4 /* relocation start */ + mov x2, x0 /* dtb address */ + mov x3, xzr + mov x4, xzr + mov x0, #HVC_SOFT_RESTART + hvc #0 /* Jumps from el2 */ +.Lel1: mov x2, xzr mov x3, xzr - br x17 - + br x4 /* Jumps from el1 */ SYM_CODE_END(arm64_relocate_new_kernel) - -.align 3 /* To keep the 64-bit values below naturally aligned. */ - -.Lcopy_end: -.org KEXEC_CONTROL_PAGE_SIZE - -/* - * arm64_relocate_new_kernel_size - Number of bytes to copy to the - * control_code_page. - */ -.globl arm64_relocate_new_kernel_size -arm64_relocate_new_kernel_size: - .quad .Lcopy_end - arm64_relocate_new_kernel diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 47f77d1234cb..d20620a1c51a 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -202,7 +202,7 @@ unsigned long sdei_arch_get_entry_point(int conduit) * dropped to EL1 because we don't support VHE, then we can't support * SDEI. */ - if (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) { + if (is_hyp_nvhe()) { pr_err("Not supported on this hardware/boot configuration\n"); goto out_err; } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index c287b9407f28..8f6372b44b65 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -227,7 +227,7 @@ static int preserve_sve_context(struct sve_context __user *ctx) { int err = 0; u16 reserved[ARRAY_SIZE(ctx->__reserved)]; - unsigned int vl = current->thread.sve_vl; + unsigned int vl = task_get_sve_vl(current); unsigned int vq = 0; if (test_thread_flag(TIF_SVE)) @@ -266,7 +266,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) if (__copy_from_user(&sve, user->sve, sizeof(sve))) return -EFAULT; - if (sve.vl != current->thread.sve_vl) + if (sve.vl != task_get_sve_vl(current)) return -EINVAL; if (sve.head.size <= sizeof(*user->sve)) { @@ -594,10 +594,10 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, unsigned int vq = 0; if (add_all || test_thread_flag(TIF_SVE)) { - int vl = sve_max_vl; + int vl = sve_max_vl(); if (!add_all) - vl = current->thread.sve_vl; + vl = task_get_sve_vl(current); vq = sve_vq_from_vl(vl); } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index b03e383d944a..7b21213a570f 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -400,11 +400,11 @@ static int call_undef_hook(struct pt_regs *regs) unsigned long flags; u32 instr; int (*fn)(struct pt_regs *regs, u32 instr) = NULL; - void __user *pc = (void __user *)instruction_pointer(regs); + unsigned long pc = instruction_pointer(regs); if (!user_mode(regs)) { __le32 instr_le; - if (get_kernel_nofault(instr_le, (__force __le32 *)pc)) + if (get_kernel_nofault(instr_le, (__le32 *)pc)) goto exit; instr = le32_to_cpu(instr_le); } else if (compat_thumb_mode(regs)) { @@ -527,14 +527,9 @@ NOKPROBE_SYMBOL(do_ptrauth_fault); "1: " insn ", %1\n" \ " mov %w0, #0\n" \ "2:\n" \ - " .pushsection .fixup,\"ax\"\n" \ - " .align 2\n" \ - "3: mov %w0, %w2\n" \ - " b 2b\n" \ - " .popsection\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0) \ : "=r" (res) \ - : "r" (address), "i" (-EFAULT)); \ + : "r" (address)); \ uaccess_ttbr0_disable(); \ } @@ -654,6 +649,12 @@ static const struct sys64_hook sys64_hooks[] = { .handler = cntvct_read_handler, }, { + /* Trap read access to CNTVCTSS_EL0 */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTVCTSS, + .handler = cntvct_read_handler, + }, + { /* Trap read access to CNTFRQ_EL0 */ .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK, .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTFRQ, @@ -729,6 +730,11 @@ static const struct sys64_hook cp15_64_hooks[] = { .esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCT, .handler = compat_cntvct_read_handler, }, + { + .esr_mask = ESR_ELx_CP15_64_ISS_SYS_MASK, + .esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCTSS, + .handler = compat_cntvct_read_handler, + }, {}, }; diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 3dba0c4f8f42..c8fec493a450 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -10,18 +10,15 @@ include $(srctree)/lib/vdso/Makefile # Same as cc-*option, but using CC_COMPAT instead of CC ifeq ($(CONFIG_CC_IS_CLANG), y) -CC_COMPAT_CLANG_FLAGS := --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%)) - CC_COMPAT ?= $(CC) -CC_COMPAT += $(CC_COMPAT_CLANG_FLAGS) - -ifneq ($(LLVM),) -LD_COMPAT ?= $(LD) +CC_COMPAT += --target=arm-linux-gnueabi else -LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld +CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc endif + +ifeq ($(CONFIG_LD_IS_LLD), y) +LD_COMPAT ?= $(LD) else -CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld endif @@ -29,8 +26,6 @@ cc32-option = $(call try-run,\ $(CC_COMPAT) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) cc32-disable-warning = $(call try-run,\ $(CC_COMPAT) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) -cc32-as-instr = $(call try-run,\ - printf "%b\n" "$(1)" | $(CC_COMPAT) $(VDSO_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3)) # We cannot use the global flags to compile the vDSO files, the main reason # being that the 32-bit compiler may be older than the main (64-bit) compiler @@ -40,16 +35,13 @@ cc32-as-instr = $(call try-run,\ # As a result we set our own flags here. # KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile -VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include) +VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc +VDSO_CPPFLAGS += -isystem $(shell $(CC_COMPAT) -print-file-name=include 2>/dev/null) VDSO_CPPFLAGS += $(LINUXINCLUDE) # Common C and assembly flags # From top-level Makefile VDSO_CAFLAGS := $(VDSO_CPPFLAGS) -ifneq ($(shell $(CC_COMPAT) --version 2>&1 | head -n 1 | grep clang),) -VDSO_CAFLAGS += --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%)) -endif - VDSO_CAFLAGS += $(call cc32-option,-fno-PIE) ifdef CONFIG_DEBUG_INFO VDSO_CAFLAGS += -g @@ -67,13 +59,7 @@ endif # From arm vDSO Makefile VDSO_CAFLAGS += -fPIC -fno-builtin -fno-stack-protector VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING - - -# Try to compile for ARMv8. If the compiler is too old and doesn't support it, -# fall back to v7. There is no easy way to check for what architecture the code -# is being compiled, so define a macro specifying that (see arch/arm/Makefile). -VDSO_CAFLAGS += $(call cc32-option,-march=armv8-a -D__LINUX_ARM_ARCH__=8,\ - -march=armv7-a -D__LINUX_ARM_ARCH__=7) +VDSO_CAFLAGS += -march=armv8-a VDSO_CFLAGS := $(VDSO_CAFLAGS) VDSO_CFLAGS += -DENABLE_COMPAT_VDSO=1 @@ -113,12 +99,6 @@ endif VDSO_AFLAGS := $(VDSO_CAFLAGS) VDSO_AFLAGS += -D__ASSEMBLY__ -# Check for binutils support for dmb ishld -dmbinstr := $(call cc32-as-instr,dmb ishld,-DCONFIG_AS_DMB_ISHLD=1) - -VDSO_CFLAGS += $(dmbinstr) -VDSO_AFLAGS += $(dmbinstr) - # From arm vDSO Makefile VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1 VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096 diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index f6b1a88245db..50bab186c49b 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -57,12 +57,13 @@ #define SBSS_ALIGN 0 #endif -#define RO_EXCEPTION_TABLE_ALIGN 8 +#define RO_EXCEPTION_TABLE_ALIGN 4 #define RUNTIME_DISCARD_EXIT #include <asm-generic/vmlinux.lds.h> #include <asm/cache.h> #include <asm/kernel-pgtable.h> +#include <asm/kexec.h> #include <asm/memory.h> #include <asm/page.h> @@ -100,6 +101,16 @@ jiffies = jiffies_64; #define HIBERNATE_TEXT #endif +#ifdef CONFIG_KEXEC_CORE +#define KEXEC_TEXT \ + . = ALIGN(SZ_4K); \ + __relocate_new_kernel_start = .; \ + *(.kexec_relocate.text) \ + __relocate_new_kernel_end = .; +#else +#define KEXEC_TEXT +#endif + #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 #define TRAMP_TEXT \ . = ALIGN(PAGE_SIZE); \ @@ -160,8 +171,8 @@ SECTIONS HYPERVISOR_TEXT IDMAP_TEXT HIBERNATE_TEXT + KEXEC_TEXT TRAMP_TEXT - *(.fixup) *(.gnu.warning) . = ALIGN(16); *(.got) /* Global offset table */ @@ -348,3 +359,10 @@ ASSERT(swapper_pg_dir - reserved_pg_dir == RESERVED_SWAPPER_OFFSET, ASSERT(swapper_pg_dir - tramp_pg_dir == TRAMP_SWAPPER_OFFSET, "TRAMP_SWAPPER_OFFSET is wrong!") #endif + +#ifdef CONFIG_KEXEC_CORE +/* kexec relocation code should fit into one KEXEC_CONTROL_PAGE_SIZE */ +ASSERT(__relocate_new_kernel_end - (__relocate_new_kernel_start & ~(SZ_4K - 1)) + <= SZ_4K, "kexec relocation code is too big or misaligned") +ASSERT(KEXEC_CONTROL_PAGE_SIZE >= SZ_4K, "KEXEC_CONTROL_PAGE_SIZE is broken") +#endif diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S index 3c635929771a..e950875e31ce 100644 --- a/arch/arm64/kvm/hyp/fpsimd.S +++ b/arch/arm64/kvm/hyp/fpsimd.S @@ -21,11 +21,13 @@ SYM_FUNC_START(__fpsimd_restore_state) SYM_FUNC_END(__fpsimd_restore_state) SYM_FUNC_START(__sve_restore_state) - __sve_load 0, x1, 2 + mov x2, #1 + sve_load 0, x1, x2, 3 ret SYM_FUNC_END(__sve_restore_state) SYM_FUNC_START(__sve_save_state) - sve_save 0, x1, 2 + mov x2, #1 + sve_save 0, x1, x2, 3 ret SYM_FUNC_END(__sve_save_state) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index a0e78a6027be..d5a47b93ef9b 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -30,8 +30,12 @@ #include <asm/processor.h> #include <asm/thread_info.h> -extern struct exception_table_entry __start___kvm_ex_table; -extern struct exception_table_entry __stop___kvm_ex_table; +struct kvm_exception_table_entry { + int insn, fixup; +}; + +extern struct kvm_exception_table_entry __start___kvm_ex_table; +extern struct kvm_exception_table_entry __stop___kvm_ex_table; /* Check whether the FP regs were dirtied while in the host-side run loop: */ static inline bool update_fp_enabled(struct kvm_vcpu *vcpu) @@ -510,7 +514,7 @@ static inline void __kvm_unexpected_el2_exception(void) { extern char __guest_exit_panic[]; unsigned long addr, fixup; - struct exception_table_entry *entry, *end; + struct kvm_exception_table_entry *entry, *end; unsigned long elr_el2 = read_sysreg(elr_el2); entry = &__start___kvm_ex_table; diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 5ce36b0a3343..09cd30a9aafb 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -46,7 +46,7 @@ unsigned int kvm_sve_max_vl; int kvm_arm_init_sve(void) { if (system_supports_sve()) { - kvm_sve_max_vl = sve_max_virtualisable_vl; + kvm_sve_max_vl = sve_max_virtualisable_vl(); /* * The get_sve_reg()/set_sve_reg() ioctl interface will need @@ -61,7 +61,7 @@ int kvm_arm_init_sve(void) * Don't even try to make use of vector lengths that * aren't available on all CPUs, for now: */ - if (kvm_sve_max_vl < sve_max_vl) + if (kvm_sve_max_vl < sve_max_vl()) pr_warn("KVM: SVE vector length for guests limited to %u bytes\n", kvm_sve_max_vl); } @@ -102,7 +102,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu) * kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and * set_sve_vls(). Double-check here just to be sure: */ - if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl || + if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl() || vl > SVE_VL_ARCH_MAX)) return -EIO; diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index a7efb2ad2a1c..a5a5f5b97b17 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -4,7 +4,7 @@ */ #include <linux/linkage.h> -#include <asm/assembler.h> +#include <asm/asm-uaccess.h> .text @@ -45,13 +45,11 @@ USER(9f, sttrh wzr, [x0]) USER(7f, sttrb wzr, [x2, #-1]) 5: mov x0, #0 ret -SYM_FUNC_END(__arch_clear_user) -EXPORT_SYMBOL(__arch_clear_user) - .section .fixup,"ax" - .align 2 + // Exception fixups 7: sub x0, x2, #5 // Adjust for faulting on the final byte... 8: add x0, x0, #4 // ...or the second word of the 4-7 byte case 9: sub x0, x2, x0 ret - .previous +SYM_FUNC_END(__arch_clear_user) +EXPORT_SYMBOL(__arch_clear_user) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 2cf999e41d30..34e317907524 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -60,11 +60,8 @@ SYM_FUNC_START(__arch_copy_from_user) #include "copy_template.S" mov x0, #0 // Nothing to copy ret -SYM_FUNC_END(__arch_copy_from_user) -EXPORT_SYMBOL(__arch_copy_from_user) - .section .fixup,"ax" - .align 2 + // Exception fixups 9997: cmp dst, dstin b.ne 9998f // Before being absolutely sure we couldn't copy anything, try harder @@ -72,4 +69,5 @@ USER(9998f, ldtrb tmp1w, [srcin]) strb tmp1w, [dst], #1 9998: sub x0, end, dst // bytes not copied ret - .previous +SYM_FUNC_END(__arch_copy_from_user) +EXPORT_SYMBOL(__arch_copy_from_user) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 9f380eecf653..802231772608 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -59,11 +59,8 @@ SYM_FUNC_START(__arch_copy_to_user) #include "copy_template.S" mov x0, #0 ret -SYM_FUNC_END(__arch_copy_to_user) -EXPORT_SYMBOL(__arch_copy_to_user) - .section .fixup,"ax" - .align 2 + // Exception fixups 9997: cmp dst, dstin b.ne 9998f // Before being absolutely sure we couldn't copy anything, try harder @@ -72,4 +69,5 @@ USER(9998f, sttrb tmp1w, [dst]) add dst, dst, #1 9998: sub x0, end, dst // bytes not copied ret - .previous +SYM_FUNC_END(__arch_copy_to_user) +EXPORT_SYMBOL(__arch_copy_to_user) diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index f188c9092696..ff1e800ba7a1 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PTDUMP_CORE) += ptdump.o obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o +obj-$(CONFIG_TRANS_TABLE) += trans_pgd-asm.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_ARM64_MTE) += mteswap.o KASAN_SANITIZE_physaddr.o += n diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index aa0060178343..c3d53811a15e 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -3,20 +3,87 @@ * Based on arch/arm/mm/extable.c */ +#include <linux/bitfield.h> #include <linux/extable.h> #include <linux/uaccess.h> -int fixup_exception(struct pt_regs *regs) +#include <asm/asm-extable.h> +#include <asm/ptrace.h> + +typedef bool (*ex_handler_t)(const struct exception_table_entry *, + struct pt_regs *); + +static inline unsigned long +get_ex_fixup(const struct exception_table_entry *ex) +{ + return ((unsigned long)&ex->fixup + ex->fixup); +} + +static bool ex_handler_fixup(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + regs->pc = get_ex_fixup(ex); + return true; +} + +static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); + int reg_zero = FIELD_GET(EX_DATA_REG_ZERO, ex->data); + + pt_regs_write_reg(regs, reg_err, -EFAULT); + pt_regs_write_reg(regs, reg_zero, 0); + + regs->pc = get_ex_fixup(ex); + return true; +} + +static bool +ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->type); + int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->type); + unsigned long data, addr, offset; + + addr = pt_regs_read_reg(regs, reg_addr); + + offset = addr & 0x7UL; + addr &= ~0x7UL; + + data = *(unsigned long*)addr; + +#ifndef __AARCH64EB__ + data >>= 8 * offset; +#else + data <<= 8 * offset; +#endif + + pt_regs_write_reg(regs, reg_data, data); + + regs->pc = get_ex_fixup(ex); + return true; +} + +bool fixup_exception(struct pt_regs *regs) { - const struct exception_table_entry *fixup; + const struct exception_table_entry *ex; - fixup = search_exception_tables(instruction_pointer(regs)); - if (!fixup) - return 0; + ex = search_exception_tables(instruction_pointer(regs)); + if (!ex) + return false; - if (in_bpf_jit(regs)) - return arm64_bpf_fixup_exception(fixup, regs); + switch (ex->type) { + case EX_TYPE_FIXUP: + return ex_handler_fixup(ex, regs); + case EX_TYPE_BPF: + return ex_handler_bpf(ex, regs); + case EX_TYPE_UACCESS_ERR_ZERO: + return ex_handler_uaccess_err_zero(ex, regs); + case EX_TYPE_LOAD_UNALIGNED_ZEROPAD: + return ex_handler_load_unaligned_zeropad(ex, regs); + } - regs->pc = (unsigned long)&fixup->fixup + fixup->fixup; - return 1; + BUG(); } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index a8158c948966..ffb9c229610a 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -40,11 +40,11 @@ void __init arm64_hugetlb_cma_reserve(void) { int order; -#ifdef CONFIG_ARM64_4K_PAGES - order = PUD_SHIFT - PAGE_SHIFT; -#else - order = CONT_PMD_SHIFT - PAGE_SHIFT; -#endif + if (pud_sect_supported()) + order = PUD_SHIFT - PAGE_SHIFT; + else + order = CONT_PMD_SHIFT - PAGE_SHIFT; + /* * HugeTLB CMA reservation is required for gigantic * huge pages which could not be allocated via the @@ -62,8 +62,9 @@ bool arch_hugetlb_migration_supported(struct hstate *h) size_t pagesize = huge_page_size(h); switch (pagesize) { -#ifdef CONFIG_ARM64_4K_PAGES +#ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: + return pud_sect_supported(); #endif case PMD_SIZE: case CONT_PMD_SIZE: @@ -126,8 +127,11 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize) *pgsize = size; switch (size) { -#ifdef CONFIG_ARM64_4K_PAGES +#ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: + if (pud_sect_supported()) + contig_ptes = 1; + break; #endif case PMD_SIZE: contig_ptes = 1; @@ -489,9 +493,9 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma, static int __init hugetlbpage_init(void) { -#ifdef CONFIG_ARM64_4K_PAGES - hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); -#endif + if (pud_sect_supported()) + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + hugetlb_add_hstate(CONT_PMD_SHIFT - PAGE_SHIFT); hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); hugetlb_add_hstate(CONT_PTE_SHIFT - PAGE_SHIFT); @@ -503,8 +507,9 @@ arch_initcall(hugetlbpage_init); bool __init arch_hugetlb_valid_size(unsigned long size) { switch (size) { -#ifdef CONFIG_ARM64_4K_PAGES +#ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: + return pud_sect_supported(); #endif case CONT_PMD_SIZE: case PMD_SIZE: diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 37a81754d9b6..a8834434af99 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -160,43 +160,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) free_area_init(max_zone_pfns); } -int pfn_valid(unsigned long pfn) -{ - phys_addr_t addr = PFN_PHYS(pfn); - struct mem_section *ms; - - /* - * Ensure the upper PAGE_SHIFT bits are clear in the - * pfn. Else it might lead to false positives when - * some of the upper bits are set, but the lower bits - * match a valid pfn. - */ - if (PHYS_PFN(addr) != pfn) - return 0; - - if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) - return 0; - - ms = __pfn_to_section(pfn); - if (!valid_section(ms)) - return 0; - - /* - * ZONE_DEVICE memory does not have the memblock entries. - * memblock_is_map_memory() check for ZONE_DEVICE based - * addresses will always fail. Even the normal hotplugged - * memory will never have MEMBLOCK_NOMAP flag set in their - * memblock entries. Skip memblock search for all non early - * memory sections covering all of hotplug memory including - * both normal and ZONE_DEVICE based. - */ - if (!early_section(ms)) - return pfn_section_valid(ms, pfn); - - return memblock_is_memory(addr); -} -EXPORT_SYMBOL(pfn_valid); - int pfn_is_map_memory(unsigned long pfn) { phys_addr_t addr = PFN_PHYS(pfn); @@ -416,8 +379,6 @@ void __init mem_init(void) else if (!xen_swiotlb_detect()) swiotlb_force = SWIOTLB_NO_FORCE; - set_max_mapnr(max_pfn - PHYS_PFN_OFFSET); - /* this will put all unused low memory onto the freelists */ memblock_free_all(); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index cfd9deb347c3..fd85b51b9d50 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1499,6 +1499,11 @@ int arch_add_memory(int nid, u64 start, u64 size, if (ret) __remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size); + else { + max_pfn = PFN_UP(start + size); + max_low_pfn = max_pfn; + } + return ret; } diff --git a/arch/arm64/mm/trans_pgd-asm.S b/arch/arm64/mm/trans_pgd-asm.S new file mode 100644 index 000000000000..021c31573bcb --- /dev/null +++ b/arch/arm64/mm/trans_pgd-asm.S @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * Copyright (c) 2021, Microsoft Corporation. + * Pasha Tatashin <pasha.tatashin@soleen.com> + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/kvm_asm.h> + +.macro invalid_vector label +SYM_CODE_START_LOCAL(\label) + .align 7 + b \label +SYM_CODE_END(\label) +.endm + +.macro el1_sync_vector +SYM_CODE_START_LOCAL(el1_sync) + .align 7 + cmp x0, #HVC_SET_VECTORS /* Called from hibernate */ + b.ne 1f + msr vbar_el2, x1 + mov x0, xzr + eret +1: cmp x0, #HVC_SOFT_RESTART /* Called from kexec */ + b.ne 2f + mov x0, x2 + mov x2, x4 + mov x4, x1 + mov x1, x3 + br x4 +2: /* Unexpected argument, set an error */ + mov_q x0, HVC_STUB_ERR + eret +SYM_CODE_END(el1_sync) +.endm + +SYM_CODE_START(trans_pgd_stub_vectors) + invalid_vector hyp_stub_el2t_sync_invalid // Synchronous EL2t + invalid_vector hyp_stub_el2t_irq_invalid // IRQ EL2t + invalid_vector hyp_stub_el2t_fiq_invalid // FIQ EL2t + invalid_vector hyp_stub_el2t_error_invalid // Error EL2t + + invalid_vector hyp_stub_el2h_sync_invalid // Synchronous EL2h + invalid_vector hyp_stub_el2h_irq_invalid // IRQ EL2h + invalid_vector hyp_stub_el2h_fiq_invalid // FIQ EL2h + invalid_vector hyp_stub_el2h_error_invalid // Error EL2h + + el1_sync_vector // Synchronous 64-bit EL1 + invalid_vector hyp_stub_el1_irq_invalid // IRQ 64-bit EL1 + invalid_vector hyp_stub_el1_fiq_invalid // FIQ 64-bit EL1 + invalid_vector hyp_stub_el1_error_invalid // Error 64-bit EL1 + + invalid_vector hyp_stub_32b_el1_sync_invalid // Synchronous 32-bit EL1 + invalid_vector hyp_stub_32b_el1_irq_invalid // IRQ 32-bit EL1 + invalid_vector hyp_stub_32b_el1_fiq_invalid // FIQ 32-bit EL1 + invalid_vector hyp_stub_32b_el1_error_invalid // Error 32-bit EL1 + .align 11 +SYM_INNER_LABEL(__trans_pgd_stub_vectors_end, SYM_L_LOCAL) +SYM_CODE_END(trans_pgd_stub_vectors) + +# Check the trans_pgd_stub_vectors didn't overflow +.org . - (__trans_pgd_stub_vectors_end - trans_pgd_stub_vectors) + SZ_2K diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c index 527f0a39c3da..d7da8ca40d2e 100644 --- a/arch/arm64/mm/trans_pgd.c +++ b/arch/arm64/mm/trans_pgd.c @@ -5,8 +5,8 @@ * * This file derived from: arch/arm64/kernel/hibernate.c * - * Copyright (c) 2020, Microsoft Corporation. - * Pavel Tatashin <pasha.tatashin@soleen.com> + * Copyright (c) 2021, Microsoft Corporation. + * Pasha Tatashin <pasha.tatashin@soleen.com> * */ @@ -218,63 +218,6 @@ int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **dst_pgdp, } /* - * Add map entry to trans_pgd for a base-size page at PTE level. - * info: contains allocator and its argument - * trans_pgd: page table in which new map is added. - * page: page to be mapped. - * dst_addr: new VA address for the page - * pgprot: protection for the page. - * - * Returns 0 on success, and -ENOMEM on failure. - */ -int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd, - void *page, unsigned long dst_addr, pgprot_t pgprot) -{ - pgd_t *pgdp; - p4d_t *p4dp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - - pgdp = pgd_offset_pgd(trans_pgd, dst_addr); - if (pgd_none(READ_ONCE(*pgdp))) { - p4dp = trans_alloc(info); - if (!pgdp) - return -ENOMEM; - pgd_populate(NULL, pgdp, p4dp); - } - - p4dp = p4d_offset(pgdp, dst_addr); - if (p4d_none(READ_ONCE(*p4dp))) { - pudp = trans_alloc(info); - if (!pudp) - return -ENOMEM; - p4d_populate(NULL, p4dp, pudp); - } - - pudp = pud_offset(p4dp, dst_addr); - if (pud_none(READ_ONCE(*pudp))) { - pmdp = trans_alloc(info); - if (!pmdp) - return -ENOMEM; - pud_populate(NULL, pudp, pmdp); - } - - pmdp = pmd_offset(pudp, dst_addr); - if (pmd_none(READ_ONCE(*pmdp))) { - ptep = trans_alloc(info); - if (!ptep) - return -ENOMEM; - pmd_populate_kernel(NULL, pmdp, ptep); - } - - ptep = pte_offset_kernel(pmdp, dst_addr); - set_pte(ptep, pfn_pte(virt_to_pfn(page), pgprot)); - - return 0; -} - -/* * The page we want to idmap may be outside the range covered by VA_BITS that * can be built using the kernel's p?d_populate() helpers. As a one off, for a * single page, we build these page tables bottom up and just assume that will @@ -322,3 +265,26 @@ int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0, return 0; } + +/* + * Create a copy of the vector table so we can call HVC_SET_VECTORS or + * HVC_SOFT_RESTART from contexts where the table may be overwritten. + */ +int trans_pgd_copy_el2_vectors(struct trans_pgd_info *info, + phys_addr_t *el2_vectors) +{ + void *hyp_stub = trans_alloc(info); + + if (!hyp_stub) + return -ENOMEM; + *el2_vectors = virt_to_phys(hyp_stub); + memcpy(hyp_stub, &trans_pgd_stub_vectors, ARM64_VECTOR_TABLE_LEN); + caches_clean_inval_pou((unsigned long)hyp_stub, + (unsigned long)hyp_stub + + ARM64_VECTOR_TABLE_LEN); + dcache_clean_inval_poc((unsigned long)hyp_stub, + (unsigned long)hyp_stub + + ARM64_VECTOR_TABLE_LEN); + + return 0; +} diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 803e7773fa86..3a8a7140a9bf 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -13,6 +13,7 @@ #include <linux/printk.h> #include <linux/slab.h> +#include <asm/asm-extable.h> #include <asm/byteorder.h> #include <asm/cacheflush.h> #include <asm/debug-monitors.h> @@ -358,15 +359,15 @@ static void build_epilogue(struct jit_ctx *ctx) #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) #define BPF_FIXUP_REG_MASK GENMASK(31, 27) -int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, - struct pt_regs *regs) +bool ex_handler_bpf(const struct exception_table_entry *ex, + struct pt_regs *regs) { off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); regs->regs[dst_reg] = 0; regs->pc = (unsigned long)&ex->fixup - offset; - return 1; + return true; } /* For accesses to BTF pointers, add an entry to the exception table */ @@ -412,6 +413,8 @@ static int add_exception_handler(const struct bpf_insn *insn, ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); + ex->type = EX_TYPE_BPF; + ctx->exentry_idx++; return 0; } diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 49305c2e6dfd..870c39537dd0 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -18,6 +18,7 @@ HAS_CRC32 HAS_DCPODP HAS_DCPOP HAS_E0PD +HAS_ECV HAS_EPAN HAS_GENERIC_AUTH HAS_GENERIC_AUTH_ARCH @@ -39,6 +40,7 @@ HW_DBM KVM_PROTECTED_MODE MISMATCHED_CACHE_TYPE MTE +MTE_ASYMM SPECTRE_V2 SPECTRE_V3A SPECTRE_V4 @@ -53,6 +55,9 @@ WORKAROUND_1418040 WORKAROUND_1463225 WORKAROUND_1508412 WORKAROUND_1542419 +WORKAROUND_TRBE_OVERWRITE_FILL_MODE +WORKAROUND_TSB_FLUSH_FAILURE +WORKAROUND_TRBE_WRITE_OUT_OF_RANGE WORKAROUND_CAVIUM_23154 WORKAROUND_CAVIUM_27456 WORKAROUND_CAVIUM_30115 |