From 7001d4af926b26469a0604eca80dc73b80c5faa8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 18 Jan 2021 13:01:29 +0000 Subject: arm64: Drop workaround for broken 'S' constraint with GCC 4.9 Since GCC < 5.1 has been shown to be unsuitable for the arm64 kernel, let's drop the workaround for the 'S' asm constraint that GCC 4.9 doesn't always grok. This is effectively a revert of 9fd339a45be5 ("arm64: Work around broken GCC 4.9 handling of "S" constraint"). Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210118130129.2875949-1-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/kvm_asm.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 8a33d83ea843..7ccf770c53d9 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -199,12 +199,6 @@ extern void __vgic_v3_init_lrs(void); extern u32 __kvm_get_mdcr_el2(void); -#if defined(GCC_VERSION) && GCC_VERSION < 50000 -#define SYM_CONSTRAINT "i" -#else -#define SYM_CONSTRAINT "S" -#endif - /* * Obtain the PC-relative address of a kernel symbol * s: symbol @@ -221,7 +215,7 @@ extern u32 __kvm_get_mdcr_el2(void); typeof(s) *addr; \ asm("adrp %0, %1\n" \ "add %0, %0, :lo12:%1\n" \ - : "=r" (addr) : SYM_CONSTRAINT (&s)); \ + : "=r" (addr) : "S" (&s)); \ addr; \ }) -- cgit v1.2.3 From a5b8ca97fbf8300a5e21c393df25ce6f521e7939 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 18 Dec 2020 11:45:40 +0900 Subject: arm64: do not descend to vdso directories twice arm64 descends into each vdso directory twice; first in vdso_prepare, second during the ordinary build process. PPC mimicked it and uncovered a problem [1]. In the first descend, Kbuild directly visits the vdso directories, therefore it does not inherit subdir-ccflags-y from upper directories. This means the command line parameters may differ between the two. If it happens, the offset values in the generated headers might be different from real offsets of vdso.so in the kernel. This potential danger should be avoided. The vdso directories are built in the vdso_prepare stage, so the second descend is unneeded. [1]: https://lore.kernel.org/linux-kbuild/CAK7LNARAkJ3_-4gX0VA2UkapbOftuzfSTVMBbgbw=HD8n7N+7w@mail.gmail.com/T/#ma10dcb961fda13f36d42d58fa6cb2da988b7e73a Signed-off-by: Masahiro Yamada Link: https://lore.kernel.org/r/20201218024540.1102650-1-masahiroy@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Makefile | 10 ++++++---- arch/arm64/kernel/Makefile | 5 +++-- arch/arm64/kernel/vdso-wrap.S | 24 ++++++++++++++++++++++++ arch/arm64/kernel/vdso/Makefile | 1 - arch/arm64/kernel/vdso/vdso.S | 24 ------------------------ arch/arm64/kernel/vdso32-wrap.S | 19 +++++++++++++++++++ arch/arm64/kernel/vdso32/Makefile | 1 - arch/arm64/kernel/vdso32/vdso.S | 19 ------------------- 8 files changed, 52 insertions(+), 51 deletions(-) create mode 100644 arch/arm64/kernel/vdso-wrap.S delete mode 100644 arch/arm64/kernel/vdso/vdso.S create mode 100644 arch/arm64/kernel/vdso32-wrap.S delete mode 100644 arch/arm64/kernel/vdso32/vdso.S diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 90309208bb28..5b84aec31ed3 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -188,10 +188,12 @@ ifeq ($(KBUILD_EXTMOD),) # this hack. 
prepare: vdso_prepare vdso_prepare: prepare0 - $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h - $(if $(CONFIG_COMPAT_VDSO),$(Q)$(MAKE) \ - $(build)=arch/arm64/kernel/vdso32 \ - include/generated/vdso32-offsets.h) + $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso \ + include/generated/vdso-offsets.h arch/arm64/kernel/vdso/vdso.so +ifdef CONFIG_COMPAT_VDSO + $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 \ + include/generated/vdso32-offsets.h arch/arm64/kernel/vdso32/vdso.so +endif endif define archhelp diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 86364ab6f13f..42f6ad2c7eac 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -59,9 +59,10 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o obj-$(CONFIG_ARM64_MTE) += mte.o +obj-y += vdso-wrap.o +obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o -obj-y += vdso/ probes/ -obj-$(CONFIG_COMPAT_VDSO) += vdso32/ +obj-y += probes/ head-y := head.o extra-y += $(head-y) vmlinux.lds diff --git a/arch/arm64/kernel/vdso-wrap.S b/arch/arm64/kernel/vdso-wrap.S new file mode 100644 index 000000000000..c4b1990bf2be --- /dev/null +++ b/arch/arm64/kernel/vdso-wrap.S @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 ARM Limited + * + * Author: Will Deacon + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <linux/const.h> +#include <asm/assembler.h> +#include <asm/page.h> + + .globl vdso_start, vdso_end + .section .rodata + .balign PAGE_SIZE +vdso_start: + .incbin "arch/arm64/kernel/vdso/vdso.so" + .balign PAGE_SIZE +vdso_end: + + .previous + +emit_aarch64_feature_1_and diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index cd9c3fa25902..76c0255ecc91 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -44,7 +44,6 @@ endif # Disable gcov profiling for VDSO code GCOV_PROFILE := n -obj-y += vdso.o targets += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S deleted file mode 100644 index c4b1990bf2be..000000000000 --- a/arch/arm64/kernel/vdso/vdso.S +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012 ARM Limited - * - * Author: Will Deacon - */ - -#include <linux/init.h> -#include <linux/linkage.h> -#include <linux/const.h> -#include <asm/assembler.h> -#include <asm/page.h> - - .globl vdso_start, vdso_end - .section .rodata - .balign PAGE_SIZE -vdso_start: - .incbin "arch/arm64/kernel/vdso/vdso.so" - .balign PAGE_SIZE -vdso_end: - - .previous - -emit_aarch64_feature_1_and diff --git a/arch/arm64/kernel/vdso32-wrap.S b/arch/arm64/kernel/vdso32-wrap.S new file mode 100644 index 000000000000..e72ac7bc4c04 --- /dev/null +++ b/arch/arm64/kernel/vdso32-wrap.S @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 ARM Limited + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <linux/const.h> +#include <asm/page.h> + + .globl vdso32_start, vdso32_end + .section .rodata + .balign PAGE_SIZE +vdso32_start: + .incbin "arch/arm64/kernel/vdso32/vdso.so" + .balign PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index a1e0f91e6cea..789ad420f16b 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -155,7 +155,6 @@ c-obj-vdso-gettimeofday := $(addprefix $(obj)/, $(c-obj-vdso-gettimeofday)) asm-obj-vdso := $(addprefix $(obj)/, $(asm-obj-vdso)) obj-vdso := $(c-obj-vdso) $(c-obj-vdso-gettimeofday) $(asm-obj-vdso) -obj-y += vdso.o targets += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH) diff --git a/arch/arm64/kernel/vdso32/vdso.S b/arch/arm64/kernel/vdso32/vdso.S deleted file mode 100644 index e72ac7bc4c04..000000000000 --- a/arch/arm64/kernel/vdso32/vdso.S +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2012 ARM Limited - */ - -#include <linux/init.h> -#include <linux/linkage.h> -#include <linux/const.h> -#include <asm/page.h> - - .globl vdso32_start, vdso32_end - .section .rodata - .balign PAGE_SIZE -vdso32_start: - .incbin "arch/arm64/kernel/vdso32/vdso.so" - .balign PAGE_SIZE -vdso32_end: - - .previous -- cgit v1.2.3 From f3cb097ad8888019e6d8777cb17bcee18ac6c2f6 Mon Sep 17 00:00:00 2001 From: John Millikin Date: Wed, 23 Dec 2020 14:10:56 +0900 Subject: arm64: Support running gen_vdso_offsets.sh with BSD userland. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BSD sed ignores whitespace character escape sequences such as '\t' in the replacement string, causing this script to produce the following incorrect output: #define vdso_offset_sigtrampt0x089c Changing the hard tab to ' ' causes both BSD and GNU dialects of sed to produce equivalent output. Signed-off-by: John Millikin Link: https://lore.kernel.org/r/15147ffb-7e67-b607-266d-f56599ecafd1@john-millikin.com Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/gen_vdso_offsets.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/gen_vdso_offsets.sh b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh index 8b806eacd0a6..0387209d65b1 100755 --- a/arch/arm64/kernel/vdso/gen_vdso_offsets.sh +++ b/arch/arm64/kernel/vdso/gen_vdso_offsets.sh @@ -13,4 +13,4 @@ LC_ALL=C sed -n -e 's/^00*/0/' -e \ -'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2\t0x\1/p' +'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso_offset_\2 0x\1/p' -- cgit v1.2.3 From edb739eed8f37e2846641313ff1308e872a30d98 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 5 Jan 2021 16:54:11 +0530 Subject: arm64/mm: Add warning for outside range requests in vmemmap_populate() vmemmap_populate() does not validate that the requested vmemmap address range is inside the platform-assigned space, i.e. [VMEMMAP_START..VMEMMAP_END]. Instead, it would just go ahead and create the mapping, which might then overlap with other sections in the kernel virtual address space. Just adding a warning here for a range overrun, which would help detect the problem earlier on, before a potential struct page corruption. This also makes vmemmap_populate() symmetrical with vmemmap_free(), which already has a similar warning.
Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/1609845851-25064-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index ae0c3d023824..f938e8020523 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1094,6 +1094,7 @@ static void free_empty_tables(unsigned long addr, unsigned long end, int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { + WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END)); return vmemmap_populate_basepages(start, end, node, altmap); } #else /* !ARM64_SWAPPER_USES_SECTION_MAPS */ @@ -1107,6 +1108,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, pud_t *pudp; pmd_t *pmdp; + WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END)); do { next = pmd_addr_end(addr, end); -- cgit v1.2.3 From 67c6bb56b649590a3f59c2a92331aa4e83d4534c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 6 Jan 2021 10:34:49 +0000 Subject: firmware: smccc: Add SMCCC TRNG function call IDs The ARM architected TRNG firmware interface, described in ARM spec DEN0098, defines an ARM SMCCC based interface to a true random number generator provided by firmware. Add the definitions of the SMCCC functions as defined by the spec. Signed-off-by: Ard Biesheuvel Signed-off-by: Andre Przywara Reviewed-by: Linus Walleij Reviewed-by: Sudeep Holla Link: https://lore.kernel.org/r/20210106103453.152275-2-andre.przywara@arm.com Signed-off-by: Will Deacon --- include/linux/arm-smccc.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index f860645f6512..62c54234576c 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -102,6 +102,37 @@ ARM_SMCCC_OWNER_STANDARD_HYP, \ 0x21) +/* TRNG entropy source calls (defined by ARM DEN0098) */ +#define ARM_SMCCC_TRNG_VERSION \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_STANDARD, \ + 0x50) + +#define ARM_SMCCC_TRNG_FEATURES \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_STANDARD, \ + 0x51) + +#define ARM_SMCCC_TRNG_GET_UUID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_STANDARD, \ + 0x52) + +#define ARM_SMCCC_TRNG_RND32 \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + ARM_SMCCC_OWNER_STANDARD, \ + 0x53) + +#define ARM_SMCCC_TRNG_RND64 \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_STANDARD, \ + 0x53) + /* * Return codes defined in ARM DEN 0070A * ARM DEN 0070A is now merged/consolidated into ARM DEN 0028 C -- cgit v1.2.3 From dd313a2653d442f015e82706f6b185a881772101 Mon Sep 17 00:00:00 2001 From: YANG LI Date: Mon, 11 Jan 2021 17:35:37 +0800 Subject: arm64: mte: style: Simplify bool comparison Fix the following coccicheck warning: ./tools/testing/selftests/arm64/mte/check_buffer_fill.c:84:12-35: WARNING: Comparison to bool Signed-off-by: YANG LI Reported-by: Abaci Robot Link: https://lore.kernel.org/r/1610357737-68678-1-git-send-email-abaci-bugfix@linux.alibaba.com Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/mte/check_buffer_fill.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c index c9fa141ebdcc..75fc482d63b6 100644 --- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -81,7 +81,7 @@ static int check_buffer_underflow_by_byte(int mem_type, int mode, last_index = 0; /* Set some value in tagged memory and make the buffer underflow */ for (j = sizes[i] - 1; (j >= -underflow_range) && - (cur_mte_cxt.fault_valid == false); j--) { + (!cur_mte_cxt.fault_valid); j--) { ptr[j] = '1'; last_index = j; } -- cgit v1.2.3 From 6106e1112cc69a367f495da2e66f13e2bca369fb Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 13 Jan 2021 17:31:55 +0000 Subject: arm64: remove EL0 exception frame record When entering an exception from EL0, the entry code creates a synthetic frame record with a NULL PC. This was used by the code introduced in commit: 7326749801396105 ("arm64: unwind: reference pt_regs via embedded stack frame") ... to discover exception entries on the stack and dump the associated pt_regs. Since the NULL PC was undesirable for the stacktrace, we added a special case to unwind_frame() to prevent the NULL PC from being logged. Since commit: a25ffd3a6302a678 ("arm64: traps: Don't print stack or raw PC/LR values in backtraces") ... we no longer try to dump the pt_regs as part of a stacktrace, and hence no longer need the synthetic exception record. This patch removes the synthetic exception record and the associated special case in unwind_frame(). Instead, EL0 exceptions set the FP to NULL, as is the case for other terminal records (e.g. when a kernel thread starts). The synthetic record for exceptions from EL1 is retained, as it has useful unwind information for the interrupted context. To make the terminal case a bit clearer, an explicit check is added to the start of unwind_frame(). This would otherwise be caught implicitly by the on_accessible_stack() checks. Reported-by: Mark Brown Signed-off-by: Mark Rutland Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210113173155.43063-1-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 10 +++++----- arch/arm64/kernel/stacktrace.c | 13 ++++--------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index c9bae73f2621..89fd3907816e 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -261,16 +261,16 @@ alternative_else_nop_endif stp lr, x21, [sp, #S_LR] /* - * In order to be able to dump the contents of struct pt_regs at the - * time the exception was taken (in case we attempt to walk the call - * stack later), chain it together with the stack frames. + * For exceptions from EL0, terminate the callchain here. + * For exceptions from EL1, create a synthetic frame record so the + * interrupted code shows up in the backtrace.
*/ .if \el == 0 - stp xzr, xzr, [sp, #S_STACKFRAME] + mov x29, xzr .else stp x29, x22, [sp, #S_STACKFRAME] - .endif add x29, sp, #S_STACKFRAME + .endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN alternative_if_not ARM64_HAS_PAN diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index fa56af1a59c3..0fb42129b469 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -44,6 +44,10 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) unsigned long fp = frame->fp; struct stack_info info; + /* Terminal record; nothing to unwind */ + if (!fp) + return -EINVAL; + if (fp & 0xf) return -EINVAL; @@ -104,15 +108,6 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) frame->pc = ptrauth_strip_insn_pac(frame->pc); - /* - * Frames created upon entry from EL0 have NULL FP and PC values, so - * don't bother reporting these. Frames created by __noreturn functions - * might have a valid FP even if PC is bogus, so only terminate where - * both are NULL. - */ - if (!frame->fp && !frame->pc) - return -EINVAL; - return 0; } NOKPROBE_SYMBOL(unwind_frame); -- cgit v1.2.3 From 384e5699e101b1ee184590a39ee60f10ec0d36b6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 8 Jan 2021 16:46:51 +0530 Subject: arm64: topology: Avoid the have_policy check Every time I have stumbled upon this routine, I get confused by the way 'have_policy' is used and I have to dig in to understand why it is so. Here is an attempt to make it easier to understand, and hopefully it is an improvement. The 'have_policy' check was just an optimization to avoid writing to amu_fie_cpus in case we don't have to, but that optimization itself is creating more confusion than it is worth. Let's just do that if all the CPUs support AMUs. It is much cleaner that way. Reviewed-by: Ionela Voinescu Signed-off-by: Viresh Kumar Tested-by: Ionela Voinescu Link: https://lore.kernel.org/r/c125766c4be93461772015ac7c9a6ae45d5756f6.1610104461.git.viresh.kumar@linaro.org Signed-off-by: Will Deacon --- arch/arm64/kernel/topology.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index f6faa697e83e..ebadc73449f9 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -199,14 +199,14 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate) return 0; } -static inline bool +static inline void enable_policy_freq_counters(int cpu, cpumask_var_t valid_cpus) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); if (!policy) { pr_debug("CPU%d: No cpufreq policy found.\n", cpu); - return false; + return; } if (cpumask_subset(policy->related_cpus, valid_cpus)) @@ -214,8 +214,6 @@ enable_policy_freq_counters(int cpu, cpumask_var_t valid_cpus) amu_fie_cpus); cpufreq_cpu_put(policy); - - return true; } static DEFINE_STATIC_KEY_FALSE(amu_fie_key); @@ -225,7 +223,6 @@ static int __init init_amu_fie(void) { bool invariance_status = topology_scale_freq_invariant(); cpumask_var_t valid_cpus; - bool have_policy = false; int ret = 0; int cpu; @@ -245,17 +242,12 @@ static int __init init_amu_fie(void) continue; cpumask_set_cpu(cpu, valid_cpus); - have_policy |= enable_policy_freq_counters(cpu, valid_cpus); + enable_policy_freq_counters(cpu, valid_cpus); } - /* - * If we are not restricted by cpufreq policies, we only enable - * the use of the AMU feature for FIE if all CPUs support AMU.
- * Otherwise, enable_policy_freq_counters has already enabled - * policy cpus. - */ - if (!have_policy && cpumask_equal(valid_cpus, cpu_present_mask)) - cpumask_or(amu_fie_cpus, amu_fie_cpus, valid_cpus); + /* Overwrite amu_fie_cpus if all CPUs support AMU */ + if (cpumask_equal(valid_cpus, cpu_present_mask)) + cpumask_copy(amu_fie_cpus, cpu_present_mask); if (!cpumask_empty(amu_fie_cpus)) { pr_info("CPUs[%*pbl]: counters will be used for FIE.", -- cgit v1.2.3 From 47b10b737c0794c2fa584d7c8103b485e274ed51 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 8 Jan 2021 16:46:52 +0530 Subject: arm64: topology: Reorder init_amu_fie() a bit This patch makes a couple of optimizations in init_amu_fie(): exiting early from paths where we don't need to continue any further, avoiding the enable/disable dance, moving the calls to topology_scale_freq_invariant() to just where we need them instead of at the top of the routine, and avoiding a third call to it. Reviewed-by: Ionela Voinescu Signed-off-by: Viresh Kumar Tested-by: Ionela Voinescu Link: https://lore.kernel.org/r/a732e71ab9ec28c354eb28dd898c9b47d490863f.1610104461.git.viresh.kumar@linaro.org Signed-off-by: Will Deacon --- arch/arm64/kernel/topology.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index ebadc73449f9..57267d694495 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -221,8 +221,8 @@ static DEFINE_STATIC_KEY_FALSE(amu_fie_key); static int __init init_amu_fie(void) { - bool invariance_status = topology_scale_freq_invariant(); cpumask_var_t valid_cpus; + bool invariant; int ret = 0; int cpu; @@ -249,18 +249,19 @@ static int __init init_amu_fie(void) if (cpumask_equal(valid_cpus, cpu_present_mask)) cpumask_copy(amu_fie_cpus, cpu_present_mask); - if (!cpumask_empty(amu_fie_cpus)) { - pr_info("CPUs[%*pbl]: counters will be used for FIE.", - cpumask_pr_args(amu_fie_cpus)); - static_branch_enable(&amu_fie_key); - } + if (cpumask_empty(amu_fie_cpus)) + goto free_valid_mask; - /* - * If the system is not fully invariant after AMU init, disable - * partial use of counters for frequency invariance. - */ - if (!topology_scale_freq_invariant()) - static_branch_disable(&amu_fie_key); + invariant = topology_scale_freq_invariant(); + + /* We aren't fully invariant yet */ + if (!invariant && !cpumask_equal(amu_fie_cpus, cpu_present_mask)) + goto free_valid_mask; + + static_branch_enable(&amu_fie_key); + + pr_info("CPUs[%*pbl]: counters will be used for FIE.", + cpumask_pr_args(amu_fie_cpus)); /* * Task scheduler behavior depends on frequency invariance support, * either cpufreq or counter driven. If the support status changes as * a result of counter initialisation and use, retrigger the build of * scheduling domains to ensure the information is propagated properly. */ - if (invariance_status != topology_scale_freq_invariant()) + if (!invariant) rebuild_sched_domains_energy(); free_valid_mask: -- cgit v1.2.3 From a5f1b187cd24f7da35eb7a48fc50839c554d52a2 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 8 Jan 2021 16:46:53 +0530 Subject: arm64: topology: Make AMUs work with modular cpufreq drivers The AMU counters won't get used today if the cpufreq driver is built as a module, as the AMU core requires everything to be ready by late init. Fix that properly by registering a cpufreq policy notifier.
Note that the AMU core doesn't have any cpufreq dependency after the first time the CPUFREQ_CREATE_POLICY notifier is called for all the CPUs, so we don't need to do anything on the CPUFREQ_REMOVE_POLICY notifier. For the same reason, we check whether the CPUs are already parsed at the beginning of amu_fie_setup() and skip if that is true. Alternatively, we could queue a work item from there to unregister the notifier instead, but that seemed like overkill compared to this simple check. While at it, convert the print message to pr_debug instead of pr_info. Signed-off-by: Viresh Kumar Reviewed-by: Ionela Voinescu Tested-by: Ionela Voinescu Link: https://lore.kernel.org/r/89c1921334443e133c9c8791b4693607d65ed9f5.1610104461.git.viresh.kumar@linaro.org Signed-off-by: Will Deacon --- arch/arm64/kernel/topology.c | 92 +++++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 44 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 57267d694495..e08a4126453a 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -199,69 +199,38 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate) return 0; } -static inline void -enable_policy_freq_counters(int cpu, cpumask_var_t valid_cpus) -{ - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - - if (!policy) { - pr_debug("CPU%d: No cpufreq policy found.\n", cpu); - return; - } - - if (cpumask_subset(policy->related_cpus, valid_cpus)) - cpumask_or(amu_fie_cpus, policy->related_cpus, - amu_fie_cpus); - - cpufreq_cpu_put(policy); -} - static DEFINE_STATIC_KEY_FALSE(amu_fie_key); #define amu_freq_invariant() static_branch_unlikely(&amu_fie_key) -static int __init init_amu_fie(void) +static void amu_fie_setup(const struct cpumask *cpus) { - cpumask_var_t valid_cpus; bool invariant; - int ret = 0; int cpu; - if (!zalloc_cpumask_var(&valid_cpus, GFP_KERNEL)) - return -ENOMEM; - - if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) { - ret = -ENOMEM; - goto free_valid_mask; - } + /* We are already set since the last insmod of cpufreq driver */ + if (unlikely(cpumask_subset(cpus, amu_fie_cpus))) + return; - for_each_present_cpu(cpu) { + for_each_cpu(cpu, cpus) { if (!freq_counters_valid(cpu) || freq_inv_set_max_ratio(cpu, cpufreq_get_hw_max_freq(cpu) * 1000, arch_timer_get_rate())) - continue; - - cpumask_set_cpu(cpu, valid_cpus); - enable_policy_freq_counters(cpu, valid_cpus); + return; } - /* Overwrite amu_fie_cpus if all CPUs support AMU */ - if (cpumask_equal(valid_cpus, cpu_present_mask)) - cpumask_copy(amu_fie_cpus, cpu_present_mask); - - if (cpumask_empty(amu_fie_cpus)) - goto free_valid_mask; + cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus); invariant = topology_scale_freq_invariant(); /* We aren't fully invariant yet */ if (!invariant && !cpumask_equal(amu_fie_cpus, cpu_present_mask)) - goto free_valid_mask; + return; static_branch_enable(&amu_fie_key); - pr_info("CPUs[%*pbl]: counters will be used for FIE.", - cpumask_pr_args(amu_fie_cpus)); + pr_debug("CPUs[%*pbl]: counters will be used for FIE.", + cpumask_pr_args(cpus)); /* * Task scheduler behavior depends on frequency invariance support, @@ -271,13 +240,48 @@ static int __init init_amu_fie(void) */ if (!invariant) rebuild_sched_domains_energy(); +} + +static int init_amu_fie_callback(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_policy *policy = data; + + if (val == CPUFREQ_CREATE_POLICY) + amu_fie_setup(policy->related_cpus); + + /* + * We don't need to handle CPUFREQ_REMOVE_POLICY
event as the AMU + * counters don't have any dependency on cpufreq driver once we have + * initialized AMU support and enabled invariance. The AMU counters will + * keep on working just fine in the absence of the cpufreq driver, and + * for the CPUs for which there are no counters available, the last set + * value of freq_scale will remain valid as that is the frequency those + * CPUs are running at. + */ + + return 0; +} + +static struct notifier_block init_amu_fie_notifier = { + .notifier_call = init_amu_fie_callback, +}; + +static int __init init_amu_fie(void) +{ + int ret; + + if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) + return -ENOMEM; -free_valid_mask: - free_cpumask_var(valid_cpus); + ret = cpufreq_register_notifier(&init_amu_fie_notifier, + CPUFREQ_POLICY_NOTIFIER); + if (ret) + free_cpumask_var(amu_fie_cpus); return ret; } -late_initcall_sync(init_amu_fie); +core_initcall(init_amu_fie); bool arch_freq_counters_available(const struct cpumask *cpus) { -- cgit v1.2.3 From f9ce0be71d1fbb038ada15ced83474b0e63f264d Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Sat, 19 Dec 2020 15:19:23 +0300 Subject: mm: Cleanup faultaround and finish_fault() codepaths alloc_set_pte() has two users with different requirements: in the faultaround code, it is called from an atomic context and the PTE page table has to be preallocated. finish_fault() can sleep and allocate the page table as needed. PTL locking rules are also strange, hard to follow and overkill for finish_fault(). Let's untangle the mess. alloc_set_pte() has gone now. All locking is explicit. The price is some code duplication to handle huge pages in the faultaround path, but it should be fine given the overall improvement in readability. Link: https://lore.kernel.org/r/20201229132819.najtavneutnf7ajp@box Signed-off-by: Kirill A. Shutemov [will: s/from from/from/ in comment; spotted by willy] Signed-off-by: Will Deacon --- fs/xfs/xfs_file.c | 6 +- include/linux/mm.h | 12 +-- include/linux/pgtable.h | 11 +++ mm/filemap.c | 177 +++++++++++++++++++++++++++++++----------- mm/memory.c | 199 ++++++++++++++---------------------------------- 5 files changed, 213 insertions(+), 192 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 5b0f93f73837..111fe73bb8a7 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1319,17 +1319,19 @@ xfs_filemap_pfn_mkwrite( return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true); } -static void +static vm_fault_t xfs_filemap_map_pages( struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff) { struct inode *inode = file_inode(vmf->vma->vm_file); + vm_fault_t ret; xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - filemap_map_pages(vmf, start_pgoff, end_pgoff); + ret = filemap_map_pages(vmf, start_pgoff, end_pgoff); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); + return ret; } static const struct vm_operations_struct xfs_file_vm_ops = { diff --git a/include/linux/mm.h b/include/linux/mm.h index ecdf8a8cd6ae..4572a9bc5862 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -542,8 +542,8 @@ struct vm_fault { * is not NULL, otherwise pmd. */ pgtable_t prealloc_pte; /* Pre-allocated pte page table. - * vm_ops->map_pages() calls - * alloc_set_pte() from atomic context. + * vm_ops->map_pages() sets up a page + * table from atomic context. * do_fault_around() pre-allocates * page table to avoid allocation from * atomic context.
@@ -578,7 +578,7 @@ struct vm_operations_struct { vm_fault_t (*fault)(struct vm_fault *vmf); vm_fault_t (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size); - void (*map_pages)(struct vm_fault *vmf, + vm_fault_t (*map_pages)(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); unsigned long (*pagesize)(struct vm_area_struct * area); @@ -988,7 +988,9 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) return pte; } -vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page); +vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page); +void do_set_pte(struct vm_fault *vmf, struct page *page); + vm_fault_t finish_fault(struct vm_fault *vmf); vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); #endif @@ -2622,7 +2624,7 @@ extern void truncate_inode_pages_final(struct address_space *); /* generic vm_area_ops exported for stackable file systems */ extern vm_fault_t filemap_fault(struct vm_fault *vmf); -extern void filemap_map_pages(struct vm_fault *vmf, +extern vm_fault_t filemap_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf); diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 8fcdfa52eb4b..36eb748f3c97 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1314,6 +1314,17 @@ static inline int pmd_trans_unstable(pmd_t *pmd) #endif } +/* + * the ordering of these checks is important for pmds with _page_devmap set. + * if we check pmd_trans_unstable() first we will trip the bad_pmd() check + * inside of pmd_none_or_trans_huge_or_clear_bad(). this will end up correctly + * returning 1 but not before it spams dmesg with the pmd_clear_bad() output. + */ +static inline int pmd_devmap_trans_unstable(pmd_t *pmd) +{ + return pmd_devmap(*pmd) || pmd_trans_unstable(pmd); +} + #ifndef CONFIG_NUMA_BALANCING /* * Technically a PTE can be PROTNONE even when not doing NUMA balancing but diff --git a/mm/filemap.c b/mm/filemap.c index 5c9d564317a5..c1f2dc89b8a7 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -42,6 +42,7 @@ #include <linux/psi.h> #include <linux/ramfs.h> #include <linux/page_idle.h> +#include <asm/pgalloc.h> #include "internal.h" #define CREATE_TRACE_POINTS @@ -2911,74 +2912,164 @@ out_retry: } EXPORT_SYMBOL(filemap_fault); -void filemap_map_pages(struct vm_fault *vmf, - pgoff_t start_pgoff, pgoff_t end_pgoff) +static bool filemap_map_pmd(struct vm_fault *vmf, struct page *page) { - struct file *file = vmf->vma->vm_file; + struct mm_struct *mm = vmf->vma->vm_mm; + + /* Huge page is mapped? No need to proceed. */ + if (pmd_trans_huge(*vmf->pmd)) { + unlock_page(page); + put_page(page); + return true; + } + + if (pmd_none(*vmf->pmd) && PageTransHuge(page)) { + vm_fault_t ret = do_set_pmd(vmf, page); + if (!ret) { + /* The page is mapped successfully, reference consumed.
*/ + unlock_page(page); + return true; + } + } + + if (pmd_none(*vmf->pmd)) { + vmf->ptl = pmd_lock(mm, vmf->pmd); + if (likely(pmd_none(*vmf->pmd))) { + mm_inc_nr_ptes(mm); + pmd_populate(mm, vmf->pmd, vmf->prealloc_pte); + vmf->prealloc_pte = NULL; + } + spin_unlock(vmf->ptl); + } + + /* See comment in handle_pte_fault() */ + if (pmd_devmap_trans_unstable(vmf->pmd)) { + unlock_page(page); + put_page(page); + return true; + } + + return false; +} + +static struct page *next_uptodate_page(struct page *page, + struct address_space *mapping, + struct xa_state *xas, pgoff_t end_pgoff) +{ + unsigned long max_idx; + + do { + if (!page) + return NULL; + if (xas_retry(xas, page)) + continue; + if (xa_is_value(page)) + continue; + if (PageLocked(page)) + continue; + if (!page_cache_get_speculative(page)) + continue; + /* Has the page moved or been split? */ + if (unlikely(page != xas_reload(xas))) + goto skip; + if (!PageUptodate(page) || PageReadahead(page)) + goto skip; + if (PageHWPoison(page)) + goto skip; + if (!trylock_page(page)) + goto skip; + if (page->mapping != mapping) + goto unlock; + if (!PageUptodate(page)) + goto unlock; + max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); + if (xas->xa_index >= max_idx) + goto unlock; + return page; +unlock: + unlock_page(page); +skip: + put_page(page); + } while ((page = xas_next_entry(xas, end_pgoff)) != NULL); + + return NULL; +} + +static inline struct page *first_map_page(struct address_space *mapping, + struct xa_state *xas, + pgoff_t end_pgoff) +{ + return next_uptodate_page(xas_find(xas, end_pgoff), + mapping, xas, end_pgoff); +} + +static inline struct page *next_map_page(struct address_space *mapping, + struct xa_state *xas, + pgoff_t end_pgoff) +{ + return next_uptodate_page(xas_next_entry(xas, end_pgoff), + mapping, xas, end_pgoff); +} + +vm_fault_t filemap_map_pages(struct vm_fault *vmf, + pgoff_t start_pgoff, pgoff_t end_pgoff) +{ + struct vm_area_struct *vma = vmf->vma; + struct file *file = vma->vm_file; struct address_space *mapping = file->f_mapping; pgoff_t last_pgoff = start_pgoff; - unsigned long max_idx; + unsigned long address = vmf->address; XA_STATE(xas, &mapping->i_pages, start_pgoff); struct page *head, *page; unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss); + vm_fault_t ret = 0; rcu_read_lock(); - xas_for_each(&xas, head, end_pgoff) { - if (xas_retry(&xas, head)) - continue; - if (xa_is_value(head)) - goto next; + head = first_map_page(mapping, &xas, end_pgoff); + if (!head) + goto out; - /* - * Check for a locked page first, as a speculative - * reference may adversely influence page migration. - */ - if (PageLocked(head)) - goto next; - if (!page_cache_get_speculative(head)) - goto next; + if (filemap_map_pmd(vmf, head)) { + ret = VM_FAULT_NOPAGE; + goto out; + } - /* Has the page moved or been split? 
*/ - if (unlikely(head != xas_reload(&xas))) - goto skip; + vmf->address = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT); + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); + do { page = find_subpage(head, xas.xa_index); - - if (!PageUptodate(head) || - PageReadahead(page) || - PageHWPoison(page)) - goto skip; - if (!trylock_page(head)) - goto skip; - - if (head->mapping != mapping || !PageUptodate(head)) - goto unlock; - - max_idx = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); - if (xas.xa_index >= max_idx) + if (PageHWPoison(page)) goto unlock; if (mmap_miss > 0) mmap_miss--; vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT; - if (vmf->pte) - vmf->pte += xas.xa_index - last_pgoff; + vmf->pte += xas.xa_index - last_pgoff; last_pgoff = xas.xa_index; - if (alloc_set_pte(vmf, page)) + + if (!pte_none(*vmf->pte)) goto unlock; + + do_set_pte(vmf, page); + /* no need to invalidate: a not-present page won't be cached */ + update_mmu_cache(vma, vmf->address, vmf->pte); unlock_page(head); - goto next; + + /* The fault is handled */ + if (vmf->address == address) + ret = VM_FAULT_NOPAGE; + continue; unlock: unlock_page(head); -skip: put_page(head); -next: - /* Huge page is mapped? No need to proceed. */ - if (pmd_trans_huge(*vmf->pmd)) - break; - } + } while ((head = next_map_page(mapping, &xas, end_pgoff)) != NULL); + pte_unmap_unlock(vmf->pte, vmf->ptl); +out: rcu_read_unlock(); + vmf->address = address; WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss); + return ret; } EXPORT_SYMBOL(filemap_map_pages); diff --git a/mm/memory.c b/mm/memory.c index feff48e1465a..3e2fc2950ad7 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3503,7 +3503,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf) if (pte_alloc(vma->vm_mm, vmf->pmd)) return VM_FAULT_OOM; - /* See the comment in pte_alloc_one_map() */ + /* See comment in handle_pte_fault() */ if (unlikely(pmd_trans_unstable(vmf->pmd))) return 0; @@ -3643,66 +3643,6 @@ static vm_fault_t __do_fault(struct vm_fault *vmf) return ret; } -/* - * The ordering of these checks is important for pmds with _PAGE_DEVMAP set. - * If we check pmd_trans_unstable() first we will trip the bad_pmd() check - * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly - * returning 1 but not before it spams dmesg with the pmd_clear_bad() output. - */ -static int pmd_devmap_trans_unstable(pmd_t *pmd) -{ - return pmd_devmap(*pmd) || pmd_trans_unstable(pmd); -} - -static vm_fault_t pte_alloc_one_map(struct vm_fault *vmf) -{ - struct vm_area_struct *vma = vmf->vma; - - if (!pmd_none(*vmf->pmd)) - goto map_pte; - if (vmf->prealloc_pte) { - vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); - if (unlikely(!pmd_none(*vmf->pmd))) { - spin_unlock(vmf->ptl); - goto map_pte; - } - - mm_inc_nr_ptes(vma->vm_mm); - pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); - spin_unlock(vmf->ptl); - vmf->prealloc_pte = NULL; - } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) { - return VM_FAULT_OOM; - } -map_pte: - /* - * If a huge pmd materialized under us just retry later. Use - * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of - * pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge - * under us and then back to pmd_none, as a result of MADV_DONTNEED - * running immediately after a huge pmd fault in a different thread of - * this mm, in turn leading to a misleading pmd_trans_huge() retval. 
- * All we have to ensure is that it is a regular pmd that we can walk - * with pte_offset_map() and we can do that through an atomic read in - * C, which is what pmd_trans_unstable() provides. - */ - if (pmd_devmap_trans_unstable(vmf->pmd)) - return VM_FAULT_NOPAGE; - - /* - * At this point we know that our vmf->pmd points to a page of ptes - * and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge() - * for the duration of the fault. If a racing MADV_DONTNEED runs and - * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still - * be valid and we will re-check to make sure the vmf->pte isn't - * pte_none() under vmf->ptl protection when we return to - * alloc_set_pte(). - */ - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, - &vmf->ptl); - return 0; -} - #ifdef CONFIG_TRANSPARENT_HUGEPAGE static void deposit_prealloc_pte(struct vm_fault *vmf) { @@ -3717,7 +3657,7 @@ static void deposit_prealloc_pte(struct vm_fault *vmf) vmf->prealloc_pte = NULL; } -static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) +vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) { struct vm_area_struct *vma = vmf->vma; bool write = vmf->flags & FAULT_FLAG_WRITE; @@ -3775,52 +3715,17 @@ out: return ret; } #else -static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) +vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) { - BUILD_BUG(); - return 0; + return VM_FAULT_FALLBACK; } #endif -/** - * alloc_set_pte - setup new PTE entry for given page and add reverse page - * mapping. If needed, the function allocates page table or use pre-allocated. - * - * @vmf: fault environment - * @page: page to map - * - * Caller must take care of unlocking vmf->ptl, if vmf->pte is non-NULL on - * return. - * - * Target users are page handler itself and implementations of - * vm_ops->map_pages. - * - * Return: %0 on success, %VM_FAULT_ code in case of error. - */ -vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page) +void do_set_pte(struct vm_fault *vmf, struct page *page) { struct vm_area_struct *vma = vmf->vma; bool write = vmf->flags & FAULT_FLAG_WRITE; pte_t entry; - vm_fault_t ret; - - if (pmd_none(*vmf->pmd) && PageTransCompound(page)) { - ret = do_set_pmd(vmf, page); - if (ret != VM_FAULT_FALLBACK) - return ret; - } - - if (!vmf->pte) { - ret = pte_alloc_one_map(vmf); - if (ret) - return ret; - } - - /* Re-check under ptl */ - if (unlikely(!pte_none(*vmf->pte))) { - update_mmu_tlb(vma, vmf->address, vmf->pte); - return VM_FAULT_NOPAGE; - } flush_icache_page(vma, page); entry = mk_pte(page, vma->vm_page_prot); @@ -3837,14 +3742,8 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page) page_add_file_rmap(page, false); } set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); - - /* no need to invalidate: a not-present page won't be cached */ - update_mmu_cache(vma, vmf->address, vmf->pte); - - return 0; } - /** * finish_fault - finish page fault once we have prepared the page to fault * @@ -3862,12 +3761,12 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page) */ vm_fault_t finish_fault(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct page *page; - vm_fault_t ret = 0; + vm_fault_t ret; /* Did we COW the page? 
*/ - if ((vmf->flags & FAULT_FLAG_WRITE) && - !(vmf->vma->vm_flags & VM_SHARED)) + if ((vmf->flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) page = vmf->cow_page; else page = vmf->page; @@ -3876,12 +3775,38 @@ vm_fault_t finish_fault(struct vm_fault *vmf) * check even for read faults because we might have lost our CoWed * page */ - if (!(vmf->vma->vm_flags & VM_SHARED)) - ret = check_stable_address_space(vmf->vma->vm_mm); - if (!ret) - ret = alloc_set_pte(vmf, page); - if (vmf->pte) - pte_unmap_unlock(vmf->pte, vmf->ptl); + if (!(vma->vm_flags & VM_SHARED)) { + ret = check_stable_address_space(vma->vm_mm); + if (ret) + return ret; + } + + if (pmd_none(*vmf->pmd)) { + if (PageTransCompound(page)) { + ret = do_set_pmd(vmf, page); + if (ret != VM_FAULT_FALLBACK) + return ret; + } + + if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) + return VM_FAULT_OOM; + } + + /* See comment in handle_pte_fault() */ + if (pmd_devmap_trans_unstable(vmf->pmd)) + return 0; + + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, + vmf->address, &vmf->ptl); + ret = 0; + /* Re-check under ptl */ + if (likely(pte_none(*vmf->pte))) + do_set_pte(vmf, page); + else + ret = VM_FAULT_NOPAGE; + + update_mmu_tlb(vma, vmf->address, vmf->pte); + pte_unmap_unlock(vmf->pte, vmf->ptl); return ret; } @@ -3951,13 +3876,12 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf) pgoff_t start_pgoff = vmf->pgoff; pgoff_t end_pgoff; int off; - vm_fault_t ret = 0; nr_pages = READ_ONCE(fault_around_bytes) >> PAGE_SHIFT; mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK; - vmf->address = max(address & mask, vmf->vma->vm_start); - off = ((address - vmf->address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); + address = max(address & mask, vmf->vma->vm_start); + off = ((vmf->address - address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); start_pgoff -= off; /* @@ -3965,7 +3889,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf) * the vma or nr_pages from start_pgoff, depending what is nearest. */ end_pgoff = start_pgoff - - ((vmf->address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + + ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + PTRS_PER_PTE - 1; end_pgoff = min3(end_pgoff, vma_pages(vmf->vma) + vmf->vma->vm_pgoff - 1, start_pgoff + nr_pages - 1); @@ -3973,31 +3897,11 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf) if (pmd_none(*vmf->pmd)) { vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm); if (!vmf->prealloc_pte) - goto out; + return VM_FAULT_OOM; smp_wmb(); /* See comment in __pte_alloc() */ } - vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff); - - /* Huge page is mapped? Page fault is solved */ - if (pmd_trans_huge(*vmf->pmd)) { - ret = VM_FAULT_NOPAGE; - goto out; - } - - /* ->map_pages() haven't done anything useful. Cold page cache? */ - if (!vmf->pte) - goto out; - - /* check if the page fault is solved */ - vmf->pte -= (vmf->address >> PAGE_SHIFT) - (address >> PAGE_SHIFT); - if (!pte_none(*vmf->pte)) - ret = VM_FAULT_NOPAGE; - pte_unmap_unlock(vmf->pte, vmf->ptl); -out: - vmf->address = address; - vmf->pte = NULL; - return ret; + return vmf->vma->vm_ops->map_pages(vmf, start_pgoff, end_pgoff); } static vm_fault_t do_read_fault(struct vm_fault *vmf) @@ -4353,7 +4257,18 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) */ vmf->pte = NULL; } else { - /* See comment in pte_alloc_one_map() */ + /* + * If a huge pmd materialized under us just retry later. 
Use + * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead + * of pmd_trans_huge() to ensure the pmd didn't become + * pmd_trans_huge under us and then back to pmd_none, as a + * result of MADV_DONTNEED running immediately after a huge pmd + * fault in a different thread of this mm, in turn leading to a + * misleading pmd_trans_huge() retval. All we have to ensure is + * that it is a regular pmd that we can walk with + * pte_offset_map() and we can do that through an atomic read + * in C, which is what pmd_trans_unstable() provides. + */ if (pmd_devmap_trans_unstable(vmf->pmd)) return 0; /* -- cgit v1.2.3 From 46bdb4277f98e70d0c91f4289897ade533fe9e80 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 24 Nov 2020 18:48:26 +0000 Subject: mm: Allow architectures to request 'old' entries when prefaulting Commit 5c0a85fad949 ("mm: make faultaround produce old ptes") changed the "faultaround" behaviour to initialise prefaulted PTEs as 'old', since this avoids vmscan wrongly assuming that they are hot, despite having never been explicitly accessed by userspace. The change has been shown to benefit numerous arm64 micro-architectures (with hardware access flag) running Android, where both application launch latency and direct reclaim time are significantly reduced (by 10%+ and ~80% respectively). Unfortunately, commit 315d09bf30c2 ("Revert "mm: make faultaround produce old ptes"") reverted the change due to it being identified as the cause of a ~6% regression in unixbench on x86. Experiments on a variety of recent arm64 micro-architectures indicate that unixbench is not affected by the original commit, which appears to yield a 0-1% performance improvement. Since one size does not fit all for the initial state of prefaulted PTEs, introduce arch_wants_old_prefaulted_pte(), which allows an architecture to opt-in to 'old' prefaulted PTEs at runtime based on whatever criteria it may have. Cc: Jan Kara Cc: Minchan Kim Cc: Andrew Morton Cc: Kirill A. Shutemov Cc: Linus Torvalds Reported-by: Vinayak Menon Signed-off-by: Will Deacon --- include/linux/mm.h | 5 ++++- mm/filemap.c | 14 ++++++++++---- mm/memory.c | 20 +++++++++++++++++++- 3 files changed, 33 insertions(+), 6 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 4572a9bc5862..251a2339befb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -434,6 +434,7 @@ extern pgprot_t protection_map[16]; * @FAULT_FLAG_REMOTE: The fault is not for current task/mm. * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch. * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals. + * @FAULT_FLAG_PREFAULT: Fault was a prefault. 
* * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify * whether we would allow page faults to retry by specifying these two @@ -464,6 +465,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_REMOTE 0x80 #define FAULT_FLAG_INSTRUCTION 0x100 #define FAULT_FLAG_INTERRUPTIBLE 0x200 +#define FAULT_FLAG_PREFAULT 0x400 /* * The default fault flags that should be used by most of the @@ -501,7 +503,8 @@ static inline bool fault_flag_allow_retry_first(unsigned int flags) { FAULT_FLAG_USER, "USER" }, \ { FAULT_FLAG_REMOTE, "REMOTE" }, \ { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }, \ - { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" } + { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" }, \ + { FAULT_FLAG_PREFAULT, "PREFAULT" } /* * vm_fault is filled by the pagefault handler and passed to the vma's diff --git a/mm/filemap.c b/mm/filemap.c index c1f2dc89b8a7..a6dc97906c8e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3019,6 +3019,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, struct address_space *mapping = file->f_mapping; pgoff_t last_pgoff = start_pgoff; unsigned long address = vmf->address; + unsigned long flags = vmf->flags; XA_STATE(xas, &mapping->i_pages, start_pgoff); struct page *head, *page; unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss); @@ -3051,14 +3052,18 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, if (!pte_none(*vmf->pte)) goto unlock; + /* We're about to handle the fault */ + if (vmf->address == address) { + vmf->flags &= ~FAULT_FLAG_PREFAULT; + ret = VM_FAULT_NOPAGE; + } else { + vmf->flags |= FAULT_FLAG_PREFAULT; + } + do_set_pte(vmf, page); /* no need to invalidate: a not-present page won't be cached */ update_mmu_cache(vma, vmf->address, vmf->pte); unlock_page(head); - - /* The fault is handled */ - if (vmf->address == address) - ret = VM_FAULT_NOPAGE; continue; unlock: unlock_page(head); @@ -3067,6 +3072,7 @@ unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); out: rcu_read_unlock(); + vmf->flags = flags; vmf->address = address; WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss); return ret; diff --git a/mm/memory.c b/mm/memory.c index 3e2fc2950ad7..f0e7c589ca9d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -134,6 +134,18 @@ static inline bool arch_faults_on_old_pte(void) } #endif +#ifndef arch_wants_old_prefaulted_pte +static inline bool arch_wants_old_prefaulted_pte(void) +{ + /* + * Transitioning a PTE from 'old' to 'young' can be expensive on + * some architectures, even if it's performed in hardware. By + * default, "false" means prefaulted entries will be 'young'. 
+ */ + return false; +} +#endif + static int __init disable_randmaps(char *s) { randomize_va_space = 0; @@ -3725,11 +3737,17 @@ void do_set_pte(struct vm_fault *vmf, struct page *page) { struct vm_area_struct *vma = vmf->vma; bool write = vmf->flags & FAULT_FLAG_WRITE; + bool prefault = vmf->flags & FAULT_FLAG_PREFAULT; pte_t entry; flush_icache_page(vma, page); entry = mk_pte(page, vma->vm_page_prot); - entry = pte_sw_mkyoung(entry); + + if (prefault && arch_wants_old_prefaulted_pte()) + entry = pte_mkold(entry); + else + entry = pte_sw_mkyoung(entry); + if (write) entry = maybe_mkwrite(pte_mkdirty(entry), vma); /* copy-on-write page */ -- cgit v1.2.3 From 0388f9c7433024ccf3909b3404a745100f006a17 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 24 Nov 2020 18:49:26 +0000 Subject: arm64: mm: Implement arch_wants_old_prefaulted_pte() On CPUs with hardware AF/DBM, initialising prefaulted PTEs as 'old' improves vmscan behaviour and does not appear to introduce any overhead elsewhere. Implement arch_wants_old_prefaulted_pte() to return 'true' if we detect hardware access flag support at runtime. This can be extended in future based on MIDR matching if necessary. Cc: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 501562793ce2..e17b96d0e4b5 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -980,7 +980,17 @@ static inline bool arch_faults_on_old_pte(void) return !cpu_has_hw_af(); } -#define arch_faults_on_old_pte arch_faults_on_old_pte +#define arch_faults_on_old_pte arch_faults_on_old_pte + +/* + * Experimentally, it's cheap to set the access flag in hardware and we + * benefit from prefaulting mappings as 'old' to start with. + */ +static inline bool arch_wants_old_prefaulted_pte(void) +{ + return !arch_faults_on_old_pte(); +} +#define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte #endif /* !__ASSEMBLY__ */ -- cgit v1.2.3 From 4a669e2432fce9c01522a8453460e89f877dccd4 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Thu, 3 Dec 2020 22:16:09 +0800 Subject: drivers/perf: Add support for ARMv8.3-SPE Armv8.3 extends the SPE by adding: - Alignment field in the Events packet, and filtering on this event using PMSEVFR_EL1. - Support for the Scalable Vector Extension (SVE). The main additions for SVE are: - Recording the vector length for SVE operations in the Operation Type packet. It is not possible to filter on vector length. - Incomplete predicate and empty predicate fields in the Events packet, and filtering on these events using PMSEVFR_EL1. Update the check of pmsevfr for empty/partial predicated SVE and alignment event in SPE driver. 
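To make the mask arithmetic concrete, here is a small standalone sketch (plain userspace C for illustration only, not kernel code; the BIT_ULL/GENMASK_ULL definitions are local stand-ins for the kernel macros) that evaluates the same RES0 expressions the patch adds to sysreg.h. The v8.2 value reproduces the old single SYS_PMSEVFR_EL1_RES0 constant (0x0000ffff00ff0f55), while the v8.3 value clears bits 18, 17 and 11, i.e. the newly defined predicate and alignment event filters:

#include <stdio.h>

#define BIT_ULL(n)        (1ULL << (n))
#define GENMASK_ULL(h, l) ((~0ULL << (l)) & (~0ULL >> (63 - (h))))

/* Same expressions as SYS_PMSEVFR_EL1_RES0_8_2/_8_3 in the patch below */
#define RES0_8_2 \
	(GENMASK_ULL(47, 32) | GENMASK_ULL(23, 16) | GENMASK_ULL(11, 8) | \
	 BIT_ULL(6) | BIT_ULL(4) | BIT_ULL(2) | BIT_ULL(0))
#define RES0_8_3 (RES0_8_2 & ~(BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11)))

int main(void)
{
	printf("v8.2 RES0: 0x%016llx\n", RES0_8_2); /* 0x0000ffff00ff0f55 */
	printf("v8.3 RES0: 0x%016llx\n", RES0_8_3); /* 0x0000ffff00f90755 */
	return 0;
}

An event filter with any RES0 bit set is rejected with -EOPNOTSUPP in arm_spe_pmu_event_init(), as the diff below shows.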
Signed-off-by: Wei Li Link: https://lore.kernel.org/r/20201203141609.14148-1-liwei391@huawei.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 9 ++++++++- drivers/perf/arm_spe_pmu.c | 17 +++++++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 8b5e7e5c3cc8..767bb2d47be9 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -291,7 +291,11 @@ #define SYS_PMSFCR_EL1_ST_SHIFT 18 #define SYS_PMSEVFR_EL1 sys_reg(3, 0, 9, 9, 5) -#define SYS_PMSEVFR_EL1_RES0 0x0000ffff00ff0f55UL +#define SYS_PMSEVFR_EL1_RES0_8_2 \ + (GENMASK_ULL(47, 32) | GENMASK_ULL(23, 16) | GENMASK_ULL(11, 8) |\ + BIT_ULL(6) | BIT_ULL(4) | BIT_ULL(2) | BIT_ULL(0)) +#define SYS_PMSEVFR_EL1_RES0_8_3 \ + (SYS_PMSEVFR_EL1_RES0_8_2 & ~(BIT_ULL(18) | BIT_ULL(17) | BIT_ULL(11))) #define SYS_PMSLATFR_EL1 sys_reg(3, 0, 9, 9, 6) #define SYS_PMSLATFR_EL1_MINLAT_SHIFT 0 @@ -844,6 +848,9 @@ #define ID_AA64DFR0_PMUVER_8_5 0x6 #define ID_AA64DFR0_PMUVER_IMP_DEF 0xf +#define ID_AA64DFR0_PMSVER_8_2 0x1 +#define ID_AA64DFR0_PMSVER_8_3 0x2 + #define ID_DFR0_PERFMON_SHIFT 24 #define ID_DFR0_PERFMON_8_1 0x4 diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index cc00915ad6d1..bce9aff9f546 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -54,7 +54,7 @@ struct arm_spe_pmu { struct hlist_node hotplug_node; int irq; /* PPI */ - + u16 pmsver; u16 min_period; u16 counter_sz; @@ -655,6 +655,18 @@ static irqreturn_t arm_spe_pmu_irq_handler(int irq, void *dev) return IRQ_HANDLED; } +static u64 arm_spe_pmsevfr_res0(u16 pmsver) +{ + switch (pmsver) { + case ID_AA64DFR0_PMSVER_8_2: + return SYS_PMSEVFR_EL1_RES0_8_2; + case ID_AA64DFR0_PMSVER_8_3: + /* Return the highest version we support in default */ + default: + return SYS_PMSEVFR_EL1_RES0_8_3; + } +} + /* Perf callbacks */ static int arm_spe_pmu_event_init(struct perf_event *event) { @@ -670,7 +682,7 @@ static int arm_spe_pmu_event_init(struct perf_event *event) !cpumask_test_cpu(event->cpu, &spe_pmu->supported_cpus)) return -ENOENT; - if (arm_spe_event_to_pmsevfr(event) & SYS_PMSEVFR_EL1_RES0) + if (arm_spe_event_to_pmsevfr(event) & arm_spe_pmsevfr_res0(spe_pmu->pmsver)) return -EOPNOTSUPP; if (attr->exclude_idle) @@ -937,6 +949,7 @@ static void __arm_spe_pmu_dev_probe(void *info) fld, smp_processor_id()); return; } + spe_pmu->pmsver = (u16)fld; /* Read PMBIDR first to determine whether or not we have access */ reg = read_sysreg_s(SYS_PMBIDR_EL1); -- cgit v1.2.3 From 30b34c4833ea7a1a48132d957052d79b6dcb1ebb Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sun, 17 Jan 2021 22:28:44 +0100 Subject: perf: qcom: Constify static struct attribute_group The only usage is to put their addresses in an array of pointers to const struct attribute group. Make them const to allow the compiler to put them in read-only memory. 
Signed-off-by: Rikard Falkeborn Link: https://lore.kernel.org/r/20210117212847.21319-2-rikard.falkeborn@gmail.com Signed-off-by: Will Deacon --- drivers/perf/qcom_l2_pmu.c | 6 +++--- drivers/perf/qcom_l3_pmu.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c index 23a0e008dafa..8883af955a2a 100644 --- a/drivers/perf/qcom_l2_pmu.c +++ b/drivers/perf/qcom_l2_pmu.c @@ -649,7 +649,7 @@ static struct attribute *l2_cache_pmu_cpumask_attrs[] = { NULL, }; -static struct attribute_group l2_cache_pmu_cpumask_group = { +static const struct attribute_group l2_cache_pmu_cpumask_group = { .attrs = l2_cache_pmu_cpumask_attrs, }; @@ -665,7 +665,7 @@ static struct attribute *l2_cache_pmu_formats[] = { NULL, }; -static struct attribute_group l2_cache_pmu_format_group = { +static const struct attribute_group l2_cache_pmu_format_group = { .name = "format", .attrs = l2_cache_pmu_formats, }; @@ -700,7 +700,7 @@ static struct attribute *l2_cache_pmu_events[] = { NULL }; -static struct attribute_group l2_cache_pmu_events_group = { +static const struct attribute_group l2_cache_pmu_events_group = { .name = "events", .attrs = l2_cache_pmu_events, }; diff --git a/drivers/perf/qcom_l3_pmu.c b/drivers/perf/qcom_l3_pmu.c index 9ddb577c542b..fb34b87b9471 100644 --- a/drivers/perf/qcom_l3_pmu.c +++ b/drivers/perf/qcom_l3_pmu.c @@ -630,7 +630,7 @@ static struct attribute *qcom_l3_cache_pmu_formats[] = { NULL, }; -static struct attribute_group qcom_l3_cache_pmu_format_group = { +static const struct attribute_group qcom_l3_cache_pmu_format_group = { .name = "format", .attrs = qcom_l3_cache_pmu_formats, }; @@ -663,7 +663,7 @@ static struct attribute *qcom_l3_cache_pmu_events[] = { NULL }; -static struct attribute_group qcom_l3_cache_pmu_events_group = { +static const struct attribute_group qcom_l3_cache_pmu_events_group = { .name = "events", .attrs = qcom_l3_cache_pmu_events, }; @@ -685,7 +685,7 @@ static struct attribute *qcom_l3_cache_pmu_cpumask_attrs[] = { NULL, }; -static struct attribute_group qcom_l3_cache_pmu_cpumask_attr_group = { +static const struct attribute_group qcom_l3_cache_pmu_cpumask_attr_group = { .attrs = qcom_l3_cache_pmu_cpumask_attrs, }; -- cgit v1.2.3 From 3cb7d2da183fec42974fa3fa795cc33d1e81322d Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sun, 17 Jan 2021 22:28:45 +0100 Subject: perf/imx_ddr: Constify static struct attribute_group The only usage is to put their addresses in an array of pointers to const struct attribute group. Make them const to allow the compiler to put them in read-only memory. 
Signed-off-by: Rikard Falkeborn Link: https://lore.kernel.org/r/20210117212847.21319-3-rikard.falkeborn@gmail.com Signed-off-by: Will Deacon --- drivers/perf/fsl_imx8_ddr_perf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index a11bfd8a0823..be1f26b62ddb 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -133,7 +133,7 @@ static struct attribute *ddr_perf_identifier_attrs[] = { NULL, }; -static struct attribute_group ddr_perf_identifier_attr_group = { +static const struct attribute_group ddr_perf_identifier_attr_group = { .attrs = ddr_perf_identifier_attrs, .is_visible = ddr_perf_identifier_attr_visible, }; @@ -188,7 +188,7 @@ static struct attribute *ddr_perf_filter_cap_attr[] = { NULL, }; -static struct attribute_group ddr_perf_filter_cap_attr_group = { +static const struct attribute_group ddr_perf_filter_cap_attr_group = { .name = "caps", .attrs = ddr_perf_filter_cap_attr, }; @@ -209,7 +209,7 @@ static struct attribute *ddr_perf_cpumask_attrs[] = { NULL, }; -static struct attribute_group ddr_perf_cpumask_attr_group = { +static const struct attribute_group ddr_perf_cpumask_attr_group = { .attrs = ddr_perf_cpumask_attrs, }; @@ -265,7 +265,7 @@ static struct attribute *ddr_perf_events_attrs[] = { NULL, }; -static struct attribute_group ddr_perf_events_attr_group = { +static const struct attribute_group ddr_perf_events_attr_group = { .name = "events", .attrs = ddr_perf_events_attrs, }; @@ -281,7 +281,7 @@ static struct attribute *ddr_perf_format_attrs[] = { NULL, }; -static struct attribute_group ddr_perf_format_attr_group = { +static const struct attribute_group ddr_perf_format_attr_group = { .name = "format", .attrs = ddr_perf_format_attrs, }; -- cgit v1.2.3 From c2c4d5c051b23dd79ff82d6232347245e11d7c9c Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sun, 17 Jan 2021 22:28:46 +0100 Subject: perf: hisi: Constify static struct attribute_group The only usage is to put their addresses in an array of pointers to const struct attribute group. Make them const to allow the compiler to put them in read-only memory. 
Signed-off-by: Rikard Falkeborn Link: https://lore.kernel.org/r/20210117212847.21319-4-rikard.falkeborn@gmail.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 2 +- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 5ac6c9113767..ac1a8c120a00 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -319,7 +319,7 @@ static struct attribute *hisi_ddrc_pmu_identifier_attrs[] = { NULL }; -static struct attribute_group hisi_ddrc_pmu_identifier_group = { +static const struct attribute_group hisi_ddrc_pmu_identifier_group = { .attrs = hisi_ddrc_pmu_identifier_attrs, }; diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 41b2dceb5f26..3402f1a395a8 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -331,7 +331,7 @@ static struct attribute *hisi_hha_pmu_identifier_attrs[] = { NULL }; -static struct attribute_group hisi_hha_pmu_identifier_group = { +static const struct attribute_group hisi_hha_pmu_identifier_group = { .attrs = hisi_hha_pmu_identifier_attrs, }; diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 705501d18d03..7d792435c2aa 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -321,7 +321,7 @@ static struct attribute *hisi_l3c_pmu_identifier_attrs[] = { NULL }; -static struct attribute_group hisi_l3c_pmu_identifier_group = { +static const struct attribute_group hisi_l3c_pmu_identifier_group = { .attrs = hisi_l3c_pmu_identifier_attrs, }; -- cgit v1.2.3 From f0c140481d1b807217cacdcf11d24cfa407a7a53 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sun, 17 Jan 2021 22:28:47 +0100 Subject: perf: Constify static struct attribute_group The only usage is to put their addresses in an array of pointers to const struct attribute group. Make them const to allow the compiler to put them in read-only memory. 
Signed-off-by: Rikard Falkeborn Link: https://lore.kernel.org/r/20210117212847.21319-5-rikard.falkeborn@gmail.com Signed-off-by: Will Deacon --- drivers/perf/arm-cci.c | 2 +- drivers/perf/arm-cmn.c | 2 +- drivers/perf/arm_dmc620_pmu.c | 4 ++-- drivers/perf/arm_pmu.c | 2 +- drivers/perf/arm_smmuv3_pmu.c | 8 ++++---- drivers/perf/arm_spe_pmu.c | 6 +++--- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 87c4be9dd412..a75cf77c4de4 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1376,7 +1376,7 @@ static struct attribute *pmu_attrs[] = { NULL, }; -static struct attribute_group pmu_attr_group = { +static const struct attribute_group pmu_attr_group = { .attrs = pmu_attrs, }; diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index a76ff594f3ca..f30fcd330899 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -616,7 +616,7 @@ static struct attribute *arm_cmn_cpumask_attrs[] = { NULL, }; -static struct attribute_group arm_cmn_cpumask_attr_group = { +static const struct attribute_group arm_cmn_cpumask_attr_group = { .attrs = arm_cmn_cpumask_attrs, }; diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c index 004930eb4bbb..27f54c0afc3b 100644 --- a/drivers/perf/arm_dmc620_pmu.c +++ b/drivers/perf/arm_dmc620_pmu.c @@ -159,7 +159,7 @@ static struct attribute *dmc620_pmu_events_attrs[] = { NULL, }; -static struct attribute_group dmc620_pmu_events_attr_group = { +static const struct attribute_group dmc620_pmu_events_attr_group = { .name = "events", .attrs = dmc620_pmu_events_attrs, }; @@ -222,7 +222,7 @@ static struct attribute *dmc620_pmu_formats_attrs[] = { NULL, }; -static struct attribute_group dmc620_pmu_format_attr_group = { +static const struct attribute_group dmc620_pmu_format_attr_group = { .name = "format", .attrs = dmc620_pmu_formats_attrs, }; diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index cb2f55f450e4..2d10d84fb79c 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -577,7 +577,7 @@ static struct attribute *armpmu_common_attrs[] = { NULL, }; -static struct attribute_group armpmu_common_attr_group = { +static const struct attribute_group armpmu_common_attr_group = { .attrs = armpmu_common_attrs, }; diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index 74474bb322c3..8ff7a67f691c 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -493,7 +493,7 @@ static struct attribute *smmu_pmu_cpumask_attrs[] = { NULL }; -static struct attribute_group smmu_pmu_cpumask_group = { +static const struct attribute_group smmu_pmu_cpumask_group = { .attrs = smmu_pmu_cpumask_attrs, }; @@ -548,7 +548,7 @@ static umode_t smmu_pmu_event_is_visible(struct kobject *kobj, return 0; } -static struct attribute_group smmu_pmu_events_group = { +static const struct attribute_group smmu_pmu_events_group = { .name = "events", .attrs = smmu_pmu_events, .is_visible = smmu_pmu_event_is_visible, @@ -583,7 +583,7 @@ static struct attribute *smmu_pmu_identifier_attrs[] = { NULL }; -static struct attribute_group smmu_pmu_identifier_group = { +static const struct attribute_group smmu_pmu_identifier_group = { .attrs = smmu_pmu_identifier_attrs, .is_visible = smmu_pmu_identifier_attr_visible, }; @@ -602,7 +602,7 @@ static struct attribute *smmu_pmu_formats[] = { NULL }; -static struct attribute_group smmu_pmu_format_group = { +static const struct attribute_group smmu_pmu_format_group = { .name = "format", .attrs = 
smmu_pmu_formats, }; diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index bce9aff9f546..d3929ccebfd2 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -146,7 +146,7 @@ static struct attribute *arm_spe_pmu_cap_attr[] = { NULL, }; -static struct attribute_group arm_spe_pmu_cap_group = { +static const struct attribute_group arm_spe_pmu_cap_group = { .name = "caps", .attrs = arm_spe_pmu_cap_attr, }; @@ -227,7 +227,7 @@ static struct attribute *arm_spe_pmu_formats_attr[] = { NULL, }; -static struct attribute_group arm_spe_pmu_format_group = { +static const struct attribute_group arm_spe_pmu_format_group = { .name = "format", .attrs = arm_spe_pmu_formats_attr, }; @@ -247,7 +247,7 @@ static struct attribute *arm_spe_pmu_attrs[] = { NULL, }; -static struct attribute_group arm_spe_pmu_group = { +static const struct attribute_group arm_spe_pmu_group = { .attrs = arm_spe_pmu_attrs, }; -- cgit v1.2.3 From 742d33729a0df11c9d8d4625dbf21dd20cdefd44 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 20 Jan 2021 14:34:23 +0000 Subject: mm: Move immutable fields of 'struct vm_fault' into anonymous struct 'struct vm_fault' contains information about the fault being serviced alongside mutable fields that contribute to the state of the fault-handling logic. Unfortunately, the distinction between the two is not clear-cut, and a number of callers end up manipulating the structure temporarily before restoring it when returning. Try to clean this up by moving the immutable fault information into an anonymous struct, which will later be marked as 'const'. Ideally, the 'flags' field would be part of the new structure too, but it seems as though the ->page_mkwrite() path is not ready for this yet. Cc: Kirill A. Shutemov Suggested-by: Linus Torvalds Link: https://lore.kernel.org/r/CAHk-=whYs9XsO88iqJzN6NC=D-dp2m0oYXuOoZ=eWnvv=5OA+w@mail.gmail.com Signed-off-by: Will Deacon --- include/linux/mm.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 251a2339befb..b4a5cb9bff7d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -517,11 +517,14 @@ static inline bool fault_flag_allow_retry_first(unsigned int flags) * pgoff should be used in favour of virtual_address, if possible. */ struct vm_fault { - struct vm_area_struct *vma; /* Target VMA */ - unsigned int flags; /* FAULT_FLAG_xxx flags */ - gfp_t gfp_mask; /* gfp mask to be used for allocations */ - pgoff_t pgoff; /* Logical page offset based on vma */ - unsigned long address; /* Faulting virtual address */ + struct { + struct vm_area_struct *vma; /* Target VMA */ + gfp_t gfp_mask; /* gfp mask to be used for allocations */ + pgoff_t pgoff; /* Logical page offset based on vma */ + unsigned long address; /* Faulting virtual address */ + }; + unsigned int flags; /* FAULT_FLAG_xxx flags + * XXX: should really be 'const' */ pmd_t *pmd; /* Pointer to pmd entry matching * the 'address' */ pud_t *pud; /* Pointer to pud entry matching -- cgit v1.2.3 From 9d3af4b448a119ac81378d3bc775f1c4a2a7ff36 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 14 Jan 2021 15:24:19 +0000 Subject: mm: Pass 'address' to map to do_set_pte() and drop FAULT_FLAG_PREFAULT Rather than modifying the 'address' field of the 'struct vm_fault' passed to do_set_pte(), leave that to identify the real faulting address and pass in the virtual address to be mapped by the new pte as a separate argument.
This makes FAULT_FLAG_PREFAULT redundant, as a prefault entry can be identified simply by comparing the new address parameter with the faulting address, so remove the redundant flag at the same time. Cc: Kirill A. Shutemov Cc: Linus Torvalds Signed-off-by: Will Deacon --- include/linux/mm.h | 7 ++----- mm/filemap.c | 21 +++++++-------------- mm/memory.c | 10 +++++----- 3 files changed, 14 insertions(+), 24 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index b4a5cb9bff7d..e0f056753bef 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -434,7 +434,6 @@ extern pgprot_t protection_map[16]; * @FAULT_FLAG_REMOTE: The fault is not for current task/mm. * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch. * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals. - * @FAULT_FLAG_PREFAULT: Fault was a prefault. * * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify * whether we would allow page faults to retry by specifying these two @@ -465,7 +464,6 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_REMOTE 0x80 #define FAULT_FLAG_INSTRUCTION 0x100 #define FAULT_FLAG_INTERRUPTIBLE 0x200 -#define FAULT_FLAG_PREFAULT 0x400 /* * The default fault flags that should be used by most of the @@ -503,8 +501,7 @@ static inline bool fault_flag_allow_retry_first(unsigned int flags) { FAULT_FLAG_USER, "USER" }, \ { FAULT_FLAG_REMOTE, "REMOTE" }, \ { FAULT_FLAG_INSTRUCTION, "INSTRUCTION" }, \ - { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" }, \ - { FAULT_FLAG_PREFAULT, "PREFAULT" } + { FAULT_FLAG_INTERRUPTIBLE, "INTERRUPTIBLE" } /* * vm_fault is filled by the pagefault handler and passed to the vma's @@ -995,7 +992,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) } vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page); -void do_set_pte(struct vm_fault *vmf, struct page *page); +void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr); vm_fault_t finish_fault(struct vm_fault *vmf); vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); diff --git a/mm/filemap.c b/mm/filemap.c index a6dc97906c8e..fb7a8d9b5603 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3018,8 +3018,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, struct file *file = vma->vm_file; struct address_space *mapping = file->f_mapping; pgoff_t last_pgoff = start_pgoff; - unsigned long address = vmf->address; - unsigned long flags = vmf->flags; + unsigned long addr; XA_STATE(xas, &mapping->i_pages, start_pgoff); struct page *head, *page; unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss); @@ -3035,8 +3034,8 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, goto out; } - vmf->address = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT); - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); + addr = vma->vm_start + ((start_pgoff - vma->vm_pgoff) << PAGE_SHIFT); + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl); do { page = find_subpage(head, xas.xa_index); if (PageHWPoison(page)) @@ -3045,7 +3044,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, if (mmap_miss > 0) mmap_miss--; - vmf->address += (xas.xa_index - last_pgoff) << PAGE_SHIFT; + addr += (xas.xa_index - last_pgoff) << PAGE_SHIFT; vmf->pte += xas.xa_index - last_pgoff; last_pgoff = xas.xa_index; @@ -3053,16 +3052,12 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, goto unlock; /* We're about to handle the fault */ - if (vmf->address == address) { - vmf->flags &= 
~FAULT_FLAG_PREFAULT; + if (vmf->address == addr) ret = VM_FAULT_NOPAGE; - } else { - vmf->flags |= FAULT_FLAG_PREFAULT; - } - do_set_pte(vmf, page); + do_set_pte(vmf, page, addr); /* no need to invalidate: a not-present page won't be cached */ - update_mmu_cache(vma, vmf->address, vmf->pte); + update_mmu_cache(vma, addr, vmf->pte); unlock_page(head); continue; unlock: @@ -3072,8 +3067,6 @@ unlock: pte_unmap_unlock(vmf->pte, vmf->ptl); out: rcu_read_unlock(); - vmf->flags = flags; - vmf->address = address; WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss); return ret; } diff --git a/mm/memory.c b/mm/memory.c index f0e7c589ca9d..7b1307873325 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3733,11 +3733,11 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) } #endif -void do_set_pte(struct vm_fault *vmf, struct page *page) +void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr) { struct vm_area_struct *vma = vmf->vma; bool write = vmf->flags & FAULT_FLAG_WRITE; - bool prefault = vmf->flags & FAULT_FLAG_PREFAULT; + bool prefault = vmf->address != addr; pte_t entry; flush_icache_page(vma, page); @@ -3753,13 +3753,13 @@ void do_set_pte(struct vm_fault *vmf, struct page *page) /* copy-on-write page */ if (write && !(vma->vm_flags & VM_SHARED)) { inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); - page_add_new_anon_rmap(page, vma, vmf->address, false); + page_add_new_anon_rmap(page, vma, addr, false); lru_cache_add_inactive_or_unevictable(page, vma); } else { inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); page_add_file_rmap(page, false); } - set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); + set_pte_at(vma->vm_mm, addr, vmf->pte, entry); } /** @@ -3819,7 +3819,7 @@ vm_fault_t finish_fault(struct vm_fault *vmf) ret = 0; /* Re-check under ptl */ if (likely(pte_none(*vmf->pte))) - do_set_pte(vmf, page); + do_set_pte(vmf, page, vmf->address); else ret = VM_FAULT_NOPAGE; -- cgit v1.2.3 From 2b635dd372f6c8f27644c662bb48d10376ce561a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 14 Jan 2021 15:33:49 +0000 Subject: mm: Avoid modifying vmf.address in __collapse_huge_page_swapin() In preparation for const-ifying the anonymous struct field of 'struct vm_fault', rework __collapse_huge_page_swapin() to avoid continuously updating vmf.address and instead populate a new 'struct vm_fault' on the stack for each page being processed. Cc: Kirill A. 
Shutemov Cc: Linus Torvalds Signed-off-by: Will Deacon --- mm/khugepaged.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 67ab391a5373..fb0fdaec34d5 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -991,38 +991,41 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address, static bool __collapse_huge_page_swapin(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long address, pmd_t *pmd, + unsigned long haddr, pmd_t *pmd, int referenced) { int swapped_in = 0; vm_fault_t ret = 0; - struct vm_fault vmf = { - .vma = vma, - .address = address, - .flags = FAULT_FLAG_ALLOW_RETRY, - .pmd = pmd, - .pgoff = linear_page_index(vma, address), - }; - - vmf.pte = pte_offset_map(pmd, address); - for (; vmf.address < address + HPAGE_PMD_NR*PAGE_SIZE; - vmf.pte++, vmf.address += PAGE_SIZE) { + unsigned long address, end = haddr + (HPAGE_PMD_NR * PAGE_SIZE); + + for (address = haddr; address < end; address += PAGE_SIZE) { + struct vm_fault vmf = { + .vma = vma, + .address = address, + .pgoff = linear_page_index(vma, haddr), + .flags = FAULT_FLAG_ALLOW_RETRY, + .pmd = pmd, + }; + + vmf.pte = pte_offset_map(pmd, address); vmf.orig_pte = *vmf.pte; - if (!is_swap_pte(vmf.orig_pte)) + if (!is_swap_pte(vmf.orig_pte)) { + pte_unmap(vmf.pte); continue; + } swapped_in++; ret = do_swap_page(&vmf); /* do_swap_page returns VM_FAULT_RETRY with released mmap_lock */ if (ret & VM_FAULT_RETRY) { mmap_read_lock(mm); - if (hugepage_vma_revalidate(mm, address, &vmf.vma)) { + if (hugepage_vma_revalidate(mm, haddr, &vma)) { /* vma is no longer available, don't continue to swapin */ trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); return false; } /* check if the pmd is still valid */ - if (mm_find_pmd(mm, address) != pmd) { + if (mm_find_pmd(mm, haddr) != pmd) { trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); return false; } @@ -1031,11 +1034,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm, trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0); return false; } - /* pte is unmapped now, we need to map it */ - vmf.pte = pte_offset_map(pmd, vmf.address); } - vmf.pte--; - pte_unmap(vmf.pte); /* Drain LRU add pagevec to remove extra pin on the swapped in pages */ if (swapped_in) -- cgit v1.2.3 From 8c63ca5bc3e19f11128e8e285dcf20aac6768f97 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 14 Jan 2021 15:42:14 +0000 Subject: mm: Use static initialisers for immutable fields of 'struct vm_fault' In preparation for const-ifying the anonymous struct field of 'struct vm_fault', ensure that it is initialised using designated initialisers. Cc: Kirill A. 
Shutemov Cc: Linus Torvalds Reviewed-by: Nick Desaulniers Signed-off-by: Will Deacon --- mm/shmem.c | 6 +++--- mm/swapfile.c | 11 ++++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 7c6b6d8f6c39..1b254fbfdf52 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1520,11 +1520,11 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp, { struct vm_area_struct pvma; struct page *page; - struct vm_fault vmf; + struct vm_fault vmf = { + .vma = &pvma, + }; shmem_pseudo_vma_init(&pvma, info, index); - vmf.vma = &pvma; - vmf.address = 0; page = swap_cluster_readahead(swap, gfp, &vmf); shmem_pseudo_vma_destroy(&pvma); diff --git a/mm/swapfile.c b/mm/swapfile.c index 9fffc5af29d1..2b570a566276 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1951,8 +1951,6 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, si = swap_info[type]; pte = pte_offset_map(pmd, addr); do { - struct vm_fault vmf; - if (!is_swap_pte(*pte)) continue; @@ -1968,9 +1966,12 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, swap_map = &si->swap_map[offset]; page = lookup_swap_cache(entry, vma, addr); if (!page) { - vmf.vma = vma; - vmf.address = addr; - vmf.pmd = pmd; + struct vm_fault vmf = { + .vma = vma, + .address = addr, + .pmd = pmd, + }; + page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, &vmf); } -- cgit v1.2.3 From 5857c9209ce58f8e262889539ccdf63e73ad7a93 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 14 Jan 2021 15:44:09 +0000 Subject: mm: Mark anonymous struct field of 'struct vm_fault' as 'const' The fields of this struct are only ever read after being initialised, so mark it 'const' before somebody tries to modify it again. GCC will then complain (with an error) about modification of these fields after they have been initialised, although LLVM currently allows them without even a warning: https://bugs.llvm.org/show_bug.cgi?id=48755 Hopefully, future versions of LLVM will emit a warning. Cc: Kirill A. Shutemov Cc: Linus Torvalds Signed-off-by: Will Deacon --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index e0f056753bef..7ff3d9817d38 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -514,7 +514,7 @@ static inline bool fault_flag_allow_retry_first(unsigned int flags) * pgoff should be used in favour of virtual_address, if possible. */ struct vm_fault { - struct { + const struct { struct vm_area_struct *vma; /* Target VMA */ gfp_t gfp_mask; /* gfp mask to be used for allocations */ pgoff_t pgoff; /* Logical page offset based on vma */ -- cgit v1.2.3 From 390596c9959c2a4f5b456df339f0604df3d55fe0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 5 Nov 2020 16:29:44 +0100 Subject: random: avoid arch_get_random_seed_long() when collecting IRQ randomness When reseeding the CRNG periodically, arch_get_random_seed_long() is called to obtain entropy from an architecture specific source if one is implemented. In most cases, these are special instructions, but in some cases, such as on ARM, we may want to back this using firmware calls, which are considerably more expensive. Another call to arch_get_random_seed_long() exists in the CRNG driver, in add_interrupt_randomness(), which collects entropy by capturing inter-interrupt timing and relying on interrupt jitter to provide random bits. 
This is done by keeping a per-CPU state, and mixing in the IRQ number, the cycle counter and the return address every time an interrupt is taken, and mixing this per-CPU state into the entropy pool every 64 invocations, or at least once per second. The entropy that is gathered this way is credited as 1 bit of entropy. Every time this happens, arch_get_random_seed_long() is invoked, and the result is mixed in as well, and also credited with 1 bit of entropy. This means that arch_get_random_seed_long() is called at least once per second on every CPU, which seems excessive, and doesn't really scale, especially in a virtualization scenario where CPUs may be oversubscribed: in cases where arch_get_random_seed_long() is backed by an instruction that actually goes back to a shared hardware entropy source (such as RNDRRS on ARM), we will end up hitting it hundreds of times per second. So let's drop the call to arch_get_random_seed_long() from add_interrupt_randomness(), and instead rely on crng_reseed() to call the arch hook to get random seed material from the platform. Signed-off-by: Ard Biesheuvel Reviewed-by: Andre Przywara Tested-by: Andre Przywara Reviewed-by: Eric Biggers Acked-by: Marc Zyngier Reviewed-by: Jason A. Donenfeld Link: https://lore.kernel.org/r/20201105152944.16953-1-ardb@kernel.org Signed-off-by: Will Deacon --- drivers/char/random.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 5f3b8ac9d97b..84e24986a97a 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1261,8 +1261,6 @@ void add_interrupt_randomness(int irq, int irq_flags) cycles_t cycles = random_get_entropy(); __u32 c_high, j_high; __u64 ip; - unsigned long seed; - int credit = 0; if (cycles == 0) cycles = get_reg(fast_pool, regs); @@ -1298,23 +1296,12 @@ void add_interrupt_randomness(int irq, int irq_flags) fast_pool->last = now; __mix_pool_bytes(r, &fast_pool->pool, sizeof(fast_pool->pool)); - - /* - * If we have architectural seed generator, produce a seed and - * add it to the pool. For the sake of paranoia don't let the - * architectural seed generator dominate the input from the - * interrupt noise. - */ - if (arch_get_random_seed_long(&seed)) { - __mix_pool_bytes(r, &seed, sizeof(seed)); - credit = 1; - } spin_unlock(&r->lock); fast_pool->count = 0; /* award one bit for the contents of the fast pool */ - credit_entropy_bits(r, credit + 1); + credit_entropy_bits(r, 1); } EXPORT_SYMBOL_GPL(add_interrupt_randomness); -- cgit v1.2.3 From a37e31fc97efe7f7c68cb381cf4390e472c09061 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 6 Jan 2021 10:34:50 +0000 Subject: firmware: smccc: Introduce SMCCC TRNG framework The ARM DEN0098 document describes an SMCCC based firmware service to deliver hardware generated random numbers. Its existence is advertised according to the SMCCC v1.1 specification. Add a (dummy) call to probe functions implemented in each architecture (ARM and arm64), to determine the existence of this interface. For now these return false, but they will be overridden by each architecture's support patch.
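As a sketch of what such an architecture override looks like, the arm64 support patch later in this series replaces the stub with a probe along these lines:

	static inline bool __init smccc_probe_trng(void)
	{
		struct arm_smccc_res res;

		/* Firmware returns a negative value in a0 when the
		 * TRNG interface is not implemented. */
		arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_VERSION, &res);
		if ((s32)res.a0 < 0)
			return false;

		return res.a0 >= ARM_SMCCC_TRNG_MIN_VERSION;
	}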
Signed-off-by: Andre Przywara Reviewed-by: Linus Walleij Reviewed-by: Sudeep Holla Signed-off-by: Will Deacon --- arch/arm/include/asm/archrandom.h | 10 ++++++++++ arch/arm64/include/asm/archrandom.h | 12 ++++++++++++ drivers/firmware/smccc/smccc.c | 6 ++++++ 3 files changed, 28 insertions(+) create mode 100644 arch/arm/include/asm/archrandom.h diff --git a/arch/arm/include/asm/archrandom.h b/arch/arm/include/asm/archrandom.h new file mode 100644 index 000000000000..a8e84ca5c2ee --- /dev/null +++ b/arch/arm/include/asm/archrandom.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ARCHRANDOM_H +#define _ASM_ARCHRANDOM_H + +static inline bool __init smccc_probe_trng(void) +{ + return false; +} + +#endif /* _ASM_ARCHRANDOM_H */ diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h index ffb1a40d5475..abe07c21da8e 100644 --- a/arch/arm64/include/asm/archrandom.h +++ b/arch/arm64/include/asm/archrandom.h @@ -8,6 +8,11 @@ #include #include +static inline bool __init smccc_probe_trng(void) +{ + return false; +} + static inline bool __arm64_rndr(unsigned long *v) { bool ok; @@ -79,5 +84,12 @@ arch_get_random_seed_long_early(unsigned long *v) } #define arch_get_random_seed_long_early arch_get_random_seed_long_early +#else /* !CONFIG_ARCH_RANDOM */ + +static inline bool __init smccc_probe_trng(void) +{ + return false; +} + #endif /* CONFIG_ARCH_RANDOM */ #endif /* _ASM_ARCHRANDOM_H */ diff --git a/drivers/firmware/smccc/smccc.c b/drivers/firmware/smccc/smccc.c index 00c88b809c0c..d52bfc5ed5e4 100644 --- a/drivers/firmware/smccc/smccc.c +++ b/drivers/firmware/smccc/smccc.c @@ -5,16 +5,22 @@ #define pr_fmt(fmt) "smccc: " fmt +#include #include #include +#include static u32 smccc_version = ARM_SMCCC_VERSION_1_0; static enum arm_smccc_conduit smccc_conduit = SMCCC_CONDUIT_NONE; +bool __ro_after_init smccc_trng_available = false; + void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit) { smccc_version = version; smccc_conduit = conduit; + + smccc_trng_available = smccc_probe_trng(); } enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void) -- cgit v1.2.3 From 38db987316a38a3fe55ff7f5f4653fcb520a9d26 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 6 Jan 2021 10:34:52 +0000 Subject: arm64: Add support for SMCCC TRNG entropy source The ARM architected TRNG firmware interface, described in ARM spec DEN0098, defines an ARM SMCCC based interface to a true random number generator, provided by firmware. This can be discovered via the SMCCC >=v1.1 interface, and provides up to 192 bits of entropy per call. Hook this SMC call into arm64's arch_get_random_*() implementation, coming to the rescue when the CPU does not implement the ARM v8.5 RNG system registers. For the detection, we piggy back on the PSCI/SMCCC discovery (which gives us the conduit to use (hvc/smc)), then try to call the ARM_SMCCC_TRNG_VERSION function, which returns -1 if this interface is not implemented. 
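The calling convention is simple; a minimal sketch of requesting 64 bits of entropy, mirroring the hunks below (per DEN0098 the entropy is returned in registers a1-a3, of which only a3 is consumed here; *v is the output pointer of arch_get_random_seed_long()):

	struct arm_smccc_res res;

	/* Ask the firmware TRNG for 64 bits of entropy; a negative
	 * a0 indicates failure, otherwise the bits are in res.a3. */
	arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND64, 64, &res);
	if ((int)res.a0 >= 0)
		*v = res.a3;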
Reviewed-by: Mark Brown Signed-off-by: Andre Przywara Signed-off-by: Will Deacon --- arch/arm64/include/asm/archrandom.h | 72 +++++++++++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h index abe07c21da8e..09e43272ccb0 100644 --- a/arch/arm64/include/asm/archrandom.h +++ b/arch/arm64/include/asm/archrandom.h @@ -4,13 +4,24 @@ #ifdef CONFIG_ARCH_RANDOM +#include #include #include #include +#define ARM_SMCCC_TRNG_MIN_VERSION 0x10000UL + +extern bool smccc_trng_available; + static inline bool __init smccc_probe_trng(void) { - return false; + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_VERSION, &res); + if ((s32)res.a0 < 0) + return false; + + return res.a0 >= ARM_SMCCC_TRNG_MIN_VERSION; } static inline bool __arm64_rndr(unsigned long *v) @@ -43,26 +54,55 @@ static inline bool __must_check arch_get_random_int(unsigned int *v) static inline bool __must_check arch_get_random_seed_long(unsigned long *v) { + struct arm_smccc_res res; + + /* + * We prefer the SMCCC call, since its semantics (return actual + * hardware backed entropy) is closer to the idea behind this + * function here than what even the RNDRSS register provides + * (the output of a pseudo RNG freshly seeded by a TRNG). + */ + if (smccc_trng_available) { + arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND64, 64, &res); + if ((int)res.a0 >= 0) { + *v = res.a3; + return true; + } + } + /* * Only support the generic interface after we have detected * the system wide capability, avoiding complexity with the * cpufeature code and with potential scheduling between CPUs * with and without the feature. */ - if (!cpus_have_const_cap(ARM64_HAS_RNG)) - return false; + if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v)) + return true; - return __arm64_rndr(v); + return false; } - static inline bool __must_check arch_get_random_seed_int(unsigned int *v) { + struct arm_smccc_res res; unsigned long val; - bool ok = arch_get_random_seed_long(&val); - *v = val; - return ok; + if (smccc_trng_available) { + arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND64, 32, &res); + if ((int)res.a0 >= 0) { + *v = res.a3 & GENMASK(31, 0); + return true; + } + } + + if (cpus_have_const_cap(ARM64_HAS_RNG)) { + if (__arm64_rndr(&val)) { + *v = val; + return true; + } + } + + return false; } static inline bool __init __early_cpu_has_rndr(void) @@ -77,10 +117,20 @@ arch_get_random_seed_long_early(unsigned long *v) { WARN_ON(system_state != SYSTEM_BOOTING); - if (!__early_cpu_has_rndr()) - return false; + if (smccc_trng_available) { + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(ARM_SMCCC_TRNG_RND64, 64, &res); + if ((int)res.a0 >= 0) { + *v = res.a3; + return true; + } + } - return __arm64_rndr(v) + if (__early_cpu_has_rndr() && __arm64_rndr(v)) + return true; + + return false; } #define arch_get_random_seed_long_early arch_get_random_seed_long_early -- cgit v1.2.3 From f0b13ee23241846f6f6cd0d119a8ac8059416ec4 Mon Sep 17 00:00:00 2001 From: Sudarshan Rajagopalan Date: Wed, 20 Jan 2021 21:29:13 -0800 Subject: arm64/sparsemem: reduce SECTION_SIZE_BITS memory_block_size_bytes() determines the memory hotplug granularity, i.e. the amount of memory which can be hot added or hot removed from the kernel. The generic value here is MIN_MEMORY_BLOCK_SIZE (1UL << SECTION_SIZE_BITS) for memory_block_size_bytes() on platforms, like arm64, that do not override it.
The current SECTION_SIZE_BITS is 30, i.e. 1GB, which is large; a reduction here increases memory hotplug granularity, thus improving its agility. A reduced section size also reduces memory wastage in the vmemmap mapping for sections with large memory holes. So we try to set the smallest section size possible. A section size bits selection must follow: (MAX_ORDER - 1 + PAGE_SHIFT) <= SECTION_SIZE_BITS. CONFIG_FORCE_MAX_ZONEORDER is always defined on arm64, so just following it would help achieve the smallest section size. SECTION_SIZE_BITS = (CONFIG_FORCE_MAX_ZONEORDER - 1 + PAGE_SHIFT) SECTION_SIZE_BITS = 22 (11 - 1 + 12) i.e. 4MB for 4K pages SECTION_SIZE_BITS = 24 (11 - 1 + 14) i.e. 16MB for 16K pages without THP SECTION_SIZE_BITS = 25 (12 - 1 + 14) i.e. 32MB for 16K pages with THP SECTION_SIZE_BITS = 26 (11 - 1 + 16) i.e. 64MB for 64K pages without THP SECTION_SIZE_BITS = 29 (14 - 1 + 16) i.e. 512MB for 64K pages with THP But there are other problems in reducing SECTION_SIZE_BITS. Reducing it by too much would over-populate /sys/devices/system/memory/ and also consume too many page->flags bits in the !vmemmap case. Also, the section size needs to be a multiple of 128MB to have PMD based vmemmap mapping with CONFIG_ARM64_4K_PAGES. Given these constraints, let's just reduce the section size to 128MB for 4K and 16K base page size configs, and to 512MB for the 64K base page size config. Signed-off-by: Sudarshan Rajagopalan Suggested-by: Anshuman Khandual Suggested-by: David Hildenbrand Cc: Catalin Marinas Cc: Will Deacon Cc: Anshuman Khandual Cc: David Hildenbrand Cc: Mike Rapoport Cc: Mark Rutland Cc: Logan Gunthorpe Cc: Andrew Morton Cc: Steven Price Cc: Suren Baghdasaryan Reviewed-by: David Hildenbrand Acked-by: Mike Rapoport Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/43843c5e092bfe3ec4c41e3c8c78a7ee35b69bb0.1611206601.git.sudaraja@codeaurora.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/sparsemem.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h index 1f43fcc79738..eb4a75d720ed 100644 --- a/arch/arm64/include/asm/sparsemem.h +++ b/arch/arm64/include/asm/sparsemem.h @@ -7,7 +7,26 @@ #ifdef CONFIG_SPARSEMEM #define MAX_PHYSMEM_BITS CONFIG_ARM64_PA_BITS -#define SECTION_SIZE_BITS 30 -#endif + +/* * Section size must be at least 512MB for 64K base * page size config. Otherwise it will be less than * (MAX_ORDER - 1) and the build process will fail. */ +#ifdef CONFIG_ARM64_64K_PAGES +#define SECTION_SIZE_BITS 29 + +#else + +/* * Section size must be at least 128MB for 4K base * page size config. Otherwise PMD based huge page * entries could not be created for vmemmap mappings. * 16K follows 4K for simplicity. */ +#define SECTION_SIZE_BITS 27 +#endif /* CONFIG_ARM64_64K_PAGES */ + +#endif /* CONFIG_SPARSEMEM*/ #endif -- cgit v1.2.3 From 507d664450f84ab2d5601aed55c6a4b3af9ec4c2 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Tue, 26 Jan 2021 20:24:44 +0800 Subject: arm64: mm: Remove unused header file Many header files are never used; let's remove them directly.
Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/1611663884-43329-1-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- arch/arm64/mm/mmap.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 07937b49cb88..c66b3878712c 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -5,20 +5,10 @@ * Copyright (C) 2012 ARM Ltd. */ -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include -#include +#include /* * You really shouldn't be using read() or write() on /dev/mem. This might go -- cgit v1.2.3 From 1e193c70f5bba8fc66b9da19be64e9a9229a37d6 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Mon, 25 Jan 2021 19:55:53 +0800 Subject: arm64: cacheflush: Remove stale comment Remove a stale comment, left behind since commit a7ba121215fa ("arm64: use asm-generic/cacheflush.h"). Cc: Christoph Hellwig Cc: Andrew Morton Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/1611575753-36435-1-git-send-email-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 45217f21f1fe..52e5c1623224 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -30,11 +30,6 @@ * the implementation assumes non-aliasing VIPT D-cache and (aliasing) * VIPT I-cache. * - * flush_cache_mm(mm) - * - * Clean and invalidate all user space cache entries - * before a change of page tables. - * * flush_icache_range(start, end) * * Ensure coherency between the I-cache and the D-cache in the -- cgit v1.2.3 From c7b9095e87bf81f029fbae2e5d6a8354b6f11987 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 27 Jan 2021 12:52:16 +0000 Subject: arm64: Include linux/io.h in mm/mmap.c Commit 507d664450f8 ("arm64: mm: Remove unused header file") removed a bunch of apparently "unused" header inclusions from our mm/mmap.c implementation, but in doing so introduced the following warning when building with W=1: >> arch/arm64/mm/mmap.c:17:5: warning: no previous prototype for 'valid_phys_addr_range' [-Wmissing-prototypes] 17 | int valid_phys_addr_range(phys_addr_t addr, size_t size) | ^~~~~~~~~~~~~~~~~~~~~ >> arch/arm64/mm/mmap.c:36:5: warning: no previous prototype for 'valid_mmap_phys_addr_range' [-Wmissing-prototypes] 36 | int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) | ^~~~~~~~~~~~~~~~~~~~~~~~~~ Add back the linux/io.h header inclusion to pull in the missing prototypes. Reported-by: kernel test robot Link: https://lore.kernel.org/r/202101271438.V9TmBC31-lkp@intel.com Signed-off-by: Will Deacon --- arch/arm64/mm/mmap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index c66b3878712c..a38f54cd638c 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -5,6 +5,7 @@ * Copyright (C) 2012 ARM Ltd. */ +#include <linux/io.h> #include #include -- cgit v1.2.3 From 117cda9a7847286484d2353af64728a9875effd4 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Mon, 25 Jan 2021 14:19:06 -0500 Subject: arm64: kexec: make dtb_mem always enabled Currently, dtb_mem is enabled only when CONFIG_KEXEC_FILE is enabled. This adds ugly ifdefs to C files. Always enable dtb_mem; when it is not used, it is NULL.
Change the dtb_mem to phys_addr_t, as it is a physical address. Signed-off-by: Pavel Tatashin Reviewed-by: James Morse Link: https://lore.kernel.org/r/20210125191923.1060122-2-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/kexec.h | 4 ++-- arch/arm64/kernel/machine_kexec.c | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index d24b527e8c00..61530ec3a9b1 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -90,18 +90,18 @@ static inline void crash_prepare_suspend(void) {} static inline void crash_post_resume(void) {} #endif -#ifdef CONFIG_KEXEC_FILE #define ARCH_HAS_KIMAGE_ARCH struct kimage_arch { void *dtb; - unsigned long dtb_mem; + phys_addr_t dtb_mem; /* Core ELF header buffer */ void *elf_headers; unsigned long elf_headers_mem; unsigned long elf_headers_sz; }; +#ifdef CONFIG_KEXEC_FILE extern const struct kexec_file_ops kexec_image_ops; struct kimage; diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index a0b144cfaea7..8096a6aa1d49 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -204,11 +204,7 @@ void machine_kexec(struct kimage *kimage) * In kexec_file case, the kernel starts directly without purgatory. */ cpu_soft_restart(reboot_code_buffer_phys, kimage->head, kimage->start, -#ifdef CONFIG_KEXEC_FILE - kimage->arch.dtb_mem); -#else - 0); -#endif + kimage->arch.dtb_mem); BUG(); /* Should never get here. */ } -- cgit v1.2.3 From 41f67d40a31d6b007d372461f171875fd940f17d Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Mon, 25 Jan 2021 14:19:07 -0500 Subject: arm64: hibernate: variable pudp is used instead of p4dp p4dp should be used when the p4d page is allocated. This is not a functional issue, but it should be fixed for logical correctness. Fixes: e9f6376858b9 ("arm64: add support for folded p4d page tables") Signed-off-by: Pavel Tatashin Link: https://lore.kernel.org/r/20210125191923.1060122-3-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/kernel/hibernate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 9c9f47e9f7f4..0a54d81c90f9 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -190,10 +190,10 @@ static int trans_pgd_map_page(pgd_t *trans_pgd, void *page, pgdp = pgd_offset_pgd(trans_pgd, dst_addr); if (pgd_none(READ_ONCE(*pgdp))) { - pudp = (void *)get_safe_page(GFP_ATOMIC); - if (!pudp) + p4dp = (void *)get_safe_page(GFP_ATOMIC); + if (!pgdp) return -ENOMEM; - pgd_populate(&init_mm, pgdp, pudp); + pgd_populate(&init_mm, pgdp, p4dp); } p4dp = p4d_offset(pgdp, dst_addr); -- cgit v1.2.3 From 072e3d96a79a5876691c6532f91f930157144a2a Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Mon, 25 Jan 2021 14:19:08 -0500 Subject: arm64: hibernate: move page handling functions to new trans_pgd.c Now that we have abstracted the required functions, move them to a new home. Later, we will generalize these functions to be useful outside of hibernation.
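As a rough caller's-eye sketch of the interface being moved (mirroring how hibernate uses it; the PAGE_OFFSET/PAGE_END bounds and the page/dst_addr variables are illustrative, not taken from this diff):

	pgd_t *trans_pgd;
	int rc;

	/* Copy the kernel's linear-map page tables... */
	rc = trans_pgd_create_copy(&trans_pgd, PAGE_OFFSET, PAGE_END);
	if (rc)
		return rc;

	/* ...then map one extra page at dst_addr in the new tables. */
	rc = trans_pgd_map_page(trans_pgd, page, dst_addr, PAGE_KERNEL_EXEC);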
Signed-off-by: Pavel Tatashin Reviewed-by: James Morse Link: https://lore.kernel.org/r/20210125191923.1060122-4-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 4 + arch/arm64/include/asm/trans_pgd.h | 21 ++++ arch/arm64/kernel/hibernate.c | 228 +-------------------------------- arch/arm64/mm/Makefile | 1 + arch/arm64/mm/trans_pgd.c | 250 +++++++++++++++++++++++++++++++++++++ 5 files changed, 277 insertions(+), 227 deletions(-) create mode 100644 arch/arm64/include/asm/trans_pgd.h create mode 100644 arch/arm64/mm/trans_pgd.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f39568b28ec1..fc0ed9d6e011 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1132,6 +1132,10 @@ config CRASH_DUMP For more details see Documentation/admin-guide/kdump/kdump.rst +config TRANS_TABLE + def_bool y + depends on HIBERNATION + config XEN_DOM0 def_bool y depends on XEN diff --git a/arch/arm64/include/asm/trans_pgd.h b/arch/arm64/include/asm/trans_pgd.h new file mode 100644 index 000000000000..23153c13d1ce --- /dev/null +++ b/arch/arm64/include/asm/trans_pgd.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (c) 2020, Microsoft Corporation. + * Pavel Tatashin + */ + +#ifndef _ASM_TRANS_TABLE_H +#define _ASM_TRANS_TABLE_H + +#include +#include +#include + +int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start, + unsigned long end); + +int trans_pgd_map_page(pgd_t *trans_pgd, void *page, unsigned long dst_addr, + pgprot_t pgprot); + +#endif /* _ASM_TRANS_TABLE_H */ diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 0a54d81c90f9..4a38662f0d90 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -16,7 +16,6 @@ #define pr_fmt(x) "hibernate: " x #include #include -#include #include #include #include @@ -31,13 +30,12 @@ #include #include #include -#include -#include #include #include #include #include #include +#include #include /* @@ -178,54 +176,6 @@ int arch_hibernation_header_restore(void *addr) } EXPORT_SYMBOL(arch_hibernation_header_restore); -static int trans_pgd_map_page(pgd_t *trans_pgd, void *page, - unsigned long dst_addr, - pgprot_t pgprot) -{ - pgd_t *pgdp; - p4d_t *p4dp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - - pgdp = pgd_offset_pgd(trans_pgd, dst_addr); - if (pgd_none(READ_ONCE(*pgdp))) { - p4dp = (void *)get_safe_page(GFP_ATOMIC); - if (!pgdp) - return -ENOMEM; - pgd_populate(&init_mm, pgdp, p4dp); - } - - p4dp = p4d_offset(pgdp, dst_addr); - if (p4d_none(READ_ONCE(*p4dp))) { - pudp = (void *)get_safe_page(GFP_ATOMIC); - if (!pudp) - return -ENOMEM; - p4d_populate(&init_mm, p4dp, pudp); - } - - pudp = pud_offset(p4dp, dst_addr); - if (pud_none(READ_ONCE(*pudp))) { - pmdp = (void *)get_safe_page(GFP_ATOMIC); - if (!pmdp) - return -ENOMEM; - pud_populate(&init_mm, pudp, pmdp); - } - - pmdp = pmd_offset(pudp, dst_addr); - if (pmd_none(READ_ONCE(*pmdp))) { - ptep = (void *)get_safe_page(GFP_ATOMIC); - if (!ptep) - return -ENOMEM; - pmd_populate_kernel(&init_mm, pmdp, ptep); - } - - ptep = pte_offset_kernel(pmdp, dst_addr); - set_pte(ptep, pfn_pte(virt_to_pfn(page), PAGE_KERNEL_EXEC)); - - return 0; -} - /* * Copies length bytes, starting at src_start into an new page, * perform cache maintenance, then maps it at the specified address low @@ -462,182 +412,6 @@ int swsusp_arch_suspend(void) return ret; } -static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr) -{ - pte_t pte = READ_ONCE(*src_ptep); - - if (pte_valid(pte)) { - /* - * 
Resume will overwrite areas that may be marked - * read only (code, rodata). Clear the RDONLY bit from - * the temporary mappings we use during restore. - */ - set_pte(dst_ptep, pte_mkwrite(pte)); - } else if (debug_pagealloc_enabled() && !pte_none(pte)) { - /* - * debug_pagealloc will removed the PTE_VALID bit if - * the page isn't in use by the resume kernel. It may have - * been in use by the original kernel, in which case we need - * to put it back in our copy to do the restore. - * - * Before marking this entry valid, check the pfn should - * be mapped. - */ - BUG_ON(!pfn_valid(pte_pfn(pte))); - - set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte))); - } -} - -static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start, - unsigned long end) -{ - pte_t *src_ptep; - pte_t *dst_ptep; - unsigned long addr = start; - - dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC); - if (!dst_ptep) - return -ENOMEM; - pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep); - dst_ptep = pte_offset_kernel(dst_pmdp, start); - - src_ptep = pte_offset_kernel(src_pmdp, start); - do { - _copy_pte(dst_ptep, src_ptep, addr); - } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end); - - return 0; -} - -static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start, - unsigned long end) -{ - pmd_t *src_pmdp; - pmd_t *dst_pmdp; - unsigned long next; - unsigned long addr = start; - - if (pud_none(READ_ONCE(*dst_pudp))) { - dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC); - if (!dst_pmdp) - return -ENOMEM; - pud_populate(&init_mm, dst_pudp, dst_pmdp); - } - dst_pmdp = pmd_offset(dst_pudp, start); - - src_pmdp = pmd_offset(src_pudp, start); - do { - pmd_t pmd = READ_ONCE(*src_pmdp); - - next = pmd_addr_end(addr, end); - if (pmd_none(pmd)) - continue; - if (pmd_table(pmd)) { - if (copy_pte(dst_pmdp, src_pmdp, addr, next)) - return -ENOMEM; - } else { - set_pmd(dst_pmdp, - __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY)); - } - } while (dst_pmdp++, src_pmdp++, addr = next, addr != end); - - return 0; -} - -static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start, - unsigned long end) -{ - pud_t *dst_pudp; - pud_t *src_pudp; - unsigned long next; - unsigned long addr = start; - - if (p4d_none(READ_ONCE(*dst_p4dp))) { - dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC); - if (!dst_pudp) - return -ENOMEM; - p4d_populate(&init_mm, dst_p4dp, dst_pudp); - } - dst_pudp = pud_offset(dst_p4dp, start); - - src_pudp = pud_offset(src_p4dp, start); - do { - pud_t pud = READ_ONCE(*src_pudp); - - next = pud_addr_end(addr, end); - if (pud_none(pud)) - continue; - if (pud_table(pud)) { - if (copy_pmd(dst_pudp, src_pudp, addr, next)) - return -ENOMEM; - } else { - set_pud(dst_pudp, - __pud(pud_val(pud) & ~PUD_SECT_RDONLY)); - } - } while (dst_pudp++, src_pudp++, addr = next, addr != end); - - return 0; -} - -static int copy_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start, - unsigned long end) -{ - p4d_t *dst_p4dp; - p4d_t *src_p4dp; - unsigned long next; - unsigned long addr = start; - - dst_p4dp = p4d_offset(dst_pgdp, start); - src_p4dp = p4d_offset(src_pgdp, start); - do { - next = p4d_addr_end(addr, end); - if (p4d_none(READ_ONCE(*src_p4dp))) - continue; - if (copy_pud(dst_p4dp, src_p4dp, addr, next)) - return -ENOMEM; - } while (dst_p4dp++, src_p4dp++, addr = next, addr != end); - - return 0; -} - -static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start, - unsigned long end) -{ - unsigned long next; - unsigned long addr = start; - pgd_t *src_pgdp = pgd_offset_k(start); - - dst_pgdp = 
pgd_offset_pgd(dst_pgdp, start); - do { - next = pgd_addr_end(addr, end); - if (pgd_none(READ_ONCE(*src_pgdp))) - continue; - if (copy_p4d(dst_pgdp, src_pgdp, addr, next)) - return -ENOMEM; - } while (dst_pgdp++, src_pgdp++, addr = next, addr != end); - - return 0; -} - -static int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start, - unsigned long end) -{ - int rc; - pgd_t *trans_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC); - - if (!trans_pgd) { - pr_err("Failed to allocate memory for temporary page tables.\n"); - return -ENOMEM; - } - - rc = copy_page_tables(trans_pgd, start, end); - if (!rc) - *dst_pgdp = trans_pgd; - - return rc; -} - /* * Setup then Resume from the hibernate image using swsusp_arch_suspend_exit(). * diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 5ead3c3de3b6..77222d92667a 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -6,6 +6,7 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PTDUMP_CORE) += ptdump.o obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o +obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_ARM64_MTE) += mteswap.o diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c new file mode 100644 index 000000000000..e048d1f5c912 --- /dev/null +++ b/arch/arm64/mm/trans_pgd.c @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Transitional page tables for kexec and hibernate + * + * This file derived from: arch/arm64/kernel/hibernate.c + * + * Copyright (c) 2020, Microsoft Corporation. + * Pavel Tatashin + * + */ + +/* + * Transitional tables are used during system transferring from one world to + * another: such as during hibernate restore, and kexec reboots. During these + * phases one cannot rely on page table not being overwritten. This is because + * hibernate and kexec can overwrite the current page tables during transition. + */ + +#include +#include +#include +#include +#include +#include +#include + +static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr) +{ + pte_t pte = READ_ONCE(*src_ptep); + + if (pte_valid(pte)) { + /* + * Resume will overwrite areas that may be marked + * read only (code, rodata). Clear the RDONLY bit from + * the temporary mappings we use during restore. + */ + set_pte(dst_ptep, pte_mkwrite(pte)); + } else if (debug_pagealloc_enabled() && !pte_none(pte)) { + /* + * debug_pagealloc will removed the PTE_VALID bit if + * the page isn't in use by the resume kernel. It may have + * been in use by the original kernel, in which case we need + * to put it back in our copy to do the restore. + * + * Before marking this entry valid, check the pfn should + * be mapped. 
+ */ + BUG_ON(!pfn_valid(pte_pfn(pte))); + + set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte))); + } +} + +static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start, + unsigned long end) +{ + pte_t *src_ptep; + pte_t *dst_ptep; + unsigned long addr = start; + + dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC); + if (!dst_ptep) + return -ENOMEM; + pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep); + dst_ptep = pte_offset_kernel(dst_pmdp, start); + + src_ptep = pte_offset_kernel(src_pmdp, start); + do { + _copy_pte(dst_ptep, src_ptep, addr); + } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end); + + return 0; +} + +static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start, + unsigned long end) +{ + pmd_t *src_pmdp; + pmd_t *dst_pmdp; + unsigned long next; + unsigned long addr = start; + + if (pud_none(READ_ONCE(*dst_pudp))) { + dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pmdp) + return -ENOMEM; + pud_populate(&init_mm, dst_pudp, dst_pmdp); + } + dst_pmdp = pmd_offset(dst_pudp, start); + + src_pmdp = pmd_offset(src_pudp, start); + do { + pmd_t pmd = READ_ONCE(*src_pmdp); + + next = pmd_addr_end(addr, end); + if (pmd_none(pmd)) + continue; + if (pmd_table(pmd)) { + if (copy_pte(dst_pmdp, src_pmdp, addr, next)) + return -ENOMEM; + } else { + set_pmd(dst_pmdp, + __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY)); + } + } while (dst_pmdp++, src_pmdp++, addr = next, addr != end); + + return 0; +} + +static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start, + unsigned long end) +{ + pud_t *dst_pudp; + pud_t *src_pudp; + unsigned long next; + unsigned long addr = start; + + if (p4d_none(READ_ONCE(*dst_p4dp))) { + dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC); + if (!dst_pudp) + return -ENOMEM; + p4d_populate(&init_mm, dst_p4dp, dst_pudp); + } + dst_pudp = pud_offset(dst_p4dp, start); + + src_pudp = pud_offset(src_p4dp, start); + do { + pud_t pud = READ_ONCE(*src_pudp); + + next = pud_addr_end(addr, end); + if (pud_none(pud)) + continue; + if (pud_table(pud)) { + if (copy_pmd(dst_pudp, src_pudp, addr, next)) + return -ENOMEM; + } else { + set_pud(dst_pudp, + __pud(pud_val(pud) & ~PUD_SECT_RDONLY)); + } + } while (dst_pudp++, src_pudp++, addr = next, addr != end); + + return 0; +} + +static int copy_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start, + unsigned long end) +{ + p4d_t *dst_p4dp; + p4d_t *src_p4dp; + unsigned long next; + unsigned long addr = start; + + dst_p4dp = p4d_offset(dst_pgdp, start); + src_p4dp = p4d_offset(src_pgdp, start); + do { + next = p4d_addr_end(addr, end); + if (p4d_none(READ_ONCE(*src_p4dp))) + continue; + if (copy_pud(dst_p4dp, src_p4dp, addr, next)) + return -ENOMEM; + } while (dst_p4dp++, src_p4dp++, addr = next, addr != end); + + return 0; +} + +static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start, + unsigned long end) +{ + unsigned long next; + unsigned long addr = start; + pgd_t *src_pgdp = pgd_offset_k(start); + + dst_pgdp = pgd_offset_pgd(dst_pgdp, start); + do { + next = pgd_addr_end(addr, end); + if (pgd_none(READ_ONCE(*src_pgdp))) + continue; + if (copy_p4d(dst_pgdp, src_pgdp, addr, next)) + return -ENOMEM; + } while (dst_pgdp++, src_pgdp++, addr = next, addr != end); + + return 0; +} + +int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start, + unsigned long end) +{ + int rc; + pgd_t *trans_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC); + + if (!trans_pgd) { + pr_err("Failed to allocate memory for temporary page tables.\n"); + return -ENOMEM; + } + + rc = 
copy_page_tables(trans_pgd, start, end); + if (!rc) + *dst_pgdp = trans_pgd; + + return rc; +} + +int trans_pgd_map_page(pgd_t *trans_pgd, void *page, + unsigned long dst_addr, + pgprot_t pgprot) +{ + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + pgdp = pgd_offset_pgd(trans_pgd, dst_addr); + if (pgd_none(READ_ONCE(*pgdp))) { + p4dp = (void *)get_safe_page(GFP_ATOMIC); + if (!pgdp) + return -ENOMEM; + pgd_populate(&init_mm, pgdp, p4dp); + } + + p4dp = p4d_offset(pgdp, dst_addr); + if (p4d_none(READ_ONCE(*p4dp))) { + pudp = (void *)get_safe_page(GFP_ATOMIC); + if (!pudp) + return -ENOMEM; + p4d_populate(&init_mm, p4dp, pudp); + } + + pudp = pud_offset(p4dp, dst_addr); + if (pud_none(READ_ONCE(*pudp))) { + pmdp = (void *)get_safe_page(GFP_ATOMIC); + if (!pmdp) + return -ENOMEM; + pud_populate(&init_mm, pudp, pmdp); + } + + pmdp = pmd_offset(pudp, dst_addr); + if (pmd_none(READ_ONCE(*pmdp))) { + ptep = (void *)get_safe_page(GFP_ATOMIC); + if (!ptep) + return -ENOMEM; + pmd_populate_kernel(&init_mm, pmdp, ptep); + } + + ptep = pte_offset_kernel(pmdp, dst_addr); + set_pte(ptep, pfn_pte(virt_to_pfn(page), PAGE_KERNEL_EXEC)); + + return 0; +} -- cgit v1.2.3 From 50f53fb721817a6efa541cca24f1b7caa84801c1 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Mon, 25 Jan 2021 14:19:09 -0500 Subject: arm64: trans_pgd: make trans_pgd_map_page generic kexec is going to use a different allocator, so make trans_pgd_map_page() accept the allocator as an argument; kexec is also going to use a different map protection, so pass that via an argument as well. Signed-off-by: Pavel Tatashin Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/20210125191923.1060122-5-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/trans_pgd.h | 19 +++++++++++++++++-- arch/arm64/kernel/hibernate.c | 12 +++++++++++- arch/arm64/mm/trans_pgd.c | 30 ++++++++++++++++++++-------- 3 files changed, 50 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/trans_pgd.h b/arch/arm64/include/asm/trans_pgd.h index 23153c13d1ce..b46409b25234 100644 --- a/arch/arm64/include/asm/trans_pgd.h +++ b/arch/arm64/include/asm/trans_pgd.h @@ -12,10 +12,25 @@ #include #include +/* + * trans_alloc_page + * - Allocator that should return exactly one zeroed page, if this + * allocator fails, trans_pgd_create_copy() and trans_pgd_map_page() + * return -ENOMEM error.
+ * + * trans_alloc_arg + * - Passed to trans_alloc_page as an argument + */ + +struct trans_pgd_info { + void * (*trans_alloc_page)(void *arg); + void *trans_alloc_arg; +}; + int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start, unsigned long end); -int trans_pgd_map_page(pgd_t *trans_pgd, void *page, unsigned long dst_addr, - pgprot_t pgprot); +int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd, + void *page, unsigned long dst_addr, pgprot_t pgprot); #endif /* _ASM_TRANS_TABLE_H */ diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 4a38662f0d90..c173f280bfea 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -176,6 +176,11 @@ int arch_hibernation_header_restore(void *addr) } EXPORT_SYMBOL(arch_hibernation_header_restore); +static void *hibernate_page_alloc(void *arg) +{ + return (void *)get_safe_page((gfp_t)(unsigned long)arg); +} + /* * Copies length bytes, starting at src_start into an new page, * perform cache maintenance, then maps it at the specified address low @@ -192,6 +197,11 @@ static int create_safe_exec_page(void *src_start, size_t length, unsigned long dst_addr, phys_addr_t *phys_dst_addr) { + struct trans_pgd_info trans_info = { + .trans_alloc_page = hibernate_page_alloc, + .trans_alloc_arg = (void *)GFP_ATOMIC, + }; + void *page = (void *)get_safe_page(GFP_ATOMIC); pgd_t *trans_pgd; int rc; @@ -206,7 +216,7 @@ static int create_safe_exec_page(void *src_start, size_t length, if (!trans_pgd) return -ENOMEM; - rc = trans_pgd_map_page(trans_pgd, page, dst_addr, + rc = trans_pgd_map_page(&trans_info, trans_pgd, page, dst_addr, PAGE_KERNEL_EXEC); if (rc) return rc; diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c index e048d1f5c912..f28eceba2242 100644 --- a/arch/arm64/mm/trans_pgd.c +++ b/arch/arm64/mm/trans_pgd.c @@ -25,6 +25,11 @@ #include #include +static void *trans_alloc(struct trans_pgd_info *info) +{ + return info->trans_alloc_page(info->trans_alloc_arg); +} + static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr) { pte_t pte = READ_ONCE(*src_ptep); @@ -201,9 +206,18 @@ int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start, return rc; } -int trans_pgd_map_page(pgd_t *trans_pgd, void *page, - unsigned long dst_addr, - pgprot_t pgprot) +/* + * Add map entry to trans_pgd for a base-size page at PTE level. + * info: contains allocator and its argument + * trans_pgd: page table in which new map is added. + * page: page to be mapped. + * dst_addr: new VA address for the page + * pgprot: protection for the page. + * + * Returns 0 on success, and -ENOMEM on failure. 
+ */ +int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd, + void *page, unsigned long dst_addr, pgprot_t pgprot) { pgd_t *pgdp; p4d_t *p4dp; @@ -213,7 +227,7 @@ int trans_pgd_map_page(pgd_t *trans_pgd, void *page, pgdp = pgd_offset_pgd(trans_pgd, dst_addr); if (pgd_none(READ_ONCE(*pgdp))) { - p4dp = (void *)get_safe_page(GFP_ATOMIC); + p4dp = trans_alloc(info); if (!pgdp) return -ENOMEM; pgd_populate(&init_mm, pgdp, p4dp); @@ -221,7 +235,7 @@ int trans_pgd_map_page(pgd_t *trans_pgd, void *page, p4dp = p4d_offset(pgdp, dst_addr); if (p4d_none(READ_ONCE(*p4dp))) { - pudp = (void *)get_safe_page(GFP_ATOMIC); + pudp = trans_alloc(info); if (!pudp) return -ENOMEM; p4d_populate(&init_mm, p4dp, pudp); @@ -229,7 +243,7 @@ int trans_pgd_map_page(pgd_t *trans_pgd, void *page, pudp = pud_offset(p4dp, dst_addr); if (pud_none(READ_ONCE(*pudp))) { - pmdp = (void *)get_safe_page(GFP_ATOMIC); + pmdp = trans_alloc(info); if (!pmdp) return -ENOMEM; pud_populate(&init_mm, pudp, pmdp); @@ -237,14 +251,14 @@ int trans_pgd_map_page(pgd_t *trans_pgd, void *page, pmdp = pmd_offset(pudp, dst_addr); if (pmd_none(READ_ONCE(*pmdp))) { - ptep = (void *)get_safe_page(GFP_ATOMIC); + ptep = trans_alloc(info); if (!ptep) return -ENOMEM; pmd_populate_kernel(&init_mm, pmdp, ptep); } ptep = pte_offset_kernel(pmdp, dst_addr); - set_pte(ptep, pfn_pte(virt_to_pfn(page), PAGE_KERNEL_EXEC)); + set_pte(ptep, pfn_pte(virt_to_pfn(page), pgprot)); return 0; } -- cgit v1.2.3 From 89d1410f4af55c621395548355f3520415d2bcff Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Mon, 25 Jan 2021 14:19:10 -0500 Subject: arm64: trans_pgd: pass allocator trans_pgd_create_copy Make trans_pgd_create_copy and its subroutines to use allocator that is passed as an argument Signed-off-by: Pavel Tatashin Reviewed-by: James Morse Link: https://lore.kernel.org/r/20210125191923.1060122-6-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/trans_pgd.h | 4 ++-- arch/arm64/kernel/hibernate.c | 7 +++++- arch/arm64/mm/trans_pgd.c | 49 +++++++++++++++++++++++--------------- 3 files changed, 38 insertions(+), 22 deletions(-) diff --git a/arch/arm64/include/asm/trans_pgd.h b/arch/arm64/include/asm/trans_pgd.h index b46409b25234..7fbf6a3ccff7 100644 --- a/arch/arm64/include/asm/trans_pgd.h +++ b/arch/arm64/include/asm/trans_pgd.h @@ -27,8 +27,8 @@ struct trans_pgd_info { void *trans_alloc_arg; }; -int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start, - unsigned long end); +int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **trans_pgd, + unsigned long start, unsigned long end); int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd, void *page, unsigned long dst_addr, pgprot_t pgprot); diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index c173f280bfea..94fc275cdd21 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -437,13 +437,18 @@ int swsusp_arch_resume(void) phys_addr_t phys_hibernate_exit; void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *, void *, phys_addr_t, phys_addr_t); + struct trans_pgd_info trans_info = { + .trans_alloc_page = hibernate_page_alloc, + .trans_alloc_arg = (void *)GFP_ATOMIC, + }; /* * Restoring the memory image will overwrite the ttbr1 page tables. * Create a second copy of just the linear map, and use this when * restoring. 
*/ - rc = trans_pgd_create_copy(&tmp_pg_dir, PAGE_OFFSET, PAGE_END); + rc = trans_pgd_create_copy(&trans_info, &tmp_pg_dir, PAGE_OFFSET, + PAGE_END); if (rc) return rc; diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c index f28eceba2242..47b6b7029907 100644 --- a/arch/arm64/mm/trans_pgd.c +++ b/arch/arm64/mm/trans_pgd.c @@ -57,14 +57,14 @@ static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr) } } -static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start, - unsigned long end) +static int copy_pte(struct trans_pgd_info *info, pmd_t *dst_pmdp, + pmd_t *src_pmdp, unsigned long start, unsigned long end) { pte_t *src_ptep; pte_t *dst_ptep; unsigned long addr = start; - dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC); + dst_ptep = trans_alloc(info); if (!dst_ptep) return -ENOMEM; pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep); @@ -78,8 +78,8 @@ static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start, return 0; } -static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start, - unsigned long end) +static int copy_pmd(struct trans_pgd_info *info, pud_t *dst_pudp, + pud_t *src_pudp, unsigned long start, unsigned long end) { pmd_t *src_pmdp; pmd_t *dst_pmdp; @@ -87,7 +87,7 @@ static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start, unsigned long addr = start; if (pud_none(READ_ONCE(*dst_pudp))) { - dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC); + dst_pmdp = trans_alloc(info); if (!dst_pmdp) return -ENOMEM; pud_populate(&init_mm, dst_pudp, dst_pmdp); @@ -102,7 +102,7 @@ static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start, if (pmd_none(pmd)) continue; if (pmd_table(pmd)) { - if (copy_pte(dst_pmdp, src_pmdp, addr, next)) + if (copy_pte(info, dst_pmdp, src_pmdp, addr, next)) return -ENOMEM; } else { set_pmd(dst_pmdp, @@ -113,7 +113,8 @@ static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start, return 0; } -static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start, +static int copy_pud(struct trans_pgd_info *info, p4d_t *dst_p4dp, + p4d_t *src_p4dp, unsigned long start, unsigned long end) { pud_t *dst_pudp; @@ -122,7 +123,7 @@ static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start, unsigned long addr = start; if (p4d_none(READ_ONCE(*dst_p4dp))) { - dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC); + dst_pudp = trans_alloc(info); if (!dst_pudp) return -ENOMEM; p4d_populate(&init_mm, dst_p4dp, dst_pudp); @@ -137,7 +138,7 @@ static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start, if (pud_none(pud)) continue; if (pud_table(pud)) { - if (copy_pmd(dst_pudp, src_pudp, addr, next)) + if (copy_pmd(info, dst_pudp, src_pudp, addr, next)) return -ENOMEM; } else { set_pud(dst_pudp, @@ -148,7 +149,8 @@ static int copy_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long start, return 0; } -static int copy_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start, +static int copy_p4d(struct trans_pgd_info *info, pgd_t *dst_pgdp, + pgd_t *src_pgdp, unsigned long start, unsigned long end) { p4d_t *dst_p4dp; @@ -162,15 +164,15 @@ static int copy_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start, next = p4d_addr_end(addr, end); if (p4d_none(READ_ONCE(*src_p4dp))) continue; - if (copy_pud(dst_p4dp, src_p4dp, addr, next)) + if (copy_pud(info, dst_p4dp, src_p4dp, addr, next)) return -ENOMEM; } while (dst_p4dp++, src_p4dp++, addr = next, addr != end); return 0; } -static int copy_page_tables(pgd_t *dst_pgdp, unsigned long 
start,
-			  unsigned long end)
+static int copy_page_tables(struct trans_pgd_info *info, pgd_t *dst_pgdp,
+			    unsigned long start, unsigned long end)
 {
 	unsigned long next;
 	unsigned long addr = start;
@@ -181,25 +183,34 @@ static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none(READ_ONCE(*src_pgdp)))
 			continue;
-		if (copy_p4d(dst_pgdp, src_pgdp, addr, next))
+		if (copy_p4d(info, dst_pgdp, src_pgdp, addr, next))
 			return -ENOMEM;
 	} while (dst_pgdp++, src_pgdp++, addr = next, addr != end);

 	return 0;
 }

-int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start,
-			  unsigned long end)
+/*
+ * Create trans_pgd and copy linear map.
+ * info:	contains allocator and its argument
+ * dst_pgdp:	new page table that is created, and to which map is copied.
+ * start:	Start of the interval (inclusive).
+ * end:		End of the interval (exclusive).
+ *
+ * Returns 0 on success, and -ENOMEM on failure.
+ */
+int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **dst_pgdp,
+			  unsigned long start, unsigned long end)
 {
 	int rc;
-	pgd_t *trans_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);
+	pgd_t *trans_pgd = trans_alloc(info);

 	if (!trans_pgd) {
 		pr_err("Failed to allocate memory for temporary page tables.\n");
 		return -ENOMEM;
 	}

-	rc = copy_page_tables(trans_pgd, start, end);
+	rc = copy_page_tables(info, trans_pgd, start, end);
 	if (!rc)
 		*dst_pgdp = trans_pgd;
-- cgit v1.2.3


From 5de59884ac0ec56007e4aa8226ee259aa669be80 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin
Date: Mon, 25 Jan 2021 14:19:11 -0500
Subject: arm64: trans_pgd: pass NULL instead of init_mm to *_populate functions

trans_pgd_* should be independent of any mm context, because the tables
that this code creates are used when there is no mm context around, as
it is between kernels. Simply replace the init_mm arguments with NULL.
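As a worked example of the interface this part of the series converges on, here is a minimal sketch of how a second user could plug its own allocator into trans_pgd_info (the kexec_* names here are hypothetical; the in-tree hibernate caller wires up get_safe_page() instead, as shown above):

/* Sketch only: any callback that returns one zeroed page (or NULL) works. */
static void *kexec_trans_alloc(void *unused)
{
	return (void *)get_zeroed_page(GFP_KERNEL);
}

static struct trans_pgd_info kexec_trans_info = {
	.trans_alloc_page	= kexec_trans_alloc,
	.trans_alloc_arg	= NULL,
};

Everything below trans_pgd_create_copy()/trans_pgd_map_page() only ever sees the callback, so the hibernate-specific allocator no longer leaks into the shared code.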
Signed-off-by: Pavel Tatashin Acked-by: James Morse Link: https://lore.kernel.org/r/20210125191923.1060122-7-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/mm/trans_pgd.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c index 47b6b7029907..ded8e2ba0308 100644 --- a/arch/arm64/mm/trans_pgd.c +++ b/arch/arm64/mm/trans_pgd.c @@ -67,7 +67,7 @@ static int copy_pte(struct trans_pgd_info *info, pmd_t *dst_pmdp, dst_ptep = trans_alloc(info); if (!dst_ptep) return -ENOMEM; - pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep); + pmd_populate_kernel(NULL, dst_pmdp, dst_ptep); dst_ptep = pte_offset_kernel(dst_pmdp, start); src_ptep = pte_offset_kernel(src_pmdp, start); @@ -90,7 +90,7 @@ static int copy_pmd(struct trans_pgd_info *info, pud_t *dst_pudp, dst_pmdp = trans_alloc(info); if (!dst_pmdp) return -ENOMEM; - pud_populate(&init_mm, dst_pudp, dst_pmdp); + pud_populate(NULL, dst_pudp, dst_pmdp); } dst_pmdp = pmd_offset(dst_pudp, start); @@ -126,7 +126,7 @@ static int copy_pud(struct trans_pgd_info *info, p4d_t *dst_p4dp, dst_pudp = trans_alloc(info); if (!dst_pudp) return -ENOMEM; - p4d_populate(&init_mm, dst_p4dp, dst_pudp); + p4d_populate(NULL, dst_p4dp, dst_pudp); } dst_pudp = pud_offset(dst_p4dp, start); @@ -241,7 +241,7 @@ int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd, p4dp = trans_alloc(info); if (!pgdp) return -ENOMEM; - pgd_populate(&init_mm, pgdp, p4dp); + pgd_populate(NULL, pgdp, p4dp); } p4dp = p4d_offset(pgdp, dst_addr); @@ -249,7 +249,7 @@ int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd, pudp = trans_alloc(info); if (!pudp) return -ENOMEM; - p4d_populate(&init_mm, p4dp, pudp); + p4d_populate(NULL, p4dp, pudp); } pudp = pud_offset(p4dp, dst_addr); @@ -257,7 +257,7 @@ int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd, pmdp = trans_alloc(info); if (!pmdp) return -ENOMEM; - pud_populate(&init_mm, pudp, pmdp); + pud_populate(NULL, pudp, pmdp); } pmdp = pmd_offset(pudp, dst_addr); @@ -265,7 +265,7 @@ int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd, ptep = trans_alloc(info); if (!ptep) return -ENOMEM; - pmd_populate_kernel(&init_mm, pmdp, ptep); + pmd_populate_kernel(NULL, pmdp, ptep); } ptep = pte_offset_kernel(pmdp, dst_addr); -- cgit v1.2.3 From 1401bef703a48cf79c674215f702a1435362beae Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 25 Jan 2021 14:19:12 -0500 Subject: arm64: mm: Always update TCR_EL1 from __cpu_set_tcr_t0sz() Because only the idmap sets a non-standard T0SZ, __cpu_set_tcr_t0sz() can check for platforms that need to do this using __cpu_uses_extended_idmap() before doing its work. The idmap is only built with enough levels, (and T0SZ bits) to map its single page. To allow hibernate, and then kexec to idmap their single page copy routines, __cpu_set_tcr_t0sz() needs to consider additional users, who may need a different number of levels/T0SZ-bits to the idmap. (i.e. VA_BITS may be enough for the idmap, but not hibernate/kexec) Always read TCR_EL1, and check whether any work needs doing for this request. __cpu_uses_extended_idmap() remains as it is used by KVM, whose idmap is also part of the kernel image. This mostly affects the cpuidle path, where we now get an extra system register read . 
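To illustrate the quantity being compared, here is a small user-space model of the T0SZ field (simplified: the kernel's TCR_T0SZ() macro also applies TCR_T0SZ_OFFSET):

#include <stdio.h>

/* T0SZ is 64 minus the VA width covered by TTBR0 (field value only). */
#define T0SZ(va_bits)	(64UL - (va_bits))

int main(void)
{
	printf("T0SZ for a 48-bit idmap: %lu\n", T0SZ(48));	/* 16 */
	printf("T0SZ for a 52-bit idmap: %lu\n", T0SZ(52));	/* 12 */
	return 0;
}

With the rewritten helper, any caller (idmap, hibernate, or kexec) simply has its requested field compared against what TCR_EL1 currently holds, and the register is only rewritten when the two differ.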
CC: Lorenzo Pieralisi CC: Sudeep Holla Signed-off-by: James Morse Signed-off-by: Pavel Tatashin Link: https://lore.kernel.org/r/20210125191923.1060122-8-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu_context.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 0b3079fd28eb..70ce8c1d2b07 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -81,16 +81,15 @@ static inline bool __cpu_uses_extended_idmap_level(void) } /* - * Set TCR.T0SZ to its default value (based on VA_BITS) + * Ensure TCR.T0SZ is set to the provided value. */ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz) { - unsigned long tcr; + unsigned long tcr = read_sysreg(tcr_el1); - if (!__cpu_uses_extended_idmap()) + if ((tcr & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET == t0sz) return; - tcr = read_sysreg(tcr_el1); tcr &= ~TCR_T0SZ_MASK; tcr |= t0sz << TCR_T0SZ_OFFSET; write_sysreg(tcr, tcr_el1); -- cgit v1.2.3 From 7018d467ff2d6a6d3c820a7ad4e897fe2430b040 Mon Sep 17 00:00:00 2001 From: James Morse Date: Mon, 25 Jan 2021 14:19:13 -0500 Subject: arm64: trans_pgd: hibernate: idmap the single page that holds the copy page routines To resume from hibernate, the contents of memory are restored from the swap image. This may overwrite any page, including the running kernel and its page tables. Hibernate copies the code it uses to do the restore into a single page that it knows won't be overwritten, and maps it with page tables built from pages that won't be overwritten. Today the address it uses for this mapping is arbitrary, but to allow kexec to reuse this code, it needs to be idmapped. To idmap the page we must avoid the kernel helpers that have VA_BITS baked in. Convert create_single_mapping() to take a single PA, and idmap it. The page tables are built in the reverse order to normal using pfn_pte() to stir in any bits between 52:48. T0SZ is always increased to cover 48bits, or 52 if the copy code has bits 52:48 in its PA. Signed-off-by: James Morse [Adopted the original patch from James to trans_pgd interface, so it can be commonly used by both Kexec and Hibernate. Some minor clean-ups.] 
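A standalone model of the per-level shift arithmetic the bottom-up builder relies on may help; the macro below mirrors ARM64_HW_PGTABLE_LEVEL_SHIFT() from pgtable-hwdef.h, specialised for a 4K granule:

#include <stdio.h>

#define PAGE_SHIFT	12
/* Same formula as ARM64_HW_PGTABLE_LEVEL_SHIFT(n) with 4K pages. */
#define LEVEL_SHIFT(n)	(((PAGE_SHIFT - 3) * (4 - (n))) + 3)

int main(void)
{
	int level;

	/* prints shifts 39, 30, 21, 12: each level resolves 9 more VA bits */
	for (level = 0; level <= 3; level++)
		printf("level %d indexes VA bits [%d:%d]\n", level,
		       LEVEL_SHIFT(level) + 8, LEVEL_SHIFT(level));
	return 0;
}

Building from level 3 upwards and stopping once the level's most significant bit reaches the PA's max_msb is what lets a 48-bit or 52-bit physical address be covered with the minimum number of safe pages.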
Signed-off-by: Pavel Tatashin Link: https://lore.kernel.org/linux-arm-kernel/20200115143322.214247-4-james.morse@arm.com/ Link: https://lore.kernel.org/r/20210125191923.1060122-9-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/trans_pgd.h | 3 +++ arch/arm64/kernel/hibernate.c | 32 +++++++++---------------- arch/arm64/mm/trans_pgd.c | 49 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 21 deletions(-) diff --git a/arch/arm64/include/asm/trans_pgd.h b/arch/arm64/include/asm/trans_pgd.h index 7fbf6a3ccff7..5d08e5adf3d5 100644 --- a/arch/arm64/include/asm/trans_pgd.h +++ b/arch/arm64/include/asm/trans_pgd.h @@ -33,4 +33,7 @@ int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **trans_pgd, int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd, void *page, unsigned long dst_addr, pgprot_t pgprot); +int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0, + unsigned long *t0sz, void *page); + #endif /* _ASM_TRANS_TABLE_H */ diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 94fc275cdd21..9df32ba0d574 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -194,7 +194,6 @@ static void *hibernate_page_alloc(void *arg) * page system. */ static int create_safe_exec_page(void *src_start, size_t length, - unsigned long dst_addr, phys_addr_t *phys_dst_addr) { struct trans_pgd_info trans_info = { @@ -203,7 +202,8 @@ static int create_safe_exec_page(void *src_start, size_t length, }; void *page = (void *)get_safe_page(GFP_ATOMIC); - pgd_t *trans_pgd; + phys_addr_t trans_ttbr0; + unsigned long t0sz; int rc; if (!page) @@ -211,13 +211,7 @@ static int create_safe_exec_page(void *src_start, size_t length, memcpy(page, src_start, length); __flush_icache_range((unsigned long)page, (unsigned long)page + length); - - trans_pgd = (void *)get_safe_page(GFP_ATOMIC); - if (!trans_pgd) - return -ENOMEM; - - rc = trans_pgd_map_page(&trans_info, trans_pgd, page, dst_addr, - PAGE_KERNEL_EXEC); + rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page); if (rc) return rc; @@ -230,12 +224,15 @@ static int create_safe_exec_page(void *src_start, size_t length, * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI * runtime services), while for a userspace-driven test_resume cycle it * points to userspace page tables (and we must point it at a zero page - * ourselves). Elsewhere we only (un)install the idmap with preemption - * disabled, so T0SZ should be as required regardless. + * ourselves). + * + * We change T0SZ as part of installing the idmap. This is undone by + * cpu_uninstall_idmap() in __cpu_suspend_exit(). */ cpu_set_reserved_ttbr0(); local_flush_tlb_all(); - write_sysreg(phys_to_ttbr(virt_to_phys(trans_pgd)), ttbr0_el1); + __cpu_set_tcr_t0sz(t0sz); + write_sysreg(trans_ttbr0, ttbr0_el1); isb(); *phys_dst_addr = virt_to_phys(page); @@ -434,7 +431,6 @@ int swsusp_arch_resume(void) void *zero_page; size_t exit_size; pgd_t *tmp_pg_dir; - phys_addr_t phys_hibernate_exit; void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *, void *, phys_addr_t, phys_addr_t); struct trans_pgd_info trans_info = { @@ -462,19 +458,13 @@ int swsusp_arch_resume(void) return -ENOMEM; } - /* - * Locate the exit code in the bottom-but-one page, so that *NULL - * still has disastrous affects. 
- */
-	hibernate_exit = (void *)PAGE_SIZE;
 	exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start;
 	/*
 	 * Copy swsusp_arch_suspend_exit() to a safe page. This will generate
 	 * a new set of ttbr0 page tables and load them.
 	 */
 	rc = create_safe_exec_page(__hibernate_exit_text_start, exit_size,
-				   (unsigned long)hibernate_exit,
-				   &phys_hibernate_exit);
+				   (phys_addr_t *)&hibernate_exit);
 	if (rc) {
 		pr_err("Failed to create safe executable page for hibernate_exit code.\n");
 		return rc;
@@ -493,7 +483,7 @@ int swsusp_arch_resume(void)
 	 * We can skip this step if we booted at EL1, or are running with VHE.
 	 */
 	if (el2_reset_needed()) {
-		phys_addr_t el2_vectors = phys_hibernate_exit; /* base */
+		phys_addr_t el2_vectors = (phys_addr_t)hibernate_exit;
 		el2_vectors += hibernate_el2_vectors -
 			       __hibernate_exit_text_start;	/* offset */
diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c
index ded8e2ba0308..527f0a39c3da 100644
--- a/arch/arm64/mm/trans_pgd.c
+++ b/arch/arm64/mm/trans_pgd.c
@@ -273,3 +273,52 @@ int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd,

 	return 0;
 }
+
+/*
+ * The page we want to idmap may be outside the range covered by VA_BITS that
+ * can be built using the kernel's p?d_populate() helpers. As a one off, for a
+ * single page, we build these page tables bottom up and just assume that will
+ * need the maximum T0SZ.
+ *
+ * Returns 0 on success, and -ENOMEM on failure.
+ * On success trans_ttbr0 contains page table with idmapped page, t0sz is set
+ * to the maximum T0SZ for this page.
+ */
+int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0,
+			 unsigned long *t0sz, void *page)
+{
+	phys_addr_t dst_addr = virt_to_phys(page);
+	unsigned long pfn = __phys_to_pfn(dst_addr);
+	int max_msb = (dst_addr & GENMASK(52, 48)) ? 51 : 47;
+	int bits_mapped = PAGE_SHIFT - 4;
+	unsigned long level_mask, prev_level_entry, *levels[4];
+	int this_level, index, level_lsb, level_msb;
+
+	dst_addr &= PAGE_MASK;
+	prev_level_entry = pte_val(pfn_pte(pfn, PAGE_KERNEL_EXEC));
+
+	for (this_level = 3; this_level >= 0; this_level--) {
+		levels[this_level] = trans_alloc(info);
+		if (!levels[this_level])
+			return -ENOMEM;
+
+		level_lsb = ARM64_HW_PGTABLE_LEVEL_SHIFT(this_level);
+		level_msb = min(level_lsb + bits_mapped, max_msb);
+		level_mask = GENMASK_ULL(level_msb, level_lsb);
+
+		index = (dst_addr & level_mask) >> level_lsb;
+		*(levels[this_level] + index) = prev_level_entry;
+
+		pfn = virt_to_pfn(levels[this_level]);
+		prev_level_entry = pte_val(pfn_pte(pfn,
+						   __pgprot(PMD_TYPE_TABLE)));
+
+		if (level_msb == max_msb)
+			break;
+	}
+
+	*trans_ttbr0 = phys_to_ttbr(__pfn_to_phys(pfn));
+	*t0sz = TCR_T0SZ(max_msb + 1);
+
+	return 0;
+}
-- cgit v1.2.3


From 4c3c31230c912d8f2e49c775555aadf79a43d418 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin
Date: Mon, 25 Jan 2021 14:19:14 -0500
Subject: arm64: kexec: move relocation function setup

Currently, the kernel relocation function is configured in
machine_kexec() at the time of kexec reboot, using control_code_page.
This operation, however, is more logically done during kexec load, so
remove it from reboot time. Move the setup of this function to the newly
added machine_kexec_post_load().

Once the MMU is enabled, the kexec control page will contain more than
the relocation code: it will also hold a vector table. So add a pointer
to the actual function within this page, arch.kern_reloc. Currently it
equals the beginning of the page; offsets will be added later, when the
vector table is added.
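For orientation, the reboot-time hand-off after this change reduces to a single call; the annotated sketch below restates the call that survives in machine_kexec() (the comments are editorial, the arguments are as in the hunk that follows):

/*
 * cpu_soft_restart() shuts the MMU off and branches to its first
 * argument, so everything passed here must be a physical address or a
 * value that the relocation code interprets for itself.
 */
cpu_soft_restart(kimage->arch.kern_reloc,	/* PA of the copied code  */
		 kimage->head,			/* kimage entry list head */
		 kimage->start,			/* new image entry point  */
		 kimage->arch.dtb_mem);		/* PA of the device tree  */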
Signed-off-by: Pavel Tatashin Reviewed-by: James Morse Link: https://lore.kernel.org/r/20210125191923.1060122-10-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/kexec.h | 1 + arch/arm64/kernel/machine_kexec.c | 46 ++++++++++++++++----------------------- 2 files changed, 20 insertions(+), 27 deletions(-) diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 61530ec3a9b1..9befcd87e9a8 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -95,6 +95,7 @@ static inline void crash_post_resume(void) {} struct kimage_arch { void *dtb; phys_addr_t dtb_mem; + phys_addr_t kern_reloc; /* Core ELF header buffer */ void *elf_headers; unsigned long elf_headers_mem; diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 8096a6aa1d49..a8aaa6562429 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -42,6 +42,7 @@ static void _kexec_image_info(const char *func, int line, pr_debug(" start: %lx\n", kimage->start); pr_debug(" head: %lx\n", kimage->head); pr_debug(" nr_segments: %lu\n", kimage->nr_segments); + pr_debug(" kern_reloc: %pa\n", &kimage->arch.kern_reloc); for (i = 0; i < kimage->nr_segments; i++) { pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", @@ -58,6 +59,22 @@ void machine_kexec_cleanup(struct kimage *kimage) /* Empty routine needed to avoid build errors. */ } +int machine_kexec_post_load(struct kimage *kimage) +{ + void *reloc_code = page_to_virt(kimage->control_code_page); + + memcpy(reloc_code, arm64_relocate_new_kernel, + arm64_relocate_new_kernel_size); + kimage->arch.kern_reloc = __pa(reloc_code); + + /* Flush the reloc_code in preparation for its execution. */ + __flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size); + flush_icache_range((uintptr_t)reloc_code, (uintptr_t)reloc_code + + arm64_relocate_new_kernel_size); + + return 0; +} + /** * machine_kexec_prepare - Prepare for a kexec reboot. * @@ -143,8 +160,6 @@ static void kexec_segment_flush(const struct kimage *kimage) */ void machine_kexec(struct kimage *kimage) { - phys_addr_t reboot_code_buffer_phys; - void *reboot_code_buffer; bool in_kexec_crash = (kimage == kexec_crash_image); bool stuck_cpus = cpus_are_stuck_in_kernel(); @@ -155,31 +170,8 @@ void machine_kexec(struct kimage *kimage) WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()), "Some CPUs may be stale, kdump will be unreliable.\n"); - reboot_code_buffer_phys = page_to_phys(kimage->control_code_page); - reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys); - kexec_image_info(kimage); - /* - * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use - * after the kernel is shut down. - */ - memcpy(reboot_code_buffer, arm64_relocate_new_kernel, - arm64_relocate_new_kernel_size); - - /* Flush the reboot_code_buffer in preparation for its execution. */ - __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size); - - /* - * Although we've killed off the secondary CPUs, we don't update - * the online mask if we're handling a crash kernel and consequently - * need to avoid flush_icache_range(), which will attempt to IPI - * the offline CPUs. Therefore, we must use the __* variant here. - */ - __flush_icache_range((uintptr_t)reboot_code_buffer, - (uintptr_t)reboot_code_buffer + - arm64_relocate_new_kernel_size); - /* Flush the kimage list and its buffers. 
*/
 	kexec_list_flush(kimage);
@@ -193,7 +185,7 @@ void machine_kexec(struct kimage *kimage)

 	/*
 	 * cpu_soft_restart will shutdown the MMU, disable data caches, then
-	 * transfer control to the reboot_code_buffer which contains a copy of
+	 * transfer control to the kern_reloc which contains a copy of
 	 * the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel
 	 * uses physical addressing to relocate the new image to its final
 	 * position and transfers control to the image entry point when the
@@ -203,7 +195,7 @@
 	 * userspace (kexec-tools).
 	 * In kexec_file case, the kernel starts directly without purgatory.
 	 */
-	cpu_soft_restart(reboot_code_buffer_phys, kimage->head, kimage->start,
+	cpu_soft_restart(kimage->arch.kern_reloc, kimage->head, kimage->start,
 			 kimage->arch.dtb_mem);

 	BUG(); /* Should never get here. */
-- cgit v1.2.3


From 77a43be1164852be4b761e4acaf651f986c4e8d7 Mon Sep 17 00:00:00 2001
From: Pavel Tatashin
Date: Mon, 25 Jan 2021 14:19:15 -0500
Subject: arm64: kexec: call kexec_image_info only once

Currently, kexec_image_info() is called during load time, and again
right before the kernel is kexec'ed. There is no need to do both. So,
call it only once, when the segments are loaded and the physical
location of the page with the copy of arm64_relocate_new_kernel is
known.

Signed-off-by: Pavel Tatashin
Acked-by: James Morse
Link: https://lore.kernel.org/r/20210125191923.1060122-11-pasha.tatashin@soleen.com
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/machine_kexec.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index a8aaa6562429..90a335c74442 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -66,6 +66,7 @@ int machine_kexec_post_load(struct kimage *kimage)
 	memcpy(reloc_code, arm64_relocate_new_kernel,
 	       arm64_relocate_new_kernel_size);
 	kimage->arch.kern_reloc = __pa(reloc_code);
+	kexec_image_info(kimage);

 	/* Flush the reloc_code in preparation for its execution. */
 	__flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size);
@@ -84,8 +85,6 @@ int machine_kexec_post_load(struct kimage *kimage)
  */
 int machine_kexec_prepare(struct kimage *kimage)
 {
-	kexec_image_info(kimage);
-
 	if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
 		pr_err("Can't kexec: CPUs are stuck in the kernel.\n");
 		return -EBUSY;
@@ -170,8 +169,6 @@ void machine_kexec(struct kimage *kimage)
 	WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
 		"Some CPUs may be stale, kdump will be unreliable.\n");

-	kexec_image_info(kimage);
-
 	/* Flush the kimage list and its buffers. */
 	kexec_list_flush(kimage);
-- cgit v1.2.3


From dbd82fee0f258739272349bc87f5841fc1fb982a Mon Sep 17 00:00:00 2001
From: Pavel Tatashin
Date: Mon, 25 Jan 2021 14:19:16 -0500
Subject: arm64: kexec: arm64_relocate_new_kernel clean-ups and optimizations

In preparation for bigger changes to arm64_relocate_new_kernel that will
enable this function to do an MMU-backed memory copy, do a few clean-ups
and optimizations. These include:

1. Call raw_dcache_line_size() only when relocation is actually going to
   happen, i.e. a kdump-type kexec does not need it.

2. copy_page(dest, src, tmps...) increments dest and src by PAGE_SIZE,
   so there is no need to store dest prior to calling copy_page and
   increment it after. Also, src is not used after the copy, so there is
   no need to preserve it either.

3. For consistency, put a comment on the same line as an instruction
   when it describes that instruction.

4.
Some comment corrections Signed-off-by: Pavel Tatashin Link: https://lore.kernel.org/r/20210125191923.1060122-12-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/kernel/relocate_kernel.S | 36 ++++++++---------------------------- 1 file changed, 8 insertions(+), 28 deletions(-) diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index 84eec95ec06c..462ffbc37071 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -17,28 +17,24 @@ /* * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it. * - * The memory that the old kernel occupies may be overwritten when coping the + * The memory that the old kernel occupies may be overwritten when copying the * new image to its final location. To assure that the * arm64_relocate_new_kernel routine which does that copy is not overwritten, * all code and data needed by arm64_relocate_new_kernel must be between the * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end. The * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec - * control_code_page, a special page which has been set up to be preserved - * during the copy operation. + * safe memory that has been set up to be preserved during the copy operation. */ SYM_CODE_START(arm64_relocate_new_kernel) - /* Setup the list loop variables. */ mov x18, x2 /* x18 = dtb address */ mov x17, x1 /* x17 = kimage_start */ mov x16, x0 /* x16 = kimage_head */ - raw_dcache_line_size x15, x0 /* x15 = dcache line size */ mov x14, xzr /* x14 = entry ptr */ mov x13, xzr /* x13 = copy dest */ - /* Check if the new image needs relocation. */ tbnz x16, IND_DONE_BIT, .Ldone - + raw_dcache_line_size x15, x0 /* x15 = dcache line size */ .Lloop: and x12, x16, PAGE_MASK /* x12 = addr */ @@ -57,34 +53,18 @@ SYM_CODE_START(arm64_relocate_new_kernel) b.lo 2b dsb sy - mov x20, x13 - mov x21, x12 - copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7 - - /* dest += PAGE_SIZE */ - add x13, x13, PAGE_SIZE + copy_page x13, x12, x0, x1, x2, x3, x4, x5, x6, x7 b .Lnext - .Ltest_indirection: tbz x16, IND_INDIRECTION_BIT, .Ltest_destination - - /* ptr = addr */ - mov x14, x12 + mov x14, x12 /* ptr = addr */ b .Lnext - .Ltest_destination: tbz x16, IND_DESTINATION_BIT, .Lnext - - /* dest = addr */ - mov x13, x12 - + mov x13, x12 /* dest = addr */ .Lnext: - /* entry = *ptr++ */ - ldr x16, [x14], #8 - - /* while (!(entry & DONE)) */ - tbz x16, IND_DONE_BIT, .Lloop - + ldr x16, [x14], #8 /* entry = *ptr++ */ + tbz x16, IND_DONE_BIT, .Lloop /* while (!(entry & DONE)) */ .Ldone: /* wait for writes from copy_page to finish */ dsb nsh -- cgit v1.2.3 From a360190e8a42d47ea80355f286939ba82b02405a Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Mon, 25 Jan 2021 14:19:17 -0500 Subject: arm64: kexec: arm64_relocate_new_kernel don't use x0 as temp x0 will contain the only argument to arm64_relocate_new_kernel; don't use it as a temp. Reassigned registers to free-up x0 so we won't need to copy argument, and can use it at the beginning and at the end of the function. 
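The list that x16/x14/x13 walk in the code above is kexec's generic indirection list; a user-space C model of the same walk (flag values as in include/linux/kexec.h; 4K pages and directly dereferenceable addresses assumed, which is not true of the real physical-address list) may be easier to follow than the assembly:

#include <stdint.h>
#include <string.h>

#define IND_DESTINATION	0x1	/* entry sets the next copy destination */
#define IND_INDIRECTION	0x2	/* entry points at the next list page   */
#define IND_DONE	0x4	/* end of the list                      */
#define IND_SOURCE	0x8	/* entry is a source page to copy       */
#define PG_SIZE		4096UL
#define PG_MASK		(~(PG_SIZE - 1))

static void relocate(uint64_t entry)	/* entry = kimage->head (x16) */
{
	uint64_t *ptr = 0;		/* entry pointer (x14)    */
	char *dest = 0;			/* copy destination (x13) */

	while (!(entry & IND_DONE)) {
		uint64_t addr = entry & PG_MASK;	/* x12 = addr */

		if (entry & IND_SOURCE) {
			memcpy(dest, (char *)(uintptr_t)addr, PG_SIZE);
			dest += PG_SIZE;	/* copy_page post-increments */
		} else if (entry & IND_INDIRECTION) {
			ptr = (uint64_t *)(uintptr_t)addr;
		} else if (entry & IND_DESTINATION) {
			dest = (char *)(uintptr_t)addr;
		}
		entry = *ptr++;		/* ldr x16, [x14], #8 */
	}
}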
Signed-off-by: Pavel Tatashin Reviewed-by: James Morse Link: https://lore.kernel.org/r/20210125191923.1060122-13-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/kernel/relocate_kernel.S | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index 462ffbc37071..b78ea5de97a4 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -34,7 +34,7 @@ SYM_CODE_START(arm64_relocate_new_kernel) mov x13, xzr /* x13 = copy dest */ /* Check if the new image needs relocation. */ tbnz x16, IND_DONE_BIT, .Ldone - raw_dcache_line_size x15, x0 /* x15 = dcache line size */ + raw_dcache_line_size x15, x1 /* x15 = dcache line size */ .Lloop: and x12, x16, PAGE_MASK /* x12 = addr */ @@ -43,17 +43,17 @@ SYM_CODE_START(arm64_relocate_new_kernel) tbz x16, IND_SOURCE_BIT, .Ltest_indirection /* Invalidate dest page to PoC. */ - mov x0, x13 - add x20, x0, #PAGE_SIZE + mov x2, x13 + add x20, x2, #PAGE_SIZE sub x1, x15, #1 - bic x0, x0, x1 -2: dc ivac, x0 - add x0, x0, x15 - cmp x0, x20 + bic x2, x2, x1 +2: dc ivac, x2 + add x2, x2, x15 + cmp x2, x20 b.lo 2b dsb sy - copy_page x13, x12, x0, x1, x2, x3, x4, x5, x6, x7 + copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8 b .Lnext .Ltest_indirection: tbz x16, IND_INDIRECTION_BIT, .Ltest_destination -- cgit v1.2.3 From 3f98a28cc37253269b4104cf95a51f7716a2eb97 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 28 Jan 2021 11:06:26 +0100 Subject: mm/nommu: Fix return type of filemap_map_pages() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_MMU is not set (e.g. m68k/m5272c3_defconfig): mm/nommu.c:1671:6: error: conflicting types for ‘filemap_map_pages’ 1671 | void filemap_map_pages(struct vm_fault *vmf, | ^~~~~~~~~~~~~~~~~ In file included from mm/nommu.c:20: ./include/linux/mm.h:2578:19: note: previous declaration of ‘filemap_map_pages’ was here 2578 | extern vm_fault_t filemap_map_pages(struct vm_fault *vmf, | ^~~~~~~~~~~~~~~~~ The signature of filemap_map_pages() was changed, but the nommu implementation wasn't updated. Reported-by: noreply@ellerman.id.au Fixes: f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() codepaths") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20210128100626.2257638-1-geert@linux-m68k.org Signed-off-by: Will Deacon --- mm/nommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/nommu.c b/mm/nommu.c index 870fea12823e..5c9ab799c0e6 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1668,10 +1668,11 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) } EXPORT_SYMBOL(filemap_fault); -void filemap_map_pages(struct vm_fault *vmf, +vm_fault_t filemap_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff) { BUG(); + return 0; } EXPORT_SYMBOL(filemap_map_pages); -- cgit v1.2.3 From e30be1455bd3b0626602f42725c49200b2b871b4 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Thu, 28 Jan 2021 17:38:50 +0000 Subject: KVM: arm64: Move __hyp_set_vectors out of .hyp.text The .hyp.text section is supposed to be reserved for the nVHE EL2 code. However, there is currently one occurrence of EL1 executing code located in .hyp.text when calling __hyp_{re}set_vectors(), which happen to sit next to the EL2 stub vectors. While not a problem yet, such patterns will cause issues when removing the host kernel from the TCB, so a cleaner split would be preferable. 
Fix this by delimiting the end of the .hyp.text section in hyp-stub.S. Acked-by: Marc Zyngier Signed-off-by: Quentin Perret Link: https://lore.kernel.org/r/20210128173850.2478161-1-qperret@google.com Signed-off-by: Will Deacon --- arch/arm64/kernel/hyp-stub.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 160f5881a0b7..8a60f9c586bb 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -85,6 +85,8 @@ SYM_CODE_END(\label) invalid_vector el1_fiq_invalid invalid_vector el1_error_invalid + .popsection + /* * __hyp_set_vectors: Call this after boot to set the initial hypervisor * vectors as part of hypervisor installation. On an SMP system, this should -- cgit v1.2.3 From 79d7c3dca99fa96033695ddf5d495b775a3a137b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 28 Jan 2021 13:12:43 +0000 Subject: perf/arm-cmn: Fix PMU instance naming Although it's neat to avoid the suffix for the typical case of a single PMU, it means systems with multiple CMN instances end up with inconsistent naming. I think it also breaks perf tool's "uncore alias" logic if the common instance prefix is also the full name of one. Avoid any surprises by not trying to be clever and simply numbering every instance, even when it might technically prove redundant. Fixes: 0ba64770a2f2 ("perf: Add Arm CMN-600 PMU driver") Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/649a2281233f193d59240b13ed91b57337c77b32.1611839564.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- Documentation/admin-guide/perf/arm-cmn.rst | 2 +- drivers/perf/arm-cmn.c | 13 ++++--------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/Documentation/admin-guide/perf/arm-cmn.rst b/Documentation/admin-guide/perf/arm-cmn.rst index 0e4809346014..796e25b7027b 100644 --- a/Documentation/admin-guide/perf/arm-cmn.rst +++ b/Documentation/admin-guide/perf/arm-cmn.rst @@ -17,7 +17,7 @@ PMU events ---------- The PMU driver registers a single PMU device for the whole interconnect, -see /sys/bus/event_source/devices/arm_cmn. Multi-chip systems may link +see /sys/bus/event_source/devices/arm_cmn_0. Multi-chip systems may link more than one CMN together via external CCIX links - in this situation, each mesh counts its own events entirely independently, and additional PMU devices will be named arm_cmn_{1..n}. 
diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index f30fcd330899..6bdf13f65875 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1502,7 +1502,7 @@ static int arm_cmn_probe(struct platform_device *pdev) struct arm_cmn *cmn; const char *name; static atomic_t id; - int err, rootnode, this_id; + int err, rootnode; cmn = devm_kzalloc(&pdev->dev, sizeof(*cmn), GFP_KERNEL); if (!cmn) @@ -1549,14 +1549,9 @@ static int arm_cmn_probe(struct platform_device *pdev) .cancel_txn = arm_cmn_end_txn, }; - this_id = atomic_fetch_inc(&id); - if (this_id == 0) { - name = "arm_cmn"; - } else { - name = devm_kasprintf(cmn->dev, GFP_KERNEL, "arm_cmn_%d", this_id); - if (!name) - return -ENOMEM; - } + name = devm_kasprintf(cmn->dev, GFP_KERNEL, "arm_cmn_%d", atomic_fetch_inc(&id)); + if (!name) + return -ENOMEM; err = cpuhp_state_add_instance(arm_cmn_hp_state, &cmn->cpuhp_node); if (err) -- cgit v1.2.3 From 1c8147ea89c883d1f4e20f1b1d9c879291430102 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 28 Jan 2021 13:12:44 +0000 Subject: perf/arm-cmn: Move IRQs when migrating context If we migrate the PMU context to another CPU, we need to remember to retarget the IRQs as well. Fixes: 0ba64770a2f2 ("perf: Add Arm CMN-600 PMU driver") Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/e080640aea4ed8dfa870b8549dfb31221803eb6b.1611839564.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/perf/arm-cmn.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index 6bdf13f65875..1328159fe564 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1150,7 +1150,7 @@ static int arm_cmn_commit_txn(struct pmu *pmu) static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) { struct arm_cmn *cmn; - unsigned int target; + unsigned int i, target; cmn = hlist_entry_safe(node, struct arm_cmn, cpuhp_node); if (cpu != cmn->cpu) @@ -1161,6 +1161,8 @@ static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) return 0; perf_pmu_migrate_context(&cmn->pmu, cpu, target); + for (i = 0; i < cmn->num_dtcs; i++) + irq_set_affinity_hint(cmn->dtc[i].irq, cpumask_of(target)); cmn->cpu = target; return 0; } -- cgit v1.2.3 From d1bbc35fcab28668c8992c4d5777234b794d7306 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Mon, 1 Feb 2021 10:03:06 -0500 Subject: arm64: hibernate: add __force attribute to gfp_t casting Two new warnings are reported by sparse: "sparse warnings: (new ones prefixed by >>)" >> arch/arm64/kernel/hibernate.c:181:39: sparse: sparse: cast to restricted gfp_t >> arch/arm64/kernel/hibernate.c:202:44: sparse: sparse: cast from restricted gfp_t gfp_t has __bitwise type attribute and requires __force added to casting in order to avoid these warnings. 
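The warning class is easy to reproduce outside the kernel; below is a minimal sparse demo, with flags_t standing in for gfp_t and the __bitwise/__force definitions matching what linux/compiler_types.h provides under __CHECKER__:

#ifdef __CHECKER__
#define __bitwise	__attribute__((bitwise))
#define __force		__attribute__((force))
#else
#define __bitwise
#define __force
#endif

typedef unsigned int __bitwise flags_t;

static void *pack(flags_t f)
{
	/* without __force: "warning: cast from restricted flags_t" */
	return (void *)(__force unsigned long)f;
}

static flags_t unpack(void *arg)
{
	/* without __force: "warning: cast to restricted flags_t" */
	return (__force flags_t)(unsigned long)arg;
}

Running `sparse demo.c` (or a kernel build with C=1) with the __force annotations removed produces the two warnings quoted above.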
Fixes: 50f53fb72181 ("arm64: trans_pgd: make trans_pgd_map_page generic") Reported-by: kernel test robot Signed-off-by: Pavel Tatashin Link: https://lore.kernel.org/r/20210201150306.54099-2-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/kernel/hibernate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 9df32ba0d574..b1cef371df2b 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -178,7 +178,7 @@ EXPORT_SYMBOL(arch_hibernation_header_restore); static void *hibernate_page_alloc(void *arg) { - return (void *)get_safe_page((gfp_t)(unsigned long)arg); + return (void *)get_safe_page((__force gfp_t)(unsigned long)arg); } /* @@ -198,7 +198,7 @@ static int create_safe_exec_page(void *src_start, size_t length, { struct trans_pgd_info trans_info = { .trans_alloc_page = hibernate_page_alloc, - .trans_alloc_arg = (void *)GFP_ATOMIC, + .trans_alloc_arg = (__force void *)GFP_ATOMIC, }; void *page = (void *)get_safe_page(GFP_ATOMIC); -- cgit v1.2.3 From 20116dd93f4d0b2e84a25ee83e3238586dbb79ec Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Tue, 2 Feb 2021 15:58:06 +0800 Subject: drivers/perf: Prevent forced unbinding of ARM_DMC620_PMU drivers Set "suppress_bind_attrs" to true, so that bind/unbind can be disabled via sysfs and prevent unbinding ARM_DMC620_PMU drivers during perf sampling. Signed-off-by: Qi Liu Link: https://lore.kernel.org/r/1612252686-50329-1-git-send-email-liuqi115@huawei.com Signed-off-by: Will Deacon --- drivers/perf/arm_dmc620_pmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c index 27f54c0afc3b..66ad5b3ece19 100644 --- a/drivers/perf/arm_dmc620_pmu.c +++ b/drivers/perf/arm_dmc620_pmu.c @@ -717,6 +717,7 @@ static struct platform_driver dmc620_pmu_driver = { .driver = { .name = DMC620_DRVNAME, .acpi_match_table = dmc620_acpi_match, + .suppress_bind_attrs = true, }, .probe = dmc620_pmu_device_probe, .remove = dmc620_pmu_device_remove, -- cgit v1.2.3 From 2ceee7ed4c6c9e3eec1004aee43608bc98b10603 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sun, 31 Jan 2021 15:36:15 +0100 Subject: arm64: perf: Constify static attribute_group structs The only usage of these is to put their addresses in an array of pointers to const attribute_group structs. Make them const to allow the compiler to put them in read-only memory. 
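The pattern generalises to any sysfs group that is only ever referenced by address; a minimal sketch, with hypothetical demo_* names:

#include <linux/sysfs.h>

static struct attribute *demo_attrs[] = {
	NULL,				/* real attributes elided */
};

/* const: the group object itself can now be placed in .rodata */
static const struct attribute_group demo_group = {
	.name	= "demo",
	.attrs	= demo_attrs,
};

/* ...because every consumer takes a pointer-to-const anyway: */
static const struct attribute_group *demo_groups[] = {
	&demo_group,
	NULL,
};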
Signed-off-by: Rikard Falkeborn
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/perf_event.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 3605f77ad4df..59b1eed52236 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -280,7 +280,7 @@ armv8pmu_event_attr_is_visible(struct kobject *kobj,
 	return 0;
 }

-static struct attribute_group armv8_pmuv3_events_attr_group = {
+static const struct attribute_group armv8_pmuv3_events_attr_group = {
 	.name = "events",
 	.attrs = armv8_pmuv3_event_attrs,
 	.is_visible = armv8pmu_event_attr_is_visible,
@@ -300,7 +300,7 @@ static struct attribute *armv8_pmuv3_format_attrs[] = {
 	NULL,
 };

-static struct attribute_group armv8_pmuv3_format_attr_group = {
+static const struct attribute_group armv8_pmuv3_format_attr_group = {
 	.name = "format",
 	.attrs = armv8_pmuv3_format_attrs,
 };
@@ -322,7 +322,7 @@ static struct attribute *armv8_pmuv3_caps_attrs[] = {
 	NULL,
 };

-static struct attribute_group armv8_pmuv3_caps_attr_group = {
+static const struct attribute_group armv8_pmuv3_caps_attr_group = {
 	.name = "caps",
 	.attrs = armv8_pmuv3_caps_attrs,
 };
-- cgit v1.2.3


From 12fc4288408a8799409f7fa62a526b60e92da334 Mon Sep 17 00:00:00 2001
From: Keno Fischer
Date: Mon, 1 Feb 2021 19:21:09 -0500
Subject: arm64: ptrace: Fix missing return in hw breakpoint code

When delivering a hw-breakpoint SIGTRAP to a compat task via ptrace, the
lack of a 'return' statement means we fall through to the native case,
which differs in its handling of 'si_errno'.

Although this looks to be harmless because the subsequent signal is
effectively ignored, it's confusing and unintentional, so add the
missing 'return'.

Signed-off-by: Keno Fischer
Link: https://lore.kernel.org/r/20210202002109.GA624440@juliacomputing.com
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/ptrace.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 8ac487c84e37..3d5c8afca75b 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -194,6 +194,7 @@ static void ptrace_hbptriggered(struct perf_event *bp,
 	}
 	arm64_force_sig_ptrace_errno_trap(si_errno, bkpt->trigger, desc);
+	return;
 	}
 #endif
 	arm64_force_sig_fault(SIGTRAP, TRAP_HWBKPT, bkpt->trigger, desc);
-- cgit v1.2.3


From b9ba680969d1016888fed4e03d8ecb4b97726f34 Mon Sep 17 00:00:00 2001
From: Hailong Liu
Date: Tue, 2 Feb 2021 23:07:49 +0800
Subject: arm64/ptdump: display the Linear Mapping start marker

The current /sys/kernel/debug/kernel_page_tables does not display the
*Linear Mapping start* marker on arm64, which I think should be paired
with the *Linear Mapping end* marker.

Since *Linear Mapping start* is the first marker, initialise 'level' to
-1 in order to display it.
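For context, the marker array that note_page() walks begins with exactly this entry, so an initial level of -1 guarantees the very first call is seen as a level change and emits the first marker; an abbreviated sketch of the array in arch/arm64/mm/ptdump.c (the PAGE_END value is filled in at init time):

static struct addr_marker address_markers[] = {
	{ PAGE_OFFSET,		"Linear Mapping start" },
	{ 0 /* PAGE_END */,	"Linear Mapping end" },
	/* ... further markers elided ... */
	{ -1,			NULL },		/* terminator */
};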
Signed-off-by: Hailong Liu Link: https://lore.kernel.org/r/20210202150749.10104-1-liuhailongg6@163.com Signed-off-by: Will Deacon --- arch/arm64/mm/ptdump.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c index 04137a8f3d2d..0e050d76b83a 100644 --- a/arch/arm64/mm/ptdump.c +++ b/arch/arm64/mm/ptdump.c @@ -324,6 +324,7 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info) st = (struct pg_state){ .seq = s, .marker = info->markers, + .level = -1, .ptdump = { .note_page = note_page, .range = (struct ptdump_range[]){ -- cgit v1.2.3 From db2bb91f2e8e73d85876d1665608e834d91d21ee Mon Sep 17 00:00:00 2001 From: Seiya Wang Date: Wed, 3 Feb 2021 13:53:47 +0800 Subject: arm64: perf: add support for Cortex-A78 Add support for Cortex-A78 using generic PMUv3 for now. Signed-off-by: Seiya Wang Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20210203055348.4935-2-seiya.wang@mediatek.com Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 59b1eed52236..b6d0b2cbdc79 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1188,6 +1188,12 @@ static int armv8_a77_pmu_init(struct arm_pmu *cpu_pmu) armv8_pmuv3_map_event); } +static int armv8_a78_pmu_init(struct arm_pmu *cpu_pmu) +{ + return armv8_pmu_init_nogroups(cpu_pmu, "armv8_cortex_a78", + armv8_pmuv3_map_event); +} + static int armv8_e1_pmu_init(struct arm_pmu *cpu_pmu) { return armv8_pmu_init_nogroups(cpu_pmu, "armv8_neoverse_e1", @@ -1225,6 +1231,7 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = { {.compatible = "arm,cortex-a75-pmu", .data = armv8_a75_pmu_init}, {.compatible = "arm,cortex-a76-pmu", .data = armv8_a76_pmu_init}, {.compatible = "arm,cortex-a77-pmu", .data = armv8_a77_pmu_init}, + {.compatible = "arm,cortex-a78-pmu", .data = armv8_a78_pmu_init}, {.compatible = "arm,neoverse-e1-pmu", .data = armv8_e1_pmu_init}, {.compatible = "arm,neoverse-n1-pmu", .data = armv8_n1_pmu_init}, {.compatible = "cavium,thunder-pmu", .data = armv8_thunder_pmu_init}, -- cgit v1.2.3 From 750d43b4a79e5f3767ee1db933b42abdf967ce1e Mon Sep 17 00:00:00 2001 From: Seiya Wang Date: Wed, 3 Feb 2021 13:53:48 +0800 Subject: dt-bindings: arm: add Cortex-A78 binding Add compatible for Cortex-A78 PMU Signed-off-by: Seiya Wang Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20210203055348.4935-3-seiya.wang@mediatek.com Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/arm/pmu.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml index 693ef3f185a8..e17ac049e890 100644 --- a/Documentation/devicetree/bindings/arm/pmu.yaml +++ b/Documentation/devicetree/bindings/arm/pmu.yaml @@ -43,6 +43,7 @@ properties: - arm,cortex-a75-pmu - arm,cortex-a76-pmu - arm,cortex-a77-pmu + - arm,cortex-a78-pmu - arm,neoverse-e1-pmu - arm,neoverse-n1-pmu - brcm,vulcan-pmu -- cgit v1.2.3 From 00ef543419366e8b742435992d08e0d5a87fd561 Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 2 Feb 2021 12:36:57 +0000 Subject: arm64: vmlinux.ld.S: add assertion for reserved_pg_dir offset Add RESERVED_SWAPPER_OFFSET and use that instead of hardcoding the offset between swapper_pg_dir and reserved_pg_dir. Then use RESERVED_SWAPPER_OFFSET to assert that the offset is correct at link time. 
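It is worth spelling out why the offset must be open-coded at all: the difference between two linker-provided symbols is not a C integer constant expression, so it cannot feed a preprocessor constant or an assembler immediate (illustrative sketch):

extern char reserved_pg_dir[], swapper_pg_dir[];

/*
 * Not usable: both addresses are only known at link time, so the
 * subtraction below is not a compile-time constant.
 *
 * #define RESERVED_SWAPPER_OFFSET (swapper_pg_dir - reserved_pg_dir)
 */

/* Hence the hand-written value, with the linker double-checking it: */
#define RESERVED_SWAPPER_OFFSET	(PAGE_SIZE)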
Signed-off-by: Joey Gouly Reviewed-by: Mark Rutland Tested-by: Mark Rutland Link: https://lore.kernel.org/r/20210202123658.22308-2-joey.gouly@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/asm-uaccess.h | 4 ++-- arch/arm64/include/asm/memory.h | 6 ++++++ arch/arm64/include/asm/uaccess.h | 2 +- arch/arm64/kernel/vmlinux.lds.S | 3 +++ 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index 9990059be106..ccedf548dac9 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -15,10 +15,10 @@ .macro __uaccess_ttbr0_disable, tmp1 mrs \tmp1, ttbr1_el1 // swapper_pg_dir bic \tmp1, \tmp1, #TTBR_ASID_MASK - sub \tmp1, \tmp1, #PAGE_SIZE // reserved_pg_dir just before swapper_pg_dir + sub \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET // reserved_pg_dir msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1 isb - add \tmp1, \tmp1, #PAGE_SIZE + add \tmp1, \tmp1, #RESERVED_SWAPPER_OFFSET msr ttbr1_el1, \tmp1 // set reserved ASID isb .endm diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 18fce223b67b..f6bf8a972a16 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -159,6 +159,12 @@ #define IOREMAP_MAX_ORDER (PMD_SHIFT) #endif +/* + * Open-coded (swapper_pg_dir - reserved_pg_dir) as this cannot be calculated + * until link time. + */ +#define RESERVED_SWAPPER_OFFSET (PAGE_SIZE) + #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index f0fe0cc6abe0..0deb88467111 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -87,7 +87,7 @@ static inline void __uaccess_ttbr0_disable(void) ttbr = read_sysreg(ttbr1_el1); ttbr &= ~TTBR_ASID_MASK; /* reserved_pg_dir placed before swapper_pg_dir */ - write_sysreg(ttbr - PAGE_SIZE, ttbr0_el1); + write_sysreg(ttbr - RESERVED_SWAPPER_OFFSET, ttbr0_el1); isb(); /* Set reserved ASID */ write_sysreg(ttbr, ttbr1_el1); diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 4c0b0c89ad59..a03a5300bce9 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -316,3 +316,6 @@ ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, * If padding is applied before .head.text, virt<->phys conversions will fail. */ ASSERT(_text == KIMAGE_VADDR, "HEAD is misaligned") + +ASSERT(swapper_pg_dir - reserved_pg_dir == RESERVED_SWAPPER_OFFSET, + "RESERVED_SWAPPER_OFFSET is wrong!") -- cgit v1.2.3 From 0188a894c390e51475274ece76b4d601782d709e Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Tue, 2 Feb 2021 12:36:58 +0000 Subject: arm64: vmlinux.ld.S: add assertion for tramp_pg_dir offset Add TRAMP_SWAPPER_OFFSET and use that instead of hardcoding the offset between swapper_pg_dir and tramp_pg_dir. Then use TRAMP_SWAPPER_OFFSET to assert that the offset is correct at link time. 
Signed-off-by: Joey Gouly Reviewed-by: Mark Rutland Tested-by: Mark Rutland Link: https://lore.kernel.org/r/20210202123658.22308-3-joey.gouly@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 6 ++++++ arch/arm64/kernel/entry.S | 4 ++-- arch/arm64/kernel/vmlinux.lds.S | 5 +++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index f6bf8a972a16..6c2674937665 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -165,6 +165,12 @@ */ #define RESERVED_SWAPPER_OFFSET (PAGE_SIZE) +/* + * Open-coded (swapper_pg_dir - tramp_pg_dir) as this cannot be calculated + * until link time. + */ +#define TRAMP_SWAPPER_OFFSET (2 * PAGE_SIZE) + #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index c9bae73f2621..c6aee646eb6b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -805,7 +805,7 @@ SYM_CODE_END(ret_to_user) // Move from tramp_pg_dir to swapper_pg_dir .macro tramp_map_kernel, tmp mrs \tmp, ttbr1_el1 - add \tmp, \tmp, #(2 * PAGE_SIZE) + add \tmp, \tmp, #TRAMP_SWAPPER_OFFSET bic \tmp, \tmp, #USER_ASID_FLAG msr ttbr1_el1, \tmp #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 @@ -825,7 +825,7 @@ alternative_else_nop_endif // Move from swapper_pg_dir to tramp_pg_dir .macro tramp_unmap_kernel, tmp mrs \tmp, ttbr1_el1 - sub \tmp, \tmp, #(2 * PAGE_SIZE) + sub \tmp, \tmp, #TRAMP_SWAPPER_OFFSET orr \tmp, \tmp, #USER_ASID_FLAG msr ttbr1_el1, \tmp /* diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index a03a5300bce9..68f76a96c60b 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -319,3 +319,8 @@ ASSERT(_text == KIMAGE_VADDR, "HEAD is misaligned") ASSERT(swapper_pg_dir - reserved_pg_dir == RESERVED_SWAPPER_OFFSET, "RESERVED_SWAPPER_OFFSET is wrong!") + +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +ASSERT(swapper_pg_dir - tramp_pg_dir == TRAMP_SWAPPER_OFFSET, + "TRAMP_SWAPPER_OFFSET is wrong!") +#endif -- cgit v1.2.3 From d13c613f136c9090f3863c49b2306d57ab59feba Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 3 Feb 2021 12:36:18 +0100 Subject: arm64: assembler: add cond_yield macro Add a macro cond_yield that branches to a specified label when called if the TIF_NEED_RESCHED flag is set and decreasing the preempt count would make the task preemptible again, resulting in a schedule to occur. This can be used by kernel mode SIMD code that keeps a lot of state in SIMD registers, which would make chunking the input in order to perform the cond_resched() check from C code disproportionately costly. Signed-off-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210203113626.220151-2-ardb@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index bf125c591116..27b1ea721c2d 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -745,6 +745,22 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .Lyield_out_\@ : .endm + /* + * Check whether preempt-disabled code should yield as soon as it + * is able. This is the case if re-enabling preemption a single + * time results in a preempt count of zero, and the TIF_NEED_RESCHED + * flag is set. 
(Note that the latter is stored negated in the + * top word of the thread_info::preempt_count field) + */ + .macro cond_yield, lbl:req, tmp:req +#ifdef CONFIG_PREEMPTION + get_current_task \tmp + ldr \tmp, [\tmp, #TSK_TI_PREEMPT] + sub \tmp, \tmp, #PREEMPT_DISABLE_OFFSET + cbz \tmp, \lbl +#endif + .endm + /* * This macro emits a program property note section identifying * architecture features which require special handling, mainly for -- cgit v1.2.3 From d9f1b52afa4012974b3c726ca89ae311f194e83f Mon Sep 17 00:00:00 2001 From: Zhiyuan Dai Date: Thu, 4 Feb 2021 09:43:49 +0800 Subject: arm64: improve whitespace In a few places we don't have whitespace between macro parameters, which makes them hard to read. This patch adds whitespace to clearly separate the parameters. In a few places we have unnecessary whitespace around unary operators, which is confusing. This patch removes the unnecessary whitespace. Signed-off-by: Zhiyuan Dai Link: https://lore.kernel.org/r/1612403029-5011-1-git-send-email-daizhiyuan@phytium.com.cn Signed-off-by: Will Deacon --- arch/arm64/kernel/alternative.c | 2 +- arch/arm64/kernel/module-plts.c | 2 +- arch/arm64/kernel/perf_event.c | 2 +- arch/arm64/kernel/process.c | 4 ++-- arch/arm64/kernel/traps.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index a57cffb752e8..1184c44ea2c7 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -17,7 +17,7 @@ #include #include -#define __ALT_PTR(a,f) ((void *)&(a)->f + (a)->f) +#define __ALT_PTR(a, f) ((void *)&(a)->f + (a)->f) #define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) #define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index 2e224435c024..e53493d8b208 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -131,7 +131,7 @@ u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs, } #endif -#define cmp_3way(a,b) ((a) < (b) ? -1 : (a) > (b)) +#define cmp_3way(a, b) ((a) < (b) ?
-1 : (a) > (b)) static int cmp_rela(const void *a, const void *b) { diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 3605f77ad4df..d1fec4ab4bcf 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -810,7 +810,7 @@ static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc, { int idx; - for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx ++) { + for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) { if (!test_and_set_bit(idx, cpuc->used_mask)) return idx; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 6616486a58fe..4cc1ccc8d6ab 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -304,7 +304,7 @@ void __show_regs(struct pt_regs *regs) } } -void show_regs(struct pt_regs * regs) +void show_regs(struct pt_regs *regs) { __show_regs(regs); dump_backtrace(regs, NULL, KERN_DEFAULT); @@ -587,7 +587,7 @@ unsigned long get_wchan(struct task_struct *p) ret = frame.pc; goto out; } - } while (count ++ < 16); + } while (count++ < 16); out: put_task_stack(p); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 6895ce777e7f..a05d34f0e82a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -45,7 +45,7 @@ #include #include -static const char *handler[]= { +static const char *handler[] = { "Synchronous Abort", "IRQ", "FIQ", -- cgit v1.2.3 From abd4737f67d75563d1d0cc57bd5daab026e8c2d1 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Fri, 5 Feb 2021 04:09:19 -0500 Subject: mm/arm64: Correct obsolete comment in do_page_fault() commit d8ed45c5dcd4 ("mmap locking API: use coccinelle to convert mmap_sem rwsem call sites") has converted down_read_trylock() to mmap_read_trylock(). But it forgot to update the relevant comment. Signed-off-by: Miaohe Lin Link: https://lore.kernel.org/r/20210205090919.63382-1-linmiaohe@huawei.com Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 3c40da479899..86a3877ea86f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -564,7 +564,7 @@ retry: mmap_read_lock(mm); } else { /* - * The above down_read_trylock() might have succeeded in which + * The above mmap_read_trylock() might have succeeded in which * case, we'll have missed the might_sleep() from down_read(). */ might_sleep(); -- cgit v1.2.3 From c0b15c25d25171db4b70cc0b7dbc1130ee94017d Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 3 Feb 2021 23:00:57 +0000 Subject: arm64: Extend workaround for erratum 1024718 to all versions of Cortex-A55 Erratum 1024718 affects Cortex-A55 r0p0 to r2p0. However, we only apply the workaround for r0p0 - r1p0. Unfortunately this won't be fixed in future revisions of the CPU. Thus extend the workaround to all versions of A55, to cover r2p0 and any future revisions.
Cc: stable@vger.kernel.org Cc: Catalin Marinas Cc: Will Deacon Cc: James Morse Cc: Kunihiko Hayashi Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20210203230057.3961239-1-suzuki.poulose@arm.com [will: Update Kconfig help text] Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 +- arch/arm64/kernel/cpufeature.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f39568b28ec1..3dfb25afa616 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -522,7 +522,7 @@ config ARM64_ERRATUM_1024718 help This option adds a workaround for ARM Cortex-A55 Erratum 1024718. - Affected Cortex-A55 cores (r0p0, r0p1, r1p0) could cause incorrect + Affected Cortex-A55 cores (all revisions) could cause incorrect update of the hardware dirty bit when the DBM/AP bits are updated without a break-before-make. The workaround is to disable the usage of hardware DBM locally on the affected cores. CPUs not affected by diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e99eddec0a46..db400ca77427 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1455,7 +1455,7 @@ static bool cpu_has_broken_dbm(void) /* List of CPUs which have broken DBM support. */ static const struct midr_range cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_1024718 - MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 1, 0), // A55 r0p0 -r1p0 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), /* Kryo4xx Silver (rdpe => r1p0) */ MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe), #endif -- cgit v1.2.3 From 114945d84a30a5feba8ec24d854257c78c89abd1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Feb 2021 09:57:10 +0000 Subject: arm64: Fix labels in el2_setup macros If someone happens to write the following code: b 1f init_el2_state vhe 1: [...] they will be in for a long debugging session, as the label "1f" will be resolved *inside* the init_el2_state macro instead of after it. Not really what one expects. Instead, rewrite the EL2 setup macros to use unambiguous labels, thanks to the usual macro counter trick.
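To illustrate the counter trick itself: each expansion of an assembler macro gives '\@' a unique value, so a local label derived from it can neither collide across expansions nor capture a numeric label in the caller. A minimal GNU as sketch (the skip_if_zero macro is made up for illustration and is not part of this patch):

	.macro	skip_if_zero, reg
	cbz	\reg, .Lskip_\@		// \@ expands to a per-expansion counter
	// ... do something with \reg ...
.Lskip_\@:
	.endm

	b	1f			// now resolves *after* the expansion, as intended
	skip_if_zero x0
1: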
Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier Acked-by: David Brazdil Link: https://lore.kernel.org/r/20210208095732.3267263-2-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/el2_setup.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index a7f5a1bbc8ac..540116de80bf 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -45,24 +45,24 @@ mrs x1, id_aa64dfr0_el1 sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4 cmp x0, #1 - b.lt 1f // Skip if no PMU present + b.lt .Lskip_pmu_\@ // Skip if no PMU present mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to -1: +.Lskip_pmu_\@: csel x2, xzr, x0, lt // all PMU counters from EL1 /* Statistical profiling */ ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 - cbz x0, 3f // Skip if SPE not present + cbz x0, .Lskip_spe_\@ // Skip if SPE not present .ifeqs "\mode", "nvhe" mrs_s x0, SYS_PMBIDR_EL1 // If SPE available at EL2, and x0, x0, #(1 << SYS_PMBIDR_EL1_P_SHIFT) - cbnz x0, 2f // then permit sampling of physical + cbnz x0, .Lskip_spe_el2_\@ // then permit sampling of physical mov x0, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \ 1 << SYS_PMSCR_EL2_PA_SHIFT) msr_s SYS_PMSCR_EL2, x0 // addresses and physical counter -2: +.Lskip_spe_el2_\@: mov x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) orr x2, x2, x0 // If we don't have VHE, then // use EL1&0 translation. @@ -71,7 +71,7 @@ // and disable access from EL1 .endif -3: +.Lskip_spe_\@: msr mdcr_el2, x2 // Configure debug traps .endm @@ -79,9 +79,9 @@ .macro __init_el2_lor mrs x1, id_aa64mmfr1_el1 ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4 - cbz x0, 1f + cbz x0, .Lskip_lor_\@ msr_s SYS_LORC_EL1, xzr -1: +.Lskip_lor_\@: .endm /* Stage-2 translation */ @@ -93,7 +93,7 @@ .macro __init_el2_gicv3 mrs x0, id_aa64pfr0_el1 ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4 - cbz x0, 1f + cbz x0, .Lskip_gicv3_\@ mrs_s x0, SYS_ICC_SRE_EL2 orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 @@ -103,7 +103,7 @@ mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, tbz x0, #0, 1f // and check that it sticks msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults -1: +.Lskip_gicv3_\@: .endm .macro __init_el2_hstr @@ -128,14 +128,14 @@ .macro __init_el2_nvhe_sve mrs x1, id_aa64pfr0_el1 ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4 - cbz x1, 1f + cbz x1, .Lskip_sve_\@ bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps msr cptr_el2, x0 // Disable copro. traps to EL2 isb mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector msr_s SYS_ZCR_EL2, x1 // length for EL1. -1: +.Lskip_sve_\@: .endm .macro __init_el2_nvhe_prepare_eret -- cgit v1.2.3 From b161f92482426a7323884d57cbae683812909988 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Feb 2021 09:57:11 +0000 Subject: arm64: Fix outdated TCR setup comment The arm64 kernel has long been able to use more than 39-bit VAs. Since day one, actually. Let's rewrite the offending comment. Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Acked-by: David Brazdil Link: https://lore.kernel.org/r/20210208095732.3267263-3-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/mm/proc.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 1f7ee8c8b7b8..ece785477bdc 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -464,8 +464,8 @@ SYM_FUNC_START(__cpu_setup) #endif msr mair_el1, x5 /* - * Set/prepare TCR and TTBR.
We use 512GB (39-bit) address range for - * both user and kernel. + * Set/prepare TCR and TTBR. TCR_EL1.T1SZ gets further + * adjusted if the kernel is compiled with 52bit VA support. */ mov_q x10, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \ -- cgit v1.2.3 From 8cc8a32415364e475c25277b507f06f67c47ca9a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Feb 2021 09:57:12 +0000 Subject: arm64: Turn the MMU-on sequence into a macro Turning the MMU on is a popular sport in the arm64 kernel, and we do it more than once, or even twice. As we are about to add even more, let's turn it into a macro. No expected functional change. Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Acked-by: David Brazdil Link: https://lore.kernel.org/r/20210208095732.3267263-4-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 17 +++++++++++++++++ arch/arm64/kernel/head.S | 19 ++++--------------- arch/arm64/mm/proc.S | 12 +----------- 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index bf125c591116..8cded93f99c3 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -675,6 +675,23 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .endif .endm +/* + * Set SCTLR_EL1 to the passed value, and invalidate the local icache + * in the process. This is called when setting the MMU on. + */ +.macro set_sctlr_el1, reg + msr sctlr_el1, \reg + isb + /* + * Invalidate the local I-cache so that any instructions fetched + * speculatively from the PoC are discarded, since they may have + * been dynamically patched at the PoU. + */ + ic iallu + dsb nsh + isb +.endm + /* * Check whether to yield to another runnable task from kernel mode NEON code * (which runs with preemption disabled). diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index a0dc987724ed..28e9735302df 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -703,16 +703,9 @@ SYM_FUNC_START(__enable_mmu) offset_ttbr1 x1, x3 msr ttbr1_el1, x1 // load TTBR1 isb - msr sctlr_el1, x0 - isb - /* - * Invalidate the local I-cache so that any instructions fetched - * speculatively from the PoC are discarded, since they may have - * been dynamically patched at the PoU. - */ - ic iallu - dsb nsh - isb + + set_sctlr_el1 x0 + ret SYM_FUNC_END(__enable_mmu) @@ -883,11 +876,7 @@ SYM_FUNC_START_LOCAL(__primary_switch) tlbi vmalle1 // Remove any stale TLB entries dsb nsh - msr sctlr_el1, x19 // re-enable the MMU - isb - ic iallu // flush instructions fetched - dsb nsh // via old mapping - isb + set_sctlr_el1 x19 // re-enable the MMU bl __relocate_kernel #endif diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index ece785477bdc..c967bfd30d2b 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -291,17 +291,7 @@ skip_pgd: /* We're done: fire up the MMU again */ mrs x17, sctlr_el1 orr x17, x17, #SCTLR_ELx_M - msr sctlr_el1, x17 - isb - - /* - * Invalidate the local I-cache so that any instructions fetched - * speculatively from the PoC are discarded, since they may have - * been dynamically patched at the PoU. 
- */ - ic iallu - dsb nsh - isb + set_sctlr_el1 x17 /* Set the flag to zero to indicate that we're all done */ str wzr, [flag_ptr] -- cgit v1.2.3 From f359182291c757cdf77bcd014c025d1ed6b87662 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Feb 2021 09:57:13 +0000 Subject: arm64: Provide an 'upgrade to VHE' stub hypercall As we are about to change the way a VHE system boots, let's provide the core helper, in the form of a stub hypercall that enables VHE and replicates the full EL1 context at EL2, thanks to EL1 and VHE-EL2 being extremely similar. On exception return, the kernel carries on at EL2. Fancy! Nothing calls this new hypercall yet, so no functional change. Signed-off-by: Marc Zyngier Acked-by: David Brazdil Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210208095732.3267263-5-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/virt.h | 7 +++- arch/arm64/kernel/hyp-stub.S | 76 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 80 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index ee6a48df89d9..7379f35ae2c6 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -35,8 +35,13 @@ */ #define HVC_RESET_VECTORS 2 +/* + * HVC_VHE_RESTART - Upgrade the CPU from EL1 to EL2, if possible + */ +#define HVC_VHE_RESTART 3 + /* Max number of HYP stub hypercalls */ -#define HVC_STUB_HCALL_NR 3 +#define HVC_STUB_HCALL_NR 4 /* Error returned when an invalid stub number is passed into x0 */ #define HVC_STUB_ERR 0xbadca11 diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 160f5881a0b7..3f3dbbe8914d 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -8,9 +8,9 @@ #include #include -#include #include +#include #include #include #include @@ -47,10 +47,13 @@ SYM_CODE_END(__hyp_stub_vectors) SYM_CODE_START_LOCAL(el1_sync) cmp x0, #HVC_SET_VECTORS - b.ne 2f + b.ne 1f msr vbar_el2, x1 b 9f +1: cmp x0, #HVC_VHE_RESTART + b.eq mutate_to_vhe + 2: cmp x0, #HVC_SOFT_RESTART b.ne 3f mov x0, x2 @@ -70,6 +73,75 @@ SYM_CODE_START_LOCAL(el1_sync) eret SYM_CODE_END(el1_sync) +// nVHE? No way! Give me the real thing! +SYM_CODE_START_LOCAL(mutate_to_vhe) + // Be prepared to fail + mov_q x0, HVC_STUB_ERR + + // Sanity check: MMU *must* be off + mrs x1, sctlr_el2 + tbnz x1, #0, 1f + + // Needs to be VHE capable, obviously + mrs x1, id_aa64mmfr1_el1 + ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4 + cbz x1, 1f + + // Engage the VHE magic! 
+ mov_q x0, HCR_HOST_VHE_FLAGS + msr hcr_el2, x0 + isb + + // Doesn't do much on VHE, but still, worth a shot + init_el2_state vhe + + // Use the EL1 allocated stack, per-cpu offset + mrs x0, sp_el1 + mov sp, x0 + mrs x0, tpidr_el1 + msr tpidr_el2, x0 + + // FP configuration, vectors + mrs_s x0, SYS_CPACR_EL12 + msr cpacr_el1, x0 + mrs_s x0, SYS_VBAR_EL12 + msr vbar_el1, x0 + + // Transfer the MM state from EL1 to EL2 + mrs_s x0, SYS_TCR_EL12 + msr tcr_el1, x0 + mrs_s x0, SYS_TTBR0_EL12 + msr ttbr0_el1, x0 + mrs_s x0, SYS_TTBR1_EL12 + msr ttbr1_el1, x0 + mrs_s x0, SYS_MAIR_EL12 + msr mair_el1, x0 + isb + + // Invalidate TLBs before enabling the MMU + tlbi vmalle1 + dsb nsh + + // Enable the EL2 S1 MMU, as set up from EL1 + mrs_s x0, SYS_SCTLR_EL12 + set_sctlr_el1 x0 + + // Disable the EL1 S1 MMU for a good measure + mov_q x0, INIT_SCTLR_EL1_MMU_OFF + msr_s SYS_SCTLR_EL12, x0 + + // Hack the exception return to stay at EL2 + mrs x0, spsr_el1 + and x0, x0, #~PSR_MODE_MASK + mov x1, #PSR_MODE_EL2h + orr x0, x0, x1 + msr spsr_el1, x0 + + mov x0, xzr + +1: eret +SYM_CODE_END(mutate_to_vhe) + .macro invalid_vector label SYM_CODE_START_LOCAL(\label) b \label -- cgit v1.2.3 From 6459b8469753e9feaa8b34691d097cffad905931 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 2 Feb 2021 12:03:41 +0000 Subject: arm64: entry: consolidate Cortex-A76 erratum 1463225 workaround The workaround for Cortex-A76 erratum 1463225 is split across the syscall and debug handlers in separate files. This structure currently forces us to do some redundant work for debug exceptions from EL0, is a little difficult to follow, and gets in the way of some future rework of the exception entry code as it requires exceptions to be unmasked late in the syscall handling path. To simplify things, and as a preparatory step for future rework of exception entry, this patch moves all the workaround logic into entry-common.c. As the debug handler only needs to run for EL1 debug exceptions, we no longer call it for EL0 debug exceptions, and no longer need to check user_mode(regs) as this is always false. For clarity cortex_a76_erratum_1463225_debug_handler() is changed to return bool. In the SVC path, the workaround is applied earlier, but this should have no functional impact as exceptions are still masked. In the debug path we run the fixup before explicitly disabling preemption, but we will not attempt to preempt before returning from the exception. There should be no functional change as a result of this patch. 
Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20210202120341.28858-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 2 -- arch/arm64/kernel/entry-common.c | 54 +++++++++++++++++++++++++++++++++++++++- arch/arm64/kernel/syscall.c | 30 ---------------------- arch/arm64/mm/fault.c | 32 ------------------------ 4 files changed, 53 insertions(+), 65 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index a63428301f42..506a1cd37973 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -107,8 +107,6 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap) } #ifdef CONFIG_ARM64_ERRATUM_1463225 -DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); - static bool has_cortex_a76_erratum_1463225(const struct arm64_cpu_capabilities *entry, int scope) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 5346953e4382..9d3588450473 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -109,6 +109,55 @@ asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) exit_to_kernel_mode(regs); } +#ifdef CONFIG_ARM64_ERRATUM_1463225 +static DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); + +static void cortex_a76_erratum_1463225_svc_handler(void) +{ + u32 reg, val; + + if (!unlikely(test_thread_flag(TIF_SINGLESTEP))) + return; + + if (!unlikely(this_cpu_has_cap(ARM64_WORKAROUND_1463225))) + return; + + __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1); + reg = read_sysreg(mdscr_el1); + val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE; + write_sysreg(val, mdscr_el1); + asm volatile("msr daifclr, #8"); + isb(); + + /* We will have taken a single-step exception by this point */ + + write_sysreg(reg, mdscr_el1); + __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 0); +} + +static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) +{ + if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa)) + return false; + + /* + * We've taken a dummy step exception from the kernel to ensure + * that interrupts are re-enabled on the syscall path. Return back + * to cortex_a76_erratum_1463225_svc_handler() with debug exceptions + * masked so that we can safely restore the mdscr and get on with + * handling the syscall. 
+ */ + regs->pstate |= PSR_D_BIT; + return true; +} +#else /* CONFIG_ARM64_ERRATUM_1463225 */ +static void cortex_a76_erratum_1463225_svc_handler(void) { } +static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) +{ + return false; +} +#endif /* CONFIG_ARM64_ERRATUM_1463225 */ + static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -186,7 +235,8 @@ static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); arm64_enter_el1_dbg(regs); - do_debug_exception(far, esr, regs); + if (!cortex_a76_erratum_1463225_debug_handler(regs)) + do_debug_exception(far, esr, regs); arm64_exit_el1_dbg(regs); } @@ -362,6 +412,7 @@ static void noinstr el0_svc(struct pt_regs *regs) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); enter_from_user_mode(); + cortex_a76_erratum_1463225_svc_handler(); do_el0_svc(regs); } @@ -439,6 +490,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); enter_from_user_mode(); + cortex_a76_erratum_1463225_svc_handler(); do_el0_svc_compat(regs); } diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index c2877c332f2d..b9cf12b271d7 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -65,35 +65,6 @@ static inline bool has_syscall_work(unsigned long flags) int syscall_trace_enter(struct pt_regs *regs); void syscall_trace_exit(struct pt_regs *regs); -#ifdef CONFIG_ARM64_ERRATUM_1463225 -DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); - -static void cortex_a76_erratum_1463225_svc_handler(void) -{ - u32 reg, val; - - if (!unlikely(test_thread_flag(TIF_SINGLESTEP))) - return; - - if (!unlikely(this_cpu_has_cap(ARM64_WORKAROUND_1463225))) - return; - - __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1); - reg = read_sysreg(mdscr_el1); - val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE; - write_sysreg(val, mdscr_el1); - asm volatile("msr daifclr, #8"); - isb(); - - /* We will have taken a single-step exception by this point */ - - write_sysreg(reg, mdscr_el1); - __this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 0); -} -#else -static void cortex_a76_erratum_1463225_svc_handler(void) { } -#endif /* CONFIG_ARM64_ERRATUM_1463225 */ - static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, const syscall_fn_t syscall_table[]) { @@ -120,7 +91,6 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, * (Similarly for HVC and SMC elsewhere.) */ - cortex_a76_erratum_1463225_svc_handler(); local_daif_restore(DAIF_PROCCTX); if (flags & _TIF_MTE_ASYNC_FAULT) { diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 3c40da479899..d0c23d206aab 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -874,44 +874,12 @@ static void debug_exception_exit(struct pt_regs *regs) } NOKPROBE_SYMBOL(debug_exception_exit); -#ifdef CONFIG_ARM64_ERRATUM_1463225 -DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); - -static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) -{ - if (user_mode(regs)) - return 0; - - if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa)) - return 0; - - /* - * We've taken a dummy step exception from the kernel to ensure - * that interrupts are re-enabled on the syscall path. Return back - * to cortex_a76_erratum_1463225_svc_handler() with debug exceptions - * masked so that we can safely restore the mdscr and get on with - * handling the syscall. 
- */ - regs->pstate |= PSR_D_BIT; - return 1; -} -#else -static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) -{ - return 0; -} -#endif /* CONFIG_ARM64_ERRATUM_1463225 */ -NOKPROBE_SYMBOL(cortex_a76_erratum_1463225_debug_handler); - void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, struct pt_regs *regs) { const struct fault_info *inf = esr_to_debug_fault_info(esr); unsigned long pc = instruction_pointer(regs); - if (cortex_a76_erratum_1463225_debug_handler(regs)) - return; - debug_exception_enter(regs); if (user_mode(regs) && !is_ttbr0_addr(pc)) -- cgit v1.2.3 From 0c93df9622d4d921bcd0dc83f71fed9e98f5119f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Feb 2021 09:57:14 +0000 Subject: arm64: Initialise as nVHE before switching to VHE As we are aiming to be able to control whether we enable VHE or not, let's always drop down to EL1 first, and only then upgrade to VHE if at all possible. This means that if the kernel is booted at EL2, we always start with an nVHE init, drop to EL1 to initialise the kernel, and only then upgrade the kernel EL to EL2 if possible (the process is obviously shortened for secondary CPUs). The resume path is handled similarly to a secondary CPU boot. Signed-off-by: Marc Zyngier Acked-by: David Brazdil Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210208095732.3267263-6-maz@kernel.org [will: Avoid calling switch_to_vhe twice on kaslr path] Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 38 ++------------------------------------ arch/arm64/kernel/hyp-stub.S | 24 ++++++++++++++++++++++++ arch/arm64/kernel/sleep.S | 1 + 3 files changed, 27 insertions(+), 36 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 28e9735302df..ec66dc061b0c 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -447,6 +447,7 @@ SYM_FUNC_START_LOCAL(__primary_switched) ret // to __primary_switch() 0: #endif + bl switch_to_vhe // Prefer VHE if possible add sp, sp, #16 mov x29, #0 mov x30, #0 @@ -493,42 +494,6 @@ SYM_INNER_LABEL(init_el1, SYM_L_LOCAL) eret SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) -#ifdef CONFIG_ARM64_VHE - /* - * Check for VHE being present. x2 being non-zero indicates that we - * do have VHE, and that the kernel is intended to run at EL2. - */ - mrs x2, id_aa64mmfr1_el1 - ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 -#else - mov x2, xzr -#endif - cbz x2, init_el2_nvhe - - /* - * When VHE _is_ in use, EL1 will not be used in the host and - * requires no configuration, and all non-hyp-specific EL2 setup - * will be done via the _EL1 system register aliases in __cpu_setup. - */ - mov_q x0, HCR_HOST_VHE_FLAGS - msr hcr_el2, x0 - isb - - init_el2_state vhe - - isb - - mov_q x0, INIT_PSTATE_EL2 - msr spsr_el2, x0 - msr elr_el2, lr - mov w0, #BOOT_CPU_MODE_EL2 - eret - -SYM_INNER_LABEL(init_el2_nvhe, SYM_L_LOCAL) - /* - * When VHE is not in use, early init of EL2 and EL1 needs to be - * done here. - */ mov_q x0, INIT_SCTLR_EL1_MMU_OFF msr sctlr_el1, x0 @@ -623,6 +588,7 @@ SYM_FUNC_START_LOCAL(secondary_startup) /* * Common entry point for secondary CPUs.
*/ + bl switch_to_vhe bl __cpu_secondary_check52bitva bl __cpu_setup // initialise processor adrp x1, swapper_pg_dir diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 3f3dbbe8914d..373ed2213e1d 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -190,3 +190,27 @@ SYM_FUNC_START(__hyp_reset_vectors) hvc #0 ret SYM_FUNC_END(__hyp_reset_vectors) + +/* + * Entry point to switch to VHE if deemed capable + */ +SYM_FUNC_START(switch_to_vhe) +#ifdef CONFIG_ARM64_VHE + // Need to have booted at EL2 + adr_l x1, __boot_cpu_mode + ldr w0, [x1] + cmp w0, #BOOT_CPU_MODE_EL2 + b.ne 1f + + // and still be at EL1 + mrs x0, CurrentEL + cmp x0, #CurrentEL_EL1 + b.ne 1f + + // Turn the world upside down + mov x0, #HVC_VHE_RESTART + hvc #0 +1: +#endif + ret +SYM_FUNC_END(switch_to_vhe) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 6bdef7362c0e..5bfd9b87f85d 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -100,6 +100,7 @@ SYM_FUNC_END(__cpu_suspend_enter) .pushsection ".idmap.text", "awx" SYM_CODE_START(cpu_resume) bl init_kernel_el + bl switch_to_vhe bl __cpu_setup /* enable the MMU early - so we can access sleep_save_stash by va */ adrp x1, swapper_pg_dir -- cgit v1.2.3 From c6f8c92f3f368d345c38aea5cc0e60515bcb159e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Feb 2021 09:57:15 +0000 Subject: arm64: Drop early setting of MDCR_EL2.TPMS When running VHE, we set MDCR_EL2.TPMS very early on to force the trapping of EL1 SPE accesses to EL2. However: - we are running with HCR_EL2.{E2H,TGE}={1,1}, meaning that there is no EL1 to trap from - before entering a guest, we call kvm_arm_setup_debug(), which sets MDCR_EL2_TPMS in the per-vcpu shadow mdcr_el2, which gets applied on entry by __activate_traps_common(). The early setting of MDCR_EL2.TPMS is therefore useless and can be dropped. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210208095732.3267263-7-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/el2_setup.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 540116de80bf..56c9e1cef180 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -66,9 +66,6 @@ mov x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) orr x2, x2, x0 // If we don't have VHE, then // use EL1&0 translation. -.else - orr x2, x2, #MDCR_EL2_TPMS // For VHE, use EL2 translation - // and disable access from EL1 .endif -- cgit v1.2.3 From 19e87e131915a2389a08874092a82fe5aa0f8952 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Feb 2021 09:57:16 +0000 Subject: arm64: Move VHE-specific SPE setup to mutate_to_vhe() There isn't much that a VHE kernel needs on top of whatever has been done for nVHE, so let's move the little we need to the VHE stub (the SPE setup), and drop the init_el2_state macro. No expected functional change.
Signed-off-by: Marc Zyngier Acked-by: David Brazdil Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210208095732.3267263-8-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/hyp-stub.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 373ed2213e1d..6229315d533d 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -92,9 +92,6 @@ SYM_CODE_START_LOCAL(mutate_to_vhe) msr hcr_el2, x0 isb - // Doesn't do much on VHE, but still, worth a shot - init_el2_state vhe - // Use the EL1 allocated stack, per-cpu offset mrs x0, sp_el1 mov sp, x0 @@ -107,6 +104,11 @@ SYM_CODE_START_LOCAL(mutate_to_vhe) mrs_s x0, SYS_VBAR_EL12 msr vbar_el1, x0 + // Use EL2 translations for SPE and disable access from EL1 + mrs x0, mdcr_el2 + bic x0, x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) + msr mdcr_el2, x0 + // Transfer the MM state from EL1 to EL2 mrs_s x0, SYS_TCR_EL12 msr tcr_el1, x0 -- cgit v1.2.3 From e2df464173f0b585adb958a09536eae2cd1dbefd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Feb 2021 09:57:17 +0000 Subject: arm64: Simplify init_el2_state to be non-VHE only As init_el2_state is now nVHE only, let's simplify it and drop the VHE setup. Signed-off-by: Marc Zyngier Acked-by: David Brazdil Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210208095732.3267263-9-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/el2_setup.h | 33 ++++++++------------------------- arch/arm64/kernel/head.S | 2 +- arch/arm64/kvm/hyp/nvhe/hyp-init.S | 2 +- 3 files changed, 10 insertions(+), 27 deletions(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 56c9e1cef180..d77d358f9395 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -32,16 +32,14 @@ * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in * EL2. */ -.macro __init_el2_timers mode -.ifeqs "\mode", "nvhe" +.macro __init_el2_timers mrs x0, cnthctl_el2 orr x0, x0, #3 // Enable EL1 physical timers msr cnthctl_el2, x0 -.endif msr cntvoff_el2, xzr // Clear virtual offset .endm -.macro __init_el2_debug mode +.macro __init_el2_debug mrs x1, id_aa64dfr0_el1 sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4 cmp x0, #1 @@ -55,7 +53,6 @@ ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 cbz x0, .Lskip_spe_\@ // Skip if SPE not present -.ifeqs "\mode", "nvhe" mrs_s x0, SYS_PMBIDR_EL1 // If SPE available at EL2, and x0, x0, #(1 << SYS_PMBIDR_EL1_P_SHIFT) cbnz x0, .Lskip_spe_el2_\@ // then permit sampling of physical @@ -66,7 +63,6 @@ mov x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) orr x2, x2, x0 // If we don't have VHE, then // use EL1&0 translation. -.endif .Lskip_spe_\@: msr mdcr_el2, x2 // Configure debug traps @@ -142,37 +138,24 @@ /** * Initialize EL2 registers to sane values. This should be called early on all - * cores that were booted in EL2. + * cores that were booted in EL2. Note that everything gets initialised as + * if VHE was not available. The kernel context will be upgraded to VHE + * if possible later on in the boot process. * * Regs: x0, x1 and x2 are clobbered.
*/ -.macro init_el2_state mode -.ifnes "\mode", "vhe" -.ifnes "\mode", "nvhe" -.error "Invalid 'mode' argument" -.endif -.endif - +.macro init_el2_state __init_el2_sctlr - __init_el2_timers \mode - __init_el2_debug \mode + __init_el2_timers + __init_el2_debug __init_el2_lor __init_el2_stage2 __init_el2_gicv3 __init_el2_hstr - - /* - * When VHE is not in use, early init of EL2 needs to be done here. - * When VHE _is_ in use, EL1 will not be used in the host and - * requires no configuration, and all non-hyp-specific EL2 setup - * will be done via the _EL1 system register aliases in __cpu_setup. - */ -.ifeqs "\mode", "nvhe" __init_el2_nvhe_idregs __init_el2_nvhe_cptr __init_el2_nvhe_sve __init_el2_nvhe_prepare_eret -.endif .endm #endif /* __ARM_KVM_INIT_H__ */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index ec66dc061b0c..0a46f722f051 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -501,7 +501,7 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) msr hcr_el2, x0 isb - init_el2_state nvhe + init_el2_state /* Hypervisor stub */ adr_l x0, __hyp_stub_vectors diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 31b060a44045..222cfc3e7190 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -189,7 +189,7 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu) 2: msr SPsel, #1 // We want to use SP_EL{1,2} /* Initialize EL2 CPU state to sane values. */ - init_el2_state nvhe // Clobbers x0..x2 + init_el2_state // Clobbers x0..x2 /* Enable MMU, set vectors and stack. */ mov x0, x28 -- cgit v1.2.3 From d077cb3cb90470f8bd7dbe357a474e13589390b9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Feb 2021 09:57:18 +0000 Subject: arm64: Move SCTLR_EL1 initialisation to EL-agnostic code We can now move the initial SCTLR_EL1 setup to be used for both EL1 and EL2 setup. Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Acked-by: David Brazdil Link: https://lore.kernel.org/r/20210208095732.3267263-10-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 0a46f722f051..2c1b8ffa3d3e 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -479,13 +479,14 @@ EXPORT_SYMBOL(kimage_vaddr) * booted in EL1 or EL2 respectively. */ SYM_FUNC_START(init_kernel_el) + mov_q x0, INIT_SCTLR_EL1_MMU_OFF + msr sctlr_el1, x0 + mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 b.eq init_el2 SYM_INNER_LABEL(init_el1, SYM_L_LOCAL) - mov_q x0, INIT_SCTLR_EL1_MMU_OFF - msr sctlr_el1, x0 isb mov_q x0, INIT_PSTATE_EL1 msr spsr_el1, x0 @@ -494,9 +495,6 @@ SYM_INNER_LABEL(init_el1, SYM_L_LOCAL) eret SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) - mov_q x0, INIT_SCTLR_EL1_MMU_OFF - msr sctlr_el1, x0 - mov_q x0, HCR_HOST_NVHE_FLAGS msr hcr_el2, x0 isb -- cgit v1.2.3 From 8f266a5d878ad38fbd43e41e22847650f51d4734 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Feb 2021 09:57:19 +0000 Subject: arm64: cpufeature: Add global feature override facility Add a facility to globally override a feature, no matter what the HW says. Yes, this sounds dangerous, but we do respect the "safe" value for a given feature. This doesn't mean the user doesn't need to know what they are doing. Nothing uses this yet, so we are pretty safe. For now. 
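The override is a plain val/mask pair: bits set in 'mask' are taken from 'val', everything else comes from the hardware. A minimal C sketch of the idea (apply_override() is a hypothetical helper for illustration; the in-tree logic lives in init_cpu_ftr_reg(), shown in the diff below):

	static u64 apply_override(u64 hw_val, const struct arm64_ftr_override *o)
	{
		/* overridden bits come from o->val, the rest from the CPU */
		return (hw_val & ~o->mask) | (o->val & o->mask);
	}

The extra twist below is that an override which fails the arm64_ftr_safe_value() check is stripped field by field ("ignoring override") rather than applied blindly.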
Signed-off-by: Marc Zyngier Reviewed-by: Suzuki K Poulose Acked-by: David Brazdil Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20210208095732.3267263-11-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 6 +++++ arch/arm64/kernel/cpufeature.c | 45 ++++++++++++++++++++++++++++++++----- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 9a555809b89c..b1f53147e2b2 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -63,6 +63,11 @@ struct arm64_ftr_bits { s64 safe_val; /* safe value for FTR_EXACT features */ }; +struct arm64_ftr_override { + u64 val; + u64 mask; +}; + /* * @arm64_ftr_reg - Feature register * @strict_mask Bits which should match across all CPUs for sanity. @@ -74,6 +79,7 @@ struct arm64_ftr_reg { u64 user_mask; u64 sys_val; u64 user_val; + struct arm64_ftr_override *override; const struct arm64_ftr_bits *ftr_bits; }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e99eddec0a46..a4e5c619a516 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -352,9 +352,12 @@ static const struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_END, }; +static struct arm64_ftr_override __ro_after_init no_override = { }; + struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = { .name = "SYS_CTR_EL0", - .ftr_bits = ftr_ctr + .ftr_bits = ftr_ctr, + .override = &no_override, }; static const struct arm64_ftr_bits ftr_id_mmfr0[] = { @@ -544,13 +547,16 @@ static const struct arm64_ftr_bits ftr_raz[] = { ARM64_FTR_END, }; -#define ARM64_FTR_REG(id, table) { \ - .sys_id = id, \ - .reg = &(struct arm64_ftr_reg){ \ - .name = #id, \ - .ftr_bits = &((table)[0]), \ +#define ARM64_FTR_REG_OVERRIDE(id, table, ovr) { \ + .sys_id = id, \ + .reg = &(struct arm64_ftr_reg){ \ + .name = #id, \ + .override = (ovr), \ + .ftr_bits = &((table)[0]), \ }} +#define ARM64_FTR_REG(id, table) ARM64_FTR_REG_OVERRIDE(id, table, &no_override) + static const struct __ftr_reg_entry { u32 sys_id; struct arm64_ftr_reg *reg; @@ -770,6 +776,33 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { u64 ftr_mask = arm64_ftr_mask(ftrp); s64 ftr_new = arm64_ftr_value(ftrp, new); + s64 ftr_ovr = arm64_ftr_value(ftrp, reg->override->val); + + if ((ftr_mask & reg->override->mask) == ftr_mask) { + s64 tmp = arm64_ftr_safe_value(ftrp, ftr_ovr, ftr_new); + char *str = NULL; + + if (ftr_ovr != tmp) { + /* Unsafe, remove the override */ + reg->override->mask &= ~ftr_mask; + reg->override->val &= ~ftr_mask; + tmp = ftr_ovr; + str = "ignoring override"; + } else if (ftr_new != tmp) { + /* Override was valid */ + ftr_new = tmp; + str = "forced"; + } else if (ftr_ovr == tmp) { + /* Override was the safe value */ + str = "already set"; + } + + if (str) + pr_warn("%s[%d:%d]: %s to %llx\n", + reg->name, + ftrp->shift + ftrp->width - 1, + ftrp->shift, str, tmp); + } val = arm64_ftr_set_value(ftrp, val, ftr_new); -- cgit v1.2.3 From b3341ae0efa235726ad69e53ce83c6a3c445bda8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Feb 2021 09:57:20 +0000 Subject: arm64: cpufeature: Use IDreg override in __read_sysreg_by_encoding() __read_sysreg_by_encoding() is used by a bunch of cpufeature helpers, which should take the feature override into account. Let's do that. 
For good measure (and because we are likely to need it further down the line), make this helper available to the rest of the non-modular kernel. Code that needs to know the *real* features of a CPU can still use read_sysreg_s(), and find the bare, ugly truth. Signed-off-by: Marc Zyngier Reviewed-by: Suzuki K Poulose Acked-by: David Brazdil Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20210208095732.3267263-12-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 1 + arch/arm64/kernel/cpufeature.c | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index b1f53147e2b2..b5bf7af68691 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -606,6 +606,7 @@ void __init setup_cpu_features(void); void check_local_cpu_capabilities(void); u64 read_sanitised_ftr_reg(u32 id); +u64 __read_sysreg_by_encoding(u32 sys_id); static inline bool cpu_supports_mixed_endian_el0(void) { diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a4e5c619a516..97da9ed4b79d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1148,14 +1148,17 @@ u64 read_sanitised_ftr_reg(u32 id) EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg); #define read_sysreg_case(r) \ - case r: return read_sysreg_s(r) + case r: val = read_sysreg_s(r); break; /* * __read_sysreg_by_encoding() - Used by a STARTING cpu before cpuinfo is populated. * Read the system register on the current CPU */ -static u64 __read_sysreg_by_encoding(u32 sys_id) +u64 __read_sysreg_by_encoding(u32 sys_id) { + struct arm64_ftr_reg *regp; + u64 val; + switch (sys_id) { read_sysreg_case(SYS_ID_PFR0_EL1); read_sysreg_case(SYS_ID_PFR1_EL1); @@ -1198,6 +1201,14 @@ static u64 __read_sysreg_by_encoding(u32 sys_id) BUG(); return 0; } + + regp = get_arm64_ftr_reg(sys_id); + if (regp) { + val &= ~regp->override->mask; + val |= (regp->override->val & regp->override->mask); + } + + return val; } #include -- cgit v1.2.3 From f6f0c4362f070cab4a0cec432e82428d702ce0a6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Feb 2021 09:57:21 +0000 Subject: arm64: Extract early FDT mapping from kaslr_early_init() As we want to parse more options very early in the kernel lifetime, let's always map the FDT early. This is achieved by moving that code out of kaslr_early_init(). No functional change expected.
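In outline, the resulting split is (names as introduced by the diff below; early_fdt_map() is called from head.S with the FDT address in x0):

	early_fdt_map(dt_phys);		/* once, from __primary_switched */
	fdt = get_early_fdt_ptr();	/* later early users, e.g. kaslr_early_init() */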
Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Acked-by: David Brazdil Link: https://lore.kernel.org/r/20210208095732.3267263-13-maz@kernel.org [will: Ensue KASAN is enabled before running C code] Signed-off-by: Will Deacon --- arch/arm64/include/asm/setup.h | 11 +++++++++++ arch/arm64/kernel/head.S | 3 ++- arch/arm64/kernel/kaslr.c | 7 +++---- arch/arm64/kernel/setup.c | 15 +++++++++++++++ 4 files changed, 31 insertions(+), 5 deletions(-) create mode 100644 arch/arm64/include/asm/setup.h diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h new file mode 100644 index 000000000000..d3320618ed14 --- /dev/null +++ b/arch/arm64/include/asm/setup.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef __ARM64_ASM_SETUP_H +#define __ARM64_ASM_SETUP_H + +#include + +void *get_early_fdt_ptr(void); +void early_fdt_map(u64 dt_phys); + +#endif diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 2c1b8ffa3d3e..070d1c6ec9eb 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -436,10 +436,11 @@ SYM_FUNC_START_LOCAL(__primary_switched) #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) bl kasan_early_init #endif + mov x0, x21 // pass FDT address in x0 + bl early_fdt_map // Try mapping the FDT early #ifdef CONFIG_RANDOMIZE_BASE tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized? b.ne 0f - mov x0, x21 // pass FDT address in x0 bl kaslr_early_init // parse FDT for KASLR options cbz x0, 0f // KASLR disabled? just proceed orr x23, x23, x0 // record KASLR offset diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 1c74c45b9494..5fc86e7d01a1 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -19,6 +19,7 @@ #include #include #include +#include enum kaslr_status { KASLR_ENABLED, @@ -92,12 +93,11 @@ out: * containing function pointers) to be reinitialized, and zero-initialized * .bss variables will be reset to 0. */ -u64 __init kaslr_early_init(u64 dt_phys) +u64 __init kaslr_early_init(void) { void *fdt; u64 seed, offset, mask, module_range; unsigned long raw; - int size; /* * Set a reasonable default for module_alloc_base in case @@ -111,8 +111,7 @@ u64 __init kaslr_early_init(u64 dt_phys) * and proceed with KASLR disabled. 
We will make another * attempt at mapping the FDT in setup_machine() */ - early_fixmap_init(); - fdt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL); + fdt = get_early_fdt_ptr(); if (!fdt) { kaslr_status = KASLR_DISABLED_FDT_REMAP; return 0; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index c18aacde8bb0..61845c0821d9 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -168,6 +168,21 @@ static void __init smp_build_mpidr_hash(void) pr_warn("Large number of MPIDR hash buckets detected\n"); } +static void *early_fdt_ptr __initdata; + +void __init *get_early_fdt_ptr(void) +{ + return early_fdt_ptr; +} + +asmlinkage void __init early_fdt_map(u64 dt_phys) +{ + int fdt_size; + + early_fixmap_init(); + early_fdt_ptr = fixmap_remap_fdt(dt_phys, &fdt_size, PAGE_KERNEL); +} + static void __init setup_machine_fdt(phys_addr_t dt_phys) { int size; -- cgit v1.2.3 From 33200303553d3d74e7b980493cf363da545f887d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Feb 2021 09:57:22 +0000 Subject: arm64: cpufeature: Add an early command-line cpufeature override facility In order to be able to override CPU features at boot time, let's add a command line parser that matches options of the form "cpureg.feature=value", and store the corresponding value into the override val/mask pair. No features are currently defined, so no expected change in functionality. Signed-off-by: Marc Zyngier Acked-by: David Brazdil Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20210208095732.3267263-14-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/head.S | 1 + arch/arm64/kernel/idreg-override.c | 150 +++++++++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/kernel/idreg-override.c diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 86364ab6f13f..2262f0392857 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -17,7 +17,7 @@ obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ return_address.o cpuinfo.o cpu_errata.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ - syscall.o proton-pack.o + syscall.o proton-pack.o idreg-override.o targets += efi-entry.o diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 070d1c6ec9eb..e7f12bf49730 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -438,6 +438,7 @@ SYM_FUNC_START_LOCAL(__primary_switched) #endif mov x0, x21 // pass FDT address in x0 bl early_fdt_map // Try mapping the FDT early + bl init_feature_override // Parse cpu feature overrides #ifdef CONFIG_RANDOMIZE_BASE tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized? 
b.ne 0f diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c new file mode 100644 index 000000000000..3a347b42d07e --- /dev/null +++ b/arch/arm64/kernel/idreg-override.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Early cpufeature override framework + * + * Copyright (C) 2020 Google LLC + * Author: Marc Zyngier + */ + +#include +#include +#include + +#include +#include + +#define FTR_DESC_NAME_LEN 20 +#define FTR_DESC_FIELD_LEN 10 + +struct ftr_set_desc { + char name[FTR_DESC_NAME_LEN]; + struct arm64_ftr_override *override; + struct { + char name[FTR_DESC_FIELD_LEN]; + u8 shift; + } fields[]; +}; + +static const struct ftr_set_desc * const regs[] __initconst = { +}; + +static int __init find_field(const char *cmdline, + const struct ftr_set_desc *reg, int f, u64 *v) +{ + char opt[FTR_DESC_NAME_LEN + FTR_DESC_FIELD_LEN + 2]; + int len; + + len = snprintf(opt, ARRAY_SIZE(opt), "%s.%s=", + reg->name, reg->fields[f].name); + + if (!parameqn(cmdline, opt, len)) + return -1; + + return kstrtou64(cmdline + len, 0, v); +} + +static void __init match_options(const char *cmdline) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(regs); i++) { + int f; + + if (!regs[i]->override) + continue; + + for (f = 0; strlen(regs[i]->fields[f].name); f++) { + u64 shift = regs[i]->fields[f].shift; + u64 mask = 0xfUL << shift; + u64 v; + + if (find_field(cmdline, regs[i], f, &v)) + continue; + + regs[i]->override->val &= ~mask; + regs[i]->override->val |= (v << shift) & mask; + regs[i]->override->mask |= mask; + + return; + } + } +} + +static __init void __parse_cmdline(const char *cmdline) +{ + do { + char buf[256]; + size_t len; + int i; + + cmdline = skip_spaces(cmdline); + + for (len = 0; cmdline[len] && !isspace(cmdline[len]); len++); + if (!len) + return; + + len = min(len, ARRAY_SIZE(buf) - 1); + strncpy(buf, cmdline, len); + buf[len] = 0; + + if (strcmp(buf, "--") == 0) + return; + + cmdline += len; + + match_options(buf); + + } while (1); +} + +static __init void parse_cmdline(void) +{ + if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) { + const u8 *prop; + void *fdt; + int node; + + fdt = get_early_fdt_ptr(); + if (!fdt) + goto out; + + node = fdt_path_offset(fdt, "/chosen"); + if (node < 0) + goto out; + + prop = fdt_getprop(fdt, node, "bootargs", NULL); + if (!prop) + goto out; + + __parse_cmdline(prop); + + if (!IS_ENABLED(CONFIG_CMDLINE_EXTEND)) + return; + } + +out: + __parse_cmdline(CONFIG_CMDLINE); +} + +/* Keep checkers quiet */ +void init_feature_override(void); + +asmlinkage void __init init_feature_override(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(regs); i++) { + if (regs[i]->override) { + regs[i]->override->val = 0; + regs[i]->override->mask = 0; + } + } + + parse_cmdline(); + + for (i = 0; i < ARRAY_SIZE(regs); i++) { + if (regs[i]->override) + __flush_dcache_area(regs[i]->override, + sizeof(*regs[i]->override)); + } +} -- cgit v1.2.3 From 361db0fca7affafa920f7d91bf93b9d9da44712f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Feb 2021 09:57:23 +0000 Subject: arm64: Allow ID_AA64MMFR1_EL1.VH to be overridden from the command line As we want to be able to disable VHE at runtime, let's match "id_aa64mmfr1.vh=" from the command line as an override. This doesn't have much effect yet as our boot code doesn't look at the cpufeature, but only at the HW registers. 
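For illustration, the new override is requested like any other early cpufeature override, using the "cpureg.feature=value" spelling from the previous patch:

	id_aa64mmfr1.vh=0

i.e. treat ID_AA64MMFR1_EL1.VH as 0 whatever the hardware reports; the following patches teach the boot code to honour it.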
Signed-off-by: Marc Zyngier Acked-by: David Brazdil Acked-by: Suzuki K Poulose Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20210208095732.3267263-15-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 2 ++ arch/arm64/kernel/cpufeature.c | 5 ++++- arch/arm64/kernel/idreg-override.c | 11 +++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index b5bf7af68691..570f1b4ba3cc 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -818,6 +818,8 @@ static inline unsigned int get_vmid_bits(u64 mmfr1) return 8; } +extern struct arm64_ftr_override id_aa64mmfr1_override; + u32 get_kvm_ipa_limit(void); void dump_cpu_features(void); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 97da9ed4b79d..faada5d8bea6 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -557,6 +557,8 @@ static const struct arm64_ftr_bits ftr_raz[] = { #define ARM64_FTR_REG(id, table) ARM64_FTR_REG_OVERRIDE(id, table, &no_override) +struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override; + static const struct __ftr_reg_entry { u32 sys_id; struct arm64_ftr_reg *reg; @@ -604,7 +606,8 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), - ARM64_FTR_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1), + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1, + &id_aa64mmfr1_override), ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2), /* Op1 = 0, CRn = 1, CRm = 2 */ diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 3a347b42d07e..2da11bf60195 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -11,6 +11,7 @@ #include #include +#include #include #define FTR_DESC_NAME_LEN 20 @@ -25,7 +26,17 @@ struct ftr_set_desc { } fields[]; }; +static const struct ftr_set_desc mmfr1 __initconst = { + .name = "id_aa64mmfr1", + .override = &id_aa64mmfr1_override, + .fields = { + { "vh", ID_AA64MMFR1_VHE_SHIFT }, + {} + }, +}; + static const struct ftr_set_desc * const regs[] __initconst = { + &mmfr1, }; static int __init find_field(const char *cmdline, -- cgit v1.2.3 From 41fac42c25338f4ea295b58106c26683d893a1c6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Feb 2021 09:57:24 +0000 Subject: arm64: Honor VHE being disabled from the command-line Finally we can check whether VHE is disabled on the command line, and not enable it if that's the user's wish. 
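The assembly added to the hyp-stub below is roughly this C logic (illustrative only; the real check must run from the stub, before the MMU and any C environment are available):

	u64 ovr_val  = (id_aa64mmfr1_override.val  >> ID_AA64MMFR1_VHE_SHIFT) & 0xf;
	u64 ovr_mask = (id_aa64mmfr1_override.mask >> ID_AA64MMFR1_VHE_SHIFT) & 0xf;

	if (ovr_mask && !(ovr_val & ovr_mask))
		return HVC_STUB_ERR;	/* VH overridden to 0: stay nVHE */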
Signed-off-by: Marc Zyngier Acked-by: David Brazdil Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210208095732.3267263-16-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/asm-offsets.c | 3 +++ arch/arm64/kernel/hyp-stub.S | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 301784463587..a36e2fc330d4 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -99,6 +99,9 @@ int main(void) DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); BLANK(); + DEFINE(FTR_OVR_VAL_OFFSET, offsetof(struct arm64_ftr_override, val)); + DEFINE(FTR_OVR_MASK_OFFSET, offsetof(struct arm64_ftr_override, mask)); + BLANK(); #ifdef CONFIG_KVM DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 6229315d533d..3e08dcc924b5 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -87,6 +87,17 @@ SYM_CODE_START_LOCAL(mutate_to_vhe) ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4 cbz x1, 1f + // Check whether VHE is disabled from the command line + adr_l x1, id_aa64mmfr1_override + ldr x2, [x1, FTR_OVR_VAL_OFFSET] + ldr x1, [x1, FTR_OVR_MASK_OFFSET] + ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 + ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4 + cmp x1, xzr + and x2, x2, x1 + csinv x2, x2, xzr, ne + cbz x2, 1f + // Engage the VHE magic! mov_q x0, HCR_HOST_VHE_FLAGS msr hcr_el2, x0 -- cgit v1.2.3 From 863ace77e9ff85c06d57e9491faffae8512070de Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Feb 2021 09:57:25 +0000 Subject: arm64: Add an aliasing facility for the idreg override In order to map the override of idregs to options that a user can easily understand, let's introduce yet another option array, which maps an option to the corresponding idreg options. 
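Conceptually, each alias is a pure string rewrite fed back through the same parser (a hypothetical entry for illustration):

	{ "friendly-option",	"some_idreg.some_field=1" },

When "friendly-option" appears on the command line, the code behaves as if "some_idreg.some_field=1" had been passed instead; aliases are expanded with parse_aliases set to false, so they cannot recurse.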
Signed-off-by: Marc Zyngier Reviewed-by: Catalin Marinas Acked-by: David Brazdil Link: https://lore.kernel.org/r/20210208095732.3267263-17-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/idreg-override.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 2da11bf60195..226bac544e20 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -16,6 +16,8 @@ #define FTR_DESC_NAME_LEN 20 #define FTR_DESC_FIELD_LEN 10 +#define FTR_ALIAS_NAME_LEN 30 +#define FTR_ALIAS_OPTION_LEN 80 struct ftr_set_desc { char name[FTR_DESC_NAME_LEN]; @@ -39,6 +41,12 @@ static const struct ftr_set_desc * const regs[] __initconst = { &mmfr1, }; +static const struct { + char alias[FTR_ALIAS_NAME_LEN]; + char feature[FTR_ALIAS_OPTION_LEN]; +} aliases[] __initconst = { +}; + static int __init find_field(const char *cmdline, const struct ftr_set_desc *reg, int f, u64 *v) { @@ -81,7 +89,7 @@ static void __init match_options(const char *cmdline) } } -static __init void __parse_cmdline(const char *cmdline) +static __init void __parse_cmdline(const char *cmdline, bool parse_aliases) { do { char buf[256]; @@ -105,6 +113,9 @@ static __init void __parse_cmdline(const char *cmdline) match_options(buf); + for (i = 0; parse_aliases && i < ARRAY_SIZE(aliases); i++) + if (parameq(buf, aliases[i].alias)) + __parse_cmdline(aliases[i].feature, false); } while (1); } @@ -127,14 +138,14 @@ static __init void parse_cmdline(void) if (!prop) goto out; - __parse_cmdline(prop); + __parse_cmdline(prop, true); if (!IS_ENABLED(CONFIG_CMDLINE_EXTEND)) return; } out: - __parse_cmdline(CONFIG_CMDLINE); + __parse_cmdline(CONFIG_CMDLINE, true); } /* Keep checkers quiet */ -- cgit v1.2.3 From 1945a067f351debcd2518d9f6039b1835de08dfd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 8 Feb 2021 09:57:26 +0000 Subject: arm64: Make kvm-arm.mode={nvhe, protected} an alias of id_aa64mmfr1.vh=0 Admittedly, passing id_aa64mmfr1.vh=0 on the command-line isn't that easy to understand, and it is likely that users would much prefer to write "kvm-arm.mode=nvhe", or "...=protected". So here you go. This has the added advantage that we can now always honor the "kvm-arm.mode=protected" option, even when booting on a VHE system. Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Acked-by: David Brazdil Link: https://lore.kernel.org/r/20210208095732.3267263-18-maz@kernel.org Signed-off-by: Will Deacon --- Documentation/admin-guide/kernel-parameters.txt | 3 +++ arch/arm64/kernel/idreg-override.c | 2 ++ arch/arm64/kvm/arm.c | 3 +++ 3 files changed, 8 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a10b545c2070..41786d205e0d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2257,6 +2257,9 @@ kvm-arm.mode= [KVM,ARM] Select one of KVM/arm64's modes of operation. + nvhe: Standard nVHE-based mode, without support for + protected guests. + protected: nVHE-based mode with support for guests whose state is kept private from the host. Not valid if the kernel is running in EL2.
From 1945a067f351debcd2518d9f6039b1835de08dfd Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 8 Feb 2021 09:57:26 +0000
Subject: arm64: Make kvm-arm.mode={nvhe, protected} an alias of id_aa64mmfr1.vh=0

Admittedly, passing id_aa64mmfr1.vh=0 on the command-line isn't that
easy to understand, and it is likely that users would much prefer to
write "kvm-arm.mode=nvhe", or "...=protected". So here you go.

This has the added advantage that we can now always honor the
"kvm-arm.mode=protected" option, even when booting on a VHE system.

Signed-off-by: Marc Zyngier
Acked-by: Catalin Marinas
Acked-by: David Brazdil
Link: https://lore.kernel.org/r/20210208095732.3267263-18-maz@kernel.org
Signed-off-by: Will Deacon
---
 Documentation/admin-guide/kernel-parameters.txt | 3 +++
 arch/arm64/kernel/idreg-override.c              | 2 ++
 arch/arm64/kvm/arm.c                            | 3 +++
 3 files changed, 8 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a10b545c2070..41786d205e0d 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2257,6 +2257,9 @@
 	kvm-arm.mode=	[KVM,ARM] Select one of KVM/arm64's modes of
 			operation.
 
+			nvhe: Standard nVHE-based mode, without support for
+			      protected guests.
+
 			protected: nVHE-based mode with support for guests whose
 				   state is kept private from the host.
 				   Not valid if the kernel is running in EL2.
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 226bac544e20..b994d689d6fb 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -45,6 +45,8 @@ static const struct {
 	char	alias[FTR_ALIAS_NAME_LEN];
 	char	feature[FTR_ALIAS_OPTION_LEN];
 } aliases[] __initconst = {
+	{ "kvm-arm.mode=nvhe",		"id_aa64mmfr1.vh=0" },
+	{ "kvm-arm.mode=protected",	"id_aa64mmfr1.vh=0" },
 };
 
 static int __init find_field(const char *cmdline,
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 04c44853b103..597565a65ca2 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1966,6 +1966,9 @@ static int __init early_kvm_mode_cfg(char *arg)
 		return 0;
 	}
 
+	if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode()))
+		return 0;
+
 	return -EINVAL;
 }
 early_param("kvm-arm.mode", early_kvm_mode_cfg);
--
cgit v1.2.3

From 166cc2a4be0d80075d379b30d3e84895c878a1a8 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 8 Feb 2021 09:57:27 +0000
Subject: KVM: arm64: Document HVC_VHE_RESTART stub hypercall

For completeness, let's document the HVC_VHE_RESTART stub.

Signed-off-by: Marc Zyngier
Acked-by: David Brazdil
Link: https://lore.kernel.org/r/20210208095732.3267263-19-maz@kernel.org
Signed-off-by: Will Deacon
---
 Documentation/virt/kvm/arm/hyp-abi.rst | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/Documentation/virt/kvm/arm/hyp-abi.rst b/Documentation/virt/kvm/arm/hyp-abi.rst
index 83cadd8186fa..4d43fbc25195 100644
--- a/Documentation/virt/kvm/arm/hyp-abi.rst
+++ b/Documentation/virt/kvm/arm/hyp-abi.rst
@@ -58,6 +58,15 @@ these functions (see arch/arm{,64}/include/asm/virt.h):
   into place (arm64 only), and jump to the restart address while at HYP/EL2.
   This hypercall is not expected to return to its caller.
 
+* ::
+
+    x0 = HVC_VHE_RESTART (arm64 only)
+
+  Attempt to upgrade the kernel's exception level from EL1 to EL2 by enabling
+  the VHE mode. This is conditioned by the CPU supporting VHE, the EL2 MMU
+  being off, and VHE not being disabled by any other means (command line
+  option, for example).
+
 Any other value of r0/x0 triggers a hypervisor-specific handling,
 which is not documented here.
--
cgit v1.2.3
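Taken together with the previous two patches, the user-visible effect is a
plain textual expansion at early boot (these mappings are exactly the
aliases[] entries above):

    kvm-arm.mode=nvhe       ->  id_aa64mmfr1.vh=0
    kvm-arm.mode=protected  ->  id_aa64mmfr1.vh=0

Both spellings force the VHE field to read as zero early on, so
mutate_to_vhe backs off and the kernel runs nVHE even on VHE-capable
hardware -- which is what lets "kvm-arm.mode=protected" be honored on VHE
systems.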
From a762f4ffc3c8a434da1b712e57a80d8d10404198 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 8 Feb 2021 09:57:28 +0000
Subject: arm64: Move "nokaslr" over to the early cpufeature infrastructure

Given that the early cpufeature infrastructure has borrowed quite
a lot of code from the kaslr implementation, let's reimplement
the matching of the "nokaslr" option with it.

Signed-off-by: Marc Zyngier
Acked-by: Catalin Marinas
Acked-by: David Brazdil
Link: https://lore.kernel.org/r/20210208095732.3267263-20-maz@kernel.org
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/idreg-override.c | 15 +++++++++++++++
 arch/arm64/kernel/kaslr.c          | 36 ++----------------------------------
 2 files changed, 17 insertions(+), 34 deletions(-)

diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index b994d689d6fb..70dd70eee7a2 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -37,8 +37,22 @@ static const struct ftr_set_desc mmfr1 __initconst = {
 	},
 };
 
+extern struct arm64_ftr_override kaslr_feature_override;
+
+static const struct ftr_set_desc kaslr __initconst = {
+	.name		= "kaslr",
+#ifdef CONFIG_RANDOMIZE_BASE
+	.override	= &kaslr_feature_override,
+#endif
+	.fields		= {
+		{ "disabled", 0 },
+		{}
+	},
+};
+
 static const struct ftr_set_desc * const regs[] __initconst = {
 	&mmfr1,
+	&kaslr,
 };
 
 static const struct {
@@ -47,6 +61,7 @@ static const struct {
 } aliases[] __initconst = {
 	{ "kvm-arm.mode=nvhe",		"id_aa64mmfr1.vh=0" },
 	{ "kvm-arm.mode=protected",	"id_aa64mmfr1.vh=0" },
+	{ "nokaslr",			"kaslr.disabled=1" },
 };
 
 static int __init find_field(const char *cmdline,
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 5fc86e7d01a1..27f8939deb1b 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -51,39 +51,7 @@ static __init u64 get_kaslr_seed(void *fdt)
 	return ret;
 }
 
-static __init bool cmdline_contains_nokaslr(const u8 *cmdline)
-{
-	const u8 *str;
-
-	str = strstr(cmdline, "nokaslr");
-	return str == cmdline || (str > cmdline && *(str - 1) == ' ');
-}
-
-static __init bool is_kaslr_disabled_cmdline(void *fdt)
-{
-	if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
-		int node;
-		const u8 *prop;
-
-		node = fdt_path_offset(fdt, "/chosen");
-		if (node < 0)
-			goto out;
-
-		prop = fdt_getprop(fdt, node, "bootargs", NULL);
-		if (!prop)
-			goto out;
-
-		if (cmdline_contains_nokaslr(prop))
-			return true;
-
-		if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
-			goto out;
-
-		return false;
-	}
-out:
-	return cmdline_contains_nokaslr(CONFIG_CMDLINE);
-}
+struct arm64_ftr_override kaslr_feature_override __initdata;
 
 /*
  * This routine will be executed with the kernel mapped at its default virtual
@@ -126,7 +94,7 @@ u64 __init kaslr_early_init(void)
 	 * Check if 'nokaslr' appears on the command line, and
 	 * return 0 if that is the case.
 	 */
-	if (is_kaslr_disabled_cmdline(fdt)) {
+	if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) {
 		kaslr_status = KASLR_DISABLED_CMDLINE;
 		return 0;
 	}
--
cgit v1.2.3
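The new test in kaslr_early_init() is easier to read knowing that each
override field is 4 bits wide and that the "disabled" field sits at shift 0.
A sketch of what the early parser records for "nokaslr" (via its
"kaslr.disabled=1" expansion), based on that field convention:

	/* recorded by the early parser for "kaslr.disabled=1" */
	kaslr_feature_override.val  |= 1;	/* field value	    */
	kaslr_feature_override.mask |= 0xf;	/* field overridden */

	/* hence: non-zero exactly when the option was present */
	if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf)
		/* ... KASLR disabled on the command line ... */;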
From 93ad55b7852b324a3fd7d46910b88c81deb62357 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 8 Feb 2021 09:57:29 +0000
Subject: arm64: cpufeatures: Allow disabling of BTI from the command-line

In order to be able to disable BTI at runtime, whether it is for
testing purposes, or to work around HW issues, let's add support
for overriding the ID_AA64PFR1_EL1.BTI field.

This is further mapped on the arm64.nobti command-line alias.

Signed-off-by: Marc Zyngier
Reviewed-by: Catalin Marinas
Acked-by: David Brazdil
Tested-by: Srinivas Ramana
Link: https://lore.kernel.org/r/20210208095732.3267263-21-maz@kernel.org
Signed-off-by: Will Deacon
---
 Documentation/admin-guide/kernel-parameters.txt |  3 +++
 arch/arm64/include/asm/cpufeature.h             |  1 +
 arch/arm64/kernel/cpufeature.c                  |  4 +++-
 arch/arm64/kernel/idreg-override.c              | 11 +++++++++++
 arch/arm64/mm/mmu.c                             |  2 +-
 5 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 41786d205e0d..c00236b22c6e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -373,6 +373,9 @@
 	arcrimi=	[HW,NET] ARCnet - "RIM I" (entirely mem-mapped) cards
 			Format: <io>,<irq>,<nodeID>
 
+	arm64.nobti	[ARM64] Unconditionally disable Branch Target
+			Identification support
+
 	ataflop=	[HW,M68k]
 
 	atarimouse=	[HW,MOUSE] Atari Mouse
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 570f1b4ba3cc..30917b9a760b 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -819,6 +819,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
 }
 
 extern struct arm64_ftr_override id_aa64mmfr1_override;
+extern struct arm64_ftr_override id_aa64pfr1_override;
 
 u32 get_kvm_ipa_limit(void);
 void dump_cpu_features(void);
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index faada5d8bea6..7fbeab497adb 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -558,6 +558,7 @@ static const struct arm64_ftr_bits ftr_raz[] = {
 #define ARM64_FTR_REG(id, table) ARM64_FTR_REG_OVERRIDE(id, table, &no_override)
 
 struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
+struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
 
 static const struct __ftr_reg_entry {
 	u32			sys_id;
@@ -593,7 +594,8 @@ static const struct __ftr_reg_entry {
 
 	/* Op1 = 0, CRn = 0, CRm = 4 */
 	ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
-	ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1),
+	ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1,
+			       &id_aa64pfr1_override),
 	ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0),
 
 	/* Op1 = 0, CRn = 0, CRm = 5 */
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index 70dd70eee7a2..d691e9015c62 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -37,6 +37,15 @@ static const struct ftr_set_desc mmfr1 __initconst = {
 	},
 };
 
+static const struct ftr_set_desc pfr1 __initconst = {
+	.name		= "id_aa64pfr1",
+	.override	= &id_aa64pfr1_override,
+	.fields		= {
+		{ "bt", ID_AA64PFR1_BT_SHIFT },
+		{}
+	},
+};
+
 extern struct arm64_ftr_override kaslr_feature_override;
 
 static const struct ftr_set_desc kaslr __initconst = {
@@ -52,6 +61,7 @@ static const struct ftr_set_desc kaslr __initconst = {
 
 static const struct ftr_set_desc * const regs[] __initconst = {
 	&mmfr1,
+	&pfr1,
 	&kaslr,
 };
 
@@ -61,6 +71,7 @@ static const struct {
 } aliases[] __initconst = {
 	{ "kvm-arm.mode=nvhe",		"id_aa64mmfr1.vh=0" },
 	{ "kvm-arm.mode=protected",	"id_aa64mmfr1.vh=0" },
+	{ "arm64.nobti",		"id_aa64pfr1.bt=0" },
 	{ "nokaslr",			"kaslr.disabled=1" },
 };
 
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index ae0c3d023824..617e704c980b 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -628,7 +628,7 @@ static bool arm64_early_this_cpu_has_bti(void)
 	if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
 		return false;
 
-	pfr1 = read_sysreg_s(SYS_ID_AA64PFR1_EL1);
+	pfr1 = __read_sysreg_by_encoding(SYS_ID_AA64PFR1_EL1);
 	return cpuid_feature_extract_unsigned_field(pfr1,
 						    ID_AA64PFR1_BT_SHIFT);
 }
--
cgit v1.2.3
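Note why the one-line mm/mmu.c change matters: read_sysreg_s() returns the
raw hardware value, while __read_sysreg_by_encoding() was taught earlier in
this series to apply the idreg overrides. An illustrative comparison (the
comments are explanatory, not from the patch):

	pfr1 = read_sysreg_s(SYS_ID_AA64PFR1_EL1);		/* raw CPU value    */
	pfr1 = __read_sysreg_by_encoding(SYS_ID_AA64PFR1_EL1);	/* override applied */

Without the switch, "arm64.nobti" would be ignored on this early path and
the boot CPU would still map the kernel with BTI-guarded pages.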
From 7f6240858cf3abb75237c9ba63ec70d232573ae8 Mon Sep 17 00:00:00 2001
From: Srinivas Ramana
Date: Mon, 8 Feb 2021 09:57:30 +0000
Subject: arm64: Defer enabling pointer authentication on boot core

Defer enabling pointer authentication on the boot core until after it
is required to be enabled by the cpufeature framework. This will help
in controlling the feature dynamically with a boot parameter.

Signed-off-by: Ajay Patil
Signed-off-by: Prasad Sodagudi
Signed-off-by: Srinivas Ramana
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/1610152163-16554-2-git-send-email-sramana@codeaurora.org
Reviewed-by: Catalin Marinas
Acked-by: David Brazdil
Link: https://lore.kernel.org/r/20210208095732.3267263-22-maz@kernel.org
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/pointer_auth.h   | 10 ++++++++++
 arch/arm64/include/asm/stackprotector.h |  1 +
 arch/arm64/kernel/head.S                |  4 ----
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
index c6b4f0603024..b112a11e9302 100644
--- a/arch/arm64/include/asm/pointer_auth.h
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -76,6 +76,15 @@ static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
 	return ptrauth_clear_pac(ptr);
 }
 
+static __always_inline void ptrauth_enable(void)
+{
+	if (!system_supports_address_auth())
+		return;
+	sysreg_clear_set(sctlr_el1, 0, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB |
+					SCTLR_ELx_ENDA | SCTLR_ELx_ENDB));
+	isb();
+}
+
 #define ptrauth_thread_init_user(tsk)					\
 	ptrauth_keys_init_user(&(tsk)->thread.keys_user)
 #define ptrauth_thread_init_kernel(tsk)					\
@@ -84,6 +93,7 @@ static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
 	ptrauth_keys_switch_kernel(&(tsk)->thread.keys_kernel)
 
 #else /* CONFIG_ARM64_PTR_AUTH */
+#define ptrauth_enable()
 #define ptrauth_prctl_reset_keys(tsk, arg)	(-EINVAL)
 #define ptrauth_strip_insn_pac(lr)	(lr)
 #define ptrauth_thread_init_user(tsk)
diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h
index 7263e0bac680..33f1bb453150 100644
--- a/arch/arm64/include/asm/stackprotector.h
+++ b/arch/arm64/include/asm/stackprotector.h
@@ -41,6 +41,7 @@ static __always_inline void boot_init_stack_canary(void)
 #endif
 	ptrauth_thread_init_kernel(current);
 	ptrauth_thread_switch_kernel(current);
+	ptrauth_enable();
 }
 
 #endif	/* _ASM_STACKPROTECTOR_H */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index e7f12bf49730..1e30b5550d2a 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -404,10 +404,6 @@ SYM_FUNC_START_LOCAL(__primary_switched)
 	adr_l	x5, init_task
 	msr	sp_el0, x5			// Save thread_info
 
-#ifdef CONFIG_ARM64_PTR_AUTH
-	__ptrauth_keys_init_cpu	x5, x6, x7, x8
-#endif
-
 	adr_l	x8, vectors			// load VBAR_EL1 with virtual
 	msr	vbar_el1, x8			// vector table address
 	isb
--
cgit v1.2.3
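For readers unfamiliar with the helper: sysreg_clear_set() is roughly the
following read-modify-write (a sketch, not the exact kernel macro):

	u64 old = read_sysreg(sctlr_el1);
	u64 new = (old & ~clear) | set;	/* clear is 0 here, so just OR in */

	if (new != old)			/* skip redundant register writes */
		write_sysreg(new, sctlr_el1);

Deferring this to boot_init_stack_canary() means the SCTLR_ELx_EN* bits are
only set once system_supports_address_auth() has had a chance to consult the
(possibly overridden) ID registers.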
From f8da5752fd1b25f1ecf78a79013e2dfd2b860589 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Mon, 8 Feb 2021 09:57:31 +0000
Subject: arm64: cpufeatures: Allow disabling of Pointer Auth from the command-line

In order to be able to disable Pointer Authentication at runtime,
whether it is for testing purposes, or to work around HW issues,
let's add support for overriding the ID_AA64ISAR1_EL1.{GPI,GPA,API,APA}
fields.

This is further mapped on the arm64.nopauth command-line alias.

Signed-off-by: Marc Zyngier
Reviewed-by: Catalin Marinas
Acked-by: David Brazdil
Tested-by: Srinivas Ramana
Link: https://lore.kernel.org/r/20210208095732.3267263-23-maz@kernel.org
Signed-off-by: Will Deacon
---
 Documentation/admin-guide/kernel-parameters.txt |  3 +++
 arch/arm64/include/asm/cpufeature.h             |  1 +
 arch/arm64/kernel/cpufeature.c                  |  4 +++-
 arch/arm64/kernel/idreg-override.c              | 16 ++++++++++++++++
 4 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index c00236b22c6e..cbb51ab2be93 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -376,6 +376,9 @@
 	arm64.nobti	[ARM64] Unconditionally disable Branch Target
 			Identification support
 
+	arm64.nopauth	[ARM64] Unconditionally disable Pointer Authentication
+			support
+
 	ataflop=	[HW,M68k]
 
 	atarimouse=	[HW,MOUSE] Atari Mouse
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 30917b9a760b..61177bac49fa 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -820,6 +820,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
 
 extern struct arm64_ftr_override id_aa64mmfr1_override;
 extern struct arm64_ftr_override id_aa64pfr1_override;
+extern struct arm64_ftr_override id_aa64isar1_override;
 
 u32 get_kvm_ipa_limit(void);
 void dump_cpu_features(void);
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 7fbeab497adb..3bce87a03717 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -559,6 +559,7 @@ static const struct arm64_ftr_bits ftr_raz[] = {
 
 struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
 struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
+struct arm64_ftr_override __ro_after_init id_aa64isar1_override;
 
 static const struct __ftr_reg_entry {
 	u32			sys_id;
@@ -604,7 +605,8 @@ static const struct __ftr_reg_entry {
 
 	/* Op1 = 0, CRn = 0, CRm = 6 */
 	ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0),
-	ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1),
+	ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1,
+			       &id_aa64isar1_override),
 
 	/* Op1 = 0, CRn = 0, CRm = 7 */
 	ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0),
diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c
index d691e9015c62..dffb16682330 100644
--- a/arch/arm64/kernel/idreg-override.c
+++ b/arch/arm64/kernel/idreg-override.c
@@ -46,6 +46,18 @@ static const struct ftr_set_desc pfr1 __initconst = {
 	},
 };
 
+static const struct ftr_set_desc isar1 __initconst = {
+	.name		= "id_aa64isar1",
+	.override	= &id_aa64isar1_override,
+	.fields		= {
+		{ "gpi", ID_AA64ISAR1_GPI_SHIFT },
+		{ "gpa", ID_AA64ISAR1_GPA_SHIFT },
+		{ "api", ID_AA64ISAR1_API_SHIFT },
+		{ "apa", ID_AA64ISAR1_APA_SHIFT },
+		{}
+	},
+};
+
 extern struct arm64_ftr_override kaslr_feature_override;
 
 static const struct ftr_set_desc kaslr __initconst = {
@@ -62,6 +74,7 @@ static const struct ftr_set_desc kaslr __initconst = {
 static const struct ftr_set_desc * const regs[] __initconst = {
 	&mmfr1,
 	&pfr1,
+	&isar1,
 	&kaslr,
 };
 
@@ -72,6 +85,9 @@ static const struct {
 	{ "kvm-arm.mode=nvhe",		"id_aa64mmfr1.vh=0" },
 	{ "kvm-arm.mode=protected",	"id_aa64mmfr1.vh=0" },
 	{ "arm64.nobti",		"id_aa64pfr1.bt=0" },
+	{ "arm64.nopauth",
+	  "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
+	  "id_aa64isar1.api=0 id_aa64isar1.apa=0" },
 	{ "nokaslr",			"kaslr.disabled=1" },
 };
 
--
cgit v1.2.3
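A small C detail in the arm64.nopauth entry: the two adjacent string
literals concatenate at compile time into a single feature string, which the
one-level alias expansion then parses word by word. For instance:

	static const char feature[] =
		"id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
		"id_aa64isar1.api=0 id_aa64isar1.apa=0";
	/* sizeof(feature) == 76, comfortably under FTR_ALIAS_OPTION_LEN (80) */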
From e9c6deee00e9197e75cd6aa0d265d3d45bd7cc28 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor
Date: Mon, 8 Feb 2021 17:57:20 -0700
Subject: arm64: Make CPU_BIG_ENDIAN depend on ld.bfd or ld.lld 13.0.0+

Similar to commit 28187dc8ebd9 ("ARM: 9025/1: Kconfig: CPU_BIG_ENDIAN
depends on !LD_IS_LLD"), ld.lld prior to 13.0.0 does not properly
support aarch64 big endian, leading to the following build error when
CONFIG_CPU_BIG_ENDIAN is selected:

ld.lld: error: unknown emulation: aarch64linuxb

This has been resolved in LLVM 13. To avoid errors like this, only
allow CONFIG_CPU_BIG_ENDIAN to be selected if using ld.bfd or ld.lld
13.0.0 and newer.

While we are here: the indentation of this symbol has used spaces since
its introduction in commit a872013d6d03 ("arm64: kconfig: allow
CPU_BIG_ENDIAN to be selected"). Change it to tabs to be consistent
with kernel coding style.

Link: https://github.com/ClangBuiltLinux/linux/issues/380
Link: https://github.com/ClangBuiltLinux/linux/issues/1288
Link: https://github.com/llvm/llvm-project/commit/7605a9a009b5fa3bdac07e3131c8d82f6d08feb7
Link: https://github.com/llvm/llvm-project/commit/eea34aae2e74e9b6fbdd5b95f479bc7f397bf387
Reported-by: Arnd Bergmann
Signed-off-by: Nathan Chancellor
Reviewed-by: Nick Desaulniers
Link: https://lore.kernel.org/r/20210209005719.803608-1-nathan@kernel.org
Signed-off-by: Will Deacon
---
 arch/arm64/Kconfig | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index f39568b28ec1..912da4e2ab59 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -952,8 +952,9 @@ choice
 	  that is selected here.
 
 config CPU_BIG_ENDIAN
-       bool "Build big-endian kernel"
-       help
+	bool "Build big-endian kernel"
+	depends on !LD_IS_LLD || LLD_VERSION >= 130000
+	help
 	  Say Y if you plan on running a kernel with a big-endian userspace.
 
 config CPU_LITTLE_ENDIAN
--
cgit v1.2.3

From de591a82f41b61af4a8fce49d21b43105c5c2186 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Wed, 10 Feb 2021 11:15:11 +0000
Subject: mm: filemap: Fix microblaze build failure with 'mmu_defconfig'

Commit f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault()
codepaths") added a call to 'update_mmu_cache()' in mm/filemap.c, which
breaks the build for microblaze:

| mm/filemap.c: In function 'filemap_map_pages':
| mm/filemap.c:3153:3: error: implicit declaration of function
| 'update_mmu_cache'; did you mean 'update_mmu_tlb'?

Include asm/tlbflush.h in mm/filemap.c to make sure that the function
(or indeed, macro) is available.

Reported-by: Guenter Roeck
Tested-by: Guenter Roeck
Link: https://lore.kernel.org/r/20210209202449.GA104837@roeck-us.net
Signed-off-by: Will Deacon
---
 mm/filemap.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/filemap.c b/mm/filemap.c
index fb7a8d9b5603..2ca13227747b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -43,6 +43,7 @@
 #include <linux/ramfs.h>
 #include <linux/page_idle.h>
 #include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
 #include "internal.h"
 
 #define CREATE_TRACE_POINTS
--
cgit v1.2.3
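The "function (or indeed, macro)" hedge in the message above is the crux:
update_mmu_cache() need not be a real function, and on some architectures
(microblaze among them, per this report) it is provided by asm/tlbflush.h.
A purely illustrative shape of such a definition -- not microblaze's exact
line:

	/* no extra cache maintenance needed on this architecture */
	#define update_mmu_cache(vma, addr, ptep)	do { } while (0)

Hence the implicit-declaration error only fires on architectures where
mm/filemap.c did not already pull the header in transitively.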
From 8ee37e0f97ec66b953d202257293670efaab1daa Mon Sep 17 00:00:00 2001
From: Qi Liu
Date: Tue, 9 Feb 2021 17:42:22 +0800
Subject: drivers/perf: Replace spin_lock_irqsave to spin_lock

There is no need to do spin_lock_irqsave in the context of a hard IRQ,
so replace these calls with plain spin_lock.

Signed-off-by: Qi Liu
Link: https://lore.kernel.org/r/1612863742-1551-1-git-send-email-liuqi115@huawei.com
Signed-off-by: Will Deacon
---
 drivers/perf/arm-cci.c   | 5 ++---
 drivers/perf/xgene_pmu.c | 5 ++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index a75cf77c4de4..f81e2ec90005 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -1026,12 +1026,11 @@ static void pmu_event_set_period(struct perf_event *event)
 
 static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
 {
-	unsigned long flags;
 	struct cci_pmu *cci_pmu = dev;
 	struct cci_pmu_hw_events *events = &cci_pmu->hw_events;
 	int idx, handled = IRQ_NONE;
 
-	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+	raw_spin_lock(&events->pmu_lock);
 
 	/* Disable the PMU while we walk through the counters */
 	__cci_pmu_disable(cci_pmu);
@@ -1061,7 +1060,7 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
 	/* Enable the PMU and sync possibly overflowed counters */
 	__cci_pmu_enable_sync(cci_pmu);
 
-	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+	raw_spin_unlock(&events->pmu_lock);
 
 	return IRQ_RETVAL(handled);
 }
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 633cf07ba672..44faa51ba799 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -1234,10 +1234,9 @@ static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)
 	u32 intr_mcu, intr_mcb, intr_l3c, intr_iob;
 	struct xgene_pmu_dev_ctx *ctx;
 	struct xgene_pmu *xgene_pmu = dev_id;
-	unsigned long flags;
 	u32 val;
 
-	raw_spin_lock_irqsave(&xgene_pmu->lock, flags);
+	raw_spin_lock(&xgene_pmu->lock);
 
 	/* Get Interrupt PMU source */
 	val = readl(xgene_pmu->pcppmu_csr + PCPPMU_INTSTATUS_REG);
@@ -1273,7 +1272,7 @@ static irqreturn_t xgene_pmu_isr(int irq, void *dev_id)
 		}
 	}
 
-	raw_spin_unlock_irqrestore(&xgene_pmu->lock, flags);
+	raw_spin_unlock(&xgene_pmu->lock);
 
 	return IRQ_HANDLED;
 }
--
cgit v1.2.3
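The rationale generalizes: both pmu_handle_irq() and xgene_pmu_isr() only
ever run in hard-IRQ context, where local interrupts are already disabled,
so saving and restoring the IRQ flags buys nothing. A minimal sketch of the
resulting locking convention (struct and function names invented for
illustration; this also assumes the IRQ line is not force-threaded, in which
case the _irqsave variants would still be needed):

#include <linux/interrupt.h>
#include <linux/spinlock.h>

struct my_pmu {
	raw_spinlock_t lock;
	/* counters, control registers, ... */
};

static irqreturn_t my_pmu_irq(int irq, void *dev)
{
	struct my_pmu *pmu = dev;

	/* hard-IRQ context: IRQs are already off, a plain lock suffices */
	raw_spin_lock(&pmu->lock);
	/* ... handle overflowed counters ... */
	raw_spin_unlock(&pmu->lock);

	return IRQ_HANDLED;
}

static void my_pmu_start(struct my_pmu *pmu)
{
	unsigned long flags;

	/* process context can race with the ISR: IRQs must go off here */
	raw_spin_lock_irqsave(&pmu->lock, flags);
	/* ... program the counters ... */
	raw_spin_unlock_irqrestore(&pmu->lock, flags);
}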