From e63cf610ead18d5f8df0739a36cba57ccba43f6b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 29 Sep 2021 13:54:04 +0100 Subject: arm64: mm: Drop pointless call to set_max_mapnr() set_max_mapnr() is an empty stub function if CONFIG_NUMA=y, otherwise it assigns to the 'max_mapnr' variable which is used to provide a generic pfn_valid() implementation if CONFIG_MMU=n. Since we don't support nommu on arm64, drop the pointless call to set_max_mapnr() from mem_init(). Link: https://lore.kernel.org/r/130a50d7-92fd-31fa-261e-f73dadcb4fcf@redhat.com Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 37a81754d9b6..142125749783 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -416,8 +416,6 @@ void __init mem_init(void) else if (!xen_swiotlb_detect()) swiotlb_force = SWIOTLB_NO_FORCE; - set_max_mapnr(max_pfn - PHYS_PFN_OFFSET); - /* this will put all unused low memory onto the freelists */ memblock_free_all(); -- cgit v1.2.3 From 78d2d816c45af0231d797e958250ad9ac590c627 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 13 Sep 2021 13:54:58 +0100 Subject: selftests: arm64: Use a define for the number of SVE ptrace tests to be run Partly in preparation for future refactoring move from hard coding the number of tests in main() to putting #define at the top of the source instead. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210913125505.52619-2-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 612d3899614a..7f7ed1c96867 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -19,6 +19,8 @@ #include "../../kselftest.h" +#define EXPECTED_TESTS 20 + /* and don't like each other, so: */ #ifndef NT_ARM_SVE #define NT_ARM_SVE 0x405 @@ -313,7 +315,7 @@ int main(void) pid_t child; ksft_print_header(); - ksft_set_plan(20); + ksft_set_plan(EXPECTED_TESTS); if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) ksft_exit_skip("SVE not available\n"); -- cgit v1.2.3 From 09121ad7186ecd28eb7fca1b22c17f517aaea87f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 13 Sep 2021 13:54:59 +0100 Subject: selftests: arm64: Don't log child creation as a test in SVE ptrace test Currently we log the creation of the child process as a test but it's not really relevant to what we're trying to test and can make the output a little confusing so don't do that. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210913125505.52619-3-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 7f7ed1c96867..7035f01423b3 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -19,7 +19,7 @@ #include "../../kselftest.h" -#define EXPECTED_TESTS 20 +#define EXPECTED_TESTS 19 /* and don't like each other, so: */ #ifndef NT_ARM_SVE @@ -169,8 +169,6 @@ static int do_parent(pid_t child) if (WIFEXITED(status) || WIFSIGNALED(status)) ksft_exit_fail_msg("Child died unexpectedly\n"); - ksft_test_result(WIFSTOPPED(status), "WIFSTOPPED(%d)\n", - status); if (!WIFSTOPPED(status)) goto error; -- cgit v1.2.3 From eab281e3afa6a740df9b9c2e313eb4a9944deeea Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 13 Sep 2021 13:55:00 +0100 Subject: selftests: arm64: Remove extraneous register setting code For some reason the SVE ptrace test code starts off by setting values in some of the SVE vector registers in the parent process which it then never interacts with when verifying the ptrace interfaces. This is not especially relevant to what's being tested and somewhat confusing when reading the code so let's remove it. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210913125505.52619-4-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/Makefile | 2 +- tools/testing/selftests/arm64/fp/sve-ptrace-asm.S | 33 ----------------------- tools/testing/selftests/arm64/fp/sve-ptrace.c | 28 ------------------- 3 files changed, 1 insertion(+), 62 deletions(-) delete mode 100644 tools/testing/selftests/arm64/fp/sve-ptrace-asm.S diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile index f2abdd6ba12e..4367125b7c27 100644 --- a/tools/testing/selftests/arm64/fp/Makefile +++ b/tools/testing/selftests/arm64/fp/Makefile @@ -12,7 +12,7 @@ all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED) fpsimd-test: fpsimd-test.o $(CC) -nostdlib $^ -o $@ rdvl-sve: rdvl-sve.o rdvl.o -sve-ptrace: sve-ptrace.o sve-ptrace-asm.o +sve-ptrace: sve-ptrace.o sve-probe-vls: sve-probe-vls.o rdvl.o sve-test: sve-test.o $(CC) -nostdlib $^ -o $@ diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S deleted file mode 100644 index 3e81f9fab574..000000000000 --- a/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S +++ /dev/null @@ -1,33 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -// Copyright (C) 2015-2019 ARM Limited. -// Original author: Dave Martin -#include - -.arch_extension sve - -.globl sve_store_patterns - -sve_store_patterns: - mov x1, x0 - - index z0.b, #0, #1 - str q0, [x1] - - mov w8, #__NR_getpid - svc #0 - str q0, [x1, #0x10] - - mov z1.d, z0.d - str q0, [x1, #0x20] - - mov w8, #__NR_getpid - svc #0 - str q0, [x1, #0x30] - - mov z1.d, z0.d - str q0, [x1, #0x40] - - ret - -.size sve_store_patterns, . - sve_store_patterns -.type sve_store_patterns, @function diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 7035f01423b3..d2ec48f649f9 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -26,11 +26,6 @@ #define NT_ARM_SVE 0x405 #endif -/* Number of registers filled in by sve_store_patterns */ -#define NR_VREGS 5 - -void sve_store_patterns(__uint128_t v[NR_VREGS]); - static void dump(const void *buf, size_t size) { size_t i; @@ -40,23 +35,6 @@ static void dump(const void *buf, size_t size) printf(" %.2x", *p++); } -static int check_vregs(const __uint128_t vregs[NR_VREGS]) -{ - int i; - int ok = 1; - - for (i = 0; i < NR_VREGS; ++i) { - printf("# v[%d]:", i); - dump(&vregs[i], sizeof vregs[i]); - putchar('\n'); - - if (vregs[i] != vregs[0]) - ok = 0; - } - - return ok; -} - static int do_child(void) { if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) @@ -309,7 +287,6 @@ disappeared: int main(void) { int ret = EXIT_SUCCESS; - __uint128_t v[NR_VREGS]; pid_t child; ksft_print_header(); @@ -318,11 +295,6 @@ int main(void) if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) ksft_exit_skip("SVE not available\n"); - sve_store_patterns(v); - - if (!check_vregs(v)) - ksft_exit_fail_msg("Initial check_vregs() failed\n"); - child = fork(); if (!child) return do_child(); -- cgit v1.2.3 From 736e6d5a54511e1438cb81d38e5d9f164eb2af1a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 13 Sep 2021 13:55:01 +0100 Subject: selftests: arm64: Document what the SVE ptrace test is doing Before we go modifying it further let's add some comments and output clarifications explaining what this test is actually doing. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210913125505.52619-5-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index d2ec48f649f9..fc4a672825eb 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -181,6 +181,7 @@ static int do_parent(pid_t child) } } + /* New process should start with FPSIMD registers only */ sve = get_sve(pid, &svebuf, &svebufsz); if (!sve) { int e = errno; @@ -191,14 +192,15 @@ static int do_parent(pid_t child) goto error; } else { - ksft_test_result_pass("get_sve\n"); + ksft_test_result_pass("get_sve(FPSIMD)\n"); } ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD, - "FPSIMD registers\n"); + "Set FPSIMD registers\n"); if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD) goto error; + /* Try to set a known FPSIMD state via PT_REGS_SVE */ fpsimd = (struct user_fpsimd_state *)((char *)sve + SVE_PT_FPSIMD_OFFSET); for (i = 0; i < 32; ++i) { @@ -219,6 +221,7 @@ static int do_parent(pid_t child) goto error; } + /* Zero the first SVE Z register */ vq = sve_vq_from_vl(sve->vl); newsvebufsz = SVE_PT_SVE_ZREG_OFFSET(vq, 1); @@ -245,6 +248,7 @@ static int do_parent(pid_t child) goto error; } + /* Try to read back the value we just set */ new_sve = get_sve(pid, &newsvebuf, &newsvebufsz); if (!new_sve) { int e = errno; @@ -257,12 +261,13 @@ static int do_parent(pid_t child) } ksft_test_result((new_sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE, - "SVE registers\n"); + "Get SVE registers\n"); if ((new_sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE) goto error; dump_sve_regs(new_sve, 3, sizeof fpsimd->vregs[0]); + /* Verify that the register we set has the value we expected */ p = (unsigned char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 1); for (i = 0; i < sizeof fpsimd->vregs[0]; ++i) { unsigned char expected = i; -- cgit v1.2.3 From 8c9eece0bfbf90064470e173c80fe3495f24b397 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 13 Sep 2021 13:55:02 +0100 Subject: selftests: arm64: Clarify output when verifying SVE register set When verifying setting a Z register via ptrace we check each byte by hand, iterating over the buffer using a pointer called p and treating each register value written as a test. This creates output referring to "p[X]" which is confusing since SVE also has predicate registers Pn. Tweak the output to avoid confusion here. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210913125505.52619-6-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index fc4a672825eb..2d130fedc019 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -275,7 +275,7 @@ static int do_parent(pid_t child) if (__BYTE_ORDER == __BIG_ENDIAN) expected = sizeof fpsimd->vregs[0] - 1 - expected; - ksft_test_result(p[i] == expected, "p[%d] == expected\n", i); + ksft_test_result(p[i] == expected, "buf[%d] == expected\n", i); if (p[i] != expected) goto error; } -- cgit v1.2.3 From 9f7d03a2c5a1e57bb4ec5fddecff4c000f34a786 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 13 Sep 2021 13:55:03 +0100 Subject: selftests: arm64: Verify interoperation of SVE and FPSIMD register sets After setting the FPSIMD registers via the SVE register set read them back via the FPSIMD register set, validating that the two register sets are interoperating and that the values we thought we set made it into the child process. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210913125505.52619-7-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 28 +++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 2d130fedc019..31a2c2fc529d 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -46,6 +46,15 @@ static int do_child(void) return EXIT_SUCCESS; } +static int get_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd) +{ + struct iovec iov; + + iov.iov_base = fpsimd; + iov.iov_len = sizeof(*fpsimd); + return ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov); +} + static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size) { struct user_sve_header *sve; @@ -122,7 +131,7 @@ static int do_parent(pid_t child) void *svebuf = NULL, *newsvebuf; size_t svebufsz = 0, newsvebufsz; struct user_sve_header *sve, *new_sve; - struct user_fpsimd_state *fpsimd; + struct user_fpsimd_state *fpsimd, new_fpsimd; unsigned int i, j; unsigned char *p; unsigned int vq; @@ -221,7 +230,22 @@ static int do_parent(pid_t child) goto error; } - /* Zero the first SVE Z register */ + /* Verify via the FPSIMD regset */ + if (get_fpsimd(pid, &new_fpsimd)) { + int e = errno; + + ksft_test_result_fail("get_fpsimd(): %s\n", + strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } + if (memcmp(fpsimd, &new_fpsimd, sizeof(*fpsimd)) == 0) + ksft_test_result_pass("get_fpsimd() gave same state\n"); + else + ksft_test_result_fail("get_fpsimd() gave different state\n"); + vq = sve_vq_from_vl(sve->vl); newsvebufsz = SVE_PT_SVE_ZREG_OFFSET(vq, 1); -- cgit v1.2.3 From a1d7111257cdfba87a18a630fd96a44ba69c3d9c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 13 Sep 2021 13:55:04 +0100 Subject: selftests: arm64: More comprehensively test the SVE ptrace interface Currently the selftest for the SVE register set is not quite as thorough as is desirable - it only validates that the value of a single Z register is not modified by a partial write to a lower numbered Z register after having previously been set through the FPSIMD regset. Make this more thorough: - Test the ability to set vector lengths and enumerate those supported in the system. - Validate data in all Z and P registers, plus FPSR and FPCR. - Test reads via the FPSIMD regset after set via the SVE regset. There's still some oversights, the main one being that due to the need to generate a pattern in FFR and the fact that this rewrite is primarily motivated by SME's streaming SVE which doesn't have FFR we don't currently test FFR. Update the TODO to reflect those that occurred to me (and fix an adjacent typo in there). Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210913125505.52619-8-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/TODO | 9 +- tools/testing/selftests/arm64/fp/sve-ptrace.c | 327 +++++++++++++++++++------- 2 files changed, 254 insertions(+), 82 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/TODO b/tools/testing/selftests/arm64/fp/TODO index b6b7ebfcf362..44004e53da33 100644 --- a/tools/testing/selftests/arm64/fp/TODO +++ b/tools/testing/selftests/arm64/fp/TODO @@ -1,4 +1,7 @@ - Test unsupported values in the ABIs. -- More coverage for ptrace (eg, vector length conversions). -- Coverage for signals. -- Test PR_SVE_VL_INHERITY after a double fork. +- More coverage for ptrace: + - Get/set of FFR. + - Ensure ptraced processes actually see the register state visible through + the ptrace interface. + - Big endian. +- Test PR_SVE_VL_INHERIT after a double fork. diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 31a2c2fc529d..199710ba65c7 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -1,15 +1,17 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2015-2020 ARM Limited. + * Copyright (C) 2015-2021 ARM Limited. * Original author: Dave Martin */ #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -19,20 +21,22 @@ #include "../../kselftest.h" -#define EXPECTED_TESTS 19 +#define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3) +#define FPSIMD_TESTS 3 + +#define EXPECTED_TESTS (VL_TESTS + FPSIMD_TESTS) /* and don't like each other, so: */ #ifndef NT_ARM_SVE #define NT_ARM_SVE 0x405 #endif -static void dump(const void *buf, size_t size) +static void fill_buf(char *buf, size_t size) { - size_t i; - const unsigned char *p = buf; + int i; - for (i = 0; i < size; ++i) - printf(" %.2x", *p++); + for (i = 0; i < size; i++) + buf[i] = random(); } static int do_child(void) @@ -101,25 +105,228 @@ static int set_sve(pid_t pid, const struct user_sve_header *sve) return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov); } -static void dump_sve_regs(const struct user_sve_header *sve, unsigned int num, - unsigned int vlmax) +/* Validate attempting to set the specfied VL via ptrace */ +static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported) +{ + struct user_sve_header sve; + struct user_sve_header *new_sve = NULL; + size_t new_sve_size = 0; + int ret, prctl_vl; + + *supported = false; + + /* Check if the VL is supported in this process */ + prctl_vl = prctl(PR_SVE_SET_VL, vl); + if (prctl_vl == -1) + ksft_exit_fail_msg("prctl(PR_SVE_SET_VL) failed: %s (%d)\n", + strerror(errno), errno); + + /* If the VL is not supported then a supported VL will be returned */ + *supported = (prctl_vl == vl); + + /* Set the VL by doing a set with no register payload */ + memset(&sve, 0, sizeof(sve)); + sve.size = sizeof(sve); + sve.vl = vl; + ret = set_sve(child, &sve); + if (ret != 0) { + ksft_test_result_fail("Failed to set VL %u\n", vl); + return; + } + + /* + * Read back the new register state and verify that we have the + * same VL that we got from prctl() on ourselves. + */ + if (!get_sve(child, (void **)&new_sve, &new_sve_size)) { + ksft_test_result_fail("Failed to read VL %u\n", vl); + return; + } + + ksft_test_result(new_sve->vl = prctl_vl, "Set VL %u\n", vl); + + free(new_sve); +} + +static void check_u32(unsigned int vl, const char *reg, + uint32_t *in, uint32_t *out, int *errors) +{ + if (*in != *out) { + printf("# VL %d %s wrote %x read %x\n", + vl, reg, *in, *out); + (*errors)++; + } +} + +/* Validate attempting to set SVE data and read SVE data */ +static void ptrace_set_sve_get_sve_data(pid_t child, unsigned int vl) +{ + void *write_buf; + void *read_buf = NULL; + struct user_sve_header *write_sve; + struct user_sve_header *read_sve; + size_t read_sve_size = 0; + unsigned int vq = sve_vq_from_vl(vl); + int ret, i; + size_t data_size; + int errors = 0; + + data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); + write_buf = malloc(data_size); + if (!write_buf) { + ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n", + data_size, vl); + return; + } + write_sve = write_buf; + + /* Set up some data and write it out */ + memset(write_sve, 0, data_size); + write_sve->size = data_size; + write_sve->vl = vl; + write_sve->flags = SVE_PT_REGS_SVE; + + for (i = 0; i < __SVE_NUM_ZREGS; i++) + fill_buf(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), + SVE_PT_SVE_ZREG_SIZE(vq)); + + for (i = 0; i < __SVE_NUM_PREGS; i++) + fill_buf(write_buf + SVE_PT_SVE_PREG_OFFSET(vq, i), + SVE_PT_SVE_PREG_SIZE(vq)); + + fill_buf(write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), SVE_PT_SVE_FPSR_SIZE); + fill_buf(write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), SVE_PT_SVE_FPCR_SIZE); + + /* TODO: Generate a valid FFR pattern */ + + ret = set_sve(child, write_sve); + if (ret != 0) { + ksft_test_result_fail("Failed to set VL %u data\n", vl); + goto out; + } + + /* Read the data back */ + if (!get_sve(child, (void **)&read_buf, &read_sve_size)) { + ksft_test_result_fail("Failed to read VL %u data\n", vl); + goto out; + } + read_sve = read_buf; + + /* We might read more data if there's extensions we don't know */ + if (read_sve->size < write_sve->size) { + ksft_test_result_fail("Wrote %d bytes, only read %d\n", + write_sve->size, read_sve->size); + goto out_read; + } + + for (i = 0; i < __SVE_NUM_ZREGS; i++) { + if (memcmp(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), + read_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), + SVE_PT_SVE_ZREG_SIZE(vq)) != 0) { + printf("# Mismatch in %u Z%d\n", vl, i); + errors++; + } + } + + for (i = 0; i < __SVE_NUM_PREGS; i++) { + if (memcmp(write_buf + SVE_PT_SVE_PREG_OFFSET(vq, i), + read_buf + SVE_PT_SVE_PREG_OFFSET(vq, i), + SVE_PT_SVE_PREG_SIZE(vq)) != 0) { + printf("# Mismatch in %u P%d\n", vl, i); + errors++; + } + } + + check_u32(vl, "FPSR", write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), + read_buf + SVE_PT_SVE_FPSR_OFFSET(vq), &errors); + check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), + read_buf + SVE_PT_SVE_FPCR_OFFSET(vq), &errors); + + ksft_test_result(errors == 0, "Set and get SVE data for VL %u\n", vl); + +out_read: + free(read_buf); +out: + free(write_buf); +} + +/* Validate attempting to set SVE data and read SVE data */ +static void ptrace_set_sve_get_fpsimd_data(pid_t child, unsigned int vl) { - unsigned int vq; - unsigned int i; + void *write_buf; + struct user_sve_header *write_sve; + unsigned int vq = sve_vq_from_vl(vl); + struct user_fpsimd_state fpsimd_state; + int ret, i; + size_t data_size; + int errors = 0; + + if (__BYTE_ORDER == __BIG_ENDIAN) { + ksft_test_result_skip("Big endian not supported\n"); + return; + } + + data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); + write_buf = malloc(data_size); + if (!write_buf) { + ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n", + data_size, vl); + return; + } + write_sve = write_buf; - if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE) - ksft_exit_fail_msg("Dumping non-SVE register\n"); + /* Set up some data and write it out */ + memset(write_sve, 0, data_size); + write_sve->size = data_size; + write_sve->vl = vl; + write_sve->flags = SVE_PT_REGS_SVE; - if (vlmax > sve->vl) - vlmax = sve->vl; + for (i = 0; i < __SVE_NUM_ZREGS; i++) + fill_buf(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), + SVE_PT_SVE_ZREG_SIZE(vq)); - vq = sve_vq_from_vl(sve->vl); - for (i = 0; i < num; ++i) { - printf("# z%u:", i); - dump((const char *)sve + SVE_PT_SVE_ZREG_OFFSET(vq, i), - vlmax); - printf("%s\n", vlmax == sve->vl ? "" : " ..."); + fill_buf(write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), SVE_PT_SVE_FPSR_SIZE); + fill_buf(write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), SVE_PT_SVE_FPCR_SIZE); + + ret = set_sve(child, write_sve); + if (ret != 0) { + ksft_test_result_fail("Failed to set VL %u data\n", vl); + goto out; + } + + /* Read the data back */ + if (get_fpsimd(child, &fpsimd_state)) { + ksft_test_result_fail("Failed to read VL %u FPSIMD data\n", + vl); + goto out; + } + + for (i = 0; i < __SVE_NUM_ZREGS; i++) { + __uint128_t tmp = 0; + + /* + * Z regs are stored endianness invariant, this won't + * work for big endian + */ + memcpy(&tmp, write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), + sizeof(tmp)); + + if (tmp != fpsimd_state.vregs[i]) { + printf("# Mismatch in FPSIMD for VL %u Z%d\n", vl, i); + errors++; + } } + + check_u32(vl, "FPSR", write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), + &fpsimd_state.fpsr, &errors); + check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), + &fpsimd_state.fpcr, &errors); + + ksft_test_result(errors == 0, "Set and get FPSIMD data for VL %u\n", + vl); + +out: + free(write_buf); } static int do_parent(pid_t child) @@ -128,13 +335,14 @@ static int do_parent(pid_t child) pid_t pid; int status; siginfo_t si; - void *svebuf = NULL, *newsvebuf; - size_t svebufsz = 0, newsvebufsz; - struct user_sve_header *sve, *new_sve; + void *svebuf = NULL; + size_t svebufsz = 0; + struct user_sve_header *sve; struct user_fpsimd_state *fpsimd, new_fpsimd; unsigned int i, j; unsigned char *p; - unsigned int vq; + unsigned int vq, vl; + bool vl_supported; /* Attach to the child */ while (1) { @@ -246,62 +454,21 @@ static int do_parent(pid_t child) else ksft_test_result_fail("get_fpsimd() gave different state\n"); - vq = sve_vq_from_vl(sve->vl); - - newsvebufsz = SVE_PT_SVE_ZREG_OFFSET(vq, 1); - new_sve = newsvebuf = malloc(newsvebufsz); - if (!new_sve) { - errno = ENOMEM; - perror(NULL); - goto error; - } - - *new_sve = *sve; - new_sve->flags &= ~SVE_PT_REGS_MASK; - new_sve->flags |= SVE_PT_REGS_SVE; - memset((char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 0), - 0, SVE_PT_SVE_ZREG_SIZE(vq)); - new_sve->size = SVE_PT_SVE_ZREG_OFFSET(vq, 1); - if (set_sve(pid, new_sve)) { - int e = errno; - - ksft_test_result_fail("set_sve(ZREG): %s\n", strerror(errno)); - if (e == ESRCH) - goto disappeared; - - goto error; - } - - /* Try to read back the value we just set */ - new_sve = get_sve(pid, &newsvebuf, &newsvebufsz); - if (!new_sve) { - int e = errno; - - ksft_test_result_fail("get_sve(ZREG): %s\n", strerror(errno)); - if (e == ESRCH) - goto disappeared; + /* Step through every possible VQ */ + for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { + vl = sve_vl_from_vq(vq); - goto error; - } - - ksft_test_result((new_sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE, - "Get SVE registers\n"); - if ((new_sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE) - goto error; - - dump_sve_regs(new_sve, 3, sizeof fpsimd->vregs[0]); + /* First, try to set this vector length */ + ptrace_set_get_vl(child, vl, &vl_supported); - /* Verify that the register we set has the value we expected */ - p = (unsigned char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 1); - for (i = 0; i < sizeof fpsimd->vregs[0]; ++i) { - unsigned char expected = i; - - if (__BYTE_ORDER == __BIG_ENDIAN) - expected = sizeof fpsimd->vregs[0] - 1 - expected; - - ksft_test_result(p[i] == expected, "buf[%d] == expected\n", i); - if (p[i] != expected) - goto error; + /* If the VL is supported validate data set/get */ + if (vl_supported) { + ptrace_set_sve_get_sve_data(child, vl); + ptrace_set_sve_get_fpsimd_data(child, vl); + } else { + ksft_test_result_skip("set SVE get SVE for VL %d\n", vl); + ksft_test_result_skip("set SVE get FPSIMD for VL %d\n", vl); + } } ret = EXIT_SUCCESS; @@ -318,6 +485,8 @@ int main(void) int ret = EXIT_SUCCESS; pid_t child; + srandom(getpid()); + ksft_print_header(); ksft_set_plan(EXPECTED_TESTS); -- cgit v1.2.3 From 34785030dc069302be20b09f0f647ddb03203d72 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 13 Sep 2021 13:55:05 +0100 Subject: selftests: arm64: Move FPSIMD in SVE ptrace test into a function Now that all the other tests are in functions rather than inline in the main parent process function also move the test for accessing the FPSIMD registers via the SVE regset out into their own function. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210913125505.52619-9-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 120 +++++++++++++------------- 1 file changed, 59 insertions(+), 61 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 199710ba65c7..ac0629f05365 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -158,6 +158,63 @@ static void check_u32(unsigned int vl, const char *reg, } } +/* Access the FPSIMD registers via the SVE regset */ +static void ptrace_sve_fpsimd(pid_t child) +{ + void *svebuf = NULL; + size_t svebufsz = 0; + struct user_sve_header *sve; + struct user_fpsimd_state *fpsimd, new_fpsimd; + unsigned int i, j; + unsigned char *p; + + /* New process should start with FPSIMD registers only */ + sve = get_sve(child, &svebuf, &svebufsz); + if (!sve) { + ksft_test_result_fail("get_sve: %s\n", strerror(errno)); + + return; + } else { + ksft_test_result_pass("get_sve(FPSIMD)\n"); + } + + ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD, + "Set FPSIMD registers\n"); + if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD) + goto out; + + /* Try to set a known FPSIMD state via PT_REGS_SVE */ + fpsimd = (struct user_fpsimd_state *)((char *)sve + + SVE_PT_FPSIMD_OFFSET); + for (i = 0; i < 32; ++i) { + p = (unsigned char *)&fpsimd->vregs[i]; + + for (j = 0; j < sizeof(fpsimd->vregs[i]); ++j) + p[j] = j; + } + + if (set_sve(child, sve)) { + ksft_test_result_fail("set_sve(FPSIMD): %s\n", + strerror(errno)); + + goto out; + } + + /* Verify via the FPSIMD regset */ + if (get_fpsimd(child, &new_fpsimd)) { + ksft_test_result_fail("get_fpsimd(): %s\n", + strerror(errno)); + goto out; + } + if (memcmp(fpsimd, &new_fpsimd, sizeof(*fpsimd)) == 0) + ksft_test_result_pass("get_fpsimd() gave same state\n"); + else + ksft_test_result_fail("get_fpsimd() gave different state\n"); + +out: + free(svebuf); +} + /* Validate attempting to set SVE data and read SVE data */ static void ptrace_set_sve_get_sve_data(pid_t child, unsigned int vl) { @@ -335,12 +392,6 @@ static int do_parent(pid_t child) pid_t pid; int status; siginfo_t si; - void *svebuf = NULL; - size_t svebufsz = 0; - struct user_sve_header *sve; - struct user_fpsimd_state *fpsimd, new_fpsimd; - unsigned int i, j; - unsigned char *p; unsigned int vq, vl; bool vl_supported; @@ -398,61 +449,8 @@ static int do_parent(pid_t child) } } - /* New process should start with FPSIMD registers only */ - sve = get_sve(pid, &svebuf, &svebufsz); - if (!sve) { - int e = errno; - - ksft_test_result_fail("get_sve: %s\n", strerror(errno)); - if (e == ESRCH) - goto disappeared; - - goto error; - } else { - ksft_test_result_pass("get_sve(FPSIMD)\n"); - } - - ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD, - "Set FPSIMD registers\n"); - if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD) - goto error; - - /* Try to set a known FPSIMD state via PT_REGS_SVE */ - fpsimd = (struct user_fpsimd_state *)((char *)sve + - SVE_PT_FPSIMD_OFFSET); - for (i = 0; i < 32; ++i) { - p = (unsigned char *)&fpsimd->vregs[i]; - - for (j = 0; j < sizeof fpsimd->vregs[i]; ++j) - p[j] = j; - } - - if (set_sve(pid, sve)) { - int e = errno; - - ksft_test_result_fail("set_sve(FPSIMD): %s\n", - strerror(errno)); - if (e == ESRCH) - goto disappeared; - - goto error; - } - - /* Verify via the FPSIMD regset */ - if (get_fpsimd(pid, &new_fpsimd)) { - int e = errno; - - ksft_test_result_fail("get_fpsimd(): %s\n", - strerror(errno)); - if (e == ESRCH) - goto disappeared; - - goto error; - } - if (memcmp(fpsimd, &new_fpsimd, sizeof(*fpsimd)) == 0) - ksft_test_result_pass("get_fpsimd() gave same state\n"); - else - ksft_test_result_fail("get_fpsimd() gave different state\n"); + /* FPSIMD via SVE regset */ + ptrace_sve_fpsimd(child); /* Step through every possible VQ */ for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { -- cgit v1.2.3 From ff944c44b782d34dc95ea9dcf3a5b62a0bdbac18 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 29 Sep 2021 16:19:22 +0100 Subject: selftests: arm64: Fix printf() format mismatch in vec-syscfg The format for this error message calls for the plain text version of the error but we weren't supply it. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210929151925.9601-2-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/vec-syscfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c index c02071dcb563..b2de002ee325 100644 --- a/tools/testing/selftests/arm64/fp/vec-syscfg.c +++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c @@ -109,7 +109,7 @@ static int get_child_rdvl(struct vec_data *data) /* exec() a new binary which puts the VL on stdout */ ret = execl(data->rdvl_binary, data->rdvl_binary, NULL); - fprintf(stderr, "execl(%s) failed: %d\n", + fprintf(stderr, "execl(%s) failed: %d (%s)\n", data->rdvl_binary, errno, strerror(errno)); exit(EXIT_FAILURE); -- cgit v1.2.3 From 4caf339c037c5e70d0ea789af093e123c2c6fc80 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 29 Sep 2021 16:19:23 +0100 Subject: selftests: arm64: Remove bogus error check on writing to files Due to some refactoring with the error handling we ended up mangling things so we never actually set ret and therefore shouldn't be checking it. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210929151925.9601-3-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/vec-syscfg.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c index b2de002ee325..d48d3ee1bc36 100644 --- a/tools/testing/selftests/arm64/fp/vec-syscfg.c +++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c @@ -180,7 +180,6 @@ static int file_read_integer(const char *name, int *val) static int file_write_integer(const char *name, int val) { FILE *f; - int ret; f = fopen(name, "w"); if (!f) { @@ -192,11 +191,6 @@ static int file_write_integer(const char *name, int val) fprintf(f, "%d", val); fclose(f); - if (ret < 0) { - ksft_test_result_fail("Error writing %d to %s\n", - val, name); - return -1; - } return 0; } -- cgit v1.2.3 From e42391150eabcfb2cc42f58c8b0394ebc3039c34 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 29 Sep 2021 16:19:24 +0100 Subject: selftests: arm64: Fix and enable test for setting current VL in vec-syscfg We had some test code for verifying that we can write the current VL via the prctl() interface but the condition for the test was inverted which wasn't noticed as it was never actually hooked up to the array of tests we execute. Fix this. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210929151925.9601-4-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/vec-syscfg.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c index d48d3ee1bc36..9d6ac843e651 100644 --- a/tools/testing/selftests/arm64/fp/vec-syscfg.c +++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c @@ -329,12 +329,9 @@ static void prctl_set_same(struct vec_data *data) return; } - if (cur_vl != data->rdvl()) - ksft_test_result_pass("%s current VL is %d\n", - data->name, ret); - else - ksft_test_result_fail("%s prctl() VL %d but RDVL is %d\n", - data->name, ret, data->rdvl()); + ksft_test_result(cur_vl == data->rdvl(), + "%s set VL %d and have VL %d\n", + data->name, cur_vl, data->rdvl()); } /* Can we set a new VL for this process? */ @@ -555,6 +552,7 @@ static const test_type tests[] = { proc_write_max, prctl_get, + prctl_set_same, prctl_set, prctl_set_no_child, prctl_set_for_child, -- cgit v1.2.3 From 8694e5e6388695195a32bd5746635ca166a8df56 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 29 Sep 2021 16:19:25 +0100 Subject: selftests: arm64: Verify that all possible vector lengths are handled As part of the enumeration interface for setting vector lengths it is valid to set vector lengths not supported in the system, these will be rounded to a supported vector length and returned from the prctl(). Add a test which exercises this for every valid vector length and makes sure that the return value is as expected and that this is reflected in the actual system state. Signed-off-by: Mark Brown Reviewed-by: Tomohiro Misono Link: https://lore.kernel.org/r/20210929151925.9601-5-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/vec-syscfg.c | 77 +++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c index 9d6ac843e651..272b888e018e 100644 --- a/tools/testing/selftests/arm64/fp/vec-syscfg.c +++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c @@ -540,6 +540,82 @@ static void prctl_set_onexec(struct vec_data *data) file_write_integer(data->default_vl_file, data->default_vl); } +/* For each VQ verify that setting via prctl() does the right thing */ +static void prctl_set_all_vqs(struct vec_data *data) +{ + int ret, vq, vl, new_vl; + int errors = 0; + + if (!data->min_vl || !data->max_vl) { + ksft_test_result_skip("%s Failed to enumerate VLs, not testing VL setting\n", + data->name); + return; + } + + for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { + vl = sve_vl_from_vq(vq); + + /* Attempt to set the VL */ + ret = prctl(data->prctl_set, vl); + if (ret < 0) { + errors++; + ksft_print_msg("%s prctl set failed for %d: %d (%s)\n", + data->name, vl, + errno, strerror(errno)); + continue; + } + + new_vl = ret & PR_SVE_VL_LEN_MASK; + + /* Check that we actually have the reported new VL */ + if (data->rdvl() != new_vl) { + ksft_print_msg("Set %s VL %d but RDVL reports %d\n", + data->name, new_vl, data->rdvl()); + errors++; + } + + /* Was that the VL we asked for? */ + if (new_vl == vl) + continue; + + /* Should round up to the minimum VL if below it */ + if (vl < data->min_vl) { + if (new_vl != data->min_vl) { + ksft_print_msg("%s VL %d returned %d not minimum %d\n", + data->name, vl, new_vl, + data->min_vl); + errors++; + } + + continue; + } + + /* Should round down to maximum VL if above it */ + if (vl > data->max_vl) { + if (new_vl != data->max_vl) { + ksft_print_msg("%s VL %d returned %d not maximum %d\n", + data->name, vl, new_vl, + data->max_vl); + errors++; + } + + continue; + } + + /* Otherwise we should've rounded down */ + if (!(new_vl < vl)) { + ksft_print_msg("%s VL %d returned %d, did not round down\n", + data->name, vl, new_vl); + errors++; + + continue; + } + } + + ksft_test_result(errors == 0, "%s prctl() set all VLs, %d errors\n", + data->name, errors); +} + typedef void (*test_type)(struct vec_data *); static const test_type tests[] = { @@ -557,6 +633,7 @@ static const test_type tests[] = { prctl_set_no_child, prctl_set_for_child, prctl_set_onexec, + prctl_set_all_vqs, }; int main(void) -- cgit v1.2.3 From f5b650f887f30dda15a8d524249e48a407544126 Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Fri, 17 Sep 2021 11:28:11 +0530 Subject: arm64/traps: Avoid unnecessary kernel/user pointer conversion Annotating a pointer from kernel to __user and then back again requires an extra __force annotation to silent sparse warning. In call_undef_hook() this unnecessary complexity can be avoided by modifying the intermediate user pointer to unsigned long. This way there is no inter-changeable use of user and kernel pointers and the code is consistent. Note: This patch adds no functional changes to code. Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Amit Daniel Kachhap Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20210917055811.22341-1-amit.kachhap@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index b03e383d944a..09236751283e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -400,11 +400,11 @@ static int call_undef_hook(struct pt_regs *regs) unsigned long flags; u32 instr; int (*fn)(struct pt_regs *regs, u32 instr) = NULL; - void __user *pc = (void __user *)instruction_pointer(regs); + unsigned long pc = instruction_pointer(regs); if (!user_mode(regs)) { __le32 instr_le; - if (get_kernel_nofault(instr_le, (__force __le32 *)pc)) + if (get_kernel_nofault(instr_le, (__le32 *)pc)) goto exit; instr = le32_to_cpu(instr_le); } else if (compat_thumb_mode(regs)) { -- cgit v1.2.3 From f8b46c4b51ab109457770e662bb3d0cde390879f Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 20 Sep 2021 14:59:31 +0530 Subject: arm64/mm: Add pud_sect_supported() Section mapping at PUD level is supported only on 4K pages and currently it gets verified with explicit #ifdef or IS_ENABLED() constructs. This adds a new helper pud_sect_supported() for this purpose, which particularly cleans up the HugeTLB code path. It updates relevant switch statements with checks for __PAGETABLE_PMD_FOLDED in order to avoid build failures caused with two identical switch case values in those code blocks. Cc: Catalin Marinas Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Suggested-by: Mark Rutland Signed-off-by: Anshuman Khandual Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/1632130171-472-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 5 +++++ arch/arm64/include/asm/vmalloc.h | 4 ++-- arch/arm64/mm/hugetlbpage.c | 26 +++++++++++++++----------- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index dfa76afa0ccf..84fbb52b4224 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1022,6 +1022,11 @@ static inline pgprot_t arch_filter_pgprot(pgprot_t prot) return PAGE_READONLY_EXEC; } +static inline bool pud_sect_supported(void) +{ + return PAGE_SIZE == SZ_4K; +} + #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h index 7a22aeea9bb5..b9185503feae 100644 --- a/arch/arm64/include/asm/vmalloc.h +++ b/arch/arm64/include/asm/vmalloc.h @@ -2,6 +2,7 @@ #define _ASM_ARM64_VMALLOC_H #include +#include #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP @@ -9,10 +10,9 @@ static inline bool arch_vmap_pud_supported(pgprot_t prot) { /* - * Only 4k granule supports level 1 block mappings. * SW table walks can't handle removal of intermediate entries. */ - return IS_ENABLED(CONFIG_ARM64_4K_PAGES) && + return pud_sect_supported() && !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 23505fc35324..029cf5e42c4c 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -40,11 +40,10 @@ void __init arm64_hugetlb_cma_reserve(void) { int order; -#ifdef CONFIG_ARM64_4K_PAGES - order = PUD_SHIFT - PAGE_SHIFT; -#else - order = CONT_PMD_SHIFT + PMD_SHIFT - PAGE_SHIFT; -#endif + if (pud_sect_supported()) + order = PUD_SHIFT - PAGE_SHIFT; + else + order = CONT_PMD_SHIFT + PMD_SHIFT - PAGE_SHIFT; /* * HugeTLB CMA reservation is required for gigantic * huge pages which could not be allocated via the @@ -62,8 +61,9 @@ bool arch_hugetlb_migration_supported(struct hstate *h) size_t pagesize = huge_page_size(h); switch (pagesize) { -#ifdef CONFIG_ARM64_4K_PAGES +#ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: + return pud_sect_supported(); #endif case PMD_SIZE: case CONT_PMD_SIZE: @@ -126,8 +126,11 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize) *pgsize = size; switch (size) { -#ifdef CONFIG_ARM64_4K_PAGES +#ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: + if (pud_sect_supported()) + contig_ptes = 1; + break; #endif case PMD_SIZE: contig_ptes = 1; @@ -489,9 +492,9 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma, static int __init hugetlbpage_init(void) { -#ifdef CONFIG_ARM64_4K_PAGES - hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); -#endif + if (pud_sect_supported()) + hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); + hugetlb_add_hstate(CONT_PMD_SHIFT - PAGE_SHIFT); hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); hugetlb_add_hstate(CONT_PTE_SHIFT - PAGE_SHIFT); @@ -503,8 +506,9 @@ arch_initcall(hugetlbpage_init); bool __init arch_hugetlb_valid_size(unsigned long size) { switch (size) { -#ifdef CONFIG_ARM64_4K_PAGES +#ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: + return pud_sect_supported(); #endif case CONT_PMD_SIZE: case PMD_SIZE: -- cgit v1.2.3 From 8fac67ca236b961b573355e203dbaf62a706a2e5 Mon Sep 17 00:00:00 2001 From: Sudarshan Rajagopalan Date: Tue, 28 Sep 2021 11:51:49 -0700 Subject: arm64: mm: update max_pfn after memory hotplug After new memory blocks have been hotplugged, max_pfn and max_low_pfn needs updating to reflect on new PFNs being hot added to system. Without this patch, debug-related functions that use max_pfn such as get_max_dump_pfn() or read_page_owner() will not work with any page in memory that is hot-added after boot. Fixes: 4ab215061554 ("arm64: Add memory hotplug support") Signed-off-by: Sudarshan Rajagopalan Signed-off-by: Chris Goldsworthy Acked-by: David Hildenbrand Cc: Florian Fainelli Cc: Georgi Djakov Tested-by: Georgi Djakov Link: https://lore.kernel.org/r/a51a27ee7be66024b5ce626310d673f24107bcb8.1632853776.git.quic_cgoldswo@quicinc.com Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index cfd9deb347c3..fd85b51b9d50 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1499,6 +1499,11 @@ int arch_add_memory(int nid, u64 start, u64 size, if (ret) __remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size); + else { + max_pfn = PFN_UP(start + size); + max_low_pfn = max_pfn; + } + return ret; } -- cgit v1.2.3 From e5af50a5df571c1d0268b02f924de49b742c990f Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 23 Sep 2021 18:06:55 -0700 Subject: arm64: kasan: mte: move GCR_EL1 switch to task switch when KASAN disabled It is not necessary to write to GCR_EL1 on every kernel entry and exit when HW tag-based KASAN is disabled because the kernel will not execute any IRG instructions in that mode. Since accessing GCR_EL1 can be expensive on some microarchitectures, avoid doing so by moving the access to task switch when HW tag-based KASAN is disabled. Signed-off-by: Peter Collingbourne Acked-by: Andrey Konovalov Link: https://linux-review.googlesource.com/id/I78e90d60612a94c24344526f476ac4ff216e10d2 Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20210924010655.2886918-1-pcc@google.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 10 +++++----- arch/arm64/kernel/mte.c | 26 ++++++++++++++++++++++++++ include/linux/kasan.h | 9 +++++++-- 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index bc6d5a970a13..2f69ae43941d 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -168,9 +168,9 @@ alternative_else_nop_endif .macro mte_set_kernel_gcr, tmp, tmp2 #ifdef CONFIG_KASAN_HW_TAGS -alternative_if_not ARM64_MTE +alternative_cb kasan_hw_tags_enable b 1f -alternative_else_nop_endif +alternative_cb_end mov \tmp, KERNEL_GCR_EL1 msr_s SYS_GCR_EL1, \tmp 1: @@ -178,10 +178,10 @@ alternative_else_nop_endif .endm .macro mte_set_user_gcr, tsk, tmp, tmp2 -#ifdef CONFIG_ARM64_MTE -alternative_if_not ARM64_MTE +#ifdef CONFIG_KASAN_HW_TAGS +alternative_cb kasan_hw_tags_enable b 1f -alternative_else_nop_endif +alternative_cb_end ldr \tmp, [\tsk, #THREAD_MTE_CTRL] mte_set_gcr \tmp, \tmp2 diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index e5e801bc5312..0cdae086966e 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -179,6 +179,30 @@ static void mte_update_sctlr_user(struct task_struct *task) task->thread.sctlr_user = sctlr; } +static void mte_update_gcr_excl(struct task_struct *task) +{ + /* + * SYS_GCR_EL1 will be set to current->thread.mte_ctrl value by + * mte_set_user_gcr() in kernel_exit, but only if KASAN is enabled. + */ + if (kasan_hw_tags_enabled()) + return; + + write_sysreg_s( + ((task->thread.mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) & + SYS_GCR_EL1_EXCL_MASK) | SYS_GCR_EL1_RRND, + SYS_GCR_EL1); +} + +void __init kasan_hw_tags_enable(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst) +{ + BUG_ON(nr_inst != 1); /* Branch -> NOP */ + + if (kasan_hw_tags_enabled()) + *updptr = cpu_to_le32(aarch64_insn_gen_nop()); +} + void mte_thread_init_user(void) { if (!system_supports_mte()) @@ -198,6 +222,7 @@ void mte_thread_switch(struct task_struct *next) return; mte_update_sctlr_user(next); + mte_update_gcr_excl(next); /* * Check if an async tag exception occurred at EL1. @@ -243,6 +268,7 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg) if (task == current) { preempt_disable(); mte_update_sctlr_user(task); + mte_update_gcr_excl(task); update_sctlr_el1(task->thread.sctlr_user); preempt_enable(); } diff --git a/include/linux/kasan.h b/include/linux/kasan.h index dd874a1ee862..de5f5913374d 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -89,7 +89,7 @@ static __always_inline bool kasan_enabled(void) return static_branch_likely(&kasan_flag_enabled); } -static inline bool kasan_has_integrated_init(void) +static inline bool kasan_hw_tags_enabled(void) { return kasan_enabled(); } @@ -104,7 +104,7 @@ static inline bool kasan_enabled(void) return IS_ENABLED(CONFIG_KASAN); } -static inline bool kasan_has_integrated_init(void) +static inline bool kasan_hw_tags_enabled(void) { return false; } @@ -125,6 +125,11 @@ static __always_inline void kasan_free_pages(struct page *page, #endif /* CONFIG_KASAN_HW_TAGS */ +static inline bool kasan_has_integrated_init(void) +{ + return kasan_hw_tags_enabled(); +} + #ifdef CONFIG_KASAN struct kasan_cache { -- cgit v1.2.3 From 528a4ab45300fa6283556d9b48e26b45a8aa15c4 Mon Sep 17 00:00:00 2001 From: Yee Lee Date: Thu, 30 Sep 2021 16:16:13 +0800 Subject: scs: Release kasan vmalloc poison in scs_free process Since scs allocation is moved to vmalloc region, the shadow stack is protected by kasan_posion_vmalloc. However, the vfree_atomic operation needs to access its context for scs_free process and causes kasan error as the dump info below. This patch Adds kasan_unpoison_vmalloc() before vfree_atomic, which aligns to the prior flow as using kmem_cache. The vmalloc region will go back posioned in the following vumap() operations. ================================================================== BUG: KASAN: vmalloc-out-of-bounds in llist_add_batch+0x60/0xd4 Write of size 8 at addr ffff8000100b9000 by task kthreadd/2 CPU: 0 PID: 2 Comm: kthreadd Not tainted 5.15.0-rc2-11681-g92477dd1faa6-dirty #1 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x0/0x43c show_stack+0x1c/0x2c dump_stack_lvl+0x68/0x84 print_address_description+0x80/0x394 kasan_report+0x180/0x1dc __asan_report_store8_noabort+0x48/0x58 llist_add_batch+0x60/0xd4 vfree_atomic+0x60/0xe0 scs_free+0x1dc/0x1fc scs_release+0xa4/0xd4 free_task+0x30/0xe4 __put_task_struct+0x1ec/0x2e0 delayed_put_task_struct+0x5c/0xa0 rcu_do_batch+0x62c/0x8a0 rcu_core+0x60c/0xc14 rcu_core_si+0x14/0x24 __do_softirq+0x19c/0x68c irq_exit+0x118/0x2dc handle_domain_irq+0xcc/0x134 gic_handle_irq+0x7c/0x1bc call_on_irq_stack+0x40/0x70 do_interrupt_handler+0x78/0x9c el1_interrupt+0x34/0x60 el1h_64_irq_handler+0x1c/0x2c el1h_64_irq+0x78/0x7c _raw_spin_unlock_irqrestore+0x40/0xcc sched_fork+0x4f0/0xb00 copy_process+0xacc/0x3648 kernel_clone+0x168/0x534 kernel_thread+0x13c/0x1b0 kthreadd+0x2bc/0x400 ret_from_fork+0x10/0x20 Memory state around the buggy address: ffff8000100b8f00: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ffff8000100b8f80: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 >ffff8000100b9000: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ^ ffff8000100b9080: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ffff8000100b9100: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ================================================================== Suggested-by: Kuan-Ying Lee Acked-by: Will Deacon Tested-by: Will Deacon Reviewed-by: Sami Tolvanen Signed-off-by: Yee Lee Fixes: a2abe7cbd8fe ("scs: switch to vmapped shadow stacks") Link: https://lore.kernel.org/r/20210930081619.30091-1-yee.lee@mediatek.com Signed-off-by: Will Deacon --- kernel/scs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/scs.c b/kernel/scs.c index e2a71fc82fa0..579841be8864 100644 --- a/kernel/scs.c +++ b/kernel/scs.c @@ -78,6 +78,7 @@ void scs_free(void *s) if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL) return; + kasan_unpoison_vmalloc(s, SCS_SIZE); vfree_atomic(s); } -- cgit v1.2.3 From 094a3684b9b67758ccedf0e6068d90f22f2942d9 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 30 Sep 2021 14:30:59 +0000 Subject: arm64: kernel: add helper for booted at EL2 and not VHE Replace places that contain logic like this: is_hyp_mode_available() && !is_kernel_in_hyp_mode() With a dedicated boolean function is_hyp_nvhe(). This will be needed later in kexec in order to sooner switch back to EL2. Suggested-by: James Morse Signed-off-by: Pasha Tatashin Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210930143113.1502553-2-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/virt.h | 5 +++++ arch/arm64/kernel/cpu-reset.h | 3 +-- arch/arm64/kernel/hibernate.c | 2 +- arch/arm64/kernel/sdei.c | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 7379f35ae2c6..a9457e96203c 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -128,6 +128,11 @@ static __always_inline bool is_protected_kvm_enabled(void) return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE); } +static inline bool is_hyp_nvhe(void) +{ + return is_hyp_mode_available() && !is_kernel_in_hyp_mode(); +} + #endif /* __ASSEMBLY__ */ #endif /* ! __ASM__VIRT_H */ diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h index 9a7b1262ef17..81b3d0fe7a63 100644 --- a/arch/arm64/kernel/cpu-reset.h +++ b/arch/arm64/kernel/cpu-reset.h @@ -20,8 +20,7 @@ static inline void __noreturn __nocfi cpu_soft_restart(unsigned long entry, { typeof(__cpu_soft_restart) *restart; - unsigned long el2_switch = !is_kernel_in_hyp_mode() && - is_hyp_mode_available(); + unsigned long el2_switch = is_hyp_nvhe(); restart = (void *)__pa_symbol(function_nocfi(__cpu_soft_restart)); cpu_install_idmap(); diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 46a0b4d6e251..4c9533f4c0c4 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -49,7 +49,7 @@ extern int in_suspend; /* Do we need to reset el2? */ -#define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) +#define el2_reset_needed() (is_hyp_nvhe()) /* temporary el2 vectors in the __hibernate_exit_text section. */ extern char hibernate_el2_vectors[]; diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 47f77d1234cb..d20620a1c51a 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -202,7 +202,7 @@ unsigned long sdei_arch_get_entry_point(int conduit) * dropped to EL1 because we don't support VHE, then we can't support * SDEI. */ - if (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) { + if (is_hyp_nvhe()) { pr_err("Not supported on this hardware/boot configuration\n"); goto out_err; } -- cgit v1.2.3 From 788bfdd97434982b6d575062581e8e72eea755af Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 30 Sep 2021 14:31:00 +0000 Subject: arm64: trans_pgd: hibernate: Add trans_pgd_copy_el2_vectors Users of trans_pgd may also need a copy of vector table because it is also may be overwritten if a linear map can be overwritten. Move setup of EL2 vectors from hibernate to trans_pgd, so it can be later shared with kexec as well. Signed-off-by: Pasha Tatashin Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210930143113.1502553-3-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/trans_pgd.h | 9 ++++-- arch/arm64/include/asm/virt.h | 2 ++ arch/arm64/kernel/hibernate-asm.S | 52 ---------------------------------- arch/arm64/kernel/hibernate.c | 26 +++++++---------- arch/arm64/mm/Makefile | 1 + arch/arm64/mm/trans_pgd-asm.S | 58 ++++++++++++++++++++++++++++++++++++++ arch/arm64/mm/trans_pgd.c | 27 ++++++++++++++++-- 7 files changed, 103 insertions(+), 72 deletions(-) create mode 100644 arch/arm64/mm/trans_pgd-asm.S diff --git a/arch/arm64/include/asm/trans_pgd.h b/arch/arm64/include/asm/trans_pgd.h index 5d08e5adf3d5..7b04d32b102c 100644 --- a/arch/arm64/include/asm/trans_pgd.h +++ b/arch/arm64/include/asm/trans_pgd.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (c) 2020, Microsoft Corporation. - * Pavel Tatashin + * Copyright (c) 2021, Microsoft Corporation. + * Pasha Tatashin */ #ifndef _ASM_TRANS_TABLE_H @@ -36,4 +36,9 @@ int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd, int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0, unsigned long *t0sz, void *page); +int trans_pgd_copy_el2_vectors(struct trans_pgd_info *info, + phys_addr_t *el2_vectors); + +extern char trans_pgd_stub_vectors[]; + #endif /* _ASM_TRANS_TABLE_H */ diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index a9457e96203c..3c8af033a997 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -67,6 +67,8 @@ */ extern u32 __boot_cpu_mode[2]; +#define ARM64_VECTOR_TABLE_LEN SZ_2K + void __hyp_set_vectors(phys_addr_t phys_vector_base); void __hyp_reset_vectors(void); diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 81c0186a5e32..a30a2c3f905e 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -112,56 +112,4 @@ alternative_insn "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE hvc #0 3: ret SYM_CODE_END(swsusp_arch_suspend_exit) - -/* - * Restore the hyp stub. - * This must be done before the hibernate page is unmapped by _cpu_resume(), - * but happens before any of the hyp-stub's code is cleaned to PoC. - * - * x24: The physical address of __hyp_stub_vectors - */ -SYM_CODE_START_LOCAL(el1_sync) - msr vbar_el2, x24 - eret -SYM_CODE_END(el1_sync) - -.macro invalid_vector label -SYM_CODE_START_LOCAL(\label) - b \label -SYM_CODE_END(\label) -.endm - - invalid_vector el2_sync_invalid - invalid_vector el2_irq_invalid - invalid_vector el2_fiq_invalid - invalid_vector el2_error_invalid - invalid_vector el1_sync_invalid - invalid_vector el1_irq_invalid - invalid_vector el1_fiq_invalid - invalid_vector el1_error_invalid - -/* el2 vectors - switch el2 here while we restore the memory image. */ - .align 11 -SYM_CODE_START(hibernate_el2_vectors) - ventry el2_sync_invalid // Synchronous EL2t - ventry el2_irq_invalid // IRQ EL2t - ventry el2_fiq_invalid // FIQ EL2t - ventry el2_error_invalid // Error EL2t - - ventry el2_sync_invalid // Synchronous EL2h - ventry el2_irq_invalid // IRQ EL2h - ventry el2_fiq_invalid // FIQ EL2h - ventry el2_error_invalid // Error EL2h - - ventry el1_sync // Synchronous 64-bit EL1 - ventry el1_irq_invalid // IRQ 64-bit EL1 - ventry el1_fiq_invalid // FIQ 64-bit EL1 - ventry el1_error_invalid // Error 64-bit EL1 - - ventry el1_sync_invalid // Synchronous 32-bit EL1 - ventry el1_irq_invalid // IRQ 32-bit EL1 - ventry el1_fiq_invalid // FIQ 32-bit EL1 - ventry el1_error_invalid // Error 32-bit EL1 -SYM_CODE_END(hibernate_el2_vectors) - .popsection diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 4c9533f4c0c4..b96ef9060e4c 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -51,9 +51,6 @@ extern int in_suspend; /* Do we need to reset el2? */ #define el2_reset_needed() (is_hyp_nvhe()) -/* temporary el2 vectors in the __hibernate_exit_text section. */ -extern char hibernate_el2_vectors[]; - /* hyp-stub vectors, used to restore el2 during resume from hibernate. */ extern char __hyp_stub_vectors[]; @@ -434,6 +431,7 @@ int swsusp_arch_resume(void) void *zero_page; size_t exit_size; pgd_t *tmp_pg_dir; + phys_addr_t el2_vectors; void __noreturn (*hibernate_exit)(phys_addr_t, phys_addr_t, void *, void *, phys_addr_t, phys_addr_t); struct trans_pgd_info trans_info = { @@ -461,6 +459,14 @@ int swsusp_arch_resume(void) return -ENOMEM; } + if (el2_reset_needed()) { + rc = trans_pgd_copy_el2_vectors(&trans_info, &el2_vectors); + if (rc) { + pr_err("Failed to setup el2 vectors\n"); + return rc; + } + } + exit_size = __hibernate_exit_text_end - __hibernate_exit_text_start; /* * Copy swsusp_arch_suspend_exit() to a safe page. This will generate @@ -473,26 +479,14 @@ int swsusp_arch_resume(void) return rc; } - /* - * The hibernate exit text contains a set of el2 vectors, that will - * be executed at el2 with the mmu off in order to reload hyp-stub. - */ - dcache_clean_inval_poc((unsigned long)hibernate_exit, - (unsigned long)hibernate_exit + exit_size); - /* * KASLR will cause the el2 vectors to be in a different location in * the resumed kernel. Load hibernate's temporary copy into el2. * * We can skip this step if we booted at EL1, or are running with VHE. */ - if (el2_reset_needed()) { - phys_addr_t el2_vectors = (phys_addr_t)hibernate_exit; - el2_vectors += hibernate_el2_vectors - - __hibernate_exit_text_start; /* offset */ - + if (el2_reset_needed()) __hyp_set_vectors(el2_vectors); - } hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1, resume_hdr.reenter_kernel, restore_pblist, diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index f188c9092696..ff1e800ba7a1 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PTDUMP_CORE) += ptdump.o obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o +obj-$(CONFIG_TRANS_TABLE) += trans_pgd-asm.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_ARM64_MTE) += mteswap.o KASAN_SANITIZE_physaddr.o += n diff --git a/arch/arm64/mm/trans_pgd-asm.S b/arch/arm64/mm/trans_pgd-asm.S new file mode 100644 index 000000000000..8c4bffe3089d --- /dev/null +++ b/arch/arm64/mm/trans_pgd-asm.S @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +/* + * Copyright (c) 2021, Microsoft Corporation. + * Pasha Tatashin + */ + +#include +#include +#include + +.macro invalid_vector label +SYM_CODE_START_LOCAL(\label) + .align 7 + b \label +SYM_CODE_END(\label) +.endm + +.macro el1_sync_vector +SYM_CODE_START_LOCAL(el1_sync) + .align 7 + cmp x0, #HVC_SET_VECTORS /* Called from hibernate */ + b.ne 1f + msr vbar_el2, x1 + mov x0, xzr + eret +1: /* Unexpected argument, set an error */ + mov_q x0, HVC_STUB_ERR + eret +SYM_CODE_END(el1_sync) +.endm + +SYM_CODE_START(trans_pgd_stub_vectors) + invalid_vector hyp_stub_el2t_sync_invalid // Synchronous EL2t + invalid_vector hyp_stub_el2t_irq_invalid // IRQ EL2t + invalid_vector hyp_stub_el2t_fiq_invalid // FIQ EL2t + invalid_vector hyp_stub_el2t_error_invalid // Error EL2t + + invalid_vector hyp_stub_el2h_sync_invalid // Synchronous EL2h + invalid_vector hyp_stub_el2h_irq_invalid // IRQ EL2h + invalid_vector hyp_stub_el2h_fiq_invalid // FIQ EL2h + invalid_vector hyp_stub_el2h_error_invalid // Error EL2h + + el1_sync_vector // Synchronous 64-bit EL1 + invalid_vector hyp_stub_el1_irq_invalid // IRQ 64-bit EL1 + invalid_vector hyp_stub_el1_fiq_invalid // FIQ 64-bit EL1 + invalid_vector hyp_stub_el1_error_invalid // Error 64-bit EL1 + + invalid_vector hyp_stub_32b_el1_sync_invalid // Synchronous 32-bit EL1 + invalid_vector hyp_stub_32b_el1_irq_invalid // IRQ 32-bit EL1 + invalid_vector hyp_stub_32b_el1_fiq_invalid // FIQ 32-bit EL1 + invalid_vector hyp_stub_32b_el1_error_invalid // Error 32-bit EL1 + .align 11 +SYM_INNER_LABEL(__trans_pgd_stub_vectors_end, SYM_L_LOCAL) +SYM_CODE_END(trans_pgd_stub_vectors) + +# Check the trans_pgd_stub_vectors didn't overflow +.org . - (__trans_pgd_stub_vectors_end - trans_pgd_stub_vectors) + SZ_2K diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c index 527f0a39c3da..26bd8f2d95af 100644 --- a/arch/arm64/mm/trans_pgd.c +++ b/arch/arm64/mm/trans_pgd.c @@ -5,8 +5,8 @@ * * This file derived from: arch/arm64/kernel/hibernate.c * - * Copyright (c) 2020, Microsoft Corporation. - * Pavel Tatashin + * Copyright (c) 2021, Microsoft Corporation. + * Pasha Tatashin * */ @@ -322,3 +322,26 @@ int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0, return 0; } + +/* + * Create a copy of the vector table so we can call HVC_SET_VECTORS or + * HVC_SOFT_RESTART from contexts where the table may be overwritten. + */ +int trans_pgd_copy_el2_vectors(struct trans_pgd_info *info, + phys_addr_t *el2_vectors) +{ + void *hyp_stub = trans_alloc(info); + + if (!hyp_stub) + return -ENOMEM; + *el2_vectors = virt_to_phys(hyp_stub); + memcpy(hyp_stub, &trans_pgd_stub_vectors, ARM64_VECTOR_TABLE_LEN); + caches_clean_inval_pou((unsigned long)hyp_stub, + (unsigned long)hyp_stub + + ARM64_VECTOR_TABLE_LEN); + dcache_clean_inval_poc((unsigned long)hyp_stub, + (unsigned long)hyp_stub + + ARM64_VECTOR_TABLE_LEN); + + return 0; +} -- cgit v1.2.3 From a347f601452ff3e7cc15bc31307915cea3b3f3f5 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 30 Sep 2021 14:31:01 +0000 Subject: arm64: hibernate: abstract ttrb0 setup function Currently, only hibernate sets custom ttbr0 with safe idmaped function. Kexec, is also going to be using this functionality when relocation code is going to be idmapped. Move the setup sequence to a dedicated cpu_install_ttbr0() for custom ttbr0. Suggested-by: James Morse Signed-off-by: Pasha Tatashin Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210930143113.1502553-4-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu_context.h | 24 ++++++++++++++++++++++++ arch/arm64/kernel/hibernate.c | 21 +-------------------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index f4ba93d4ffeb..6770667b34a3 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -115,6 +115,30 @@ static inline void cpu_install_idmap(void) cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm); } +/* + * Load our new page tables. A strict BBM approach requires that we ensure that + * TLBs are free of any entries that may overlap with the global mappings we are + * about to install. + * + * For a real hibernate/resume/kexec cycle TTBR0 currently points to a zero + * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI runtime + * services), while for a userspace-driven test_resume cycle it points to + * userspace page tables (and we must point it at a zero page ourselves). + * + * We change T0SZ as part of installing the idmap. This is undone by + * cpu_uninstall_idmap() in __cpu_suspend_exit(). + */ +static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz) +{ + cpu_set_reserved_ttbr0(); + local_flush_tlb_all(); + __cpu_set_tcr_t0sz(t0sz); + + /* avoid cpu_switch_mm() and its SW-PAN and CNP interactions */ + write_sysreg(ttbr0, ttbr0_el1); + isb(); +} + /* * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD, * avoiding the possibility of conflicting TLB entries being allocated. diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index b96ef9060e4c..2758f75d6809 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -212,26 +212,7 @@ static int create_safe_exec_page(void *src_start, size_t length, if (rc) return rc; - /* - * Load our new page tables. A strict BBM approach requires that we - * ensure that TLBs are free of any entries that may overlap with the - * global mappings we are about to install. - * - * For a real hibernate/resume cycle TTBR0 currently points to a zero - * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI - * runtime services), while for a userspace-driven test_resume cycle it - * points to userspace page tables (and we must point it at a zero page - * ourselves). - * - * We change T0SZ as part of installing the idmap. This is undone by - * cpu_uninstall_idmap() in __cpu_suspend_exit(). - */ - cpu_set_reserved_ttbr0(); - local_flush_tlb_all(); - __cpu_set_tcr_t0sz(t0sz); - write_sysreg(trans_ttbr0, ttbr0_el1); - isb(); - + cpu_install_ttbr0(trans_ttbr0, t0sz); *phys_dst_addr = virt_to_phys(page); return 0; -- cgit v1.2.3 From 0d8732e461d6b4dc2c625a69225f20e24da4dd79 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 30 Sep 2021 14:31:02 +0000 Subject: arm64: kexec: flush image and lists during kexec load time Currently, during kexec load we are copying relocation function and flushing it. However, we can also flush kexec relocation buffers and if new kernel image is already in place (i.e. crash kernel), we can also flush the new kernel image itself. Signed-off-by: Pasha Tatashin Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210930143113.1502553-5-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/kernel/machine_kexec.c | 58 ++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 213d56c14f60..b6d5a02cba2e 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -60,29 +60,6 @@ void machine_kexec_cleanup(struct kimage *kimage) /* Empty routine needed to avoid build errors. */ } -int machine_kexec_post_load(struct kimage *kimage) -{ - void *reloc_code = page_to_virt(kimage->control_code_page); - - memcpy(reloc_code, arm64_relocate_new_kernel, - arm64_relocate_new_kernel_size); - kimage->arch.kern_reloc = __pa(reloc_code); - kexec_image_info(kimage); - - /* - * For execution with the MMU off, reloc_code needs to be cleaned to the - * PoC and invalidated from the I-cache. - */ - dcache_clean_inval_poc((unsigned long)reloc_code, - (unsigned long)reloc_code + - arm64_relocate_new_kernel_size); - icache_inval_pou((uintptr_t)reloc_code, - (uintptr_t)reloc_code + - arm64_relocate_new_kernel_size); - - return 0; -} - /** * machine_kexec_prepare - Prepare for a kexec reboot. * @@ -163,6 +140,32 @@ static void kexec_segment_flush(const struct kimage *kimage) } } +int machine_kexec_post_load(struct kimage *kimage) +{ + void *reloc_code = page_to_virt(kimage->control_code_page); + + /* If in place flush new kernel image, else flush lists and buffers */ + if (kimage->head & IND_DONE) + kexec_segment_flush(kimage); + else + kexec_list_flush(kimage); + + memcpy(reloc_code, arm64_relocate_new_kernel, + arm64_relocate_new_kernel_size); + kimage->arch.kern_reloc = __pa(reloc_code); + kexec_image_info(kimage); + + /* Flush the reloc_code in preparation for its execution. */ + dcache_clean_inval_poc((unsigned long)reloc_code, + (unsigned long)reloc_code + + arm64_relocate_new_kernel_size); + icache_inval_pou((uintptr_t)reloc_code, + (uintptr_t)reloc_code + + arm64_relocate_new_kernel_size); + + return 0; +} + /** * machine_kexec - Do the kexec reboot. * @@ -180,13 +183,6 @@ void machine_kexec(struct kimage *kimage) WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()), "Some CPUs may be stale, kdump will be unreliable.\n"); - /* Flush the kimage list and its buffers. */ - kexec_list_flush(kimage); - - /* Flush the new image if already in place. */ - if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE)) - kexec_segment_flush(kimage); - pr_info("Bye!\n"); local_daif_mask(); @@ -261,8 +257,6 @@ void arch_kexec_protect_crashkres(void) { int i; - kexec_segment_flush(kexec_crash_image); - for (i = 0; i < kexec_crash_image->nr_segments; i++) set_memory_valid( __phys_to_virt(kexec_crash_image->segment[i].mem), -- cgit v1.2.3 From 5bb6834fc2900052a377df79b9ab065a698bf70b Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 30 Sep 2021 14:31:03 +0000 Subject: arm64: kexec: skip relocation code for inplace kexec In case of kdump or when segments are already in place the relocation is not needed, therefore the setup of relocation function and call to it can be skipped. Signed-off-by: Pasha Tatashin Suggested-by: James Morse Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210930143113.1502553-6-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/kernel/machine_kexec.c | 34 +++++++++++++++++++++------------- arch/arm64/kernel/relocate_kernel.S | 3 --- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index b6d5a02cba2e..7f1cb5a2a463 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -144,16 +144,16 @@ int machine_kexec_post_load(struct kimage *kimage) { void *reloc_code = page_to_virt(kimage->control_code_page); - /* If in place flush new kernel image, else flush lists and buffers */ - if (kimage->head & IND_DONE) + /* If in place, relocation is not used, only flush next kernel */ + if (kimage->head & IND_DONE) { kexec_segment_flush(kimage); - else - kexec_list_flush(kimage); + kexec_image_info(kimage); + return 0; + } memcpy(reloc_code, arm64_relocate_new_kernel, arm64_relocate_new_kernel_size); kimage->arch.kern_reloc = __pa(reloc_code); - kexec_image_info(kimage); /* Flush the reloc_code in preparation for its execution. */ dcache_clean_inval_poc((unsigned long)reloc_code, @@ -162,6 +162,8 @@ int machine_kexec_post_load(struct kimage *kimage) icache_inval_pou((uintptr_t)reloc_code, (uintptr_t)reloc_code + arm64_relocate_new_kernel_size); + kexec_list_flush(kimage); + kexec_image_info(kimage); return 0; } @@ -188,19 +190,25 @@ void machine_kexec(struct kimage *kimage) local_daif_mask(); /* - * cpu_soft_restart will shutdown the MMU, disable data caches, then - * transfer control to the kern_reloc which contains a copy of - * the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel - * uses physical addressing to relocate the new image to its final - * position and transfers control to the image entry point when the - * relocation is complete. + * Both restart and cpu_soft_restart will shutdown the MMU, disable data + * caches. However, restart will start new kernel or purgatory directly, + * cpu_soft_restart will transfer control to arm64_relocate_new_kernel * In kexec case, kimage->start points to purgatory assuming that * kernel entry and dtb address are embedded in purgatory by * userspace (kexec-tools). * In kexec_file case, the kernel starts directly without purgatory. */ - cpu_soft_restart(kimage->arch.kern_reloc, kimage->head, kimage->start, - kimage->arch.dtb_mem); + if (kimage->head & IND_DONE) { + typeof(__cpu_soft_restart) *restart; + + cpu_install_idmap(); + restart = (void *)__pa_symbol(function_nocfi(__cpu_soft_restart)); + restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem, + 0, 0); + } else { + cpu_soft_restart(kimage->arch.kern_reloc, kimage->head, + kimage->start, kimage->arch.dtb_mem); + } BUG(); /* Should never get here. */ } diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index b78ea5de97a4..8058fabe0a76 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -32,8 +32,6 @@ SYM_CODE_START(arm64_relocate_new_kernel) mov x16, x0 /* x16 = kimage_head */ mov x14, xzr /* x14 = entry ptr */ mov x13, xzr /* x13 = copy dest */ - /* Check if the new image needs relocation. */ - tbnz x16, IND_DONE_BIT, .Ldone raw_dcache_line_size x15, x1 /* x15 = dcache line size */ .Lloop: and x12, x16, PAGE_MASK /* x12 = addr */ @@ -65,7 +63,6 @@ SYM_CODE_START(arm64_relocate_new_kernel) .Lnext: ldr x16, [x14], #8 /* entry = *ptr++ */ tbz x16, IND_DONE_BIT, .Lloop /* while (!(entry & DONE)) */ -.Ldone: /* wait for writes from copy_page to finish */ dsb nsh ic iallu -- cgit v1.2.3 From 3036ec599332cdfb406249270e50ad3f1a5c5940 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 30 Sep 2021 14:31:04 +0000 Subject: arm64: kexec: Use dcache ops macros instead of open-coding kexec does dcache maintenance when it re-writes all memory. Our dcache_by_line_op macro depends on reading the sanitized DminLine from memory. Kexec may have overwritten this, so open-codes the sequence. dcache_by_line_op is a whole set of macros, it uses dcache_line_size which uses read_ctr for the sanitsed DminLine. Reading the DminLine is the first thing the dcache_by_line_op does. Rename dcache_by_line_op dcache_by_myline_op and take DminLine as an argument. Kexec can now use the slightly smaller macro. This makes up-coming changes to the dcache maintenance easier on the eye. Code generated by the existing callers is unchanged. Suggested-by: James Morse Signed-off-by: Pasha Tatashin Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210930143113.1502553-7-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 30 +++++++++++++++++++++++------- arch/arm64/kernel/relocate_kernel.S | 13 +++---------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index bfa58409a4d4..d5281f75a58d 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -405,19 +405,19 @@ alternative_endif /* * Macro to perform a data cache maintenance for the interval - * [start, end) + * [start, end) with dcache line size explicitly provided. * * op: operation passed to dc instruction * domain: domain used in dsb instruciton * start: starting virtual address of the region * end: end virtual address of the region + * linesz: dcache line size * fixup: optional label to branch to on user fault - * Corrupts: start, end, tmp1, tmp2 + * Corrupts: start, end, tmp */ - .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup - dcache_line_size \tmp1, \tmp2 - sub \tmp2, \tmp1, #1 - bic \start, \start, \tmp2 + .macro dcache_by_myline_op op, domain, start, end, linesz, tmp, fixup + sub \tmp, \linesz, #1 + bic \start, \start, \tmp .Ldcache_op\@: .ifc \op, cvau __dcache_op_workaround_clean_cache \op, \start @@ -436,7 +436,7 @@ alternative_endif .endif .endif .endif - add \start, \start, \tmp1 + add \start, \start, \linesz cmp \start, \end b.lo .Ldcache_op\@ dsb \domain @@ -444,6 +444,22 @@ alternative_endif _cond_extable .Ldcache_op\@, \fixup .endm +/* + * Macro to perform a data cache maintenance for the interval + * [start, end) + * + * op: operation passed to dc instruction + * domain: domain used in dsb instruciton + * start: starting virtual address of the region + * end: end virtual address of the region + * fixup: optional label to branch to on user fault + * Corrupts: start, end, tmp1, tmp2 + */ + .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup + dcache_line_size \tmp1, \tmp2 + dcache_by_myline_op \op, \domain, \start, \end, \tmp1, \tmp2, \fixup + .endm + /* * Macro to perform an instruction cache maintenance for the interval * [start, end) diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index 8058fabe0a76..8c43779e8cc6 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -41,16 +41,9 @@ SYM_CODE_START(arm64_relocate_new_kernel) tbz x16, IND_SOURCE_BIT, .Ltest_indirection /* Invalidate dest page to PoC. */ - mov x2, x13 - add x20, x2, #PAGE_SIZE - sub x1, x15, #1 - bic x2, x2, x1 -2: dc ivac, x2 - add x2, x2, x15 - cmp x2, x20 - b.lo 2b - dsb sy - + mov x2, x13 + add x1, x2, #PAGE_SIZE + dcache_by_myline_op ivac, sy, x2, x1, x15, x20 copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8 b .Lnext .Ltest_indirection: -- cgit v1.2.3 From 878fdbd704864352b9b11e29805e92ffa182904e Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 30 Sep 2021 14:31:05 +0000 Subject: arm64: kexec: pass kimage as the only argument to relocation function Currently, kexec relocation function (arm64_relocate_new_kernel) accepts the following arguments: head: start of array that contains relocation information. entry: entry point for new kernel or purgatory. dtb_mem: first and only argument to entry. The number of arguments cannot be easily expended, because this function is also called from HVC_SOFT_RESTART, which preserves only three arguments. And, also arm64_relocate_new_kernel is written in assembly but called without stack, thus no place to move extra arguments to free registers. Soon, we will need to pass more arguments: once we enable MMU we will need to pass information about page tables. Pass kimage to arm64_relocate_new_kernel, and teach it to get the required fields from kimage. Signed-off-by: Pasha Tatashin Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210930143113.1502553-8-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/kernel/asm-offsets.c | 7 +++++++ arch/arm64/kernel/machine_kexec.c | 7 +++++-- arch/arm64/kernel/relocate_kernel.S | 10 ++++------ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 551427ae8cc5..1d3319c7518e 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -170,6 +171,12 @@ int main(void) DEFINE(PTRAUTH_KERNEL_KEY_APIA, offsetof(struct ptrauth_keys_kernel, apia)); #endif BLANK(); +#endif +#ifdef CONFIG_KEXEC_CORE + DEFINE(KIMAGE_ARCH_DTB_MEM, offsetof(struct kimage, arch.dtb_mem)); + DEFINE(KIMAGE_HEAD, offsetof(struct kimage, head)); + DEFINE(KIMAGE_START, offsetof(struct kimage, start)); + BLANK(); #endif return 0; } diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 7f1cb5a2a463..e210b19592c6 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -84,6 +84,9 @@ static void kexec_list_flush(struct kimage *kimage) { kimage_entry_t *entry; + dcache_clean_inval_poc((unsigned long)kimage, + (unsigned long)kimage + sizeof(*kimage)); + for (entry = &kimage->head; ; entry++) { unsigned int flag; unsigned long addr; @@ -206,8 +209,8 @@ void machine_kexec(struct kimage *kimage) restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem, 0, 0); } else { - cpu_soft_restart(kimage->arch.kern_reloc, kimage->head, - kimage->start, kimage->arch.dtb_mem); + cpu_soft_restart(kimage->arch.kern_reloc, virt_to_phys(kimage), + 0, 0); } BUG(); /* Should never get here. */ diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index 8c43779e8cc6..63ea19868f63 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -27,9 +27,7 @@ */ SYM_CODE_START(arm64_relocate_new_kernel) /* Setup the list loop variables. */ - mov x18, x2 /* x18 = dtb address */ - mov x17, x1 /* x17 = kimage_start */ - mov x16, x0 /* x16 = kimage_head */ + ldr x16, [x0, #KIMAGE_HEAD] /* x16 = kimage_head */ mov x14, xzr /* x14 = entry ptr */ mov x13, xzr /* x13 = copy dest */ raw_dcache_line_size x15, x1 /* x15 = dcache line size */ @@ -63,12 +61,12 @@ SYM_CODE_START(arm64_relocate_new_kernel) isb /* Start new image. */ - mov x0, x18 + ldr x4, [x0, #KIMAGE_START] /* relocation start */ + ldr x0, [x0, #KIMAGE_ARCH_DTB_MEM] /* dtb address */ mov x1, xzr mov x2, xzr mov x3, xzr - br x17 - + br x4 SYM_CODE_END(arm64_relocate_new_kernel) .align 3 /* To keep the 64-bit values below naturally aligned. */ -- cgit v1.2.3 From 08eae0ef618f34a813c1478200eb351d4416f3ca Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 30 Sep 2021 14:31:06 +0000 Subject: arm64: kexec: configure EL2 vectors for kexec If we have a EL2 mode without VHE, the EL2 vectors are needed in order to switch to EL2 and jump to new world with hypervisor privileges. In preparation to MMU enabled relocation, configure our EL2 table now. Kexec uses #HVC_SOFT_RESTART to branch to the new world, so extend el1_sync vector that is provided by trans_pgd_copy_el2_vectors() to support this case. Signed-off-by: Pasha Tatashin Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210930143113.1502553-9-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 +- arch/arm64/include/asm/kexec.h | 1 + arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kernel/machine_kexec.c | 31 +++++++++++++++++++++++++++++++ arch/arm64/mm/trans_pgd-asm.S | 9 ++++++++- 5 files changed, 42 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5c7ae4c3954b..552a057b40af 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1135,7 +1135,7 @@ config CRASH_DUMP config TRANS_TABLE def_bool y - depends on HIBERNATION + depends on HIBERNATION || KEXEC_CORE config XEN_DOM0 def_bool y diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 00dbcc71aeb2..753a1c398898 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -96,6 +96,7 @@ struct kimage_arch { void *dtb; phys_addr_t dtb_mem; phys_addr_t kern_reloc; + phys_addr_t el2_vectors; }; #ifdef CONFIG_KEXEC_FILE diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 1d3319c7518e..6a2b8b1a4872 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -174,6 +174,7 @@ int main(void) #endif #ifdef CONFIG_KEXEC_CORE DEFINE(KIMAGE_ARCH_DTB_MEM, offsetof(struct kimage, arch.dtb_mem)); + DEFINE(KIMAGE_ARCH_EL2_VECTORS, offsetof(struct kimage, arch.el2_vectors)); DEFINE(KIMAGE_HEAD, offsetof(struct kimage, head)); DEFINE(KIMAGE_START, offsetof(struct kimage, start)); BLANK(); diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index e210b19592c6..59a4b4172b68 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "cpu-reset.h" @@ -43,7 +44,9 @@ static void _kexec_image_info(const char *func, int line, pr_debug(" start: %lx\n", kimage->start); pr_debug(" head: %lx\n", kimage->head); pr_debug(" nr_segments: %lu\n", kimage->nr_segments); + pr_debug(" dtb_mem: %pa\n", &kimage->arch.dtb_mem); pr_debug(" kern_reloc: %pa\n", &kimage->arch.kern_reloc); + pr_debug(" el2_vectors: %pa\n", &kimage->arch.el2_vectors); for (i = 0; i < kimage->nr_segments; i++) { pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n", @@ -143,9 +146,27 @@ static void kexec_segment_flush(const struct kimage *kimage) } } +/* Allocates pages for kexec page table */ +static void *kexec_page_alloc(void *arg) +{ + struct kimage *kimage = (struct kimage *)arg; + struct page *page = kimage_alloc_control_pages(kimage, 0); + + if (!page) + return NULL; + + memset(page_address(page), 0, PAGE_SIZE); + + return page_address(page); +} + int machine_kexec_post_load(struct kimage *kimage) { void *reloc_code = page_to_virt(kimage->control_code_page); + struct trans_pgd_info info = { + .trans_alloc_page = kexec_page_alloc, + .trans_alloc_arg = kimage, + }; /* If in place, relocation is not used, only flush next kernel */ if (kimage->head & IND_DONE) { @@ -154,6 +175,14 @@ int machine_kexec_post_load(struct kimage *kimage) return 0; } + kimage->arch.el2_vectors = 0; + if (is_hyp_nvhe()) { + int rc = trans_pgd_copy_el2_vectors(&info, + &kimage->arch.el2_vectors); + if (rc) + return rc; + } + memcpy(reloc_code, arm64_relocate_new_kernel, arm64_relocate_new_kernel_size); kimage->arch.kern_reloc = __pa(reloc_code); @@ -209,6 +238,8 @@ void machine_kexec(struct kimage *kimage) restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem, 0, 0); } else { + if (is_hyp_nvhe()) + __hyp_set_vectors(kimage->arch.el2_vectors); cpu_soft_restart(kimage->arch.kern_reloc, virt_to_phys(kimage), 0, 0); } diff --git a/arch/arm64/mm/trans_pgd-asm.S b/arch/arm64/mm/trans_pgd-asm.S index 8c4bffe3089d..021c31573bcb 100644 --- a/arch/arm64/mm/trans_pgd-asm.S +++ b/arch/arm64/mm/trans_pgd-asm.S @@ -24,7 +24,14 @@ SYM_CODE_START_LOCAL(el1_sync) msr vbar_el2, x1 mov x0, xzr eret -1: /* Unexpected argument, set an error */ +1: cmp x0, #HVC_SOFT_RESTART /* Called from kexec */ + b.ne 2f + mov x0, x2 + mov x2, x4 + mov x4, x1 + mov x1, x3 + br x4 +2: /* Unexpected argument, set an error */ mov_q x0, HVC_STUB_ERR eret SYM_CODE_END(el1_sync) -- cgit v1.2.3 From ba959fe96a1bbb98765762da20ecb3a6eb9c9d39 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 30 Sep 2021 14:31:07 +0000 Subject: arm64: kexec: relocate in EL1 mode Since we are going to keep MMU enabled during relocation, we need to keep EL1 mode throughout the relocation. Keep EL1 enabled, and switch EL2 only before entering the new world. Suggested-by: James Morse Signed-off-by: Pasha Tatashin Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210930143113.1502553-10-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu-reset.h | 3 +-- arch/arm64/kernel/machine_kexec.c | 4 ++-- arch/arm64/kernel/relocate_kernel.S | 13 +++++++++++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h index 81b3d0fe7a63..296abbac7192 100644 --- a/arch/arm64/kernel/cpu-reset.h +++ b/arch/arm64/kernel/cpu-reset.h @@ -20,11 +20,10 @@ static inline void __noreturn __nocfi cpu_soft_restart(unsigned long entry, { typeof(__cpu_soft_restart) *restart; - unsigned long el2_switch = is_hyp_nvhe(); restart = (void *)__pa_symbol(function_nocfi(__cpu_soft_restart)); cpu_install_idmap(); - restart(el2_switch, entry, arg0, arg1, arg2); + restart(0, entry, arg0, arg1, arg2); unreachable(); } diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 59a4b4172b68..cf5d6f22a041 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -240,8 +240,8 @@ void machine_kexec(struct kimage *kimage) } else { if (is_hyp_nvhe()) __hyp_set_vectors(kimage->arch.el2_vectors); - cpu_soft_restart(kimage->arch.kern_reloc, virt_to_phys(kimage), - 0, 0); + cpu_soft_restart(kimage->arch.kern_reloc, + virt_to_phys(kimage), 0, 0); } BUG(); /* Should never get here. */ diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index 63ea19868f63..b4fb97312a80 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -13,6 +13,7 @@ #include #include #include +#include /* * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it. @@ -61,12 +62,20 @@ SYM_CODE_START(arm64_relocate_new_kernel) isb /* Start new image. */ + ldr x1, [x0, #KIMAGE_ARCH_EL2_VECTORS] /* relocation start */ + cbz x1, .Lel1 + ldr x1, [x0, #KIMAGE_START] /* relocation start */ + ldr x2, [x0, #KIMAGE_ARCH_DTB_MEM] /* dtb address */ + mov x3, xzr + mov x4, xzr + mov x0, #HVC_SOFT_RESTART + hvc #0 /* Jumps from el2 */ +.Lel1: ldr x4, [x0, #KIMAGE_START] /* relocation start */ ldr x0, [x0, #KIMAGE_ARCH_DTB_MEM] /* dtb address */ - mov x1, xzr mov x2, xzr mov x3, xzr - br x4 + br x4 /* Jumps from el1 */ SYM_CODE_END(arm64_relocate_new_kernel) .align 3 /* To keep the 64-bit values below naturally aligned. */ -- cgit v1.2.3 From 19a046f07ce5a5c34ebb6432192d98cfdb38444f Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 30 Sep 2021 14:31:08 +0000 Subject: arm64: kexec: use ld script for relocation function Currently, relocation code declares start and end variables which are used to compute its size. The better way to do this is to use ld script, and put relocation function in its own section. Signed-off-by: Pasha Tatashin Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210930143113.1502553-11-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sections.h | 1 + arch/arm64/kernel/machine_kexec.c | 16 ++++++---------- arch/arm64/kernel/relocate_kernel.S | 14 +------------- arch/arm64/kernel/vmlinux.lds.S | 19 +++++++++++++++++++ 4 files changed, 27 insertions(+), 23 deletions(-) diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index e4ad9db53af1..152cb35bf9df 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -21,5 +21,6 @@ extern char __exittext_begin[], __exittext_end[]; extern char __irqentry_text_start[], __irqentry_text_end[]; extern char __mmuoff_data_start[], __mmuoff_data_end[]; extern char __entry_tramp_text_start[], __entry_tramp_text_end[]; +extern char __relocate_new_kernel_start[], __relocate_new_kernel_end[]; #endif /* __ASM_SECTIONS_H */ diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index cf5d6f22a041..320442d35811 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -21,14 +21,11 @@ #include #include #include +#include #include #include "cpu-reset.h" -/* Global variables for the arm64_relocate_new_kernel routine. */ -extern const unsigned char arm64_relocate_new_kernel[]; -extern const unsigned long arm64_relocate_new_kernel_size; - /** * kexec_image_info - For debugging output. */ @@ -163,6 +160,7 @@ static void *kexec_page_alloc(void *arg) int machine_kexec_post_load(struct kimage *kimage) { void *reloc_code = page_to_virt(kimage->control_code_page); + long reloc_size; struct trans_pgd_info info = { .trans_alloc_page = kexec_page_alloc, .trans_alloc_arg = kimage, @@ -183,17 +181,15 @@ int machine_kexec_post_load(struct kimage *kimage) return rc; } - memcpy(reloc_code, arm64_relocate_new_kernel, - arm64_relocate_new_kernel_size); + reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start; + memcpy(reloc_code, __relocate_new_kernel_start, reloc_size); kimage->arch.kern_reloc = __pa(reloc_code); /* Flush the reloc_code in preparation for its execution. */ dcache_clean_inval_poc((unsigned long)reloc_code, - (unsigned long)reloc_code + - arm64_relocate_new_kernel_size); + (unsigned long)reloc_code + reloc_size); icache_inval_pou((uintptr_t)reloc_code, - (uintptr_t)reloc_code + - arm64_relocate_new_kernel_size); + (uintptr_t)reloc_code + reloc_size); kexec_list_flush(kimage); kexec_image_info(kimage); diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index b4fb97312a80..2227741b96fa 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -15,6 +15,7 @@ #include #include +.section ".kexec_relocate.text", "ax" /* * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it. * @@ -77,16 +78,3 @@ SYM_CODE_START(arm64_relocate_new_kernel) mov x3, xzr br x4 /* Jumps from el1 */ SYM_CODE_END(arm64_relocate_new_kernel) - -.align 3 /* To keep the 64-bit values below naturally aligned. */ - -.Lcopy_end: -.org KEXEC_CONTROL_PAGE_SIZE - -/* - * arm64_relocate_new_kernel_size - Number of bytes to copy to the - * control_code_page. - */ -.globl arm64_relocate_new_kernel_size -arm64_relocate_new_kernel_size: - .quad .Lcopy_end - arm64_relocate_new_kernel diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index f6b1a88245db..0760331af85c 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -100,6 +101,16 @@ jiffies = jiffies_64; #define HIBERNATE_TEXT #endif +#ifdef CONFIG_KEXEC_CORE +#define KEXEC_TEXT \ + . = ALIGN(SZ_4K); \ + __relocate_new_kernel_start = .; \ + *(.kexec_relocate.text) \ + __relocate_new_kernel_end = .; +#else +#define KEXEC_TEXT +#endif + #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 #define TRAMP_TEXT \ . = ALIGN(PAGE_SIZE); \ @@ -160,6 +171,7 @@ SECTIONS HYPERVISOR_TEXT IDMAP_TEXT HIBERNATE_TEXT + KEXEC_TEXT TRAMP_TEXT *(.fixup) *(.gnu.warning) @@ -348,3 +360,10 @@ ASSERT(swapper_pg_dir - reserved_pg_dir == RESERVED_SWAPPER_OFFSET, ASSERT(swapper_pg_dir - tramp_pg_dir == TRAMP_SWAPPER_OFFSET, "TRAMP_SWAPPER_OFFSET is wrong!") #endif + +#ifdef CONFIG_KEXEC_CORE +/* kexec relocation code should fit into one KEXEC_CONTROL_PAGE_SIZE */ +ASSERT(__relocate_new_kernel_end - (__relocate_new_kernel_start & ~(SZ_4K - 1)) + <= SZ_4K, "kexec relocation code is too big or misaligned") +ASSERT(KEXEC_CONTROL_PAGE_SIZE >= SZ_4K, "KEXEC_CONTROL_PAGE_SIZE is broken") +#endif -- cgit v1.2.3 From 3744b5280e67f54579abe92576deec0079242323 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 30 Sep 2021 14:31:09 +0000 Subject: arm64: kexec: install a copy of the linear-map To perform the kexec relocation with the MMU enabled, we need a copy of the linear map. Create one, and install it from the relocation code. This has to be done from the assembly code as it will be idmapped with TTBR0. The kernel runs in TTRB1, so can't use the break-before-make sequence on the mapping it is executing from. The makes no difference yet as the relocation code runs with the MMU disabled. Suggested-by: James Morse Signed-off-by: Pasha Tatashin Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210930143113.1502553-12-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 19 +++++++++++++++++++ arch/arm64/include/asm/kexec.h | 2 ++ arch/arm64/kernel/asm-offsets.c | 2 ++ arch/arm64/kernel/hibernate-asm.S | 20 -------------------- arch/arm64/kernel/machine_kexec.c | 16 ++++++++++++++-- arch/arm64/kernel/relocate_kernel.S | 3 +++ 6 files changed, 40 insertions(+), 22 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index d5281f75a58d..5da176dd37a9 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -483,6 +483,25 @@ alternative_endif _cond_extable .Licache_op\@, \fixup .endm +/* + * To prevent the possibility of old and new partial table walks being visible + * in the tlb, switch the ttbr to a zero page when we invalidate the old + * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i + * Even switching to our copied tables will cause a changed output address at + * each stage of the walk. + */ + .macro break_before_make_ttbr_switch zero_page, page_table, tmp, tmp2 + phys_to_ttbr \tmp, \zero_page + msr ttbr1_el1, \tmp + isb + tlbi vmalle1 + dsb nsh + phys_to_ttbr \tmp, \page_table + offset_ttbr1 \tmp, \tmp2 + msr ttbr1_el1, \tmp + isb + .endm + /* * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present */ diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 753a1c398898..d678f0ceb7ee 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -97,6 +97,8 @@ struct kimage_arch { phys_addr_t dtb_mem; phys_addr_t kern_reloc; phys_addr_t el2_vectors; + phys_addr_t ttbr1; + phys_addr_t zero_page; }; #ifdef CONFIG_KEXEC_FILE diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 6a2b8b1a4872..1f565224dafd 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -175,6 +175,8 @@ int main(void) #ifdef CONFIG_KEXEC_CORE DEFINE(KIMAGE_ARCH_DTB_MEM, offsetof(struct kimage, arch.dtb_mem)); DEFINE(KIMAGE_ARCH_EL2_VECTORS, offsetof(struct kimage, arch.el2_vectors)); + DEFINE(KIMAGE_ARCH_ZERO_PAGE, offsetof(struct kimage, arch.zero_page)); + DEFINE(KIMAGE_ARCH_TTBR1, offsetof(struct kimage, arch.ttbr1)); DEFINE(KIMAGE_HEAD, offsetof(struct kimage, head)); DEFINE(KIMAGE_START, offsetof(struct kimage, start)); BLANK(); diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index a30a2c3f905e..0e1d9c3c6a93 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -15,26 +15,6 @@ #include #include -/* - * To prevent the possibility of old and new partial table walks being visible - * in the tlb, switch the ttbr to a zero page when we invalidate the old - * records. D4.7.1 'General TLB maintenance requirements' in ARM DDI 0487A.i - * Even switching to our copied tables will cause a changed output address at - * each stage of the walk. - */ -.macro break_before_make_ttbr_switch zero_page, page_table, tmp, tmp2 - phys_to_ttbr \tmp, \zero_page - msr ttbr1_el1, \tmp - isb - tlbi vmalle1 - dsb nsh - phys_to_ttbr \tmp, \page_table - offset_ttbr1 \tmp, \tmp2 - msr ttbr1_el1, \tmp - isb -.endm - - /* * Resume from hibernate * diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 320442d35811..fbff545565f1 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -159,6 +159,8 @@ static void *kexec_page_alloc(void *arg) int machine_kexec_post_load(struct kimage *kimage) { + int rc; + pgd_t *trans_pgd; void *reloc_code = page_to_virt(kimage->control_code_page); long reloc_size; struct trans_pgd_info info = { @@ -175,12 +177,22 @@ int machine_kexec_post_load(struct kimage *kimage) kimage->arch.el2_vectors = 0; if (is_hyp_nvhe()) { - int rc = trans_pgd_copy_el2_vectors(&info, - &kimage->arch.el2_vectors); + rc = trans_pgd_copy_el2_vectors(&info, + &kimage->arch.el2_vectors); if (rc) return rc; } + /* Create a copy of the linear map */ + trans_pgd = kexec_page_alloc(kimage); + if (!trans_pgd) + return -ENOMEM; + rc = trans_pgd_create_copy(&info, &trans_pgd, PAGE_OFFSET, PAGE_END); + if (rc) + return rc; + kimage->arch.ttbr1 = __pa(trans_pgd); + kimage->arch.zero_page = __pa(empty_zero_page); + reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start; memcpy(reloc_code, __relocate_new_kernel_start, reloc_size); kimage->arch.kern_reloc = __pa(reloc_code); diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index 2227741b96fa..2b80232246f7 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -29,10 +29,13 @@ */ SYM_CODE_START(arm64_relocate_new_kernel) /* Setup the list loop variables. */ + ldr x18, [x0, #KIMAGE_ARCH_ZERO_PAGE] /* x18 = zero page for BBM */ + ldr x17, [x0, #KIMAGE_ARCH_TTBR1] /* x17 = linear map copy */ ldr x16, [x0, #KIMAGE_HEAD] /* x16 = kimage_head */ mov x14, xzr /* x14 = entry ptr */ mov x13, xzr /* x13 = copy dest */ raw_dcache_line_size x15, x1 /* x15 = dcache line size */ + break_before_make_ttbr_switch x18, x17, x1, x2 /* set linear map */ .Lloop: and x12, x16, PAGE_MASK /* x12 = addr */ -- cgit v1.2.3 From efc2d0f20a9dab2d0e92a271dc4b8e3496377739 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 30 Sep 2021 14:31:10 +0000 Subject: arm64: kexec: keep MMU enabled during kexec relocation Now, that we have linear map page tables configured, keep MMU enabled to allow faster relocation of segments to final destination. Cavium ThunderX2: Kernel Image size: 38M Iniramfs size: 46M Total relocation size: 84M MMU-disabled: relocation 7.489539915s MMU-enabled: relocation 0.03946095s Broadcom Stingray: The performance data: for a moderate size kernel + initramfs: 25M the relocation was taking 0.382s, with enabled MMU it now takes 0.019s only or x20 improvement. The time is proportional to the size of relocation, therefore if initramfs is larger, 100M it could take over a second. Signed-off-by: Pasha Tatashin Tested-by: Pingfan Liu Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210930143113.1502553-13-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/kexec.h | 3 +++ arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kernel/machine_kexec.c | 16 ++++++++++++---- arch/arm64/kernel/relocate_kernel.S | 31 ++++++++++++++++++++----------- 4 files changed, 36 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index d678f0ceb7ee..dca6dedc3b25 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -97,8 +97,11 @@ struct kimage_arch { phys_addr_t dtb_mem; phys_addr_t kern_reloc; phys_addr_t el2_vectors; + phys_addr_t ttbr0; phys_addr_t ttbr1; phys_addr_t zero_page; + unsigned long phys_offset; + unsigned long t0sz; }; #ifdef CONFIG_KEXEC_FILE diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 1f565224dafd..2124357c2075 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -176,6 +176,7 @@ int main(void) DEFINE(KIMAGE_ARCH_DTB_MEM, offsetof(struct kimage, arch.dtb_mem)); DEFINE(KIMAGE_ARCH_EL2_VECTORS, offsetof(struct kimage, arch.el2_vectors)); DEFINE(KIMAGE_ARCH_ZERO_PAGE, offsetof(struct kimage, arch.zero_page)); + DEFINE(KIMAGE_ARCH_PHYS_OFFSET, offsetof(struct kimage, arch.phys_offset)); DEFINE(KIMAGE_ARCH_TTBR1, offsetof(struct kimage, arch.ttbr1)); DEFINE(KIMAGE_HEAD, offsetof(struct kimage, head)); DEFINE(KIMAGE_START, offsetof(struct kimage, start)); diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index fbff545565f1..1e9a2a45e016 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -196,6 +196,11 @@ int machine_kexec_post_load(struct kimage *kimage) reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start; memcpy(reloc_code, __relocate_new_kernel_start, reloc_size); kimage->arch.kern_reloc = __pa(reloc_code); + rc = trans_pgd_idmap_page(&info, &kimage->arch.ttbr0, + &kimage->arch.t0sz, reloc_code); + if (rc) + return rc; + kimage->arch.phys_offset = virt_to_phys(kimage) - (long)kimage; /* Flush the reloc_code in preparation for its execution. */ dcache_clean_inval_poc((unsigned long)reloc_code, @@ -230,9 +235,9 @@ void machine_kexec(struct kimage *kimage) local_daif_mask(); /* - * Both restart and cpu_soft_restart will shutdown the MMU, disable data + * Both restart and kernel_reloc will shutdown the MMU, disable data * caches. However, restart will start new kernel or purgatory directly, - * cpu_soft_restart will transfer control to arm64_relocate_new_kernel + * kernel_reloc contains the body of arm64_relocate_new_kernel * In kexec case, kimage->start points to purgatory assuming that * kernel entry and dtb address are embedded in purgatory by * userspace (kexec-tools). @@ -246,10 +251,13 @@ void machine_kexec(struct kimage *kimage) restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem, 0, 0); } else { + void (*kernel_reloc)(struct kimage *kimage); + if (is_hyp_nvhe()) __hyp_set_vectors(kimage->arch.el2_vectors); - cpu_soft_restart(kimage->arch.kern_reloc, - virt_to_phys(kimage), 0, 0); + cpu_install_ttbr0(kimage->arch.ttbr0, kimage->arch.t0sz); + kernel_reloc = (void *)kimage->arch.kern_reloc; + kernel_reloc(kimage); } BUG(); /* Should never get here. */ diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index 2b80232246f7..f0a3df9e18a3 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -4,6 +4,8 @@ * * Copyright (C) Linaro. * Copyright (C) Huawei Futurewei Technologies. + * Copyright (C) 2021, Microsoft Corporation. + * Pasha Tatashin */ #include @@ -15,6 +17,13 @@ #include #include +.macro turn_off_mmu tmp1, tmp2 + mov_q \tmp1, INIT_SCTLR_EL1_MMU_OFF + pre_disable_mmu_workaround + msr sctlr_el1, \tmp1 + isb +.endm + .section ".kexec_relocate.text", "ax" /* * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it. @@ -32,22 +41,21 @@ SYM_CODE_START(arm64_relocate_new_kernel) ldr x18, [x0, #KIMAGE_ARCH_ZERO_PAGE] /* x18 = zero page for BBM */ ldr x17, [x0, #KIMAGE_ARCH_TTBR1] /* x17 = linear map copy */ ldr x16, [x0, #KIMAGE_HEAD] /* x16 = kimage_head */ - mov x14, xzr /* x14 = entry ptr */ - mov x13, xzr /* x13 = copy dest */ + ldr x22, [x0, #KIMAGE_ARCH_PHYS_OFFSET] /* x22 phys_offset */ raw_dcache_line_size x15, x1 /* x15 = dcache line size */ break_before_make_ttbr_switch x18, x17, x1, x2 /* set linear map */ .Lloop: and x12, x16, PAGE_MASK /* x12 = addr */ - + sub x12, x12, x22 /* Convert x12 to virt */ /* Test the entry flags. */ .Ltest_source: tbz x16, IND_SOURCE_BIT, .Ltest_indirection /* Invalidate dest page to PoC. */ - mov x2, x13 - add x1, x2, #PAGE_SIZE - dcache_by_myline_op ivac, sy, x2, x1, x15, x20 + mov x19, x13 copy_page x13, x12, x1, x2, x3, x4, x5, x6, x7, x8 + add x1, x19, #PAGE_SIZE + dcache_by_myline_op civac, sy, x19, x1, x15, x20 b .Lnext .Ltest_indirection: tbz x16, IND_INDIRECTION_BIT, .Ltest_destination @@ -64,19 +72,20 @@ SYM_CODE_START(arm64_relocate_new_kernel) ic iallu dsb nsh isb + ldr x4, [x0, #KIMAGE_START] /* relocation start */ + ldr x1, [x0, #KIMAGE_ARCH_EL2_VECTORS] /* relocation start */ + ldr x0, [x0, #KIMAGE_ARCH_DTB_MEM] /* dtb address */ + turn_off_mmu x12, x13 /* Start new image. */ - ldr x1, [x0, #KIMAGE_ARCH_EL2_VECTORS] /* relocation start */ cbz x1, .Lel1 - ldr x1, [x0, #KIMAGE_START] /* relocation start */ - ldr x2, [x0, #KIMAGE_ARCH_DTB_MEM] /* dtb address */ + mov x1, x4 /* relocation start */ + mov x2, x0 /* dtb address */ mov x3, xzr mov x4, xzr mov x0, #HVC_SOFT_RESTART hvc #0 /* Jumps from el2 */ .Lel1: - ldr x4, [x0, #KIMAGE_START] /* relocation start */ - ldr x0, [x0, #KIMAGE_ARCH_DTB_MEM] /* dtb address */ mov x2, xzr mov x3, xzr br x4 /* Jumps from el1 */ -- cgit v1.2.3 From 939f1b9564c6aa2bd0f4e4e336ac74379692c38b Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 30 Sep 2021 14:31:11 +0000 Subject: arm64: kexec: remove the pre-kexec PoC maintenance Now that kexec does its relocations with the MMU enabled, we no longer need to clean the relocation data to the PoC. Suggested-by: James Morse Signed-off-by: Pasha Tatashin Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210930143113.1502553-14-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/kernel/machine_kexec.c | 43 --------------------------------------- 1 file changed, 43 deletions(-) diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 1e9a2a45e016..559d47a3c59c 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -77,48 +77,6 @@ int machine_kexec_prepare(struct kimage *kimage) return 0; } -/** - * kexec_list_flush - Helper to flush the kimage list and source pages to PoC. - */ -static void kexec_list_flush(struct kimage *kimage) -{ - kimage_entry_t *entry; - - dcache_clean_inval_poc((unsigned long)kimage, - (unsigned long)kimage + sizeof(*kimage)); - - for (entry = &kimage->head; ; entry++) { - unsigned int flag; - unsigned long addr; - - /* flush the list entries. */ - dcache_clean_inval_poc((unsigned long)entry, - (unsigned long)entry + - sizeof(kimage_entry_t)); - - flag = *entry & IND_FLAGS; - if (flag == IND_DONE) - break; - - addr = (unsigned long)phys_to_virt(*entry & PAGE_MASK); - - switch (flag) { - case IND_INDIRECTION: - /* Set entry point just before the new list page. */ - entry = (kimage_entry_t *)addr - 1; - break; - case IND_SOURCE: - /* flush the source pages. */ - dcache_clean_inval_poc(addr, addr + PAGE_SIZE); - break; - case IND_DESTINATION: - break; - default: - BUG(); - } - } -} - /** * kexec_segment_flush - Helper to flush the kimage segments to PoC. */ @@ -207,7 +165,6 @@ int machine_kexec_post_load(struct kimage *kimage) (unsigned long)reloc_code + reloc_size); icache_inval_pou((uintptr_t)reloc_code, (uintptr_t)reloc_code + reloc_size); - kexec_list_flush(kimage); kexec_image_info(kimage); return 0; -- cgit v1.2.3 From 7a2512fa649397c68127a480ef8fdd9dcf323045 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 30 Sep 2021 14:31:12 +0000 Subject: arm64: kexec: remove cpu-reset.h This header contains only cpu_soft_restart() which is never used directly anymore. So, remove this header, and rename the helper to be cpu_soft_restart(). Suggested-by: James Morse Signed-off-by: Pasha Tatashin Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210930143113.1502553-15-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/kexec.h | 6 ++++++ arch/arm64/kernel/cpu-reset.S | 7 +++---- arch/arm64/kernel/cpu-reset.h | 30 ------------------------------ arch/arm64/kernel/machine_kexec.c | 6 ++---- 4 files changed, 11 insertions(+), 38 deletions(-) delete mode 100644 arch/arm64/kernel/cpu-reset.h diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index dca6dedc3b25..9839bfc163d7 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -90,6 +90,12 @@ static inline void crash_prepare_suspend(void) {} static inline void crash_post_resume(void) {} #endif +#if defined(CONFIG_KEXEC_CORE) +void cpu_soft_restart(unsigned long el2_switch, unsigned long entry, + unsigned long arg0, unsigned long arg1, + unsigned long arg2); +#endif + #define ARCH_HAS_KIMAGE_ARCH struct kimage_arch { diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index d47ff63a5b66..48a8af97faa9 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -16,8 +16,7 @@ .pushsection .idmap.text, "awx" /* - * __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for - * cpu_soft_restart. + * cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) * * @el2_switch: Flag to indicate a switch to EL2 is needed. * @entry: Location to jump to for soft reset. @@ -29,7 +28,7 @@ * branch to what would be the reset vector. It must be executed with the * flat identity mapping. */ -SYM_CODE_START(__cpu_soft_restart) +SYM_CODE_START(cpu_soft_restart) mov_q x12, INIT_SCTLR_EL1_MMU_OFF pre_disable_mmu_workaround /* @@ -48,6 +47,6 @@ SYM_CODE_START(__cpu_soft_restart) mov x1, x3 // arg1 mov x2, x4 // arg2 br x8 -SYM_CODE_END(__cpu_soft_restart) +SYM_CODE_END(cpu_soft_restart) .popsection diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h deleted file mode 100644 index 296abbac7192..000000000000 --- a/arch/arm64/kernel/cpu-reset.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * CPU reset routines - * - * Copyright (C) 2015 Huawei Futurewei Technologies. - */ - -#ifndef _ARM64_CPU_RESET_H -#define _ARM64_CPU_RESET_H - -#include - -void __cpu_soft_restart(unsigned long el2_switch, unsigned long entry, - unsigned long arg0, unsigned long arg1, unsigned long arg2); - -static inline void __noreturn __nocfi cpu_soft_restart(unsigned long entry, - unsigned long arg0, - unsigned long arg1, - unsigned long arg2) -{ - typeof(__cpu_soft_restart) *restart; - - restart = (void *)__pa_symbol(function_nocfi(__cpu_soft_restart)); - - cpu_install_idmap(); - restart(0, entry, arg0, arg1, arg2); - unreachable(); -} - -#endif diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 559d47a3c59c..1038494135c8 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -24,8 +24,6 @@ #include #include -#include "cpu-reset.h" - /** * kexec_image_info - For debugging output. */ @@ -201,10 +199,10 @@ void machine_kexec(struct kimage *kimage) * In kexec_file case, the kernel starts directly without purgatory. */ if (kimage->head & IND_DONE) { - typeof(__cpu_soft_restart) *restart; + typeof(cpu_soft_restart) *restart; cpu_install_idmap(); - restart = (void *)__pa_symbol(function_nocfi(__cpu_soft_restart)); + restart = (void *)__pa_symbol(function_nocfi(cpu_soft_restart)); restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem, 0, 0); } else { -- cgit v1.2.3 From 6091dd9eaf8e77311548b616281c1a9c67e6ca40 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 30 Sep 2021 14:31:13 +0000 Subject: arm64: trans_pgd: remove trans_pgd_map_page() The intend of trans_pgd_map_page() was to map contiguous range of VA memory to the memory that is getting relocated during kexec. However, since we are now using linear map instead of contiguous range this function is not needed Suggested-by: Pingfan Liu Signed-off-by: Pasha Tatashin Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20210930143113.1502553-16-pasha.tatashin@soleen.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/trans_pgd.h | 5 +--- arch/arm64/mm/trans_pgd.c | 57 -------------------------------------- 2 files changed, 1 insertion(+), 61 deletions(-) diff --git a/arch/arm64/include/asm/trans_pgd.h b/arch/arm64/include/asm/trans_pgd.h index 7b04d32b102c..033d400a4ea4 100644 --- a/arch/arm64/include/asm/trans_pgd.h +++ b/arch/arm64/include/asm/trans_pgd.h @@ -15,7 +15,7 @@ /* * trans_alloc_page * - Allocator that should return exactly one zeroed page, if this - * allocator fails, trans_pgd_create_copy() and trans_pgd_map_page() + * allocator fails, trans_pgd_create_copy() and trans_pgd_idmap_page() * return -ENOMEM error. * * trans_alloc_arg @@ -30,9 +30,6 @@ struct trans_pgd_info { int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **trans_pgd, unsigned long start, unsigned long end); -int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd, - void *page, unsigned long dst_addr, pgprot_t pgprot); - int trans_pgd_idmap_page(struct trans_pgd_info *info, phys_addr_t *trans_ttbr0, unsigned long *t0sz, void *page); diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c index 26bd8f2d95af..d7da8ca40d2e 100644 --- a/arch/arm64/mm/trans_pgd.c +++ b/arch/arm64/mm/trans_pgd.c @@ -217,63 +217,6 @@ int trans_pgd_create_copy(struct trans_pgd_info *info, pgd_t **dst_pgdp, return rc; } -/* - * Add map entry to trans_pgd for a base-size page at PTE level. - * info: contains allocator and its argument - * trans_pgd: page table in which new map is added. - * page: page to be mapped. - * dst_addr: new VA address for the page - * pgprot: protection for the page. - * - * Returns 0 on success, and -ENOMEM on failure. - */ -int trans_pgd_map_page(struct trans_pgd_info *info, pgd_t *trans_pgd, - void *page, unsigned long dst_addr, pgprot_t pgprot) -{ - pgd_t *pgdp; - p4d_t *p4dp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - - pgdp = pgd_offset_pgd(trans_pgd, dst_addr); - if (pgd_none(READ_ONCE(*pgdp))) { - p4dp = trans_alloc(info); - if (!pgdp) - return -ENOMEM; - pgd_populate(NULL, pgdp, p4dp); - } - - p4dp = p4d_offset(pgdp, dst_addr); - if (p4d_none(READ_ONCE(*p4dp))) { - pudp = trans_alloc(info); - if (!pudp) - return -ENOMEM; - p4d_populate(NULL, p4dp, pudp); - } - - pudp = pud_offset(p4dp, dst_addr); - if (pud_none(READ_ONCE(*pudp))) { - pmdp = trans_alloc(info); - if (!pmdp) - return -ENOMEM; - pud_populate(NULL, pudp, pmdp); - } - - pmdp = pmd_offset(pudp, dst_addr); - if (pmd_none(READ_ONCE(*pmdp))) { - ptep = trans_alloc(info); - if (!ptep) - return -ENOMEM; - pmd_populate_kernel(NULL, pmdp, ptep); - } - - ptep = pte_offset_kernel(pmdp, dst_addr); - set_pte(ptep, pfn_pte(virt_to_pfn(page), pgprot)); - - return 0; -} - /* * The page we want to idmap may be outside the range covered by VA_BITS that * can be built using the kernel's p?d_populate() helpers. As a one off, for a -- cgit v1.2.3 From a9c38c5d267cb94871dfa2de5539c92025c855d7 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 30 Sep 2021 04:30:38 +0300 Subject: dma-mapping: remove bogus test for pfn_valid from dma_map_resource dma_map_resource() uses pfn_valid() to ensure the range is not RAM. However, pfn_valid() only checks for availability of the memory map for a PFN but it does not ensure that the PFN is actually backed by RAM. As dma_map_resource() is the only method in DMA mapping APIs that has this check, simply drop the pfn_valid() test from dma_map_resource(). Link: https://lore.kernel.org/all/20210824173741.GC623@arm.com/ Signed-off-by: Mike Rapoport Reviewed-by: Christoph Hellwig Acked-by: David Hildenbrand Link: https://lore.kernel.org/r/20210930013039.11260-2-rppt@kernel.org Signed-off-by: Will Deacon --- kernel/dma/mapping.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 06fec5547e7c..dda8d8b84a55 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -296,10 +296,6 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, if (WARN_ON_ONCE(!dev->dma_mask)) return DMA_MAPPING_ERROR; - /* Don't allow RAM to be mapped */ - if (WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr)))) - return DMA_MAPPING_ERROR; - if (dma_map_direct(dev, ops)) addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs); else if (ops->map_resource) -- cgit v1.2.3 From 3de360c3fdb34fbdbaf6da3af94367d3fded95d3 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 30 Sep 2021 04:30:39 +0300 Subject: arm64/mm: drop HAVE_ARCH_PFN_VALID CONFIG_SPARSEMEM_VMEMMAP is now the only available memory model on arm64 platforms and free_unused_memmap() would just return without creating any holes in the memmap mapping. There is no need for any special handling in pfn_valid() and HAVE_ARCH_PFN_VALID can just be dropped. This also moves the pfn upper bits sanity check into generic pfn_valid(). [rppt: rebased on v5.15-rc3] Link: https://lkml.kernel.org/r/1621947349-25421-1-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Acked-by: David Hildenbrand Acked-by: Mike Rapoport Cc: Catalin Marinas Cc: Will Deacon Cc: David Hildenbrand Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Mike Rapoport Link: https://lore.kernel.org/r/20210930013039.11260-3-rppt@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 - arch/arm64/include/asm/page.h | 1 - arch/arm64/mm/init.c | 37 ------------------------------------- 3 files changed, 39 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5c7ae4c3954b..53dd23f305be 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -154,7 +154,6 @@ config ARM64 select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT - select HAVE_ARCH_PFN_VALID select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP_FILTER diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index f98c91bbd7c1..993a27ea6f54 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -41,7 +41,6 @@ void tag_clear_highpage(struct page *to); typedef struct page *pgtable_t; -int pfn_valid(unsigned long pfn); int pfn_is_map_memory(unsigned long pfn); #include diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 37a81754d9b6..e60c6eb813b7 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -160,43 +160,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) free_area_init(max_zone_pfns); } -int pfn_valid(unsigned long pfn) -{ - phys_addr_t addr = PFN_PHYS(pfn); - struct mem_section *ms; - - /* - * Ensure the upper PAGE_SHIFT bits are clear in the - * pfn. Else it might lead to false positives when - * some of the upper bits are set, but the lower bits - * match a valid pfn. - */ - if (PHYS_PFN(addr) != pfn) - return 0; - - if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) - return 0; - - ms = __pfn_to_section(pfn); - if (!valid_section(ms)) - return 0; - - /* - * ZONE_DEVICE memory does not have the memblock entries. - * memblock_is_map_memory() check for ZONE_DEVICE based - * addresses will always fail. Even the normal hotplugged - * memory will never have MEMBLOCK_NOMAP flag set in their - * memblock entries. Skip memblock search for all non early - * memory sections covering all of hotplug memory including - * both normal and ZONE_DEVICE based. - */ - if (!early_section(ms)) - return pfn_section_valid(ms, pfn); - - return memblock_is_memory(addr); -} -EXPORT_SYMBOL(pfn_valid); - int pfn_is_map_memory(unsigned long pfn) { phys_addr_t addr = PFN_PHYS(pfn); -- cgit v1.2.3 From 16cc4af286aae85859ee39e25d21f3d910f8175d Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Tue, 28 Sep 2021 20:30:22 +0800 Subject: drivers/perf: hisi: Fix PA PMU counter offset The PA PMU counter offset was correct in [1] and the driver has already been verified. We want to keep the register offset using lower case character in later version that is consistent with the existed driver. Since there was no functional change, we didn't do more test. However there is typo when modified the PA PMU counter offset by mistake, so fix this bad mistake. [1] https://www.spinics.net/lists/arm-kernel/msg865263.html Cc: Will Deacon Cc: Mark Rutland Cc: John Garry Cc: Qi Liu Signed-off-by: Shaokun Zhang Link: https://lore.kernel.org/r/20210928123022.23467-1-zhangshaokun@hisilicon.com Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_pa_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c index 83264ec0a957..bad99d149172 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pa_pmu.c @@ -27,7 +27,7 @@ #define PA_INT_CLEAR 0x1c7c #define PA_EVENT_TYPE0 0x1c80 #define PA_PMU_VERSION 0x1cf0 -#define PA_EVENT_CNT0_L 0x1f00 +#define PA_EVENT_CNT0_L 0x1d00 #define PA_EVTYPE_MASK 0xff #define PA_NR_COUNTERS 0x8 -- cgit v1.2.3 From 78cac393b4642eb5936d9c0685acd50d8370648f Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 1 Oct 2021 18:48:45 +0800 Subject: drivers/perf: thunderx2_pmu: Change data in size tx2_uncore_event_update() A LSL of 32 requires > 32b value to hold the result. However in tx2_uncore_event_update(), 1UL << 32 currently only works as unsigned long is 64b on a 64b system. If we want to compile test for a 32b system, we need unsigned long long, whose min size is 64b. Signed-off-by: John Garry Link: https://lore.kernel.org/r/1633085326-156653-2-git-send-email-john.garry@huawei.com Signed-off-by: Will Deacon --- drivers/perf/thunderx2_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c index fc1a376ee906..05378c0fd8f3 100644 --- a/drivers/perf/thunderx2_pmu.c +++ b/drivers/perf/thunderx2_pmu.c @@ -487,7 +487,7 @@ static void tx2_uncore_event_update(struct perf_event *event) new = reg_readl(hwc->event_base); prev = local64_xchg(&hwc->prev_count, new); /* handles rollover of 32 bit counter */ - delta = (u32)(((1UL << 32) - prev) + new); + delta = (u32)(((1ULL << 32) - prev) + new); } /* DMC event data_transfers granularity is 16 Bytes, convert it to 64 */ -- cgit v1.2.3 From e656972b69864348a747954ea187576808000c5a Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 1 Oct 2021 18:48:46 +0800 Subject: drivers/perf: Improve build test coverage Improve build test cover by allowing some drivers to build under COMPILE_TEST where possible. Some notes: - Mostly a dependency on CONFIG_ACPI is not really required for only building (but left untouched), but is required for TX2 which uses ACPI functions which have no stubs - XGENE required 64b dependency as it relies on some unsigned long perf struct fields being 64b - I don't see why TX2 requires NUMA to build, but left untouched - Added an explicit dependency on GENERIC_MSI_IRQ_DOMAIN for ARM_SMMU_V3_PMU, which is required for platform MSI functions Signed-off-by: John Garry Link: https://lore.kernel.org/r/1633085326-156653-3-git-send-email-john.garry@huawei.com Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 77522e5efe11..4374af292e6d 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -36,7 +36,7 @@ config ARM_CCI5xx_PMU config ARM_CCN tristate "ARM CCN driver support" - depends on ARM || ARM64 + depends on ARM || ARM64 || COMPILE_TEST help PMU (perf) driver supporting the ARM CCN (Cache Coherent Network) interconnect. @@ -62,7 +62,8 @@ config ARM_PMU_ACPI config ARM_SMMU_V3_PMU tristate "ARM SMMUv3 Performance Monitors Extension" - depends on ARM64 && ACPI + depends on (ARM64 && ACPI) || (COMPILE_TEST && 64BIT) + depends on GENERIC_MSI_IRQ_DOMAIN help Provides support for the ARM SMMUv3 Performance Monitor Counter Groups (PMCG), which provide monitoring of transactions passing @@ -80,7 +81,7 @@ config ARM_DSU_PMU config FSL_IMX8_DDR_PMU tristate "Freescale i.MX8 DDR perf monitor" - depends on ARCH_MXC + depends on ARCH_MXC || COMPILE_TEST help Provides support for the DDR performance monitor in i.MX8, which can give information about memory throughput and other related @@ -108,7 +109,8 @@ config QCOM_L3_PMU config THUNDERX2_PMU tristate "Cavium ThunderX2 SoC PMU UNCORE" - depends on ARCH_THUNDER2 && ARM64 && ACPI && NUMA + depends on ARCH_THUNDER2 || COMPILE_TEST + depends on NUMA && ACPI default m help Provides support for ThunderX2 UNCORE events. @@ -116,7 +118,7 @@ config THUNDERX2_PMU in the DDR4 Memory Controller (DMC). config XGENE_PMU - depends on ARCH_XGENE + depends on ARCH_XGENE || (COMPILE_TEST && 64BIT) bool "APM X-Gene SoC PMU" default n help -- cgit v1.2.3 From 0ba1ce1e86052deea3f115285802ce8ffff3b152 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 5 Oct 2021 13:35:37 +0100 Subject: selftests: arm64: Add coverage of ptrace flags for SVE VL inheritance Add a test that covers enabling and disabling of SVE vector length inheritance via the ptrace interface. Signed-off-by: Mark Brown Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20211005123537.976795-1-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/sve-ptrace.c | 55 ++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index ac0629f05365..c4417bc48d4f 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -22,7 +22,7 @@ #include "../../kselftest.h" #define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3) -#define FPSIMD_TESTS 3 +#define FPSIMD_TESTS 5 #define EXPECTED_TESTS (VL_TESTS + FPSIMD_TESTS) @@ -105,6 +105,56 @@ static int set_sve(pid_t pid, const struct user_sve_header *sve) return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov); } +/* Validate setting and getting the inherit flag */ +static void ptrace_set_get_inherit(pid_t child) +{ + struct user_sve_header sve; + struct user_sve_header *new_sve = NULL; + size_t new_sve_size = 0; + int ret; + + /* First set the flag */ + memset(&sve, 0, sizeof(sve)); + sve.size = sizeof(sve); + sve.vl = sve_vl_from_vq(SVE_VQ_MIN); + sve.flags = SVE_PT_VL_INHERIT; + ret = set_sve(child, &sve); + if (ret != 0) { + ksft_test_result_fail("Failed to set SVE_PT_VL_INHERIT\n"); + return; + } + + /* + * Read back the new register state and verify that we have + * set the flags we expected. + */ + if (!get_sve(child, (void **)&new_sve, &new_sve_size)) { + ksft_test_result_fail("Failed to read SVE flags\n"); + return; + } + + ksft_test_result(new_sve->flags & SVE_PT_VL_INHERIT, + "SVE_PT_VL_INHERIT set\n"); + + /* Now clear */ + sve.flags &= ~SVE_PT_VL_INHERIT; + ret = set_sve(child, &sve); + if (ret != 0) { + ksft_test_result_fail("Failed to clear SVE_PT_VL_INHERIT\n"); + return; + } + + if (!get_sve(child, (void **)&new_sve, &new_sve_size)) { + ksft_test_result_fail("Failed to read SVE flags\n"); + return; + } + + ksft_test_result(!(new_sve->flags & SVE_PT_VL_INHERIT), + "SVE_PT_VL_INHERIT cleared\n"); + + free(new_sve); +} + /* Validate attempting to set the specfied VL via ptrace */ static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported) { @@ -452,6 +502,9 @@ static int do_parent(pid_t child) /* FPSIMD via SVE regset */ ptrace_sve_fpsimd(child); + /* prctl() flags */ + ptrace_set_get_inherit(child); + /* Step through every possible VQ */ for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { vl = sve_vl_from_vq(vq); -- cgit v1.2.3 From f5627ec1ff2cd91920abe0023e878eb872623ac3 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Wed, 6 Oct 2021 16:47:47 +0100 Subject: kasan: Remove duplicate of kasan_flag_async After merging async mode for KASAN_HW_TAGS a duplicate of the kasan_flag_async flag was left erroneously inside the code. Remove the duplicate. Note: This change does not bring functional changes to the code base. Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Marco Elver Cc: Evgenii Stepanov Cc: Andrey Konovalov Signed-off-by: Vincenzo Frascino Acked-by: Catalin Marinas Reviewed-by: Andrey Konovalov Link: https://lore.kernel.org/r/20211006154751.4463-2-vincenzo.frascino@arm.com Signed-off-by: Will Deacon --- mm/kasan/kasan.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 8bf568a80eb8..3639e7c8bb98 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -38,8 +38,6 @@ static inline bool kasan_async_mode_enabled(void) #endif -extern bool kasan_flag_async __ro_after_init; - #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) #define KASAN_GRANULE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT) #else -- cgit v1.2.3 From ba1a98e8b1720f7a78154e0020c77dbc2b34d0ce Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Wed, 6 Oct 2021 16:47:48 +0100 Subject: arm64: mte: Bitfield definitions for Asymm MTE Add Asymmetric Memory Tagging Extension bitfield definitions. Cc: Will Deacon Cc: Catalin Marinas Signed-off-by: Vincenzo Frascino Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20211006154751.4463-3-vincenzo.frascino@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b268082d67ed..f51d5912b41c 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -621,6 +621,7 @@ #define SCTLR_ELx_TCF_NONE (UL(0x0) << SCTLR_ELx_TCF_SHIFT) #define SCTLR_ELx_TCF_SYNC (UL(0x1) << SCTLR_ELx_TCF_SHIFT) #define SCTLR_ELx_TCF_ASYNC (UL(0x2) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_TCF_ASYMM (UL(0x3) << SCTLR_ELx_TCF_SHIFT) #define SCTLR_ELx_TCF_MASK (UL(0x3) << SCTLR_ELx_TCF_SHIFT) #define SCTLR_ELx_ENIA_SHIFT 31 @@ -666,6 +667,7 @@ #define SCTLR_EL1_TCF0_NONE (UL(0x0) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_TCF0_SYNC (UL(0x1) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_TCF0_ASYNC (UL(0x2) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_ASYMM (UL(0x3) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_TCF0_MASK (UL(0x3) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_BT1 (BIT(36)) @@ -807,6 +809,7 @@ #define ID_AA64PFR1_MTE_NI 0x0 #define ID_AA64PFR1_MTE_EL0 0x1 #define ID_AA64PFR1_MTE 0x2 +#define ID_AA64PFR1_MTE_ASYMM 0x3 /* id_aa64zfr0 */ #define ID_AA64ZFR0_F64MM_SHIFT 56 -- cgit v1.2.3 From d73c162e073376dd207d716cb4b9cfc809be7e80 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Wed, 6 Oct 2021 16:47:49 +0100 Subject: arm64: mte: CPU feature detection for Asymm MTE Add the cpufeature entries to detect the presence of Asymmetric MTE. Note: The tag checking mode is initialized via cpu_enable_mte() -> kasan_init_hw_tags() hence to enable it we require asymmetric mode to be at least on the boot CPU. If the boot CPU does not have it, it is fine for late CPUs to have it as long as the feature is not enabled (ARM64_CPUCAP_BOOT_CPU_FEATURE). Cc: Will Deacon Cc: Catalin Marinas Cc: Suzuki K Poulose Signed-off-by: Vincenzo Frascino Reviewed-by: Catalin Marinas Reviewed-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20211006154751.4463-4-vincenzo.frascino@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 10 ++++++++++ arch/arm64/tools/cpucaps | 1 + 2 files changed, 11 insertions(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6ec7036ef7e1..9e3e8ad75f20 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2321,6 +2321,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .cpu_enable = cpu_enable_mte, }, + { + .desc = "Asymmetric MTE Tag Check Fault", + .capability = ARM64_MTE_ASYMM, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .field_pos = ID_AA64PFR1_MTE_SHIFT, + .min_field_value = ID_AA64PFR1_MTE_ASYMM, + .sign = FTR_UNSIGNED, + }, #endif /* CONFIG_ARM64_MTE */ { .desc = "RCpc load-acquire (LDAPR)", diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 49305c2e6dfd..74a569bf52d6 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -39,6 +39,7 @@ HW_DBM KVM_PROTECTED_MODE MISMATCHED_CACHE_TYPE MTE +MTE_ASYMM SPECTRE_V2 SPECTRE_V3A SPECTRE_V4 -- cgit v1.2.3 From ec0288369f0cc6d85837a18f1c4c65451c94477b Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Wed, 6 Oct 2021 16:47:50 +0100 Subject: arm64: mte: Add asymmetric mode support MTE provides an asymmetric mode for detecting tag exceptions. In particular, when such a mode is present, the CPU triggers a fault on a tag mismatch during a load operation and asynchronously updates a register when a tag mismatch is detected during a store operation. Add support for MTE asymmetric mode. Note: If the CPU does not support MTE asymmetric mode the kernel falls back on synchronous mode which is the default for kasan=on. Cc: Will Deacon Cc: Catalin Marinas Cc: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Catalin Marinas Acked-by: Andrey Konovalov Link: https://lore.kernel.org/r/20211006154751.4463-5-vincenzo.frascino@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 1 + arch/arm64/include/asm/mte-kasan.h | 5 +++++ arch/arm64/include/asm/mte.h | 8 ++++---- arch/arm64/include/asm/uaccess.h | 4 ++-- arch/arm64/kernel/mte.c | 41 ++++++++++++++++++++++++++++++++------ 5 files changed, 47 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index f1745a843414..1b9a1e242612 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -243,6 +243,7 @@ static inline const void *__tag_set(const void *addr, u8 tag) #ifdef CONFIG_KASAN_HW_TAGS #define arch_enable_tagging_sync() mte_enable_kernel_sync() #define arch_enable_tagging_async() mte_enable_kernel_async() +#define arch_enable_tagging_asymm() mte_enable_kernel_asymm() #define arch_force_async_tag_fault() mte_check_tfsr_exit() #define arch_get_random_tag() mte_get_random_tag() #define arch_get_mem_tag(addr) mte_get_mem_tag(addr) diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h index 22420e1f8c03..478b9bcf69ad 100644 --- a/arch/arm64/include/asm/mte-kasan.h +++ b/arch/arm64/include/asm/mte-kasan.h @@ -130,6 +130,7 @@ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag, void mte_enable_kernel_sync(void); void mte_enable_kernel_async(void); +void mte_enable_kernel_asymm(void); #else /* CONFIG_ARM64_MTE */ @@ -161,6 +162,10 @@ static inline void mte_enable_kernel_async(void) { } +static inline void mte_enable_kernel_asymm(void) +{ +} + #endif /* CONFIG_ARM64_MTE */ #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 02511650cffe..075539f5f1c8 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -88,11 +88,11 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child, #ifdef CONFIG_KASAN_HW_TAGS /* Whether the MTE asynchronous mode is enabled. */ -DECLARE_STATIC_KEY_FALSE(mte_async_mode); +DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode); -static inline bool system_uses_mte_async_mode(void) +static inline bool system_uses_mte_async_or_asymm_mode(void) { - return static_branch_unlikely(&mte_async_mode); + return static_branch_unlikely(&mte_async_or_asymm_mode); } void mte_check_tfsr_el1(void); @@ -121,7 +121,7 @@ static inline void mte_check_tfsr_exit(void) mte_check_tfsr_el1(); } #else -static inline bool system_uses_mte_async_mode(void) +static inline bool system_uses_mte_async_or_asymm_mode(void) { return false; } diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 190b494e22ab..315354047d69 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -196,13 +196,13 @@ static inline void __uaccess_enable_tco(void) */ static inline void __uaccess_disable_tco_async(void) { - if (system_uses_mte_async_mode()) + if (system_uses_mte_async_or_asymm_mode()) __uaccess_disable_tco(); } static inline void __uaccess_enable_tco_async(void) { - if (system_uses_mte_async_mode()) + if (system_uses_mte_async_or_asymm_mode()) __uaccess_enable_tco(); } diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 0cdae086966e..f418ebc65f95 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -26,9 +26,12 @@ static DEFINE_PER_CPU_READ_MOSTLY(u64, mte_tcf_preferred); #ifdef CONFIG_KASAN_HW_TAGS -/* Whether the MTE asynchronous mode is enabled. */ -DEFINE_STATIC_KEY_FALSE(mte_async_mode); -EXPORT_SYMBOL_GPL(mte_async_mode); +/* + * The asynchronous and asymmetric MTE modes have the same behavior for + * store operations. This flag is set when either of these modes is enabled. + */ +DEFINE_STATIC_KEY_FALSE(mte_async_or_asymm_mode); +EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode); #endif static void mte_sync_page_tags(struct page *page, pte_t old_pte, @@ -116,7 +119,7 @@ void mte_enable_kernel_sync(void) * Make sure we enter this function when no PE has set * async mode previously. */ - WARN_ONCE(system_uses_mte_async_mode(), + WARN_ONCE(system_uses_mte_async_or_asymm_mode(), "MTE async mode enabled system wide!"); __mte_enable_kernel("synchronous", SCTLR_ELx_TCF_SYNC); @@ -134,8 +137,34 @@ void mte_enable_kernel_async(void) * mode in between sync and async, this strategy needs * to be reviewed. */ - if (!system_uses_mte_async_mode()) - static_branch_enable(&mte_async_mode); + if (!system_uses_mte_async_or_asymm_mode()) + static_branch_enable(&mte_async_or_asymm_mode); +} + +void mte_enable_kernel_asymm(void) +{ + if (cpus_have_cap(ARM64_MTE_ASYMM)) { + __mte_enable_kernel("asymmetric", SCTLR_ELx_TCF_ASYMM); + + /* + * MTE asymm mode behaves as async mode for store + * operations. The mode is set system wide by the + * first PE that executes this function. + * + * Note: If in future KASAN acquires a runtime switching + * mode in between sync and async, this strategy needs + * to be reviewed. + */ + if (!system_uses_mte_async_or_asymm_mode()) + static_branch_enable(&mte_async_or_asymm_mode); + } else { + /* + * If the CPU does not support MTE asymmetric mode the + * kernel falls back on synchronous mode which is the + * default for kasan=on. + */ + mte_enable_kernel_sync(); + } } #endif -- cgit v1.2.3 From 2d27e585147395316289c63efc932984675c65c2 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Wed, 6 Oct 2021 16:47:51 +0100 Subject: kasan: Extend KASAN mode kernel parameter Architectures supported by KASAN_HW_TAGS can provide an asymmetric mode of execution. On an MTE enabled arm64 hw for example this can be identified with the asymmetric tagging mode of execution. In particular, when such a mode is present, the CPU triggers a fault on a tag mismatch during a load operation and asynchronously updates a register when a tag mismatch is detected during a store operation. Extend the KASAN HW execution mode kernel command line parameter to support asymmetric mode. Cc: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Signed-off-by: Vincenzo Frascino Reviewed-by: Catalin Marinas Reviewed-by: Andrey Konovalov Link: https://lore.kernel.org/r/20211006154751.4463-6-vincenzo.frascino@arm.com Signed-off-by: Will Deacon --- Documentation/dev-tools/kasan.rst | 7 +++++-- lib/test_kasan.c | 2 +- mm/kasan/hw_tags.c | 29 +++++++++++++++++++---------- mm/kasan/kasan.h | 30 ++++++++++++++++++++++++++---- mm/kasan/report.c | 2 +- 5 files changed, 52 insertions(+), 18 deletions(-) diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst index 21dc03bc10a4..8089c559d339 100644 --- a/Documentation/dev-tools/kasan.rst +++ b/Documentation/dev-tools/kasan.rst @@ -194,14 +194,17 @@ additional boot parameters that allow disabling KASAN or controlling features: - ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``). -- ``kasan.mode=sync`` or ``=async`` controls whether KASAN is configured in - synchronous or asynchronous mode of execution (default: ``sync``). +- ``kasan.mode=sync``, ``=async`` or ``=asymm`` controls whether KASAN + is configured in synchronous, asynchronous or asymmetric mode of + execution (default: ``sync``). Synchronous mode: a bad access is detected immediately when a tag check fault occurs. Asynchronous mode: a bad access detection is delayed. When a tag check fault occurs, the information is stored in hardware (in the TFSR_EL1 register for arm64). The kernel periodically checks the hardware and only reports tag faults during these checks. + Asymmetric mode: a bad access is detected synchronously on reads and + asynchronously on writes. - ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack traces collection (default: ``on``). diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 8835e0784578..ebed755ebf34 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -88,7 +88,7 @@ static void kasan_test_exit(struct kunit *test) */ #define KUNIT_EXPECT_KASAN_FAIL(test, expression) do { \ if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) && \ - !kasan_async_mode_enabled()) \ + kasan_sync_fault_possible()) \ migrate_disable(); \ KUNIT_EXPECT_FALSE(test, READ_ONCE(fail_data.report_found)); \ barrier(); \ diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index 05d1e9460e2e..dc892119e88f 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -29,6 +29,7 @@ enum kasan_arg_mode { KASAN_ARG_MODE_DEFAULT, KASAN_ARG_MODE_SYNC, KASAN_ARG_MODE_ASYNC, + KASAN_ARG_MODE_ASYMM, }; enum kasan_arg_stacktrace { @@ -45,9 +46,9 @@ static enum kasan_arg_stacktrace kasan_arg_stacktrace __ro_after_init; DEFINE_STATIC_KEY_FALSE(kasan_flag_enabled); EXPORT_SYMBOL(kasan_flag_enabled); -/* Whether the asynchronous mode is enabled. */ -bool kasan_flag_async __ro_after_init; -EXPORT_SYMBOL_GPL(kasan_flag_async); +/* Whether the selected mode is synchronous/asynchronous/asymmetric.*/ +enum kasan_mode kasan_mode __ro_after_init; +EXPORT_SYMBOL_GPL(kasan_mode); /* Whether to collect alloc/free stack traces. */ DEFINE_STATIC_KEY_FALSE(kasan_flag_stacktrace); @@ -69,7 +70,7 @@ static int __init early_kasan_flag(char *arg) } early_param("kasan", early_kasan_flag); -/* kasan.mode=sync/async */ +/* kasan.mode=sync/async/asymm */ static int __init early_kasan_mode(char *arg) { if (!arg) @@ -79,6 +80,8 @@ static int __init early_kasan_mode(char *arg) kasan_arg_mode = KASAN_ARG_MODE_SYNC; else if (!strcmp(arg, "async")) kasan_arg_mode = KASAN_ARG_MODE_ASYNC; + else if (!strcmp(arg, "asymm")) + kasan_arg_mode = KASAN_ARG_MODE_ASYMM; else return -EINVAL; @@ -116,11 +119,13 @@ void kasan_init_hw_tags_cpu(void) return; /* - * Enable async mode only when explicitly requested through - * the command line. + * Enable async or asymm modes only when explicitly requested + * through the command line. */ if (kasan_arg_mode == KASAN_ARG_MODE_ASYNC) hw_enable_tagging_async(); + else if (kasan_arg_mode == KASAN_ARG_MODE_ASYMM) + hw_enable_tagging_asymm(); else hw_enable_tagging_sync(); } @@ -143,15 +148,19 @@ void __init kasan_init_hw_tags(void) case KASAN_ARG_MODE_DEFAULT: /* * Default to sync mode. - * Do nothing, kasan_flag_async keeps its default value. */ - break; + fallthrough; case KASAN_ARG_MODE_SYNC: - /* Do nothing, kasan_flag_async keeps its default value. */ + /* Sync mode enabled. */ + kasan_mode = KASAN_MODE_SYNC; break; case KASAN_ARG_MODE_ASYNC: /* Async mode enabled. */ - kasan_flag_async = true; + kasan_mode = KASAN_MODE_ASYNC; + break; + case KASAN_ARG_MODE_ASYMM: + /* Asymm mode enabled. */ + kasan_mode = KASAN_MODE_ASYMM; break; } diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 3639e7c8bb98..b495e17445ad 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -13,16 +13,28 @@ #include "../slab.h" DECLARE_STATIC_KEY_FALSE(kasan_flag_stacktrace); -extern bool kasan_flag_async __ro_after_init; + +enum kasan_mode { + KASAN_MODE_SYNC, + KASAN_MODE_ASYNC, + KASAN_MODE_ASYMM, +}; + +extern enum kasan_mode kasan_mode __ro_after_init; static inline bool kasan_stack_collection_enabled(void) { return static_branch_unlikely(&kasan_flag_stacktrace); } -static inline bool kasan_async_mode_enabled(void) +static inline bool kasan_async_fault_possible(void) +{ + return kasan_mode == KASAN_MODE_ASYNC || kasan_mode == KASAN_MODE_ASYMM; +} + +static inline bool kasan_sync_fault_possible(void) { - return kasan_flag_async; + return kasan_mode == KASAN_MODE_SYNC || kasan_mode == KASAN_MODE_ASYMM; } #else @@ -31,11 +43,16 @@ static inline bool kasan_stack_collection_enabled(void) return true; } -static inline bool kasan_async_mode_enabled(void) +static inline bool kasan_async_fault_possible(void) { return false; } +static inline bool kasan_sync_fault_possible(void) +{ + return true; +} + #endif #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) @@ -287,6 +304,9 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) #ifndef arch_enable_tagging_async #define arch_enable_tagging_async() #endif +#ifndef arch_enable_tagging_asymm +#define arch_enable_tagging_asymm() +#endif #ifndef arch_force_async_tag_fault #define arch_force_async_tag_fault() #endif @@ -302,6 +322,7 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) #define hw_enable_tagging_sync() arch_enable_tagging_sync() #define hw_enable_tagging_async() arch_enable_tagging_async() +#define hw_enable_tagging_asymm() arch_enable_tagging_asymm() #define hw_force_async_tag_fault() arch_force_async_tag_fault() #define hw_get_random_tag() arch_get_random_tag() #define hw_get_mem_tag(addr) arch_get_mem_tag(addr) @@ -312,6 +333,7 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) #define hw_enable_tagging_sync() #define hw_enable_tagging_async() +#define hw_enable_tagging_asymm() #endif /* CONFIG_KASAN_HW_TAGS */ diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 884a950c7026..9da071ad930c 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -112,7 +112,7 @@ static void start_report(unsigned long *flags) static void end_report(unsigned long *flags, unsigned long addr) { - if (!kasan_async_mode_enabled()) + if (!kasan_async_fault_possible()) trace_error_report_end(ERROR_DETECTOR_KASAN, addr); pr_err("==================================================================\n"); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); -- cgit v1.2.3 From 1dfde0892b325ed1872975053c6745f5148050a2 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Thu, 7 Oct 2021 21:56:01 +0200 Subject: arm64: asm: setup.h: export common variables When building the kernel with sparse enabled 'C=1' the following warnings can be seen: arch/arm64/kernel/setup.c:58:13: warning: symbol '__fdt_pointer' was not declared. Should it be static? arch/arm64/kernel/setup.c:84:25: warning: symbol 'boot_args' was not declared. Should it be static? Rework so the variables are exported, since these two variable are created and used in setup.c, also used in head.S. Signed-off-by: Anders Roxell Link: https://lore.kernel.org/r/20211007195601.677474-1-anders.roxell@linaro.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/setup.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h index d3320618ed14..6437df661700 100644 --- a/arch/arm64/include/asm/setup.h +++ b/arch/arm64/include/asm/setup.h @@ -8,4 +8,10 @@ void *get_early_fdt_ptr(void); void early_fdt_map(u64 dt_phys); +/* + * These two variables are used in the head.S file. + */ +extern phys_addr_t __fdt_pointer __initdata; +extern u64 __cacheline_aligned boot_args[4]; + #endif -- cgit v1.2.3 From de56379f21c70196ff18c48790e8e43865893869 Mon Sep 17 00:00:00 2001 From: Sumit Garg Date: Mon, 11 Oct 2021 18:20:59 +0530 Subject: arm64: ftrace: use function_nocfi for _mcount as well Commit 800618f955a9 ("arm64: ftrace: use function_nocfi for ftrace_call") only fixed address of ftrace_call but address of _mcount needs to be fixed as well. Use function_nocfi() to get the actual address of _mcount function as with CONFIG_CFI_CLANG, the compiler replaces function pointers with jump table addresses which breaks dynamic ftrace as the address of _mcount is replaced with the address of _mcount.cfi_jt. With mainline, this won't be a problem since by default CONFIG_DYNAMIC_FTRACE_WITH_REGS=y with Clang >= 10 as it supports -fpatchable-function-entry and CFI requires Clang 12 but for consistency we should add function_nocfi() for _mcount as well. Signed-off-by: Sumit Garg Acked-by: Mark Rutland Reviewed-by: Sami Tolvanen Link: https://lore.kernel.org/r/20211011125059.3378646-1-sumit.garg@linaro.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index 91fa4baa1a93..347b0cc68f07 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -15,7 +15,7 @@ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS #define ARCH_SUPPORTS_FTRACE_OPS 1 #else -#define MCOUNT_ADDR ((unsigned long)_mcount) +#define MCOUNT_ADDR ((unsigned long)function_nocfi(_mcount)) #endif /* The BL at the callsite's adjusted rec->ip */ -- cgit v1.2.3 From fdf865988b5a404f91f86a1b3b665440a9ebafb2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Oct 2021 13:42:22 +0100 Subject: arm64: Add a capability for FEAT_ECV Add a new capability to detect the Enhanced Counter Virtualization feature (FEAT_ECV). Reviewed-by: Oliver Upton Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211017124225.3018098-15-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 10 ++++++++++ arch/arm64/tools/cpucaps | 1 + 2 files changed, 11 insertions(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6ec7036ef7e1..6fc33365cffe 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1930,6 +1930,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .min_field_value = 1, }, + { + .desc = "Enhanced Counter Virtualization", + .capability = ARM64_HAS_ECV, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64MMFR0_EL1, + .field_pos = ID_AA64MMFR0_ECV_SHIFT, + .sign = FTR_UNSIGNED, + .min_field_value = 1, + }, #ifdef CONFIG_ARM64_PAN { .desc = "Privileged Access Never", diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 49305c2e6dfd..7a7c58acd8f0 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -18,6 +18,7 @@ HAS_CRC32 HAS_DCPODP HAS_DCPOP HAS_E0PD +HAS_ECV HAS_EPAN HAS_GENERIC_AUTH HAS_GENERIC_AUTH_ARCH -- cgit v1.2.3 From 9ee840a96042cef9f7d36337ce05144d6c013858 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Oct 2021 13:42:23 +0100 Subject: arm64: Add CNT{P,V}CTSS_EL0 alternatives to cnt{p,v}ct_el0 CNTPCTSS_EL0 and CNTVCTSS_EL0 are alternatives to the usual CNTPCT_EL0 and CNTVCT_EL0 that do not require a previous ISB to be synchronised (SS stands for Self-Synchronising). Use the ARM64_HAS_ECV capability to control alternative sequences that switch to these low(er)-cost primitives. Note that the counter access in the VDSO is for now left alone until we decide whether we want to allow this. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211017124225.3018098-16-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/arch_timer.h | 32 ++++++++++++++++++++++++-------- arch/arm64/include/asm/sysreg.h | 3 +++ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index 519ac1f7f859..af1fafbe7e1d 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -64,14 +64,26 @@ DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *, static inline notrace u64 arch_timer_read_cntpct_el0(void) { - isb(); - return read_sysreg(cntpct_el0); + u64 cnt; + + asm volatile(ALTERNATIVE("isb\n mrs %0, cntpct_el0", + "nop\n" __mrs_s("%0", SYS_CNTPCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (cnt)); + + return cnt; } static inline notrace u64 arch_timer_read_cntvct_el0(void) { - isb(); - return read_sysreg(cntvct_el0); + u64 cnt; + + asm volatile(ALTERNATIVE("isb\n mrs %0, cntvct_el0", + "nop\n" __mrs_s("%0", SYS_CNTVCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (cnt)); + + return cnt; } #define arch_timer_reg_read_stable(reg) \ @@ -174,8 +186,10 @@ static __always_inline u64 __arch_counter_get_cntpct(void) { u64 cnt; - isb(); - cnt = read_sysreg(cntpct_el0); + asm volatile(ALTERNATIVE("isb\n mrs %0, cntpct_el0", + "nop\n" __mrs_s("%0", SYS_CNTPCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (cnt)); arch_counter_enforce_ordering(cnt); return cnt; } @@ -193,8 +207,10 @@ static __always_inline u64 __arch_counter_get_cntvct(void) { u64 cnt; - isb(); - cnt = read_sysreg(cntvct_el0); + asm volatile(ALTERNATIVE("isb\n mrs %0, cntvct_el0", + "nop\n" __mrs_s("%0", SYS_CNTVCTSS_EL0), + ARM64_HAS_ECV) + : "=r" (cnt)); arch_counter_enforce_ordering(cnt); return cnt; } diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b268082d67ed..5ce70c034d37 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -507,6 +507,9 @@ #define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) +#define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5) +#define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6) + #define SYS_CNTP_TVAL_EL0 sys_reg(3, 3, 14, 2, 0) #define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1) #define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2) -- cgit v1.2.3 From ae976f063b605dd558571eff40c8229ffbc39e24 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Oct 2021 13:42:24 +0100 Subject: arm64: Add handling of CNTVCTSS traps Since CNTVCTSS obey the same control bits as CNTVCT, add the necessary decoding to the hook table. Note that there is no known user of this at the moment. Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211017124225.3018098-17-maz@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/esr.h | 6 ++++++ arch/arm64/kernel/traps.c | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 29f97eb3dad4..a305ce256090 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -227,6 +227,9 @@ #define ESR_ELx_SYS64_ISS_SYS_CNTVCT (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 2, 14, 0) | \ ESR_ELx_SYS64_ISS_DIR_READ) +#define ESR_ELx_SYS64_ISS_SYS_CNTVCTSS (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 6, 14, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + #define ESR_ELx_SYS64_ISS_SYS_CNTFRQ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \ ESR_ELx_SYS64_ISS_DIR_READ) @@ -317,6 +320,9 @@ #define ESR_ELx_CP15_64_ISS_SYS_CNTVCT (ESR_ELx_CP15_64_ISS_SYS_VAL(1, 14) | \ ESR_ELx_CP15_64_ISS_DIR_READ) +#define ESR_ELx_CP15_64_ISS_SYS_CNTVCTSS (ESR_ELx_CP15_64_ISS_SYS_VAL(9, 14) | \ + ESR_ELx_CP15_64_ISS_DIR_READ) + #define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\ ESR_ELx_CP15_32_ISS_DIR_READ) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index b03e383d944a..16710ca55fbb 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -653,6 +653,12 @@ static const struct sys64_hook sys64_hooks[] = { .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTVCT, .handler = cntvct_read_handler, }, + { + /* Trap read access to CNTVCTSS_EL0 */ + .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK, + .esr_val = ESR_ELx_SYS64_ISS_SYS_CNTVCTSS, + .handler = cntvct_read_handler, + }, { /* Trap read access to CNTFRQ_EL0 */ .esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK, @@ -729,6 +735,11 @@ static const struct sys64_hook cp15_64_hooks[] = { .esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCT, .handler = compat_cntvct_read_handler, }, + { + .esr_mask = ESR_ELx_CP15_64_ISS_SYS_MASK, + .esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCTSS, + .handler = compat_cntvct_read_handler, + }, {}, }; -- cgit v1.2.3 From fee29f008aa3f2aff01117f28b57b1145d92cb9b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Oct 2021 13:42:25 +0100 Subject: arm64: Add HWCAP for self-synchronising virtual counter Since userspace can make use of the CNTVSS_EL0 instruction, expose it via a HWCAP. Suggested-by: Will Deacon Acked-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211017124225.3018098-18-maz@kernel.org Signed-off-by: Will Deacon --- Documentation/arm64/cpu-feature-registers.rst | 12 ++++++++++-- Documentation/arm64/elf_hwcaps.rst | 4 ++++ arch/arm64/include/asm/hwcap.h | 1 + arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/kernel/cpufeature.c | 3 ++- arch/arm64/kernel/cpuinfo.c | 1 + 6 files changed, 19 insertions(+), 3 deletions(-) diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst index 328e0c454fbd..9f9b8fd06089 100644 --- a/Documentation/arm64/cpu-feature-registers.rst +++ b/Documentation/arm64/cpu-feature-registers.rst @@ -235,7 +235,15 @@ infrastructure: | DPB | [3-0] | y | +------------------------------+---------+---------+ - 6) ID_AA64MMFR2_EL1 - Memory model feature register 2 + 6) ID_AA64MMFR0_EL1 - Memory model feature register 0 + + +------------------------------+---------+---------+ + | Name | bits | visible | + +------------------------------+---------+---------+ + | ECV | [63-60] | y | + +------------------------------+---------+---------+ + + 7) ID_AA64MMFR2_EL1 - Memory model feature register 2 +------------------------------+---------+---------+ | Name | bits | visible | @@ -243,7 +251,7 @@ infrastructure: | AT | [35-32] | y | +------------------------------+---------+---------+ - 7) ID_AA64ZFR0_EL1 - SVE feature ID register 0 + 8) ID_AA64ZFR0_EL1 - SVE feature ID register 0 +------------------------------+---------+---------+ | Name | bits | visible | diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index ec1a5a63c1d0..af106af8e1c0 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -247,6 +247,10 @@ HWCAP2_MTE Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0010, as described by Documentation/arm64/memory-tagging-extension.rst. +HWCAP2_ECV + + Functionality implied by ID_AA64MMFR0_EL1.ECV == 0b0001. + 4. Unused AT_HWCAP bits ----------------------- diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 8c129db8232a..b100e0055eab 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -105,6 +105,7 @@ #define KERNEL_HWCAP_RNG __khwcap2_feature(RNG) #define KERNEL_HWCAP_BTI __khwcap2_feature(BTI) #define KERNEL_HWCAP_MTE __khwcap2_feature(MTE) +#define KERNEL_HWCAP_ECV __khwcap2_feature(ECV) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index b8f41aa234ee..7b23b16f21ce 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -75,5 +75,6 @@ #define HWCAP2_RNG (1 << 16) #define HWCAP2_BTI (1 << 17) #define HWCAP2_MTE (1 << 18) +#define HWCAP2_ECV (1 << 19) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6fc33365cffe..de4cf6cfd806 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -279,7 +279,7 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EXS_SHIFT, 4, 0), /* @@ -2461,6 +2461,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #ifdef CONFIG_ARM64_MTE HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), #endif /* CONFIG_ARM64_MTE */ + HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), {}, }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 87731fea5e41..6e27b759056a 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -94,6 +94,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_RNG] = "rng", [KERNEL_HWCAP_BTI] = "bti", [KERNEL_HWCAP_MTE] = "mte", + [KERNEL_HWCAP_ECV] = "ecv", }; #ifdef CONFIG_COMPAT -- cgit v1.2.3 From 2d481bd3b6361ed16c3ddeb58537f149623d30a0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Oct 2021 18:22:06 +0100 Subject: arm64/fp: Reindent fpsimd_save() Currently all the active code in fpsimd_save() is inside a check for TIF_FOREIGN_FPSTATE. Reduce the indentation level by changing to return from the function if TIF_FOREIGN_FPSTATE is set. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20211019172247.3045838-2-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index ff4962750b3d..995f8801602b 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -308,24 +308,26 @@ static void fpsimd_save(void) WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); - if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { - if (IS_ENABLED(CONFIG_ARM64_SVE) && - test_thread_flag(TIF_SVE)) { - if (WARN_ON(sve_get_vl() != last->sve_vl)) { - /* - * Can't save the user regs, so current would - * re-enter user with corrupt state. - * There's no way to recover, so kill it: - */ - force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); - return; - } - - sve_save_state((char *)last->sve_state + - sve_ffr_offset(last->sve_vl), - &last->st->fpsr); - } else - fpsimd_save_state(last->st); + if (test_thread_flag(TIF_FOREIGN_FPSTATE)) + return; + + if (IS_ENABLED(CONFIG_ARM64_SVE) && + test_thread_flag(TIF_SVE)) { + if (WARN_ON(sve_get_vl() != last->sve_vl)) { + /* + * Can't save the user regs, so current would + * re-enter user with corrupt state. + * There's no way to recover, so kill it: + */ + force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); + return; + } + + sve_save_state((char *)last->sve_state + + sve_ffr_offset(last->sve_vl), + &last->st->fpsr); + } else { + fpsimd_save_state(last->st); } } -- cgit v1.2.3 From b53223e0a4d9fbdba1a1dd1161f7240506666946 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Oct 2021 18:22:07 +0100 Subject: arm64/sve: Remove sve_load_from_fpsimd_state() Following optimisations of the SVE register handling we no longer load the SVE state from a saved copy of the FPSIMD registers, we convert directly in registers or from one saved state to another. Remove the function so we don't need to update it during further refactoring. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20211019172247.3045838-3-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 2 -- arch/arm64/kernel/entry-fpsimd.S | 16 ---------------- 2 files changed, 18 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 9a62884183e5..e0e30567b80f 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -69,8 +69,6 @@ extern void sve_save_state(void *state, u32 *pfpsr); extern void sve_load_state(void const *state, u32 const *pfpsr, unsigned long vq_minus_1); extern void sve_flush_live(unsigned long vq_minus_1); -extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state, - unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); extern void sve_set_vq(unsigned long vq_minus_1); diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 196e921f61de..afbf7dc47e1d 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -66,22 +66,6 @@ SYM_FUNC_START(sve_set_vq) ret SYM_FUNC_END(sve_set_vq) -/* - * Load SVE state from FPSIMD state. - * - * x0 = pointer to struct fpsimd_state - * x1 = VQ - 1 - * - * Each SVE vector will be loaded with the first 128-bits taken from FPSIMD - * and the rest zeroed. All the other SVE registers will be zeroed. - */ -SYM_FUNC_START(sve_load_from_fpsimd_state) - sve_load_vq x1, x2, x3 - fpsimd_restore x0, 8 - sve_flush_p_ffr - ret -SYM_FUNC_END(sve_load_from_fpsimd_state) - /* * Zero all SVE registers but the first 128-bits of each vector * -- cgit v1.2.3 From 12cc2352bfb34dbdf97e51b006c32a8bd0d13bcb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Oct 2021 18:22:08 +0100 Subject: arm64/sve: Make sve_state_size() static There are no users outside fpsimd.c so make sve_state_size() static. KVM open codes an equivalent. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20211019172247.3045838-4-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 2 -- arch/arm64/kernel/fpsimd.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index e0e30567b80f..917ecc301d1d 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -104,8 +104,6 @@ static inline bool sve_vq_available(unsigned int vq) #ifdef CONFIG_ARM64_SVE -extern size_t sve_state_size(struct task_struct const *task); - extern void sve_alloc(struct task_struct *task); extern void fpsimd_release_task(struct task_struct *task); extern void fpsimd_sync_to_sve(struct task_struct *task); diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 995f8801602b..0f6df1ece618 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -497,7 +497,7 @@ static void sve_to_fpsimd(struct task_struct *task) * Return how many bytes of memory are required to store the full SVE * state for task, given task's currently configured vector length. */ -size_t sve_state_size(struct task_struct const *task) +static size_t sve_state_size(struct task_struct const *task) { return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task->thread.sve_vl)); } -- cgit v1.2.3 From 9f5848665788a0f07bc175cb2cdd06d367b7556e Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Oct 2021 18:22:09 +0100 Subject: arm64/sve: Make access to FFR optional SME introduces streaming SVE mode in which FFR is not present and the instructions for accessing it UNDEF. In preparation for handling this update the low level SVE state access functions to take a flag specifying if FFR should be handled. When saving the register state we store a zero for FFR to guard against uninitialized data being read. No behaviour change should be introduced by this patch. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20211019172247.3045838-5-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 6 +++--- arch/arm64/include/asm/fpsimdmacros.h | 20 ++++++++++++++------ arch/arm64/kernel/entry-fpsimd.S | 19 ++++++++++++------- arch/arm64/kernel/fpsimd.c | 10 ++++++---- arch/arm64/kvm/hyp/fpsimd.S | 6 ++++-- 5 files changed, 39 insertions(+), 22 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 917ecc301d1d..7f8a44a9a5e6 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -65,10 +65,10 @@ static inline void *sve_pffr(struct thread_struct *thread) return (char *)thread->sve_state + sve_ffr_offset(thread->sve_vl); } -extern void sve_save_state(void *state, u32 *pfpsr); +extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr); extern void sve_load_state(void const *state, u32 const *pfpsr, - unsigned long vq_minus_1); -extern void sve_flush_live(unsigned long vq_minus_1); + int restore_ffr, unsigned long vq_minus_1); +extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); extern void sve_set_vq(unsigned long vq_minus_1); diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 00a2c0b69c2b..e5ffd8b265b6 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -217,28 +217,36 @@ .macro sve_flush_z _for n, 0, 31, _sve_flush_z \n .endm -.macro sve_flush_p_ffr +.macro sve_flush_p _for n, 0, 15, _sve_pfalse \n +.endm +.macro sve_flush_ffr _sve_wrffr 0 .endm -.macro sve_save nxbase, xpfpsr, nxtmp +.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34 _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16 + cbz \save_ffr, 921f _sve_rdffr 0 _sve_str_p 0, \nxbase _sve_ldr_p 0, \nxbase, -16 - + b 922f +921: + str xzr, [x\nxbase] // Zero out FFR +922: mrs x\nxtmp, fpsr str w\nxtmp, [\xpfpsr] mrs x\nxtmp, fpcr str w\nxtmp, [\xpfpsr, #4] .endm -.macro __sve_load nxbase, xpfpsr, nxtmp +.macro __sve_load nxbase, xpfpsr, restore_ffr, nxtmp _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34 + cbz \restore_ffr, 921f _sve_ldr_p 0, \nxbase _sve_wrffr 0 +921: _for n, 0, 15, _sve_ldr_p \n, \nxbase, \n - 16 ldr w\nxtmp, [\xpfpsr] @@ -247,7 +255,7 @@ msr fpcr, x\nxtmp .endm -.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2 +.macro sve_load nxbase, xpfpsr, restore_ffr, xvqminus1, nxtmp, xtmp2 sve_load_vq \xvqminus1, x\nxtmp, \xtmp2 - __sve_load \nxbase, \xpfpsr, \nxtmp + __sve_load \nxbase, \xpfpsr, \restore_ffr, \nxtmp .endm diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index afbf7dc47e1d..f588c214d44b 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -38,9 +38,10 @@ SYM_FUNC_END(fpsimd_load_state) * * x0 - pointer to buffer for state * x1 - pointer to storage for FPSR + * x2 - Save FFR if non-zero */ SYM_FUNC_START(sve_save_state) - sve_save 0, x1, 2 + sve_save 0, x1, x2, 3 ret SYM_FUNC_END(sve_save_state) @@ -49,10 +50,11 @@ SYM_FUNC_END(sve_save_state) * * x0 - pointer to buffer for state * x1 - pointer to storage for FPSR - * x2 - VQ-1 + * x2 - Restore FFR if non-zero + * x3 - VQ-1 */ SYM_FUNC_START(sve_load_state) - sve_load 0, x1, x2, 3, x4 + sve_load 0, x1, x2, x3, 4, x5 ret SYM_FUNC_END(sve_load_state) @@ -72,13 +74,16 @@ SYM_FUNC_END(sve_set_vq) * VQ must already be configured by caller, any further updates of VQ * will need to ensure that the register state remains valid. * - * x0 = VQ - 1 + * x0 = include FFR? + * x1 = VQ - 1 */ SYM_FUNC_START(sve_flush_live) - cbz x0, 1f // A VQ-1 of 0 is 128 bits so no extra Z state + cbz x1, 1f // A VQ-1 of 0 is 128 bits so no extra Z state sve_flush_z -1: sve_flush_p_ffr - ret +1: sve_flush_p + tbz x0, #0, 2f + sve_flush_ffr +2: ret SYM_FUNC_END(sve_flush_live) #endif /* CONFIG_ARM64_SVE */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 0f6df1ece618..3d5d243c3f1c 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -289,7 +289,7 @@ static void task_fpsimd_load(void) if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) sve_load_state(sve_pffr(¤t->thread), - ¤t->thread.uw.fpsimd_state.fpsr, + ¤t->thread.uw.fpsimd_state.fpsr, true, sve_vq_from_vl(current->thread.sve_vl) - 1); else fpsimd_load_state(¤t->thread.uw.fpsimd_state); @@ -325,7 +325,7 @@ static void fpsimd_save(void) sve_save_state((char *)last->sve_state + sve_ffr_offset(last->sve_vl), - &last->st->fpsr); + &last->st->fpsr, true); } else { fpsimd_save_state(last->st); } @@ -962,7 +962,7 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) unsigned long vq_minus_one = sve_vq_from_vl(current->thread.sve_vl) - 1; sve_set_vq(vq_minus_one); - sve_flush_live(vq_minus_one); + sve_flush_live(true, vq_minus_one); fpsimd_bind_task_to_cpu(); } else { fpsimd_to_sve(current); @@ -1356,7 +1356,8 @@ void __efi_fpsimd_begin(void) __this_cpu_write(efi_sve_state_used, true); sve_save_state(sve_state + sve_ffr_offset(sve_max_vl), - &this_cpu_ptr(&efi_fpsimd_state)->fpsr); + &this_cpu_ptr(&efi_fpsimd_state)->fpsr, + true); } else { fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state)); } @@ -1382,6 +1383,7 @@ void __efi_fpsimd_end(void) sve_load_state(sve_state + sve_ffr_offset(sve_max_vl), &this_cpu_ptr(&efi_fpsimd_state)->fpsr, + true, sve_vq_from_vl(sve_get_vl()) - 1); __this_cpu_write(efi_sve_state_used, false); diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S index 3c635929771a..1bb3b04b84e6 100644 --- a/arch/arm64/kvm/hyp/fpsimd.S +++ b/arch/arm64/kvm/hyp/fpsimd.S @@ -21,11 +21,13 @@ SYM_FUNC_START(__fpsimd_restore_state) SYM_FUNC_END(__fpsimd_restore_state) SYM_FUNC_START(__sve_restore_state) - __sve_load 0, x1, 2 + mov x2, #1 + __sve_load 0, x1, x2, 3 ret SYM_FUNC_END(__sve_restore_state) SYM_FUNC_START(__sve_save_state) - sve_save 0, x1, 2 + mov x2, #1 + sve_save 0, x1, x2, 3 ret SYM_FUNC_END(__sve_save_state) -- cgit v1.2.3 From 059613f546b67423a5b49cb6e6fa8b72fbaa4e0b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Oct 2021 18:22:10 +0100 Subject: arm64/sve: Rename find_supported_vector_length() The function has SVE specific checks in it and it will be more trouble to add conditional code for SME than it is to simply rename it to be SVE specific. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20211019172247.3045838-6-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 3d5d243c3f1c..25aa6f2386a1 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -337,7 +337,7 @@ static void fpsimd_save(void) * If things go wrong there's a bug somewhere, but try to fall back to a * safe choice. */ -static unsigned int find_supported_vector_length(unsigned int vl) +static unsigned int find_supported_sve_vector_length(unsigned int vl) { int bit; int max_vl = sve_max_vl; @@ -379,7 +379,7 @@ static int sve_proc_do_default_vl(struct ctl_table *table, int write, if (!sve_vl_valid(vl)) return -EINVAL; - set_sve_default_vl(find_supported_vector_length(vl)); + set_sve_default_vl(find_supported_sve_vector_length(vl)); return 0; } @@ -598,7 +598,7 @@ int sve_set_vector_length(struct task_struct *task, if (vl > SVE_VL_ARCH_MAX) vl = SVE_VL_ARCH_MAX; - vl = find_supported_vector_length(vl); + vl = find_supported_sve_vector_length(vl); if (flags & (PR_SVE_VL_INHERIT | PR_SVE_SET_VL_ONEXEC)) @@ -873,14 +873,14 @@ void __init sve_setup(void) * Sanity-check that the max VL we determined through CPU features * corresponds properly to sve_vq_map. If not, do our best: */ - if (WARN_ON(sve_max_vl != find_supported_vector_length(sve_max_vl))) - sve_max_vl = find_supported_vector_length(sve_max_vl); + if (WARN_ON(sve_max_vl != find_supported_sve_vector_length(sve_max_vl))) + sve_max_vl = find_supported_sve_vector_length(sve_max_vl); /* * For the default VL, pick the maximum supported value <= 64. * VL == 64 is guaranteed not to grow the signal frame. */ - set_sve_default_vl(find_supported_vector_length(64)); + set_sve_default_vl(find_supported_sve_vector_length(64)); bitmap_andnot(tmp_map, sve_vq_partial_map, sve_vq_map, SVE_VQ_MAX); @@ -1066,7 +1066,7 @@ void fpsimd_flush_thread(void) if (WARN_ON(!sve_vl_valid(vl))) vl = SVE_VL_MIN; - supported_vl = find_supported_vector_length(vl); + supported_vl = find_supported_sve_vector_length(vl); if (WARN_ON(supported_vl != vl)) vl = supported_vl; -- cgit v1.2.3 From 0423eedcf4e1ba49f262a9e925ad9ab8ad8eaa36 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Oct 2021 18:22:11 +0100 Subject: arm64/sve: Use accessor functions for vector lengths in thread_struct In a system with SME there are parallel vector length controls for SVE and SME vectors which function in much the same way so it is desirable to share the code for handling them as much as possible. In order to prepare for doing this add a layer of accessor functions for the various VL related operations on tasks. Since almost all current interactions are actually via task->thread rather than directly with the thread_info the accessors use that. Accessors are provided for both generic and SVE specific usage, the generic accessors should be used for cases where register state is being manipulated since the registers are shared between streaming and regular SVE so we know that when SME support is implemented we will always have to be in the appropriate mode already and hence can generalise now. Since we are using task_struct and we don't want to cause widespread inclusion of sched.h the acessors are all out of line, it is hoped that none of the uses are in a sufficiently critical path for this to be an issue. Those that are most likely to present an issue are in the same translation unit so hopefully the compiler may be able to inline anyway. This is purely adding the layer of abstraction, additional work will be needed to support tasks using SME. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20211019172247.3045838-7-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 2 +- arch/arm64/include/asm/processor.h | 10 +++++++ arch/arm64/kernel/fpsimd.c | 55 ++++++++++++++++++++++++++------------ arch/arm64/kernel/ptrace.c | 4 +-- arch/arm64/kernel/signal.c | 6 ++--- 5 files changed, 54 insertions(+), 23 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 7f8a44a9a5e6..d164e2f35837 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -62,7 +62,7 @@ static inline size_t sve_ffr_offset(int vl) static inline void *sve_pffr(struct thread_struct *thread) { - return (char *)thread->sve_state + sve_ffr_offset(thread->sve_vl); + return (char *)thread->sve_state + sve_ffr_offset(thread_get_sve_vl(thread)); } extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr); diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index ee2bdc1b9f5b..adb6a46a1fae 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -164,6 +164,16 @@ struct thread_struct { u64 sctlr_user; }; +static inline unsigned int thread_get_sve_vl(struct thread_struct *thread) +{ + return thread->sve_vl; +} + +unsigned int task_get_sve_vl(const struct task_struct *task); +void task_set_sve_vl(struct task_struct *task, unsigned long vl); +unsigned int task_get_sve_vl_onexec(const struct task_struct *task); +void task_set_sve_vl_onexec(struct task_struct *task, unsigned long vl); + #define SCTLR_USER_MASK \ (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | SCTLR_ELx_ENDA | SCTLR_ELx_ENDB | \ SCTLR_EL1_TCF0_MASK) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 25aa6f2386a1..f7eb54e9a8b8 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -228,6 +228,26 @@ static void sve_free(struct task_struct *task) __sve_free(task); } +unsigned int task_get_sve_vl(const struct task_struct *task) +{ + return task->thread.sve_vl; +} + +void task_set_sve_vl(struct task_struct *task, unsigned long vl) +{ + task->thread.sve_vl = vl; +} + +unsigned int task_get_sve_vl_onexec(const struct task_struct *task) +{ + return task->thread.sve_vl_onexec; +} + +void task_set_sve_vl_onexec(struct task_struct *task, unsigned long vl) +{ + task->thread.sve_vl_onexec = vl; +} + /* * TIF_SVE controls whether a task can use SVE without trapping while * in userspace, and also the way a task's FPSIMD/SVE state is stored @@ -290,7 +310,7 @@ static void task_fpsimd_load(void) if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) sve_load_state(sve_pffr(¤t->thread), ¤t->thread.uw.fpsimd_state.fpsr, true, - sve_vq_from_vl(current->thread.sve_vl) - 1); + sve_vq_from_vl(task_get_sve_vl(current)) - 1); else fpsimd_load_state(¤t->thread.uw.fpsimd_state); } @@ -458,7 +478,7 @@ static void fpsimd_to_sve(struct task_struct *task) if (!system_supports_sve()) return; - vq = sve_vq_from_vl(task->thread.sve_vl); + vq = sve_vq_from_vl(task_get_sve_vl(task)); __fpsimd_to_sve(sst, fst, vq); } @@ -484,7 +504,7 @@ static void sve_to_fpsimd(struct task_struct *task) if (!system_supports_sve()) return; - vq = sve_vq_from_vl(task->thread.sve_vl); + vq = sve_vq_from_vl(task_get_sve_vl(task)); for (i = 0; i < SVE_NUM_ZREGS; ++i) { p = (__uint128_t const *)ZREG(sst, vq, i); fst->vregs[i] = arm64_le128_to_cpu(*p); @@ -499,7 +519,7 @@ static void sve_to_fpsimd(struct task_struct *task) */ static size_t sve_state_size(struct task_struct const *task) { - return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task->thread.sve_vl)); + return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task_get_sve_vl(task))); } /* @@ -574,7 +594,7 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) if (!test_tsk_thread_flag(task, TIF_SVE)) return; - vq = sve_vq_from_vl(task->thread.sve_vl); + vq = sve_vq_from_vl(task_get_sve_vl(task)); memset(sst, 0, SVE_SIG_REGS_SIZE(vq)); __fpsimd_to_sve(sst, fst, vq); @@ -602,16 +622,16 @@ int sve_set_vector_length(struct task_struct *task, if (flags & (PR_SVE_VL_INHERIT | PR_SVE_SET_VL_ONEXEC)) - task->thread.sve_vl_onexec = vl; + task_set_sve_vl_onexec(task, vl); else /* Reset VL to system default on next exec: */ - task->thread.sve_vl_onexec = 0; + task_set_sve_vl_onexec(task, 0); /* Only actually set the VL if not deferred: */ if (flags & PR_SVE_SET_VL_ONEXEC) goto out; - if (vl == task->thread.sve_vl) + if (vl == task_get_sve_vl(task)) goto out; /* @@ -638,7 +658,7 @@ int sve_set_vector_length(struct task_struct *task, */ sve_free(task); - task->thread.sve_vl = vl; + task_set_sve_vl(task, vl); out: update_tsk_thread_flag(task, TIF_SVE_VL_INHERIT, @@ -658,9 +678,9 @@ static int sve_prctl_status(unsigned long flags) int ret; if (flags & PR_SVE_SET_VL_ONEXEC) - ret = current->thread.sve_vl_onexec; + ret = task_get_sve_vl_onexec(current); else - ret = current->thread.sve_vl; + ret = task_get_sve_vl(current); if (test_thread_flag(TIF_SVE_VL_INHERIT)) ret |= PR_SVE_VL_INHERIT; @@ -960,7 +980,7 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs) */ if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { unsigned long vq_minus_one = - sve_vq_from_vl(current->thread.sve_vl) - 1; + sve_vq_from_vl(task_get_sve_vl(current)) - 1; sve_set_vq(vq_minus_one); sve_flush_live(true, vq_minus_one); fpsimd_bind_task_to_cpu(); @@ -1060,8 +1080,9 @@ void fpsimd_flush_thread(void) * If a bug causes this to go wrong, we make some noise and * try to fudge thread.sve_vl to a safe value here. */ - vl = current->thread.sve_vl_onexec ? - current->thread.sve_vl_onexec : get_sve_default_vl(); + vl = task_get_sve_vl_onexec(current); + if (!vl) + vl = get_sve_default_vl(); if (WARN_ON(!sve_vl_valid(vl))) vl = SVE_VL_MIN; @@ -1070,14 +1091,14 @@ void fpsimd_flush_thread(void) if (WARN_ON(supported_vl != vl)) vl = supported_vl; - current->thread.sve_vl = vl; + task_set_sve_vl(current, vl); /* * If the task is not set to inherit, ensure that the vector * length will be reset by a subsequent exec: */ if (!test_thread_flag(TIF_SVE_VL_INHERIT)) - current->thread.sve_vl_onexec = 0; + task_set_sve_vl_onexec(current, 0); } put_cpu_fpsimd_context(); @@ -1122,7 +1143,7 @@ static void fpsimd_bind_task_to_cpu(void) WARN_ON(!system_supports_fpsimd()); last->st = ¤t->thread.uw.fpsimd_state; last->sve_state = current->thread.sve_state; - last->sve_vl = current->thread.sve_vl; + last->sve_vl = task_get_sve_vl(current); current->thread.fpsimd_cpu = smp_processor_id(); if (system_supports_sve()) { diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index e26196a33cf4..95ff03a1b077 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -725,7 +725,7 @@ static void sve_init_header_from_task(struct user_sve_header *header, if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) header->flags |= SVE_PT_VL_INHERIT; - header->vl = target->thread.sve_vl; + header->vl = task_get_sve_vl(target); vq = sve_vq_from_vl(header->vl); header->max_vl = sve_max_vl; @@ -820,7 +820,7 @@ static int sve_set(struct task_struct *target, goto out; /* Actual VL set may be less than the user asked for: */ - vq = sve_vq_from_vl(target->thread.sve_vl); + vq = sve_vq_from_vl(task_get_sve_vl(target)); /* Registers: FPSIMD-only case */ diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index c287b9407f28..aa1d9d7918da 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -227,7 +227,7 @@ static int preserve_sve_context(struct sve_context __user *ctx) { int err = 0; u16 reserved[ARRAY_SIZE(ctx->__reserved)]; - unsigned int vl = current->thread.sve_vl; + unsigned int vl = task_get_sve_vl(current); unsigned int vq = 0; if (test_thread_flag(TIF_SVE)) @@ -266,7 +266,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) if (__copy_from_user(&sve, user->sve, sizeof(sve))) return -EFAULT; - if (sve.vl != current->thread.sve_vl) + if (sve.vl != task_get_sve_vl(current)) return -EINVAL; if (sve.head.size <= sizeof(*user->sve)) { @@ -597,7 +597,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, int vl = sve_max_vl; if (!add_all) - vl = current->thread.sve_vl; + vl = task_get_sve_vl(current); vq = sve_vq_from_vl(vl); } -- cgit v1.2.3 From b5bc00ffddc08c20a799514cbcfd2abaa6718014 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Oct 2021 18:22:12 +0100 Subject: arm64/sve: Put system wide vector length information into structs With the introduction of SME we will have a second vector length in the system, enumerated and configured in a very similar fashion to the existing SVE vector length. While there are a few differences in how things are handled this is a relatively small portion of the overall code so in order to avoid code duplication we factor out We create two structs, one vl_info for the static hardware properties and one vl_config for the runtime configuration, with an array instantiated for each and update all the users to reference these. Some accessor functions are provided where helpful for readability, and the write to set the vector length is put into a function since the system register being updated needs to be chosen at compile time. This is a mostly mechanical replacement, further work will be required to actually make things generic, ensuring that we handle those places where there are differences properly. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20211019172247.3045838-8-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 101 +++++++++++++++++++---- arch/arm64/include/asm/processor.h | 5 ++ arch/arm64/kernel/cpufeature.c | 6 +- arch/arm64/kernel/fpsimd.c | 163 +++++++++++++++++++++---------------- arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/signal.c | 2 +- arch/arm64/kvm/reset.c | 6 +- 7 files changed, 191 insertions(+), 94 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index d164e2f35837..5a1f79a4a500 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -77,10 +77,6 @@ extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); extern u64 read_zcr_features(void); -extern int __ro_after_init sve_max_vl; -extern int __ro_after_init sve_max_virtualisable_vl; -extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX); - /* * Helpers to translate bit indices in sve_vq_map to VQ values (and * vice versa). This allows find_next_bit() to be used to find the @@ -96,11 +92,27 @@ static inline unsigned int __bit_to_vq(unsigned int bit) return SVE_VQ_MAX - bit; } -/* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */ -static inline bool sve_vq_available(unsigned int vq) -{ - return test_bit(__vq_to_bit(vq), sve_vq_map); -} + +struct vl_info { + enum vec_type type; + const char *name; /* For display purposes */ + + /* Minimum supported vector length across all CPUs */ + int min_vl; + + /* Maximum supported vector length across all CPUs */ + int max_vl; + int max_virtualisable_vl; + + /* + * Set of available vector lengths, + * where length vq encoded as bit __vq_to_bit(vq): + */ + DECLARE_BITMAP(vq_map, SVE_VQ_MAX); + + /* Set of vector lengths present on at least one cpu: */ + DECLARE_BITMAP(vq_partial_map, SVE_VQ_MAX); +}; #ifdef CONFIG_ARM64_SVE @@ -139,11 +151,63 @@ static inline void sve_user_enable(void) * Probing and setup functions. * Calls to these functions must be serialised with one another. */ -extern void __init sve_init_vq_map(void); -extern void sve_update_vq_map(void); -extern int sve_verify_vq_map(void); +enum vec_type; + +extern void __init vec_init_vq_map(enum vec_type type); +extern void vec_update_vq_map(enum vec_type type); +extern int vec_verify_vq_map(enum vec_type type); extern void __init sve_setup(void); +extern __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX]; + +static inline void write_vl(enum vec_type type, u64 val) +{ + u64 tmp; + + switch (type) { +#ifdef CONFIG_ARM64_SVE + case ARM64_VEC_SVE: + tmp = read_sysreg_s(SYS_ZCR_EL1) & ~ZCR_ELx_LEN_MASK; + write_sysreg_s(tmp | val, SYS_ZCR_EL1); + break; +#endif + default: + WARN_ON_ONCE(1); + break; + } +} + +static inline int vec_max_vl(enum vec_type type) +{ + return vl_info[type].max_vl; +} + +static inline int vec_max_virtualisable_vl(enum vec_type type) +{ + return vl_info[type].max_virtualisable_vl; +} + +static inline int sve_max_vl(void) +{ + return vec_max_vl(ARM64_VEC_SVE); +} + +static inline int sve_max_virtualisable_vl(void) +{ + return vec_max_virtualisable_vl(ARM64_VEC_SVE); +} + +/* Ensure vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX before calling this function */ +static inline bool vq_available(enum vec_type type, unsigned int vq) +{ + return test_bit(__vq_to_bit(vq), vl_info[type].vq_map); +} + +static inline bool sve_vq_available(unsigned int vq) +{ + return vq_available(ARM64_VEC_SVE, vq); +} + #else /* ! CONFIG_ARM64_SVE */ static inline void sve_alloc(struct task_struct *task) { } @@ -161,14 +225,21 @@ static inline int sve_get_current_vl(void) return -EINVAL; } +static inline int sve_max_vl(void) +{ + return -EINVAL; +} + +static inline bool sve_vq_available(unsigned int vq) { return false; } + static inline void sve_user_disable(void) { BUILD_BUG(); } static inline void sve_user_enable(void) { BUILD_BUG(); } #define sve_cond_update_zcr_vq(val, reg) do { } while (0) -static inline void sve_init_vq_map(void) { } -static inline void sve_update_vq_map(void) { } -static inline int sve_verify_vq_map(void) { return 0; } +static inline void vec_init_vq_map(enum vec_type t) { } +static inline void vec_update_vq_map(enum vec_type t) { } +static inline int vec_verify_vq_map(enum vec_type t) { return 0; } static inline void sve_setup(void) { } #endif /* ! CONFIG_ARM64_SVE */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index adb6a46a1fae..fb0608fe9ded 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -115,6 +115,11 @@ struct debug_info { #endif }; +enum vec_type { + ARM64_VEC_SVE = 0, + ARM64_VEC_MAX, +}; + struct cpu_context { unsigned long x19; unsigned long x20; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 6ec7036ef7e1..405a65d7e618 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -941,7 +941,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) { init_cpu_ftr_reg(SYS_ZCR_EL1, info->reg_zcr); - sve_init_vq_map(); + vec_init_vq_map(ARM64_VEC_SVE); } if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) @@ -1175,7 +1175,7 @@ void update_cpu_features(int cpu, /* Probe vector lengths, unless we already gave up on SVE */ if (id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) && !system_capabilities_finalized()) - sve_update_vq_map(); + vec_update_vq_map(ARM64_VEC_SVE); } /* @@ -2739,7 +2739,7 @@ static void verify_sve_features(void) unsigned int safe_len = safe_zcr & ZCR_ELx_LEN_MASK; unsigned int len = zcr & ZCR_ELx_LEN_MASK; - if (len < safe_len || sve_verify_vq_map()) { + if (len < safe_len || vec_verify_vq_map(ARM64_VEC_SVE)) { pr_crit("CPU%d: SVE: vector length support mismatch\n", smp_processor_id()); cpu_die_early(); diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index f7eb54e9a8b8..9248c9efe5fd 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -121,40 +121,51 @@ struct fpsimd_last_state_struct { static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); -/* Default VL for tasks that don't set it explicitly: */ -static int __sve_default_vl = -1; +__ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = { +#ifdef CONFIG_ARM64_SVE + [ARM64_VEC_SVE] = { + .type = ARM64_VEC_SVE, + .name = "SVE", + .min_vl = SVE_VL_MIN, + .max_vl = SVE_VL_MIN, + .max_virtualisable_vl = SVE_VL_MIN, + }, +#endif +}; + +struct vl_config { + int __default_vl; /* Default VL for tasks */ +}; + +static struct vl_config vl_config[ARM64_VEC_MAX]; + +static int get_default_vl(enum vec_type type) +{ + return READ_ONCE(vl_config[type].__default_vl); +} static int get_sve_default_vl(void) { - return READ_ONCE(__sve_default_vl); + return get_default_vl(ARM64_VEC_SVE); } #ifdef CONFIG_ARM64_SVE -static void set_sve_default_vl(int val) +static void set_default_vl(enum vec_type type, int val) { - WRITE_ONCE(__sve_default_vl, val); + WRITE_ONCE(vl_config[type].__default_vl, val); } -/* Maximum supported vector length across all CPUs (initially poisoned) */ -int __ro_after_init sve_max_vl = SVE_VL_MIN; -int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN; - -/* - * Set of available vector lengths, - * where length vq encoded as bit __vq_to_bit(vq): - */ -__ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX); -/* Set of vector lengths present on at least one cpu: */ -static __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX); +static void set_sve_default_vl(int val) +{ + set_default_vl(ARM64_VEC_SVE, val); +} static void __percpu *efi_sve_state; #else /* ! CONFIG_ARM64_SVE */ /* Dummy declaration for code that will be optimised out: */ -extern __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX); -extern __ro_after_init DECLARE_BITMAP(sve_vq_partial_map, SVE_VQ_MAX); extern void __percpu *efi_sve_state; #endif /* ! CONFIG_ARM64_SVE */ @@ -357,21 +368,23 @@ static void fpsimd_save(void) * If things go wrong there's a bug somewhere, but try to fall back to a * safe choice. */ -static unsigned int find_supported_sve_vector_length(unsigned int vl) +static unsigned int find_supported_vector_length(enum vec_type type, + unsigned int vl) { + struct vl_info *info = &vl_info[type]; int bit; - int max_vl = sve_max_vl; + int max_vl = info->max_vl; if (WARN_ON(!sve_vl_valid(vl))) - vl = SVE_VL_MIN; + vl = info->min_vl; if (WARN_ON(!sve_vl_valid(max_vl))) - max_vl = SVE_VL_MIN; + max_vl = info->min_vl; if (vl > max_vl) vl = max_vl; - bit = find_next_bit(sve_vq_map, SVE_VQ_MAX, + bit = find_next_bit(info->vq_map, SVE_VQ_MAX, __vq_to_bit(sve_vq_from_vl(vl))); return sve_vl_from_vq(__bit_to_vq(bit)); } @@ -381,6 +394,7 @@ static unsigned int find_supported_sve_vector_length(unsigned int vl) static int sve_proc_do_default_vl(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { + struct vl_info *info = &vl_info[ARM64_VEC_SVE]; int ret; int vl = get_sve_default_vl(); struct ctl_table tmp_table = { @@ -394,12 +408,12 @@ static int sve_proc_do_default_vl(struct ctl_table *table, int write, /* Writing -1 has the special meaning "set to max": */ if (vl == -1) - vl = sve_max_vl; + vl = info->max_vl; if (!sve_vl_valid(vl)) return -EINVAL; - set_sve_default_vl(find_supported_sve_vector_length(vl)); + set_sve_default_vl(find_supported_vector_length(ARM64_VEC_SVE, vl)); return 0; } @@ -618,7 +632,7 @@ int sve_set_vector_length(struct task_struct *task, if (vl > SVE_VL_ARCH_MAX) vl = SVE_VL_ARCH_MAX; - vl = find_supported_sve_vector_length(vl); + vl = find_supported_vector_length(ARM64_VEC_SVE, vl); if (flags & (PR_SVE_VL_INHERIT | PR_SVE_SET_VL_ONEXEC)) @@ -716,18 +730,15 @@ int sve_get_current_vl(void) return sve_prctl_status(0); } -static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX)) +static void vec_probe_vqs(struct vl_info *info, + DECLARE_BITMAP(map, SVE_VQ_MAX)) { unsigned int vq, vl; - unsigned long zcr; bitmap_zero(map, SVE_VQ_MAX); - zcr = ZCR_ELx_LEN_MASK; - zcr = read_sysreg_s(SYS_ZCR_EL1) & ~zcr; - for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) { - write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1); /* self-syncing */ + write_vl(info->type, vq - 1); /* self-syncing */ vl = sve_get_vl(); vq = sve_vq_from_vl(vl); /* skip intervening lengths */ set_bit(__vq_to_bit(vq), map); @@ -738,10 +749,11 @@ static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX)) * Initialise the set of known supported VQs for the boot CPU. * This is called during kernel boot, before secondary CPUs are brought up. */ -void __init sve_init_vq_map(void) +void __init vec_init_vq_map(enum vec_type type) { - sve_probe_vqs(sve_vq_map); - bitmap_copy(sve_vq_partial_map, sve_vq_map, SVE_VQ_MAX); + struct vl_info *info = &vl_info[type]; + vec_probe_vqs(info, info->vq_map); + bitmap_copy(info->vq_partial_map, info->vq_map, SVE_VQ_MAX); } /* @@ -749,30 +761,33 @@ void __init sve_init_vq_map(void) * those not supported by the current CPU. * This function is called during the bring-up of early secondary CPUs only. */ -void sve_update_vq_map(void) +void vec_update_vq_map(enum vec_type type) { + struct vl_info *info = &vl_info[type]; DECLARE_BITMAP(tmp_map, SVE_VQ_MAX); - sve_probe_vqs(tmp_map); - bitmap_and(sve_vq_map, sve_vq_map, tmp_map, SVE_VQ_MAX); - bitmap_or(sve_vq_partial_map, sve_vq_partial_map, tmp_map, SVE_VQ_MAX); + vec_probe_vqs(info, tmp_map); + bitmap_and(info->vq_map, info->vq_map, tmp_map, SVE_VQ_MAX); + bitmap_or(info->vq_partial_map, info->vq_partial_map, tmp_map, + SVE_VQ_MAX); } /* * Check whether the current CPU supports all VQs in the committed set. * This function is called during the bring-up of late secondary CPUs only. */ -int sve_verify_vq_map(void) +int vec_verify_vq_map(enum vec_type type) { + struct vl_info *info = &vl_info[type]; DECLARE_BITMAP(tmp_map, SVE_VQ_MAX); unsigned long b; - sve_probe_vqs(tmp_map); + vec_probe_vqs(info, tmp_map); bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX); - if (bitmap_intersects(tmp_map, sve_vq_map, SVE_VQ_MAX)) { - pr_warn("SVE: cpu%d: Required vector length(s) missing\n", - smp_processor_id()); + if (bitmap_intersects(tmp_map, info->vq_map, SVE_VQ_MAX)) { + pr_warn("%s: cpu%d: Required vector length(s) missing\n", + info->name, smp_processor_id()); return -EINVAL; } @@ -788,7 +803,7 @@ int sve_verify_vq_map(void) /* Recover the set of supported VQs: */ bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX); /* Find VQs supported that are not globally supported: */ - bitmap_andnot(tmp_map, tmp_map, sve_vq_map, SVE_VQ_MAX); + bitmap_andnot(tmp_map, tmp_map, info->vq_map, SVE_VQ_MAX); /* Find the lowest such VQ, if any: */ b = find_last_bit(tmp_map, SVE_VQ_MAX); @@ -799,9 +814,9 @@ int sve_verify_vq_map(void) * Mismatches above sve_max_virtualisable_vl are fine, since * no guest is allowed to configure ZCR_EL2.LEN to exceed this: */ - if (sve_vl_from_vq(__bit_to_vq(b)) <= sve_max_virtualisable_vl) { - pr_warn("SVE: cpu%d: Unsupported vector length(s) present\n", - smp_processor_id()); + if (sve_vl_from_vq(__bit_to_vq(b)) <= info->max_virtualisable_vl) { + pr_warn("%s: cpu%d: Unsupported vector length(s) present\n", + info->name, smp_processor_id()); return -EINVAL; } @@ -810,6 +825,8 @@ int sve_verify_vq_map(void) static void __init sve_efi_setup(void) { + struct vl_info *info = &vl_info[ARM64_VEC_SVE]; + if (!IS_ENABLED(CONFIG_EFI)) return; @@ -818,11 +835,11 @@ static void __init sve_efi_setup(void) * This is evidence of a crippled system and we are returning void, * so no attempt is made to handle this situation here. */ - if (!sve_vl_valid(sve_max_vl)) + if (!sve_vl_valid(info->max_vl)) goto fail; efi_sve_state = __alloc_percpu( - SVE_SIG_REGS_SIZE(sve_vq_from_vl(sve_max_vl)), SVE_VQ_BYTES); + SVE_SIG_REGS_SIZE(sve_vq_from_vl(info->max_vl)), SVE_VQ_BYTES); if (!efi_sve_state) goto fail; @@ -871,6 +888,7 @@ u64 read_zcr_features(void) void __init sve_setup(void) { + struct vl_info *info = &vl_info[ARM64_VEC_SVE]; u64 zcr; DECLARE_BITMAP(tmp_map, SVE_VQ_MAX); unsigned long b; @@ -883,49 +901,52 @@ void __init sve_setup(void) * so sve_vq_map must have at least SVE_VQ_MIN set. * If something went wrong, at least try to patch it up: */ - if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map))) - set_bit(__vq_to_bit(SVE_VQ_MIN), sve_vq_map); + if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map))) + set_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map); zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1); - sve_max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1); + info->max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1); /* * Sanity-check that the max VL we determined through CPU features * corresponds properly to sve_vq_map. If not, do our best: */ - if (WARN_ON(sve_max_vl != find_supported_sve_vector_length(sve_max_vl))) - sve_max_vl = find_supported_sve_vector_length(sve_max_vl); + if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SVE, + info->max_vl))) + info->max_vl = find_supported_vector_length(ARM64_VEC_SVE, + info->max_vl); /* * For the default VL, pick the maximum supported value <= 64. * VL == 64 is guaranteed not to grow the signal frame. */ - set_sve_default_vl(find_supported_sve_vector_length(64)); + set_sve_default_vl(find_supported_vector_length(ARM64_VEC_SVE, 64)); - bitmap_andnot(tmp_map, sve_vq_partial_map, sve_vq_map, + bitmap_andnot(tmp_map, info->vq_partial_map, info->vq_map, SVE_VQ_MAX); b = find_last_bit(tmp_map, SVE_VQ_MAX); if (b >= SVE_VQ_MAX) /* No non-virtualisable VLs found */ - sve_max_virtualisable_vl = SVE_VQ_MAX; + info->max_virtualisable_vl = SVE_VQ_MAX; else if (WARN_ON(b == SVE_VQ_MAX - 1)) /* No virtualisable VLs? This is architecturally forbidden. */ - sve_max_virtualisable_vl = SVE_VQ_MIN; + info->max_virtualisable_vl = SVE_VQ_MIN; else /* b + 1 < SVE_VQ_MAX */ - sve_max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1)); + info->max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1)); - if (sve_max_virtualisable_vl > sve_max_vl) - sve_max_virtualisable_vl = sve_max_vl; + if (info->max_virtualisable_vl > info->max_vl) + info->max_virtualisable_vl = info->max_vl; - pr_info("SVE: maximum available vector length %u bytes per vector\n", - sve_max_vl); - pr_info("SVE: default vector length %u bytes per vector\n", - get_sve_default_vl()); + pr_info("%s: maximum available vector length %u bytes per vector\n", + info->name, info->max_vl); + pr_info("%s: default vector length %u bytes per vector\n", + info->name, get_sve_default_vl()); /* KVM decides whether to support mismatched systems. Just warn here: */ - if (sve_max_virtualisable_vl < sve_max_vl) - pr_warn("SVE: unvirtualisable vector lengths present\n"); + if (sve_max_virtualisable_vl() < sve_max_vl()) + pr_warn("%s: unvirtualisable vector lengths present\n", + info->name); sve_efi_setup(); } @@ -1087,7 +1108,7 @@ void fpsimd_flush_thread(void) if (WARN_ON(!sve_vl_valid(vl))) vl = SVE_VL_MIN; - supported_vl = find_supported_sve_vector_length(vl); + supported_vl = find_supported_vector_length(ARM64_VEC_SVE, vl); if (WARN_ON(supported_vl != vl)) vl = supported_vl; @@ -1376,7 +1397,7 @@ void __efi_fpsimd_begin(void) __this_cpu_write(efi_sve_state_used, true); - sve_save_state(sve_state + sve_ffr_offset(sve_max_vl), + sve_save_state(sve_state + sve_ffr_offset(sve_max_vl()), &this_cpu_ptr(&efi_fpsimd_state)->fpsr, true); } else { @@ -1402,7 +1423,7 @@ void __efi_fpsimd_end(void) likely(__this_cpu_read(efi_sve_state_used))) { char const *sve_state = this_cpu_ptr(efi_sve_state); - sve_load_state(sve_state + sve_ffr_offset(sve_max_vl), + sve_load_state(sve_state + sve_ffr_offset(sve_max_vl()), &this_cpu_ptr(&efi_fpsimd_state)->fpsr, true, sve_vq_from_vl(sve_get_vl()) - 1); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 95ff03a1b077..88a9034fb9b5 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -728,7 +728,7 @@ static void sve_init_header_from_task(struct user_sve_header *header, header->vl = task_get_sve_vl(target); vq = sve_vq_from_vl(header->vl); - header->max_vl = sve_max_vl; + header->max_vl = sve_max_vl(); header->size = SVE_PT_SIZE(vq, header->flags); header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl), SVE_PT_REGS_SVE); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index aa1d9d7918da..8f6372b44b65 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -594,7 +594,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, unsigned int vq = 0; if (add_all || test_thread_flag(TIF_SVE)) { - int vl = sve_max_vl; + int vl = sve_max_vl(); if (!add_all) vl = task_get_sve_vl(current); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 5ce36b0a3343..09cd30a9aafb 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -46,7 +46,7 @@ unsigned int kvm_sve_max_vl; int kvm_arm_init_sve(void) { if (system_supports_sve()) { - kvm_sve_max_vl = sve_max_virtualisable_vl; + kvm_sve_max_vl = sve_max_virtualisable_vl(); /* * The get_sve_reg()/set_sve_reg() ioctl interface will need @@ -61,7 +61,7 @@ int kvm_arm_init_sve(void) * Don't even try to make use of vector lengths that * aren't available on all CPUs, for now: */ - if (kvm_sve_max_vl < sve_max_vl) + if (kvm_sve_max_vl < sve_max_vl()) pr_warn("KVM: SVE vector length for guests limited to %u bytes\n", kvm_sve_max_vl); } @@ -102,7 +102,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu) * kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and * set_sve_vls(). Double-check here just to be sure: */ - if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl || + if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl() || vl > SVE_VL_ARCH_MAX)) return -EIO; -- cgit v1.2.3 From ddc806b5c4752d35bdaa4dfa2aaa72785711a3da Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Oct 2021 18:22:13 +0100 Subject: arm64/sve: Explicitly load vector length when restoring SVE state Currently when restoring the SVE state we supply the SVE vector length as an argument to sve_load_state() and the underlying macros. This becomes inconvenient with the addition of SME since we may need to restore any combination of SVE and SME vector lengths, and we already separately restore the vector length in the KVM code. We don't need to know the vector length during the actual register load since the SME load instructions can index into the data array for us. Refactor the interface so we explicitly set the vector length separately to restoring the SVE registers in preparation for adding SME support, no functional change should be involved. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20211019172247.3045838-9-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 2 +- arch/arm64/include/asm/fpsimdmacros.h | 7 +------ arch/arm64/kernel/entry-fpsimd.S | 3 +-- arch/arm64/kernel/fpsimd.c | 13 +++++++------ arch/arm64/kvm/hyp/fpsimd.S | 2 +- 5 files changed, 11 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 5a1f79a4a500..1d0b5fa253a0 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -67,7 +67,7 @@ static inline void *sve_pffr(struct thread_struct *thread) extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr); extern void sve_load_state(void const *state, u32 const *pfpsr, - int restore_ffr, unsigned long vq_minus_1); + int restore_ffr); extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1); extern unsigned int sve_get_vl(void); extern void sve_set_vq(unsigned long vq_minus_1); diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index e5ffd8b265b6..2509d7dde55a 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -241,7 +241,7 @@ str w\nxtmp, [\xpfpsr, #4] .endm -.macro __sve_load nxbase, xpfpsr, restore_ffr, nxtmp +.macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34 cbz \restore_ffr, 921f _sve_ldr_p 0, \nxbase @@ -254,8 +254,3 @@ ldr w\nxtmp, [\xpfpsr, #4] msr fpcr, x\nxtmp .endm - -.macro sve_load nxbase, xpfpsr, restore_ffr, xvqminus1, nxtmp, xtmp2 - sve_load_vq \xvqminus1, x\nxtmp, \xtmp2 - __sve_load \nxbase, \xpfpsr, \restore_ffr, \nxtmp -.endm diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index f588c214d44b..dc242e269f9a 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -51,10 +51,9 @@ SYM_FUNC_END(sve_save_state) * x0 - pointer to buffer for state * x1 - pointer to storage for FPSR * x2 - Restore FFR if non-zero - * x3 - VQ-1 */ SYM_FUNC_START(sve_load_state) - sve_load 0, x1, x2, x3, 4, x5 + sve_load 0, x1, x2, 4 ret SYM_FUNC_END(sve_load_state) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 9248c9efe5fd..e75dd20a40cf 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -318,12 +318,13 @@ static void task_fpsimd_load(void) WARN_ON(!system_supports_fpsimd()); WARN_ON(!have_cpu_fpsimd_context()); - if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) + if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) { + sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1); sve_load_state(sve_pffr(¤t->thread), - ¤t->thread.uw.fpsimd_state.fpsr, true, - sve_vq_from_vl(task_get_sve_vl(current)) - 1); - else + ¤t->thread.uw.fpsimd_state.fpsr, true); + } else { fpsimd_load_state(¤t->thread.uw.fpsimd_state); + } } /* @@ -1423,10 +1424,10 @@ void __efi_fpsimd_end(void) likely(__this_cpu_read(efi_sve_state_used))) { char const *sve_state = this_cpu_ptr(efi_sve_state); + sve_set_vq(sve_vq_from_vl(sve_get_vl()) - 1); sve_load_state(sve_state + sve_ffr_offset(sve_max_vl()), &this_cpu_ptr(&efi_fpsimd_state)->fpsr, - true, - sve_vq_from_vl(sve_get_vl()) - 1); + true); __this_cpu_write(efi_sve_state_used, false); } else { diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S index 1bb3b04b84e6..e950875e31ce 100644 --- a/arch/arm64/kvm/hyp/fpsimd.S +++ b/arch/arm64/kvm/hyp/fpsimd.S @@ -22,7 +22,7 @@ SYM_FUNC_END(__fpsimd_restore_state) SYM_FUNC_START(__sve_restore_state) mov x2, #1 - __sve_load 0, x1, x2, 3 + sve_load 0, x1, x2, 3 ret SYM_FUNC_END(__sve_restore_state) -- cgit v1.2.3 From 5838a155798479e3fe7e1482a31f0db657d5bbdd Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Oct 2021 18:22:14 +0100 Subject: arm64/sve: Track vector lengths for tasks in an array As for SVE we will track a per task SME vector length for tasks. Convert the existing storage for the vector length into an array and update fpsimd_flush_task() to initialise this in a function. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20211019172247.3045838-10-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/processor.h | 44 +++++++++++++--- arch/arm64/include/asm/thread_info.h | 2 +- arch/arm64/kernel/fpsimd.c | 97 +++++++++++++++++++++--------------- 3 files changed, 95 insertions(+), 48 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index fb0608fe9ded..9b854e8196df 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -152,8 +152,8 @@ struct thread_struct { unsigned int fpsimd_cpu; void *sve_state; /* SVE registers, if any */ - unsigned int sve_vl; /* SVE vector length */ - unsigned int sve_vl_onexec; /* SVE vl after next exec */ + unsigned int vl[ARM64_VEC_MAX]; /* vector length */ + unsigned int vl_onexec[ARM64_VEC_MAX]; /* vl after next exec */ unsigned long fault_address; /* fault info */ unsigned long fault_code; /* ESR_EL1 value */ struct debug_info debug; /* debugging */ @@ -169,15 +169,45 @@ struct thread_struct { u64 sctlr_user; }; +static inline unsigned int thread_get_vl(struct thread_struct *thread, + enum vec_type type) +{ + return thread->vl[type]; +} + static inline unsigned int thread_get_sve_vl(struct thread_struct *thread) { - return thread->sve_vl; + return thread_get_vl(thread, ARM64_VEC_SVE); +} + +unsigned int task_get_vl(const struct task_struct *task, enum vec_type type); +void task_set_vl(struct task_struct *task, enum vec_type type, + unsigned long vl); +void task_set_vl_onexec(struct task_struct *task, enum vec_type type, + unsigned long vl); +unsigned int task_get_vl_onexec(const struct task_struct *task, + enum vec_type type); + +static inline unsigned int task_get_sve_vl(const struct task_struct *task) +{ + return task_get_vl(task, ARM64_VEC_SVE); } -unsigned int task_get_sve_vl(const struct task_struct *task); -void task_set_sve_vl(struct task_struct *task, unsigned long vl); -unsigned int task_get_sve_vl_onexec(const struct task_struct *task); -void task_set_sve_vl_onexec(struct task_struct *task, unsigned long vl); +static inline void task_set_sve_vl(struct task_struct *task, unsigned long vl) +{ + task_set_vl(task, ARM64_VEC_SVE, vl); +} + +static inline unsigned int task_get_sve_vl_onexec(const struct task_struct *task) +{ + return task_get_vl_onexec(task, ARM64_VEC_SVE); +} + +static inline void task_set_sve_vl_onexec(struct task_struct *task, + unsigned long vl) +{ + task_set_vl_onexec(task, ARM64_VEC_SVE, vl); +} #define SCTLR_USER_MASK \ (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | SCTLR_ELx_ENDA | SCTLR_ELx_ENDB | \ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 6623c99f0984..d5c8ac81ce11 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -78,7 +78,7 @@ int arch_dup_task_struct(struct task_struct *dst, #define TIF_SINGLESTEP 21 #define TIF_32BIT 22 /* 32bit process */ #define TIF_SVE 23 /* Scalable Vector Extension in use */ -#define TIF_SVE_VL_INHERIT 24 /* Inherit sve_vl_onexec across exec */ +#define TIF_SVE_VL_INHERIT 24 /* Inherit SVE vl_onexec across exec */ #define TIF_SSBD 25 /* Wants SSB mitigation */ #define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index e75dd20a40cf..3474122f9207 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -133,6 +133,17 @@ __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = { #endif }; +static unsigned int vec_vl_inherit_flag(enum vec_type type) +{ + switch (type) { + case ARM64_VEC_SVE: + return TIF_SVE_VL_INHERIT; + default: + WARN_ON_ONCE(1); + return 0; + } +} + struct vl_config { int __default_vl; /* Default VL for tasks */ }; @@ -239,24 +250,27 @@ static void sve_free(struct task_struct *task) __sve_free(task); } -unsigned int task_get_sve_vl(const struct task_struct *task) +unsigned int task_get_vl(const struct task_struct *task, enum vec_type type) { - return task->thread.sve_vl; + return task->thread.vl[type]; } -void task_set_sve_vl(struct task_struct *task, unsigned long vl) +void task_set_vl(struct task_struct *task, enum vec_type type, + unsigned long vl) { - task->thread.sve_vl = vl; + task->thread.vl[type] = vl; } -unsigned int task_get_sve_vl_onexec(const struct task_struct *task) +unsigned int task_get_vl_onexec(const struct task_struct *task, + enum vec_type type) { - return task->thread.sve_vl_onexec; + return task->thread.vl_onexec[type]; } -void task_set_sve_vl_onexec(struct task_struct *task, unsigned long vl) +void task_set_vl_onexec(struct task_struct *task, enum vec_type type, + unsigned long vl) { - task->thread.sve_vl_onexec = vl; + task->thread.vl_onexec[type] = vl; } /* @@ -1074,10 +1088,43 @@ void fpsimd_thread_switch(struct task_struct *next) __put_cpu_fpsimd_context(); } -void fpsimd_flush_thread(void) +static void fpsimd_flush_thread_vl(enum vec_type type) { int vl, supported_vl; + /* + * Reset the task vector length as required. This is where we + * ensure that all user tasks have a valid vector length + * configured: no kernel task can become a user task without + * an exec and hence a call to this function. By the time the + * first call to this function is made, all early hardware + * probing is complete, so __sve_default_vl should be valid. + * If a bug causes this to go wrong, we make some noise and + * try to fudge thread.sve_vl to a safe value here. + */ + vl = task_get_vl_onexec(current, type); + if (!vl) + vl = get_default_vl(type); + + if (WARN_ON(!sve_vl_valid(vl))) + vl = SVE_VL_MIN; + + supported_vl = find_supported_vector_length(type, vl); + if (WARN_ON(supported_vl != vl)) + vl = supported_vl; + + task_set_vl(current, type, vl); + + /* + * If the task is not set to inherit, ensure that the vector + * length will be reset by a subsequent exec: + */ + if (!test_thread_flag(vec_vl_inherit_flag(type))) + task_set_vl_onexec(current, type, 0); +} + +void fpsimd_flush_thread(void) +{ if (!system_supports_fpsimd()) return; @@ -1090,37 +1137,7 @@ void fpsimd_flush_thread(void) if (system_supports_sve()) { clear_thread_flag(TIF_SVE); sve_free(current); - - /* - * Reset the task vector length as required. - * This is where we ensure that all user tasks have a valid - * vector length configured: no kernel task can become a user - * task without an exec and hence a call to this function. - * By the time the first call to this function is made, all - * early hardware probing is complete, so __sve_default_vl - * should be valid. - * If a bug causes this to go wrong, we make some noise and - * try to fudge thread.sve_vl to a safe value here. - */ - vl = task_get_sve_vl_onexec(current); - if (!vl) - vl = get_sve_default_vl(); - - if (WARN_ON(!sve_vl_valid(vl))) - vl = SVE_VL_MIN; - - supported_vl = find_supported_vector_length(ARM64_VEC_SVE, vl); - if (WARN_ON(supported_vl != vl)) - vl = supported_vl; - - task_set_sve_vl(current, vl); - - /* - * If the task is not set to inherit, ensure that the vector - * length will be reset by a subsequent exec: - */ - if (!test_thread_flag(TIF_SVE_VL_INHERIT)) - task_set_sve_vl_onexec(current, 0); + fpsimd_flush_thread_vl(ARM64_VEC_SVE); } put_cpu_fpsimd_context(); -- cgit v1.2.3 From 1907d3ff5a644ad7c07bf3c0a56a0b1864c9e5cf Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 19 Oct 2021 15:36:43 -0700 Subject: arm64: vdso32: drop the test for dmb ishld Binutils added support for this instruction in commit e797f7e0b2bedc9328d4a9a0ebc63ca7a2dbbebc which shipped in 2.24 (just missing the 2.23 release) but was cherry-picked into 2.23 in commit 27a50d6755bae906bc73b4ec1a8b448467f0bea1. Thanks to Christian and Simon for helping me with the patch archaeology. According to Documentation/process/changes.rst, the minimum supported version of binutils is 2.23. Since all supported versions of GAS support this instruction, drop the assembler invocation, preprocessor flags/guards, and the cross assembler macro that's now unused. This also avoids a recursive self reference in a follow up cleanup patch. Cc: Christian Biesinger Cc: Simon Marchi Signed-off-by: Nick Desaulniers Reviewed-by: Vincenzo Frascino Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20211019223646.1146945-2-ndesaulniers@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/vdso/compat_barrier.h | 2 +- arch/arm64/kernel/vdso32/Makefile | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h index 3fd8fd6d8fc2..fb60a88b5ed4 100644 --- a/arch/arm64/include/asm/vdso/compat_barrier.h +++ b/arch/arm64/include/asm/vdso/compat_barrier.h @@ -20,7 +20,7 @@ #define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory") -#if __LINUX_ARM_ARCH__ >= 8 && defined(CONFIG_AS_DMB_ISHLD) +#if __LINUX_ARM_ARCH__ >= 8 #define aarch32_smp_mb() dmb(ish) #define aarch32_smp_rmb() dmb(ishld) #define aarch32_smp_wmb() dmb(ishst) diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 3dba0c4f8f42..89299a26638b 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -29,8 +29,6 @@ cc32-option = $(call try-run,\ $(CC_COMPAT) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) cc32-disable-warning = $(call try-run,\ $(CC_COMPAT) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) -cc32-as-instr = $(call try-run,\ - printf "%b\n" "$(1)" | $(CC_COMPAT) $(VDSO_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3)) # We cannot use the global flags to compile the vDSO files, the main reason # being that the 32-bit compiler may be older than the main (64-bit) compiler @@ -113,12 +111,6 @@ endif VDSO_AFLAGS := $(VDSO_CAFLAGS) VDSO_AFLAGS += -D__ASSEMBLY__ -# Check for binutils support for dmb ishld -dmbinstr := $(call cc32-as-instr,dmb ishld,-DCONFIG_AS_DMB_ISHLD=1) - -VDSO_CFLAGS += $(dmbinstr) -VDSO_AFLAGS += $(dmbinstr) - # From arm vDSO Makefile VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1 VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096 -- cgit v1.2.3 From a517faa902b5a048adbb4d6bbce9509ba5288af3 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 19 Oct 2021 15:36:44 -0700 Subject: arm64: vdso32: drop test for -march=armv8-a As Arnd points out: gcc-4.8 already supported -march=armv8, and we require gcc-5.1 now, so both this #if/#else construct and the corresponding "cc32-option,-march=armv8-a" check should be obsolete now. Link: https://lore.kernel.org/lkml/CAK8P3a3UBEJ0Py2ycz=rHfgog8g3mCOeQOwO0Gmp-iz6Uxkapg@mail.gmail.com/ Suggested-by: Arnd Bergmann Signed-off-by: Nick Desaulniers Reviewed-by: Vincenzo Frascino Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20211019223646.1146945-3-ndesaulniers@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/vdso/compat_barrier.h | 7 ------- arch/arm64/kernel/vdso32/Makefile | 8 +------- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h index fb60a88b5ed4..3ac35f4a667c 100644 --- a/arch/arm64/include/asm/vdso/compat_barrier.h +++ b/arch/arm64/include/asm/vdso/compat_barrier.h @@ -20,16 +20,9 @@ #define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory") -#if __LINUX_ARM_ARCH__ >= 8 #define aarch32_smp_mb() dmb(ish) #define aarch32_smp_rmb() dmb(ishld) #define aarch32_smp_wmb() dmb(ishst) -#else -#define aarch32_smp_mb() dmb(ish) -#define aarch32_smp_rmb() aarch32_smp_mb() -#define aarch32_smp_wmb() dmb(ishst) -#endif - #undef smp_mb #undef smp_rmb diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 89299a26638b..1407516e041e 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -65,13 +65,7 @@ endif # From arm vDSO Makefile VDSO_CAFLAGS += -fPIC -fno-builtin -fno-stack-protector VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING - - -# Try to compile for ARMv8. If the compiler is too old and doesn't support it, -# fall back to v7. There is no easy way to check for what architecture the code -# is being compiled, so define a macro specifying that (see arch/arm/Makefile). -VDSO_CAFLAGS += $(call cc32-option,-march=armv8-a -D__LINUX_ARM_ARCH__=8,\ - -march=armv7-a -D__LINUX_ARM_ARCH__=7) +VDSO_CAFLAGS += -march=armv8-a VDSO_CFLAGS := $(VDSO_CAFLAGS) VDSO_CFLAGS += -DENABLE_COMPAT_VDSO=1 -- cgit v1.2.3 From 14831fad73f5ac30ac61760487d95a538e6ab3cb Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 19 Oct 2021 15:36:45 -0700 Subject: arm64: vdso32: suppress error message for 'make mrproper' When running the following command without arm-linux-gnueabi-gcc in one's $PATH, the following warning is observed: $ ARCH=arm64 CROSS_COMPILE_COMPAT=arm-linux-gnueabi- make -j72 LLVM=1 mrproper make[1]: arm-linux-gnueabi-gcc: No such file or directory This is because KCONFIG is not run for mrproper, so CONFIG_CC_IS_CLANG is not set, and we end up eagerly evaluating various variables that try to invoke CC_COMPAT. This is a similar problem to what was observed in commit dc960bfeedb0 ("h8300: suppress error messages for 'make clean'") Reported-by: Lucas Henneman Suggested-by: Masahiro Yamada Signed-off-by: Nick Desaulniers Reviewed-by: Vincenzo Frascino Reviewed-by: Nathan Chancellor Tested-by: Nathan Chancellor Link: https://lore.kernel.org/r/20211019223646.1146945-4-ndesaulniers@google.com Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso32/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 1407516e041e..e478cebb9891 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -38,7 +38,8 @@ cc32-disable-warning = $(call try-run,\ # As a result we set our own flags here. # KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile -VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include) +VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc +VDSO_CPPFLAGS += -isystem $(shell $(CC_COMPAT) -print-file-name=include 2>/dev/null) VDSO_CPPFLAGS += $(LINUXINCLUDE) # Common C and assembly flags -- cgit v1.2.3 From 3e6f8d1fa18457d54b20917bd9174d27daf09ab9 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 19 Oct 2021 15:36:46 -0700 Subject: arm64: vdso32: require CROSS_COMPILE_COMPAT for gcc+bfd Similar to commit 231ad7f409f1 ("Makefile: infer --target from ARCH for CC=clang") There really is no point in setting --target based on $CROSS_COMPILE_COMPAT for clang when the integrated assembler is being used, since commit ef94340583ee ("arm64: vdso32: drop -no-integrated-as flag"). Allows COMPAT_VDSO to be selected without setting $CROSS_COMPILE_COMPAT when using clang and lld together. Before: $ ARCH=arm64 CROSS_COMPILE_COMPAT=arm-linux-gnueabi- make -j72 LLVM=1 defconfig $ grep CONFIG_COMPAT_VDSO .config CONFIG_COMPAT_VDSO=y $ ARCH=arm64 make -j72 LLVM=1 defconfig $ grep CONFIG_COMPAT_VDSO .config $ After: $ ARCH=arm64 CROSS_COMPILE_COMPAT=arm-linux-gnueabi- make -j72 LLVM=1 defconfig $ grep CONFIG_COMPAT_VDSO .config CONFIG_COMPAT_VDSO=y $ ARCH=arm64 make -j72 LLVM=1 defconfig $ grep CONFIG_COMPAT_VDSO .config CONFIG_COMPAT_VDSO=y Reviewed-by: Nathan Chancellor Suggested-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Reviewed-by: Vincenzo Frascino Link: https://lore.kernel.org/r/20211019223646.1146945-5-ndesaulniers@google.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 3 ++- arch/arm64/kernel/vdso32/Makefile | 17 +++++------------ 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5c7ae4c3954b..f0f2c95aa4c8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1264,7 +1264,8 @@ config KUSER_HELPERS config COMPAT_VDSO bool "Enable vDSO for 32-bit applications" - depends on !CPU_BIG_ENDIAN && "$(CROSS_COMPILE_COMPAT)" != "" + depends on !CPU_BIG_ENDIAN + depends on (CC_IS_CLANG && LD_IS_LLD) || "$(CROSS_COMPILE_COMPAT)" != "" select GENERIC_COMPAT_VDSO default y help diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index e478cebb9891..c8fec493a450 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -10,18 +10,15 @@ include $(srctree)/lib/vdso/Makefile # Same as cc-*option, but using CC_COMPAT instead of CC ifeq ($(CONFIG_CC_IS_CLANG), y) -CC_COMPAT_CLANG_FLAGS := --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%)) - CC_COMPAT ?= $(CC) -CC_COMPAT += $(CC_COMPAT_CLANG_FLAGS) - -ifneq ($(LLVM),) -LD_COMPAT ?= $(LD) +CC_COMPAT += --target=arm-linux-gnueabi else -LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld +CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc endif + +ifeq ($(CONFIG_LD_IS_LLD), y) +LD_COMPAT ?= $(LD) else -CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld endif @@ -45,10 +42,6 @@ VDSO_CPPFLAGS += $(LINUXINCLUDE) # Common C and assembly flags # From top-level Makefile VDSO_CAFLAGS := $(VDSO_CPPFLAGS) -ifneq ($(shell $(CC_COMPAT) --version 2>&1 | head -n 1 | grep clang),) -VDSO_CAFLAGS += --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%)) -endif - VDSO_CAFLAGS += $(call cc32-option,-fno-PIE) ifdef CONFIG_DEBUG_INFO VDSO_CAFLAGS += -g -- cgit v1.2.3 From 35d67794b88283337e0d311a4dbacc42d07a12a5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 19 Oct 2021 17:02:07 +0100 Subject: arm64: lib: __arch_clear_user(): fold fixups into body Like other functions, __arch_clear_user() places its exception fixups in the `.fixup` section without any clear association with __arch_clear_user() itself. If we backtrace the fixup code, it will be symbolized as an offset from the nearest prior symbol, which happens to be `__entry_tramp_text_end`. Further, since the PC adjustment for the fixup is akin to a direct branch rather than a function call, __arch_clear_user() itself will be missing from the backtrace. This is confusing and hinders debugging. In general this pattern will also be problematic for CONFIG_LIVEPATCH, since fixups often return to their associated function, but this isn't accurately captured in the stacktrace. To solve these issues for assembly functions, we must move fixups into the body of the functions themselves, after the usual fast-path returns. This patch does so for __arch_clear_user(). Inline assembly will be dealt with in subsequent patches. Other than the improved backtracing, there should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Acked-by: Robin Murphy Reviewed-by: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20211019160219.5202-2-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/lib/clear_user.S | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index a7efb2ad2a1c..2827fddc33f8 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -45,13 +45,11 @@ USER(9f, sttrh wzr, [x0]) USER(7f, sttrb wzr, [x2, #-1]) 5: mov x0, #0 ret -SYM_FUNC_END(__arch_clear_user) -EXPORT_SYMBOL(__arch_clear_user) - .section .fixup,"ax" - .align 2 + // Exception fixups 7: sub x0, x2, #5 // Adjust for faulting on the final byte... 8: add x0, x0, #4 // ...or the second word of the 4-7 byte case 9: sub x0, x2, x0 ret - .previous +SYM_FUNC_END(__arch_clear_user) +EXPORT_SYMBOL(__arch_clear_user) -- cgit v1.2.3 From 4012e0e22739eef92499171957145a60445c0b60 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 19 Oct 2021 17:02:08 +0100 Subject: arm64: lib: __arch_copy_from_user(): fold fixups into body Like other functions, __arch_copy_from_user() places its exception fixups in the `.fixup` section without any clear association with __arch_copy_from_user() itself. If we backtrace the fixup code, it will be symbolized as an offset from the nearest prior symbol, which happens to be `__entry_tramp_text_end`. Further, since the PC adjustment for the fixup is akin to a direct branch rather than a function call, __arch_copy_from_user() itself will be missing from the backtrace. This is confusing and hinders debugging. In general this pattern will also be problematic for CONFIG_LIVEPATCH, since fixups often return to their associated function, but this isn't accurately captured in the stacktrace. To solve these issues for assembly functions, we must move fixups into the body of the functions themselves, after the usual fast-path returns. This patch does so for __arch_copy_from_user(). Inline assembly will be dealt with in subsequent patches. Other than the improved backtracing, there should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Acked-by: Robin Murphy Reviewed-by: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20211019160219.5202-3-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/lib/copy_from_user.S | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 2cf999e41d30..34e317907524 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -60,11 +60,8 @@ SYM_FUNC_START(__arch_copy_from_user) #include "copy_template.S" mov x0, #0 // Nothing to copy ret -SYM_FUNC_END(__arch_copy_from_user) -EXPORT_SYMBOL(__arch_copy_from_user) - .section .fixup,"ax" - .align 2 + // Exception fixups 9997: cmp dst, dstin b.ne 9998f // Before being absolutely sure we couldn't copy anything, try harder @@ -72,4 +69,5 @@ USER(9998f, ldtrb tmp1w, [srcin]) strb tmp1w, [dst], #1 9998: sub x0, end, dst // bytes not copied ret - .previous +SYM_FUNC_END(__arch_copy_from_user) +EXPORT_SYMBOL(__arch_copy_from_user) -- cgit v1.2.3 From 139f9ab73d60cf76d770841a019c5284fcf26c74 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 19 Oct 2021 17:02:09 +0100 Subject: arm64: lib: __arch_copy_to_user(): fold fixups into body Like other functions, __arch_copy_to_user() places its exception fixups in the `.fixup` section without any clear association with __arch_copy_to_user() itself. If we backtrace the fixup code, it will be symbolized as an offset from the nearest prior symbol, which happens to be `__entry_tramp_text_end`. Further, since the PC adjustment for the fixup is akin to a direct branch rather than a function call, __arch_copy_to_user() itself will be missing from the backtrace. This is confusing and hinders debugging. In general this pattern will also be problematic for CONFIG_LIVEPATCH, since fixups often return to their associated function, but this isn't accurately captured in the stacktrace. To solve these issues for assembly functions, we must move fixups into the body of the functions themselves, after the usual fast-path returns. This patch does so for __arch_copy_to_user(). Inline assembly will be dealt with in subsequent patches. Other than the improved backtracing, there should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Acked-by: Robin Murphy Reviewed-by: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Mark Brown Cc: Will Deacon Link: https://lore.kernel.org/r/20211019160219.5202-4-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/lib/copy_to_user.S | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 9f380eecf653..802231772608 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -59,11 +59,8 @@ SYM_FUNC_START(__arch_copy_to_user) #include "copy_template.S" mov x0, #0 ret -SYM_FUNC_END(__arch_copy_to_user) -EXPORT_SYMBOL(__arch_copy_to_user) - .section .fixup,"ax" - .align 2 + // Exception fixups 9997: cmp dst, dstin b.ne 9998f // Before being absolutely sure we couldn't copy anything, try harder @@ -72,4 +69,5 @@ USER(9998f, sttrb tmp1w, [dst]) add dst, dst, #1 9998: sub x0, end, dst // bytes not copied ret - .previous +SYM_FUNC_END(__arch_copy_to_user) +EXPORT_SYMBOL(__arch_copy_to_user) -- cgit v1.2.3 From ae2b2f3384c69a7e4b3ee6fdbc7e1eeaaad3e634 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 19 Oct 2021 17:02:10 +0100 Subject: arm64: kvm: use kvm_exception_table_entry In subsequent patches we'll alter `struct exception_table_entry`, adding fields that are not needed for KVM exception fixups. In preparation for this, migrate KVM to its own `struct kvm_exception_table_entry`, which is identical to the current format of `struct exception_table_entry`. Comments are updated accordingly. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Cc: Alexandru Elisei Cc: Catalin Marinas Cc: James Morse Cc: Marc Zyngier Cc: Robin Murphy Cc: Suzuki K Poulose Cc: Will Deacon Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20211019160219.5202-5-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/kvm_asm.h | 7 ++++--- arch/arm64/kvm/hyp/include/hyp/switch.h | 10 +++++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index e86045ac43ba..6486b1db268e 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -263,9 +263,10 @@ extern u64 __kvm_get_mdcr_el2(void); /* * KVM extable for unexpected exceptions. - * In the same format _asm_extable, but output to a different section so that - * it can be mapped to EL2. The KVM version is not sorted. The caller must - * ensure: + * Create a struct kvm_exception_table_entry output to a section that can be + * mapped by EL2. The table is not sorted. + * + * The caller must ensure: * x18 has the hypervisor value to allow any Shadow-Call-Stack instrumented * code to write to it, and that SPSR_EL2 and ELR_EL2 are restored by the fixup. */ diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index a0e78a6027be..d5a47b93ef9b 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -30,8 +30,12 @@ #include #include -extern struct exception_table_entry __start___kvm_ex_table; -extern struct exception_table_entry __stop___kvm_ex_table; +struct kvm_exception_table_entry { + int insn, fixup; +}; + +extern struct kvm_exception_table_entry __start___kvm_ex_table; +extern struct kvm_exception_table_entry __stop___kvm_ex_table; /* Check whether the FP regs were dirtied while in the host-side run loop: */ static inline bool update_fp_enabled(struct kvm_vcpu *vcpu) @@ -510,7 +514,7 @@ static inline void __kvm_unexpected_el2_exception(void) { extern char __guest_exit_panic[]; unsigned long addr, fixup; - struct exception_table_entry *entry, *end; + struct kvm_exception_table_entry *entry, *end; unsigned long elr_el2 = read_sysreg(elr_el2); entry = &__start___kvm_ex_table; -- cgit v1.2.3 From 8ed1b498ada6c5bd9d9f53c59621734551829ec5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 19 Oct 2021 17:02:11 +0100 Subject: arm64: factor out GPR numbering helpers In we have macros to convert the names of general purpose registers (GPRs) into integer constants, which we use to manually build the encoding for `MRS` and `MSR` instructions where we can't rely on the assembler to do so for us. In subsequent patches we'll need to map the same GPR names to integer constants so that we can use this to build metadata for exception fixups. So that the we can use the mappings elsewhere, factor out the definitions into a new header, renaming the definitions to align with this "GPR num" naming for clarity. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Robin Murphy Cc: Will Deacon Link: https://lore.kernel.org/r/20211019160219.5202-6-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/gpr-num.h | 22 ++++++++++++++++++++++ arch/arm64/include/asm/sysreg.h | 25 ++++++++----------------- 2 files changed, 30 insertions(+), 17 deletions(-) create mode 100644 arch/arm64/include/asm/gpr-num.h diff --git a/arch/arm64/include/asm/gpr-num.h b/arch/arm64/include/asm/gpr-num.h new file mode 100644 index 000000000000..f936aa34dc63 --- /dev/null +++ b/arch/arm64/include/asm/gpr-num.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_GPR_NUM_H +#define __ASM_GPR_NUM_H + +#ifdef __ASSEMBLY__ + + .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 + .equ .L__gpr_num_x\num, \num + .endr + .equ .L__gpr_num_xzr, 31 + +#else /* __ASSEMBLY__ */ + +#define __DEFINE_ASM_GPR_NUMS \ +" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ +" .equ .L__gpr_num_x\\num, \\num\n" \ +" .endr\n" \ +" .equ .L__gpr_num_xzr, 31\n" + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_GPR_NUM_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b268082d67ed..58f6e669dab4 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -13,6 +13,8 @@ #include #include +#include + /* * ARMv8 ARM reserves the following encoding for system registers: * (Ref: ARMv8 ARM, Section: "System instruction class encoding overview", @@ -1192,17 +1194,12 @@ #ifdef __ASSEMBLY__ - .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 - .equ .L__reg_num_x\num, \num - .endr - .equ .L__reg_num_xzr, 31 - .macro mrs_s, rt, sreg - __emit_inst(0xd5200000|(\sreg)|(.L__reg_num_\rt)) + __emit_inst(0xd5200000|(\sreg)|(.L__gpr_num_\rt)) .endm .macro msr_s, sreg, rt - __emit_inst(0xd5000000|(\sreg)|(.L__reg_num_\rt)) + __emit_inst(0xd5000000|(\sreg)|(.L__gpr_num_\rt)) .endm #else @@ -1211,22 +1208,16 @@ #include #include -#define __DEFINE_MRS_MSR_S_REGNUM \ -" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ -" .equ .L__reg_num_x\\num, \\num\n" \ -" .endr\n" \ -" .equ .L__reg_num_xzr, 31\n" - #define DEFINE_MRS_S \ - __DEFINE_MRS_MSR_S_REGNUM \ + __DEFINE_ASM_GPR_NUMS \ " .macro mrs_s, rt, sreg\n" \ - __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) \ + __emit_inst(0xd5200000|(\\sreg)|(.L__gpr_num_\\rt)) \ " .endm\n" #define DEFINE_MSR_S \ - __DEFINE_MRS_MSR_S_REGNUM \ + __DEFINE_ASM_GPR_NUMS \ " .macro msr_s, sreg, rt\n" \ - __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) \ + __emit_inst(0xd5000000|(\\sreg)|(.L__gpr_num_\\rt)) \ " .endm\n" #define UNDEFINE_MRS_S \ -- cgit v1.2.3 From 286fba6c2a4566f02d4568e655a88e43ffee66d3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 19 Oct 2021 17:02:12 +0100 Subject: arm64: gpr-num: support W registers In subsequent patches we'll want to map W registers to their register numbers. Update gpr-num.h so that we can do this. Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Robin Murphy Cc: Will Deacon Link: https://lore.kernel.org/r/20211019160219.5202-7-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/gpr-num.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/gpr-num.h b/arch/arm64/include/asm/gpr-num.h index f936aa34dc63..05da4a7c5788 100644 --- a/arch/arm64/include/asm/gpr-num.h +++ b/arch/arm64/include/asm/gpr-num.h @@ -6,16 +6,20 @@ .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 .equ .L__gpr_num_x\num, \num + .equ .L__gpr_num_w\num, \num .endr .equ .L__gpr_num_xzr, 31 + .equ .L__gpr_num_wzr, 31 #else /* __ASSEMBLY__ */ #define __DEFINE_ASM_GPR_NUMS \ " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ " .equ .L__gpr_num_x\\num, \\num\n" \ +" .equ .L__gpr_num_w\\num, \\num\n" \ " .endr\n" \ -" .equ .L__gpr_num_xzr, 31\n" +" .equ .L__gpr_num_xzr, 31\n" \ +" .equ .L__gpr_num_wzr, 31\n" #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 819771cc289226e392d5d45f1d162b47ace4eff6 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 19 Oct 2021 17:02:13 +0100 Subject: arm64: extable: consolidate definitions In subsequent patches we'll alter the structure and usage of struct exception_table_entry. For inline assembly, we create these using the `_ASM_EXTABLE()` CPP macro defined in , and for plain assembly code we use the `_asm_extable()` GAS macro defined in , which are largely identical save for different escaping and stringification requirements. This patch moves the common definitions to a new header, so that it's easier to keep the two in-sync, and to remove the implication that these are only used for uaccess helpers (as e.g. load_unaligned_zeropad() is only used on kernel memory, and depends upon `_ASM_EXTABLE()`. At the same time, a few minor modifications are made for clarity and in preparation for subsequent patches: * The structure creation is factored out into an `__ASM_EXTABLE_RAW()` macro. This will make it easier to support different fixup variants in subsequent patches without needing to update all users of `_ASM_EXTABLE()`, and makes it easier to see tha the CPP and GAS variants of the macros are structurally identical. For the CPP macro, the stringification of fields is left to the wrapper macro, `_ASM_EXTABLE()`, as in subsequent patches it will be necessary to stringify fields in wrapper macros to safely concatenate strings which cannot be token-pasted together in CPP. * The fields of the structure are created separately on their own lines. This will make it easier to add/remove/modify individual fields clearly. * Additional parentheses are added around the use of macro arguments in field definitions to avoid any potential problems with evaluation due to operator precedence, and to make errors upon misuse clearer. * USER() is moved into , as it is not required by all assembly code, and is already refered to by comments in that file. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Robin Murphy Cc: Will Deacon Link: https://lore.kernel.org/r/20211019160219.5202-8-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/asm-extable.h | 48 ++++++++++++++++++++++++++++++++++++ arch/arm64/include/asm/asm-uaccess.h | 7 +++++- arch/arm64/include/asm/assembler.h | 29 ++-------------------- arch/arm64/include/asm/uaccess.h | 7 +----- arch/arm64/lib/clear_user.S | 2 +- 5 files changed, 58 insertions(+), 35 deletions(-) create mode 100644 arch/arm64/include/asm/asm-extable.h diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h new file mode 100644 index 000000000000..986b4c0d4792 --- /dev/null +++ b/arch/arm64/include/asm/asm-extable.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_ASM_EXTABLE_H +#define __ASM_ASM_EXTABLE_H + +#ifdef __ASSEMBLY__ + +#define __ASM_EXTABLE_RAW(insn, fixup) \ + .pushsection __ex_table, "a"; \ + .align 3; \ + .long ((insn) - .); \ + .long ((fixup) - .); \ + .popsection; + +/* + * Create an exception table entry for `insn`, which will branch to `fixup` + * when an unhandled fault is taken. + */ + .macro _asm_extable, insn, fixup + __ASM_EXTABLE_RAW(\insn, \fixup) + .endm + +/* + * Create an exception table entry for `insn` if `fixup` is provided. Otherwise + * do nothing. + */ + .macro _cond_extable, insn, fixup + .ifnc \fixup, + _asm_extable \insn, \fixup + .endif + .endm + +#else /* __ASSEMBLY__ */ + +#include + +#define __ASM_EXTABLE_RAW(insn, fixup) \ + ".pushsection __ex_table, \"a\"\n" \ + ".align 3\n" \ + ".long ((" insn ") - .)\n" \ + ".long ((" fixup ") - .)\n" \ + ".popsection\n" + +#define _ASM_EXTABLE(insn, fixup) \ + __ASM_EXTABLE_RAW(#insn, #fixup) + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index ccedf548dac9..0557af834e03 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -3,10 +3,11 @@ #define __ASM_ASM_UACCESS_H #include +#include +#include #include #include #include -#include /* * User access enabling/disabling macros. @@ -58,6 +59,10 @@ alternative_else_nop_endif .endm #endif +#define USER(l, x...) \ +9999: x; \ + _asm_extable 9999b, l + /* * Generate the assembly for LDTR/STTR with exception table entries. * This is complicated as there is no post-increment or pair versions of the diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index bfa58409a4d4..ec67480d55fb 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -14,9 +14,10 @@ #include -#include #include #include +#include +#include #include #include #include @@ -129,32 +130,6 @@ alternative_endif .endr .endm -/* - * Create an exception table entry for `insn`, which will branch to `fixup` - * when an unhandled fault is taken. - */ - .macro _asm_extable, insn, fixup - .pushsection __ex_table, "a" - .align 3 - .long (\insn - .), (\fixup - .) - .popsection - .endm - -/* - * Create an exception table entry for `insn` if `fixup` is provided. Otherwise - * do nothing. - */ - .macro _cond_extable, insn, fixup - .ifnc \fixup, - _asm_extable \insn, \fixup - .endif - .endm - - -#define USER(l, x...) \ -9999: x; \ - _asm_extable 9999b, l - /* * Register aliases. */ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 190b494e22ab..759019523b85 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -70,12 +71,6 @@ static inline unsigned long __range_ok(const void __user *addr, unsigned long si #define access_ok(addr, size) __range_ok(addr, size) -#define _ASM_EXTABLE(from, to) \ - " .pushsection __ex_table, \"a\"\n" \ - " .align 3\n" \ - " .long (" #from " - .), (" #to " - .)\n" \ - " .popsection\n" - /* * User access enabling/disabling. */ diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index 2827fddc33f8..a5a5f5b97b17 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -4,7 +4,7 @@ */ #include -#include +#include .text -- cgit v1.2.3 From e8c328d7de03bf4d7a18e38ff87a7c12fdf8afb1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 19 Oct 2021 17:02:14 +0100 Subject: arm64: extable: make fixup_exception() return bool The return values of fixup_exception() and arm64_bpf_fixup_exception() represent a boolean condition rather than an error code, so for clarity it would be better to return `bool` rather than `int`. This patch adjusts the code accordingly. While we're modifying the prototype, we also remove the unnecessary `extern` keyword, so that this won't look out of place when we make subsequent additions to the header. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Catalin Marinas Cc: Daniel Borkmann Cc: James Morse Cc: Jean-Philippe Brucker Cc: Robin Murphy Cc: Will Deacon Link: https://lore.kernel.org/r/20211019160219.5202-9-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/extable.h | 10 +++++----- arch/arm64/mm/extable.c | 6 +++--- arch/arm64/net/bpf_jit_comp.c | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h index b15eb4a3e6b2..1859b9fd566f 100644 --- a/arch/arm64/include/asm/extable.h +++ b/arch/arm64/include/asm/extable.h @@ -32,16 +32,16 @@ static inline bool in_bpf_jit(struct pt_regs *regs) } #ifdef CONFIG_BPF_JIT -int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, +bool arm64_bpf_fixup_exception(const struct exception_table_entry *ex, struct pt_regs *regs); #else /* !CONFIG_BPF_JIT */ static inline -int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, - struct pt_regs *regs) +bool arm64_bpf_fixup_exception(const struct exception_table_entry *ex, + struct pt_regs *regs) { - return 0; + return false; } #endif /* !CONFIG_BPF_JIT */ -extern int fixup_exception(struct pt_regs *regs); +bool fixup_exception(struct pt_regs *regs); #endif diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index aa0060178343..3ebc738870f5 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -6,17 +6,17 @@ #include #include -int fixup_exception(struct pt_regs *regs) +bool fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *fixup; fixup = search_exception_tables(instruction_pointer(regs)); if (!fixup) - return 0; + return false; if (in_bpf_jit(regs)) return arm64_bpf_fixup_exception(fixup, regs); regs->pc = (unsigned long)&fixup->fixup + fixup->fixup; - return 1; + return true; } diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 41c23f474ea6..956c841ef346 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -358,15 +358,15 @@ static void build_epilogue(struct jit_ctx *ctx) #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) #define BPF_FIXUP_REG_MASK GENMASK(31, 27) -int arm64_bpf_fixup_exception(const struct exception_table_entry *ex, - struct pt_regs *regs) +bool arm64_bpf_fixup_exception(const struct exception_table_entry *ex, + struct pt_regs *regs) { off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); regs->regs[dst_reg] = 0; regs->pc = (unsigned long)&ex->fixup - offset; - return 1; + return true; } /* For accesses to BTF pointers, add an entry to the exception table */ -- cgit v1.2.3 From 5d0e79051425a6607959e2ab918ef3068cce07f0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 19 Oct 2021 17:02:15 +0100 Subject: arm64: extable: use `ex` for `exception_table_entry` Subsequent patches will extend `struct exception_table_entry` with more fields, and the distinction between the entry and its `fixup` field will become more important. For clarity, let's consistently use `ex` to refer to refer to an entire entry. In subsequent patches we'll use `fixup` to refer to the fixup field specifically. This matches the naming convention used today in arch/arm64/net/bpf_jit_comp.c. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Acked-by: Robin Murphy Reviewed-by: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon Link: https://lore.kernel.org/r/20211019160219.5202-10-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/mm/extable.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index 3ebc738870f5..dba3d59f3eca 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -8,15 +8,15 @@ bool fixup_exception(struct pt_regs *regs) { - const struct exception_table_entry *fixup; + const struct exception_table_entry *ex; - fixup = search_exception_tables(instruction_pointer(regs)); - if (!fixup) + ex = search_exception_tables(instruction_pointer(regs)); + if (!ex) return false; if (in_bpf_jit(regs)) - return arm64_bpf_fixup_exception(fixup, regs); + return arm64_bpf_fixup_exception(ex, regs); - regs->pc = (unsigned long)&fixup->fixup + fixup->fixup; + regs->pc = (unsigned long)&ex->fixup + ex->fixup; return true; } -- cgit v1.2.3 From d6e2cc56477538255160ed02fdb11b0da60356cc Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 19 Oct 2021 17:02:16 +0100 Subject: arm64: extable: add `type` and `data` fields Subsequent patches will add specialized handlers for fixups, in addition to the simple PC fixup and BPF handlers we have today. In preparation, this patch adds a new `type` field to struct exception_table_entry, and uses this to distinguish the fixup and BPF cases. A `data` field is also added so that subsequent patches can associate data specific to each exception site (e.g. register numbers). Handlers are named ex_handler_*() for consistency, following the exmaple of x86. At the same time, get_ex_fixup() is split out into a helper so that it can be used by other ex_handler_*() functions ins subsequent patches. This patch will increase the size of the exception tables, which will be remedied by subsequent patches removing redundant fixup code. There should be no functional change as a result of this patch. Since each entry is now 12 bytes in size, we must reduce the alignment of each entry from `.align 3` (i.e. 8 bytes) to `.align 2` (i.e. 4 bytes), which is the natrual alignment of the `insn` and `fixup` fields. The current 8-byte alignment is a holdover from when the `insn` and `fixup` fields was 8 bytes, and while not harmful has not been necessary since commit: 6c94f27ac847ff8e ("arm64: switch to relative exception tables") Similarly, RO_EXCEPTION_TABLE_ALIGN is dropped to 4 bytes. Concurrently with this patch, x86's exception table entry format is being updated (similarly to a 12-byte format, with 32-bytes of absolute data). Once both have been merged it should be possible to unify the sorttable logic for the two. Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Catalin Marinas Cc: Daniel Borkmann Cc: James Morse Cc: Jean-Philippe Brucker Cc: Robin Murphy Cc: Will Deacon Link: https://lore.kernel.org/r/20211019160219.5202-11-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/asm-extable.h | 32 ++++++++++++++++++++------------ arch/arm64/include/asm/extable.h | 19 +++++++++++++++---- arch/arm64/kernel/vmlinux.lds.S | 2 +- arch/arm64/mm/extable.c | 29 +++++++++++++++++++++++++---- arch/arm64/net/bpf_jit_comp.c | 7 +++++-- scripts/sorttable.c | 30 ++++++++++++++++++++++++++++++ 6 files changed, 96 insertions(+), 23 deletions(-) diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h index 986b4c0d4792..5ee748edaef1 100644 --- a/arch/arm64/include/asm/asm-extable.h +++ b/arch/arm64/include/asm/asm-extable.h @@ -2,13 +2,19 @@ #ifndef __ASM_ASM_EXTABLE_H #define __ASM_ASM_EXTABLE_H +#define EX_TYPE_NONE 0 +#define EX_TYPE_FIXUP 1 +#define EX_TYPE_BPF 2 + #ifdef __ASSEMBLY__ -#define __ASM_EXTABLE_RAW(insn, fixup) \ - .pushsection __ex_table, "a"; \ - .align 3; \ - .long ((insn) - .); \ - .long ((fixup) - .); \ +#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ + .pushsection __ex_table, "a"; \ + .align 2; \ + .long ((insn) - .); \ + .long ((fixup) - .); \ + .short (type); \ + .short (data); \ .popsection; /* @@ -16,7 +22,7 @@ * when an unhandled fault is taken. */ .macro _asm_extable, insn, fixup - __ASM_EXTABLE_RAW(\insn, \fixup) + __ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0) .endm /* @@ -33,15 +39,17 @@ #include -#define __ASM_EXTABLE_RAW(insn, fixup) \ - ".pushsection __ex_table, \"a\"\n" \ - ".align 3\n" \ - ".long ((" insn ") - .)\n" \ - ".long ((" fixup ") - .)\n" \ +#define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ + ".pushsection __ex_table, \"a\"\n" \ + ".align 2\n" \ + ".long ((" insn ") - .)\n" \ + ".long ((" fixup ") - .)\n" \ + ".short (" type ")\n" \ + ".short (" data ")\n" \ ".popsection\n" #define _ASM_EXTABLE(insn, fixup) \ - __ASM_EXTABLE_RAW(#insn, #fixup) + __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0") #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h index 1859b9fd566f..8b300dd28def 100644 --- a/arch/arm64/include/asm/extable.h +++ b/arch/arm64/include/asm/extable.h @@ -18,10 +18,21 @@ struct exception_table_entry { int insn, fixup; + short type, data; }; #define ARCH_HAS_RELATIVE_EXTABLE +#define swap_ex_entry_fixup(a, b, tmp, delta) \ +do { \ + (a)->fixup = (b)->fixup + (delta); \ + (b)->fixup = (tmp).fixup - (delta); \ + (a)->type = (b)->type; \ + (b)->type = (tmp).type; \ + (a)->data = (b)->data; \ + (b)->data = (tmp).data; \ +} while (0) + static inline bool in_bpf_jit(struct pt_regs *regs) { if (!IS_ENABLED(CONFIG_BPF_JIT)) @@ -32,12 +43,12 @@ static inline bool in_bpf_jit(struct pt_regs *regs) } #ifdef CONFIG_BPF_JIT -bool arm64_bpf_fixup_exception(const struct exception_table_entry *ex, - struct pt_regs *regs); +bool ex_handler_bpf(const struct exception_table_entry *ex, + struct pt_regs *regs); #else /* !CONFIG_BPF_JIT */ static inline -bool arm64_bpf_fixup_exception(const struct exception_table_entry *ex, - struct pt_regs *regs) +bool ex_handler_bpf(const struct exception_table_entry *ex, + struct pt_regs *regs) { return false; } diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index f6b1a88245db..fa8a8e8ddcfd 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -57,7 +57,7 @@ #define SBSS_ALIGN 0 #endif -#define RO_EXCEPTION_TABLE_ALIGN 8 +#define RO_EXCEPTION_TABLE_ALIGN 4 #define RUNTIME_DISCARD_EXIT #include diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index dba3d59f3eca..c2951b963335 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -6,6 +6,24 @@ #include #include +#include + +typedef bool (*ex_handler_t)(const struct exception_table_entry *, + struct pt_regs *); + +static inline unsigned long +get_ex_fixup(const struct exception_table_entry *ex) +{ + return ((unsigned long)&ex->fixup + ex->fixup); +} + +static bool ex_handler_fixup(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + regs->pc = get_ex_fixup(ex); + return true; +} + bool fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *ex; @@ -14,9 +32,12 @@ bool fixup_exception(struct pt_regs *regs) if (!ex) return false; - if (in_bpf_jit(regs)) - return arm64_bpf_fixup_exception(ex, regs); + switch (ex->type) { + case EX_TYPE_FIXUP: + return ex_handler_fixup(ex, regs); + case EX_TYPE_BPF: + return ex_handler_bpf(ex, regs); + } - regs->pc = (unsigned long)&ex->fixup + ex->fixup; - return true; + BUG(); } diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 956c841ef346..7df7345e60d8 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -358,8 +359,8 @@ static void build_epilogue(struct jit_ctx *ctx) #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) #define BPF_FIXUP_REG_MASK GENMASK(31, 27) -bool arm64_bpf_fixup_exception(const struct exception_table_entry *ex, - struct pt_regs *regs) +bool ex_handler_bpf(const struct exception_table_entry *ex, + struct pt_regs *regs) { off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); @@ -412,6 +413,8 @@ static int add_exception_handler(const struct bpf_insn *insn, ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg); + ex->type = EX_TYPE_BPF; + ctx->exentry_idx++; return 0; } diff --git a/scripts/sorttable.c b/scripts/sorttable.c index 6ee4fa882919..ee95bb47a50d 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -231,6 +231,34 @@ static void sort_relative_table(char *extab_image, int image_size) } } +static void arm64_sort_relative_table(char *extab_image, int image_size) +{ + int i = 0; + + while (i < image_size) { + uint32_t *loc = (uint32_t *)(extab_image + i); + + w(r(loc) + i, loc); + w(r(loc + 1) + i + 4, loc + 1); + /* Don't touch the fixup type or data */ + + i += sizeof(uint32_t) * 3; + } + + qsort(extab_image, image_size / 12, 12, compare_relative_table); + + i = 0; + while (i < image_size) { + uint32_t *loc = (uint32_t *)(extab_image + i); + + w(r(loc) - i, loc); + w(r(loc + 1) - (i + 4), loc + 1); + /* Don't touch the fixup type or data */ + + i += sizeof(uint32_t) * 3; + } +} + static void x86_sort_relative_table(char *extab_image, int image_size) { int i = 0; @@ -343,6 +371,8 @@ static int do_file(char const *const fname, void *addr) custom_sort = s390_sort_relative_table; break; case EM_AARCH64: + custom_sort = arm64_sort_relative_table; + break; case EM_PARISC: case EM_PPC: case EM_PPC64: -- cgit v1.2.3 From 2e77a62cb3a6d2eb9dd875516411bcd131dd04e7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 19 Oct 2021 17:02:17 +0100 Subject: arm64: extable: add a dedicated uaccess handler For inline assembly, we place exception fixups out-of-line in the `.fixup` section such that these are out of the way of the fast path. This has a few drawbacks: * Since the fixup code is anonymous, backtraces will symbolize fixups as offsets from the nearest prior symbol, currently `__entry_tramp_text_end`. This is confusing, and painful to debug without access to the relevant vmlinux. * Since the exception handler adjusts the PC to execute the fixup, and the fixup uses a direct branch back into the function it fixes, backtraces of fixups miss the original function. This is confusing, and violates requirements for RELIABLE_STACKTRACE (and therefore LIVEPATCH). * Inline assembly and associated fixups are generated from templates, and we have many copies of logically identical fixups which only differ in which specific registers are written to and which address is branched to at the end of the fixup. This is potentially wasteful of I-cache resources, and makes it hard to add additional logic to fixups without significant bloat. This patch address all three concerns for inline uaccess fixups by adding a dedicated exception handler which updates registers in exception context and subsequent returns back into the function which faulted, removing the need for fixups specialized to each faulting instruction. Other than backtracing, there should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Robin Murphy Cc: Will Deacon Link: https://lore.kernel.org/r/20211019160219.5202-12-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/asm-extable.h | 24 ++++++++++++++++++++++++ arch/arm64/include/asm/futex.h | 25 ++++++++----------------- arch/arm64/include/asm/uaccess.h | 19 ++++--------------- arch/arm64/kernel/armv8_deprecated.c | 12 +++--------- arch/arm64/kernel/traps.c | 9 ++------- arch/arm64/mm/extable.c | 17 +++++++++++++++++ 6 files changed, 58 insertions(+), 48 deletions(-) diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h index 5ee748edaef1..11209da19c62 100644 --- a/arch/arm64/include/asm/asm-extable.h +++ b/arch/arm64/include/asm/asm-extable.h @@ -5,6 +5,7 @@ #define EX_TYPE_NONE 0 #define EX_TYPE_FIXUP 1 #define EX_TYPE_BPF 2 +#define EX_TYPE_UACCESS_ERR_ZERO 3 #ifdef __ASSEMBLY__ @@ -37,8 +38,11 @@ #else /* __ASSEMBLY__ */ +#include #include +#include + #define __ASM_EXTABLE_RAW(insn, fixup, type, data) \ ".pushsection __ex_table, \"a\"\n" \ ".align 2\n" \ @@ -51,6 +55,26 @@ #define _ASM_EXTABLE(insn, fixup) \ __ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0") +#define EX_DATA_REG_ERR_SHIFT 0 +#define EX_DATA_REG_ERR GENMASK(4, 0) +#define EX_DATA_REG_ZERO_SHIFT 5 +#define EX_DATA_REG_ZERO GENMASK(9, 5) + +#define EX_DATA_REG(reg, gpr) \ + "((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")" + +#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero) \ + __DEFINE_ASM_GPR_NUMS \ + __ASM_EXTABLE_RAW(#insn, #fixup, \ + __stringify(EX_TYPE_UACCESS_ERR_ZERO), \ + "(" \ + EX_DATA_REG(ERR, err) " | " \ + EX_DATA_REG(ZERO, zero) \ + ")") + +#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr) + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 8e41faa37c69..bc06691d2062 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -25,19 +25,14 @@ do { \ " cbz %w0, 3f\n" \ " sub %w4, %w4, %w0\n" \ " cbnz %w4, 1b\n" \ -" mov %w0, %w7\n" \ +" mov %w0, %w6\n" \ "3:\n" \ " dmb ish\n" \ -" .pushsection .fixup,\"ax\"\n" \ -" .align 2\n" \ -"4: mov %w0, %w6\n" \ -" b 3b\n" \ -" .popsection\n" \ - _ASM_EXTABLE(1b, 4b) \ - _ASM_EXTABLE(2b, 4b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w0) \ + _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w0) \ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp), \ "+r" (loops) \ - : "r" (oparg), "Ir" (-EFAULT), "Ir" (-EAGAIN) \ + : "r" (oparg), "Ir" (-EAGAIN) \ : "memory"); \ uaccess_disable_privileged(); \ } while (0) @@ -105,18 +100,14 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, " cbz %w3, 3f\n" " sub %w4, %w4, %w3\n" " cbnz %w4, 1b\n" -" mov %w0, %w8\n" +" mov %w0, %w7\n" "3:\n" " dmb ish\n" "4:\n" -" .pushsection .fixup,\"ax\"\n" -"5: mov %w0, %w7\n" -" b 4b\n" -" .popsection\n" - _ASM_EXTABLE(1b, 5b) - _ASM_EXTABLE(2b, 5b) + _ASM_EXTABLE_UACCESS_ERR(1b, 4b, %w0) + _ASM_EXTABLE_UACCESS_ERR(2b, 4b, %w0) : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops) - : "r" (oldval), "r" (newval), "Ir" (-EFAULT), "Ir" (-EAGAIN) + : "r" (oldval), "r" (newval), "Ir" (-EAGAIN) : "memory"); uaccess_disable_privileged(); diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 759019523b85..9bc218991c5a 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -255,15 +255,9 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) asm volatile( \ "1: " load " " reg "1, [%2]\n" \ "2:\n" \ - " .section .fixup, \"ax\"\n" \ - " .align 2\n" \ - "3: mov %w0, %3\n" \ - " mov %1, #0\n" \ - " b 2b\n" \ - " .previous\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \ : "+r" (err), "=&r" (x) \ - : "r" (addr), "i" (-EFAULT)) + : "r" (addr)) #define __raw_get_mem(ldr, x, ptr, err) \ do { \ @@ -332,14 +326,9 @@ do { \ asm volatile( \ "1: " store " " reg "1, [%2]\n" \ "2:\n" \ - " .section .fixup,\"ax\"\n" \ - " .align 2\n" \ - "3: mov %w0, %3\n" \ - " b 2b\n" \ - " .previous\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0) \ : "+r" (err) \ - : "r" (x), "r" (addr), "i" (-EFAULT)) + : "r" (x), "r" (addr)) #define __raw_put_mem(str, x, ptr, err) \ do { \ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 0e86e8b9cedd..6875a16b09d2 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -279,7 +279,7 @@ static void __init register_insn_emulation_sysctl(void) do { \ uaccess_enable_privileged(); \ __asm__ __volatile__( \ - " mov %w3, %w7\n" \ + " mov %w3, %w6\n" \ "0: ldxr"B" %w2, [%4]\n" \ "1: stxr"B" %w0, %w1, [%4]\n" \ " cbz %w0, 2f\n" \ @@ -290,16 +290,10 @@ do { \ "2:\n" \ " mov %w1, %w2\n" \ "3:\n" \ - " .pushsection .fixup,\"ax\"\n" \ - " .align 2\n" \ - "4: mov %w0, %w6\n" \ - " b 3b\n" \ - " .popsection" \ - _ASM_EXTABLE(0b, 4b) \ - _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE_UACCESS_ERR(0b, 3b, %w0) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w0) \ : "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2) \ : "r" ((unsigned long)addr), "i" (-EAGAIN), \ - "i" (-EFAULT), \ "i" (__SWP_LL_SC_LOOPS) \ : "memory"); \ uaccess_disable_privileged(); \ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index b03e383d944a..268a81a3006e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -527,14 +527,9 @@ NOKPROBE_SYMBOL(do_ptrauth_fault); "1: " insn ", %1\n" \ " mov %w0, #0\n" \ "2:\n" \ - " .pushsection .fixup,\"ax\"\n" \ - " .align 2\n" \ - "3: mov %w0, %w2\n" \ - " b 2b\n" \ - " .popsection\n" \ - _ASM_EXTABLE(1b, 3b) \ + _ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0) \ : "=r" (res) \ - : "r" (address), "i" (-EFAULT)); \ + : "r" (address)); \ uaccess_ttbr0_disable(); \ } diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index c2951b963335..bbbc95313f2e 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -3,10 +3,12 @@ * Based on arch/arm/mm/extable.c */ +#include #include #include #include +#include typedef bool (*ex_handler_t)(const struct exception_table_entry *, struct pt_regs *); @@ -24,6 +26,19 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex, return true; } +static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); + int reg_zero = FIELD_GET(EX_DATA_REG_ZERO, ex->data); + + pt_regs_write_reg(regs, reg_err, -EFAULT); + pt_regs_write_reg(regs, reg_zero, 0); + + regs->pc = get_ex_fixup(ex); + return true; +} + bool fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *ex; @@ -37,6 +52,8 @@ bool fixup_exception(struct pt_regs *regs) return ex_handler_fixup(ex, regs); case EX_TYPE_BPF: return ex_handler_bpf(ex, regs); + case EX_TYPE_UACCESS_ERR_ZERO: + return ex_handler_uaccess_err_zero(ex, regs); } BUG(); -- cgit v1.2.3 From 753b32368705c396000f95f33c3b7018474e33ad Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 19 Oct 2021 17:02:18 +0100 Subject: arm64: extable: add load_unaligned_zeropad() handler For inline assembly, we place exception fixups out-of-line in the `.fixup` section such that these are out of the way of the fast path. This has a few drawbacks: * Since the fixup code is anonymous, backtraces will symbolize fixups as offsets from the nearest prior symbol, currently `__entry_tramp_text_end`. This is confusing, and painful to debug without access to the relevant vmlinux. * Since the exception handler adjusts the PC to execute the fixup, and the fixup uses a direct branch back into the function it fixes, backtraces of fixups miss the original function. This is confusing, and violates requirements for RELIABLE_STACKTRACE (and therefore LIVEPATCH). * Inline assembly and associated fixups are generated from templates, and we have many copies of logically identical fixups which only differ in which specific registers are written to and which address is branched to at the end of the fixup. This is potentially wasteful of I-cache resources, and makes it hard to add additional logic to fixups without significant bloat. * In the case of load_unaligned_zeropad(), the logic in the fixup requires a temporary register that we must allocate even in the fast-path where it will not be used. This patch address all four concerns for load_unaligned_zeropad() fixups by adding a dedicated exception handler which performs the fixup logic in exception context and subsequent returns back after the faulting instruction. For the moment, the fixup logic is identical to the old assembly fixup logic, but in future we could enhance this by taking the ESR and FAR into account to constrain the faults we try to fix up, or to specialize fixups for MTE tag check faults. Other than backtracing, there should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Robin Murphy Cc: Will Deacon Link: https://lore.kernel.org/r/20211019160219.5202-13-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/asm-extable.h | 15 +++++++++++++++ arch/arm64/include/asm/word-at-a-time.h | 21 ++++----------------- arch/arm64/mm/extable.c | 29 +++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h index 11209da19c62..c39f2437e08e 100644 --- a/arch/arm64/include/asm/asm-extable.h +++ b/arch/arm64/include/asm/asm-extable.h @@ -6,6 +6,7 @@ #define EX_TYPE_FIXUP 1 #define EX_TYPE_BPF 2 #define EX_TYPE_UACCESS_ERR_ZERO 3 +#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD 4 #ifdef __ASSEMBLY__ @@ -75,6 +76,20 @@ #define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err) \ _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, wzr) +#define EX_DATA_REG_DATA_SHIFT 0 +#define EX_DATA_REG_DATA GENMASK(4, 0) +#define EX_DATA_REG_ADDR_SHIFT 5 +#define EX_DATA_REG_ADDR GENMASK(9, 5) + +#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \ + __DEFINE_ASM_GPR_NUMS \ + __ASM_EXTABLE_RAW(#insn, #fixup, \ + __stringify(EX_TYPE_LOAD_UNALIGNED_ZEROPAD), \ + "(" \ + EX_DATA_REG(DATA, data) " | " \ + EX_DATA_REG(ADDR, addr) \ + ")") + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ASM_EXTABLE_H */ diff --git a/arch/arm64/include/asm/word-at-a-time.h b/arch/arm64/include/asm/word-at-a-time.h index 2dcb104c645b..1c8e4f2490bf 100644 --- a/arch/arm64/include/asm/word-at-a-time.h +++ b/arch/arm64/include/asm/word-at-a-time.h @@ -53,29 +53,16 @@ static inline unsigned long find_zero(unsigned long mask) */ static inline unsigned long load_unaligned_zeropad(const void *addr) { - unsigned long ret, tmp; + unsigned long ret; __uaccess_enable_tco_async(); /* Load word from unaligned pointer addr */ asm( - "1: ldr %0, %3\n" + "1: ldr %0, %2\n" "2:\n" - " .pushsection .fixup,\"ax\"\n" - " .align 2\n" - "3: bic %1, %2, #0x7\n" - " ldr %0, [%1]\n" - " and %1, %2, #0x7\n" - " lsl %1, %1, #0x3\n" -#ifndef __AARCH64EB__ - " lsr %0, %0, %1\n" -#else - " lsl %0, %0, %1\n" -#endif - " b 2b\n" - " .popsection\n" - _ASM_EXTABLE(1b, 3b) - : "=&r" (ret), "=&r" (tmp) + _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1) + : "=&r" (ret) : "r" (addr), "Q" (*(unsigned long *)addr)); __uaccess_disable_tco_async(); diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index bbbc95313f2e..c3d53811a15e 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -39,6 +39,33 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, return true; } +static bool +ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex, + struct pt_regs *regs) +{ + int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->type); + int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->type); + unsigned long data, addr, offset; + + addr = pt_regs_read_reg(regs, reg_addr); + + offset = addr & 0x7UL; + addr &= ~0x7UL; + + data = *(unsigned long*)addr; + +#ifndef __AARCH64EB__ + data >>= 8 * offset; +#else + data <<= 8 * offset; +#endif + + pt_regs_write_reg(regs, reg_data, data); + + regs->pc = get_ex_fixup(ex); + return true; +} + bool fixup_exception(struct pt_regs *regs) { const struct exception_table_entry *ex; @@ -54,6 +81,8 @@ bool fixup_exception(struct pt_regs *regs) return ex_handler_bpf(ex, regs); case EX_TYPE_UACCESS_ERR_ZERO: return ex_handler_uaccess_err_zero(ex, regs); + case EX_TYPE_LOAD_UNALIGNED_ZEROPAD: + return ex_handler_load_unaligned_zeropad(ex, regs); } BUG(); -- cgit v1.2.3 From bf6e667f47384585f737216ea1e928d987c3e6e2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 19 Oct 2021 17:02:19 +0100 Subject: arm64: vmlinux.lds.S: remove `.fixup` section We no longer place anything into a `.fixup` section, so we no longer need to place those sections into the `.text` section in the main kernel Image. Remove the use of `.fixup`. Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Robin Murphy Cc: Will Deacon Link: https://lore.kernel.org/r/20211019160219.5202-14-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/vmlinux.lds.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index fa8a8e8ddcfd..443f03453036 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -161,7 +161,6 @@ SECTIONS IDMAP_TEXT HIBERNATE_TEXT TRAMP_TEXT - *(.fixup) *(.gnu.warning) . = ALIGN(16); *(.got) /* Global offset table */ -- cgit v1.2.3 From 260ea4ba94e88385724754c644212ce552de8b01 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 19 Oct 2021 19:18:51 +0100 Subject: selftests: arm64: Factor out utility functions for assembly FP tests The various floating point test programs written in assembly have a bunch of helper functions and macros which are cut'n'pasted between them. Factor them out into a separate source file which is linked into all of them. We don't include memcmp() since it isn't as generic as it should be and directly branches to report an error in the programs. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20211019181851.3341232-1-broonie@kernel.org Signed-off-by: Will Deacon --- tools/testing/selftests/arm64/fp/Makefile | 4 +- tools/testing/selftests/arm64/fp/asm-utils.S | 172 +++++++++++++++++++++++++ tools/testing/selftests/arm64/fp/assembler.h | 11 ++ tools/testing/selftests/arm64/fp/fpsimd-test.S | 164 ----------------------- tools/testing/selftests/arm64/fp/sve-test.S | 163 ----------------------- 5 files changed, 185 insertions(+), 329 deletions(-) create mode 100644 tools/testing/selftests/arm64/fp/asm-utils.S diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile index 4367125b7c27..ba1488c7c315 100644 --- a/tools/testing/selftests/arm64/fp/Makefile +++ b/tools/testing/selftests/arm64/fp/Makefile @@ -9,12 +9,12 @@ TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress \ all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED) -fpsimd-test: fpsimd-test.o +fpsimd-test: fpsimd-test.o asm-utils.o $(CC) -nostdlib $^ -o $@ rdvl-sve: rdvl-sve.o rdvl.o sve-ptrace: sve-ptrace.o sve-probe-vls: sve-probe-vls.o rdvl.o -sve-test: sve-test.o +sve-test: sve-test.o asm-utils.o $(CC) -nostdlib $^ -o $@ vec-syscfg: vec-syscfg.o rdvl.o vlset: vlset.o diff --git a/tools/testing/selftests/arm64/fp/asm-utils.S b/tools/testing/selftests/arm64/fp/asm-utils.S new file mode 100644 index 000000000000..4b9728efc18d --- /dev/null +++ b/tools/testing/selftests/arm64/fp/asm-utils.S @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2021 ARM Limited. +// Original author: Dave Martin +// +// Utility functions for assembly code. + +#include +#include "assembler.h" + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction +.globl putc + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction +.globl puts + +// Print an unsigned decimal number x0 to stdout +// Clobbers x0-x4,x8 +function putdec + mov x1, sp + str x30, [sp, #-32]! // Result can't be > 20 digits + + mov x2, #0 + strb w2, [x1, #-1]! // Write the NUL terminator + + mov x2, #10 +0: udiv x3, x0, x2 // div-mod loop to generate the digits + msub x0, x3, x2, x0 + add w0, w0, #'0' + strb w0, [x1, #-1]! + mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! + +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction +.globl putdec + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction +.globl putdecn + +// Clobbers x0-x3,x8 +function puthexb + str x30, [sp, #-0x10]! + + mov w3, w0 + lsr w0, w0, #4 + bl puthexnibble + mov w0, w3 + + ldr x30, [sp], #0x10 + // fall through to puthexnibble +endfunction +.globl puthexb + +// Clobbers x0-x2,x8 +function puthexnibble + and w0, w0, #0xf + cmp w0, #10 + blo 1f + add w0, w0, #'a' - ('9' + 1) +1: add w0, w0, #'0' + b putc +endfunction +.globl puthexnibble + +// x0=data in, x1=size in, clobbers x0-x5,x8 +function dumphex + str x30, [sp, #-0x10]! + + mov x4, x0 + mov x5, x1 + +0: subs x5, x5, #1 + b.lo 1f + ldrb w0, [x4], #1 + bl puthexb + b 0b + +1: ldr x30, [sp], #0x10 + ret +endfunction +.globl dumphex + + // Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. +// Clobbers x0-x3 +function memcpy + cmp x2, #0 + b.eq 1f +0: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + b.ne 0b +1: ret +endfunction +.globl memcpy + +// Fill x1 bytes starting at x0 with 0xae (for canary purposes) +// Clobbers x1, x2. +function memfill_ae + mov w2, #0xae + b memfill +endfunction +.globl memfill_ae + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. +function memclr + mov w2, #0 +endfunction +.globl memclr + // fall through to memfill + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction +.globl memfill diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h index 8944f2189206..90bd433d2665 100644 --- a/tools/testing/selftests/arm64/fp/assembler.h +++ b/tools/testing/selftests/arm64/fp/assembler.h @@ -54,4 +54,15 @@ endfunction .purgem \name\()_entry .endm +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + #endif /* ! ASSEMBLER_H */ diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S index 0dbd594c2747..e21e8ea52c7e 100644 --- a/tools/testing/selftests/arm64/fp/fpsimd-test.S +++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S @@ -33,131 +33,6 @@ define_accessor setv, NVR, _vldr define_accessor getv, NVR, _vstr -// Print a single character x0 to stdout -// Clobbers x0-x2,x8 -function putc - str x0, [sp, #-16]! - - mov x0, #1 // STDOUT_FILENO - mov x1, sp - mov x2, #1 - mov x8, #__NR_write - svc #0 - - add sp, sp, #16 - ret -endfunction - -// Print a NUL-terminated string starting at address x0 to stdout -// Clobbers x0-x3,x8 -function puts - mov x1, x0 - - mov x2, #0 -0: ldrb w3, [x0], #1 - cbz w3, 1f - add x2, x2, #1 - b 0b - -1: mov w0, #1 // STDOUT_FILENO - mov x8, #__NR_write - svc #0 - - ret -endfunction - -// Utility macro to print a literal string -// Clobbers x0-x4,x8 -.macro puts string - .pushsection .rodata.str1.1, "aMS", 1 -.L__puts_literal\@: .string "\string" - .popsection - - ldr x0, =.L__puts_literal\@ - bl puts -.endm - -// Print an unsigned decimal number x0 to stdout -// Clobbers x0-x4,x8 -function putdec - mov x1, sp - str x30, [sp, #-32]! // Result can't be > 20 digits - - mov x2, #0 - strb w2, [x1, #-1]! // Write the NUL terminator - - mov x2, #10 -0: udiv x3, x0, x2 // div-mod loop to generate the digits - msub x0, x3, x2, x0 - add w0, w0, #'0' - strb w0, [x1, #-1]! - mov x0, x3 - cbnz x3, 0b - - ldrb w0, [x1] - cbnz w0, 1f - mov w0, #'0' // Print "0" for 0, not "" - strb w0, [x1, #-1]! - -1: mov x0, x1 - bl puts - - ldr x30, [sp], #32 - ret -endfunction - -// Print an unsigned decimal number x0 to stdout, followed by a newline -// Clobbers x0-x5,x8 -function putdecn - mov x5, x30 - - bl putdec - mov x0, #'\n' - bl putc - - ret x5 -endfunction - - -// Clobbers x0-x3,x8 -function puthexb - str x30, [sp, #-0x10]! - - mov w3, w0 - lsr w0, w0, #4 - bl puthexnibble - mov w0, w3 - - ldr x30, [sp], #0x10 - // fall through to puthexnibble -endfunction -// Clobbers x0-x2,x8 -function puthexnibble - and w0, w0, #0xf - cmp w0, #10 - blo 1f - add w0, w0, #'a' - ('9' + 1) -1: add w0, w0, #'0' - b putc -endfunction - -// x0=data in, x1=size in, clobbers x0-x5,x8 -function dumphex - str x30, [sp, #-0x10]! - - mov x4, x0 - mov x5, x1 - -0: subs x5, x5, #1 - b.lo 1f - ldrb w0, [x4], #1 - bl puthexb - b 0b - -1: ldr x30, [sp], #0x10 - ret -endfunction - // Declare some storate space to shadow the SVE register contents: .pushsection .text .data @@ -168,18 +43,6 @@ scratch: .space MAXVL_B .popsection -// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. -// Clobbers x0-x3 -function memcpy - cmp x2, #0 - b.eq 1f -0: ldrb w3, [x1], #1 - strb w3, [x0], #1 - subs x2, x2, #1 - b.ne 0b -1: ret -endfunction - // Generate a test pattern for storage in SVE registers // x0: pid (16 bits) // x1: register number (6 bits) @@ -227,33 +90,6 @@ function setup_vreg ret x4 endfunction -// Fill x1 bytes starting at x0 with 0xae (for canary purposes) -// Clobbers x1, x2. -function memfill_ae - mov w2, #0xae - b memfill -endfunction - -// Fill x1 bytes starting at x0 with 0. -// Clobbers x1, x2. -function memclr - mov w2, #0 -endfunction - // fall through to memfill - -// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 -// Clobbers x1 -function memfill - cmp x1, #0 - b.eq 1f - -0: strb w2, [x0], #1 - subs x1, x1, #1 - b.ne 0b - -1: ret -endfunction - // Trivial memory compare: compare x2 bytes starting at address x0 with // bytes starting at address x1. // Returns only if all bytes match; otherwise, the program is aborted. diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index e3e08d9c7020..f5b1b48ffff2 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -46,130 +46,6 @@ define_accessor getz, NZR, _sve_str_v define_accessor setp, NPR, _sve_ldr_p define_accessor getp, NPR, _sve_str_p -// Print a single character x0 to stdout -// Clobbers x0-x2,x8 -function putc - str x0, [sp, #-16]! - - mov x0, #1 // STDOUT_FILENO - mov x1, sp - mov x2, #1 - mov x8, #__NR_write - svc #0 - - add sp, sp, #16 - ret -endfunction - -// Print a NUL-terminated string starting at address x0 to stdout -// Clobbers x0-x3,x8 -function puts - mov x1, x0 - - mov x2, #0 -0: ldrb w3, [x0], #1 - cbz w3, 1f - add x2, x2, #1 - b 0b - -1: mov w0, #1 // STDOUT_FILENO - mov x8, #__NR_write - svc #0 - - ret -endfunction - -// Utility macro to print a literal string -// Clobbers x0-x4,x8 -.macro puts string - .pushsection .rodata.str1.1, "aMS", 1 -.L__puts_literal\@: .string "\string" - .popsection - - ldr x0, =.L__puts_literal\@ - bl puts -.endm - -// Print an unsigned decimal number x0 to stdout -// Clobbers x0-x4,x8 -function putdec - mov x1, sp - str x30, [sp, #-32]! // Result can't be > 20 digits - - mov x2, #0 - strb w2, [x1, #-1]! // Write the NUL terminator - - mov x2, #10 -0: udiv x3, x0, x2 // div-mod loop to generate the digits - msub x0, x3, x2, x0 - add w0, w0, #'0' - strb w0, [x1, #-1]! - mov x0, x3 - cbnz x3, 0b - - ldrb w0, [x1] - cbnz w0, 1f - mov w0, #'0' // Print "0" for 0, not "" - strb w0, [x1, #-1]! - -1: mov x0, x1 - bl puts - - ldr x30, [sp], #32 - ret -endfunction - -// Print an unsigned decimal number x0 to stdout, followed by a newline -// Clobbers x0-x5,x8 -function putdecn - mov x5, x30 - - bl putdec - mov x0, #'\n' - bl putc - - ret x5 -endfunction - -// Clobbers x0-x3,x8 -function puthexb - str x30, [sp, #-0x10]! - - mov w3, w0 - lsr w0, w0, #4 - bl puthexnibble - mov w0, w3 - - ldr x30, [sp], #0x10 - // fall through to puthexnibble -endfunction -// Clobbers x0-x2,x8 -function puthexnibble - and w0, w0, #0xf - cmp w0, #10 - blo 1f - add w0, w0, #'a' - ('9' + 1) -1: add w0, w0, #'0' - b putc -endfunction - -// x0=data in, x1=size in, clobbers x0-x5,x8 -function dumphex - str x30, [sp, #-0x10]! - - mov x4, x0 - mov x5, x1 - -0: subs x5, x5, #1 - b.lo 1f - ldrb w0, [x4], #1 - bl puthexb - b 0b - -1: ldr x30, [sp], #0x10 - ret -endfunction - // Declare some storate space to shadow the SVE register contents: .pushsection .text .data @@ -184,18 +60,6 @@ scratch: .space MAXVL_B .popsection -// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. -// Clobbers x0-x3 -function memcpy - cmp x2, #0 - b.eq 1f -0: ldrb w3, [x1], #1 - strb w3, [x0], #1 - subs x2, x2, #1 - b.ne 0b -1: ret -endfunction - // Generate a test pattern for storage in SVE registers // x0: pid (16 bits) // x1: register number (6 bits) @@ -316,33 +180,6 @@ function setup_ffr ret x4 endfunction -// Fill x1 bytes starting at x0 with 0xae (for canary purposes) -// Clobbers x1, x2. -function memfill_ae - mov w2, #0xae - b memfill -endfunction - -// Fill x1 bytes starting at x0 with 0. -// Clobbers x1, x2. -function memclr - mov w2, #0 -endfunction - // fall through to memfill - -// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 -// Clobbers x1 -function memfill - cmp x1, #0 - b.eq 1f - -0: strb w2, [x0], #1 - subs x1, x1, #1 - b.ne 0b - -1: ret -endfunction - // Trivial memory compare: compare x2 bytes starting at address x0 with // bytes starting at address x1. // Returns only if all bytes match; otherwise, the program is aborted. -- cgit v1.2.3 From 2d0d656700d67239a57afaf617439143d8dac9be Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 19 Oct 2021 17:31:39 +0100 Subject: arm64: Add Neoverse-N2, Cortex-A710 CPU part definition Add the CPU Partnumbers for the new Arm designs. Cc: Catalin Marinas Cc: Mark Rutland Cc: Will Deacon Acked-by: Catalin Marinas Reviewed-by: Anshuman Khandual Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20211019163153.3692640-2-suzuki.poulose@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 6231e1f0abe7..19b8441aa8f2 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -73,6 +73,8 @@ #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_CORTEX_A710 0xD47 +#define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define APM_CPU_PART_POTENZA 0x000 @@ -113,6 +115,8 @@ #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) +#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) -- cgit v1.2.3 From b9d216fcef4298de76519e2baeed69ba482467bd Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 19 Oct 2021 17:31:40 +0100 Subject: arm64: errata: Add detection for TRBE overwrite in FILL mode Arm Neoverse-N2 and the Cortex-A710 cores are affected by a CPU erratum where the TRBE will overwrite the trace buffer in FILL mode. The TRBE doesn't stop (as expected in FILL mode) when it reaches the limit and wraps to the base to continue writing upto 3 cache lines. This will overwrite any trace that was written previously. Add the Neoverse-N2 erratum(#2139208) and Cortex-A710 erratum (#2119858) to the detection logic. This will be used by the TRBE driver in later patches to work around the issue. The detection has been kept with the core arm64 errata framework list to make sure : - We don't duplicate the framework in TRBE driver - The errata detection is advertised like the rest of the CPU errata. Note that the Kconfig entries are not fully active until the TRBE driver implements the work around. Cc: Will Deacon Cc: Mark Rutland Cc: Anshuman Khandual Cc: Catalin Marinas Cc: Mathieu Poirier Cc: Mike Leach cc: Leo Yan Acked-by: Catalin Marinas Reviewed-by: Mathieu Poirier Reviewed-by: Anshuman Khandual Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20211019163153.3692640-3-suzuki.poulose@arm.com Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 4 ++++ arch/arm64/Kconfig | 41 ++++++++++++++++++++++++++++++++++ arch/arm64/kernel/cpu_errata.c | 25 +++++++++++++++++++++ arch/arm64/tools/cpucaps | 1 + 4 files changed, 71 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index d410a47ffa57..2f99229d993c 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -92,12 +92,16 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1349291 | N/A | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N2 | #2139208 | ARM64_ERRATUM_2139208 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-500 | #841119,826419 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5c7ae4c3954b..26bd128935bc 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -666,6 +666,47 @@ config ARM64_ERRATUM_1508412 If unsure, say Y. +config ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + bool + +config ARM64_ERRATUM_2119858 + bool "Cortex-A710: 2119858: workaround TRBE overwriting trace data in FILL mode" + default y + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + depends on CORESIGHT_TRBE + select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + help + This option adds the workaround for ARM Cortex-A710 erratum 2119858. + + Affected Cortex-A710 cores could overwrite up to 3 cache lines of trace + data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in + the event of a WRAP event. + + Work around the issue by always making sure we move the TRBPTR_EL1 by + 256 bytes before enabling the buffer and filling the first 256 bytes of + the buffer with ETM ignore packets upon disabling. + + If unsure, say Y. + +config ARM64_ERRATUM_2139208 + bool "Neoverse-N2: 2139208: workaround TRBE overwriting trace data in FILL mode" + default y + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + depends on CORESIGHT_TRBE + select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + help + This option adds the workaround for ARM Neoverse-N2 erratum 2139208. + + Affected Neoverse-N2 cores could overwrite up to 3 cache lines of trace + data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in + the event of a WRAP event. + + Work around the issue by always making sure we move the TRBPTR_EL1 by + 256 bytes before enabling the buffer and filling the first 256 bytes of + the buffer with ETM ignore packets upon disabling. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index e2c20c036442..ccd757373f36 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -340,6 +340,18 @@ static const struct midr_range erratum_1463225[] = { }; #endif +#ifdef CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE +static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { +#ifdef CONFIG_ARM64_ERRATUM_2139208 + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2119858 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), +#endif + {}, +}; +#endif /* CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE */ + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -533,6 +545,19 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_WORKAROUND_NVIDIA_CARMEL_CNP, ERRATA_MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), }, +#endif +#ifdef CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE + { + /* + * The erratum work around is handled within the TRBE + * driver and can be applied per-cpu. So, we can allow + * a late CPU to come online with this erratum. + */ + .desc = "ARM erratum 2119858 or 2139208", + .capability = ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + CAP_MIDR_RANGE_LIST(trbe_overwrite_fill_mode_cpus), + }, #endif { } diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 49305c2e6dfd..1ccb92165bd8 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -53,6 +53,7 @@ WORKAROUND_1418040 WORKAROUND_1463225 WORKAROUND_1508412 WORKAROUND_1542419 +WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_CAVIUM_23154 WORKAROUND_CAVIUM_27456 WORKAROUND_CAVIUM_30115 -- cgit v1.2.3 From fa82d0b4b833790ac4572377fb777dcea24a9d69 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 19 Oct 2021 17:31:41 +0100 Subject: arm64: errata: Add workaround for TSB flush failures Arm Neoverse-N2 (#2067961) and Cortex-A710 (#2054223) suffers from errata, where a TSB (trace synchronization barrier) fails to flush the trace data completely, when executed from a trace prohibited region. In Linux we always execute it after we have moved the PE to trace prohibited region. So, we can apply the workaround every time a TSB is executed. The work around is to issue two TSB consecutively. NOTE: This errata is defined as LOCAL_CPU_ERRATUM, implying that a late CPU could be blocked from booting if it is the first CPU that requires the workaround. This is because we do not allow setting a cpu_hwcaps after the SMP boot. The other alternative is to use "this_cpu_has_cap()" instead of the faster system wide check, which may be a bit of an overhead, given we may have to do this in nvhe KVM host before a guest entry. Cc: Will Deacon Cc: Catalin Marinas Cc: Mathieu Poirier Cc: Mike Leach Cc: Mark Rutland Cc: Anshuman Khandual Cc: Marc Zyngier Acked-by: Catalin Marinas Reviewed-by: Mathieu Poirier Reviewed-by: Anshuman Khandual Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20211019163153.3692640-4-suzuki.poulose@arm.com Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 4 ++++ arch/arm64/Kconfig | 33 +++++++++++++++++++++++++++++++++ arch/arm64/include/asm/barrier.h | 16 +++++++++++++++- arch/arm64/kernel/cpu_errata.c | 19 +++++++++++++++++++ arch/arm64/tools/cpucaps | 1 + 5 files changed, 72 insertions(+), 1 deletion(-) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 2f99229d993c..569a92411dcd 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -94,6 +94,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1349291 | N/A | @@ -102,6 +104,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N2 | #2139208 | ARM64_ERRATUM_2139208 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N2 | #2067961 | ARM64_ERRATUM_2067961 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-500 | #841119,826419 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 26bd128935bc..ec3bb346957f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -707,6 +707,39 @@ config ARM64_ERRATUM_2139208 If unsure, say Y. +config ARM64_WORKAROUND_TSB_FLUSH_FAILURE + bool + +config ARM64_ERRATUM_2054223 + bool "Cortex-A710: 2054223: workaround TSB instruction failing to flush trace" + default y + select ARM64_WORKAROUND_TSB_FLUSH_FAILURE + help + Enable workaround for ARM Cortex-A710 erratum 2054223 + + Affected cores may fail to flush the trace data on a TSB instruction, when + the PE is in trace prohibited state. This will cause losing a few bytes + of the trace cached. + + Workaround is to issue two TSB consecutively on affected cores. + + If unsure, say Y. + +config ARM64_ERRATUM_2067961 + bool "Neoverse-N2: 2067961: workaround TSB instruction failing to flush trace" + default y + select ARM64_WORKAROUND_TSB_FLUSH_FAILURE + help + Enable workaround for ARM Neoverse-N2 erratum 2067961 + + Affected cores may fail to flush the trace data on a TSB instruction, when + the PE is in trace prohibited state. This will cause losing a few bytes + of the trace cached. + + Workaround is to issue two TSB consecutively on affected cores. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 451e11e5fd23..1c5a00598458 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -23,7 +23,7 @@ #define dsb(opt) asm volatile("dsb " #opt : : : "memory") #define psb_csync() asm volatile("hint #17" : : : "memory") -#define tsb_csync() asm volatile("hint #18" : : : "memory") +#define __tsb_csync() asm volatile("hint #18" : : : "memory") #define csdb() asm volatile("hint #20" : : : "memory") #ifdef CONFIG_ARM64_PSEUDO_NMI @@ -46,6 +46,20 @@ #define dma_rmb() dmb(oshld) #define dma_wmb() dmb(oshst) + +#define tsb_csync() \ + do { \ + /* \ + * CPUs affected by Arm Erratum 2054223 or 2067961 needs \ + * another TSB to ensure the trace is flushed. The barriers \ + * don't have to be strictly back to back, as long as the \ + * CPU is in trace prohibited state. \ + */ \ + if (cpus_have_final_cap(ARM64_WORKAROUND_TSB_FLUSH_FAILURE)) \ + __tsb_csync(); \ + __tsb_csync(); \ + } while (0) + /* * Generate a mask for array_index__nospec() that is ~0UL when 0 <= idx < sz * and 0 otherwise. diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index ccd757373f36..aaa66c9eee24 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -352,6 +352,18 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { }; #endif /* CONFIG_ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE */ +#ifdef CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE +static const struct midr_range tsb_flush_fail_cpus[] = { +#ifdef CONFIG_ARM64_ERRATUM_2067961 + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2054223 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), +#endif + {}, +}; +#endif /* CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE */ + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -558,6 +570,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, CAP_MIDR_RANGE_LIST(trbe_overwrite_fill_mode_cpus), }, +#endif +#ifdef CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE + { + .desc = "ARM erratum 2067961 or 2054223", + .capability = ARM64_WORKAROUND_TSB_FLUSH_FAILURE, + ERRATA_MIDR_RANGE_LIST(tsb_flush_fail_cpus), + }, #endif { } diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 1ccb92165bd8..2102e15af43d 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -54,6 +54,7 @@ WORKAROUND_1463225 WORKAROUND_1508412 WORKAROUND_1542419 WORKAROUND_TRBE_OVERWRITE_FILL_MODE +WORKAROUND_TSB_FLUSH_FAILURE WORKAROUND_CAVIUM_23154 WORKAROUND_CAVIUM_27456 WORKAROUND_CAVIUM_30115 -- cgit v1.2.3 From 8d81b2a38ddfc4b03662d2359765648c8b4cc73c Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 19 Oct 2021 17:31:42 +0100 Subject: arm64: errata: Add detection for TRBE write to out-of-range Arm Neoverse-N2 and Cortex-A710 cores are affected by an erratum where the trbe, under some circumstances, might write upto 64bytes to an address after the Limit as programmed by the TRBLIMITR_EL1.LIMIT. This might - - Corrupt a page in the ring buffer, which may corrupt trace from a previous session, consumed by userspace. - Hit the guard page at the end of the vmalloc area and raise a fault. To keep the handling simpler, we always leave the last page from the range, which TRBE is allowed to write. This can be achieved by ensuring that we always have more than a PAGE worth space in the range, while calculating the LIMIT for TRBE. And then the LIMIT pointer can be adjusted to leave the PAGE (TRBLIMITR.LIMIT -= PAGE_SIZE), out of the TRBE range while enabling it. This makes sure that the TRBE will only write to an area within its allowed limit (i.e, [head-head+size]) and we do not have to handle address faults within the driver. Cc: Anshuman Khandual Cc: Mathieu Poirier Cc: Mike Leach Cc: Leo Yan Cc: Will Deacon Cc: Mark Rutland Reviewed-by: Anshuman Khandual Reviewed-by: Mathieu Poirier Acked-by: Catalin Marinas Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20211019163153.3692640-5-suzuki.poulose@arm.com Signed-off-by: Will Deacon --- Documentation/arm64/silicon-errata.rst | 4 ++++ arch/arm64/Kconfig | 41 ++++++++++++++++++++++++++++++++++ arch/arm64/kernel/cpu_errata.c | 20 +++++++++++++++++ arch/arm64/tools/cpucaps | 1 + 4 files changed, 66 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 569a92411dcd..5342e895fb60 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -96,6 +96,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A710 | #2224489 | ARM64_ERRATUM_2224489 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1349291 | N/A | @@ -106,6 +108,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N2 | #2067961 | ARM64_ERRATUM_2067961 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N2 | #2253138 | ARM64_ERRATUM_2253138 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-500 | #841119,826419 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ec3bb346957f..a500af15ebd5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -740,6 +740,47 @@ config ARM64_ERRATUM_2067961 If unsure, say Y. +config ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE + bool + +config ARM64_ERRATUM_2253138 + bool "Neoverse-N2: 2253138: workaround TRBE writing to address out-of-range" + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + depends on CORESIGHT_TRBE + default y + select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE + help + This option adds the workaround for ARM Neoverse-N2 erratum 2253138. + + Affected Neoverse-N2 cores might write to an out-of-range address, not reserved + for TRBE. Under some conditions, the TRBE might generate a write to the next + virtually addressed page following the last page of the TRBE address space + (i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base. + + Work around this in the driver by always making sure that there is a + page beyond the TRBLIMITR_EL1.LIMIT, within the space allowed for the TRBE. + + If unsure, say Y. + +config ARM64_ERRATUM_2224489 + bool "Cortex-A710: 2224489: workaround TRBE writing to address out-of-range" + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + depends on CORESIGHT_TRBE + default y + select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE + help + This option adds the workaround for ARM Cortex-A710 erratum 2224489. + + Affected Cortex-A710 cores might write to an out-of-range address, not reserved + for TRBE. Under some conditions, the TRBE might generate a write to the next + virtually addressed page following the last page of the TRBE address space + (i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base. + + Work around this in the driver by always making sure that there is a + page beyond the TRBLIMITR_EL1.LIMIT, within the space allowed for the TRBE. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index aaa66c9eee24..9e1c1aef9ebd 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -364,6 +364,18 @@ static const struct midr_range tsb_flush_fail_cpus[] = { }; #endif /* CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE */ +#ifdef CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE +static struct midr_range trbe_write_out_of_range_cpus[] = { +#ifdef CONFIG_ARM64_ERRATUM_2253138 + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2224489 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), +#endif + {}, +}; +#endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */ + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -577,6 +589,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_WORKAROUND_TSB_FLUSH_FAILURE, ERRATA_MIDR_RANGE_LIST(tsb_flush_fail_cpus), }, +#endif +#ifdef CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE + { + .desc = "ARM erratum 2253138 or 2224489", + .capability = ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus), + }, #endif { } diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 2102e15af43d..90628638e0f9 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -55,6 +55,7 @@ WORKAROUND_1508412 WORKAROUND_1542419 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE +WORKAROUND_TRBE_WRITE_OUT_OF_RANGE WORKAROUND_CAVIUM_23154 WORKAROUND_CAVIUM_27456 WORKAROUND_CAVIUM_30115 -- cgit v1.2.3 From 49ed920408f85fb143020cf7d95612b6b12a84a2 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 22 Oct 2021 15:16:34 +0100 Subject: arm64/sve: Add stub for sve_max_virtualisable_vl() Fixes build problems for configurations with KVM enabled but SVE disabled. Reported-by: Catalin Marinas Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20211022141635.2360415-2-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/include/asm/fpsimd.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 1d0b5fa253a0..dbb4b30a5648 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -215,6 +215,11 @@ static inline void fpsimd_release_task(struct task_struct *task) { } static inline void sve_sync_to_fpsimd(struct task_struct *task) { } static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { } +static inline int sve_max_virtualisable_vl(void) +{ + return 0; +} + static inline int sve_set_current_vl(unsigned long arg) { return -EINVAL; -- cgit v1.2.3 From 04ee53a55543ddc16398391ac95e97e5c9436ba3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 22 Oct 2021 15:16:35 +0100 Subject: arm64/sve: Fix warnings when SVE is disabled In configurations where SVE is disabled we define but never reference the functions for retrieving the default vector length, causing warnings. Fix this by move the ifdef up, marking get_default_vl() inline since it is referenced from code guarded by an IS_ENABLED() check, and do the same for the other accessors for consistency. Reported-by: Catalin Marinas Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20211022141635.2360415-3-broonie@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 3474122f9207..fa244c426f61 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -150,24 +150,24 @@ struct vl_config { static struct vl_config vl_config[ARM64_VEC_MAX]; -static int get_default_vl(enum vec_type type) +static inline int get_default_vl(enum vec_type type) { return READ_ONCE(vl_config[type].__default_vl); } -static int get_sve_default_vl(void) +#ifdef CONFIG_ARM64_SVE + +static inline int get_sve_default_vl(void) { return get_default_vl(ARM64_VEC_SVE); } -#ifdef CONFIG_ARM64_SVE - -static void set_default_vl(enum vec_type type, int val) +static inline void set_default_vl(enum vec_type type, int val) { WRITE_ONCE(vl_config[type].__default_vl, val); } -static void set_sve_default_vl(int val) +static inline void set_sve_default_vl(int val) { set_default_vl(ARM64_VEC_SVE, val); } -- cgit v1.2.3 From d198c77b7fab13d4def83c038807f6967f857acc Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 26 Oct 2021 12:18:02 +0100 Subject: arm64: Document boot requirements for FEAT_SME_FA64 The EAC1 release of the SME specification adds the FA64 feature which requires enablement at higher ELs before lower ELs can use it. Document what we require from higher ELs in our boot requirements. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20211026111802.12853-1-broonie@kernel.org Signed-off-by: Will Deacon --- Documentation/arm64/booting.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst index 3f9d86557c5e..52d060caf8bb 100644 --- a/Documentation/arm64/booting.rst +++ b/Documentation/arm64/booting.rst @@ -340,6 +340,16 @@ Before jumping into the kernel, the following conditions must be met: - SMCR_EL2.LEN must be initialised to the same value for all CPUs the kernel will execute on. + For CPUs with the Scalable Matrix Extension FA64 feature (FEAT_SME_FA64) + + - If EL3 is present: + + - SMCR_EL3.FA64 (bit 31) must be initialised to 0b1. + + - If the kernel is entered at EL1 and EL2 is present: + + - SMCR_EL2.FA64 (bit 31) must be initialised to 0b1. + The requirements described above for CPU mode, caches, MMUs, architected timers, coherency and system registers apply to all CPUs. All CPUs must enter the kernel in the same exception level. Where the values documented -- cgit v1.2.3 From a68773bd32d9b9dea62be99c06502567532f652f Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Mon, 18 Oct 2021 16:47:13 +0200 Subject: arm64: Select POSIX_CPU_TIMERS_TASK_WORK With 6caa5812e2d1 ("KVM: arm64: Use generic KVM xfer to guest work function") all arm64 exit paths are properly equipped to handle the POSIX timers' task work. Deferring timer callbacks to thread context, not only limits the amount of time spent in hard interrupt context, but is a safer implementation[1], and will allow PREEMPT_RT setups to use KVM[2]. So let's enable POSIX_CPU_TIMERS_TASK_WORK on arm64. [1] https://lore.kernel.org/all/20200716201923.228696399@linutronix.de/ [2] https://lore.kernel.org/linux-rt-users/87v92bdnlx.ffs@tglx/ Signed-off-by: Nicolas Saenz Julienne Acked-by: Mark Rutland Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20211018144713.873464-1-nsaenzju@redhat.com Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5c7ae4c3954b..ddd1258bf44c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -192,6 +192,7 @@ config ARM64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUTEX_CMPXCHG if FUTEX select MMU_GATHER_RCU_TABLE_FREE -- cgit v1.2.3