diff options
author | Palmer Dabbelt <palmer@rivosinc.com> | 2024-01-16 07:14:04 -0800 |
---|---|---|
committer | Palmer Dabbelt <palmer@rivosinc.com> | 2024-01-16 07:14:04 -0800 |
commit | a894e8ed09c6c7fa239711819db83b8c050eb7b0 (patch) | |
tree | 8cd4e5e9e12fa483d6551b4ca45c46ce52b3f5db /arch/riscv/lib | |
parent | d4abde52b4b116111537593e2744b3234d18808d (diff) | |
parent | 2080ff9493072a94e42b1856d59f5f1bffb761b7 (diff) |
Merge patch series "riscv: support kernel-mode Vector"
Andy Chiu <andy.chiu@sifive.com> says:
This series provides support running Vector in kernel mode.
Additionally, kernel-mode Vector can be configured to run without
turnning off preemption on a CONFIG_PREEMPT kernel. Along with the
suport, we add Vector optimized copy_{to,from}_user. And provide a
simple threshold to decide when to run the vectorized functions.
We decided to drop vectorized memcpy/memset/memmove for the moment due
to the concern of memory side-effect in kernel_vector_begin(). The
detailed description can be found at v9[0]
This series is composed by 4 parts:
patch 1-4: adds basic support for kernel-mode Vector
patch 5: includes vectorized copy_{to,from}_user into the kernel
patch 6: refactor context switch code in fpu [1]
patch 7-10: provides some code refactors and support for preemptible
kernel-mode Vector.
This series can be merged if we feel any part of {1~4, 5, 6, 7~10} is
mature enough.
This patch is tested on a QEMU with V and verified that booting, normal
userspace operations all work as usual with thresholds set to 0. Also,
we test by launching multiple kernel threads which continuously executes
and verifies Vector operations in the background. The module that tests
these operation is expected to be upstream later.
* b4-shazam-merge:
riscv: vector: allow kernel-mode Vector with preemption
riscv: vector: use kmem_cache to manage vector context
riscv: vector: use a mask to write vstate_ctrl
riscv: vector: do not pass task_struct into riscv_v_vstate_{save,restore}()
riscv: fpu: drop SR_SD bit checking
riscv: lib: vectorize copy_to_user/copy_from_user
riscv: sched: defer restoring Vector context for user
riscv: Add vector extension XOR implementation
riscv: vector: make Vector always available for softirq context
riscv: Add support for kernel mode vector
Link: https://lore.kernel.org/r/20240115055929.4736-1-andy.chiu@sifive.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Diffstat (limited to 'arch/riscv/lib')
-rw-r--r-- | arch/riscv/lib/Makefile | 7 | ||||
-rw-r--r-- | arch/riscv/lib/riscv_v_helpers.c | 45 | ||||
-rw-r--r-- | arch/riscv/lib/uaccess.S | 10 | ||||
-rw-r--r-- | arch/riscv/lib/uaccess_vector.S | 53 | ||||
-rw-r--r-- | arch/riscv/lib/xor.S | 81 |
5 files changed, 195 insertions, 1 deletions
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 26cb2502ecf8..c8a6787d5827 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -6,8 +6,13 @@ lib-y += memmove.o lib-y += strcmp.o lib-y += strlen.o lib-y += strncmp.o -lib-$(CONFIG_MMU) += uaccess.o +ifeq ($(CONFIG_MMU), y) +lib-y += uaccess.o +lib-$(CONFIG_RISCV_ISA_V) += uaccess_vector.o +endif lib-$(CONFIG_64BIT) += tishift.o lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o +lib-$(CONFIG_RISCV_ISA_V) += xor.o +lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o diff --git a/arch/riscv/lib/riscv_v_helpers.c b/arch/riscv/lib/riscv_v_helpers.c new file mode 100644 index 000000000000..be38a93cedae --- /dev/null +++ b/arch/riscv/lib/riscv_v_helpers.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2023 SiFive + * Author: Andy Chiu <andy.chiu@sifive.com> + */ +#include <linux/linkage.h> +#include <asm/asm.h> + +#include <asm/vector.h> +#include <asm/simd.h> + +#ifdef CONFIG_MMU +#include <asm/asm-prototypes.h> +#endif + +#ifdef CONFIG_MMU +size_t riscv_v_usercopy_threshold = CONFIG_RISCV_ISA_V_UCOPY_THRESHOLD; +int __asm_vector_usercopy(void *dst, void *src, size_t n); +int fallback_scalar_usercopy(void *dst, void *src, size_t n); +asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n) +{ + size_t remain, copied; + + /* skip has_vector() check because it has been done by the asm */ + if (!may_use_simd()) + goto fallback; + + kernel_vector_begin(); + remain = __asm_vector_usercopy(dst, src, n); + kernel_vector_end(); + + if (remain) { + copied = n - remain; + dst += copied; + src += copied; + n = remain; + goto fallback; + } + + return remain; + +fallback: + return fallback_scalar_usercopy(dst, src, n); +} +#endif diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index a9d356d6c03c..bc22c078aba8 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -3,6 +3,8 @@ #include <asm/asm.h> #include <asm/asm-extable.h> #include <asm/csr.h> +#include <asm/hwcap.h> +#include <asm/alternative-macros.h> .macro fixup op reg addr lbl 100: @@ -11,6 +13,13 @@ .endm SYM_FUNC_START(__asm_copy_to_user) +#ifdef CONFIG_RISCV_ISA_V + ALTERNATIVE("j fallback_scalar_usercopy", "nop", 0, RISCV_ISA_EXT_v, CONFIG_RISCV_ISA_V) + REG_L t0, riscv_v_usercopy_threshold + bltu a2, t0, fallback_scalar_usercopy + tail enter_vector_usercopy +#endif +SYM_FUNC_START(fallback_scalar_usercopy) /* Enable access to user memory */ li t6, SR_SUM @@ -181,6 +190,7 @@ SYM_FUNC_START(__asm_copy_to_user) sub a0, t5, a0 ret SYM_FUNC_END(__asm_copy_to_user) +SYM_FUNC_END(fallback_scalar_usercopy) EXPORT_SYMBOL(__asm_copy_to_user) SYM_FUNC_ALIAS(__asm_copy_from_user, __asm_copy_to_user) EXPORT_SYMBOL(__asm_copy_from_user) diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S new file mode 100644 index 000000000000..51ab5588e9ff --- /dev/null +++ b/arch/riscv/lib/uaccess_vector.S @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include <linux/linkage.h> +#include <asm-generic/export.h> +#include <asm/asm.h> +#include <asm/asm-extable.h> +#include <asm/csr.h> + +#define pDst a0 +#define pSrc a1 +#define iNum a2 + +#define iVL a3 + +#define ELEM_LMUL_SETTING m8 +#define vData v0 + + .macro fixup op reg addr lbl +100: + \op \reg, \addr + _asm_extable 100b, \lbl + .endm + +SYM_FUNC_START(__asm_vector_usercopy) + /* Enable access to user memory */ + li t6, SR_SUM + csrs CSR_STATUS, t6 + +loop: + vsetvli iVL, iNum, e8, ELEM_LMUL_SETTING, ta, ma + fixup vle8.v vData, (pSrc), 10f + sub iNum, iNum, iVL + add pSrc, pSrc, iVL + fixup vse8.v vData, (pDst), 11f + add pDst, pDst, iVL + bnez iNum, loop + + /* Exception fixup for vector load is shared with normal exit */ +10: + /* Disable access to user memory */ + csrc CSR_STATUS, t6 + mv a0, iNum + ret + + /* Exception fixup code for vector store. */ +11: + /* Undo the subtraction after vle8.v */ + add iNum, iNum, iVL + /* Make sure the scalar fallback skip already processed bytes */ + csrr t2, CSR_VSTART + sub iNum, iNum, t2 + j 10b +SYM_FUNC_END(__asm_vector_usercopy) diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S new file mode 100644 index 000000000000..b28f2430e52f --- /dev/null +++ b/arch/riscv/lib/xor.S @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2021 SiFive + */ +#include <linux/linkage.h> +#include <linux/export.h> +#include <asm/asm.h> + +SYM_FUNC_START(xor_regs_2_) + vsetvli a3, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a3 + vxor.vv v16, v0, v8 + add a2, a2, a3 + vse8.v v16, (a1) + add a1, a1, a3 + bnez a0, xor_regs_2_ + ret +SYM_FUNC_END(xor_regs_2_) +EXPORT_SYMBOL(xor_regs_2_) + +SYM_FUNC_START(xor_regs_3_) + vsetvli a4, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a4 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a4 + vxor.vv v16, v0, v16 + add a3, a3, a4 + vse8.v v16, (a1) + add a1, a1, a4 + bnez a0, xor_regs_3_ + ret +SYM_FUNC_END(xor_regs_3_) +EXPORT_SYMBOL(xor_regs_3_) + +SYM_FUNC_START(xor_regs_4_) + vsetvli a5, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a5 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a5 + vxor.vv v0, v0, v16 + vle8.v v24, (a4) + add a3, a3, a5 + vxor.vv v16, v0, v24 + add a4, a4, a5 + vse8.v v16, (a1) + add a1, a1, a5 + bnez a0, xor_regs_4_ + ret +SYM_FUNC_END(xor_regs_4_) +EXPORT_SYMBOL(xor_regs_4_) + +SYM_FUNC_START(xor_regs_5_) + vsetvli a6, a0, e8, m8, ta, ma + vle8.v v0, (a1) + vle8.v v8, (a2) + sub a0, a0, a6 + vxor.vv v0, v0, v8 + vle8.v v16, (a3) + add a2, a2, a6 + vxor.vv v0, v0, v16 + vle8.v v24, (a4) + add a3, a3, a6 + vxor.vv v0, v0, v24 + vle8.v v8, (a5) + add a4, a4, a6 + vxor.vv v16, v0, v8 + add a5, a5, a6 + vse8.v v16, (a1) + add a1, a1, a6 + bnez a0, xor_regs_5_ + ret +SYM_FUNC_END(xor_regs_5_) +EXPORT_SYMBOL(xor_regs_5_) |