diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-11 20:07:44 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-11 20:07:44 -0700 |
commit | f311d498be8f1aa49d5cfca0b18d6db4f77845b7 (patch) | |
tree | e8658081d366b64a645e79dab05191cf95a75aa0 /arch | |
parent | d465bff130bf4ca17b6980abe51164ace1e0cba4 (diff) | |
parent | e18d6152ff0f41b7f01f9817372022df04e0d354 (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull more kvm updates from Paolo Bonzini:
"The main batch of ARM + RISC-V changes, and a few fixes and cleanups
for x86 (PMU virtualization and selftests).
ARM:
- Fixes for single-stepping in the presence of an async exception as
well as the preservation of PSTATE.SS
- Better handling of AArch32 ID registers on AArch64-only systems
- Fixes for the dirty-ring API, allowing it to work on architectures
with relaxed memory ordering
- Advertise the new kvmarm mailing list
- Various minor cleanups and spelling fixes
RISC-V:
- Improved instruction encoding infrastructure for instructions not
yet supported by binutils
- Svinval support for both KVM Host and KVM Guest
- Zihintpause support for KVM Guest
- Zicbom support for KVM Guest
- Record number of signal exits as a VCPU stat
- Use generic guest entry infrastructure
x86:
- Misc PMU fixes and cleanups.
- selftests: fixes for Hyper-V hypercall
- selftests: fix nx_huge_pages_test on TDP-disabled hosts
- selftests: cleanups for fix_hypercall_test"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (57 commits)
riscv: select HAVE_POSIX_CPU_TIMERS_TASK_WORK
RISC-V: KVM: Use generic guest entry infrastructure
RISC-V: KVM: Record number of signal exits as a vCPU stat
RISC-V: KVM: add __init annotation to riscv_kvm_init()
RISC-V: KVM: Expose Zicbom to the guest
RISC-V: KVM: Provide UAPI for Zicbom block size
RISC-V: KVM: Make ISA ext mappings explicit
RISC-V: KVM: Allow Guest use Zihintpause extension
RISC-V: KVM: Allow Guest use Svinval extension
RISC-V: KVM: Use Svinval for local TLB maintenance when available
RISC-V: Probe Svinval extension form ISA string
RISC-V: KVM: Change the SBI specification version to v1.0
riscv: KVM: Apply insn-def to hlv encodings
riscv: KVM: Apply insn-def to hfence encodings
riscv: Introduce support for defining instructions
riscv: Add X register names to gpr-nums
KVM: arm64: Advertise new kvmarm mailing list
kvm: vmx: keep constant definition format consistent
kvm: mmu: fix typos in struct kvm_arch
KVM: selftests: Fix nx_huge_pages_test on TDP-disabled hosts
...
Diffstat (limited to 'arch')
30 files changed, 466 insertions, 384 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e9c9388ccc02..45e2136322ba 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -393,6 +393,7 @@ struct kvm_vcpu_arch { */ struct { u32 mdscr_el1; + bool pstate_ss; } guest_debug_preserved; /* vcpu power state */ @@ -535,6 +536,9 @@ struct kvm_vcpu_arch { #define IN_WFIT __vcpu_single_flag(sflags, BIT(3)) /* vcpu system registers loaded on physical CPU */ #define SYSREGS_ON_CPU __vcpu_single_flag(sflags, BIT(4)) +/* Software step state is Active-pending */ +#define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5)) + /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ #define vcpu_sve_pffr(vcpu) (kern_hyp_va((vcpu)->arch.sve_state) + \ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 446f628a9de1..94d33e296e10 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2269,6 +2269,16 @@ static int __init early_kvm_mode_cfg(char *arg) if (!arg) return -EINVAL; + if (strcmp(arg, "none") == 0) { + kvm_mode = KVM_MODE_NONE; + return 0; + } + + if (!is_hyp_mode_available()) { + pr_warn_once("KVM is not available. Ignoring kvm-arm.mode\n"); + return 0; + } + if (strcmp(arg, "protected") == 0) { if (!is_kernel_in_hyp_mode()) kvm_mode = KVM_MODE_PROTECTED; @@ -2283,11 +2293,6 @@ static int __init early_kvm_mode_cfg(char *arg) return 0; } - if (strcmp(arg, "none") == 0) { - kvm_mode = KVM_MODE_NONE; - return 0; - } - return -EINVAL; } early_param("kvm-arm.mode", early_kvm_mode_cfg); diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 3f7563d768e2..fccf9ec01813 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -32,6 +32,10 @@ static DEFINE_PER_CPU(u64, mdcr_el2); * * Guest access to MDSCR_EL1 is trapped by the hypervisor and handled * after we have restored the preserved value to the main context. + * + * When single-step is enabled by userspace, we tweak PSTATE.SS on every + * guest entry. Preserve PSTATE.SS so we can restore the original value + * for the vcpu after the single-step is disabled. */ static void save_guest_debug_regs(struct kvm_vcpu *vcpu) { @@ -41,6 +45,9 @@ static void save_guest_debug_regs(struct kvm_vcpu *vcpu) trace_kvm_arm_set_dreg32("Saved MDSCR_EL1", vcpu->arch.guest_debug_preserved.mdscr_el1); + + vcpu->arch.guest_debug_preserved.pstate_ss = + (*vcpu_cpsr(vcpu) & DBG_SPSR_SS); } static void restore_guest_debug_regs(struct kvm_vcpu *vcpu) @@ -51,6 +58,11 @@ static void restore_guest_debug_regs(struct kvm_vcpu *vcpu) trace_kvm_arm_set_dreg32("Restored MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1)); + + if (vcpu->arch.guest_debug_preserved.pstate_ss) + *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; + else + *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; } /** @@ -188,7 +200,18 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) * debugging the system. */ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { - *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; + /* + * If the software step state at the last guest exit + * was Active-pending, we don't set DBG_SPSR_SS so + * that the state is maintained (to not run another + * single-step until the pending Software Step + * exception is taken). + */ + if (!vcpu_get_flag(vcpu, DBG_SS_ACTIVE_PENDING)) + *vcpu_cpsr(vcpu) |= DBG_SPSR_SS; + else + *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; + mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1); mdscr |= DBG_MDSCR_SS; vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); @@ -262,6 +285,15 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) * Restore the guest's debug registers if we were using them. */ if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) { + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { + if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS)) + /* + * Mark the vcpu as ACTIVE_PENDING + * until Software Step exception is taken. + */ + vcpu_set_flag(vcpu, DBG_SS_ACTIVE_PENDING); + } + restore_guest_debug_regs(vcpu); /* diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index f802a3b3f8db..2ff13a3f8479 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -937,6 +937,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, } else { /* If not enabled clear all flags */ vcpu->guest_debug = 0; + vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING); } out: diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index bbe5b393d689..e778eefcf214 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -152,8 +152,14 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu) run->debug.arch.hsr_high = upper_32_bits(esr); run->flags = KVM_DEBUG_ARCH_HSR_HIGH_VALID; - if (ESR_ELx_EC(esr) == ESR_ELx_EC_WATCHPT_LOW) + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_WATCHPT_LOW: run->debug.arch.far = vcpu->arch.fault.far_el2; + break; + case ESR_ELx_EC_SOFTSTP_LOW: + vcpu_clear_flag(vcpu, DBG_SS_ACTIVE_PENDING); + break; + } return 0; } diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 9f6385702061..8e9d49a964be 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -143,7 +143,7 @@ static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu) } } -/* Restore VGICv3 state on non_VEH systems */ +/* Restore VGICv3 state on non-VHE systems */ static void __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) { if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 2ef1121ab844..f4a7c5abcbca 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1063,13 +1063,12 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, } /* Read a sanitised cpufeature ID register by sys_reg_desc */ -static u64 read_id_reg(const struct kvm_vcpu *vcpu, - struct sys_reg_desc const *r, bool raz) +static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r) { u32 id = reg_to_encoding(r); u64 val; - if (raz) + if (sysreg_visible_as_raz(vcpu, r)) return 0; val = read_sanitised_ftr_reg(id); @@ -1145,34 +1144,37 @@ static unsigned int id_visibility(const struct kvm_vcpu *vcpu, return 0; } -/* cpufeature ID register access trap handlers */ - -static bool __access_id_reg(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r, - bool raz) +static unsigned int aa32_id_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) { - if (p->is_write) - return write_to_read_only(vcpu, p, r); + /* + * AArch32 ID registers are UNKNOWN if AArch32 isn't implemented at any + * EL. Promote to RAZ/WI in order to guarantee consistency between + * systems. + */ + if (!kvm_supports_32bit_el0()) + return REG_RAZ | REG_USER_WI; - p->regval = read_id_reg(vcpu, r, raz); - return true; + return id_visibility(vcpu, r); } +static unsigned int raz_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) +{ + return REG_RAZ; +} + +/* cpufeature ID register access trap handlers */ + static bool access_id_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - bool raz = sysreg_visible_as_raz(vcpu, r); - - return __access_id_reg(vcpu, p, r, raz); -} + if (p->is_write) + return write_to_read_only(vcpu, p, r); -static bool access_raz_id_reg(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - return __access_id_reg(vcpu, p, r, true); + p->regval = read_id_reg(vcpu, r); + return true; } /* Visibility overrides for SVE-specific control registers */ @@ -1208,9 +1210,9 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, return -EINVAL; /* We can only differ with CSV[23], and anything else is an error */ - val ^= read_id_reg(vcpu, rd, false); - val &= ~((0xFUL << ID_AA64PFR0_EL1_CSV2_SHIFT) | - (0xFUL << ID_AA64PFR0_EL1_CSV3_SHIFT)); + val ^= read_id_reg(vcpu, rd); + val &= ~(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | + ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3)); if (val) return -EINVAL; @@ -1227,45 +1229,21 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, * are stored, and for set_id_reg() we don't allow the effective value * to be changed. */ -static int __get_id_reg(const struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd, u64 *val, - bool raz) -{ - *val = read_id_reg(vcpu, rd, raz); - return 0; -} - -static int __set_id_reg(const struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd, u64 val, - bool raz) -{ - /* This is what we mean by invariant: you can't change it. */ - if (val != read_id_reg(vcpu, rd, raz)) - return -EINVAL; - - return 0; -} - static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, u64 *val) { - bool raz = sysreg_visible_as_raz(vcpu, rd); - - return __get_id_reg(vcpu, rd, val, raz); + *val = read_id_reg(vcpu, rd); + return 0; } static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, u64 val) { - bool raz = sysreg_visible_as_raz(vcpu, rd); - - return __set_id_reg(vcpu, rd, val, raz); -} + /* This is what we mean by invariant: you can't change it. */ + if (val != read_id_reg(vcpu, rd)) + return -EINVAL; -static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - u64 val) -{ - return __set_id_reg(vcpu, rd, val, true); + return 0; } static int get_raz_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, @@ -1367,6 +1345,15 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, .visibility = id_visibility, \ } +/* sys_reg_desc initialiser for known cpufeature ID registers */ +#define AA32_ID_SANITISED(name) { \ + SYS_DESC(SYS_##name), \ + .access = access_id_reg, \ + .get_user = get_id_reg, \ + .set_user = set_id_reg, \ + .visibility = aa32_id_visibility, \ +} + /* * sys_reg_desc initialiser for architecturally unallocated cpufeature ID * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2 @@ -1374,9 +1361,10 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, */ #define ID_UNALLOCATED(crm, op2) { \ Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \ - .access = access_raz_id_reg, \ - .get_user = get_raz_reg, \ - .set_user = set_raz_id_reg, \ + .access = access_id_reg, \ + .get_user = get_id_reg, \ + .set_user = set_id_reg, \ + .visibility = raz_visibility \ } /* @@ -1386,9 +1374,10 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, */ #define ID_HIDDEN(name) { \ SYS_DESC(SYS_##name), \ - .access = access_raz_id_reg, \ - .get_user = get_raz_reg, \ - .set_user = set_raz_id_reg, \ + .access = access_id_reg, \ + .get_user = get_id_reg, \ + .set_user = set_id_reg, \ + .visibility = raz_visibility, \ } /* @@ -1452,33 +1441,33 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* AArch64 mappings of the AArch32 ID registers */ /* CRm=1 */ - ID_SANITISED(ID_PFR0_EL1), - ID_SANITISED(ID_PFR1_EL1), - ID_SANITISED(ID_DFR0_EL1), + AA32_ID_SANITISED(ID_PFR0_EL1), + AA32_ID_SANITISED(ID_PFR1_EL1), + AA32_ID_SANITISED(ID_DFR0_EL1), ID_HIDDEN(ID_AFR0_EL1), - ID_SANITISED(ID_MMFR0_EL1), - ID_SANITISED(ID_MMFR1_EL1), - ID_SANITISED(ID_MMFR2_EL1), - ID_SANITISED(ID_MMFR3_EL1), + AA32_ID_SANITISED(ID_MMFR0_EL1), + AA32_ID_SANITISED(ID_MMFR1_EL1), + AA32_ID_SANITISED(ID_MMFR2_EL1), + AA32_ID_SANITISED(ID_MMFR3_EL1), /* CRm=2 */ - ID_SANITISED(ID_ISAR0_EL1), - ID_SANITISED(ID_ISAR1_EL1), - ID_SANITISED(ID_ISAR2_EL1), - ID_SANITISED(ID_ISAR3_EL1), - ID_SANITISED(ID_ISAR4_EL1), - ID_SANITISED(ID_ISAR5_EL1), - ID_SANITISED(ID_MMFR4_EL1), - ID_SANITISED(ID_ISAR6_EL1), + AA32_ID_SANITISED(ID_ISAR0_EL1), + AA32_ID_SANITISED(ID_ISAR1_EL1), + AA32_ID_SANITISED(ID_ISAR2_EL1), + AA32_ID_SANITISED(ID_ISAR3_EL1), + AA32_ID_SANITISED(ID_ISAR4_EL1), + AA32_ID_SANITISED(ID_ISAR5_EL1), + AA32_ID_SANITISED(ID_MMFR4_EL1), + AA32_ID_SANITISED(ID_ISAR6_EL1), /* CRm=3 */ - ID_SANITISED(MVFR0_EL1), - ID_SANITISED(MVFR1_EL1), - ID_SANITISED(MVFR2_EL1), + AA32_ID_SANITISED(MVFR0_EL1), + AA32_ID_SANITISED(MVFR1_EL1), + AA32_ID_SANITISED(MVFR2_EL1), ID_UNALLOCATED(3,3), - ID_SANITISED(ID_PFR2_EL1), + AA32_ID_SANITISED(ID_PFR2_EL1), ID_HIDDEN(ID_DFR1_EL1), - ID_SANITISED(ID_MMFR5_EL1), + AA32_ID_SANITISED(ID_MMFR5_EL1), ID_UNALLOCATED(3,7), /* AArch64 ID registers */ @@ -2809,6 +2798,9 @@ int kvm_sys_reg_set_user(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, if (!r) return -ENOENT; + if (sysreg_user_write_ignore(vcpu, r)) + return 0; + if (r->set_user) { ret = (r->set_user)(vcpu, r, val); } else { diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index a8c4cc32eb9a..e4ebb3a379fd 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -86,6 +86,7 @@ struct sys_reg_desc { #define REG_HIDDEN (1 << 0) /* hidden from userspace and guest */ #define REG_RAZ (1 << 1) /* RAZ from userspace and guest */ +#define REG_USER_WI (1 << 2) /* WI from userspace only */ static __printf(2, 3) inline void print_sys_reg_msg(const struct sys_reg_params *p, @@ -136,22 +137,31 @@ static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r __vcpu_sys_reg(vcpu, r->reg) = r->val; } -static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu, - const struct sys_reg_desc *r) +static inline unsigned int sysreg_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) { if (likely(!r->visibility)) - return false; + return 0; - return r->visibility(vcpu, r) & REG_HIDDEN; + return r->visibility(vcpu, r); +} + +static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) +{ + return sysreg_visibility(vcpu, r) & REG_HIDDEN; } static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - if (likely(!r->visibility)) - return false; + return sysreg_visibility(vcpu, r) & REG_RAZ; +} - return r->visibility(vcpu, r) & REG_RAZ; +static inline bool sysreg_user_write_ignore(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) +{ + return sysreg_visibility(vcpu, r) & REG_USER_WI; } static inline int cmp_sys_reg(const struct sys_reg_desc *i1, diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 9d3299a70242..24d7778d1ce6 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -406,7 +406,7 @@ static void update_affinity_collection(struct kvm *kvm, struct vgic_its *its, struct its_ite *ite; for_each_lpi_its(device, ite, its) { - if (!ite->collection || coll != ite->collection) + if (ite->collection != coll) continue; update_affinity_ite(kvm, ite); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index e84f2742b6bb..56976e5674ee 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -104,6 +104,7 @@ config RISCV select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_STACKPROTECTOR @@ -228,6 +229,9 @@ config RISCV_DMA_NONCOHERENT select ARCH_HAS_SETUP_DMA_OPS select DMA_DIRECT_REMAP +config AS_HAS_INSN + def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero) + source "arch/riscv/Kconfig.socs" source "arch/riscv/Kconfig.erratas" diff --git a/arch/riscv/include/asm/gpr-num.h b/arch/riscv/include/asm/gpr-num.h index dfee2829fc7c..efeb5edf8a3a 100644 --- a/arch/riscv/include/asm/gpr-num.h +++ b/arch/riscv/include/asm/gpr-num.h @@ -3,6 +3,11 @@ #define __ASM_GPR_NUM_H #ifdef __ASSEMBLY__ + + .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 + .equ .L__gpr_num_x\num, \num + .endr + .equ .L__gpr_num_zero, 0 .equ .L__gpr_num_ra, 1 .equ .L__gpr_num_sp, 2 @@ -39,6 +44,9 @@ #else /* __ASSEMBLY__ */ #define __DEFINE_ASM_GPR_NUMS \ +" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31\n" \ +" .equ .L__gpr_num_x\\num, \\num\n" \ +" .endr\n" \ " .equ .L__gpr_num_zero, 0\n" \ " .equ .L__gpr_num_ra, 1\n" \ " .equ .L__gpr_num_sp, 2\n" \ diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 6f59ec64175e..b22525290073 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -58,6 +58,7 @@ enum riscv_isa_ext_id { RISCV_ISA_EXT_ZICBOM, RISCV_ISA_EXT_ZIHINTPAUSE, RISCV_ISA_EXT_SSTC, + RISCV_ISA_EXT_SVINVAL, RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX, }; @@ -69,6 +70,7 @@ enum riscv_isa_ext_id { enum riscv_isa_ext_key { RISCV_ISA_EXT_KEY_FPU, /* For 'F' and 'D' */ RISCV_ISA_EXT_KEY_ZIHINTPAUSE, + RISCV_ISA_EXT_KEY_SVINVAL, RISCV_ISA_EXT_KEY_MAX, }; @@ -90,6 +92,8 @@ static __always_inline int riscv_isa_ext2key(int num) return RISCV_ISA_EXT_KEY_FPU; case RISCV_ISA_EXT_ZIHINTPAUSE: return RISCV_ISA_EXT_KEY_ZIHINTPAUSE; + case RISCV_ISA_EXT_SVINVAL: + return RISCV_ISA_EXT_KEY_SVINVAL; default: return -EINVAL; } diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h new file mode 100644 index 000000000000..16044affa57c --- /dev/null +++ b/arch/riscv/include/asm/insn-def.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_INSN_DEF_H +#define __ASM_INSN_DEF_H + +#include <asm/asm.h> + +#define INSN_R_FUNC7_SHIFT 25 +#define INSN_R_RS2_SHIFT 20 +#define INSN_R_RS1_SHIFT 15 +#define INSN_R_FUNC3_SHIFT 12 +#define INSN_R_RD_SHIFT 7 +#define INSN_R_OPCODE_SHIFT 0 + +#ifdef __ASSEMBLY__ + +#ifdef CONFIG_AS_HAS_INSN + + .macro insn_r, opcode, func3, func7, rd, rs1, rs2 + .insn r \opcode, \func3, \func7, \rd, \rs1, \rs2 + .endm + +#else + +#include <asm/gpr-num.h> + + .macro insn_r, opcode, func3, func7, rd, rs1, rs2 + .4byte ((\opcode << INSN_R_OPCODE_SHIFT) | \ + (\func3 << INSN_R_FUNC3_SHIFT) | \ + (\func7 << INSN_R_FUNC7_SHIFT) | \ + (.L__gpr_num_\rd << INSN_R_RD_SHIFT) | \ + (.L__gpr_num_\rs1 << INSN_R_RS1_SHIFT) | \ + (.L__gpr_num_\rs2 << INSN_R_RS2_SHIFT)) + .endm + +#endif + +#define __INSN_R(...) insn_r __VA_ARGS__ + +#else /* ! __ASSEMBLY__ */ + +#ifdef CONFIG_AS_HAS_INSN + +#define __INSN_R(opcode, func3, func7, rd, rs1, rs2) \ + ".insn r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n" + +#else + +#include <linux/stringify.h> +#include <asm/gpr-num.h> + +#define DEFINE_INSN_R \ + __DEFINE_ASM_GPR_NUMS \ +" .macro insn_r, opcode, func3, func7, rd, rs1, rs2\n" \ +" .4byte ((\\opcode << " __stringify(INSN_R_OPCODE_SHIFT) ") |" \ +" (\\func3 << " __stringify(INSN_R_FUNC3_SHIFT) ") |" \ +" (\\func7 << " __stringify(INSN_R_FUNC7_SHIFT) ") |" \ +" (.L__gpr_num_\\rd << " __stringify(INSN_R_RD_SHIFT) ") |" \ +" (.L__gpr_num_\\rs1 << " __stringify(INSN_R_RS1_SHIFT) ") |" \ +" (.L__gpr_num_\\rs2 << " __stringify(INSN_R_RS2_SHIFT) "))\n" \ +" .endm\n" + +#define UNDEFINE_INSN_R \ +" .purgem insn_r\n" + +#define __INSN_R(opcode, func3, func7, rd, rs1, rs2) \ + DEFINE_INSN_R \ + "insn_r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n" \ + UNDEFINE_INSN_R + +#endif + +#endif /* ! __ASSEMBLY__ */ + +#define INSN_R(opcode, func3, func7, rd, rs1, rs2) \ + __INSN_R(RV_##opcode, RV_##func3, RV_##func7, \ + RV_##rd, RV_##rs1, RV_##rs2) + +#define RV_OPCODE(v) __ASM_STR(v) +#define RV_FUNC3(v) __ASM_STR(v) +#define RV_FUNC7(v) __ASM_STR(v) +#define RV_RD(v) __ASM_STR(v) +#define RV_RS1(v) __ASM_STR(v) +#define RV_RS2(v) __ASM_STR(v) +#define __RV_REG(v) __ASM_STR(x ## v) +#define RV___RD(v) __RV_REG(v) +#define RV___RS1(v) __RV_REG(v) +#define RV___RS2(v) __RV_REG(v) + +#define RV_OPCODE_SYSTEM RV_OPCODE(115) + +#define HFENCE_VVMA(vaddr, asid) \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(17), \ + __RD(0), RS1(vaddr), RS2(asid)) + +#define HFENCE_GVMA(gaddr, vmid) \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(49), \ + __RD(0), RS1(gaddr), RS2(vmid)) + +#define HLVX_HU(dest, addr) \ + INSN_R(OPCODE_SYSTEM, FUNC3(4), FUNC7(50), \ + RD(dest), RS1(addr), __RS2(3)) + +#define HLV_W(dest, addr) \ + INSN_R(OPCODE_SYSTEM, FUNC3(4), FUNC7(52), \ + RD(dest), RS1(addr), __RS2(0)) + +#ifdef CONFIG_64BIT +#define HLV_D(dest, addr) \ + INSN_R(OPCODE_SYSTEM, FUNC3(4), FUNC7(54), \ + RD(dest), RS1(addr), __RS2(0)) +#else +#define HLV_D(dest, addr) \ + __ASM_STR(.error "hlv.d requires 64-bit support") +#endif + +#define SINVAL_VMA(vaddr, asid) \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(11), \ + __RD(0), RS1(vaddr), RS2(asid)) + +#define SFENCE_W_INVAL() \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(12), \ + __RD(0), __RS1(0), __RS2(0)) + +#define SFENCE_INVAL_IR() \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(12), \ + __RD(0), __RS1(0), __RS2(1)) + +#define HINVAL_VVMA(vaddr, asid) \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(19), \ + __RD(0), RS1(vaddr), RS2(asid)) + +#define HINVAL_GVMA(gaddr, vmid) \ + INSN_R(OPCODE_SYSTEM, FUNC3(0), FUNC7(51), \ + __RD(0), RS1(gaddr), RS2(vmid)) + +#endif /* __ASM_INSN_DEF_H */ diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 60c517e4d576..dbbf43d52623 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -67,6 +67,7 @@ struct kvm_vcpu_stat { u64 mmio_exit_kernel; u64 csr_exit_user; u64 csr_exit_kernel; + u64 signal_exits; u64 exits; }; diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h b/arch/riscv/include/asm/kvm_vcpu_sbi.h index 26a446a34057..d4e3e600beef 100644 --- a/arch/riscv/include/asm/kvm_vcpu_sbi.h +++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h @@ -11,8 +11,8 @@ #define KVM_SBI_IMPID 3 -#define KVM_SBI_VERSION_MAJOR 0 -#define KVM_SBI_VERSION_MINOR 3 +#define KVM_SBI_VERSION_MAJOR 1 +#define KVM_SBI_VERSION_MINOR 0 struct kvm_vcpu_sbi_extension { unsigned long extid_start; diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 7351417afd62..8985ff234c01 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -48,6 +48,7 @@ struct kvm_sregs { /* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ struct kvm_riscv_config { unsigned long isa; + unsigned long zicbom_block_size; }; /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ @@ -98,6 +99,9 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_M, KVM_RISCV_ISA_EXT_SVPBMT, KVM_RISCV_ISA_EXT_SSTC, + KVM_RISCV_ISA_EXT_SVINVAL, + KVM_RISCV_ISA_EXT_ZIHINTPAUSE, + KVM_RISCV_ISA_EXT_ZICBOM, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index 87455d12970f..4d0dece5996c 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -93,6 +93,7 @@ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid) static struct riscv_isa_ext_data isa_ext_arr[] = { __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC), + __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL), __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT), __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 3b5583db9d80..9774f1271f93 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -204,6 +204,7 @@ void __init riscv_fill_hwcap(void) SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM); SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE); SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC); + SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL); } #undef SET_ISA_EXT_MAP } diff --git a/arch/riscv/kvm/Kconfig b/arch/riscv/kvm/Kconfig index f5a342fa1b1d..f36a737d5f96 100644 --- a/arch/riscv/kvm/Kconfig +++ b/arch/riscv/kvm/Kconfig @@ -24,6 +24,7 @@ config KVM select PREEMPT_NOTIFIERS select KVM_MMIO select KVM_GENERIC_DIRTYLOG_READ_PROTECT + select KVM_XFER_TO_GUEST_WORK select HAVE_KVM_VCPU_ASYNC_IOCTL select HAVE_KVM_EVENTFD select SRCU diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 1549205fe5fe..df2d8716851f 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -122,7 +122,7 @@ void kvm_arch_exit(void) { } -static int riscv_kvm_init(void) +static int __init riscv_kvm_init(void) { return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); } diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c index 1a76d0b1907d..309d79b3e5cd 100644 --- a/arch/riscv/kvm/tlb.c +++ b/arch/riscv/kvm/tlb.c @@ -12,22 +12,11 @@ #include <linux/kvm_host.h> #include <asm/cacheflush.h> #include <asm/csr.h> +#include <asm/hwcap.h> +#include <asm/insn-def.h> -/* - * Instruction encoding of hfence.gvma is: - * HFENCE.GVMA rs1, rs2 - * HFENCE.GVMA zero, rs2 - * HFENCE.GVMA rs1 - * HFENCE.GVMA - * - * rs1!=zero and rs2!=zero ==> HFENCE.GVMA rs1, rs2 - * rs1==zero and rs2!=zero ==> HFENCE.GVMA zero, rs2 - * rs1!=zero and rs2==zero ==> HFENCE.GVMA rs1 - * rs1==zero and rs2==zero ==> HFENCE.GVMA - * - * Instruction encoding of HFENCE.GVMA is: - * 0110001 rs2(5) rs1(5) 000 00000 1110011 - */ +#define has_svinval() \ + static_branch_unlikely(&riscv_isa_ext_keys[RISCV_ISA_EXT_KEY_SVINVAL]) void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid, gpa_t gpa, gpa_t gpsz, @@ -40,32 +29,22 @@ void kvm_riscv_local_hfence_gvma_vmid_gpa(unsigned long vmid, return; } - for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) { - /* - * rs1 = a0 (GPA >> 2) - * rs2 = a1 (VMID) - * HFENCE.GVMA a0, a1 - * 0110001 01011 01010 000 00000 1110011 - */ - asm volatile ("srli a0, %0, 2\n" - "add a1, %1, zero\n" - ".word 0x62b50073\n" - :: "r" (pos), "r" (vmid) - : "a0", "a1", "memory"); + if (has_svinval()) { + asm volatile (SFENCE_W_INVAL() ::: "memory"); + for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) + asm volatile (HINVAL_GVMA(%0, %1) + : : "r" (pos >> 2), "r" (vmid) : "memory"); + asm volatile (SFENCE_INVAL_IR() ::: "memory"); + } else { + for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) + asm volatile (HFENCE_GVMA(%0, %1) + : : "r" (pos >> 2), "r" (vmid) : "memory"); } } void kvm_riscv_local_hfence_gvma_vmid_all(unsigned long vmid) { - /* - * rs1 = zero - * rs2 = a0 (VMID) - * HFENCE.GVMA zero, a0 - * 0110001 01010 00000 000 00000 1110011 - */ - asm volatile ("add a0, %0, zero\n" - ".word 0x62a00073\n" - :: "r" (vmid) : "a0", "memory"); + asm volatile(HFENCE_GVMA(zero, %0) : : "r" (vmid) : "memory"); } void kvm_riscv_local_hfence_gvma_gpa(gpa_t gpa, gpa_t gpsz, @@ -78,46 +57,24 @@ void kvm_riscv_local_hfence_gvma_gpa(gpa_t gpa, gpa_t gpsz, return; } - for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) { - /* - * rs1 = a0 (GPA >> 2) - * rs2 = zero - * HFENCE.GVMA a0 - * 0110001 00000 01010 000 00000 1110011 - */ - asm volatile ("srli a0, %0, 2\n" - ".word 0x62050073\n" - :: "r" (pos) : "a0", "memory"); + if (has_svinval()) { + asm volatile (SFENCE_W_INVAL() ::: "memory"); + for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) + asm volatile(HINVAL_GVMA(%0, zero) + : : "r" (pos >> 2) : "memory"); + asm volatile (SFENCE_INVAL_IR() ::: "memory"); + } else { + for (pos = gpa; pos < (gpa + gpsz); pos += BIT(order)) + asm volatile(HFENCE_GVMA(%0, zero) + : : "r" (pos >> 2) : "memory"); } } void kvm_riscv_local_hfence_gvma_all(void) { - /* - * rs1 = zero - * rs2 = zero - * HFENCE.GVMA - * 0110001 00000 00000 000 00000 1110011 - */ - asm volatile (".word 0x62000073" ::: "memory"); + asm volatile(HFENCE_GVMA(zero, zero) : : : "memory"); } -/* - * Instruction encoding of hfence.gvma is: - * HFENCE.VVMA rs1, rs2 - * HFENCE.VVMA zero, rs2 - * HFENCE.VVMA rs1 - * HFENCE.VVMA - * - * rs1!=zero and rs2!=zero ==> HFENCE.VVMA rs1, rs2 - * rs1==zero and rs2!=zero ==> HFENCE.VVMA zero, rs2 - * rs1!=zero and rs2==zero ==> HFENCE.VVMA rs1 - * rs1==zero and rs2==zero ==> HFENCE.VVMA - * - * Instruction encoding of HFENCE.VVMA is: - * 0010001 rs2(5) rs1(5) 000 00000 1110011 - */ - void kvm_riscv_local_hfence_vvma_asid_gva(unsigned long vmid, unsigned long asid, unsigned long gva, @@ -133,18 +90,16 @@ void kvm_riscv_local_hfence_vvma_asid_gva(unsigned long vmid, hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT); - for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) { - /* - * rs1 = a0 (GVA) - * rs2 = a1 (ASID) - * HFENCE.VVMA a0, a1 - * 0010001 01011 01010 000 00000 1110011 - */ - asm volatile ("add a0, %0, zero\n" - "add a1, %1, zero\n" - ".word 0x22b50073\n" - :: "r" (pos), "r" (asid) - : "a0", "a1", "memory"); + if (has_svinval()) { + asm volatile (SFENCE_W_INVAL() ::: "memory"); + for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) + asm volatile(HINVAL_VVMA(%0, %1) + : : "r" (pos), "r" (asid) : "memory"); + asm volatile (SFENCE_INVAL_IR() ::: "memory"); + } else { + for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) + asm volatile(HFENCE_VVMA(%0, %1) + : : "r" (pos), "r" (asid) : "memory"); } csr_write(CSR_HGATP, hgatp); @@ -157,15 +112,7 @@ void kvm_riscv_local_hfence_vvma_asid_all(unsigned long vmid, hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT); - /* - * rs1 = zero - * rs2 = a0 (ASID) - * HFENCE.VVMA zero, a0 - * 0010001 01010 00000 000 00000 1110011 - */ - asm volatile ("add a0, %0, zero\n" - ".word 0x22a00073\n" - :: "r" (asid) : "a0", "memory"); + asm volatile(HFENCE_VVMA(zero, %0) : : "r" (asid) : "memory"); csr_write(CSR_HGATP, hgatp); } @@ -183,16 +130,16 @@ void kvm_riscv_local_hfence_vvma_gva(unsigned long vmid, hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT); - for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) { - /* - * rs1 = a0 (GVA) - * rs2 = zero - * HFENCE.VVMA a0 - * 0010001 00000 01010 000 00000 1110011 - */ - asm volatile ("add a0, %0, zero\n" - ".word 0x22050073\n" - :: "r" (pos) : "a0", "memory"); + if (has_svinval()) { + asm volatile (SFENCE_W_INVAL() ::: "memory"); + for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) + asm volatile(HINVAL_VVMA(%0, zero) + : : "r" (pos) : "memory"); + asm volatile (SFENCE_INVAL_IR() ::: "memory"); + } else { + for (pos = gva; pos < (gva + gvsz); pos += BIT(order)) + asm volatile(HFENCE_VVMA(%0, zero) + : : "r" (pos) : "memory"); } csr_write(CSR_HGATP, hgatp); @@ -204,13 +151,7 @@ void kvm_riscv_local_hfence_vvma_all(unsigned long vmid) hgatp = csr_swap(CSR_HGATP, vmid << HGATP_VMID_SHIFT); - /* - * rs1 = zero - * rs2 = zero - * HFENCE.VVMA - * 0010001 00000 00000 000 00000 1110011 - */ - asm volatile (".word 0x22000073" ::: "memory"); + asm volatile(HFENCE_VVMA(zero, zero) : : : "memory"); csr_write(CSR_HGATP, hgatp); } diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index d0f08d5b4282..a032c4f0d600 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -7,6 +7,7 @@ */ #include <linux/bitops.h> +#include <linux/entry-kvm.h> #include <linux/errno.h> #include <linux/err.h> #include <linux/kdebug.h> @@ -18,6 +19,7 @@ #include <linux/fs.h> #include <linux/kvm_host.h> #include <asm/csr.h> +#include <asm/cacheflush.h> #include <asm/hwcap.h> const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { @@ -28,6 +30,7 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, mmio_exit_kernel), STATS_DESC_COUNTER(VCPU, csr_exit_user), STATS_DESC_COUNTER(VCPU, csr_exit_kernel), + STATS_DESC_COUNTER(VCPU, signal_exits), STATS_DESC_COUNTER(VCPU, exits) }; @@ -42,17 +45,23 @@ const struct kvm_stats_header kvm_vcpu_stats_header = { #define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0) +#define KVM_ISA_EXT_ARR(ext) [KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext + /* Mapping between KVM ISA Extension ID & Host ISA extension ID */ static const unsigned long kvm_isa_ext_arr[] = { - RISCV_ISA_EXT_a, - RISCV_ISA_EXT_c, - RISCV_ISA_EXT_d, - RISCV_ISA_EXT_f, - RISCV_ISA_EXT_h, - RISCV_ISA_EXT_i, - RISCV_ISA_EXT_m, - RISCV_ISA_EXT_SVPBMT, - RISCV_ISA_EXT_SSTC, + [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a, + [KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c, + [KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d, + [KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f, + [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h, + [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i, + [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m, + + KVM_ISA_EXT_ARR(SSTC), + KVM_ISA_EXT_ARR(SVINVAL), + KVM_ISA_EXT_ARR(SVPBMT), + KVM_ISA_EXT_ARR(ZIHINTPAUSE), + KVM_ISA_EXT_ARR(ZICBOM), }; static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext) @@ -87,6 +96,8 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_I: case KVM_RISCV_ISA_EXT_M: case KVM_RISCV_ISA_EXT_SSTC: + case KVM_RISCV_ISA_EXT_SVINVAL: + case KVM_RISCV_ISA_EXT_ZIHINTPAUSE: return false; default: break; @@ -254,6 +265,11 @@ static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu, case KVM_REG_RISCV_CONFIG_REG(isa): reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK; break; + case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): + if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM)) + return -EINVAL; + reg_val = riscv_cbom_block_size; + break; default: return -EINVAL; } @@ -311,6 +327,8 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu, return -EOPNOTSUPP; } break; + case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): + return -EOPNOTSUPP; default: return -EINVAL; } @@ -784,11 +802,15 @@ static void kvm_riscv_vcpu_update_config(const unsigned long *isa) { u64 henvcfg = 0; - if (__riscv_isa_extension_available(isa, RISCV_ISA_EXT_SVPBMT)) + if (riscv_isa_extension_available(isa, SVPBMT)) henvcfg |= ENVCFG_PBMTE; - if (__riscv_isa_extension_available(isa, RISCV_ISA_EXT_SSTC)) + if (riscv_isa_extension_available(isa, SSTC)) henvcfg |= ENVCFG_STCE; + + if (riscv_isa_extension_available(isa, ZICBOM)) + henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE); + csr_write(CSR_HENVCFG, henvcfg); #ifdef CONFIG_32BIT csr_write(CSR_HENVCFGH, henvcfg >> 32); @@ -958,7 +980,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) run->exit_reason = KVM_EXIT_UNKNOWN; while (ret > 0) { /* Check conditions before entering the guest */ - cond_resched(); + ret = xfer_to_guest_mode_handle_work(vcpu); + if (!ret) + ret = 1; kvm_riscv_gstage_vmid_update(vcpu); @@ -967,15 +991,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) local_irq_disable(); /* - * Exit if we have a signal pending so that we can deliver - * the signal to user space. - */ - if (signal_pending(current)) { - ret = -EINTR; - run->exit_reason = KVM_EXIT_INTR; - } - - /* * Ensure we set mode to IN_GUEST_MODE after we disable * interrupts and before the final VCPU requests check. * See the comment in kvm_vcpu_exiting_guest_mode() and @@ -997,7 +1012,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (ret <= 0 || kvm_riscv_gstage_vmid_ver_changed(&vcpu->kvm->arch.vmid) || - kvm_request_pending(vcpu)) { + kvm_request_pending(vcpu) || + xfer_to_guest_mode_work_pending()) { vcpu->mode = OUTSIDE_GUEST_MODE; local_irq_enable(); kvm_vcpu_srcu_read_lock(vcpu); diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c index d5c36386878a..c9f741ab26f5 100644 --- a/arch/riscv/kvm/vcpu_exit.c +++ b/arch/riscv/kvm/vcpu_exit.c @@ -8,6 +8,7 @@ #include <linux/kvm_host.h> #include <asm/csr.h> +#include <asm/insn-def.h> static int gstage_page_fault(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_cpu_trap *trap) @@ -62,11 +63,7 @@ unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu, { register unsigned long taddr asm("a0") = (unsigned long)trap; register unsigned long ttmp asm("a1"); - register unsigned long val asm("t0"); - register unsigned long tmp asm("t1"); - register unsigned long addr asm("t2") = guest_addr; - unsigned long flags; - unsigned long old_stvec, old_hstatus; + unsigned long flags, val, tmp, old_stvec, old_hstatus; local_irq_save(flags); @@ -82,29 +79,19 @@ unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu, ".option push\n" ".option norvc\n" "add %[ttmp], %[taddr], 0\n" - /* - * HLVX.HU %[val], (%[addr]) - * HLVX.HU t0, (t2) - * 0110010 00011 00111 100 00101 1110011 - */ - ".word 0x6433c2f3\n" + HLVX_HU(%[val], %[addr]) "andi %[tmp], %[val], 3\n" "addi %[tmp], %[tmp], -3\n" "bne %[tmp], zero, 2f\n" "addi %[addr], %[addr], 2\n" - /* - * HLVX.HU %[tmp], (%[addr]) - * HLVX.HU t1, (t2) - * 0110010 00011 00111 100 00110 1110011 - */ - ".word 0x6433c373\n" + HLVX_HU(%[tmp], %[addr]) "sll %[tmp], %[tmp], 16\n" "add %[val], %[val], %[tmp]\n" "2:\n" ".option pop" : [val] "=&r" (val), [tmp] "=&r" (tmp), [taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp), - [addr] "+&r" (addr) : : "memory"); + [addr] "+&r" (guest_addr) : : "memory"); if (trap->scause == EXC_LOAD_PAGE_FAULT) trap->scause = EXC_INST_PAGE_FAULT; @@ -121,24 +108,14 @@ unsigned long kvm_riscv_vcpu_unpriv_read(struct kvm_vcpu *vcpu, ".option norvc\n" "add %[ttmp], %[taddr], 0\n" #ifdef CONFIG_64BIT - /* - * HLV.D %[val], (%[addr]) - * HLV.D t0, (t2) - * 0110110 00000 00111 100 00101 1110011 - */ - ".word 0x6c03c2f3\n" + HLV_D(%[val], %[addr]) #else - /* - * HLV.W %[val], (%[addr]) - * HLV.W t0, (t2) - * 0110100 00000 00111 100 00101 1110011 - */ - ".word 0x6803c2f3\n" + HLV_W(%[val], %[addr]) #endif ".option pop" : [val] "=&r" (val), [taddr] "+&r" (taddr), [ttmp] "+&r" (ttmp) - : [addr] "r" (addr) : "memory"); + : [addr] "r" (guest_addr) : "memory"); } csr_write(CSR_STVEC, old_stvec); diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index e3f9bdf47c5f..b0add983530a 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -13,6 +13,8 @@ #include <asm/cacheflush.h> unsigned int riscv_cbom_block_size; +EXPORT_SYMBOL_GPL(riscv_cbom_block_size); + static bool noncoherent_supported; void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 61b9dd34d333..7551b6f9c31c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1280,8 +1280,8 @@ struct kvm_arch { bool tdp_mmu_enabled; /* - * List of struct kvm_mmu_pages being used as roots. - * All struct kvm_mmu_pages in the list should have + * List of kvm_mmu_page structs being used as roots. + * All kvm_mmu_page structs in the list should have * tdp_mmu_page set. * * For reads, this list is protected by: @@ -1300,8 +1300,8 @@ struct kvm_arch { struct list_head tdp_mmu_roots; /* - * List of struct kvmp_mmu_pages not being used as roots. - * All struct kvm_mmu_pages in the list should have + * List of kvm_mmu_page structs not being used as roots. + * All kvm_mmu_page structs in the list should have * tdp_mmu_page set and a tdp_mmu_root_count of 0. */ struct list_head tdp_mmu_pages; @@ -1311,9 +1311,9 @@ struct kvm_arch { * is held in read mode: * - tdp_mmu_roots (above) * - tdp_mmu_pages (above) - * - the link field of struct kvm_mmu_pages used by the TDP MMU + * - the link field of kvm_mmu_page structs used by the TDP MMU * - lpage_disallowed_mmu_pages - * - the lpage_disallowed_link field of struct kvm_mmu_pages used + * - the lpage_disallowed_link field of kvm_mmu_page structs used * by the TDP MMU * It is acceptable, but not necessary, to acquire this lock when * the thread holds the MMU lock in write mode. diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index c371ef695fcc..498dc600bd5c 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -309,7 +309,7 @@ enum vmcs_field { GUEST_LDTR_AR_BYTES = 0x00004820, GUEST_TR_AR_BYTES = 0x00004822, GUEST_INTERRUPTIBILITY_INFO = 0x00004824, - GUEST_ACTIVITY_STATE = 0X00004826, + GUEST_ACTIVITY_STATE = 0x00004826, GUEST_SYSENTER_CS = 0x0000482A, VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, HOST_IA32_SYSENTER_CS = 0x00004c00, diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index e3cbd7706136..67be7f217e37 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -28,7 +28,8 @@ config KVM select HAVE_KVM_IRQCHIP select HAVE_KVM_PFNCACHE select HAVE_KVM_IRQFD - select HAVE_KVM_DIRTY_RING + select HAVE_KVM_DIRTY_RING_TSO + select HAVE_KVM_DIRTY_RING_ACQ_REL select IRQ_BYPASS_MANAGER select HAVE_KVM_IRQ_BYPASS select HAVE_KVM_IRQ_ROUTING diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 02f9e4f245bd..d9b9a0f0db17 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -106,9 +106,19 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi) return; if (pmc->perf_event && pmc->perf_event->attr.precise_ip) { - /* Indicate PEBS overflow PMI to guest. */ - skip_pmi = __test_and_set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT, - (unsigned long *)&pmu->global_status); + if (!in_pmi) { + /* + * TODO: KVM is currently _choosing_ to not generate records + * for emulated instructions, avoiding BUFFER_OVF PMI when + * there are no records. Strictly speaking, it should be done + * as well in the right context to improve sampling accuracy. + */ + skip_pmi = true; + } else { + /* Indicate PEBS overflow PMI to guest. */ + skip_pmi = __test_and_set_bit(GLOBAL_STATUS_BUFFER_OVF_BIT, + (unsigned long *)&pmu->global_status); + } } else { __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); } @@ -227,8 +237,8 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc) get_sample_period(pmc, pmc->counter))) return false; - if (!test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) && - pmc->perf_event->attr.precise_ip) + if (test_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->pebs_enable) != + (!!pmc->perf_event->attr.precise_ip)) return false; /* reuse perf_event to serve as pmc_reprogram_counter() does*/ diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index f24613a108c5..b68956299fa8 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -23,107 +23,52 @@ enum pmu_type { PMU_TYPE_EVNTSEL, }; -enum index { - INDEX_ZERO = 0, - INDEX_ONE, - INDEX_TWO, - INDEX_THREE, - INDEX_FOUR, - INDEX_FIVE, - INDEX_ERROR, -}; - -static unsigned int get_msr_base(struct kvm_pmu *pmu, enum pmu_type type) +static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) { - struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); + unsigned int num_counters = pmu->nr_arch_gp_counters; - if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) { - if (type == PMU_TYPE_COUNTER) - return MSR_F15H_PERF_CTR; - else - return MSR_F15H_PERF_CTL; - } else { - if (type == PMU_TYPE_COUNTER) - return MSR_K7_PERFCTR0; - else - return MSR_K7_EVNTSEL0; - } -} + if (pmc_idx >= num_counters) + return NULL; -static enum index msr_to_index(u32 msr) -{ - switch (msr) { - case MSR_F15H_PERF_CTL0: - case MSR_F15H_PERF_CTR0: - case MSR_K7_EVNTSEL0: - case MSR_K7_PERFCTR0: - return INDEX_ZERO; - case MSR_F15H_PERF_CTL1: - case MSR_F15H_PERF_CTR1: - case MSR_K7_EVNTSEL1: - case MSR_K7_PERFCTR1: - return INDEX_ONE; - case MSR_F15H_PERF_CTL2: - case MSR_F15H_PERF_CTR2: - case MSR_K7_EVNTSEL2: - case MSR_K7_PERFCTR2: - return INDEX_TWO; - case MSR_F15H_PERF_CTL3: - case MSR_F15H_PERF_CTR3: - case MSR_K7_EVNTSEL3: - case MSR_K7_PERFCTR3: - return INDEX_THREE; - case MSR_F15H_PERF_CTL4: - case MSR_F15H_PERF_CTR4: - return INDEX_FOUR; - case MSR_F15H_PERF_CTL5: - case MSR_F15H_PERF_CTR5: - return INDEX_FIVE; - default: - return INDEX_ERROR; - } + return &pmu->gp_counters[array_index_nospec(pmc_idx, num_counters)]; } static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr, enum pmu_type type) { struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); + unsigned int idx; if (!vcpu->kvm->arch.enable_pmu) return NULL; switch (msr) { - case MSR_F15H_PERF_CTL0: - case MSR_F15H_PERF_CTL1: - case MSR_F15H_PERF_CTL2: - case MSR_F15H_PERF_CTL3: - case MSR_F15H_PERF_CTL4: - case MSR_F15H_PERF_CTL5: + case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5: if (!guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) return NULL; - fallthrough; + /* + * Each PMU counter has a pair of CTL and CTR MSRs. CTLn + * MSRs (accessed via EVNTSEL) are even, CTRn MSRs are odd. + */ + idx = (unsigned int)((msr - MSR_F15H_PERF_CTL0) / 2); + if (!(msr & 0x1) != (type == PMU_TYPE_EVNTSEL)) + return NULL; + break; case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3: if (type != PMU_TYPE_EVNTSEL) return NULL; + idx = msr - MSR_K7_EVNTSEL0; break; - case MSR_F15H_PERF_CTR0: - case MSR_F15H_PERF_CTR1: - case MSR_F15H_PERF_CTR2: - case MSR_F15H_PERF_CTR3: - case MSR_F15H_PERF_CTR4: - case MSR_F15H_PERF_CTR5: - if (!guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) - return NULL; - fallthrough; case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: if (type != PMU_TYPE_COUNTER) return NULL; + idx = msr - MSR_K7_PERFCTR0; break; default: return NULL; } - return &pmu->gp_counters[msr_to_index(msr)]; + return amd_pmc_idx_to_pmc(pmu, idx); } static bool amd_hw_event_available(struct kvm_pmc *pmc) @@ -139,22 +84,6 @@ static bool amd_pmc_is_enabled(struct kvm_pmc *pmc) return true; } -static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) -{ - unsigned int base = get_msr_base(pmu, PMU_TYPE_COUNTER); - struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); - - if (guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE)) { - /* - * The idx is contiguous. The MSRs are not. The counter MSRs - * are interleaved with the event select MSRs. - */ - pmc_idx *= 2; - } - - return get_gp_pmc_amd(pmu, base + pmc_idx, PMU_TYPE_COUNTER); -} - static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); @@ -168,15 +97,7 @@ static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx) static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, unsigned int idx, u64 *mask) { - struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); - struct kvm_pmc *counters; - - idx &= ~(3u << 30); - if (idx >= pmu->nr_arch_gp_counters) - return NULL; - counters = pmu->gp_counters; - - return &counters[idx]; + return amd_pmc_idx_to_pmc(vcpu_to_pmu(vcpu), idx & ~(3u << 30)); } static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index c399637a3a79..25b70a85bef5 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -68,15 +68,11 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) } } -/* function is called when global control register has been updated. */ -static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data) +static void reprogram_counters(struct kvm_pmu *pmu, u64 diff) { int bit; - u64 diff = pmu->global_ctrl ^ data; struct kvm_pmc *pmc; - pmu->global_ctrl = data; - for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) { pmc = intel_pmc_idx_to_pmc(pmu, bit); if (pmc) @@ -397,7 +393,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) struct kvm_pmc *pmc; u32 msr = msr_info->index; u64 data = msr_info->data; - u64 reserved_bits; + u64 reserved_bits, diff; switch (msr) { case MSR_CORE_PERF_FIXED_CTR_CTRL: @@ -418,7 +414,9 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (pmu->global_ctrl == data) return 0; if (kvm_valid_perf_global_ctrl(pmu, data)) { - global_ctrl_changed(pmu, data); + diff = pmu->global_ctrl ^ data; + pmu->global_ctrl = data; + reprogram_counters(pmu, diff); return 0; } break; @@ -433,7 +431,9 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (pmu->pebs_enable == data) return 0; if (!(data & pmu->pebs_enable_mask)) { + diff = pmu->pebs_enable ^ data; pmu->pebs_enable = data; + reprogram_counters(pmu, diff); return 0; } break; @@ -776,20 +776,23 @@ static void intel_pmu_cleanup(struct kvm_vcpu *vcpu) void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu) { struct kvm_pmc *pmc = NULL; - int bit; + int bit, hw_idx; for_each_set_bit(bit, (unsigned long *)&pmu->global_ctrl, X86_PMC_IDX_MAX) { pmc = intel_pmc_idx_to_pmc(pmu, bit); if (!pmc || !pmc_speculative_in_use(pmc) || - !intel_pmc_is_enabled(pmc)) + !intel_pmc_is_enabled(pmc) || !pmc->perf_event) continue; - if (pmc->perf_event && pmc->idx != pmc->perf_event->hw.idx) { - pmu->host_cross_mapped_mask |= - BIT_ULL(pmc->perf_event->hw.idx); - } + /* + * A negative index indicates the event isn't mapped to a + * physical counter in the host, e.g. due to contention. + */ + hw_idx = pmc->perf_event->hw.idx; + if (hw_idx != pmc->idx && hw_idx > -1) + pmu->host_cross_mapped_mask |= BIT_ULL(hw_idx); } } |