author     Paolo Bonzini <pbonzini@redhat.com>   2018-06-01 19:17:22 +0200
committer  Paolo Bonzini <pbonzini@redhat.com>   2018-06-01 19:17:22 +0200
commit     5eec43a1fa2a7ec5225411c97538fa582d36f579
tree       22928042a707851ab4359eb45e7bda04a374f4d3
parent     75025cc9d13f2093bb1ee4388dbaae3182c97bab
parent     e25028c8ded011d19f9a11164807507c94febc01
Merge tag 'kvmarm-for-v4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/ARM updates for 4.18
- Lazy context-switching of FPSIMD registers on arm64
- Allow virtual redistributors to be part of two or more MMIO ranges
35 files changed, 809 insertions, 349 deletions
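Before the diff itself, a minimal self-contained sketch of the lazy FPSIMD switching model this series introduces. The flag names and transitions mirror the new code in arch/arm64/kvm/fpsimd.c and arch/arm64/kvm/hyp/switch.c below; the single-task model, stub print statements, and function names here are illustrative only, not kernel code:

```c
#include <stdbool.h>
#include <stdio.h>

/* vcpu_arch flag values, as defined by the series */
#define KVM_ARM64_FP_ENABLED      (1 << 1) /* guest FP regs loaded */
#define KVM_ARM64_FP_HOST         (1 << 2) /* host FP regs loaded  */
#define KVM_ARM64_HOST_SVE_IN_USE (1 << 3) /* backup of host TIF_SVE */

struct vcpu { unsigned long flags; };

/* kvm_arch_vcpu_load_fp(): on vcpu_load, the host's FP regs are live,
 * the guest's are not, and the host's TIF_SVE is backed up. */
static void vcpu_load_fp(struct vcpu *v, bool host_uses_sve)
{
	v->flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_HOST_SVE_IN_USE);
	v->flags |= KVM_ARM64_FP_HOST;
	if (host_uses_sve)
		v->flags |= KVM_ARM64_HOST_SVE_IN_USE;
}

/* __hyp_switch_fpsimd(): the first guest FP/SIMD access traps to hyp,
 * which saves the host state lazily and loads the guest's. */
static void hyp_fp_trap(struct vcpu *v)
{
	if (v->flags & KVM_ARM64_FP_HOST) {
		printf("save host FPSIMD/SVE state\n");
		v->flags &= ~KVM_ARM64_FP_HOST;
	}
	printf("load guest FPSIMD state\n");
	v->flags |= KVM_ARM64_FP_ENABLED;
}

/* kvm_arch_vcpu_put_fp(): write back dirty guest state, invalidate the
 * cpu regs, and restore the host's view of TIF_SVE. */
static void vcpu_put_fp(struct vcpu *v)
{
	if (v->flags & KVM_ARM64_FP_ENABLED)
		printf("flush guest FP state to memory, invalidate cpu regs\n");
	printf("restore host TIF_SVE = %d\n",
	       !!(v->flags & KVM_ARM64_HOST_SVE_IN_USE));
}

int main(void)
{
	struct vcpu v = { 0 };

	vcpu_load_fp(&v, true);
	hyp_fp_trap(&v);	/* guest touched FP/SIMD */
	vcpu_put_fp(&v);
	return 0;
}
```

If the guest never touches FP/SIMD between vcpu_load and vcpu_put, no save/restore happens at all, which is the point of the lazy scheme.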
diff --git a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt index 9293b45abdb9..2408ab720ef7 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt @@ -27,16 +27,42 @@ Groups: VCPU and all of the redistributor pages are contiguous. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3. This address needs to be 64K aligned. + + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION (rw, 64-bit) + The attribute data pointed to by kvm_device_attr.addr is a __u64 value: + bits: | 63 .... 52 | 51 .... 16 | 15 - 12 |11 - 0 + values: | count | base | flags | index + - index encodes the unique redistributor region index + - flags: reserved for future use, currently 0 + - base field encodes bits [51:16] of the guest physical base address + of the first redistributor in the region. + - count encodes the number of redistributors in the region. Must be + greater than 0. + There are two 64K pages for each redistributor in the region and + redistributors are laid out contiguously within the region. Regions + are filled with redistributors in the index order. The sum of all + region count fields must be greater than or equal to the number of + VCPUs. Redistributor regions must be registered in the incremental + index order, starting from index 0. + The characteristics of a specific redistributor region can be read + by presetting the index field in the attr data. + Only valid for KVM_DEV_TYPE_ARM_VGIC_V3. + + It is invalid to mix calls with KVM_VGIC_V3_ADDR_TYPE_REDIST and + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attributes. + Errors: -E2BIG: Address outside of addressable IPA range - -EINVAL: Incorrectly aligned address + -EINVAL: Incorrectly aligned address, bad redistributor region + count/index, mixed redistributor region attribute usage -EEXIST: Address already configured + -ENOENT: Attempt to read the characteristics of a non existing + redistributor region -ENXIO: The group or attribute is unknown/unsupported for this device or hardware support is missing. -EFAULT: Invalid user pointer for attr->addr. 
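As a usage illustration of the attribute documented above, here is a userspace sketch that packs the __u64 layout (count in bits [63:52], base bits [51:16], flags [15:12] zero, index [11:0]) and registers a redistributor region via KVM_SET_DEVICE_ATTR on the VGICv3 device fd. This is a minimal sketch assuming arm64 uapi headers; the helper names are hypothetical, and the fallback #define simply mirrors the value added by this series:

```c
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

#ifndef KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION
#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION 5	/* value added by this series */
#endif

/* Pack the attribute value per the layout documented above:
 * bits [63:52] count, [51:16] base (bits [51:16] of the GPA),
 * [15:12] flags (must be zero), [11:0] region index. */
static uint64_t redist_region_attr(uint64_t base_gpa, uint64_t count,
				   uint64_t index)
{
	return (count << 52) |
	       (base_gpa & 0x000fffffffff0000ULL) |
	       (index & 0xfffULL);
}

/* vgic_fd is assumed to come from KVM_CREATE_DEVICE(KVM_DEV_TYPE_ARM_VGIC_V3).
 * Regions must be registered in increasing index order, starting at 0. */
static int set_redist_region(int vgic_fd, uint64_t base_gpa,
			     uint64_t count, uint64_t index)
{
	uint64_t val = redist_region_attr(base_gpa, count, index);
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
		.attr  = KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION,
		.addr  = (uint64_t)(uintptr_t)&val,
	};

	return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}
```

Reading the same attribute with the index field preset returns the region's base and count, and mixing this attribute with the legacy KVM_VGIC_V3_ADDR_TYPE_REDIST is rejected, as the documentation above states.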
- KVM_DEV_ARM_VGIC_GRP_DIST_REGS KVM_DEV_ARM_VGIC_GRP_REDIST_REGS Attributes: diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index c7c28c885a19..f079a2039c8a 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -280,6 +280,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); +static inline bool kvm_arch_check_sve_has_vhe(void) { return true; } static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} @@ -303,8 +304,13 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); -/* All host FP/SIMD state is restored on guest exit, so nothing to save: */ -static inline void kvm_fpsimd_flush_cpu_state(void) {} +/* + * VFP/NEON switching is all done by the hyp switch code, so no need to + * coordinate with host context handling for this state: + */ +static inline void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_vhe_guest_enter(void) {} static inline void kvm_arm_vhe_guest_exit(void) {} diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index caae4843cb70..16e006f708ca 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -91,6 +91,7 @@ struct kvm_regs { #define KVM_VGIC_V3_ADDR_TYPE_DIST 2 #define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 #define KVM_VGIC_ITS_ADDR_TYPE 4 +#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION 5 #define KVM_VGIC_V3_DIST_SIZE SZ_64K #define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index eb2cf4938f6d..b0d3820081c8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1130,6 +1130,7 @@ endmenu config ARM64_SVE bool "ARM Scalable Vector Extension support" default y + depends on !KVM || ARM64_VHE help The Scalable Vector Extension (SVE) is an extension to the AArch64 execution state which complements and extends the SIMD functionality @@ -1155,6 +1156,12 @@ config ARM64_SVE booting the kernel. If unsure and you are not observing these symptoms, you should assume that it is safe to say Y. + CPUs that support SVE are architecturally required to support the + Virtualization Host Extensions (VHE), so the kernel makes no + provision for supporting SVE alongside KVM without VHE enabled. + Thus, you will need to enable CONFIG_ARM64_VHE if you want to support + KVM in the same kernel image. + config ARM64_MODULE_PLTS bool select HAVE_MOD_ARCH_SPECIFIC diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 09b0f2a80c8f..0a6b7133195e 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -11,9 +11,7 @@ #include <asm/cpucaps.h> #include <asm/cputype.h> -#include <asm/fpsimd.h> #include <asm/hwcap.h> -#include <asm/sigcontext.h> #include <asm/sysreg.h> /* @@ -510,33 +508,6 @@ static inline bool system_supports_sve(void) cpus_have_const_cap(ARM64_SVE); } -/* - * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE - * vector length. - * - * Use only if SVE is present. - * This function clobbers the SVE vector length. 
- */ -static inline u64 read_zcr_features(void) -{ - u64 zcr; - unsigned int vq_max; - - /* - * Set the maximum possible VL, and write zeroes to all other - * bits to see if they stick. - */ - sve_kernel_enable(NULL); - write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1); - - zcr = read_sysreg_s(SYS_ZCR_EL1); - zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */ - vq_max = sve_vq_from_vl(sve_get_vl()); - zcr |= vq_max - 1; /* set LEN field to maximum effective value */ - - return zcr; -} - #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index aa7162ae93e3..fa92747a49c8 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -18,6 +18,8 @@ #include <asm/ptrace.h> #include <asm/errno.h> +#include <asm/processor.h> +#include <asm/sigcontext.h> #ifndef __ASSEMBLY__ @@ -41,6 +43,8 @@ struct task_struct; extern void fpsimd_save_state(struct user_fpsimd_state *state); extern void fpsimd_load_state(struct user_fpsimd_state *state); +extern void fpsimd_save(void); + extern void fpsimd_thread_switch(struct task_struct *next); extern void fpsimd_flush_thread(void); @@ -49,12 +53,27 @@ extern void fpsimd_preserve_current_state(void); extern void fpsimd_restore_current_state(void); extern void fpsimd_update_current_state(struct user_fpsimd_state const *state); +extern void fpsimd_bind_task_to_cpu(void); +extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state); + extern void fpsimd_flush_task_state(struct task_struct *target); +extern void fpsimd_flush_cpu_state(void); extern void sve_flush_cpu_state(void); /* Maximum VL that SVE VL-agnostic software can transparently support */ #define SVE_VL_ARCH_MAX 0x100 +/* Offset of FFR in the SVE register dump */ +static inline size_t sve_ffr_offset(int vl) +{ + return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET; +} + +static inline void *sve_pffr(struct thread_struct *thread) +{ + return (char *)thread->sve_state + sve_ffr_offset(thread->sve_vl); +} + extern void sve_save_state(void *state, u32 *pfpsr); extern void sve_load_state(void const *state, u32 const *pfpsr, unsigned long vq_minus_1); @@ -63,6 +82,8 @@ extern unsigned int sve_get_vl(void); struct arm64_cpu_capabilities; extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused); +extern u64 read_zcr_features(void); + extern int __ro_after_init sve_max_vl; #ifdef CONFIG_ARM64_SVE diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index f6648a3e4152..821a7032c0f7 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -30,19 +30,19 @@ /* The hyp-stub will return this for any kvm_call_hyp() call */ #define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR -#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 -#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) +#ifndef __ASSEMBLY__ + +#include <linux/mm.h> /* Translate a kernel address of @sym into its equivalent linear mapping */ #define kvm_ksym_ref(sym) \ ({ \ void *val = &sym; \ if (!is_kernel_in_hyp_mode()) \ - val = phys_to_virt((u64)&sym - kimage_voffset); \ + val = lm_alias(&sym); \ val; \ }) -#ifndef __ASSEMBLY__ struct kvm; struct kvm_vcpu; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 469de8acd06f..a4ca202ff3f2 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -30,6 +30,7 @@ #include <asm/kvm.h> #include <asm/kvm_asm.h> #include <asm/kvm_mmio.h> +#include 
<asm/thread_info.h> #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -216,8 +217,8 @@ struct kvm_vcpu_arch { /* Exception Information */ struct kvm_vcpu_fault_info fault; - /* Guest debug state */ - u64 debug_flags; + /* Miscellaneous vcpu state flags */ + u64 flags; /* * We maintain more than a single set of debug registers to support @@ -238,6 +239,10 @@ struct kvm_vcpu_arch { /* Pointer to host CPU context */ kvm_cpu_context_t *host_cpu_context; + + struct thread_info *host_thread_info; /* hyp VA */ + struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */ + struct { /* {Break,watch}point registers */ struct kvm_guest_debug_arch regs; @@ -293,6 +298,12 @@ struct kvm_vcpu_arch { bool sysregs_loaded_on_cpu; }; +/* vcpu_arch flags field values: */ +#define KVM_ARM64_DEBUG_DIRTY (1 << 0) +#define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */ +#define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */ +#define KVM_ARM64_HOST_SVE_IN_USE (1 << 3) /* backup for host TIF_SVE */ + #define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) /* @@ -394,6 +405,19 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2); } +static inline bool kvm_arch_check_sve_has_vhe(void) +{ + /* + * The Arm architecture specifies that implementation of SVE + * requires VHE also to be implemented. The KVM code for arm64 + * relies on this when SVE is present: + */ + if (system_supports_sve()) + return has_vhe(); + else + return true; +} + static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} @@ -420,15 +444,18 @@ static inline void __cpu_init_stage2(void) "PARange is %d bits, unsupported configuration!", parange); } -/* - * All host FP/SIMD state is restored on guest exit, so nothing needs - * doing here except in the SVE case: -*/ -static inline void kvm_fpsimd_flush_cpu_state(void) +/* Guest/host FPSIMD coordination helpers */ +int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); +void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); +void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu); +void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu); + +#ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */ +static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) { - if (system_supports_sve()) - sve_flush_cpu_state(); + return kvm_arch_vcpu_run_map_fp(vcpu); } +#endif static inline void kvm_arm_vhe_guest_enter(void) { diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 767598932549..c99e657fdd57 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -156,7 +156,9 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, /* Sync TPIDR_EL0 back to thread_struct for current */ void tls_preserve_current_state(void); -#define INIT_THREAD { } +#define INIT_THREAD { \ + .fpsimd_cpu = NR_CPUS, \ +} static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) { @@ -244,6 +246,17 @@ void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused); void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused); void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused); +/* + * Not at the top of the file due to a direct #include cycle between + * <asm/fpsimd.h> and <asm/processor.h>. 
Deferring this #include + * ensures that contents of processor.h are visible to fpsimd.h even if + * processor.h is included first. + * + * These prctl helpers are the only things in this file that require + * fpsimd.h. The core code expects them to be in this header. + */ +#include <asm/fpsimd.h> + /* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */ #define SVE_SET_VL(arg) sve_set_current_vl(arg) #define SVE_GET_VL() sve_get_current_vl() diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 740aa03c5f0d..af271f9a6c9f 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -45,12 +45,6 @@ struct thread_info { int preempt_count; /* 0 => preemptable, <0 => bug */ }; -#define INIT_THREAD_INFO(tsk) \ -{ \ - .preempt_count = INIT_PREEMPT_COUNT, \ - .addr_limit = KERNEL_DS, \ -} - #define thread_saved_pc(tsk) \ ((unsigned long)(tsk->thread.cpu_context.pc)) #define thread_saved_sp(tsk) \ @@ -117,5 +111,12 @@ void arch_release_task_struct(struct task_struct *tsk); _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ _TIF_NOHZ) +#define INIT_THREAD_INFO(tsk) \ +{ \ + .flags = _TIF_FOREIGN_FPSTATE, \ + .preempt_count = INIT_PREEMPT_COUNT, \ + .addr_limit = KERNEL_DS, \ +} + #endif /* __KERNEL__ */ #endif /* __ASM_THREAD_INFO_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 04b3256f8e6d..4e76630dd655 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -91,6 +91,7 @@ struct kvm_regs { #define KVM_VGIC_V3_ADDR_TYPE_DIST 2 #define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 #define KVM_VGIC_ITS_ADDR_TYPE 4 +#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION 5 #define KVM_VGIC_V3_DIST_SIZE SZ_64K #define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 87a35364e750..7074c4cd0e0e 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -37,12 +37,14 @@ #include <linux/sched/task_stack.h> #include <linux/signal.h> #include <linux/slab.h> +#include <linux/stddef.h> #include <linux/sysctl.h> #include <asm/esr.h> #include <asm/fpsimd.h> #include <asm/cpufeature.h> #include <asm/cputype.h> +#include <asm/processor.h> #include <asm/simd.h> #include <asm/sigcontext.h> #include <asm/sysreg.h> @@ -118,7 +120,6 @@ */ struct fpsimd_last_state_struct { struct user_fpsimd_state *st; - bool sve_in_use; }; static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); @@ -159,19 +160,6 @@ static void sve_free(struct task_struct *task) __sve_free(task); } - -/* Offset of FFR in the SVE register dump */ -static size_t sve_ffr_offset(int vl) -{ - return SVE_SIG_FFR_OFFSET(sve_vq_from_vl(vl)) - SVE_SIG_REGS_OFFSET; -} - -static void *sve_pffr(struct task_struct *task) -{ - return (char *)task->thread.sve_state + - sve_ffr_offset(task->thread.sve_vl); -} - static void change_cpacr(u64 val, u64 mask) { u64 cpacr = read_sysreg(CPACR_EL1); @@ -252,31 +240,24 @@ static void task_fpsimd_load(void) WARN_ON(!in_softirq() && !irqs_disabled()); if (system_supports_sve() && test_thread_flag(TIF_SVE)) - sve_load_state(sve_pffr(current), + sve_load_state(sve_pffr(¤t->thread), ¤t->thread.uw.fpsimd_state.fpsr, sve_vq_from_vl(current->thread.sve_vl) - 1); else fpsimd_load_state(¤t->thread.uw.fpsimd_state); - - if (system_supports_sve()) { - /* Toggle SVE trapping for userspace if needed */ - if (test_thread_flag(TIF_SVE)) - sve_user_enable(); - else - sve_user_disable(); - - /* Serialised by exception return 
to user */ - } } /* - * Ensure current's FPSIMD/SVE storage in thread_struct is up to date - * with respect to the CPU registers. + * Ensure FPSIMD/SVE storage in memory for the loaded context is up to + * date with respect to the CPU registers. * * Softirqs (and preemption) must be disabled. */ -static void task_fpsimd_save(void) +void fpsimd_save(void) { + struct user_fpsimd_state *st = __this_cpu_read(fpsimd_last_state.st); + /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ + WARN_ON(!in_softirq() && !irqs_disabled()); if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { @@ -291,10 +272,9 @@ static void task_fpsimd_save(void) return; } - sve_save_state(sve_pffr(current), - ¤t->thread.uw.fpsimd_state.fpsr); + sve_save_state(sve_pffr(¤t->thread), &st->fpsr); } else - fpsimd_save_state(¤t->thread.uw.fpsimd_state); + fpsimd_save_state(st); } } @@ -598,7 +578,7 @@ int sve_set_vector_length(struct task_struct *task, if (task == current) { local_bh_disable(); - task_fpsimd_save(); + fpsimd_save(); set_thread_flag(TIF_FOREIGN_FPSTATE); } @@ -618,10 +598,8 @@ int sve_set_vector_length(struct task_struct *task, task->thread.sve_vl = vl; out: - if (flags & PR_SVE_VL_INHERIT) - set_tsk_thread_flag(task, TIF_SVE_VL_INHERIT); - else - clear_tsk_thread_flag(task, TIF_SVE_VL_INHERIT); + update_tsk_thread_flag(task, TIF_SVE_VL_INHERIT, + flags & PR_SVE_VL_INHERIT); return 0; } @@ -765,6 +743,33 @@ void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) isb(); } +/* + * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE + * vector length. + * + * Use only if SVE is present. + * This function clobbers the SVE vector length. + */ +u64 read_zcr_features(void) +{ + u64 zcr; + unsigned int vq_max; + + /* + * Set the maximum possible VL, and write zeroes to all other + * bits to see if they stick. + */ + sve_kernel_enable(NULL); + write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1); + + zcr = read_sysreg_s(SYS_ZCR_EL1); + zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */ + vq_max = sve_vq_from_vl(sve_get_vl()); + zcr |= vq_max - 1; /* set LEN field to maximum effective value */ + + return zcr; +} + void __init sve_setup(void) { u64 zcr; @@ -839,7 +844,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) local_bh_disable(); - task_fpsimd_save(); + fpsimd_save(); fpsimd_to_sve(current); /* Force ret_to_user to reload the registers: */ @@ -892,31 +897,25 @@ asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) void fpsimd_thread_switch(struct task_struct *next) { + bool wrong_task, wrong_cpu; + if (!system_supports_fpsimd()) return; + + /* Save unsaved fpsimd state, if any: */ + fpsimd_save(); + /* - * Save the current FPSIMD state to memory, but only if whatever is in - * the registers is in fact the most recent userland FPSIMD state of - * 'current'. + * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's + * state. For kernel threads, FPSIMD registers are never loaded + * and wrong_task and wrong_cpu will always be true. */ - if (current->mm) - task_fpsimd_save(); + wrong_task = __this_cpu_read(fpsimd_last_state.st) != + &next->thread.uw.fpsimd_state; + wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id(); - if (next->mm) { - /* - * If we are switching to a task whose most recent userland - * FPSIMD state is already in the registers of *this* cpu, - * we can skip loading the state from memory. 
Otherwise, set - * the TIF_FOREIGN_FPSTATE flag so the state will be loaded - * upon the next return to userland. - */ - if (__this_cpu_read(fpsimd_last_state.st) == - &next->thread.uw.fpsimd_state - && next->thread.fpsimd_cpu == smp_processor_id()) - clear_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE); - else - set_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE); - } + update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE, + wrong_task || wrong_cpu); } void fpsimd_flush_thread(void) @@ -982,7 +981,7 @@ void fpsimd_preserve_current_state(void) return; local_bh_disable(); - task_fpsimd_save(); + fpsimd_save(); local_bh_enable(); } @@ -1002,14 +1001,33 @@ void fpsimd_signal_preserve_current_state(void) * Associate current's FPSIMD context with this cpu * Preemption must be disabled when calling this function. */ -static void fpsimd_bind_to_cpu(void) +void fpsimd_bind_task_to_cpu(void) { struct fpsimd_last_state_struct *last = this_cpu_ptr(&fpsimd_last_state); last->st = ¤t->thread.uw.fpsimd_state; - last->sve_in_use = test_thread_flag(TIF_SVE); current->thread.fpsimd_cpu = smp_processor_id(); + + if (system_supports_sve()) { + /* Toggle SVE trapping for userspace if needed */ + if (test_thread_flag(TIF_SVE)) + sve_user_enable(); + else + sve_user_disable(); + + /* Serialised by exception return to user */ + } +} + +void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st) +{ + struct fpsimd_last_state_struct *last = + this_cpu_ptr(&fpsimd_last_state); + + WARN_ON(!in_softirq() && !irqs_disabled()); + + last->st = st; } /* @@ -1026,7 +1044,7 @@ void fpsimd_restore_current_state(void) if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { task_fpsimd_load(); - fpsimd_bind_to_cpu(); + fpsimd_bind_task_to_cpu(); } local_bh_enable(); @@ -1049,9 +1067,9 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state) fpsimd_to_sve(current); task_fpsimd_load(); + fpsimd_bind_task_to_cpu(); - if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) - fpsimd_bind_to_cpu(); + clear_thread_flag(TIF_FOREIGN_FPSTATE); local_bh_enable(); } @@ -1064,29 +1082,12 @@ void fpsimd_flush_task_state(struct task_struct *t) t->thread.fpsimd_cpu = NR_CPUS; } -static inline void fpsimd_flush_cpu_state(void) +void fpsimd_flush_cpu_state(void) { __this_cpu_write(fpsimd_last_state.st, NULL); + set_thread_flag(TIF_FOREIGN_FPSTATE); } -/* - * Invalidate any task SVE state currently held in this CPU's regs. - * - * This is used to prevent the kernel from trying to reuse SVE register data - * that is detroyed by KVM guest enter/exit. This function should go away when - * KVM SVE support is implemented. Don't use it for anything else. 
- */ -#ifdef CONFIG_ARM64_SVE -void sve_flush_cpu_state(void) -{ - struct fpsimd_last_state_struct const *last = - this_cpu_ptr(&fpsimd_last_state); - - if (last->st && last->sve_in_use) - fpsimd_flush_cpu_state(); -} -#endif /* CONFIG_ARM64_SVE */ - #ifdef CONFIG_KERNEL_MODE_NEON DEFINE_PER_CPU(bool, kernel_neon_busy); @@ -1120,11 +1121,8 @@ void kernel_neon_begin(void) __this_cpu_write(kernel_neon_busy, true); - /* Save unsaved task fpsimd state, if any: */ - if (current->mm) { - task_fpsimd_save(); - set_thread_flag(TIF_FOREIGN_FPSTATE); - } + /* Save unsaved fpsimd state, if any: */ + fpsimd_save(); /* Invalidate any task state remaining in the fpsimd regs: */ fpsimd_flush_cpu_state(); @@ -1246,13 +1244,10 @@ static int fpsimd_cpu_pm_notifier(struct notifier_block *self, { switch (cmd) { case CPU_PM_ENTER: - if (current->mm) - task_fpsimd_save(); + fpsimd_save(); fpsimd_flush_cpu_state(); break; case CPU_PM_EXIT: - if (current->mm) - set_thread_flag(TIF_FOREIGN_FPSTATE); break; case CPU_PM_ENTER_FAILED: default: diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 7ff81fed46e1..78889c4546d7 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -44,6 +44,7 @@ #include <asm/compat.h> #include <asm/cpufeature.h> #include <asm/debug-monitors.h> +#include <asm/fpsimd.h> #include <asm/pgtable.h> #include <asm/stacktrace.h> #include <asm/syscall.h> diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index a2e3a5af1113..47b23bf617c7 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -39,6 +39,7 @@ config KVM select HAVE_KVM_IRQ_ROUTING select IRQ_BYPASS_MANAGER select HAVE_KVM_IRQ_BYPASS + select HAVE_KVM_VCPU_RUN_PID_CHANGE ---help--- Support hosting virtualized guest machines. We don't support KVM with 16K page tables yet, due to the multiple diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 93afff91cb7c..0f2a135ba15b 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -19,7 +19,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o +kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o fpsimd.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index a1f4ebdfe6d3..00d422336a45 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -103,7 +103,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) * * Additionally, KVM only traps guest accesses to the debug registers if * the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY - * flag on vcpu->arch.debug_flags). Since the guest must not interfere + * flag on vcpu->arch.flags). Since the guest must not interfere * with the hardware state when debugging the guest, we must ensure that * trapping is enabled whenever we are debugging the guest using the * debug registers. 
@@ -111,7 +111,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) { - bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY); + bool trap_debug = !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY); unsigned long mdscr; trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug); @@ -184,7 +184,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1); vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state; - vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; trap_debug = true; trace_kvm_arm_set_regset("BKPTS", get_num_brps(), @@ -206,7 +206,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) /* If KDE or MDE are set, perform a full save/restore cycle. */ if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE)) - vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2); trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1)); diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c new file mode 100644 index 000000000000..dc6ecfa5a2d2 --- /dev/null +++ b/arch/arm64/kvm/fpsimd.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/arm64/kvm/fpsimd.c: Guest/host FPSIMD context coordination helpers + * + * Copyright 2018 Arm Limited + * Author: Dave Martin <Dave.Martin@arm.com> + */ +#include <linux/bottom_half.h> +#include <linux/sched.h> +#include <linux/thread_info.h> +#include <linux/kvm_host.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_host.h> +#include <asm/kvm_mmu.h> + +/* + * Called on entry to KVM_RUN unless this vcpu previously ran at least + * once and the most recent prior KVM_RUN for this vcpu was called from + * the same task as current (highly likely). + * + * This is guaranteed to execute before kvm_arch_vcpu_load_fp(vcpu), + * such that on entering hyp the relevant parts of current are already + * mapped. + */ +int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) +{ + int ret; + + struct thread_info *ti = ¤t->thread_info; + struct user_fpsimd_state *fpsimd = ¤t->thread.uw.fpsimd_state; + + /* + * Make sure the host task thread flags and fpsimd state are + * visible to hyp: + */ + ret = create_hyp_mappings(ti, ti + 1, PAGE_HYP); + if (ret) + goto error; + + ret = create_hyp_mappings(fpsimd, fpsimd + 1, PAGE_HYP); + if (ret) + goto error; + + vcpu->arch.host_thread_info = kern_hyp_va(ti); + vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd); +error: + return ret; +} + +/* + * Prepare vcpu for saving the host's FPSIMD state and loading the guest's. + * The actual loading is done by the FPSIMD access trap taken to hyp. + * + * Here, we just set the correct metadata to indicate that the FPSIMD + * state in the cpu regs (if any) belongs to current on the host. + * + * TIF_SVE is backed up here, since it may get clobbered with guest state. + * This flag is restored by kvm_arch_vcpu_put_fp(vcpu). 
+ */ +void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) +{ + BUG_ON(!current->mm); + + vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_HOST_SVE_IN_USE); + vcpu->arch.flags |= KVM_ARM64_FP_HOST; + if (test_thread_flag(TIF_SVE)) + vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE; +} + +/* + * If the guest FPSIMD state was loaded, update the host's context + * tracking data mark the CPU FPSIMD regs as dirty and belonging to vcpu + * so that they will be written back if the kernel clobbers them due to + * kernel-mode NEON before re-entry into the guest. + */ +void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) +{ + WARN_ON_ONCE(!irqs_disabled()); + + if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { + fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs); + clear_thread_flag(TIF_FOREIGN_FPSTATE); + clear_thread_flag(TIF_SVE); + } +} + +/* + * Write back the vcpu FPSIMD regs if they are dirty, and invalidate the + * cpu FPSIMD regs so that they can't be spuriously reused if this vcpu + * disappears and another task or vcpu appears that recycles the same + * struct fpsimd_state. + */ +void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) +{ + local_bh_disable(); + + update_thread_flag(TIF_SVE, + vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE); + + if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { + /* Clean guest FP state to memory and invalidate cpu view */ + fpsimd_save(); + fpsimd_flush_cpu_state(); + } else if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { + /* Ensure user trap controls are correctly restored */ + fpsimd_bind_task_to_cpu(); + } + + local_bh_enable(); +} diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index 3e717f66f011..50009766e5e5 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -163,7 +163,7 @@ void __hyp_text __debug_switch_to_guest(struct kvm_vcpu *vcpu) if (!has_vhe()) __debug_save_spe_nvhe(&vcpu->arch.host_debug_state.pmscr_el1); - if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) + if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) return; host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); @@ -185,7 +185,7 @@ void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu) if (!has_vhe()) __debug_restore_spe_nvhe(vcpu->arch.host_debug_state.pmscr_el1); - if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) + if (!(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY)) return; host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); @@ -196,7 +196,7 @@ void __hyp_text __debug_switch_to_host(struct kvm_vcpu *vcpu) __debug_save_state(vcpu, guest_dbg, guest_ctxt); __debug_restore_state(vcpu, host_dbg, host_ctxt); - vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY; + vcpu->arch.flags &= ~KVM_ARM64_DEBUG_DIRTY; } u32 __hyp_text __kvm_get_mdcr_el2(void) diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index e41a161d313a..fad1e164fe48 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -166,46 +166,3 @@ abort_guest_exit_end: orr x0, x0, x5 1: ret ENDPROC(__guest_exit) - -ENTRY(__fpsimd_guest_restore) - // x0: esr - // x1: vcpu - // x2-x29,lr: vcpu regs - // vcpu x0-x1 on the stack - stp x2, x3, [sp, #-16]! - stp x4, lr, [sp, #-16]! 
- -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - mrs x2, cptr_el2 - bic x2, x2, #CPTR_EL2_TFP - msr cptr_el2, x2 -alternative_else - mrs x2, cpacr_el1 - orr x2, x2, #CPACR_EL1_FPEN - msr cpacr_el1, x2 -alternative_endif - isb - - mov x3, x1 - - ldr x0, [x3, #VCPU_HOST_CONTEXT] - kern_hyp_va x0 - add x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS) - bl __fpsimd_save_state - - add x2, x3, #VCPU_CONTEXT - add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) - bl __fpsimd_restore_state - - // Skip restoring fpexc32 for AArch64 guests - mrs x1, hcr_el2 - tbnz x1, #HCR_RW_SHIFT, 1f - ldr x4, [x3, #VCPU_FPEXC32_EL2] - msr fpexc32_el2, x4 -1: - ldp x4, lr, [sp], #16 - ldp x2, x3, [sp], #16 - ldp x0, x1, [sp], #16 - - eret -ENDPROC(__fpsimd_guest_restore) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index bffece27b5c1..753b9d213651 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -113,25 +113,6 @@ el1_hvc_guest: el1_trap: get_vcpu_ptr x1, x0 - - mrs x0, esr_el2 - lsr x0, x0, #ESR_ELx_EC_SHIFT - /* - * x0: ESR_EC - * x1: vcpu pointer - */ - - /* - * We trap the first access to the FP/SIMD to save the host context - * and restore the guest context lazily. - * If FP/SIMD is not implemented, handle the trap and inject an - * undefined instruction exception to the guest. - */ -alternative_if_not ARM64_HAS_NO_FPSIMD - cmp x0, #ESR_ELx_EC_FP_ASIMD - b.eq __fpsimd_guest_restore -alternative_else_nop_endif - mov x0, #ARM_EXCEPTION_TRAP b __guest_exit diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index d9645236e474..2d45bd719a5d 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -21,21 +21,25 @@ #include <kvm/arm_psci.h> +#include <asm/cpufeature.h> #include <asm/kvm_asm.h> #include <asm/kvm_emulate.h> +#include <asm/kvm_host.h> #include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> #include <asm/fpsimd.h> #include <asm/debug-monitors.h> +#include <asm/processor.h> +#include <asm/thread_info.h> -static bool __hyp_text __fpsimd_enabled_nvhe(void) +/* Check whether the FP regs were dirtied while in the host-side run loop: */ +static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu) { - return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP); -} + if (vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) + vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | + KVM_ARM64_FP_HOST); -static bool fpsimd_enabled_vhe(void) -{ - return !!(read_sysreg(cpacr_el1) & CPACR_EL1_FPEN); + return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED); } /* Save the 32-bit only FPSIMD system register state */ @@ -92,7 +96,10 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu) val = read_sysreg(cpacr_el1); val |= CPACR_EL1_TTA; - val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN); + val &= ~CPACR_EL1_ZEN; + if (!update_fp_enabled(vcpu)) + val &= ~CPACR_EL1_FPEN; + write_sysreg(val, cpacr_el1); write_sysreg(kvm_get_hyp_vector(), vbar_el1); @@ -105,7 +112,10 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) __activate_traps_common(vcpu); val = CPTR_EL2_DEFAULT; - val |= CPTR_EL2_TTA | CPTR_EL2_TFP | CPTR_EL2_TZ; + val |= CPTR_EL2_TTA | CPTR_EL2_TZ; + if (!update_fp_enabled(vcpu)) + val |= CPTR_EL2_TFP; + write_sysreg(val, cptr_el2); } @@ -318,6 +328,50 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu) } } +static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu) +{ + struct user_fpsimd_state *host_fpsimd = vcpu->arch.host_fpsimd_state; + + if (has_vhe()) + write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN, + 
cpacr_el1); + else + write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP, + cptr_el2); + + isb(); + + if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { + /* + * In the SVE case, VHE is assumed: it is enforced by + * Kconfig and kvm_arch_init(). + */ + if (system_supports_sve() && + (vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE)) { + struct thread_struct *thread = container_of( + host_fpsimd, + struct thread_struct, uw.fpsimd_state); + + sve_save_state(sve_pffr(thread), &host_fpsimd->fpsr); + } else { + __fpsimd_save_state(host_fpsimd); + } + + vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; + } + + __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs); + + /* Skip restoring fpexc32 for AArch64 guests */ + if (!(read_sysreg(hcr_el2) & HCR_RW)) + write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2], + fpexc32_el2); + + vcpu->arch.flags |= KVM_ARM64_FP_ENABLED; + + return true; +} + /* * Return true when we were able to fixup the guest exit and should return to * the guest, false when we should restore the host state and return to the @@ -334,11 +388,23 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) * same PC once the SError has been injected, and replay the * trapping instruction. */ - if (*exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu)) + if (*exit_code != ARM_EXCEPTION_TRAP) + goto exit; + + /* + * We trap the first access to the FP/SIMD to save the host context + * and restore the guest context lazily. + * If FP/SIMD is not implemented, handle the trap and inject an + * undefined instruction exception to the guest. + */ + if (system_supports_fpsimd() && + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_FP_ASIMD) + return __hyp_switch_fpsimd(vcpu); + + if (!__populate_fault_info(vcpu)) return true; - if (static_branch_unlikely(&vgic_v2_cpuif_trap) && - *exit_code == ARM_EXCEPTION_TRAP) { + if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { bool valid; valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && @@ -350,12 +416,8 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) if (valid) { int ret = __vgic_v2_perform_cpuif_access(vcpu); - if (ret == 1) { - if (__skip_instr(vcpu)) - return true; - else - *exit_code = ARM_EXCEPTION_TRAP; - } + if (ret == 1 && __skip_instr(vcpu)) + return true; if (ret == -1) { /* Promote an illegal access to an @@ -368,23 +430,21 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; *exit_code = ARM_EXCEPTION_EL1_SERROR; } + + goto exit; } } if (static_branch_unlikely(&vgic_v3_cpuif_trap) && - *exit_code == ARM_EXCEPTION_TRAP && (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { int ret = __vgic_v3_perform_cpuif_access(vcpu); - if (ret == 1) { - if (__skip_instr(vcpu)) - return true; - else - *exit_code = ARM_EXCEPTION_TRAP; - } + if (ret == 1 && __skip_instr(vcpu)) + return true; } +exit: /* Return to the host kernel and handle the exit */ return false; } @@ -394,7 +454,6 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; - bool fp_enabled; u64 exit_code; host_ctxt = vcpu->arch.host_cpu_context; @@ -416,19 +475,14 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) /* And we're baaack! 
*/ } while (fixup_guest_exit(vcpu, &exit_code)); - fp_enabled = fpsimd_enabled_vhe(); - sysreg_save_guest_state_vhe(guest_ctxt); __deactivate_traps(vcpu); sysreg_restore_host_state_vhe(host_ctxt); - if (fp_enabled) { - __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); - __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); + if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) __fpsimd_save_fpexc32(vcpu); - } __debug_switch_to_host(vcpu); @@ -440,7 +494,6 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; - bool fp_enabled; u64 exit_code; vcpu = kern_hyp_va(vcpu); @@ -472,8 +525,6 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) /* And we're baaack! */ } while (fixup_guest_exit(vcpu, &exit_code)); - fp_enabled = __fpsimd_enabled_nvhe(); - __sysreg_save_state_nvhe(guest_ctxt); __sysreg32_save_state(vcpu); __timer_disable_traps(vcpu); @@ -484,11 +535,8 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) __sysreg_restore_state_nvhe(host_ctxt); - if (fp_enabled) { - __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); - __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); + if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) __fpsimd_save_fpexc32(vcpu); - } /* * This must come after restoring the host sysregs, since a non-VHE diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index b3894df6bf1a..35bc16832efe 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -196,7 +196,7 @@ void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); - if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) + if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); } @@ -218,7 +218,7 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) write_sysreg(sysreg[DACR32_EL2], dacr32_el2); write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2); - if (has_vhe() || vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) + if (has_vhe() || vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 6e3b969391fd..a4363735d3f8 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -31,7 +31,6 @@ #include <asm/debug-monitors.h> #include <asm/esr.h> #include <asm/kvm_arm.h> -#include <asm/kvm_asm.h> #include <asm/kvm_coproc.h> #include <asm/kvm_emulate.h> #include <asm/kvm_host.h> @@ -338,7 +337,7 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, { if (p->is_write) { vcpu_write_sys_reg(vcpu, p->regval, r->reg); - vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; } else { p->regval = vcpu_read_sys_reg(vcpu, r->reg); } @@ -369,7 +368,7 @@ static void reg_to_dbg(struct kvm_vcpu *vcpu, } *dbg_reg = val; - vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; } static void dbg_to_reg(struct kvm_vcpu *vcpu, @@ -1441,7 +1440,7 @@ static bool trap_debug32(struct kvm_vcpu *vcpu, { if (p->is_write) { vcpu_cp14(vcpu, r->reg) = p->regval; - vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; } else { p->regval = vcpu_cp14(vcpu, r->reg); } @@ -1473,7 +1472,7 @@ static bool trap_xvr(struct kvm_vcpu *vcpu, val |= p->regval << 32; *dbg_reg = val; - vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + 
vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; } else { p->regval = *dbg_reg >> 32; } diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index e7efe12a81bd..cfdd2484cc42 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -28,7 +28,7 @@ #include <linux/irqchip/arm-gic-v4.h> -#define VGIC_V3_MAX_CPUS 255 +#define VGIC_V3_MAX_CPUS 512 #define VGIC_V2_MAX_CPUS 8 #define VGIC_NR_IRQS_LEGACY 256 #define VGIC_NR_SGIS 16 @@ -201,6 +201,14 @@ struct vgic_its { struct vgic_state_iter; +struct vgic_redist_region { + u32 index; + gpa_t base; + u32 count; /* number of redistributors or 0 if single region */ + u32 free_index; /* index of the next free redistributor */ + struct list_head list; +}; + struct vgic_dist { bool in_kernel; bool ready; @@ -220,10 +228,7 @@ struct vgic_dist { /* either a GICv2 CPU interface */ gpa_t vgic_cpu_base; /* or a number of GICv3 redistributor regions */ - struct { - gpa_t vgic_redist_base; - gpa_t vgic_redist_free_offset; - }; + struct list_head rd_regions; }; /* distributor enabled */ @@ -311,6 +316,7 @@ struct vgic_cpu { */ struct vgic_io_device rd_iodev; struct vgic_io_device sgi_iodev; + struct vgic_redist_region *rdreg; /* Contains the attributes and gpa of the LPI pending tables. */ u64 pendbaser; @@ -332,7 +338,6 @@ void kvm_vgic_early_init(struct kvm *kvm); int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); int kvm_vgic_create(struct kvm *kvm, u32 type); void kvm_vgic_destroy(struct kvm *kvm); -void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu); void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); int kvm_vgic_map_resources(struct kvm *kvm); int kvm_vgic_hyp_init(void); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 14e710d639c7..b81769a5a2b7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1273,4 +1273,13 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp, void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, unsigned long start, unsigned long end); +#ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE +int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu); +#else +static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) +{ + return 0; +} +#endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */ + #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index c2413703f45d..ff289ae6b787 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1618,6 +1618,12 @@ static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag) clear_ti_thread_flag(task_thread_info(tsk), flag); } +static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag, + bool value) +{ + update_ti_thread_flag(task_thread_info(tsk), flag, value); +} + static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag) { return test_and_set_ti_thread_flag(task_thread_info(tsk), flag); diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index cf2862bd134a..8d8821b3689a 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -60,6 +60,15 @@ static inline void clear_ti_thread_flag(struct thread_info *ti, int flag) clear_bit(flag, (unsigned long *)&ti->flags); } +static inline void update_ti_thread_flag(struct thread_info *ti, int flag, + bool value) +{ + if (value) + set_ti_thread_flag(ti, flag); + else + clear_ti_thread_flag(ti, flag); +} + static inline int test_and_set_ti_thread_flag(struct thread_info *ti, int flag) { return test_and_set_bit(flag, (unsigned long *)&ti->flags); @@ -79,6 +88,8 @@ static inline 
int test_ti_thread_flag(struct thread_info *ti, int flag) set_ti_thread_flag(current_thread_info(), flag) #define clear_thread_flag(flag) \ clear_ti_thread_flag(current_thread_info(), flag) +#define update_thread_flag(flag, value) \ + update_ti_thread_flag(current_thread_info(), flag, value) #define test_and_set_thread_flag(flag) \ test_and_set_ti_thread_flag(current_thread_info(), flag) #define test_and_clear_thread_flag(flag) \ diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index cca7e065a075..72143cfaf6ec 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -54,3 +54,6 @@ config HAVE_KVM_IRQ_BYPASS config HAVE_KVM_VCPU_ASYNC_IOCTL bool + +config HAVE_KVM_VCPU_RUN_PID_CHANGE + bool diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index a4c1b76240df..126b98fbf9ba 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -16,6 +16,7 @@ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ +#include <linux/bug.h> #include <linux/cpu_pm.h> #include <linux/errno.h> #include <linux/err.h> @@ -41,6 +42,7 @@ #include <asm/mman.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> #include <asm/virt.h> #include <asm/kvm_arm.h> #include <asm/kvm_asm.h> @@ -290,7 +292,6 @@ out: void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) { - kvm_vgic_vcpu_early_init(vcpu); } void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) @@ -363,10 +364,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_vgic_load(vcpu); kvm_timer_vcpu_load(vcpu); kvm_vcpu_load_sysregs(vcpu); + kvm_arch_vcpu_load_fp(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + kvm_arch_vcpu_put_fp(vcpu); kvm_vcpu_put_sysregs(vcpu); kvm_timer_vcpu_put(vcpu); kvm_vgic_put(vcpu); @@ -678,9 +681,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) */ preempt_disable(); - /* Flush FP/SIMD state that can't survive guest entry/exit */ - kvm_fpsimd_flush_cpu_state(); - kvm_pmu_flush_hwstate(vcpu); local_irq_disable(); @@ -778,6 +778,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (static_branch_unlikely(&userspace_irqchip_in_use)) kvm_timer_sync_hwstate(vcpu); + kvm_arch_vcpu_ctxsync_fp(vcpu); + /* * We may have taken a host interrupt in HYP mode (ie * while executing the guest). This interrupt is still @@ -1570,6 +1572,11 @@ int kvm_arch_init(void *opaque) return -ENODEV; } + if (!kvm_arch_check_sve_has_vhe()) { + kvm_pr_unimpl("SVE system without VHE unsupported. Broken cpu?"); + return -ENODEV; + } + for_each_online_cpu(cpu) { smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1); if (ret < 0) { diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index e07156c30323..2673efce65f3 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -44,7 +44,7 @@ * * CPU Interface: * - * - kvm_vgic_vcpu_early_init(): initialization of static data that + * - kvm_vgic_vcpu_init(): initialization of static data that * doesn't depend on any sizing information or emulation type. No * allocation is allowed there. */ @@ -67,46 +67,6 @@ void kvm_vgic_early_init(struct kvm *kvm) spin_lock_init(&dist->lpi_list_lock); } -/** - * kvm_vgic_vcpu_early_init() - Initialize static VGIC VCPU data structures - * @vcpu: The VCPU whose VGIC data structures whould be initialized - * - * Only do initialization, but do not actually enable the VGIC CPU interface - * yet. 
- */ -void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - int i; - - INIT_LIST_HEAD(&vgic_cpu->ap_list_head); - spin_lock_init(&vgic_cpu->ap_list_lock); - - /* - * Enable and configure all SGIs to be edge-triggered and - * configure all PPIs as level-triggered. - */ - for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { - struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; - - INIT_LIST_HEAD(&irq->ap_list); - spin_lock_init(&irq->irq_lock); - irq->intid = i; - irq->vcpu = NULL; - irq->target_vcpu = vcpu; - irq->targets = 1U << vcpu->vcpu_id; - kref_init(&irq->refcount); - if (vgic_irq_is_sgi(i)) { - /* SGIs */ - irq->enabled = 1; - irq->config = VGIC_CONFIG_EDGE; - } else { - /* PPIs */ - irq->config = VGIC_CONFIG_LEVEL; - } - } -} - /* CREATION */ /** @@ -167,8 +127,11 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) kvm->arch.vgic.vgic_model = type; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; - kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; - kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF; + + if (type == KVM_DEV_TYPE_ARM_VGIC_V2) + kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; + else + INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); out_unlock: for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { @@ -221,13 +184,50 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) } /** - * kvm_vgic_vcpu_init() - Register VCPU-specific KVM iodevs + * kvm_vgic_vcpu_init() - Initialize static VGIC VCPU data + * structures and register VCPU-specific KVM iodevs + * * @vcpu: pointer to the VCPU being created and initialized + * + * Only do initialization, but do not actually enable the + * VGIC CPU interface */ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) { - int ret = 0; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int ret = 0; + int i; + + vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; + vgic_cpu->sgi_iodev.base_addr = VGIC_ADDR_UNDEF; + + INIT_LIST_HEAD(&vgic_cpu->ap_list_head); + spin_lock_init(&vgic_cpu->ap_list_lock); + + /* + * Enable and configure all SGIs to be edge-triggered and + * configure all PPIs as level-triggered. 
+ */ + for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { + struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; + + INIT_LIST_HEAD(&irq->ap_list); + spin_lock_init(&irq->irq_lock); + irq->intid = i; + irq->vcpu = NULL; + irq->target_vcpu = vcpu; + irq->targets = 1U << vcpu->vcpu_id; + kref_init(&irq->refcount); + if (vgic_irq_is_sgi(i)) { + /* SGIs */ + irq->enabled = 1; + irq->config = VGIC_CONFIG_EDGE; + } else { + /* PPIs */ + irq->config = VGIC_CONFIG_LEVEL; + } + } if (!irqchip_in_kernel(vcpu->kvm)) return 0; @@ -303,13 +303,23 @@ out: static void kvm_vgic_dist_destroy(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_redist_region *rdreg, *next; dist->ready = false; dist->initialized = false; kfree(dist->spis); + dist->spis = NULL; dist->nr_spis = 0; + if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list) { + list_del(&rdreg->list); + kfree(rdreg); + } + INIT_LIST_HEAD(&dist->rd_regions); + } + if (vgic_supports_direct_msis(kvm)) vgic_v4_teardown(kvm); } diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c index 10ae6f394b71..6ada2432e37c 100644 --- a/virt/kvm/arm/vgic/vgic-kvm-device.c +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c @@ -66,6 +66,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) int r = 0; struct vgic_dist *vgic = &kvm->arch.vgic; phys_addr_t *addr_ptr, alignment; + u64 undef_value = VGIC_ADDR_UNDEF; mutex_lock(&kvm->lock); switch (type) { @@ -84,16 +85,61 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) addr_ptr = &vgic->vgic_dist_base; alignment = SZ_64K; break; - case KVM_VGIC_V3_ADDR_TYPE_REDIST: + case KVM_VGIC_V3_ADDR_TYPE_REDIST: { + struct vgic_redist_region *rdreg; + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3); if (r) break; if (write) { - r = vgic_v3_set_redist_base(kvm, *addr); + r = vgic_v3_set_redist_base(kvm, 0, *addr, 0); goto out; } - addr_ptr = &vgic->vgic_redist_base; + rdreg = list_first_entry(&vgic->rd_regions, + struct vgic_redist_region, list); + if (!rdreg) + addr_ptr = &undef_value; + else + addr_ptr = &rdreg->base; break; + } + case KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION: + { + struct vgic_redist_region *rdreg; + u8 index; + + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3); + if (r) + break; + + index = *addr & KVM_VGIC_V3_RDIST_INDEX_MASK; + + if (write) { + gpa_t base = *addr & KVM_VGIC_V3_RDIST_BASE_MASK; + u32 count = (*addr & KVM_VGIC_V3_RDIST_COUNT_MASK) + >> KVM_VGIC_V3_RDIST_COUNT_SHIFT; + u8 flags = (*addr & KVM_VGIC_V3_RDIST_FLAGS_MASK) + >> KVM_VGIC_V3_RDIST_FLAGS_SHIFT; + + if (!count || flags) + r = -EINVAL; + else + r = vgic_v3_set_redist_base(kvm, index, + base, count); + goto out; + } + + rdreg = vgic_v3_rdist_region_from_index(kvm, index); + if (!rdreg) { + r = -ENOENT; + goto out; + } + + *addr = index; + *addr |= rdreg->base; + *addr |= (u64)rdreg->count << KVM_VGIC_V3_RDIST_COUNT_SHIFT; + goto out; + } default: r = -ENODEV; } @@ -665,6 +711,7 @@ static int vgic_v3_has_attr(struct kvm_device *dev, switch (attr->attr) { case KVM_VGIC_V3_ADDR_TYPE_DIST: case KVM_VGIC_V3_ADDR_TYPE_REDIST: + case KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION: return 0; } break; diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 671fe81f8e1d..287784095b5b 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -184,12 +184,17 @@ static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu, 
gpa_t addr, unsigned int len) { unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_redist_region *rdreg = vgic_cpu->rdreg; int target_vcpu_id = vcpu->vcpu_id; + gpa_t last_rdist_typer = rdreg->base + GICR_TYPER + + (rdreg->free_index - 1) * KVM_VGIC_V3_REDIST_SIZE; u64 value; value = (u64)(mpidr & GENMASK(23, 0)) << 32; value |= ((target_vcpu_id & 0xffff) << 8); - if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) + + if (addr == last_rdist_typer) value |= GICR_TYPER_LAST; if (vgic_has_its(vcpu->kvm)) value |= GICR_TYPER_PLPIS; @@ -580,24 +585,32 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; struct vgic_dist *vgic = &kvm->arch.vgic; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev; + struct vgic_redist_region *rdreg; gpa_t rd_base, sgi_base; int ret; + if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) + return 0; + /* * We may be creating VCPUs before having set the base address for the * redistributor region, in which case we will come back to this * function for all VCPUs when the base address is set. Just return * without doing any work for now. */ - if (IS_VGIC_ADDR_UNDEF(vgic->vgic_redist_base)) + rdreg = vgic_v3_rdist_free_slot(&vgic->rd_regions); + if (!rdreg) return 0; if (!vgic_v3_check_base(kvm)) return -EINVAL; - rd_base = vgic->vgic_redist_base + vgic->vgic_redist_free_offset; + vgic_cpu->rdreg = rdreg; + + rd_base = rdreg->base + rdreg->free_index * KVM_VGIC_V3_REDIST_SIZE; sgi_base = rd_base + SZ_64K; kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops); @@ -631,7 +644,7 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) goto out; } - vgic->vgic_redist_free_offset += 2 * SZ_64K; + rdreg->free_index++; out: mutex_unlock(&kvm->slots_lock); return ret; @@ -670,23 +683,96 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm) return ret; } -int vgic_v3_set_redist_base(struct kvm *kvm, u64 addr) +/** + * vgic_v3_insert_redist_region - Insert a new redistributor region + * + * Performs various checks before inserting the rdist region in the list. + * Those tests depend on whether the size of the rdist region is known + * (ie. count != 0). The list is sorted by rdist region index. + * + * @kvm: kvm handle + * @index: redist region index + * @base: base of the new rdist region + * @count: number of redistributors the region is made of (0 in the old style + * single region, whose size is induced from the number of vcpus) + * + * Return 0 on success, < 0 otherwise + */ +static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index, + gpa_t base, uint32_t count) { - struct vgic_dist *vgic = &kvm->arch.vgic; + struct vgic_dist *d = &kvm->arch.vgic; + struct vgic_redist_region *rdreg; + struct list_head *rd_regions = &d->rd_regions; + size_t size = count * KVM_VGIC_V3_REDIST_SIZE; int ret; - /* vgic_check_ioaddr makes sure we don't do this twice */ - ret = vgic_check_ioaddr(kvm, &vgic->vgic_redist_base, addr, SZ_64K); - if (ret) - return ret; + /* single rdist region already set ?*/ + if (!count && !list_empty(rd_regions)) + return -EINVAL; - vgic->vgic_redist_base = addr; - if (!vgic_v3_check_base(kvm)) { - vgic->vgic_redist_base = VGIC_ADDR_UNDEF; + /* cross the end of memory ? 
*/ + if (base + size < base) return -EINVAL; + + if (list_empty(rd_regions)) { + if (index != 0) + return -EINVAL; + } else { + rdreg = list_last_entry(rd_regions, + struct vgic_redist_region, list); + if (index != rdreg->index + 1) + return -EINVAL; + + /* Cannot add an explicitly sized regions after legacy region */ + if (!rdreg->count) + return -EINVAL; } /* + * For legacy single-region redistributor regions (!count), + * check that the redistributor region does not overlap with the + * distributor's address space. + */ + if (!count && !IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) && + vgic_dist_overlap(kvm, base, size)) + return -EINVAL; + + /* collision with any other rdist region? */ + if (vgic_v3_rdist_overlap(kvm, base, size)) + return -EINVAL; + + rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL); + if (!rdreg) + return -ENOMEM; + + rdreg->base = VGIC_ADDR_UNDEF; + + ret = vgic_check_ioaddr(kvm, &rdreg->base, base, SZ_64K); + if (ret) + goto free; + + rdreg->base = base; + rdreg->count = count; + rdreg->free_index = 0; + rdreg->index = index; + + list_add_tail(&rdreg->list, rd_regions); + return 0; +free: + kfree(rdreg); + return ret; +} + +int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count) +{ + int ret; + + ret = vgic_v3_insert_redist_region(kvm, index, addr, count); + if (ret) + return ret; + + /* * Register iodevs for each existing VCPU. Adding more VCPUs * afterwards will register the iodevs when needed. */ diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index bdcf8e7a6161..ff7dc890941a 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -419,6 +419,29 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) return 0; } +/** + * vgic_v3_rdist_overlap - check if a region overlaps with any + * existing redistributor region + * + * @kvm: kvm handle + * @base: base of the region + * @size: size of region + * + * Return: true if there is an overlap + */ +bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size) +{ + struct vgic_dist *d = &kvm->arch.vgic; + struct vgic_redist_region *rdreg; + + list_for_each_entry(rdreg, &d->rd_regions, list) { + if ((base + size > rdreg->base) && + (base < rdreg->base + vgic_v3_rd_region_size(kvm, rdreg))) + return true; + } + return false; +} + /* * Check for overlapping regions and for regions crossing the end of memory * for base addresses which have already been set. 
@@ -426,41 +449,83 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) bool vgic_v3_check_base(struct kvm *kvm) { struct vgic_dist *d = &kvm->arch.vgic; - gpa_t redist_size = KVM_VGIC_V3_REDIST_SIZE; - - redist_size *= atomic_read(&kvm->online_vcpus); + struct vgic_redist_region *rdreg; if (!IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) && d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base) return false; - if (!IS_VGIC_ADDR_UNDEF(d->vgic_redist_base) && - d->vgic_redist_base + redist_size < d->vgic_redist_base) - return false; + list_for_each_entry(rdreg, &d->rd_regions, list) { + if (rdreg->base + vgic_v3_rd_region_size(kvm, rdreg) < + rdreg->base) + return false; + } - /* Both base addresses must be set to check if they overlap */ - if (IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(d->vgic_redist_base)) + if (IS_VGIC_ADDR_UNDEF(d->vgic_dist_base)) return true; - if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE <= d->vgic_redist_base) - return true; - if (d->vgic_redist_base + redist_size <= d->vgic_dist_base) - return true; + return !vgic_v3_rdist_overlap(kvm, d->vgic_dist_base, + KVM_VGIC_V3_DIST_SIZE); +} - return false; +/** + * vgic_v3_rdist_free_slot - Look up registered rdist regions and identify one + * which has free space to put a new rdist region. + * + * @rd_regions: redistributor region list head + * + * A redistributor regions maps n redistributors, n = region size / (2 x 64kB). + * Stride between redistributors is 0 and regions are filled in the index order. + * + * Return: the redist region handle, if any, that has space to map a new rdist + * region. + */ +struct vgic_redist_region *vgic_v3_rdist_free_slot(struct list_head *rd_regions) +{ + struct vgic_redist_region *rdreg; + + list_for_each_entry(rdreg, rd_regions, list) { + if (!vgic_v3_redist_region_full(rdreg)) + return rdreg; + } + return NULL; } +struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm, + u32 index) +{ + struct list_head *rd_regions = &kvm->arch.vgic.rd_regions; + struct vgic_redist_region *rdreg; + + list_for_each_entry(rdreg, rd_regions, list) { + if (rdreg->index == index) + return rdreg; + } + return NULL; +} + + int vgic_v3_map_resources(struct kvm *kvm) { - int ret = 0; struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int ret = 0; + int c; if (vgic_ready(kvm)) goto out; - if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(dist->vgic_redist_base)) { + kvm_for_each_vcpu(c, vcpu, kvm) { + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) { + kvm_debug("vcpu %d redistributor base not set\n", c); + ret = -ENXIO; + goto out; + } + } + + if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) { kvm_err("Need to set vgic distributor addresses first\n"); ret = -ENXIO; goto out; diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 32c25d42c93f..6879cf48652a 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -96,6 +96,13 @@ /* we only support 64 kB translation table page size */ #define KVM_ITS_L1E_ADDR_MASK GENMASK_ULL(51, 16) +#define KVM_VGIC_V3_RDIST_INDEX_MASK GENMASK_ULL(11, 0) +#define KVM_VGIC_V3_RDIST_FLAGS_MASK GENMASK_ULL(15, 12) +#define KVM_VGIC_V3_RDIST_FLAGS_SHIFT 12 +#define KVM_VGIC_V3_RDIST_BASE_MASK GENMASK_ULL(51, 16) +#define KVM_VGIC_V3_RDIST_COUNT_MASK GENMASK_ULL(63, 52) +#define KVM_VGIC_V3_RDIST_COUNT_SHIFT 52 + /* Requires the irq_lock to be held by the caller. 
*/ static inline bool irq_is_pending(struct vgic_irq *irq) { @@ -215,7 +222,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info); int vgic_v3_map_resources(struct kvm *kvm); int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq); int vgic_v3_save_pending_tables(struct kvm *kvm); -int vgic_v3_set_redist_base(struct kvm *kvm, u64 addr); +int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count); int vgic_register_redist_iodev(struct kvm_vcpu *vcpu); bool vgic_v3_check_base(struct kvm *kvm); @@ -265,6 +272,39 @@ static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu) } } +static inline bool +vgic_v3_redist_region_full(struct vgic_redist_region *region) +{ + if (!region->count) + return false; + + return (region->free_index >= region->count); +} + +struct vgic_redist_region *vgic_v3_rdist_free_slot(struct list_head *rdregs); + +static inline size_t +vgic_v3_rd_region_size(struct kvm *kvm, struct vgic_redist_region *rdreg) +{ + if (!rdreg->count) + return atomic_read(&kvm->online_vcpus) * KVM_VGIC_V3_REDIST_SIZE; + else + return rdreg->count * KVM_VGIC_V3_REDIST_SIZE; +} + +struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm, + u32 index); + +bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size); + +static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size) +{ + struct vgic_dist *d = &kvm->arch.vgic; + + return (base + size > d->vgic_dist_base) && + (base < d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE); +} + int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, u32 devid, u32 eventid, struct vgic_irq **irq); struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b125d94307d2..c5f6a552e486 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2568,8 +2568,13 @@ static long kvm_vcpu_ioctl(struct file *filp, oldpid = rcu_access_pointer(vcpu->pid); if (unlikely(oldpid != current->pids[PIDTYPE_PID].pid)) { /* The thread running this VCPU changed. */ - struct pid *newpid = get_task_pid(current, PIDTYPE_PID); + struct pid *newpid; + r = kvm_arch_vcpu_run_pid_change(vcpu); + if (r) + break; + + newpid = get_task_pid(current, PIDTYPE_PID); rcu_assign_pointer(vcpu->pid, newpid); if (oldpid) synchronize_rcu(); |
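The hunks above introduce the KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attribute, its KVM_VGIC_V3_RDIST_* bit-field encoding (index in bits 11:0, flags in 15:12, base in 51:16, count in 63:52), and the in-kernel bookkeeping for multiple redistributor regions. As a quick orientation, below is a minimal userspace sketch of how a VMM might program two such regions on a GICv3 device fd. It is not part of this patch: the device-fd plumbing (KVM_CREATE_DEVICE / KVM_SET_DEVICE_ATTR) is the standard KVM device API, while the helper names, guest physical addresses and redistributor counts are made-up illustrations.

/*
 * Illustrative sketch only (not from this patch): register redistributor
 * regions through the new KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attribute.
 * The attribute value layout mirrors the KVM_VGIC_V3_RDIST_* masks added
 * to vgic.h: count[63:52], base[51:16], flags[15:12] (must be 0), index[11:0].
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static uint64_t redist_region_attr(uint32_t index, uint64_t base, uint32_t count)
{
	/* base must be 64K aligned; only bits [51:16] of the GPA are encoded */
	return ((uint64_t)count << 52) |
	       (base & 0x000fffffffff0000ULL) |
	       (index & 0xfffULL);
}

static int set_redist_region(int vgic_fd, uint32_t index,
			     uint64_t base, uint32_t count)
{
	uint64_t val = redist_region_attr(index, base, count);
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_ADDR,
		.attr  = KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION,
		.addr  = (uint64_t)&val,
	};

	/* issued on the vgic device fd returned by KVM_CREATE_DEVICE */
	return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}

/*
 * Example usage (hypothetical addresses): regions must be registered in
 * increasing index order, and their counts must together cover at least
 * the number of VCPUs, each redistributor taking two 64K pages.
 *
 *	set_redist_region(vgic_fd, 0, 0x08100000ULL, 4);	// vcpus 0..3
 *	set_redist_region(vgic_fd, 1, 0x09000000ULL, 4);	// vcpus 4..7
 */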