diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-16 16:15:14 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-01-16 16:15:14 +0200 |
commit | 79e06c4c4950be2abd8ca5d2428a8c915aa62c24 (patch) | |
tree | 0507ef82aa3c7766b7b19163a0351882b7d7c5b5 /arch/x86/include/asm | |
parent | cb3f09f9afe5286c0aed7a1c5cc71495de166efb (diff) | |
parent | c862dcd199759d4a45e65dab47b03e3e8a144e3a (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
"RISCV:
- Use common KVM implementation of MMU memory caches
- SBI v0.2 support for Guest
- Initial KVM selftests support
- Fix to avoid spurious virtual interrupts after clearing hideleg CSR
- Update email address for Anup and Atish
ARM:
- Simplification of the 'vcpu first run' by integrating it into KVM's
'pid change' flow
- Refactoring of the FP and SVE state tracking, also leading to a
simpler state and less shared data between EL1 and EL2 in the nVHE
case
- Tidy up the header file usage for the nvhe hyp object
- New HYP unsharing mechanism, finally allowing pages to be unmapped
from the Stage-1 EL2 page-tables
- Various pKVM cleanups around refcounting and sharing
- A couple of vgic fixes for bugs that would trigger once the vcpu
xarray rework is merged, but not sooner
- Add minimal support for ARMv8.7's PMU extension
- Rework kvm_pgtable initialisation ahead of the NV work
- New selftest for IRQ injection
- Teach selftests about the lack of default IPA space and page sizes
- Expand sysreg selftest to deal with Pointer Authentication
- The usual bunch of cleanups and doc update
s390:
- fix sigp sense/start/stop/inconsistency
- cleanups
x86:
- Clean up some function prototypes more
- improved gfn_to_pfn_cache with proper invalidation, used by Xen
emulation
- add KVM_IRQ_ROUTING_XEN_EVTCHN and event channel delivery
- completely remove potential TOC/TOU races in nested SVM consistency
checks
- update some PMCs on emulated instructions
- Intel AMX support (joint work between Thomas and Intel)
- large MMU cleanups
- module parameter to disable PMU virtualization
- cleanup register cache
- first part of halt handling cleanups
- Hyper-V enlightened MSR bitmap support for nested hypervisors
Generic:
- clean up Makefiles
- introduce CONFIG_HAVE_KVM_DIRTY_RING
- optimize memslot lookup using a tree
- optimize vCPU array usage by converting to xarray"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (268 commits)
x86/fpu: Fix inline prefix warnings
selftest: kvm: Add amx selftest
selftest: kvm: Move struct kvm_x86_state to header
selftest: kvm: Reorder vcpu_load_state steps for AMX
kvm: x86: Disable interception for IA32_XFD on demand
x86/fpu: Provide fpu_sync_guest_vmexit_xfd_state()
kvm: selftests: Add support for KVM_CAP_XSAVE2
kvm: x86: Add support for getting/setting expanded xstate buffer
x86/fpu: Add uabi_size to guest_fpu
kvm: x86: Add CPUID support for Intel AMX
kvm: x86: Add XCR0 support for Intel AMX
kvm: x86: Disable RDMSR interception of IA32_XFD_ERR
kvm: x86: Emulate IA32_XFD_ERR for guest
kvm: x86: Intercept #NM for saving IA32_XFD_ERR
x86/fpu: Prepare xfd_err in struct fpu_guest
kvm: x86: Add emulation for IA32_XFD
x86/fpu: Provide fpu_update_guest_xfd() for IA32_XFD emulation
kvm: x86: Enable dynamic xfeatures at KVM_SET_CPUID2
x86/fpu: Provide fpu_enable_guest_xfd_features() for KVM
x86/fpu: Add guest support to xfd_enable_feature()
...
Diffstat (limited to 'arch/x86/include/asm')
-rw-r--r-- | arch/x86/include/asm/cpufeatures.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/fpu/api.h | 11 | ||||
-rw-r--r-- | arch/x86/include/asm/fpu/types.h | 32 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm-x86-ops.h | 1 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 65 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_page_track.h | 6 |
6 files changed, 86 insertions, 31 deletions
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 18de5f76f198..6db4e2932b3d 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -299,7 +299,9 @@ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ #define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ +#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index c2767a6a387e..c83b3020350a 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -132,10 +132,21 @@ static inline void fpstate_free(struct fpu *fpu) { } /* fpstate-related functions which are exported to KVM */ extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfeature); +extern u64 xstate_get_guest_group_perm(void); + /* KVM specific functions */ extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu); extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu); extern int fpu_swap_kvm_fpstate(struct fpu_guest *gfpu, bool enter_guest); +extern int fpu_enable_guest_xfd_features(struct fpu_guest *guest_fpu, u64 xfeatures); + +#ifdef CONFIG_X86_64 +extern void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd); +extern void fpu_sync_guest_vmexit_xfd_state(void); +#else +static inline void fpu_update_guest_xfd(struct fpu_guest *guest_fpu, u64 xfd) { } +static inline void fpu_sync_guest_vmexit_xfd_state(void) { } +#endif extern void fpu_copy_guest_fpstate_to_uabi(struct fpu_guest *gfpu, void *buf, unsigned int size, u32 pkru); extern int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, u64 xcr0, u32 *vpkru); diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 3c06c82ab355..eb7cd1139d97 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -387,6 +387,8 @@ struct fpstate { /* @regs is dynamically sized! Don't add anything after @regs! */ } __aligned(64); +#define FPU_GUEST_PERM_LOCKED BIT_ULL(63) + struct fpu_state_perm { /* * @__state_perm: @@ -477,6 +479,13 @@ struct fpu { struct fpu_state_perm perm; /* + * @guest_perm: + * + * Permission related information for guest pseudo FPUs + */ + struct fpu_state_perm guest_perm; + + /* * @__fpstate: * * Initial in-memory storage for FPU registers which are saved in @@ -496,6 +505,29 @@ struct fpu { */ struct fpu_guest { /* + * @xfeatures: xfeature bitmap of features which are + * currently enabled for the guest vCPU. + */ + u64 xfeatures; + + /* + * @perm: xfeature bitmap of features which are + * permitted to be enabled for the guest + * vCPU. + */ + u64 perm; + + /* + * @xfd_err: Save the guest value. + */ + u64 xfd_err; + + /* + * @uabi_size: Size required for save/restore + */ + unsigned int uabi_size; + + /* * @fpstate: Pointer to the allocated guest fpstate */ struct fpstate *fpstate; diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 9e50da3ed01a..f658bb4dbb74 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -35,6 +35,7 @@ KVM_X86_OP(get_cpl) KVM_X86_OP(set_segment) KVM_X86_OP_NULL(get_cs_db_l_bits) KVM_X86_OP(set_cr0) +KVM_X86_OP_NULL(post_set_cr3) KVM_X86_OP(is_valid_cr4) KVM_X86_OP(set_cr4) KVM_X86_OP(set_efer) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d0ad98ddd459..0677b9ea01c9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -135,7 +135,7 @@ #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) -#define KVM_PERMILLE_MMU_PAGES 20 +#define KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO 50 #define KVM_MIN_ALLOC_MMU_PAGES 64UL #define KVM_MMU_HASH_SHIFT 12 #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) @@ -291,25 +291,31 @@ struct kvm_kernel_irq_routing_entry; * the number of unique SPs that can theoretically be created is 2^n, where n * is the number of bits that are used to compute the role. * - * But, even though there are 18 bits in the mask below, not all combinations - * of modes and flags are possible. The maximum number of possible upper-level - * shadow pages for a single gfn is in the neighborhood of 2^13. + * But, even though there are 19 bits in the mask below, not all combinations + * of modes and flags are possible: * - * - invalid shadow pages are not accounted. - * - level is effectively limited to four combinations, not 16 as the number - * bits would imply, as 4k SPs are not tracked (allowed to go unsync). - * - level is effectively unused for non-PAE paging because there is exactly - * one upper level (see 4k SP exception above). - * - quadrant is used only for non-PAE paging and is exclusive with - * gpte_is_8_bytes. - * - execonly and ad_disabled are used only for nested EPT, which makes it - * exclusive with quadrant. + * - invalid shadow pages are not accounted, so the bits are effectively 18 + * + * - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging); + * execonly and ad_disabled are only used for nested EPT which has + * has_4_byte_gpte=0. Therefore, 2 bits are always unused. + * + * - the 4 bits of level are effectively limited to the values 2/3/4/5, + * as 4k SPs are not tracked (allowed to go unsync). In addition non-PAE + * paging has exactly one upper level, making level completely redundant + * when has_4_byte_gpte=1. + * + * - on top of this, smep_andnot_wp and smap_andnot_wp are only set if + * cr0_wp=0, therefore these three bits only give rise to 5 possibilities. + * + * Therefore, the maximum number of possible upper-level shadow pages for a + * single gfn is a bit less than 2^13. */ union kvm_mmu_page_role { u32 word; struct { unsigned level:4; - unsigned gpte_is_8_bytes:1; + unsigned has_4_byte_gpte:1; unsigned quadrant:2; unsigned direct:1; unsigned access:3; @@ -420,10 +426,9 @@ struct kvm_mmu { int (*page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault); void (*inject_page_fault)(struct kvm_vcpu *vcpu, struct x86_exception *fault); - gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gpa_t gva_or_gpa, - u32 access, struct x86_exception *exception); - gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, - struct x86_exception *exception); + gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + gpa_t gva_or_gpa, u32 access, + struct x86_exception *exception); int (*sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root_hpa); @@ -490,6 +495,7 @@ struct kvm_pmc { */ u64 current_config; bool is_paused; + bool intr; }; struct kvm_pmu { @@ -604,6 +610,7 @@ struct kvm_vcpu_xen { u64 last_steal; u64 runstate_entry_time; u64 runstate_times[4]; + unsigned long evtchn_pending_sel; }; struct kvm_vcpu_arch { @@ -640,6 +647,7 @@ struct kvm_vcpu_arch { u64 smi_count; bool tpr_access_reporting; bool xsaves_enabled; + bool xfd_no_write_intercept; u64 ia32_xss; u64 microcode_version; u64 arch_capabilities; @@ -1015,7 +1023,7 @@ struct msr_bitmap_range { struct kvm_xen { bool long_mode; u8 upcall_vector; - gfn_t shinfo_gfn; + struct gfn_to_pfn_cache shinfo_cache; }; enum kvm_irqchip_mode { @@ -1338,6 +1346,7 @@ struct kvm_x86_ops { struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); + void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0); void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); @@ -1592,10 +1601,9 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, const struct kvm_memory_slot *memslot); void kvm_mmu_zap_all(struct kvm *kvm); void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); -unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm); void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages); -int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3); +int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, const void *val, int bytes); @@ -1645,7 +1653,8 @@ extern u64 kvm_mce_cap_supported; * * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to * decode the instruction length. For use *only* by - * kvm_x86_ops.skip_emulated_instruction() implementations. + * kvm_x86_ops.skip_emulated_instruction() implementations if + * EMULTYPE_COMPLETE_USER_EXIT is not set. * * EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to * retry native execution under certain conditions, @@ -1665,6 +1674,10 @@ extern u64 kvm_mce_cap_supported; * * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which * case the CR2/GPA value pass on the stack is valid. + * + * EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility + * state and inject single-step #DBs after skipping + * an instruction (after completing userspace I/O). */ #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) @@ -1673,6 +1686,7 @@ extern u64 kvm_mce_cap_supported; #define EMULTYPE_TRAP_UD_FORCED (1 << 4) #define EMULTYPE_VMWARE_GP (1 << 5) #define EMULTYPE_PF (1 << 6) +#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7) int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, @@ -1697,7 +1711,7 @@ int kvm_emulate_monitor(struct kvm_vcpu *vcpu); int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in); int kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); -int kvm_vcpu_halt(struct kvm_vcpu *vcpu); +int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu); int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); @@ -1763,12 +1777,9 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu); void kvm_update_dr7(struct kvm_vcpu *vcpu); int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); -void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, ulong roots_to_free); void kvm_mmu_free_guest_mode_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu); -gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, - struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, @@ -1930,8 +1941,6 @@ static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) static_call_cond(kvm_x86_vcpu_unblocking)(vcpu); } -static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} - static inline int kvm_cpu_get_apicid(int mps_cpu) { #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h index 9d4a3b1b25b9..eb186bc57f6a 100644 --- a/arch/x86/include/asm/kvm_page_track.h +++ b/arch/x86/include/asm/kvm_page_track.h @@ -63,9 +63,9 @@ void kvm_slot_page_track_add_page(struct kvm *kvm, void kvm_slot_page_track_remove_page(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, enum kvm_page_track_mode mode); -bool kvm_slot_page_track_is_active(struct kvm_vcpu *vcpu, - struct kvm_memory_slot *slot, gfn_t gfn, - enum kvm_page_track_mode mode); +bool kvm_slot_page_track_is_active(struct kvm *kvm, + const struct kvm_memory_slot *slot, + gfn_t gfn, enum kvm_page_track_mode mode); void kvm_page_track_register_notifier(struct kvm *kvm, |