diff options
Diffstat (limited to 'arch/x86/kvm/svm')
-rw-r--r-- | arch/x86/kvm/svm/avic.c | 59 | ||||
-rw-r--r-- | arch/x86/kvm/svm/nested.c | 57 | ||||
-rw-r--r-- | arch/x86/kvm/svm/sev.c | 100 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.c | 327 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.h | 61 |
5 files changed, 387 insertions, 217 deletions
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index cfc8ab773025..2092db892d7d 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -791,6 +791,7 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) int ret = 0; unsigned long flags; struct amd_svm_iommu_ir *ir; + u64 entry; /** * In some cases, the existing irte is updated and re-set, @@ -824,6 +825,18 @@ static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) ir->data = pi->ir_data; spin_lock_irqsave(&svm->ir_list_lock, flags); + + /* + * Update the target pCPU for IOMMU doorbells if the vCPU is running. + * If the vCPU is NOT running, i.e. is blocking or scheduled out, KVM + * will update the pCPU info when the vCPU awkened and/or scheduled in. + * See also avic_vcpu_load(). + */ + entry = READ_ONCE(*(svm->avic_physical_id_cache)); + if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK) + amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK, + true, pi->ir_data); + list_add(&ir->node, &svm->ir_list); spin_unlock_irqrestore(&svm->ir_list_lock, flags); out: @@ -986,10 +999,11 @@ static inline int avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r) { int ret = 0; - unsigned long flags; struct amd_svm_iommu_ir *ir; struct vcpu_svm *svm = to_svm(vcpu); + lockdep_assert_held(&svm->ir_list_lock); + if (!kvm_arch_has_assigned_device(vcpu->kvm)) return 0; @@ -997,19 +1011,15 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r) * Here, we go through the per-vcpu ir_list to update all existing * interrupt remapping table entry targeting this vcpu. */ - spin_lock_irqsave(&svm->ir_list_lock, flags); - if (list_empty(&svm->ir_list)) - goto out; + return 0; list_for_each_entry(ir, &svm->ir_list, node) { ret = amd_iommu_update_ga(cpu, r, ir->data); if (ret) - break; + return ret; } -out: - spin_unlock_irqrestore(&svm->ir_list_lock, flags); - return ret; + return 0; } void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -1017,6 +1027,7 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) u64 entry; int h_physical_id = kvm_cpu_get_apicid(cpu); struct vcpu_svm *svm = to_svm(vcpu); + unsigned long flags; lockdep_assert_preemption_disabled(); @@ -1033,6 +1044,15 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (kvm_vcpu_is_blocking(vcpu)) return; + /* + * Grab the per-vCPU interrupt remapping lock even if the VM doesn't + * _currently_ have assigned devices, as that can change. Holding + * ir_list_lock ensures that either svm_ir_list_add() will consume + * up-to-date entry information, or that this task will wait until + * svm_ir_list_add() completes to set the new target pCPU. + */ + spin_lock_irqsave(&svm->ir_list_lock, flags); + entry = READ_ONCE(*(svm->avic_physical_id_cache)); WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK); @@ -1042,25 +1062,48 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) WRITE_ONCE(*(svm->avic_physical_id_cache), entry); avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true); + + spin_unlock_irqrestore(&svm->ir_list_lock, flags); } void avic_vcpu_put(struct kvm_vcpu *vcpu) { u64 entry; struct vcpu_svm *svm = to_svm(vcpu); + unsigned long flags; lockdep_assert_preemption_disabled(); + /* + * Note, reading the Physical ID entry outside of ir_list_lock is safe + * as only the pCPU that has loaded (or is loading) the vCPU is allowed + * to modify the entry, and preemption is disabled. I.e. the vCPU + * can't be scheduled out and thus avic_vcpu_{put,load}() can't run + * recursively. + */ entry = READ_ONCE(*(svm->avic_physical_id_cache)); /* Nothing to do if IsRunning == '0' due to vCPU blocking. */ if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)) return; + /* + * Take and hold the per-vCPU interrupt remapping lock while updating + * the Physical ID entry even though the lock doesn't protect against + * multiple writers (see above). Holding ir_list_lock ensures that + * either svm_ir_list_add() will consume up-to-date entry information, + * or that this task will wait until svm_ir_list_add() completes to + * mark the vCPU as not running. + */ + spin_lock_irqsave(&svm->ir_list_lock, flags); + avic_update_iommu_vcpu_affinity(vcpu, -1, 0); entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; WRITE_ONCE(*(svm->avic_physical_id_cache), entry); + + spin_unlock_irqrestore(&svm->ir_list_lock, flags); + } void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 96936ddf1b3c..dd496c9e5f91 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -107,7 +107,7 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) static bool nested_vmcb_needs_vls_intercept(struct vcpu_svm *svm) { - if (!svm->v_vmload_vmsave_enabled) + if (!guest_can_use(&svm->vcpu, X86_FEATURE_V_VMSAVE_VMLOAD)) return true; if (!nested_npt_enabled(svm)) @@ -552,6 +552,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 bool new_vmcb12 = false; struct vmcb *vmcb01 = svm->vmcb01.ptr; struct vmcb *vmcb02 = svm->nested.vmcb02.ptr; + struct kvm_vcpu *vcpu = &svm->vcpu; nested_vmcb02_compute_g_pat(svm); @@ -577,18 +578,18 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 vmcb_mark_dirty(vmcb02, VMCB_DT); } - kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED); + kvm_set_rflags(vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED); - svm_set_efer(&svm->vcpu, svm->nested.save.efer); + svm_set_efer(vcpu, svm->nested.save.efer); - svm_set_cr0(&svm->vcpu, svm->nested.save.cr0); - svm_set_cr4(&svm->vcpu, svm->nested.save.cr4); + svm_set_cr0(vcpu, svm->nested.save.cr0); + svm_set_cr4(vcpu, svm->nested.save.cr4); svm->vcpu.arch.cr2 = vmcb12->save.cr2; - kvm_rax_write(&svm->vcpu, vmcb12->save.rax); - kvm_rsp_write(&svm->vcpu, vmcb12->save.rsp); - kvm_rip_write(&svm->vcpu, vmcb12->save.rip); + kvm_rax_write(vcpu, vmcb12->save.rax); + kvm_rsp_write(vcpu, vmcb12->save.rsp); + kvm_rip_write(vcpu, vmcb12->save.rip); /* In case we don't even reach vcpu_run, the fields are not updated */ vmcb02->save.rax = vmcb12->save.rax; @@ -602,7 +603,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 vmcb_mark_dirty(vmcb02, VMCB_DR); } - if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { + if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) && + (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { /* * Reserved bits of DEBUGCTL are ignored. Be consistent with * svm_set_msr's definition of reserved bits. @@ -658,7 +660,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes. */ - if (svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK)) + if (guest_can_use(vcpu, X86_FEATURE_VGIF) && + (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK)) int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK); else int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK); @@ -695,10 +698,9 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, vmcb02->control.tsc_offset = vcpu->arch.tsc_offset; - if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) { - WARN_ON(!svm->tsc_scaling_enabled); + if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) && + svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) nested_svm_update_tsc_ratio_msr(vcpu); - } vmcb02->control.int_ctl = (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) | @@ -717,7 +719,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, * what a nrips=0 CPU would do (L1 is responsible for advancing RIP * prior to injecting the event). */ - if (svm->nrips_enabled) + if (guest_can_use(vcpu, X86_FEATURE_NRIPS)) vmcb02->control.next_rip = svm->nested.ctl.next_rip; else if (boot_cpu_has(X86_FEATURE_NRIPS)) vmcb02->control.next_rip = vmcb12_rip; @@ -727,7 +729,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, svm->soft_int_injected = true; svm->soft_int_csbase = vmcb12_csbase; svm->soft_int_old_rip = vmcb12_rip; - if (svm->nrips_enabled) + if (guest_can_use(vcpu, X86_FEATURE_NRIPS)) svm->soft_int_next_rip = svm->nested.ctl.next_rip; else svm->soft_int_next_rip = vmcb12_rip; @@ -735,15 +737,21 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, vmcb02->control.virt_ext = vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK; - if (svm->lbrv_enabled) + if (guest_can_use(vcpu, X86_FEATURE_LBRV)) vmcb02->control.virt_ext |= (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK); if (!nested_vmcb_needs_vls_intercept(svm)) vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; - pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0; - pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0; + if (guest_can_use(vcpu, X86_FEATURE_PAUSEFILTER)) + pause_count12 = svm->nested.ctl.pause_filter_count; + else + pause_count12 = 0; + if (guest_can_use(vcpu, X86_FEATURE_PFTHRESHOLD)) + pause_thresh12 = svm->nested.ctl.pause_filter_thresh; + else + pause_thresh12 = 0; if (kvm_pause_in_guest(svm->vcpu.kvm)) { /* use guest values since host doesn't intercept PAUSE */ vmcb02->control.pause_filter_count = pause_count12; @@ -1027,7 +1035,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm) if (vmcb12->control.exit_code != SVM_EXIT_ERR) nested_save_pending_event_to_vmcb12(svm, vmcb12); - if (svm->nrips_enabled) + if (guest_can_use(vcpu, X86_FEATURE_NRIPS)) vmcb12->control.next_rip = vmcb02->control.next_rip; vmcb12->control.int_ctl = svm->nested.ctl.int_ctl; @@ -1066,7 +1074,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm) if (!nested_exit_on_intr(svm)) kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); - if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { + if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) && + (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { svm_copy_lbrs(vmcb12, vmcb02); svm_update_lbrv(vcpu); } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) { @@ -1101,10 +1110,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm) vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS); } - if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) { - WARN_ON(!svm->tsc_scaling_enabled); + if (kvm_caps.has_tsc_control && + vcpu->arch.tsc_scaling_ratio != vcpu->arch.l1_tsc_scaling_ratio) { vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio; - __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio); + svm_write_tsc_multiplier(vcpu); } svm->nested.ctl.nested_cr3 = 0; @@ -1537,7 +1546,7 @@ void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu) vcpu->arch.tsc_scaling_ratio = kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio, svm->tsc_ratio_msr); - __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio); + svm_write_tsc_multiplier(vcpu); } /* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. */ diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index d3aec1f2cad2..b9a0a939d59f 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -23,6 +23,7 @@ #include <asm/pkru.h> #include <asm/trapnr.h> #include <asm/fpu/xcr.h> +#include <asm/debugreg.h> #include "mmu.h" #include "x86.h" @@ -54,9 +55,14 @@ module_param_named(sev, sev_enabled, bool, 0444); /* enable/disable SEV-ES support */ static bool sev_es_enabled = true; module_param_named(sev_es, sev_es_enabled, bool, 0444); + +/* enable/disable SEV-ES DebugSwap support */ +static bool sev_es_debug_swap_enabled = true; +module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444); #else #define sev_enabled false #define sev_es_enabled false +#define sev_es_debug_swap_enabled false #endif /* CONFIG_KVM_AMD_SEV */ static u8 sev_enc_bit; @@ -606,6 +612,9 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm) save->xss = svm->vcpu.arch.ia32_xss; save->dr6 = svm->vcpu.arch.dr6; + if (sev_es_debug_swap_enabled) + save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP; + pr_debug("Virtual Machine Save Area (VMSA):\n"); print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false); @@ -619,6 +628,11 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu, struct vcpu_svm *svm = to_svm(vcpu); int ret; + if (vcpu->guest_debug) { + pr_warn_once("KVM_SET_GUEST_DEBUG for SEV-ES guest is not supported"); + return -EINVAL; + } + /* Perform some pre-encryption checks against the VMSA */ ret = sev_es_sync_vmsa(svm); if (ret) @@ -1725,7 +1739,7 @@ static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) * Note, the source is not required to have the same number of * vCPUs as the destination when migrating a vanilla SEV VM. */ - src_vcpu = kvm_get_vcpu(dst_kvm, i); + src_vcpu = kvm_get_vcpu(src_kvm, i); src_svm = to_svm(src_vcpu); /* @@ -2171,7 +2185,7 @@ void __init sev_hardware_setup(void) bool sev_es_supported = false; bool sev_supported = false; - if (!sev_enabled || !npt_enabled) + if (!sev_enabled || !npt_enabled || !nrips) goto out; /* @@ -2256,6 +2270,9 @@ out: sev_enabled = sev_supported; sev_es_enabled = sev_es_supported; + if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) || + !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP)) + sev_es_debug_swap_enabled = false; #endif } @@ -2881,7 +2898,10 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) svm->sev_es.ghcb_sa); break; case SVM_VMGEXIT_NMI_COMPLETE: - ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET); + ++vcpu->stat.nmi_window_exits; + svm->nmi_masked = false; + kvm_make_request(KVM_REQ_EVENT, vcpu); + ret = 1; break; case SVM_VMGEXIT_AP_HLT_LOOP: ret = kvm_emulate_ap_reset_hold(vcpu); @@ -2944,6 +2964,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) static void sev_es_init_vmcb(struct vcpu_svm *svm) { + struct vmcb *vmcb = svm->vmcb01.ptr; struct kvm_vcpu *vcpu = &svm->vcpu; svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE; @@ -2952,9 +2973,12 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) /* * An SEV-ES guest requires a VMSA area that is a separate from the * VMCB page. Do not include the encryption mask on the VMSA physical - * address since hardware will access it using the guest key. + * address since hardware will access it using the guest key. Note, + * the VMSA will be NULL if this vCPU is the destination for intrahost + * migration, and will be copied later. */ - svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); + if (svm->sev_es.vmsa) + svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa); /* Can't intercept CR register access, HV can't modify CR registers */ svm_clr_intercept(svm, INTERCEPT_CR0_READ); @@ -2972,8 +2996,23 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) svm_set_intercept(svm, TRAP_CR4_WRITE); svm_set_intercept(svm, TRAP_CR8_WRITE); - /* No support for enable_vmware_backdoor */ - clr_exception_intercept(svm, GP_VECTOR); + vmcb->control.intercepts[INTERCEPT_DR] = 0; + if (!sev_es_debug_swap_enabled) { + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); + recalc_intercepts(svm); + } else { + /* + * Disable #DB intercept iff DebugSwap is enabled. KVM doesn't + * allow debugging SEV-ES guests, and enables DebugSwap iff + * NO_NESTED_DATA_BP is supported, so there's no reason to + * intercept #DB when DebugSwap is enabled. For simplicity + * with respect to guest debug, intercept #DB for other VMs + * even if NO_NESTED_DATA_BP is supported, i.e. even if the + * guest can't DoS the CPU with infinite #DB vectoring. + */ + clr_exception_intercept(svm, DB_VECTOR); + } /* Can't intercept XSETBV, HV can't modify XCR0 directly */ svm_clr_intercept(svm, INTERCEPT_XSETBV); @@ -3000,6 +3039,12 @@ void sev_init_vmcb(struct vcpu_svm *svm) svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE; clr_exception_intercept(svm, UD_VECTOR); + /* + * Don't intercept #GP for SEV guests, e.g. for the VMware backdoor, as + * KVM can't decrypt guest memory to decode the faulting instruction. + */ + clr_exception_intercept(svm, GP_VECTOR); + if (sev_es_guest(svm->vcpu.kvm)) sev_es_init_vmcb(svm); } @@ -3018,20 +3063,41 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm) void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa) { /* - * As an SEV-ES guest, hardware will restore the host state on VMEXIT, - * of which one step is to perform a VMLOAD. KVM performs the - * corresponding VMSAVE in svm_prepare_guest_switch for both - * traditional and SEV-ES guests. + * All host state for SEV-ES guests is categorized into three swap types + * based on how it is handled by hardware during a world switch: + * + * A: VMRUN: Host state saved in host save area + * VMEXIT: Host state loaded from host save area + * + * B: VMRUN: Host state _NOT_ saved in host save area + * VMEXIT: Host state loaded from host save area + * + * C: VMRUN: Host state _NOT_ saved in host save area + * VMEXIT: Host state initialized to default(reset) values + * + * Manually save type-B state, i.e. state that is loaded by VMEXIT but + * isn't saved by VMRUN, that isn't already saved by VMSAVE (performed + * by common SVM code). */ - - /* XCR0 is restored on VMEXIT, save the current host value */ hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - - /* PKRU is restored on VMEXIT, save the current host value */ hostsa->pkru = read_pkru(); - - /* MSR_IA32_XSS is restored on VMEXIT, save the currnet host value */ hostsa->xss = host_xss; + + /* + * If DebugSwap is enabled, debug registers are loaded but NOT saved by + * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both + * saves and loads debug registers (Type-A). + */ + if (sev_es_debug_swap_enabled) { + hostsa->dr0 = native_get_debugreg(0); + hostsa->dr1 = native_get_debugreg(1); + hostsa->dr2 = native_get_debugreg(2); + hostsa->dr3 = native_get_debugreg(3); + hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0); + hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1); + hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2); + hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3); + } } void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d4bfdc607fe7..f283eb47f6ac 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -39,10 +39,9 @@ #include <asm/spec-ctrl.h> #include <asm/cpu_device_id.h> #include <asm/traps.h> +#include <asm/reboot.h> #include <asm/fpu/api.h> -#include <asm/virtext.h> - #include <trace/events/ipi.h> #include "trace.h" @@ -203,7 +202,7 @@ static int nested = true; module_param(nested, int, S_IRUGO); /* enable/disable Next RIP Save */ -static int nrips = true; +int nrips = true; module_param(nrips, int, 0444); /* enable/disable Virtual VMLOAD VMSAVE */ @@ -365,6 +364,8 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK; } +static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len); static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu, bool commit_side_effects) @@ -385,6 +386,14 @@ static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu, } if (!svm->next_rip) { + /* + * FIXME: Drop this when kvm_emulate_instruction() does the + * right thing and treats "can't emulate" as outright failure + * for EMULTYPE_SKIP. + */ + if (!svm_can_emulate_instruction(vcpu, EMULTYPE_SKIP, NULL, 0)) + return 0; + if (unlikely(!commit_side_effects)) old_rflags = svm->vmcb->save.rflags; @@ -517,14 +526,21 @@ static void svm_init_osvw(struct kvm_vcpu *vcpu) vcpu->arch.osvw.status |= 1; } -static bool kvm_is_svm_supported(void) +static bool __kvm_is_svm_supported(void) { - int cpu = raw_smp_processor_id(); - const char *msg; + int cpu = smp_processor_id(); + struct cpuinfo_x86 *c = &cpu_data(cpu); + u64 vm_cr; - if (!cpu_has_svm(&msg)) { - pr_err("SVM not supported by CPU %d, %s\n", cpu, msg); + if (c->x86_vendor != X86_VENDOR_AMD && + c->x86_vendor != X86_VENDOR_HYGON) { + pr_err("CPU %d isn't AMD or Hygon\n", cpu); + return false; + } + + if (!cpu_has(c, X86_FEATURE_SVM)) { + pr_err("SVM not supported by CPU %d\n", cpu); return false; } @@ -542,25 +558,55 @@ static bool kvm_is_svm_supported(void) return true; } +static bool kvm_is_svm_supported(void) +{ + bool supported; + + migrate_disable(); + supported = __kvm_is_svm_supported(); + migrate_enable(); + + return supported; +} + static int svm_check_processor_compat(void) { - if (!kvm_is_svm_supported()) + if (!__kvm_is_svm_supported()) return -EIO; return 0; } -void __svm_write_tsc_multiplier(u64 multiplier) +static void __svm_write_tsc_multiplier(u64 multiplier) { - preempt_disable(); - if (multiplier == __this_cpu_read(current_tsc_ratio)) - goto out; + return; wrmsrl(MSR_AMD64_TSC_RATIO, multiplier); __this_cpu_write(current_tsc_ratio, multiplier); -out: - preempt_enable(); +} + +static inline void kvm_cpu_svm_disable(void) +{ + uint64_t efer; + + wrmsrl(MSR_VM_HSAVE_PA, 0); + rdmsrl(MSR_EFER, efer); + if (efer & EFER_SVME) { + /* + * Force GIF=1 prior to disabling SVM, e.g. to ensure INIT and + * NMI aren't blocked. + */ + stgi(); + wrmsrl(MSR_EFER, efer & ~EFER_SVME); + } +} + +static void svm_emergency_disable(void) +{ + kvm_rebooting = true; + + kvm_cpu_svm_disable(); } static void svm_hardware_disable(void) @@ -569,7 +615,7 @@ static void svm_hardware_disable(void) if (tsc_scaling) __svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT); - cpu_svm_disable(); + kvm_cpu_svm_disable(); amd_pmu_disable_virt(); } @@ -677,6 +723,39 @@ free_save_area: } +static void set_dr_intercepts(struct vcpu_svm *svm) +{ + struct vmcb *vmcb = svm->vmcb01.ptr; + + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); + vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); + + recalc_intercepts(svm); +} + +static void clr_dr_intercepts(struct vcpu_svm *svm) +{ + struct vmcb *vmcb = svm->vmcb01.ptr; + + vmcb->control.intercepts[INTERCEPT_DR] = 0; + + recalc_intercepts(svm); +} + static int direct_access_msr_slot(u32 msr) { u32 i; @@ -947,50 +1026,24 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu) svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb); } -static int svm_get_lbr_msr(struct vcpu_svm *svm, u32 index) +static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm) { /* - * If the LBR virtualization is disabled, the LBR msrs are always - * kept in the vmcb01 to avoid copying them on nested guest entries. - * - * If nested, and the LBR virtualization is enabled/disabled, the msrs - * are moved between the vmcb01 and vmcb02 as needed. + * If LBR virtualization is disabled, the LBR MSRs are always kept in + * vmcb01. If LBR virtualization is enabled and L1 is running VMs of + * its own, the MSRs are moved between vmcb01 and vmcb02 as needed. */ - struct vmcb *vmcb = - (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) ? - svm->vmcb : svm->vmcb01.ptr; - - switch (index) { - case MSR_IA32_DEBUGCTLMSR: - return vmcb->save.dbgctl; - case MSR_IA32_LASTBRANCHFROMIP: - return vmcb->save.br_from; - case MSR_IA32_LASTBRANCHTOIP: - return vmcb->save.br_to; - case MSR_IA32_LASTINTFROMIP: - return vmcb->save.last_excp_from; - case MSR_IA32_LASTINTTOIP: - return vmcb->save.last_excp_to; - default: - KVM_BUG(false, svm->vcpu.kvm, - "%s: Unknown MSR 0x%x", __func__, index); - return 0; - } + return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb : + svm->vmcb01.ptr; } void svm_update_lbrv(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - - bool enable_lbrv = svm_get_lbr_msr(svm, MSR_IA32_DEBUGCTLMSR) & - DEBUGCTLMSR_LBR; - - bool current_enable_lbrv = !!(svm->vmcb->control.virt_ext & - LBR_CTL_ENABLE_MASK); - - if (unlikely(is_guest_mode(vcpu) && svm->lbrv_enabled)) - if (unlikely(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)) - enable_lbrv = true; + bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK; + bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) || + (is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) && + (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)); if (enable_lbrv == current_enable_lbrv) return; @@ -1101,21 +1154,23 @@ static u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu) return svm->tsc_ratio_msr; } -static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) +static void svm_write_tsc_offset(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); svm->vmcb01.ptr->control.tsc_offset = vcpu->arch.l1_tsc_offset; - svm->vmcb->control.tsc_offset = offset; + svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset; vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } -static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier) +void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu) { - __svm_write_tsc_multiplier(multiplier); + preempt_disable(); + if (to_svm(vcpu)->guest_state_loaded) + __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio); + preempt_enable(); } - /* Evaluate instruction intercepts that depend on guest CPUID features. */ static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu, struct vcpu_svm *svm) @@ -1156,8 +1211,6 @@ static inline void init_vmcb_after_set_cpuid(struct kvm_vcpu *vcpu) set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 0, 0); set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 0, 0); - - svm->v_vmload_vmsave_enabled = false; } else { /* * If hardware supports Virtual VMLOAD VMSAVE then enable it @@ -1201,10 +1254,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu) * Guest access to VMware backdoor ports could legitimately * trigger #GP because of TSS I/O permission bitmap. * We intercept those #GP and allow access to them anyway - * as VMware does. Don't intercept #GP for SEV guests as KVM can't - * decrypt guest memory to decode the faulting instruction. + * as VMware does. */ - if (enable_vmware_backdoor && !sev_guest(vcpu->kvm)) + if (enable_vmware_backdoor) set_exception_intercept(svm, GP_VECTOR); svm_set_intercept(svm, INTERCEPT_INTR); @@ -1949,7 +2001,7 @@ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - if (vcpu->arch.guest_state_protected) + if (WARN_ON_ONCE(sev_es_guest(vcpu->kvm))) return; get_debugreg(vcpu->arch.db[0], 0); @@ -2510,12 +2562,13 @@ static int iret_interception(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + WARN_ON_ONCE(sev_es_guest(vcpu->kvm)); + ++vcpu->stat.nmi_window_exits; svm->awaiting_iret_completion = true; svm_clr_iret_intercept(svm); - if (!sev_es_guest(vcpu->kvm)) - svm->nmi_iret_rip = kvm_rip_read(vcpu); + svm->nmi_iret_rip = kvm_rip_read(vcpu); kvm_make_request(KVM_REQ_EVENT, vcpu); return 1; @@ -2680,6 +2733,13 @@ static int dr_interception(struct kvm_vcpu *vcpu) unsigned long val; int err = 0; + /* + * SEV-ES intercepts DR7 only to disable guest debugging and the guest issues a VMGEXIT + * for DR7 write only. KVM cannot change DR7 (always swapped as type 'A') so return early. + */ + if (sev_es_guest(vcpu->kvm)) + return 1; + if (vcpu->guest_debug == 0) { /* * No more DR vmexits; force a reload of the debug registers @@ -2764,7 +2824,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr_info->index) { case MSR_AMD64_TSC_RATIO: - if (!msr_info->host_initiated && !svm->tsc_scaling_enabled) + if (!msr_info->host_initiated && + !guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR)) return 1; msr_info->data = svm->tsc_ratio_msr; break; @@ -2802,11 +2863,19 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = svm->tsc_aux; break; case MSR_IA32_DEBUGCTLMSR: + msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl; + break; case MSR_IA32_LASTBRANCHFROMIP: + msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from; + break; case MSR_IA32_LASTBRANCHTOIP: + msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to; + break; case MSR_IA32_LASTINTFROMIP: + msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from; + break; case MSR_IA32_LASTINTTOIP: - msr_info->data = svm_get_lbr_msr(svm, msr_info->index); + msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to; break; case MSR_VM_HSAVE_PA: msr_info->data = svm->nested.hsave_msr; @@ -2906,7 +2975,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) switch (ecx) { case MSR_AMD64_TSC_RATIO: - if (!svm->tsc_scaling_enabled) { + if (!guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR)) { if (!msr->host_initiated) return 1; @@ -2928,7 +2997,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->tsc_ratio_msr = data; - if (svm->tsc_scaling_enabled && is_guest_mode(vcpu)) + if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) && + is_guest_mode(vcpu)) nested_svm_update_tsc_ratio_msr(vcpu); break; @@ -3037,13 +3107,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) if (data & DEBUGCTL_RESERVED_BITS) return 1; - if (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) - svm->vmcb->save.dbgctl = data; - else - svm->vmcb01.ptr->save.dbgctl = data; - + svm_get_lbr_vmcb(svm)->save.dbgctl = data; svm_update_lbrv(vcpu); - break; case MSR_VM_HSAVE_PA: /* @@ -3769,6 +3834,19 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu) if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion) return; /* IRET will cause a vm exit */ + /* + * SEV-ES guests are responsible for signaling when a vCPU is ready to + * receive a new NMI, as SEV-ES guests can't be single-stepped, i.e. + * KVM can't intercept and single-step IRET to detect when NMIs are + * unblocked (architecturally speaking). See SVM_VMGEXIT_NMI_COMPLETE. + * + * Note, GIF is guaranteed to be '1' for SEV-ES guests as hardware + * ignores SEV-ES guest writes to EFER.SVME *and* CLGI/STGI are not + * supported NAEs in the GHCB protocol. + */ + if (sev_es_guest(vcpu->kvm)) + return; + if (!gif_set(svm)) { if (vgif) svm_set_intercept(svm, INTERCEPT_STGI); @@ -3918,12 +3996,11 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu) svm->soft_int_injected = false; /* - * If we've made progress since setting HF_IRET_MASK, we've + * If we've made progress since setting awaiting_iret_completion, we've * executed an IRET and can allow NMI injection. */ if (svm->awaiting_iret_completion && - (sev_es_guest(vcpu->kvm) || - kvm_rip_read(vcpu) != svm->nmi_iret_rip)) { + kvm_rip_read(vcpu) != svm->nmi_iret_rip) { svm->awaiting_iret_completion = false; svm->nmi_masked = false; kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -4209,28 +4286,37 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); struct kvm_cpuid_entry2 *best; - vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && - boot_cpu_has(X86_FEATURE_XSAVE) && - boot_cpu_has(X86_FEATURE_XSAVES); - - /* Update nrips enabled cache */ - svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) && - guest_cpuid_has(vcpu, X86_FEATURE_NRIPS); - - svm->tsc_scaling_enabled = tsc_scaling && guest_cpuid_has(vcpu, X86_FEATURE_TSCRATEMSR); - svm->lbrv_enabled = lbrv && guest_cpuid_has(vcpu, X86_FEATURE_LBRV); - - svm->v_vmload_vmsave_enabled = vls && guest_cpuid_has(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD); - - svm->pause_filter_enabled = kvm_cpu_cap_has(X86_FEATURE_PAUSEFILTER) && - guest_cpuid_has(vcpu, X86_FEATURE_PAUSEFILTER); + /* + * SVM doesn't provide a way to disable just XSAVES in the guest, KVM + * can only disable all variants of by disallowing CR4.OSXSAVE from + * being set. As a result, if the host has XSAVE and XSAVES, and the + * guest has XSAVE enabled, the guest can execute XSAVES without + * faulting. Treat XSAVES as enabled in this case regardless of + * whether it's advertised to the guest so that KVM context switches + * XSS on VM-Enter/VM-Exit. Failure to do so would effectively give + * the guest read/write access to the host's XSS. + */ + if (boot_cpu_has(X86_FEATURE_XSAVE) && + boot_cpu_has(X86_FEATURE_XSAVES) && + guest_cpuid_has(vcpu, X86_FEATURE_XSAVE)) + kvm_governed_feature_set(vcpu, X86_FEATURE_XSAVES); - svm->pause_threshold_enabled = kvm_cpu_cap_has(X86_FEATURE_PFTHRESHOLD) && - guest_cpuid_has(vcpu, X86_FEATURE_PFTHRESHOLD); + kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_NRIPS); + kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_TSCRATEMSR); + kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LBRV); - svm->vgif_enabled = vgif && guest_cpuid_has(vcpu, X86_FEATURE_VGIF); + /* + * Intercept VMLOAD if the vCPU mode is Intel in order to emulate that + * VMLOAD drops bits 63:32 of SYSENTER (ignoring the fact that exposing + * SVM on Intel is bonkers and extremely unlikely to work). + */ + if (!guest_cpuid_is_intel(vcpu)) + kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD); - svm->vnmi_enabled = vnmi && guest_cpuid_has(vcpu, X86_FEATURE_VNMI); + kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PAUSEFILTER); + kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PFTHRESHOLD); + kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VGIF); + kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VNMI); svm_recalc_instruction_intercepts(vcpu, svm); @@ -4651,16 +4737,25 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, * and cannot be decrypted by KVM, i.e. KVM would read cyphertext and * decode garbage. * - * Inject #UD if KVM reached this point without an instruction buffer. - * In practice, this path should never be hit by a well-behaved guest, - * e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path - * is still theoretically reachable, e.g. via unaccelerated fault-like - * AVIC access, and needs to be handled by KVM to avoid putting the - * guest into an infinite loop. Injecting #UD is somewhat arbitrary, - * but its the least awful option given lack of insight into the guest. + * If KVM is NOT trying to simply skip an instruction, inject #UD if + * KVM reached this point without an instruction buffer. In practice, + * this path should never be hit by a well-behaved guest, e.g. KVM + * doesn't intercept #UD or #GP for SEV guests, but this path is still + * theoretically reachable, e.g. via unaccelerated fault-like AVIC + * access, and needs to be handled by KVM to avoid putting the guest + * into an infinite loop. Injecting #UD is somewhat arbitrary, but + * its the least awful option given lack of insight into the guest. + * + * If KVM is trying to skip an instruction, simply resume the guest. + * If a #NPF occurs while the guest is vectoring an INT3/INTO, then KVM + * will attempt to re-inject the INT3/INTO and skip the instruction. + * In that scenario, retrying the INT3/INTO and hoping the guest will + * make forward progress is the only option that has a chance of + * success (and in practice it will work the vast majority of the time). */ if (unlikely(!insn)) { - kvm_queue_exception(vcpu, UD_VECTOR); + if (!(emul_type & EMULTYPE_SKIP)) + kvm_queue_exception(vcpu, UD_VECTOR); return false; } @@ -5112,9 +5207,11 @@ static __init int svm_hardware_setup(void) svm_adjust_mmio_mask(); + nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS); + /* * Note, SEV setup consumes npt_enabled and enable_mmio_caching (which - * may be modified by svm_adjust_mmio_mask()). + * may be modified by svm_adjust_mmio_mask()), as well as nrips. */ sev_hardware_setup(); @@ -5126,11 +5223,6 @@ static __init int svm_hardware_setup(void) goto err; } - if (nrips) { - if (!boot_cpu_has(X86_FEATURE_NRIPS)) - nrips = false; - } - enable_apicv = avic = avic && avic_hardware_setup(); if (!enable_apicv) { @@ -5213,6 +5305,13 @@ static struct kvm_x86_init_ops svm_init_ops __initdata = { .pmu_ops = &amd_pmu_ops, }; +static void __svm_exit(void) +{ + kvm_x86_vendor_exit(); + + cpu_emergency_unregister_virt_callback(svm_emergency_disable); +} + static int __init svm_init(void) { int r; @@ -5226,6 +5325,8 @@ static int __init svm_init(void) if (r) return r; + cpu_emergency_register_virt_callback(svm_emergency_disable); + /* * Common KVM initialization _must_ come last, after this, /dev/kvm is * exposed to userspace! @@ -5238,14 +5339,14 @@ static int __init svm_init(void) return 0; err_kvm_init: - kvm_x86_vendor_exit(); + __svm_exit(); return r; } static void __exit svm_exit(void) { kvm_exit(); - kvm_x86_vendor_exit(); + __svm_exit(); } module_init(svm_init) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 8239c8de45ac..f41253958357 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -22,6 +22,7 @@ #include <asm/svm.h> #include <asm/sev-common.h> +#include "cpuid.h" #include "kvm_cache_regs.h" #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) @@ -33,6 +34,7 @@ #define MSRPM_OFFSETS 32 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; extern bool npt_enabled; +extern int nrips; extern int vgif; extern bool intercept_smi; extern bool x2avic_enabled; @@ -260,16 +262,6 @@ struct vcpu_svm { unsigned long soft_int_next_rip; bool soft_int_injected; - /* optional nested SVM features that are enabled for this guest */ - bool nrips_enabled : 1; - bool tsc_scaling_enabled : 1; - bool v_vmload_vmsave_enabled : 1; - bool lbrv_enabled : 1; - bool pause_filter_enabled : 1; - bool pause_threshold_enabled : 1; - bool vgif_enabled : 1; - bool vnmi_enabled : 1; - u32 ldr_reg; u32 dfr_reg; struct page *avic_backing_page; @@ -406,48 +398,6 @@ static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u3 return test_bit(bit, (unsigned long *)&control->intercepts); } -static inline void set_dr_intercepts(struct vcpu_svm *svm) -{ - struct vmcb *vmcb = svm->vmcb01.ptr; - - if (!sev_es_guest(svm->vcpu.kvm)) { - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE); - } - - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); - - recalc_intercepts(svm); -} - -static inline void clr_dr_intercepts(struct vcpu_svm *svm) -{ - struct vmcb *vmcb = svm->vmcb01.ptr; - - vmcb->control.intercepts[INTERCEPT_DR] = 0; - - /* DR7 access must remain intercepted for an SEV-ES guest */ - if (sev_es_guest(svm->vcpu.kvm)) { - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ); - vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE); - } - - recalc_intercepts(svm); -} - static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit) { struct vmcb *vmcb = svm->vmcb01.ptr; @@ -493,7 +443,8 @@ static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit) static inline bool nested_vgif_enabled(struct vcpu_svm *svm) { - return svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK); + return guest_can_use(&svm->vcpu, X86_FEATURE_VGIF) && + (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK); } static inline struct vmcb *get_vgif_vmcb(struct vcpu_svm *svm) @@ -544,7 +495,7 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm) static inline bool nested_vnmi_enabled(struct vcpu_svm *svm) { - return svm->vnmi_enabled && + return guest_can_use(&svm->vcpu, X86_FEATURE_VNMI) && (svm->nested.ctl.int_ctl & V_NMI_ENABLE_MASK); } @@ -660,7 +611,7 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); int nested_svm_exit_special(struct vcpu_svm *svm); void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu); -void __svm_write_tsc_multiplier(u64 multiplier); +void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu); void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm, struct vmcb_control_area *control); void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm, |