summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean Christopherson <seanjc@google.com>2022-06-14 21:58:29 +0000
committerPaolo Bonzini <pbonzini@redhat.com>2022-06-20 06:21:20 -0400
commit5d76b1f8c79309c0b60c8db5f16774f1691945a7 (patch)
treee31e95a929bf2b7b61383d98984dc9e8b4a2a36e
parent764643a6be07445308e492a528197044c801b3ba (diff)
KVM: nVMX: Rename nested.vmcs01_* fields to nested.pre_vmenter_*
Rename the fields in struct nested_vmx used to snapshot pre-VM-Enter values to reflect that they can hold L2's values when restoring nested state, e.g. if userspace restores MSRs before nested state. As crazy as it seems, restoring MSRs before nested state actually works (because KVM goes out if it's way to make it work), even though the initial MSR writes will hit vmcs01 despite holding L2 values. Add a related comment to vmx_enter_smm() to call out that using the common VM-Exit and VM-Enter helpers to emulate SMI and RSM is wrong and broken. The few MSRs that have snapshots _could_ be fixed by taking a snapshot prior to the forced VM-Exit instead of at forced VM-Enter, but that's just the tip of the iceberg as the rather long list of MSRs that aren't snapshotted (hello, VM-Exit MSR load list) can't be handled this way. Signed-off-by: Sean Christopherson <seanjc@google.com> Message-Id: <20220614215831.3762138-4-seanjc@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r--arch/x86/kvm/vmx/nested.c8
-rw-r--r--arch/x86/kvm/vmx/vmx.c7
-rw-r--r--arch/x86/kvm/vmx/vmx.h15
3 files changed, 23 insertions, 7 deletions
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 4a53e0c73445..38015f4ecc54 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2520,11 +2520,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
} else {
kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
- vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
+ vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.pre_vmenter_debugctl);
}
if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
- vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
+ vmcs_write64(GUEST_BNDCFGS, vmx->nested.pre_vmenter_bndcfgs);
vmx_set_rflags(vcpu, vmcs12->guest_rflags);
/* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
@@ -3381,11 +3381,11 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
if (!vmx->nested.nested_run_pending ||
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
- vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+ vmx->nested.pre_vmenter_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
if (kvm_mpx_supported() &&
(!vmx->nested.nested_run_pending ||
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
- vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
+ vmx->nested.pre_vmenter_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
/*
* Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and*
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 61962f3c4b28..b4d3820e9800 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7847,6 +7847,13 @@ static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ /*
+ * TODO: Implement custom flows for forcing the vCPU out/in of L2 on
+ * SMI and RSM. Using the common VM-Exit + VM-Enter routines is wrong
+ * SMI and RSM only modify state that is saved and restored via SMRAM.
+ * E.g. most MSRs are left untouched, but many are modified by VM-Exit
+ * and VM-Enter, and thus L2's values may be corrupted on SMI+RSM.
+ */
vmx->nested.smm.guest_mode = is_guest_mode(vcpu);
if (vmx->nested.smm.guest_mode)
nested_vmx_vmexit(vcpu, -1, 0, 0);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 71bcb486e73f..a84c91ee2a48 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -219,9 +219,18 @@ struct nested_vmx {
bool has_preemption_timer_deadline;
bool preemption_timer_expired;
- /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
- u64 vmcs01_debugctl;
- u64 vmcs01_guest_bndcfgs;
+ /*
+ * Used to snapshot MSRs that are conditionally loaded on VM-Enter in
+ * order to propagate the guest's pre-VM-Enter value into vmcs02. For
+ * emulation of VMLAUNCH/VMRESUME, the snapshot will be of L1's value.
+ * For KVM_SET_NESTED_STATE, the snapshot is of L2's value, _if_
+ * userspace restores MSRs before nested state. If userspace restores
+ * MSRs after nested state, the snapshot holds garbage, but KVM can't
+ * detect that, and the garbage value in vmcs02 will be overwritten by
+ * MSR restoration in any case.
+ */
+ u64 pre_vmenter_debugctl;
+ u64 pre_vmenter_bndcfgs;
/* to migrate it to L1 if L2 writes to L1's CR8 directly */
int l1_tpr_threshold;