From 1844e68ecabbdfdf0228774bcd5cf0f63ffc2e57 Mon Sep 17 00:00:00 2001 From: William Grant Date: Sun, 24 Aug 2014 15:13:48 +1000 Subject: target-i386: Don't forbid NX bit on PAE PDEs and PTEs Commit e8f6d00c30ed88910d0d985f4b2bf41654172ceb ("target-i386: raise page fault for reserved physical address bits") added a check that the NX bit is not set on PAE PDPEs, but it also added it to rsvd_mask for the rest of the function. This caused any PDEs or PTEs with NX set to be erroneously rejected, making PAE guests with NX support unusable. Signed-off-by: William Grant Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- target-i386/helper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'target-i386') diff --git a/target-i386/helper.c b/target-i386/helper.c index 47b982b437..30cb0d0143 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -615,8 +615,8 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, if (!(pdpe & PG_PRESENT_MASK)) { goto do_fault; } - rsvd_mask |= PG_HI_USER_MASK | PG_NX_MASK; - if (pdpe & rsvd_mask) { + rsvd_mask |= PG_HI_USER_MASK; + if (pdpe & (rsvd_mask | PG_NX_MASK)) { goto do_fault_rsvd; } ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK; -- cgit v1.2.3 From d8b5c67b05420d966664664ff287af05b884bdd1 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 14 Aug 2014 15:39:27 -0600 Subject: x86: Use common variable range MTRR counts We currently define the number of variable range MTRR registers as 8 in the CPUX86State structure and vmstate, but use MSR_MTRRcap_VCNT (also 8) to report to guests the number available. Change this to use MSR_MTRRcap_VCNT consistently. Signed-off-by: Alex Williamson Reviewed-by: Laszlo Ersek Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- target-i386/cpu.h | 2 +- target-i386/machine.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'target-i386') diff --git a/target-i386/cpu.h b/target-i386/cpu.h index e634d83e86..d37d857d21 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -930,7 +930,7 @@ typedef struct CPUX86State { /* MTRRs */ uint64_t mtrr_fixed[11]; uint64_t mtrr_deftype; - MTRRVar mtrr_var[8]; + MTRRVar mtrr_var[MSR_MTRRcap_VCNT]; /* For KVM */ uint32_t mp_state; diff --git a/target-i386/machine.c b/target-i386/machine.c index 16d2f6a809..fb890654b1 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -677,7 +677,7 @@ VMStateDescription vmstate_x86_cpu = { /* MTRRs */ VMSTATE_UINT64_ARRAY_V(env.mtrr_fixed, X86CPU, 11, 8), VMSTATE_UINT64_V(env.mtrr_deftype, X86CPU, 8), - VMSTATE_MTRR_VARS(env.mtrr_var, X86CPU, 8, 8), + VMSTATE_MTRR_VARS(env.mtrr_var, X86CPU, MSR_MTRRcap_VCNT, 8), /* KVM-related states */ VMSTATE_INT32_V(env.interrupt_injected, X86CPU, 9), VMSTATE_UINT32_V(env.mp_state, X86CPU, 9), -- cgit v1.2.3 From d1ae67f626c5ed5729e1d8212834291b409d26df Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 14 Aug 2014 15:39:33 -0600 Subject: x86: kvm: Add MTRR support for kvm_get|put_msrs() The MTRR state in KVM currently runs completely independent of the QEMU state in CPUX86State.mtrr_*. This means that on migration, the target loses MTRR state from the source. Generally that's ok though because KVM ignores it and maps everything as write-back anyway. The exception to this rule is when we have an assigned device and an IOMMU that doesn't promote NoSnoop transactions from that device to be cache coherent. In that case KVM trusts the guest mapping of memory as configured in the MTRR. This patch updates kvm_get|put_msrs() so that we retrieve the actual vCPU MTRR settings and therefore keep CPUX86State synchronized for migration. kvm_put_msrs() is also used on vCPU reset and therefore allows future modificaitons of MTRR state at reset to be realized. Note that the entries array used by both functions was already slightly undersized for holding every possible MSR, so this patch increases it beyond the 28 new entries necessary for MTRR state. Signed-off-by: Alex Williamson Reviewed-by: Laszlo Ersek Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- target-i386/cpu.h | 2 ++ target-i386/kvm.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 101 insertions(+), 2 deletions(-) (limited to 'target-i386') diff --git a/target-i386/cpu.h b/target-i386/cpu.h index d37d857d21..3460b12139 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -337,6 +337,8 @@ #define MSR_MTRRphysBase(reg) (0x200 + 2 * (reg)) #define MSR_MTRRphysMask(reg) (0x200 + 2 * (reg) + 1) +#define MSR_MTRRphysIndex(addr) ((((addr) & ~1u) - 0x200) / 2) + #define MSR_MTRRfix64K_00000 0x250 #define MSR_MTRRfix16K_80000 0x258 #define MSR_MTRRfix16K_A0000 0x259 diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 097fe1188d..ddedc735ff 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -79,6 +79,7 @@ static int lm_capable_kernel; static bool has_msr_hv_hypercall; static bool has_msr_hv_vapic; static bool has_msr_hv_tsc; +static bool has_msr_mtrr; static bool has_msr_architectural_pmu; static uint32_t num_architectural_pmu_counters; @@ -739,6 +740,10 @@ int kvm_arch_init_vcpu(CPUState *cs) env->kvm_xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave)); } + if (env->features[FEAT_1_EDX] & CPUID_MTRR) { + has_msr_mtrr = true; + } + return 0; } @@ -1183,7 +1188,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level) CPUX86State *env = &cpu->env; struct { struct kvm_msrs info; - struct kvm_msr_entry entries[100]; + struct kvm_msr_entry entries[150]; } msr_data; struct kvm_msr_entry *msrs = msr_data.entries; int n = 0, i; @@ -1278,6 +1283,37 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_REFERENCE_TSC, env->msr_hv_tsc); } + if (has_msr_mtrr) { + kvm_msr_entry_set(&msrs[n++], MSR_MTRRdefType, env->mtrr_deftype); + kvm_msr_entry_set(&msrs[n++], + MSR_MTRRfix64K_00000, env->mtrr_fixed[0]); + kvm_msr_entry_set(&msrs[n++], + MSR_MTRRfix16K_80000, env->mtrr_fixed[1]); + kvm_msr_entry_set(&msrs[n++], + MSR_MTRRfix16K_A0000, env->mtrr_fixed[2]); + kvm_msr_entry_set(&msrs[n++], + MSR_MTRRfix4K_C0000, env->mtrr_fixed[3]); + kvm_msr_entry_set(&msrs[n++], + MSR_MTRRfix4K_C8000, env->mtrr_fixed[4]); + kvm_msr_entry_set(&msrs[n++], + MSR_MTRRfix4K_D0000, env->mtrr_fixed[5]); + kvm_msr_entry_set(&msrs[n++], + MSR_MTRRfix4K_D8000, env->mtrr_fixed[6]); + kvm_msr_entry_set(&msrs[n++], + MSR_MTRRfix4K_E0000, env->mtrr_fixed[7]); + kvm_msr_entry_set(&msrs[n++], + MSR_MTRRfix4K_E8000, env->mtrr_fixed[8]); + kvm_msr_entry_set(&msrs[n++], + MSR_MTRRfix4K_F0000, env->mtrr_fixed[9]); + kvm_msr_entry_set(&msrs[n++], + MSR_MTRRfix4K_F8000, env->mtrr_fixed[10]); + for (i = 0; i < MSR_MTRRcap_VCNT; i++) { + kvm_msr_entry_set(&msrs[n++], + MSR_MTRRphysBase(i), env->mtrr_var[i].base); + kvm_msr_entry_set(&msrs[n++], + MSR_MTRRphysMask(i), env->mtrr_var[i].mask); + } + } /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see * kvm_put_msr_feature_control. */ @@ -1484,7 +1520,7 @@ static int kvm_get_msrs(X86CPU *cpu) CPUX86State *env = &cpu->env; struct { struct kvm_msrs info; - struct kvm_msr_entry entries[100]; + struct kvm_msr_entry entries[150]; } msr_data; struct kvm_msr_entry *msrs = msr_data.entries; int ret, i, n; @@ -1572,6 +1608,24 @@ static int kvm_get_msrs(X86CPU *cpu) if (has_msr_hv_tsc) { msrs[n++].index = HV_X64_MSR_REFERENCE_TSC; } + if (has_msr_mtrr) { + msrs[n++].index = MSR_MTRRdefType; + msrs[n++].index = MSR_MTRRfix64K_00000; + msrs[n++].index = MSR_MTRRfix16K_80000; + msrs[n++].index = MSR_MTRRfix16K_A0000; + msrs[n++].index = MSR_MTRRfix4K_C0000; + msrs[n++].index = MSR_MTRRfix4K_C8000; + msrs[n++].index = MSR_MTRRfix4K_D0000; + msrs[n++].index = MSR_MTRRfix4K_D8000; + msrs[n++].index = MSR_MTRRfix4K_E0000; + msrs[n++].index = MSR_MTRRfix4K_E8000; + msrs[n++].index = MSR_MTRRfix4K_F0000; + msrs[n++].index = MSR_MTRRfix4K_F8000; + for (i = 0; i < MSR_MTRRcap_VCNT; i++) { + msrs[n++].index = MSR_MTRRphysBase(i); + msrs[n++].index = MSR_MTRRphysMask(i); + } + } msr_data.info.nmsrs = n; ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data); @@ -1692,6 +1746,49 @@ static int kvm_get_msrs(X86CPU *cpu) case HV_X64_MSR_REFERENCE_TSC: env->msr_hv_tsc = msrs[i].data; break; + case MSR_MTRRdefType: + env->mtrr_deftype = msrs[i].data; + break; + case MSR_MTRRfix64K_00000: + env->mtrr_fixed[0] = msrs[i].data; + break; + case MSR_MTRRfix16K_80000: + env->mtrr_fixed[1] = msrs[i].data; + break; + case MSR_MTRRfix16K_A0000: + env->mtrr_fixed[2] = msrs[i].data; + break; + case MSR_MTRRfix4K_C0000: + env->mtrr_fixed[3] = msrs[i].data; + break; + case MSR_MTRRfix4K_C8000: + env->mtrr_fixed[4] = msrs[i].data; + break; + case MSR_MTRRfix4K_D0000: + env->mtrr_fixed[5] = msrs[i].data; + break; + case MSR_MTRRfix4K_D8000: + env->mtrr_fixed[6] = msrs[i].data; + break; + case MSR_MTRRfix4K_E0000: + env->mtrr_fixed[7] = msrs[i].data; + break; + case MSR_MTRRfix4K_E8000: + env->mtrr_fixed[8] = msrs[i].data; + break; + case MSR_MTRRfix4K_F0000: + env->mtrr_fixed[9] = msrs[i].data; + break; + case MSR_MTRRfix4K_F8000: + env->mtrr_fixed[10] = msrs[i].data; + break; + case MSR_MTRRphysBase(0) ... MSR_MTRRphysMask(MSR_MTRRcap_VCNT - 1): + if (index & 1) { + env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data; + } else { + env->mtrr_var[MSR_MTRRphysIndex(index)].base = msrs[i].data; + } + break; } } -- cgit v1.2.3 From 9db2efd95e13330075bff027cd682a063d725332 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 14 Aug 2014 15:39:39 -0600 Subject: x86: Clear MTRRs on vCPU reset The SDM specifies (June 2014 Vol3 11.11.5): On a hardware reset, the P6 and more recent processors clear the valid flags in variable-range MTRRs and clear the E flag in the IA32_MTRR_DEF_TYPE MSR to disable all MTRRs. All other bits in the MTRRs are undefined. We currently do none of that, so whatever MTRR settings you had prior to reset is what you have after reset. Usually this doesn't matter because KVM often ignores the guest mappings and uses write-back anyway. However, if you have an assigned device and an IOMMU that allows NoSnoop for that device, KVM defers to the guest memory mappings which are now stale after reset. The result is that OVMF rebooting on such a configuration takes a full minute to LZMA decompress the firmware volume, a process that is nearly instant on the initial boot. Signed-off-by: Alex Williamson Reviewed-by: Laszlo Ersek Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- target-i386/cpu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'target-i386') diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 217500c735..52e335f145 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -2588,6 +2588,16 @@ static void x86_cpu_reset(CPUState *s) env->xcr0 = 1; + /* + * SDM 11.11.5 requires: + * - IA32_MTRR_DEF_TYPE MSR.E = 0 + * - IA32_MTRR_PHYSMASKn.V = 0 + * All other bits are undefined. For simplification, zero it all. + */ + env->mtrr_deftype = 0; + memset(env->mtrr_var, 0, sizeof(env->mtrr_var)); + memset(env->mtrr_fixed, 0, sizeof(env->mtrr_fixed)); + #if !defined(CONFIG_USER_ONLY) /* We hard-wire the BSP to the first CPU. */ if (s->cpu_index == 0) { -- cgit v1.2.3 From 5bd8ff07e65d066f1e90f05d49ee634f3ccd2664 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 25 Aug 2014 17:02:12 -0300 Subject: target-i386: Add "mpx" CPU feature name Migration support for MPX is already implemented (commit 79e9ebebbf2a00c46fcedb6dc7dd5e12bbd30216), so we can add it to the list of known feature names. Signed-off-by: Eduardo Habkost Signed-off-by: Paolo Bonzini --- target-i386/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'target-i386') diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 52e335f145..f4ee3535f0 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -258,7 +258,7 @@ static const char *svm_feature_name[] = { static const char *cpuid_7_0_ebx_feature_name[] = { "fsgsbase", NULL, NULL, "bmi1", "hle", "avx2", NULL, "smep", - "bmi2", "erms", "invpcid", "rtm", NULL, NULL, NULL, NULL, + "bmi2", "erms", "invpcid", "rtm", NULL, NULL, "mpx", NULL, NULL, NULL, "rdseed", "adx", "smap", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; -- cgit v1.2.3 From 7b458bfd12a71b3da6b531daedc417492c9334e0 Mon Sep 17 00:00:00 2001 From: Eduardo Habkost Date: Mon, 25 Aug 2014 17:02:13 -0300 Subject: target-i386: Add "tsc_adjust" CPU feature name tsc_adjust migration support is already implemented (commit f28558d3d37ad3bc4e35e8ac93f7bf81a0d5622c), so we can add it to the list of known feature names. Signed-off-by: Eduardo Habkost Signed-off-by: Paolo Bonzini --- target-i386/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'target-i386') diff --git a/target-i386/cpu.c b/target-i386/cpu.c index f4ee3535f0..fa811a02d1 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -257,7 +257,7 @@ static const char *svm_feature_name[] = { }; static const char *cpuid_7_0_ebx_feature_name[] = { - "fsgsbase", NULL, NULL, "bmi1", "hle", "avx2", NULL, "smep", + "fsgsbase", "tsc_adjust", NULL, "bmi1", "hle", "avx2", NULL, "smep", "bmi2", "erms", "invpcid", "rtm", NULL, NULL, "mpx", NULL, NULL, NULL, "rdseed", "adx", "smap", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -- cgit v1.2.3