Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--   arch/x86/kvm/cpuid.c            | 30
-rw-r--r--   arch/x86/kvm/mmu/mmu.c          |  2
-rw-r--r--   arch/x86/kvm/mmu/paging.h       | 14
-rw-r--r--   arch/x86/kvm/mmu/paging_tmpl.h  |  4
-rw-r--r--   arch/x86/kvm/mmu/spte.h         |  6
-rw-r--r--   arch/x86/kvm/svm/nested.c       | 53
-rw-r--r--   arch/x86/kvm/svm/sev.c          | 14
-rw-r--r--   arch/x86/kvm/svm/svm.c          | 77
-rw-r--r--   arch/x86/kvm/svm/svm.h          |  5
-rw-r--r--   arch/x86/kvm/vmx/vmx.h          |  2
-rw-r--r--   arch/x86/kvm/x86.c              |  5
11 files changed, 161 insertions, 51 deletions
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index c42613cfb5ba..739be5da3bca 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -765,7 +765,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 
 		edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS);
 		edx.split.bit_width_fixed = cap.bit_width_fixed;
-		edx.split.anythread_deprecated = 1;
+		if (cap.version)
+			edx.split.anythread_deprecated = 1;
 		edx.split.reserved1 = 0;
 		edx.split.reserved2 = 0;
 
@@ -940,8 +941,21 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 		unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
 		unsigned phys_as = entry->eax & 0xff;
 
-		if (!g_phys_as)
+		/*
+		 * If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as
+		 * the guest operates in the same PA space as the host, i.e.
+		 * reductions in MAXPHYADDR for memory encryption affect shadow
+		 * paging, too.
+		 *
+		 * If TDP is enabled but an explicit guest MAXPHYADDR is not
+		 * provided, use the raw bare metal MAXPHYADDR as reductions to
+		 * the HPAs do not affect GPAs.
+		 */
+		if (!tdp_enabled)
+			g_phys_as = boot_cpu_data.x86_phys_bits;
+		else if (!g_phys_as)
 			g_phys_as = phys_as;
+
 		entry->eax = g_phys_as | (virt_as << 8);
 		entry->edx = 0;
 		cpuid_entry_override(entry, CPUID_8000_0008_EBX);
@@ -964,12 +978,18 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 	case 0x8000001a:
 	case 0x8000001e:
 		break;
-	/* Support memory encryption cpuid if host supports it */
 	case 0x8000001F:
-		if (!kvm_cpu_cap_has(X86_FEATURE_SEV))
+		if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) {
 			entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
-		else
+		} else {
 			cpuid_entry_override(entry, CPUID_8000_001F_EAX);
+
+			/*
+			 * Enumerate '0' for "PA bits reduction", the adjusted
+			 * MAXPHYADDR is enumerated directly (see 0x80000008).
+			 */
+			entry->ebx &= ~GENMASK(11, 6);
+		}
 		break;
 	/*Add support for Centaur's CPUID instruction*/
 	case 0xC0000000:
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 845d114ae075..66f7f5bc3482 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -53,6 +53,8 @@
 #include <asm/kvm_page_track.h>
 #include "trace.h"
 
+#include "paging.h"
+
 extern bool itlb_multihit_kvm_mitigation;
 
 int __read_mostly nx_huge_pages = -1;
diff --git a/arch/x86/kvm/mmu/paging.h b/arch/x86/kvm/mmu/paging.h
new file mode 100644
index 000000000000..de8ab323bb70
--- /dev/null
+++ b/arch/x86/kvm/mmu/paging.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Shadow paging constants/helpers that don't need to be #undef'd. */
+#ifndef __KVM_X86_PAGING_H
+#define __KVM_X86_PAGING_H
+
+#define GUEST_PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
+#define PT64_LVL_ADDR_MASK(level) \
+	(GUEST_PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
+						* PT64_LEVEL_BITS))) - 1))
+#define PT64_LVL_OFFSET_MASK(level) \
+	(GUEST_PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
+						* PT64_LEVEL_BITS))) - 1))
+#endif /* __KVM_X86_PAGING_H */
+
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 490a028ddabe..ee044d357b5f 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -24,7 +24,7 @@
 	#define pt_element_t u64
 	#define guest_walker guest_walker64
 	#define FNAME(name) paging##64_##name
-	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
+	#define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK
 	#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
 	#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
 	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
@@ -57,7 +57,7 @@
 	#define pt_element_t u64
 	#define guest_walker guest_walkerEPT
 	#define FNAME(name) ept_##name
-	#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
+	#define PT_BASE_ADDR_MASK GUEST_PT64_BASE_ADDR_MASK
 	#define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl)
 	#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
 	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 7a5ce9314107..eb7b227fc6cf 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -38,12 +38,6 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK == 0);
 #else
 #define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & ~(u64)(PAGE_SIZE-1))
 #endif
-#define PT64_LVL_ADDR_MASK(level) \
-	(PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + (((level) - 1) \
-						* PT64_LEVEL_BITS))) - 1))
-#define PT64_LVL_OFFSET_MASK(level) \
-	(PT64_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \
-						* PT64_LEVEL_BITS))) - 1))
 
 #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | shadow_user_mask \
 			| shadow_x_mask | shadow_nx_mask | shadow_me_mask)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 21d03e3a5dfd..3bd09c50c98b 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -154,6 +154,10 @@ void recalc_intercepts(struct vcpu_svm *svm)
 
 	for (i = 0; i < MAX_INTERCEPT; i++)
 		c->intercepts[i] |= g->intercepts[i];
+
+	/* If SMI is not intercepted, ignore guest SMI intercept as well */
+	if (!intercept_smi)
+		vmcb_clr_intercept(c, INTERCEPT_SMI);
 }
 
 static void copy_vmcb_control_area(struct vmcb_control_area *dst,
@@ -304,8 +308,8 @@ static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
 	return true;
 }
 
-static void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
-					    struct vmcb_control_area *control)
+void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
+				     struct vmcb_control_area *control)
 {
 	copy_vmcb_control_area(&svm->nested.ctl, control);
 
@@ -618,6 +622,11 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
 	struct kvm_host_map map;
 	u64 vmcb12_gpa;
 
+	if (!svm->nested.hsave_msr) {
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
+
 	if (is_smm(vcpu)) {
 		kvm_queue_exception(vcpu, UD_VECTOR);
 		return 1;
@@ -692,6 +701,27 @@ out:
 	return ret;
 }
 
+/* Copy state save area fields which are handled by VMRUN */
+void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
+			  struct vmcb_save_area *to_save)
+{
+	to_save->es = from_save->es;
+	to_save->cs = from_save->cs;
+	to_save->ss = from_save->ss;
+	to_save->ds = from_save->ds;
+	to_save->gdtr = from_save->gdtr;
+	to_save->idtr = from_save->idtr;
+	to_save->rflags = from_save->rflags | X86_EFLAGS_FIXED;
+	to_save->efer = from_save->efer;
+	to_save->cr0 = from_save->cr0;
+	to_save->cr3 = from_save->cr3;
+	to_save->cr4 = from_save->cr4;
+	to_save->rax = from_save->rax;
+	to_save->rsp = from_save->rsp;
+	to_save->rip = from_save->rip;
+	to_save->cpl = 0;
+}
+
 void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
 {
 	to_vmcb->save.fs = from_vmcb->save.fs;
@@ -1355,28 +1385,11 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
 
 	svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
 
-	svm->vmcb01.ptr->save.es = save->es;
-	svm->vmcb01.ptr->save.cs = save->cs;
-	svm->vmcb01.ptr->save.ss = save->ss;
-	svm->vmcb01.ptr->save.ds = save->ds;
-	svm->vmcb01.ptr->save.gdtr = save->gdtr;
-	svm->vmcb01.ptr->save.idtr = save->idtr;
-	svm->vmcb01.ptr->save.rflags = save->rflags | X86_EFLAGS_FIXED;
-	svm->vmcb01.ptr->save.efer = save->efer;
-	svm->vmcb01.ptr->save.cr0 = save->cr0;
-	svm->vmcb01.ptr->save.cr3 = save->cr3;
-	svm->vmcb01.ptr->save.cr4 = save->cr4;
-	svm->vmcb01.ptr->save.rax = save->rax;
-	svm->vmcb01.ptr->save.rsp = save->rsp;
-	svm->vmcb01.ptr->save.rip = save->rip;
-	svm->vmcb01.ptr->save.cpl = 0;
-
+	svm_copy_vmrun_state(save, &svm->vmcb01.ptr->save);
 	nested_load_control_from_vmcb12(svm, ctl);
 
 	svm_switch_vmcb(svm, &svm->nested.vmcb02);
-
 	nested_vmcb02_prepare_control(svm);
-
 	kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
 	ret = 0;
 out_free:
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 62926f1a5f7b..6710d9ee2e4b 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -1272,8 +1272,8 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	/* Pin guest memory */
 	guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
 				    PAGE_SIZE, &n, 0);
-	if (!guest_page)
-		return -EFAULT;
+	if (IS_ERR(guest_page))
+		return PTR_ERR(guest_page);
 
 	/* allocate memory for header and transport buffer */
 	ret = -ENOMEM;
@@ -1310,8 +1310,9 @@ static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	}
 
 	/* Copy packet header to userspace. */
-	ret = copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
-			   params.hdr_len);
+	if (copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
+			 params.hdr_len))
+		ret = -EFAULT;
 
 e_free_trans_data:
 	kfree(trans_data);
@@ -1463,11 +1464,12 @@ static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	data.trans_len = params.trans_len;
 
 	/* Pin guest memory */
-	ret = -EFAULT;
 	guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
 				    PAGE_SIZE, &n, 0);
-	if (!guest_page)
+	if (IS_ERR(guest_page)) {
+		ret = PTR_ERR(guest_page);
 		goto e_free_trans;
+	}
 
 	/* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
 	data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 8834822c00cd..664d20f0689c 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -198,6 +198,11 @@ module_param(avic, bool, 0444);
 bool __read_mostly dump_invalid_vmcb;
 module_param(dump_invalid_vmcb, bool, 0644);
 
+
+bool intercept_smi = true;
+module_param(intercept_smi, bool, 0444);
+
+
 static bool svm_gp_erratum_intercept = true;
 
 static u8 rsm_ins_bytes[] = "\x0f\xaa";
@@ -1185,7 +1190,10 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
 
 	svm_set_intercept(svm, INTERCEPT_INTR);
 	svm_set_intercept(svm, INTERCEPT_NMI);
-	svm_set_intercept(svm, INTERCEPT_SMI);
+
+	if (intercept_smi)
+		svm_set_intercept(svm, INTERCEPT_SMI);
+
 	svm_set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
 	svm_set_intercept(svm, INTERCEPT_RDPMC);
 	svm_set_intercept(svm, INTERCEPT_CPUID);
@@ -1923,7 +1931,7 @@ static int npf_interception(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
+	u64 fault_address = svm->vmcb->control.exit_info_2;
 	u64 error_code = svm->vmcb->control.exit_info_1;
 
 	trace_kvm_page_fault(fault_address, error_code);
@@ -2106,6 +2114,11 @@ static int nmi_interception(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int smi_interception(struct kvm_vcpu *vcpu)
+{
+	return 1;
+}
+
 static int intr_interception(struct kvm_vcpu *vcpu)
 {
 	++vcpu->stat.irq_exits;
@@ -2941,7 +2954,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 			svm_disable_lbrv(vcpu);
 		break;
 	case MSR_VM_HSAVE_PA:
-		svm->nested.hsave_msr = data;
+		/*
+		 * Old kernels did not validate the value written to
+		 * MSR_VM_HSAVE_PA. Allow KVM_SET_MSR to set an invalid
+		 * value to allow live migrating buggy or malicious guests
+		 * originating from those kernels.
+		 */
+		if (!msr->host_initiated && !page_address_valid(vcpu, data))
+			return 1;
+
+		svm->nested.hsave_msr = data & PAGE_MASK;
 		break;
 	case MSR_VM_CR:
 		return svm_set_vm_cr(vcpu, data);
@@ -3080,8 +3102,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[SVM_EXIT_EXCP_BASE + GP_VECTOR]	= gp_interception,
 	[SVM_EXIT_INTR]				= intr_interception,
 	[SVM_EXIT_NMI]				= nmi_interception,
-	[SVM_EXIT_SMI]				= kvm_emulate_as_nop,
-	[SVM_EXIT_INIT]				= kvm_emulate_as_nop,
+	[SVM_EXIT_SMI]				= smi_interception,
 	[SVM_EXIT_VINTR]			= interrupt_window_interception,
 	[SVM_EXIT_RDPMC]			= kvm_emulate_rdpmc,
 	[SVM_EXIT_CPUID]			= kvm_emulate_cpuid,
@@ -4288,6 +4309,7 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
 static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	struct kvm_host_map map_save;
 	int ret;
 
 	if (is_guest_mode(vcpu)) {
@@ -4303,6 +4325,29 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 		ret = nested_svm_vmexit(svm);
 		if (ret)
 			return ret;
+
+		/*
+		 * KVM uses VMCB01 to store L1 host state while L2 runs but
+		 * VMCB01 is going to be used during SMM and thus the state will
+		 * be lost. Temporary save non-VMLOAD/VMSAVE state to the host save
+		 * area pointed to by MSR_VM_HSAVE_PA. APM guarantees that the
+		 * format of the area is identical to guest save area offsetted
+		 * by 0x400 (matches the offset of 'struct vmcb_save_area'
+		 * within 'struct vmcb'). Note: HSAVE area may also be used by
+		 * L1 hypervisor to save additional host context (e.g. KVM does
+		 * that, see svm_prepare_guest_switch()) which must be
+		 * preserved.
+		 */
+		if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
+				 &map_save) == -EINVAL)
+			return 1;
+
+		BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
+
+		svm_copy_vmrun_state(&svm->vmcb01.ptr->save,
+				     map_save.hva + 0x400);
+
+		kvm_vcpu_unmap(vcpu, &map_save, true);
 	}
 	return 0;
 }
@@ -4310,13 +4355,14 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-	struct kvm_host_map map;
+	struct kvm_host_map map, map_save;
 	int ret = 0;
 
 	if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
 		u64 saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
 		u64 guest = GET_SMSTATE(u64, smstate, 0x7ed8);
 		u64 vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
+		struct vmcb *vmcb12;
 
 		if (guest) {
 			if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
@@ -4332,8 +4378,25 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
 			if (svm_allocate_nested(svm))
 				return 1;
 
-			ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, map.hva);
+			vmcb12 = map.hva;
+
+			nested_load_control_from_vmcb12(svm, &vmcb12->control);
+
+			ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12);
 			kvm_vcpu_unmap(vcpu, &map, true);
+
+			/*
+			 * Restore L1 host state from L1 HSAVE area as VMCB01 was
+			 * used during SMM (see svm_enter_smm())
+			 */
+			if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
+					 &map_save) == -EINVAL)
+				return 1;
+
+			svm_copy_vmrun_state(map_save.hva + 0x400,
+					     &svm->vmcb01.ptr->save);
+
+			kvm_vcpu_unmap(vcpu, &map_save, true);
 		}
 	}
 
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f89b623bb591..7e2090752d8f 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -31,6 +31,7 @@
 #define MSRPM_OFFSETS	16
 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
 extern bool npt_enabled;
+extern bool intercept_smi;
 
 /*
  * Clean bits in VMCB.
@@ -463,6 +464,8 @@ void svm_leave_nested(struct vcpu_svm *svm);
 void svm_free_nested(struct vcpu_svm *svm);
 int svm_allocate_nested(struct vcpu_svm *svm);
 int nested_svm_vmrun(struct kvm_vcpu *vcpu);
+void svm_copy_vmrun_state(struct vmcb_save_area *from_save,
+			  struct vmcb_save_area *to_save);
 void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
 int nested_svm_vmexit(struct vcpu_svm *svm);
 
@@ -479,6 +482,8 @@ int nested_svm_check_permissions(struct kvm_vcpu *vcpu);
 int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
 			       bool has_error_code, u32 error_code);
 int nested_svm_exit_special(struct vcpu_svm *svm);
+void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
+				     struct vmcb_control_area *control);
 void nested_sync_control_from_vmcb02(struct vcpu_svm *svm);
 void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm);
 void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 3979a947933a..db88ed4f2121 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -14,8 +14,6 @@
 #include "vmx_ops.h"
 #include "cpuid.h"
 
-extern const u32 vmx_msr_index[];
-
 #define MSR_TYPE_R	1
 #define MSR_TYPE_W	2
 #define MSR_TYPE_RW	3
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c6dc1b445231..a4fd10604f72 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9601,6 +9601,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		set_debugreg(vcpu->arch.eff_db[3], 3);
 		set_debugreg(vcpu->arch.dr6, 6);
 		vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
+	} else if (unlikely(hw_breakpoint_active())) {
+		set_debugreg(0, 7);
 	}
 
 	for (;;) {
@@ -10985,9 +10987,6 @@ int kvm_arch_hardware_setup(void *opaque)
 	int r;
 
 	rdmsrl_safe(MSR_EFER, &host_efer);
-	if (WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_NX) &&
-			 !(host_efer & EFER_NX)))
-		return -EIO;
 
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		rdmsrl(MSR_IA32_XSS, host_xss);
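
Aside for readers following the CPUID 0x80000008 change in cpuid.c above: the guest MAXPHYADDR selection it introduces reduces to a three-way choice. The sketch below restates that logic as a standalone C helper; it is illustrative only, not kernel code, and the function name, parameters, and their packaging are hypothetical.

/*
 * Minimal sketch (assumed names, not part of the patch) of the guest
 * MAXPHYADDR selection added to __do_cpuid_func() above.
 */
static unsigned int guest_maxphyaddr(int tdp_enabled,
				     unsigned int host_phys_bits_adjusted, /* host value reduced for memory encryption */
				     unsigned int host_phys_bits_raw,      /* bare-metal CPUID 0x80000008 EAX[7:0] */
				     unsigned int guest_phys_bits)         /* explicit guest value, 0 if not provided */
{
	if (!tdp_enabled)
		/*
		 * Shadow paging: guest PAs live in the host PA space, so the
		 * encryption-adjusted host MAXPHYADDR applies to the guest too.
		 */
		return host_phys_bits_adjusted;

	/*
	 * NPT/EPT: an explicit guest value wins; otherwise use the raw
	 * bare-metal value, since HPA reductions do not affect GPAs.
	 */
	return guest_phys_bits ? guest_phys_bits : host_phys_bits_raw;
}

With tdp_enabled set and guest_phys_bits zero this returns the raw host value, which is the behaviour the new comment in cpuid.c describes; with TDP disabled it falls back to the adjusted host MAXPHYADDR used for shadow paging.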