summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-10-21 11:22:04 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-10-21 11:22:04 -0700
commitd129377639907fce7e0a27990e590e4661d3ee02 (patch)
tree6ab2b8b2e756e71ddc83146d01e8a75c5d172b24 /arch/x86
parentc1bc09d7bfcbe90c6df3a630ec1fb0fcd4799236 (diff)
parente9001a382fa2c256229adc68d55212028b01d515 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini: "ARM64: - Fix the guest view of the ID registers, making the relevant fields writable from userspace (affecting ID_AA64DFR0_EL1 and ID_AA64PFR1_EL1) - Correcly expose S1PIE to guests, fixing a regression introduced in 6.12-rc1 with the S1POE support - Fix the recycling of stage-2 shadow MMUs by tracking the context (are we allowed to block or not) as well as the recycling state - Address a couple of issues with the vgic when userspace misconfigures the emulation, resulting in various splats. Headaches courtesy of our Syzkaller friends - Stop wasting space in the HYP idmap, as we are dangerously close to the 4kB limit, and this has already exploded in -next - Fix another race in vgic_init() - Fix a UBSAN error when faking the cache topology with MTE enabled RISCV: - RISCV: KVM: use raw_spinlock for critical section in imsic x86: - A bandaid for lack of XCR0 setup in selftests, which causes trouble if the compiler is configured to have x86-64-v3 (with AVX) as the default ISA. Proper XCR0 setup will come in the next merge window. - Fix an issue where KVM would not ignore low bits of the nested CR3 and potentially leak up to 31 bytes out of the guest memory's bounds - Fix case in which an out-of-date cached value for the segments could by returned by KVM_GET_SREGS. - More cleanups for KVM_X86_QUIRK_SLOT_ZAP_ALL - Override MTRR state for KVM confidential guests, making it WB by default as is already the case for Hyper-V guests. Generic: - Remove a couple of unused functions" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (27 commits) RISCV: KVM: use raw_spinlock for critical section in imsic KVM: selftests: Fix out-of-bounds reads in CPUID test's array lookups KVM: selftests: x86: Avoid using SSE/AVX instructions KVM: nSVM: Ignore nCR3[4:0] when loading PDPTEs from memory KVM: VMX: reset the segment cache after segment init in vmx_vcpu_reset() KVM: x86: Clean up documentation for KVM_X86_QUIRK_SLOT_ZAP_ALL KVM: x86/mmu: Add lockdep assert to enforce safe usage of kvm_unmap_gfn_range() KVM: x86/mmu: Zap only SPs that shadow gPTEs when deleting memslot x86/kvm: Override default caching mode for SEV-SNP and TDX KVM: Remove unused kvm_vcpu_gfn_to_pfn_atomic KVM: Remove unused kvm_vcpu_gfn_to_pfn KVM: arm64: Ensure vgic_ready() is ordered against MMIO registration KVM: arm64: vgic: Don't check for vgic_ready() when setting NR_IRQS KVM: arm64: Fix shift-out-of-bounds bug KVM: arm64: Shave a few bytes from the EL2 idmap code KVM: arm64: Don't eagerly teardown the vgic on init error KVM: arm64: Expose S1PIE to guests KVM: arm64: nv: Clarify safety of allowing TLBI unmaps to reschedule KVM: arm64: nv: Punt stage-2 recycling to a vCPU request KVM: arm64: nv: Do not block when unmapping stage-2 if disallowed ...
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/kernel/kvm.c4
-rw-r--r--arch/x86/kvm/mmu/mmu.c27
-rw-r--r--arch/x86/kvm/svm/nested.c6
-rw-r--r--arch/x86/kvm/vmx/vmx.c6
4 files changed, 29 insertions, 14 deletions
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 263f8aed4e2c..21e9e4845354 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -37,6 +37,7 @@
#include <asm/apic.h>
#include <asm/apicdef.h>
#include <asm/hypervisor.h>
+#include <asm/mtrr.h>
#include <asm/tlb.h>
#include <asm/cpuidle_haltpoll.h>
#include <asm/ptrace.h>
@@ -980,6 +981,9 @@ static void __init kvm_init_platform(void)
}
kvmclock_init();
x86_platform.apic_post_init = kvm_apic_init;
+
+ /* Set WB as the default cache mode for SEV-SNP and TDX */
+ mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK);
}
#if defined(CONFIG_AMD_MEM_ENCRYPT)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index a9a23e058555..8e853a5fc867 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1556,6 +1556,17 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
{
bool flush = false;
+ /*
+ * To prevent races with vCPUs faulting in a gfn using stale data,
+ * zapping a gfn range must be protected by mmu_invalidate_in_progress
+ * (and mmu_invalidate_seq). The only exception is memslot deletion;
+ * in that case, SRCU synchronization ensures that SPTEs are zapped
+ * after all vCPUs have unlocked SRCU, guaranteeing that vCPUs see the
+ * invalid slot.
+ */
+ lockdep_assert_once(kvm->mmu_invalidate_in_progress ||
+ lockdep_is_held(&kvm->slots_lock));
+
if (kvm_memslots_have_rmaps(kvm))
flush = __kvm_rmap_zap_gfn_range(kvm, range->slot,
range->start, range->end,
@@ -1884,14 +1895,10 @@ static bool sp_has_gptes(struct kvm_mmu_page *sp)
if (is_obsolete_sp((_kvm), (_sp))) { \
} else
-#define for_each_gfn_valid_sp(_kvm, _sp, _gfn) \
+#define for_each_gfn_valid_sp_with_gptes(_kvm, _sp, _gfn) \
for_each_valid_sp(_kvm, _sp, \
&(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)]) \
- if ((_sp)->gfn != (_gfn)) {} else
-
-#define for_each_gfn_valid_sp_with_gptes(_kvm, _sp, _gfn) \
- for_each_gfn_valid_sp(_kvm, _sp, _gfn) \
- if (!sp_has_gptes(_sp)) {} else
+ if ((_sp)->gfn != (_gfn) || !sp_has_gptes(_sp)) {} else
static bool kvm_sync_page_check(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
@@ -7063,15 +7070,15 @@ static void kvm_mmu_zap_memslot_pages_and_flush(struct kvm *kvm,
/*
* Since accounting information is stored in struct kvm_arch_memory_slot,
- * shadow pages deletion (e.g. unaccount_shadowed()) requires that all
- * gfns with a shadow page have a corresponding memslot. Do so before
- * the memslot goes away.
+ * all MMU pages that are shadowing guest PTEs must be zapped before the
+ * memslot is deleted, as freeing such pages after the memslot is freed
+ * will result in use-after-free, e.g. in unaccount_shadowed().
*/
for (i = 0; i < slot->npages; i++) {
struct kvm_mmu_page *sp;
gfn_t gfn = slot->base_gfn + i;
- for_each_gfn_valid_sp(kvm, sp, gfn)
+ for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn)
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index d5314cb7dff4..cf84103ce38b 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -63,8 +63,12 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
u64 pdpte;
int ret;
+ /*
+ * Note, nCR3 is "assumed" to be 32-byte aligned, i.e. the CPU ignores
+ * nCR3[4:0] when loading PDPTEs from memory.
+ */
ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
- offset_in_page(cr3) + index * 8, 8);
+ (cr3 & GENMASK(11, 5)) + index * 8, 8);
if (ret)
return 0;
return pdpte;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1a4438358c5e..81ed596e4454 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4888,9 +4888,6 @@ void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmx->hv_deadline_tsc = -1;
kvm_set_cr8(vcpu, 0);
- vmx_segment_cache_clear(vmx);
- kvm_register_mark_available(vcpu, VCPU_EXREG_SEGMENTS);
-
seg_setup(VCPU_SREG_CS);
vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
vmcs_writel(GUEST_CS_BASE, 0xffff0000ul);
@@ -4917,6 +4914,9 @@ void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmcs_writel(GUEST_IDTR_BASE, 0);
vmcs_write32(GUEST_IDTR_LIMIT, 0xffff);
+ vmx_segment_cache_clear(vmx);
+ kvm_register_mark_available(vcpu, VCPU_EXREG_SEGMENTS);
+
vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0);
vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0);