KVM: SEV: add cache flush to solve SEV cache incoherency issues

Flush the CPU caches when memory is reclaimed from an SEV guest (where reclaim also includes it being unmapped from KVM's memslots). Due to lack of coherency for SEV encrypted memory, failure to flush results in silent data corruption if userspace is malicious/broken and doesn't ensure SEV guest memory is properly pinned and unpinned. Cache coherency is not enforced across the VM boundary in SEV (AMD APM vol.2 Section 15.34.7). Confidential cachelines, generated by confidential VM guests have to be explicitly flushed on the host side. If a memory page containing dirty confidential cachelines was released by VM and reallocated to another user, the cachelines may corrupt the new user at a later time. KVM takes a shortcut by assuming all confidential memory remain pinned until the end of VM lifetime. Therefore, KVM does not flush cache at mmu_notifier invalidation events. Because of this incorrect assumption and the lack of cache flushing, malicous userspace can crash the host kernel: creating a malicious VM and continuously allocates/releases unpinned confidential memory pages when the VM is running. Add cache flush operations to mmu_notifier operations to ensure that any physical memory leaving the guest VM get flushed. In particular, hook mmu_notifier_invalidate_range_start and mmu_notifier_release events and flush cache accordingly. The hook after releasing the mmu lock to avoid contention with other vCPUs. Cc: stable@vger.kernel.org Suggested-by: Sean Christpherson <seanjc@google.com> Reported-by: Mingwei Zhang <mizhang@google.com> Signed-off-by: Mingwei Zhang <mizhang@google.com> Message-Id: <20220421031407.2516575-4-mizhang@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
author: Mingwei Zhang <mizhang@google.com> 2022-04-21 03:14:07 +0000
committer: Paolo Bonzini <pbonzini@redhat.com> 2022-04-21 15:41:00 -0400
commit: 683412ccf61294d727ead4a73d97397396e69a6b (patch)
tree: 2e1f2e5ff4031699ee51b45341590461565f36fc /virt
parent: d45829b351ee6ec5f54dd55e6aca1f44fe239fe6 (diff)
1 files changed, 24 insertions, 3 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 2a23f24d13cf..f30bb8c16f26 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -164,6 +164,10 @@ __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
 {
 }
 
+__weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
+{
+}
+
 bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
 {
 	/*
@@ -357,6 +361,12 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 #endif
 
+static void kvm_flush_shadow_all(struct kvm *kvm)
+{
+	kvm_arch_flush_shadow_all(kvm);
+	kvm_arch_guest_memory_reclaimed(kvm);
+}
+
 #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
 static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
 					       gfp_t gfp_flags)
@@ -485,12 +495,15 @@ typedef bool (*hva_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);
 typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,
 			     unsigned long end);
 
+typedef void (*on_unlock_fn_t)(struct kvm *kvm);
+
 struct kvm_hva_range {
 	unsigned long start;
 	unsigned long end;
 	pte_t pte;
 	hva_handler_t handler;
 	on_lock_fn_t on_lock;
+	on_unlock_fn_t on_unlock;
 	bool flush_on_ret;
 	bool may_block;
 };
@@ -578,8 +591,11 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
 	if (range->flush_on_ret && ret)
 		kvm_flush_remote_tlbs(kvm);
 
-	if (locked)
+	if (locked) {
 		KVM_MMU_UNLOCK(kvm);
+		if (!IS_KVM_NULL_FN(range->on_unlock))
+			range->on_unlock(kvm);
+	}
 
 	srcu_read_unlock(&kvm->srcu, idx);
 
@@ -600,6 +616,7 @@ static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
 		.pte		= pte,
 		.handler	= handler,
 		.on_lock	= (void *)kvm_null_fn,
+		.on_unlock	= (void *)kvm_null_fn,
 		.flush_on_ret	= true,
 		.may_block	= false,
 	};
@@ -619,6 +636,7 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
 		.pte		= __pte(0),
 		.handler	= handler,
 		.on_lock	= (void *)kvm_null_fn,
+		.on_unlock	= (void *)kvm_null_fn,
 		.flush_on_ret	= false,
 		.may_block	= false,
 	};
@@ -687,6 +705,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 		.pte		= __pte(0),
 		.handler	= kvm_unmap_gfn_range,
 		.on_lock	= kvm_inc_notifier_count,
+		.on_unlock	= kvm_arch_guest_memory_reclaimed,
 		.flush_on_ret	= true,
 		.may_block	= mmu_notifier_range_blockable(range),
 	};
@@ -741,6 +760,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 		.pte		= __pte(0),
 		.handler	= (void *)kvm_null_fn,
 		.on_lock	= kvm_dec_notifier_count,
+		.on_unlock	= (void *)kvm_null_fn,
 		.flush_on_ret	= false,
 		.may_block	= mmu_notifier_range_blockable(range),
 	};
@@ -813,7 +833,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
 	int idx;
 
 	idx = srcu_read_lock(&kvm->srcu);
-	kvm_arch_flush_shadow_all(kvm);
+	kvm_flush_shadow_all(kvm);
 	srcu_read_unlock(&kvm->srcu, idx);
 }
 
@@ -1225,7 +1245,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	WARN_ON(rcuwait_active(&kvm->mn_memslots_update_rcuwait));
 	kvm->mn_active_invalidate_count = 0;
 #else
-	kvm_arch_flush_shadow_all(kvm);
+	kvm_flush_shadow_all(kvm);
 #endif
 	kvm_arch_destroy_vm(kvm);
 	kvm_destroy_devices(kvm);
@@ -1652,6 +1672,7 @@ static void kvm_invalidate_memslot(struct kvm *kvm,
 	 *	- kvm_is_visible_gfn (mmu_check_root)
 	 */
 	kvm_arch_flush_shadow_memslot(kvm, old);
+	kvm_arch_guest_memory_reclaimed(kvm);
 
 	/* Was released by kvm_swap_active_memslots, reacquire. */
 	mutex_lock(&kvm->slots_arch_lock);
author	Mingwei Zhang <mizhang@google.com>	2022-04-21 03:14:07 +0000
committer	Paolo Bonzini <pbonzini@redhat.com>	2022-04-21 15:41:00 -0400
commit	683412ccf61294d727ead4a73d97397396e69a6b (patch)
tree	2e1f2e5ff4031699ee51b45341590461565f36fc /virt
parent	d45829b351ee6ec5f54dd55e6aca1f44fe239fe6 (diff)