summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2016-06-07 17:51:18 +0200
committerPaolo Bonzini <pbonzini@redhat.com>2016-07-05 14:41:26 +0200
commitadd6a0cd1c5ba51b201e1361b05a5df817083618 (patch)
treededa6470d77bbc8b478f4d508927754e8ff38e42
parent92176a8ede577d0ff78ab3298e06701f67ad5f51 (diff)
KVM: MMU: try to fix up page faults before giving up
The vGPU folks would like to trap the first access to a BAR by setting vm_ops on the VMAs produced by mmap-ing a VFIO device. The fault handler then can use remap_pfn_range to place some non-reserved pages in the VMA. This kind of VM_PFNMAP mapping is not handled by KVM, but follow_pfn and fixup_user_fault together help supporting it. The patch also supports VM_MIXEDMAP vmas where the pfns are not reserved and thus subject to reference counting. Cc: Xiao Guangrong <guangrong.xiao@linux.intel.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Radim Krčmář <rkrcmar@redhat.com> Tested-by: Neo Jia <cjia@nvidia.com> Reported-by: Kirti Wankhede <kwankhede@nvidia.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-rw-r--r--mm/gup.c1
-rw-r--r--virt/kvm/kvm_main.c45
2 files changed, 43 insertions, 3 deletions
diff --git a/mm/gup.c b/mm/gup.c
index c057784c8444..e3ac22f90fa4 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -720,6 +720,7 @@ retry:
}
return 0;
}
+EXPORT_SYMBOL_GPL(fixup_user_fault);
static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
struct mm_struct *mm,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5aae59e00bef..154b9ab459b0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1446,9 +1446,45 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
unsigned long addr, bool *async,
bool write_fault, kvm_pfn_t *p_pfn)
{
- *p_pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) +
- vma->vm_pgoff;
- BUG_ON(!kvm_is_reserved_pfn(*p_pfn));
+ unsigned long pfn;
+ int r;
+
+ r = follow_pfn(vma, addr, &pfn);
+ if (r) {
+ /*
+ * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does
+ * not call the fault handler, so do it here.
+ */
+ bool unlocked = false;
+ r = fixup_user_fault(current, current->mm, addr,
+ (write_fault ? FAULT_FLAG_WRITE : 0),
+ &unlocked);
+ if (unlocked)
+ return -EAGAIN;
+ if (r)
+ return r;
+
+ r = follow_pfn(vma, addr, &pfn);
+ if (r)
+ return r;
+
+ }
+
+
+ /*
+ * Get a reference here because callers of *hva_to_pfn* and
+ * *gfn_to_pfn* ultimately call kvm_release_pfn_clean on the
+ * returned pfn. This is only needed if the VMA has VM_MIXEDMAP
+ * set, but the kvm_get_pfn/kvm_release_pfn_clean pair will
+ * simply do nothing for reserved pfns.
+ *
+ * Whoever called remap_pfn_range is also going to call e.g.
+ * unmap_mapping_range before the underlying pages are freed,
+ * causing a call to our MMU notifier.
+ */
+ kvm_get_pfn(pfn);
+
+ *p_pfn = pfn;
return 0;
}
@@ -1493,12 +1529,15 @@ static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
goto exit;
}
+retry:
vma = find_vma_intersection(current->mm, addr, addr + 1);
if (vma == NULL)
pfn = KVM_PFN_ERR_FAULT;
else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) {
r = hva_to_pfn_remapped(vma, addr, async, write_fault, &pfn);
+ if (r == -EAGAIN)
+ goto retry;
if (r < 0)
pfn = KVM_PFN_ERR_FAULT;
} else {