summaryrefslogtreecommitdiff
path: root/arch/powerpc/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--arch/powerpc/kvm/book3s.c8
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c12
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c160
-rw-r--r--arch/powerpc/kvm/book3s_hv.c96
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c190
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c2
-rw-r--r--arch/powerpc/kvm/book3s_pr.c4
-rw-r--r--arch/powerpc/kvm/book3s_xics.c12
-rw-r--r--arch/powerpc/kvm/book3s_xive.c12
-rw-r--r--arch/powerpc/kvm/booke.c3
-rw-r--r--arch/powerpc/kvm/e500_mmu_host.c3
-rw-r--r--arch/powerpc/kvm/powerpc.c47
12 files changed, 463 insertions, 86 deletions
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index fd9893bc7aa1..bd1a677dd9e4 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -830,9 +830,10 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
void kvmppc_core_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
- const struct kvm_memory_slot *new)
+ const struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
{
- kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new);
+ kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new, change);
}
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
@@ -850,9 +851,10 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
return kvm->arch.kvm_ops->test_age_hva(kvm, hva);
}
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
kvm->arch.kvm_ops->set_spte_hva(kvm, hva, pte);
+ return 0;
}
void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index c615617e78ac..6f2d2fb4e098 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -743,12 +743,15 @@ void kvmppc_rmap_reset(struct kvm *kvm)
srcu_idx = srcu_read_lock(&kvm->srcu);
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, slots) {
+ /* Mutual exclusion with kvm_unmap_hva_range etc. */
+ spin_lock(&kvm->mmu_lock);
/*
* This assumes it is acceptable to lose reference and
* change bits across a reset.
*/
memset(memslot->arch.rmap, 0,
memslot->npages * sizeof(*memslot->arch.rmap));
+ spin_unlock(&kvm->mmu_lock);
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
}
@@ -896,11 +899,12 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
gfn = memslot->base_gfn;
rmapp = memslot->arch.rmap;
+ if (kvm_is_radix(kvm)) {
+ kvmppc_radix_flush_memslot(kvm, memslot);
+ return;
+ }
+
for (n = memslot->npages; n; --n, ++gfn) {
- if (kvm_is_radix(kvm)) {
- kvm_unmap_radix(kvm, memslot, gfn);
- continue;
- }
/*
* Testing the present bit without locking is OK because
* the memslot has been marked invalid already, and hence
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index d68162ee159b..fb88167a402a 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -29,6 +29,103 @@
*/
static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 };
+unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
+ gva_t eaddr, void *to, void *from,
+ unsigned long n)
+{
+ unsigned long quadrant, ret = n;
+ int old_pid, old_lpid;
+ bool is_load = !!to;
+
+ /* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */
+ if (kvmhv_on_pseries())
+ return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr,
+ __pa(to), __pa(from), n);
+
+ quadrant = 1;
+ if (!pid)
+ quadrant = 2;
+ if (is_load)
+ from = (void *) (eaddr | (quadrant << 62));
+ else
+ to = (void *) (eaddr | (quadrant << 62));
+
+ preempt_disable();
+
+ /* switch the lpid first to avoid running host with unallocated pid */
+ old_lpid = mfspr(SPRN_LPID);
+ if (old_lpid != lpid)
+ mtspr(SPRN_LPID, lpid);
+ if (quadrant == 1) {
+ old_pid = mfspr(SPRN_PID);
+ if (old_pid != pid)
+ mtspr(SPRN_PID, pid);
+ }
+ isync();
+
+ pagefault_disable();
+ if (is_load)
+ ret = raw_copy_from_user(to, from, n);
+ else
+ ret = raw_copy_to_user(to, from, n);
+ pagefault_enable();
+
+ /* switch the pid first to avoid running host with unallocated pid */
+ if (quadrant == 1 && pid != old_pid)
+ mtspr(SPRN_PID, old_pid);
+ if (lpid != old_lpid)
+ mtspr(SPRN_LPID, old_lpid);
+ isync();
+
+ preempt_enable();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(__kvmhv_copy_tofrom_guest_radix);
+
+static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
+ void *to, void *from, unsigned long n)
+{
+ int lpid = vcpu->kvm->arch.lpid;
+ int pid = vcpu->arch.pid;
+
+ /* This would cause a data segment intr so don't allow the access */
+ if (eaddr & (0x3FFUL << 52))
+ return -EINVAL;
+
+ /* Should we be using the nested lpid */
+ if (vcpu->arch.nested)
+ lpid = vcpu->arch.nested->shadow_lpid;
+
+ /* If accessing quadrant 3 then pid is expected to be 0 */
+ if (((eaddr >> 62) & 0x3) == 0x3)
+ pid = 0;
+
+ eaddr &= ~(0xFFFUL << 52);
+
+ return __kvmhv_copy_tofrom_guest_radix(lpid, pid, eaddr, to, from, n);
+}
+
+long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to,
+ unsigned long n)
+{
+ long ret;
+
+ ret = kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, to, NULL, n);
+ if (ret > 0)
+ memset(to + (n - ret), 0, ret);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kvmhv_copy_from_guest_radix);
+
+long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *from,
+ unsigned long n)
+{
+ return kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, NULL, from, n);
+}
+EXPORT_SYMBOL_GPL(kvmhv_copy_to_guest_radix);
+
int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *gpte, u64 root,
u64 *pte_ret_p)
@@ -197,8 +294,8 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
return 0;
}
-static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
- unsigned int pshift, unsigned int lpid)
+void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
+ unsigned int pshift, unsigned int lpid)
{
unsigned long psize = PAGE_SIZE;
int psi;
@@ -284,7 +381,8 @@ static void kvmppc_pmd_free(pmd_t *pmdp)
/* Called with kvm->mmu_lock held */
void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
- unsigned int shift, struct kvm_memory_slot *memslot,
+ unsigned int shift,
+ const struct kvm_memory_slot *memslot,
unsigned int lpid)
{
@@ -683,6 +781,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
pte_t pte, *ptep;
unsigned int shift, level;
int ret;
+ bool large_enable;
/* used to check for invalidations in progress */
mmu_seq = kvm->mmu_notifier_seq;
@@ -732,12 +831,15 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
pte = *ptep;
local_irq_enable();
+ /* If we're logging dirty pages, always map single pages */
+ large_enable = !(memslot->flags & KVM_MEM_LOG_DIRTY_PAGES);
+
/* Get pte level from shift/size */
- if (shift == PUD_SHIFT &&
+ if (large_enable && shift == PUD_SHIFT &&
(gpa & (PUD_SIZE - PAGE_SIZE)) ==
(hva & (PUD_SIZE - PAGE_SIZE))) {
level = 2;
- } else if (shift == PMD_SHIFT &&
+ } else if (large_enable && shift == PMD_SHIFT &&
(gpa & (PMD_SIZE - PAGE_SIZE)) ==
(hva & (PMD_SIZE - PAGE_SIZE))) {
level = 1;
@@ -857,7 +959,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
return ret;
}
-/* Called with kvm->lock held */
+/* Called with kvm->mmu_lock held */
int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn)
{
@@ -872,7 +974,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
return 0;
}
-/* Called with kvm->lock held */
+/* Called with kvm->mmu_lock held */
int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn)
{
@@ -880,18 +982,24 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gpa = gfn << PAGE_SHIFT;
unsigned int shift;
int ref = 0;
+ unsigned long old, *rmapp;
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
- kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
- gpa, shift);
+ old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
+ gpa, shift);
/* XXX need to flush tlb here? */
+ /* Also clear bit in ptes in shadow pgtable for nested guests */
+ rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
+ kvmhv_update_nest_rmap_rc_list(kvm, rmapp, _PAGE_ACCESSED, 0,
+ old & PTE_RPN_MASK,
+ 1UL << shift);
ref = 1;
}
return ref;
}
-/* Called with kvm->lock held */
+/* Called with kvm->mmu_lock held */
int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn)
{
@@ -915,15 +1023,23 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
pte_t *ptep;
unsigned int shift;
int ret = 0;
+ unsigned long old, *rmapp;
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
ret = 1;
if (shift)
ret = 1 << (shift - PAGE_SHIFT);
- kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
- gpa, shift);
+ spin_lock(&kvm->mmu_lock);
+ old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
+ gpa, shift);
kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid);
+ /* Also clear bit in ptes in shadow pgtable for nested guests */
+ rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
+ kvmhv_update_nest_rmap_rc_list(kvm, rmapp, _PAGE_DIRTY, 0,
+ old & PTE_RPN_MASK,
+ 1UL << shift);
+ spin_unlock(&kvm->mmu_lock);
}
return ret;
}
@@ -953,6 +1069,26 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
return 0;
}
+void kvmppc_radix_flush_memslot(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ unsigned long n;
+ pte_t *ptep;
+ unsigned long gpa;
+ unsigned int shift;
+
+ gpa = memslot->base_gfn << PAGE_SHIFT;
+ spin_lock(&kvm->mmu_lock);
+ for (n = memslot->npages; n; --n) {
+ ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+ if (ptep && pte_present(*ptep))
+ kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
+ kvm->arch.lpid);
+ gpa += PAGE_SIZE;
+ }
+ spin_unlock(&kvm->mmu_lock);
+}
+
static void add_rmmu_ap_encoding(struct kvm_ppc_rmmu_info *info,
int psize, int *indexp)
{
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d65b961661fb..5a066fc299e1 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -983,7 +983,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
ret = kvmhv_enter_nested_guest(vcpu);
if (ret == H_INTERRUPT) {
kvmppc_set_gpr(vcpu, 3, 0);
+ vcpu->arch.hcall_needed = 0;
return -EINTR;
+ } else if (ret == H_TOO_HARD) {
+ kvmppc_set_gpr(vcpu, 3, 0);
+ vcpu->arch.hcall_needed = 0;
+ return RESUME_HOST;
}
break;
case H_TLB_INVALIDATE:
@@ -991,7 +996,11 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
if (nesting_enabled(vcpu->kvm))
ret = kvmhv_do_nested_tlbie(vcpu);
break;
-
+ case H_COPY_TOFROM_GUEST:
+ ret = H_FUNCTION;
+ if (nesting_enabled(vcpu->kvm))
+ ret = kvmhv_copy_tofrom_guest_nested(vcpu);
+ break;
default:
return RESUME_HOST;
}
@@ -1335,7 +1344,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
return r;
}
-static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
+static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
int r;
int srcu_idx;
@@ -1393,7 +1402,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
*/
case BOOK3S_INTERRUPT_H_DATA_STORAGE:
srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = kvmhv_nested_page_fault(vcpu);
+ r = kvmhv_nested_page_fault(run, vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
break;
case BOOK3S_INTERRUPT_H_INST_STORAGE:
@@ -1403,7 +1412,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = kvmhv_nested_page_fault(vcpu);
+ r = kvmhv_nested_page_fault(run, vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
break;
@@ -4058,7 +4067,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
if (!nested)
r = kvmppc_handle_exit_hv(kvm_run, vcpu, current);
else
- r = kvmppc_handle_nested_exit(vcpu);
+ r = kvmppc_handle_nested_exit(kvm_run, vcpu);
}
vcpu->arch.ret = r;
@@ -4370,7 +4379,8 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
- const struct kvm_memory_slot *new)
+ const struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
{
unsigned long npages = mem->memory_size >> PAGE_SHIFT;
@@ -4382,6 +4392,23 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
*/
if (npages)
atomic64_inc(&kvm->arch.mmio_update);
+
+ /*
+ * For change == KVM_MR_MOVE or KVM_MR_DELETE, higher levels
+ * have already called kvm_arch_flush_shadow_memslot() to
+ * flush shadow mappings. For KVM_MR_CREATE we have no
+ * previous mappings. So the only case to handle is
+ * KVM_MR_FLAGS_ONLY when the KVM_MEM_LOG_DIRTY_PAGES bit
+ * has been changed.
+ * For radix guests, we flush on setting KVM_MEM_LOG_DIRTY_PAGES
+ * to get rid of any THP PTEs in the partition-scoped page tables
+ * so we can track dirtiness at the page level; we flush when
+ * clearing KVM_MEM_LOG_DIRTY_PAGES so that we can go back to
+ * using THP PTEs.
+ */
+ if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) &&
+ ((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES))
+ kvmppc_radix_flush_memslot(kvm, old);
}
/*
@@ -4531,12 +4558,15 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
{
if (nesting_enabled(kvm))
kvmhv_release_all_nested(kvm);
+ kvmppc_rmap_reset(kvm);
+ kvm->arch.process_table = 0;
+ /* Mutual exclusion with kvm_unmap_hva_range etc. */
+ spin_lock(&kvm->mmu_lock);
+ kvm->arch.radix = 0;
+ spin_unlock(&kvm->mmu_lock);
kvmppc_free_radix(kvm);
kvmppc_update_lpcr(kvm, LPCR_VPM1,
LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
- kvmppc_rmap_reset(kvm);
- kvm->arch.radix = 0;
- kvm->arch.process_table = 0;
return 0;
}
@@ -4548,12 +4578,14 @@ int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
err = kvmppc_init_vm_radix(kvm);
if (err)
return err;
-
+ kvmppc_rmap_reset(kvm);
+ /* Mutual exclusion with kvm_unmap_hva_range etc. */
+ spin_lock(&kvm->mmu_lock);
+ kvm->arch.radix = 1;
+ spin_unlock(&kvm->mmu_lock);
kvmppc_free_hpt(&kvm->arch.hpt);
kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR,
LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR);
- kvmppc_rmap_reset(kvm);
- kvm->arch.radix = 1;
return 0;
}
@@ -5213,6 +5245,44 @@ static int kvmhv_enable_nested(struct kvm *kvm)
return 0;
}
+static int kvmhv_load_from_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
+ int size)
+{
+ int rc = -EINVAL;
+
+ if (kvmhv_vcpu_is_radix(vcpu)) {
+ rc = kvmhv_copy_from_guest_radix(vcpu, *eaddr, ptr, size);
+
+ if (rc > 0)
+ rc = -EINVAL;
+ }
+
+ /* For now quadrants are the only way to access nested guest memory */
+ if (rc && vcpu->arch.nested)
+ rc = -EAGAIN;
+
+ return rc;
+}
+
+static int kvmhv_store_to_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
+ int size)
+{
+ int rc = -EINVAL;
+
+ if (kvmhv_vcpu_is_radix(vcpu)) {
+ rc = kvmhv_copy_to_guest_radix(vcpu, *eaddr, ptr, size);
+
+ if (rc > 0)
+ rc = -EINVAL;
+ }
+
+ /* For now quadrants are the only way to access nested guest memory */
+ if (rc && vcpu->arch.nested)
+ rc = -EAGAIN;
+
+ return rc;
+}
+
static struct kvmppc_ops kvm_ops_hv = {
.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
@@ -5253,6 +5323,8 @@ static struct kvmppc_ops kvm_ops_hv = {
.get_rmmu_info = kvmhv_get_rmmu_info,
.set_smt_mode = kvmhv_set_smt_mode,
.enable_nested = kvmhv_enable_nested,
+ .load_from_eaddr = kvmhv_load_from_eaddr,
+ .store_to_eaddr = kvmhv_store_to_eaddr,
};
static int kvm_init_subcore_bitmap(void)
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 401d2ecbebc5..735e0ac6f5b2 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -195,6 +195,26 @@ void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
vcpu->arch.ppr = hr->ppr;
}
+static void kvmhv_nested_mmio_needed(struct kvm_vcpu *vcpu, u64 regs_ptr)
+{
+ /* No need to reflect the page fault to L1, we've handled it */
+ vcpu->arch.trap = 0;
+
+ /*
+ * Since the L2 gprs have already been written back into L1 memory when
+ * we complete the mmio, store the L1 memory location of the L2 gpr
+ * being loaded into by the mmio so that the loaded value can be
+ * written there in kvmppc_complete_mmio_load()
+ */
+ if (((vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) == KVM_MMIO_REG_GPR)
+ && (vcpu->mmio_is_write == 0)) {
+ vcpu->arch.nested_io_gpr = (gpa_t) regs_ptr +
+ offsetof(struct pt_regs,
+ gpr[vcpu->arch.io_gpr]);
+ vcpu->arch.io_gpr = KVM_MMIO_REG_NESTED_GPR;
+ }
+}
+
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
{
long int err, r;
@@ -316,6 +336,11 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
if (r == -EINTR)
return H_INTERRUPT;
+ if (vcpu->mmio_needed) {
+ kvmhv_nested_mmio_needed(vcpu, regs_ptr);
+ return H_TOO_HARD;
+ }
+
return vcpu->arch.trap;
}
@@ -437,6 +462,81 @@ long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
}
/*
+ * Handle the H_COPY_TOFROM_GUEST hcall.
+ * r4 = L1 lpid of nested guest
+ * r5 = pid
+ * r6 = eaddr to access
+ * r7 = to buffer (L1 gpa)
+ * r8 = from buffer (L1 gpa)
+ * r9 = n bytes to copy
+ */
+long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
+{
+ struct kvm_nested_guest *gp;
+ int l1_lpid = kvmppc_get_gpr(vcpu, 4);
+ int pid = kvmppc_get_gpr(vcpu, 5);
+ gva_t eaddr = kvmppc_get_gpr(vcpu, 6);
+ gpa_t gp_to = (gpa_t) kvmppc_get_gpr(vcpu, 7);
+ gpa_t gp_from = (gpa_t) kvmppc_get_gpr(vcpu, 8);
+ void *buf;
+ unsigned long n = kvmppc_get_gpr(vcpu, 9);
+ bool is_load = !!gp_to;
+ long rc;
+
+ if (gp_to && gp_from) /* One must be NULL to determine the direction */
+ return H_PARAMETER;
+
+ if (eaddr & (0xFFFUL << 52))
+ return H_PARAMETER;
+
+ buf = kzalloc(n, GFP_KERNEL);
+ if (!buf)
+ return H_NO_MEM;
+
+ gp = kvmhv_get_nested(vcpu->kvm, l1_lpid, false);
+ if (!gp) {
+ rc = H_PARAMETER;
+ goto out_free;
+ }
+
+ mutex_lock(&gp->tlb_lock);
+
+ if (is_load) {
+ /* Load from the nested guest into our buffer */
+ rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid,
+ eaddr, buf, NULL, n);
+ if (rc)
+ goto not_found;
+
+ /* Write what was loaded into our buffer back to the L1 guest */
+ rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n);
+ if (rc)
+ goto not_found;
+ } else {
+ /* Load the data to be stored from the L1 guest into our buf */
+ rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n);
+ if (rc)
+ goto not_found;
+
+ /* Store from our buffer into the nested guest */
+ rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid,
+ eaddr, NULL, buf, n);
+ if (rc)
+ goto not_found;
+ }
+
+out_unlock:
+ mutex_unlock(&gp->tlb_lock);
+ kvmhv_put_nested(gp);
+out_free:
+ kfree(buf);
+ return rc;
+not_found:
+ rc = H_NOT_FOUND;
+ goto out_unlock;
+}
+
+/*
* Reload the partition table entry for a guest.
* Caller must hold gp->tlb_lock.
*/
@@ -480,6 +580,7 @@ struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
if (shadow_lpid < 0)
goto out_free2;
gp->shadow_lpid = shadow_lpid;
+ gp->radix = 1;
memset(gp->prev_cpu, -1, sizeof(gp->prev_cpu));
@@ -687,6 +788,57 @@ void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
*n_rmap = NULL;
}
+static void kvmhv_update_nest_rmap_rc(struct kvm *kvm, u64 n_rmap,
+ unsigned long clr, unsigned long set,
+ unsigned long hpa, unsigned long mask)
+{
+ struct kvm_nested_guest *gp;
+ unsigned long gpa;
+ unsigned int shift, lpid;
+ pte_t *ptep;
+
+ gpa = n_rmap & RMAP_NESTED_GPA_MASK;
+ lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
+ gp = kvmhv_find_nested(kvm, lpid);
+ if (!gp)
+ return;
+
+ /* Find the pte */
+ ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift);
+ /*
+ * If the pte is present and the pfn is still the same, update the pte.
+ * If the pfn has changed then this is a stale rmap entry, the nested
+ * gpa actually points somewhere else now, and there is nothing to do.
+ * XXX A future optimisation would be to remove the rmap entry here.
+ */
+ if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa)) {
+ __radix_pte_update(ptep, clr, set);
+ kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid);
+ }
+}
+
+/*
+ * For a given list of rmap entries, update the rc bits in all ptes in shadow
+ * page tables for nested guests which are referenced by the rmap list.
+ */
+void kvmhv_update_nest_rmap_rc_list(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long clr, unsigned long set,
+ unsigned long hpa, unsigned long nbytes)
+{
+ struct llist_node *entry = ((struct llist_head *) rmapp)->first;
+ struct rmap_nested *cursor;
+ unsigned long rmap, mask;
+
+ if ((clr | set) & ~(_PAGE_DIRTY | _PAGE_ACCESSED))
+ return;
+
+ mask = PTE_RPN_MASK & ~(nbytes - 1);
+ hpa &= mask;
+
+ for_each_nest_rmap_safe(cursor, entry, &rmap)
+ kvmhv_update_nest_rmap_rc(kvm, rmap, clr, set, hpa, mask);
+}
+
static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap,
unsigned long hpa, unsigned long mask)
{
@@ -723,7 +875,7 @@ static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp,
/* called with kvm->mmu_lock held */
void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
+ const struct kvm_memory_slot *memslot,
unsigned long gpa, unsigned long hpa,
unsigned long nbytes)
{
@@ -1049,7 +1201,7 @@ static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
struct kvm *kvm = vcpu->kvm;
bool writing = !!(dsisr & DSISR_ISSTORE);
u64 pgflags;
- bool ret;
+ long ret;
/* Are the rc bits set in the L1 partition scoped pte? */
pgflags = _PAGE_ACCESSED;
@@ -1062,16 +1214,22 @@ static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
/* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */
ret = kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, writing,
gpte.raddr, kvm->arch.lpid);
- spin_unlock(&kvm->mmu_lock);
- if (!ret)
- return -EINVAL;
+ if (!ret) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
/* Set the rc bit in the pte of the shadow_pgtable for the nest guest */
ret = kvmppc_hv_handle_set_rc(kvm, gp->shadow_pgtable, writing, n_gpa,
gp->shadow_lpid);
if (!ret)
- return -EINVAL;
- return 0;
+ ret = -EINVAL;
+ else
+ ret = 0;
+
+out_unlock:
+ spin_unlock(&kvm->mmu_lock);
+ return ret;
}
static inline int kvmppc_radix_level_to_shift(int level)
@@ -1099,7 +1257,8 @@ static inline int kvmppc_radix_shift_to_level(int shift)
}
/* called with gp->tlb_lock held */
-static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
+static long int __kvmhv_nested_page_fault(struct kvm_run *run,
+ struct kvm_vcpu *vcpu,
struct kvm_nested_guest *gp)
{
struct kvm *kvm = vcpu->kvm;
@@ -1180,9 +1339,9 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
return RESUME_GUEST;
}
- /* passthrough of emulated MMIO case... */
- pr_err("emulated MMIO passthrough?\n");
- return -EINVAL;
+
+ /* passthrough of emulated MMIO case */
+ return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing);
}
if (memslot->flags & KVM_MEM_READONLY) {
if (writing) {
@@ -1220,6 +1379,8 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
return ret;
shift = kvmppc_radix_level_to_shift(level);
}
+ /* Align gfn to the start of the page */
+ gfn = (gpa & ~((1UL << shift) - 1)) >> PAGE_SHIFT;
/* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */
@@ -1227,6 +1388,9 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
perm |= gpte.may_read ? 0UL : _PAGE_READ;
perm |= gpte.may_write ? 0UL : _PAGE_WRITE;
perm |= gpte.may_execute ? 0UL : _PAGE_EXEC;
+ /* Only set accessed/dirty (rc) bits if set in host and l1 guest ptes */
+ perm |= (gpte.rc & _PAGE_ACCESSED) ? 0UL : _PAGE_ACCESSED;
+ perm |= ((gpte.rc & _PAGE_DIRTY) && writing) ? 0UL : _PAGE_DIRTY;
pte = __pte(pte_val(pte) & ~perm);
/* What size pte can we insert? */
@@ -1264,13 +1428,13 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
return RESUME_GUEST;
}
-long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
+long int kvmhv_nested_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
struct kvm_nested_guest *gp = vcpu->arch.nested;
long int ret;
mutex_lock(&gp->tlb_lock);
- ret = __kvmhv_nested_page_fault(vcpu, gp);
+ ret = __kvmhv_nested_page_fault(run, vcpu, gp);
mutex_unlock(&gp->tlb_lock);
return ret;
}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index a67cf1cdeda4..3b3791ed74a6 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -107,7 +107,7 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
/* Update the dirty bitmap of a memslot */
-void kvmppc_update_dirty_map(struct kvm_memory_slot *memslot,
+void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot,
unsigned long gfn, unsigned long psize)
{
unsigned long npages;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 4efd65d9e828..811a3c2fb0e9 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -587,6 +587,7 @@ void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
case PVR_POWER8:
case PVR_POWER8E:
case PVR_POWER8NVL:
+ case PVR_POWER9:
vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
BOOK3S_HFLAG_NEW_TLBIE;
break;
@@ -1913,7 +1914,8 @@ static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
- const struct kvm_memory_slot *new)
+ const struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
{
return;
}
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index b0b2bfc2ff51..f27ee57ab46e 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -1015,17 +1015,7 @@ static int xics_debug_show(struct seq_file *m, void *private)
return 0;
}
-static int xics_debug_open(struct inode *inode, struct file *file)
-{
- return single_open(file, xics_debug_show, inode->i_private);
-}
-
-static const struct file_operations xics_debug_fops = {
- .open = xics_debug_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(xics_debug);
static void xics_debugfs_init(struct kvmppc_xics *xics)
{
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index ad4a370703d3..f78d002f0fe0 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -1968,17 +1968,7 @@ static int xive_debug_show(struct seq_file *m, void *private)
return 0;
}
-static int xive_debug_open(struct inode *inode, struct file *file)
-{
- return single_open(file, xive_debug_show, inode->i_private);
-}
-
-static const struct file_operations xive_debug_fops = {
- .open = xive_debug_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(xive_debug);
static void xive_debugfs_init(struct kvmppc_xive *xive)
{
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index a9ca016da670..dbec4128bb51 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1833,7 +1833,8 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
void kvmppc_core_commit_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
const struct kvm_memory_slot *old,
- const struct kvm_memory_slot *new)
+ const struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
{
}
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 8f2985e46f6f..c3f312b2bcb3 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -757,10 +757,11 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
return 0;
}
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
/* The page will get remapped properly on its next fault */
kvm_unmap_hva(kvm, hva);
+ return 0;
}
/*****************************************/
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 2869a299c4ed..b90a7d154180 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -331,10 +331,17 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
{
ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK;
struct kvmppc_pte pte;
- int r;
+ int r = -EINVAL;
vcpu->stat.st++;
+ if (vcpu->kvm->arch.kvm_ops && vcpu->kvm->arch.kvm_ops->store_to_eaddr)
+ r = vcpu->kvm->arch.kvm_ops->store_to_eaddr(vcpu, eaddr, ptr,
+ size);
+
+ if ((!r) || (r == -EAGAIN))
+ return r;
+
r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST,
XLATE_WRITE, &pte);
if (r < 0)
@@ -367,10 +374,17 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
{
ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK;
struct kvmppc_pte pte;
- int rc;
+ int rc = -EINVAL;
vcpu->stat.ld++;
+ if (vcpu->kvm->arch.kvm_ops && vcpu->kvm->arch.kvm_ops->load_from_eaddr)
+ rc = vcpu->kvm->arch.kvm_ops->load_from_eaddr(vcpu, eaddr, ptr,
+ size);
+
+ if ((!rc) || (rc == -EAGAIN))
+ return rc;
+
rc = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST,
XLATE_READ, &pte);
if (rc)
@@ -518,7 +532,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PPC_UNSET_IRQ:
case KVM_CAP_PPC_IRQ_LEVEL:
case KVM_CAP_ENABLE_CAP:
- case KVM_CAP_ENABLE_CAP_VM:
case KVM_CAP_ONE_REG:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
@@ -543,8 +556,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
case KVM_CAP_SPAPR_TCE_64:
- /* fallthrough */
+ r = 1;
+ break;
case KVM_CAP_SPAPR_TCE_VFIO:
+ r = !!cpu_has_feature(CPU_FTR_HVMODE);
+ break;
case KVM_CAP_PPC_RTAS:
case KVM_CAP_PPC_FIXUP_HCALL:
case KVM_CAP_PPC_ENABLE_HCALL:
@@ -696,7 +712,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- kvmppc_core_commit_memory_region(kvm, mem, old, new);
+ kvmppc_core_commit_memory_region(kvm, mem, old, new, change);
}
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
@@ -1192,6 +1208,14 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
kvmppc_set_vmx_byte(vcpu, gpr);
break;
#endif
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ case KVM_MMIO_REG_NESTED_GPR:
+ if (kvmppc_need_byteswap(vcpu))
+ gpr = swab64(gpr);
+ kvm_vcpu_write_guest(vcpu, vcpu->arch.nested_io_gpr, &gpr,
+ sizeof(gpr));
+ break;
+#endif
default:
BUG();
}
@@ -2084,8 +2108,8 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
}
-static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
- struct kvm_enable_cap *cap)
+int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+ struct kvm_enable_cap *cap)
{
int r;
@@ -2273,15 +2297,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
break;
}
- case KVM_ENABLE_CAP:
- {
- struct kvm_enable_cap cap;
- r = -EFAULT;
- if (copy_from_user(&cap, argp, sizeof(cap)))
- goto out;
- r = kvm_vm_ioctl_enable_cap(kvm, &cap);
- break;
- }
#ifdef CONFIG_SPAPR_TCE_IOMMU
case KVM_CREATE_SPAPR_TCE_64: {
struct kvm_create_spapr_tce_64 create_tce_64;