diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-09 11:42:31 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-04-09 11:42:31 -0700 |
commit | d8312a3f61024352f1c7cb967571fd53631b0d6c (patch) | |
tree | be2f2f699e763330b0f0179e9f86009affbc0c7d /virt | |
parent | e9092d0d97961146655ce51f43850907d95f68c3 (diff) | |
parent | e01bca2fc698d7f0626f0214001af523e18ad60b (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
"ARM:
- VHE optimizations
- EL2 address space randomization
- speculative execution mitigations ("variant 3a", aka execution past
invalid privilege register access)
- bugfixes and cleanups
PPC:
- improvements for the radix page fault handler for HV KVM on POWER9
s390:
- more kvm stat counters
- virtio gpu plumbing
- documentation
- facilities improvements
x86:
- support for VMware magic I/O port and pseudo-PMCs
- AMD pause loop exiting
- support for AMD core performance extensions
- support for synchronous register access
- expose nVMX capabilities to userspace
- support for Hyper-V signaling via eventfd
- use Enlightened VMCS when running on Hyper-V
- allow userspace to disable MWAIT/HLT/PAUSE vmexits
- usual roundup of optimizations and nested virtualization bugfixes
Generic:
- API selftest infrastructure (though the only tests are for x86 as
of now)"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (174 commits)
kvm: x86: fix a prototype warning
kvm: selftests: add sync_regs_test
kvm: selftests: add API testing infrastructure
kvm: x86: fix a compile warning
KVM: X86: Add Force Emulation Prefix for "emulate the next instruction"
KVM: X86: Introduce handle_ud()
KVM: vmx: unify adjacent #ifdefs
x86: kvm: hide the unused 'cpu' variable
KVM: VMX: remove bogus WARN_ON in handle_ept_misconfig
Revert "KVM: X86: Fix SMRAM accessing even if VM is shutdown"
kvm: Add emulation for movups/movupd
KVM: VMX: raise internal error for exception during invalid protected mode state
KVM: nVMX: Optimization: Dont set KVM_REQ_EVENT when VMExit with nested_run_pending
KVM: nVMX: Require immediate-exit when event reinjected to L2 and L1 event pending
KVM: x86: Fix misleading comments on handling pending exceptions
KVM: x86: Rename interrupt.pending to interrupt.injected
KVM: VMX: No need to clear pending NMI/interrupt on inject realmode interrupt
x86/kvm: use Enlightened VMCS when running on Hyper-V
x86/hyper-v: detect nested features
x86/hyper-v: define struct hv_enlightened_vmcs and clean field bits
...
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/arm/aarch32.c | 2 | ||||
-rw-r--r-- | virt/kvm/arm/arch_timer.c | 10 | ||||
-rw-r--r-- | virt/kvm/arm/arm.c | 48 | ||||
-rw-r--r-- | virt/kvm/arm/hyp/timer-sr.c | 44 | ||||
-rw-r--r-- | virt/kvm/arm/hyp/vgic-v2-sr.c | 159 | ||||
-rw-r--r-- | virt/kvm/arm/hyp/vgic-v3-sr.c | 247 | ||||
-rw-r--r-- | virt/kvm/arm/mmu.c | 176 | ||||
-rw-r--r-- | virt/kvm/arm/pmu.c | 36 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-init.c | 17 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-its.c | 15 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v2.c | 152 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-v3.c | 66 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic.c | 33 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic.h | 3 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 36 |
15 files changed, 565 insertions, 479 deletions
diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index 8bc479fa37e6..efc84cbe8277 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c @@ -178,7 +178,7 @@ static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) *vcpu_cpsr(vcpu) = cpsr; /* Note: These now point to the banked copies */ - *vcpu_spsr(vcpu) = new_spsr_value; + vcpu_write_spsr(vcpu, new_spsr_value); *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; /* Branch to exception vector */ diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 282389eb204f..bd3d57f40f1b 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -545,9 +545,11 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) * The kernel may decide to run userspace after calling vcpu_put, so * we reset cntvoff to 0 to ensure a consistent read between user * accesses to the virtual counter and kernel access to the physical - * counter. + * counter of non-VHE case. For VHE, the virtual counter uses a fixed + * virtual offset of zero, so no need to zero CNTVOFF_EL2 register. */ - set_cntvoff(0); + if (!has_vhe()) + set_cntvoff(0); } /* @@ -856,11 +858,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) return ret; no_vgic: - preempt_disable(); timer->enabled = 1; - kvm_timer_vcpu_load(vcpu); - preempt_enable(); - return 0; } diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 53572304843b..dba629c5f8ac 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -362,10 +362,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_arm_set_running_vcpu(vcpu); kvm_vgic_load(vcpu); kvm_timer_vcpu_load(vcpu); + kvm_vcpu_load_sysregs(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + kvm_vcpu_put_sysregs(vcpu); kvm_timer_vcpu_put(vcpu); kvm_vgic_put(vcpu); @@ -420,7 +422,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, */ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return ((!!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v)) + bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF); + return ((irq_lines || kvm_vgic_vcpu_pending_irq(v)) && !v->arch.power_off && !v->arch.pause); } @@ -632,27 +635,22 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (unlikely(!kvm_vcpu_initialized(vcpu))) return -ENOEXEC; - vcpu_load(vcpu); - ret = kvm_vcpu_first_run_init(vcpu); if (ret) - goto out; + return ret; if (run->exit_reason == KVM_EXIT_MMIO) { ret = kvm_handle_mmio_return(vcpu, vcpu->run); if (ret) - goto out; - if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) { - ret = 0; - goto out; - } - + return ret; + if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) + return 0; } - if (run->immediate_exit) { - ret = -EINTR; - goto out; - } + if (run->immediate_exit) + return -EINTR; + + vcpu_load(vcpu); kvm_sigset_activate(vcpu); @@ -719,6 +717,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || kvm_request_pending(vcpu)) { vcpu->mode = OUTSIDE_GUEST_MODE; + isb(); /* Ensure work in x_flush_hwstate is committed */ kvm_pmu_sync_hwstate(vcpu); if (static_branch_unlikely(&userspace_irqchip_in_use)) kvm_timer_sync_hwstate(vcpu); @@ -735,13 +734,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) */ trace_kvm_entry(*vcpu_pc(vcpu)); guest_enter_irqoff(); - if (has_vhe()) - kvm_arm_vhe_guest_enter(); - - ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); - if (has_vhe()) + if (has_vhe()) { + kvm_arm_vhe_guest_enter(); + ret = kvm_vcpu_run_vhe(vcpu); kvm_arm_vhe_guest_exit(); + } else { + ret = kvm_call_hyp(__kvm_vcpu_run_nvhe, vcpu); + } + vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; /* @@ -811,7 +812,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_sigset_deactivate(vcpu); -out: vcpu_put(vcpu); return ret; } @@ -820,18 +820,18 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) { int bit_index; bool set; - unsigned long *ptr; + unsigned long *hcr; if (number == KVM_ARM_IRQ_CPU_IRQ) bit_index = __ffs(HCR_VI); else /* KVM_ARM_IRQ_CPU_FIQ */ bit_index = __ffs(HCR_VF); - ptr = (unsigned long *)&vcpu->arch.irq_lines; + hcr = vcpu_hcr(vcpu); if (level) - set = test_and_set_bit(bit_index, ptr); + set = test_and_set_bit(bit_index, hcr); else - set = test_and_clear_bit(bit_index, ptr); + set = test_and_clear_bit(bit_index, hcr); /* * If we didn't change anything, no need to wake up or kick other CPUs diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c index f24404b3c8df..77754a62eb0c 100644 --- a/virt/kvm/arm/hyp/timer-sr.c +++ b/virt/kvm/arm/hyp/timer-sr.c @@ -27,34 +27,34 @@ void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high) write_sysreg(cntvoff, cntvoff_el2); } +/* + * Should only be called on non-VHE systems. + * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). + */ void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) { - /* - * We don't need to do this for VHE since the host kernel runs in EL2 - * with HCR_EL2.TGE ==1, which makes those bits have no impact. - */ - if (!has_vhe()) { - u64 val; + u64 val; - /* Allow physical timer/counter access for the host */ - val = read_sysreg(cnthctl_el2); - val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; - write_sysreg(val, cnthctl_el2); - } + /* Allow physical timer/counter access for the host */ + val = read_sysreg(cnthctl_el2); + val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; + write_sysreg(val, cnthctl_el2); } +/* + * Should only be called on non-VHE systems. + * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). + */ void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu) { - if (!has_vhe()) { - u64 val; + u64 val; - /* - * Disallow physical timer access for the guest - * Physical counter access is allowed - */ - val = read_sysreg(cnthctl_el2); - val &= ~CNTHCTL_EL1PCEN; - val |= CNTHCTL_EL1PCTEN; - write_sysreg(val, cnthctl_el2); - } + /* + * Disallow physical timer access for the guest + * Physical counter access is allowed + */ + val = read_sysreg(cnthctl_el2); + val &= ~CNTHCTL_EL1PCEN; + val |= CNTHCTL_EL1PCTEN; + write_sysreg(val, cnthctl_el2); } diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c deleted file mode 100644 index 4fe6e797e8b3..000000000000 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (C) 2012-2015 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/compiler.h> -#include <linux/irqchip/arm-gic.h> -#include <linux/kvm_host.h> - -#include <asm/kvm_emulate.h> -#include <asm/kvm_hyp.h> -#include <asm/kvm_mmu.h> - -static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) -{ - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; - u32 elrsr0, elrsr1; - - elrsr0 = readl_relaxed(base + GICH_ELRSR0); - if (unlikely(nr_lr > 32)) - elrsr1 = readl_relaxed(base + GICH_ELRSR1); - else - elrsr1 = 0; - - cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0; -} - -static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) -{ - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - int i; - u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; - - for (i = 0; i < used_lrs; i++) { - if (cpu_if->vgic_elrsr & (1UL << i)) - cpu_if->vgic_lr[i] &= ~GICH_LR_STATE; - else - cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); - - writel_relaxed(0, base + GICH_LR0 + (i * 4)); - } -} - -/* vcpu is already in the HYP VA space */ -void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - struct vgic_dist *vgic = &kvm->arch.vgic; - void __iomem *base = kern_hyp_va(vgic->vctrl_base); - u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; - - if (!base) - return; - - if (used_lrs) { - cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); - - save_elrsr(vcpu, base); - save_lrs(vcpu, base); - - writel_relaxed(0, base + GICH_HCR); - } else { - cpu_if->vgic_elrsr = ~0UL; - cpu_if->vgic_apr = 0; - } -} - -/* vcpu is already in the HYP VA space */ -void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - struct vgic_dist *vgic = &kvm->arch.vgic; - void __iomem *base = kern_hyp_va(vgic->vctrl_base); - int i; - u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; - - if (!base) - return; - - if (used_lrs) { - writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); - writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); - for (i = 0; i < used_lrs; i++) { - writel_relaxed(cpu_if->vgic_lr[i], - base + GICH_LR0 + (i * 4)); - } - } -} - -#ifdef CONFIG_ARM64 -/* - * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the - * guest. - * - * @vcpu: the offending vcpu - * - * Returns: - * 1: GICV access successfully performed - * 0: Not a GICV access - * -1: Illegal GICV access - */ -int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - struct vgic_dist *vgic = &kvm->arch.vgic; - phys_addr_t fault_ipa; - void __iomem *addr; - int rd; - - /* Build the full address */ - fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); - fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); - - /* If not for GICV, move on */ - if (fault_ipa < vgic->vgic_cpu_base || - fault_ipa >= (vgic->vgic_cpu_base + KVM_VGIC_V2_CPU_SIZE)) - return 0; - - /* Reject anything but a 32bit access */ - if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32)) - return -1; - - /* Not aligned? Don't bother */ - if (fault_ipa & 3) - return -1; - - rd = kvm_vcpu_dabt_get_rd(vcpu); - addr = kern_hyp_va((kern_hyp_va(&kvm_vgic_global_state))->vcpu_base_va); - addr += fault_ipa - vgic->vgic_cpu_base; - - if (kvm_vcpu_dabt_iswrite(vcpu)) { - u32 data = vcpu_data_guest_to_host(vcpu, - vcpu_get_reg(vcpu, rd), - sizeof(u32)); - writel_relaxed(data, addr); - } else { - u32 data = readl_relaxed(addr); - vcpu_set_reg(vcpu, rd, vcpu_data_host_to_guest(vcpu, data, - sizeof(u32))); - } - - return 1; -} -#endif diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index b89ce5432214..616e5a433ab0 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -21,6 +21,7 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> #define vtr_to_max_lr_idx(v) ((v) & 0xf) #define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) @@ -208,89 +209,68 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; - u64 val; /* * Make sure stores to the GIC via the memory mapped interface - * are now visible to the system register interface. + * are now visible to the system register interface when reading the + * LRs, and when reading back the VMCR on non-VHE systems. */ - if (!cpu_if->vgic_sre) { - dsb(sy); - isb(); - cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); + if (used_lrs || !has_vhe()) { + if (!cpu_if->vgic_sre) { + dsb(sy); + isb(); + } } if (used_lrs) { int i; - u32 nr_pre_bits; + u32 elrsr; - cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2); + elrsr = read_gicreg(ICH_ELSR_EL2); - write_gicreg(0, ICH_HCR_EL2); - val = read_gicreg(ICH_VTR_EL2); - nr_pre_bits = vtr_to_nr_pre_bits(val); + write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2); for (i = 0; i < used_lrs; i++) { - if (cpu_if->vgic_elrsr & (1 << i)) + if (elrsr & (1 << i)) cpu_if->vgic_lr[i] &= ~ICH_LR_STATE; else cpu_if->vgic_lr[i] = __gic_v3_get_lr(i); __gic_v3_set_lr(0, i); } + } +} - switch (nr_pre_bits) { - case 7: - cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3); - cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2); - case 6: - cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1); - default: - cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0); - } +void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; + int i; - switch (nr_pre_bits) { - case 7: - cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3); - cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2); - case 6: - cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1); - default: - cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); - } - } else { - if (static_branch_unlikely(&vgic_v3_cpuif_trap) || - cpu_if->its_vpe.its_vm) - write_gicreg(0, ICH_HCR_EL2); - - cpu_if->vgic_elrsr = 0xffff; - cpu_if->vgic_ap0r[0] = 0; - cpu_if->vgic_ap0r[1] = 0; - cpu_if->vgic_ap0r[2] = 0; - cpu_if->vgic_ap0r[3] = 0; - cpu_if->vgic_ap1r[0] = 0; - cpu_if->vgic_ap1r[1] = 0; - cpu_if->vgic_ap1r[2] = 0; - cpu_if->vgic_ap1r[3] = 0; - } + if (used_lrs) { + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); - val = read_gicreg(ICC_SRE_EL2); - write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); + for (i = 0; i < used_lrs; i++) + __gic_v3_set_lr(cpu_if->vgic_lr[i], i); + } - if (!cpu_if->vgic_sre) { - /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ - isb(); - write_gicreg(1, ICC_SRE_EL1); + /* + * Ensure that writes to the LRs, and on non-VHE systems ensure that + * the write to the VMCR in __vgic_v3_activate_traps(), will have + * reached the (re)distributors. This ensure the guest will read the + * correct values from the memory-mapped interface. + */ + if (used_lrs || !has_vhe()) { + if (!cpu_if->vgic_sre) { + isb(); + dsb(sy); + } } } -void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) +void __hyp_text __vgic_v3_activate_traps(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; - u64 val; - u32 nr_pre_bits; - int i; /* * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a @@ -299,70 +279,135 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) * consequences. So we must make sure that ICC_SRE_EL1 has * been actually programmed with the value we want before * starting to mess with the rest of the GIC, and VMCR_EL2 in - * particular. + * particular. This logic must be called before + * __vgic_v3_restore_state(). */ if (!cpu_if->vgic_sre) { write_gicreg(0, ICC_SRE_EL1); isb(); write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); + + + if (has_vhe()) { + /* + * Ensure that the write to the VMCR will have reached + * the (re)distributors. This ensure the guest will + * read the correct values from the memory-mapped + * interface. + */ + isb(); + dsb(sy); + } } - val = read_gicreg(ICH_VTR_EL2); - nr_pre_bits = vtr_to_nr_pre_bits(val); + /* + * Prevent the guest from touching the GIC system registers if + * SRE isn't enabled for GICv3 emulation. + */ + write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, + ICC_SRE_EL2); - if (used_lrs) { + /* + * If we need to trap system registers, we must write + * ICH_HCR_EL2 anyway, even if no interrupts are being + * injected, + */ + if (static_branch_unlikely(&vgic_v3_cpuif_trap) || + cpu_if->its_vpe.its_vm) write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); +} - switch (nr_pre_bits) { - case 7: - __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3); - __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2); - case 6: - __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1); - default: - __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0); - } - - switch (nr_pre_bits) { - case 7: - __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3); - __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2); - case 6: - __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1); - default: - __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0); - } +void __hyp_text __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 val; - for (i = 0; i < used_lrs; i++) - __gic_v3_set_lr(cpu_if->vgic_lr[i], i); - } else { - /* - * If we need to trap system registers, we must write - * ICH_HCR_EL2 anyway, even if no interrupts are being - * injected. Same thing if GICv4 is used, as VLPI - * delivery is gated by ICH_HCR_EL2.En. - */ - if (static_branch_unlikely(&vgic_v3_cpuif_trap) || - cpu_if->its_vpe.its_vm) - write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); + if (!cpu_if->vgic_sre) { + cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); } - /* - * Ensures that the above will have reached the - * (re)distributors. This ensure the guest will read the - * correct values from the memory-mapped interface. - */ + val = read_gicreg(ICC_SRE_EL2); + write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); + if (!cpu_if->vgic_sre) { + /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ isb(); - dsb(sy); + write_gicreg(1, ICC_SRE_EL1); } /* - * Prevent the guest from touching the GIC system registers if - * SRE isn't enabled for GICv3 emulation. + * If we were trapping system registers, we enabled the VGIC even if + * no interrupts were being injected, and we disable it again here. */ - write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, - ICC_SRE_EL2); + if (static_branch_unlikely(&vgic_v3_cpuif_trap) || + cpu_if->its_vpe.its_vm) + write_gicreg(0, ICH_HCR_EL2); +} + +void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if; + u64 val; + u32 nr_pre_bits; + + vcpu = kern_hyp_va(vcpu); + cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + val = read_gicreg(ICH_VTR_EL2); + nr_pre_bits = vtr_to_nr_pre_bits(val); + + switch (nr_pre_bits) { + case 7: + cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3); + cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2); + case 6: + cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1); + default: + cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0); + } + + switch (nr_pre_bits) { + case 7: + cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3); + cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2); + case 6: + cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1); + default: + cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); + } +} + +void __hyp_text __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if; + u64 val; + u32 nr_pre_bits; + + vcpu = kern_hyp_va(vcpu); + cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + val = read_gicreg(ICH_VTR_EL2); + nr_pre_bits = vtr_to_nr_pre_bits(val); + + switch (nr_pre_bits) { + case 7: + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3); + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2); + case 6: + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1); + default: + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0); + } + + switch (nr_pre_bits) { + case 7: + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3); + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2); + case 6: + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1); + default: + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0); + } } void __hyp_text __vgic_v3_init_lrs(void) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index b960acdd0c05..7f6a944db23d 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -43,6 +43,8 @@ static unsigned long hyp_idmap_start; static unsigned long hyp_idmap_end; static phys_addr_t hyp_idmap_vector; +static unsigned long io_map_base; + #define S2_PGD_SIZE (PTRS_PER_S2_PGD * sizeof(pgd_t)) #define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) @@ -479,7 +481,13 @@ static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) clear_hyp_pgd_entry(pgd); } -static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) +static unsigned int kvm_pgd_index(unsigned long addr, unsigned int ptrs_per_pgd) +{ + return (addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1); +} + +static void __unmap_hyp_range(pgd_t *pgdp, unsigned long ptrs_per_pgd, + phys_addr_t start, u64 size) { pgd_t *pgd; phys_addr_t addr = start, end = start + size; @@ -489,7 +497,7 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) * We don't unmap anything from HYP, except at the hyp tear down. * Hence, we don't have to invalidate the TLBs here. */ - pgd = pgdp + pgd_index(addr); + pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd); do { next = pgd_addr_end(addr, end); if (!pgd_none(*pgd)) @@ -497,32 +505,50 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) } while (pgd++, addr = next, addr != end); } +static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) +{ + __unmap_hyp_range(pgdp, PTRS_PER_PGD, start, size); +} + +static void unmap_hyp_idmap_range(pgd_t *pgdp, phys_addr_t start, u64 size) +{ + __unmap_hyp_range(pgdp, __kvm_idmap_ptrs_per_pgd(), start, size); +} + /** * free_hyp_pgds - free Hyp-mode page tables * * Assumes hyp_pgd is a page table used strictly in Hyp-mode and * therefore contains either mappings in the kernel memory area (above - * PAGE_OFFSET), or device mappings in the vmalloc range (from - * VMALLOC_START to VMALLOC_END). + * PAGE_OFFSET), or device mappings in the idmap range. * - * boot_hyp_pgd should only map two pages for the init code. + * boot_hyp_pgd should only map the idmap range, and is only used in + * the extended idmap case. */ void free_hyp_pgds(void) { + pgd_t *id_pgd; + mutex_lock(&kvm_hyp_pgd_mutex); + id_pgd = boot_hyp_pgd ? boot_hyp_pgd : hyp_pgd; + + if (id_pgd) { + /* In case we never called hyp_mmu_init() */ + if (!io_map_base) + io_map_base = hyp_idmap_start; + unmap_hyp_idmap_range(id_pgd, io_map_base, + hyp_idmap_start + PAGE_SIZE - io_map_base); + } + if (boot_hyp_pgd) { - unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order); boot_hyp_pgd = NULL; } if (hyp_pgd) { - unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE); unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET), (uintptr_t)high_memory - PAGE_OFFSET); - unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START), - VMALLOC_END - VMALLOC_START); free_pages((unsigned long)hyp_pgd, hyp_pgd_order); hyp_pgd = NULL; @@ -634,7 +660,7 @@ static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd, addr = start & PAGE_MASK; end = PAGE_ALIGN(end); do { - pgd = pgdp + ((addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1)); + pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd); if (pgd_none(*pgd)) { pud = pud_alloc_one(NULL, addr); @@ -708,29 +734,115 @@ int create_hyp_mappings(void *from, void *to, pgprot_t prot) return 0; } +static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, + unsigned long *haddr, pgprot_t prot) +{ + pgd_t *pgd = hyp_pgd; + unsigned long base; + int ret = 0; + + mutex_lock(&kvm_hyp_pgd_mutex); + + /* + * This assumes that we we have enough space below the idmap + * page to allocate our VAs. If not, the check below will + * kick. A potential alternative would be to detect that + * overflow and switch to an allocation above the idmap. + * + * The allocated size is always a multiple of PAGE_SIZE. + */ + size = PAGE_ALIGN(size + offset_in_page(phys_addr)); + base = io_map_base - size; + + /* + * Verify that BIT(VA_BITS - 1) hasn't been flipped by + * allocating the new area, as it would indicate we've + * overflowed the idmap/IO address range. + */ + if ((base ^ io_map_base) & BIT(VA_BITS - 1)) + ret = -ENOMEM; + else + io_map_base = base; + + mutex_unlock(&kvm_hyp_pgd_mutex); + + if (ret) + goto out; + + if (__kvm_cpu_uses_extended_idmap()) + pgd = boot_hyp_pgd; + + ret = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(), + base, base + size, + __phys_to_pfn(phys_addr), prot); + if (ret) + goto out; + + *haddr = base + offset_in_page(phys_addr); + +out: + return ret; +} + /** - * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode - * @from: The kernel start VA of the range - * @to: The kernel end VA of the range (exclusive) + * create_hyp_io_mappings - Map IO into both kernel and HYP * @phys_addr: The physical start address which gets mapped - * - * The resulting HYP VA is the same as the kernel VA, modulo - * HYP_PAGE_OFFSET. + * @size: Size of the region being mapped + * @kaddr: Kernel VA for this mapping + * @haddr: HYP VA for this mapping */ -int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) +int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, + void __iomem **kaddr, + void __iomem **haddr) { - unsigned long start = kern_hyp_va((unsigned long)from); - unsigned long end = kern_hyp_va((unsigned long)to); + unsigned long addr; + int ret; - if (is_kernel_in_hyp_mode()) + *kaddr = ioremap(phys_addr, size); + if (!*kaddr) + return -ENOMEM; + + if (is_kernel_in_hyp_mode()) { + *haddr = *kaddr; return 0; + } - /* Check for a valid kernel IO mapping */ - if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)) - return -EINVAL; + ret = __create_hyp_private_mapping(phys_addr, size, + &addr, PAGE_HYP_DEVICE); + if (ret) { + iounmap(*kaddr); + *kaddr = NULL; + *haddr = NULL; + return ret; + } + + *haddr = (void __iomem *)addr; + return 0; +} - return __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, start, end, - __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE); +/** + * create_hyp_exec_mappings - Map an executable range into HYP + * @phys_addr: The physical start address which gets mapped + * @size: Size of the region being mapped + * @haddr: HYP VA for this mapping + */ +int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, + void **haddr) +{ + unsigned long addr; + int ret; + + BUG_ON(is_kernel_in_hyp_mode()); + + ret = __create_hyp_private_mapping(phys_addr, size, + &addr, PAGE_HYP_EXEC); + if (ret) { + *haddr = NULL; + return ret; + } + + *haddr = (void *)addr; + return 0; } /** @@ -1801,7 +1913,9 @@ int kvm_mmu_init(void) int err; hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start); + hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE); hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end); + hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE); hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init); /* @@ -1812,10 +1926,11 @@ int kvm_mmu_init(void) kvm_debug("IDMAP page: %lx\n", hyp_idmap_start); kvm_debug("HYP VA range: %lx:%lx\n", - kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL)); + kern_hyp_va(PAGE_OFFSET), + kern_hyp_va((unsigned long)high_memory - 1)); if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) && - hyp_idmap_start < kern_hyp_va(~0UL) && + hyp_idmap_start < kern_hyp_va((unsigned long)high_memory - 1) && hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) { /* * The idmap page is intersecting with the VA space, @@ -1859,6 +1974,7 @@ int kvm_mmu_init(void) goto out; } + io_map_base = hyp_idmap_start; return 0; out: free_hyp_pgds(); @@ -2035,7 +2151,7 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, */ void kvm_set_way_flush(struct kvm_vcpu *vcpu) { - unsigned long hcr = vcpu_get_hcr(vcpu); + unsigned long hcr = *vcpu_hcr(vcpu); /* * If this is the first time we do a S/W operation @@ -2050,7 +2166,7 @@ void kvm_set_way_flush(struct kvm_vcpu *vcpu) trace_kvm_set_way_flush(*vcpu_pc(vcpu), vcpu_has_cache_enabled(vcpu)); stage2_flush_vm(vcpu->kvm); - vcpu_set_hcr(vcpu, hcr | HCR_TVM); + *vcpu_hcr(vcpu) = hcr | HCR_TVM; } } @@ -2068,7 +2184,7 @@ void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled) /* Caches are now on, stop trapping VM ops (until a S/W op) */ if (now_enabled) - vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) & ~HCR_TVM); + *vcpu_hcr(vcpu) &= ~HCR_TVM; trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled); } diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 8a9c42366db7..1c5b76c46e26 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -37,7 +37,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) reg = (select_idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; - counter = vcpu_sys_reg(vcpu, reg); + counter = __vcpu_sys_reg(vcpu, reg); /* The real counter value is equal to the value of counter register plus * the value perf event counts. @@ -61,7 +61,7 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) reg = (select_idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; - vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); + __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); } /** @@ -78,7 +78,7 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) counter = kvm_pmu_get_counter_value(vcpu, pmc->idx); reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; - vcpu_sys_reg(vcpu, reg) = counter; + __vcpu_sys_reg(vcpu, reg) = counter; perf_event_disable(pmc->perf_event); perf_event_release_kernel(pmc->perf_event); pmc->perf_event = NULL; @@ -125,7 +125,7 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) { - u64 val = vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT; + u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT; val &= ARMV8_PMU_PMCR_N_MASK; if (val == 0) @@ -147,7 +147,7 @@ void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) struct kvm_pmu *pmu = &vcpu->arch.pmu; struct kvm_pmc *pmc; - if (!(vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val) + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val) return; for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { @@ -193,10 +193,10 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) { u64 reg = 0; - if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) { - reg = vcpu_sys_reg(vcpu, PMOVSSET_EL0); - reg &= vcpu_sys_reg(vcpu, PMCNTENSET_EL0); - reg &= vcpu_sys_reg(vcpu, PMINTENSET_EL1); + if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) { + reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); + reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); reg &= kvm_pmu_valid_counter_mask(vcpu); } @@ -295,7 +295,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); int idx = pmc->idx; - vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); if (kvm_pmu_overflow_status(vcpu)) { kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); @@ -316,19 +316,19 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) if (val == 0) return; - enable = vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + enable = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) { if (!(val & BIT(i))) continue; - type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i) + type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i) & ARMV8_PMU_EVTYPE_EVENT; if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR) && (enable & BIT(i))) { - reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; + reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; reg = lower_32_bits(reg); - vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; + __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; if (!reg) - vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); } } } @@ -348,7 +348,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) mask = kvm_pmu_valid_counter_mask(vcpu); if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter(vcpu, - vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); + __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); } else { kvm_pmu_disable_counter(vcpu, mask); } @@ -369,8 +369,8 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) { - return (vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) && - (vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); + return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) && + (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); } /** diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 743ca5cb05ef..68378fe17a0e 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -166,12 +166,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) kvm->arch.vgic.in_kernel = true; kvm->arch.vgic.vgic_model = type; - /* - * kvm_vgic_global_state.vctrl_base is set on vgic probe (kvm_arch_init) - * it is stored in distributor struct for asm save/restore purpose - */ - kvm->arch.vgic.vctrl_base = kvm_vgic_global_state.vctrl_base; - kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF; @@ -302,17 +296,6 @@ int vgic_init(struct kvm *kvm) dist->initialized = true; - /* - * If we're initializing GICv2 on-demand when first running the VCPU - * then we need to load the VGIC state onto the CPU. We can detect - * this easily by checking if we are in between vcpu_load and vcpu_put - * when we just initialized the VGIC. - */ - preempt_disable(); - vcpu = kvm_arm_get_running_vcpu(); - if (vcpu) - kvm_vgic_load(vcpu); - preempt_enable(); out: return ret; } diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 465095355666..a8f07243aa9f 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -316,21 +316,24 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr) struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct vgic_irq *irq; u32 *intids; - int irq_count = dist->lpi_list_count, i = 0; + int irq_count, i = 0; /* - * We use the current value of the list length, which may change - * after the kmalloc. We don't care, because the guest shouldn't - * change anything while the command handling is still running, - * and in the worst case we would miss a new IRQ, which one wouldn't - * expect to be covered by this command anyway. + * There is an obvious race between allocating the array and LPIs + * being mapped/unmapped. If we ended up here as a result of a + * command, we're safe (locks are held, preventing another + * command). If coming from another path (such as enabling LPIs), + * we must be careful not to overrun the array. */ + irq_count = READ_ONCE(dist->lpi_list_count); intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL); if (!intids) return -ENOMEM; spin_lock(&dist->lpi_list_lock); list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + if (i == irq_count) + break; /* We don't need to "get" the IRQ, as we hold the list lock. */ if (irq->target_vcpu != vcpu) continue; diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 29556f71b691..45aa433f018f 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -105,12 +105,9 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) /* * Clear soft pending state when level irqs have been acked. - * Always regenerate the pending state. */ - if (irq->config == VGIC_CONFIG_LEVEL) { - if (!(val & GICH_LR_PENDING_BIT)) - irq->pending_latch = false; - } + if (irq->config == VGIC_CONFIG_LEVEL && !(val & GICH_LR_STATE)) + irq->pending_latch = false; /* * Level-triggered mapped IRQs are special because we only @@ -153,8 +150,35 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) { u32 val = irq->intid; + bool allow_pending = true; + + if (irq->active) + val |= GICH_LR_ACTIVE_BIT; + + if (irq->hw) { + val |= GICH_LR_HW; + val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; + /* + * Never set pending+active on a HW interrupt, as the + * pending state is kept at the physical distributor + * level. + */ + if (irq->active) + allow_pending = false; + } else { + if (irq->config == VGIC_CONFIG_LEVEL) { + val |= GICH_LR_EOI; - if (irq_is_pending(irq)) { + /* + * Software resampling doesn't work very well + * if we allow P+A, so let's not do that. + */ + if (irq->active) + allow_pending = false; + } + } + + if (allow_pending && irq_is_pending(irq)) { val |= GICH_LR_PENDING_BIT; if (irq->config == VGIC_CONFIG_EDGE) @@ -171,24 +195,6 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) } } - if (irq->active) - val |= GICH_LR_ACTIVE_BIT; - - if (irq->hw) { - val |= GICH_LR_HW; - val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; - /* - * Never set pending+active on a HW interrupt, as the - * pending state is kept at the physical distributor - * level. - */ - if (irq->active && irq_is_pending(irq)) - val &= ~GICH_LR_PENDING_BIT; - } else { - if (irq->config == VGIC_CONFIG_LEVEL) - val |= GICH_LR_EOI; - } - /* * Level-triggered mapped IRQs are special because we only observe * rising edges as input to the VGIC. We therefore lower the line @@ -272,7 +278,6 @@ void vgic_v2_enable(struct kvm_vcpu *vcpu) * anyway. */ vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; - vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0; /* Get the show on the road... */ vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; @@ -368,16 +373,11 @@ int vgic_v2_probe(const struct gic_kvm_info *info) if (!PAGE_ALIGNED(info->vcpu.start) || !PAGE_ALIGNED(resource_size(&info->vcpu))) { kvm_info("GICV region size/alignment is unsafe, using trapping (reduced performance)\n"); - kvm_vgic_global_state.vcpu_base_va = ioremap(info->vcpu.start, - resource_size(&info->vcpu)); - if (!kvm_vgic_global_state.vcpu_base_va) { - kvm_err("Cannot ioremap GICV\n"); - return -ENOMEM; - } - ret = create_hyp_io_mappings(kvm_vgic_global_state.vcpu_base_va, - kvm_vgic_global_state.vcpu_base_va + resource_size(&info->vcpu), - info->vcpu.start); + ret = create_hyp_io_mappings(info->vcpu.start, + resource_size(&info->vcpu), + &kvm_vgic_global_state.vcpu_base_va, + &kvm_vgic_global_state.vcpu_hyp_va); if (ret) { kvm_err("Cannot map GICV into hyp\n"); goto out; @@ -386,26 +386,18 @@ int vgic_v2_probe(const struct gic_kvm_info *info) static_branch_enable(&vgic_v2_cpuif_trap); } - kvm_vgic_global_state.vctrl_base = ioremap(info->vctrl.start, - resource_size(&info->vctrl)); - if (!kvm_vgic_global_state.vctrl_base) { - kvm_err("Cannot ioremap GICH\n"); - ret = -ENOMEM; + ret = create_hyp_io_mappings(info->vctrl.start, + resource_size(&info->vctrl), + &kvm_vgic_global_state.vctrl_base, + &kvm_vgic_global_state.vctrl_hyp); + if (ret) { + kvm_err("Cannot map VCTRL into hyp\n"); goto out; } vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR); kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1; - ret = create_hyp_io_mappings(kvm_vgic_global_state.vctrl_base, - kvm_vgic_global_state.vctrl_base + - resource_size(&info->vctrl), - info->vctrl.start); - if (ret) { - kvm_err("Cannot map VCTRL into hyp\n"); - goto out; - } - ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); if (ret) { kvm_err("Cannot register GICv2 KVM device\n"); @@ -429,18 +421,74 @@ out: return ret; } +static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; + u64 elrsr; + int i; + + elrsr = readl_relaxed(base + GICH_ELRSR0); + if (unlikely(used_lrs > 32)) + elrsr |= ((u64)readl_relaxed(base + GICH_ELRSR1)) << 32; + + for (i = 0; i < used_lrs; i++) { + if (elrsr & (1UL << i)) + cpu_if->vgic_lr[i] &= ~GICH_LR_STATE; + else + cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); + + writel_relaxed(0, base + GICH_LR0 + (i * 4)); + } +} + +void vgic_v2_save_state(struct kvm_vcpu *vcpu) +{ + void __iomem *base = kvm_vgic_global_state.vctrl_base; + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; + + if (!base) + return; + + if (used_lrs) { + save_lrs(vcpu, base); + writel_relaxed(0, base + GICH_HCR); + } +} + +void vgic_v2_restore_state(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + void __iomem *base = kvm_vgic_global_state.vctrl_base; + u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; + int i; + + if (!base) + return; + + if (used_lrs) { + writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); + for (i = 0; i < used_lrs; i++) { + writel_relaxed(cpu_if->vgic_lr[i], + base + GICH_LR0 + (i * 4)); + } + } +} + void vgic_v2_load(struct kvm_vcpu *vcpu) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; - writel_relaxed(cpu_if->vgic_vmcr, vgic->vctrl_base + GICH_VMCR); + writel_relaxed(cpu_if->vgic_vmcr, + kvm_vgic_global_state.vctrl_base + GICH_VMCR); + writel_relaxed(cpu_if->vgic_apr, + kvm_vgic_global_state.vctrl_base + GICH_APR); } void vgic_v2_put(struct kvm_vcpu *vcpu) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; - cpu_if->vgic_vmcr = readl_relaxed(vgic->vctrl_base + GICH_VMCR); + cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); + cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR); } diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 0ff2006f3781..8195f52ae6f0 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -16,6 +16,7 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <kvm/arm_vgic.h> +#include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> #include <asm/kvm_asm.h> @@ -96,12 +97,9 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) /* * Clear soft pending state when level irqs have been acked. - * Always regenerate the pending state. */ - if (irq->config == VGIC_CONFIG_LEVEL) { - if (!(val & ICH_LR_PENDING_BIT)) - irq->pending_latch = false; - } + if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE)) + irq->pending_latch = false; /* * Level-triggered mapped IRQs are special because we only @@ -135,8 +133,35 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) { u32 model = vcpu->kvm->arch.vgic.vgic_model; u64 val = irq->intid; + bool allow_pending = true; + + if (irq->active) + val |= ICH_LR_ACTIVE_BIT; + + if (irq->hw) { + val |= ICH_LR_HW; + val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; + /* + * Never set pending+active on a HW interrupt, as the + * pending state is kept at the physical distributor + * level. + */ + if (irq->active) + allow_pending = false; + } else { + if (irq->config == VGIC_CONFIG_LEVEL) { + val |= ICH_LR_EOI; + + /* + * Software resampling doesn't work very well + * if we allow P+A, so let's not do that. + */ + if (irq->active) + allow_pending = false; + } + } - if (irq_is_pending(irq)) { + if (allow_pending && irq_is_pending(irq)) { val |= ICH_LR_PENDING_BIT; if (irq->config == VGIC_CONFIG_EDGE) @@ -154,24 +179,6 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) } } - if (irq->active) - val |= ICH_LR_ACTIVE_BIT; - - if (irq->hw) { - val |= ICH_LR_HW; - val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; - /* - * Never set pending+active on a HW interrupt, as the - * pending state is kept at the physical distributor - * level. - */ - if (irq->active && irq_is_pending(irq)) - val &= ~ICH_LR_PENDING_BIT; - } else { - if (irq->config == VGIC_CONFIG_LEVEL) - val |= ICH_LR_EOI; - } - /* * Level-triggered mapped IRQs are special because we only observe * rising edges as input to the VGIC. We therefore lower the line @@ -274,7 +281,6 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) * anyway. */ vgic_v3->vgic_vmcr = 0; - vgic_v3->vgic_elrsr = ~0; /* * If we are emulating a GICv3, we do it in an non-GICv2-compatible @@ -595,6 +601,11 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) */ if (likely(cpu_if->vgic_sre)) kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr); + + kvm_call_hyp(__vgic_v3_restore_aprs, vcpu); + + if (has_vhe()) + __vgic_v3_activate_traps(vcpu); } void vgic_v3_put(struct kvm_vcpu *vcpu) @@ -603,4 +614,9 @@ void vgic_v3_put(struct kvm_vcpu *vcpu) if (likely(cpu_if->vgic_sre)) cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr); + + kvm_call_hyp(__vgic_v3_save_aprs, vcpu); + + if (has_vhe()) + __vgic_v3_deactivate_traps(vcpu); } diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 8201899126f6..e74baec76361 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -19,6 +19,7 @@ #include <linux/list_sort.h> #include <linux/interrupt.h> #include <linux/irq.h> +#include <asm/kvm_hyp.h> #include "vgic.h" @@ -808,6 +809,24 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) vgic_clear_lr(vcpu, count); } +static inline bool can_access_vgic_from_kernel(void) +{ + /* + * GICv2 can always be accessed from the kernel because it is + * memory-mapped, and VHE systems can access GICv3 EL2 system + * registers. + */ + return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe(); +} + +static inline void vgic_save_state(struct kvm_vcpu *vcpu) +{ + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v2_save_state(vcpu); + else + __vgic_v3_save_state(vcpu); +} + /* Sync back the hardware VGIC state into our emulation after a guest's run. */ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { @@ -819,11 +838,22 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) return; + if (can_access_vgic_from_kernel()) + vgic_save_state(vcpu); + if (vgic_cpu->used_lrs) vgic_fold_lr_state(vcpu); vgic_prune_ap_list(vcpu); } +static inline void vgic_restore_state(struct kvm_vcpu *vcpu) +{ + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v2_restore_state(vcpu); + else + __vgic_v3_restore_state(vcpu); +} + /* Flush our emulation state into the GIC hardware before entering the guest. */ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) { @@ -846,6 +876,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); vgic_flush_lr_state(vcpu); spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); + + if (can_access_vgic_from_kernel()) + vgic_restore_state(vcpu); } void kvm_vgic_load(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index f5b8519e5546..830e815748a0 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -178,6 +178,9 @@ void vgic_v2_init_lrs(void); void vgic_v2_load(struct kvm_vcpu *vcpu); void vgic_v2_put(struct kvm_vcpu *vcpu); +void vgic_v2_save_state(struct kvm_vcpu *vcpu); +void vgic_v2_restore_state(struct kvm_vcpu *vcpu); + static inline void vgic_get_irq_kref(struct vgic_irq *irq) { if (irq->intid < VGIC_MIN_LPI) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 65dea3ffef68..c7b2e927f699 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3398,21 +3398,6 @@ static int kvm_io_bus_sort_cmp(const void *p1, const void *p2) return kvm_io_bus_cmp(p1, p2); } -static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, - gpa_t addr, int len) -{ - bus->range[bus->dev_count++] = (struct kvm_io_range) { - .addr = addr, - .len = len, - .dev = dev, - }; - - sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range), - kvm_io_bus_sort_cmp, NULL); - - return 0; -} - static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, gpa_t addr, int len) { @@ -3553,7 +3538,9 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev) { + int i; struct kvm_io_bus *new_bus, *bus; + struct kvm_io_range range; bus = kvm_get_bus(kvm, bus_idx); if (!bus) @@ -3567,9 +3554,22 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, sizeof(struct kvm_io_range)), GFP_KERNEL); if (!new_bus) return -ENOMEM; - memcpy(new_bus, bus, sizeof(*bus) + (bus->dev_count * - sizeof(struct kvm_io_range))); - kvm_io_bus_insert_dev(new_bus, dev, addr, len); + + range = (struct kvm_io_range) { + .addr = addr, + .len = len, + .dev = dev, + }; + + for (i = 0; i < bus->dev_count; i++) + if (kvm_io_bus_cmp(&bus->range[i], &range) > 0) + break; + + memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); + new_bus->dev_count++; + new_bus->range[i] = range; + memcpy(new_bus->range + i + 1, bus->range + i, + (bus->dev_count - i) * sizeof(struct kvm_io_range)); rcu_assign_pointer(kvm->buses[bus_idx], new_bus); synchronize_srcu_expedited(&kvm->srcu); kfree(bus); |