author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-08 12:37:56 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-08 12:37:56 -0700 |
commit | 2d3e4866dea96b0506395b47bfefb234f2088dac (patch) | |
tree | d5c7bd97d222bef46f9d73adee8c79dbdb9f82f4 /arch | |
parent | 9c6ee01ed5bb1ee489d580eaa60d7eb5a8ede336 (diff) | |
parent | 2e5b0bd9cc6172edef502dfae28ae790f74a882e (diff) | |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"ARM:
- HYP mode stub supports kexec/kdump on 32-bit
- improved PMU support
- virtual interrupt controller performance improvements
- support for userspace virtual interrupt controller (slower, but
necessary for KVM on the weird Broadcom SoCs used by the Raspberry
Pi 3)
MIPS:
- basic support for hardware virtualization (ImgTec P5600/P6600/I6400
and Cavium Octeon III)
PPC:
- in-kernel acceleration for VFIO
s390:
- support for guests without storage keys
- adapter interruption suppression
x86:
- usual range of nVMX improvements, notably nested EPT support for
accessed and dirty bits
- emulation of CPL3 CPUID faulting
generic:
- first part of VCPU thread request API
- kvm_stat improvements"
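A minimal, hedged sketch of how architecture code typically drives the vCPU thread request API named in the "first part of VCPU thread request API" item above. The request number and both helpers here are hypothetical; kvm_make_request(), kvm_check_request() and kvm_vcpu_kick() are existing KVM internals, and the KVM_REQUEST_WAIT/KVM_REQUEST_NO_WAKEUP flags are the same ones visible in the KVM_REQ_VCPU_EXIT hunks further down.

```c
#include <linux/kvm_host.h>

/* Hypothetical request; the flag bits mirror the KVM_REQ_VCPU_EXIT hunks below. */
#define KVM_REQ_EXAMPLE	(8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)

/* Producer: set the request bit, then force the target vCPU out of the guest. */
static void example_raise_request(struct kvm_vcpu *vcpu)
{
	kvm_make_request(KVM_REQ_EXAMPLE, vcpu);
	kvm_vcpu_kick(vcpu);
}

/* Consumer: runs on the vCPU thread before guest entry; test-and-clear is atomic. */
static void example_service_requests(struct kvm_vcpu *vcpu)
{
	if (kvm_check_request(KVM_REQ_EXAMPLE, vcpu)) {
		/* handle the request here, before re-entering the guest */
	}
}
```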
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (227 commits)
kvm: nVMX: Don't validate disabled secondary controls
KVM: put back #ifndef CONFIG_S390 around kvm_vcpu_kick
Revert "KVM: Support vCPU-based gfn->hva cache"
tools/kvm: fix top level makefile
KVM: x86: don't hold kvm->lock in KVM_SET_GSI_ROUTING
KVM: Documentation: remove VM mmap documentation
kvm: nVMX: Remove superfluous VMX instruction fault checks
KVM: x86: fix emulation of RSM and IRET instructions
KVM: mark requests that need synchronization
KVM: return if kvm_vcpu_wake_up() did wake up the VCPU
KVM: add explicit barrier to kvm_vcpu_kick
KVM: perform a wake_up in kvm_make_all_cpus_request
KVM: mark requests that do not need a wakeup
KVM: remove #ifndef CONFIG_S390 around kvm_vcpu_wake_up
KVM: x86: always use kvm_make_request instead of set_bit
KVM: add kvm_{test,clear}_request to replace {test,clear}_bit
s390: kvm: Cpu model support for msa6, msa7 and msa8
KVM: x86: remove irq disablement around KVM_SET_CLOCK/KVM_GET_CLOCK
kvm: better MWAIT emulation for guests
KVM: x86: virtualize cpuid faulting
...
Diffstat (limited to 'arch')
121 files changed, 7950 insertions, 3150 deletions
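Before the raw diff, a hedged userspace-side sketch of the KVM_CAP_ARM_USER_IRQ capability that several arch/arm hunks below advertise, and of the device output levels the kernel mirrors into kvm_sync_regs.device_irq_level after KVM_RUN when the irqchip lives in userspace. VM/vCPU setup, error handling and the bit layout of device_irq_level are outside this excerpt, so the two helpers are only illustrative.

```c
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Returns the capability "version": 1 means vtimer, ptimer and PMU levels
 * are reported (see the arm.c hunk below); 0 means unsupported. */
static int arm_user_irq_version(int vm_fd)
{
	return ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_USER_IRQ);
}

/* Refreshed by the kernel before each return from KVM_RUN. */
static uint64_t arm_device_irq_level(const struct kvm_run *run)
{
	return run->s.regs.device_irq_level;
}
```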
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 9150f9732785..7c711ba61417 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -422,7 +422,17 @@ dtb_check_done: cmp r0, #HYP_MODE bne 1f - bl __hyp_get_vectors + /* + * Compute the address of the hyp vectors after relocation. + * This requires some arithmetic since we cannot directly + * reference __hyp_stub_vectors in a PC-relative way. + * Call __hyp_set_vectors with the new address so that we + * can HVC again after the copy. + */ +0: adr r0, 0b + movw r1, #:lower16:__hyp_stub_vectors - 0b + movt r1, #:upper16:__hyp_stub_vectors - 0b + add r0, r0, r1 sub r0, r0, r5 add r0, r0, r10 bl __hyp_set_vectors diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 8ef05381984b..14d68a4d826f 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -33,7 +33,7 @@ #define ARM_EXCEPTION_IRQ 5 #define ARM_EXCEPTION_FIQ 6 #define ARM_EXCEPTION_HVC 7 - +#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR /* * The rr_lo_hi macro swaps a pair of registers depending on * current endianness. It is used in conjunction with ldrd and strd @@ -72,10 +72,11 @@ extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern void __init_stage2_translation(void); -extern void __kvm_hyp_reset(unsigned long); - extern u64 __vgic_v3_get_ich_vtr_el2(void); +extern u64 __vgic_v3_read_vmcr(void); +extern void __vgic_v3_write_vmcr(u32 vmcr); extern void __vgic_v3_init_lrs(void); + #endif #endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 31ee468ce667..f0e66577ce05 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -30,7 +30,6 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED #define KVM_USER_MEM_SLOTS 32 -#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HAVE_ONE_REG #define KVM_HALT_POLL_NS_DEFAULT 500000 @@ -45,7 +44,7 @@ #define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS #endif -#define KVM_REQ_VCPU_EXIT 8 +#define KVM_REQ_VCPU_EXIT (8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); int __attribute_const__ kvm_target_cpu(void); @@ -270,12 +269,6 @@ static inline void __cpu_init_stage2(void) kvm_call_hyp(__init_stage2_translation); } -static inline void __cpu_reset_hyp_mode(unsigned long vector_ptr, - phys_addr_t phys_idmap_start) -{ - kvm_call_hyp((void *)virt_to_idmap(__kvm_hyp_reset), vector_ptr); -} - static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) { return 0; diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 95f38dcd611d..fa6f2174276b 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -56,7 +56,6 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); phys_addr_t kvm_get_idmap_vector(void); -phys_addr_t kvm_get_idmap_start(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index 8877ad5ffe10..f2e1af45bd6f 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -43,7 +43,7 @@ extern struct processor { /* * Special stuff for a reset */ - void (*reset)(unsigned long addr) __attribute__((noreturn)); + void (*reset)(unsigned long addr, bool hvc) __attribute__((noreturn)); /* * Idle the processor */ @@ -88,7 +88,7 @@ extern void cpu_set_pte_ext(pte_t *ptep, pte_t 
pte); #else extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); #endif -extern void cpu_reset(unsigned long addr) __attribute__((noreturn)); +extern void cpu_reset(unsigned long addr, bool hvc) __attribute__((noreturn)); /* These three are private to arch/arm/kernel/suspend.c */ extern void cpu_do_suspend(void *); diff --git a/arch/arm/include/asm/virt.h b/arch/arm/include/asm/virt.h index 6dae1956c74d..141144f333a2 100644 --- a/arch/arm/include/asm/virt.h +++ b/arch/arm/include/asm/virt.h @@ -53,7 +53,7 @@ static inline void sync_boot_mode(void) } void __hyp_set_vectors(unsigned long phys_vector_base); -unsigned long __hyp_get_vectors(void); +void __hyp_reset_vectors(void); #else #define __boot_cpu_mode (SVC_MODE) #define sync_boot_mode() @@ -94,6 +94,18 @@ extern char __hyp_text_start[]; extern char __hyp_text_end[]; #endif +#else + +/* Only assembly code should need those */ + +#define HVC_SET_VECTORS 0 +#define HVC_SOFT_RESTART 1 +#define HVC_RESET_VECTORS 2 + +#define HVC_STUB_HCALL_NR 3 + #endif /* __ASSEMBLY__ */ +#define HVC_STUB_ERR 0xbadca11 + #endif /* ! VIRT_H */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 6ebd3e6a1fd1..a88726359e5f 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -27,6 +27,8 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -114,6 +116,8 @@ struct kvm_debug_exit_arch { }; struct kvm_sync_regs { + /* Used with KVM_CAP_ARM_USER_IRQ */ + __u64 device_irq_level; }; struct kvm_arch_memory_slot { diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 15d073ae5da2..ec7e7377d423 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -125,7 +125,7 @@ ENTRY(__hyp_stub_install_secondary) * (see safe_svcmode_maskall). */ @ Now install the hypervisor stub: - adr r7, __hyp_stub_vectors + W(adr) r7, __hyp_stub_vectors mcr p15, 4, r7, c12, c0, 0 @ set hypervisor vector base (HVBAR) @ Disable all traps, so we don't get any nasty surprise @@ -202,9 +202,23 @@ ARM_BE8(orr r7, r7, #(1 << 25)) @ HSCTLR.EE ENDPROC(__hyp_stub_install_secondary) __hyp_stub_do_trap: - cmp r0, #-1 - mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR - mcrne p15, 4, r0, c12, c0, 0 @ set HVBAR + teq r0, #HVC_SET_VECTORS + bne 1f + mcr p15, 4, r1, c12, c0, 0 @ set HVBAR + b __hyp_stub_exit + +1: teq r0, #HVC_SOFT_RESTART + bne 1f + bx r1 + +1: teq r0, #HVC_RESET_VECTORS + beq __hyp_stub_exit + + ldr r0, =HVC_STUB_ERR + __ERET + +__hyp_stub_exit: + mov r0, #0 __ERET ENDPROC(__hyp_stub_do_trap) @@ -230,15 +244,26 @@ ENDPROC(__hyp_stub_do_trap) * so you will need to set that to something sensible at the new hypervisor's * initialisation entry point. */ -ENTRY(__hyp_get_vectors) - mov r0, #-1 -ENDPROC(__hyp_get_vectors) - @ fall through ENTRY(__hyp_set_vectors) + mov r1, r0 + mov r0, #HVC_SET_VECTORS __HVC(0) ret lr ENDPROC(__hyp_set_vectors) +ENTRY(__hyp_soft_restart) + mov r1, r0 + mov r0, #HVC_SOFT_RESTART + __HVC(0) + ret lr +ENDPROC(__hyp_soft_restart) + +ENTRY(__hyp_reset_vectors) + mov r0, #HVC_RESET_VECTORS + __HVC(0) + ret lr +ENDPROC(__hyp_reset_vectors) + #ifndef ZIMAGE .align 2 .L__boot_cpu_mode_offset: @@ -246,7 +271,7 @@ ENDPROC(__hyp_set_vectors) #endif .align 5 -__hyp_stub_vectors: +ENTRY(__hyp_stub_vectors) __hyp_stub_reset: W(b) . __hyp_stub_und: W(b) . __hyp_stub_svc: W(b) . 
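The hyp-stub hunk above replaces the old __hyp_get_vectors special case with a small numbered hypercall ABI (HVC_SET_VECTORS, HVC_SOFT_RESTART, HVC_RESET_VECTORS). As a reading aid, here is a plain-C model of that dispatch, assuming nothing beyond what the diff shows; set_hvbar() and jump_to() merely stand in for the MCR and BX instructions, and this is not kernel code.

```c
#include <stdint.h>

#define HVC_SET_VECTORS   0
#define HVC_SOFT_RESTART  1
#define HVC_RESET_VECTORS 2
#define HVC_STUB_HCALL_NR 3
#define HVC_STUB_ERR      0xbadca11

static void set_hvbar(uint32_t pa) { (void)pa; /* mcr p15, 4, r1, c12, c0, 0 */ }
static void jump_to(uint32_t pa)   { (void)pa; /* bx r1 -- does not return   */ }

/* r0 carries the stub hypercall number, r1 the optional argument;
 * r0 is 0 on success or HVC_STUB_ERR for an unknown number. */
static uint32_t hyp_stub_do_trap(uint32_t r0, uint32_t r1)
{
	switch (r0) {
	case HVC_SET_VECTORS:
		set_hvbar(r1);		/* install the new HYP vectors */
		return 0;
	case HVC_SOFT_RESTART:
		jump_to(r1);		/* restart at r1, still in HYP mode */
		return 0;		/* not reached on real hardware */
	case HVC_RESET_VECTORS:
		return 0;		/* the stub already owns the vectors */
	default:
		return HVC_STUB_ERR;	/* reject anything >= HVC_STUB_HCALL_NR */
	}
}
```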
diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c index 3fa867a2aae6..3b2aa9a9fe26 100644 --- a/arch/arm/kernel/reboot.c +++ b/arch/arm/kernel/reboot.c @@ -12,10 +12,11 @@ #include <asm/cacheflush.h> #include <asm/idmap.h> +#include <asm/virt.h> #include "reboot.h" -typedef void (*phys_reset_t)(unsigned long); +typedef void (*phys_reset_t)(unsigned long, bool); /* * Function pointers to optional machine specific functions @@ -51,7 +52,9 @@ static void __soft_restart(void *addr) /* Switch to the identity mapping. */ phys_reset = (phys_reset_t)virt_to_idmap(cpu_reset); - phys_reset((unsigned long)addr); + + /* original stub should be restored by kvm */ + phys_reset((unsigned long)addr, is_hyp_mode_available()); /* Should never get here. */ BUG(); diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 314eb6abe1ff..8a31906bdc9b 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -53,7 +53,6 @@ __asm__(".arch_extension virt"); static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); static kvm_cpu_context_t __percpu *kvm_host_cpu_state; -static unsigned long hyp_default_vectors; /* Per-CPU variable containing the currently running vcpu. */ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu); @@ -209,9 +208,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IMMEDIATE_EXIT: r = 1; break; - case KVM_CAP_COALESCED_MMIO: - r = KVM_COALESCED_MMIO_PAGE_OFFSET; - break; case KVM_CAP_ARM_SET_DEVICE_ADDR: r = 1; break; @@ -230,6 +226,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) else r = kvm->arch.vgic.msis_require_devid; break; + case KVM_CAP_ARM_USER_IRQ: + /* + * 1: EL1_VTIMER, EL1_PTIMER, and PMU. + * (bump this number if adding more devices) + */ + r = 1; + break; default: r = kvm_arch_dev_ioctl_check_extension(kvm, ext); break; @@ -351,15 +354,14 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state); kvm_arm_set_running_vcpu(vcpu); + + kvm_vgic_load(vcpu); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { - /* - * The arch-generic KVM code expects the cpu field of a vcpu to be -1 - * if the vcpu is no longer assigned to a cpu. This is used for the - * optimized make_all_cpus_request path. - */ + kvm_vgic_put(vcpu); + vcpu->cpu = -1; kvm_arm_set_running_vcpu(NULL); @@ -517,13 +519,7 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) return ret; } - /* - * Enable the arch timers only if we have an in-kernel VGIC - * and it has been properly initialized, since we cannot handle - * interrupts from the virtual timer with a userspace gic. - */ - if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) - ret = kvm_timer_enable(vcpu); + ret = kvm_timer_enable(vcpu); return ret; } @@ -633,16 +629,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) * non-preemptible context. */ preempt_disable(); + kvm_pmu_flush_hwstate(vcpu); + kvm_timer_flush_hwstate(vcpu); kvm_vgic_flush_hwstate(vcpu); local_irq_disable(); /* - * Re-check atomic conditions + * If we have a singal pending, or need to notify a userspace + * irqchip about timer or PMU level changes, then we exit (and + * update the timer level state in kvm_timer_update_run + * below). 
*/ - if (signal_pending(current)) { + if (signal_pending(current) || + kvm_timer_should_notify_user(vcpu) || + kvm_pmu_should_notify_user(vcpu)) { ret = -EINTR; run->exit_reason = KVM_EXIT_INTR; } @@ -714,6 +717,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = handle_exit(vcpu, run, ret); } + /* Tell userspace about in-kernel device output levels */ + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) { + kvm_timer_update_run(vcpu); + kvm_pmu_update_run(vcpu); + } + if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); return ret; @@ -1112,8 +1121,16 @@ static void cpu_init_hyp_mode(void *dummy) kvm_arm_init_debug(); } +static void cpu_hyp_reset(void) +{ + if (!is_kernel_in_hyp_mode()) + __hyp_reset_vectors(); +} + static void cpu_hyp_reinit(void) { + cpu_hyp_reset(); + if (is_kernel_in_hyp_mode()) { /* * __cpu_init_stage2() is safe to call even if the PM @@ -1121,21 +1138,13 @@ static void cpu_hyp_reinit(void) */ __cpu_init_stage2(); } else { - if (__hyp_get_vectors() == hyp_default_vectors) - cpu_init_hyp_mode(NULL); + cpu_init_hyp_mode(NULL); } if (vgic_present) kvm_vgic_init_cpu_hardware(); } -static void cpu_hyp_reset(void) -{ - if (!is_kernel_in_hyp_mode()) - __cpu_reset_hyp_mode(hyp_default_vectors, - kvm_get_idmap_start()); -} - static void _kvm_arch_hardware_enable(void *discard) { if (!__this_cpu_read(kvm_arm_hardware_enabled)) { @@ -1319,12 +1328,6 @@ static int init_hyp_mode(void) goto out_err; /* - * It is probably enough to obtain the default on one - * CPU. It's unlikely to be different on the others. - */ - hyp_default_vectors = __hyp_get_vectors(); - - /* * Allocate stack pages for Hypervisor-mode */ for_each_possible_cpu(cpu) { diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 3e5e4194ef86..2c14b69511e9 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -40,6 +40,24 @@ * Co-processor emulation *****************************************************************************/ +static bool write_to_read_only(struct kvm_vcpu *vcpu, + const struct coproc_params *params) +{ + WARN_ONCE(1, "CP15 write to read-only register\n"); + print_cp_instr(params); + kvm_inject_undefined(vcpu); + return false; +} + +static bool read_from_write_only(struct kvm_vcpu *vcpu, + const struct coproc_params *params) +{ + WARN_ONCE(1, "CP15 read to write-only register\n"); + print_cp_instr(params); + kvm_inject_undefined(vcpu); + return false; +} + /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ static u32 cache_levels; @@ -502,15 +520,15 @@ static int emulate_cp15(struct kvm_vcpu *vcpu, if (likely(r->access(vcpu, params, r))) { /* Skip instruction, since it was emulated */ kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); - return 1; } - /* If access function fails, it should complain. */ } else { + /* If access function fails, it should complain. 
*/ kvm_err("Unsupported guest CP15 access at: %08lx\n", *vcpu_pc(vcpu)); print_cp_instr(params); + kvm_inject_undefined(vcpu); } - kvm_inject_undefined(vcpu); + return 1; } diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h index eef1759c2b65..3a41b7d1eb86 100644 --- a/arch/arm/kvm/coproc.h +++ b/arch/arm/kvm/coproc.h @@ -81,24 +81,6 @@ static inline bool read_zero(struct kvm_vcpu *vcpu, return true; } -static inline bool write_to_read_only(struct kvm_vcpu *vcpu, - const struct coproc_params *params) -{ - kvm_debug("CP15 write to read-only register at: %08lx\n", - *vcpu_pc(vcpu)); - print_cp_instr(params); - return false; -} - -static inline bool read_from_write_only(struct kvm_vcpu *vcpu, - const struct coproc_params *params) -{ - kvm_debug("CP15 read to write-only register at: %08lx\n", - *vcpu_pc(vcpu)); - print_cp_instr(params); - return false; -} - /* Reset functions */ static inline void reset_unknown(struct kvm_vcpu *vcpu, const struct coproc_reg *r) diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 96af65a30d78..5fd7968cdae9 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -160,6 +160,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, case ARM_EXCEPTION_DATA_ABORT: kvm_inject_vabt(vcpu); return 1; + case ARM_EXCEPTION_HYP_GONE: + /* + * HYP has been reset to the hyp-stub. This happens + * when a guest is pre-empted by kvm_reboot()'s + * shutdown call. + */ + run->exit_reason = KVM_EXIT_FAIL_ENTRY; + return 0; default: kvm_pr_unimpl("Unsupported exception type: %d", exception_index); diff --git a/arch/arm/kvm/hyp/hyp-entry.S b/arch/arm/kvm/hyp/hyp-entry.S index 96beb53934c9..95a2faefc070 100644 --- a/arch/arm/kvm/hyp/hyp-entry.S +++ b/arch/arm/kvm/hyp/hyp-entry.S @@ -126,11 +126,29 @@ hyp_hvc: */ pop {r0, r1, r2} - /* Check for __hyp_get_vectors */ - cmp r0, #-1 - mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR - beq 1f + /* + * Check if we have a kernel function, which is guaranteed to be + * bigger than the maximum hyp stub hypercall + */ + cmp r0, #HVC_STUB_HCALL_NR + bhs 1f + /* + * Not a kernel function, treat it as a stub hypercall. + * Compute the physical address for __kvm_handle_stub_hvc + * (as the code lives in the idmaped page) and branch there. + * We hijack ip (r12) as a tmp register. + */ + push {r1} + ldr r1, =kimage_voffset + ldr r1, [r1] + ldr ip, =__kvm_handle_stub_hvc + sub ip, ip, r1 + pop {r1} + + bx ip + +1: push {lr} mov lr, r0 @@ -142,7 +160,7 @@ THUMB( orr lr, #1) blx lr @ Call the HYP function pop {lr} -1: eret + eret guest_trap: load_vcpu r0 @ Load VCPU pointer to r0 diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S index bf89c919efc1..570ed4a9c261 100644 --- a/arch/arm/kvm/init.S +++ b/arch/arm/kvm/init.S @@ -23,6 +23,7 @@ #include <asm/kvm_asm.h> #include <asm/kvm_arm.h> #include <asm/kvm_mmu.h> +#include <asm/virt.h> /******************************************************************** * Hypervisor initialization @@ -39,6 +40,10 @@ * - Setup the page tables * - Enable the MMU * - Profit! (or eret, if you only care about the code). + * + * Another possibility is to get a HYP stub hypercall. + * We discriminate between the two by checking if r0 contains a value + * that is less than HVC_STUB_HCALL_NR. */ .text @@ -58,6 +63,10 @@ __kvm_hyp_init: W(b) . 
__do_hyp_init: + @ Check for a stub hypercall + cmp r0, #HVC_STUB_HCALL_NR + blo __kvm_handle_stub_hvc + @ Set stack pointer mov sp, r0 @@ -112,20 +121,46 @@ __do_hyp_init: eret - @ r0 : stub vectors address -ENTRY(__kvm_hyp_reset) +ENTRY(__kvm_handle_stub_hvc) + cmp r0, #HVC_SOFT_RESTART + bne 1f + + /* The target is expected in r1 */ + msr ELR_hyp, r1 + mrs r0, cpsr + bic r0, r0, #MODE_MASK + orr r0, r0, #HYP_MODE +THUMB( orr r0, r0, #PSR_T_BIT ) + msr spsr_cxsf, r0 + b reset + +1: cmp r0, #HVC_RESET_VECTORS + bne 1f + +reset: /* We're now in idmap, disable MMU */ mrc p15, 4, r1, c1, c0, 0 @ HSCTLR - ldr r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_C | HSCTLR_I) - bic r1, r1, r2 + ldr r0, =(HSCTLR_M | HSCTLR_A | HSCTLR_C | HSCTLR_I) + bic r1, r1, r0 mcr p15, 4, r1, c1, c0, 0 @ HSCTLR - /* Install stub vectors */ - mcr p15, 4, r0, c12, c0, 0 @ HVBAR - isb + /* + * Install stub vectors, using ardb's VA->PA trick. + */ +0: adr r0, 0b @ PA(0) + movw r1, #:lower16:__hyp_stub_vectors - 0b @ VA(stub) - VA(0) + movt r1, #:upper16:__hyp_stub_vectors - 0b + add r1, r1, r0 @ PA(stub) + mcr p15, 4, r1, c12, c0, 0 @ HVBAR + b exit + +1: ldr r0, =HVC_STUB_ERR + eret +exit: + mov r0, #0 eret -ENDPROC(__kvm_hyp_reset) +ENDPROC(__kvm_handle_stub_hvc) .ltorg diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index b1bd316f14c0..80a1d6cd261c 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -37,10 +37,6 @@ * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are * passed in r0 (strictly 32bit). * - * A function pointer with a value of 0xffffffff has a special meaning, - * and is used to implement __hyp_get_vectors in the same way as in - * arch/arm/kernel/hyp_stub.S. - * * The calling convention follows the standard AAPCS: * r0 - r3: caller save * r12: caller save diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 582a972371cf..313ee646480f 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1524,7 +1524,8 @@ static int handle_hva_to_gpa(struct kvm *kvm, unsigned long start, unsigned long end, int (*handler)(struct kvm *kvm, - gpa_t gpa, void *data), + gpa_t gpa, u64 size, + void *data), void *data) { struct kvm_memslots *slots; @@ -1536,7 +1537,7 @@ static int handle_hva_to_gpa(struct kvm *kvm, /* we only care about the pages that the guest sees */ kvm_for_each_memslot(memslot, slots) { unsigned long hva_start, hva_end; - gfn_t gfn, gfn_end; + gfn_t gpa; hva_start = max(start, memslot->userspace_addr); hva_end = min(end, memslot->userspace_addr + @@ -1544,25 +1545,16 @@ static int handle_hva_to_gpa(struct kvm *kvm, if (hva_start >= hva_end) continue; - /* - * {gfn(page) | page intersects with [hva_start, hva_end)} = - * {gfn_start, gfn_start+1, ..., gfn_end-1}. 
- */ - gfn = hva_to_gfn_memslot(hva_start, memslot); - gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); - - for (; gfn < gfn_end; ++gfn) { - gpa_t gpa = gfn << PAGE_SHIFT; - ret |= handler(kvm, gpa, data); - } + gpa = hva_to_gfn_memslot(hva_start, memslot) << PAGE_SHIFT; + ret |= handler(kvm, gpa, (u64)(hva_end - hva_start), data); } return ret; } -static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) +static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) { - unmap_stage2_range(kvm, gpa, PAGE_SIZE); + unmap_stage2_range(kvm, gpa, size); return 0; } @@ -1589,10 +1581,11 @@ int kvm_unmap_hva_range(struct kvm *kvm, return 0; } -static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data) +static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) { pte_t *pte = (pte_t *)data; + WARN_ON(size != PAGE_SIZE); /* * We can always call stage2_set_pte with KVM_S2PTE_FLAG_LOGGING_ACTIVE * flag clear because MMU notifiers will have unmapped a huge PMD before @@ -1618,11 +1611,12 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte); } -static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) +static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) { pmd_t *pmd; pte_t *pte; + WARN_ON(size != PAGE_SIZE && size != PMD_SIZE); pmd = stage2_get_pmd(kvm, NULL, gpa); if (!pmd || pmd_none(*pmd)) /* Nothing there */ return 0; @@ -1637,11 +1631,12 @@ static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) return stage2_ptep_test_and_clear_young(pte); } -static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) +static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) { pmd_t *pmd; pte_t *pte; + WARN_ON(size != PAGE_SIZE && size != PMD_SIZE); pmd = stage2_get_pmd(kvm, NULL, gpa); if (!pmd || pmd_none(*pmd)) /* Nothing there */ return 0; @@ -1686,11 +1681,6 @@ phys_addr_t kvm_get_idmap_vector(void) return hyp_idmap_vector; } -phys_addr_t kvm_get_idmap_start(void) -{ - return hyp_idmap_start; -} - static int kvm_map_idmap_text(pgd_t *pgd) { int err; diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index c2b131527a64..a08d7a93aebb 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -208,9 +208,10 @@ int kvm_psci_version(struct kvm_vcpu *vcpu) static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) { - int ret = 1; + struct kvm *kvm = vcpu->kvm; unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0); unsigned long val; + int ret = 1; switch (psci_fn) { case PSCI_0_2_FN_PSCI_VERSION: @@ -230,7 +231,9 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) break; case PSCI_0_2_FN_CPU_ON: case PSCI_0_2_FN64_CPU_ON: + mutex_lock(&kvm->lock); val = kvm_psci_vcpu_on(vcpu); + mutex_unlock(&kvm->lock); break; case PSCI_0_2_FN_AFFINITY_INFO: case PSCI_0_2_FN64_AFFINITY_INFO: @@ -279,6 +282,7 @@ static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) { + struct kvm *kvm = vcpu->kvm; unsigned long psci_fn = vcpu_get_reg(vcpu, 0) & ~((u32) 0); unsigned long val; @@ -288,7 +292,9 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) val = PSCI_RET_SUCCESS; break; case KVM_PSCI_FN_CPU_ON: + mutex_lock(&kvm->lock); val = kvm_psci_vcpu_on(vcpu); + mutex_unlock(&kvm->lock); break; default: val = PSCI_RET_NOT_SUPPORTED; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 
347cca965783..31af3cb59a60 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -87,6 +87,8 @@ struct cachepolicy { #define s2_policy(policy) 0 #endif +unsigned long kimage_voffset __ro_after_init; + static struct cachepolicy cache_policies[] __initdata = { { .policy = "uncached", @@ -1639,6 +1641,9 @@ void __init paging_init(const struct machine_desc *mdesc) empty_zero_page = virt_to_page(zero_page); __flush_dcache_page(NULL, empty_zero_page); + + /* Compute the virt/idmap offset, mostly for the sake of KVM */ + kimage_voffset = (unsigned long)&kimage_voffset - virt_to_idmap(&kimage_voffset); } void __init early_mm_init(const struct machine_desc *mdesc) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index d00d52c9de3e..01d64c0b2563 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -39,13 +39,14 @@ ENTRY(cpu_v7_proc_fin) ENDPROC(cpu_v7_proc_fin) /* - * cpu_v7_reset(loc) + * cpu_v7_reset(loc, hyp) * * Perform a soft reset of the system. Put the CPU into the * same state as it would be if it had been reset, and branch * to what would be the reset vector. * * - loc - location to jump to for soft reset + * - hyp - indicate if restart occurs in HYP mode * * This code must be executed using a flat identity mapping with * caches disabled. @@ -53,11 +54,15 @@ ENDPROC(cpu_v7_proc_fin) .align 5 .pushsection .idmap.text, "ax" ENTRY(cpu_v7_reset) - mrc p15, 0, r1, c1, c0, 0 @ ctrl register - bic r1, r1, #0x1 @ ...............m - THUMB( bic r1, r1, #1 << 30 ) @ SCTLR.TE (Thumb exceptions) - mcr p15, 0, r1, c1, c0, 0 @ disable MMU + mrc p15, 0, r2, c1, c0, 0 @ ctrl register + bic r2, r2, #0x1 @ ...............m + THUMB( bic r2, r2, #1 << 30 ) @ SCTLR.TE (Thumb exceptions) + mcr p15, 0, r2, c1, c0, 0 @ disable MMU isb +#ifdef CONFIG_ARM_VIRT_EXT + teq r1, #0 + bne __hyp_soft_restart +#endif bx r0 ENDPROC(cpu_v7_reset) .popsection diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index ec3553eb9349..26a64d0f9ab9 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -28,7 +28,7 @@ #define ARM_EXCEPTION_EL1_SERROR 1 #define ARM_EXCEPTION_TRAP 2 /* The hyp-stub will return this for any kvm_call_hyp() call */ -#define ARM_EXCEPTION_HYP_GONE 3 +#define ARM_EXCEPTION_HYP_GONE HVC_STUB_ERR #define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) @@ -47,7 +47,6 @@ struct kvm_vcpu; extern char __kvm_hyp_init[]; extern char __kvm_hyp_init_end[]; -extern char __kvm_hyp_reset[]; extern char __kvm_hyp_vector[]; @@ -59,6 +58,8 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern u64 __vgic_v3_get_ich_vtr_el2(void); +extern u64 __vgic_v3_read_vmcr(void); +extern void __vgic_v3_write_vmcr(u32 vmcr); extern void __vgic_v3_init_lrs(void); extern u32 __kvm_get_mdcr_el2(void); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e7705e7bb07b..5e19165c5fa8 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -31,7 +31,6 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED #define KVM_USER_MEM_SLOTS 512 -#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HALT_POLL_NS_DEFAULT 500000 #include <kvm/arm_vgic.h> @@ -42,7 +41,7 @@ #define KVM_VCPU_MAX_FEATURES 4 -#define KVM_REQ_VCPU_EXIT 8 +#define KVM_REQ_VCPU_EXIT (8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); @@ 
-362,13 +361,6 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr); } -void __kvm_hyp_teardown(void); -static inline void __cpu_reset_hyp_mode(unsigned long vector_ptr, - phys_addr_t phys_idmap_start) -{ - kvm_call_hyp(__kvm_hyp_teardown, phys_idmap_start); -} - static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 2bc6ffa7b89b..a89cc22abadc 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -155,7 +155,6 @@ void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); phys_addr_t kvm_get_idmap_vector(void); -phys_addr_t kvm_get_idmap_start(void); int kvm_mmu_init(void); void kvm_clear_hyp_idmap(void); diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 439f6b5d31f6..c5f89442785c 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -19,25 +19,38 @@ #define __ASM__VIRT_H /* - * The arm64 hcall implementation uses x0 to specify the hcall type. A value - * less than 0xfff indicates a special hcall, such as get/set vector. - * Any other value is used as a pointer to the function to call. + * The arm64 hcall implementation uses x0 to specify the hcall + * number. A value less than HVC_STUB_HCALL_NR indicates a special + * hcall, such as set vector. Any other value is handled in a + * hypervisor specific way. + * + * The hypercall is allowed to clobber any of the caller-saved + * registers (x0-x18), so it is advisable to use it through the + * indirection of a function call (as implemented in hyp-stub.S). */ -/* HVC_GET_VECTORS - Return the value of the vbar_el2 register. */ -#define HVC_GET_VECTORS 0 - /* * HVC_SET_VECTORS - Set the value of the vbar_el2 register. * * @x1: Physical address of the new vector table. */ -#define HVC_SET_VECTORS 1 +#define HVC_SET_VECTORS 0 /* * HVC_SOFT_RESTART - CPU soft reset, used by the cpu_soft_restart routine. 
*/ -#define HVC_SOFT_RESTART 2 +#define HVC_SOFT_RESTART 1 + +/* + * HVC_RESET_VECTORS - Restore the vectors to the original HYP stubs + */ +#define HVC_RESET_VECTORS 2 + +/* Max number of HYP stub hypercalls */ +#define HVC_STUB_HCALL_NR 3 + +/* Error returned when an invalid stub number is passed into x0 */ +#define HVC_STUB_ERR 0xbadca11 #define BOOT_CPU_MODE_EL1 (0xe11) #define BOOT_CPU_MODE_EL2 (0xe12) @@ -61,7 +74,7 @@ extern u32 __boot_cpu_mode[2]; void __hyp_set_vectors(phys_addr_t phys_vector_base); -phys_addr_t __hyp_get_vectors(void); +void __hyp_reset_vectors(void); /* Reports the availability of HYP mode */ static inline bool is_hyp_mode_available(void) diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index c2860358ae3e..869ee480deed 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -39,6 +39,8 @@ #define __KVM_HAVE_IRQ_LINE #define __KVM_HAVE_READONLY_MEM +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) @@ -143,6 +145,8 @@ struct kvm_debug_exit_arch { #define KVM_GUESTDBG_USE_HW (1 << 17) struct kvm_sync_regs { + /* Used with KVM_CAP_ARM_USER_IRQ */ + __u64 device_irq_level; }; struct kvm_arch_memory_slot { diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index d3b5f75e652e..e1261fbaa374 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -55,18 +55,7 @@ ENDPROC(__hyp_stub_vectors) .align 11 el1_sync: - mrs x30, esr_el2 - lsr x30, x30, #ESR_ELx_EC_SHIFT - - cmp x30, #ESR_ELx_EC_HVC64 - b.ne 9f // Not an HVC trap - - cmp x0, #HVC_GET_VECTORS - b.ne 1f - mrs x0, vbar_el2 - b 9f - -1: cmp x0, #HVC_SET_VECTORS + cmp x0, #HVC_SET_VECTORS b.ne 2f msr vbar_el2, x1 b 9f @@ -79,10 +68,15 @@ el1_sync: mov x1, x3 br x4 // no return +3: cmp x0, #HVC_RESET_VECTORS + beq 9f // Nothing to reset! + /* Someone called kvm_call_hyp() against the hyp-stub... */ -3: mov x0, #ARM_EXCEPTION_HYP_GONE + ldr x0, =HVC_STUB_ERR + eret -9: eret +9: mov x0, xzr + eret ENDPROC(el1_sync) .macro invalid_vector label @@ -121,19 +115,15 @@ ENDPROC(\label) * initialisation entry point. */ -ENTRY(__hyp_get_vectors) - str lr, [sp, #-16]! - mov x0, #HVC_GET_VECTORS - hvc #0 - ldr lr, [sp], #16 - ret -ENDPROC(__hyp_get_vectors) - ENTRY(__hyp_set_vectors) - str lr, [sp, #-16]! 
mov x1, x0 mov x0, #HVC_SET_VECTORS hvc #0 - ldr lr, [sp], #16 ret ENDPROC(__hyp_set_vectors) + +ENTRY(__hyp_reset_vectors) + mov x0, #HVC_RESET_VECTORS + hvc #0 + ret +ENDPROC(__hyp_reset_vectors) diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 6b29d3d9e1f2..839425c24b1c 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -22,6 +22,7 @@ #include <asm/kvm_mmu.h> #include <asm/pgtable-hwdef.h> #include <asm/sysreg.h> +#include <asm/virt.h> .text .pushsection .hyp.idmap.text, "ax" @@ -58,6 +59,9 @@ __invalid: * x2: HYP vectors */ __do_hyp_init: + /* Check for a stub HVC call */ + cmp x0, #HVC_STUB_HCALL_NR + b.lo __kvm_handle_stub_hvc msr ttbr0_el2, x0 @@ -119,23 +123,45 @@ __do_hyp_init: eret ENDPROC(__kvm_hyp_init) +ENTRY(__kvm_handle_stub_hvc) + cmp x0, #HVC_SOFT_RESTART + b.ne 1f + + /* This is where we're about to jump, staying at EL2 */ + msr elr_el2, x1 + mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT | PSR_MODE_EL2h) + msr spsr_el2, x0 + + /* Shuffle the arguments, and don't come back */ + mov x0, x2 + mov x1, x3 + mov x2, x4 + b reset + +1: cmp x0, #HVC_RESET_VECTORS + b.ne 1f +reset: /* - * Reset kvm back to the hyp stub. + * Reset kvm back to the hyp stub. Do not clobber x0-x4 in + * case we coming via HVC_SOFT_RESTART. */ -ENTRY(__kvm_hyp_reset) - /* We're now in idmap, disable MMU */ - mrs x0, sctlr_el2 - ldr x1, =SCTLR_ELx_FLAGS - bic x0, x0, x1 // Clear SCTL_M and etc - msr sctlr_el2, x0 + mrs x5, sctlr_el2 + ldr x6, =SCTLR_ELx_FLAGS + bic x5, x5, x6 // Clear SCTL_M and etc + msr sctlr_el2, x5 isb /* Install stub vectors */ - adr_l x0, __hyp_stub_vectors - msr vbar_el2, x0 + adr_l x5, __hyp_stub_vectors + msr vbar_el2, x5 + mov x0, xzr + eret +1: /* Bad stub call */ + ldr x0, =HVC_STUB_ERR eret -ENDPROC(__kvm_hyp_reset) + +ENDPROC(__kvm_handle_stub_hvc) .ltorg diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 2726635dceba..952f6cb9cf72 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -36,15 +36,12 @@ * passed in x0. * * A function pointer with a value less than 0xfff has a special meaning, - * and is used to implement __hyp_get_vectors in the same way as in + * and is used to implement hyp stubs in the same way as in * arch/arm64/kernel/hyp_stub.S. - * HVC behaves as a 'bl' call and will clobber lr. */ ENTRY(__kvm_call_hyp) alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - str lr, [sp, #-16]! hvc #0 - ldr lr, [sp], #16 ret alternative_else_nop_endif b __vhe_hyp_call diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 5e9052f087f2..5170ce1021da 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -32,17 +32,17 @@ * Shuffle the parameters before calling the function * pointed to in x0. Assumes parameters in x[1,2,3]. */ + str lr, [sp, #-16]! mov lr, x0 mov x0, x1 mov x1, x2 mov x2, x3 blr lr + ldr lr, [sp], #16 .endm ENTRY(__vhe_hyp_call) - str lr, [sp, #-16]! do_el2_call - ldr lr, [sp], #16 /* * We used to rely on having an exception return to get * an implicit isb. In the E2H case, we don't have it anymore. @@ -53,21 +53,6 @@ ENTRY(__vhe_hyp_call) ret ENDPROC(__vhe_hyp_call) -/* - * Compute the idmap address of __kvm_hyp_reset based on the idmap - * start passed as a parameter, and jump there. 
- * - * x0: HYP phys_idmap_start - */ -ENTRY(__kvm_hyp_teardown) - mov x4, x0 - adr_l x3, __kvm_hyp_reset - - /* insert __kvm_hyp_reset()s offset into phys_idmap_start */ - bfi x4, x3, #0, #PAGE_SHIFT - br x4 -ENDPROC(__kvm_hyp_teardown) - el1_sync: // Guest trapped into EL2 stp x0, x1, [sp, #-16]! @@ -87,10 +72,24 @@ alternative_endif /* Here, we're pretty sure the host called HVC. */ ldp x0, x1, [sp], #16 - cmp x0, #HVC_GET_VECTORS - b.ne 1f - mrs x0, vbar_el2 - b 2f + /* Check for a stub HVC call */ + cmp x0, #HVC_STUB_HCALL_NR + b.hs 1f + + /* + * Compute the idmap address of __kvm_handle_stub_hvc and + * jump there. Since we use kimage_voffset, do not use the + * HYP VA for __kvm_handle_stub_hvc, but the kernel VA instead + * (by loading it from the constant pool). + * + * Preserve x0-x4, which may contain stub parameters. + */ + ldr x5, =__kvm_handle_stub_hvc + ldr_l x6, kimage_voffset + + /* x5 = __pa(x5) */ + sub x5, x5, x6 + br x5 1: /* @@ -99,7 +98,7 @@ alternative_endif kern_hyp_va x0 do_el2_call -2: eret + eret el1_trap: /* diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 26b0e77878b5..efbe9e8e7a78 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -55,6 +55,15 @@ * 64bit interface. */ +static bool read_from_write_only(struct kvm_vcpu *vcpu, + const struct sys_reg_params *params) +{ + WARN_ONCE(1, "Unexpected sys_reg read to write-only register\n"); + print_sys_reg_instr(params); + kvm_inject_undefined(vcpu); + return false; +} + /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ static u32 cache_levels; @@ -460,35 +469,35 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) vcpu_sys_reg(vcpu, PMCR_EL0) = val; } -static bool pmu_access_el0_disabled(struct kvm_vcpu *vcpu) +static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) { u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0); + bool enabled = (reg & flags) || vcpu_mode_priv(vcpu); - return !((reg & ARMV8_PMU_USERENR_EN) || vcpu_mode_priv(vcpu)); + if (!enabled) + kvm_inject_undefined(vcpu); + + return !enabled; } -static bool pmu_write_swinc_el0_disabled(struct kvm_vcpu *vcpu) +static bool pmu_access_el0_disabled(struct kvm_vcpu *vcpu) { - u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0); + return check_pmu_access_disabled(vcpu, ARMV8_PMU_USERENR_EN); +} - return !((reg & (ARMV8_PMU_USERENR_SW | ARMV8_PMU_USERENR_EN)) - || vcpu_mode_priv(vcpu)); +static bool pmu_write_swinc_el0_disabled(struct kvm_vcpu *vcpu) +{ + return check_pmu_access_disabled(vcpu, ARMV8_PMU_USERENR_SW | ARMV8_PMU_USERENR_EN); } static bool pmu_access_cycle_counter_el0_disabled(struct kvm_vcpu *vcpu) { - u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0); - - return !((reg & (ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_EN)) - || vcpu_mode_priv(vcpu)); + return check_pmu_access_disabled(vcpu, ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_EN); } static bool pmu_access_event_counter_el0_disabled(struct kvm_vcpu *vcpu) { - u64 reg = vcpu_sys_reg(vcpu, PMUSERENR_EL0); - - return !((reg & (ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_EN)) - || vcpu_mode_priv(vcpu)); + return check_pmu_access_disabled(vcpu, ARMV8_PMU_USERENR_ER | ARMV8_PMU_USERENR_EN); } static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, @@ -567,8 +576,10 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx) pmcr = vcpu_sys_reg(vcpu, PMCR_EL0); val = (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK; - if (idx >= val && idx != ARMV8_PMU_CYCLE_IDX) + if (idx >= val && 
idx != ARMV8_PMU_CYCLE_IDX) { + kvm_inject_undefined(vcpu); return false; + } return true; } @@ -707,8 +718,10 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (!kvm_arm_pmu_v3_ready(vcpu)) return trap_raz_wi(vcpu, p, r); - if (!vcpu_mode_priv(vcpu)) + if (!vcpu_mode_priv(vcpu)) { + kvm_inject_undefined(vcpu); return false; + } if (p->is_write) { u64 val = p->regval & mask; @@ -759,16 +772,15 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p, if (!kvm_arm_pmu_v3_ready(vcpu)) return trap_raz_wi(vcpu, p, r); + if (!p->is_write) + return read_from_write_only(vcpu, p); + if (pmu_write_swinc_el0_disabled(vcpu)) return false; - if (p->is_write) { - mask = kvm_pmu_valid_counter_mask(vcpu); - kvm_pmu_software_increment(vcpu, p->regval & mask); - return true; - } - - return false; + mask = kvm_pmu_valid_counter_mask(vcpu); + kvm_pmu_software_increment(vcpu, p->regval & mask); + return true; } static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, @@ -778,8 +790,10 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return trap_raz_wi(vcpu, p, r); if (p->is_write) { - if (!vcpu_mode_priv(vcpu)) + if (!vcpu_mode_priv(vcpu)) { + kvm_inject_undefined(vcpu); return false; + } vcpu_sys_reg(vcpu, PMUSERENR_EL0) = p->regval & ARMV8_PMU_USERENR_MASK; @@ -793,31 +807,23 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, /* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */ #define DBG_BCR_BVR_WCR_WVR_EL1(n) \ - /* DBGBVRn_EL1 */ \ - { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \ + { SYS_DESC(SYS_DBGBVRn_EL1(n)), \ trap_bvr, reset_bvr, n, 0, get_bvr, set_bvr }, \ - /* DBGBCRn_EL1 */ \ - { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \ + { SYS_DESC(SYS_DBGBCRn_EL1(n)), \ trap_bcr, reset_bcr, n, 0, get_bcr, set_bcr }, \ - /* DBGWVRn_EL1 */ \ - { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \ + { SYS_DESC(SYS_DBGWVRn_EL1(n)), \ trap_wvr, reset_wvr, n, 0, get_wvr, set_wvr }, \ - /* DBGWCRn_EL1 */ \ - { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \ + { SYS_DESC(SYS_DBGWCRn_EL1(n)), \ trap_wcr, reset_wcr, n, 0, get_wcr, set_wcr } /* Macro to expand the PMEVCNTRn_EL0 register */ #define PMU_PMEVCNTR_EL0(n) \ - /* PMEVCNTRn_EL0 */ \ - { Op0(0b11), Op1(0b011), CRn(0b1110), \ - CRm((0b1000 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \ + { SYS_DESC(SYS_PMEVCNTRn_EL0(n)), \ access_pmu_evcntr, reset_unknown, (PMEVCNTR0_EL0 + n), } /* Macro to expand the PMEVTYPERn_EL0 register */ #define PMU_PMEVTYPER_EL0(n) \ - /* PMEVTYPERn_EL0 */ \ - { Op0(0b11), Op1(0b011), CRn(0b1110), \ - CRm((0b1100 | (((n) >> 3) & 0x3))), Op2(((n) & 0x7)), \ + { SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \ access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } static bool access_cntp_tval(struct kvm_vcpu *vcpu, @@ -887,24 +893,14 @@ static bool access_cntp_cval(struct kvm_vcpu *vcpu, * more demanding guest... 
*/ static const struct sys_reg_desc sys_reg_descs[] = { - /* DC ISW */ - { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b0110), Op2(0b010), - access_dcsw }, - /* DC CSW */ - { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1010), Op2(0b010), - access_dcsw }, - /* DC CISW */ - { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010), - access_dcsw }, + { SYS_DESC(SYS_DC_ISW), access_dcsw }, + { SYS_DESC(SYS_DC_CSW), access_dcsw }, + { SYS_DESC(SYS_DC_CISW), access_dcsw }, DBG_BCR_BVR_WCR_WVR_EL1(0), DBG_BCR_BVR_WCR_WVR_EL1(1), - /* MDCCINT_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000), - trap_debug_regs, reset_val, MDCCINT_EL1, 0 }, - /* MDSCR_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010), - trap_debug_regs, reset_val, MDSCR_EL1, 0 }, + { SYS_DESC(SYS_MDCCINT_EL1), trap_debug_regs, reset_val, MDCCINT_EL1, 0 }, + { SYS_DESC(SYS_MDSCR_EL1), trap_debug_regs, reset_val, MDSCR_EL1, 0 }, DBG_BCR_BVR_WCR_WVR_EL1(2), DBG_BCR_BVR_WCR_WVR_EL1(3), DBG_BCR_BVR_WCR_WVR_EL1(4), @@ -920,179 +916,77 @@ static const struct sys_reg_desc sys_reg_descs[] = { DBG_BCR_BVR_WCR_WVR_EL1(14), DBG_BCR_BVR_WCR_WVR_EL1(15), - /* MDRAR_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), - trap_raz_wi }, - /* OSLAR_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100), - trap_raz_wi }, - /* OSLSR_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100), - trap_oslsr_el1 }, - /* OSDLR_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100), - trap_raz_wi }, - /* DBGPRCR_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100), - trap_raz_wi }, - /* DBGCLAIMSET_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110), - trap_raz_wi }, - /* DBGCLAIMCLR_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110), - trap_raz_wi }, - /* DBGAUTHSTATUS_EL1 */ - { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110), - trap_dbgauthstatus_el1 }, - - /* MDCCSR_EL1 */ - { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000), - trap_raz_wi }, - /* DBGDTR_EL0 */ - { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000), - trap_raz_wi }, - /* DBGDTR[TR]X_EL0 */ - { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000), - trap_raz_wi }, - - /* DBGVCR32_EL2 */ - { Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000), - NULL, reset_val, DBGVCR32_EL2, 0 }, - - /* MPIDR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b101), - NULL, reset_mpidr, MPIDR_EL1 }, - /* SCTLR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000), - access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, - /* CPACR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b010), - NULL, reset_val, CPACR_EL1, 0 }, - /* TTBR0_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b000), - access_vm_reg, reset_unknown, TTBR0_EL1 }, - /* TTBR1_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b001), - access_vm_reg, reset_unknown, TTBR1_EL1 }, - /* TCR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0010), CRm(0b0000), Op2(0b010), - access_vm_reg, reset_val, TCR_EL1, 0 }, - - /* AFSR0_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b000), - access_vm_reg, reset_unknown, AFSR0_EL1 }, - /* AFSR1_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0001), Op2(0b001), - access_vm_reg, reset_unknown, AFSR1_EL1 }, - /* ESR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0101), CRm(0b0010), Op2(0b000), - access_vm_reg, reset_unknown, 
ESR_EL1 }, - /* FAR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000), - access_vm_reg, reset_unknown, FAR_EL1 }, - /* PAR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000), - NULL, reset_unknown, PAR_EL1 }, - - /* PMINTENSET_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001), - access_pminten, reset_unknown, PMINTENSET_EL1 }, - /* PMINTENCLR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b010), - access_pminten, NULL, PMINTENSET_EL1 }, - - /* MAIR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0010), Op2(0b000), - access_vm_reg, reset_unknown, MAIR_EL1 }, - /* AMAIR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b1010), CRm(0b0011), Op2(0b000), - access_vm_reg, reset_amair_el1, AMAIR_EL1 }, - - /* VBAR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b0000), Op2(0b000), - NULL, reset_val, VBAR_EL1, 0 }, - - /* ICC_SGI1R_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1011), Op2(0b101), - access_gic_sgi }, - /* ICC_SRE_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b1100), CRm(0b1100), Op2(0b101), - access_gic_sre }, - - /* CONTEXTIDR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b001), - access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 }, - /* TPIDR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b1101), CRm(0b0000), Op2(0b100), - NULL, reset_unknown, TPIDR_EL1 }, - - /* CNTKCTL_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b1110), CRm(0b0001), Op2(0b000), - NULL, reset_val, CNTKCTL_EL1, 0}, - - /* CSSELR_EL1 */ - { Op0(0b11), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000), - NULL, reset_unknown, CSSELR_EL1 }, - - /* PMCR_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b000), - access_pmcr, reset_pmcr, }, - /* PMCNTENSET_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b001), - access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, - /* PMCNTENCLR_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b010), - access_pmcnten, NULL, PMCNTENSET_EL0 }, - /* PMOVSCLR_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b011), - access_pmovs, NULL, PMOVSSET_EL0 }, - /* PMSWINC_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b100), - access_pmswinc, reset_unknown, PMSWINC_EL0 }, - /* PMSELR_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b101), - access_pmselr, reset_unknown, PMSELR_EL0 }, - /* PMCEID0_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b110), - access_pmceid }, - /* PMCEID1_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1100), Op2(0b111), - access_pmceid }, - /* PMCCNTR_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b000), - access_pmu_evcntr, reset_unknown, PMCCNTR_EL0 }, - /* PMXEVTYPER_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b001), - access_pmu_evtyper }, - /* PMXEVCNTR_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1101), Op2(0b010), - access_pmu_evcntr }, - /* PMUSERENR_EL0 - * This register resets as unknown in 64bit mode while it resets as zero + { SYS_DESC(SYS_MDRAR_EL1), trap_raz_wi }, + { SYS_DESC(SYS_OSLAR_EL1), trap_raz_wi }, + { SYS_DESC(SYS_OSLSR_EL1), trap_oslsr_el1 }, + { SYS_DESC(SYS_OSDLR_EL1), trap_raz_wi }, + { SYS_DESC(SYS_DBGPRCR_EL1), trap_raz_wi }, + { SYS_DESC(SYS_DBGCLAIMSET_EL1), trap_raz_wi }, + { SYS_DESC(SYS_DBGCLAIMCLR_EL1), trap_raz_wi }, + { SYS_DESC(SYS_DBGAUTHSTATUS_EL1), trap_dbgauthstatus_el1 }, + + { SYS_DESC(SYS_MDCCSR_EL0), trap_raz_wi }, + { SYS_DESC(SYS_DBGDTR_EL0), trap_raz_wi }, + // DBGDTR[TR]X_EL0 share the 
same encoding + { SYS_DESC(SYS_DBGDTRTX_EL0), trap_raz_wi }, + + { SYS_DESC(SYS_DBGVCR32_EL2), NULL, reset_val, DBGVCR32_EL2, 0 }, + + { SYS_DESC(SYS_MPIDR_EL1), NULL, reset_mpidr, MPIDR_EL1 }, + { SYS_DESC(SYS_SCTLR_EL1), access_vm_reg, reset_val, SCTLR_EL1, 0x00C50078 }, + { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 }, + { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, + { SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 }, + { SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 }, + + { SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 }, + { SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 }, + { SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 }, + { SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 }, + { SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 }, + + { SYS_DESC(SYS_PMINTENSET_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 }, + { SYS_DESC(SYS_PMINTENCLR_EL1), access_pminten, NULL, PMINTENSET_EL1 }, + + { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, + { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, + + { SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 }, + + { SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi }, + { SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre }, + + { SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 }, + { SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 }, + + { SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0}, + + { SYS_DESC(SYS_CSSELR_EL1), NULL, reset_unknown, CSSELR_EL1 }, + + { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, }, + { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, + { SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, NULL, PMCNTENSET_EL0 }, + { SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, NULL, PMOVSSET_EL0 }, + { SYS_DESC(SYS_PMSWINC_EL0), access_pmswinc, reset_unknown, PMSWINC_EL0 }, + { SYS_DESC(SYS_PMSELR_EL0), access_pmselr, reset_unknown, PMSELR_EL0 }, + { SYS_DESC(SYS_PMCEID0_EL0), access_pmceid }, + { SYS_DESC(SYS_PMCEID1_EL0), access_pmceid }, + { SYS_DESC(SYS_PMCCNTR_EL0), access_pmu_evcntr, reset_unknown, PMCCNTR_EL0 }, + { SYS_DESC(SYS_PMXEVTYPER_EL0), access_pmu_evtyper }, + { SYS_DESC(SYS_PMXEVCNTR_EL0), access_pmu_evcntr }, + /* + * PMUSERENR_EL0 resets as unknown in 64bit mode while it resets as zero * in 32bit mode. Here we choose to reset it as zero for consistency. 
*/ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b000), - access_pmuserenr, reset_val, PMUSERENR_EL0, 0 }, - /* PMOVSSET_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1001), CRm(0b1110), Op2(0b011), - access_pmovs, reset_unknown, PMOVSSET_EL0 }, - - /* TPIDR_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b010), - NULL, reset_unknown, TPIDR_EL0 }, - /* TPIDRRO_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1101), CRm(0b0000), Op2(0b011), - NULL, reset_unknown, TPIDRRO_EL0 }, - - /* CNTP_TVAL_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b0010), Op2(0b000), - access_cntp_tval }, - /* CNTP_CTL_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b0010), Op2(0b001), - access_cntp_ctl }, - /* CNTP_CVAL_EL0 */ - { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b0010), Op2(0b010), - access_cntp_cval }, + { SYS_DESC(SYS_PMUSERENR_EL0), access_pmuserenr, reset_val, PMUSERENR_EL0, 0 }, + { SYS_DESC(SYS_PMOVSSET_EL0), access_pmovs, reset_unknown, PMOVSSET_EL0 }, + + { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 }, + { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 }, + + { SYS_DESC(SYS_CNTP_TVAL_EL0), access_cntp_tval }, + { SYS_DESC(SYS_CNTP_CTL_EL0), access_cntp_ctl }, + { SYS_DESC(SYS_CNTP_CVAL_EL0), access_cntp_cval }, /* PMEVCNTRn_EL0 */ PMU_PMEVCNTR_EL0(0), @@ -1158,22 +1052,15 @@ static const struct sys_reg_desc sys_reg_descs[] = { PMU_PMEVTYPER_EL0(28), PMU_PMEVTYPER_EL0(29), PMU_PMEVTYPER_EL0(30), - /* PMCCFILTR_EL0 - * This register resets as unknown in 64bit mode while it resets as zero + /* + * PMCCFILTR_EL0 resets as unknown in 64bit mode while it resets as zero * in 32bit mode. Here we choose to reset it as zero for consistency. */ - { Op0(0b11), Op1(0b011), CRn(0b1110), CRm(0b1111), Op2(0b111), - access_pmu_evtyper, reset_val, PMCCFILTR_EL0, 0 }, - - /* DACR32_EL2 */ - { Op0(0b11), Op1(0b100), CRn(0b0011), CRm(0b0000), Op2(0b000), - NULL, reset_unknown, DACR32_EL2 }, - /* IFSR32_EL2 */ - { Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0000), Op2(0b001), - NULL, reset_unknown, IFSR32_EL2 }, - /* FPEXC32_EL2 */ - { Op0(0b11), Op1(0b100), CRn(0b0101), CRm(0b0011), Op2(0b000), - NULL, reset_val, FPEXC32_EL2, 0x70 }, + { SYS_DESC(SYS_PMCCFILTR_EL0), access_pmu_evtyper, reset_val, PMCCFILTR_EL0, 0 }, + + { SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 }, + { SYS_DESC(SYS_IFSR32_EL2), NULL, reset_unknown, IFSR32_EL2 }, + { SYS_DESC(SYS_FPEXC32_EL2), NULL, reset_val, FPEXC32_EL2, 0x70 }, }; static bool trap_dbgidr(struct kvm_vcpu *vcpu, @@ -1557,6 +1444,22 @@ int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } +static void perform_access(struct kvm_vcpu *vcpu, + struct sys_reg_params *params, + const struct sys_reg_desc *r) +{ + /* + * Not having an accessor means that we have configured a trap + * that we don't know how to handle. This certainly qualifies + * as a gross bug that should be fixed right away. + */ + BUG_ON(!r->access); + + /* Skip instruction if instructed so */ + if (likely(r->access(vcpu, params, r))) + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); +} + /* * emulate_cp -- tries to match a sys_reg access in a handling table, and * call the corresponding trap handler. @@ -1580,20 +1483,8 @@ static int emulate_cp(struct kvm_vcpu *vcpu, r = find_reg(params, table, num); if (r) { - /* - * Not having an accessor means that we have - * configured a trap that we don't know how to - * handle. This certainly qualifies as a gross bug - * that should be fixed right away. 
- */ - BUG_ON(!r->access); - - if (likely(r->access(vcpu, params, r))) { - /* Skip instruction, since it was emulated */ - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); - /* Handled */ - return 0; - } + perform_access(vcpu, params, r); + return 0; } /* Not handled */ @@ -1660,20 +1551,25 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, params.regval |= vcpu_get_reg(vcpu, Rt2) << 32; } - if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific)) - goto out; - if (!emulate_cp(vcpu, ¶ms, global, nr_global)) - goto out; - - unhandled_cp_access(vcpu, ¶ms); + /* + * Try to emulate the coprocessor access using the target + * specific table first, and using the global table afterwards. + * If either of the tables contains a handler, handle the + * potential register operation in the case of a read and return + * with success. + */ + if (!emulate_cp(vcpu, ¶ms, target_specific, nr_specific) || + !emulate_cp(vcpu, ¶ms, global, nr_global)) { + /* Split up the value between registers for the read side */ + if (!params.is_write) { + vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval)); + vcpu_set_reg(vcpu, Rt2, upper_32_bits(params.regval)); + } -out: - /* Split up the value between registers for the read side */ - if (!params.is_write) { - vcpu_set_reg(vcpu, Rt, lower_32_bits(params.regval)); - vcpu_set_reg(vcpu, Rt2, upper_32_bits(params.regval)); + return 1; } + unhandled_cp_access(vcpu, ¶ms); return 1; } @@ -1763,26 +1659,13 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu, r = find_reg(params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); if (likely(r)) { - /* - * Not having an accessor means that we have - * configured a trap that we don't know how to - * handle. This certainly qualifies as a gross bug - * that should be fixed right away. - */ - BUG_ON(!r->access); - - if (likely(r->access(vcpu, params, r))) { - /* Skip instruction, since it was emulated */ - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); - return 1; - } - /* If access function fails, it should complain. 
*/ + perform_access(vcpu, params, r); } else { kvm_err("Unsupported guest sys_reg access at: %lx\n", *vcpu_pc(vcpu)); print_sys_reg_instr(params); + kvm_inject_undefined(vcpu); } - kvm_inject_undefined(vcpu); return 1; } @@ -1932,44 +1815,25 @@ FUNCTION_INVARIANT(aidr_el1) /* ->val is filled in by kvm_sys_reg_table_init() */ static struct sys_reg_desc invariant_sys_regs[] = { - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b000), - NULL, get_midr_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0000), Op2(0b110), - NULL, get_revidr_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b000), - NULL, get_id_pfr0_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b001), - NULL, get_id_pfr1_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b010), - NULL, get_id_dfr0_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b011), - NULL, get_id_afr0_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b100), - NULL, get_id_mmfr0_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b101), - NULL, get_id_mmfr1_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b110), - NULL, get_id_mmfr2_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0001), Op2(0b111), - NULL, get_id_mmfr3_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000), - NULL, get_id_isar0_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b001), - NULL, get_id_isar1_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010), - NULL, get_id_isar2_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b011), - NULL, get_id_isar3_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b100), - NULL, get_id_isar4_el1 }, - { Op0(0b11), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b101), - NULL, get_id_isar5_el1 }, - { Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b001), - NULL, get_clidr_el1 }, - { Op0(0b11), Op1(0b001), CRn(0b0000), CRm(0b0000), Op2(0b111), - NULL, get_aidr_el1 }, - { Op0(0b11), Op1(0b011), CRn(0b0000), CRm(0b0000), Op2(0b001), - NULL, get_ctr_el0 }, + { SYS_DESC(SYS_MIDR_EL1), NULL, get_midr_el1 }, + { SYS_DESC(SYS_REVIDR_EL1), NULL, get_revidr_el1 }, + { SYS_DESC(SYS_ID_PFR0_EL1), NULL, get_id_pfr0_el1 }, + { SYS_DESC(SYS_ID_PFR1_EL1), NULL, get_id_pfr1_el1 }, + { SYS_DESC(SYS_ID_DFR0_EL1), NULL, get_id_dfr0_el1 }, + { SYS_DESC(SYS_ID_AFR0_EL1), NULL, get_id_afr0_el1 }, + { SYS_DESC(SYS_ID_MMFR0_EL1), NULL, get_id_mmfr0_el1 }, + { SYS_DESC(SYS_ID_MMFR1_EL1), NULL, get_id_mmfr1_el1 }, + { SYS_DESC(SYS_ID_MMFR2_EL1), NULL, get_id_mmfr2_el1 }, + { SYS_DESC(SYS_ID_MMFR3_EL1), NULL, get_id_mmfr3_el1 }, + { SYS_DESC(SYS_ID_ISAR0_EL1), NULL, get_id_isar0_el1 }, + { SYS_DESC(SYS_ID_ISAR1_EL1), NULL, get_id_isar1_el1 }, + { SYS_DESC(SYS_ID_ISAR2_EL1), NULL, get_id_isar2_el1 }, + { SYS_DESC(SYS_ID_ISAR3_EL1), NULL, get_id_isar3_el1 }, + { SYS_DESC(SYS_ID_ISAR4_EL1), NULL, get_id_isar4_el1 }, + { SYS_DESC(SYS_ID_ISAR5_EL1), NULL, get_id_isar5_el1 }, + { SYS_DESC(SYS_CLIDR_EL1), NULL, get_clidr_el1 }, + { SYS_DESC(SYS_AIDR_EL1), NULL, get_aidr_el1 }, + { SYS_DESC(SYS_CTR_EL0), NULL, get_ctr_el0 }, }; static int reg_from_user(u64 *val, const void __user *uaddr, u64 id) diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 9c6ffd0f0196..060f5348ef25 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -83,24 +83,6 @@ static inline bool read_zero(struct kvm_vcpu *vcpu, return true; } -static inline bool 
write_to_read_only(struct kvm_vcpu *vcpu, - const struct sys_reg_params *params) -{ - kvm_debug("sys_reg write to read-only register at: %lx\n", - *vcpu_pc(vcpu)); - print_sys_reg_instr(params); - return false; -} - -static inline bool read_from_write_only(struct kvm_vcpu *vcpu, - const struct sys_reg_params *params) -{ - kvm_debug("sys_reg read to write-only register at: %lx\n", - *vcpu_pc(vcpu)); - print_sys_reg_instr(params); - return false; -} - /* Reset functions */ static inline void reset_unknown(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) @@ -147,4 +129,9 @@ const struct sys_reg_desc *find_reg_by_id(u64 id, #define CRm(_x) .CRm = _x #define Op2(_x) .Op2 = _x +#define SYS_DESC(reg) \ + Op0(sys_reg_Op0(reg)), Op1(sys_reg_Op1(reg)), \ + CRn(sys_reg_CRn(reg)), CRm(sys_reg_CRm(reg)), \ + Op2(sys_reg_Op2(reg)) + #endif /* __ARM64_KVM_SYS_REGS_LOCAL_H__ */ diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index 46af7186bca6..969ade1d333d 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -52,9 +52,7 @@ static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 */ static const struct sys_reg_desc genericv8_sys_regs[] = { - /* ACTLR_EL1 */ - { Op0(0b11), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b001), - access_actlr, reset_actlr, ACTLR_EL1 }, + { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, }; static const struct sys_reg_desc genericv8_cp15_regs[] = { diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index d6d545a45118..4e9ebf65d071 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1686,6 +1686,7 @@ config CPU_CAVIUM_OCTEON select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN select MIPS_L1_CACHE_SHIFT_7 + select HAVE_KVM help The Cavium Octeon processor is a highly integrated chip containing many ethernet hardware widgets for networking tasks. 
The processor diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index e961c8a7ea66..494d38274142 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -444,6 +444,10 @@ # define cpu_has_msa 0 #endif +#ifndef cpu_has_ufr +# define cpu_has_ufr (cpu_data[0].options & MIPS_CPU_UFR) +#endif + #ifndef cpu_has_fre # define cpu_has_fre (cpu_data[0].options & MIPS_CPU_FRE) #endif @@ -528,6 +532,9 @@ #ifndef cpu_guest_has_htw #define cpu_guest_has_htw (cpu_data[0].guest.options & MIPS_CPU_HTW) #endif +#ifndef cpu_guest_has_mvh +#define cpu_guest_has_mvh (cpu_data[0].guest.options & MIPS_CPU_MVH) +#endif #ifndef cpu_guest_has_msa #define cpu_guest_has_msa (cpu_data[0].guest.ases & MIPS_ASE_MSA) #endif @@ -543,6 +550,9 @@ #ifndef cpu_guest_has_maar #define cpu_guest_has_maar (cpu_data[0].guest.options & MIPS_CPU_MAAR) #endif +#ifndef cpu_guest_has_userlocal +#define cpu_guest_has_userlocal (cpu_data[0].guest.options & MIPS_CPU_ULRI) +#endif /* * Guest dynamic capabilities diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h index edbe2734a1bf..be3b4c25f335 100644 --- a/arch/mips/include/asm/cpu-info.h +++ b/arch/mips/include/asm/cpu-info.h @@ -33,6 +33,7 @@ struct guest_info { unsigned long ases_dyn; unsigned long long options; unsigned long long options_dyn; + int tlbsize; u8 conf; u8 kscratch_mask; }; @@ -109,6 +110,7 @@ struct cpuinfo_mips { struct guest_info guest; unsigned int gtoffset_mask; unsigned int guestid_mask; + unsigned int guestid_cache; } __attribute__((aligned(SMP_CACHE_BYTES))); extern struct cpuinfo_mips cpu_data[]; diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index 9a8372484edc..98f59307e6a3 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -415,6 +415,7 @@ enum cpu_type_enum { #define MIPS_CPU_GUESTCTL2 MBIT_ULL(50) /* CPU has VZ GuestCtl2 register */ #define MIPS_CPU_GUESTID MBIT_ULL(51) /* CPU uses VZ ASE GuestID feature */ #define MIPS_CPU_DRG MBIT_ULL(52) /* CPU has VZ Direct Root to Guest (DRG) */ +#define MIPS_CPU_UFR MBIT_ULL(53) /* CPU supports User mode FR switching */ /* * CPU ASE encodings diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 05e785fc061d..2998479fd4e8 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -10,6 +10,7 @@ #ifndef __MIPS_KVM_HOST_H__ #define __MIPS_KVM_HOST_H__ +#include <linux/cpumask.h> #include <linux/mutex.h> #include <linux/hrtimer.h> #include <linux/interrupt.h> @@ -33,12 +34,23 @@ #define KVM_REG_MIPS_CP0_ENTRYLO0 MIPS_CP0_64(2, 0) #define KVM_REG_MIPS_CP0_ENTRYLO1 MIPS_CP0_64(3, 0) #define KVM_REG_MIPS_CP0_CONTEXT MIPS_CP0_64(4, 0) +#define KVM_REG_MIPS_CP0_CONTEXTCONFIG MIPS_CP0_32(4, 1) #define KVM_REG_MIPS_CP0_USERLOCAL MIPS_CP0_64(4, 2) +#define KVM_REG_MIPS_CP0_XCONTEXTCONFIG MIPS_CP0_64(4, 3) #define KVM_REG_MIPS_CP0_PAGEMASK MIPS_CP0_32(5, 0) #define KVM_REG_MIPS_CP0_PAGEGRAIN MIPS_CP0_32(5, 1) +#define KVM_REG_MIPS_CP0_SEGCTL0 MIPS_CP0_64(5, 2) +#define KVM_REG_MIPS_CP0_SEGCTL1 MIPS_CP0_64(5, 3) +#define KVM_REG_MIPS_CP0_SEGCTL2 MIPS_CP0_64(5, 4) +#define KVM_REG_MIPS_CP0_PWBASE MIPS_CP0_64(5, 5) +#define KVM_REG_MIPS_CP0_PWFIELD MIPS_CP0_64(5, 6) +#define KVM_REG_MIPS_CP0_PWSIZE MIPS_CP0_64(5, 7) #define KVM_REG_MIPS_CP0_WIRED MIPS_CP0_32(6, 0) +#define KVM_REG_MIPS_CP0_PWCTL MIPS_CP0_32(6, 6) #define KVM_REG_MIPS_CP0_HWRENA MIPS_CP0_32(7, 0) #define KVM_REG_MIPS_CP0_BADVADDR MIPS_CP0_64(8, 0) 
+#define KVM_REG_MIPS_CP0_BADINSTR MIPS_CP0_32(8, 1) +#define KVM_REG_MIPS_CP0_BADINSTRP MIPS_CP0_32(8, 2) #define KVM_REG_MIPS_CP0_COUNT MIPS_CP0_32(9, 0) #define KVM_REG_MIPS_CP0_ENTRYHI MIPS_CP0_64(10, 0) #define KVM_REG_MIPS_CP0_COMPARE MIPS_CP0_32(11, 0) @@ -55,6 +67,7 @@ #define KVM_REG_MIPS_CP0_CONFIG4 MIPS_CP0_32(16, 4) #define KVM_REG_MIPS_CP0_CONFIG5 MIPS_CP0_32(16, 5) #define KVM_REG_MIPS_CP0_CONFIG7 MIPS_CP0_32(16, 7) +#define KVM_REG_MIPS_CP0_MAARI MIPS_CP0_64(17, 2) #define KVM_REG_MIPS_CP0_XCONTEXT MIPS_CP0_64(20, 0) #define KVM_REG_MIPS_CP0_ERROREPC MIPS_CP0_64(30, 0) #define KVM_REG_MIPS_CP0_KSCRATCH1 MIPS_CP0_64(31, 2) @@ -70,9 +83,13 @@ /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 0 -#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_HALT_POLL_NS_DEFAULT 500000 +#ifdef CONFIG_KVM_MIPS_VZ +extern unsigned long GUESTID_MASK; +extern unsigned long GUESTID_FIRST_VERSION; +extern unsigned long GUESTID_VERSION_MASK; +#endif /* @@ -145,6 +162,16 @@ struct kvm_vcpu_stat { u64 fpe_exits; u64 msa_disabled_exits; u64 flush_dcache_exits; +#ifdef CONFIG_KVM_MIPS_VZ + u64 vz_gpsi_exits; + u64 vz_gsfc_exits; + u64 vz_hc_exits; + u64 vz_grr_exits; + u64 vz_gva_exits; + u64 vz_ghfc_exits; + u64 vz_gpa_exits; + u64 vz_resvd_exits; +#endif u64 halt_successful_poll; u64 halt_attempted_poll; u64 halt_poll_invalid; @@ -157,6 +184,8 @@ struct kvm_arch_memory_slot { struct kvm_arch { /* Guest physical mm */ struct mm_struct gpa_mm; + /* Mask of CPUs needing GPA ASID flush */ + cpumask_t asid_flush_mask; }; #define N_MIPS_COPROC_REGS 32 @@ -214,6 +243,11 @@ struct mips_coproc { #define MIPS_CP0_CONFIG4_SEL 4 #define MIPS_CP0_CONFIG5_SEL 5 +#define MIPS_CP0_GUESTCTL2 10 +#define MIPS_CP0_GUESTCTL2_SEL 5 +#define MIPS_CP0_GTOFFSET 12 +#define MIPS_CP0_GTOFFSET_SEL 7 + /* Resume Flags */ #define RESUME_FLAG_DR (1<<0) /* Reload guest nonvolatile state? */ #define RESUME_FLAG_HOST (1<<1) /* Resume host? 
*/ @@ -229,6 +263,7 @@ enum emulation_result { EMULATE_WAIT, /* WAIT instruction */ EMULATE_PRIV_FAIL, EMULATE_EXCEPT, /* A guest exception has been generated */ + EMULATE_HYPERCALL, /* HYPCALL instruction */ }; #define mips3_paddr_to_tlbpfn(x) \ @@ -276,13 +311,18 @@ struct kvm_mmu_memory_cache { struct kvm_vcpu_arch { void *guest_ebase; int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); + + /* Host registers preserved across guest mode execution */ unsigned long host_stack; unsigned long host_gp; + unsigned long host_pgd; + unsigned long host_entryhi; /* Host CP0 registers used when handling exits from guest */ unsigned long host_cp0_badvaddr; unsigned long host_cp0_epc; u32 host_cp0_cause; + u32 host_cp0_guestctl0; u32 host_cp0_badinstr; u32 host_cp0_badinstrp; @@ -340,7 +380,23 @@ struct kvm_vcpu_arch { /* Cache some mmu pages needed inside spinlock regions */ struct kvm_mmu_memory_cache mmu_page_cache; +#ifdef CONFIG_KVM_MIPS_VZ + /* vcpu's vzguestid is different on each host cpu in an smp system */ + u32 vzguestid[NR_CPUS]; + + /* wired guest TLB entries */ + struct kvm_mips_tlb *wired_tlb; + unsigned int wired_tlb_limit; + unsigned int wired_tlb_used; + + /* emulated guest MAAR registers */ + unsigned long maar[6]; +#endif + + /* Last CPU the VCPU state was loaded on */ int last_sched_cpu; + /* Last CPU the VCPU actually executed guest code on */ + int last_exec_cpu; /* WAIT executed */ int wait; @@ -349,78 +405,6 @@ struct kvm_vcpu_arch { u8 msa_enabled; }; - -#define kvm_read_c0_guest_index(cop0) (cop0->reg[MIPS_CP0_TLB_INDEX][0]) -#define kvm_write_c0_guest_index(cop0, val) (cop0->reg[MIPS_CP0_TLB_INDEX][0] = val) -#define kvm_read_c0_guest_entrylo0(cop0) (cop0->reg[MIPS_CP0_TLB_LO0][0]) -#define kvm_write_c0_guest_entrylo0(cop0, val) (cop0->reg[MIPS_CP0_TLB_LO0][0] = (val)) -#define kvm_read_c0_guest_entrylo1(cop0) (cop0->reg[MIPS_CP0_TLB_LO1][0]) -#define kvm_write_c0_guest_entrylo1(cop0, val) (cop0->reg[MIPS_CP0_TLB_LO1][0] = (val)) -#define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0]) -#define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val)) -#define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2]) -#define kvm_write_c0_guest_userlocal(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2] = (val)) -#define kvm_read_c0_guest_pagemask(cop0) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0]) -#define kvm_write_c0_guest_pagemask(cop0, val) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0] = (val)) -#define kvm_read_c0_guest_wired(cop0) (cop0->reg[MIPS_CP0_TLB_WIRED][0]) -#define kvm_write_c0_guest_wired(cop0, val) (cop0->reg[MIPS_CP0_TLB_WIRED][0] = (val)) -#define kvm_read_c0_guest_hwrena(cop0) (cop0->reg[MIPS_CP0_HWRENA][0]) -#define kvm_write_c0_guest_hwrena(cop0, val) (cop0->reg[MIPS_CP0_HWRENA][0] = (val)) -#define kvm_read_c0_guest_badvaddr(cop0) (cop0->reg[MIPS_CP0_BAD_VADDR][0]) -#define kvm_write_c0_guest_badvaddr(cop0, val) (cop0->reg[MIPS_CP0_BAD_VADDR][0] = (val)) -#define kvm_read_c0_guest_count(cop0) (cop0->reg[MIPS_CP0_COUNT][0]) -#define kvm_write_c0_guest_count(cop0, val) (cop0->reg[MIPS_CP0_COUNT][0] = (val)) -#define kvm_read_c0_guest_entryhi(cop0) (cop0->reg[MIPS_CP0_TLB_HI][0]) -#define kvm_write_c0_guest_entryhi(cop0, val) (cop0->reg[MIPS_CP0_TLB_HI][0] = (val)) -#define kvm_read_c0_guest_compare(cop0) (cop0->reg[MIPS_CP0_COMPARE][0]) -#define kvm_write_c0_guest_compare(cop0, val) (cop0->reg[MIPS_CP0_COMPARE][0] = (val)) -#define kvm_read_c0_guest_status(cop0) (cop0->reg[MIPS_CP0_STATUS][0]) -#define 
kvm_write_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] = (val)) -#define kvm_read_c0_guest_intctl(cop0) (cop0->reg[MIPS_CP0_STATUS][1]) -#define kvm_write_c0_guest_intctl(cop0, val) (cop0->reg[MIPS_CP0_STATUS][1] = (val)) -#define kvm_read_c0_guest_cause(cop0) (cop0->reg[MIPS_CP0_CAUSE][0]) -#define kvm_write_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] = (val)) -#define kvm_read_c0_guest_epc(cop0) (cop0->reg[MIPS_CP0_EXC_PC][0]) -#define kvm_write_c0_guest_epc(cop0, val) (cop0->reg[MIPS_CP0_EXC_PC][0] = (val)) -#define kvm_read_c0_guest_prid(cop0) (cop0->reg[MIPS_CP0_PRID][0]) -#define kvm_write_c0_guest_prid(cop0, val) (cop0->reg[MIPS_CP0_PRID][0] = (val)) -#define kvm_read_c0_guest_ebase(cop0) (cop0->reg[MIPS_CP0_PRID][1]) -#define kvm_write_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] = (val)) -#define kvm_read_c0_guest_config(cop0) (cop0->reg[MIPS_CP0_CONFIG][0]) -#define kvm_read_c0_guest_config1(cop0) (cop0->reg[MIPS_CP0_CONFIG][1]) -#define kvm_read_c0_guest_config2(cop0) (cop0->reg[MIPS_CP0_CONFIG][2]) -#define kvm_read_c0_guest_config3(cop0) (cop0->reg[MIPS_CP0_CONFIG][3]) -#define kvm_read_c0_guest_config4(cop0) (cop0->reg[MIPS_CP0_CONFIG][4]) -#define kvm_read_c0_guest_config5(cop0) (cop0->reg[MIPS_CP0_CONFIG][5]) -#define kvm_read_c0_guest_config7(cop0) (cop0->reg[MIPS_CP0_CONFIG][7]) -#define kvm_write_c0_guest_config(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][0] = (val)) -#define kvm_write_c0_guest_config1(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][1] = (val)) -#define kvm_write_c0_guest_config2(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][2] = (val)) -#define kvm_write_c0_guest_config3(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][3] = (val)) -#define kvm_write_c0_guest_config4(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][4] = (val)) -#define kvm_write_c0_guest_config5(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][5] = (val)) -#define kvm_write_c0_guest_config7(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][7] = (val)) -#define kvm_read_c0_guest_errorepc(cop0) (cop0->reg[MIPS_CP0_ERROR_PC][0]) -#define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val)) -#define kvm_read_c0_guest_kscratch1(cop0) (cop0->reg[MIPS_CP0_DESAVE][2]) -#define kvm_read_c0_guest_kscratch2(cop0) (cop0->reg[MIPS_CP0_DESAVE][3]) -#define kvm_read_c0_guest_kscratch3(cop0) (cop0->reg[MIPS_CP0_DESAVE][4]) -#define kvm_read_c0_guest_kscratch4(cop0) (cop0->reg[MIPS_CP0_DESAVE][5]) -#define kvm_read_c0_guest_kscratch5(cop0) (cop0->reg[MIPS_CP0_DESAVE][6]) -#define kvm_read_c0_guest_kscratch6(cop0) (cop0->reg[MIPS_CP0_DESAVE][7]) -#define kvm_write_c0_guest_kscratch1(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][2] = (val)) -#define kvm_write_c0_guest_kscratch2(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][3] = (val)) -#define kvm_write_c0_guest_kscratch3(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][4] = (val)) -#define kvm_write_c0_guest_kscratch4(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][5] = (val)) -#define kvm_write_c0_guest_kscratch5(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][6] = (val)) -#define kvm_write_c0_guest_kscratch6(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][7] = (val)) - -/* - * Some of the guest registers may be modified asynchronously (e.g. from a - * hrtimer callback in hard irq context) and therefore need stronger atomicity - * guarantees than other registers. 
- */ - static inline void _kvm_atomic_set_c0_guest_reg(unsigned long *reg, unsigned long val) { @@ -471,26 +455,286 @@ static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg, } while (unlikely(!temp)); } -#define kvm_set_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] |= (val)) -#define kvm_clear_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] &= ~(val)) +/* Guest register types, used in accessor build below */ +#define __KVMT32 u32 +#define __KVMTl unsigned long -/* Cause can be modified asynchronously from hardirq hrtimer callback */ -#define kvm_set_c0_guest_cause(cop0, val) \ - _kvm_atomic_set_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val) -#define kvm_clear_c0_guest_cause(cop0, val) \ - _kvm_atomic_clear_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val) -#define kvm_change_c0_guest_cause(cop0, change, val) \ - _kvm_atomic_change_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], \ - change, val) - -#define kvm_set_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] |= (val)) -#define kvm_clear_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] &= ~(val)) -#define kvm_change_c0_guest_ebase(cop0, change, val) \ +/* + * __BUILD_KVM_$ops_SAVED(): kvm_$op_sw_gc0_$reg() + * These operate on the saved guest C0 state in RAM. + */ + +/* Generate saved context simple accessors */ +#define __BUILD_KVM_RW_SAVED(name, type, _reg, sel) \ +static inline __KVMT##type kvm_read_sw_gc0_##name(struct mips_coproc *cop0) \ +{ \ + return cop0->reg[(_reg)][(sel)]; \ +} \ +static inline void kvm_write_sw_gc0_##name(struct mips_coproc *cop0, \ + __KVMT##type val) \ +{ \ + cop0->reg[(_reg)][(sel)] = val; \ +} + +/* Generate saved context bitwise modifiers */ +#define __BUILD_KVM_SET_SAVED(name, type, _reg, sel) \ +static inline void kvm_set_sw_gc0_##name(struct mips_coproc *cop0, \ + __KVMT##type val) \ +{ \ + cop0->reg[(_reg)][(sel)] |= val; \ +} \ +static inline void kvm_clear_sw_gc0_##name(struct mips_coproc *cop0, \ + __KVMT##type val) \ +{ \ + cop0->reg[(_reg)][(sel)] &= ~val; \ +} \ +static inline void kvm_change_sw_gc0_##name(struct mips_coproc *cop0, \ + __KVMT##type mask, \ + __KVMT##type val) \ +{ \ + unsigned long _mask = mask; \ + cop0->reg[(_reg)][(sel)] &= ~_mask; \ + cop0->reg[(_reg)][(sel)] |= val & _mask; \ +} + +/* Generate saved context atomic bitwise modifiers */ +#define __BUILD_KVM_ATOMIC_SAVED(name, type, _reg, sel) \ +static inline void kvm_set_sw_gc0_##name(struct mips_coproc *cop0, \ + __KVMT##type val) \ +{ \ + _kvm_atomic_set_c0_guest_reg(&cop0->reg[(_reg)][(sel)], val); \ +} \ +static inline void kvm_clear_sw_gc0_##name(struct mips_coproc *cop0, \ + __KVMT##type val) \ +{ \ + _kvm_atomic_clear_c0_guest_reg(&cop0->reg[(_reg)][(sel)], val); \ +} \ +static inline void kvm_change_sw_gc0_##name(struct mips_coproc *cop0, \ + __KVMT##type mask, \ + __KVMT##type val) \ +{ \ + _kvm_atomic_change_c0_guest_reg(&cop0->reg[(_reg)][(sel)], mask, \ + val); \ +} + +/* + * __BUILD_KVM_$ops_VZ(): kvm_$op_vz_gc0_$reg() + * These operate on the VZ guest C0 context in hardware. 
+ */ + +/* Generate VZ guest context simple accessors */ +#define __BUILD_KVM_RW_VZ(name, type, _reg, sel) \ +static inline __KVMT##type kvm_read_vz_gc0_##name(struct mips_coproc *cop0) \ +{ \ + return read_gc0_##name(); \ +} \ +static inline void kvm_write_vz_gc0_##name(struct mips_coproc *cop0, \ + __KVMT##type val) \ +{ \ + write_gc0_##name(val); \ +} + +/* Generate VZ guest context bitwise modifiers */ +#define __BUILD_KVM_SET_VZ(name, type, _reg, sel) \ +static inline void kvm_set_vz_gc0_##name(struct mips_coproc *cop0, \ + __KVMT##type val) \ +{ \ + set_gc0_##name(val); \ +} \ +static inline void kvm_clear_vz_gc0_##name(struct mips_coproc *cop0, \ + __KVMT##type val) \ +{ \ + clear_gc0_##name(val); \ +} \ +static inline void kvm_change_vz_gc0_##name(struct mips_coproc *cop0, \ + __KVMT##type mask, \ + __KVMT##type val) \ +{ \ + change_gc0_##name(mask, val); \ +} + +/* Generate VZ guest context save/restore to/from saved context */ +#define __BUILD_KVM_SAVE_VZ(name, _reg, sel) \ +static inline void kvm_restore_gc0_##name(struct mips_coproc *cop0) \ +{ \ + write_gc0_##name(cop0->reg[(_reg)][(sel)]); \ +} \ +static inline void kvm_save_gc0_##name(struct mips_coproc *cop0) \ +{ \ + cop0->reg[(_reg)][(sel)] = read_gc0_##name(); \ +} + +/* + * __BUILD_KVM_$ops_WRAP(): kvm_$op_$name1() -> kvm_$op_$name2() + * These wrap a set of operations to provide them with a different name. + */ + +/* Generate simple accessor wrapper */ +#define __BUILD_KVM_RW_WRAP(name1, name2, type) \ +static inline __KVMT##type kvm_read_##name1(struct mips_coproc *cop0) \ +{ \ + return kvm_read_##name2(cop0); \ +} \ +static inline void kvm_write_##name1(struct mips_coproc *cop0, \ + __KVMT##type val) \ +{ \ + kvm_write_##name2(cop0, val); \ +} + +/* Generate bitwise modifier wrapper */ +#define __BUILD_KVM_SET_WRAP(name1, name2, type) \ +static inline void kvm_set_##name1(struct mips_coproc *cop0, \ + __KVMT##type val) \ { \ - kvm_clear_c0_guest_ebase(cop0, change); \ - kvm_set_c0_guest_ebase(cop0, ((val) & (change))); \ + kvm_set_##name2(cop0, val); \ +} \ +static inline void kvm_clear_##name1(struct mips_coproc *cop0, \ + __KVMT##type val) \ +{ \ + kvm_clear_##name2(cop0, val); \ +} \ +static inline void kvm_change_##name1(struct mips_coproc *cop0, \ + __KVMT##type mask, \ + __KVMT##type val) \ +{ \ + kvm_change_##name2(cop0, mask, val); \ } +/* + * __BUILD_KVM_$ops_SW(): kvm_$op_c0_guest_$reg() -> kvm_$op_sw_gc0_$reg() + * These generate accessors operating on the saved context in RAM, and wrap them + * with the common guest C0 accessors (for use by common emulation code). + */ + +#define __BUILD_KVM_RW_SW(name, type, _reg, sel) \ + __BUILD_KVM_RW_SAVED(name, type, _reg, sel) \ + __BUILD_KVM_RW_WRAP(c0_guest_##name, sw_gc0_##name, type) + +#define __BUILD_KVM_SET_SW(name, type, _reg, sel) \ + __BUILD_KVM_SET_SAVED(name, type, _reg, sel) \ + __BUILD_KVM_SET_WRAP(c0_guest_##name, sw_gc0_##name, type) + +#define __BUILD_KVM_ATOMIC_SW(name, type, _reg, sel) \ + __BUILD_KVM_ATOMIC_SAVED(name, type, _reg, sel) \ + __BUILD_KVM_SET_WRAP(c0_guest_##name, sw_gc0_##name, type) + +#ifndef CONFIG_KVM_MIPS_VZ + +/* + * T&E (trap & emulate software based virtualisation) + * We generate the common accessors operating exclusively on the saved context + * in RAM. 
+ */ + +#define __BUILD_KVM_RW_HW __BUILD_KVM_RW_SW +#define __BUILD_KVM_SET_HW __BUILD_KVM_SET_SW +#define __BUILD_KVM_ATOMIC_HW __BUILD_KVM_ATOMIC_SW + +#else + +/* + * VZ (hardware assisted virtualisation) + * These macros use the active guest state in VZ mode (hardware registers), + */ + +/* + * __BUILD_KVM_$ops_HW(): kvm_$op_c0_guest_$reg() -> kvm_$op_vz_gc0_$reg() + * These generate accessors operating on the VZ guest context in hardware, and + * wrap them with the common guest C0 accessors (for use by common emulation + * code). + * + * Accessors operating on the saved context in RAM are also generated to allow + * convenient explicit saving and restoring of the state. + */ + +#define __BUILD_KVM_RW_HW(name, type, _reg, sel) \ + __BUILD_KVM_RW_SAVED(name, type, _reg, sel) \ + __BUILD_KVM_RW_VZ(name, type, _reg, sel) \ + __BUILD_KVM_RW_WRAP(c0_guest_##name, vz_gc0_##name, type) \ + __BUILD_KVM_SAVE_VZ(name, _reg, sel) + +#define __BUILD_KVM_SET_HW(name, type, _reg, sel) \ + __BUILD_KVM_SET_SAVED(name, type, _reg, sel) \ + __BUILD_KVM_SET_VZ(name, type, _reg, sel) \ + __BUILD_KVM_SET_WRAP(c0_guest_##name, vz_gc0_##name, type) + +/* + * We can't do atomic modifications of COP0 state if hardware can modify it. + * Races must be handled explicitly. + */ +#define __BUILD_KVM_ATOMIC_HW __BUILD_KVM_SET_HW + +#endif + +/* + * Define accessors for CP0 registers that are accessible to the guest. These + * are primarily used by common emulation code, which may need to access the + * registers differently depending on the implementation. + * + * fns_hw/sw name type reg num select + */ +__BUILD_KVM_RW_HW(index, 32, MIPS_CP0_TLB_INDEX, 0) +__BUILD_KVM_RW_HW(entrylo0, l, MIPS_CP0_TLB_LO0, 0) +__BUILD_KVM_RW_HW(entrylo1, l, MIPS_CP0_TLB_LO1, 0) +__BUILD_KVM_RW_HW(context, l, MIPS_CP0_TLB_CONTEXT, 0) +__BUILD_KVM_RW_HW(contextconfig, 32, MIPS_CP0_TLB_CONTEXT, 1) +__BUILD_KVM_RW_HW(userlocal, l, MIPS_CP0_TLB_CONTEXT, 2) +__BUILD_KVM_RW_HW(xcontextconfig, l, MIPS_CP0_TLB_CONTEXT, 3) +__BUILD_KVM_RW_HW(pagemask, l, MIPS_CP0_TLB_PG_MASK, 0) +__BUILD_KVM_RW_HW(pagegrain, 32, MIPS_CP0_TLB_PG_MASK, 1) +__BUILD_KVM_RW_HW(segctl0, l, MIPS_CP0_TLB_PG_MASK, 2) +__BUILD_KVM_RW_HW(segctl1, l, MIPS_CP0_TLB_PG_MASK, 3) +__BUILD_KVM_RW_HW(segctl2, l, MIPS_CP0_TLB_PG_MASK, 4) +__BUILD_KVM_RW_HW(pwbase, l, MIPS_CP0_TLB_PG_MASK, 5) +__BUILD_KVM_RW_HW(pwfield, l, MIPS_CP0_TLB_PG_MASK, 6) +__BUILD_KVM_RW_HW(pwsize, l, MIPS_CP0_TLB_PG_MASK, 7) +__BUILD_KVM_RW_HW(wired, 32, MIPS_CP0_TLB_WIRED, 0) +__BUILD_KVM_RW_HW(pwctl, 32, MIPS_CP0_TLB_WIRED, 6) +__BUILD_KVM_RW_HW(hwrena, 32, MIPS_CP0_HWRENA, 0) +__BUILD_KVM_RW_HW(badvaddr, l, MIPS_CP0_BAD_VADDR, 0) +__BUILD_KVM_RW_HW(badinstr, 32, MIPS_CP0_BAD_VADDR, 1) +__BUILD_KVM_RW_HW(badinstrp, 32, MIPS_CP0_BAD_VADDR, 2) +__BUILD_KVM_RW_SW(count, 32, MIPS_CP0_COUNT, 0) +__BUILD_KVM_RW_HW(entryhi, l, MIPS_CP0_TLB_HI, 0) +__BUILD_KVM_RW_HW(compare, 32, MIPS_CP0_COMPARE, 0) +__BUILD_KVM_RW_HW(status, 32, MIPS_CP0_STATUS, 0) +__BUILD_KVM_RW_HW(intctl, 32, MIPS_CP0_STATUS, 1) +__BUILD_KVM_RW_HW(cause, 32, MIPS_CP0_CAUSE, 0) +__BUILD_KVM_RW_HW(epc, l, MIPS_CP0_EXC_PC, 0) +__BUILD_KVM_RW_SW(prid, 32, MIPS_CP0_PRID, 0) +__BUILD_KVM_RW_HW(ebase, l, MIPS_CP0_PRID, 1) +__BUILD_KVM_RW_HW(config, 32, MIPS_CP0_CONFIG, 0) +__BUILD_KVM_RW_HW(config1, 32, MIPS_CP0_CONFIG, 1) +__BUILD_KVM_RW_HW(config2, 32, MIPS_CP0_CONFIG, 2) +__BUILD_KVM_RW_HW(config3, 32, MIPS_CP0_CONFIG, 3) +__BUILD_KVM_RW_HW(config4, 32, MIPS_CP0_CONFIG, 4) +__BUILD_KVM_RW_HW(config5, 32, MIPS_CP0_CONFIG, 5) 
+__BUILD_KVM_RW_HW(config6, 32, MIPS_CP0_CONFIG, 6) +__BUILD_KVM_RW_HW(config7, 32, MIPS_CP0_CONFIG, 7) +__BUILD_KVM_RW_SW(maari, l, MIPS_CP0_LLADDR, 2) +__BUILD_KVM_RW_HW(xcontext, l, MIPS_CP0_TLB_XCONTEXT, 0) +__BUILD_KVM_RW_HW(errorepc, l, MIPS_CP0_ERROR_PC, 0) +__BUILD_KVM_RW_HW(kscratch1, l, MIPS_CP0_DESAVE, 2) +__BUILD_KVM_RW_HW(kscratch2, l, MIPS_CP0_DESAVE, 3) +__BUILD_KVM_RW_HW(kscratch3, l, MIPS_CP0_DESAVE, 4) +__BUILD_KVM_RW_HW(kscratch4, l, MIPS_CP0_DESAVE, 5) +__BUILD_KVM_RW_HW(kscratch5, l, MIPS_CP0_DESAVE, 6) +__BUILD_KVM_RW_HW(kscratch6, l, MIPS_CP0_DESAVE, 7) + +/* Bitwise operations (on HW state) */ +__BUILD_KVM_SET_HW(status, 32, MIPS_CP0_STATUS, 0) +/* Cause can be modified asynchronously from hardirq hrtimer callback */ +__BUILD_KVM_ATOMIC_HW(cause, 32, MIPS_CP0_CAUSE, 0) +__BUILD_KVM_SET_HW(ebase, l, MIPS_CP0_PRID, 1) + +/* Bitwise operations (on saved state) */ +__BUILD_KVM_SET_SAVED(config, 32, MIPS_CP0_CONFIG, 0) +__BUILD_KVM_SET_SAVED(config1, 32, MIPS_CP0_CONFIG, 1) +__BUILD_KVM_SET_SAVED(config2, 32, MIPS_CP0_CONFIG, 2) +__BUILD_KVM_SET_SAVED(config3, 32, MIPS_CP0_CONFIG, 3) +__BUILD_KVM_SET_SAVED(config4, 32, MIPS_CP0_CONFIG, 4) +__BUILD_KVM_SET_SAVED(config5, 32, MIPS_CP0_CONFIG, 5) + /* Helpers */ static inline bool kvm_mips_guest_can_have_fpu(struct kvm_vcpu_arch *vcpu) @@ -531,6 +775,10 @@ struct kvm_mips_callbacks { int (*handle_msa_fpe)(struct kvm_vcpu *vcpu); int (*handle_fpe)(struct kvm_vcpu *vcpu); int (*handle_msa_disabled)(struct kvm_vcpu *vcpu); + int (*handle_guest_exit)(struct kvm_vcpu *vcpu); + int (*hardware_enable)(void); + void (*hardware_disable)(void); + int (*check_extension)(struct kvm *kvm, long ext); int (*vcpu_init)(struct kvm_vcpu *vcpu); void (*vcpu_uninit)(struct kvm_vcpu *vcpu); int (*vcpu_setup)(struct kvm_vcpu *vcpu); @@ -599,6 +847,10 @@ u32 kvm_get_user_asid(struct kvm_vcpu *vcpu); u32 kvm_get_commpage_asid (struct kvm_vcpu *vcpu); +#ifdef CONFIG_KVM_MIPS_VZ +int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr, + struct kvm_vcpu *vcpu, bool write_fault); +#endif extern int kvm_mips_handle_kseg0_tlb_fault(unsigned long badbaddr, struct kvm_vcpu *vcpu, bool write_fault); @@ -625,6 +877,18 @@ extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi, extern int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi); +#ifdef CONFIG_KVM_MIPS_VZ +int kvm_vz_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi); +int kvm_vz_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long gva, + unsigned long *gpa); +void kvm_vz_local_flush_roottlb_all_guests(void); +void kvm_vz_local_flush_guesttlb_all(void); +void kvm_vz_save_guesttlb(struct kvm_mips_tlb *buf, unsigned int index, + unsigned int count); +void kvm_vz_load_guesttlb(const struct kvm_mips_tlb *buf, unsigned int index, + unsigned int count); +#endif + void kvm_mips_suspend_mm(int cpu); void kvm_mips_resume_mm(int cpu); @@ -795,7 +1059,7 @@ extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, u32 kvm_mips_read_count(struct kvm_vcpu *vcpu); void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count); void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack); -void kvm_mips_init_count(struct kvm_vcpu *vcpu); +void kvm_mips_init_count(struct kvm_vcpu *vcpu, unsigned long count_hz); int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl); int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume); int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz); @@ -803,6 
+1067,20 @@ void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu); void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu); enum hrtimer_restart kvm_mips_count_timeout(struct kvm_vcpu *vcpu); +/* fairly internal functions requiring some care to use */ +int kvm_mips_count_disabled(struct kvm_vcpu *vcpu); +ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count); +int kvm_mips_restore_hrtimer(struct kvm_vcpu *vcpu, ktime_t before, + u32 count, int min_drift); + +#ifdef CONFIG_KVM_MIPS_VZ +void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu); +void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu); +#else +static inline void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu) {} +static inline void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu) {} +#endif + enum emulation_result kvm_mips_check_privilege(u32 cause, u32 *opc, struct kvm_run *run, @@ -827,11 +1105,20 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, struct kvm_run *run, struct kvm_vcpu *vcpu); +/* COP0 */ +enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu); + unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu); unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu); unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu); unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu); +/* Hypercalls (hypcall.c) */ + +enum emulation_result kvm_mips_emul_hypcall(struct kvm_vcpu *vcpu, + union mips_instruction inst); +int kvm_mips_handle_hypcall(struct kvm_vcpu *vcpu); + /* Dynamic binary translation */ extern int kvm_mips_trans_cache_index(union mips_instruction inst, u32 *opc, struct kvm_vcpu *vcpu); @@ -846,7 +1133,6 @@ extern int kvm_mips_trans_mtc0(union mips_instruction inst, u32 *opc, extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu); extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); -static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, diff --git a/arch/mips/include/asm/maar.h b/arch/mips/include/asm/maar.h index 21d9607c80d7..e10f78befbd9 100644 --- a/arch/mips/include/asm/maar.h +++ b/arch/mips/include/asm/maar.h @@ -36,7 +36,7 @@ unsigned platform_maar_init(unsigned num_pairs); * @upper: The highest address that the MAAR pair will affect. Must be * aligned to one byte before a 2^16 byte boundary. * @attrs: The accessibility attributes to program, eg. MIPS_MAAR_S. The - * MIPS_MAAR_V attribute will automatically be set. + * MIPS_MAAR_VL attribute will automatically be set. * * Program the pair of MAAR registers specified by idx to apply the attributes * specified by attrs to the range of addresses from lower to higher. @@ -49,10 +49,10 @@ static inline void write_maar_pair(unsigned idx, phys_addr_t lower, BUG_ON(((upper & 0xffff) != 0xffff) || ((upper & ~0xffffull) & ~(MIPS_MAAR_ADDR << 4))); - /* Automatically set MIPS_MAAR_V */ - attrs |= MIPS_MAAR_V; + /* Automatically set MIPS_MAAR_VL */ + attrs |= MIPS_MAAR_VL; - /* Write the upper address & attributes (only MIPS_MAAR_V matters) */ + /* Write the upper address & attributes (only MIPS_MAAR_VL matters) */ write_c0_maari(idx << 1); back_to_back_c0_hazard(); write_c0_maar(((upper >> 4) & MIPS_MAAR_ADDR) | attrs); @@ -81,7 +81,7 @@ extern void maar_init(void); * @upper: The highest address that the MAAR pair will affect. Must be * aligned to one byte before a 2^16 byte boundary. * @attrs: The accessibility attributes to program, eg. MIPS_MAAR_S. 
The - * MIPS_MAAR_V attribute will automatically be set. + * MIPS_MAAR_VL attribute will automatically be set. * * Describes the configuration of a pair of Memory Accessibility Attribute * Registers - applying attributes from attrs to the range of physical diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index f8d1d2f1d80d..6875b69f59f7 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -34,8 +34,10 @@ */ #ifdef __ASSEMBLY__ #define _ULCAST_ +#define _U64CAST_ #else #define _ULCAST_ (unsigned long) +#define _U64CAST_ (u64) #endif /* @@ -217,8 +219,10 @@ /* * Wired register bits */ -#define MIPSR6_WIRED_LIMIT (_ULCAST_(0xffff) << 16) -#define MIPSR6_WIRED_WIRED (_ULCAST_(0xffff) << 0) +#define MIPSR6_WIRED_LIMIT_SHIFT 16 +#define MIPSR6_WIRED_LIMIT (_ULCAST_(0xffff) << MIPSR6_WIRED_LIMIT_SHIFT) +#define MIPSR6_WIRED_WIRED_SHIFT 0 +#define MIPSR6_WIRED_WIRED (_ULCAST_(0xffff) << MIPSR6_WIRED_WIRED_SHIFT) /* * Values used for computation of new tlb entries @@ -645,6 +649,7 @@ #define MIPS_CONF5_LLB (_ULCAST_(1) << 4) #define MIPS_CONF5_MVH (_ULCAST_(1) << 5) #define MIPS_CONF5_VP (_ULCAST_(1) << 7) +#define MIPS_CONF5_SBRI (_ULCAST_(1) << 6) #define MIPS_CONF5_FRE (_ULCAST_(1) << 8) #define MIPS_CONF5_UFE (_ULCAST_(1) << 9) #define MIPS_CONF5_MSAEN (_ULCAST_(1) << 27) @@ -719,10 +724,14 @@ #define XLR_PERFCTRL_ALLTHREADS (_ULCAST_(1) << 13) /* MAAR bit definitions */ +#define MIPS_MAAR_VH (_U64CAST_(1) << 63) #define MIPS_MAAR_ADDR ((BIT_ULL(BITS_PER_LONG - 12) - 1) << 12) #define MIPS_MAAR_ADDR_SHIFT 12 #define MIPS_MAAR_S (_ULCAST_(1) << 1) -#define MIPS_MAAR_V (_ULCAST_(1) << 0) +#define MIPS_MAAR_VL (_ULCAST_(1) << 0) + +/* MAARI bit definitions */ +#define MIPS_MAARI_INDEX (_ULCAST_(0x3f) << 0) /* EBase bit definitions */ #define MIPS_EBASE_CPUNUM_SHIFT 0 @@ -736,6 +745,10 @@ #define MIPS_CMGCRB_BASE 11 #define MIPS_CMGCRF_BASE (~_ULCAST_((1 << MIPS_CMGCRB_BASE) - 1)) +/* LLAddr bit definitions */ +#define MIPS_LLADDR_LLB_SHIFT 0 +#define MIPS_LLADDR_LLB (_ULCAST_(1) << MIPS_LLADDR_LLB_SHIFT) + /* * Bits in the MIPS32 Memory Segmentation registers. */ @@ -961,6 +974,22 @@ /* Flush FTLB */ #define LOONGSON_DIAG_FTLB (_ULCAST_(1) << 13) +/* CvmCtl register field definitions */ +#define CVMCTL_IPPCI_SHIFT 7 +#define CVMCTL_IPPCI (_U64CAST_(0x7) << CVMCTL_IPPCI_SHIFT) +#define CVMCTL_IPTI_SHIFT 4 +#define CVMCTL_IPTI (_U64CAST_(0x7) << CVMCTL_IPTI_SHIFT) + +/* CvmMemCtl2 register field definitions */ +#define CVMMEMCTL2_INHIBITTS (_U64CAST_(1) << 17) + +/* CvmVMConfig register field definitions */ +#define CVMVMCONF_DGHT (_U64CAST_(1) << 60) +#define CVMVMCONF_MMUSIZEM1_S 12 +#define CVMVMCONF_MMUSIZEM1 (_U64CAST_(0xff) << CVMVMCONF_MMUSIZEM1_S) +#define CVMVMCONF_RMMUSIZEM1_S 0 +#define CVMVMCONF_RMMUSIZEM1 (_U64CAST_(0xff) << CVMVMCONF_RMMUSIZEM1_S) + /* * Coprocessor 1 (FPU) register names */ @@ -1720,6 +1749,13 @@ do { \ #define read_c0_cvmmemctl() __read_64bit_c0_register($11, 7) #define write_c0_cvmmemctl(val) __write_64bit_c0_register($11, 7, val) + +#define read_c0_cvmmemctl2() __read_64bit_c0_register($16, 6) +#define write_c0_cvmmemctl2(val) __write_64bit_c0_register($16, 6, val) + +#define read_c0_cvmvmconfig() __read_64bit_c0_register($16, 7) +#define write_c0_cvmvmconfig(val) __write_64bit_c0_register($16, 7, val) + /* * The cacheerr registers are not standardized. On OCTEON, they are * 64 bits wide. 
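Note on the MAAR changes above: the rename from MIPS_MAAR_V to MIPS_MAAR_VL appears to reflect that, once Config5.MVH is implemented, the valid bit in the low word ("VL") is paired with the new MIPS_MAAR_VH bit in the upper word. A minimal usage sketch of write_maar_pair() follows; the pair index and address range are hypothetical, and as the helper's comment states it ORs in MIPS_MAAR_VL itself, so callers pass only the extra attribute bits:

/*
 * Illustrative sketch only: mark the first 256 MiB of physical memory as
 * speculatable using MAAR pair 0. lower must be 2^16-aligned and upper must
 * end one byte before a 2^16 boundary, per the kernel-doc above.
 */
write_maar_pair(0, 0x00000000, 0x0fffffff, MIPS_MAAR_S);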
@@ -1989,6 +2025,8 @@ do { \ #define read_gc0_epc() __read_ulong_gc0_register(14, 0) #define write_gc0_epc(val) __write_ulong_gc0_register(14, 0, val) +#define read_gc0_prid() __read_32bit_gc0_register(15, 0) + #define read_gc0_ebase() __read_32bit_gc0_register(15, 1) #define write_gc0_ebase(val) __write_32bit_gc0_register(15, 1, val) @@ -2012,6 +2050,9 @@ do { \ #define write_gc0_config6(val) __write_32bit_gc0_register(16, 6, val) #define write_gc0_config7(val) __write_32bit_gc0_register(16, 7, val) +#define read_gc0_lladdr() __read_ulong_gc0_register(17, 0) +#define write_gc0_lladdr(val) __write_ulong_gc0_register(17, 0, val) + #define read_gc0_watchlo0() __read_ulong_gc0_register(18, 0) #define read_gc0_watchlo1() __read_ulong_gc0_register(18, 1) #define read_gc0_watchlo2() __read_ulong_gc0_register(18, 2) @@ -2090,6 +2131,19 @@ do { \ #define write_gc0_kscratch5(val) __write_ulong_gc0_register(31, 6, val) #define write_gc0_kscratch6(val) __write_ulong_gc0_register(31, 7, val) +/* Cavium OCTEON (cnMIPS) */ +#define read_gc0_cvmcount() __read_ulong_gc0_register(9, 6) +#define write_gc0_cvmcount(val) __write_ulong_gc0_register(9, 6, val) + +#define read_gc0_cvmctl() __read_64bit_gc0_register(9, 7) +#define write_gc0_cvmctl(val) __write_64bit_gc0_register(9, 7, val) + +#define read_gc0_cvmmemctl() __read_64bit_gc0_register(11, 7) +#define write_gc0_cvmmemctl(val) __write_64bit_gc0_register(11, 7, val) + +#define read_gc0_cvmmemctl2() __read_64bit_gc0_register(16, 6) +#define write_gc0_cvmmemctl2(val) __write_64bit_gc0_register(16, 6, val) + /* * Macros to access the floating point coprocessor control registers */ @@ -2696,9 +2750,11 @@ __BUILD_SET_C0(brcm_mode) */ #define __BUILD_SET_GC0(name) __BUILD_SET_COMMON(gc0_##name) +__BUILD_SET_GC0(wired) __BUILD_SET_GC0(status) __BUILD_SET_GC0(cause) __BUILD_SET_GC0(ebase) +__BUILD_SET_GC0(config1) /* * Return low 10 bits of ebase. diff --git a/arch/mips/include/asm/tlb.h b/arch/mips/include/asm/tlb.h index dd179fd8acda..939734de4359 100644 --- a/arch/mips/include/asm/tlb.h +++ b/arch/mips/include/asm/tlb.h @@ -21,9 +21,11 @@ */ #define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) -#define UNIQUE_ENTRYHI(idx) \ - ((CKSEG0 + ((idx) << (PAGE_SHIFT + 1))) | \ +#define _UNIQUE_ENTRYHI(base, idx) \ + (((base) + ((idx) << (PAGE_SHIFT + 1))) | \ (cpu_has_tlbinv ? MIPS_ENTRYHI_EHINV : 0)) +#define UNIQUE_ENTRYHI(idx) _UNIQUE_ENTRYHI(CKSEG0, idx) +#define UNIQUE_GUEST_ENTRYHI(idx) _UNIQUE_ENTRYHI(CKSEG1, idx) static inline unsigned int num_wired_entries(void) { diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h index 77429d1622b3..b5e46ae872d3 100644 --- a/arch/mips/include/uapi/asm/inst.h +++ b/arch/mips/include/uapi/asm/inst.h @@ -179,7 +179,7 @@ enum cop0_coi_func { tlbr_op = 0x01, tlbwi_op = 0x02, tlbwr_op = 0x06, tlbp_op = 0x08, rfe_op = 0x10, eret_op = 0x18, - wait_op = 0x20, + wait_op = 0x20, hypcall_op = 0x28 }; /* diff --git a/arch/mips/include/uapi/asm/kvm.h b/arch/mips/include/uapi/asm/kvm.h index a8a0199bf760..0318c6b442ab 100644 --- a/arch/mips/include/uapi/asm/kvm.h +++ b/arch/mips/include/uapi/asm/kvm.h @@ -21,6 +21,8 @@ #define __KVM_HAVE_READONLY_MEM +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + /* * for KVM_GET_REGS and KVM_SET_REGS * @@ -54,9 +56,14 @@ struct kvm_fpu { * Register set = 0: GP registers from kvm_regs (see definitions below). * * Register set = 1: CP0 registers. - * bits[15..8] - Must be zero. - * bits[7..3] - Register 'rd' index. - * bits[2..0] - Register 'sel' index. 
+ * bits[15..8] - COP0 register set. + * + * COP0 register set = 0: Main CP0 registers. + * bits[7..3] - Register 'rd' index. + * bits[2..0] - Register 'sel' index. + * + * COP0 register set = 1: MAARs. + * bits[7..0] - MAAR index. * * Register set = 2: KVM specific registers (see definitions below). * @@ -115,6 +122,15 @@ struct kvm_fpu { /* + * KVM_REG_MIPS_CP0 - Coprocessor 0 registers. + */ + +#define KVM_REG_MIPS_MAAR (KVM_REG_MIPS_CP0 | (1 << 8)) +#define KVM_REG_MIPS_CP0_MAAR(n) (KVM_REG_MIPS_MAAR | \ + KVM_REG_SIZE_U64 | (n)) + + +/* * KVM_REG_MIPS_KVM - KVM specific control registers. */ diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 12422fd4af23..3382892544f0 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -289,6 +289,8 @@ static void cpu_set_fpu_opts(struct cpuinfo_mips *c) MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) { if (c->fpu_id & MIPS_FPIR_3D) c->ases |= MIPS_ASE_MIPS3D; + if (c->fpu_id & MIPS_FPIR_UFRP) + c->options |= MIPS_CPU_UFR; if (c->fpu_id & MIPS_FPIR_FREP) c->options |= MIPS_CPU_FRE; } @@ -1003,7 +1005,8 @@ static inline unsigned int decode_guest_config3(struct cpuinfo_mips *c) unsigned int config3, config3_dyn; probe_gc0_config_dyn(config3, config3, config3_dyn, - MIPS_CONF_M | MIPS_CONF3_MSA | MIPS_CONF3_CTXTC); + MIPS_CONF_M | MIPS_CONF3_MSA | MIPS_CONF3_ULRI | + MIPS_CONF3_CTXTC); if (config3 & MIPS_CONF3_CTXTC) c->guest.options |= MIPS_CPU_CTXTC; @@ -1013,6 +1016,9 @@ static inline unsigned int decode_guest_config3(struct cpuinfo_mips *c) if (config3 & MIPS_CONF3_PW) c->guest.options |= MIPS_CPU_HTW; + if (config3 & MIPS_CONF3_ULRI) + c->guest.options |= MIPS_CPU_ULRI; + if (config3 & MIPS_CONF3_SC) c->guest.options |= MIPS_CPU_SEGMENTS; @@ -1051,7 +1057,7 @@ static inline unsigned int decode_guest_config5(struct cpuinfo_mips *c) unsigned int config5, config5_dyn; probe_gc0_config_dyn(config5, config5, config5_dyn, - MIPS_CONF_M | MIPS_CONF5_MRP); + MIPS_CONF_M | MIPS_CONF5_MVH | MIPS_CONF5_MRP); if (config5 & MIPS_CONF5_MRP) c->guest.options |= MIPS_CPU_MAAR; @@ -1061,6 +1067,9 @@ static inline unsigned int decode_guest_config5(struct cpuinfo_mips *c) if (config5 & MIPS_CONF5_LLB) c->guest.options |= MIPS_CPU_RW_LLB; + if (config5 & MIPS_CONF5_MVH) + c->guest.options |= MIPS_CPU_MVH; + if (config5 & MIPS_CONF_M) c->guest.conf |= BIT(6); return config5 & MIPS_CONF_M; diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index a7f81261c781..c036157fb891 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c @@ -70,6 +70,7 @@ EXPORT_SYMBOL(perf_irq); */ unsigned int mips_hpt_frequency; +EXPORT_SYMBOL_GPL(mips_hpt_frequency); /* * This function exists in order to cause an error due to a duplicate diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig index 65067327db12..50a722dfb236 100644 --- a/arch/mips/kvm/Kconfig +++ b/arch/mips/kvm/Kconfig @@ -26,11 +26,34 @@ config KVM select SRCU ---help--- Support for hosting Guest kernels. - Currently supported on MIPS32 processors. + +choice + prompt "Virtualization mode" + depends on KVM + default KVM_MIPS_TE + +config KVM_MIPS_TE + bool "Trap & Emulate" + ---help--- + Use trap and emulate to virtualize 32-bit guests in user mode. This + does not require any special hardware Virtualization support beyond + standard MIPS32/64 r2 or later, but it does require the guest kernel + to be configured with CONFIG_KVM_GUEST=y so that it resides in the + user address segment. 
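The new COP0 "register set = 1" encoding above exposes the guest MAARs to userspace through the usual one-reg interface. A hedged userspace sketch, assuming headers from a kernel with this series applied and an already-created VCPU file descriptor, could read guest MAAR 0 like so (KVM_SET_ONE_REG writes it back with the same ID):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Read guest MAAR 0 via the new KVM_REG_MIPS_CP0_MAAR(n) encoding, i.e.
 * KVM_REG_MIPS_CP0 | (1 << 8) | KVM_REG_SIZE_U64 | n. */
static int read_guest_maar0(int vcpu_fd, uint64_t *val)
{
	struct kvm_one_reg reg = {
		.id   = KVM_REG_MIPS_CP0_MAAR(0),
		.addr = (uintptr_t)val,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}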
+ +config KVM_MIPS_VZ + bool "MIPS Virtualization (VZ) ASE" + ---help--- + Use the MIPS Virtualization (VZ) ASE to virtualize guests. This + supports running unmodified guest kernels (with CONFIG_KVM_GUEST=n), + but requires hardware support. + +endchoice config KVM_MIPS_DYN_TRANS bool "KVM/MIPS: Dynamic binary translation to reduce traps" - depends on KVM + depends on KVM_MIPS_TE + default y ---help--- When running in Trap & Emulate mode patch privileged instructions to reduce the number of traps. diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile index 847429de780d..45d90f5d5177 100644 --- a/arch/mips/kvm/Makefile +++ b/arch/mips/kvm/Makefile @@ -9,8 +9,15 @@ common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o kvm-objs := $(common-objs-y) mips.o emulate.o entry.o \ interrupt.o stats.o commpage.o \ - dyntrans.o trap_emul.o fpu.o + fpu.o +kvm-objs += hypcall.o kvm-objs += mmu.o +ifdef CONFIG_KVM_MIPS_VZ +kvm-objs += vz.o +else +kvm-objs += dyntrans.o +kvm-objs += trap_emul.o +endif obj-$(CONFIG_KVM) += kvm.o obj-y += callback.o tlb.o diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index d40cfaad4529..4144bfaef137 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -308,7 +308,7 @@ int kvm_get_badinstrp(u32 *opc, struct kvm_vcpu *vcpu, u32 *out) * CP0_Cause.DC bit or the count_ctl.DC bit. * 0 otherwise (in which case CP0_Count timer is running). */ -static inline int kvm_mips_count_disabled(struct kvm_vcpu *vcpu) +int kvm_mips_count_disabled(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; @@ -467,7 +467,7 @@ u32 kvm_mips_read_count(struct kvm_vcpu *vcpu) * * Returns: The ktime at the point of freeze. */ -static ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count) +ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count) { ktime_t now; @@ -517,6 +517,82 @@ static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu, } /** + * kvm_mips_restore_hrtimer() - Restore hrtimer after a gap, updating expiry. + * @vcpu: Virtual CPU. + * @before: Time before Count was saved, lower bound of drift calculation. + * @count: CP0_Count at point of restore. + * @min_drift: Minimum amount of drift permitted before correction. + * Must be <= 0. + * + * Restores the timer from a particular @count, accounting for drift. This can + * be used in conjunction with kvm_mips_freeze_timer() when a hardware timer is + * to be used for a period of time, but the exact ktime corresponding to the + * final Count that must be restored is not known. + * + * It is gauranteed that a timer interrupt immediately after restore will be + * handled, but not if CP0_Compare is exactly at @count. That case should + * already be handled when the hardware timer state is saved. + * + * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is not + * stopped). + * + * Returns: Amount of correction to count_bias due to drift. + */ +int kvm_mips_restore_hrtimer(struct kvm_vcpu *vcpu, ktime_t before, + u32 count, int min_drift) +{ + ktime_t now, count_time; + u32 now_count, before_count; + u64 delta; + int drift, ret = 0; + + /* Calculate expected count at before */ + before_count = vcpu->arch.count_bias + + kvm_mips_ktime_to_count(vcpu, before); + + /* + * Detect significantly negative drift, where count is lower than + * expected. Some negative drift is expected when hardware counter is + * set after kvm_mips_freeze_timer(), and it is harmless to allow the + * time to jump forwards a little, within reason. 
If the drift is too + * significant, adjust the bias to avoid a big Guest.CP0_Count jump. + */ + drift = count - before_count; + if (drift < min_drift) { + count_time = before; + vcpu->arch.count_bias += drift; + ret = drift; + goto resume; + } + + /* Calculate expected count right now */ + now = ktime_get(); + now_count = vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now); + + /* + * Detect positive drift, where count is higher than expected, and + * adjust the bias to avoid guest time going backwards. + */ + drift = count - now_count; + if (drift > 0) { + count_time = now; + vcpu->arch.count_bias += drift; + ret = drift; + goto resume; + } + + /* Subtract nanosecond delta to find ktime when count was read */ + delta = (u64)(u32)(now_count - count); + delta = div_u64(delta * NSEC_PER_SEC, vcpu->arch.count_hz); + count_time = ktime_sub_ns(now, delta); + +resume: + /* Resume using the calculated ktime */ + kvm_mips_resume_hrtimer(vcpu, count_time, count); + return ret; +} + +/** * kvm_mips_write_count() - Modify the count and update timer. * @vcpu: Virtual CPU. * @count: Guest CP0_Count value to set. @@ -543,16 +619,15 @@ void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count) /** * kvm_mips_init_count() - Initialise timer. * @vcpu: Virtual CPU. + * @count_hz: Frequency of timer. * - * Initialise the timer to a sensible frequency, namely 100MHz, zero it, and set - * it going if it's enabled. + * Initialise the timer to the specified frequency, zero it, and set it going if + * it's enabled. */ -void kvm_mips_init_count(struct kvm_vcpu *vcpu) +void kvm_mips_init_count(struct kvm_vcpu *vcpu, unsigned long count_hz) { - /* 100 MHz */ - vcpu->arch.count_hz = 100*1000*1000; - vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32, - vcpu->arch.count_hz); + vcpu->arch.count_hz = count_hz; + vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32, count_hz); vcpu->arch.count_dyn_bias = 0; /* Starting at 0 */ @@ -622,7 +697,9 @@ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack) struct mips_coproc *cop0 = vcpu->arch.cop0; int dc; u32 old_compare = kvm_read_c0_guest_compare(cop0); - ktime_t now; + s32 delta = compare - old_compare; + u32 cause; + ktime_t now = ktime_set(0, 0); /* silence bogus GCC warning */ u32 count; /* if unchanged, must just be an ack */ @@ -634,6 +711,21 @@ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack) return; } + /* + * If guest CP0_Compare moves forward, CP0_GTOffset should be adjusted + * too to prevent guest CP0_Count hitting guest CP0_Compare. + * + * The new GTOffset corresponds to the new value of CP0_Compare, and is + * set prior to it being written into the guest context. We disable + * preemption until the new value is written to prevent restore of a + * GTOffset corresponding to the old CP0_Compare value. + */ + if (IS_ENABLED(CONFIG_KVM_MIPS_VZ) && delta > 0) { + preempt_disable(); + write_c0_gtoffset(compare - read_c0_count()); + back_to_back_c0_hazard(); + } + /* freeze_hrtimer() takes care of timer interrupts <= count */ dc = kvm_mips_count_disabled(vcpu); if (!dc) @@ -641,12 +733,36 @@ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack) if (ack) kvm_mips_callbacks->dequeue_timer_int(vcpu); + else if (IS_ENABLED(CONFIG_KVM_MIPS_VZ)) + /* + * With VZ, writing CP0_Compare acks (clears) CP0_Cause.TI, so + * preserve guest CP0_Cause.TI if we don't want to ack it. 
+ */ + cause = kvm_read_c0_guest_cause(cop0); kvm_write_c0_guest_compare(cop0, compare); + if (IS_ENABLED(CONFIG_KVM_MIPS_VZ)) { + if (delta > 0) + preempt_enable(); + + back_to_back_c0_hazard(); + + if (!ack && cause & CAUSEF_TI) + kvm_write_c0_guest_cause(cop0, cause); + } + /* resume_hrtimer() takes care of timer interrupts > count */ if (!dc) kvm_mips_resume_hrtimer(vcpu, now, count); + + /* + * If guest CP0_Compare is moving backward, we delay CP0_GTOffset change + * until after the new CP0_Compare is written, otherwise new guest + * CP0_Count could hit new guest CP0_Compare. + */ + if (IS_ENABLED(CONFIG_KVM_MIPS_VZ) && delta <= 0) + write_c0_gtoffset(compare - read_c0_count()); } /** @@ -857,6 +973,7 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu) ++vcpu->stat.wait_exits; trace_kvm_exit(vcpu, KVM_TRACE_EXIT_WAIT); if (!vcpu->arch.pending_exceptions) { + kvm_vz_lose_htimer(vcpu); vcpu->arch.wait = 1; kvm_vcpu_block(vcpu); @@ -865,7 +982,7 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu) * check if any I/O interrupts are pending. */ if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) { - clear_bit(KVM_REQ_UNHALT, &vcpu->requests); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; } } @@ -873,17 +990,62 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu) return EMULATE_DONE; } -/* - * XXXKYMA: Linux doesn't seem to use TLBR, return EMULATE_FAIL for now so that - * we can catch this, if things ever change - */ +static void kvm_mips_change_entryhi(struct kvm_vcpu *vcpu, + unsigned long entryhi) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; + int cpu, i; + u32 nasid = entryhi & KVM_ENTRYHI_ASID; + + if (((kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID) != nasid)) { + trace_kvm_asid_change(vcpu, kvm_read_c0_guest_entryhi(cop0) & + KVM_ENTRYHI_ASID, nasid); + + /* + * Flush entries from the GVA page tables. + * Guest user page table will get flushed lazily on re-entry to + * guest user if the guest ASID actually changes. + */ + kvm_mips_flush_gva_pt(kern_mm->pgd, KMF_KERN); + + /* + * Regenerate/invalidate kernel MMU context. + * The user MMU context will be regenerated lazily on re-entry + * to guest user if the guest ASID actually changes. 
+ */ + preempt_disable(); + cpu = smp_processor_id(); + get_new_mmu_context(kern_mm, cpu); + for_each_possible_cpu(i) + if (i != cpu) + cpu_context(i, kern_mm) = 0; + preempt_enable(); + } + kvm_write_c0_guest_entryhi(cop0, entryhi); +} + enum emulation_result kvm_mips_emul_tlbr(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_mips_tlb *tlb; unsigned long pc = vcpu->arch.pc; + int index; - kvm_err("[%#lx] COP0_TLBR [%ld]\n", pc, kvm_read_c0_guest_index(cop0)); - return EMULATE_FAIL; + index = kvm_read_c0_guest_index(cop0); + if (index < 0 || index >= KVM_MIPS_GUEST_TLB_SIZE) { + /* UNDEFINED */ + kvm_debug("[%#lx] TLBR Index %#x out of range\n", pc, index); + index &= KVM_MIPS_GUEST_TLB_SIZE - 1; + } + + tlb = &vcpu->arch.guest_tlb[index]; + kvm_write_c0_guest_pagemask(cop0, tlb->tlb_mask); + kvm_write_c0_guest_entrylo0(cop0, tlb->tlb_lo[0]); + kvm_write_c0_guest_entrylo1(cop0, tlb->tlb_lo[1]); + kvm_mips_change_entryhi(vcpu, tlb->tlb_hi); + + return EMULATE_DONE; } /** @@ -1105,11 +1267,9 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst, struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; - struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; enum emulation_result er = EMULATE_DONE; u32 rt, rd, sel; unsigned long curr_pc; - int cpu, i; /* * Update PC and hold onto current PC in case there is @@ -1143,6 +1303,9 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst, case wait_op: er = kvm_mips_emul_wait(vcpu); break; + case hypcall_op: + er = kvm_mips_emul_hypcall(vcpu, inst); + break; } } else { rt = inst.c0r_format.rt; @@ -1208,44 +1371,8 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst, kvm_change_c0_guest_ebase(cop0, 0x1ffff000, vcpu->arch.gprs[rt]); } else if (rd == MIPS_CP0_TLB_HI && sel == 0) { - u32 nasid = - vcpu->arch.gprs[rt] & KVM_ENTRYHI_ASID; - if (((kvm_read_c0_guest_entryhi(cop0) & - KVM_ENTRYHI_ASID) != nasid)) { - trace_kvm_asid_change(vcpu, - kvm_read_c0_guest_entryhi(cop0) - & KVM_ENTRYHI_ASID, - nasid); - - /* - * Flush entries from the GVA page - * tables. - * Guest user page table will get - * flushed lazily on re-entry to guest - * user if the guest ASID actually - * changes. - */ - kvm_mips_flush_gva_pt(kern_mm->pgd, - KMF_KERN); - - /* - * Regenerate/invalidate kernel MMU - * context. - * The user MMU context will be - * regenerated lazily on re-entry to - * guest user if the guest ASID actually - * changes. 
- */ - preempt_disable(); - cpu = smp_processor_id(); - get_new_mmu_context(kern_mm, cpu); - for_each_possible_cpu(i) - if (i != cpu) - cpu_context(i, kern_mm) = 0; - preempt_enable(); - } - kvm_write_c0_guest_entryhi(cop0, - vcpu->arch.gprs[rt]); + kvm_mips_change_entryhi(vcpu, + vcpu->arch.gprs[rt]); } /* Are we writing to COUNT */ else if ((rd == MIPS_CP0_COUNT) && (sel == 0)) { @@ -1474,9 +1601,8 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst, struct kvm_run *run, struct kvm_vcpu *vcpu) { - enum emulation_result er = EMULATE_DO_MMIO; + enum emulation_result er; u32 rt; - u32 bytes; void *data = run->mmio.data; unsigned long curr_pc; @@ -1491,103 +1617,74 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst, rt = inst.i_format.rt; + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr); + if (run->mmio.phys_addr == KVM_INVALID_ADDR) + goto out_fail; + switch (inst.i_format.opcode) { - case sb_op: - bytes = 1; - if (bytes > sizeof(run->mmio.data)) { - kvm_err("%s: bad MMIO length: %d\n", __func__, - run->mmio.len); - } - run->mmio.phys_addr = - kvm_mips_callbacks->gva_to_gpa(vcpu->arch. - host_cp0_badvaddr); - if (run->mmio.phys_addr == KVM_INVALID_ADDR) { - er = EMULATE_FAIL; - break; - } - run->mmio.len = bytes; - run->mmio.is_write = 1; - vcpu->mmio_needed = 1; - vcpu->mmio_is_write = 1; - *(u8 *) data = vcpu->arch.gprs[rt]; - kvm_debug("OP_SB: eaddr: %#lx, gpr: %#lx, data: %#x\n", - vcpu->arch.host_cp0_badvaddr, vcpu->arch.gprs[rt], - *(u8 *) data); +#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_MIPS_VZ) + case sd_op: + run->mmio.len = 8; + *(u64 *)data = vcpu->arch.gprs[rt]; + kvm_debug("[%#lx] OP_SD: eaddr: %#lx, gpr: %#lx, data: %#llx\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u64 *)data); break; +#endif case sw_op: - bytes = 4; - if (bytes > sizeof(run->mmio.data)) { - kvm_err("%s: bad MMIO length: %d\n", __func__, - run->mmio.len); - } - run->mmio.phys_addr = - kvm_mips_callbacks->gva_to_gpa(vcpu->arch. - host_cp0_badvaddr); - if (run->mmio.phys_addr == KVM_INVALID_ADDR) { - er = EMULATE_FAIL; - break; - } - - run->mmio.len = bytes; - run->mmio.is_write = 1; - vcpu->mmio_needed = 1; - vcpu->mmio_is_write = 1; - *(u32 *) data = vcpu->arch.gprs[rt]; + run->mmio.len = 4; + *(u32 *)data = vcpu->arch.gprs[rt]; kvm_debug("[%#lx] OP_SW: eaddr: %#lx, gpr: %#lx, data: %#x\n", vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, - vcpu->arch.gprs[rt], *(u32 *) data); + vcpu->arch.gprs[rt], *(u32 *)data); break; case sh_op: - bytes = 2; - if (bytes > sizeof(run->mmio.data)) { - kvm_err("%s: bad MMIO length: %d\n", __func__, - run->mmio.len); - } - run->mmio.phys_addr = - kvm_mips_callbacks->gva_to_gpa(vcpu->arch. 
- host_cp0_badvaddr); - if (run->mmio.phys_addr == KVM_INVALID_ADDR) { - er = EMULATE_FAIL; - break; - } - - run->mmio.len = bytes; - run->mmio.is_write = 1; - vcpu->mmio_needed = 1; - vcpu->mmio_is_write = 1; - *(u16 *) data = vcpu->arch.gprs[rt]; + run->mmio.len = 2; + *(u16 *)data = vcpu->arch.gprs[rt]; kvm_debug("[%#lx] OP_SH: eaddr: %#lx, gpr: %#lx, data: %#x\n", vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, - vcpu->arch.gprs[rt], *(u32 *) data); + vcpu->arch.gprs[rt], *(u16 *)data); + break; + + case sb_op: + run->mmio.len = 1; + *(u8 *)data = vcpu->arch.gprs[rt]; + + kvm_debug("[%#lx] OP_SB: eaddr: %#lx, gpr: %#lx, data: %#x\n", + vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr, + vcpu->arch.gprs[rt], *(u8 *)data); break; default: kvm_err("Store not yet supported (inst=0x%08x)\n", inst.word); - er = EMULATE_FAIL; - break; + goto out_fail; } - /* Rollback PC if emulation was unsuccessful */ - if (er == EMULATE_FAIL) - vcpu->arch.pc = curr_pc; + run->mmio.is_write = 1; + vcpu->mmio_needed = 1; + vcpu->mmio_is_write = 1; + return EMULATE_DO_MMIO; - return er; +out_fail: + /* Rollback PC if emulation was unsuccessful */ + vcpu->arch.pc = curr_pc; + return EMULATE_FAIL; } enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, u32 cause, struct kvm_run *run, struct kvm_vcpu *vcpu) { - enum emulation_result er = EMULATE_DO_MMIO; + enum emulation_result er; unsigned long curr_pc; u32 op, rt; - u32 bytes; rt = inst.i_format.rt; op = inst.i_format.opcode; @@ -1606,96 +1703,53 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, vcpu->arch.io_gpr = rt; + run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa( + vcpu->arch.host_cp0_badvaddr); + if (run->mmio.phys_addr == KVM_INVALID_ADDR) + return EMULATE_FAIL; + + vcpu->mmio_needed = 2; /* signed */ switch (op) { - case lw_op: - bytes = 4; - if (bytes > sizeof(run->mmio.data)) { - kvm_err("%s: bad MMIO length: %d\n", __func__, - run->mmio.len); - er = EMULATE_FAIL; - break; - } - run->mmio.phys_addr = - kvm_mips_callbacks->gva_to_gpa(vcpu->arch. - host_cp0_badvaddr); - if (run->mmio.phys_addr == KVM_INVALID_ADDR) { - er = EMULATE_FAIL; - break; - } +#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_MIPS_VZ) + case ld_op: + run->mmio.len = 8; + break; - run->mmio.len = bytes; - run->mmio.is_write = 0; - vcpu->mmio_needed = 1; - vcpu->mmio_is_write = 0; + case lwu_op: + vcpu->mmio_needed = 1; /* unsigned */ + /* fall through */ +#endif + case lw_op: + run->mmio.len = 4; break; - case lh_op: case lhu_op: - bytes = 2; - if (bytes > sizeof(run->mmio.data)) { - kvm_err("%s: bad MMIO length: %d\n", __func__, - run->mmio.len); - er = EMULATE_FAIL; - break; - } - run->mmio.phys_addr = - kvm_mips_callbacks->gva_to_gpa(vcpu->arch. - host_cp0_badvaddr); - if (run->mmio.phys_addr == KVM_INVALID_ADDR) { - er = EMULATE_FAIL; - break; - } - - run->mmio.len = bytes; - run->mmio.is_write = 0; - vcpu->mmio_needed = 1; - vcpu->mmio_is_write = 0; - - if (op == lh_op) - vcpu->mmio_needed = 2; - else - vcpu->mmio_needed = 1; - + vcpu->mmio_needed = 1; /* unsigned */ + /* fall through */ + case lh_op: + run->mmio.len = 2; break; case lbu_op: + vcpu->mmio_needed = 1; /* unsigned */ + /* fall through */ case lb_op: - bytes = 1; - if (bytes > sizeof(run->mmio.data)) { - kvm_err("%s: bad MMIO length: %d\n", __func__, - run->mmio.len); - er = EMULATE_FAIL; - break; - } - run->mmio.phys_addr = - kvm_mips_callbacks->gva_to_gpa(vcpu->arch. 
- host_cp0_badvaddr); - if (run->mmio.phys_addr == KVM_INVALID_ADDR) { - er = EMULATE_FAIL; - break; - } - - run->mmio.len = bytes; - run->mmio.is_write = 0; - vcpu->mmio_is_write = 0; - - if (op == lb_op) - vcpu->mmio_needed = 2; - else - vcpu->mmio_needed = 1; - + run->mmio.len = 1; break; default: kvm_err("Load not yet supported (inst=0x%08x)\n", inst.word); - er = EMULATE_FAIL; - break; + vcpu->mmio_needed = 0; + return EMULATE_FAIL; } - return er; + run->mmio.is_write = 0; + vcpu->mmio_is_write = 0; + return EMULATE_DO_MMIO; } +#ifndef CONFIG_KVM_MIPS_VZ static enum emulation_result kvm_mips_guest_cache_op(int (*fn)(unsigned long), unsigned long curr_pc, unsigned long addr, @@ -1786,11 +1840,35 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst, vcpu->arch.pc, vcpu->arch.gprs[31], cache, op, base, arch->gprs[base], offset); - if (cache == Cache_D) + if (cache == Cache_D) { +#ifdef CONFIG_CPU_R4K_CACHE_TLB r4k_blast_dcache(); - else if (cache == Cache_I) +#else + switch (boot_cpu_type()) { + case CPU_CAVIUM_OCTEON3: + /* locally flush icache */ + local_flush_icache_range(0, 0); + break; + default: + __flush_cache_all(); + break; + } +#endif + } else if (cache == Cache_I) { +#ifdef CONFIG_CPU_R4K_CACHE_TLB r4k_blast_icache(); - else { +#else + switch (boot_cpu_type()) { + case CPU_CAVIUM_OCTEON3: + /* locally flush icache */ + local_flush_icache_range(0, 0); + break; + default: + flush_icache_all(); + break; + } +#endif + } else { kvm_err("%s: unsupported CACHE INDEX operation\n", __func__); return EMULATE_FAIL; @@ -1870,18 +1948,6 @@ enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc, case cop0_op: er = kvm_mips_emulate_CP0(inst, opc, cause, run, vcpu); break; - case sb_op: - case sh_op: - case sw_op: - er = kvm_mips_emulate_store(inst, cause, run, vcpu); - break; - case lb_op: - case lbu_op: - case lhu_op: - case lh_op: - case lw_op: - er = kvm_mips_emulate_load(inst, cause, run, vcpu); - break; #ifndef CONFIG_CPU_MIPSR6 case cache_op: @@ -1915,6 +1981,7 @@ unknown: return er; } +#endif /* CONFIG_KVM_MIPS_VZ */ /** * kvm_mips_guest_exception_base() - Find guest exception vector base address. 
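[Editor's note] The reworked kvm_mips_emulate_load() above collapses the per-opcode blocks into one physical-address lookup plus a fall-through switch that only selects the MMIO length and the signedness flag (vcpu->mmio_needed: 2 = sign-extend, 1 = zero-extend). A standalone sketch of that decode follows; the enum values are placeholders, not the kernel's real MIPS opcode encodings.

#include <stdbool.h>

/* Sketch only: mirrors the fall-through decode in kvm_mips_emulate_load().
 * The enum values are placeholders, not real MIPS opcode encodings. */
enum load_op { lb_op, lbu_op, lh_op, lhu_op, lw_op, lwu_op, ld_op };

bool decode_load(enum load_op op, unsigned int *len, int *mmio_needed)
{
	*mmio_needed = 2;			/* default: sign-extend */
	switch (op) {
	case ld_op:  *len = 8; break;
	case lwu_op: *mmio_needed = 1;		/* zero-extend */
		     /* fall through */
	case lw_op:  *len = 4; break;
	case lhu_op: *mmio_needed = 1;
		     /* fall through */
	case lh_op:  *len = 2; break;
	case lbu_op: *mmio_needed = 1;
		     /* fall through */
	case lb_op:  *len = 1; break;
	default:     return false;
	}
	return true;
}

The completion side (kvm_mips_complete_mmio_load() in the next hunk) then widens run->mmio.data into the GPR according to the same two values.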
@@ -2524,8 +2591,15 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, vcpu->arch.pc = vcpu->arch.io_pc; switch (run->mmio.len) { + case 8: + *gpr = *(s64 *)run->mmio.data; + break; + case 4: - *gpr = *(s32 *) run->mmio.data; + if (vcpu->mmio_needed == 2) + *gpr = *(s32 *)run->mmio.data; + else + *gpr = *(u32 *)run->mmio.data; break; case 2: diff --git a/arch/mips/kvm/entry.c b/arch/mips/kvm/entry.c index c5b254c4d0da..16e1c93b484f 100644 --- a/arch/mips/kvm/entry.c +++ b/arch/mips/kvm/entry.c @@ -51,12 +51,15 @@ #define RA 31 /* Some CP0 registers */ +#define C0_PWBASE 5, 5 #define C0_HWRENA 7, 0 #define C0_BADVADDR 8, 0 #define C0_BADINSTR 8, 1 #define C0_BADINSTRP 8, 2 #define C0_ENTRYHI 10, 0 +#define C0_GUESTCTL1 10, 4 #define C0_STATUS 12, 0 +#define C0_GUESTCTL0 12, 6 #define C0_CAUSE 13, 0 #define C0_EPC 14, 0 #define C0_EBASE 15, 1 @@ -292,8 +295,8 @@ static void *kvm_mips_build_enter_guest(void *addr) unsigned int i; struct uasm_label labels[2]; struct uasm_reloc relocs[2]; - struct uasm_label *l = labels; - struct uasm_reloc *r = relocs; + struct uasm_label __maybe_unused *l = labels; + struct uasm_reloc __maybe_unused *r = relocs; memset(labels, 0, sizeof(labels)); memset(relocs, 0, sizeof(relocs)); @@ -302,7 +305,67 @@ static void *kvm_mips_build_enter_guest(void *addr) UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, pc), K1); UASM_i_MTC0(&p, T0, C0_EPC); - /* Set the ASID for the Guest Kernel */ +#ifdef CONFIG_KVM_MIPS_VZ + /* Save normal linux process pgd (VZ guarantees pgd_reg is set) */ + UASM_i_MFC0(&p, K0, c0_kscratch(), pgd_reg); + UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_pgd), K1); + + /* + * Set up KVM GPA pgd. + * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD(): + * - call tlbmiss_handler_setup_pgd(mm->pgd) + * - write mm->pgd into CP0_PWBase + * + * We keep S0 pointing at struct kvm so we can load the ASID below. + */ + UASM_i_LW(&p, S0, (int)offsetof(struct kvm_vcpu, kvm) - + (int)offsetof(struct kvm_vcpu, arch), K1); + UASM_i_LW(&p, A0, offsetof(struct kvm, arch.gpa_mm.pgd), S0); + UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd); + uasm_i_jalr(&p, RA, T9); + /* delay slot */ + if (cpu_has_htw) + UASM_i_MTC0(&p, A0, C0_PWBASE); + else + uasm_i_nop(&p); + + /* Set GM bit to setup eret to VZ guest context */ + uasm_i_addiu(&p, V1, ZERO, 1); + uasm_i_mfc0(&p, K0, C0_GUESTCTL0); + uasm_i_ins(&p, K0, V1, MIPS_GCTL0_GM_SHIFT, 1); + uasm_i_mtc0(&p, K0, C0_GUESTCTL0); + + if (cpu_has_guestid) { + /* + * Set root mode GuestID, so that root TLB refill handler can + * use the correct GuestID in the root TLB. 
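[Editor's note] In the GuestID path just below, the uasm_i_ext()/uasm_i_ins() pair emits a plain bitfield copy of GuestCtl1.ID into GuestCtl1.RID. A C sketch of the same operation; the shift/width constants are illustrative stand-ins for the MIPS_GCTL1_* macros, not an authoritative register layout.

#include <stdint.h>

#define GCTL1_ID_SHIFT   0	/* stand-in for MIPS_GCTL1_ID_SHIFT */
#define GCTL1_ID_WIDTH   8	/* stand-in for MIPS_GCTL1_ID_WIDTH */
#define GCTL1_RID_SHIFT  16	/* stand-in for MIPS_GCTL1_RID_SHIFT */
#define GCTL1_RID_WIDTH  8	/* stand-in for MIPS_GCTL1_RID_WIDTH */

uint32_t set_rid_to_id(uint32_t guestctl1)
{
	uint32_t id = (guestctl1 >> GCTL1_ID_SHIFT) &
		      ((1u << GCTL1_ID_WIDTH) - 1);			/* ext */
	uint32_t rid_mask = ((1u << GCTL1_RID_WIDTH) - 1) << GCTL1_RID_SHIFT;

	return (guestctl1 & ~rid_mask) | (id << GCTL1_RID_SHIFT);	/* ins */
}

The same transformation is done in C by set_root_gid_to_guest_gid() in arch/mips/kvm/tlb.c further down in this diff.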
+ */ + + /* Get current GuestID */ + uasm_i_mfc0(&p, T0, C0_GUESTCTL1); + /* Set GuestCtl1.RID = GuestCtl1.ID */ + uasm_i_ext(&p, T1, T0, MIPS_GCTL1_ID_SHIFT, + MIPS_GCTL1_ID_WIDTH); + uasm_i_ins(&p, T0, T1, MIPS_GCTL1_RID_SHIFT, + MIPS_GCTL1_RID_WIDTH); + uasm_i_mtc0(&p, T0, C0_GUESTCTL1); + + /* GuestID handles dealiasing so we don't need to touch ASID */ + goto skip_asid_restore; + } + + /* Root ASID Dealias (RAD) */ + + /* Save host ASID */ + UASM_i_MFC0(&p, K0, C0_ENTRYHI); + UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_entryhi), + K1); + + /* Set the root ASID for the Guest */ + UASM_i_ADDIU(&p, T1, S0, + offsetof(struct kvm, arch.gpa_mm.context.asid)); +#else + /* Set the ASID for the Guest Kernel or User */ UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, cop0), K1); UASM_i_LW(&p, T0, offsetof(struct mips_coproc, reg[MIPS_CP0_STATUS][0]), T0); @@ -315,6 +378,7 @@ static void *kvm_mips_build_enter_guest(void *addr) UASM_i_ADDIU(&p, T1, K1, offsetof(struct kvm_vcpu_arch, guest_user_mm.context.asid)); uasm_l_kernel_asid(&l, p); +#endif /* t1: contains the base of the ASID array, need to get the cpu id */ /* smp_processor_id */ @@ -339,6 +403,7 @@ static void *kvm_mips_build_enter_guest(void *addr) uasm_i_andi(&p, K0, K0, MIPS_ENTRYHI_ASID); #endif +#ifndef CONFIG_KVM_MIPS_VZ /* * Set up KVM T&E GVA pgd. * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD(): @@ -351,7 +416,11 @@ static void *kvm_mips_build_enter_guest(void *addr) UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd); uasm_i_jalr(&p, RA, T9); uasm_i_mtc0(&p, K0, C0_ENTRYHI); - +#else + /* Set up KVM VZ root ASID (!guestid) */ + uasm_i_mtc0(&p, K0, C0_ENTRYHI); +skip_asid_restore: +#endif uasm_i_ehb(&p); /* Disable RDHWR access */ @@ -559,13 +628,10 @@ void *kvm_mips_build_exit(void *addr) /* Now that context has been saved, we can use other registers */ /* Restore vcpu */ - UASM_i_MFC0(&p, A1, scratch_vcpu[0], scratch_vcpu[1]); - uasm_i_move(&p, S1, A1); + UASM_i_MFC0(&p, S1, scratch_vcpu[0], scratch_vcpu[1]); /* Restore run (vcpu->run) */ - UASM_i_LW(&p, A0, offsetof(struct kvm_vcpu, run), A1); - /* Save pointer to run in s0, will be saved by the compiler */ - uasm_i_move(&p, S0, A0); + UASM_i_LW(&p, S0, offsetof(struct kvm_vcpu, run), S1); /* * Save Host level EPC, BadVaddr and Cause to VCPU, useful to process @@ -641,6 +707,52 @@ void *kvm_mips_build_exit(void *addr) uasm_l_msa_1(&l, p); } +#ifdef CONFIG_KVM_MIPS_VZ + /* Restore host ASID */ + if (!cpu_has_guestid) { + UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, host_entryhi), + K1); + UASM_i_MTC0(&p, K0, C0_ENTRYHI); + } + + /* + * Set up normal Linux process pgd. + * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD(): + * - call tlbmiss_handler_setup_pgd(mm->pgd) + * - write mm->pgd into CP0_PWBase + */ + UASM_i_LW(&p, A0, + offsetof(struct kvm_vcpu_arch, host_pgd), K1); + UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd); + uasm_i_jalr(&p, RA, T9); + /* delay slot */ + if (cpu_has_htw) + UASM_i_MTC0(&p, A0, C0_PWBASE); + else + uasm_i_nop(&p); + + /* Clear GM bit so we don't enter guest mode when EXL is cleared */ + uasm_i_mfc0(&p, K0, C0_GUESTCTL0); + uasm_i_ins(&p, K0, ZERO, MIPS_GCTL0_GM_SHIFT, 1); + uasm_i_mtc0(&p, K0, C0_GUESTCTL0); + + /* Save GuestCtl0 so we can access GExcCode after CPU migration */ + uasm_i_sw(&p, K0, + offsetof(struct kvm_vcpu_arch, host_cp0_guestctl0), K1); + + if (cpu_has_guestid) { + /* + * Clear root mode GuestID, so that root TLB operations use the + * root GuestID in the root TLB. 
+ */ + uasm_i_mfc0(&p, T0, C0_GUESTCTL1); + /* Set GuestCtl1.RID = MIPS_GCTL1_ROOT_GUESTID (i.e. 0) */ + uasm_i_ins(&p, T0, ZERO, MIPS_GCTL1_RID_SHIFT, + MIPS_GCTL1_RID_WIDTH); + uasm_i_mtc0(&p, T0, C0_GUESTCTL1); + } +#endif + /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ uasm_i_addiu(&p, AT, ZERO, ~(ST0_EXL | KSU_USER | ST0_IE)); uasm_i_and(&p, V0, V0, AT); @@ -680,6 +792,8 @@ void *kvm_mips_build_exit(void *addr) * Now jump to the kvm_mips_handle_exit() to see if we can deal * with this in the kernel */ + uasm_i_move(&p, A0, S0); + uasm_i_move(&p, A1, S1); UASM_i_LA(&p, T9, (unsigned long)kvm_mips_handle_exit); uasm_i_jalr(&p, RA, T9); UASM_i_ADDIU(&p, SP, SP, -CALLFRAME_SIZ); diff --git a/arch/mips/kvm/hypcall.c b/arch/mips/kvm/hypcall.c new file mode 100644 index 000000000000..83063435195f --- /dev/null +++ b/arch/mips/kvm/hypcall.c @@ -0,0 +1,53 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Hypercall handling. + * + * Copyright (C) 2015 Imagination Technologies Ltd. + */ + +#include <linux/kernel.h> +#include <linux/kvm_host.h> +#include <linux/kvm_para.h> + +#define MAX_HYPCALL_ARGS 4 + +enum emulation_result kvm_mips_emul_hypcall(struct kvm_vcpu *vcpu, + union mips_instruction inst) +{ + unsigned int code = (inst.co_format.code >> 5) & 0x3ff; + + kvm_debug("[%#lx] HYPCALL %#03x\n", vcpu->arch.pc, code); + + switch (code) { + case 0: + return EMULATE_HYPERCALL; + default: + return EMULATE_FAIL; + }; +} + +static int kvm_mips_hypercall(struct kvm_vcpu *vcpu, unsigned long num, + const unsigned long *args, unsigned long *hret) +{ + /* Report unimplemented hypercall to guest */ + *hret = -KVM_ENOSYS; + return RESUME_GUEST; +} + +int kvm_mips_handle_hypcall(struct kvm_vcpu *vcpu) +{ + unsigned long num, args[MAX_HYPCALL_ARGS]; + + /* read hypcall number and arguments */ + num = vcpu->arch.gprs[2]; /* v0 */ + args[0] = vcpu->arch.gprs[4]; /* a0 */ + args[1] = vcpu->arch.gprs[5]; /* a1 */ + args[2] = vcpu->arch.gprs[6]; /* a2 */ + args[3] = vcpu->arch.gprs[7]; /* a3 */ + + return kvm_mips_hypercall(vcpu, num, + args, &vcpu->arch.gprs[2] /* v0 */); +} diff --git a/arch/mips/kvm/interrupt.h b/arch/mips/kvm/interrupt.h index fb118a2c8379..3bf0a49725e8 100644 --- a/arch/mips/kvm/interrupt.h +++ b/arch/mips/kvm/interrupt.h @@ -30,8 +30,13 @@ #define C_TI (_ULCAST_(1) << 30) +#ifdef CONFIG_KVM_MIPS_VZ +#define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (1) +#define KVM_MIPS_IRQ_CLEAR_ALL_AT_ONCE (1) +#else #define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (0) #define KVM_MIPS_IRQ_CLEAR_ALL_AT_ONCE (0) +#endif void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority); void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority); diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 15a1b1716c2e..d4b2ad18eef2 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -59,6 +59,16 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "fpe", VCPU_STAT(fpe_exits), KVM_STAT_VCPU }, { "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU }, { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, +#ifdef CONFIG_KVM_MIPS_VZ + { "vz_gpsi", VCPU_STAT(vz_gpsi_exits), KVM_STAT_VCPU }, + { "vz_gsfc", VCPU_STAT(vz_gsfc_exits), KVM_STAT_VCPU }, + { "vz_hc", VCPU_STAT(vz_hc_exits), KVM_STAT_VCPU }, + { "vz_grr", VCPU_STAT(vz_grr_exits), KVM_STAT_VCPU }, + { "vz_gva", VCPU_STAT(vz_gva_exits), 
KVM_STAT_VCPU }, + { "vz_ghfc", VCPU_STAT(vz_ghfc_exits), KVM_STAT_VCPU }, + { "vz_gpa", VCPU_STAT(vz_gpa_exits), KVM_STAT_VCPU }, + { "vz_resvd", VCPU_STAT(vz_resvd_exits), KVM_STAT_VCPU }, +#endif { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU }, { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), KVM_STAT_VCPU }, { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid), KVM_STAT_VCPU }, @@ -66,6 +76,19 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { {NULL} }; +bool kvm_trace_guest_mode_change; + +int kvm_guest_mode_change_trace_reg(void) +{ + kvm_trace_guest_mode_change = 1; + return 0; +} + +void kvm_guest_mode_change_trace_unreg(void) +{ + kvm_trace_guest_mode_change = 0; +} + /* * XXXKYMA: We are simulatoring a processor that has the WII bit set in * Config7, so we are "runnable" if interrupts are pending @@ -82,7 +105,12 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) int kvm_arch_hardware_enable(void) { - return 0; + return kvm_mips_callbacks->hardware_enable(); +} + +void kvm_arch_hardware_disable(void) +{ + kvm_mips_callbacks->hardware_disable(); } int kvm_arch_hardware_setup(void) @@ -97,6 +125,18 @@ void kvm_arch_check_processor_compat(void *rtn) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { + switch (type) { +#ifdef CONFIG_KVM_MIPS_VZ + case KVM_VM_MIPS_VZ: +#else + case KVM_VM_MIPS_TE: +#endif + break; + default: + /* Unsupported KVM type */ + return -EINVAL; + }; + /* Allocate page table to map GPA -> RPA */ kvm->arch.gpa_mm.pgd = kvm_pgd_alloc(); if (!kvm->arch.gpa_mm.pgd) @@ -301,8 +341,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) /* Build guest exception vectors dynamically in unmapped memory */ handler = gebase + 0x2000; - /* TLB refill */ + /* TLB refill (or XTLB refill on 64-bit VZ where KX=1) */ refill_start = gebase; + if (IS_ENABLED(CONFIG_KVM_MIPS_VZ) && IS_ENABLED(CONFIG_64BIT)) + refill_start += 0x080; refill_end = kvm_mips_build_tlb_refill_exception(refill_start, handler); /* General Exception Entry point */ @@ -353,9 +395,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) /* Init */ vcpu->arch.last_sched_cpu = -1; - - /* Start off the timer */ - kvm_mips_init_count(vcpu); + vcpu->arch.last_exec_cpu = -1; return vcpu; @@ -1030,9 +1070,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IMMEDIATE_EXIT: r = 1; break; - case KVM_CAP_COALESCED_MMIO: - r = KVM_COALESCED_MMIO_PAGE_OFFSET; - break; case KVM_CAP_NR_VCPUS: r = num_online_cpus(); break; @@ -1059,7 +1096,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = cpu_has_msa && !(boot_cpu_data.msa_id & MSA_IR_WRPF); break; default: - r = 0; + r = kvm_mips_callbacks->check_extension(kvm, ext); break; } return r; @@ -1067,7 +1104,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - return kvm_mips_pending_timer(vcpu); + return kvm_mips_pending_timer(vcpu) || + kvm_read_c0_guest_cause(vcpu->arch.cop0) & C_TI; } int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu) @@ -1092,7 +1130,7 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu) kvm_debug("\tlo: 0x%08lx\n", vcpu->arch.lo); cop0 = vcpu->arch.cop0; - kvm_debug("\tStatus: 0x%08lx, Cause: 0x%08lx\n", + kvm_debug("\tStatus: 0x%08x, Cause: 0x%08x\n", kvm_read_c0_guest_status(cop0), kvm_read_c0_guest_cause(cop0)); @@ -1208,7 +1246,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; /* re-enable 
HTW before enabling interrupts */ - htw_start(); + if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) + htw_start(); /* Set a default exit reason */ run->exit_reason = KVM_EXIT_UNKNOWN; @@ -1226,17 +1265,20 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) cause, opc, run, vcpu); trace_kvm_exit(vcpu, exccode); - /* - * Do a privilege check, if in UM most of these exit conditions end up - * causing an exception to be delivered to the Guest Kernel - */ - er = kvm_mips_check_privilege(cause, opc, run, vcpu); - if (er == EMULATE_PRIV_FAIL) { - goto skip_emul; - } else if (er == EMULATE_FAIL) { - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - ret = RESUME_HOST; - goto skip_emul; + if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) { + /* + * Do a privilege check, if in UM most of these exit conditions + * end up causing an exception to be delivered to the Guest + * Kernel + */ + er = kvm_mips_check_privilege(cause, opc, run, vcpu); + if (er == EMULATE_PRIV_FAIL) { + goto skip_emul; + } else if (er == EMULATE_FAIL) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + goto skip_emul; + } } switch (exccode) { @@ -1267,7 +1309,7 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case EXCCODE_TLBS: - kvm_debug("TLB ST fault: cause %#x, status %#lx, PC: %p, BadVaddr: %#lx\n", + kvm_debug("TLB ST fault: cause %#x, status %#x, PC: %p, BadVaddr: %#lx\n", cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc, badvaddr); @@ -1328,12 +1370,17 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) ret = kvm_mips_callbacks->handle_msa_disabled(vcpu); break; + case EXCCODE_GE: + /* defer exit accounting to handler */ + ret = kvm_mips_callbacks->handle_guest_exit(vcpu); + break; + default: if (cause & CAUSEF_BD) opc += 1; inst = 0; kvm_get_badinstr(opc, vcpu, &inst); - kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n", + kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n", exccode, opc, inst, badvaddr, kvm_read_c0_guest_status(vcpu->arch.cop0)); kvm_arch_vcpu_dump_regs(vcpu); @@ -1346,6 +1393,9 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) skip_emul: local_irq_disable(); + if (ret == RESUME_GUEST) + kvm_vz_acquire_htimer(vcpu); + if (er == EMULATE_DONE && !(ret & RESUME_HOST)) kvm_mips_deliver_interrupts(vcpu, cause); @@ -1391,7 +1441,8 @@ skip_emul: } /* Disable HTW before returning to guest or host */ - htw_stop(); + if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) + htw_stop(); return ret; } @@ -1527,16 +1578,18 @@ void kvm_drop_fpu(struct kvm_vcpu *vcpu) void kvm_lose_fpu(struct kvm_vcpu *vcpu) { /* - * FPU & MSA get disabled in root context (hardware) when it is disabled - * in guest context (software), but the register state in the hardware - * may still be in use. This is why we explicitly re-enable the hardware - * before saving. + * With T&E, FPU & MSA get disabled in root context (hardware) when it + * is disabled in guest context (software), but the register state in + * the hardware may still be in use. + * This is why we explicitly re-enable the hardware before saving. 
*/ preempt_disable(); if (cpu_has_msa && vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) { - set_c0_config5(MIPS_CONF5_MSAEN); - enable_fpu_hazard(); + if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) { + set_c0_config5(MIPS_CONF5_MSAEN); + enable_fpu_hazard(); + } __kvm_save_msa(&vcpu->arch); trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU_MSA); @@ -1549,8 +1602,10 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu) } vcpu->arch.aux_inuse &= ~(KVM_MIPS_AUX_FPU | KVM_MIPS_AUX_MSA); } else if (vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) { - set_c0_status(ST0_CU1); - enable_fpu_hazard(); + if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) { + set_c0_status(ST0_CU1); + enable_fpu_hazard(); + } __kvm_save_fpu(&vcpu->arch); vcpu->arch.aux_inuse &= ~KVM_MIPS_AUX_FPU; diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index cb0faade311e..ee64db032793 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -992,6 +992,22 @@ static pte_t kvm_mips_gpa_pte_to_gva_mapped(pte_t pte, long entrylo) return kvm_mips_gpa_pte_to_gva_unmapped(pte); } +#ifdef CONFIG_KVM_MIPS_VZ +int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr, + struct kvm_vcpu *vcpu, + bool write_fault) +{ + int ret; + + ret = kvm_mips_map_page(vcpu, badvaddr, write_fault, NULL, NULL); + if (ret) + return ret; + + /* Invalidate this entry in the TLB */ + return kvm_vz_host_tlb_inv(vcpu, badvaddr); +} +#endif + /* XXXKYMA: Must be called with interrupts disabled */ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, struct kvm_vcpu *vcpu, @@ -1225,6 +1241,10 @@ int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out) { int err; + if (WARN(IS_ENABLED(CONFIG_KVM_MIPS_VZ), + "Expect BadInstr/BadInstrP registers to be used with VZ\n")) + return -EINVAL; + retry: kvm_trap_emul_gva_lockless_begin(vcpu); err = get_user(*out, opc); diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c index 2819eb793345..7c6336dd2638 100644 --- a/arch/mips/kvm/tlb.c +++ b/arch/mips/kvm/tlb.c @@ -33,6 +33,25 @@ #define KVM_GUEST_PC_TLB 0 #define KVM_GUEST_SP_TLB 1 +#ifdef CONFIG_KVM_MIPS_VZ +unsigned long GUESTID_MASK; +EXPORT_SYMBOL_GPL(GUESTID_MASK); +unsigned long GUESTID_FIRST_VERSION; +EXPORT_SYMBOL_GPL(GUESTID_FIRST_VERSION); +unsigned long GUESTID_VERSION_MASK; +EXPORT_SYMBOL_GPL(GUESTID_VERSION_MASK); + +static u32 kvm_mips_get_root_asid(struct kvm_vcpu *vcpu) +{ + struct mm_struct *gpa_mm = &vcpu->kvm->arch.gpa_mm; + + if (cpu_has_guestid) + return 0; + else + return cpu_asid(smp_processor_id(), gpa_mm); +} +#endif + static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu) { struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; @@ -166,6 +185,13 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va, local_irq_restore(flags); + /* + * We don't want to get reserved instruction exceptions for missing tlb + * entries. + */ + if (cpu_has_vtag_icache) + flush_icache_all(); + if (user && idx_user >= 0) kvm_debug("%s: Invalidated guest user entryhi %#lx @ idx %d\n", __func__, (va & VPN2_MASK) | @@ -179,6 +205,421 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va, } EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_inv); +#ifdef CONFIG_KVM_MIPS_VZ + +/* GuestID management */ + +/** + * clear_root_gid() - Set GuestCtl1.RID for normal root operation. + */ +static inline void clear_root_gid(void) +{ + if (cpu_has_guestid) { + clear_c0_guestctl1(MIPS_GCTL1_RID); + mtc0_tlbw_hazard(); + } +} + +/** + * set_root_gid_to_guest_gid() - Set GuestCtl1.RID to match GuestCtl1.ID. 
+ * + * Sets the root GuestID to match the current guest GuestID, for TLB operation + * on the GPA->RPA mappings in the root TLB. + * + * The caller must be sure to disable HTW while the root GID is set, and + * possibly longer if TLB registers are modified. + */ +static inline void set_root_gid_to_guest_gid(void) +{ + unsigned int guestctl1; + + if (cpu_has_guestid) { + back_to_back_c0_hazard(); + guestctl1 = read_c0_guestctl1(); + guestctl1 = (guestctl1 & ~MIPS_GCTL1_RID) | + ((guestctl1 & MIPS_GCTL1_ID) >> MIPS_GCTL1_ID_SHIFT) + << MIPS_GCTL1_RID_SHIFT; + write_c0_guestctl1(guestctl1); + mtc0_tlbw_hazard(); + } +} + +int kvm_vz_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va) +{ + int idx; + unsigned long flags, old_entryhi; + + local_irq_save(flags); + htw_stop(); + + /* Set root GuestID for root probe and write of guest TLB entry */ + set_root_gid_to_guest_gid(); + + old_entryhi = read_c0_entryhi(); + + idx = _kvm_mips_host_tlb_inv((va & VPN2_MASK) | + kvm_mips_get_root_asid(vcpu)); + + write_c0_entryhi(old_entryhi); + clear_root_gid(); + mtc0_tlbw_hazard(); + + htw_start(); + local_irq_restore(flags); + + /* + * We don't want to get reserved instruction exceptions for missing tlb + * entries. + */ + if (cpu_has_vtag_icache) + flush_icache_all(); + + if (idx > 0) + kvm_debug("%s: Invalidated root entryhi %#lx @ idx %d\n", + __func__, (va & VPN2_MASK) | + kvm_mips_get_root_asid(vcpu), idx); + + return 0; +} +EXPORT_SYMBOL_GPL(kvm_vz_host_tlb_inv); + +/** + * kvm_vz_guest_tlb_lookup() - Lookup a guest VZ TLB mapping. + * @vcpu: KVM VCPU pointer. + * @gpa: Guest virtual address in a TLB mapped guest segment. + * @gpa: Ponter to output guest physical address it maps to. + * + * Converts a guest virtual address in a guest TLB mapped segment to a guest + * physical address, by probing the guest TLB. + * + * Returns: 0 if guest TLB mapping exists for @gva. *@gpa will have been + * written. + * -EFAULT if no guest TLB mapping exists for @gva. *@gpa may not + * have been written. + */ +int kvm_vz_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long gva, + unsigned long *gpa) +{ + unsigned long o_entryhi, o_entrylo[2], o_pagemask; + unsigned int o_index; + unsigned long entrylo[2], pagemask, pagemaskbit, pa; + unsigned long flags; + int index; + + /* Probe the guest TLB for a mapping */ + local_irq_save(flags); + /* Set root GuestID for root probe of guest TLB entry */ + htw_stop(); + set_root_gid_to_guest_gid(); + + o_entryhi = read_gc0_entryhi(); + o_index = read_gc0_index(); + + write_gc0_entryhi((o_entryhi & 0x3ff) | (gva & ~0xfffl)); + mtc0_tlbw_hazard(); + guest_tlb_probe(); + tlb_probe_hazard(); + + index = read_gc0_index(); + if (index < 0) { + /* No match, fail */ + write_gc0_entryhi(o_entryhi); + write_gc0_index(o_index); + + clear_root_gid(); + htw_start(); + local_irq_restore(flags); + return -EFAULT; + } + + /* Match! 
read the TLB entry */ + o_entrylo[0] = read_gc0_entrylo0(); + o_entrylo[1] = read_gc0_entrylo1(); + o_pagemask = read_gc0_pagemask(); + + mtc0_tlbr_hazard(); + guest_tlb_read(); + tlb_read_hazard(); + + entrylo[0] = read_gc0_entrylo0(); + entrylo[1] = read_gc0_entrylo1(); + pagemask = ~read_gc0_pagemask() & ~0x1fffl; + + write_gc0_entryhi(o_entryhi); + write_gc0_index(o_index); + write_gc0_entrylo0(o_entrylo[0]); + write_gc0_entrylo1(o_entrylo[1]); + write_gc0_pagemask(o_pagemask); + + clear_root_gid(); + htw_start(); + local_irq_restore(flags); + + /* Select one of the EntryLo values and interpret the GPA */ + pagemaskbit = (pagemask ^ (pagemask & (pagemask - 1))) >> 1; + pa = entrylo[!!(gva & pagemaskbit)]; + + /* + * TLB entry may have become invalid since TLB probe if physical FTLB + * entries are shared between threads (e.g. I6400). + */ + if (!(pa & ENTRYLO_V)) + return -EFAULT; + + /* + * Note, this doesn't take guest MIPS32 XPA into account, where PFN is + * split with XI/RI in the middle. + */ + pa = (pa << 6) & ~0xfffl; + pa |= gva & ~(pagemask | pagemaskbit); + + *gpa = pa; + return 0; +} +EXPORT_SYMBOL_GPL(kvm_vz_guest_tlb_lookup); + +/** + * kvm_vz_local_flush_roottlb_all_guests() - Flush all root TLB entries for + * guests. + * + * Invalidate all entries in root tlb which are GPA mappings. + */ +void kvm_vz_local_flush_roottlb_all_guests(void) +{ + unsigned long flags; + unsigned long old_entryhi, old_pagemask, old_guestctl1; + int entry; + + if (WARN_ON(!cpu_has_guestid)) + return; + + local_irq_save(flags); + htw_stop(); + + /* TLBR may clobber EntryHi.ASID, PageMask, and GuestCtl1.RID */ + old_entryhi = read_c0_entryhi(); + old_pagemask = read_c0_pagemask(); + old_guestctl1 = read_c0_guestctl1(); + + /* + * Invalidate guest entries in root TLB while leaving root entries + * intact when possible. + */ + for (entry = 0; entry < current_cpu_data.tlbsize; entry++) { + write_c0_index(entry); + mtc0_tlbw_hazard(); + tlb_read(); + tlb_read_hazard(); + + /* Don't invalidate non-guest (RVA) mappings in the root TLB */ + if (!(read_c0_guestctl1() & MIPS_GCTL1_RID)) + continue; + + /* Make sure all entries differ. */ + write_c0_entryhi(UNIQUE_ENTRYHI(entry)); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + write_c0_guestctl1(0); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + } + + write_c0_entryhi(old_entryhi); + write_c0_pagemask(old_pagemask); + write_c0_guestctl1(old_guestctl1); + tlbw_use_hazard(); + + htw_start(); + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(kvm_vz_local_flush_roottlb_all_guests); + +/** + * kvm_vz_local_flush_guesttlb_all() - Flush all guest TLB entries. + * + * Invalidate all entries in guest tlb irrespective of guestid. 
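[Editor's note] The GPA computation at the end of kvm_vz_guest_tlb_lookup() above packs several steps into three lines. Below is a standalone sketch for the common 4 KiB page case (guest PageMask reads back as 0); the sample values are invented, and the EntryLo layout (flag bits in 5..0, PFN from bit 6) is inferred from the "pa << 6" step in the code.

#include <stdio.h>

/* Sketch of the EntryLo -> GPA arithmetic in kvm_vz_guest_tlb_lookup(). */
int main(void)
{
	unsigned long gva = 0xc0003a5cUL;		/* invented guest VA */
	unsigned long entrylo[2] = {
		(0x12345UL << 6) | 0x3f,		/* PFN 0x12345, flags set */
		(0x12346UL << 6) | 0x3f,		/* PFN 0x12346, flags set */
	};
	unsigned long pagemask, pagemaskbit, pa;

	pagemask = ~0UL & ~0x1fffUL;			/* ~PageMask & ~0x1fff */
	/* lowest set bit of pagemask, halved: the even/odd page selector */
	pagemaskbit = (pagemask ^ (pagemask & (pagemask - 1))) >> 1;

	pa = entrylo[!!(gva & pagemaskbit)];		/* EntryLo0 or EntryLo1 */
	pa = (pa << 6) & ~0xfffUL;			/* PFN -> frame base */
	pa |= gva & ~(pagemask | pagemaskbit);		/* add page offset */

	/* pagemaskbit = 0x1000, pa = 0x12346a5c (odd page, offset 0xa5c) */
	printf("pagemaskbit=%#lx pa=%#lx\n", pagemaskbit, pa);
	return 0;
}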
+ */ +void kvm_vz_local_flush_guesttlb_all(void) +{ + unsigned long flags; + unsigned long old_index; + unsigned long old_entryhi; + unsigned long old_entrylo[2]; + unsigned long old_pagemask; + int entry; + u64 cvmmemctl2 = 0; + + local_irq_save(flags); + + /* Preserve all clobbered guest registers */ + old_index = read_gc0_index(); + old_entryhi = read_gc0_entryhi(); + old_entrylo[0] = read_gc0_entrylo0(); + old_entrylo[1] = read_gc0_entrylo1(); + old_pagemask = read_gc0_pagemask(); + + switch (current_cpu_type()) { + case CPU_CAVIUM_OCTEON3: + /* Inhibit machine check due to multiple matching TLB entries */ + cvmmemctl2 = read_c0_cvmmemctl2(); + cvmmemctl2 |= CVMMEMCTL2_INHIBITTS; + write_c0_cvmmemctl2(cvmmemctl2); + break; + }; + + /* Invalidate guest entries in guest TLB */ + write_gc0_entrylo0(0); + write_gc0_entrylo1(0); + write_gc0_pagemask(0); + for (entry = 0; entry < current_cpu_data.guest.tlbsize; entry++) { + /* Make sure all entries differ. */ + write_gc0_index(entry); + write_gc0_entryhi(UNIQUE_GUEST_ENTRYHI(entry)); + mtc0_tlbw_hazard(); + guest_tlb_write_indexed(); + } + + if (cvmmemctl2) { + cvmmemctl2 &= ~CVMMEMCTL2_INHIBITTS; + write_c0_cvmmemctl2(cvmmemctl2); + }; + + write_gc0_index(old_index); + write_gc0_entryhi(old_entryhi); + write_gc0_entrylo0(old_entrylo[0]); + write_gc0_entrylo1(old_entrylo[1]); + write_gc0_pagemask(old_pagemask); + tlbw_use_hazard(); + + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(kvm_vz_local_flush_guesttlb_all); + +/** + * kvm_vz_save_guesttlb() - Save a range of guest TLB entries. + * @buf: Buffer to write TLB entries into. + * @index: Start index. + * @count: Number of entries to save. + * + * Save a range of guest TLB entries. The caller must ensure interrupts are + * disabled. + */ +void kvm_vz_save_guesttlb(struct kvm_mips_tlb *buf, unsigned int index, + unsigned int count) +{ + unsigned int end = index + count; + unsigned long old_entryhi, old_entrylo0, old_entrylo1, old_pagemask; + unsigned int guestctl1 = 0; + int old_index, i; + + /* Save registers we're about to clobber */ + old_index = read_gc0_index(); + old_entryhi = read_gc0_entryhi(); + old_entrylo0 = read_gc0_entrylo0(); + old_entrylo1 = read_gc0_entrylo1(); + old_pagemask = read_gc0_pagemask(); + + /* Set root GuestID for root probe */ + htw_stop(); + set_root_gid_to_guest_gid(); + if (cpu_has_guestid) + guestctl1 = read_c0_guestctl1(); + + /* Read each entry from guest TLB */ + for (i = index; i < end; ++i, ++buf) { + write_gc0_index(i); + + mtc0_tlbr_hazard(); + guest_tlb_read(); + tlb_read_hazard(); + + if (cpu_has_guestid && + (read_c0_guestctl1() ^ guestctl1) & MIPS_GCTL1_RID) { + /* Entry invalid or belongs to another guest */ + buf->tlb_hi = UNIQUE_GUEST_ENTRYHI(i); + buf->tlb_lo[0] = 0; + buf->tlb_lo[1] = 0; + buf->tlb_mask = 0; + } else { + /* Entry belongs to the right guest */ + buf->tlb_hi = read_gc0_entryhi(); + buf->tlb_lo[0] = read_gc0_entrylo0(); + buf->tlb_lo[1] = read_gc0_entrylo1(); + buf->tlb_mask = read_gc0_pagemask(); + } + } + + /* Clear root GuestID again */ + clear_root_gid(); + htw_start(); + + /* Restore clobbered registers */ + write_gc0_index(old_index); + write_gc0_entryhi(old_entryhi); + write_gc0_entrylo0(old_entrylo0); + write_gc0_entrylo1(old_entrylo1); + write_gc0_pagemask(old_pagemask); + + tlbw_use_hazard(); +} +EXPORT_SYMBOL_GPL(kvm_vz_save_guesttlb); + +/** + * kvm_vz_load_guesttlb() - Save a range of guest TLB entries. + * @buf: Buffer to read TLB entries from. + * @index: Start index. + * @count: Number of entries to load. 
+ * + * Load a range of guest TLB entries. The caller must ensure interrupts are + * disabled. + */ +void kvm_vz_load_guesttlb(const struct kvm_mips_tlb *buf, unsigned int index, + unsigned int count) +{ + unsigned int end = index + count; + unsigned long old_entryhi, old_entrylo0, old_entrylo1, old_pagemask; + int old_index, i; + + /* Save registers we're about to clobber */ + old_index = read_gc0_index(); + old_entryhi = read_gc0_entryhi(); + old_entrylo0 = read_gc0_entrylo0(); + old_entrylo1 = read_gc0_entrylo1(); + old_pagemask = read_gc0_pagemask(); + + /* Set root GuestID for root probe */ + htw_stop(); + set_root_gid_to_guest_gid(); + + /* Write each entry to guest TLB */ + for (i = index; i < end; ++i, ++buf) { + write_gc0_index(i); + write_gc0_entryhi(buf->tlb_hi); + write_gc0_entrylo0(buf->tlb_lo[0]); + write_gc0_entrylo1(buf->tlb_lo[1]); + write_gc0_pagemask(buf->tlb_mask); + + mtc0_tlbw_hazard(); + guest_tlb_write_indexed(); + } + + /* Clear root GuestID again */ + clear_root_gid(); + htw_start(); + + /* Restore clobbered registers */ + write_gc0_index(old_index); + write_gc0_entryhi(old_entryhi); + write_gc0_entrylo0(old_entrylo0); + write_gc0_entrylo1(old_entrylo1); + write_gc0_pagemask(old_pagemask); + + tlbw_use_hazard(); +} +EXPORT_SYMBOL_GPL(kvm_vz_load_guesttlb); + +#endif + /** * kvm_mips_suspend_mm() - Suspend the active mm. * @cpu The CPU we're running on. diff --git a/arch/mips/kvm/trace.h b/arch/mips/kvm/trace.h index c858cf168078..a8c7fd7bf6d2 100644 --- a/arch/mips/kvm/trace.h +++ b/arch/mips/kvm/trace.h @@ -18,6 +18,13 @@ #define TRACE_INCLUDE_FILE trace /* + * arch/mips/kvm/mips.c + */ +extern bool kvm_trace_guest_mode_change; +int kvm_guest_mode_change_trace_reg(void); +void kvm_guest_mode_change_trace_unreg(void); + +/* * Tracepoints for VM enters */ DECLARE_EVENT_CLASS(kvm_transition, @@ -62,10 +69,20 @@ DEFINE_EVENT(kvm_transition, kvm_out, #define KVM_TRACE_EXIT_MSA_FPE 14 #define KVM_TRACE_EXIT_FPE 15 #define KVM_TRACE_EXIT_MSA_DISABLED 21 +#define KVM_TRACE_EXIT_GUEST_EXIT 27 /* Further exit reasons */ #define KVM_TRACE_EXIT_WAIT 32 #define KVM_TRACE_EXIT_CACHE 33 #define KVM_TRACE_EXIT_SIGNAL 34 +/* 32 exit reasons correspond to GuestCtl0.GExcCode (VZ) */ +#define KVM_TRACE_EXIT_GEXCCODE_BASE 64 +#define KVM_TRACE_EXIT_GPSI 64 /* 0 */ +#define KVM_TRACE_EXIT_GSFC 65 /* 1 */ +#define KVM_TRACE_EXIT_HC 66 /* 2 */ +#define KVM_TRACE_EXIT_GRR 67 /* 3 */ +#define KVM_TRACE_EXIT_GVA 72 /* 8 */ +#define KVM_TRACE_EXIT_GHFC 73 /* 9 */ +#define KVM_TRACE_EXIT_GPA 74 /* 10 */ /* Tracepoints for VM exits */ #define kvm_trace_symbol_exit_types \ @@ -83,9 +100,17 @@ DEFINE_EVENT(kvm_transition, kvm_out, { KVM_TRACE_EXIT_MSA_FPE, "MSA FPE" }, \ { KVM_TRACE_EXIT_FPE, "FPE" }, \ { KVM_TRACE_EXIT_MSA_DISABLED, "MSA Disabled" }, \ + { KVM_TRACE_EXIT_GUEST_EXIT, "Guest Exit" }, \ { KVM_TRACE_EXIT_WAIT, "WAIT" }, \ { KVM_TRACE_EXIT_CACHE, "CACHE" }, \ - { KVM_TRACE_EXIT_SIGNAL, "Signal" } + { KVM_TRACE_EXIT_SIGNAL, "Signal" }, \ + { KVM_TRACE_EXIT_GPSI, "GPSI" }, \ + { KVM_TRACE_EXIT_GSFC, "GSFC" }, \ + { KVM_TRACE_EXIT_HC, "HC" }, \ + { KVM_TRACE_EXIT_GRR, "GRR" }, \ + { KVM_TRACE_EXIT_GVA, "GVA" }, \ + { KVM_TRACE_EXIT_GHFC, "GHFC" }, \ + { KVM_TRACE_EXIT_GPA, "GPA" } TRACE_EVENT(kvm_exit, TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason), @@ -158,6 +183,8 @@ TRACE_EVENT(kvm_exit, { KVM_TRACE_COP0(16, 4), "Config4" }, \ { KVM_TRACE_COP0(16, 5), "Config5" }, \ { KVM_TRACE_COP0(16, 7), "Config7" }, \ + { KVM_TRACE_COP0(17, 1), "MAAR" }, \ + { KVM_TRACE_COP0(17, 2), 
"MAARI" }, \ { KVM_TRACE_COP0(26, 0), "ECC" }, \ { KVM_TRACE_COP0(30, 0), "ErrorEPC" }, \ { KVM_TRACE_COP0(31, 2), "KScratch1" }, \ @@ -268,6 +295,51 @@ TRACE_EVENT(kvm_asid_change, __entry->new_asid) ); +TRACE_EVENT(kvm_guestid_change, + TP_PROTO(struct kvm_vcpu *vcpu, unsigned int guestid), + TP_ARGS(vcpu, guestid), + TP_STRUCT__entry( + __field(unsigned int, guestid) + ), + + TP_fast_assign( + __entry->guestid = guestid; + ), + + TP_printk("GuestID: 0x%02x", + __entry->guestid) +); + +TRACE_EVENT_FN(kvm_guest_mode_change, + TP_PROTO(struct kvm_vcpu *vcpu), + TP_ARGS(vcpu), + TP_STRUCT__entry( + __field(unsigned long, epc) + __field(unsigned long, pc) + __field(unsigned long, badvaddr) + __field(unsigned int, status) + __field(unsigned int, cause) + ), + + TP_fast_assign( + __entry->epc = kvm_read_c0_guest_epc(vcpu->arch.cop0); + __entry->pc = vcpu->arch.pc; + __entry->badvaddr = kvm_read_c0_guest_badvaddr(vcpu->arch.cop0); + __entry->status = kvm_read_c0_guest_status(vcpu->arch.cop0); + __entry->cause = kvm_read_c0_guest_cause(vcpu->arch.cop0); + ), + + TP_printk("EPC: 0x%08lx PC: 0x%08lx Status: 0x%08x Cause: 0x%08x BadVAddr: 0x%08lx", + __entry->epc, + __entry->pc, + __entry->status, + __entry->cause, + __entry->badvaddr), + + kvm_guest_mode_change_trace_reg, + kvm_guest_mode_change_trace_unreg +); + #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index b1fa53b252ea..a563759fd142 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -12,6 +12,7 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/kvm_host.h> +#include <linux/log2.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> #include <asm/mmu_context.h> @@ -40,6 +41,29 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva) return gpa; } +static int kvm_trap_emul_no_handler(struct kvm_vcpu *vcpu) +{ + u32 __user *opc = (u32 __user *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + u32 exccode = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE; + unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; + u32 inst = 0; + + /* + * Fetch the instruction. 
+ */ + if (cause & CAUSEF_BD) + opc += 1; + kvm_get_badinstr(opc, vcpu, &inst); + + kvm_err("Exception Code: %d not handled @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n", + exccode, opc, inst, badvaddr, + kvm_read_c0_guest_status(vcpu->arch.cop0)); + kvm_arch_vcpu_dump_regs(vcpu); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; +} + static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; @@ -82,6 +106,10 @@ static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu) ret = RESUME_HOST; break; + case EMULATE_HYPERCALL: + ret = kvm_mips_handle_hypcall(vcpu); + break; + default: BUG(); } @@ -484,6 +512,31 @@ static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu) return ret; } +static int kvm_trap_emul_hardware_enable(void) +{ + return 0; +} + +static void kvm_trap_emul_hardware_disable(void) +{ +} + +static int kvm_trap_emul_check_extension(struct kvm *kvm, long ext) +{ + int r; + + switch (ext) { + case KVM_CAP_MIPS_TE: + r = 1; + break; + default: + r = 0; + break; + } + + return r; +} + static int kvm_trap_emul_vcpu_init(struct kvm_vcpu *vcpu) { struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; @@ -561,6 +614,9 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) u32 config, config1; int vcpu_id = vcpu->vcpu_id; + /* Start off the timer at 100 MHz */ + kvm_mips_init_count(vcpu, 100*1000*1000); + /* * Arch specific stuff, set up config registers properly so that the * guest will come up as expected @@ -589,6 +645,13 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) /* Read the cache characteristics from the host Config1 Register */ config1 = (read_c0_config1() & ~0x7f); + /* DCache line size not correctly reported in Config1 on Octeon CPUs */ + if (cpu_dcache_line_size()) { + config1 &= ~MIPS_CONF1_DL; + config1 |= ((ilog2(cpu_dcache_line_size()) - 1) << + MIPS_CONF1_DL_SHF) & MIPS_CONF1_DL; + } + /* Set up MMU size */ config1 &= ~(0x3f << 25); config1 |= ((KVM_MIPS_GUEST_TLB_SIZE - 1) << 25); @@ -892,10 +955,12 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu, if (v & CAUSEF_DC) { /* disable timer first */ kvm_mips_count_disable_cause(vcpu); - kvm_change_c0_guest_cause(cop0, ~CAUSEF_DC, v); + kvm_change_c0_guest_cause(cop0, (u32)~CAUSEF_DC, + v); } else { /* enable timer last */ - kvm_change_c0_guest_cause(cop0, ~CAUSEF_DC, v); + kvm_change_c0_guest_cause(cop0, (u32)~CAUSEF_DC, + v); kvm_mips_count_enable_cause(vcpu); } } else { @@ -1230,7 +1295,11 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { .handle_msa_fpe = kvm_trap_emul_handle_msa_fpe, .handle_fpe = kvm_trap_emul_handle_fpe, .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled, + .handle_guest_exit = kvm_trap_emul_no_handler, + .hardware_enable = kvm_trap_emul_hardware_enable, + .hardware_disable = kvm_trap_emul_hardware_disable, + .check_extension = kvm_trap_emul_check_extension, .vcpu_init = kvm_trap_emul_vcpu_init, .vcpu_uninit = kvm_trap_emul_vcpu_uninit, .vcpu_setup = kvm_trap_emul_vcpu_setup, diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c new file mode 100644 index 000000000000..71d8856ade64 --- /dev/null +++ b/arch/mips/kvm/vz.c @@ -0,0 +1,3223 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * KVM/MIPS: Support for hardware virtualization extensions + * + * Copyright (C) 2012 MIPS Technologies, Inc. 
All rights reserved. + * Authors: Yann Le Du <ledu@kymasys.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/module.h> +#include <linux/preempt.h> +#include <linux/vmalloc.h> +#include <asm/cacheflush.h> +#include <asm/cacheops.h> +#include <asm/cmpxchg.h> +#include <asm/fpu.h> +#include <asm/hazards.h> +#include <asm/inst.h> +#include <asm/mmu_context.h> +#include <asm/r4kcache.h> +#include <asm/time.h> +#include <asm/tlb.h> +#include <asm/tlbex.h> + +#include <linux/kvm_host.h> + +#include "interrupt.h" + +#include "trace.h" + +/* Pointers to last VCPU loaded on each physical CPU */ +static struct kvm_vcpu *last_vcpu[NR_CPUS]; +/* Pointers to last VCPU executed on each physical CPU */ +static struct kvm_vcpu *last_exec_vcpu[NR_CPUS]; + +/* + * Number of guest VTLB entries to use, so we can catch inconsistency between + * CPUs. + */ +static unsigned int kvm_vz_guest_vtlb_size; + +static inline long kvm_vz_read_gc0_ebase(void) +{ + if (sizeof(long) == 8 && cpu_has_ebase_wg) + return read_gc0_ebase_64(); + else + return read_gc0_ebase(); +} + +static inline void kvm_vz_write_gc0_ebase(long v) +{ + /* + * First write with WG=1 to write upper bits, then write again in case + * WG should be left at 0. + * write_gc0_ebase_64() is no longer UNDEFINED since R6. + */ + if (sizeof(long) == 8 && + (cpu_has_mips64r6 || cpu_has_ebase_wg)) { + write_gc0_ebase_64(v | MIPS_EBASE_WG); + write_gc0_ebase_64(v); + } else { + write_gc0_ebase(v | MIPS_EBASE_WG); + write_gc0_ebase(v); + } +} + +/* + * These Config bits may be writable by the guest: + * Config: [K23, KU] (!TLB), K0 + * Config1: (none) + * Config2: [TU, SU] (impl) + * Config3: ISAOnExc + * Config4: FTLBPageSize + * Config5: K, CV, MSAEn, UFE, FRE, SBRI, UFR + */ + +static inline unsigned int kvm_vz_config_guest_wrmask(struct kvm_vcpu *vcpu) +{ + return CONF_CM_CMASK; +} + +static inline unsigned int kvm_vz_config1_guest_wrmask(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static inline unsigned int kvm_vz_config2_guest_wrmask(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static inline unsigned int kvm_vz_config3_guest_wrmask(struct kvm_vcpu *vcpu) +{ + return MIPS_CONF3_ISA_OE; +} + +static inline unsigned int kvm_vz_config4_guest_wrmask(struct kvm_vcpu *vcpu) +{ + /* no need to be exact */ + return MIPS_CONF4_VFTLBPAGESIZE; +} + +static inline unsigned int kvm_vz_config5_guest_wrmask(struct kvm_vcpu *vcpu) +{ + unsigned int mask = MIPS_CONF5_K | MIPS_CONF5_CV | MIPS_CONF5_SBRI; + + /* Permit MSAEn changes if MSA supported and enabled */ + if (kvm_mips_guest_has_msa(&vcpu->arch)) + mask |= MIPS_CONF5_MSAEN; + + /* + * Permit guest FPU mode changes if FPU is enabled and the relevant + * feature exists according to FIR register. 
+ */ + if (kvm_mips_guest_has_fpu(&vcpu->arch)) { + if (cpu_has_ufr) + mask |= MIPS_CONF5_UFR; + if (cpu_has_fre) + mask |= MIPS_CONF5_FRE | MIPS_CONF5_UFE; + } + + return mask; +} + +/* + * VZ optionally allows these additional Config bits to be written by root: + * Config: M, [MT] + * Config1: M, [MMUSize-1, C2, MD, PC, WR, CA], FP + * Config2: M + * Config3: M, MSAP, [BPG], ULRI, [DSP2P, DSPP], CTXTC, [ITL, LPA, VEIC, + * VInt, SP, CDMM, MT, SM, TL] + * Config4: M, [VTLBSizeExt, MMUSizeExt] + * Config5: MRP + */ + +static inline unsigned int kvm_vz_config_user_wrmask(struct kvm_vcpu *vcpu) +{ + return kvm_vz_config_guest_wrmask(vcpu) | MIPS_CONF_M; +} + +static inline unsigned int kvm_vz_config1_user_wrmask(struct kvm_vcpu *vcpu) +{ + unsigned int mask = kvm_vz_config1_guest_wrmask(vcpu) | MIPS_CONF_M; + + /* Permit FPU to be present if FPU is supported */ + if (kvm_mips_guest_can_have_fpu(&vcpu->arch)) + mask |= MIPS_CONF1_FP; + + return mask; +} + +static inline unsigned int kvm_vz_config2_user_wrmask(struct kvm_vcpu *vcpu) +{ + return kvm_vz_config2_guest_wrmask(vcpu) | MIPS_CONF_M; +} + +static inline unsigned int kvm_vz_config3_user_wrmask(struct kvm_vcpu *vcpu) +{ + unsigned int mask = kvm_vz_config3_guest_wrmask(vcpu) | MIPS_CONF_M | + MIPS_CONF3_ULRI | MIPS_CONF3_CTXTC; + + /* Permit MSA to be present if MSA is supported */ + if (kvm_mips_guest_can_have_msa(&vcpu->arch)) + mask |= MIPS_CONF3_MSA; + + return mask; +} + +static inline unsigned int kvm_vz_config4_user_wrmask(struct kvm_vcpu *vcpu) +{ + return kvm_vz_config4_guest_wrmask(vcpu) | MIPS_CONF_M; +} + +static inline unsigned int kvm_vz_config5_user_wrmask(struct kvm_vcpu *vcpu) +{ + return kvm_vz_config5_guest_wrmask(vcpu) | MIPS_CONF5_MRP; +} + +static gpa_t kvm_vz_gva_to_gpa_cb(gva_t gva) +{ + /* VZ guest has already converted gva to gpa */ + return gva; +} + +static void kvm_vz_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority) +{ + set_bit(priority, &vcpu->arch.pending_exceptions); + clear_bit(priority, &vcpu->arch.pending_exceptions_clr); +} + +static void kvm_vz_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority) +{ + clear_bit(priority, &vcpu->arch.pending_exceptions); + set_bit(priority, &vcpu->arch.pending_exceptions_clr); +} + +static void kvm_vz_queue_timer_int_cb(struct kvm_vcpu *vcpu) +{ + /* + * timer expiry is asynchronous to vcpu execution therefore defer guest + * cp0 accesses + */ + kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER); +} + +static void kvm_vz_dequeue_timer_int_cb(struct kvm_vcpu *vcpu) +{ + /* + * timer expiry is asynchronous to vcpu execution therefore defer guest + * cp0 accesses + */ + kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_TIMER); +} + +static void kvm_vz_queue_io_int_cb(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq) +{ + int intr = (int)irq->irq; + + /* + * interrupts are asynchronous to vcpu execution therefore defer guest + * cp0 accesses + */ + switch (intr) { + case 2: + kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IO); + break; + + case 3: + kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IPI_1); + break; + + case 4: + kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IPI_2); + break; + + default: + break; + } + +} + +static void kvm_vz_dequeue_io_int_cb(struct kvm_vcpu *vcpu, + struct kvm_mips_interrupt *irq) +{ + int intr = (int)irq->irq; + + /* + * interrupts are asynchronous to vcpu execution therefore defer guest + * cp0 accesses + */ + switch (intr) { + case -2: + kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IO); + break; + + case -3: + kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_1); + break; + + case 
-4: + kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_2); + break; + + default: + break; + } + +} + +static u32 kvm_vz_priority_to_irq[MIPS_EXC_MAX] = { + [MIPS_EXC_INT_TIMER] = C_IRQ5, + [MIPS_EXC_INT_IO] = C_IRQ0, + [MIPS_EXC_INT_IPI_1] = C_IRQ1, + [MIPS_EXC_INT_IPI_2] = C_IRQ2, +}; + +static int kvm_vz_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, + u32 cause) +{ + u32 irq = (priority < MIPS_EXC_MAX) ? + kvm_vz_priority_to_irq[priority] : 0; + + switch (priority) { + case MIPS_EXC_INT_TIMER: + set_gc0_cause(C_TI); + break; + + case MIPS_EXC_INT_IO: + case MIPS_EXC_INT_IPI_1: + case MIPS_EXC_INT_IPI_2: + if (cpu_has_guestctl2) + set_c0_guestctl2(irq); + else + set_gc0_cause(irq); + break; + + default: + break; + } + + clear_bit(priority, &vcpu->arch.pending_exceptions); + return 1; +} + +static int kvm_vz_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority, + u32 cause) +{ + u32 irq = (priority < MIPS_EXC_MAX) ? + kvm_vz_priority_to_irq[priority] : 0; + + switch (priority) { + case MIPS_EXC_INT_TIMER: + /* + * Call to kvm_write_c0_guest_compare() clears Cause.TI in + * kvm_mips_emulate_CP0(). Explicitly clear irq associated with + * Cause.IP[IPTI] if GuestCtl2 virtual interrupt register not + * supported or if not using GuestCtl2 Hardware Clear. + */ + if (cpu_has_guestctl2) { + if (!(read_c0_guestctl2() & (irq << 14))) + clear_c0_guestctl2(irq); + } else { + clear_gc0_cause(irq); + } + break; + + case MIPS_EXC_INT_IO: + case MIPS_EXC_INT_IPI_1: + case MIPS_EXC_INT_IPI_2: + /* Clear GuestCtl2.VIP irq if not using Hardware Clear */ + if (cpu_has_guestctl2) { + if (!(read_c0_guestctl2() & (irq << 14))) + clear_c0_guestctl2(irq); + } else { + clear_gc0_cause(irq); + } + break; + + default: + break; + } + + clear_bit(priority, &vcpu->arch.pending_exceptions_clr); + return 1; +} + +/* + * VZ guest timer handling. + */ + +/** + * kvm_vz_should_use_htimer() - Find whether to use the VZ hard guest timer. + * @vcpu: Virtual CPU. + * + * Returns: true if the VZ GTOffset & real guest CP0_Count should be used + * instead of software emulation of guest timer. + * false otherwise. + */ +static bool kvm_vz_should_use_htimer(struct kvm_vcpu *vcpu) +{ + if (kvm_mips_count_disabled(vcpu)) + return false; + + /* Chosen frequency must match real frequency */ + if (mips_hpt_frequency != vcpu->arch.count_hz) + return false; + + /* We don't support a CP0_GTOffset with fewer bits than CP0_Count */ + if (current_cpu_data.gtoffset_mask != 0xffffffff) + return false; + + return true; +} + +/** + * _kvm_vz_restore_stimer() - Restore soft timer state. + * @vcpu: Virtual CPU. + * @compare: CP0_Compare register value, restored by caller. + * @cause: CP0_Cause register to restore. + * + * Restore VZ state relating to the soft timer. The hard timer can be enabled + * later. + */ +static void _kvm_vz_restore_stimer(struct kvm_vcpu *vcpu, u32 compare, + u32 cause) +{ + /* + * Avoid spurious counter interrupts by setting Guest CP0_Count to just + * after Guest CP0_Compare. + */ + write_c0_gtoffset(compare - read_c0_count()); + + back_to_back_c0_hazard(); + write_gc0_cause(cause); +} + +/** + * _kvm_vz_restore_htimer() - Restore hard timer state. + * @vcpu: Virtual CPU. + * @compare: CP0_Compare register value, restored by caller. + * @cause: CP0_Cause register to restore. + * + * Restore hard timer Guest.Count & Guest.Cause taking care to preserve the + * value of Guest.CP0_Cause.TI while restoring Guest.CP0_Cause. 
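[Editor's note] _kvm_vz_restore_stimer() above leans on the VZ relationship that the guest observes guest Count = root Count + GTOffset modulo 2^32; that relationship is assumed here, not spelled out in the patch. Writing compare - read_c0_count() into GTOffset therefore parks the guest Count right at Compare, so the next timer interrupt is a full Count period away. A minimal sketch, under that assumption:

#include <stdio.h>
#include <stdint.h>

/* Sketch of the CP0_GTOffset arithmetic; assumes guest Count = root Count
 * + GTOffset modulo 2^32, as the code above relies on. */
int main(void)
{
	uint32_t root_count = 0x89abcdefu;	/* invented root CP0_Count */
	uint32_t compare    = 0x00001000u;	/* guest CP0_Compare */
	uint32_t gtoffset   = compare - root_count;

	printf("guest count = %#x\n", root_count + gtoffset);	/* == compare */
	return 0;
}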
+ */ +static void _kvm_vz_restore_htimer(struct kvm_vcpu *vcpu, + u32 compare, u32 cause) +{ + u32 start_count, after_count; + ktime_t freeze_time; + unsigned long flags; + + /* + * Freeze the soft-timer and sync the guest CP0_Count with it. We do + * this with interrupts disabled to avoid latency. + */ + local_irq_save(flags); + freeze_time = kvm_mips_freeze_hrtimer(vcpu, &start_count); + write_c0_gtoffset(start_count - read_c0_count()); + local_irq_restore(flags); + + /* restore guest CP0_Cause, as TI may already be set */ + back_to_back_c0_hazard(); + write_gc0_cause(cause); + + /* + * The above sequence isn't atomic and would result in lost timer + * interrupts if we're not careful. Detect if a timer interrupt is due + * and assert it. + */ + back_to_back_c0_hazard(); + after_count = read_gc0_count(); + if (after_count - start_count > compare - start_count - 1) + kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER); +} + +/** + * kvm_vz_restore_timer() - Restore timer state. + * @vcpu: Virtual CPU. + * + * Restore soft timer state from saved context. + */ +static void kvm_vz_restore_timer(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + u32 cause, compare; + + compare = kvm_read_sw_gc0_compare(cop0); + cause = kvm_read_sw_gc0_cause(cop0); + + write_gc0_compare(compare); + _kvm_vz_restore_stimer(vcpu, compare, cause); +} + +/** + * kvm_vz_acquire_htimer() - Switch to hard timer state. + * @vcpu: Virtual CPU. + * + * Restore hard timer state on top of existing soft timer state if possible. + * + * Since hard timer won't remain active over preemption, preemption should be + * disabled by the caller. + */ +void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu) +{ + u32 gctl0; + + gctl0 = read_c0_guestctl0(); + if (!(gctl0 & MIPS_GCTL0_GT) && kvm_vz_should_use_htimer(vcpu)) { + /* enable guest access to hard timer */ + write_c0_guestctl0(gctl0 | MIPS_GCTL0_GT); + + _kvm_vz_restore_htimer(vcpu, read_gc0_compare(), + read_gc0_cause()); + } +} + +/** + * _kvm_vz_save_htimer() - Switch to software emulation of guest timer. + * @vcpu: Virtual CPU. + * @compare: Pointer to write compare value to. + * @cause: Pointer to write cause value to. + * + * Save VZ guest timer state and switch to software emulation of guest CP0 + * timer. The hard timer must already be in use, so preemption should be + * disabled. + */ +static void _kvm_vz_save_htimer(struct kvm_vcpu *vcpu, + u32 *out_compare, u32 *out_cause) +{ + u32 cause, compare, before_count, end_count; + ktime_t before_time; + + compare = read_gc0_compare(); + *out_compare = compare; + + before_time = ktime_get(); + + /* + * Record the CP0_Count *prior* to saving CP0_Cause, so we have a time + * at which no pending timer interrupt is missing. + */ + before_count = read_gc0_count(); + back_to_back_c0_hazard(); + cause = read_gc0_cause(); + *out_cause = cause; + + /* + * Record a final CP0_Count which we will transfer to the soft-timer. + * This is recorded *after* saving CP0_Cause, so we don't get any timer + * interrupts from just after the final CP0_Count point. + */ + back_to_back_c0_hazard(); + end_count = read_gc0_count(); + + /* + * The above sequence isn't atomic, so we could miss a timer interrupt + * between reading CP0_Cause and end_count. Detect and record any timer + * interrupt due between before_count and end_count. 
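[Editor's note] The check that follows implements this detection with wraparound-safe unsigned arithmetic: end_count - before_count > compare - before_count - 1 is equivalent to "Compare lies in the interval (before_count, end_count] modulo 2^32". A standalone sketch with invented sample values:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Sketch of the wraparound-safe "did Count pass Compare?" test used above. */
bool timer_irq_due(uint32_t before, uint32_t end, uint32_t compare)
{
	return end - before > compare - before - 1u;
}

int main(void)
{
	/* Count wrapped from 0xfffffff0 to 0x00000010 and passed Compare=4. */
	printf("%d\n", timer_irq_due(0xfffffff0u, 0x00000010u, 0x4u));	/* 1 */
	/* Compare=0x3000 not reached by the time Count hit 0x2000. */
	printf("%d\n", timer_irq_due(0x1000u, 0x2000u, 0x3000u));	/* 0 */
	return 0;
}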
+ */ + if (end_count - before_count > compare - before_count - 1) + kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER); + + /* + * Restore soft-timer, ignoring a small amount of negative drift due to + * delay between freeze_hrtimer and setting CP0_GTOffset. + */ + kvm_mips_restore_hrtimer(vcpu, before_time, end_count, -0x10000); +} + +/** + * kvm_vz_save_timer() - Save guest timer state. + * @vcpu: Virtual CPU. + * + * Save VZ guest timer state and switch to soft guest timer if hard timer was in + * use. + */ +static void kvm_vz_save_timer(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + u32 gctl0, compare, cause; + + gctl0 = read_c0_guestctl0(); + if (gctl0 & MIPS_GCTL0_GT) { + /* disable guest use of hard timer */ + write_c0_guestctl0(gctl0 & ~MIPS_GCTL0_GT); + + /* save hard timer state */ + _kvm_vz_save_htimer(vcpu, &compare, &cause); + } else { + compare = read_gc0_compare(); + cause = read_gc0_cause(); + } + + /* save timer-related state to VCPU context */ + kvm_write_sw_gc0_cause(cop0, cause); + kvm_write_sw_gc0_compare(cop0, compare); +} + +/** + * kvm_vz_lose_htimer() - Ensure hard guest timer is not in use. + * @vcpu: Virtual CPU. + * + * Transfers the state of the hard guest timer to the soft guest timer, leaving + * guest state intact so it can continue to be used with the soft timer. + */ +void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu) +{ + u32 gctl0, compare, cause; + + preempt_disable(); + gctl0 = read_c0_guestctl0(); + if (gctl0 & MIPS_GCTL0_GT) { + /* disable guest use of timer */ + write_c0_guestctl0(gctl0 & ~MIPS_GCTL0_GT); + + /* switch to soft timer */ + _kvm_vz_save_htimer(vcpu, &compare, &cause); + + /* leave soft timer in usable state */ + _kvm_vz_restore_stimer(vcpu, compare, cause); + } + preempt_enable(); +} + +/** + * is_eva_access() - Find whether an instruction is an EVA memory accessor. + * @inst: 32-bit instruction encoding. + * + * Finds whether @inst encodes an EVA memory access instruction, which would + * indicate that emulation of it should access the user mode address space + * instead of the kernel mode address space. This matters for MUSUK segments + * which are TLB mapped for user mode but unmapped for kernel mode. + * + * Returns: Whether @inst encodes an EVA accessor instruction. + */ +static bool is_eva_access(union mips_instruction inst) +{ + if (inst.spec3_format.opcode != spec3_op) + return false; + + switch (inst.spec3_format.func) { + case lwle_op: + case lwre_op: + case cachee_op: + case sbe_op: + case she_op: + case sce_op: + case swe_op: + case swle_op: + case swre_op: + case prefe_op: + case lbue_op: + case lhue_op: + case lbe_op: + case lhe_op: + case lle_op: + case lwe_op: + return true; + default: + return false; + } +} + +/** + * is_eva_am_mapped() - Find whether an access mode is mapped. + * @vcpu: KVM VCPU state. + * @am: 3-bit encoded access mode. + * @eu: Segment becomes unmapped and uncached when Status.ERL=1. + * + * Decode @am to find whether it encodes a mapped segment for the current VCPU + * state. Where necessary @eu and the actual instruction causing the fault are + * taken into account to make the decision. + * + * Returns: Whether the VCPU faulted on a TLB mapped address. + */ +static bool is_eva_am_mapped(struct kvm_vcpu *vcpu, unsigned int am, bool eu) +{ + u32 am_lookup; + int err; + + /* + * Interpret access control mode. 
We assume address errors will already + * have been caught by the guest, leaving us with: + * AM UM SM KM 31..24 23..16 + * UK 0 000 Unm 0 0 + * MK 1 001 TLB 1 + * MSK 2 010 TLB TLB 1 + * MUSK 3 011 TLB TLB TLB 1 + * MUSUK 4 100 TLB TLB Unm 0 1 + * USK 5 101 Unm Unm 0 0 + * - 6 110 0 0 + * UUSK 7 111 Unm Unm Unm 0 0 + * + * We shift a magic value by AM across the sign bit to find if always + * TLB mapped, and if not shift by 8 again to find if it depends on KM. + */ + am_lookup = 0x70080000 << am; + if ((s32)am_lookup < 0) { + /* + * MK, MSK, MUSK + * Always TLB mapped, unless SegCtl.EU && ERL + */ + if (!eu || !(read_gc0_status() & ST0_ERL)) + return true; + } else { + am_lookup <<= 8; + if ((s32)am_lookup < 0) { + union mips_instruction inst; + unsigned int status; + u32 *opc; + + /* + * MUSUK + * TLB mapped if not in kernel mode + */ + status = read_gc0_status(); + if (!(status & (ST0_EXL | ST0_ERL)) && + (status & ST0_KSU)) + return true; + /* + * EVA access instructions in kernel + * mode access user address space. + */ + opc = (u32 *)vcpu->arch.pc; + if (vcpu->arch.host_cp0_cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (!err && is_eva_access(inst)) + return true; + } + } + + return false; +} + +/** + * kvm_vz_gva_to_gpa() - Convert valid GVA to GPA. + * @vcpu: KVM VCPU state. + * @gva: Guest virtual address to convert. + * @gpa: Output guest physical address. + * + * Convert a guest virtual address (GVA) which is valid according to the guest + * context, to a guest physical address (GPA). + * + * Returns: 0 on success. + * -errno on failure. + */ +static int kvm_vz_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, + unsigned long *gpa) +{ + u32 gva32 = gva; + unsigned long segctl; + + if ((long)gva == (s32)gva32) { + /* Handle canonical 32-bit virtual address */ + if (cpu_guest_has_segments) { + unsigned long mask, pa; + + switch (gva32 >> 29) { + case 0: + case 1: /* CFG5 (1GB) */ + segctl = read_gc0_segctl2() >> 16; + mask = (unsigned long)0xfc0000000ull; + break; + case 2: + case 3: /* CFG4 (1GB) */ + segctl = read_gc0_segctl2(); + mask = (unsigned long)0xfc0000000ull; + break; + case 4: /* CFG3 (512MB) */ + segctl = read_gc0_segctl1() >> 16; + mask = (unsigned long)0xfe0000000ull; + break; + case 5: /* CFG2 (512MB) */ + segctl = read_gc0_segctl1(); + mask = (unsigned long)0xfe0000000ull; + break; + case 6: /* CFG1 (512MB) */ + segctl = read_gc0_segctl0() >> 16; + mask = (unsigned long)0xfe0000000ull; + break; + case 7: /* CFG0 (512MB) */ + segctl = read_gc0_segctl0(); + mask = (unsigned long)0xfe0000000ull; + break; + default: + /* + * GCC 4.9 isn't smart enough to figure out that + * segctl and mask are always initialised. + */ + unreachable(); + } + + if (is_eva_am_mapped(vcpu, (segctl >> 4) & 0x7, + segctl & 0x0008)) + goto tlb_mapped; + + /* Unmapped, find guest physical address */ + pa = (segctl << 20) & mask; + pa |= gva32 & ~mask; + *gpa = pa; + return 0; + } else if ((s32)gva32 < (s32)0xc0000000) { + /* legacy unmapped KSeg0 or KSeg1 */ + *gpa = gva32 & 0x1fffffff; + return 0; + } +#ifdef CONFIG_64BIT + } else if ((gva & 0xc000000000000000) == 0x8000000000000000) { + /* XKPHYS */ + if (cpu_guest_has_segments) { + /* + * Each of the 8 regions can be overridden by SegCtl2.XR + * to use SegCtl1.XAM. 
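
The magic-constant decode in is_eva_am_mapped() above is compact enough to be easy to misread. This standalone sketch (not from the patch; classify_am() and the enum are invented names) reproduces the two sign-bit tests and checks them against the table in that comment:

#include <assert.h>
#include <stdint.h>

enum am_class { AM_UNMAPPED, AM_ALWAYS_MAPPED, AM_MAPPED_UNLESS_KERNEL };

/* The two sign-bit tests from is_eva_am_mapped(), in isolation. */
static enum am_class classify_am(unsigned int am)
{
	uint32_t lookup = 0x70080000u << am;

	if ((int32_t)lookup < 0)
		return AM_ALWAYS_MAPPED;		/* MK, MSK, MUSK */
	lookup <<= 8;
	if ((int32_t)lookup < 0)
		return AM_MAPPED_UNLESS_KERNEL;		/* MUSUK */
	return AM_UNMAPPED;				/* UK, USK, UUSK */
}

int main(void)
{
	/* Matches the table in the comment above. */
	assert(classify_am(0) == AM_UNMAPPED);			/* UK    */
	assert(classify_am(1) == AM_ALWAYS_MAPPED);		/* MK    */
	assert(classify_am(2) == AM_ALWAYS_MAPPED);		/* MSK   */
	assert(classify_am(3) == AM_ALWAYS_MAPPED);		/* MUSK  */
	assert(classify_am(4) == AM_MAPPED_UNLESS_KERNEL);	/* MUSUK */
	assert(classify_am(5) == AM_UNMAPPED);			/* USK   */
	assert(classify_am(7) == AM_UNMAPPED);			/* UUSK  */
	return 0;
}
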
+ */ + segctl = read_gc0_segctl2(); + if (segctl & (1ull << (56 + ((gva >> 59) & 0x7)))) { + segctl = read_gc0_segctl1(); + if (is_eva_am_mapped(vcpu, (segctl >> 59) & 0x7, + 0)) + goto tlb_mapped; + } + + } + /* + * Traditionally fully unmapped. + * Bits 61:59 specify the CCA, which we can just mask off here. + * Bits 58:PABITS should be zero, but we shouldn't have got here + * if it wasn't. + */ + *gpa = gva & 0x07ffffffffffffff; + return 0; +#endif + } + +tlb_mapped: + return kvm_vz_guest_tlb_lookup(vcpu, gva, gpa); +} + +/** + * kvm_vz_badvaddr_to_gpa() - Convert GVA BadVAddr from root exception to GPA. + * @vcpu: KVM VCPU state. + * @badvaddr: Root BadVAddr. + * @gpa: Output guest physical address. + * + * VZ implementations are permitted to report guest virtual addresses (GVA) in + * BadVAddr on a root exception during guest execution, instead of the more + * convenient guest physical addresses (GPA). When we get a GVA, this function + * converts it to a GPA, taking into account guest segmentation and guest TLB + * state. + * + * Returns: 0 on success. + * -errno on failure. + */ +static int kvm_vz_badvaddr_to_gpa(struct kvm_vcpu *vcpu, unsigned long badvaddr, + unsigned long *gpa) +{ + unsigned int gexccode = (vcpu->arch.host_cp0_guestctl0 & + MIPS_GCTL0_GEXC) >> MIPS_GCTL0_GEXC_SHIFT; + + /* If BadVAddr is GPA, then all is well in the world */ + if (likely(gexccode == MIPS_GCTL0_GEXC_GPA)) { + *gpa = badvaddr; + return 0; + } + + /* Otherwise we'd expect it to be GVA ... */ + if (WARN(gexccode != MIPS_GCTL0_GEXC_GVA, + "Unexpected gexccode %#x\n", gexccode)) + return -EINVAL; + + /* ... and we need to perform the GVA->GPA translation in software */ + return kvm_vz_gva_to_gpa(vcpu, badvaddr, gpa); +} + +static int kvm_trap_vz_no_handler(struct kvm_vcpu *vcpu) +{ + u32 *opc = (u32 *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + u32 exccode = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE; + unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; + u32 inst = 0; + + /* + * Fetch the instruction. 
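
For the unmapped cases that kvm_vz_gva_to_gpa() above handles without a TLB lookup, the translation is a plain mask. A minimal sketch, assuming the legacy KSeg0/KSeg1 and XKPHYS layouts only and leaving out the SegCtl and guest-TLB paths (helper names are invented):

#include <assert.h>
#include <stdint.h>

/* KSeg0/KSeg1: strip the segment bits to get the physical address. */
static uint64_t kseg_gva_to_gpa(uint32_t gva32)
{
	return gva32 & 0x1fffffff;
}

/* XKPHYS: bits 61:59 only select the CCA, bits 58:0 are the address. */
static uint64_t xkphys_gva_to_gpa(uint64_t gva)
{
	return gva & 0x07ffffffffffffffull;
}

int main(void)
{
	assert(kseg_gva_to_gpa(0x80001000u) == 0x1000);	/* KSeg0, cached   */
	assert(kseg_gva_to_gpa(0xa0001000u) == 0x1000);	/* KSeg1, uncached */
	assert(xkphys_gva_to_gpa(0x9000000010000000ull) == 0x10000000ull);
	return 0;
}
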
+ */ + if (cause & CAUSEF_BD) + opc += 1; + kvm_get_badinstr(opc, vcpu, &inst); + + kvm_err("Exception Code: %d not handled @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n", + exccode, opc, inst, badvaddr, + read_gc0_status()); + kvm_arch_vcpu_dump_regs(vcpu); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; +} + +static unsigned long mips_process_maar(unsigned int op, unsigned long val) +{ + /* Mask off unused bits */ + unsigned long mask = 0xfffff000 | MIPS_MAAR_S | MIPS_MAAR_VL; + + if (read_gc0_pagegrain() & PG_ELPA) + mask |= 0x00ffffff00000000ull; + if (cpu_guest_has_mvh) + mask |= MIPS_MAAR_VH; + + /* Set or clear VH */ + if (op == mtc_op) { + /* clear VH */ + val &= ~MIPS_MAAR_VH; + } else if (op == dmtc_op) { + /* set VH to match VL */ + val &= ~MIPS_MAAR_VH; + if (val & MIPS_MAAR_VL) + val |= MIPS_MAAR_VH; + } + + return val & mask; +} + +static void kvm_write_maari(struct kvm_vcpu *vcpu, unsigned long val) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + + val &= MIPS_MAARI_INDEX; + if (val == MIPS_MAARI_INDEX) + kvm_write_sw_gc0_maari(cop0, ARRAY_SIZE(vcpu->arch.maar) - 1); + else if (val < ARRAY_SIZE(vcpu->arch.maar)) + kvm_write_sw_gc0_maari(cop0, val); +} + +static enum emulation_result kvm_vz_gpsi_cop0(union mips_instruction inst, + u32 *opc, u32 cause, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + enum emulation_result er = EMULATE_DONE; + u32 rt, rd, sel; + unsigned long curr_pc; + unsigned long val; + + /* + * Update PC and hold onto current PC in case there is + * an error and we want to rollback the PC + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + + if (inst.co_format.co) { + switch (inst.co_format.func) { + case wait_op: + er = kvm_mips_emul_wait(vcpu); + break; + default: + er = EMULATE_FAIL; + } + } else { + rt = inst.c0r_format.rt; + rd = inst.c0r_format.rd; + sel = inst.c0r_format.sel; + + switch (inst.c0r_format.rs) { + case dmfc_op: + case mfc_op: +#ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS + cop0->stat[rd][sel]++; +#endif + if (rd == MIPS_CP0_COUNT && + sel == 0) { /* Count */ + val = kvm_mips_read_count(vcpu); + } else if (rd == MIPS_CP0_COMPARE && + sel == 0) { /* Compare */ + val = read_gc0_compare(); + } else if (rd == MIPS_CP0_LLADDR && + sel == 0) { /* LLAddr */ + if (cpu_guest_has_rw_llb) + val = read_gc0_lladdr() & + MIPS_LLADDR_LLB; + else + val = 0; + } else if (rd == MIPS_CP0_LLADDR && + sel == 1 && /* MAAR */ + cpu_guest_has_maar && + !cpu_guest_has_dyn_maar) { + /* MAARI must be in range */ + BUG_ON(kvm_read_sw_gc0_maari(cop0) >= + ARRAY_SIZE(vcpu->arch.maar)); + val = vcpu->arch.maar[ + kvm_read_sw_gc0_maari(cop0)]; + } else if ((rd == MIPS_CP0_PRID && + (sel == 0 || /* PRid */ + sel == 2 || /* CDMMBase */ + sel == 3)) || /* CMGCRBase */ + (rd == MIPS_CP0_STATUS && + (sel == 2 || /* SRSCtl */ + sel == 3)) || /* SRSMap */ + (rd == MIPS_CP0_CONFIG && + (sel == 7)) || /* Config7 */ + (rd == MIPS_CP0_LLADDR && + (sel == 2) && /* MAARI */ + cpu_guest_has_maar && + !cpu_guest_has_dyn_maar) || + (rd == MIPS_CP0_ERRCTL && + (sel == 0))) { /* ErrCtl */ + val = cop0->reg[rd][sel]; + } else { + val = 0; + er = EMULATE_FAIL; + } + + if (er != EMULATE_FAIL) { + /* Sign extend */ + if (inst.c0r_format.rs == mfc_op) + val = (int)val; + vcpu->arch.gprs[rt] = val; + } + + trace_kvm_hwr(vcpu, (inst.c0r_format.rs == mfc_op) ? 
+ KVM_TRACE_MFC0 : KVM_TRACE_DMFC0, + KVM_TRACE_COP0(rd, sel), val); + break; + + case dmtc_op: + case mtc_op: +#ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS + cop0->stat[rd][sel]++; +#endif + val = vcpu->arch.gprs[rt]; + trace_kvm_hwr(vcpu, (inst.c0r_format.rs == mtc_op) ? + KVM_TRACE_MTC0 : KVM_TRACE_DMTC0, + KVM_TRACE_COP0(rd, sel), val); + + if (rd == MIPS_CP0_COUNT && + sel == 0) { /* Count */ + kvm_vz_lose_htimer(vcpu); + kvm_mips_write_count(vcpu, vcpu->arch.gprs[rt]); + } else if (rd == MIPS_CP0_COMPARE && + sel == 0) { /* Compare */ + kvm_mips_write_compare(vcpu, + vcpu->arch.gprs[rt], + true); + } else if (rd == MIPS_CP0_LLADDR && + sel == 0) { /* LLAddr */ + /* + * P5600 generates GPSI on guest MTC0 LLAddr. + * Only allow the guest to clear LLB. + */ + if (cpu_guest_has_rw_llb && + !(val & MIPS_LLADDR_LLB)) + write_gc0_lladdr(0); + } else if (rd == MIPS_CP0_LLADDR && + sel == 1 && /* MAAR */ + cpu_guest_has_maar && + !cpu_guest_has_dyn_maar) { + val = mips_process_maar(inst.c0r_format.rs, + val); + + /* MAARI must be in range */ + BUG_ON(kvm_read_sw_gc0_maari(cop0) >= + ARRAY_SIZE(vcpu->arch.maar)); + vcpu->arch.maar[kvm_read_sw_gc0_maari(cop0)] = + val; + } else if (rd == MIPS_CP0_LLADDR && + (sel == 2) && /* MAARI */ + cpu_guest_has_maar && + !cpu_guest_has_dyn_maar) { + kvm_write_maari(vcpu, val); + } else if (rd == MIPS_CP0_ERRCTL && + (sel == 0)) { /* ErrCtl */ + /* ignore the written value */ + } else { + er = EMULATE_FAIL; + } + break; + + default: + er = EMULATE_FAIL; + break; + } + } + /* Rollback PC only if emulation was unsuccessful */ + if (er == EMULATE_FAIL) { + kvm_err("[%#lx]%s: unsupported cop0 instruction 0x%08x\n", + curr_pc, __func__, inst.word); + + vcpu->arch.pc = curr_pc; + } + + return er; +} + +static enum emulation_result kvm_vz_gpsi_cache(union mips_instruction inst, + u32 *opc, u32 cause, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er = EMULATE_DONE; + u32 cache, op_inst, op, base; + s16 offset; + struct kvm_vcpu_arch *arch = &vcpu->arch; + unsigned long va, curr_pc; + + /* + * Update PC and hold onto current PC in case there is + * an error and we want to rollback the PC + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + + base = inst.i_format.rs; + op_inst = inst.i_format.rt; + if (cpu_has_mips_r6) + offset = inst.spec3_format.simmediate; + else + offset = inst.i_format.simmediate; + cache = op_inst & CacheOp_Cache; + op = op_inst & CacheOp_Op; + + va = arch->gprs[base] + offset; + + kvm_debug("CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", + cache, op, base, arch->gprs[base], offset); + + /* Secondary or tirtiary cache ops ignored */ + if (cache != Cache_I && cache != Cache_D) + return EMULATE_DONE; + + switch (op_inst) { + case Index_Invalidate_I: + flush_icache_line_indexed(va); + return EMULATE_DONE; + case Index_Writeback_Inv_D: + flush_dcache_line_indexed(va); + return EMULATE_DONE; + case Hit_Invalidate_I: + case Hit_Invalidate_D: + case Hit_Writeback_Inv_D: + if (boot_cpu_type() == CPU_CAVIUM_OCTEON3) { + /* We can just flush entire icache */ + local_flush_icache_range(0, 0); + return EMULATE_DONE; + } + + /* So far, other platforms support guest hit cache ops */ + break; + default: + break; + }; + + kvm_err("@ %#lx/%#lx CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", + curr_pc, vcpu->arch.gprs[31], cache, op, base, arch->gprs[base], + offset); + /* Rollback PC */ + vcpu->arch.pc = curr_pc; + + return EMULATE_FAIL; +} + +static enum 
emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er = EMULATE_DONE; + struct kvm_vcpu_arch *arch = &vcpu->arch; + struct kvm_run *run = vcpu->run; + union mips_instruction inst; + int rd, rt, sel; + int err; + + /* + * Fetch the instruction. + */ + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) + return EMULATE_FAIL; + + switch (inst.r_format.opcode) { + case cop0_op: + er = kvm_vz_gpsi_cop0(inst, opc, cause, run, vcpu); + break; +#ifndef CONFIG_CPU_MIPSR6 + case cache_op: + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE); + er = kvm_vz_gpsi_cache(inst, opc, cause, run, vcpu); + break; +#endif + case spec3_op: + switch (inst.spec3_format.func) { +#ifdef CONFIG_CPU_MIPSR6 + case cache6_op: + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE); + er = kvm_vz_gpsi_cache(inst, opc, cause, run, vcpu); + break; +#endif + case rdhwr_op: + if (inst.r_format.rs || (inst.r_format.re >> 3)) + goto unknown; + + rd = inst.r_format.rd; + rt = inst.r_format.rt; + sel = inst.r_format.re & 0x7; + + switch (rd) { + case MIPS_HWR_CC: /* Read count register */ + arch->gprs[rt] = + (long)(int)kvm_mips_read_count(vcpu); + break; + default: + trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR, + KVM_TRACE_HWR(rd, sel), 0); + goto unknown; + }; + + trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR, + KVM_TRACE_HWR(rd, sel), arch->gprs[rt]); + + er = update_pc(vcpu, cause); + break; + default: + goto unknown; + }; + break; +unknown: + + default: + kvm_err("GPSI exception not supported (%p/%#x)\n", + opc, inst.word); + kvm_arch_vcpu_dump_regs(vcpu); + er = EMULATE_FAIL; + break; + } + + return er; +} + +static enum emulation_result kvm_trap_vz_handle_gsfc(u32 cause, u32 *opc, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er = EMULATE_DONE; + struct kvm_vcpu_arch *arch = &vcpu->arch; + union mips_instruction inst; + int err; + + /* + * Fetch the instruction. + */ + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) + return EMULATE_FAIL; + + /* complete MTC0 on behalf of guest and advance EPC */ + if (inst.c0r_format.opcode == cop0_op && + inst.c0r_format.rs == mtc_op && + inst.c0r_format.z == 0) { + int rt = inst.c0r_format.rt; + int rd = inst.c0r_format.rd; + int sel = inst.c0r_format.sel; + unsigned int val = arch->gprs[rt]; + unsigned int old_val, change; + + trace_kvm_hwr(vcpu, KVM_TRACE_MTC0, KVM_TRACE_COP0(rd, sel), + val); + + if ((rd == MIPS_CP0_STATUS) && (sel == 0)) { + /* FR bit should read as zero if no FPU */ + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + val &= ~(ST0_CU1 | ST0_FR); + + /* + * Also don't allow FR to be set if host doesn't support + * it. + */ + if (!(boot_cpu_data.fpu_id & MIPS_FPIR_F64)) + val &= ~ST0_FR; + + old_val = read_gc0_status(); + change = val ^ old_val; + + if (change & ST0_FR) { + /* + * FPU and Vector register state is made + * UNPREDICTABLE by a change of FR, so don't + * even bother saving it. + */ + kvm_drop_fpu(vcpu); + } + + /* + * If MSA state is already live, it is undefined how it + * interacts with FR=0 FPU state, and we don't want to + * hit reserved instruction exceptions trying to save + * the MSA state later when CU=1 && FR=1, so play it + * safe and save it first. 
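
For reference when reading the GSFC path above, which pulls rt, rd and sel out of the trapped MTC0 word, this is how the c0r_format fields divide a raw instruction. A standalone sketch with an invented struct that mirrors the kernel's union mips_instruction layout (illustrative only):

#include <assert.h>
#include <stdint.h>

struct c0r {
	unsigned int opcode;	/* bits 31:26, 0x10 for COP0                */
	unsigned int rs;	/* bits 25:21, 0x04 for MTC0                */
	unsigned int rt;	/* bits 20:16, source GPR                   */
	unsigned int rd;	/* bits 15:11, CP0 register number          */
	unsigned int z;		/* bits 10:3,  must be zero (checked above) */
	unsigned int sel;	/* bits  2:0,  CP0 register select          */
};

static struct c0r decode_c0r(uint32_t word)
{
	struct c0r d = {
		.opcode = word >> 26,
		.rs     = (word >> 21) & 0x1f,
		.rt     = (word >> 16) & 0x1f,
		.rd     = (word >> 11) & 0x1f,
		.z      = (word >> 3) & 0xff,
		.sel    = word & 0x7,
	};
	return d;
}

int main(void)
{
	/* "mtc0 $2, $12, 0": write GPR 2 to CP0_Status (register 12, sel 0) */
	struct c0r d = decode_c0r(0x40826000);

	assert(d.opcode == 0x10 && d.rs == 0x04 && d.z == 0);
	assert(d.rt == 2 && d.rd == 12 && d.sel == 0);
	return 0;
}
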
+ */ + if (change & ST0_CU1 && !(val & ST0_FR) && + vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) + kvm_lose_fpu(vcpu); + + write_gc0_status(val); + } else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) { + u32 old_cause = read_gc0_cause(); + u32 change = old_cause ^ val; + + /* DC bit enabling/disabling timer? */ + if (change & CAUSEF_DC) { + if (val & CAUSEF_DC) { + kvm_vz_lose_htimer(vcpu); + kvm_mips_count_disable_cause(vcpu); + } else { + kvm_mips_count_enable_cause(vcpu); + } + } + + /* Only certain bits are RW to the guest */ + change &= (CAUSEF_DC | CAUSEF_IV | CAUSEF_WP | + CAUSEF_IP0 | CAUSEF_IP1); + + /* WP can only be cleared */ + change &= ~CAUSEF_WP | old_cause; + + write_gc0_cause(old_cause ^ change); + } else if ((rd == MIPS_CP0_STATUS) && (sel == 1)) { /* IntCtl */ + write_gc0_intctl(val); + } else if ((rd == MIPS_CP0_CONFIG) && (sel == 5)) { + old_val = read_gc0_config5(); + change = val ^ old_val; + /* Handle changes in FPU/MSA modes */ + preempt_disable(); + + /* + * Propagate FRE changes immediately if the FPU + * context is already loaded. + */ + if (change & MIPS_CONF5_FRE && + vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) + change_c0_config5(MIPS_CONF5_FRE, val); + + preempt_enable(); + + val = old_val ^ + (change & kvm_vz_config5_guest_wrmask(vcpu)); + write_gc0_config5(val); + } else { + kvm_err("Handle GSFC, unsupported field change @ %p: %#x\n", + opc, inst.word); + er = EMULATE_FAIL; + } + + if (er != EMULATE_FAIL) + er = update_pc(vcpu, cause); + } else { + kvm_err("Handle GSFC, unrecognized instruction @ %p: %#x\n", + opc, inst.word); + er = EMULATE_FAIL; + } + + return er; +} + +static enum emulation_result kvm_trap_vz_handle_ghfc(u32 cause, u32 *opc, + struct kvm_vcpu *vcpu) +{ + /* + * Presumably this is due to MC (guest mode change), so lets trace some + * relevant info. + */ + trace_kvm_guest_mode_change(vcpu); + + return EMULATE_DONE; +} + +static enum emulation_result kvm_trap_vz_handle_hc(u32 cause, u32 *opc, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er; + union mips_instruction inst; + unsigned long curr_pc; + int err; + + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) + return EMULATE_FAIL; + + /* + * Update PC and hold onto current PC in case there is + * an error and we want to rollback the PC + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + + er = kvm_mips_emul_hypcall(vcpu, inst); + if (er == EMULATE_FAIL) + vcpu->arch.pc = curr_pc; + + return er; +} + +static enum emulation_result kvm_trap_vz_no_handler_guest_exit(u32 gexccode, + u32 cause, + u32 *opc, + struct kvm_vcpu *vcpu) +{ + u32 inst; + + /* + * Fetch the instruction. 
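
The Cause emulation above lets the guest clear WP but never set it, by masking the change word with (~CAUSEF_WP | old_cause). A standalone sketch of that sticky-bit trick, where apply_guest_write() is an invented helper and the writable mask is reduced to the WP bit for brevity:

#include <assert.h>
#include <stdint.h>

/*
 * Apply a guest write 'val' on top of 'old', restricted to 'rw_mask',
 * letting the guest flip 'sticky' from 1 to 0 but never from 0 to 1.
 */
static uint32_t apply_guest_write(uint32_t old, uint32_t val,
				  uint32_t rw_mask, uint32_t sticky)
{
	uint32_t change = (old ^ val) & rw_mask;

	change &= ~sticky | old;	/* keep the change only if the bit was set */
	return old ^ change;
}

int main(void)
{
	const uint32_t WP = 1u << 22;	/* bit position of CAUSEF_WP */

	/* WP already pending: the guest's write of 0 clears it.        */
	assert((apply_guest_write(WP, 0, WP, WP) & WP) == 0);
	/* WP clear: the guest's attempt to set it is silently dropped. */
	assert((apply_guest_write(0, WP, WP, WP) & WP) == 0);
	return 0;
}
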
+ */ + if (cause & CAUSEF_BD) + opc += 1; + kvm_get_badinstr(opc, vcpu, &inst); + + kvm_err("Guest Exception Code: %d not yet handled @ PC: %p, inst: 0x%08x Status: %#x\n", + gexccode, opc, inst, read_gc0_status()); + + return EMULATE_FAIL; +} + +static int kvm_trap_vz_handle_guest_exit(struct kvm_vcpu *vcpu) +{ + u32 *opc = (u32 *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + u32 gexccode = (vcpu->arch.host_cp0_guestctl0 & + MIPS_GCTL0_GEXC) >> MIPS_GCTL0_GEXC_SHIFT; + int ret = RESUME_GUEST; + + trace_kvm_exit(vcpu, KVM_TRACE_EXIT_GEXCCODE_BASE + gexccode); + switch (gexccode) { + case MIPS_GCTL0_GEXC_GPSI: + ++vcpu->stat.vz_gpsi_exits; + er = kvm_trap_vz_handle_gpsi(cause, opc, vcpu); + break; + case MIPS_GCTL0_GEXC_GSFC: + ++vcpu->stat.vz_gsfc_exits; + er = kvm_trap_vz_handle_gsfc(cause, opc, vcpu); + break; + case MIPS_GCTL0_GEXC_HC: + ++vcpu->stat.vz_hc_exits; + er = kvm_trap_vz_handle_hc(cause, opc, vcpu); + break; + case MIPS_GCTL0_GEXC_GRR: + ++vcpu->stat.vz_grr_exits; + er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc, + vcpu); + break; + case MIPS_GCTL0_GEXC_GVA: + ++vcpu->stat.vz_gva_exits; + er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc, + vcpu); + break; + case MIPS_GCTL0_GEXC_GHFC: + ++vcpu->stat.vz_ghfc_exits; + er = kvm_trap_vz_handle_ghfc(cause, opc, vcpu); + break; + case MIPS_GCTL0_GEXC_GPA: + ++vcpu->stat.vz_gpa_exits; + er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc, + vcpu); + break; + default: + ++vcpu->stat.vz_resvd_exits; + er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc, + vcpu); + break; + + } + + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else if (er == EMULATE_HYPERCALL) { + ret = kvm_mips_handle_hypcall(vcpu); + } else { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +/** + * kvm_trap_vz_handle_cop_unusuable() - Guest used unusable coprocessor. + * @vcpu: Virtual CPU context. + * + * Handle when the guest attempts to use a coprocessor which hasn't been allowed + * by the root context. + */ +static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_FAIL; + int ret = RESUME_GUEST; + + if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) { + /* + * If guest FPU not present, the FPU operation should have been + * treated as a reserved instruction! + * If FPU already in use, we shouldn't get this at all. + */ + if (WARN_ON(!kvm_mips_guest_has_fpu(&vcpu->arch) || + vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU)) { + preempt_enable(); + return EMULATE_FAIL; + } + + kvm_own_fpu(vcpu); + er = EMULATE_DONE; + } + /* other coprocessors not handled */ + + switch (er) { + case EMULATE_DONE: + ret = RESUME_GUEST; + break; + + case EMULATE_FAIL: + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + break; + + default: + BUG(); + } + return ret; +} + +/** + * kvm_trap_vz_handle_msa_disabled() - Guest used MSA while disabled in root. + * @vcpu: Virtual CPU context. + * + * Handle when the guest attempts to use MSA when it is disabled in the root + * context. + */ +static int kvm_trap_vz_handle_msa_disabled(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + + /* + * If MSA not present or not exposed to guest or FR=0, the MSA operation + * should have been treated as a reserved instruction! + * Same if CU1=1, FR=0. + * If MSA already in use, we shouldn't get this at all. 
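
The Status test right below folds two conditions into one compare: (status & (ST0_CU1 | ST0_FR)) == ST0_CU1 is true only when CU1 is set and FR is clear. A tiny illustration of the idiom, with local constants mirroring the ST0_* bit positions (not part of the patch):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define CU1	(1u << 29)	/* mirrors ST0_CU1 */
#define FR	(1u << 26)	/* mirrors ST0_FR  */

/* True only when CU1 is set and FR is clear. */
static bool cu1_set_fr_clear(uint32_t status)
{
	return (status & (CU1 | FR)) == CU1;
}

int main(void)
{
	assert(cu1_set_fr_clear(CU1));
	assert(!cu1_set_fr_clear(CU1 | FR));
	assert(!cu1_set_fr_clear(FR));
	assert(!cu1_set_fr_clear(0));
	return 0;
}
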
+ */ + if (!kvm_mips_guest_has_msa(&vcpu->arch) || + (read_gc0_status() & (ST0_CU1 | ST0_FR)) == ST0_CU1 || + !(read_gc0_config5() & MIPS_CONF5_MSAEN) || + vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + kvm_own_msa(vcpu); + + return RESUME_GUEST; +} + +static int kvm_trap_vz_handle_tlb_ld_miss(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 *opc = (u32 *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + ulong badvaddr = vcpu->arch.host_cp0_badvaddr; + union mips_instruction inst; + enum emulation_result er = EMULATE_DONE; + int err, ret = RESUME_GUEST; + + if (kvm_mips_handle_vz_root_tlb_fault(badvaddr, vcpu, false)) { + /* A code fetch fault doesn't count as an MMIO */ + if (kvm_is_ifetch_fault(&vcpu->arch)) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + /* Fetch the instruction */ + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + /* Treat as MMIO */ + er = kvm_mips_emulate_load(inst, cause, run, vcpu); + if (er == EMULATE_FAIL) { + kvm_err("Guest Emulate Load from MMIO space failed: PC: %p, BadVaddr: %#lx\n", + opc, badvaddr); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + } + } + + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else if (er == EMULATE_DO_MMIO) { + run->exit_reason = KVM_EXIT_MMIO; + ret = RESUME_HOST; + } else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +static int kvm_trap_vz_handle_tlb_st_miss(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 *opc = (u32 *) vcpu->arch.pc; + u32 cause = vcpu->arch.host_cp0_cause; + ulong badvaddr = vcpu->arch.host_cp0_badvaddr; + union mips_instruction inst; + enum emulation_result er = EMULATE_DONE; + int err; + int ret = RESUME_GUEST; + + /* Just try the access again if we couldn't do the translation */ + if (kvm_vz_badvaddr_to_gpa(vcpu, badvaddr, &badvaddr)) + return RESUME_GUEST; + vcpu->arch.host_cp0_badvaddr = badvaddr; + + if (kvm_mips_handle_vz_root_tlb_fault(badvaddr, vcpu, true)) { + /* Fetch the instruction */ + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + /* Treat as MMIO */ + er = kvm_mips_emulate_store(inst, cause, run, vcpu); + if (er == EMULATE_FAIL) { + kvm_err("Guest Emulate Store to MMIO space failed: PC: %p, BadVaddr: %#lx\n", + opc, badvaddr); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + } + } + + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else if (er == EMULATE_DO_MMIO) { + run->exit_reason = KVM_EXIT_MMIO; + ret = RESUME_HOST; + } else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + +static u64 kvm_vz_get_one_regs[] = { + KVM_REG_MIPS_CP0_INDEX, + KVM_REG_MIPS_CP0_ENTRYLO0, + KVM_REG_MIPS_CP0_ENTRYLO1, + KVM_REG_MIPS_CP0_CONTEXT, + KVM_REG_MIPS_CP0_PAGEMASK, + KVM_REG_MIPS_CP0_PAGEGRAIN, + KVM_REG_MIPS_CP0_WIRED, + KVM_REG_MIPS_CP0_HWRENA, + KVM_REG_MIPS_CP0_BADVADDR, + KVM_REG_MIPS_CP0_COUNT, + KVM_REG_MIPS_CP0_ENTRYHI, + KVM_REG_MIPS_CP0_COMPARE, + KVM_REG_MIPS_CP0_STATUS, + KVM_REG_MIPS_CP0_INTCTL, + KVM_REG_MIPS_CP0_CAUSE, + KVM_REG_MIPS_CP0_EPC, + KVM_REG_MIPS_CP0_PRID, + KVM_REG_MIPS_CP0_EBASE, + KVM_REG_MIPS_CP0_CONFIG, + KVM_REG_MIPS_CP0_CONFIG1, + KVM_REG_MIPS_CP0_CONFIG2, + KVM_REG_MIPS_CP0_CONFIG3, + 
KVM_REG_MIPS_CP0_CONFIG4, + KVM_REG_MIPS_CP0_CONFIG5, +#ifdef CONFIG_64BIT + KVM_REG_MIPS_CP0_XCONTEXT, +#endif + KVM_REG_MIPS_CP0_ERROREPC, + + KVM_REG_MIPS_COUNT_CTL, + KVM_REG_MIPS_COUNT_RESUME, + KVM_REG_MIPS_COUNT_HZ, +}; + +static u64 kvm_vz_get_one_regs_contextconfig[] = { + KVM_REG_MIPS_CP0_CONTEXTCONFIG, +#ifdef CONFIG_64BIT + KVM_REG_MIPS_CP0_XCONTEXTCONFIG, +#endif +}; + +static u64 kvm_vz_get_one_regs_segments[] = { + KVM_REG_MIPS_CP0_SEGCTL0, + KVM_REG_MIPS_CP0_SEGCTL1, + KVM_REG_MIPS_CP0_SEGCTL2, +}; + +static u64 kvm_vz_get_one_regs_htw[] = { + KVM_REG_MIPS_CP0_PWBASE, + KVM_REG_MIPS_CP0_PWFIELD, + KVM_REG_MIPS_CP0_PWSIZE, + KVM_REG_MIPS_CP0_PWCTL, +}; + +static u64 kvm_vz_get_one_regs_kscratch[] = { + KVM_REG_MIPS_CP0_KSCRATCH1, + KVM_REG_MIPS_CP0_KSCRATCH2, + KVM_REG_MIPS_CP0_KSCRATCH3, + KVM_REG_MIPS_CP0_KSCRATCH4, + KVM_REG_MIPS_CP0_KSCRATCH5, + KVM_REG_MIPS_CP0_KSCRATCH6, +}; + +static unsigned long kvm_vz_num_regs(struct kvm_vcpu *vcpu) +{ + unsigned long ret; + + ret = ARRAY_SIZE(kvm_vz_get_one_regs); + if (cpu_guest_has_userlocal) + ++ret; + if (cpu_guest_has_badinstr) + ++ret; + if (cpu_guest_has_badinstrp) + ++ret; + if (cpu_guest_has_contextconfig) + ret += ARRAY_SIZE(kvm_vz_get_one_regs_contextconfig); + if (cpu_guest_has_segments) + ret += ARRAY_SIZE(kvm_vz_get_one_regs_segments); + if (cpu_guest_has_htw) + ret += ARRAY_SIZE(kvm_vz_get_one_regs_htw); + if (cpu_guest_has_maar && !cpu_guest_has_dyn_maar) + ret += 1 + ARRAY_SIZE(vcpu->arch.maar); + ret += __arch_hweight8(cpu_data[0].guest.kscratch_mask); + + return ret; +} + +static int kvm_vz_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices) +{ + u64 index; + unsigned int i; + + if (copy_to_user(indices, kvm_vz_get_one_regs, + sizeof(kvm_vz_get_one_regs))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_vz_get_one_regs); + + if (cpu_guest_has_userlocal) { + index = KVM_REG_MIPS_CP0_USERLOCAL; + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + if (cpu_guest_has_badinstr) { + index = KVM_REG_MIPS_CP0_BADINSTR; + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + if (cpu_guest_has_badinstrp) { + index = KVM_REG_MIPS_CP0_BADINSTRP; + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + if (cpu_guest_has_contextconfig) { + if (copy_to_user(indices, kvm_vz_get_one_regs_contextconfig, + sizeof(kvm_vz_get_one_regs_contextconfig))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_vz_get_one_regs_contextconfig); + } + if (cpu_guest_has_segments) { + if (copy_to_user(indices, kvm_vz_get_one_regs_segments, + sizeof(kvm_vz_get_one_regs_segments))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_vz_get_one_regs_segments); + } + if (cpu_guest_has_htw) { + if (copy_to_user(indices, kvm_vz_get_one_regs_htw, + sizeof(kvm_vz_get_one_regs_htw))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_vz_get_one_regs_htw); + } + if (cpu_guest_has_maar && !cpu_guest_has_dyn_maar) { + for (i = 0; i < ARRAY_SIZE(vcpu->arch.maar); ++i) { + index = KVM_REG_MIPS_CP0_MAAR(i); + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + + index = KVM_REG_MIPS_CP0_MAARI; + if (copy_to_user(indices, &index, sizeof(index))) + return -EFAULT; + ++indices; + } + for (i = 0; i < 6; ++i) { + if (!cpu_guest_has_kscr(i + 2)) + continue; + + if (copy_to_user(indices, &kvm_vz_get_one_regs_kscratch[i], + sizeof(kvm_vz_get_one_regs_kscratch[i]))) + return -EFAULT; + ++indices; + } + + return 0; +} + +static inline s64 
entrylo_kvm_to_user(unsigned long v) +{ + s64 mask, ret = v; + + if (BITS_PER_LONG == 32) { + /* + * KVM API exposes 64-bit version of the register, so move the + * RI/XI bits up into place. + */ + mask = MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI; + ret &= ~mask; + ret |= ((s64)v & mask) << 32; + } + return ret; +} + +static inline unsigned long entrylo_user_to_kvm(s64 v) +{ + unsigned long mask, ret = v; + + if (BITS_PER_LONG == 32) { + /* + * KVM API exposes 64-bit versiono of the register, so move the + * RI/XI bits down into place. + */ + mask = MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI; + ret &= ~mask; + ret |= (v >> 32) & mask; + } + return ret; +} + +static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, + s64 *v) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + unsigned int idx; + + switch (reg->id) { + case KVM_REG_MIPS_CP0_INDEX: + *v = (long)read_gc0_index(); + break; + case KVM_REG_MIPS_CP0_ENTRYLO0: + *v = entrylo_kvm_to_user(read_gc0_entrylo0()); + break; + case KVM_REG_MIPS_CP0_ENTRYLO1: + *v = entrylo_kvm_to_user(read_gc0_entrylo1()); + break; + case KVM_REG_MIPS_CP0_CONTEXT: + *v = (long)read_gc0_context(); + break; + case KVM_REG_MIPS_CP0_CONTEXTCONFIG: + if (!cpu_guest_has_contextconfig) + return -EINVAL; + *v = read_gc0_contextconfig(); + break; + case KVM_REG_MIPS_CP0_USERLOCAL: + if (!cpu_guest_has_userlocal) + return -EINVAL; + *v = read_gc0_userlocal(); + break; +#ifdef CONFIG_64BIT + case KVM_REG_MIPS_CP0_XCONTEXTCONFIG: + if (!cpu_guest_has_contextconfig) + return -EINVAL; + *v = read_gc0_xcontextconfig(); + break; +#endif + case KVM_REG_MIPS_CP0_PAGEMASK: + *v = (long)read_gc0_pagemask(); + break; + case KVM_REG_MIPS_CP0_PAGEGRAIN: + *v = (long)read_gc0_pagegrain(); + break; + case KVM_REG_MIPS_CP0_SEGCTL0: + if (!cpu_guest_has_segments) + return -EINVAL; + *v = read_gc0_segctl0(); + break; + case KVM_REG_MIPS_CP0_SEGCTL1: + if (!cpu_guest_has_segments) + return -EINVAL; + *v = read_gc0_segctl1(); + break; + case KVM_REG_MIPS_CP0_SEGCTL2: + if (!cpu_guest_has_segments) + return -EINVAL; + *v = read_gc0_segctl2(); + break; + case KVM_REG_MIPS_CP0_PWBASE: + if (!cpu_guest_has_htw) + return -EINVAL; + *v = read_gc0_pwbase(); + break; + case KVM_REG_MIPS_CP0_PWFIELD: + if (!cpu_guest_has_htw) + return -EINVAL; + *v = read_gc0_pwfield(); + break; + case KVM_REG_MIPS_CP0_PWSIZE: + if (!cpu_guest_has_htw) + return -EINVAL; + *v = read_gc0_pwsize(); + break; + case KVM_REG_MIPS_CP0_WIRED: + *v = (long)read_gc0_wired(); + break; + case KVM_REG_MIPS_CP0_PWCTL: + if (!cpu_guest_has_htw) + return -EINVAL; + *v = read_gc0_pwctl(); + break; + case KVM_REG_MIPS_CP0_HWRENA: + *v = (long)read_gc0_hwrena(); + break; + case KVM_REG_MIPS_CP0_BADVADDR: + *v = (long)read_gc0_badvaddr(); + break; + case KVM_REG_MIPS_CP0_BADINSTR: + if (!cpu_guest_has_badinstr) + return -EINVAL; + *v = read_gc0_badinstr(); + break; + case KVM_REG_MIPS_CP0_BADINSTRP: + if (!cpu_guest_has_badinstrp) + return -EINVAL; + *v = read_gc0_badinstrp(); + break; + case KVM_REG_MIPS_CP0_COUNT: + *v = kvm_mips_read_count(vcpu); + break; + case KVM_REG_MIPS_CP0_ENTRYHI: + *v = (long)read_gc0_entryhi(); + break; + case KVM_REG_MIPS_CP0_COMPARE: + *v = (long)read_gc0_compare(); + break; + case KVM_REG_MIPS_CP0_STATUS: + *v = (long)read_gc0_status(); + break; + case KVM_REG_MIPS_CP0_INTCTL: + *v = read_gc0_intctl(); + break; + case KVM_REG_MIPS_CP0_CAUSE: + *v = (long)read_gc0_cause(); + break; + case KVM_REG_MIPS_CP0_EPC: + *v = (long)read_gc0_epc(); + break; + case KVM_REG_MIPS_CP0_PRID: + switch 
(boot_cpu_type()) { + case CPU_CAVIUM_OCTEON3: + /* Octeon III has a read-only guest.PRid */ + *v = read_gc0_prid(); + break; + default: + *v = (long)kvm_read_c0_guest_prid(cop0); + break; + }; + break; + case KVM_REG_MIPS_CP0_EBASE: + *v = kvm_vz_read_gc0_ebase(); + break; + case KVM_REG_MIPS_CP0_CONFIG: + *v = read_gc0_config(); + break; + case KVM_REG_MIPS_CP0_CONFIG1: + if (!cpu_guest_has_conf1) + return -EINVAL; + *v = read_gc0_config1(); + break; + case KVM_REG_MIPS_CP0_CONFIG2: + if (!cpu_guest_has_conf2) + return -EINVAL; + *v = read_gc0_config2(); + break; + case KVM_REG_MIPS_CP0_CONFIG3: + if (!cpu_guest_has_conf3) + return -EINVAL; + *v = read_gc0_config3(); + break; + case KVM_REG_MIPS_CP0_CONFIG4: + if (!cpu_guest_has_conf4) + return -EINVAL; + *v = read_gc0_config4(); + break; + case KVM_REG_MIPS_CP0_CONFIG5: + if (!cpu_guest_has_conf5) + return -EINVAL; + *v = read_gc0_config5(); + break; + case KVM_REG_MIPS_CP0_MAAR(0) ... KVM_REG_MIPS_CP0_MAAR(0x3f): + if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_CP0_MAAR(0); + if (idx >= ARRAY_SIZE(vcpu->arch.maar)) + return -EINVAL; + *v = vcpu->arch.maar[idx]; + break; + case KVM_REG_MIPS_CP0_MAARI: + if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar) + return -EINVAL; + *v = kvm_read_sw_gc0_maari(vcpu->arch.cop0); + break; +#ifdef CONFIG_64BIT + case KVM_REG_MIPS_CP0_XCONTEXT: + *v = read_gc0_xcontext(); + break; +#endif + case KVM_REG_MIPS_CP0_ERROREPC: + *v = (long)read_gc0_errorepc(); + break; + case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6: + idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2; + if (!cpu_guest_has_kscr(idx)) + return -EINVAL; + switch (idx) { + case 2: + *v = (long)read_gc0_kscratch1(); + break; + case 3: + *v = (long)read_gc0_kscratch2(); + break; + case 4: + *v = (long)read_gc0_kscratch3(); + break; + case 5: + *v = (long)read_gc0_kscratch4(); + break; + case 6: + *v = (long)read_gc0_kscratch5(); + break; + case 7: + *v = (long)read_gc0_kscratch6(); + break; + } + break; + case KVM_REG_MIPS_COUNT_CTL: + *v = vcpu->arch.count_ctl; + break; + case KVM_REG_MIPS_COUNT_RESUME: + *v = ktime_to_ns(vcpu->arch.count_resume); + break; + case KVM_REG_MIPS_COUNT_HZ: + *v = vcpu->arch.count_hz; + break; + default: + return -EINVAL; + } + return 0; +} + +static int kvm_vz_set_one_reg(struct kvm_vcpu *vcpu, + const struct kvm_one_reg *reg, + s64 v) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + unsigned int idx; + int ret = 0; + unsigned int cur, change; + + switch (reg->id) { + case KVM_REG_MIPS_CP0_INDEX: + write_gc0_index(v); + break; + case KVM_REG_MIPS_CP0_ENTRYLO0: + write_gc0_entrylo0(entrylo_user_to_kvm(v)); + break; + case KVM_REG_MIPS_CP0_ENTRYLO1: + write_gc0_entrylo1(entrylo_user_to_kvm(v)); + break; + case KVM_REG_MIPS_CP0_CONTEXT: + write_gc0_context(v); + break; + case KVM_REG_MIPS_CP0_CONTEXTCONFIG: + if (!cpu_guest_has_contextconfig) + return -EINVAL; + write_gc0_contextconfig(v); + break; + case KVM_REG_MIPS_CP0_USERLOCAL: + if (!cpu_guest_has_userlocal) + return -EINVAL; + write_gc0_userlocal(v); + break; +#ifdef CONFIG_64BIT + case KVM_REG_MIPS_CP0_XCONTEXTCONFIG: + if (!cpu_guest_has_contextconfig) + return -EINVAL; + write_gc0_xcontextconfig(v); + break; +#endif + case KVM_REG_MIPS_CP0_PAGEMASK: + write_gc0_pagemask(v); + break; + case KVM_REG_MIPS_CP0_PAGEGRAIN: + write_gc0_pagegrain(v); + break; + case KVM_REG_MIPS_CP0_SEGCTL0: + if (!cpu_guest_has_segments) + return -EINVAL; + write_gc0_segctl0(v); + break; + case 
KVM_REG_MIPS_CP0_SEGCTL1: + if (!cpu_guest_has_segments) + return -EINVAL; + write_gc0_segctl1(v); + break; + case KVM_REG_MIPS_CP0_SEGCTL2: + if (!cpu_guest_has_segments) + return -EINVAL; + write_gc0_segctl2(v); + break; + case KVM_REG_MIPS_CP0_PWBASE: + if (!cpu_guest_has_htw) + return -EINVAL; + write_gc0_pwbase(v); + break; + case KVM_REG_MIPS_CP0_PWFIELD: + if (!cpu_guest_has_htw) + return -EINVAL; + write_gc0_pwfield(v); + break; + case KVM_REG_MIPS_CP0_PWSIZE: + if (!cpu_guest_has_htw) + return -EINVAL; + write_gc0_pwsize(v); + break; + case KVM_REG_MIPS_CP0_WIRED: + change_gc0_wired(MIPSR6_WIRED_WIRED, v); + break; + case KVM_REG_MIPS_CP0_PWCTL: + if (!cpu_guest_has_htw) + return -EINVAL; + write_gc0_pwctl(v); + break; + case KVM_REG_MIPS_CP0_HWRENA: + write_gc0_hwrena(v); + break; + case KVM_REG_MIPS_CP0_BADVADDR: + write_gc0_badvaddr(v); + break; + case KVM_REG_MIPS_CP0_BADINSTR: + if (!cpu_guest_has_badinstr) + return -EINVAL; + write_gc0_badinstr(v); + break; + case KVM_REG_MIPS_CP0_BADINSTRP: + if (!cpu_guest_has_badinstrp) + return -EINVAL; + write_gc0_badinstrp(v); + break; + case KVM_REG_MIPS_CP0_COUNT: + kvm_mips_write_count(vcpu, v); + break; + case KVM_REG_MIPS_CP0_ENTRYHI: + write_gc0_entryhi(v); + break; + case KVM_REG_MIPS_CP0_COMPARE: + kvm_mips_write_compare(vcpu, v, false); + break; + case KVM_REG_MIPS_CP0_STATUS: + write_gc0_status(v); + break; + case KVM_REG_MIPS_CP0_INTCTL: + write_gc0_intctl(v); + break; + case KVM_REG_MIPS_CP0_CAUSE: + /* + * If the timer is stopped or started (DC bit) it must look + * atomic with changes to the timer interrupt pending bit (TI). + * A timer interrupt should not happen in between. + */ + if ((read_gc0_cause() ^ v) & CAUSEF_DC) { + if (v & CAUSEF_DC) { + /* disable timer first */ + kvm_mips_count_disable_cause(vcpu); + change_gc0_cause((u32)~CAUSEF_DC, v); + } else { + /* enable timer last */ + change_gc0_cause((u32)~CAUSEF_DC, v); + kvm_mips_count_enable_cause(vcpu); + } + } else { + write_gc0_cause(v); + } + break; + case KVM_REG_MIPS_CP0_EPC: + write_gc0_epc(v); + break; + case KVM_REG_MIPS_CP0_PRID: + switch (boot_cpu_type()) { + case CPU_CAVIUM_OCTEON3: + /* Octeon III has a guest.PRid, but its read-only */ + break; + default: + kvm_write_c0_guest_prid(cop0, v); + break; + }; + break; + case KVM_REG_MIPS_CP0_EBASE: + kvm_vz_write_gc0_ebase(v); + break; + case KVM_REG_MIPS_CP0_CONFIG: + cur = read_gc0_config(); + change = (cur ^ v) & kvm_vz_config_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + write_gc0_config(v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG1: + if (!cpu_guest_has_conf1) + break; + cur = read_gc0_config1(); + change = (cur ^ v) & kvm_vz_config1_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + write_gc0_config1(v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG2: + if (!cpu_guest_has_conf2) + break; + cur = read_gc0_config2(); + change = (cur ^ v) & kvm_vz_config2_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + write_gc0_config2(v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG3: + if (!cpu_guest_has_conf3) + break; + cur = read_gc0_config3(); + change = (cur ^ v) & kvm_vz_config3_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + write_gc0_config3(v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG4: + if (!cpu_guest_has_conf4) + break; + cur = read_gc0_config4(); + change = (cur ^ v) & kvm_vz_config4_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + write_gc0_config4(v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG5: + if (!cpu_guest_has_conf5) + break; + cur = 
read_gc0_config5(); + change = (cur ^ v) & kvm_vz_config5_user_wrmask(vcpu); + if (change) { + v = cur ^ change; + write_gc0_config5(v); + } + break; + case KVM_REG_MIPS_CP0_MAAR(0) ... KVM_REG_MIPS_CP0_MAAR(0x3f): + if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_CP0_MAAR(0); + if (idx >= ARRAY_SIZE(vcpu->arch.maar)) + return -EINVAL; + vcpu->arch.maar[idx] = mips_process_maar(dmtc_op, v); + break; + case KVM_REG_MIPS_CP0_MAARI: + if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar) + return -EINVAL; + kvm_write_maari(vcpu, v); + break; +#ifdef CONFIG_64BIT + case KVM_REG_MIPS_CP0_XCONTEXT: + write_gc0_xcontext(v); + break; +#endif + case KVM_REG_MIPS_CP0_ERROREPC: + write_gc0_errorepc(v); + break; + case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6: + idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2; + if (!cpu_guest_has_kscr(idx)) + return -EINVAL; + switch (idx) { + case 2: + write_gc0_kscratch1(v); + break; + case 3: + write_gc0_kscratch2(v); + break; + case 4: + write_gc0_kscratch3(v); + break; + case 5: + write_gc0_kscratch4(v); + break; + case 6: + write_gc0_kscratch5(v); + break; + case 7: + write_gc0_kscratch6(v); + break; + } + break; + case KVM_REG_MIPS_COUNT_CTL: + ret = kvm_mips_set_count_ctl(vcpu, v); + break; + case KVM_REG_MIPS_COUNT_RESUME: + ret = kvm_mips_set_count_resume(vcpu, v); + break; + case KVM_REG_MIPS_COUNT_HZ: + ret = kvm_mips_set_count_hz(vcpu, v); + break; + default: + return -EINVAL; + } + return ret; +} + +#define guestid_cache(cpu) (cpu_data[cpu].guestid_cache) +static void kvm_vz_get_new_guestid(unsigned long cpu, struct kvm_vcpu *vcpu) +{ + unsigned long guestid = guestid_cache(cpu); + + if (!(++guestid & GUESTID_MASK)) { + if (cpu_has_vtag_icache) + flush_icache_all(); + + if (!guestid) /* fix version if needed */ + guestid = GUESTID_FIRST_VERSION; + + ++guestid; /* guestid 0 reserved for root */ + + /* start new guestid cycle */ + kvm_vz_local_flush_roottlb_all_guests(); + kvm_vz_local_flush_guesttlb_all(); + } + + guestid_cache(cpu) = guestid; +} + +/* Returns 1 if the guest TLB may be clobbered */ +static int kvm_vz_check_requests(struct kvm_vcpu *vcpu, int cpu) +{ + int ret = 0; + int i; + + if (!vcpu->requests) + return 0; + + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { + if (cpu_has_guestid) { + /* Drop all GuestIDs for this VCPU */ + for_each_possible_cpu(i) + vcpu->arch.vzguestid[i] = 0; + /* This will clobber guest TLB contents too */ + ret = 1; + } + /* + * For Root ASID Dealias (RAD) we don't do anything here, but we + * still need the request to ensure we recheck asid_flush_mask. + * We can still return 0 as only the root TLB will be affected + * by a root ASID flush. 
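
kvm_vz_get_new_guestid() above follows the usual MIPS versioned-ID scheme: the low bits are the GuestID programmed into GuestCtl1, the high bits count generations so a stale cached ID is recognised without flushing on every switch. A reduced standalone sketch of the same allocation logic, assuming an 8-bit ID field purely for illustration (the real width comes from guestid_mask, and the cache starts at the first version as kvm_vz_hardware_enable() does further down):

#include <assert.h>
#include <stdio.h>

#define ID_MASK		0xffUL		/* assumed 8-bit ID field, for illustration */
#define FIRST_VERSION	(ID_MASK + 1)

static unsigned long next_guestid(unsigned long cache)
{
	unsigned long guestid = cache;

	if (!(++guestid & ID_MASK)) {
		/* ID space exhausted: new generation (TLBs would be flushed) */
		if (!guestid)			/* version counter itself wrapped */
			guestid = FIRST_VERSION;
		++guestid;			/* ID 0 stays reserved for root   */
	}
	return guestid;
}

int main(void)
{
	unsigned long cache = FIRST_VERSION;	/* initial value, as at hardware enable */
	int i;

	for (i = 0; i < 300; i++)
		cache = next_guestid(cache);

	/* The ID field wrapped once, so we are now in the second generation. */
	assert((cache & ~ID_MASK) == 2 * FIRST_VERSION);
	printf("guestid cache now %#lx (version %#lx, id %lu)\n",
	       cache, cache & ~ID_MASK, cache & ID_MASK);
	return 0;
}
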
+ */ + } + + return ret; +} + +static void kvm_vz_vcpu_save_wired(struct kvm_vcpu *vcpu) +{ + unsigned int wired = read_gc0_wired(); + struct kvm_mips_tlb *tlbs; + int i; + + /* Expand the wired TLB array if necessary */ + wired &= MIPSR6_WIRED_WIRED; + if (wired > vcpu->arch.wired_tlb_limit) { + tlbs = krealloc(vcpu->arch.wired_tlb, wired * + sizeof(*vcpu->arch.wired_tlb), GFP_ATOMIC); + if (WARN_ON(!tlbs)) { + /* Save whatever we can */ + wired = vcpu->arch.wired_tlb_limit; + } else { + vcpu->arch.wired_tlb = tlbs; + vcpu->arch.wired_tlb_limit = wired; + } + } + + if (wired) + /* Save wired entries from the guest TLB */ + kvm_vz_save_guesttlb(vcpu->arch.wired_tlb, 0, wired); + /* Invalidate any dropped entries since last time */ + for (i = wired; i < vcpu->arch.wired_tlb_used; ++i) { + vcpu->arch.wired_tlb[i].tlb_hi = UNIQUE_GUEST_ENTRYHI(i); + vcpu->arch.wired_tlb[i].tlb_lo[0] = 0; + vcpu->arch.wired_tlb[i].tlb_lo[1] = 0; + vcpu->arch.wired_tlb[i].tlb_mask = 0; + } + vcpu->arch.wired_tlb_used = wired; +} + +static void kvm_vz_vcpu_load_wired(struct kvm_vcpu *vcpu) +{ + /* Load wired entries into the guest TLB */ + if (vcpu->arch.wired_tlb) + kvm_vz_load_guesttlb(vcpu->arch.wired_tlb, 0, + vcpu->arch.wired_tlb_used); +} + +static void kvm_vz_vcpu_load_tlb(struct kvm_vcpu *vcpu, int cpu) +{ + struct kvm *kvm = vcpu->kvm; + struct mm_struct *gpa_mm = &kvm->arch.gpa_mm; + bool migrated; + + /* + * Are we entering guest context on a different CPU to last time? + * If so, the VCPU's guest TLB state on this CPU may be stale. + */ + migrated = (vcpu->arch.last_exec_cpu != cpu); + vcpu->arch.last_exec_cpu = cpu; + + /* + * A vcpu's GuestID is set in GuestCtl1.ID when the vcpu is loaded and + * remains set until another vcpu is loaded in. As a rule GuestRID + * remains zeroed when in root context unless the kernel is busy + * manipulating guest tlb entries. + */ + if (cpu_has_guestid) { + /* + * Check if our GuestID is of an older version and thus invalid. + * + * We also discard the stored GuestID if we've executed on + * another CPU, as the guest mappings may have changed without + * hypervisor knowledge. + */ + if (migrated || + (vcpu->arch.vzguestid[cpu] ^ guestid_cache(cpu)) & + GUESTID_VERSION_MASK) { + kvm_vz_get_new_guestid(cpu, vcpu); + vcpu->arch.vzguestid[cpu] = guestid_cache(cpu); + trace_kvm_guestid_change(vcpu, + vcpu->arch.vzguestid[cpu]); + } + + /* Restore GuestID */ + change_c0_guestctl1(GUESTID_MASK, vcpu->arch.vzguestid[cpu]); + } else { + /* + * The Guest TLB only stores a single guest's TLB state, so + * flush it if another VCPU has executed on this CPU. + * + * We also flush if we've executed on another CPU, as the guest + * mappings may have changed without hypervisor knowledge. + */ + if (migrated || last_exec_vcpu[cpu] != vcpu) + kvm_vz_local_flush_guesttlb_all(); + last_exec_vcpu[cpu] = vcpu; + + /* + * Root ASID dealiases guest GPA mappings in the root TLB. + * Allocate new root ASID if needed. + */ + if (cpumask_test_and_clear_cpu(cpu, &kvm->arch.asid_flush_mask) + || (cpu_context(cpu, gpa_mm) ^ asid_cache(cpu)) & + asid_version_mask(cpu)) + get_new_mmu_context(gpa_mm, cpu); + } +} + +static int kvm_vz_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + bool migrated, all; + + /* + * Have we migrated to a different CPU? + * If so, any old guest TLB state may be stale. + */ + migrated = (vcpu->arch.last_sched_cpu != cpu); + + /* + * Was this the last VCPU to run on this CPU? 
+ * If not, any old guest state from this VCPU will have been clobbered. + */ + all = migrated || (last_vcpu[cpu] != vcpu); + last_vcpu[cpu] = vcpu; + + /* + * Restore CP0_Wired unconditionally as we clear it after use, and + * restore wired guest TLB entries (while in guest context). + */ + kvm_restore_gc0_wired(cop0); + if (current->flags & PF_VCPU) { + tlbw_use_hazard(); + kvm_vz_vcpu_load_tlb(vcpu, cpu); + kvm_vz_vcpu_load_wired(vcpu); + } + + /* + * Restore timer state regardless, as e.g. Cause.TI can change over time + * if left unmaintained. + */ + kvm_vz_restore_timer(vcpu); + + /* Set MC bit if we want to trace guest mode changes */ + if (kvm_trace_guest_mode_change) + set_c0_guestctl0(MIPS_GCTL0_MC); + else + clear_c0_guestctl0(MIPS_GCTL0_MC); + + /* Don't bother restoring registers multiple times unless necessary */ + if (!all) + return 0; + + /* + * Restore config registers first, as some implementations restrict + * writes to other registers when the corresponding feature bits aren't + * set. For example Status.CU1 cannot be set unless Config1.FP is set. + */ + kvm_restore_gc0_config(cop0); + if (cpu_guest_has_conf1) + kvm_restore_gc0_config1(cop0); + if (cpu_guest_has_conf2) + kvm_restore_gc0_config2(cop0); + if (cpu_guest_has_conf3) + kvm_restore_gc0_config3(cop0); + if (cpu_guest_has_conf4) + kvm_restore_gc0_config4(cop0); + if (cpu_guest_has_conf5) + kvm_restore_gc0_config5(cop0); + if (cpu_guest_has_conf6) + kvm_restore_gc0_config6(cop0); + if (cpu_guest_has_conf7) + kvm_restore_gc0_config7(cop0); + + kvm_restore_gc0_index(cop0); + kvm_restore_gc0_entrylo0(cop0); + kvm_restore_gc0_entrylo1(cop0); + kvm_restore_gc0_context(cop0); + if (cpu_guest_has_contextconfig) + kvm_restore_gc0_contextconfig(cop0); +#ifdef CONFIG_64BIT + kvm_restore_gc0_xcontext(cop0); + if (cpu_guest_has_contextconfig) + kvm_restore_gc0_xcontextconfig(cop0); +#endif + kvm_restore_gc0_pagemask(cop0); + kvm_restore_gc0_pagegrain(cop0); + kvm_restore_gc0_hwrena(cop0); + kvm_restore_gc0_badvaddr(cop0); + kvm_restore_gc0_entryhi(cop0); + kvm_restore_gc0_status(cop0); + kvm_restore_gc0_intctl(cop0); + kvm_restore_gc0_epc(cop0); + kvm_vz_write_gc0_ebase(kvm_read_sw_gc0_ebase(cop0)); + if (cpu_guest_has_userlocal) + kvm_restore_gc0_userlocal(cop0); + + kvm_restore_gc0_errorepc(cop0); + + /* restore KScratch registers if enabled in guest */ + if (cpu_guest_has_conf4) { + if (cpu_guest_has_kscr(2)) + kvm_restore_gc0_kscratch1(cop0); + if (cpu_guest_has_kscr(3)) + kvm_restore_gc0_kscratch2(cop0); + if (cpu_guest_has_kscr(4)) + kvm_restore_gc0_kscratch3(cop0); + if (cpu_guest_has_kscr(5)) + kvm_restore_gc0_kscratch4(cop0); + if (cpu_guest_has_kscr(6)) + kvm_restore_gc0_kscratch5(cop0); + if (cpu_guest_has_kscr(7)) + kvm_restore_gc0_kscratch6(cop0); + } + + if (cpu_guest_has_badinstr) + kvm_restore_gc0_badinstr(cop0); + if (cpu_guest_has_badinstrp) + kvm_restore_gc0_badinstrp(cop0); + + if (cpu_guest_has_segments) { + kvm_restore_gc0_segctl0(cop0); + kvm_restore_gc0_segctl1(cop0); + kvm_restore_gc0_segctl2(cop0); + } + + /* restore HTW registers */ + if (cpu_guest_has_htw) { + kvm_restore_gc0_pwbase(cop0); + kvm_restore_gc0_pwfield(cop0); + kvm_restore_gc0_pwsize(cop0); + kvm_restore_gc0_pwctl(cop0); + } + + /* restore Root.GuestCtl2 from unused Guest guestctl2 register */ + if (cpu_has_guestctl2) + write_c0_guestctl2( + cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL]); + + /* + * We should clear linked load bit to break interrupted atomics. 
This + * prevents a SC on the next VCPU from succeeding by matching a LL on + * the previous VCPU. + */ + if (cpu_guest_has_rw_llb) + write_gc0_lladdr(0); + + return 0; +} + +static int kvm_vz_vcpu_put(struct kvm_vcpu *vcpu, int cpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + + if (current->flags & PF_VCPU) + kvm_vz_vcpu_save_wired(vcpu); + + kvm_lose_fpu(vcpu); + + kvm_save_gc0_index(cop0); + kvm_save_gc0_entrylo0(cop0); + kvm_save_gc0_entrylo1(cop0); + kvm_save_gc0_context(cop0); + if (cpu_guest_has_contextconfig) + kvm_save_gc0_contextconfig(cop0); +#ifdef CONFIG_64BIT + kvm_save_gc0_xcontext(cop0); + if (cpu_guest_has_contextconfig) + kvm_save_gc0_xcontextconfig(cop0); +#endif + kvm_save_gc0_pagemask(cop0); + kvm_save_gc0_pagegrain(cop0); + kvm_save_gc0_wired(cop0); + /* allow wired TLB entries to be overwritten */ + clear_gc0_wired(MIPSR6_WIRED_WIRED); + kvm_save_gc0_hwrena(cop0); + kvm_save_gc0_badvaddr(cop0); + kvm_save_gc0_entryhi(cop0); + kvm_save_gc0_status(cop0); + kvm_save_gc0_intctl(cop0); + kvm_save_gc0_epc(cop0); + kvm_write_sw_gc0_ebase(cop0, kvm_vz_read_gc0_ebase()); + if (cpu_guest_has_userlocal) + kvm_save_gc0_userlocal(cop0); + + /* only save implemented config registers */ + kvm_save_gc0_config(cop0); + if (cpu_guest_has_conf1) + kvm_save_gc0_config1(cop0); + if (cpu_guest_has_conf2) + kvm_save_gc0_config2(cop0); + if (cpu_guest_has_conf3) + kvm_save_gc0_config3(cop0); + if (cpu_guest_has_conf4) + kvm_save_gc0_config4(cop0); + if (cpu_guest_has_conf5) + kvm_save_gc0_config5(cop0); + if (cpu_guest_has_conf6) + kvm_save_gc0_config6(cop0); + if (cpu_guest_has_conf7) + kvm_save_gc0_config7(cop0); + + kvm_save_gc0_errorepc(cop0); + + /* save KScratch registers if enabled in guest */ + if (cpu_guest_has_conf4) { + if (cpu_guest_has_kscr(2)) + kvm_save_gc0_kscratch1(cop0); + if (cpu_guest_has_kscr(3)) + kvm_save_gc0_kscratch2(cop0); + if (cpu_guest_has_kscr(4)) + kvm_save_gc0_kscratch3(cop0); + if (cpu_guest_has_kscr(5)) + kvm_save_gc0_kscratch4(cop0); + if (cpu_guest_has_kscr(6)) + kvm_save_gc0_kscratch5(cop0); + if (cpu_guest_has_kscr(7)) + kvm_save_gc0_kscratch6(cop0); + } + + if (cpu_guest_has_badinstr) + kvm_save_gc0_badinstr(cop0); + if (cpu_guest_has_badinstrp) + kvm_save_gc0_badinstrp(cop0); + + if (cpu_guest_has_segments) { + kvm_save_gc0_segctl0(cop0); + kvm_save_gc0_segctl1(cop0); + kvm_save_gc0_segctl2(cop0); + } + + /* save HTW registers if enabled in guest */ + if (cpu_guest_has_htw && + kvm_read_sw_gc0_config3(cop0) & MIPS_CONF3_PW) { + kvm_save_gc0_pwbase(cop0); + kvm_save_gc0_pwfield(cop0); + kvm_save_gc0_pwsize(cop0); + kvm_save_gc0_pwctl(cop0); + } + + kvm_vz_save_timer(vcpu); + + /* save Root.GuestCtl2 in unused Guest guestctl2 register */ + if (cpu_has_guestctl2) + cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL] = + read_c0_guestctl2(); + + return 0; +} + +/** + * kvm_vz_resize_guest_vtlb() - Attempt to resize guest VTLB. + * @size: Number of guest VTLB entries (0 < @size <= root VTLB entries). + * + * Attempt to resize the guest VTLB by writing guest Config registers. This is + * necessary for cores with a shared root/guest TLB to avoid overlap with wired + * entries in the root VTLB. + * + * Returns: The resulting guest VTLB size. 
+ */ +static unsigned int kvm_vz_resize_guest_vtlb(unsigned int size) +{ + unsigned int config4 = 0, ret = 0, limit; + + /* Write MMUSize - 1 into guest Config registers */ + if (cpu_guest_has_conf1) + change_gc0_config1(MIPS_CONF1_TLBS, + (size - 1) << MIPS_CONF1_TLBS_SHIFT); + if (cpu_guest_has_conf4) { + config4 = read_gc0_config4(); + if (cpu_has_mips_r6 || (config4 & MIPS_CONF4_MMUEXTDEF) == + MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT) { + config4 &= ~MIPS_CONF4_VTLBSIZEEXT; + config4 |= ((size - 1) >> MIPS_CONF1_TLBS_SIZE) << + MIPS_CONF4_VTLBSIZEEXT_SHIFT; + } else if ((config4 & MIPS_CONF4_MMUEXTDEF) == + MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT) { + config4 &= ~MIPS_CONF4_MMUSIZEEXT; + config4 |= ((size - 1) >> MIPS_CONF1_TLBS_SIZE) << + MIPS_CONF4_MMUSIZEEXT_SHIFT; + } + write_gc0_config4(config4); + } + + /* + * Set Guest.Wired.Limit = 0 (no limit up to Guest.MMUSize-1), unless it + * would exceed Root.Wired.Limit (clearing Guest.Wired.Wired so write + * not dropped) + */ + if (cpu_has_mips_r6) { + limit = (read_c0_wired() & MIPSR6_WIRED_LIMIT) >> + MIPSR6_WIRED_LIMIT_SHIFT; + if (size - 1 <= limit) + limit = 0; + write_gc0_wired(limit << MIPSR6_WIRED_LIMIT_SHIFT); + } + + /* Read back MMUSize - 1 */ + back_to_back_c0_hazard(); + if (cpu_guest_has_conf1) + ret = (read_gc0_config1() & MIPS_CONF1_TLBS) >> + MIPS_CONF1_TLBS_SHIFT; + if (config4) { + if (cpu_has_mips_r6 || (config4 & MIPS_CONF4_MMUEXTDEF) == + MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT) + ret |= ((config4 & MIPS_CONF4_VTLBSIZEEXT) >> + MIPS_CONF4_VTLBSIZEEXT_SHIFT) << + MIPS_CONF1_TLBS_SIZE; + else if ((config4 & MIPS_CONF4_MMUEXTDEF) == + MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT) + ret |= ((config4 & MIPS_CONF4_MMUSIZEEXT) >> + MIPS_CONF4_MMUSIZEEXT_SHIFT) << + MIPS_CONF1_TLBS_SIZE; + } + return ret + 1; +} + +static int kvm_vz_hardware_enable(void) +{ + unsigned int mmu_size, guest_mmu_size, ftlb_size; + u64 guest_cvmctl, cvmvmconfig; + + switch (current_cpu_type()) { + case CPU_CAVIUM_OCTEON3: + /* Set up guest timer/perfcount IRQ lines */ + guest_cvmctl = read_gc0_cvmctl(); + guest_cvmctl &= ~CVMCTL_IPTI; + guest_cvmctl |= 7ull << CVMCTL_IPTI_SHIFT; + guest_cvmctl &= ~CVMCTL_IPPCI; + guest_cvmctl |= 6ull << CVMCTL_IPPCI_SHIFT; + write_gc0_cvmctl(guest_cvmctl); + + cvmvmconfig = read_c0_cvmvmconfig(); + /* No I/O hole translation. */ + cvmvmconfig |= CVMVMCONF_DGHT; + /* Halve the root MMU size */ + mmu_size = ((cvmvmconfig & CVMVMCONF_MMUSIZEM1) + >> CVMVMCONF_MMUSIZEM1_S) + 1; + guest_mmu_size = mmu_size / 2; + mmu_size -= guest_mmu_size; + cvmvmconfig &= ~CVMVMCONF_RMMUSIZEM1; + cvmvmconfig |= mmu_size - 1; + write_c0_cvmvmconfig(cvmvmconfig); + + /* Update our records */ + current_cpu_data.tlbsize = mmu_size; + current_cpu_data.tlbsizevtlb = mmu_size; + current_cpu_data.guest.tlbsize = guest_mmu_size; + + /* Flush moved entries in new (guest) context */ + kvm_vz_local_flush_guesttlb_all(); + break; + default: + /* + * ImgTec cores tend to use a shared root/guest TLB. To avoid + * overlap of root wired and guest entries, the guest TLB may + * need resizing. + */ + mmu_size = current_cpu_data.tlbsizevtlb; + ftlb_size = current_cpu_data.tlbsize - mmu_size; + + /* Try switching to maximum guest VTLB size for flush */ + guest_mmu_size = kvm_vz_resize_guest_vtlb(mmu_size); + current_cpu_data.guest.tlbsize = guest_mmu_size + ftlb_size; + kvm_vz_local_flush_guesttlb_all(); + + /* + * Reduce to make space for root wired entries and at least 2 + * root non-wired entries. This does assume that long-term wired + * entries won't be added later. 
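
kvm_vz_resize_guest_vtlb() above spreads (size - 1) across the classic 6-bit Config1.MMUSize-1 field and a Config4 extension field holding the remaining bits (MIPS_CONF1_TLBS_SIZE is 6). A round-trip sketch of just that split, with the actual register offsets abstracted away into an invented struct:

#include <assert.h>

#define TLBS_BITS	6	/* width of Config1.MMUSize-1, i.e. MIPS_CONF1_TLBS_SIZE */

struct vtlb_fields {
	unsigned int mmusize_m1;	/* Config1.MMUSize-1, low 6 bits of size-1 */
	unsigned int sizeext;		/* Config4 extension, remaining high bits  */
};

static struct vtlb_fields encode_vtlb_size(unsigned int size)
{
	struct vtlb_fields f = {
		.mmusize_m1 = (size - 1) & ((1 << TLBS_BITS) - 1),
		.sizeext    = (size - 1) >> TLBS_BITS,
	};
	return f;
}

static unsigned int decode_vtlb_size(struct vtlb_fields f)
{
	return (f.mmusize_m1 | (f.sizeext << TLBS_BITS)) + 1;
}

int main(void)
{
	assert(decode_vtlb_size(encode_vtlb_size(64)) == 64);	/* fits in Config1 alone   */
	assert(decode_vtlb_size(encode_vtlb_size(576)) == 576);	/* needs the extension (8) */
	return 0;
}
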
+ */ + guest_mmu_size = mmu_size - num_wired_entries() - 2; + guest_mmu_size = kvm_vz_resize_guest_vtlb(guest_mmu_size); + current_cpu_data.guest.tlbsize = guest_mmu_size + ftlb_size; + + /* + * Write the VTLB size, but if another CPU has already written, + * check it matches or we won't provide a consistent view to the + * guest. If this ever happens it suggests an asymmetric number + * of wired entries. + */ + if (cmpxchg(&kvm_vz_guest_vtlb_size, 0, guest_mmu_size) && + WARN(guest_mmu_size != kvm_vz_guest_vtlb_size, + "Available guest VTLB size mismatch")) + return -EINVAL; + break; + } + + /* + * Enable virtualization features granting guest direct control of + * certain features: + * CP0=1: Guest coprocessor 0 context. + * AT=Guest: Guest MMU. + * CG=1: Hit (virtual address) CACHE operations (optional). + * CF=1: Guest Config registers. + * CGI=1: Indexed flush CACHE operations (optional). + */ + write_c0_guestctl0(MIPS_GCTL0_CP0 | + (MIPS_GCTL0_AT_GUEST << MIPS_GCTL0_AT_SHIFT) | + MIPS_GCTL0_CG | MIPS_GCTL0_CF); + if (cpu_has_guestctl0ext) + set_c0_guestctl0ext(MIPS_GCTL0EXT_CGI); + + if (cpu_has_guestid) { + write_c0_guestctl1(0); + kvm_vz_local_flush_roottlb_all_guests(); + + GUESTID_MASK = current_cpu_data.guestid_mask; + GUESTID_FIRST_VERSION = GUESTID_MASK + 1; + GUESTID_VERSION_MASK = ~GUESTID_MASK; + + current_cpu_data.guestid_cache = GUESTID_FIRST_VERSION; + } + + /* clear any pending injected virtual guest interrupts */ + if (cpu_has_guestctl2) + clear_c0_guestctl2(0x3f << 10); + + return 0; +} + +static void kvm_vz_hardware_disable(void) +{ + u64 cvmvmconfig; + unsigned int mmu_size; + + /* Flush any remaining guest TLB entries */ + kvm_vz_local_flush_guesttlb_all(); + + switch (current_cpu_type()) { + case CPU_CAVIUM_OCTEON3: + /* + * Allocate whole TLB for root. Existing guest TLB entries will + * change ownership to the root TLB. We should be safe though as + * they've already been flushed above while in guest TLB. + */ + cvmvmconfig = read_c0_cvmvmconfig(); + mmu_size = ((cvmvmconfig & CVMVMCONF_MMUSIZEM1) + >> CVMVMCONF_MMUSIZEM1_S) + 1; + cvmvmconfig &= ~CVMVMCONF_RMMUSIZEM1; + cvmvmconfig |= mmu_size - 1; + write_c0_cvmvmconfig(cvmvmconfig); + + /* Update our records */ + current_cpu_data.tlbsize = mmu_size; + current_cpu_data.tlbsizevtlb = mmu_size; + current_cpu_data.guest.tlbsize = 0; + + /* Flush moved entries in new (root) context */ + local_flush_tlb_all(); + break; + } + + if (cpu_has_guestid) { + write_c0_guestctl1(0); + kvm_vz_local_flush_roottlb_all_guests(); + } +} + +static int kvm_vz_check_extension(struct kvm *kvm, long ext) +{ + int r; + + switch (ext) { + case KVM_CAP_MIPS_VZ: + /* we wouldn't be here unless cpu_has_vz */ + r = 1; + break; +#ifdef CONFIG_64BIT + case KVM_CAP_MIPS_64BIT: + /* We support 64-bit registers/operations and addresses */ + r = 2; + break; +#endif + default: + r = 0; + break; + } + + return r; +} + +static int kvm_vz_vcpu_init(struct kvm_vcpu *vcpu) +{ + int i; + + for_each_possible_cpu(i) + vcpu->arch.vzguestid[i] = 0; + + return 0; +} + +static void kvm_vz_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + int cpu; + + /* + * If the VCPU is freed and reused as another VCPU, we don't want the + * matching pointer wrongly hanging around in last_vcpu[] or + * last_exec_vcpu[]. 
+ */ + for_each_possible_cpu(cpu) { + if (last_vcpu[cpu] == vcpu) + last_vcpu[cpu] = NULL; + if (last_exec_vcpu[cpu] == vcpu) + last_exec_vcpu[cpu] = NULL; + } +} + +static int kvm_vz_vcpu_setup(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + unsigned long count_hz = 100*1000*1000; /* default to 100 MHz */ + + /* + * Start off the timer at the same frequency as the host timer, but the + * soft timer doesn't handle frequencies greater than 1GHz yet. + */ + if (mips_hpt_frequency && mips_hpt_frequency <= NSEC_PER_SEC) + count_hz = mips_hpt_frequency; + kvm_mips_init_count(vcpu, count_hz); + + /* + * Initialize guest register state to valid architectural reset state. + */ + + /* PageGrain */ + if (cpu_has_mips_r6) + kvm_write_sw_gc0_pagegrain(cop0, PG_RIE | PG_XIE | PG_IEC); + /* Wired */ + if (cpu_has_mips_r6) + kvm_write_sw_gc0_wired(cop0, + read_gc0_wired() & MIPSR6_WIRED_LIMIT); + /* Status */ + kvm_write_sw_gc0_status(cop0, ST0_BEV | ST0_ERL); + if (cpu_has_mips_r6) + kvm_change_sw_gc0_status(cop0, ST0_FR, read_gc0_status()); + /* IntCtl */ + kvm_write_sw_gc0_intctl(cop0, read_gc0_intctl() & + (INTCTLF_IPFDC | INTCTLF_IPPCI | INTCTLF_IPTI)); + /* PRId */ + kvm_write_sw_gc0_prid(cop0, boot_cpu_data.processor_id); + /* EBase */ + kvm_write_sw_gc0_ebase(cop0, (s32)0x80000000 | vcpu->vcpu_id); + /* Config */ + kvm_save_gc0_config(cop0); + /* architecturally writable (e.g. from guest) */ + kvm_change_sw_gc0_config(cop0, CONF_CM_CMASK, + _page_cachable_default >> _CACHE_SHIFT); + /* architecturally read only, but maybe writable from root */ + kvm_change_sw_gc0_config(cop0, MIPS_CONF_MT, read_c0_config()); + if (cpu_guest_has_conf1) { + kvm_set_sw_gc0_config(cop0, MIPS_CONF_M); + /* Config1 */ + kvm_save_gc0_config1(cop0); + /* architecturally read only, but maybe writable from root */ + kvm_clear_sw_gc0_config1(cop0, MIPS_CONF1_C2 | + MIPS_CONF1_MD | + MIPS_CONF1_PC | + MIPS_CONF1_WR | + MIPS_CONF1_CA | + MIPS_CONF1_FP); + } + if (cpu_guest_has_conf2) { + kvm_set_sw_gc0_config1(cop0, MIPS_CONF_M); + /* Config2 */ + kvm_save_gc0_config2(cop0); + } + if (cpu_guest_has_conf3) { + kvm_set_sw_gc0_config2(cop0, MIPS_CONF_M); + /* Config3 */ + kvm_save_gc0_config3(cop0); + /* architecturally writable (e.g. from guest) */ + kvm_clear_sw_gc0_config3(cop0, MIPS_CONF3_ISA_OE); + /* architecturally read only, but maybe writable from root */ + kvm_clear_sw_gc0_config3(cop0, MIPS_CONF3_MSA | + MIPS_CONF3_BPG | + MIPS_CONF3_ULRI | + MIPS_CONF3_DSP | + MIPS_CONF3_CTXTC | + MIPS_CONF3_ITL | + MIPS_CONF3_LPA | + MIPS_CONF3_VEIC | + MIPS_CONF3_VINT | + MIPS_CONF3_SP | + MIPS_CONF3_CDMM | + MIPS_CONF3_MT | + MIPS_CONF3_SM | + MIPS_CONF3_TL); + } + if (cpu_guest_has_conf4) { + kvm_set_sw_gc0_config3(cop0, MIPS_CONF_M); + /* Config4 */ + kvm_save_gc0_config4(cop0); + } + if (cpu_guest_has_conf5) { + kvm_set_sw_gc0_config4(cop0, MIPS_CONF_M); + /* Config5 */ + kvm_save_gc0_config5(cop0); + /* architecturally writable (e.g. 
from guest) */ + kvm_clear_sw_gc0_config5(cop0, MIPS_CONF5_K | + MIPS_CONF5_CV | + MIPS_CONF5_MSAEN | + MIPS_CONF5_UFE | + MIPS_CONF5_FRE | + MIPS_CONF5_SBRI | + MIPS_CONF5_UFR); + /* architecturally read only, but maybe writable from root */ + kvm_clear_sw_gc0_config5(cop0, MIPS_CONF5_MRP); + } + + if (cpu_guest_has_contextconfig) { + /* ContextConfig */ + kvm_write_sw_gc0_contextconfig(cop0, 0x007ffff0); +#ifdef CONFIG_64BIT + /* XContextConfig */ + /* bits SEGBITS-13+3:4 set */ + kvm_write_sw_gc0_xcontextconfig(cop0, + ((1ull << (cpu_vmbits - 13)) - 1) << 4); +#endif + } + + /* Implementation dependent, use the legacy layout */ + if (cpu_guest_has_segments) { + /* SegCtl0, SegCtl1, SegCtl2 */ + kvm_write_sw_gc0_segctl0(cop0, 0x00200010); + kvm_write_sw_gc0_segctl1(cop0, 0x00000002 | + (_page_cachable_default >> _CACHE_SHIFT) << + (16 + MIPS_SEGCFG_C_SHIFT)); + kvm_write_sw_gc0_segctl2(cop0, 0x00380438); + } + + /* reset HTW registers */ + if (cpu_guest_has_htw && cpu_has_mips_r6) { + /* PWField */ + kvm_write_sw_gc0_pwfield(cop0, 0x0c30c302); + /* PWSize */ + kvm_write_sw_gc0_pwsize(cop0, 1 << MIPS_PWSIZE_PTW_SHIFT); + } + + /* start with no pending virtual guest interrupts */ + if (cpu_has_guestctl2) + cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL] = 0; + + /* Put PC at reset vector */ + vcpu->arch.pc = CKSEG1ADDR(0x1fc00000); + + return 0; +} + +static void kvm_vz_flush_shadow_all(struct kvm *kvm) +{ + if (cpu_has_guestid) { + /* Flush GuestID for each VCPU individually */ + kvm_flush_remote_tlbs(kvm); + } else { + /* + * For each CPU there is a single GPA ASID used by all VCPUs in + * the VM, so it doesn't make sense for the VCPUs to handle + * invalidation of these ASIDs individually. + * + * Instead mark all CPUs as needing ASID invalidation in + * asid_flush_mask, and just use kvm_flush_remote_tlbs(kvm) to + * kick any running VCPUs so they check asid_flush_mask. 
+ */ + cpumask_setall(&kvm->arch.asid_flush_mask); + kvm_flush_remote_tlbs(kvm); + } +} + +static void kvm_vz_flush_shadow_memslot(struct kvm *kvm, + const struct kvm_memory_slot *slot) +{ + kvm_vz_flush_shadow_all(kvm); +} + +static void kvm_vz_vcpu_reenter(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + int cpu = smp_processor_id(); + int preserve_guest_tlb; + + preserve_guest_tlb = kvm_vz_check_requests(vcpu, cpu); + + if (preserve_guest_tlb) + kvm_vz_vcpu_save_wired(vcpu); + + kvm_vz_vcpu_load_tlb(vcpu, cpu); + + if (preserve_guest_tlb) + kvm_vz_vcpu_load_wired(vcpu); +} + +static int kvm_vz_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + int cpu = smp_processor_id(); + int r; + + kvm_vz_acquire_htimer(vcpu); + /* Check if we have any exceptions/interrupts pending */ + kvm_mips_deliver_interrupts(vcpu, read_gc0_cause()); + + kvm_vz_check_requests(vcpu, cpu); + kvm_vz_vcpu_load_tlb(vcpu, cpu); + kvm_vz_vcpu_load_wired(vcpu); + + r = vcpu->arch.vcpu_run(run, vcpu); + + kvm_vz_vcpu_save_wired(vcpu); + + return r; +} + +static struct kvm_mips_callbacks kvm_vz_callbacks = { + .handle_cop_unusable = kvm_trap_vz_handle_cop_unusable, + .handle_tlb_mod = kvm_trap_vz_handle_tlb_st_miss, + .handle_tlb_ld_miss = kvm_trap_vz_handle_tlb_ld_miss, + .handle_tlb_st_miss = kvm_trap_vz_handle_tlb_st_miss, + .handle_addr_err_st = kvm_trap_vz_no_handler, + .handle_addr_err_ld = kvm_trap_vz_no_handler, + .handle_syscall = kvm_trap_vz_no_handler, + .handle_res_inst = kvm_trap_vz_no_handler, + .handle_break = kvm_trap_vz_no_handler, + .handle_msa_disabled = kvm_trap_vz_handle_msa_disabled, + .handle_guest_exit = kvm_trap_vz_handle_guest_exit, + + .hardware_enable = kvm_vz_hardware_enable, + .hardware_disable = kvm_vz_hardware_disable, + .check_extension = kvm_vz_check_extension, + .vcpu_init = kvm_vz_vcpu_init, + .vcpu_uninit = kvm_vz_vcpu_uninit, + .vcpu_setup = kvm_vz_vcpu_setup, + .flush_shadow_all = kvm_vz_flush_shadow_all, + .flush_shadow_memslot = kvm_vz_flush_shadow_memslot, + .gva_to_gpa = kvm_vz_gva_to_gpa_cb, + .queue_timer_int = kvm_vz_queue_timer_int_cb, + .dequeue_timer_int = kvm_vz_dequeue_timer_int_cb, + .queue_io_int = kvm_vz_queue_io_int_cb, + .dequeue_io_int = kvm_vz_dequeue_io_int_cb, + .irq_deliver = kvm_vz_irq_deliver_cb, + .irq_clear = kvm_vz_irq_clear_cb, + .num_regs = kvm_vz_num_regs, + .copy_reg_indices = kvm_vz_copy_reg_indices, + .get_one_reg = kvm_vz_get_one_reg, + .set_one_reg = kvm_vz_set_one_reg, + .vcpu_load = kvm_vz_vcpu_load, + .vcpu_put = kvm_vz_vcpu_put, + .vcpu_run = kvm_vz_vcpu_run, + .vcpu_reenter = kvm_vz_vcpu_reenter, +}; + +int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks) +{ + if (!cpu_has_vz) + return -ENODEV; + + /* + * VZ requires at least 2 KScratch registers, so it should have been + * possible to allocate pgd_reg. + */ + if (WARN(pgd_reg == -1, + "pgd_reg not allocated even though cpu_has_vz\n")) + return -ENODEV; + + pr_info("Starting KVM with MIPS VZ extensions\n"); + + *install_callbacks = &kvm_vz_callbacks; + return 0; +} diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index 6db341347202..899e46279902 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -24,6 +24,7 @@ /* Cache operations. 
*/ void (*flush_cache_all)(void); void (*__flush_cache_all)(void); +EXPORT_SYMBOL_GPL(__flush_cache_all); void (*flush_cache_mm)(struct mm_struct *mm); void (*flush_cache_range)(struct vm_area_struct *vma, unsigned long start, unsigned long end); diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index aa75849c36bc..3ca20283b31e 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -348,7 +348,7 @@ void maar_init(void) upper = ((upper & MIPS_MAAR_ADDR) << 4) | 0xffff; pr_info(" [%d]: ", i / 2); - if (!(attr & MIPS_MAAR_V)) { + if (!(attr & MIPS_MAAR_VL)) { pr_cont("disabled\n"); continue; } diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h index 4852e849128b..c0a55050f70f 100644 --- a/arch/powerpc/include/asm/disassemble.h +++ b/arch/powerpc/include/asm/disassemble.h @@ -87,6 +87,11 @@ static inline unsigned int get_oc(u32 inst) return (inst >> 11) & 0x7fff; } +static inline unsigned int get_tx_or_sx(u32 inst) +{ + return (inst) & 0x1; +} + #define IS_XFORM(inst) (get_op(inst) == 31) #define IS_DSFORM(inst) (get_op(inst) >= 56) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index d96142572e6d..8a8ce220d7d0 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -296,11 +296,21 @@ static inline void iommu_restore(void) #endif /* The API to support IOMMU operations for VFIO */ -extern int iommu_tce_clear_param_check(struct iommu_table *tbl, - unsigned long ioba, unsigned long tce_value, - unsigned long npages); -extern int iommu_tce_put_param_check(struct iommu_table *tbl, - unsigned long ioba, unsigned long tce); +extern int iommu_tce_check_ioba(unsigned long page_shift, + unsigned long offset, unsigned long size, + unsigned long ioba, unsigned long npages); +extern int iommu_tce_check_gpa(unsigned long page_shift, + unsigned long gpa); + +#define iommu_tce_clear_param_check(tbl, ioba, tce_value, npages) \ + (iommu_tce_check_ioba((tbl)->it_page_shift, \ + (tbl)->it_offset, (tbl)->it_size, \ + (ioba), (npages)) || (tce_value)) +#define iommu_tce_put_param_check(tbl, ioba, gpa) \ + (iommu_tce_check_ioba((tbl)->it_page_shift, \ + (tbl)->it_offset, (tbl)->it_size, \ + (ioba), 1) || \ + iommu_tce_check_gpa((tbl)->it_page_shift, (gpa))) extern void iommu_flush_tce(struct iommu_table *tbl); extern int iommu_take_ownership(struct iommu_table *tbl); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 7bba8f415627..77c60826d145 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -45,9 +45,6 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED -#ifdef CONFIG_KVM_MMIO -#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 -#endif #define KVM_HALT_POLL_NS_DEFAULT 10000 /* 10 us */ /* These values are internal and can be increased later */ @@ -191,6 +188,13 @@ struct kvmppc_pginfo { atomic_t refcnt; }; +struct kvmppc_spapr_tce_iommu_table { + struct rcu_head rcu; + struct list_head next; + struct iommu_table *tbl; + struct kref kref; +}; + struct kvmppc_spapr_tce_table { struct list_head list; struct kvm *kvm; @@ -199,6 +203,7 @@ struct kvmppc_spapr_tce_table { u32 page_shift; u64 offset; /* in pages */ u64 size; /* window size in pages */ + struct list_head iommu_tables; struct page *pages[0]; }; @@ -345,6 +350,7 @@ struct kvmppc_pte { bool may_read : 1; bool may_write : 1; bool may_execute : 1; + unsigned long wimg; u8 page_size; /* MMU_PAGE_xxx */ }; @@ -441,6 +447,11 @@ struct mmio_hpte_cache { unsigned int index; }; 
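The get_tx_or_sx() helper added in the disassemble.h hunk above returns the low instruction bit that VSX load/store encodings use to select which half of the 64-entry VSX register file is addressed; the emulation code later records it in mmio_vsx_tx_sx_enabled (VSR 0..31 overlay the FPRs, VSR 32..63 overlay the vector registers). A minimal sketch of the resulting register numbering follows; the helper name is hypothetical and not part of the patch:

/* Illustrative only: VSX encodings split the target register into a 5-bit
 * T/S field and the TX/SX bit (instruction bit 0, the value returned by
 * get_tx_or_sx()).  Concatenating them gives the full VSR number 0..63;
 * the patch keeps the two parts separate as rt and mmio_vsx_tx_sx_enabled
 * rather than combining them.
 */
static inline unsigned int vsx_full_reg(u32 inst, unsigned int rt)
{
	return (get_tx_or_sx(inst) << 5) | rt;	/* VSR number, 0..63 */
}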
+#define KVMPPC_VSX_COPY_NONE 0 +#define KVMPPC_VSX_COPY_WORD 1 +#define KVMPPC_VSX_COPY_DWORD 2 +#define KVMPPC_VSX_COPY_DWORD_LOAD_DUMP 3 + struct openpic; struct kvm_vcpu_arch { @@ -644,6 +655,21 @@ struct kvm_vcpu_arch { u8 io_gpr; /* GPR used as IO source/target */ u8 mmio_host_swabbed; u8 mmio_sign_extend; + /* conversion between single and double precision */ + u8 mmio_sp64_extend; + /* + * Number of simulations for vsx. + * If we use 2*8bytes to simulate 1*16bytes, + * then the number should be 2 and + * mmio_vsx_copy_type=KVMPPC_VSX_COPY_DWORD. + * If we use 4*4bytes to simulate 1*16bytes, + * the number should be 4 and + * mmio_vsx_copy_type=KVMPPC_VSX_COPY_WORD. + */ + u8 mmio_vsx_copy_nums; + u8 mmio_vsx_offset; + u8 mmio_vsx_copy_type; + u8 mmio_vsx_tx_sx_enabled; u8 osi_needed; u8 osi_enabled; u8 papr_enabled; @@ -732,6 +758,8 @@ struct kvm_vcpu_arch { }; #define VCPU_FPR(vcpu, i) (vcpu)->arch.fp.fpr[i][TS_FPROFFSET] +#define VCPU_VSX_FPR(vcpu, i, j) ((vcpu)->arch.fp.fpr[i][j]) +#define VCPU_VSX_VR(vcpu, i) ((vcpu)->arch.vr.vr[i]) /* Values for vcpu->arch.state */ #define KVMPPC_VCPU_NOTREADY 0 @@ -745,6 +773,7 @@ struct kvm_vcpu_arch { #define KVM_MMIO_REG_FPR 0x0020 #define KVM_MMIO_REG_QPR 0x0040 #define KVM_MMIO_REG_FQPR 0x0060 +#define KVM_MMIO_REG_VSX 0x0080 #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c3877992eff9..76e940a3c145 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -78,9 +78,15 @@ extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_default_endian); +extern int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int rt, unsigned int bytes, + int is_default_endian, int mmio_sign_extend); extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, u64 val, unsigned int bytes, int is_default_endian); +extern int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, + int rs, unsigned int bytes, + int is_default_endian); extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, u32 *inst); @@ -132,6 +138,9 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu); extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu); extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags); +extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_vsx_unavail(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, @@ -164,13 +173,19 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm, extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, unsigned long porder); extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); +extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, + struct iommu_group *grp); +extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, + struct iommu_group *grp); extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce_64 *args); extern struct kvmppc_spapr_tce_table 
*kvmppc_find_table( - struct kvm_vcpu *vcpu, unsigned long liobn); -extern long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt, - unsigned long ioba, unsigned long npages); + struct kvm *kvm, unsigned long liobn); +#define kvmppc_ioba_validate(stt, ioba, npages) \ + (iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \ + (stt)->size, (ioba), (npages)) ? \ + H_PARAMETER : H_SUCCESS) extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt, unsigned long tce); extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, @@ -240,6 +255,7 @@ union kvmppc_one_reg { u64 dval; vector128 vval; u64 vsxval[2]; + u32 vsx32val[4]; struct { u64 addr; u64 length; diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 142d78d645f4..3a8d278e7421 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -86,32 +86,79 @@ #define OP_TRAP_64 2 #define OP_31_XOP_TRAP 4 +#define OP_31_XOP_LDX 21 #define OP_31_XOP_LWZX 23 +#define OP_31_XOP_LDUX 53 #define OP_31_XOP_DCBST 54 #define OP_31_XOP_LWZUX 55 #define OP_31_XOP_TRAP_64 68 #define OP_31_XOP_DCBF 86 #define OP_31_XOP_LBZX 87 +#define OP_31_XOP_STDX 149 #define OP_31_XOP_STWX 151 +#define OP_31_XOP_STDUX 181 +#define OP_31_XOP_STWUX 183 #define OP_31_XOP_STBX 215 #define OP_31_XOP_LBZUX 119 #define OP_31_XOP_STBUX 247 #define OP_31_XOP_LHZX 279 #define OP_31_XOP_LHZUX 311 #define OP_31_XOP_MFSPR 339 +#define OP_31_XOP_LWAX 341 #define OP_31_XOP_LHAX 343 +#define OP_31_XOP_LWAUX 373 #define OP_31_XOP_LHAUX 375 #define OP_31_XOP_STHX 407 #define OP_31_XOP_STHUX 439 #define OP_31_XOP_MTSPR 467 #define OP_31_XOP_DCBI 470 +#define OP_31_XOP_LDBRX 532 #define OP_31_XOP_LWBRX 534 #define OP_31_XOP_TLBSYNC 566 +#define OP_31_XOP_STDBRX 660 #define OP_31_XOP_STWBRX 662 +#define OP_31_XOP_STFSX 663 +#define OP_31_XOP_STFSUX 695 +#define OP_31_XOP_STFDX 727 +#define OP_31_XOP_STFDUX 759 #define OP_31_XOP_LHBRX 790 +#define OP_31_XOP_LFIWAX 855 +#define OP_31_XOP_LFIWZX 887 #define OP_31_XOP_STHBRX 918 +#define OP_31_XOP_STFIWX 983 + +/* VSX Scalar Load Instructions */ +#define OP_31_XOP_LXSDX 588 +#define OP_31_XOP_LXSSPX 524 +#define OP_31_XOP_LXSIWAX 76 +#define OP_31_XOP_LXSIWZX 12 + +/* VSX Scalar Store Instructions */ +#define OP_31_XOP_STXSDX 716 +#define OP_31_XOP_STXSSPX 652 +#define OP_31_XOP_STXSIWX 140 + +/* VSX Vector Load Instructions */ +#define OP_31_XOP_LXVD2X 844 +#define OP_31_XOP_LXVW4X 780 + +/* VSX Vector Load and Splat Instruction */ +#define OP_31_XOP_LXVDSX 332 + +/* VSX Vector Store Instructions */ +#define OP_31_XOP_STXVD2X 972 +#define OP_31_XOP_STXVW4X 908 + +#define OP_31_XOP_LFSX 535 +#define OP_31_XOP_LFSUX 567 +#define OP_31_XOP_LFDX 599 +#define OP_31_XOP_LFDUX 631 #define OP_LWZ 32 +#define OP_STFS 52 +#define OP_STFSU 53 +#define OP_STFD 54 +#define OP_STFDU 55 #define OP_LD 58 #define OP_LWZU 33 #define OP_LBZ 34 @@ -127,6 +174,17 @@ #define OP_LHAU 43 #define OP_STH 44 #define OP_STHU 45 +#define OP_LMW 46 +#define OP_STMW 47 +#define OP_LFS 48 +#define OP_LFSU 49 +#define OP_LFD 50 +#define OP_LFDU 51 +#define OP_STFS 52 +#define OP_STFSU 53 +#define OP_STFD 54 +#define OP_STFDU 55 +#define OP_LQ 56 /* sorted alphabetically */ #define PPC_INST_BHRBE 0x7c00025c diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 4edbe4bb0e8b..07fbeb927834 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -29,6 +29,9 @@ #define __KVM_HAVE_IRQ_LINE #define 
__KVM_HAVE_GUEST_DEBUG +/* Not always available, but if it is, this is the correct offset. */ +#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 + struct kvm_regs { __u64 pc; __u64 cr; diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 5a3231fedf08..f2b724cd9e64 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -963,47 +963,36 @@ void iommu_flush_tce(struct iommu_table *tbl) } EXPORT_SYMBOL_GPL(iommu_flush_tce); -int iommu_tce_clear_param_check(struct iommu_table *tbl, - unsigned long ioba, unsigned long tce_value, - unsigned long npages) +int iommu_tce_check_ioba(unsigned long page_shift, + unsigned long offset, unsigned long size, + unsigned long ioba, unsigned long npages) { - /* tbl->it_ops->clear() does not support any value but 0 */ - if (tce_value) - return -EINVAL; + unsigned long mask = (1UL << page_shift) - 1; - if (ioba & ~IOMMU_PAGE_MASK(tbl)) + if (ioba & mask) return -EINVAL; - ioba >>= tbl->it_page_shift; - if (ioba < tbl->it_offset) + ioba >>= page_shift; + if (ioba < offset) return -EINVAL; - if ((ioba + npages) > (tbl->it_offset + tbl->it_size)) + if ((ioba + 1) > (offset + size)) return -EINVAL; return 0; } -EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check); +EXPORT_SYMBOL_GPL(iommu_tce_check_ioba); -int iommu_tce_put_param_check(struct iommu_table *tbl, - unsigned long ioba, unsigned long tce) +int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa) { - if (tce & ~IOMMU_PAGE_MASK(tbl)) - return -EINVAL; - - if (ioba & ~IOMMU_PAGE_MASK(tbl)) - return -EINVAL; - - ioba >>= tbl->it_page_shift; - if (ioba < tbl->it_offset) - return -EINVAL; + unsigned long mask = (1UL << page_shift) - 1; - if ((ioba + 1) > (tbl->it_offset + tbl->it_size)) + if (gpa & mask) return -EINVAL; return 0; } -EXPORT_SYMBOL_GPL(iommu_tce_put_param_check); +EXPORT_SYMBOL_GPL(iommu_tce_check_gpa); long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry, unsigned long *hpa, enum dma_data_direction *direction) diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 029be26b5a17..65a471de96de 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -67,6 +67,7 @@ config KVM_BOOK3S_64 select KVM_BOOK3S_64_HANDLER select KVM select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE + select SPAPR_TCE_IOMMU if IOMMU_SUPPORT ---help--- Support running unmodified book3s_64 and book3s_32 guest kernels in virtual machines on book3s_64 host processors. 
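The iommu_tce_check_ioba()/iommu_tce_check_gpa() pair introduced in the iommu.c hunk above takes the DMA-window geometry (page_shift, offset and size, in IOMMU pages) as plain parameters, so the same bounds check can be shared by the iommu_tce_*_param_check() wrappers and the KVM TCE handlers. A minimal usage sketch with made-up window geometry; the values and the wrapper name are illustrative, not from the patch:

/* Illustrative only: a TCE hypercall argument check.  The ioba must be
 * aligned to the IOMMU page size and must lie inside the DMA window
 * described by (offset, size) in IOMMU pages; iommu_tce_check_ioba()
 * returns 0 when that holds and -EINVAL otherwise.
 */
static bool tce_ioba_ok(unsigned long ioba, unsigned long npages)
{
	const unsigned long page_shift = 12;	/* assumed 4K IOMMU pages */
	const unsigned long offset = 0;		/* assumed window start, in pages */
	const unsigned long size = 1UL << 16;	/* assumed window size, in pages */

	return iommu_tce_check_ioba(page_shift, offset, size,
				    ioba, npages) == 0;
}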
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index aedacefd961d..8c4d7e9d27d2 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -197,6 +197,24 @@ void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags) } EXPORT_SYMBOL_GPL(kvmppc_core_queue_program); +void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu) +{ + /* might as well deliver this straight away */ + kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, 0); +} + +void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu) +{ + /* might as well deliver this straight away */ + kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_ALTIVEC, 0); +} + +void kvmppc_core_queue_vsx_unavail(struct kvm_vcpu *vcpu) +{ + /* might as well deliver this straight away */ + kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_VSX, 0); +} + void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) { kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER); diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 70153578131a..29ebe2fd5867 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -319,6 +319,7 @@ do_second: gpte->may_execute = true; gpte->may_read = false; gpte->may_write = false; + gpte->wimg = r & HPTE_R_WIMG; switch (pp) { case 0: diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 74b0153780e3..9a4614cd0e53 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -145,6 +145,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, else kvmppc_mmu_flush_icache(pfn); + rflags = (rflags & ~HPTE_R_WIMG) | orig_pte->wimg; + /* * Use 64K pages if possible; otherwise, on 64K page kernels, * we need to transfer 4 more bits from guest real to host real addr. 
@@ -177,12 +179,15 @@ map_again: ret = mmu_hash_ops.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags, hpsize, hpsize, MMU_SEGSIZE_256M); - if (ret < 0) { + if (ret == -1) { /* If we couldn't map a primary PTE, try a secondary */ hash = ~hash; vflags ^= HPTE_V_SECONDARY; attempt++; goto map_again; + } else if (ret < 0) { + r = -EIO; + goto out_unlock; } else { trace_kvm_book3s_64_mmu_map(rflags, hpteg, vpn, hpaddr, orig_pte); diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 3e26cd4979f9..a160c14304eb 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -28,6 +28,8 @@ #include <linux/hugetlb.h> #include <linux/list.h> #include <linux/anon_inodes.h> +#include <linux/iommu.h> +#include <linux/file.h> #include <asm/tlbflush.h> #include <asm/kvm_ppc.h> @@ -40,6 +42,7 @@ #include <asm/udbg.h> #include <asm/iommu.h> #include <asm/tce.h> +#include <asm/mmu_context.h> static unsigned long kvmppc_tce_pages(unsigned long iommu_pages) { @@ -91,6 +94,137 @@ static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc) return ret; } +static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head) +{ + struct kvmppc_spapr_tce_iommu_table *stit = container_of(head, + struct kvmppc_spapr_tce_iommu_table, rcu); + + iommu_tce_table_put(stit->tbl); + + kfree(stit); +} + +static void kvm_spapr_tce_liobn_put(struct kref *kref) +{ + struct kvmppc_spapr_tce_iommu_table *stit = container_of(kref, + struct kvmppc_spapr_tce_iommu_table, kref); + + list_del_rcu(&stit->next); + + call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free); +} + +extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, + struct iommu_group *grp) +{ + int i; + struct kvmppc_spapr_tce_table *stt; + struct kvmppc_spapr_tce_iommu_table *stit, *tmp; + struct iommu_table_group *table_group = NULL; + + list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { + + table_group = iommu_group_get_iommudata(grp); + if (WARN_ON(!table_group)) + continue; + + list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + if (table_group->tables[i] != stit->tbl) + continue; + + kref_put(&stit->kref, kvm_spapr_tce_liobn_put); + return; + } + } + } +} + +extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, + struct iommu_group *grp) +{ + struct kvmppc_spapr_tce_table *stt = NULL; + bool found = false; + struct iommu_table *tbl = NULL; + struct iommu_table_group *table_group; + long i; + struct kvmppc_spapr_tce_iommu_table *stit; + struct fd f; + + f = fdget(tablefd); + if (!f.file) + return -EBADF; + + list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) { + if (stt == f.file->private_data) { + found = true; + break; + } + } + + fdput(f); + + if (!found) + return -EINVAL; + + table_group = iommu_group_get_iommudata(grp); + if (WARN_ON(!table_group)) + return -EFAULT; + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbltmp = table_group->tables[i]; + + if (!tbltmp) + continue; + /* + * Make sure hardware table parameters are exactly the same; + * this is used in the TCE handlers where boundary checks + * use only the first attached table. + */ + if ((tbltmp->it_page_shift == stt->page_shift) && + (tbltmp->it_offset == stt->offset) && + (tbltmp->it_size == stt->size)) { + /* + * Reference the table to avoid races with + * add/remove DMA windows. 
+ */ + tbl = iommu_tce_table_get(tbltmp); + break; + } + } + if (!tbl) + return -EINVAL; + + list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { + if (tbl != stit->tbl) + continue; + + if (!kref_get_unless_zero(&stit->kref)) { + /* stit is being destroyed */ + iommu_tce_table_put(tbl); + return -ENOTTY; + } + /* + * The table is already known to this KVM, we just increased + * its KVM reference counter and can return. + */ + return 0; + } + + stit = kzalloc(sizeof(*stit), GFP_KERNEL); + if (!stit) { + iommu_tce_table_put(tbl); + return -ENOMEM; + } + + stit->tbl = tbl; + kref_init(&stit->kref); + + list_add_rcu(&stit->next, &stt->iommu_tables); + + return 0; +} + static void release_spapr_tce_table(struct rcu_head *head) { struct kvmppc_spapr_tce_table *stt = container_of(head, @@ -130,9 +264,18 @@ static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma) static int kvm_spapr_tce_release(struct inode *inode, struct file *filp) { struct kvmppc_spapr_tce_table *stt = filp->private_data; + struct kvmppc_spapr_tce_iommu_table *stit, *tmp; list_del_rcu(&stt->list); + list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) { + WARN_ON(!kref_read(&stit->kref)); + while (1) { + if (kref_put(&stit->kref, kvm_spapr_tce_liobn_put)) + break; + } + } + kvm_put_kvm(stt->kvm); kvmppc_account_memlimit( @@ -164,7 +307,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, return -EBUSY; } - size = args->size; + size = _ALIGN_UP(args->size, PAGE_SIZE >> 3); npages = kvmppc_tce_pages(size); ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true); if (ret) { @@ -183,6 +326,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, stt->offset = args->offset; stt->size = size; stt->kvm = kvm; + INIT_LIST_HEAD_RCU(&stt->iommu_tables); for (i = 0; i < npages; i++) { stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); @@ -211,15 +355,106 @@ fail: return ret; } +static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry) +{ + unsigned long hpa = 0; + enum dma_data_direction dir = DMA_NONE; + + iommu_tce_xchg(tbl, entry, &hpa, &dir); +} + +static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm, + struct iommu_table *tbl, unsigned long entry) +{ + struct mm_iommu_table_group_mem_t *mem = NULL; + const unsigned long pgsize = 1ULL << tbl->it_page_shift; + unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + + if (!pua) + /* it_userspace allocation might be delayed */ + return H_TOO_HARD; + + mem = mm_iommu_lookup(kvm->mm, *pua, pgsize); + if (!mem) + return H_TOO_HARD; + + mm_iommu_mapped_dec(mem); + + *pua = 0; + + return H_SUCCESS; +} + +static long kvmppc_tce_iommu_unmap(struct kvm *kvm, + struct iommu_table *tbl, unsigned long entry) +{ + enum dma_data_direction dir = DMA_NONE; + unsigned long hpa = 0; + long ret; + + if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir))) + return H_HARDWARE; + + if (dir == DMA_NONE) + return H_SUCCESS; + + ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry); + if (ret != H_SUCCESS) + iommu_tce_xchg(tbl, entry, &hpa, &dir); + + return ret; +} + +long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, + unsigned long entry, unsigned long ua, + enum dma_data_direction dir) +{ + long ret; + unsigned long hpa, *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + struct mm_iommu_table_group_mem_t *mem; + + if (!pua) + /* it_userspace allocation might be delayed */ + return H_TOO_HARD; + + mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift); + if (!mem) + /* This only handles v2 IOMMU type, v1 is 
handled via ioctl() */ + return H_TOO_HARD; + + if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa))) + return H_HARDWARE; + + if (mm_iommu_mapped_inc(mem)) + return H_CLOSED; + + ret = iommu_tce_xchg(tbl, entry, &hpa, &dir); + if (WARN_ON_ONCE(ret)) { + mm_iommu_mapped_dec(mem); + return H_HARDWARE; + } + + if (dir != DMA_NONE) + kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry); + + *pua = ua; + + return 0; +} + long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce) { - struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn); - long ret; + struct kvmppc_spapr_tce_table *stt; + long ret, idx; + struct kvmppc_spapr_tce_iommu_table *stit; + unsigned long entry, ua = 0; + enum dma_data_direction dir; /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ /* liobn, ioba, tce); */ + stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) return H_TOO_HARD; @@ -231,7 +466,35 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, if (ret != H_SUCCESS) return ret; - kvmppc_tce_put(stt, ioba >> stt->page_shift, tce); + dir = iommu_tce_direction(tce); + if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, + tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) + return H_PARAMETER; + + entry = ioba >> stt->page_shift; + + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { + if (dir == DMA_NONE) { + ret = kvmppc_tce_iommu_unmap(vcpu->kvm, + stit->tbl, entry); + } else { + idx = srcu_read_lock(&vcpu->kvm->srcu); + ret = kvmppc_tce_iommu_map(vcpu->kvm, stit->tbl, + entry, ua, dir); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + } + + if (ret == H_SUCCESS) + continue; + + if (ret == H_TOO_HARD) + return ret; + + WARN_ON_ONCE(1); + kvmppc_clear_tce(stit->tbl, entry); + } + + kvmppc_tce_put(stt, entry, tce); return H_SUCCESS; } @@ -246,8 +509,9 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, unsigned long entry, ua = 0; u64 __user *tces; u64 tce; + struct kvmppc_spapr_tce_iommu_table *stit; - stt = kvmppc_find_table(vcpu, liobn); + stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) return H_TOO_HARD; @@ -284,6 +548,26 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu, if (ret != H_SUCCESS) goto unlock_exit; + if (kvmppc_gpa_to_ua(vcpu->kvm, + tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), + &ua, NULL)) + return H_PARAMETER; + + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { + ret = kvmppc_tce_iommu_map(vcpu->kvm, + stit->tbl, entry + i, ua, + iommu_tce_direction(tce)); + + if (ret == H_SUCCESS) + continue; + + if (ret == H_TOO_HARD) + goto unlock_exit; + + WARN_ON_ONCE(1); + kvmppc_clear_tce(stit->tbl, entry); + } + kvmppc_tce_put(stt, entry + i, tce); } @@ -300,8 +584,9 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, { struct kvmppc_spapr_tce_table *stt; long i, ret; + struct kvmppc_spapr_tce_iommu_table *stit; - stt = kvmppc_find_table(vcpu, liobn); + stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) return H_TOO_HARD; @@ -313,6 +598,24 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu, if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ)) return H_PARAMETER; + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { + unsigned long entry = ioba >> stit->tbl->it_page_shift; + + for (i = 0; i < npages; ++i) { + ret = kvmppc_tce_iommu_unmap(vcpu->kvm, + stit->tbl, entry + i); + + if (ret == H_SUCCESS) + continue; + + if (ret == H_TOO_HARD) + return ret; + + WARN_ON_ONCE(1); + kvmppc_clear_tce(stit->tbl, entry); + } + } + for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) 
kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index e4c4ea973e57..eda0a8f6fae8 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -40,6 +40,31 @@ #include <asm/iommu.h> #include <asm/tce.h> +#ifdef CONFIG_BUG + +#define WARN_ON_ONCE_RM(condition) ({ \ + static bool __section(.data.unlikely) __warned; \ + int __ret_warn_once = !!(condition); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ + __warned = true; \ + pr_err("WARN_ON_ONCE_RM: (%s) at %s:%u\n", \ + __stringify(condition), \ + __func__, __LINE__); \ + dump_stack(); \ + } \ + unlikely(__ret_warn_once); \ +}) + +#else + +#define WARN_ON_ONCE_RM(condition) ({ \ + int __ret_warn_on = !!(condition); \ + unlikely(__ret_warn_on); \ +}) + +#endif + #define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64)) /* @@ -48,10 +73,9 @@ * WARNING: This will be called in real or virtual mode on HV KVM and virtual * mode on PR KVM */ -struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu, +struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm, unsigned long liobn) { - struct kvm *kvm = vcpu->kvm; struct kvmppc_spapr_tce_table *stt; list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list) @@ -63,27 +87,6 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu, EXPORT_SYMBOL_GPL(kvmppc_find_table); /* - * Validates IO address. - * - * WARNING: This will be called in real-mode on HV KVM and virtual - * mode on PR KVM - */ -long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt, - unsigned long ioba, unsigned long npages) -{ - unsigned long mask = (1ULL << stt->page_shift) - 1; - unsigned long idx = ioba >> stt->page_shift; - - if ((ioba & mask) || (idx < stt->offset) || - (idx - stt->offset + npages > stt->size) || - (idx + npages < idx)) - return H_PARAMETER; - - return H_SUCCESS; -} -EXPORT_SYMBOL_GPL(kvmppc_ioba_validate); - -/* * Validates TCE address. * At the moment flags and page mask are validated. 
* As the host kernel does not access those addresses (just puts them @@ -96,10 +99,14 @@ EXPORT_SYMBOL_GPL(kvmppc_ioba_validate); */ long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce) { - unsigned long page_mask = ~((1ULL << stt->page_shift) - 1); - unsigned long mask = ~(page_mask | TCE_PCI_WRITE | TCE_PCI_READ); + unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE); + enum dma_data_direction dir = iommu_tce_direction(tce); + + /* Allow userspace to poison TCE table */ + if (dir == DMA_NONE) + return H_SUCCESS; - if (tce & mask) + if (iommu_tce_check_gpa(stt->page_shift, gpa)) return H_PARAMETER; return H_SUCCESS; @@ -179,15 +186,122 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa, EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry) +{ + unsigned long hpa = 0; + enum dma_data_direction dir = DMA_NONE; + + iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); +} + +static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm, + struct iommu_table *tbl, unsigned long entry) +{ + struct mm_iommu_table_group_mem_t *mem = NULL; + const unsigned long pgsize = 1ULL << tbl->it_page_shift; + unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + + if (!pua) + /* it_userspace allocation might be delayed */ + return H_TOO_HARD; + + pua = (void *) vmalloc_to_phys(pua); + if (WARN_ON_ONCE_RM(!pua)) + return H_HARDWARE; + + mem = mm_iommu_lookup_rm(kvm->mm, *pua, pgsize); + if (!mem) + return H_TOO_HARD; + + mm_iommu_mapped_dec(mem); + + *pua = 0; + + return H_SUCCESS; +} + +static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm, + struct iommu_table *tbl, unsigned long entry) +{ + enum dma_data_direction dir = DMA_NONE; + unsigned long hpa = 0; + long ret; + + if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir)) + /* + * real mode xchg can fail if struct page crosses + * a page boundary + */ + return H_TOO_HARD; + + if (dir == DMA_NONE) + return H_SUCCESS; + + ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry); + if (ret) + iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); + + return ret; +} + +static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, + unsigned long entry, unsigned long ua, + enum dma_data_direction dir) +{ + long ret; + unsigned long hpa = 0; + unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry); + struct mm_iommu_table_group_mem_t *mem; + + if (!pua) + /* it_userspace allocation might be delayed */ + return H_TOO_HARD; + + mem = mm_iommu_lookup_rm(kvm->mm, ua, 1ULL << tbl->it_page_shift); + if (!mem) + return H_TOO_HARD; + + if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa))) + return H_HARDWARE; + + pua = (void *) vmalloc_to_phys(pua); + if (WARN_ON_ONCE_RM(!pua)) + return H_HARDWARE; + + if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem))) + return H_CLOSED; + + ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); + if (ret) { + mm_iommu_mapped_dec(mem); + /* + * real mode xchg can fail if struct page crosses + * a page boundary + */ + return H_TOO_HARD; + } + + if (dir != DMA_NONE) + kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry); + + *pua = ua; + + return 0; +} + long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce) { - struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn); + struct kvmppc_spapr_tce_table *stt; long ret; + struct kvmppc_spapr_tce_iommu_table *stit; + unsigned long entry, ua = 0; + enum dma_data_direction dir; /* udbg_printf("H_PUT_TCE(): 
liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */ /* liobn, ioba, tce); */ + stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) return H_TOO_HARD; @@ -199,7 +313,32 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, if (ret != H_SUCCESS) return ret; - kvmppc_tce_put(stt, ioba >> stt->page_shift, tce); + dir = iommu_tce_direction(tce); + if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm, + tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL)) + return H_PARAMETER; + + entry = ioba >> stt->page_shift; + + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { + if (dir == DMA_NONE) + ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm, + stit->tbl, entry); + else + ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, + stit->tbl, entry, ua, dir); + + if (ret == H_SUCCESS) + continue; + + if (ret == H_TOO_HARD) + return ret; + + WARN_ON_ONCE_RM(1); + kvmppc_rm_clear_tce(stit->tbl, entry); + } + + kvmppc_tce_put(stt, entry, tce); return H_SUCCESS; } @@ -239,8 +378,10 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, long i, ret = H_SUCCESS; unsigned long tces, entry, ua = 0; unsigned long *rmap = NULL; + bool prereg = false; + struct kvmppc_spapr_tce_iommu_table *stit; - stt = kvmppc_find_table(vcpu, liobn); + stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) return H_TOO_HARD; @@ -259,23 +400,49 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, if (ret != H_SUCCESS) return ret; - if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) - return H_TOO_HARD; + if (mm_iommu_preregistered(vcpu->kvm->mm)) { + /* + * We get here if guest memory was pre-registered which + * is normally VFIO case and gpa->hpa translation does not + * depend on hpt. + */ + struct mm_iommu_table_group_mem_t *mem; - rmap = (void *) vmalloc_to_phys(rmap); + if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL)) + return H_TOO_HARD; - /* - * Synchronize with the MMU notifier callbacks in - * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.). - * While we have the rmap lock, code running on other CPUs - * cannot finish unmapping the host real page that backs - * this guest real page, so we are OK to access the host - * real page. - */ - lock_rmap(rmap); - if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) { - ret = H_TOO_HARD; - goto unlock_exit; + mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K); + if (mem) + prereg = mm_iommu_ua_to_hpa_rm(mem, ua, &tces) == 0; + } + + if (!prereg) { + /* + * This is usually a case of a guest with emulated devices only + * when TCE list is not in preregistered memory. + * We do not require memory to be preregistered in this case + * so lock rmap and do __find_linux_pte_or_hugepte(). + */ + if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) + return H_TOO_HARD; + + rmap = (void *) vmalloc_to_phys(rmap); + if (WARN_ON_ONCE_RM(!rmap)) + return H_HARDWARE; + + /* + * Synchronize with the MMU notifier callbacks in + * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.). + * While we have the rmap lock, code running on other CPUs + * cannot finish unmapping the host real page that backs + * this guest real page, so we are OK to access the host + * real page. 
+ */ + lock_rmap(rmap); + if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) { + ret = H_TOO_HARD; + goto unlock_exit; + } } for (i = 0; i < npages; ++i) { @@ -285,11 +452,33 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, if (ret != H_SUCCESS) goto unlock_exit; + ua = 0; + if (kvmppc_gpa_to_ua(vcpu->kvm, + tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), + &ua, NULL)) + return H_PARAMETER; + + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { + ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, + stit->tbl, entry + i, ua, + iommu_tce_direction(tce)); + + if (ret == H_SUCCESS) + continue; + + if (ret == H_TOO_HARD) + goto unlock_exit; + + WARN_ON_ONCE_RM(1); + kvmppc_rm_clear_tce(stit->tbl, entry); + } + kvmppc_tce_put(stt, entry + i, tce); } unlock_exit: - unlock_rmap(rmap); + if (rmap) + unlock_rmap(rmap); return ret; } @@ -300,8 +489,9 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, { struct kvmppc_spapr_tce_table *stt; long i, ret; + struct kvmppc_spapr_tce_iommu_table *stit; - stt = kvmppc_find_table(vcpu, liobn); + stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) return H_TOO_HARD; @@ -313,6 +503,24 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ)) return H_PARAMETER; + list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { + unsigned long entry = ioba >> stit->tbl->it_page_shift; + + for (i = 0; i < npages; ++i) { + ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm, + stit->tbl, entry + i); + + if (ret == H_SUCCESS) + continue; + + if (ret == H_TOO_HARD) + return ret; + + WARN_ON_ONCE_RM(1); + kvmppc_rm_clear_tce(stit->tbl, entry); + } + } + for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift)) kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value); @@ -322,12 +530,13 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu, long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba) { - struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn); + struct kvmppc_spapr_tce_table *stt; long ret; unsigned long idx; struct page *page; u64 *tbl; + stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) return H_TOO_HARD; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 8359752b3efc..68d68983948e 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -503,10 +503,18 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) break; unprivileged: default: - printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn); -#ifndef DEBUG_SPR - emulated = EMULATE_FAIL; -#endif + pr_info_ratelimited("KVM: invalid SPR write: %d\n", sprn); + if (sprn & 0x10) { + if (kvmppc_get_msr(vcpu) & MSR_PR) { + kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV); + emulated = EMULATE_AGAIN; + } + } else { + if ((kvmppc_get_msr(vcpu) & MSR_PR) || sprn == 0) { + kvmppc_core_queue_program(vcpu, SRR1_PROGILL); + emulated = EMULATE_AGAIN; + } + } break; } @@ -648,10 +656,20 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val break; default: unprivileged: - printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn); -#ifndef DEBUG_SPR - emulated = EMULATE_FAIL; -#endif + pr_info_ratelimited("KVM: invalid SPR read: %d\n", sprn); + if (sprn & 0x10) { + if (kvmppc_get_msr(vcpu) & MSR_PR) { + kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV); + emulated = EMULATE_AGAIN; + } + } else { + if ((kvmppc_get_msr(vcpu) & MSR_PR) || sprn == 0 || + sprn == 4 || sprn == 5 || sprn == 6) { + kvmppc_core_queue_program(vcpu, 
SRR1_PROGILL); + emulated = EMULATE_AGAIN; + } + } + break; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index fadb75abfe37..549dd6070dee 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3624,11 +3624,9 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) return -EIO; mutex_lock(&kvm->lock); + if (!kvm->arch.pimap) + goto unlock; - if (kvm->arch.pimap == NULL) { - mutex_unlock(&kvm->lock); - return 0; - } pimap = kvm->arch.pimap; for (i = 0; i < pimap->n_mapped; i++) { @@ -3650,7 +3648,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi) * We don't free this structure even when the count goes to * zero. The structure is freed when we destroy the VM. */ - + unlock: mutex_unlock(&kvm->lock); return 0; } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index d4dfc0ca2a44..69a09444d46e 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -349,7 +349,7 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) if (msr & MSR_POW) { if (!vcpu->arch.pending_exceptions) { kvm_vcpu_block(vcpu); - clear_bit(KVM_REQ_UNHALT, &vcpu->requests); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); vcpu->stat.halt_wakeup++; /* Unset POW bit after we woke up */ @@ -537,8 +537,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, int r = RESUME_GUEST; int relocated; int page_found = 0; - struct kvmppc_pte pte; - bool is_mmio = false; + struct kvmppc_pte pte = { 0 }; bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false; bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false; u64 vsid; @@ -616,8 +615,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Page not found in guest SLB */ kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); - } else if (!is_mmio && - kvmppc_visible_gpa(vcpu, pte.raddr)) { + } else if (kvmppc_visible_gpa(vcpu, pte.raddr)) { if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) { /* * There is already a host HPTE there, presumably @@ -627,7 +625,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_mmu_unmap_page(vcpu, &pte); } /* The guest's PTE is not mapped yet. 
Map on the host */ - kvmppc_mmu_map_page(vcpu, &pte, iswrite); + if (kvmppc_mmu_map_page(vcpu, &pte, iswrite) == -EIO) { + /* Exit KVM if mapping failed */ + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } if (data) vcpu->stat.sp_storage++; else if (vcpu->arch.mmu.is_dcbz32(vcpu) && diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index f102616febc7..bcbeeb62dd13 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -344,7 +344,7 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) case H_CEDE: kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE); kvm_vcpu_block(vcpu); - clear_bit(KVM_REQ_UNHALT, &vcpu->requests); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); vcpu->stat.halt_wakeup++; return EMULATE_DONE; case H_LOGICAL_CI_LOAD: diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 0514cbd4e533..3eaac3809977 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -300,6 +300,11 @@ void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags) kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); } +void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL); +} + void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu) { kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER); @@ -579,7 +584,7 @@ static void arm_next_watchdog(struct kvm_vcpu *vcpu) * userspace, so clear the KVM_REQ_WATCHDOG request. */ if ((vcpu->arch.tsr & (TSR_ENW | TSR_WIS)) != (TSR_ENW | TSR_WIS)) - clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests); + kvm_clear_request(KVM_REQ_WATCHDOG, vcpu); spin_lock_irqsave(&vcpu->arch.wdt_lock, flags); nr_jiffies = watchdog_next_timeout(vcpu); @@ -690,7 +695,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) if (vcpu->arch.shared->msr & MSR_WE) { local_irq_enable(); kvm_vcpu_block(vcpu); - clear_bit(KVM_REQ_UNHALT, &vcpu->requests); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); hard_irq_disable(); kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 0fda4230f6c0..77fd043b3ecc 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -797,9 +797,8 @@ int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500) host_tlb_params[0].sets = host_tlb_params[0].entries / host_tlb_params[0].ways; host_tlb_params[1].sets = 1; - - vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) * - host_tlb_params[1].entries, + vcpu_e500->h2g_tlb1_rmap = kcalloc(host_tlb_params[1].entries, + sizeof(*vcpu_e500->h2g_tlb1_rmap), GFP_KERNEL); if (!vcpu_e500->h2g_tlb1_rmap) return -EINVAL; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index b379146de55b..c873ffe55362 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -259,10 +259,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) case OP_31_XOP_MFSPR: emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt); + if (emulated == EMULATE_AGAIN) { + emulated = EMULATE_DONE; + advance = 0; + } break; case OP_31_XOP_MTSPR: emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); + if (emulated == EMULATE_AGAIN) { + emulated = EMULATE_DONE; + advance = 0; + } break; case OP_31_XOP_TLBSYNC: diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 6d3c0ee1d744..af833531af31 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ 
b/arch/powerpc/kvm/emulate_loadstore.c @@ -34,18 +34,38 @@ #include "timing.h" #include "trace.h" -/* XXX to do: - * lhax - * lhaux - * lswx - * lswi - * stswx - * stswi - * lha - * lhau - * lmw - * stmw +#ifdef CONFIG_PPC_FPU +static bool kvmppc_check_fp_disabled(struct kvm_vcpu *vcpu) +{ + if (!(kvmppc_get_msr(vcpu) & MSR_FP)) { + kvmppc_core_queue_fpunavail(vcpu); + return true; + } + + return false; +} +#endif /* CONFIG_PPC_FPU */ + +#ifdef CONFIG_VSX +static bool kvmppc_check_vsx_disabled(struct kvm_vcpu *vcpu) +{ + if (!(kvmppc_get_msr(vcpu) & MSR_VSX)) { + kvmppc_core_queue_vsx_unavail(vcpu); + return true; + } + + return false; +} +#endif /* CONFIG_VSX */ + +/* + * XXX to do: + * lfiwax, lfiwzx + * vector loads and stores * + * Instructions that trap when used on cache-inhibited mappings + * are not emulated here: multiple and string instructions, + * lq/stq, and the load-reserve/store-conditional instructions. */ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) { @@ -66,6 +86,19 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) rs = get_rs(inst); rt = get_rt(inst); + /* + * if mmio_vsx_tx_sx_enabled == 0, copy data between + * VSR[0..31] and memory + * if mmio_vsx_tx_sx_enabled == 1, copy data between + * VSR[32..63] and memory + */ + vcpu->arch.mmio_vsx_tx_sx_enabled = get_tx_or_sx(inst); + vcpu->arch.mmio_vsx_copy_nums = 0; + vcpu->arch.mmio_vsx_offset = 0; + vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_NONE; + vcpu->arch.mmio_sp64_extend = 0; + vcpu->arch.mmio_sign_extend = 0; + switch (get_op(inst)) { case 31: switch (get_xop(inst)) { @@ -73,6 +106,11 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); break; + case OP_31_XOP_LWZUX: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + case OP_31_XOP_LBZX: emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); break; @@ -82,22 +120,36 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; + case OP_31_XOP_STDX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), 8, 1); + break; + + case OP_31_XOP_STDUX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), 8, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + case OP_31_XOP_STWX: emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 1); + kvmppc_get_gpr(vcpu, rs), 4, 1); + break; + + case OP_31_XOP_STWUX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), 4, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_31_XOP_STBX: emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); + kvmppc_get_gpr(vcpu, rs), 1, 1); break; case OP_31_XOP_STBUX: emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); + kvmppc_get_gpr(vcpu, rs), 1, 1); kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; @@ -105,6 +157,11 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); break; + case OP_31_XOP_LHAUX: + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + case OP_31_XOP_LHZX: emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); break; @@ -116,14 +173,12 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) case OP_31_XOP_STHX: emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); + 
kvmppc_get_gpr(vcpu, rs), 2, 1); break; case OP_31_XOP_STHUX: emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); + kvmppc_get_gpr(vcpu, rs), 2, 1); kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; @@ -143,8 +198,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) case OP_31_XOP_STWBRX: emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 0); + kvmppc_get_gpr(vcpu, rs), 4, 0); break; case OP_31_XOP_LHBRX: @@ -153,10 +207,258 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) case OP_31_XOP_STHBRX: emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 0); + kvmppc_get_gpr(vcpu, rs), 2, 0); + break; + + case OP_31_XOP_LDBRX: + emulated = kvmppc_handle_load(run, vcpu, rt, 8, 0); + break; + + case OP_31_XOP_STDBRX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), 8, 0); + break; + + case OP_31_XOP_LDX: + emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); + break; + + case OP_31_XOP_LDUX: + emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_LWAX: + emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1); + break; + + case OP_31_XOP_LWAUX: + emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + +#ifdef CONFIG_PPC_FPU + case OP_31_XOP_LFSX: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_sp64_extend = 1; + emulated = kvmppc_handle_load(run, vcpu, + KVM_MMIO_REG_FPR|rt, 4, 1); + break; + + case OP_31_XOP_LFSUX: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_sp64_extend = 1; + emulated = kvmppc_handle_load(run, vcpu, + KVM_MMIO_REG_FPR|rt, 4, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_LFDX: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + emulated = kvmppc_handle_load(run, vcpu, + KVM_MMIO_REG_FPR|rt, 8, 1); + break; + + case OP_31_XOP_LFDUX: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + emulated = kvmppc_handle_load(run, vcpu, + KVM_MMIO_REG_FPR|rt, 8, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_LFIWAX: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + emulated = kvmppc_handle_loads(run, vcpu, + KVM_MMIO_REG_FPR|rt, 4, 1); + break; + + case OP_31_XOP_LFIWZX: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + emulated = kvmppc_handle_load(run, vcpu, + KVM_MMIO_REG_FPR|rt, 4, 1); + break; + + case OP_31_XOP_STFSX: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_sp64_extend = 1; + emulated = kvmppc_handle_store(run, vcpu, + VCPU_FPR(vcpu, rs), 4, 1); + break; + + case OP_31_XOP_STFSUX: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_sp64_extend = 1; + emulated = kvmppc_handle_store(run, vcpu, + VCPU_FPR(vcpu, rs), 4, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_STFDX: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + emulated = kvmppc_handle_store(run, vcpu, + VCPU_FPR(vcpu, rs), 8, 1); + break; + + case OP_31_XOP_STFDUX: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + emulated = kvmppc_handle_store(run, vcpu, + VCPU_FPR(vcpu, rs), 8, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_STFIWX: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + emulated = 
kvmppc_handle_store(run, vcpu, + VCPU_FPR(vcpu, rs), 4, 1); + break; +#endif + +#ifdef CONFIG_VSX + case OP_31_XOP_LXSDX: + if (kvmppc_check_vsx_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_vsx_copy_nums = 1; + vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; + emulated = kvmppc_handle_vsx_load(run, vcpu, + KVM_MMIO_REG_VSX|rt, 8, 1, 0); + break; + + case OP_31_XOP_LXSSPX: + if (kvmppc_check_vsx_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_vsx_copy_nums = 1; + vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; + vcpu->arch.mmio_sp64_extend = 1; + emulated = kvmppc_handle_vsx_load(run, vcpu, + KVM_MMIO_REG_VSX|rt, 4, 1, 0); + break; + + case OP_31_XOP_LXSIWAX: + if (kvmppc_check_vsx_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_vsx_copy_nums = 1; + vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; + emulated = kvmppc_handle_vsx_load(run, vcpu, + KVM_MMIO_REG_VSX|rt, 4, 1, 1); + break; + + case OP_31_XOP_LXSIWZX: + if (kvmppc_check_vsx_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_vsx_copy_nums = 1; + vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; + emulated = kvmppc_handle_vsx_load(run, vcpu, + KVM_MMIO_REG_VSX|rt, 4, 1, 0); + break; + + case OP_31_XOP_LXVD2X: + /* + * In this case, the official load/store process is like this: + * Step1, exit from vm by page fault isr, then kvm saves the VSRs. + * Please see guest_exit_cont->store_fp_state->SAVE_32VSRS + * as reference. + * + * Step2, copy data between memory and VCPU. + * Notice: for LXVD2X/STXVD2X/LXVW4X/STXVW4X, we use + * 2 copies*8 bytes or 4 copies*4 bytes + * to simulate one copy of 16 bytes. + * Also there is an endian issue here; we should pay attention to the + * layout of memory. + * Please see the LXVD2X_ROT/STXVD2X_ROT macros for more reference. + * If host is little-endian, kvm will call XXSWAPD for + * LXVD2X_ROT/STXVD2X_ROT. + * So, if host is little-endian, + * the position of memory should be swapped. + * + * Step3, return to guest, kvm restores the registers. + * Please see kvmppc_hv_entry->load_fp_state->REST_32VSRS + * as reference. 
+ */ + if (kvmppc_check_vsx_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_vsx_copy_nums = 2; + vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; + emulated = kvmppc_handle_vsx_load(run, vcpu, + KVM_MMIO_REG_VSX|rt, 8, 1, 0); + break; + + case OP_31_XOP_LXVW4X: + if (kvmppc_check_vsx_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_vsx_copy_nums = 4; + vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD; + emulated = kvmppc_handle_vsx_load(run, vcpu, + KVM_MMIO_REG_VSX|rt, 4, 1, 0); + break; + + case OP_31_XOP_LXVDSX: + if (kvmppc_check_vsx_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_vsx_copy_nums = 1; + vcpu->arch.mmio_vsx_copy_type = + KVMPPC_VSX_COPY_DWORD_LOAD_DUMP; + emulated = kvmppc_handle_vsx_load(run, vcpu, + KVM_MMIO_REG_VSX|rt, 8, 1, 0); + break; + + case OP_31_XOP_STXSDX: + if (kvmppc_check_vsx_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_vsx_copy_nums = 1; + vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; + emulated = kvmppc_handle_vsx_store(run, vcpu, + rs, 8, 1); break; + case OP_31_XOP_STXSSPX: + if (kvmppc_check_vsx_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_vsx_copy_nums = 1; + vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; + vcpu->arch.mmio_sp64_extend = 1; + emulated = kvmppc_handle_vsx_store(run, vcpu, + rs, 4, 1); + break; + + case OP_31_XOP_STXSIWX: + if (kvmppc_check_vsx_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_vsx_offset = 1; + vcpu->arch.mmio_vsx_copy_nums = 1; + vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD; + emulated = kvmppc_handle_vsx_store(run, vcpu, + rs, 4, 1); + break; + + case OP_31_XOP_STXVD2X: + if (kvmppc_check_vsx_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_vsx_copy_nums = 2; + vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD; + emulated = kvmppc_handle_vsx_store(run, vcpu, + rs, 8, 1); + break; + + case OP_31_XOP_STXVW4X: + if (kvmppc_check_vsx_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_vsx_copy_nums = 4; + vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD; + emulated = kvmppc_handle_vsx_store(run, vcpu, + rs, 4, 1); + break; +#endif /* CONFIG_VSX */ default: emulated = EMULATE_FAIL; break; @@ -167,10 +469,60 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); break; - /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. 
*/ +#ifdef CONFIG_PPC_FPU + case OP_STFS: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_sp64_extend = 1; + emulated = kvmppc_handle_store(run, vcpu, + VCPU_FPR(vcpu, rs), + 4, 1); + break; + + case OP_STFSU: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_sp64_extend = 1; + emulated = kvmppc_handle_store(run, vcpu, + VCPU_FPR(vcpu, rs), + 4, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_STFD: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + emulated = kvmppc_handle_store(run, vcpu, + VCPU_FPR(vcpu, rs), + 8, 1); + break; + + case OP_STFDU: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + emulated = kvmppc_handle_store(run, vcpu, + VCPU_FPR(vcpu, rs), + 8, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; +#endif + case OP_LD: rt = get_rt(inst); - emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); + switch (inst & 3) { + case 0: /* ld */ + emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); + break; + case 1: /* ldu */ + emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + case 2: /* lwa */ + emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1); + break; + default: + emulated = EMULATE_FAIL; + } break; case OP_LWZU: @@ -193,31 +545,37 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) 4, 1); break; - /* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */ case OP_STD: rs = get_rs(inst); - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 8, 1); + switch (inst & 3) { + case 0: /* std */ + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), 8, 1); + break; + case 1: /* stdu */ + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), 8, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + default: + emulated = EMULATE_FAIL; + } break; case OP_STWU: emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 1); + kvmppc_get_gpr(vcpu, rs), 4, 1); kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; case OP_STB: emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); + kvmppc_get_gpr(vcpu, rs), 1, 1); break; case OP_STBU: emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); + kvmppc_get_gpr(vcpu, rs), 1, 1); kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; @@ -241,16 +599,48 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) case OP_STH: emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); + kvmppc_get_gpr(vcpu, rs), 2, 1); break; case OP_STHU: emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); + kvmppc_get_gpr(vcpu, rs), 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + +#ifdef CONFIG_PPC_FPU + case OP_LFS: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_sp64_extend = 1; + emulated = kvmppc_handle_load(run, vcpu, + KVM_MMIO_REG_FPR|rt, 4, 1); + break; + + case OP_LFSU: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + vcpu->arch.mmio_sp64_extend = 1; + emulated = kvmppc_handle_load(run, vcpu, + KVM_MMIO_REG_FPR|rt, 4, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_LFD: + if (kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + emulated = kvmppc_handle_load(run, vcpu, + KVM_MMIO_REG_FPR|rt, 8, 1); + break; + + case OP_LFDU: + if 
(kvmppc_check_fp_disabled(vcpu)) + return EMULATE_DONE; + emulated = kvmppc_handle_load(run, vcpu, + KVM_MMIO_REG_FPR|rt, 8, 1); kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); break; +#endif default: emulated = EMULATE_FAIL; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 95c91a9de351..1ee22a910074 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -37,6 +37,7 @@ #include <asm/cputhreads.h> #include <asm/irqflags.h> #include <asm/iommu.h> +#include <asm/switch_to.h> #include "timing.h" #include "irq.h" #include "../mm/mmu_decl.h" @@ -232,7 +233,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) case EV_HCALL_TOKEN(EV_IDLE): r = EV_SUCCESS; kvm_vcpu_block(vcpu); - clear_bit(KVM_REQ_UNHALT, &vcpu->requests); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); break; default: r = EV_UNIMPLEMENTED; @@ -524,11 +525,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) /* We support this only for PR */ r = !hv_enabled; break; -#ifdef CONFIG_KVM_MMIO - case KVM_CAP_COALESCED_MMIO: - r = KVM_COALESCED_MMIO_PAGE_OFFSET; - break; -#endif #ifdef CONFIG_KVM_MPIC case KVM_CAP_IRQ_MPIC: r = 1; @@ -538,6 +534,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_SPAPR_TCE: case KVM_CAP_SPAPR_TCE_64: + /* fallthrough */ + case KVM_CAP_SPAPR_TCE_VFIO: case KVM_CAP_PPC_RTAS: case KVM_CAP_PPC_FIXUP_HCALL: case KVM_CAP_PPC_ENABLE_HCALL: @@ -806,6 +804,129 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, kvm->arch.kvm_ops->irq_bypass_del_producer(cons, prod); } +#ifdef CONFIG_VSX +static inline int kvmppc_get_vsr_dword_offset(int index) +{ + int offset; + + if ((index != 0) && (index != 1)) + return -1; + +#ifdef __BIG_ENDIAN + offset = index; +#else + offset = 1 - index; +#endif + + return offset; +} + +static inline int kvmppc_get_vsr_word_offset(int index) +{ + int offset; + + if ((index > 3) || (index < 0)) + return -1; + +#ifdef __BIG_ENDIAN + offset = index; +#else + offset = 3 - index; +#endif + return offset; +} + +static inline void kvmppc_set_vsr_dword(struct kvm_vcpu *vcpu, + u64 gpr) +{ + union kvmppc_one_reg val; + int offset = kvmppc_get_vsr_dword_offset(vcpu->arch.mmio_vsx_offset); + int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; + + if (offset == -1) + return; + + if (vcpu->arch.mmio_vsx_tx_sx_enabled) { + val.vval = VCPU_VSX_VR(vcpu, index); + val.vsxval[offset] = gpr; + VCPU_VSX_VR(vcpu, index) = val.vval; + } else { + VCPU_VSX_FPR(vcpu, index, offset) = gpr; + } +} + +static inline void kvmppc_set_vsr_dword_dump(struct kvm_vcpu *vcpu, + u64 gpr) +{ + union kvmppc_one_reg val; + int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; + + if (vcpu->arch.mmio_vsx_tx_sx_enabled) { + val.vval = VCPU_VSX_VR(vcpu, index); + val.vsxval[0] = gpr; + val.vsxval[1] = gpr; + VCPU_VSX_VR(vcpu, index) = val.vval; + } else { + VCPU_VSX_FPR(vcpu, index, 0) = gpr; + VCPU_VSX_FPR(vcpu, index, 1) = gpr; + } +} + +static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu, + u32 gpr32) +{ + union kvmppc_one_reg val; + int offset = kvmppc_get_vsr_word_offset(vcpu->arch.mmio_vsx_offset); + int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK; + int dword_offset, word_offset; + + if (offset == -1) + return; + + if (vcpu->arch.mmio_vsx_tx_sx_enabled) { + val.vval = VCPU_VSX_VR(vcpu, index); + val.vsx32val[offset] = gpr32; + VCPU_VSX_VR(vcpu, index) = val.vval; + } else { + dword_offset = offset / 2; + word_offset = offset % 2; + val.vsxval[0] = VCPU_VSX_FPR(vcpu, index, dword_offset); + 
val.vsx32val[word_offset] = gpr32; + VCPU_VSX_FPR(vcpu, index, dword_offset) = val.vsxval[0]; + } +} +#endif /* CONFIG_VSX */ + +#ifdef CONFIG_PPC_FPU +static inline u64 sp_to_dp(u32 fprs) +{ + u64 fprd; + + preempt_disable(); + enable_kernel_fp(); + asm ("lfs%U1%X1 0,%1; stfd%U0%X0 0,%0" : "=m" (fprd) : "m" (fprs) + : "fr0"); + preempt_enable(); + return fprd; +} + +static inline u32 dp_to_sp(u64 fprd) +{ + u32 fprs; + + preempt_disable(); + enable_kernel_fp(); + asm ("lfd%U1%X1 0,%1; stfs%U0%X0 0,%0" : "=m" (fprs) : "m" (fprd) + : "fr0"); + preempt_enable(); + return fprs; +} + +#else +#define sp_to_dp(x) (x) +#define dp_to_sp(x) (x) +#endif /* CONFIG_PPC_FPU */ + static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { @@ -832,6 +953,10 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, } } + /* conversion between single and double precision */ + if ((vcpu->arch.mmio_sp64_extend) && (run->mmio.len == 4)) + gpr = sp_to_dp(gpr); + if (vcpu->arch.mmio_sign_extend) { switch (run->mmio.len) { #ifdef CONFIG_PPC64 @@ -848,8 +973,6 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, } } - kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); - switch (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) { case KVM_MMIO_REG_GPR: kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); @@ -866,6 +989,17 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr; break; #endif +#ifdef CONFIG_VSX + case KVM_MMIO_REG_VSX: + if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_DWORD) + kvmppc_set_vsr_dword(vcpu, gpr); + else if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_WORD) + kvmppc_set_vsr_word(vcpu, gpr); + else if (vcpu->arch.mmio_vsx_copy_type == + KVMPPC_VSX_COPY_DWORD_LOAD_DUMP) + kvmppc_set_vsr_dword_dump(vcpu, gpr); + break; +#endif default: BUG(); } @@ -932,6 +1066,35 @@ int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 1); } +#ifdef CONFIG_VSX +int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int rt, unsigned int bytes, + int is_default_endian, int mmio_sign_extend) +{ + enum emulation_result emulated = EMULATE_DONE; + + /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */ + if ( (vcpu->arch.mmio_vsx_copy_nums > 4) || + (vcpu->arch.mmio_vsx_copy_nums < 0) ) { + return EMULATE_FAIL; + } + + while (vcpu->arch.mmio_vsx_copy_nums) { + emulated = __kvmppc_handle_load(run, vcpu, rt, bytes, + is_default_endian, mmio_sign_extend); + + if (emulated != EMULATE_DONE) + break; + + vcpu->arch.paddr_accessed += run->mmio.len; + + vcpu->arch.mmio_vsx_copy_nums--; + vcpu->arch.mmio_vsx_offset++; + } + return emulated; +} +#endif /* CONFIG_VSX */ + int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, u64 val, unsigned int bytes, int is_default_endian) { @@ -957,6 +1120,9 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->mmio_needed = 1; vcpu->mmio_is_write = 1; + if ((vcpu->arch.mmio_sp64_extend) && (bytes == 4)) + val = dp_to_sp(val); + /* Store the value at the lowest bytes in 'data'. 
*/ if (!host_swabbed) { switch (bytes) { @@ -990,6 +1156,129 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(kvmppc_handle_store); +#ifdef CONFIG_VSX +static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val) +{ + u32 dword_offset, word_offset; + union kvmppc_one_reg reg; + int vsx_offset = 0; + int copy_type = vcpu->arch.mmio_vsx_copy_type; + int result = 0; + + switch (copy_type) { + case KVMPPC_VSX_COPY_DWORD: + vsx_offset = + kvmppc_get_vsr_dword_offset(vcpu->arch.mmio_vsx_offset); + + if (vsx_offset == -1) { + result = -1; + break; + } + + if (!vcpu->arch.mmio_vsx_tx_sx_enabled) { + *val = VCPU_VSX_FPR(vcpu, rs, vsx_offset); + } else { + reg.vval = VCPU_VSX_VR(vcpu, rs); + *val = reg.vsxval[vsx_offset]; + } + break; + + case KVMPPC_VSX_COPY_WORD: + vsx_offset = + kvmppc_get_vsr_word_offset(vcpu->arch.mmio_vsx_offset); + + if (vsx_offset == -1) { + result = -1; + break; + } + + if (!vcpu->arch.mmio_vsx_tx_sx_enabled) { + dword_offset = vsx_offset / 2; + word_offset = vsx_offset % 2; + reg.vsxval[0] = VCPU_VSX_FPR(vcpu, rs, dword_offset); + *val = reg.vsx32val[word_offset]; + } else { + reg.vval = VCPU_VSX_VR(vcpu, rs); + *val = reg.vsx32val[vsx_offset]; + } + break; + + default: + result = -1; + break; + } + + return result; +} + +int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu, + int rs, unsigned int bytes, int is_default_endian) +{ + u64 val; + enum emulation_result emulated = EMULATE_DONE; + + vcpu->arch.io_gpr = rs; + + /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */ + if ( (vcpu->arch.mmio_vsx_copy_nums > 4) || + (vcpu->arch.mmio_vsx_copy_nums < 0) ) { + return EMULATE_FAIL; + } + + while (vcpu->arch.mmio_vsx_copy_nums) { + if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1) + return EMULATE_FAIL; + + emulated = kvmppc_handle_store(run, vcpu, + val, bytes, is_default_endian); + + if (emulated != EMULATE_DONE) + break; + + vcpu->arch.paddr_accessed += run->mmio.len; + + vcpu->arch.mmio_vsx_copy_nums--; + vcpu->arch.mmio_vsx_offset++; + } + + return emulated; +} + +static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu, + struct kvm_run *run) +{ + enum emulation_result emulated = EMULATE_FAIL; + int r; + + vcpu->arch.paddr_accessed += run->mmio.len; + + if (!vcpu->mmio_is_write) { + emulated = kvmppc_handle_vsx_load(run, vcpu, vcpu->arch.io_gpr, + run->mmio.len, 1, vcpu->arch.mmio_sign_extend); + } else { + emulated = kvmppc_handle_vsx_store(run, vcpu, + vcpu->arch.io_gpr, run->mmio.len, 1); + } + + switch (emulated) { + case EMULATE_DO_MMIO: + run->exit_reason = KVM_EXIT_MMIO; + r = RESUME_HOST; + break; + case EMULATE_FAIL: + pr_info("KVM: MMIO emulation failed (VSX repeat)\n"); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + r = RESUME_HOST; + break; + default: + r = RESUME_GUEST; + break; + } + return r; +} +#endif /* CONFIG_VSX */ + int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) { int r = 0; @@ -1092,13 +1381,24 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) int r; sigset_t sigsaved; - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); - if (vcpu->mmio_needed) { + vcpu->mmio_needed = 0; if (!vcpu->mmio_is_write) kvmppc_complete_mmio_load(vcpu, run); - vcpu->mmio_needed = 0; +#ifdef CONFIG_VSX + if (vcpu->arch.mmio_vsx_copy_nums > 0) { + vcpu->arch.mmio_vsx_copy_nums--; + vcpu->arch.mmio_vsx_offset++; + } + + if 
(vcpu->arch.mmio_vsx_copy_nums > 0) { + r = kvmppc_emulate_mmio_vsx_loadstore(vcpu, run); + if (r == RESUME_HOST) { + vcpu->mmio_needed = 1; + return r; + } + } +#endif } else if (vcpu->arch.osi_needed) { u64 *gprs = run->osi.gprs; int i; @@ -1120,6 +1420,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) #endif } + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + if (run->immediate_exit) r = -EINTR; else diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index a41faf34b034..426614a882a9 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -25,6 +25,7 @@ #include <asm/cpu.h> #include <asm/fpu/api.h> #include <asm/isc.h> +#include <asm/guarded_storage.h> #define KVM_S390_BSCA_CPU_SLOTS 64 #define KVM_S390_ESCA_CPU_SLOTS 248 @@ -121,6 +122,7 @@ struct esca_block { #define CPUSTAT_SLSR 0x00002000 #define CPUSTAT_ZARCH 0x00000800 #define CPUSTAT_MCDS 0x00000100 +#define CPUSTAT_KSS 0x00000200 #define CPUSTAT_SM 0x00000080 #define CPUSTAT_IBS 0x00000040 #define CPUSTAT_GED2 0x00000010 @@ -164,16 +166,27 @@ struct kvm_s390_sie_block { #define ICTL_RRBE 0x00001000 #define ICTL_TPROT 0x00000200 __u32 ictl; /* 0x0048 */ +#define ECA_CEI 0x80000000 +#define ECA_IB 0x40000000 +#define ECA_SIGPI 0x10000000 +#define ECA_MVPGI 0x01000000 +#define ECA_VX 0x00020000 +#define ECA_PROTEXCI 0x00002000 +#define ECA_SII 0x00000001 __u32 eca; /* 0x004c */ #define ICPT_INST 0x04 #define ICPT_PROGI 0x08 #define ICPT_INSTPROGI 0x0C +#define ICPT_EXTREQ 0x10 #define ICPT_EXTINT 0x14 +#define ICPT_IOREQ 0x18 +#define ICPT_WAIT 0x1c #define ICPT_VALIDITY 0x20 #define ICPT_STOP 0x28 #define ICPT_OPEREXC 0x2C #define ICPT_PARTEXEC 0x38 #define ICPT_IOINST 0x40 +#define ICPT_KSS 0x5c __u8 icptcode; /* 0x0050 */ __u8 icptstatus; /* 0x0051 */ __u16 ihcpu; /* 0x0052 */ @@ -182,10 +195,19 @@ struct kvm_s390_sie_block { __u32 ipb; /* 0x0058 */ __u32 scaoh; /* 0x005c */ __u8 reserved60; /* 0x0060 */ +#define ECB_GS 0x40 +#define ECB_TE 0x10 +#define ECB_SRSI 0x04 +#define ECB_HOSTPROTINT 0x02 __u8 ecb; /* 0x0061 */ +#define ECB2_CMMA 0x80 +#define ECB2_IEP 0x20 +#define ECB2_PFMFI 0x08 +#define ECB2_ESCA 0x04 __u8 ecb2; /* 0x0062 */ -#define ECB3_AES 0x04 #define ECB3_DEA 0x08 +#define ECB3_AES 0x04 +#define ECB3_RI 0x01 __u8 ecb3; /* 0x0063 */ __u32 scaol; /* 0x0064 */ __u8 reserved68[4]; /* 0x0068 */ @@ -219,11 +241,14 @@ struct kvm_s390_sie_block { __u32 crycbd; /* 0x00fc */ __u64 gcr[16]; /* 0x0100 */ __u64 gbea; /* 0x0180 */ - __u8 reserved188[24]; /* 0x0188 */ + __u8 reserved188[8]; /* 0x0188 */ + __u64 sdnxo; /* 0x0190 */ + __u8 reserved198[8]; /* 0x0198 */ __u32 fac; /* 0x01a0 */ __u8 reserved1a4[20]; /* 0x01a4 */ __u64 cbrlo; /* 0x01b8 */ __u8 reserved1c0[8]; /* 0x01c0 */ +#define ECD_HOSTREGMGMT 0x20000000 __u32 ecd; /* 0x01c8 */ __u8 reserved1cc[18]; /* 0x01cc */ __u64 pp; /* 0x01de */ @@ -498,6 +523,12 @@ struct kvm_s390_local_interrupt { #define FIRQ_CNTR_PFAULT 3 #define FIRQ_MAX_COUNT 4 +/* mask the AIS mode for a given ISC */ +#define AIS_MODE_MASK(isc) (0x80 >> isc) + +#define KVM_S390_AIS_MODE_ALL 0 +#define KVM_S390_AIS_MODE_SINGLE 1 + struct kvm_s390_float_interrupt { unsigned long pending_irqs; spinlock_t lock; @@ -507,6 +538,10 @@ struct kvm_s390_float_interrupt { struct kvm_s390_ext_info srv_signal; int next_rr_cpu; unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; + struct mutex ais_lock; + u8 simm; + u8 nimm; + int ais_enabled; }; struct kvm_hw_wp_info_arch { @@ -554,6 +589,7 
@@ struct kvm_vcpu_arch { /* if vsie is active, currently executed shadow sie control block */ struct kvm_s390_sie_block *vsie_block; unsigned int host_acrs[NUM_ACRS]; + struct gs_cb *host_gscb; struct fpu host_fpregs; struct kvm_s390_local_interrupt local_int; struct hrtimer ckc_timer; @@ -574,6 +610,7 @@ struct kvm_vcpu_arch { */ seqcount_t cputm_seqcount; __u64 cputm_start; + bool gs_enabled; }; struct kvm_vm_stat { @@ -596,6 +633,7 @@ struct s390_io_adapter { bool maskable; bool masked; bool swap; + bool suppressible; struct rw_semaphore maps_lock; struct list_head maps; atomic_t nr_maps; diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index ace3bd315438..6f5167bc1928 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -75,6 +75,7 @@ struct sclp_info { unsigned char has_pfmfi : 1; unsigned char has_ibs : 1; unsigned char has_skey : 1; + unsigned char has_kss : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index a2ffec4139ad..3dd2a1d308dd 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -26,6 +26,8 @@ #define KVM_DEV_FLIC_ADAPTER_REGISTER 6 #define KVM_DEV_FLIC_ADAPTER_MODIFY 7 #define KVM_DEV_FLIC_CLEAR_IO_IRQ 8 +#define KVM_DEV_FLIC_AISM 9 +#define KVM_DEV_FLIC_AIRQ_INJECT 10 /* * We can have up to 4*64k pending subchannels + 8 adapter interrupts, * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts. @@ -41,7 +43,14 @@ struct kvm_s390_io_adapter { __u8 isc; __u8 maskable; __u8 swap; - __u8 pad; + __u8 flags; +}; + +#define KVM_S390_ADAPTER_SUPPRESSIBLE 0x01 + +struct kvm_s390_ais_req { + __u8 isc; + __u16 mode; }; #define KVM_S390_IO_ADAPTER_MASK 1 @@ -110,6 +119,7 @@ struct kvm_s390_vm_cpu_machine { #define KVM_S390_VM_CPU_FEAT_CMMA 10 #define KVM_S390_VM_CPU_FEAT_PFMFI 11 #define KVM_S390_VM_CPU_FEAT_SIGPIF 12 +#define KVM_S390_VM_CPU_FEAT_KSS 13 struct kvm_s390_vm_cpu_feat { __u64 feat[16]; }; @@ -131,7 +141,8 @@ struct kvm_s390_vm_cpu_subfunc { __u8 kmo[16]; /* with MSA4 */ __u8 pcc[16]; /* with MSA4 */ __u8 ppno[16]; /* with MSA5 */ - __u8 reserved[1824]; + __u8 kma[16]; /* with MSA8 */ + __u8 reserved[1808]; }; /* kvm attributes for crypto */ @@ -197,6 +208,10 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_VRS (1UL << 6) #define KVM_SYNC_RICCB (1UL << 7) #define KVM_SYNC_FPRS (1UL << 8) +#define KVM_SYNC_GSCB (1UL << 9) +/* length and alignment of the sdnx as a power of two */ +#define SDNXC 8 +#define SDNXL (1UL << SDNXC) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ @@ -217,8 +232,16 @@ struct kvm_sync_regs { }; __u8 reserved[512]; /* for future vector expansion */ __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */ - __u8 padding[52]; /* riccb needs to be 64byte aligned */ + __u8 padding1[52]; /* riccb needs to be 64byte aligned */ __u8 riccb[64]; /* runtime instrumentation controls block */ + __u8 padding2[192]; /* sdnx needs to be 256byte aligned */ + union { + __u8 sdnx[SDNXL]; /* state description annex */ + struct { + __u64 reserved1[2]; + __u64 gscb[4]; + }; + }; }; #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index ddbffb715b40..9da243d94cc3 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -261,7 +261,7 @@ struct aste { int ipte_lock_held(struct kvm_vcpu *vcpu) { - if (vcpu->arch.sie_block->eca & 1) 
{ + if (vcpu->arch.sie_block->eca & ECA_SII) { int rc; read_lock(&vcpu->kvm->arch.sca_lock); @@ -360,7 +360,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) void ipte_lock(struct kvm_vcpu *vcpu) { - if (vcpu->arch.sie_block->eca & 1) + if (vcpu->arch.sie_block->eca & ECA_SII) ipte_lock_siif(vcpu); else ipte_lock_simple(vcpu); @@ -368,7 +368,7 @@ void ipte_lock(struct kvm_vcpu *vcpu) void ipte_unlock(struct kvm_vcpu *vcpu) { - if (vcpu->arch.sie_block->eca & 1) + if (vcpu->arch.sie_block->eca & ECA_SII) ipte_unlock_siif(vcpu); else ipte_unlock_simple(vcpu); diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 59920f96ebc0..a4752bf6b526 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -35,6 +35,7 @@ static const intercept_handler_t instruction_handlers[256] = { [0xb6] = kvm_s390_handle_stctl, [0xb7] = kvm_s390_handle_lctl, [0xb9] = kvm_s390_handle_b9, + [0xe3] = kvm_s390_handle_e3, [0xe5] = kvm_s390_handle_e5, [0xeb] = kvm_s390_handle_eb, }; @@ -368,8 +369,7 @@ static int handle_operexc(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa, vcpu->arch.sie_block->ipb); - if (vcpu->arch.sie_block->ipa == 0xb256 && - test_kvm_facility(vcpu->kvm, 74)) + if (vcpu->arch.sie_block->ipa == 0xb256) return handle_sthyi(vcpu); if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0) @@ -404,28 +404,31 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; switch (vcpu->arch.sie_block->icptcode) { - case 0x10: - case 0x18: + case ICPT_EXTREQ: + case ICPT_IOREQ: return handle_noop(vcpu); - case 0x04: + case ICPT_INST: rc = handle_instruction(vcpu); break; - case 0x08: + case ICPT_PROGI: return handle_prog(vcpu); - case 0x14: + case ICPT_EXTINT: return handle_external_interrupt(vcpu); - case 0x1c: + case ICPT_WAIT: return kvm_s390_handle_wait(vcpu); - case 0x20: + case ICPT_VALIDITY: return handle_validity(vcpu); - case 0x28: + case ICPT_STOP: return handle_stop(vcpu); - case 0x2c: + case ICPT_OPEREXC: rc = handle_operexc(vcpu); break; - case 0x38: + case ICPT_PARTEXEC: rc = handle_partial_execution(vcpu); break; + case ICPT_KSS: + rc = kvm_s390_skey_check_enable(vcpu); + break; default: return -EOPNOTSUPP; } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 169558dc7daf..caf15c8a8948 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -410,6 +410,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, struct kvm_s390_mchk_info *mchk) { unsigned long ext_sa_addr; + unsigned long lc; freg_t fprs[NUM_FPRS]; union mci mci; int rc; @@ -418,12 +419,34 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, /* take care of lazy register loading */ save_fpu_regs(); save_access_regs(vcpu->run->s.regs.acrs); + if (MACHINE_HAS_GS && vcpu->arch.gs_enabled) + save_gs_cb(current->thread.gs_cb); /* Extended save area */ rc = read_guest_lc(vcpu, __LC_MCESAD, &ext_sa_addr, sizeof(unsigned long)); - /* Only bits 0-53 are used for address formation */ - ext_sa_addr &= ~0x3ffUL; + /* Only bits 0 through 63-LC are used for address formation */ + lc = ext_sa_addr & MCESA_LC_MASK; + if (test_kvm_facility(vcpu->kvm, 133)) { + switch (lc) { + case 0: + case 10: + ext_sa_addr &= ~0x3ffUL; + break; + case 11: + ext_sa_addr &= ~0x7ffUL; + break; + case 12: + ext_sa_addr &= ~0xfffUL; + break; + default: + ext_sa_addr = 0; + break; + } + } else { + ext_sa_addr &= ~0x3ffUL; + } + if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) { if 
(write_guest_abs(vcpu, ext_sa_addr, vcpu->run->s.regs.vrs, 512)) @@ -431,6 +454,14 @@ static int __write_machine_check(struct kvm_vcpu *vcpu, } else { mci.vr = 0; } + if (!rc && mci.gs && ext_sa_addr && test_kvm_facility(vcpu->kvm, 133) + && (lc == 11 || lc == 12)) { + if (write_guest_abs(vcpu, ext_sa_addr + 1024, + &vcpu->run->s.regs.gscb, 32)) + mci.gs = 0; + } else { + mci.gs = 0; + } /* General interruption information */ rc |= put_guest_lc(vcpu, 1, (u8 __user *) __LC_AR_MODE_ID); @@ -1968,6 +1999,8 @@ static int register_io_adapter(struct kvm_device *dev, adapter->maskable = adapter_info.maskable; adapter->masked = false; adapter->swap = adapter_info.swap; + adapter->suppressible = (adapter_info.flags) & + KVM_S390_ADAPTER_SUPPRESSIBLE; dev->kvm->arch.adapters[adapter->id] = adapter; return 0; @@ -2121,6 +2154,87 @@ static int clear_io_irq(struct kvm *kvm, struct kvm_device_attr *attr) return 0; } +static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + struct kvm_s390_ais_req req; + int ret = 0; + + if (!fi->ais_enabled) + return -ENOTSUPP; + + if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req))) + return -EFAULT; + + if (req.isc > MAX_ISC) + return -EINVAL; + + trace_kvm_s390_modify_ais_mode(req.isc, + (fi->simm & AIS_MODE_MASK(req.isc)) ? + (fi->nimm & AIS_MODE_MASK(req.isc)) ? + 2 : KVM_S390_AIS_MODE_SINGLE : + KVM_S390_AIS_MODE_ALL, req.mode); + + mutex_lock(&fi->ais_lock); + switch (req.mode) { + case KVM_S390_AIS_MODE_ALL: + fi->simm &= ~AIS_MODE_MASK(req.isc); + fi->nimm &= ~AIS_MODE_MASK(req.isc); + break; + case KVM_S390_AIS_MODE_SINGLE: + fi->simm |= AIS_MODE_MASK(req.isc); + fi->nimm &= ~AIS_MODE_MASK(req.isc); + break; + default: + ret = -EINVAL; + } + mutex_unlock(&fi->ais_lock); + + return ret; +} + +static int kvm_s390_inject_airq(struct kvm *kvm, + struct s390_io_adapter *adapter) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + struct kvm_s390_interrupt s390int = { + .type = KVM_S390_INT_IO(1, 0, 0, 0), + .parm = 0, + .parm64 = (adapter->isc << 27) | 0x80000000, + }; + int ret = 0; + + if (!fi->ais_enabled || !adapter->suppressible) + return kvm_s390_inject_vm(kvm, &s390int); + + mutex_lock(&fi->ais_lock); + if (fi->nimm & AIS_MODE_MASK(adapter->isc)) { + trace_kvm_s390_airq_suppressed(adapter->id, adapter->isc); + goto out; + } + + ret = kvm_s390_inject_vm(kvm, &s390int); + if (!ret && (fi->simm & AIS_MODE_MASK(adapter->isc))) { + fi->nimm |= AIS_MODE_MASK(adapter->isc); + trace_kvm_s390_modify_ais_mode(adapter->isc, + KVM_S390_AIS_MODE_SINGLE, 2); + } +out: + mutex_unlock(&fi->ais_lock); + return ret; +} + +static int flic_inject_airq(struct kvm *kvm, struct kvm_device_attr *attr) +{ + unsigned int id = attr->attr; + struct s390_io_adapter *adapter = get_io_adapter(kvm, id); + + if (!adapter) + return -EINVAL; + + return kvm_s390_inject_airq(kvm, adapter); +} + static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { int r = 0; @@ -2157,6 +2271,12 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) case KVM_DEV_FLIC_CLEAR_IO_IRQ: r = clear_io_irq(dev->kvm, attr); break; + case KVM_DEV_FLIC_AISM: + r = modify_ais_mode(dev->kvm, attr); + break; + case KVM_DEV_FLIC_AIRQ_INJECT: + r = flic_inject_airq(dev->kvm, attr); + break; default: r = -EINVAL; } @@ -2176,6 +2296,8 @@ static int flic_has_attr(struct kvm_device *dev, case KVM_DEV_FLIC_ADAPTER_REGISTER: case KVM_DEV_FLIC_ADAPTER_MODIFY: case 
KVM_DEV_FLIC_CLEAR_IO_IRQ: + case KVM_DEV_FLIC_AISM: + case KVM_DEV_FLIC_AIRQ_INJECT: return 0; } return -ENXIO; @@ -2286,12 +2408,7 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e, ret = adapter_indicators_set(kvm, adapter, &e->adapter); up_read(&adapter->maps_lock); if ((ret > 0) && !adapter->masked) { - struct kvm_s390_interrupt s390int = { - .type = KVM_S390_INT_IO(1, 0, 0, 0), - .parm = 0, - .parm64 = (adapter->isc << 27) | 0x80000000, - }; - ret = kvm_s390_inject_vm(kvm, &s390int); + ret = kvm_s390_inject_airq(kvm, adapter); if (ret == 0) ret = 1; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d5c5c911821a..aeb3feb9de53 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -276,6 +276,10 @@ static void kvm_s390_cpu_feat_init(void) __cpacf_query(CPACF_PRNO, (cpacf_mask_t *) kvm_s390_available_subfunc.ppno); + if (test_facility(146)) /* MSA8 */ + __cpacf_query(CPACF_KMA, (cpacf_mask_t *) + kvm_s390_available_subfunc.kma); + if (MACHINE_HAS_ESOP) allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); /* @@ -300,6 +304,8 @@ static void kvm_s390_cpu_feat_init(void) allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI); if (sclp.has_ibs) allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS); + if (sclp.has_kss) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS); /* * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make * all skey handling functions read/set the skey from the PGSTE @@ -380,6 +386,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_SKEYS: case KVM_CAP_S390_IRQ_STATE: case KVM_CAP_S390_USER_INSTR0: + case KVM_CAP_S390_AIS: r = 1; break; case KVM_CAP_S390_MEM_OP: @@ -405,6 +412,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_RI: r = test_facility(64); break; + case KVM_CAP_S390_GS: + r = test_facility(133); + break; default: r = 0; } @@ -541,6 +551,34 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", r ? "(not available)" : "(success)"); break; + case KVM_CAP_S390_AIS: + mutex_lock(&kvm->lock); + if (kvm->created_vcpus) { + r = -EBUSY; + } else { + set_kvm_facility(kvm->arch.model.fac_mask, 72); + set_kvm_facility(kvm->arch.model.fac_list, 72); + kvm->arch.float_int.ais_enabled = 1; + r = 0; + } + mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "ENABLE: AIS %s", + r ? "(not available)" : "(success)"); + break; + case KVM_CAP_S390_GS: + r = -EINVAL; + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus)) { + r = -EBUSY; + } else if (test_facility(133)) { + set_kvm_facility(kvm->arch.model.fac_mask, 133); + set_kvm_facility(kvm->arch.model.fac_list, 133); + r = 0; + } + mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s", + r ? 
"(not available)" : "(success)"); + break; case KVM_CAP_S390_USER_STSI: VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); kvm->arch.user_stsi = 1; @@ -1498,6 +1536,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_s390_crypto_init(kvm); + mutex_init(&kvm->arch.float_int.ais_lock); + kvm->arch.float_int.simm = 0; + kvm->arch.float_int.nimm = 0; + kvm->arch.float_int.ais_enabled = 0; spin_lock_init(&kvm->arch.float_int.lock); for (i = 0; i < FIRQ_LIST_COUNT; i++) INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); @@ -1646,7 +1688,7 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu) sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; - vcpu->arch.sie_block->ecb2 |= 0x04U; + vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); } else { struct bsca_block *sca = vcpu->kvm->arch.sca; @@ -1700,7 +1742,7 @@ static int sca_switch_to_extended(struct kvm *kvm) kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { vcpu->arch.sie_block->scaoh = scaoh; vcpu->arch.sie_block->scaol = scaol; - vcpu->arch.sie_block->ecb2 |= 0x04U; + vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; } kvm->arch.sca = new_sca; kvm->arch.use_esca = 1; @@ -1749,6 +1791,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm_s390_set_prefix(vcpu, 0); if (test_kvm_facility(vcpu->kvm, 64)) vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; + if (test_kvm_facility(vcpu->kvm, 133)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; /* fprs can be synchronized via vrs, even if the guest has no vx. With * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format. */ @@ -1939,8 +1983,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) if (!vcpu->arch.sie_block->cbrlo) return -ENOMEM; - vcpu->arch.sie_block->ecb2 |= 0x80; - vcpu->arch.sie_block->ecb2 &= ~0x08; + vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; + vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI; return 0; } @@ -1970,31 +2014,37 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ if (MACHINE_HAS_ESOP) - vcpu->arch.sie_block->ecb |= 0x02; + vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; if (test_kvm_facility(vcpu->kvm, 9)) - vcpu->arch.sie_block->ecb |= 0x04; + vcpu->arch.sie_block->ecb |= ECB_SRSI; if (test_kvm_facility(vcpu->kvm, 73)) - vcpu->arch.sie_block->ecb |= 0x10; + vcpu->arch.sie_block->ecb |= ECB_TE; if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi) - vcpu->arch.sie_block->ecb2 |= 0x08; + vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; if (test_kvm_facility(vcpu->kvm, 130)) - vcpu->arch.sie_block->ecb2 |= 0x20; - vcpu->arch.sie_block->eca = 0x1002000U; + vcpu->arch.sie_block->ecb2 |= ECB2_IEP; + vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; if (sclp.has_cei) - vcpu->arch.sie_block->eca |= 0x80000000U; + vcpu->arch.sie_block->eca |= ECA_CEI; if (sclp.has_ib) - vcpu->arch.sie_block->eca |= 0x40000000U; + vcpu->arch.sie_block->eca |= ECA_IB; if (sclp.has_siif) - vcpu->arch.sie_block->eca |= 1; + vcpu->arch.sie_block->eca |= ECA_SII; if (sclp.has_sigpif) - vcpu->arch.sie_block->eca |= 0x10000000U; + vcpu->arch.sie_block->eca |= ECA_SIGPI; if (test_kvm_facility(vcpu->kvm, 129)) { - vcpu->arch.sie_block->eca |= 0x00020000; - vcpu->arch.sie_block->ecd |= 0x20000000; + vcpu->arch.sie_block->eca |= ECA_VX; + vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; } + vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) + | SDNXC; 
vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; - vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; + + if (sclp.has_kss) + atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags); + else + vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; if (vcpu->kvm->arch.use_cmma) { rc = kvm_s390_vcpu_setup_cmma(vcpu); @@ -2446,7 +2496,7 @@ retry: } /* nothing to do, just clear the request */ - clear_bit(KVM_REQ_UNHALT, &vcpu->requests); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); return 0; } @@ -2719,6 +2769,11 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct runtime_instr_cb *riccb; + struct gs_cb *gscb; + + riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; + gscb = (struct gs_cb *) &kvm_run->s.regs.gscb; vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) @@ -2747,12 +2802,24 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) * we should enable RI here instead of doing the lazy enablement. */ if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) && - test_kvm_facility(vcpu->kvm, 64)) { - struct runtime_instr_cb *riccb = - (struct runtime_instr_cb *) &kvm_run->s.regs.riccb; - - if (riccb->valid) - vcpu->arch.sie_block->ecb3 |= 0x01; + test_kvm_facility(vcpu->kvm, 64) && + riccb->valid && + !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) { + VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)"); + vcpu->arch.sie_block->ecb3 |= ECB3_RI; + } + /* + * If userspace sets the gscb (e.g. after migration) to non-zero, + * we should enable GS here instead of doing the lazy enablement. + */ + if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) && + test_kvm_facility(vcpu->kvm, 133) && + gscb->gssm && + !vcpu->arch.gs_enabled) { + VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)"); + vcpu->arch.sie_block->ecb |= ECB_GS; + vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; + vcpu->arch.gs_enabled = 1; } save_access_regs(vcpu->arch.host_acrs); restore_access_regs(vcpu->run->s.regs.acrs); @@ -2768,6 +2835,20 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (test_fp_ctl(current->thread.fpu.fpc)) /* User space provided an invalid FPC, let's clear it */ current->thread.fpu.fpc = 0; + if (MACHINE_HAS_GS) { + preempt_disable(); + __ctl_set_bit(2, 4); + if (current->thread.gs_cb) { + vcpu->arch.host_gscb = current->thread.gs_cb; + save_gs_cb(vcpu->arch.host_gscb); + } + if (vcpu->arch.gs_enabled) { + current->thread.gs_cb = (struct gs_cb *) + &vcpu->run->s.regs.gscb; + restore_gs_cb(current->thread.gs_cb); + } + preempt_enable(); + } kvm_run->kvm_dirty_regs = 0; } @@ -2794,6 +2875,18 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) /* Restore will be done lazily at return */ current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc; current->thread.fpu.regs = vcpu->arch.host_fpregs.regs; + if (MACHINE_HAS_GS) { + __ctl_set_bit(2, 4); + if (vcpu->arch.gs_enabled) + save_gs_cb(current->thread.gs_cb); + preempt_disable(); + current->thread.gs_cb = vcpu->arch.host_gscb; + restore_gs_cb(vcpu->arch.host_gscb); + preempt_enable(); + if (!vcpu->arch.host_gscb) + __ctl_clear_bit(2, 4); + vcpu->arch.host_gscb = NULL; + } } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index af9fa91a0c91..55f5c8457d6d 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -25,7 +25,7 @@ typedef int (*intercept_handler_t)(struct 
kvm_vcpu *vcpu); /* Transactional Memory Execution related macros */ -#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10)) +#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE)) #define TDB_FORMAT1 1 #define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1)) @@ -246,6 +246,7 @@ static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu) int is_valid_psw(psw_t *psw); int kvm_s390_handle_aa(struct kvm_vcpu *vcpu); int kvm_s390_handle_b2(struct kvm_vcpu *vcpu); +int kvm_s390_handle_e3(struct kvm_vcpu *vcpu); int kvm_s390_handle_e5(struct kvm_vcpu *vcpu); int kvm_s390_handle_01(struct kvm_vcpu *vcpu); int kvm_s390_handle_b9(struct kvm_vcpu *vcpu); @@ -253,6 +254,7 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu); int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu); int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu); int kvm_s390_handle_eb(struct kvm_vcpu *vcpu); +int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu); /* implemented in vsie.c */ int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 64b6a309f2c4..c03106c428cf 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -37,7 +37,8 @@ static int handle_ri(struct kvm_vcpu *vcpu) { if (test_kvm_facility(vcpu->kvm, 64)) { - vcpu->arch.sie_block->ecb3 |= 0x01; + VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (lazy)"); + vcpu->arch.sie_block->ecb3 |= ECB3_RI; kvm_s390_retry_instr(vcpu); return 0; } else @@ -52,6 +53,33 @@ int kvm_s390_handle_aa(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } +static int handle_gs(struct kvm_vcpu *vcpu) +{ + if (test_kvm_facility(vcpu->kvm, 133)) { + VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (lazy)"); + preempt_disable(); + __ctl_set_bit(2, 4); + current->thread.gs_cb = (struct gs_cb *)&vcpu->run->s.regs.gscb; + restore_gs_cb(current->thread.gs_cb); + preempt_enable(); + vcpu->arch.sie_block->ecb |= ECB_GS; + vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT; + vcpu->arch.gs_enabled = 1; + kvm_s390_retry_instr(vcpu); + return 0; + } else + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); +} + +int kvm_s390_handle_e3(struct kvm_vcpu *vcpu) +{ + int code = vcpu->arch.sie_block->ipb & 0xff; + + if (code == 0x49 || code == 0x4d) + return handle_gs(vcpu); + else + return -EOPNOTSUPP; +} /* Handle SCK (SET CLOCK) interception */ static int handle_set_clock(struct kvm_vcpu *vcpu) { @@ -170,18 +198,25 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) return 0; } -static int __skey_check_enable(struct kvm_vcpu *vcpu) +int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu) { int rc = 0; + struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block; trace_kvm_s390_skey_related_inst(vcpu); - if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE))) + if (!(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) && + !(atomic_read(&sie_block->cpuflags) & CPUSTAT_KSS)) return rc; rc = s390_enable_skey(); VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc); - if (!rc) - vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); + if (!rc) { + if (atomic_read(&sie_block->cpuflags) & CPUSTAT_KSS) + atomic_andnot(CPUSTAT_KSS, &sie_block->cpuflags); + else + sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | + ICTL_RRBE); + } return rc; } @@ -190,7 +225,7 @@ static int try_handle_skey(struct kvm_vcpu *vcpu) int rc; vcpu->stat.instruction_storage_key++; - rc = __skey_check_enable(vcpu); + rc = kvm_s390_skey_check_enable(vcpu); if (rc) return rc; if (sclp.has_skey) { @@ -759,6 +794,7 @@ 
static const intercept_handler_t b2_handlers[256] = { [0x3b] = handle_io_inst, [0x3c] = handle_io_inst, [0x50] = handle_ipte_interlock, + [0x56] = handle_sthyi, [0x5f] = handle_io_inst, [0x74] = handle_io_inst, [0x76] = handle_io_inst, @@ -887,7 +923,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) } if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) { - int rc = __skey_check_enable(vcpu); + int rc = kvm_s390_skey_check_enable(vcpu); if (rc) return rc; diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c index 05c98bb853cf..926b5244263e 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kvm/sthyi.c @@ -404,6 +404,9 @@ int handle_sthyi(struct kvm_vcpu *vcpu) u64 code, addr, cc = 0; struct sthyi_sctns *sctns = NULL; + if (!test_kvm_facility(vcpu->kvm, 74)) + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); + /* * STHYI requires extensive locking in the higher hypervisors * and is very computational/memory expensive. Therefore we diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index 396485bca191..78b7e847984a 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -280,6 +280,58 @@ TRACE_EVENT(kvm_s390_enable_disable_ibs, __entry->state ? "enabling" : "disabling", __entry->id) ); +/* + * Trace point for modifying ais mode for a given isc. + */ +TRACE_EVENT(kvm_s390_modify_ais_mode, + TP_PROTO(__u8 isc, __u16 from, __u16 to), + TP_ARGS(isc, from, to), + + TP_STRUCT__entry( + __field(__u8, isc) + __field(__u16, from) + __field(__u16, to) + ), + + TP_fast_assign( + __entry->isc = isc; + __entry->from = from; + __entry->to = to; + ), + + TP_printk("for isc %x, modifying interruption mode from %s to %s", + __entry->isc, + (__entry->from == KVM_S390_AIS_MODE_ALL) ? + "ALL-Interruptions Mode" : + (__entry->from == KVM_S390_AIS_MODE_SINGLE) ? + "Single-Interruption Mode" : "No-Interruptions Mode", + (__entry->to == KVM_S390_AIS_MODE_ALL) ? + "ALL-Interruptions Mode" : + (__entry->to == KVM_S390_AIS_MODE_SINGLE) ? + "Single-Interruption Mode" : "No-Interruptions Mode") + ); + +/* + * Trace point for suppressed adapter I/O interrupt. + */ +TRACE_EVENT(kvm_s390_airq_suppressed, + TP_PROTO(__u32 id, __u8 isc), + TP_ARGS(id, isc), + + TP_STRUCT__entry( + __field(__u32, id) + __field(__u8, isc) + ), + + TP_fast_assign( + __entry->id = id; + __entry->isc = isc; + ), + + TP_printk("adapter I/O interrupt suppressed (id:%x isc:%x)", + __entry->id, __entry->isc) + ); + #endif /* _TRACE_KVMS390_H */ diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 5491be39776b..4719ecb9ab42 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -117,6 +117,8 @@ static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) newflags |= cpuflags & CPUSTAT_SM; if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS)) newflags |= cpuflags & CPUSTAT_IBS; + if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_KSS)) + newflags |= cpuflags & CPUSTAT_KSS; atomic_set(&scb_s->cpuflags, newflags); return 0; @@ -249,7 +251,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; - bool had_tx = scb_s->ecb & 0x10U; + bool had_tx = scb_s->ecb & ECB_TE; unsigned long new_mso = 0; int rc; @@ -289,7 +291,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) * bits. Therefore we cannot provide interpretation and would later * have to provide own emulation handlers. 
*/ - scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; + if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_KSS)) + scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; + scb_s->icpua = scb_o->icpua; if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM)) @@ -307,34 +311,39 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) scb_s->ihcpu = scb_o->ihcpu; /* MVPG and Protection Exception Interpretation are always available */ - scb_s->eca |= scb_o->eca & 0x01002000U; + scb_s->eca |= scb_o->eca & (ECA_MVPGI | ECA_PROTEXCI); /* Host-protection-interruption introduced with ESOP */ if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP)) - scb_s->ecb |= scb_o->ecb & 0x02U; + scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT; /* transactional execution */ if (test_kvm_facility(vcpu->kvm, 73)) { /* remap the prefix is tx is toggled on */ - if ((scb_o->ecb & 0x10U) && !had_tx) + if ((scb_o->ecb & ECB_TE) && !had_tx) prefix_unmapped(vsie_page); - scb_s->ecb |= scb_o->ecb & 0x10U; + scb_s->ecb |= scb_o->ecb & ECB_TE; } /* SIMD */ if (test_kvm_facility(vcpu->kvm, 129)) { - scb_s->eca |= scb_o->eca & 0x00020000U; - scb_s->ecd |= scb_o->ecd & 0x20000000U; + scb_s->eca |= scb_o->eca & ECA_VX; + scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT; } /* Run-time-Instrumentation */ if (test_kvm_facility(vcpu->kvm, 64)) - scb_s->ecb3 |= scb_o->ecb3 & 0x01U; + scb_s->ecb3 |= scb_o->ecb3 & ECB3_RI; /* Instruction Execution Prevention */ if (test_kvm_facility(vcpu->kvm, 130)) - scb_s->ecb2 |= scb_o->ecb2 & 0x20U; + scb_s->ecb2 |= scb_o->ecb2 & ECB2_IEP; + /* Guarded Storage */ + if (test_kvm_facility(vcpu->kvm, 133)) { + scb_s->ecb |= scb_o->ecb & ECB_GS; + scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT; + } if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF)) - scb_s->eca |= scb_o->eca & 0x00000001U; + scb_s->eca |= scb_o->eca & ECA_SII; if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB)) - scb_s->eca |= scb_o->eca & 0x40000000U; + scb_s->eca |= scb_o->eca & ECA_IB; if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI)) - scb_s->eca |= scb_o->eca & 0x80000000U; + scb_s->eca |= scb_o->eca & ECA_CEI; prepare_ibc(vcpu, vsie_page); rc = shadow_crycb(vcpu, vsie_page); @@ -406,7 +415,7 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) prefix += scb_s->mso; rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix); - if (!rc && (scb_s->ecb & 0x10U)) + if (!rc && (scb_s->ecb & ECB_TE)) rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix + PAGE_SIZE); /* @@ -496,6 +505,13 @@ static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) unpin_guest_page(vcpu->kvm, gpa, hpa); scb_s->riccbd = 0; } + + hpa = scb_s->sdnxo; + if (hpa) { + gpa = scb_o->sdnxo; + unpin_guest_page(vcpu->kvm, gpa, hpa); + scb_s->sdnxo = 0; + } } /* @@ -543,7 +559,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) } gpa = scb_o->itdba & ~0xffUL; - if (gpa && (scb_s->ecb & 0x10U)) { + if (gpa && (scb_s->ecb & ECB_TE)) { if (!(gpa & ~0x1fffU)) { rc = set_validity_icpt(scb_s, 0x0080U); goto unpin; @@ -558,8 +574,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) } gpa = scb_o->gvrd & ~0x1ffUL; - if (gpa && (scb_s->eca & 0x00020000U) && - !(scb_s->ecd & 0x20000000U)) { + if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) { if (!(gpa & ~0x1fffUL)) { rc = set_validity_icpt(scb_s, 0x1310U); goto unpin; @@ -577,7 +592,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) } gpa = scb_o->riccbd & 
~0x3fUL; - if (gpa && (scb_s->ecb3 & 0x01U)) { + if (gpa && (scb_s->ecb3 & ECB3_RI)) { if (!(gpa & ~0x1fffUL)) { rc = set_validity_icpt(scb_s, 0x0043U); goto unpin; @@ -591,6 +606,33 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) goto unpin; scb_s->riccbd = hpa; } + if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) { + unsigned long sdnxc; + + gpa = scb_o->sdnxo & ~0xfUL; + sdnxc = scb_o->sdnxo & 0xfUL; + if (!gpa || !(gpa & ~0x1fffUL)) { + rc = set_validity_icpt(scb_s, 0x10b0U); + goto unpin; + } + if (sdnxc < 6 || sdnxc > 12) { + rc = set_validity_icpt(scb_s, 0x10b1U); + goto unpin; + } + if (gpa & ((1 << sdnxc) - 1)) { + rc = set_validity_icpt(scb_s, 0x10b2U); + goto unpin; + } + /* Due to alignment rules (checked above) this cannot + * cross page boundaries + */ + rc = pin_guest_page(vcpu->kvm, gpa, &hpa); + if (rc == -EINVAL) + rc = set_validity_icpt(scb_s, 0x10b0U); + if (rc) + goto unpin; + scb_s->sdnxo = hpa | sdnxc; + } return 0; unpin: unpin_blocks(vcpu, vsie_page); diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index 0cf802de52a1..be63fbd699fd 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -82,6 +82,7 @@ static struct facility_def facility_defs[] = { 78, /* enhanced-DAT 2 */ 130, /* instruction-execution-protection */ 131, /* enhanced-SOP 2 and side-effect */ + 146, /* msa extension 8 */ -1 /* END */ } }, diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 3e8c287090e4..055962615779 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -221,6 +221,9 @@ struct x86_emulate_ops { void (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); + + unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); + void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags); }; typedef u32 __attribute__((vector_size(16))) sse128_t; @@ -290,7 +293,6 @@ struct x86_emulate_ctxt { /* interruptibility state, as a result of execution of STI or MOV SS */ int interruptibility; - int emul_flags; bool perm_ok; /* do not check permissions if true */ bool ud; /* inject an #UD if host doesn't support insn */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 74ef58c8ff53..f5bddf92faba 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -43,8 +43,6 @@ #define KVM_PRIVATE_MEM_SLOTS 3 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) -#define KVM_PIO_PAGE_OFFSET 1 -#define KVM_COALESCED_MMIO_PAGE_OFFSET 2 #define KVM_HALT_POLL_NS_DEFAULT 400000 #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS @@ -63,10 +61,10 @@ #define KVM_REQ_PMI 19 #define KVM_REQ_SMI 20 #define KVM_REQ_MASTERCLOCK_UPDATE 21 -#define KVM_REQ_MCLOCK_INPROGRESS 22 -#define KVM_REQ_SCAN_IOAPIC 23 +#define KVM_REQ_MCLOCK_INPROGRESS (22 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_SCAN_IOAPIC (23 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_GLOBAL_CLOCK_UPDATE 24 -#define KVM_REQ_APIC_PAGE_RELOAD 25 +#define KVM_REQ_APIC_PAGE_RELOAD (25 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_HV_CRASH 26 #define KVM_REQ_IOAPIC_EOI_EXIT 27 #define KVM_REQ_HV_RESET 28 @@ -343,9 +341,10 @@ struct kvm_mmu { void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, const void *pte); hpa_t root_hpa; - int root_level; - int 
shadow_root_level; union kvm_mmu_page_role base_role; + u8 root_level; + u8 shadow_root_level; + u8 ept_ad; bool direct_map; /* @@ -612,6 +611,8 @@ struct kvm_vcpu_arch { unsigned long dr7; unsigned long eff_db[KVM_NR_DB_REGS]; unsigned long guest_debug_dr7; + u64 msr_platform_info; + u64 msr_misc_features_enables; u64 mcg_cap; u64 mcg_status; diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index cc54b7026567..35cd06f636ab 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -70,8 +70,10 @@ #define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 #define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 +#define SECONDARY_EXEC_RDRAND 0x00000800 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 #define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 +#define SECONDARY_EXEC_RDSEED 0x00010000 #define SECONDARY_EXEC_ENABLE_PML 0x00020000 #define SECONDARY_EXEC_XSAVES 0x00100000 #define SECONDARY_EXEC_TSC_SCALING 0x02000000 @@ -516,12 +518,14 @@ struct vmx_msr_entry { #define EPT_VIOLATION_READABLE_BIT 3 #define EPT_VIOLATION_WRITABLE_BIT 4 #define EPT_VIOLATION_EXECUTABLE_BIT 5 +#define EPT_VIOLATION_GVA_TRANSLATED_BIT 8 #define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT) #define EPT_VIOLATION_ACC_WRITE (1 << EPT_VIOLATION_ACC_WRITE_BIT) #define EPT_VIOLATION_ACC_INSTR (1 << EPT_VIOLATION_ACC_INSTR_BIT) #define EPT_VIOLATION_READABLE (1 << EPT_VIOLATION_READABLE_BIT) #define EPT_VIOLATION_WRITABLE (1 << EPT_VIOLATION_WRITABLE_BIT) #define EPT_VIOLATION_EXECUTABLE (1 << EPT_VIOLATION_EXECUTABLE_BIT) +#define EPT_VIOLATION_GVA_TRANSLATED (1 << EPT_VIOLATION_GVA_TRANSLATED_BIT) /* * VM-instruction error numbers diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 739c0c594022..c2824d02ba37 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -9,6 +9,9 @@ #include <linux/types.h> #include <linux/ioctl.h> +#define KVM_PIO_PAGE_OFFSET 1 +#define KVM_COALESCED_MMIO_PAGE_OFFSET 2 + #define DE_VECTOR 0 #define DB_VECTOR 1 #define BP_VECTOR 3 diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 14458658e988..690a2dcf4078 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -76,7 +76,11 @@ #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 #define EXIT_REASON_APIC_WRITE 56 +#define EXIT_REASON_RDRAND 57 #define EXIT_REASON_INVPCID 58 +#define EXIT_REASON_VMFUNC 59 +#define EXIT_REASON_ENCLS 60 +#define EXIT_REASON_RDSEED 61 #define EXIT_REASON_PML_FULL 62 #define EXIT_REASON_XSAVES 63 #define EXIT_REASON_XRSTORS 64 @@ -90,6 +94,7 @@ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \ { EXIT_REASON_CPUID, "CPUID" }, \ { EXIT_REASON_HLT, "HLT" }, \ + { EXIT_REASON_INVD, "INVD" }, \ { EXIT_REASON_INVLPG, "INVLPG" }, \ { EXIT_REASON_RDPMC, "RDPMC" }, \ { EXIT_REASON_RDTSC, "RDTSC" }, \ @@ -108,6 +113,8 @@ { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \ { EXIT_REASON_MSR_READ, "MSR_READ" }, \ { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \ + { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ + { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \ { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \ { EXIT_REASON_MONITOR_TRAP_FLAG, "MONITOR_TRAP_FLAG" }, \ { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \ @@ -115,20 +122,24 @@ { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \ { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \ { EXIT_REASON_APIC_ACCESS, 
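The vmx.h hunk above gives bit 8 of the EPT-violation exit qualification a name, EPT_VIOLATION_GVA_TRANSLATED, next to the existing permission bits. A minimal decoder using only the bit positions visible in the hunk (illustrative host-side code, not from the patch):

    #include <stdint.h>
    #include <stdio.h>

    #define EPT_VIOLATION_READABLE        (1u << 3)
    #define EPT_VIOLATION_WRITABLE        (1u << 4)
    #define EPT_VIOLATION_EXECUTABLE      (1u << 5)
    #define EPT_VIOLATION_GVA_TRANSLATED  (1u << 8)

    /* Print the EPT permissions reported for the faulting address and
     * whether the newly named GVA-translated bit was set. */
    static void decode_ept_violation(uint64_t qual)
    {
        printf("EPT perms: %c%c%c, GVA-translated bit: %s\n",
               (qual & EPT_VIOLATION_READABLE)   ? 'r' : '-',
               (qual & EPT_VIOLATION_WRITABLE)   ? 'w' : '-',
               (qual & EPT_VIOLATION_EXECUTABLE) ? 'x' : '-',
               (qual & EPT_VIOLATION_GVA_TRANSLATED) ? "set" : "clear");
    }

    int main(void)
    {
        decode_ept_violation(EPT_VIOLATION_READABLE | EPT_VIOLATION_GVA_TRANSLATED);
        return 0;
    }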
"APIC_ACCESS" }, \ - { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \ - { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \ + { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ + { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \ + { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \ { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ { EXIT_REASON_INVEPT, "INVEPT" }, \ + { EXIT_REASON_RDTSCP, "RDTSCP" }, \ { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" }, \ + { EXIT_REASON_INVVPID, "INVVPID" }, \ { EXIT_REASON_WBINVD, "WBINVD" }, \ + { EXIT_REASON_XSETBV, "XSETBV" }, \ { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \ - { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ - { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ - { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \ - { EXIT_REASON_INVD, "INVD" }, \ - { EXIT_REASON_INVVPID, "INVVPID" }, \ + { EXIT_REASON_RDRAND, "RDRAND" }, \ { EXIT_REASON_INVPCID, "INVPCID" }, \ + { EXIT_REASON_VMFUNC, "VMFUNC" }, \ + { EXIT_REASON_ENCLS, "ENCLS" }, \ + { EXIT_REASON_RDSEED, "RDSEED" }, \ + { EXIT_REASON_PML_FULL, "PML_FULL" }, \ { EXIT_REASON_XSAVES, "XSAVES" }, \ { EXIT_REASON_XRSTORS, "XRSTORS" } diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 14f65a5f938e..da5c09789984 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -396,9 +396,9 @@ static u64 kvm_steal_clock(int cpu) src = &per_cpu(steal_time, cpu); do { version = src->version; - rmb(); + virt_rmb(); steal = src->steal; - rmb(); + virt_rmb(); } while ((version & 1) || (version != src->version)); return steal; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index ab8e32f7b9a8..760433b2574a 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -86,18 +86,6 @@ config KVM_MMU_AUDIT This option adds a R/W kVM module parameter 'mmu_audit', which allows auditing of KVM MMU events at runtime. -config KVM_DEVICE_ASSIGNMENT - bool "KVM legacy PCI device assignment support (DEPRECATED)" - depends on KVM && PCI && IOMMU_API - default n - ---help--- - Provide support for legacy PCI device assignment through KVM. The - kernel now also supports a full featured userspace device driver - framework through VFIO, which supersedes this support and provides - better security. - - If unsure, say N. - # OK, it's a little counter-intuitive to do this, but it puts it neatly under # the virtualization menu. source drivers/vhost/Kconfig diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 3bff20710471..09d4b17be022 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -15,8 +15,6 @@ kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \ hyperv.o page_track.o debugfs.o -kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o - kvm-intel-y += vmx.o pmu_intel.o kvm-amd-y += svm.o pmu_amd.o diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c deleted file mode 100644 index 308b8597c691..000000000000 --- a/arch/x86/kvm/assigned-dev.c +++ /dev/null @@ -1,1058 +0,0 @@ -/* - * Kernel-based Virtual Machine - device assignment support - * - * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates. - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. 
- * - */ - -#include <linux/kvm_host.h> -#include <linux/kvm.h> -#include <linux/uaccess.h> -#include <linux/vmalloc.h> -#include <linux/errno.h> -#include <linux/spinlock.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/slab.h> -#include <linux/namei.h> -#include <linux/fs.h> -#include "irq.h" -#include "assigned-dev.h" -#include "trace/events/kvm.h" - -struct kvm_assigned_dev_kernel { - struct kvm_irq_ack_notifier ack_notifier; - struct list_head list; - int assigned_dev_id; - int host_segnr; - int host_busnr; - int host_devfn; - unsigned int entries_nr; - int host_irq; - bool host_irq_disabled; - bool pci_2_3; - struct msix_entry *host_msix_entries; - int guest_irq; - struct msix_entry *guest_msix_entries; - unsigned long irq_requested_type; - int irq_source_id; - int flags; - struct pci_dev *dev; - struct kvm *kvm; - spinlock_t intx_lock; - spinlock_t intx_mask_lock; - char irq_name[32]; - struct pci_saved_state *pci_saved_state; -}; - -static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, - int assigned_dev_id) -{ - struct kvm_assigned_dev_kernel *match; - - list_for_each_entry(match, head, list) { - if (match->assigned_dev_id == assigned_dev_id) - return match; - } - return NULL; -} - -static int find_index_from_host_irq(struct kvm_assigned_dev_kernel - *assigned_dev, int irq) -{ - int i, index; - struct msix_entry *host_msix_entries; - - host_msix_entries = assigned_dev->host_msix_entries; - - index = -1; - for (i = 0; i < assigned_dev->entries_nr; i++) - if (irq == host_msix_entries[i].vector) { - index = i; - break; - } - if (index < 0) - printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); - - return index; -} - -static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id) -{ - struct kvm_assigned_dev_kernel *assigned_dev = dev_id; - int ret; - - spin_lock(&assigned_dev->intx_lock); - if (pci_check_and_mask_intx(assigned_dev->dev)) { - assigned_dev->host_irq_disabled = true; - ret = IRQ_WAKE_THREAD; - } else - ret = IRQ_NONE; - spin_unlock(&assigned_dev->intx_lock); - - return ret; -} - -static void -kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev, - int vector) -{ - if (unlikely(assigned_dev->irq_requested_type & - KVM_DEV_IRQ_GUEST_INTX)) { - spin_lock(&assigned_dev->intx_mask_lock); - if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) - kvm_set_irq(assigned_dev->kvm, - assigned_dev->irq_source_id, vector, 1, - false); - spin_unlock(&assigned_dev->intx_mask_lock); - } else - kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - vector, 1, false); -} - -static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) -{ - struct kvm_assigned_dev_kernel *assigned_dev = dev_id; - - if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { - spin_lock_irq(&assigned_dev->intx_lock); - disable_irq_nosync(irq); - assigned_dev->host_irq_disabled = true; - spin_unlock_irq(&assigned_dev->intx_lock); - } - - kvm_assigned_dev_raise_guest_irq(assigned_dev, - assigned_dev->guest_irq); - - return IRQ_HANDLED; -} - -/* - * Deliver an IRQ in an atomic context if we can, or return a failure, - * user can retry in a process context. - * Return value: - * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context. - * Other values - No need to retry. 
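The assigned-device code being deleted here keeps every device on an intrusive list embedded in struct kvm_assigned_dev_kernel and looks it up by assigned_dev_id with list_for_each_entry(). The same container_of idiom in a self-contained toy form (simplified singly linked list, hypothetical names):

    #include <stddef.h>
    #include <stdio.h>

    struct list_node { struct list_node *next; };

    struct assigned_dev {
        int id;
        struct list_node node;   /* intrusive link, like 'struct list_head list' */
    };

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    static struct assigned_dev *find_dev(struct list_node *head, int id)
    {
        for (struct list_node *n = head->next; n; n = n->next) {
            struct assigned_dev *dev = container_of(n, struct assigned_dev, node);
            if (dev->id == id)
                return dev;
        }
        return NULL;
    }

    int main(void)
    {
        struct assigned_dev a = { .id = 7 }, b = { .id = 9 };
        struct list_node head = { &a.node };
        a.node.next = &b.node;
        b.node.next = NULL;
        printf("%d\n", find_dev(&head, 9)->id);
        return 0;
    }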
- */ -static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, - int level) -{ - struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; - struct kvm_kernel_irq_routing_entry *e; - int ret = -EINVAL; - int idx; - - trace_kvm_set_irq(irq, level, irq_source_id); - - /* - * Injection into either PIC or IOAPIC might need to scan all CPUs, - * which would need to be retried from thread context; when same GSI - * is connected to both PIC and IOAPIC, we'd have to report a - * partial failure here. - * Since there's no easy way to do this, we only support injecting MSI - * which is limited to 1:1 GSI mapping. - */ - idx = srcu_read_lock(&kvm->irq_srcu); - if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { - e = &entries[0]; - ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id, - irq, level); - } - srcu_read_unlock(&kvm->irq_srcu, idx); - return ret; -} - - -static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id) -{ - struct kvm_assigned_dev_kernel *assigned_dev = dev_id; - int ret = kvm_set_irq_inatomic(assigned_dev->kvm, - assigned_dev->irq_source_id, - assigned_dev->guest_irq, 1); - return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED; -} - -static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id) -{ - struct kvm_assigned_dev_kernel *assigned_dev = dev_id; - - kvm_assigned_dev_raise_guest_irq(assigned_dev, - assigned_dev->guest_irq); - - return IRQ_HANDLED; -} - -static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id) -{ - struct kvm_assigned_dev_kernel *assigned_dev = dev_id; - int index = find_index_from_host_irq(assigned_dev, irq); - u32 vector; - int ret = 0; - - if (index >= 0) { - vector = assigned_dev->guest_msix_entries[index].vector; - ret = kvm_set_irq_inatomic(assigned_dev->kvm, - assigned_dev->irq_source_id, - vector, 1); - } - - return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED; -} - -static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) -{ - struct kvm_assigned_dev_kernel *assigned_dev = dev_id; - int index = find_index_from_host_irq(assigned_dev, irq); - u32 vector; - - if (index >= 0) { - vector = assigned_dev->guest_msix_entries[index].vector; - kvm_assigned_dev_raise_guest_irq(assigned_dev, vector); - } - - return IRQ_HANDLED; -} - -/* Ack the irq line for an assigned device */ -static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) -{ - struct kvm_assigned_dev_kernel *dev = - container_of(kian, struct kvm_assigned_dev_kernel, - ack_notifier); - - kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false); - - spin_lock(&dev->intx_mask_lock); - - if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) { - bool reassert = false; - - spin_lock_irq(&dev->intx_lock); - /* - * The guest IRQ may be shared so this ack can come from an - * IRQ for another guest device. 
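The removed MSI handlers above first try kvm_set_irq_inatomic() from hard-irq context and hand off to their threaded halves only when it reports -EWOULDBLOCK, exactly as the comment before it describes. A compact model of that decision (toy enum values and stand-in functions, not the kernel's definitions):

    #include <errno.h>
    #include <stdio.h>

    enum irqreturn { IRQ_NONE, IRQ_HANDLED, IRQ_WAKE_THREAD };

    /* Stand-in for kvm_set_irq_inatomic(): only MSI-like 1:1 routes can be
     * delivered without sleeping; anything else asks to be retried later. */
    static int try_inject_atomic(int msi_route)
    {
        return msi_route ? 0 : -EWOULDBLOCK;
    }

    static enum irqreturn hardirq_handler(int msi_route)
    {
        int ret = try_inject_atomic(msi_route);

        /* punt to the threaded handler only if the atomic attempt would block */
        return (ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
    }

    int main(void)
    {
        printf("msi: %d, non-msi: %d\n", hardirq_handler(1), hardirq_handler(0));
        return 0;
    }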
- */ - if (dev->host_irq_disabled) { - if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) - enable_irq(dev->host_irq); - else if (!pci_check_and_unmask_intx(dev->dev)) - reassert = true; - dev->host_irq_disabled = reassert; - } - spin_unlock_irq(&dev->intx_lock); - - if (reassert) - kvm_set_irq(dev->kvm, dev->irq_source_id, - dev->guest_irq, 1, false); - } - - spin_unlock(&dev->intx_mask_lock); -} - -static void deassign_guest_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - if (assigned_dev->ack_notifier.gsi != -1) - kvm_unregister_irq_ack_notifier(kvm, - &assigned_dev->ack_notifier); - - kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - assigned_dev->guest_irq, 0, false); - - if (assigned_dev->irq_source_id != -1) - kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); - assigned_dev->irq_source_id = -1; - assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); -} - -/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ -static void deassign_host_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - /* - * We disable irq here to prevent further events. - * - * Notice this maybe result in nested disable if the interrupt type is - * INTx, but it's OK for we are going to free it. - * - * If this function is a part of VM destroy, please ensure that till - * now, the kvm state is still legal for probably we also have to wait - * on a currently running IRQ handler. - */ - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { - int i; - for (i = 0; i < assigned_dev->entries_nr; i++) - disable_irq(assigned_dev->host_msix_entries[i].vector); - - for (i = 0; i < assigned_dev->entries_nr; i++) - free_irq(assigned_dev->host_msix_entries[i].vector, - assigned_dev); - - assigned_dev->entries_nr = 0; - kfree(assigned_dev->host_msix_entries); - kfree(assigned_dev->guest_msix_entries); - pci_disable_msix(assigned_dev->dev); - } else { - /* Deal with MSI and INTx */ - if ((assigned_dev->irq_requested_type & - KVM_DEV_IRQ_HOST_INTX) && - (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { - spin_lock_irq(&assigned_dev->intx_lock); - pci_intx(assigned_dev->dev, false); - spin_unlock_irq(&assigned_dev->intx_lock); - synchronize_irq(assigned_dev->host_irq); - } else - disable_irq(assigned_dev->host_irq); - - free_irq(assigned_dev->host_irq, assigned_dev); - - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) - pci_disable_msi(assigned_dev->dev); - } - - assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); -} - -static int kvm_deassign_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev, - unsigned long irq_requested_type) -{ - unsigned long guest_irq_type, host_irq_type; - - if (!irqchip_in_kernel(kvm)) - return -EINVAL; - /* no irq assignment to deassign */ - if (!assigned_dev->irq_requested_type) - return -ENXIO; - - host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; - guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; - - if (host_irq_type) - deassign_host_irq(kvm, assigned_dev); - if (guest_irq_type) - deassign_guest_irq(kvm, assigned_dev); - - return 0; -} - -static void kvm_free_assigned_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); -} - -static void kvm_free_assigned_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel - *assigned_dev) -{ - kvm_free_assigned_irq(kvm, assigned_dev); - - pci_reset_function(assigned_dev->dev); - if 
(pci_load_and_free_saved_state(assigned_dev->dev, - &assigned_dev->pci_saved_state)) - printk(KERN_INFO "%s: Couldn't reload %s saved state\n", - __func__, dev_name(&assigned_dev->dev->dev)); - else - pci_restore_state(assigned_dev->dev); - - pci_clear_dev_assigned(assigned_dev->dev); - - pci_release_regions(assigned_dev->dev); - pci_disable_device(assigned_dev->dev); - pci_dev_put(assigned_dev->dev); - - list_del(&assigned_dev->list); - kfree(assigned_dev); -} - -void kvm_free_all_assigned_devices(struct kvm *kvm) -{ - struct kvm_assigned_dev_kernel *assigned_dev, *tmp; - - list_for_each_entry_safe(assigned_dev, tmp, - &kvm->arch.assigned_dev_head, list) { - kvm_free_assigned_device(kvm, assigned_dev); - } -} - -static int assigned_device_enable_host_intx(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev) -{ - irq_handler_t irq_handler; - unsigned long flags; - - dev->host_irq = dev->dev->irq; - - /* - * We can only share the IRQ line with other host devices if we are - * able to disable the IRQ source at device-level - independently of - * the guest driver. Otherwise host devices may suffer from unbounded - * IRQ latencies when the guest keeps the line asserted. - */ - if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) { - irq_handler = kvm_assigned_dev_intx; - flags = IRQF_SHARED; - } else { - irq_handler = NULL; - flags = IRQF_ONESHOT; - } - if (request_threaded_irq(dev->host_irq, irq_handler, - kvm_assigned_dev_thread_intx, flags, - dev->irq_name, dev)) - return -EIO; - - if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) { - spin_lock_irq(&dev->intx_lock); - pci_intx(dev->dev, true); - spin_unlock_irq(&dev->intx_lock); - } - return 0; -} - -static int assigned_device_enable_host_msi(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev) -{ - int r; - - if (!dev->dev->msi_enabled) { - r = pci_enable_msi(dev->dev); - if (r) - return r; - } - - dev->host_irq = dev->dev->irq; - if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi, - kvm_assigned_dev_thread_msi, 0, - dev->irq_name, dev)) { - pci_disable_msi(dev->dev); - return -EIO; - } - - return 0; -} - -static int assigned_device_enable_host_msix(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev) -{ - int i, r = -EINVAL; - - /* host_msix_entries and guest_msix_entries should have been - * initialized */ - if (dev->entries_nr == 0) - return r; - - r = pci_enable_msix_exact(dev->dev, - dev->host_msix_entries, dev->entries_nr); - if (r) - return r; - - for (i = 0; i < dev->entries_nr; i++) { - r = request_threaded_irq(dev->host_msix_entries[i].vector, - kvm_assigned_dev_msix, - kvm_assigned_dev_thread_msix, - 0, dev->irq_name, dev); - if (r) - goto err; - } - - return 0; -err: - for (i -= 1; i >= 0; i--) - free_irq(dev->host_msix_entries[i].vector, dev); - pci_disable_msix(dev->dev); - return r; -} - -static int assigned_device_enable_guest_intx(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq) -{ - dev->guest_irq = irq->guest_irq; - dev->ack_notifier.gsi = irq->guest_irq; - return 0; -} - -static int assigned_device_enable_guest_msi(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq) -{ - dev->guest_irq = irq->guest_irq; - dev->ack_notifier.gsi = -1; - return 0; -} - -static int assigned_device_enable_guest_msix(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq) -{ - dev->guest_irq = irq->guest_irq; - dev->ack_notifier.gsi = -1; - return 0; -} - -static int assign_host_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - 
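assigned_device_enable_host_msix() in the hunk above requests one interrupt per MSI-X vector and, when request number i fails, walks back from i-1 to 0 freeing what it already took before disabling MSI-X. That acquire-N-or-roll-back shape in isolation (toy acquire/release functions):

    #include <stdio.h>

    static int acquire(int i)  { return (i == 3) ? -1 : 0; } /* pretend #3 fails */
    static void release(int i) { printf("release %d\n", i); }

    static int acquire_all(int n)
    {
        int i, r = 0;

        for (i = 0; i < n; i++) {
            r = acquire(i);
            if (r)
                goto err;
        }
        return 0;
    err:
        for (i -= 1; i >= 0; i--)   /* unwind only what was actually acquired */
            release(i);
        return r;
    }

    int main(void)
    {
        printf("acquire_all(5) = %d\n", acquire_all(5));
        return 0;
    }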
__u32 host_irq_type) -{ - int r = -EEXIST; - - if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) - return r; - - snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s", - pci_name(dev->dev)); - - switch (host_irq_type) { - case KVM_DEV_IRQ_HOST_INTX: - r = assigned_device_enable_host_intx(kvm, dev); - break; - case KVM_DEV_IRQ_HOST_MSI: - r = assigned_device_enable_host_msi(kvm, dev); - break; - case KVM_DEV_IRQ_HOST_MSIX: - r = assigned_device_enable_host_msix(kvm, dev); - break; - default: - r = -EINVAL; - } - dev->host_irq_disabled = false; - - if (!r) - dev->irq_requested_type |= host_irq_type; - - return r; -} - -static int assign_guest_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq, - unsigned long guest_irq_type) -{ - int id; - int r = -EEXIST; - - if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) - return r; - - id = kvm_request_irq_source_id(kvm); - if (id < 0) - return id; - - dev->irq_source_id = id; - - switch (guest_irq_type) { - case KVM_DEV_IRQ_GUEST_INTX: - r = assigned_device_enable_guest_intx(kvm, dev, irq); - break; - case KVM_DEV_IRQ_GUEST_MSI: - r = assigned_device_enable_guest_msi(kvm, dev, irq); - break; - case KVM_DEV_IRQ_GUEST_MSIX: - r = assigned_device_enable_guest_msix(kvm, dev, irq); - break; - default: - r = -EINVAL; - } - - if (!r) { - dev->irq_requested_type |= guest_irq_type; - if (dev->ack_notifier.gsi != -1) - kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); - } else { - kvm_free_irq_source_id(kvm, dev->irq_source_id); - dev->irq_source_id = -1; - } - - return r; -} - -/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ -static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, - struct kvm_assigned_irq *assigned_irq) -{ - int r = -EINVAL; - struct kvm_assigned_dev_kernel *match; - unsigned long host_irq_type, guest_irq_type; - - if (!irqchip_in_kernel(kvm)) - return r; - - mutex_lock(&kvm->lock); - r = -ENODEV; - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_irq->assigned_dev_id); - if (!match) - goto out; - - host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); - guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); - - r = -EINVAL; - /* can only assign one type at a time */ - if (hweight_long(host_irq_type) > 1) - goto out; - if (hweight_long(guest_irq_type) > 1) - goto out; - if (host_irq_type == 0 && guest_irq_type == 0) - goto out; - - r = 0; - if (host_irq_type) - r = assign_host_irq(kvm, match, host_irq_type); - if (r) - goto out; - - if (guest_irq_type) - r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); -out: - mutex_unlock(&kvm->lock); - return r; -} - -static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, - struct kvm_assigned_irq - *assigned_irq) -{ - int r = -ENODEV; - struct kvm_assigned_dev_kernel *match; - unsigned long irq_type; - - mutex_lock(&kvm->lock); - - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_irq->assigned_dev_id); - if (!match) - goto out; - - irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK | - KVM_DEV_IRQ_GUEST_MASK); - r = kvm_deassign_irq(kvm, match, irq_type); -out: - mutex_unlock(&kvm->lock); - return r; -} - -/* - * We want to test whether the caller has been granted permissions to - * use this device. To be able to configure and control the device, - * the user needs access to PCI configuration space and BAR resources. - * These are accessed through PCI sysfs. 
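kvm_vm_ioctl_assign_irq() above lets userspace select at most one host IRQ type and one guest IRQ type per call, using hweight_long() to reject combinations. The same check standalone, with __builtin_popcountl standing in for hweight_long and illustrative field masks rather than the real ABI values:

    #include <stdbool.h>
    #include <stdio.h>

    #define HOST_MASK  0x00ffUL   /* illustrative field layout, not the ABI */
    #define GUEST_MASK 0xff00UL

    static bool flags_valid(unsigned long flags)
    {
        unsigned long host  = flags & HOST_MASK;
        unsigned long guest = flags & GUEST_MASK;

        if (__builtin_popcountl(host) > 1)    /* only one host type at a time */
            return false;
        if (__builtin_popcountl(guest) > 1)   /* only one guest type at a time */
            return false;
        return host || guest;                 /* and at least one of the two */
    }

    int main(void)
    {
        printf("%d %d %d\n", flags_valid(0x01), flags_valid(0x03), flags_valid(0));
        return 0;
    }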
PCI config space is often - * passed to the process calling this ioctl via file descriptor, so we - * can't rely on access to that file. We can check for permissions - * on each of the BAR resource files, which is a pretty clear - * indicator that the user has been granted access to the device. - */ -static int probe_sysfs_permissions(struct pci_dev *dev) -{ -#ifdef CONFIG_SYSFS - int i; - bool bar_found = false; - - for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) { - char *kpath, *syspath; - struct path path; - struct inode *inode; - int r; - - if (!pci_resource_len(dev, i)) - continue; - - kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL); - if (!kpath) - return -ENOMEM; - - /* Per sysfs-rules, sysfs is always at /sys */ - syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i); - kfree(kpath); - if (!syspath) - return -ENOMEM; - - r = kern_path(syspath, LOOKUP_FOLLOW, &path); - kfree(syspath); - if (r) - return r; - - inode = d_backing_inode(path.dentry); - - r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS); - path_put(&path); - if (r) - return r; - - bar_found = true; - } - - /* If no resources, probably something special */ - if (!bar_found) - return -EPERM; - - return 0; -#else - return -EINVAL; /* No way to control the device without sysfs */ -#endif -} - -static int kvm_vm_ioctl_assign_device(struct kvm *kvm, - struct kvm_assigned_pci_dev *assigned_dev) -{ - int r = 0, idx; - struct kvm_assigned_dev_kernel *match; - struct pci_dev *dev; - - if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)) - return -EINVAL; - - mutex_lock(&kvm->lock); - idx = srcu_read_lock(&kvm->srcu); - - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_dev->assigned_dev_id); - if (match) { - /* device already assigned */ - r = -EEXIST; - goto out; - } - - match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); - if (match == NULL) { - printk(KERN_INFO "%s: Couldn't allocate memory\n", - __func__); - r = -ENOMEM; - goto out; - } - dev = pci_get_domain_bus_and_slot(assigned_dev->segnr, - assigned_dev->busnr, - assigned_dev->devfn); - if (!dev) { - printk(KERN_INFO "%s: host device not found\n", __func__); - r = -EINVAL; - goto out_free; - } - - /* Don't allow bridges to be assigned */ - if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) { - r = -EPERM; - goto out_put; - } - - r = probe_sysfs_permissions(dev); - if (r) - goto out_put; - - if (pci_enable_device(dev)) { - printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); - r = -EBUSY; - goto out_put; - } - r = pci_request_regions(dev, "kvm_assigned_device"); - if (r) { - printk(KERN_INFO "%s: Could not get access to device regions\n", - __func__); - goto out_disable; - } - - pci_reset_function(dev); - pci_save_state(dev); - match->pci_saved_state = pci_store_saved_state(dev); - if (!match->pci_saved_state) - printk(KERN_DEBUG "%s: Couldn't store %s saved state\n", - __func__, dev_name(&dev->dev)); - - if (!pci_intx_mask_supported(dev)) - assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3; - - match->assigned_dev_id = assigned_dev->assigned_dev_id; - match->host_segnr = assigned_dev->segnr; - match->host_busnr = assigned_dev->busnr; - match->host_devfn = assigned_dev->devfn; - match->flags = assigned_dev->flags; - match->dev = dev; - spin_lock_init(&match->intx_lock); - spin_lock_init(&match->intx_mask_lock); - match->irq_source_id = -1; - match->kvm = kvm; - match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; - - list_add(&match->list, &kvm->arch.assigned_dev_head); - - if 
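probe_sysfs_permissions() above decides whether the caller was granted the device by checking read/write permission on each BAR resource file under sysfs, since the config-space file descriptor itself may not be available. A rough userspace analog of that probe using access(2); the path layout follows the usual /sys/bus/pci convention and the device address in main() is only an example:

    #include <stdio.h>
    #include <unistd.h>

    static int probe_bar_permissions(const char *bdf)
    {
        char path[128];
        int i, bars_found = 0;

        for (i = 0; i <= 5; i++) {            /* standard BARs 0..5 */
            snprintf(path, sizeof(path),
                     "/sys/bus/pci/devices/%s/resource%d", bdf, i);
            if (access(path, F_OK))           /* BAR not present */
                continue;
            if (access(path, R_OK | W_OK))    /* present but not granted to us */
                return -1;
            bars_found++;
        }
        return bars_found ? 0 : -1;           /* no BARs at all: refuse */
    }

    int main(void)
    {
        printf("%d\n", probe_bar_permissions("0000:00:1f.0"));
        return 0;
    }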
(!kvm->arch.iommu_domain) { - r = kvm_iommu_map_guest(kvm); - if (r) - goto out_list_del; - } - r = kvm_assign_device(kvm, match->dev); - if (r) - goto out_list_del; - -out: - srcu_read_unlock(&kvm->srcu, idx); - mutex_unlock(&kvm->lock); - return r; -out_list_del: - if (pci_load_and_free_saved_state(dev, &match->pci_saved_state)) - printk(KERN_INFO "%s: Couldn't reload %s saved state\n", - __func__, dev_name(&dev->dev)); - list_del(&match->list); - pci_release_regions(dev); -out_disable: - pci_disable_device(dev); -out_put: - pci_dev_put(dev); -out_free: - kfree(match); - srcu_read_unlock(&kvm->srcu, idx); - mutex_unlock(&kvm->lock); - return r; -} - -static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, - struct kvm_assigned_pci_dev *assigned_dev) -{ - int r = 0; - struct kvm_assigned_dev_kernel *match; - - mutex_lock(&kvm->lock); - - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_dev->assigned_dev_id); - if (!match) { - printk(KERN_INFO "%s: device hasn't been assigned before, " - "so cannot be deassigned\n", __func__); - r = -EINVAL; - goto out; - } - - kvm_deassign_device(kvm, match->dev); - - kvm_free_assigned_device(kvm, match); - -out: - mutex_unlock(&kvm->lock); - return r; -} - - -static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, - struct kvm_assigned_msix_nr *entry_nr) -{ - int r = 0; - struct kvm_assigned_dev_kernel *adev; - - mutex_lock(&kvm->lock); - - adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - entry_nr->assigned_dev_id); - if (!adev) { - r = -EINVAL; - goto msix_nr_out; - } - - if (adev->entries_nr == 0) { - adev->entries_nr = entry_nr->entry_nr; - if (adev->entries_nr == 0 || - adev->entries_nr > KVM_MAX_MSIX_PER_DEV) { - r = -EINVAL; - goto msix_nr_out; - } - - adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * - entry_nr->entry_nr, - GFP_KERNEL); - if (!adev->host_msix_entries) { - r = -ENOMEM; - goto msix_nr_out; - } - adev->guest_msix_entries = - kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr, - GFP_KERNEL); - if (!adev->guest_msix_entries) { - kfree(adev->host_msix_entries); - r = -ENOMEM; - goto msix_nr_out; - } - } else /* Not allowed set MSI-X number twice */ - r = -EINVAL; -msix_nr_out: - mutex_unlock(&kvm->lock); - return r; -} - -static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, - struct kvm_assigned_msix_entry *entry) -{ - int r = 0, i; - struct kvm_assigned_dev_kernel *adev; - - mutex_lock(&kvm->lock); - - adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - entry->assigned_dev_id); - - if (!adev) { - r = -EINVAL; - goto msix_entry_out; - } - - for (i = 0; i < adev->entries_nr; i++) - if (adev->guest_msix_entries[i].vector == 0 || - adev->guest_msix_entries[i].entry == entry->entry) { - adev->guest_msix_entries[i].entry = entry->entry; - adev->guest_msix_entries[i].vector = entry->gsi; - adev->host_msix_entries[i].entry = entry->entry; - break; - } - if (i == adev->entries_nr) { - r = -ENOSPC; - goto msix_entry_out; - } - -msix_entry_out: - mutex_unlock(&kvm->lock); - - return r; -} - -static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, - struct kvm_assigned_pci_dev *assigned_dev) -{ - int r = 0; - struct kvm_assigned_dev_kernel *match; - - mutex_lock(&kvm->lock); - - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_dev->assigned_dev_id); - if (!match) { - r = -ENODEV; - goto out; - } - - spin_lock(&match->intx_mask_lock); - - match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX; - match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX; - - if 
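kvm_vm_ioctl_set_msix_nr() above accepts the MSI-X entry count only once, bounds-checks the user-supplied value before allocating the host and guest entry arrays, and frees the first array when the second allocation fails. A standalone sketch of that shape (MAX_ENTRIES is an illustrative limit, not KVM_MAX_MSIX_PER_DEV):

    #include <stdlib.h>
    #include <stdio.h>

    #define MAX_ENTRIES 256

    struct adev {
        unsigned int nr;
        int *host_entries;
        int *guest_entries;
    };

    static int set_msix_nr(struct adev *dev, unsigned int nr)
    {
        if (dev->nr != 0)                     /* may only be set once */
            return -1;
        if (nr == 0 || nr > MAX_ENTRIES)      /* reject bogus user counts */
            return -1;

        dev->host_entries = calloc(nr, sizeof(*dev->host_entries));
        if (!dev->host_entries)
            return -1;
        dev->guest_entries = calloc(nr, sizeof(*dev->guest_entries));
        if (!dev->guest_entries) {
            free(dev->host_entries);          /* roll back the first allocation */
            dev->host_entries = NULL;
            return -1;
        }
        dev->nr = nr;
        return 0;
    }

    int main(void)
    {
        struct adev dev = { 0 };
        printf("%d %d\n", set_msix_nr(&dev, 8), set_msix_nr(&dev, 8));
        return 0;
    }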
(match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { - if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) { - kvm_set_irq(match->kvm, match->irq_source_id, - match->guest_irq, 0, false); - /* - * Masking at hardware-level is performed on demand, - * i.e. when an IRQ actually arrives at the host. - */ - } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { - /* - * Unmask the IRQ line if required. Unmasking at - * device level will be performed by user space. - */ - spin_lock_irq(&match->intx_lock); - if (match->host_irq_disabled) { - enable_irq(match->host_irq); - match->host_irq_disabled = false; - } - spin_unlock_irq(&match->intx_lock); - } - } - - spin_unlock(&match->intx_mask_lock); - -out: - mutex_unlock(&kvm->lock); - return r; -} - -long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, - unsigned long arg) -{ - void __user *argp = (void __user *)arg; - int r; - - switch (ioctl) { - case KVM_ASSIGN_PCI_DEVICE: { - struct kvm_assigned_pci_dev assigned_dev; - - r = -EFAULT; - if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) - goto out; - r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); - if (r) - goto out; - break; - } - case KVM_ASSIGN_IRQ: { - r = -EOPNOTSUPP; - break; - } - case KVM_ASSIGN_DEV_IRQ: { - struct kvm_assigned_irq assigned_irq; - - r = -EFAULT; - if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) - goto out; - r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); - if (r) - goto out; - break; - } - case KVM_DEASSIGN_DEV_IRQ: { - struct kvm_assigned_irq assigned_irq; - - r = -EFAULT; - if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) - goto out; - r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); - if (r) - goto out; - break; - } - case KVM_DEASSIGN_PCI_DEVICE: { - struct kvm_assigned_pci_dev assigned_dev; - - r = -EFAULT; - if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) - goto out; - r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); - if (r) - goto out; - break; - } - case KVM_ASSIGN_SET_MSIX_NR: { - struct kvm_assigned_msix_nr entry_nr; - r = -EFAULT; - if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) - goto out; - r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); - if (r) - goto out; - break; - } - case KVM_ASSIGN_SET_MSIX_ENTRY: { - struct kvm_assigned_msix_entry entry; - r = -EFAULT; - if (copy_from_user(&entry, argp, sizeof entry)) - goto out; - r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); - if (r) - goto out; - break; - } - case KVM_ASSIGN_SET_INTX_MASK: { - struct kvm_assigned_pci_dev assigned_dev; - - r = -EFAULT; - if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) - goto out; - r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev); - break; - } - default: - r = -ENOTTY; - break; - } -out: - return r; -} diff --git a/arch/x86/kvm/assigned-dev.h b/arch/x86/kvm/assigned-dev.h deleted file mode 100644 index a428c1a211b2..000000000000 --- a/arch/x86/kvm/assigned-dev.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef ARCH_X86_KVM_ASSIGNED_DEV_H -#define ARCH_X86_KVM_ASSIGNED_DEV_H - -#include <linux/kvm_host.h> - -#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT -int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev); -int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev); - -int kvm_iommu_map_guest(struct kvm *kvm); -int kvm_iommu_unmap_guest(struct kvm *kvm); - -long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, - unsigned long arg); - -void kvm_free_all_assigned_devices(struct kvm *kvm); -#else -static inline int kvm_iommu_unmap_guest(struct kvm *kvm) -{ - 
return 0; -} - -static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, - unsigned long arg) -{ - return -ENOTTY; -} - -static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {} -#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */ - -#endif /* ARCH_X86_KVM_ASSIGNED_DEV_H */ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index efde6cc50875..a181ae76c71c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -876,6 +876,9 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu) { u32 eax, ebx, ecx, edx; + if (cpuid_fault_enabled(vcpu) && !kvm_require_cpl(vcpu, 0)) + return 1; + eax = kvm_register_read(vcpu, VCPU_REGS_RAX); ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 35058c2c0eea..a6fd40aade7c 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -205,4 +205,15 @@ static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu) return x86_stepping(best->eax); } +static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT; +} + +static inline bool cpuid_fault_enabled(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.msr_misc_features_enables & + MSR_MISC_FEATURES_ENABLES_CPUID_FAULT; +} + #endif diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 45c7306c8780..c25cfaf584e7 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2547,7 +2547,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) u64 smbase; int ret; - if ((ctxt->emul_flags & X86EMUL_SMM_MASK) == 0) + if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0) return emulate_ud(ctxt); /* @@ -2596,11 +2596,11 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) return X86EMUL_UNHANDLEABLE; } - if ((ctxt->emul_flags & X86EMUL_SMM_INSIDE_NMI_MASK) == 0) + if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0) ctxt->ops->set_nmi_mask(ctxt, false); - ctxt->emul_flags &= ~X86EMUL_SMM_INSIDE_NMI_MASK; - ctxt->emul_flags &= ~X86EMUL_SMM_MASK; + ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) & + ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK)); return X86EMUL_CONTINUE; } @@ -3854,6 +3854,13 @@ static int em_sti(struct x86_emulate_ctxt *ctxt) static int em_cpuid(struct x86_emulate_ctxt *ctxt) { u32 eax, ebx, ecx, edx; + u64 msr = 0; + + ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr); + if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT && + ctxt->ops->cpl(ctxt)) { + return emulate_gp(ctxt, 0); + } eax = reg_read(ctxt, VCPU_REGS_RAX); ecx = reg_read(ctxt, VCPU_REGS_RCX); @@ -5316,6 +5323,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) const struct x86_emulate_ops *ops = ctxt->ops; int rc = X86EMUL_CONTINUE; int saved_dst_type = ctxt->dst.type; + unsigned emul_flags; ctxt->mem_read.pos = 0; @@ -5330,6 +5338,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } + emul_flags = ctxt->ops->get_hflags(ctxt); if (unlikely(ctxt->d & (No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) { if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) || @@ -5363,7 +5372,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) fetch_possible_mmx_operand(ctxt, &ctxt->dst); } - if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) { + if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_PRE_EXCEPT); if (rc != X86EMUL_CONTINUE) @@ -5392,7 +5401,7 @@ int 
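The cpuid.c and emulate.c hunks above implement CPUID faulting: once the guest sets the CPUID-fault bit in MSR_MISC_FEATURES_ENABLES, a CPUID executed at CPL > 0 is turned into #GP(0) instead of being emulated. The decision reduced to a tiny model (the bit position used below is an assumption; only the macro name appears in the patch):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MISC_FEATURES_CPUID_FAULT (1ULL << 0)   /* assumed bit layout */

    /* True when an emulated CPUID should raise #GP(0) instead of executing,
     * mirroring the checks in kvm_emulate_cpuid() and em_cpuid(). */
    static bool cpuid_should_fault(uint64_t misc_features_msr, unsigned int cpl)
    {
        return (misc_features_msr & MISC_FEATURES_CPUID_FAULT) && cpl > 0;
    }

    int main(void)
    {
        printf("CPL0: %d, CPL3: %d\n",
               cpuid_should_fault(MISC_FEATURES_CPUID_FAULT, 0),
               cpuid_should_fault(MISC_FEATURES_CPUID_FAULT, 3));
        return 0;
    }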
x86_emulate_insn(struct x86_emulate_ctxt *ctxt) goto done; } - if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) { + if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_POST_EXCEPT); if (rc != X86EMUL_CONTINUE) @@ -5446,7 +5455,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) special_insn: - if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) { + if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) { rc = emulator_check_intercept(ctxt, ctxt->intercept, X86_ICPT_POST_MEMACCESS); if (rc != X86EMUL_CONTINUE) diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 047b17a26269..bdcd4139eca9 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -49,7 +49,7 @@ static void pic_unlock(struct kvm_pic *s) __releases(&s->lock) { bool wakeup = s->wakeup_needed; - struct kvm_vcpu *vcpu, *found = NULL; + struct kvm_vcpu *vcpu; int i; s->wakeup_needed = false; @@ -59,16 +59,11 @@ static void pic_unlock(struct kvm_pic *s) if (wakeup) { kvm_for_each_vcpu(i, vcpu, s->kvm) { if (kvm_apic_accept_pic_intr(vcpu)) { - found = vcpu; - break; + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_vcpu_kick(vcpu); + return; } } - - if (!found) - return; - - kvm_make_request(KVM_REQ_EVENT, found); - kvm_vcpu_kick(found); } } @@ -239,7 +234,7 @@ static inline void pic_intack(struct kvm_kpic_state *s, int irq) int kvm_pic_read_irq(struct kvm *kvm) { int irq, irq2, intno; - struct kvm_pic *s = pic_irqchip(kvm); + struct kvm_pic *s = kvm->arch.vpic; s->output = 0; @@ -273,7 +268,7 @@ int kvm_pic_read_irq(struct kvm *kvm) return intno; } -void kvm_pic_reset(struct kvm_kpic_state *s) +static void kvm_pic_reset(struct kvm_kpic_state *s) { int irq, i; struct kvm_vcpu *vcpu; @@ -422,19 +417,16 @@ static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1) return ret; } -static u32 pic_ioport_read(void *opaque, u32 addr1) +static u32 pic_ioport_read(void *opaque, u32 addr) { struct kvm_kpic_state *s = opaque; - unsigned int addr; int ret; - addr = addr1; - addr &= 1; if (s->poll) { - ret = pic_poll_read(s, addr1); + ret = pic_poll_read(s, addr); s->poll = 0; } else - if (addr == 0) + if ((addr & 1) == 0) if (s->read_reg_select) ret = s->isr; else @@ -456,76 +448,64 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1) return s->elcr; } -static int picdev_in_range(gpa_t addr) -{ - switch (addr) { - case 0x20: - case 0x21: - case 0xa0: - case 0xa1: - case 0x4d0: - case 0x4d1: - return 1; - default: - return 0; - } -} - static int picdev_write(struct kvm_pic *s, gpa_t addr, int len, const void *val) { unsigned char data = *(unsigned char *)val; - if (!picdev_in_range(addr)) - return -EOPNOTSUPP; if (len != 1) { pr_pic_unimpl("non byte write\n"); return 0; } - pic_lock(s); switch (addr) { case 0x20: case 0x21: case 0xa0: case 0xa1: + pic_lock(s); pic_ioport_write(&s->pics[addr >> 7], addr, data); + pic_unlock(s); break; case 0x4d0: case 0x4d1: + pic_lock(s); elcr_ioport_write(&s->pics[addr & 1], addr, data); + pic_unlock(s); break; + default: + return -EOPNOTSUPP; } - pic_unlock(s); return 0; } static int picdev_read(struct kvm_pic *s, gpa_t addr, int len, void *val) { - unsigned char data = 0; - if (!picdev_in_range(addr)) - return -EOPNOTSUPP; + unsigned char *data = (unsigned char *)val; if (len != 1) { memset(val, 0, len); pr_pic_unimpl("non byte read\n"); return 0; } - pic_lock(s); switch (addr) { case 0x20: case 0x21: case 0xa0: case 0xa1: - data = 
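The i8259.c cleanup above keeps the classic port decode visible in its switch statements: the master PIC answers at 0x20/0x21 and the slave at 0xa0/0xa1, so addr >> 7 picks the chip and addr & 1 picks the command versus data register. The same decode standalone:

    #include <stdint.h>
    #include <stdio.h>

    static void decode_pic_port(uint16_t port)
    {
        int slave = (port >> 7) & 1;          /* 0x20/0x21 -> 0, 0xa0/0xa1 -> 1 */
        int data  = port & 1;                 /* even: command, odd: data (IMR) */

        printf("0x%03x -> %s PIC, %s register\n", port,
               slave ? "slave" : "master", data ? "data" : "command");
    }

    int main(void)
    {
        decode_pic_port(0x20);
        decode_pic_port(0x21);
        decode_pic_port(0xa0);
        decode_pic_port(0xa1);
        return 0;
    }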
pic_ioport_read(&s->pics[addr >> 7], addr); + pic_lock(s); + *data = pic_ioport_read(&s->pics[addr >> 7], addr); + pic_unlock(s); break; case 0x4d0: case 0x4d1: - data = elcr_ioport_read(&s->pics[addr & 1], addr); + pic_lock(s); + *data = elcr_ioport_read(&s->pics[addr & 1], addr); + pic_unlock(s); break; + default: + return -EOPNOTSUPP; } - *(unsigned char *)val = data; - pic_unlock(s); return 0; } @@ -576,7 +556,7 @@ static int picdev_eclr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, */ static void pic_irq_request(struct kvm *kvm, int level) { - struct kvm_pic *s = pic_irqchip(kvm); + struct kvm_pic *s = kvm->arch.vpic; if (!s->output) s->wakeup_needed = true; @@ -660,9 +640,11 @@ void kvm_pic_destroy(struct kvm *kvm) if (!vpic) return; + mutex_lock(&kvm->slots_lock); kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master); kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave); kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr); + mutex_unlock(&kvm->slots_lock); kvm->arch.vpic = NULL; kfree(vpic); diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 289270a6aecb..bdff437acbcb 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -266,11 +266,9 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) spin_unlock(&ioapic->lock); } -void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) +void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm) { - struct kvm_ioapic *ioapic = kvm->arch.vioapic; - - if (!ioapic) + if (!ioapic_in_kernel(kvm)) return; kvm_make_scan_ioapic_request(kvm); } @@ -315,7 +313,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG && ioapic->irr & (1 << index)) ioapic_service(ioapic, index, false); - kvm_vcpu_request_scan_ioapic(ioapic->kvm); + kvm_make_scan_ioapic_request(ioapic->kvm); break; } } @@ -624,10 +622,8 @@ int kvm_ioapic_init(struct kvm *kvm) if (ret < 0) { kvm->arch.vioapic = NULL; kfree(ioapic); - return ret; } - kvm_vcpu_request_scan_ioapic(kvm); return ret; } @@ -639,36 +635,32 @@ void kvm_ioapic_destroy(struct kvm *kvm) return; cancel_delayed_work_sync(&ioapic->eoi_inject); + mutex_lock(&kvm->slots_lock); kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); + mutex_unlock(&kvm->slots_lock); kvm->arch.vioapic = NULL; kfree(ioapic); } -int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) +void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) { - struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); - if (!ioapic) - return -EINVAL; + struct kvm_ioapic *ioapic = kvm->arch.vioapic; spin_lock(&ioapic->lock); memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); state->irr &= ~ioapic->irr_delivered; spin_unlock(&ioapic->lock); - return 0; } -int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) +void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) { - struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); - if (!ioapic) - return -EINVAL; + struct kvm_ioapic *ioapic = kvm->arch.vioapic; spin_lock(&ioapic->lock); memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); ioapic->irr = 0; ioapic->irr_delivered = 0; - kvm_vcpu_request_scan_ioapic(kvm); + kvm_make_scan_ioapic_request(kvm); kvm_ioapic_inject_all(ioapic, state->irr); spin_unlock(&ioapic->lock); - return 0; } diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 1cc6e54436db..29ce19732ccf 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -105,17 
+105,13 @@ do { \ #define ASSERT(x) do { } while (0) #endif -static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) -{ - return kvm->arch.vioapic; -} - static inline int ioapic_in_kernel(struct kvm *kvm) { - int ret; + int mode = kvm->arch.irqchip_mode; - ret = (ioapic_irqchip(kvm) != NULL); - return ret; + /* Matches smp_wmb() when setting irqchip_mode */ + smp_rmb(); + return mode == KVM_IRQCHIP_KERNEL; } void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); @@ -132,8 +128,8 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, struct dest_map *dest_map); -int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); -int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); +void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); +void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors); void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c deleted file mode 100644 index b181426f67b4..000000000000 --- a/arch/x86/kvm/iommu.c +++ /dev/null @@ -1,356 +0,0 @@ -/* - * Copyright (c) 2006, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Copyright (C) 2006-2008 Intel Corporation - * Copyright IBM Corporation, 2008 - * Copyright 2010 Red Hat, Inc. and/or its affiliates. - * - * Author: Allen M. 
Kay <allen.m.kay@intel.com> - * Author: Weidong Han <weidong.han@intel.com> - * Author: Ben-Ami Yassour <benami@il.ibm.com> - */ - -#include <linux/list.h> -#include <linux/kvm_host.h> -#include <linux/moduleparam.h> -#include <linux/pci.h> -#include <linux/stat.h> -#include <linux/iommu.h> -#include "assigned-dev.h" - -static bool allow_unsafe_assigned_interrupts; -module_param_named(allow_unsafe_assigned_interrupts, - allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(allow_unsafe_assigned_interrupts, - "Enable device assignment on platforms without interrupt remapping support."); - -static int kvm_iommu_unmap_memslots(struct kvm *kvm); -static void kvm_iommu_put_pages(struct kvm *kvm, - gfn_t base_gfn, unsigned long npages); - -static kvm_pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn, - unsigned long npages) -{ - gfn_t end_gfn; - kvm_pfn_t pfn; - - pfn = gfn_to_pfn_memslot(slot, gfn); - end_gfn = gfn + npages; - gfn += 1; - - if (is_error_noslot_pfn(pfn)) - return pfn; - - while (gfn < end_gfn) - gfn_to_pfn_memslot(slot, gfn++); - - return pfn; -} - -static void kvm_unpin_pages(struct kvm *kvm, kvm_pfn_t pfn, - unsigned long npages) -{ - unsigned long i; - - for (i = 0; i < npages; ++i) - kvm_release_pfn_clean(pfn + i); -} - -int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) -{ - gfn_t gfn, end_gfn; - kvm_pfn_t pfn; - int r = 0; - struct iommu_domain *domain = kvm->arch.iommu_domain; - int flags; - - /* check if iommu exists and in use */ - if (!domain) - return 0; - - gfn = slot->base_gfn; - end_gfn = gfn + slot->npages; - - flags = IOMMU_READ; - if (!(slot->flags & KVM_MEM_READONLY)) - flags |= IOMMU_WRITE; - if (!kvm->arch.iommu_noncoherent) - flags |= IOMMU_CACHE; - - - while (gfn < end_gfn) { - unsigned long page_size; - - /* Check if already mapped */ - if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) { - gfn += 1; - continue; - } - - /* Get the page size we could use to map */ - page_size = kvm_host_page_size(kvm, gfn); - - /* Make sure the page_size does not exceed the memslot */ - while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn) - page_size >>= 1; - - /* Make sure gfn is aligned to the page size we want to map */ - while ((gfn << PAGE_SHIFT) & (page_size - 1)) - page_size >>= 1; - - /* Make sure hva is aligned to the page size we want to map */ - while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1)) - page_size >>= 1; - - /* - * Pin all pages we are about to map in memory. This is - * important because we unmap and unpin in 4kb steps later. 
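The removed kvm_iommu_map_pages() above chooses the largest IOMMU mapping it can: it starts from the host page size and halves it until the mapping stays inside the memslot and both the guest-frame number and the host virtual address are aligned to it. The size-selection loops on their own (PAGE_SHIFT of 12 and the inputs in main() are just an example):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12

    static uint64_t pick_map_size(uint64_t page_size, uint64_t gfn,
                                  uint64_t end_gfn, uint64_t hva)
    {
        /* do not map past the end of the memslot */
        while (gfn + (page_size >> PAGE_SHIFT) > end_gfn)
            page_size >>= 1;
        /* guest physical address must be aligned to the mapping size */
        while ((gfn << PAGE_SHIFT) & (page_size - 1))
            page_size >>= 1;
        /* host virtual address must be aligned as well */
        while (hva & (page_size - 1))
            page_size >>= 1;
        return page_size;
    }

    int main(void)
    {
        /* 2M candidate, but the hva is only 4K-aligned: falls back to 4K */
        printf("%llu\n", (unsigned long long)
               pick_map_size(2 << 20, 0x200, 0x10000, 0x7f0000001000ULL));
        return 0;
    }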
- */ - pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT); - if (is_error_noslot_pfn(pfn)) { - gfn += 1; - continue; - } - - /* Map into IO address space */ - r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn), - page_size, flags); - if (r) { - printk(KERN_ERR "kvm_iommu_map_address:" - "iommu failed to map pfn=%llx\n", pfn); - kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT); - goto unmap_pages; - } - - gfn += page_size >> PAGE_SHIFT; - - cond_resched(); - } - - return 0; - -unmap_pages: - kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn); - return r; -} - -static int kvm_iommu_map_memslots(struct kvm *kvm) -{ - int idx, r = 0; - struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; - - if (kvm->arch.iommu_noncoherent) - kvm_arch_register_noncoherent_dma(kvm); - - idx = srcu_read_lock(&kvm->srcu); - slots = kvm_memslots(kvm); - - kvm_for_each_memslot(memslot, slots) { - r = kvm_iommu_map_pages(kvm, memslot); - if (r) - break; - } - srcu_read_unlock(&kvm->srcu, idx); - - return r; -} - -int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev) -{ - struct iommu_domain *domain = kvm->arch.iommu_domain; - int r; - bool noncoherent; - - /* check if iommu exists and in use */ - if (!domain) - return 0; - - if (pdev == NULL) - return -ENODEV; - - r = iommu_attach_device(domain, &pdev->dev); - if (r) { - dev_err(&pdev->dev, "kvm assign device failed ret %d", r); - return r; - } - - noncoherent = !iommu_capable(&pci_bus_type, IOMMU_CAP_CACHE_COHERENCY); - - /* Check if need to update IOMMU page table for guest memory */ - if (noncoherent != kvm->arch.iommu_noncoherent) { - kvm_iommu_unmap_memslots(kvm); - kvm->arch.iommu_noncoherent = noncoherent; - r = kvm_iommu_map_memslots(kvm); - if (r) - goto out_unmap; - } - - kvm_arch_start_assignment(kvm); - pci_set_dev_assigned(pdev); - - dev_info(&pdev->dev, "kvm assign device\n"); - - return 0; -out_unmap: - kvm_iommu_unmap_memslots(kvm); - return r; -} - -int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev) -{ - struct iommu_domain *domain = kvm->arch.iommu_domain; - - /* check if iommu exists and in use */ - if (!domain) - return 0; - - if (pdev == NULL) - return -ENODEV; - - iommu_detach_device(domain, &pdev->dev); - - pci_clear_dev_assigned(pdev); - kvm_arch_end_assignment(kvm); - - dev_info(&pdev->dev, "kvm deassign device\n"); - - return 0; -} - -int kvm_iommu_map_guest(struct kvm *kvm) -{ - int r; - - if (!iommu_present(&pci_bus_type)) { - printk(KERN_ERR "%s: iommu not found\n", __func__); - return -ENODEV; - } - - mutex_lock(&kvm->slots_lock); - - kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type); - if (!kvm->arch.iommu_domain) { - r = -ENOMEM; - goto out_unlock; - } - - if (!allow_unsafe_assigned_interrupts && - !iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) { - printk(KERN_WARNING "%s: No interrupt remapping support," - " disallowing device assignment." 
- " Re-enable with \"allow_unsafe_assigned_interrupts=1\"" - " module option.\n", __func__); - iommu_domain_free(kvm->arch.iommu_domain); - kvm->arch.iommu_domain = NULL; - r = -EPERM; - goto out_unlock; - } - - r = kvm_iommu_map_memslots(kvm); - if (r) - kvm_iommu_unmap_memslots(kvm); - -out_unlock: - mutex_unlock(&kvm->slots_lock); - return r; -} - -static void kvm_iommu_put_pages(struct kvm *kvm, - gfn_t base_gfn, unsigned long npages) -{ - struct iommu_domain *domain; - gfn_t end_gfn, gfn; - kvm_pfn_t pfn; - u64 phys; - - domain = kvm->arch.iommu_domain; - end_gfn = base_gfn + npages; - gfn = base_gfn; - - /* check if iommu exists and in use */ - if (!domain) - return; - - while (gfn < end_gfn) { - unsigned long unmap_pages; - size_t size; - - /* Get physical address */ - phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); - - if (!phys) { - gfn++; - continue; - } - - pfn = phys >> PAGE_SHIFT; - - /* Unmap address from IO address space */ - size = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE); - unmap_pages = 1ULL << get_order(size); - - /* Unpin all pages we just unmapped to not leak any memory */ - kvm_unpin_pages(kvm, pfn, unmap_pages); - - gfn += unmap_pages; - - cond_resched(); - } -} - -void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot) -{ - kvm_iommu_put_pages(kvm, slot->base_gfn, slot->npages); -} - -static int kvm_iommu_unmap_memslots(struct kvm *kvm) -{ - int idx; - struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; - - idx = srcu_read_lock(&kvm->srcu); - slots = kvm_memslots(kvm); - - kvm_for_each_memslot(memslot, slots) - kvm_iommu_unmap_pages(kvm, memslot); - - srcu_read_unlock(&kvm->srcu, idx); - - if (kvm->arch.iommu_noncoherent) - kvm_arch_unregister_noncoherent_dma(kvm); - - return 0; -} - -int kvm_iommu_unmap_guest(struct kvm *kvm) -{ - struct iommu_domain *domain = kvm->arch.iommu_domain; - - /* check if iommu exists and in use */ - if (!domain) - return 0; - - mutex_lock(&kvm->slots_lock); - kvm_iommu_unmap_memslots(kvm); - kvm->arch.iommu_domain = NULL; - kvm->arch.iommu_noncoherent = false; - mutex_unlock(&kvm->slots_lock); - - iommu_domain_free(domain); - return 0; -} diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 60d91c9d160c..5c24811e8b0b 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -60,7 +60,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v) if (irqchip_split(v->kvm)) return pending_userspace_extint(v); else - return pic_irqchip(v->kvm)->output; + return v->kvm->arch.vpic->output; } else return 0; } diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 40d5b2cf6061..d5005cc26521 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -78,40 +78,42 @@ void kvm_pic_destroy(struct kvm *kvm); int kvm_pic_read_irq(struct kvm *kvm); void kvm_pic_update_irq(struct kvm_pic *s); -static inline struct kvm_pic *pic_irqchip(struct kvm *kvm) -{ - return kvm->arch.vpic; -} - static inline int pic_in_kernel(struct kvm *kvm) { - int ret; + int mode = kvm->arch.irqchip_mode; - ret = (pic_irqchip(kvm) != NULL); - return ret; + /* Matches smp_wmb() when setting irqchip_mode */ + smp_rmb(); + return mode == KVM_IRQCHIP_KERNEL; } static inline int irqchip_split(struct kvm *kvm) { - return kvm->arch.irqchip_mode == KVM_IRQCHIP_SPLIT; + int mode = kvm->arch.irqchip_mode; + + /* Matches smp_wmb() when setting irqchip_mode */ + smp_rmb(); + return mode == KVM_IRQCHIP_SPLIT; } static inline int irqchip_kernel(struct kvm *kvm) { - return kvm->arch.irqchip_mode == KVM_IRQCHIP_KERNEL; + int mode = 
kvm->arch.irqchip_mode; + + /* Matches smp_wmb() when setting irqchip_mode */ + smp_rmb(); + return mode == KVM_IRQCHIP_KERNEL; } static inline int irqchip_in_kernel(struct kvm *kvm) { - bool ret = kvm->arch.irqchip_mode != KVM_IRQCHIP_NONE; + int mode = kvm->arch.irqchip_mode; - /* Matches with wmb after initializing kvm->irq_routing. */ + /* Matches smp_wmb() when setting irqchip_mode */ smp_rmb(); - return ret; + return mode != KVM_IRQCHIP_NONE; } -void kvm_pic_reset(struct kvm_kpic_state *s); - void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 6825cd36d13b..3cc3b2d130a0 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -42,7 +42,7 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) { - struct kvm_pic *pic = pic_irqchip(kvm); + struct kvm_pic *pic = kvm->arch.vpic; return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level); } @@ -232,11 +232,11 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) goto unlock; } clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); - if (!ioapic_in_kernel(kvm)) + if (!irqchip_kernel(kvm)) goto unlock; kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); - kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id); + kvm_pic_clear_all(kvm->arch.vpic, irq_source_id); unlock: mutex_unlock(&kvm->irq_lock); } @@ -274,42 +274,42 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, srcu_read_unlock(&kvm->irq_srcu, idx); } +bool kvm_arch_can_set_irq_routing(struct kvm *kvm) +{ + return irqchip_in_kernel(kvm); +} + int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { - int r = -EINVAL; - int delta; - unsigned max_pin; - + /* We can't check irqchip_in_kernel() here as some callers are + * currently inititalizing the irqchip. Other callers should therefore + * check kvm_arch_can_set_irq_routing() before calling this function. 
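pic_in_kernel(), ioapic_in_kernel() and the irqchip_* helpers above all follow the same discipline: read irqchip_mode once, issue smp_rmb(), and only then trust the state that was published before the matching smp_wmb(). A C11 sketch of that publish/consume ordering; the writer side is reconstructed from the comment's description, the fences stand in for smp_wmb()/smp_rmb(), and a real concurrent version would use atomic loads and stores:

    #include <stdatomic.h>
    #include <stdio.h>

    enum irqchip_mode { IRQCHIP_NONE, IRQCHIP_KERNEL };

    struct vm {
        int *vpic;                      /* state published before the mode flip */
        enum irqchip_mode mode;
    };

    static void create_irqchip(struct vm *vm, int *pic_state)
    {
        vm->vpic = pic_state;                           /* initialize first */
        atomic_thread_fence(memory_order_release);      /* smp_wmb() */
        vm->mode = IRQCHIP_KERNEL;                      /* then publish */
    }

    static int pic_in_kernel(const struct vm *vm)
    {
        enum irqchip_mode mode = vm->mode;              /* read the mode once */
        atomic_thread_fence(memory_order_acquire);      /* smp_rmb() */
        return mode == IRQCHIP_KERNEL;                  /* vpic is now safe to use */
    }

    int main(void)
    {
        static int pic_state;
        struct vm vm = { 0 };

        printf("before: %d\n", pic_in_kernel(&vm));
        create_irqchip(&vm, &pic_state);
        printf("after:  %d\n", pic_in_kernel(&vm));
        return 0;
    }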
+ */ switch (ue->type) { case KVM_IRQ_ROUTING_IRQCHIP: - delta = 0; + if (irqchip_split(kvm)) + return -EINVAL; + e->irqchip.pin = ue->u.irqchip.pin; switch (ue->u.irqchip.irqchip) { case KVM_IRQCHIP_PIC_SLAVE: - delta = 8; + e->irqchip.pin += PIC_NUM_PINS / 2; /* fall through */ case KVM_IRQCHIP_PIC_MASTER: - if (!pic_in_kernel(kvm)) - goto out; - + if (ue->u.irqchip.pin >= PIC_NUM_PINS / 2) + return -EINVAL; e->set = kvm_set_pic_irq; - max_pin = PIC_NUM_PINS; break; case KVM_IRQCHIP_IOAPIC: - if (!ioapic_in_kernel(kvm)) - goto out; - - max_pin = KVM_IOAPIC_NUM_PINS; + if (ue->u.irqchip.pin >= KVM_IOAPIC_NUM_PINS) + return -EINVAL; e->set = kvm_set_ioapic_irq; break; default: - goto out; + return -EINVAL; } e->irqchip.irqchip = ue->u.irqchip.irqchip; - e->irqchip.pin = ue->u.irqchip.pin + delta; - if (e->irqchip.pin >= max_pin) - goto out; break; case KVM_IRQ_ROUTING_MSI: e->set = kvm_set_msi; @@ -318,7 +318,7 @@ int kvm_set_routing_entry(struct kvm *kvm, e->msi.data = ue->u.msi.data; if (kvm_msi_route_invalid(kvm, e)) - goto out; + return -EINVAL; break; case KVM_IRQ_ROUTING_HV_SINT: e->set = kvm_hv_set_sint; @@ -326,12 +326,10 @@ int kvm_set_routing_entry(struct kvm *kvm, e->hv_sint.sint = ue->u.hv_sint.sint; break; default: - goto out; + return -EINVAL; } - r = 0; -out: - return r; + return 0; } bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index bad6a25067bc..9fa5b8164961 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -529,14 +529,16 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) { - return kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.pv_eoi.data, &val, - sizeof(val)); + + return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val, + sizeof(val)); } static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val) { - return kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.pv_eoi.data, val, - sizeof(*val)); + + return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val, + sizeof(*val)); } static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) @@ -2285,8 +2287,8 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) return; - if (kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.apic->vapic_cache, &data, - sizeof(u32))) + if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, + sizeof(u32))) return; apic_set_tpr(vcpu->arch.apic, data & 0xff); @@ -2338,14 +2340,14 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) max_isr = 0; data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); - kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.apic->vapic_cache, &data, - sizeof(u32)); + kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, + sizeof(u32)); } int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) { if (vapic_addr) { - if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apic->vapic_cache, vapic_addr, sizeof(u32))) return -EINVAL; @@ -2439,7 +2441,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) vcpu->arch.pv_eoi.msr_val = data; if (!pv_eoi_enabled(vcpu)) return 0; - return kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.pv_eoi.data, + return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, addr, sizeof(u8)); } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 
ac7810513d0e..558676538fca 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4340,7 +4340,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); -void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly) +void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, + bool accessed_dirty) { struct kvm_mmu *context = &vcpu->arch.mmu; @@ -4349,6 +4350,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly) context->shadow_root_level = kvm_x86_ops->get_tdp_level(); context->nx = true; + context->ept_ad = accessed_dirty; context->page_fault = ept_page_fault; context->gva_to_gpa = ept_gva_to_gpa; context->sync_page = ept_sync_page; diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index ddc56e91f2e4..d8ccb32f7308 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -74,7 +74,8 @@ enum { int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct); void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); -void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly); +void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, + bool accessed_dirty); static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) { diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a01105485315..314d2071b337 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -23,13 +23,6 @@ * so the code in this file is compiled twice, once per pte size. */ -/* - * This is used to catch non optimized PT_GUEST_(DIRTY|ACCESS)_SHIFT macro - * uses for EPT without A/D paging type. - */ -extern u64 __pure __using_nonexistent_pte_bit(void) - __compiletime_error("wrong use of PT_GUEST_(DIRTY|ACCESS)_SHIFT"); - #if PTTYPE == 64 #define pt_element_t u64 #define guest_walker guest_walker64 @@ -39,10 +32,9 @@ extern u64 __pure __using_nonexistent_pte_bit(void) #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) #define PT_INDEX(addr, level) PT64_INDEX(addr, level) #define PT_LEVEL_BITS PT64_LEVEL_BITS - #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK - #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT + #define PT_HAVE_ACCESSED_DIRTY(mmu) true #ifdef CONFIG_X86_64 #define PT_MAX_FULL_LEVELS 4 #define CMPXCHG cmpxchg @@ -60,10 +52,9 @@ extern u64 __pure __using_nonexistent_pte_bit(void) #define PT_INDEX(addr, level) PT32_INDEX(addr, level) #define PT_LEVEL_BITS PT32_LEVEL_BITS #define PT_MAX_FULL_LEVELS 2 - #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK - #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK #define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT #define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT + #define PT_HAVE_ACCESSED_DIRTY(mmu) true #define CMPXCHG cmpxchg #elif PTTYPE == PTTYPE_EPT #define pt_element_t u64 @@ -74,16 +65,18 @@ extern u64 __pure __using_nonexistent_pte_bit(void) #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) #define PT_INDEX(addr, level) PT64_INDEX(addr, level) #define PT_LEVEL_BITS PT64_LEVEL_BITS - #define PT_GUEST_ACCESSED_MASK 0 - #define PT_GUEST_DIRTY_MASK 0 - #define PT_GUEST_DIRTY_SHIFT __using_nonexistent_pte_bit() - #define PT_GUEST_ACCESSED_SHIFT __using_nonexistent_pte_bit() + #define PT_GUEST_DIRTY_SHIFT 9 + #define PT_GUEST_ACCESSED_SHIFT 8 + #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad) #define CMPXCHG cmpxchg64 #define PT_MAX_FULL_LEVELS 4 #else #error Invalid PTTYPE value #endif +#define PT_GUEST_DIRTY_MASK (1 << PT_GUEST_DIRTY_SHIFT) +#define 
PT_GUEST_ACCESSED_MASK (1 << PT_GUEST_ACCESSED_SHIFT) + #define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl) #define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL) @@ -111,12 +104,13 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl) return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT; } -static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte) +static inline void FNAME(protect_clean_gpte)(struct kvm_mmu *mmu, unsigned *access, + unsigned gpte) { unsigned mask; /* dirty bit is not supported, so no need to track it */ - if (!PT_GUEST_DIRTY_MASK) + if (!PT_HAVE_ACCESSED_DIRTY(mmu)) return; BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK); @@ -171,7 +165,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, goto no_present; /* if accessed bit is not supported prefetch non accessed gpte */ - if (PT_GUEST_ACCESSED_MASK && !(gpte & PT_GUEST_ACCESSED_MASK)) + if (PT_HAVE_ACCESSED_DIRTY(&vcpu->arch.mmu) && !(gpte & PT_GUEST_ACCESSED_MASK)) goto no_present; return false; @@ -217,7 +211,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu, int ret; /* dirty/accessed bits are not supported, so no need to update them */ - if (!PT_GUEST_DIRTY_MASK) + if (!PT_HAVE_ACCESSED_DIRTY(mmu)) return 0; for (level = walker->max_level; level >= walker->level; --level) { @@ -286,7 +280,9 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, pt_element_t __user *uninitialized_var(ptep_user); gfn_t table_gfn; unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey; + unsigned nested_access; gpa_t pte_gpa; + bool have_ad; int offset; const int write_fault = access & PFERR_WRITE_MASK; const int user_fault = access & PFERR_USER_MASK; @@ -299,6 +295,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, retry_walk: walker->level = mmu->root_level; pte = mmu->get_cr3(vcpu); + have_ad = PT_HAVE_ACCESSED_DIRTY(mmu); #if PTTYPE == 64 if (walker->level == PT32E_ROOT_LEVEL) { @@ -312,7 +309,15 @@ retry_walk: walker->max_level = walker->level; ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu))); - accessed_dirty = PT_GUEST_ACCESSED_MASK; + accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0; + + /* + * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging + * by the MOV to CR instruction are treated as reads and do not cause the + * processor to set the dirty flag in any EPT paging-structure entry. + */ + nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK; + pt_access = pte_access = ACC_ALL; ++walker->level; @@ -332,7 +337,7 @@ retry_walk: walker->pte_gpa[walker->level - 1] = pte_gpa; real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn), - PFERR_USER_MASK|PFERR_WRITE_MASK, + nested_access, &walker->fault); /* @@ -394,7 +399,7 @@ retry_walk: walker->gfn = real_gpa >> PAGE_SHIFT; if (!write_fault) - FNAME(protect_clean_gpte)(&pte_access, pte); + FNAME(protect_clean_gpte)(mmu, &pte_access, pte); else /* * On a write fault, fold the dirty bit into accessed_dirty. 
@@ -485,7 +490,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, gfn = gpte_to_gfn(gpte); pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); - FNAME(protect_clean_gpte)(&pte_access, gpte); + FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte); pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, no_dirty_log && (pte_access & ACC_WRITE_MASK)); if (is_error_pfn(pfn)) @@ -979,7 +984,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) gfn = gpte_to_gfn(gpte); pte_access = sp->role.access; pte_access &= FNAME(gpte_access)(vcpu, gpte); - FNAME(protect_clean_gpte)(&pte_access, gpte); + FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte); if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access, &nr_present)) @@ -1025,3 +1030,4 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) #undef PT_GUEST_DIRTY_MASK #undef PT_GUEST_DIRTY_SHIFT #undef PT_GUEST_ACCESSED_SHIFT +#undef PT_HAVE_ACCESSED_DIRTY diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 5f48f62b8dc2..c27ac6923a18 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1196,10 +1196,13 @@ static void init_vmcb(struct vcpu_svm *svm) set_intercept(svm, INTERCEPT_CLGI); set_intercept(svm, INTERCEPT_SKINIT); set_intercept(svm, INTERCEPT_WBINVD); - set_intercept(svm, INTERCEPT_MONITOR); - set_intercept(svm, INTERCEPT_MWAIT); set_intercept(svm, INTERCEPT_XSETBV); + if (!kvm_mwait_in_guest()) { + set_intercept(svm, INTERCEPT_MONITOR); + set_intercept(svm, INTERCEPT_MWAIT); + } + control->iopm_base_pa = iopm_base; control->msrpm_base_pa = __pa(svm->msrpm); control->int_ctl = V_INTR_MASKING_MASK; @@ -5254,6 +5257,12 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu) avic_handle_ldr_update(vcpu); } +static void svm_setup_mce(struct kvm_vcpu *vcpu) +{ + /* [63:9] are reserved. 
*/ + vcpu->arch.mcg_cap &= 0x1ff; +} + static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -5365,6 +5374,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .pmu_ops = &amd_pmu_ops, .deliver_posted_interrupt = svm_deliver_avic_intr, .update_pi_irte = svm_update_pi_irte, + .setup_mce = svm_setup_mce, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1a471e5f963f..c5fd459c4043 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -84,9 +84,6 @@ module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO); static bool __read_mostly emulate_invalid_guest_state = true; module_param(emulate_invalid_guest_state, bool, S_IRUGO); -static bool __read_mostly vmm_exclusive = 1; -module_param(vmm_exclusive, bool, S_IRUGO); - static bool __read_mostly fasteoi = 1; module_param(fasteoi, bool, S_IRUGO); @@ -615,10 +612,6 @@ struct vcpu_vmx { int vpid; bool emulation_required; - /* Support for vnmi-less CPUs */ - int soft_vnmi_blocked; - ktime_t entry_time; - s64 vnmi_blocked_time; u32 exit_reason; /* Posted interrupt descriptor */ @@ -914,8 +907,6 @@ static void nested_release_page_clean(struct page *page) static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); static u64 construct_eptp(unsigned long root_hpa); -static void kvm_cpu_vmxon(u64 addr); -static void kvm_cpu_vmxoff(void); static bool vmx_xsaves_supported(void); static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); static void vmx_set_segment(struct kvm_vcpu *vcpu, @@ -1289,11 +1280,6 @@ static inline bool cpu_has_vmx_invpcid(void) SECONDARY_EXEC_ENABLE_INVPCID; } -static inline bool cpu_has_virtual_nmis(void) -{ - return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; -} - static inline bool cpu_has_vmx_wbinvd_exit(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -2238,15 +2224,10 @@ static void decache_tsc_multiplier(struct vcpu_vmx *vmx) static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); bool already_loaded = vmx->loaded_vmcs->cpu == cpu; - if (!vmm_exclusive) - kvm_cpu_vmxon(phys_addr); - else if (!already_loaded) - loaded_vmcs_clear(vmx->loaded_vmcs); - if (!already_loaded) { + loaded_vmcs_clear(vmx->loaded_vmcs); local_irq_disable(); crash_disable_local_vmclear(cpu); @@ -2324,11 +2305,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu) vmx_vcpu_pi_put(vcpu); __vmx_load_host_state(to_vmx(vcpu)); - if (!vmm_exclusive) { - __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs); - vcpu->cpu = -1; - kvm_cpu_vmxoff(); - } } static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); @@ -2752,6 +2728,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx->nested.nested_vmx_secondary_ctls_high); vmx->nested.nested_vmx_secondary_ctls_low = 0; vmx->nested.nested_vmx_secondary_ctls_high &= + SECONDARY_EXEC_RDRAND | SECONDARY_EXEC_RDSEED | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_DESC | @@ -2766,14 +2743,16 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx->nested.nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT; vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT | - VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT | - VMX_EPT_INVEPT_BIT; + VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; if (cpu_has_vmx_ept_execute_only()) vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXECUTE_ONLY_BIT; vmx->nested.nested_vmx_ept_caps &= 
vmx_capability.ept; vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | - VMX_EPT_EXTENT_CONTEXT_BIT; + VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT | + VMX_EPT_1GB_PAGE_BIT; + if (enable_ept_ad_bits) + vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT; } else vmx->nested.nested_vmx_ept_caps = 0; @@ -3420,6 +3399,7 @@ static __init int vmx_disabled_by_bios(void) static void kvm_cpu_vmxon(u64 addr) { + cr4_set_bits(X86_CR4_VMXE); intel_pt_handle_vmx(1); asm volatile (ASM_VMX_VMXON_RAX @@ -3462,12 +3442,8 @@ static int hardware_enable(void) /* enable and lock */ wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); } - cr4_set_bits(X86_CR4_VMXE); - - if (vmm_exclusive) { - kvm_cpu_vmxon(phys_addr); - ept_sync_global(); - } + kvm_cpu_vmxon(phys_addr); + ept_sync_global(); return 0; } @@ -3491,15 +3467,13 @@ static void kvm_cpu_vmxoff(void) asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); intel_pt_handle_vmx(0); + cr4_clear_bits(X86_CR4_VMXE); } static void hardware_disable(void) { - if (vmm_exclusive) { - vmclear_local_loaded_vmcss(); - kvm_cpu_vmxoff(); - } - cr4_clear_bits(X86_CR4_VMXE); + vmclear_local_loaded_vmcss(); + kvm_cpu_vmxoff(); } static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, @@ -3549,11 +3523,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MOV_DR_EXITING | CPU_BASED_USE_TSC_OFFSETING | - CPU_BASED_MWAIT_EXITING | - CPU_BASED_MONITOR_EXITING | CPU_BASED_INVLPG_EXITING | CPU_BASED_RDPMC_EXITING; + if (!kvm_mwait_in_guest()) + min |= CPU_BASED_MWAIT_EXITING | + CPU_BASED_MONITOR_EXITING; + opt = CPU_BASED_TPR_SHADOW | CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; @@ -3619,9 +3595,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) &_vmexit_control) < 0) return -EIO; - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; - opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR | - PIN_BASED_VMX_PREEMPTION_TIMER; + min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | + PIN_BASED_VIRTUAL_NMIS; + opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, &_pin_based_exec_control) < 0) return -EIO; @@ -4013,11 +3989,12 @@ static void exit_lmode(struct kvm_vcpu *vcpu) static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid) { - vpid_sync_context(vpid); if (enable_ept) { if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return; ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); + } else { + vpid_sync_context(vpid); } } @@ -5293,8 +5270,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx->rmode.vm86_active = 0; - vmx->soft_vnmi_blocked = 0; - vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); kvm_set_cr8(vcpu, 0); @@ -5414,8 +5389,7 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) static void enable_nmi_window(struct kvm_vcpu *vcpu) { - if (!cpu_has_virtual_nmis() || - vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { + if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { enable_irq_window(vcpu); return; } @@ -5456,19 +5430,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); if (!is_guest_mode(vcpu)) { - if (!cpu_has_virtual_nmis()) { - /* - * Tracking the NMI-blocked state in software is built upon - * finding the next open IRQ window. This, in turn, depends on - * well-behaving guests: They have to keep IRQs disabled at - * least as long as the NMI handler runs. 
Otherwise we may - * cause NMI nesting, maybe breaking the guest. But as this is - * highly unlikely, we can live with the residual risk. - */ - vmx->soft_vnmi_blocked = 1; - vmx->vnmi_blocked_time = 0; - } - ++vcpu->stat.nmi_injections; vmx->nmi_known_unmasked = false; } @@ -5485,8 +5446,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) { - if (!cpu_has_virtual_nmis()) - return to_vmx(vcpu)->soft_vnmi_blocked; if (to_vmx(vcpu)->nmi_known_unmasked) return false; return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; @@ -5496,20 +5455,13 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) { struct vcpu_vmx *vmx = to_vmx(vcpu); - if (!cpu_has_virtual_nmis()) { - if (vmx->soft_vnmi_blocked != masked) { - vmx->soft_vnmi_blocked = masked; - vmx->vnmi_blocked_time = 0; - } - } else { - vmx->nmi_known_unmasked = !masked; - if (masked) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - else - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - } + vmx->nmi_known_unmasked = !masked; + if (masked) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + else + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); } static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) @@ -5517,9 +5469,6 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) if (to_vmx(vcpu)->nested.nested_run_pending) return 0; - if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) - return 0; - return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI | GUEST_INTR_STATE_NMI)); @@ -6240,21 +6189,18 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) unsigned long exit_qualification; gpa_t gpa; u32 error_code; - int gla_validity; exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - gla_validity = (exit_qualification >> 7) & 0x3; - if (gla_validity == 0x2) { - printk(KERN_ERR "EPT: Handling EPT violation failed!\n"); - printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", - (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), - vmcs_readl(GUEST_LINEAR_ADDRESS)); - printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", - (long unsigned int)exit_qualification); - vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; - vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION; - return 0; + if (is_guest_mode(vcpu) + && !(exit_qualification & EPT_VIOLATION_GVA_TRANSLATED)) { + /* + * Fix up exit_qualification according to whether guest + * page table accesses are reads or writes. + */ + u64 eptp = nested_ept_get_cr3(vcpu); + if (!(eptp & VMX_EPT_AD_ENABLE_BIT)) + exit_qualification &= ~EPT_VIOLATION_ACC_WRITE; } /* @@ -6264,7 +6210,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) * AAK134, BY25. 
*/ if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && - cpu_has_virtual_nmis() && (exit_qualification & INTR_INFO_UNBLOCK_NMI)) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); @@ -6350,7 +6295,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (intr_window_requested && vmx_interrupt_allowed(vcpu)) return handle_interrupt_window(&vmx->vcpu); - if (test_bit(KVM_REQ_EVENT, &vcpu->requests)) + if (kvm_test_request(KVM_REQ_EVENT, vcpu)) return 1; err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); @@ -7103,34 +7048,24 @@ out_msr_bitmap: static int handle_vmon(struct kvm_vcpu *vcpu) { int ret; - struct kvm_segment cs; struct vcpu_vmx *vmx = to_vmx(vcpu); const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; - /* The Intel VMX Instruction Reference lists a bunch of bits that - * are prerequisite to running VMXON, most notably cr4.VMXE must be - * set to 1 (see vmx_set_cr4() for when we allow the guest to set this). - * Otherwise, we should fail with #UD. We test these now: + /* + * The Intel VMX Instruction Reference lists a bunch of bits that are + * prerequisite to running VMXON, most notably cr4.VMXE must be set to + * 1 (see vmx_set_cr4() for when we allow the guest to set this). + * Otherwise, we should fail with #UD. But most faulting conditions + * have already been checked by hardware, prior to the VM-exit for + * VMXON. We do test guest cr4.VMXE because processor CR4 always has + * that bit set to 1 in non-root mode. */ - if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE) || - !kvm_read_cr0_bits(vcpu, X86_CR0_PE) || - (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) { + if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; } - vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); - if (is_long_mode(vcpu) && !cs.l) { - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; - } - - if (vmx_get_cpl(vcpu)) { - kvm_inject_gp(vcpu, 0); - return 1; - } - if (vmx->nested.vmxon) { nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); return kvm_skip_emulated_instruction(vcpu); @@ -7157,29 +7092,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu) * Intel's VMX Instruction Reference specifies a common set of prerequisites * for running VMX instructions (except VMXON, whose prerequisites are * slightly different). It also specifies what exception to inject otherwise. + * Note that many of these exceptions have priority over VM exits, so they + * don't have to be checked again here. */ static int nested_vmx_check_permission(struct kvm_vcpu *vcpu) { - struct kvm_segment cs; - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!vmx->nested.vmxon) { + if (!to_vmx(vcpu)->nested.vmxon) { kvm_queue_exception(vcpu, UD_VECTOR); return 0; } - - vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); - if ((vmx_get_rflags(vcpu) & X86_EFLAGS_VM) || - (is_long_mode(vcpu) && !cs.l)) { - kvm_queue_exception(vcpu, UD_VECTOR); - return 0; - } - - if (vmx_get_cpl(vcpu)) { - kvm_inject_gp(vcpu, 0); - return 0; - } - return 1; } @@ -7523,7 +7444,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) if (get_vmx_mem_address(vcpu, exit_qualification, vmx_instruction_info, true, &gva)) return 1; - /* _system ok, as nested_vmx_check_permission verified cpl=0 */ + /* _system ok, as hardware has verified cpl=0 */ kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva, &field_value, (is_long_mode(vcpu) ? 
8 : 4), NULL); } @@ -7656,7 +7577,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) if (get_vmx_mem_address(vcpu, exit_qualification, vmx_instruction_info, true, &vmcs_gva)) return 1; - /* ok to use *_system, as nested_vmx_check_permission verified cpl=0 */ + /* ok to use *_system, as hardware has verified cpl=0 */ if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva, (void *)&to_vmx(vcpu)->nested.current_vmptr, sizeof(u64), &e)) { @@ -7689,11 +7610,6 @@ static int handle_invept(struct kvm_vcpu *vcpu) if (!nested_vmx_check_permission(vcpu)) return 1; - if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) { - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; - } - vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); @@ -7815,7 +7731,6 @@ static int handle_pml_full(struct kvm_vcpu *vcpu) * "blocked by NMI" bit has to be set before next VM entry. */ if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && - cpu_has_virtual_nmis() && (exit_qualification & INTR_INFO_UNBLOCK_NMI)) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); @@ -8117,6 +8032,10 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); case EXIT_REASON_RDPMC: return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); + case EXIT_REASON_RDRAND: + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND); + case EXIT_REASON_RDSEED: + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED); case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: @@ -8490,26 +8409,6 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) return 0; } - if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked && - !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis( - get_vmcs12(vcpu))))) { - if (vmx_interrupt_allowed(vcpu)) { - vmx->soft_vnmi_blocked = 0; - } else if (vmx->vnmi_blocked_time > 1000000000LL && - vcpu->arch.nmi_pending) { - /* - * This CPU don't support us in finding the end of an - * NMI-blocked window if the guest runs with IRQs - * disabled. So we pull the trigger after 1 s of - * futile waiting, but inform the user about this. - */ - printk(KERN_WARNING "%s: Breaking out of NMI-blocked " - "state on VCPU %d after 1 s timeout\n", - __func__, vcpu->vcpu_id); - vmx->soft_vnmi_blocked = 0; - } - } - if (exit_reason < kvm_vmx_max_exit_handlers && kvm_vmx_exit_handlers[exit_reason]) return kvm_vmx_exit_handlers[exit_reason](vcpu); @@ -8785,37 +8684,33 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; - if (cpu_has_virtual_nmis()) { - if (vmx->nmi_known_unmasked) - return; - /* - * Can't use vmx->exit_intr_info since we're not sure what - * the exit reason is. - */ - exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; - vector = exit_intr_info & INTR_INFO_VECTOR_MASK; - /* - * SDM 3: 27.7.1.2 (September 2008) - * Re-set bit "block by NMI" before VM entry if vmexit caused by - * a guest IRET fault. - * SDM 3: 23.2.2 (September 2008) - * Bit 12 is undefined in any of the following cases: - * If the VM exit sets the valid bit in the IDT-vectoring - * information field. - * If the VM exit is due to a double fault. 
- */ - if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && - vector != DF_VECTOR && !idtv_info_valid) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - else - vmx->nmi_known_unmasked = - !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) - & GUEST_INTR_STATE_NMI); - } else if (unlikely(vmx->soft_vnmi_blocked)) - vmx->vnmi_blocked_time += - ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); + if (vmx->nmi_known_unmasked) + return; + /* + * Can't use vmx->exit_intr_info since we're not sure what + * the exit reason is. + */ + exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; + vector = exit_intr_info & INTR_INFO_VECTOR_MASK; + /* + * SDM 3: 27.7.1.2 (September 2008) + * Re-set bit "block by NMI" before VM entry if vmexit caused by + * a guest IRET fault. + * SDM 3: 23.2.2 (September 2008) + * Bit 12 is undefined in any of the following cases: + * If the VM exit sets the valid bit in the IDT-vectoring + * information field. + * If the VM exit is due to a double fault. + */ + if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && + vector != DF_VECTOR && !idtv_info_valid) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + else + vmx->nmi_known_unmasked = + !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) + & GUEST_INTR_STATE_NMI); } static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu, @@ -8932,10 +8827,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long debugctlmsr, cr4; - /* Record the guest's net vcpu time for enforced NMI injections. */ - if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) - vmx->entry_time = ktime_get(); - /* Don't enter VMX if guest state is invalid, let the exit handler start emulation until we arrive back to a valid state */ if (vmx->emulation_required) @@ -9143,16 +9034,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx_complete_interrupts(vmx); } -static void vmx_load_vmcs01(struct kvm_vcpu *vcpu) +static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) { struct vcpu_vmx *vmx = to_vmx(vcpu); int cpu; - if (vmx->loaded_vmcs == &vmx->vmcs01) + if (vmx->loaded_vmcs == vmcs) return; cpu = get_cpu(); - vmx->loaded_vmcs = &vmx->vmcs01; + vmx->loaded_vmcs = vmcs; vmx_vcpu_put(vcpu); vmx_vcpu_load(vcpu, cpu); vcpu->cpu = cpu; @@ -9170,7 +9061,7 @@ static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu) r = vcpu_load(vcpu); BUG_ON(r); - vmx_load_vmcs01(vcpu); + vmx_switch_vmcs(vcpu, &vmx->vmcs01); free_nested(vmx); vcpu_put(vcpu); } @@ -9231,11 +9122,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx->loaded_vmcs->shadow_vmcs = NULL; if (!vmx->loaded_vmcs->vmcs) goto free_msrs; - if (!vmm_exclusive) - kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id()))); loaded_vmcs_init(vmx->loaded_vmcs); - if (!vmm_exclusive) - kvm_cpu_vmxoff(); cpu = get_cpu(); vmx_vcpu_load(&vmx->vcpu, cpu); @@ -9495,17 +9382,26 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu) return get_vmcs12(vcpu)->ept_pointer; } -static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) +static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu) { + u64 eptp; + WARN_ON(mmu_is_nested(vcpu)); + eptp = nested_ept_get_cr3(vcpu); + if ((eptp & VMX_EPT_AD_ENABLE_BIT) && !enable_ept_ad_bits) + return 1; + + kvm_mmu_unload(vcpu); kvm_init_shadow_ept_mmu(vcpu, to_vmx(vcpu)->nested.nested_vmx_ept_caps & - VMX_EPT_EXECUTE_ONLY_BIT); + 
VMX_EPT_EXECUTE_ONLY_BIT, + eptp & VMX_EPT_AD_ENABLE_BIT); vcpu->arch.mmu.set_cr3 = vmx_set_cr3; vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3; vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault; vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; + return 0; } static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu) @@ -10279,8 +10175,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, } if (nested_cpu_has_ept(vmcs12)) { - kvm_mmu_unload(vcpu); - nested_ept_init_mmu_context(vcpu); + if (nested_ept_init_mmu_context(vcpu)) { + *entry_failure_code = ENTRY_FAIL_DEFAULT; + return 1; + } } else if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { vmx_flush_tlb_ept_only(vcpu); @@ -10353,9 +10251,10 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, vmx->nested.nested_vmx_procbased_ctls_low, vmx->nested.nested_vmx_procbased_ctls_high) || - !vmx_control_verify(vmcs12->secondary_vm_exec_control, - vmx->nested.nested_vmx_secondary_ctls_low, - vmx->nested.nested_vmx_secondary_ctls_high) || + (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) && + !vmx_control_verify(vmcs12->secondary_vm_exec_control, + vmx->nested.nested_vmx_secondary_ctls_low, + vmx->nested.nested_vmx_secondary_ctls_high)) || !vmx_control_verify(vmcs12->pin_based_vm_exec_control, vmx->nested.nested_vmx_pinbased_ctls_low, vmx->nested.nested_vmx_pinbased_ctls_high) || @@ -10434,7 +10333,6 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct loaded_vmcs *vmcs02; - int cpu; u32 msr_entry_idx; u32 exit_qual; @@ -10447,18 +10345,12 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); - cpu = get_cpu(); - vmx->loaded_vmcs = vmcs02; - vmx_vcpu_put(vcpu); - vmx_vcpu_load(vcpu, cpu); - vcpu->cpu = cpu; - put_cpu(); - + vmx_switch_vmcs(vcpu, vmcs02); vmx_segment_cache_clear(vmx); if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { leave_guest_mode(vcpu); - vmx_load_vmcs01(vcpu); + vmx_switch_vmcs(vcpu, &vmx->vmcs01); nested_vmx_entry_failure(vcpu, vmcs12, EXIT_REASON_INVALID_STATE, exit_qual); return 1; @@ -10471,7 +10363,7 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) vmcs12->vm_entry_msr_load_count); if (msr_entry_idx) { leave_guest_mode(vcpu); - vmx_load_vmcs01(vcpu); + vmx_switch_vmcs(vcpu, &vmx->vmcs01); nested_vmx_entry_failure(vcpu, vmcs12, EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx); return 1; @@ -11039,7 +10931,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, if (unlikely(vmx->fail)) vm_inst_error = vmcs_read32(VM_INSTRUCTION_ERROR); - vmx_load_vmcs01(vcpu); + vmx_switch_vmcs(vcpu, &vmx->vmcs01); if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT) && nested_exit_intr_ack_set(vcpu)) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ccbd45ecd41a..b38a302858a0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -27,7 +27,6 @@ #include "kvm_cache_regs.h" #include "x86.h" #include "cpuid.h" -#include "assigned-dev.h" #include "pmu.h" #include "hyperv.h" @@ -1008,6 +1007,8 @@ static u32 emulated_msrs[] = { MSR_IA32_MCG_CTL, MSR_IA32_MCG_EXT_CTL, MSR_IA32_SMBASE, + MSR_PLATFORM_INFO, + MSR_MISC_FEATURES_ENABLES, }; static unsigned 
num_emulated_msrs; @@ -1444,10 +1445,10 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) struct kvm *kvm = vcpu->kvm; u64 offset, ns, elapsed; unsigned long flags; - s64 usdiff; bool matched; bool already_matched; u64 data = msr->data; + bool synchronizing = false; raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); offset = kvm_compute_tsc_offset(vcpu, data); @@ -1455,51 +1456,34 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) elapsed = ns - kvm->arch.last_tsc_nsec; if (vcpu->arch.virtual_tsc_khz) { - int faulted = 0; - - /* n.b - signed multiplication and division required */ - usdiff = data - kvm->arch.last_tsc_write; -#ifdef CONFIG_X86_64 - usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz; -#else - /* do_div() only does unsigned */ - asm("1: idivl %[divisor]\n" - "2: xor %%edx, %%edx\n" - " movl $0, %[faulted]\n" - "3:\n" - ".section .fixup,\"ax\"\n" - "4: movl $1, %[faulted]\n" - " jmp 3b\n" - ".previous\n" - - _ASM_EXTABLE(1b, 4b) - - : "=A"(usdiff), [faulted] "=r" (faulted) - : "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz)); - -#endif - do_div(elapsed, 1000); - usdiff -= elapsed; - if (usdiff < 0) - usdiff = -usdiff; - - /* idivl overflow => difference is larger than USEC_PER_SEC */ - if (faulted) - usdiff = USEC_PER_SEC; - } else - usdiff = USEC_PER_SEC; /* disable TSC match window below */ + if (data == 0 && msr->host_initiated) { + /* + * detection of vcpu initialization -- need to sync + * with other vCPUs. This particularly helps to keep + * kvm_clock stable after CPU hotplug + */ + synchronizing = true; + } else { + u64 tsc_exp = kvm->arch.last_tsc_write + + nsec_to_cycles(vcpu, elapsed); + u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL; + /* + * Special case: TSC write with a small delta (1 second) + * of virtual cycle time against real time is + * interpreted as an attempt to synchronize the CPU. + */ + synchronizing = data < tsc_exp + tsc_hz && + data + tsc_hz > tsc_exp; + } + } /* - * Special case: TSC write with a small delta (1 second) of virtual - * cycle time against real time is interpreted as an attempt to - * synchronize the CPU. - * * For a reliable TSC, we can match TSC offsets, and for an unstable * TSC, we add elapsed time in this computation. We could let the * compensation code attempt to catch up if we fall behind, but * it's better to try to match offsets from the beginning. 
*/ - if (usdiff < USEC_PER_SEC && + if (synchronizing && vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) { if (!check_tsc_unstable()) { offset = kvm->arch.cur_tsc_offset; @@ -1769,13 +1753,13 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) /* guest entries allowed */ kvm_for_each_vcpu(i, vcpu, kvm) - clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests); + kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu); spin_unlock(&ka->pvclock_gtod_sync_lock); #endif } -static u64 __get_kvmclock_ns(struct kvm *kvm) +u64 get_kvmclock_ns(struct kvm *kvm) { struct kvm_arch *ka = &kvm->arch; struct pvclock_vcpu_time_info hv_clock; @@ -1796,24 +1780,12 @@ static u64 __get_kvmclock_ns(struct kvm *kvm) return __pvclock_read_cycles(&hv_clock, rdtsc()); } -u64 get_kvmclock_ns(struct kvm *kvm) -{ - unsigned long flags; - s64 ns; - - local_irq_save(flags); - ns = __get_kvmclock_ns(kvm); - local_irq_restore(flags); - - return ns; -} - static void kvm_setup_pvclock_page(struct kvm_vcpu *v) { struct kvm_vcpu_arch *vcpu = &v->arch; struct pvclock_vcpu_time_info guest_hv_clock; - if (unlikely(kvm_vcpu_read_guest_cached(v, &vcpu->pv_time, + if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, &guest_hv_clock, sizeof(guest_hv_clock)))) return; @@ -1834,9 +1806,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); vcpu->hv_clock.version = guest_hv_clock.version + 1; - kvm_vcpu_write_guest_cached(v, &vcpu->pv_time, - &vcpu->hv_clock, - sizeof(vcpu->hv_clock.version)); + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock.version)); smp_wmb(); @@ -1850,16 +1822,16 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock); - kvm_vcpu_write_guest_cached(v, &vcpu->pv_time, - &vcpu->hv_clock, - sizeof(vcpu->hv_clock)); + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock)); smp_wmb(); vcpu->hv_clock.version++; - kvm_vcpu_write_guest_cached(v, &vcpu->pv_time, - &vcpu->hv_clock, - sizeof(vcpu->hv_clock.version)); + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock.version)); } static int kvm_guest_time_update(struct kvm_vcpu *v) @@ -2092,7 +2064,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) return 0; } - if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.apf.data, gpa, + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa, sizeof(u32))) return 1; @@ -2111,7 +2083,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu) if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; - if (unlikely(kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.st.stime, + if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) return; @@ -2122,7 +2094,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu) vcpu->arch.st.steal.version += 1; - kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime, + kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); smp_wmb(); @@ -2131,14 +2103,14 @@ static void record_steal_time(struct kvm_vcpu *vcpu) vcpu->arch.st.last_steal; vcpu->arch.st.last_steal = current->sched_info.run_delay; - kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime, + kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); smp_wmb(); vcpu->arch.st.steal.version 
+= 1; - kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime, + kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); } @@ -2155,6 +2127,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_VM_HSAVE_PA: case MSR_AMD64_PATCH_LOADER: case MSR_AMD64_BU_CFG2: + case MSR_AMD64_DC_CFG: break; case MSR_EFER: @@ -2230,8 +2203,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) bool tmp = (msr == MSR_KVM_SYSTEM_TIME); if (ka->boot_vcpu_runs_old_kvmclock != tmp) - set_bit(KVM_REQ_MASTERCLOCK_UPDATE, - &vcpu->requests); + kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); ka->boot_vcpu_runs_old_kvmclock = tmp; } @@ -2243,7 +2215,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!(data & 1)) break; - if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_time, data & ~1ULL, sizeof(struct pvclock_vcpu_time_info))) vcpu->arch.pv_time_enabled = false; @@ -2264,7 +2236,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data & KVM_STEAL_RESERVED_MASK) return 1; - if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.st.stime, + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime, data & KVM_STEAL_VALID_BITS, sizeof(struct kvm_steal_time))) return 1; @@ -2331,6 +2303,21 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; vcpu->arch.osvw.status = data; break; + case MSR_PLATFORM_INFO: + if (!msr_info->host_initiated || + data & ~MSR_PLATFORM_INFO_CPUID_FAULT || + (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) && + cpuid_fault_enabled(vcpu))) + return 1; + vcpu->arch.msr_platform_info = data; + break; + case MSR_MISC_FEATURES_ENABLES: + if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT || + (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT && + !supports_cpuid_fault(vcpu))) + return 1; + vcpu->arch.msr_misc_features_enables = data; + break; default: if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) return xen_hvm_config(vcpu, data); @@ -2417,6 +2404,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_FAM10H_MMIO_CONF_BASE: case MSR_AMD64_BU_CFG2: case MSR_IA32_PERF_CTL: + case MSR_AMD64_DC_CFG: msr_info->data = 0; break; case MSR_K7_EVNTSEL0 ... 
MSR_K7_EVNTSEL3: @@ -2545,6 +2533,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; msr_info->data = vcpu->arch.osvw.status; break; + case MSR_PLATFORM_INFO: + msr_info->data = vcpu->arch.msr_platform_info; + break; + case MSR_MISC_FEATURES_ENABLES: + msr_info->data = vcpu->arch.msr_misc_features_enables; + break; default: if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data); @@ -2675,15 +2669,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SET_BOOT_CPU_ID: case KVM_CAP_SPLIT_IRQCHIP: case KVM_CAP_IMMEDIATE_EXIT: -#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT - case KVM_CAP_ASSIGN_DEV_IRQ: - case KVM_CAP_PCI_2_3: -#endif r = 1; break; case KVM_CAP_ADJUST_CLOCK: r = KVM_CLOCK_TSC_STABLE; break; + case KVM_CAP_X86_GUEST_MWAIT: + r = kvm_mwait_in_guest(); + break; case KVM_CAP_X86_SMM: /* SMBASE is usually relocated above 1M on modern chipsets, * and SMM handlers might indeed rely on 4G segment limits, @@ -2695,9 +2688,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) */ r = kvm_x86_ops->cpu_has_high_real_mode_segbase(); break; - case KVM_CAP_COALESCED_MMIO: - r = KVM_COALESCED_MMIO_PAGE_OFFSET; - break; case KVM_CAP_VAPIC: r = !kvm_x86_ops->cpu_has_accelerated_tpr(); break; @@ -2713,11 +2703,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_PV_MMU: /* obsolete */ r = 0; break; -#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT - case KVM_CAP_IOMMU: - r = iommu_present(&pci_bus_type); - break; -#endif case KVM_CAP_MCE: r = KVM_MAX_MCE_BANKS; break; @@ -2816,11 +2801,6 @@ static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) return kvm_arch_has_noncoherent_dma(vcpu->kvm); } -static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) -{ - set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); -} - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { /* Address WBINVD may be executed by guest */ @@ -2864,7 +2844,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1) kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); if (vcpu->cpu != cpu) - kvm_migrate_timers(vcpu); + kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu); vcpu->cpu = cpu; } @@ -2878,7 +2858,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) vcpu->arch.st.steal.preempted = 1; - kvm_vcpu_write_guest_offset_cached(vcpu, &vcpu->arch.st.stime, + kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime, &vcpu->arch.st.steal.preempted, offsetof(struct kvm_steal_time, preempted), sizeof(vcpu->arch.st.steal.preempted)); @@ -3124,7 +3104,14 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, return -EINVAL; if (events->exception.injected && - (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR)) + (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR || + is_guest_mode(vcpu))) + return -EINVAL; + + /* INITs are latched while in SMM */ + if (events->flags & KVM_VCPUEVENT_VALID_SMM && + (events->smi.smm || events->smi.pending) && + vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) return -EINVAL; process_nmi(vcpu); @@ -3721,22 +3708,21 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm) static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) { + struct kvm_pic *pic = kvm->arch.vpic; int r; r = 0; switch (chip->chip_id) { case KVM_IRQCHIP_PIC_MASTER: - memcpy(&chip->chip.pic, - &pic_irqchip(kvm)->pics[0], + memcpy(&chip->chip.pic, 
&pic->pics[0], sizeof(struct kvm_pic_state)); break; case KVM_IRQCHIP_PIC_SLAVE: - memcpy(&chip->chip.pic, - &pic_irqchip(kvm)->pics[1], + memcpy(&chip->chip.pic, &pic->pics[1], sizeof(struct kvm_pic_state)); break; case KVM_IRQCHIP_IOAPIC: - r = kvm_get_ioapic(kvm, &chip->chip.ioapic); + kvm_get_ioapic(kvm, &chip->chip.ioapic); break; default: r = -EINVAL; @@ -3747,32 +3733,31 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) { + struct kvm_pic *pic = kvm->arch.vpic; int r; r = 0; switch (chip->chip_id) { case KVM_IRQCHIP_PIC_MASTER: - spin_lock(&pic_irqchip(kvm)->lock); - memcpy(&pic_irqchip(kvm)->pics[0], - &chip->chip.pic, + spin_lock(&pic->lock); + memcpy(&pic->pics[0], &chip->chip.pic, sizeof(struct kvm_pic_state)); - spin_unlock(&pic_irqchip(kvm)->lock); + spin_unlock(&pic->lock); break; case KVM_IRQCHIP_PIC_SLAVE: - spin_lock(&pic_irqchip(kvm)->lock); - memcpy(&pic_irqchip(kvm)->pics[1], - &chip->chip.pic, + spin_lock(&pic->lock); + memcpy(&pic->pics[1], &chip->chip.pic, sizeof(struct kvm_pic_state)); - spin_unlock(&pic_irqchip(kvm)->lock); + spin_unlock(&pic->lock); break; case KVM_IRQCHIP_IOAPIC: - r = kvm_set_ioapic(kvm, &chip->chip.ioapic); + kvm_set_ioapic(kvm, &chip->chip.ioapic); break; default: r = -EINVAL; break; } - kvm_pic_update_irq(pic_irqchip(kvm)); + kvm_pic_update_irq(pic); return r; } @@ -4018,20 +4003,14 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_ioapic_init(kvm); if (r) { - mutex_lock(&kvm->slots_lock); kvm_pic_destroy(kvm); - mutex_unlock(&kvm->slots_lock); goto create_irqchip_unlock; } r = kvm_setup_default_irq_routing(kvm); if (r) { - mutex_lock(&kvm->slots_lock); - mutex_lock(&kvm->irq_lock); kvm_ioapic_destroy(kvm); kvm_pic_destroy(kvm); - mutex_unlock(&kvm->irq_lock); - mutex_unlock(&kvm->slots_lock); goto create_irqchip_unlock; } /* Write kvm->irq_routing before enabling irqchip_in_kernel. */ @@ -4196,10 +4175,8 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; r = 0; - local_irq_disable(); - now_ns = __get_kvmclock_ns(kvm); + now_ns = get_kvmclock_ns(kvm); kvm->arch.kvmclock_offset += user_ns.clock - now_ns; - local_irq_enable(); kvm_gen_update_masterclock(kvm); break; } @@ -4207,11 +4184,9 @@ long kvm_arch_vm_ioctl(struct file *filp, struct kvm_clock_data user_ns; u64 now_ns; - local_irq_disable(); - now_ns = __get_kvmclock_ns(kvm); + now_ns = get_kvmclock_ns(kvm); user_ns.clock = now_ns; user_ns.flags = kvm->arch.use_master_clock ? 
KVM_CLOCK_TSC_STABLE : 0; - local_irq_enable(); memset(&user_ns.pad, 0, sizeof(user_ns.pad)); r = -EFAULT; @@ -4230,7 +4205,7 @@ long kvm_arch_vm_ioctl(struct file *filp, break; } default: - r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg); + r = -ENOTTY; } out: return r; @@ -5223,6 +5198,16 @@ static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked) kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked); } +static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt) +{ + return emul_to_vcpu(ctxt)->arch.hflags; +} + +static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags) +{ + kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags); +} + static const struct x86_emulate_ops emulate_ops = { .read_gpr = emulator_read_gpr, .write_gpr = emulator_write_gpr, @@ -5262,6 +5247,8 @@ static const struct x86_emulate_ops emulate_ops = { .intercept = emulator_intercept, .get_cpuid = emulator_get_cpuid, .set_nmi_mask = emulator_set_nmi_mask, + .get_hflags = emulator_get_hflags, + .set_hflags = emulator_set_hflags, }; static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) @@ -5314,7 +5301,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK); BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK); BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK); - ctxt->emul_flags = vcpu->arch.hflags; init_decode_cache(ctxt); vcpu->arch.emulate_regs_need_sync_from_vcpu = false; @@ -5718,8 +5704,6 @@ restart: unsigned long rflags = kvm_x86_ops->get_rflags(vcpu); toggle_interruptibility(vcpu, ctxt->interruptibility); vcpu->arch.emulate_regs_need_sync_to_vcpu = false; - if (vcpu->arch.hflags != ctxt->emul_flags) - kvm_set_hflags(vcpu, ctxt->emul_flags); kvm_rip_write(vcpu, ctxt->eip); if (r == EMULATE_DONE) kvm_vcpu_check_singlestep(vcpu, rflags, &r); @@ -6869,7 +6853,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) /* * 1) We should set ->mode before checking ->requests. Please see - * the comment in kvm_make_all_cpus_request. + * the comment in kvm_vcpu_exiting_guest_mode(). * * 2) For APICv, we should set ->mode before checking PIR.ON. 
This * pairs with the memory barrier implicit in pi_test_and_set_on @@ -7051,7 +7035,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu) if (r <= 0) break; - clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); + kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu); if (kvm_cpu_has_pending_timer(vcpu)) kvm_inject_pending_timer_irqs(vcpu); @@ -7179,7 +7163,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { kvm_vcpu_block(vcpu); kvm_apic_accept_events(vcpu); - clear_bit(KVM_REQ_UNHALT, &vcpu->requests); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); r = -EAGAIN; goto out; } @@ -7355,6 +7339,12 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, mp_state->mp_state != KVM_MP_STATE_RUNNABLE) return -EINVAL; + /* INITs are latched while in SMM */ + if ((is_smm(vcpu) || vcpu->arch.smi_pending) && + (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED || + mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED)) + return -EINVAL; + if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events); @@ -7724,6 +7714,9 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (!init_event) { kvm_pmu_reset(vcpu); vcpu->arch.smbase = 0x30000; + + vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT; + vcpu->arch.msr_misc_features_enables = 0; } memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs)); @@ -8068,7 +8061,6 @@ void kvm_arch_sync_events(struct kvm *kvm) { cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work); cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work); - kvm_free_all_assigned_devices(kvm); kvm_free_pit(kvm); } @@ -8152,7 +8144,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) } if (kvm_x86_ops->vm_destroy) kvm_x86_ops->vm_destroy(kvm); - kvm_iommu_unmap_guest(kvm); kvm_pic_destroy(kvm); kvm_ioapic_destroy(kvm); kvm_free_vcpus(kvm); @@ -8385,7 +8376,7 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) if (atomic_read(&vcpu->arch.nmi_queued)) return true; - if (test_bit(KVM_REQ_SMI, &vcpu->requests)) + if (kvm_test_request(KVM_REQ_SMI, vcpu)) return true; if (kvm_arch_interrupt_allowed(vcpu) && @@ -8536,8 +8527,9 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) static int apf_put_user(struct kvm_vcpu *vcpu, u32 val) { - return kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.apf.data, &val, - sizeof(val)); + + return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val, + sizeof(val)); } void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index e8ff3e4ce38a..612067074905 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -1,6 +1,8 @@ #ifndef ARCH_X86_KVM_X86_H #define ARCH_X86_KVM_X86_H +#include <asm/processor.h> +#include <asm/mwait.h> #include <linux/kvm_host.h> #include <asm/pvclock.h> #include "kvm_cache_regs.h" @@ -212,4 +214,38 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) __rem; \ }) +static inline bool kvm_mwait_in_guest(void) +{ + unsigned int eax, ebx, ecx, edx; + + if (!cpu_has(&boot_cpu_data, X86_FEATURE_MWAIT)) + return false; + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + /* All AMD CPUs have a working MWAIT implementation */ + return true; + case X86_VENDOR_INTEL: + /* Handle Intel below */ + break; + default: + return false; + } + + /* + * Intel CPUs without CPUID5_ECX_INTERRUPT_BREAK are problematic as + * they would allow 
guest to stop the CPU completely by disabling + interrupts then invoking MWAIT. + */ + if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) + return false; + + cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); + + if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK)) + return false; + + return true; +} + #endif
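For reference, the kvm_mwait_in_guest() helper added in the x86.h hunk above only allows MWAIT to execute in the guest when CPUID leaf 5 advertises the interrupt-break capability in ECX; otherwise a guest could disable interrupts and park the physical CPU in MWAIT indefinitely. Below is a minimal user-space sketch of that same CPUID test, not kernel code: it assumes a GCC/Clang toolchain providing <cpuid.h>, and the local MWAIT_LEAF / ECX_INTERRUPT_BREAK constants are stand-ins mirroring the kernel's CPUID_MWAIT_LEAF and CPUID5_ECX_INTERRUPT_BREAK.

    /* mwait_check.c -- user-space sketch of the CPUID test that
     * kvm_mwait_in_guest() performs for Intel CPUs (leaf 5, ECX bit 1).
     */
    #include <cpuid.h>
    #include <stdio.h>

    #define MWAIT_LEAF          5
    #define ECX_INTERRUPT_BREAK 0x2  /* interrupts break MWAIT even with IF=0 */

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* __get_cpuid() returns 0 when the requested leaf exceeds the
         * CPU's maximum leaf, matching the cpuid_level check above. */
        if (!__get_cpuid(MWAIT_LEAF, &eax, &ebx, &ecx, &edx)) {
            puts("CPUID leaf 5 unavailable: MWAIT would stay intercepted");
            return 1;
        }

        if (ecx & ECX_INTERRUPT_BREAK)
            puts("interrupt-break supported: MWAIT could be left to the guest");
        else
            puts("no interrupt-break: a guest MWAIT could hang the CPU");

        return 0;
    }

Built with a plain "cc mwait_check.c", this reports whether the host satisfies the same condition the patch checks before dropping the MONITOR/MWAIT intercepts in svm.c and vmx.c.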