From 367e1319b229110a27c53221c2fa32a6aa86d4a9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 26 Aug 2009 14:57:07 +0300 Subject: KVM: Return -ENOTTY on unrecognized ioctls Not the incorrect -EINVAL. Signed-off-by: Avi Kivity --- arch/s390/kvm/kvm-s390.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 07ced89740d7..00e2ce8e91f5 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -150,7 +150,7 @@ long kvm_arch_vm_ioctl(struct file *filp, break; } default: - r = -EINVAL; + r = -ENOTTY; } return r; -- cgit v1.2.3 From 10474ae8945ce08622fd1f3464e55bd817bf2376 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 15 Sep 2009 11:37:46 +0200 Subject: KVM: Activate Virtualization On Demand X86 CPUs need to have some magic happening to enable the virtualization extensions on them. This magic can result in unpleasant results for users, like blocking other VMMs from working (vmx) or using invalid TLB entries (svm). Currently KVM activates virtualization when the respective kernel module is loaded. This blocks us from autoloading KVM modules without breaking other VMMs. To circumvent this problem at least a bit, this patch introduces on demand activation of virtualization. This means, that instead virtualization is enabled on creation of the first virtual machine and disabled on destruction of the last one. So using this, KVM can be easily autoloaded, while keeping other hypervisors usable. Signed-off-by: Alexander Graf Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 8 ++-- arch/powerpc/kvm/powerpc.c | 3 +- arch/s390/kvm/kvm-s390.c | 3 +- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/svm.c | 13 ++++-- arch/x86/kvm/vmx.c | 11 +++-- arch/x86/kvm/x86.c | 4 +- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 90 +++++++++++++++++++++++++++++++++++------ 9 files changed, 108 insertions(+), 28 deletions(-) (limited to 'arch/s390') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index f6471c882667..5fdeec5fddcf 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -124,7 +124,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) static DEFINE_SPINLOCK(vp_lock); -void kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void *garbage) { long status; long tmp_base; @@ -137,7 +137,7 @@ void kvm_arch_hardware_enable(void *garbage) slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); local_irq_restore(saved_psr); if (slot < 0) - return; + return -EINVAL; spin_lock(&vp_lock); status = ia64_pal_vp_init_env(kvm_vsa_base ? @@ -145,7 +145,7 @@ void kvm_arch_hardware_enable(void *garbage) __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base); if (status != 0) { printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n"); - return ; + return -EINVAL; } if (!kvm_vsa_base) { @@ -154,6 +154,8 @@ void kvm_arch_hardware_enable(void *garbage) } spin_unlock(&vp_lock); ia64_ptr_entry(0x3, slot); + + return 0; } void kvm_arch_hardware_disable(void *garbage) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 95af62217b6b..5902bbc2411e 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -78,8 +78,9 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) return r; } -void kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void *garbage) { + return 0; } void kvm_arch_hardware_disable(void *garbage) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 00e2ce8e91f5..544505893c9f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -74,9 +74,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { static unsigned long long *facilities; /* Section: not file related */ -void kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void *garbage) { /* every s390 is virtualization enabled ;-) */ + return 0; } void kvm_arch_hardware_disable(void *garbage) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a46e2dd9aca8..295c7c4d9c90 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -459,7 +459,7 @@ struct descriptor_table { struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ - void (*hardware_enable)(void *dummy); /* __init */ + int (*hardware_enable)(void *dummy); void (*hardware_disable)(void *dummy); void (*check_processor_compatibility)(void *rtn); int (*hardware_setup)(void); /* __init */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f54c4f9d2865..59fe4d54da11 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -316,7 +316,7 @@ static void svm_hardware_disable(void *garbage) cpu_svm_disable(); } -static void svm_hardware_enable(void *garbage) +static int svm_hardware_enable(void *garbage) { struct svm_cpu_data *svm_data; @@ -325,16 +325,20 @@ static void svm_hardware_enable(void *garbage) struct desc_struct *gdt; int me = raw_smp_processor_id(); + rdmsrl(MSR_EFER, efer); + if (efer & EFER_SVME) + return -EBUSY; + if (!has_svm()) { printk(KERN_ERR "svm_cpu_init: err EOPNOTSUPP on %d\n", me); - return; + return -EINVAL; } svm_data = per_cpu(svm_data, me); if (!svm_data) { printk(KERN_ERR "svm_cpu_init: svm_data is NULL on %d\n", me); - return; + return -EINVAL; } svm_data->asid_generation = 1; @@ -345,11 +349,12 @@ static void svm_hardware_enable(void *garbage) gdt = (struct desc_struct *)gdt_descr.base; svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); - rdmsrl(MSR_EFER, efer); wrmsrl(MSR_EFER, efer | EFER_SVME); wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(svm_data->save_area) << PAGE_SHIFT); + + return 0; } static void svm_cpu_uninit(int cpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 73cb5dd960cf..a187570e4837 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1138,12 +1138,15 @@ static __init int vmx_disabled_by_bios(void) /* locked but not enabled */ } -static void hardware_enable(void *garbage) +static int hardware_enable(void *garbage) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); u64 old; + if (read_cr4() & X86_CR4_VMXE) + return -EBUSY; + INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); rdmsrl(MSR_IA32_FEATURE_CONTROL, old); if ((old & (FEATURE_CONTROL_LOCKED | @@ -1158,6 +1161,10 @@ static void hardware_enable(void *garbage) asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr) : "memory", "cc"); + + ept_sync_global(); + + return 0; } static void vmclear_local_vcpus(void) @@ -4040,8 +4047,6 @@ static int __init vmx_init(void) if (bypass_guest_pf) kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); - ept_sync_global(); - return 0; out3: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 829e3063e2ab..3d83de8bcbf4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4691,9 +4691,9 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) return kvm_x86_ops->vcpu_reset(vcpu); } -void kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void *garbage) { - kvm_x86_ops->hardware_enable(garbage); + return kvm_x86_ops->hardware_enable(garbage); } void kvm_arch_hardware_disable(void *garbage) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c0a1cc35f080..b985a29d8175 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -345,7 +345,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu); -void kvm_arch_hardware_enable(void *garbage); +int kvm_arch_hardware_enable(void *garbage); void kvm_arch_hardware_disable(void *garbage); int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 38e4d2c34ac1..70c8cbea0a99 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -69,6 +69,8 @@ DEFINE_SPINLOCK(kvm_lock); LIST_HEAD(vm_list); static cpumask_var_t cpus_hardware_enabled; +static int kvm_usage_count = 0; +static atomic_t hardware_enable_failed; struct kmem_cache *kvm_vcpu_cache; EXPORT_SYMBOL_GPL(kvm_vcpu_cache); @@ -79,6 +81,8 @@ struct dentry *kvm_debugfs_dir; static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); +static int hardware_enable_all(void); +static void hardware_disable_all(void); static bool kvm_rebooting; @@ -339,6 +343,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { static struct kvm *kvm_create_vm(void) { + int r = 0; struct kvm *kvm = kvm_arch_create_vm(); #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET struct page *page; @@ -346,6 +351,11 @@ static struct kvm *kvm_create_vm(void) if (IS_ERR(kvm)) goto out; + + r = hardware_enable_all(); + if (r) + goto out_err_nodisable; + #ifdef CONFIG_HAVE_KVM_IRQCHIP INIT_HLIST_HEAD(&kvm->mask_notifier_list); INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); @@ -354,8 +364,8 @@ static struct kvm *kvm_create_vm(void) #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) { - kfree(kvm); - return ERR_PTR(-ENOMEM); + r = -ENOMEM; + goto out_err; } kvm->coalesced_mmio_ring = (struct kvm_coalesced_mmio_ring *)page_address(page); @@ -363,15 +373,13 @@ static struct kvm *kvm_create_vm(void) #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) { - int err; kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops; - err = mmu_notifier_register(&kvm->mmu_notifier, current->mm); - if (err) { + r = mmu_notifier_register(&kvm->mmu_notifier, current->mm); + if (r) { #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET put_page(page); #endif - kfree(kvm); - return ERR_PTR(err); + goto out_err; } } #endif @@ -395,6 +403,12 @@ static struct kvm *kvm_create_vm(void) #endif out: return kvm; + +out_err: + hardware_disable_all(); +out_err_nodisable: + kfree(kvm); + return ERR_PTR(r); } /* @@ -453,6 +467,7 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_arch_flush_shadow(kvm); #endif kvm_arch_destroy_vm(kvm); + hardware_disable_all(); mmdrop(mm); } @@ -1644,11 +1659,21 @@ static struct miscdevice kvm_dev = { static void hardware_enable(void *junk) { int cpu = raw_smp_processor_id(); + int r; if (cpumask_test_cpu(cpu, cpus_hardware_enabled)) return; + cpumask_set_cpu(cpu, cpus_hardware_enabled); - kvm_arch_hardware_enable(NULL); + + r = kvm_arch_hardware_enable(NULL); + + if (r) { + cpumask_clear_cpu(cpu, cpus_hardware_enabled); + atomic_inc(&hardware_enable_failed); + printk(KERN_INFO "kvm: enabling virtualization on " + "CPU%d failed\n", cpu); + } } static void hardware_disable(void *junk) @@ -1661,11 +1686,52 @@ static void hardware_disable(void *junk) kvm_arch_hardware_disable(NULL); } +static void hardware_disable_all_nolock(void) +{ + BUG_ON(!kvm_usage_count); + + kvm_usage_count--; + if (!kvm_usage_count) + on_each_cpu(hardware_disable, NULL, 1); +} + +static void hardware_disable_all(void) +{ + spin_lock(&kvm_lock); + hardware_disable_all_nolock(); + spin_unlock(&kvm_lock); +} + +static int hardware_enable_all(void) +{ + int r = 0; + + spin_lock(&kvm_lock); + + kvm_usage_count++; + if (kvm_usage_count == 1) { + atomic_set(&hardware_enable_failed, 0); + on_each_cpu(hardware_enable, NULL, 1); + + if (atomic_read(&hardware_enable_failed)) { + hardware_disable_all_nolock(); + r = -EBUSY; + } + } + + spin_unlock(&kvm_lock); + + return r; +} + static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, void *v) { int cpu = (long)v; + if (!kvm_usage_count) + return NOTIFY_OK; + val &= ~CPU_TASKS_FROZEN; switch (val) { case CPU_DYING: @@ -1868,13 +1934,15 @@ static void kvm_exit_debug(void) static int kvm_suspend(struct sys_device *dev, pm_message_t state) { - hardware_disable(NULL); + if (kvm_usage_count) + hardware_disable(NULL); return 0; } static int kvm_resume(struct sys_device *dev) { - hardware_enable(NULL); + if (kvm_usage_count) + hardware_enable(NULL); return 0; } @@ -1949,7 +2017,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size, goto out_free_1; } - on_each_cpu(hardware_enable, NULL, 1); r = register_cpu_notifier(&kvm_cpu_notifier); if (r) goto out_free_2; @@ -1999,7 +2066,6 @@ out_free_3: unregister_reboot_notifier(&kvm_reboot_notifier); unregister_cpu_notifier(&kvm_cpu_notifier); out_free_2: - on_each_cpu(hardware_disable, NULL, 1); out_free_1: kvm_arch_hardware_unsetup(); out_free_0a: -- cgit v1.2.3 From d7b0b5eb3000c6fb902f08c619fcd673a23d8fab Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Thu, 19 Nov 2009 14:21:16 +0100 Subject: KVM: s390: Make psw available on all exits, not just a subset This patch moves s390 processor status word into the base kvm_run struct and keeps it up-to date on all userspace exits. The userspace ABI is broken by this, however there are no applications in the wild using this. A capability check is provided so users can verify the updated API exists. Cc: stable@kernel.org Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/include/asm/kvm.h | 3 ++- arch/s390/kvm/kvm-s390.c | 25 +++++++++++++++++-------- include/linux/kvm.h | 8 ++++++-- 3 files changed, 25 insertions(+), 11 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h index 3dfcaeb5d7f4..82b32a100c7d 100644 --- a/arch/s390/include/asm/kvm.h +++ b/arch/s390/include/asm/kvm.h @@ -1,6 +1,5 @@ #ifndef __LINUX_KVM_S390_H #define __LINUX_KVM_S390_H - /* * asm-s390/kvm.h - KVM s390 specific structures and definitions * @@ -15,6 +14,8 @@ */ #include +#define __KVM_S390 + /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { /* general purpose regs for s390 */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 544505893c9f..f8bcaefd7d34 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -117,10 +117,16 @@ long kvm_arch_dev_ioctl(struct file *filp, int kvm_dev_ioctl_check_extension(long ext) { + int r; + switch (ext) { + case KVM_CAP_S390_PSW: + r = 1; + break; default: - return 0; + r = 0; } + return r; } /* Section: vm related */ @@ -420,8 +426,10 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) vcpu_load(vcpu); if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) rc = -EBUSY; - else - vcpu->arch.sie_block->gpsw = psw; + else { + vcpu->run->psw_mask = psw.mask; + vcpu->run->psw_addr = psw.addr; + } vcpu_put(vcpu); return rc; } @@ -509,9 +517,6 @@ rerun_vcpu: switch (kvm_run->exit_reason) { case KVM_EXIT_S390_SIEIC: - vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask; - vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr; - break; case KVM_EXIT_UNKNOWN: case KVM_EXIT_INTR: case KVM_EXIT_S390_RESET: @@ -520,6 +525,9 @@ rerun_vcpu: BUG(); } + vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask; + vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr; + might_fault(); do { @@ -539,8 +547,6 @@ rerun_vcpu: /* intercept cannot be handled in-kernel, prepare kvm-run */ kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; - kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask; - kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr; kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; rc = 0; @@ -552,6 +558,9 @@ rerun_vcpu: rc = 0; } + kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask; + kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr; + if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 92045a92d714..2d241da07236 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -181,6 +181,11 @@ struct kvm_run { __u64 cr8; __u64 apic_base; +#ifdef __KVM_S390 + /* the processor status word for s390 */ + __u64 psw_mask; /* psw upper half */ + __u64 psw_addr; /* psw lower half */ +#endif union { /* KVM_EXIT_UNKNOWN */ struct { @@ -232,8 +237,6 @@ struct kvm_run { /* KVM_EXIT_S390_SIEIC */ struct { __u8 icptcode; - __u64 mask; /* psw upper half */ - __u64 addr; /* psw lower half */ __u16 ipa; __u32 ipb; } s390_sieic; @@ -492,6 +495,7 @@ struct kvm_ioeventfd { #ifdef __KVM_HAVE_VCPU_EVENTS #define KVM_CAP_VCPU_EVENTS 41 #endif +#define KVM_CAP_S390_PSW 42 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From f50146bd7bdb75435638e60d4960edd9bcdf88b8 Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Mon, 30 Nov 2009 17:14:41 +0100 Subject: KVM: s390: Fix prefix register checking in arch/s390/kvm/sigp.c This patch corrects the checking of the new address for the prefix register. On s390, the prefix register is used to address the cpu's lowcore (address 0...8k). This check is supposed to verify that the memory is readable and present. copy_from_guest is a helper function, that can be used to read from guest memory. It applies prefixing, adds the start address of the guest memory in user, and then calls copy_from_user. Previous code was obviously broken for two reasons: - prefixing should not be applied here. The current prefix register is going to be updated soon, and the address we're looking for will be 0..8k after we've updated the register - we're adding the guest origin (gmsor) twice: once in subject code and once in copy_from_guest With kuli, we did not hit this problem because (a) we were lucky with previous prefix register content, and (b) our guest memory was mmaped very low into user address space. Cc: stable@kernel.org Signed-off-by: Carsten Otte Reported-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/s390/kvm/sigp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 40c8c6748cfe..15ee1111de58 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -188,9 +188,9 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, /* make sure that the new value is valid memory */ address = address & 0x7fffe000u; - if ((copy_from_guest(vcpu, &tmp, - (u64) (address + vcpu->arch.sie_block->gmsor) , 1)) || - (copy_from_guest(vcpu, &tmp, (u64) (address + + if ((copy_from_user(&tmp, (void __user *) + (address + vcpu->arch.sie_block->gmsor) , 1)) || + (copy_from_user(&tmp, (void __user *)(address + vcpu->arch.sie_block->gmsor + PAGE_SIZE), 1))) { *reg |= SIGP_STAT_INVALID_PARAMETER; return 1; /* invalid parameter */ -- cgit v1.2.3