From 98fda169290b3b28c0f2db2b8f02290c13da50ef Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Wed, 4 Sep 2013 22:32:24 +0200 Subject: kvm: remove .done from struct kvm_async_pf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit '.done' is used to mark the completion of 'async_pf_execute()', but 'cancel_work_sync()' returns true when the work was canceled, so we use it instead. Signed-off-by: Radim Krčmář Reviewed-by: Paolo Bonzini Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0fbbc7aa02cb..749bdb12cd15 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -190,7 +190,6 @@ struct kvm_async_pf { unsigned long addr; struct kvm_arch_async_pf arch; struct page *page; - bool done; }; void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 2f303b74a62fb74983c0a66e2df353be963c527c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 25 Sep 2013 13:53:07 +0200 Subject: KVM: Convert kvm_lock back to non-raw spinlock In commit e935b8372cf8 ("KVM: Convert kvm_lock to raw_spinlock"), the kvm_lock was made a raw lock. However, the kvm mmu_shrink() function tries to grab the (non-raw) mmu_lock within the scope of the raw locked kvm_lock being held. This leads to the following: BUG: sleeping function called from invalid context at kernel/rtmutex.c:659 in_atomic(): 1, irqs_disabled(): 0, pid: 55, name: kswapd0 Preemption disabled at:[] mmu_shrink+0x5c/0x1b0 [kvm] Pid: 55, comm: kswapd0 Not tainted 3.4.34_preempt-rt Call Trace: [] __might_sleep+0xfd/0x160 [] rt_spin_lock+0x24/0x50 [] mmu_shrink+0xec/0x1b0 [kvm] [] shrink_slab+0x17d/0x3a0 [] ? mem_cgroup_iter+0x130/0x260 [] balance_pgdat+0x54a/0x730 [] ? set_pgdat_percpu_threshold+0xa7/0xd0 [] kswapd+0x18f/0x490 [] ? get_parent_ip+0x11/0x50 [] ? __init_waitqueue_head+0x50/0x50 [] ? balance_pgdat+0x730/0x730 [] kthread+0xdb/0xe0 [] ? finish_task_switch+0x52/0x100 [] kernel_thread_helper+0x4/0x10 [] ? __init_kthread_worker+0x After the previous patch, kvm_lock need not be a raw spinlock anymore, so change it back. Reported-by: Paul Gortmaker Cc: kvm@vger.kernel.org Cc: gleb@redhat.com Cc: jan.kiszka@siemens.com Reviewed-by: Gleb Natapov Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/locking.txt | 2 +- arch/x86/kvm/mmu.c | 5 ++--- arch/x86/kvm/x86.c | 8 ++++---- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 18 +++++++++--------- 5 files changed, 17 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt index ba9e1c2150c2..f8869410d40c 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt @@ -132,7 +132,7 @@ See the comments in spte_has_volatile_bits() and mmu_spte_update(). ------------ Name: kvm_lock -Type: raw_spinlock +Type: spinlock_t Arch: any Protects: - vm_list diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index dce0df8150df..cf95cfe050a6 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4428,7 +4428,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) int nr_to_scan = sc->nr_to_scan; unsigned long freed = 0; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { int idx; @@ -4478,9 +4478,8 @@ unlock: break; } - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); return freed; - } static unsigned long diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e5ca72a5cdb6..187f824b1454 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5263,7 +5263,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->cpu != freq->cpu) @@ -5273,7 +5273,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va send_ipi = 1; } } - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); if (freq->old < freq->new && send_ipi) { /* @@ -5426,12 +5426,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work) struct kvm_vcpu *vcpu; int i; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests); atomic_set(&kvm_guest_has_master_clock, 0); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); } static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 749bdb12cd15..7c961e1e9270 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -142,7 +142,7 @@ struct kvm; struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; -extern raw_spinlock_t kvm_lock; +extern spinlock_t kvm_lock; extern struct list_head vm_list; struct kvm_io_range { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index eb94343c2ed2..d469114aff09 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -70,7 +70,7 @@ MODULE_LICENSE("GPL"); * kvm->lock --> kvm->slots_lock --> kvm->irq_lock */ -DEFINE_RAW_SPINLOCK(kvm_lock); +DEFINE_SPINLOCK(kvm_lock); static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); @@ -491,9 +491,9 @@ static struct kvm *kvm_create_vm(unsigned long type) if (r) goto out_err; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); return kvm; @@ -582,9 +582,9 @@ static void kvm_destroy_vm(struct kvm *kvm) struct mm_struct *mm = kvm->mm; kvm_arch_sync_events(kvm); - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_del(&kvm->vm_list); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) kvm_io_bus_destroy(kvm->buses[i]); @@ -3054,10 +3054,10 @@ static int vm_stat_get(void *_offset, u64 *val) struct kvm *kvm; *val = 0; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) *val += *(u32 *)((void *)kvm + offset); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); return 0; } @@ -3071,12 +3071,12 @@ static int vcpu_stat_get(void *_offset, u64 *val) int i; *val = 0; - raw_spin_lock(&kvm_lock); + spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) *val += *(u32 *)((void *)vcpu + offset); - raw_spin_unlock(&kvm_lock); + spin_unlock(&kvm_lock); return 0; } -- cgit v1.2.3 From 42c4e0c77ac91505ab94284b14025e3a0865c0a5 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 30 Sep 2013 14:20:07 +0530 Subject: ARM/ARM64: KVM: Implement KVM_ARM_PREFERRED_TARGET ioctl For implementing CPU=host, we need a mechanism for querying preferred VCPU target type on underlying Host. This patch implements KVM_ARM_PREFERRED_TARGET vm ioctl which returns struct kvm_vcpu_init instance containing information about preferred VCPU target type and target specific features available for it. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 13 +++++++++++++ include/uapi/linux/kvm.h | 1 + 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9c697db2787e..cc5adb9349ef 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -797,6 +797,19 @@ long kvm_arch_vm_ioctl(struct file *filp, return -EFAULT; return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); } + case KVM_ARM_PREFERRED_TARGET: { + int err; + struct kvm_vcpu_init init; + + err = kvm_vcpu_preferred_target(&init); + if (err) + return err; + + if (copy_to_user(argp, &init, sizeof(init))) + return -EFAULT; + + return 0; + } default: return -EINVAL; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 99c25338ede8..e32e776f20c0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1012,6 +1012,7 @@ struct kvm_s390_ucas_mapping { /* VM is being stopped by host */ #define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad) #define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init) +#define KVM_ARM_PREFERRED_TARGET _IOR(KVMIO, 0xaf, struct kvm_vcpu_init) #define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -- cgit v1.2.3 From 6d9d41e57440e32a3400f37aa05ef7a1a09ced64 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 2 Oct 2013 14:22:28 -0700 Subject: KVM: Move gfn_to_index to x86 specific code The gfn_to_index function relies on huge page defines which either may not make sense on systems that don't support huge pages or are defined in an unconvenient way for other architectures. Since this is x86-specific, move the function to arch/x86/include/asm/kvm_host.h. Signed-off-by: Christoffer Dall Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 7 +++++++ include/linux/kvm_host.h | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8dd143a65d60..5cbf3166257c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -79,6 +79,13 @@ #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) +static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) +{ + /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ + return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - + (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); +} + #define SELECTOR_TI_MASK (1 << 2) #define SELECTOR_RPL_MASK 0x03 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7c961e1e9270..f6dccde755f6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -841,13 +841,6 @@ static inline int memslot_id(struct kvm *kvm, gfn_t gfn) return gfn_to_memslot(kvm, gfn)->id; } -static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) -{ - /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ - return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - - (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); -} - static inline gfn_t hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot) { -- cgit v1.2.3 From f2e106692d5189303997ad7b96de8d8123aa5613 Mon Sep 17 00:00:00 2001 From: chai wen Date: Mon, 14 Oct 2013 22:22:33 +0800 Subject: KVM: Drop FOLL_GET in GUP when doing async page fault Page pinning is not mandatory in kvm async page fault processing since after async page fault event is delivered to a guest it accesses page once again and does its own GUP. Drop the FOLL_GET flag in GUP in async_pf code, and do some simplifying in check/clear processing. Suggested-by: Gleb Natapov Signed-off-by: Gu zheng Signed-off-by: chai wen Signed-off-by: Gleb Natapov --- arch/x86/kvm/x86.c | 4 ++-- include/linux/kvm_host.h | 2 +- include/trace/events/kvm.h | 10 ++++------ virt/kvm/async_pf.c | 17 +++++------------ 4 files changed, 12 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c951c71dc80b..edf2a07df3a3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7298,7 +7298,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) int r; if ((vcpu->arch.mmu.direct_map != work->arch.direct_map) || - is_error_page(work->page)) + work->wakeup_all) return; r = kvm_mmu_reload(vcpu); @@ -7408,7 +7408,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct x86_exception fault; trace_kvm_async_pf_ready(work->arch.token, work->gva); - if (is_error_page(work->page)) + if (work->wakeup_all) work->arch.token = ~0; /* broadcast wakeup */ else kvm_del_async_pf_gfn(vcpu, work->arch.gfn); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f6dccde755f6..c9d4236ab442 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -189,7 +189,7 @@ struct kvm_async_pf { gva_t gva; unsigned long addr; struct kvm_arch_async_pf arch; - struct page *page; + bool wakeup_all; }; void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 7005d1109ec9..131a0bda7aec 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -296,23 +296,21 @@ DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready, TRACE_EVENT( kvm_async_pf_completed, - TP_PROTO(unsigned long address, struct page *page, u64 gva), - TP_ARGS(address, page, gva), + TP_PROTO(unsigned long address, u64 gva), + TP_ARGS(address, gva), TP_STRUCT__entry( __field(unsigned long, address) - __field(pfn_t, pfn) __field(u64, gva) ), TP_fast_assign( __entry->address = address; - __entry->pfn = page ? page_to_pfn(page) : 0; __entry->gva = gva; ), - TP_printk("gva %#llx address %#lx pfn %#llx", __entry->gva, - __entry->address, __entry->pfn) + TP_printk("gva %#llx address %#lx", __entry->gva, + __entry->address) ); #endif diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index b197950ac4d5..8631d9c14320 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -56,7 +56,6 @@ void kvm_async_pf_vcpu_init(struct kvm_vcpu *vcpu) static void async_pf_execute(struct work_struct *work) { - struct page *page = NULL; struct kvm_async_pf *apf = container_of(work, struct kvm_async_pf, work); struct mm_struct *mm = apf->mm; @@ -68,13 +67,12 @@ static void async_pf_execute(struct work_struct *work) use_mm(mm); down_read(&mm->mmap_sem); - get_user_pages(current, mm, addr, 1, 1, 0, &page, NULL); + get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL); up_read(&mm->mmap_sem); unuse_mm(mm); spin_lock(&vcpu->async_pf.lock); list_add_tail(&apf->link, &vcpu->async_pf.done); - apf->page = page; spin_unlock(&vcpu->async_pf.lock); /* @@ -82,7 +80,7 @@ static void async_pf_execute(struct work_struct *work) * this point */ - trace_kvm_async_pf_completed(addr, page, gva); + trace_kvm_async_pf_completed(addr, gva); if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); @@ -112,8 +110,6 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu) list_entry(vcpu->async_pf.done.next, typeof(*work), link); list_del(&work->link); - if (!is_error_page(work->page)) - kvm_release_page_clean(work->page); kmem_cache_free(async_pf_cache, work); } spin_unlock(&vcpu->async_pf.lock); @@ -133,14 +129,11 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu) list_del(&work->link); spin_unlock(&vcpu->async_pf.lock); - if (work->page) - kvm_arch_async_page_ready(vcpu, work); + kvm_arch_async_page_ready(vcpu, work); kvm_arch_async_page_present(vcpu, work); list_del(&work->queue); vcpu->async_pf.queued--; - if (!is_error_page(work->page)) - kvm_release_page_clean(work->page); kmem_cache_free(async_pf_cache, work); } } @@ -163,7 +156,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, if (!work) return 0; - work->page = NULL; + work->wakeup_all = false; work->vcpu = vcpu; work->gva = gva; work->addr = gfn_to_hva(vcpu->kvm, gfn); @@ -203,7 +196,7 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu) if (!work) return -ENOMEM; - work->page = KVM_ERR_PTR_BAD_PAGE; + work->wakeup_all = true; INIT_LIST_HEAD(&work->queue); /* for list_del to work */ spin_lock(&vcpu->async_pf.lock); -- cgit v1.2.3 From 5587027ce9d59a57aecaa190be1c8e560aaff45d Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 7 Oct 2013 22:18:00 +0530 Subject: kvm: Add struct kvm arg to memslot APIs We will use that in the later patch to find the kvm ops handler Signed-off-by: Aneesh Kumar K.V Signed-off-by: Alexander Graf --- arch/arm/kvm/arm.c | 5 +++-- arch/ia64/kvm/kvm-ia64.c | 5 +++-- arch/mips/kvm/kvm_mips.c | 5 +++-- arch/powerpc/include/asm/kvm_ppc.h | 6 ++++-- arch/powerpc/kvm/book3s.c | 4 ++-- arch/powerpc/kvm/booke.c | 4 ++-- arch/powerpc/kvm/powerpc.c | 9 +++++---- arch/s390/kvm/kvm-s390.c | 5 +++-- arch/x86/kvm/x86.c | 5 +++-- include/linux/kvm_host.h | 5 +++-- virt/kvm/kvm_main.c | 12 ++++++------ 11 files changed, 37 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index cc5adb9349ef..e312e4a53f8d 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -152,12 +152,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index bdfd8789b376..985bf80c622e 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1550,12 +1550,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index a7b044536de4..73b34827826c 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -198,12 +198,13 @@ kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) return -ENOIOCTLCMD; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c13f15db476c..20f461637090 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -134,9 +134,11 @@ extern struct page *kvm_alloc_hpt(unsigned long nr_pages); extern void kvm_release_hpt(struct page *page, unsigned long nr_pages); extern int kvmppc_core_init_vm(struct kvm *kvm); extern void kvmppc_core_destroy_vm(struct kvm *kvm); -extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free, +extern void kvmppc_core_free_memslot(struct kvm *kvm, + struct kvm_memory_slot *free, struct kvm_memory_slot *dont); -extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, +extern int kvmppc_core_create_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot, unsigned long npages); extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 39d2994f9d27..130fe1d75bac 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -761,13 +761,13 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) return kvmppc_ops->get_dirty_log(kvm, log); } -void kvmppc_core_free_memslot(struct kvm_memory_slot *free, +void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { kvmppc_ops->free_memslot(free, dont); } -int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, +int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { return kvmppc_ops->create_memslot(slot, npages); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 1769354d0bf8..cb2d986a3382 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1662,12 +1662,12 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) return -ENOTSUPP; } -void kvmppc_core_free_memslot(struct kvm_memory_slot *free, +void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvmppc_core_create_memslot(struct kvm_memory_slot *slot, +int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { return 0; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9aaa07efa4b6..b103d747934a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -417,15 +417,16 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { - kvmppc_core_free_memslot(free, dont); + kvmppc_core_free_memslot(kvm, free, dont); } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { - return kvmppc_core_create_memslot(slot, npages); + return kvmppc_core_create_memslot(kvm, slot, npages); } void kvm_arch_memslots_updated(struct kvm *kvm) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 1e4e7b97337a..bedda67cc222 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1089,12 +1089,13 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { return 0; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index edf2a07df3a3..666526a55c46 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7080,7 +7080,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kfree(rcu_dereference_check(kvm->arch.apic_map, 1)); } -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { int i; @@ -7101,7 +7101,8 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free, } } -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages) { int i; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c9d4236ab442..8b0107dc2067 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -507,9 +507,10 @@ int kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); int __kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem); -void kvm_arch_free_memslot(struct kvm_memory_slot *free, +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont); -int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); +int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, + unsigned long npages); void kvm_arch_memslots_updated(struct kvm *kvm); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 94c6e3f6f244..0932c3b64155 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -542,13 +542,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) /* * Free any memory in @free but not in @dont. */ -static void kvm_free_physmem_slot(struct kvm_memory_slot *free, +static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { if (!dont || free->dirty_bitmap != dont->dirty_bitmap) kvm_destroy_dirty_bitmap(free); - kvm_arch_free_memslot(free, dont); + kvm_arch_free_memslot(kvm, free, dont); free->npages = 0; } @@ -559,7 +559,7 @@ void kvm_free_physmem(struct kvm *kvm) struct kvm_memory_slot *memslot; kvm_for_each_memslot(memslot, slots) - kvm_free_physmem_slot(memslot, NULL); + kvm_free_physmem_slot(kvm, memslot, NULL); kfree(kvm->memslots); } @@ -823,7 +823,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if (change == KVM_MR_CREATE) { new.userspace_addr = mem->userspace_addr; - if (kvm_arch_create_memslot(&new, npages)) + if (kvm_arch_create_memslot(kvm, &new, npages)) goto out_free; } @@ -899,7 +899,7 @@ int __kvm_set_memory_region(struct kvm *kvm, kvm_arch_commit_memory_region(kvm, mem, &old, change); - kvm_free_physmem_slot(&old, &new); + kvm_free_physmem_slot(kvm, &old, &new); kfree(old_memslots); return 0; @@ -907,7 +907,7 @@ int __kvm_set_memory_region(struct kvm *kvm, out_slots: kfree(slots); out_free: - kvm_free_physmem_slot(&new, &old); + kvm_free_physmem_slot(kvm, &new, &old); out: return r; } -- cgit v1.2.3 From cbbc58d4fdfab1a39a6ac1b41fcb17885952157a Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 7 Oct 2013 22:18:01 +0530 Subject: kvm: powerpc: book3s: Allow the HV and PR selection per virtual machine This moves the kvmppc_ops callbacks to be a per VM entity. This enables us to select HV and PR mode when creating a VM. We also allow both kvm-hv and kvm-pr kernel module to be loaded. To achieve this we move /dev/kvm ownership to kvm.ko module. Depending on which KVM mode we select during VM creation we take a reference count on respective module Signed-off-by: Aneesh Kumar K.V [agraf: fix coding style] Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 1 + arch/powerpc/include/asm/kvm_ppc.h | 7 +-- arch/powerpc/kvm/44x.c | 7 ++- arch/powerpc/kvm/book3s.c | 89 +++++++++++++++++++++++++------------ arch/powerpc/kvm/book3s.h | 2 + arch/powerpc/kvm/book3s_hv.c | 18 ++++---- arch/powerpc/kvm/book3s_pr.c | 25 +++++++---- arch/powerpc/kvm/book3s_xics.c | 2 +- arch/powerpc/kvm/booke.c | 22 ++++----- arch/powerpc/kvm/e500.c | 8 +++- arch/powerpc/kvm/e500mc.c | 6 ++- arch/powerpc/kvm/emulate.c | 11 ++--- arch/powerpc/kvm/powerpc.c | 76 ++++++++++++++++++++++--------- include/uapi/linux/kvm.h | 4 ++ 14 files changed, 187 insertions(+), 91 deletions(-) (limited to 'include') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 61ce4dca45d3..237d1d25b448 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -270,6 +270,7 @@ struct kvm_arch { #ifdef CONFIG_KVM_XICS struct kvmppc_xics *xics; #endif + struct kvmppc_ops *kvm_ops; }; /* diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 20f461637090..3069cf4dcc88 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -182,6 +182,7 @@ union kvmppc_one_reg { }; struct kvmppc_ops { + struct module *owner; bool is_hv_enabled; int (*get_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int (*set_sregs)(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); @@ -217,7 +218,6 @@ struct kvmppc_ops { unsigned long npages); int (*init_vm)(struct kvm *kvm); void (*destroy_vm)(struct kvm *kvm); - int (*check_processor_compat)(void); int (*get_smmu_info)(struct kvm *kvm, struct kvm_ppc_smmu_info *info); int (*emulate_op)(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance); @@ -229,7 +229,8 @@ struct kvmppc_ops { }; -extern struct kvmppc_ops *kvmppc_ops; +extern struct kvmppc_ops *kvmppc_hv_ops; +extern struct kvmppc_ops *kvmppc_pr_ops; /* * Cuts out inst bits with ordering according to spec. @@ -326,7 +327,7 @@ static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) { - kvmppc_ops->fast_vcpu_kick(vcpu); + vcpu->kvm->arch.kvm_ops->fast_vcpu_kick(vcpu); } #else diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index a765bcd74fbb..93221e87b911 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -213,16 +213,19 @@ static int __init kvmppc_44x_init(void) if (r) goto err_out; - r = kvm_init(&kvm_ops_44x, sizeof(struct kvmppc_vcpu_44x), - 0, THIS_MODULE); + r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); if (r) goto err_out; + kvm_ops_44x.owner = THIS_MODULE; + kvmppc_pr_ops = &kvm_ops_44x; + err_out: return r; } static void __exit kvmppc_44x_exit(void) { + kvmppc_pr_ops = NULL; kvmppc_booke_exit(); } diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 130fe1d75bac..ad8f6ed3f136 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -34,6 +34,7 @@ #include #include +#include "book3s.h" #include "trace.h" #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU @@ -71,7 +72,7 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) { - if (!kvmppc_ops->is_hv_enabled) + if (!vcpu->kvm->arch.kvm_ops->is_hv_enabled) return to_book3s(vcpu)->hior; return 0; } @@ -79,7 +80,7 @@ static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu, unsigned long pending_now, unsigned long old_pending) { - if (kvmppc_ops->is_hv_enabled) + if (vcpu->kvm->arch.kvm_ops->is_hv_enabled) return; if (pending_now) vcpu->arch.shared->int_pending = 1; @@ -93,7 +94,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) ulong crit_r1; bool crit; - if (kvmppc_ops->is_hv_enabled) + if (vcpu->kvm->arch.kvm_ops->is_hv_enabled) return false; crit_raw = vcpu->arch.shared->critical; @@ -477,13 +478,13 @@ void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return kvmppc_ops->get_sregs(vcpu, sregs); + return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs); } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - return kvmppc_ops->set_sregs(vcpu, sregs); + return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); } int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) @@ -562,7 +563,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) if (size > sizeof(val)) return -EINVAL; - r = kvmppc_ops->get_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); if (r == -EINVAL) { r = 0; switch (reg->id) { @@ -641,7 +642,7 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size)) return -EFAULT; - r = kvmppc_ops->set_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); if (r == -EINVAL) { r = 0; switch (reg->id) { @@ -702,23 +703,23 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - kvmppc_ops->vcpu_load(vcpu, cpu); + vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu); } void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) { - kvmppc_ops->vcpu_put(vcpu); + vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu); } void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) { - kvmppc_ops->set_msr(vcpu, msr); + vcpu->kvm->arch.kvm_ops->set_msr(vcpu, msr); } EXPORT_SYMBOL_GPL(kvmppc_set_msr); int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { - return kvmppc_ops->vcpu_run(kvm_run, vcpu); + return vcpu->kvm->arch.kvm_ops->vcpu_run(kvm_run, vcpu); } int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, @@ -743,84 +744,84 @@ void kvmppc_decrementer_func(unsigned long data) struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { - return kvmppc_ops->vcpu_create(kvm, id); + return kvm->arch.kvm_ops->vcpu_create(kvm, id); } void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { - kvmppc_ops->vcpu_free(vcpu); + vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu); } int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) { - return kvmppc_ops->check_requests(vcpu); + return vcpu->kvm->arch.kvm_ops->check_requests(vcpu); } int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { - return kvmppc_ops->get_dirty_log(kvm, log); + return kvm->arch.kvm_ops->get_dirty_log(kvm, log); } void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { - kvmppc_ops->free_memslot(free, dont); + kvm->arch.kvm_ops->free_memslot(free, dont); } int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, unsigned long npages) { - return kvmppc_ops->create_memslot(slot, npages); + return kvm->arch.kvm_ops->create_memslot(slot, npages); } void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) { - kvmppc_ops->flush_memslot(kvm, memslot); + kvm->arch.kvm_ops->flush_memslot(kvm, memslot); } int kvmppc_core_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_userspace_memory_region *mem) { - return kvmppc_ops->prepare_memory_region(kvm, memslot, mem); + return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem); } void kvmppc_core_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old) { - kvmppc_ops->commit_memory_region(kvm, mem, old); + kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old); } int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) { - return kvmppc_ops->unmap_hva(kvm, hva); + return kvm->arch.kvm_ops->unmap_hva(kvm, hva); } EXPORT_SYMBOL_GPL(kvm_unmap_hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) { - return kvmppc_ops->unmap_hva_range(kvm, start, end); + return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end); } int kvm_age_hva(struct kvm *kvm, unsigned long hva) { - return kvmppc_ops->age_hva(kvm, hva); + return kvm->arch.kvm_ops->age_hva(kvm, hva); } int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) { - return kvmppc_ops->test_age_hva(kvm, hva); + return kvm->arch.kvm_ops->test_age_hva(kvm, hva); } void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) { - kvmppc_ops->set_spte_hva(kvm, hva, pte); + kvm->arch.kvm_ops->set_spte_hva(kvm, hva, pte); } void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) { - kvmppc_ops->mmu_destroy(vcpu); + vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu); } int kvmppc_core_init_vm(struct kvm *kvm) @@ -831,12 +832,12 @@ int kvmppc_core_init_vm(struct kvm *kvm) INIT_LIST_HEAD(&kvm->arch.rtas_tokens); #endif - return kvmppc_ops->init_vm(kvm); + return kvm->arch.kvm_ops->init_vm(kvm); } void kvmppc_core_destroy_vm(struct kvm *kvm) { - kvmppc_ops->destroy_vm(kvm); + kvm->arch.kvm_ops->destroy_vm(kvm); #ifdef CONFIG_PPC64 kvmppc_rtas_tokens_free(kvm); @@ -846,5 +847,35 @@ void kvmppc_core_destroy_vm(struct kvm *kvm) int kvmppc_core_check_processor_compat(void) { - return kvmppc_ops->check_processor_compat(); + /* + * We always return 0 for book3s. We check + * for compatability while loading the HV + * or PR module + */ + return 0; +} + +static int kvmppc_book3s_init(void) +{ + int r; + + r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); + if (r) + return r; +#ifdef CONFIG_KVM_BOOK3S_32 + r = kvmppc_book3s_init_pr(); +#endif + return r; + +} + +static void kvmppc_book3s_exit(void) +{ +#ifdef CONFIG_KVM_BOOK3S_32 + kvmppc_book3s_exit_pr(); +#endif + kvm_exit(); } + +module_init(kvmppc_book3s_init); +module_exit(kvmppc_book3s_exit); diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h index 9e5b3a341943..4bf956cf94d6 100644 --- a/arch/powerpc/kvm/book3s.h +++ b/arch/powerpc/kvm/book3s.h @@ -28,5 +28,7 @@ extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val); extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val); +extern int kvmppc_book3s_init_pr(void); +extern void kvmppc_book3s_exit_pr(void); #endif diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9e954a81c078..8743048881b7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2159,7 +2159,7 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, return r; } -static struct kvmppc_ops kvmppc_hv_ops = { +static struct kvmppc_ops kvm_ops_hv = { .is_hv_enabled = true, .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, @@ -2186,7 +2186,6 @@ static struct kvmppc_ops kvmppc_hv_ops = { .create_memslot = kvmppc_core_create_memslot_hv, .init_vm = kvmppc_core_init_vm_hv, .destroy_vm = kvmppc_core_destroy_vm_hv, - .check_processor_compat = kvmppc_core_check_processor_compat_hv, .get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv, .emulate_op = kvmppc_core_emulate_op_hv, .emulate_mtspr = kvmppc_core_emulate_mtspr_hv, @@ -2198,20 +2197,23 @@ static struct kvmppc_ops kvmppc_hv_ops = { static int kvmppc_book3s_init_hv(void) { int r; - - r = kvm_init(&kvmppc_hv_ops, sizeof(struct kvm_vcpu), 0, THIS_MODULE); - - if (r) + /* + * FIXME!! Do we need to check on all cpus ? + */ + r = kvmppc_core_check_processor_compat_hv(); + if (r < 0) return r; - r = kvmppc_mmu_hv_init(); + kvm_ops_hv.owner = THIS_MODULE; + kvmppc_hv_ops = &kvm_ops_hv; + r = kvmppc_mmu_hv_init(); return r; } static void kvmppc_book3s_exit_hv(void) { - kvm_exit(); + kvmppc_hv_ops = NULL; } module_init(kvmppc_book3s_init_hv); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 7f583a482161..fbd985f0cb02 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1525,7 +1525,7 @@ static long kvm_arch_vm_ioctl_pr(struct file *filp, return -ENOTTY; } -static struct kvmppc_ops kvmppc_pr_ops = { +static struct kvmppc_ops kvm_ops_pr = { .is_hv_enabled = false, .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_pr, .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_pr, @@ -1552,7 +1552,6 @@ static struct kvmppc_ops kvmppc_pr_ops = { .create_memslot = kvmppc_core_create_memslot_pr, .init_vm = kvmppc_core_init_vm_pr, .destroy_vm = kvmppc_core_destroy_vm_pr, - .check_processor_compat = kvmppc_core_check_processor_compat_pr, .get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr, .emulate_op = kvmppc_core_emulate_op_pr, .emulate_mtspr = kvmppc_core_emulate_mtspr_pr, @@ -1561,27 +1560,35 @@ static struct kvmppc_ops kvmppc_pr_ops = { .arch_vm_ioctl = kvm_arch_vm_ioctl_pr, }; -static int kvmppc_book3s_init_pr(void) + +int kvmppc_book3s_init_pr(void) { int r; - r = kvm_init(&kvmppc_pr_ops, sizeof(struct kvm_vcpu), 0, THIS_MODULE); - - if (r) + r = kvmppc_core_check_processor_compat_pr(); + if (r < 0) return r; - r = kvmppc_mmu_hpte_sysinit(); + kvm_ops_pr.owner = THIS_MODULE; + kvmppc_pr_ops = &kvm_ops_pr; + r = kvmppc_mmu_hpte_sysinit(); return r; } -static void kvmppc_book3s_exit_pr(void) +void kvmppc_book3s_exit_pr(void) { + kvmppc_pr_ops = NULL; kvmppc_mmu_hpte_sysexit(); - kvm_exit(); } +/* + * We only support separate modules for book3s 64 + */ +#ifdef CONFIG_PPC_BOOK3S_64 + module_init(kvmppc_book3s_init_pr); module_exit(kvmppc_book3s_exit_pr); MODULE_LICENSE("GPL"); +#endif diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index c3c832b27ee5..f7a5108a3483 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -818,7 +818,7 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) } /* Check for real mode returning too hard */ - if (xics->real_mode && kvmppc_ops->is_hv_enabled) + if (xics->real_mode && vcpu->kvm->arch.kvm_ops->is_hv_enabled) return kvmppc_xics_rm_complete(vcpu, req); switch (req) { diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index cb2d986a3382..15d0149511eb 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1472,7 +1472,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, get_sregs_base(vcpu, sregs); get_sregs_arch206(vcpu, sregs); - return kvmppc_ops->get_sregs(vcpu, sregs); + return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs); } int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, @@ -1491,7 +1491,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, if (ret < 0) return ret; - return kvmppc_ops->set_sregs(vcpu, sregs); + return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs); } int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) @@ -1548,7 +1548,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) val = get_reg_val(reg->id, vcpu->arch.vrsave); break; default: - r = kvmppc_ops->get_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val); break; } @@ -1631,7 +1631,7 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) vcpu->arch.vrsave = set_reg_val(reg->id, val); break; default: - r = kvmppc_ops->set_one_reg(vcpu, reg->id, &val); + r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val); break; } @@ -1911,37 +1911,37 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu) void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) { - kvmppc_ops->mmu_destroy(vcpu); + vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu); } int kvmppc_core_init_vm(struct kvm *kvm) { - return kvmppc_ops->init_vm(kvm); + return kvm->arch.kvm_ops->init_vm(kvm); } struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) { - return kvmppc_ops->vcpu_create(kvm, id); + return kvm->arch.kvm_ops->vcpu_create(kvm, id); } void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { - kvmppc_ops->vcpu_free(vcpu); + vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu); } void kvmppc_core_destroy_vm(struct kvm *kvm) { - kvmppc_ops->destroy_vm(kvm); + kvm->arch.kvm_ops->destroy_vm(kvm); } void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - kvmppc_ops->vcpu_load(vcpu, cpu); + vcpu->kvm->arch.kvm_ops->vcpu_load(vcpu, cpu); } void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) { - kvmppc_ops->vcpu_put(vcpu); + vcpu->kvm->arch.kvm_ops->vcpu_put(vcpu); } int __init kvmppc_booke_init(void) diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index d225d5ebddcc..497b142f651c 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -555,13 +555,19 @@ static int __init kvmppc_e500_init(void) flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers + ivor[max_ivor] + handler_len); - r = kvm_init(&kvm_ops_e500, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); + r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); + if (r) + goto err_out; + kvm_ops_e500.owner = THIS_MODULE; + kvmppc_pr_ops = &kvm_ops_e500; + err_out: return r; } static void __exit kvmppc_e500_exit(void) { + kvmppc_pr_ops = NULL; kvmppc_booke_exit(); } diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index db6a383401c7..4132cd2fc171 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -373,15 +373,19 @@ static int __init kvmppc_e500mc_init(void) kvmppc_init_lpid(64); kvmppc_claim_lpid(0); /* host */ - r = kvm_init(&kvm_ops_e500mc, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); + r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); if (r) goto err_out; + kvm_ops_e500mc.owner = THIS_MODULE; + kvmppc_pr_ops = &kvm_ops_e500mc; + err_out: return r; } static void __exit kvmppc_e500mc_exit(void) { + kvmppc_pr_ops = NULL; kvmppc_booke_exit(); } diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index de9a340d22ed..2f9a0873b44f 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -130,8 +130,8 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_PIR: break; default: - emulated = kvmppc_ops->emulate_mtspr(vcpu, sprn, - spr_val); + emulated = vcpu->kvm->arch.kvm_ops->emulate_mtspr(vcpu, sprn, + spr_val); if (emulated == EMULATE_FAIL) printk(KERN_INFO "mtspr: unknown spr " "0x%x\n", sprn); @@ -191,8 +191,8 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) spr_val = kvmppc_get_dec(vcpu, get_tb()); break; default: - emulated = kvmppc_ops->emulate_mfspr(vcpu, sprn, - &spr_val); + emulated = vcpu->kvm->arch.kvm_ops->emulate_mfspr(vcpu, sprn, + &spr_val); if (unlikely(emulated == EMULATE_FAIL)) { printk(KERN_INFO "mfspr: unknown spr " "0x%x\n", sprn); @@ -464,7 +464,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) } if (emulated == EMULATE_FAIL) { - emulated = kvmppc_ops->emulate_op(run, vcpu, inst, &advance); + emulated = vcpu->kvm->arch.kvm_ops->emulate_op(run, vcpu, inst, + &advance); if (emulated == EMULATE_AGAIN) { advance = 0; } else if (emulated == EMULATE_FAIL) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b103d747934a..0320c1721caa 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -39,7 +40,11 @@ #define CREATE_TRACE_POINTS #include "trace.h" -struct kvmppc_ops *kvmppc_ops; +struct kvmppc_ops *kvmppc_hv_ops; +EXPORT_SYMBOL_GPL(kvmppc_hv_ops); +struct kvmppc_ops *kvmppc_pr_ops; +EXPORT_SYMBOL_GPL(kvmppc_pr_ops); + int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { @@ -195,7 +200,7 @@ int kvmppc_sanity_check(struct kvm_vcpu *vcpu) goto out; /* HV KVM can only do PAPR mode for now */ - if (!vcpu->arch.papr_enabled && kvmppc_ops->is_hv_enabled) + if (!vcpu->arch.papr_enabled && vcpu->kvm->arch.kvm_ops->is_hv_enabled) goto out; #ifdef CONFIG_KVM_BOOKE_HV @@ -271,10 +276,35 @@ void kvm_arch_check_processor_compat(void *rtn) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { - if (type) - return -EINVAL; - + struct kvmppc_ops *kvm_ops = NULL; + /* + * if we have both HV and PR enabled, default is HV + */ + if (type == 0) { + if (kvmppc_hv_ops) + kvm_ops = kvmppc_hv_ops; + else + kvm_ops = kvmppc_pr_ops; + if (!kvm_ops) + goto err_out; + } else if (type == KVM_VM_PPC_HV) { + if (!kvmppc_hv_ops) + goto err_out; + kvm_ops = kvmppc_hv_ops; + } else if (type == KVM_VM_PPC_PR) { + if (!kvmppc_pr_ops) + goto err_out; + kvm_ops = kvmppc_pr_ops; + } else + goto err_out; + + if (kvm_ops->owner && !try_module_get(kvm_ops->owner)) + return -ENOENT; + + kvm->arch.kvm_ops = kvm_ops; return kvmppc_core_init_vm(kvm); +err_out: + return -EINVAL; } void kvm_arch_destroy_vm(struct kvm *kvm) @@ -294,6 +324,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvmppc_core_destroy_vm(kvm); mutex_unlock(&kvm->lock); + + /* drop the module reference */ + module_put(kvm->arch.kvm_ops->owner); } void kvm_arch_sync_events(struct kvm *kvm) @@ -303,6 +336,10 @@ void kvm_arch_sync_events(struct kvm *kvm) int kvm_dev_ioctl_check_extension(long ext) { int r; + /* FIXME!! + * Should some of this be vm ioctl ? is it possible now ? + */ + int hv_enabled = kvmppc_hv_ops ? 1 : 0; switch (ext) { #ifdef CONFIG_BOOKE @@ -329,7 +366,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_SW_TLB: #endif /* We support this only for PR */ - r = !kvmppc_ops->is_hv_enabled; + r = !hv_enabled; break; #ifdef CONFIG_KVM_MMIO case KVM_CAP_COALESCED_MMIO: @@ -354,13 +391,13 @@ int kvm_dev_ioctl_check_extension(long ext) #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE case KVM_CAP_PPC_SMT: - if (kvmppc_ops->is_hv_enabled) + if (hv_enabled) r = threads_per_core; else r = 0; break; case KVM_CAP_PPC_RMA: - r = kvmppc_ops->is_hv_enabled; + r = hv_enabled; /* PPC970 requires an RMA */ if (r && cpu_has_feature(CPU_FTR_ARCH_201)) r = 2; @@ -368,7 +405,7 @@ int kvm_dev_ioctl_check_extension(long ext) #endif case KVM_CAP_SYNC_MMU: #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - if (kvmppc_ops->is_hv_enabled) + if (hv_enabled) r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0; else r = 0; @@ -380,7 +417,7 @@ int kvm_dev_ioctl_check_extension(long ext) break; #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE case KVM_CAP_PPC_HTAB_FD: - r = kvmppc_ops->is_hv_enabled; + r = hv_enabled; break; #endif case KVM_CAP_NR_VCPUS: @@ -390,7 +427,7 @@ int kvm_dev_ioctl_check_extension(long ext) * will have secondary threads "offline"), and for other KVM * implementations just count online CPUs. */ - if (kvmppc_ops->is_hv_enabled) + if (hv_enabled) r = num_present_cpus(); else r = num_online_cpus(); @@ -1039,9 +1076,10 @@ long kvm_arch_vm_ioctl(struct file *filp, } case KVM_PPC_GET_SMMU_INFO: { struct kvm_ppc_smmu_info info; + struct kvm *kvm = filp->private_data; memset(&info, 0, sizeof(info)); - r = kvmppc_ops->get_smmu_info(kvm, &info); + r = kvm->arch.kvm_ops->get_smmu_info(kvm, &info); if (r >= 0 && copy_to_user(argp, &info, sizeof(info))) r = -EFAULT; break; @@ -1052,9 +1090,10 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_rtas_define_token(kvm, argp); break; } - default: - r = kvmppc_ops->arch_vm_ioctl(filp, ioctl, arg); - + default: { + struct kvm *kvm = filp->private_data; + r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg); + } #else /* CONFIG_PPC_BOOK3S_64 */ default: r = -ENOTTY; @@ -1104,15 +1143,10 @@ EXPORT_SYMBOL_GPL(kvmppc_init_lpid); int kvm_arch_init(void *opaque) { - if (kvmppc_ops) { - printk(KERN_ERR "kvm: already loaded the other module\n"); - return -EEXIST; - } - kvmppc_ops = (struct kvmppc_ops *)opaque; return 0; } void kvm_arch_exit(void) { - kvmppc_ops = NULL; + } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e32e776f20c0..5b5341f78368 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -518,6 +518,10 @@ struct kvm_ppc_smmu_info { /* machine type bits, to be used as argument to KVM_CREATE_VM */ #define KVM_VM_S390_UCONTROL 1 +/* on ppc, 0 indicate default, 1 should force HV and 2 PR */ +#define KVM_VM_PPC_HV 1 +#define KVM_VM_PPC_PR 2 + #define KVM_S390_SIE_PAGE_OFFSET 1 /* -- cgit v1.2.3 From 9c15bb1d0a8411f9bb3395d21d5309bde7da0c1c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 22 Sep 2013 16:44:50 +0200 Subject: kvm: Add KVM_GET_EMULATED_CPUID Add a kvm ioctl which states which system functionality kvm emulates. The format used is that of CPUID and we return the corresponding CPUID bits set for which we do emulate functionality. Make sure ->padding is being passed on clean from userspace so that we can use it for something in the future, after the ioctl gets cast in stone. s/kvm_dev_ioctl_get_supported_cpuid/kvm_dev_ioctl_get_cpuid/ while at it. Signed-off-by: Borislav Petkov Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 77 +++++++++++++++++++++++++++++++++++++-- arch/x86/include/uapi/asm/kvm.h | 6 +-- arch/x86/kvm/cpuid.c | 57 ++++++++++++++++++++++++++--- arch/x86/kvm/cpuid.h | 5 ++- arch/x86/kvm/x86.c | 9 +++-- include/uapi/linux/kvm.h | 2 + 6 files changed, 139 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a89a5ee0b940..488964414f04 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1122,9 +1122,9 @@ struct kvm_cpuid2 { struct kvm_cpuid_entry2 entries[0]; }; -#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 -#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 -#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) +#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) +#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) struct kvm_cpuid_entry2 { __u32 function; @@ -2684,6 +2684,77 @@ and usually define the validity of a groups of registers. (e.g. one bit }; +4.81 KVM_GET_EMULATED_CPUID + +Capability: KVM_CAP_EXT_EMUL_CPUID +Architectures: x86 +Type: system ioctl +Parameters: struct kvm_cpuid2 (in/out) +Returns: 0 on success, -1 on error + +struct kvm_cpuid2 { + __u32 nent; + __u32 flags; + struct kvm_cpuid_entry2 entries[0]; +}; + +The member 'flags' is used for passing flags from userspace. + +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) +#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) +#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) + +struct kvm_cpuid_entry2 { + __u32 function; + __u32 index; + __u32 flags; + __u32 eax; + __u32 ebx; + __u32 ecx; + __u32 edx; + __u32 padding[3]; +}; + +This ioctl returns x86 cpuid features which are emulated by +kvm.Userspace can use the information returned by this ioctl to query +which features are emulated by kvm instead of being present natively. + +Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2 +structure with the 'nent' field indicating the number of entries in +the variable-size array 'entries'. If the number of entries is too low +to describe the cpu capabilities, an error (E2BIG) is returned. If the +number is too high, the 'nent' field is adjusted and an error (ENOMEM) +is returned. If the number is just right, the 'nent' field is adjusted +to the number of valid entries in the 'entries' array, which is then +filled. + +The entries returned are the set CPUID bits of the respective features +which kvm emulates, as returned by the CPUID instruction, with unknown +or unsupported feature bits cleared. + +Features like x2apic, for example, may not be present in the host cpu +but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be +emulated efficiently and thus not included here. + +The fields in each entry are defined as follows: + + function: the eax value used to obtain the entry + index: the ecx value used to obtain the entry (for entries that are + affected by ecx) + flags: an OR of zero or more of the following: + KVM_CPUID_FLAG_SIGNIFCANT_INDEX: + if the index field is valid + KVM_CPUID_FLAG_STATEFUL_FUNC: + if cpuid for this function returns different values for successive + invocations; there will be several entries with the same function, + all with this flag set + KVM_CPUID_FLAG_STATE_READ_NEXT: + for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is + the first entry to be read by a cpu + eax, ebx, ecx, edx: the values returned by the cpuid instruction for + this function/index combination + + 6. Capabilities that can be enabled ----------------------------------- diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 5d9a3033b3d7..d3a87780c70b 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -211,9 +211,9 @@ struct kvm_cpuid_entry2 { __u32 padding[3]; }; -#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1 -#define KVM_CPUID_FLAG_STATEFUL_FUNC 2 -#define KVM_CPUID_FLAG_STATE_READ_NEXT 4 +#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0) +#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1) +#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2) /* for KVM_SET_CPUID2 */ struct kvm_cpuid2 { diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0a1e3b8b964d..78a4439bfdc5 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -219,8 +219,14 @@ static bool supported_xcr0_bit(unsigned bit) #define F(x) bit(X86_FEATURE_##x) -static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, - u32 index, int *nent, int maxnent) +static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, + u32 func, u32 index, int *nent, int maxnent) +{ + return 0; +} + +static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, + u32 index, int *nent, int maxnent) { int r; unsigned f_nx = is_efer_nx() ? F(NX) : 0; @@ -515,6 +521,15 @@ out: return r; } +static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func, + u32 idx, int *nent, int maxnent, unsigned int type) +{ + if (type == KVM_GET_EMULATED_CPUID) + return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent); + + return __do_cpuid_ent(entry, func, idx, nent, maxnent); +} + #undef F struct kvm_cpuid_param { @@ -529,8 +544,34 @@ static bool is_centaur_cpu(const struct kvm_cpuid_param *param) return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR; } -int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries) +static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries, + __u32 num_entries, unsigned int ioctl_type) +{ + int i; + + if (ioctl_type != KVM_GET_EMULATED_CPUID) + return false; + + /* + * We want to make sure that ->padding is being passed clean from + * userspace in case we want to use it for something in the future. + * + * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we + * have to give ourselves satisfied only with the emulated side. /me + * sheds a tear. + */ + for (i = 0; i < num_entries; i++) { + if (entries[i].padding[0] || + entries[i].padding[1] || + entries[i].padding[2]) + return true; + } + return false; +} + +int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries, + unsigned int type) { struct kvm_cpuid_entry2 *cpuid_entries; int limit, nent = 0, r = -E2BIG, i; @@ -547,6 +588,10 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, goto out; if (cpuid->nent > KVM_MAX_CPUID_ENTRIES) cpuid->nent = KVM_MAX_CPUID_ENTRIES; + + if (sanity_check_entries(entries, cpuid->nent, type)) + return -EINVAL; + r = -ENOMEM; cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent); if (!cpuid_entries) @@ -560,7 +605,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, continue; r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx, - &nent, cpuid->nent); + &nent, cpuid->nent, type); if (r) goto out_free; @@ -571,7 +616,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, limit = cpuid_entries[nent - 1].eax; for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func) r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx, - &nent, cpuid->nent); + &nent, cpuid->nent, type); if (r) goto out_free; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index b7fd07984888..f1e4895174b2 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -6,8 +6,9 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, u32 function, u32 index); -int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 __user *entries); +int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries, + unsigned int type); int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index edf2a07df3a3..3d1e3ee8b39e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2564,6 +2564,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: case KVM_CAP_SET_TSS_ADDR: case KVM_CAP_EXT_CPUID: + case KVM_CAP_EXT_EMUL_CPUID: case KVM_CAP_CLOCKSOURCE: case KVM_CAP_PIT: case KVM_CAP_NOP_IO_DELAY: @@ -2673,15 +2674,17 @@ long kvm_arch_dev_ioctl(struct file *filp, r = 0; break; } - case KVM_GET_SUPPORTED_CPUID: { + case KVM_GET_SUPPORTED_CPUID: + case KVM_GET_EMULATED_CPUID: { struct kvm_cpuid2 __user *cpuid_arg = argp; struct kvm_cpuid2 cpuid; r = -EFAULT; if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) goto out; - r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, - cpuid_arg->entries); + + r = kvm_dev_ioctl_get_cpuid(&cpuid, cpuid_arg->entries, + ioctl); if (r) goto out; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e32e776f20c0..32c60b98ddf8 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -541,6 +541,7 @@ struct kvm_ppc_smmu_info { #define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 #define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 #define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 +#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) /* * Extension capability list. @@ -668,6 +669,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_IRQ_XICS 92 #define KVM_CAP_ARM_EL1_32BIT 93 #define KVM_CAP_SPAPR_MULTITCE 94 +#define KVM_CAP_EXT_EMUL_CPUID 95 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From ec53500fae421e07c5d035918ca454a429732ef4 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 30 Oct 2013 11:02:17 -0600 Subject: kvm: Add VFIO device So far we've succeeded at making KVM and VFIO mostly unaware of each other, but areas are cropping up where a connection beyond eventfds and irqfds needs to be made. This patch introduces a KVM-VFIO device that is meant to be a gateway for such interaction. The user creates the device and can add and remove VFIO groups to it via file descriptors. When a group is added, KVM verifies the group is valid and gets a reference to it via the VFIO external user interface. Signed-off-by: Alex Williamson Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/devices/vfio.txt | 22 +++ arch/x86/kvm/Kconfig | 1 + arch/x86/kvm/Makefile | 2 +- include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 4 + virt/kvm/Kconfig | 3 + virt/kvm/kvm_main.c | 5 + virt/kvm/vfio.c | 220 +++++++++++++++++++++++++++++ 8 files changed, 257 insertions(+), 1 deletion(-) create mode 100644 Documentation/virtual/kvm/devices/vfio.txt create mode 100644 virt/kvm/vfio.c (limited to 'include') diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt new file mode 100644 index 000000000000..ef51740c67ca --- /dev/null +++ b/Documentation/virtual/kvm/devices/vfio.txt @@ -0,0 +1,22 @@ +VFIO virtual device +=================== + +Device types supported: + KVM_DEV_TYPE_VFIO + +Only one VFIO instance may be created per VM. The created device +tracks VFIO groups in use by the VM and features of those groups +important to the correctness and acceleration of the VM. As groups +are enabled and disabled for use by the VM, KVM should be updated +about their presence. When registered with KVM, a reference to the +VFIO-group is held by KVM. + +Groups: + KVM_DEV_VFIO_GROUP + +KVM_DEV_VFIO_GROUP attributes: + KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking + KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking + +For each, kvm_device_attr.addr points to an int32_t file descriptor +for the VFIO group. diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index a47a3e54b964..b89c5db2b832 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -38,6 +38,7 @@ config KVM select PERF_EVENTS select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT + select KVM_VFIO ---help--- Support hosting fully virtualized guest machines using hardware virtualization extensions. You will need a fairly recent diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index bf4fb04d0112..25d22b2d6509 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -9,7 +9,7 @@ KVM := ../../../virt/kvm kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ - $(KVM)/eventfd.o $(KVM)/irqchip.o + $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c9d4236ab442..7beddbd38ac7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1058,6 +1058,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; +extern struct kvm_device_ops kvm_vfio_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 32c60b98ddf8..509cfbfe9658 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -845,6 +845,10 @@ struct kvm_device_attr { #define KVM_DEV_TYPE_FSL_MPIC_20 1 #define KVM_DEV_TYPE_FSL_MPIC_42 2 #define KVM_DEV_TYPE_XICS 3 +#define KVM_DEV_TYPE_VFIO 4 +#define KVM_DEV_VFIO_GROUP 1 +#define KVM_DEV_VFIO_GROUP_ADD 1 +#define KVM_DEV_VFIO_GROUP_DEL 2 /* * ioctls for VM fds diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 779262f59e25..fbe1a48bd629 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -27,3 +27,6 @@ config HAVE_KVM_MSI config HAVE_KVM_CPU_RELAX_INTERCEPT bool + +config KVM_VFIO + bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9ca014da134d..82c4047aa0e3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2270,6 +2270,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm, case KVM_DEV_TYPE_XICS: ops = &kvm_xics_ops; break; +#endif +#ifdef CONFIG_KVM_VFIO + case KVM_DEV_TYPE_VFIO: + ops = &kvm_vfio_ops; + break; #endif default: return -ENODEV; diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c new file mode 100644 index 000000000000..597c258245ea --- /dev/null +++ b/virt/kvm/vfio.c @@ -0,0 +1,220 @@ +/* + * VFIO-KVM bridge pseudo device + * + * Copyright (C) 2013 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct kvm_vfio_group { + struct list_head node; + struct vfio_group *vfio_group; +}; + +struct kvm_vfio { + struct list_head group_list; + struct mutex lock; +}; + +static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep) +{ + struct vfio_group *vfio_group; + struct vfio_group *(*fn)(struct file *); + + fn = symbol_get(vfio_group_get_external_user); + if (!fn) + return ERR_PTR(-EINVAL); + + vfio_group = fn(filep); + + symbol_put(vfio_group_get_external_user); + + return vfio_group; +} + +static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group) +{ + void (*fn)(struct vfio_group *); + + fn = symbol_get(vfio_group_put_external_user); + if (!fn) + return; + + fn(vfio_group); + + symbol_put(vfio_group_put_external_user); +} + +static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) +{ + struct kvm_vfio *kv = dev->private; + struct vfio_group *vfio_group; + struct kvm_vfio_group *kvg; + void __user *argp = (void __user *)arg; + struct fd f; + int32_t fd; + int ret; + + switch (attr) { + case KVM_DEV_VFIO_GROUP_ADD: + if (get_user(fd, (int32_t __user *)argp)) + return -EFAULT; + + f = fdget(fd); + if (!f.file) + return -EBADF; + + vfio_group = kvm_vfio_group_get_external_user(f.file); + fdput(f); + + if (IS_ERR(vfio_group)) + return PTR_ERR(vfio_group); + + mutex_lock(&kv->lock); + + list_for_each_entry(kvg, &kv->group_list, node) { + if (kvg->vfio_group == vfio_group) { + mutex_unlock(&kv->lock); + kvm_vfio_group_put_external_user(vfio_group); + return -EEXIST; + } + } + + kvg = kzalloc(sizeof(*kvg), GFP_KERNEL); + if (!kvg) { + mutex_unlock(&kv->lock); + kvm_vfio_group_put_external_user(vfio_group); + return -ENOMEM; + } + + list_add_tail(&kvg->node, &kv->group_list); + kvg->vfio_group = vfio_group; + + mutex_unlock(&kv->lock); + + return 0; + + case KVM_DEV_VFIO_GROUP_DEL: + if (get_user(fd, (int32_t __user *)argp)) + return -EFAULT; + + f = fdget(fd); + if (!f.file) + return -EBADF; + + vfio_group = kvm_vfio_group_get_external_user(f.file); + fdput(f); + + if (IS_ERR(vfio_group)) + return PTR_ERR(vfio_group); + + ret = -ENOENT; + + mutex_lock(&kv->lock); + + list_for_each_entry(kvg, &kv->group_list, node) { + if (kvg->vfio_group != vfio_group) + continue; + + list_del(&kvg->node); + kvm_vfio_group_put_external_user(kvg->vfio_group); + kfree(kvg); + ret = 0; + break; + } + + mutex_unlock(&kv->lock); + + kvm_vfio_group_put_external_user(vfio_group); + + return ret; + } + + return -ENXIO; +} + +static int kvm_vfio_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_VFIO_GROUP: + return kvm_vfio_set_group(dev, attr->attr, attr->addr); + } + + return -ENXIO; +} + +static int kvm_vfio_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_VFIO_GROUP: + switch (attr->attr) { + case KVM_DEV_VFIO_GROUP_ADD: + case KVM_DEV_VFIO_GROUP_DEL: + return 0; + } + + break; + } + + return -ENXIO; +} + +static void kvm_vfio_destroy(struct kvm_device *dev) +{ + struct kvm_vfio *kv = dev->private; + struct kvm_vfio_group *kvg, *tmp; + + list_for_each_entry_safe(kvg, tmp, &kv->group_list, node) { + kvm_vfio_group_put_external_user(kvg->vfio_group); + list_del(&kvg->node); + kfree(kvg); + } + + kfree(kv); + kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ +} + +static int kvm_vfio_create(struct kvm_device *dev, u32 type) +{ + struct kvm_device *tmp; + struct kvm_vfio *kv; + + /* Only one VFIO "device" per VM */ + list_for_each_entry(tmp, &dev->kvm->devices, vm_node) + if (tmp->ops == &kvm_vfio_ops) + return -EBUSY; + + kv = kzalloc(sizeof(*kv), GFP_KERNEL); + if (!kv) + return -ENOMEM; + + INIT_LIST_HEAD(&kv->group_list); + mutex_init(&kv->lock); + + dev->private = kv; + + return 0; +} + +struct kvm_device_ops kvm_vfio_ops = { + .name = "kvm-vfio", + .create = kvm_vfio_create, + .destroy = kvm_vfio_destroy, + .set_attr = kvm_vfio_set_attr, + .has_attr = kvm_vfio_has_attr, +}; -- cgit v1.2.3 From d96eb2c6f480769bff32054e78b964860dae4d56 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 30 Oct 2013 11:02:23 -0600 Subject: kvm/x86: Convert iommu_flags to iommu_noncoherent Default to operating in coherent mode. This simplifies the logic when we switch to a model of registering and unregistering noncoherent I/O with KVM. Signed-off-by: Alex Williamson Signed-off-by: Paolo Bonzini --- arch/ia64/include/asm/kvm_host.h | 2 +- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/vmx.c | 2 +- arch/x86/kvm/x86.c | 2 +- include/linux/kvm_host.h | 3 --- virt/kvm/iommu.c | 16 ++++++++-------- 6 files changed, 12 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 95a3ff93777c..db95f570705f 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -476,7 +476,7 @@ struct kvm_arch { struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; - int iommu_flags; + bool iommu_noncoherent; unsigned long irq_sources_bitmap; unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5cbf3166257c..91b35e4005d3 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -564,7 +564,7 @@ struct kvm_arch { struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; - int iommu_flags; + bool iommu_noncoherent; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6850b0f1d52b..727a5e980c43 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7446,7 +7446,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) if (is_mmio) ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT; else if (vcpu->kvm->arch.iommu_domain && - !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)) + vcpu->kvm->arch.iommu_noncoherent) ret = kvm_get_guest_memory_type(vcpu, gfn) << VMX_EPT_MT_EPTE_SHIFT; else diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 15f9540a2b1f..92ad83e5b132 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2719,7 +2719,7 @@ static void wbinvd_ipi(void *garbage) static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) { return vcpu->kvm->arch.iommu_domain && - !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY); + vcpu->kvm->arch.iommu_noncoherent; } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7beddbd38ac7..ed64880e4915 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -746,9 +746,6 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, int kvm_request_irq_source_id(struct kvm *kvm); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); -/* For vcpu->arch.iommu_flags */ -#define KVM_IOMMU_CACHE_COHERENCY 0x1 - #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot); diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index a3b14109049b..d32d156a423a 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -79,7 +79,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) flags = IOMMU_READ; if (!(slot->flags & KVM_MEM_READONLY)) flags |= IOMMU_WRITE; - if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY) + if (!kvm->arch.iommu_noncoherent) flags |= IOMMU_CACHE; @@ -158,7 +158,8 @@ int kvm_assign_device(struct kvm *kvm, { struct pci_dev *pdev = NULL; struct iommu_domain *domain = kvm->arch.iommu_domain; - int r, last_flags; + int r; + bool noncoherent; /* check if iommu exists and in use */ if (!domain) @@ -174,15 +175,13 @@ int kvm_assign_device(struct kvm *kvm, return r; } - last_flags = kvm->arch.iommu_flags; - if (iommu_domain_has_cap(kvm->arch.iommu_domain, - IOMMU_CAP_CACHE_COHERENCY)) - kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY; + noncoherent = !iommu_domain_has_cap(kvm->arch.iommu_domain, + IOMMU_CAP_CACHE_COHERENCY); /* Check if need to update IOMMU page table for guest memory */ - if ((last_flags ^ kvm->arch.iommu_flags) == - KVM_IOMMU_CACHE_COHERENCY) { + if (noncoherent != kvm->arch.iommu_noncoherent) { kvm_iommu_unmap_memslots(kvm); + kvm->arch.iommu_noncoherent = noncoherent; r = kvm_iommu_map_memslots(kvm); if (r) goto out_unmap; @@ -342,6 +341,7 @@ int kvm_iommu_unmap_guest(struct kvm *kvm) mutex_lock(&kvm->slots_lock); kvm_iommu_unmap_memslots(kvm); kvm->arch.iommu_domain = NULL; + kvm->arch.iommu_noncoherent = false; mutex_unlock(&kvm->slots_lock); iommu_domain_free(domain); -- cgit v1.2.3 From e0f0bbc527f6e9c0261f1d16b2a0b47612b7f235 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 30 Oct 2013 11:02:30 -0600 Subject: kvm: Create non-coherent DMA registeration We currently use some ad-hoc arch variables tied to legacy KVM device assignment to manage emulation of instructions that depend on whether non-coherent DMA is present. Create an interface for this, adapting legacy KVM device assignment and adding VFIO via the KVM-VFIO device. For now we assume that non-coherent DMA is possible any time we have a VFIO group. Eventually an interface can be developed as part of the VFIO external user interface to query the coherency of a group. Signed-off-by: Alex Williamson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/vmx.c | 3 +-- arch/x86/kvm/x86.c | 22 +++++++++++++++++++-- include/linux/kvm_host.h | 19 ++++++++++++++++++ virt/kvm/iommu.c | 6 ++++++ virt/kvm/vfio.c | 44 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 92 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 91b35e4005d3..de388c55e7ec 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -565,6 +565,8 @@ struct kvm_arch { struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; bool iommu_noncoherent; +#define __KVM_HAVE_ARCH_NONCOHERENT_DMA + atomic_t noncoherent_dma_count; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 727a5e980c43..fabf7421ec18 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7445,8 +7445,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) */ if (is_mmio) ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT; - else if (vcpu->kvm->arch.iommu_domain && - vcpu->kvm->arch.iommu_noncoherent) + else if (kvm_arch_has_noncoherent_dma(vcpu->kvm)) ret = kvm_get_guest_memory_type(vcpu, gfn) << VMX_EPT_MT_EPTE_SHIFT; else diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 92ad83e5b132..ec35d09937da 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2718,8 +2718,7 @@ static void wbinvd_ipi(void *garbage) static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) { - return vcpu->kvm->arch.iommu_domain && - vcpu->kvm->arch.iommu_noncoherent; + return kvm_arch_has_noncoherent_dma(vcpu->kvm); } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -6998,6 +6997,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); + atomic_set(&kvm->arch.noncoherent_dma_count, 0); /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); @@ -7437,6 +7437,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) kvm_x86_ops->interrupt_allowed(vcpu); } +void kvm_arch_register_noncoherent_dma(struct kvm *kvm) +{ + atomic_inc(&kvm->arch.noncoherent_dma_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_register_noncoherent_dma); + +void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm) +{ + atomic_dec(&kvm->arch.noncoherent_dma_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_unregister_noncoherent_dma); + +bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) +{ + return atomic_read(&kvm->arch.noncoherent_dma_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma); + EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ed64880e4915..92aae88756db 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -670,6 +670,25 @@ static inline void kvm_arch_free_vm(struct kvm *kvm) } #endif +#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA +void kvm_arch_register_noncoherent_dma(struct kvm *kvm); +void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm); +bool kvm_arch_has_noncoherent_dma(struct kvm *kvm); +#else +static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm) +{ +} + +static inline void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm) +{ +} + +static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) +{ + return false; +} +#endif + static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) { #ifdef __KVM_HAVE_ARCH_WQP diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index d32d156a423a..c7d9ce122529 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -140,6 +140,9 @@ static int kvm_iommu_map_memslots(struct kvm *kvm) struct kvm_memslots *slots; struct kvm_memory_slot *memslot; + if (kvm->arch.iommu_noncoherent) + kvm_arch_register_noncoherent_dma(kvm); + idx = srcu_read_lock(&kvm->srcu); slots = kvm_memslots(kvm); @@ -327,6 +330,9 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm) srcu_read_unlock(&kvm->srcu, idx); + if (kvm->arch.iommu_noncoherent) + kvm_arch_unregister_noncoherent_dma(kvm); + return 0; } diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 597c258245ea..ca4260e35037 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -27,6 +27,7 @@ struct kvm_vfio_group { struct kvm_vfio { struct list_head group_list; struct mutex lock; + bool noncoherent; }; static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep) @@ -58,6 +59,43 @@ static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group) symbol_put(vfio_group_put_external_user); } +/* + * Groups can use the same or different IOMMU domains. If the same then + * adding a new group may change the coherency of groups we've previously + * been told about. We don't want to care about any of that so we retest + * each group and bail as soon as we find one that's noncoherent. This + * means we only ever [un]register_noncoherent_dma once for the whole device. + */ +static void kvm_vfio_update_coherency(struct kvm_device *dev) +{ + struct kvm_vfio *kv = dev->private; + bool noncoherent = false; + struct kvm_vfio_group *kvg; + + mutex_lock(&kv->lock); + + list_for_each_entry(kvg, &kv->group_list, node) { + /* + * TODO: We need an interface to check the coherency of + * the IOMMU domain this group is using. For now, assume + * it's always noncoherent. + */ + noncoherent = true; + break; + } + + if (noncoherent != kv->noncoherent) { + kv->noncoherent = noncoherent; + + if (kv->noncoherent) + kvm_arch_register_noncoherent_dma(dev->kvm); + else + kvm_arch_unregister_noncoherent_dma(dev->kvm); + } + + mutex_unlock(&kv->lock); +} + static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) { struct kvm_vfio *kv = dev->private; @@ -105,6 +143,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) mutex_unlock(&kv->lock); + kvm_vfio_update_coherency(dev); + return 0; case KVM_DEV_VFIO_GROUP_DEL: @@ -140,6 +180,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) kvm_vfio_group_put_external_user(vfio_group); + kvm_vfio_update_coherency(dev); + return ret; } @@ -185,6 +227,8 @@ static void kvm_vfio_destroy(struct kvm_device *dev) kfree(kvg); } + kvm_vfio_update_coherency(dev); + kfree(kv); kfree(dev); /* alloc by kvm_ioctl_create_device, free by .destroy */ } -- cgit v1.2.3 From 81e87e26796782e014fd1f2bb9cd8fb6ce4021a8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 30 Oct 2013 21:43:01 +0200 Subject: kvm_host: typo fix fix up typo in comment. Signed-off-by: Michael S. Tsirkin Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 92aae88756db..9bb1048d010e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -804,7 +804,7 @@ static inline void kvm_guest_enter(void) /* KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode - * is very similar to exiting to userspase from rcu point of view. In + * is very similar to exiting to userspace from rcu point of view. In * addition CPU may stay in a guest mode for quite a long time (up to * one time slice). Lets treat guest mode as quiescent state, just like * we do with user-mode execution. -- cgit v1.2.3 From ce332f662deb545c8a4f3f58debcca26bb2e44b0 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 4 Nov 2013 22:36:17 +0200 Subject: srcu: API for barrier after srcu read unlock srcu read lock/unlock include a full memory barrier but that's an implementation detail. Add an API for make memory fencing explicit for users that need this barrier, to make sure we can change it as needed without breaking all users. Acked-by: "Paul E. McKenney" Reviewed-by: Paul E. McKenney Signed-off-by: Michael S. Tsirkin Signed-off-by: Gleb Natapov --- include/linux/srcu.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index c114614ed172..9b058eecd403 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -237,4 +237,18 @@ static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) __srcu_read_unlock(sp, idx); } +/** + * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock + * + * Converts the preceding srcu_read_unlock into a two-way memory barrier. + * + * Call this after srcu_read_unlock, to guarantee that all memory operations + * that occur after smp_mb__after_srcu_read_unlock will appear to happen after + * the preceding srcu_read_unlock. + */ +static inline void smp_mb__after_srcu_read_unlock(void) +{ + /* __srcu_read_unlock has smp_mb() internally so nothing to do here. */ +} + #endif -- cgit v1.2.3 From 8b414521bc5375ae8ba18c083af95d44b8da0d04 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 11 Oct 2013 21:39:26 -0300 Subject: hung_task: add method to reset detector In certain occasions it is possible for a hung task detector positive to be false: continuation from a paused VM, for example. Add a method to reset detection, similar as is done with other kernel watchdogs. Acked-by: Don Zickus Acked-by: Paolo Bonzini Signed-off-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kernel/pvclock.c | 1 + include/linux/sched.h | 8 ++++++++ kernel/hung_task.c | 11 +++++++++++ 3 files changed, 20 insertions(+) (limited to 'include') diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 6279928c0a71..2f355d229a58 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -48,6 +48,7 @@ void pvclock_touch_watchdogs(void) touch_softlockup_watchdog_sync(); clocksource_touch_watchdog(); rcu_cpu_stall_reset(); + reset_hung_task_detector(); } static atomic64_t last_value = ATOMIC64_INIT(0); diff --git a/include/linux/sched.h b/include/linux/sched.h index 6682da36b293..7bb4b4a2a101 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -285,6 +285,14 @@ static inline void lockup_detector_init(void) } #endif +#ifdef CONFIG_DETECT_HUNG_TASK +void reset_hung_task_detector(void); +#else +static inline void reset_hung_task_detector(void) +{ +} +#endif + /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 3e97fb126e6b..dfdf51534b3e 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -203,6 +203,14 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, return ret; } +static atomic_t reset_hung_task = ATOMIC_INIT(0); + +void reset_hung_task_detector(void) +{ + atomic_set(&reset_hung_task, 1); +} +EXPORT_SYMBOL_GPL(reset_hung_task_detector); + /* * kthread which checks for tasks stuck in D state */ @@ -216,6 +224,9 @@ static int watchdog(void *dummy) while (schedule_timeout_interruptible(timeout_jiffies(timeout))) timeout = sysctl_hung_task_timeout_secs; + if (atomic_xchg(&reset_hung_task, 0)) + continue; + check_hung_uninterruptible_tasks(timeout); } -- cgit v1.2.3