diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 16 | ||||
-rw-r--r-- | arch/x86/kvm/Makefile | 5 | ||||
-rw-r--r-- | arch/x86/kvm/ioapic.c | 682 | ||||
-rw-r--r-- | arch/x86/kvm/ioapic.h | 102 | ||||
-rw-r--r-- | arch/x86/kvm/irq_comm.c | 347 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 1 |
6 files changed, 1150 insertions, 3 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 769db36a3001..76ff3e2d8fd2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -603,6 +603,9 @@ struct kvm_arch { struct kvm_xen_hvm_config xen_hvm_config; + /* reads protected by irq_srcu, writes by irq_lock */ + struct hlist_head mask_notifier_list; + /* fields used by HYPER-V emulation */ u64 hv_guest_os_id; u64 hv_hypercall; @@ -819,6 +822,19 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, const void *val, int bytes); u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); +struct kvm_irq_mask_notifier { + void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); + int irq; + struct hlist_node link; +}; + +void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn); +void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn); +void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, + bool mask); + extern bool tdp_enabled; u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 25d22b2d6509..ee1cd92b03be 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -7,14 +7,13 @@ CFLAGS_vmx.o := -I. KVM := ../../../virt/kvm -kvm-y += $(KVM)/kvm_main.o $(KVM)/ioapic.o \ - $(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \ +kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ $(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += $(KVM)/assigned-dev.o $(KVM)/iommu.o kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \ - i8254.o cpuid.o pmu.o + i8254.o ioapic.o irq_comm.o cpuid.o pmu.o kvm-intel-y += vmx.o kvm-amd-y += svm.o diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c new file mode 100644 index 000000000000..f0f7ef82b7a6 --- /dev/null +++ b/arch/x86/kvm/ioapic.c @@ -0,0 +1,682 @@ +/* + * Copyright (C) 2001 MandrakeSoft S.A. + * Copyright 2010 Red Hat, Inc. and/or its affiliates. + * + * MandrakeSoft S.A. + * 43, rue d'Aboukir + * 75002 Paris - France + * http://www.linux-mandrake.com/ + * http://www.mandrakesoft.com/ + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Yunhong Jiang <yunhong.jiang@intel.com> + * Yaozu (Eddie) Dong <eddie.dong@intel.com> + * Based on Xen 3.1 code. + */ + +#include <linux/kvm_host.h> +#include <linux/kvm.h> +#include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/smp.h> +#include <linux/hrtimer.h> +#include <linux/io.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/current.h> +#include <trace/events/kvm.h> + +#include "ioapic.h" +#include "lapic.h" +#include "irq.h" + +#if 0 +#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) +#else +#define ioapic_debug(fmt, arg...) +#endif +static int ioapic_service(struct kvm_ioapic *vioapic, int irq, + bool line_status); + +static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, + unsigned long addr, + unsigned long length) +{ + unsigned long result = 0; + + switch (ioapic->ioregsel) { + case IOAPIC_REG_VERSION: + result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16) + | (IOAPIC_VERSION_ID & 0xff)); + break; + + case IOAPIC_REG_APIC_ID: + case IOAPIC_REG_ARB_ID: + result = ((ioapic->id & 0xf) << 24); + break; + + default: + { + u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; + u64 redir_content; + + if (redir_index < IOAPIC_NUM_PINS) + redir_content = + ioapic->redirtbl[redir_index].bits; + else + redir_content = ~0ULL; + + result = (ioapic->ioregsel & 0x1) ? + (redir_content >> 32) & 0xffffffff : + redir_content & 0xffffffff; + break; + } + } + + return result; +} + +static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) +{ + ioapic->rtc_status.pending_eoi = 0; + bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS); +} + +static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic); + +static void rtc_status_pending_eoi_check_valid(struct kvm_ioapic *ioapic) +{ + if (WARN_ON(ioapic->rtc_status.pending_eoi < 0)) + kvm_rtc_eoi_tracking_restore_all(ioapic); +} + +static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) +{ + bool new_val, old_val; + struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + union kvm_ioapic_redirect_entry *e; + + e = &ioapic->redirtbl[RTC_GSI]; + if (!kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id, + e->fields.dest_mode)) + return; + + new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector); + old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); + + if (new_val == old_val) + return; + + if (new_val) { + __set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); + ioapic->rtc_status.pending_eoi++; + } else { + __clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map); + ioapic->rtc_status.pending_eoi--; + rtc_status_pending_eoi_check_valid(ioapic); + } +} + +void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) +{ + struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + + spin_lock(&ioapic->lock); + __rtc_irq_eoi_tracking_restore_one(vcpu); + spin_unlock(&ioapic->lock); +} + +static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic) +{ + struct kvm_vcpu *vcpu; + int i; + + if (RTC_GSI >= IOAPIC_NUM_PINS) + return; + + rtc_irq_eoi_tracking_reset(ioapic); + kvm_for_each_vcpu(i, vcpu, ioapic->kvm) + __rtc_irq_eoi_tracking_restore_one(vcpu); +} + +static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu) +{ + if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) { + --ioapic->rtc_status.pending_eoi; + rtc_status_pending_eoi_check_valid(ioapic); + } +} + +static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic) +{ + if (ioapic->rtc_status.pending_eoi > 0) + return true; /* coalesced */ + + return false; +} + +static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, + int irq_level, bool line_status) +{ + union kvm_ioapic_redirect_entry entry; + u32 mask = 1 << irq; + u32 old_irr; + int edge, ret; + + entry = ioapic->redirtbl[irq]; + edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); + + if (!irq_level) { + ioapic->irr &= ~mask; + ret = 1; + goto out; + } + + /* + * Return 0 for coalesced interrupts; for edge-triggered interrupts, + * this only happens if a previous edge has not been delivered due + * do masking. For level interrupts, the remote_irr field tells + * us if the interrupt is waiting for an EOI. + * + * RTC is special: it is edge-triggered, but userspace likes to know + * if it has been already ack-ed via EOI because coalesced RTC + * interrupts lead to time drift in Windows guests. So we track + * EOI manually for the RTC interrupt. + */ + if (irq == RTC_GSI && line_status && + rtc_irq_check_coalesced(ioapic)) { + ret = 0; + goto out; + } + + old_irr = ioapic->irr; + ioapic->irr |= mask; + if ((edge && old_irr == ioapic->irr) || + (!edge && entry.fields.remote_irr)) { + ret = 0; + goto out; + } + + ret = ioapic_service(ioapic, irq, line_status); + +out: + trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); + return ret; +} + +static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr) +{ + u32 idx; + + rtc_irq_eoi_tracking_reset(ioapic); + for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS) + ioapic_set_irq(ioapic, idx, 1, true); + + kvm_rtc_eoi_tracking_restore_all(ioapic); +} + + +static void update_handled_vectors(struct kvm_ioapic *ioapic) +{ + DECLARE_BITMAP(handled_vectors, 256); + int i; + + memset(handled_vectors, 0, sizeof(handled_vectors)); + for (i = 0; i < IOAPIC_NUM_PINS; ++i) + __set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors); + memcpy(ioapic->handled_vectors, handled_vectors, + sizeof(handled_vectors)); + smp_wmb(); +} + +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, + u32 *tmr) +{ + struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + union kvm_ioapic_redirect_entry *e; + int index; + + spin_lock(&ioapic->lock); + for (index = 0; index < IOAPIC_NUM_PINS; index++) { + e = &ioapic->redirtbl[index]; + if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || + kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index) || + index == RTC_GSI) { + if (kvm_apic_match_dest(vcpu, NULL, 0, + e->fields.dest_id, e->fields.dest_mode)) { + __set_bit(e->fields.vector, + (unsigned long *)eoi_exit_bitmap); + if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG) + __set_bit(e->fields.vector, + (unsigned long *)tmr); + } + } + } + spin_unlock(&ioapic->lock); +} + +#ifdef CONFIG_X86 +void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) +{ + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + + if (!ioapic) + return; + kvm_make_scan_ioapic_request(kvm); +} +#else +void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) +{ + return; +} +#endif + +static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) +{ + unsigned index; + bool mask_before, mask_after; + union kvm_ioapic_redirect_entry *e; + + switch (ioapic->ioregsel) { + case IOAPIC_REG_VERSION: + /* Writes are ignored. */ + break; + + case IOAPIC_REG_APIC_ID: + ioapic->id = (val >> 24) & 0xf; + break; + + case IOAPIC_REG_ARB_ID: + break; + + default: + index = (ioapic->ioregsel - 0x10) >> 1; + + ioapic_debug("change redir index %x val %x\n", index, val); + if (index >= IOAPIC_NUM_PINS) + return; + e = &ioapic->redirtbl[index]; + mask_before = e->fields.mask; + if (ioapic->ioregsel & 1) { + e->bits &= 0xffffffff; + e->bits |= (u64) val << 32; + } else { + e->bits &= ~0xffffffffULL; + e->bits |= (u32) val; + e->fields.remote_irr = 0; + } + update_handled_vectors(ioapic); + mask_after = e->fields.mask; + if (mask_before != mask_after) + kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); + if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG + && ioapic->irr & (1 << index)) + ioapic_service(ioapic, index, false); + kvm_vcpu_request_scan_ioapic(ioapic->kvm); + break; + } +} + +static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) +{ + union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; + struct kvm_lapic_irq irqe; + int ret; + + if (entry->fields.mask) + return -1; + + ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " + "vector=%x trig_mode=%x\n", + entry->fields.dest_id, entry->fields.dest_mode, + entry->fields.delivery_mode, entry->fields.vector, + entry->fields.trig_mode); + + irqe.dest_id = entry->fields.dest_id; + irqe.vector = entry->fields.vector; + irqe.dest_mode = entry->fields.dest_mode; + irqe.trig_mode = entry->fields.trig_mode; + irqe.delivery_mode = entry->fields.delivery_mode << 8; + irqe.level = 1; + irqe.shorthand = 0; + + if (irqe.trig_mode == IOAPIC_EDGE_TRIG) + ioapic->irr &= ~(1 << irq); + + if (irq == RTC_GSI && line_status) { + /* + * pending_eoi cannot ever become negative (see + * rtc_status_pending_eoi_check_valid) and the caller + * ensures that it is only called if it is >= zero, namely + * if rtc_irq_check_coalesced returns false). + */ + BUG_ON(ioapic->rtc_status.pending_eoi != 0); + ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, + ioapic->rtc_status.dest_map); + ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret); + } else + ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL); + + if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG) + entry->fields.remote_irr = 1; + + return ret; +} + +int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, + int level, bool line_status) +{ + int ret, irq_level; + + BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS); + + spin_lock(&ioapic->lock); + irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq], + irq_source_id, level); + ret = ioapic_set_irq(ioapic, irq, irq_level, line_status); + + spin_unlock(&ioapic->lock); + + return ret; +} + +void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id) +{ + int i; + + spin_lock(&ioapic->lock); + for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) + __clear_bit(irq_source_id, &ioapic->irq_states[i]); + spin_unlock(&ioapic->lock); +} + +static void kvm_ioapic_eoi_inject_work(struct work_struct *work) +{ + int i; + struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic, + eoi_inject.work); + spin_lock(&ioapic->lock); + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; + + if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG) + continue; + + if (ioapic->irr & (1 << i) && !ent->fields.remote_irr) + ioapic_service(ioapic, i, false); + } + spin_unlock(&ioapic->lock); +} + +#define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000 + +static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, + struct kvm_ioapic *ioapic, int vector, int trigger_mode) +{ + int i; + + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; + + if (ent->fields.vector != vector) + continue; + + if (i == RTC_GSI) + rtc_irq_eoi(ioapic, vcpu); + /* + * We are dropping lock while calling ack notifiers because ack + * notifier callbacks for assigned devices call into IOAPIC + * recursively. Since remote_irr is cleared only after call + * to notifiers if the same vector will be delivered while lock + * is dropped it will be put into irr and will be delivered + * after ack notifier returns. + */ + spin_unlock(&ioapic->lock); + kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i); + spin_lock(&ioapic->lock); + + if (trigger_mode != IOAPIC_LEVEL_TRIG) + continue; + + ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); + ent->fields.remote_irr = 0; + if (!ent->fields.mask && (ioapic->irr & (1 << i))) { + ++ioapic->irq_eoi[i]; + if (ioapic->irq_eoi[i] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) { + /* + * Real hardware does not deliver the interrupt + * immediately during eoi broadcast, and this + * lets a buggy guest make slow progress + * even if it does not correctly handle a + * level-triggered interrupt. Emulate this + * behavior if we detect an interrupt storm. + */ + schedule_delayed_work(&ioapic->eoi_inject, HZ / 100); + ioapic->irq_eoi[i] = 0; + trace_kvm_ioapic_delayed_eoi_inj(ent->bits); + } else { + ioapic_service(ioapic, i, false); + } + } else { + ioapic->irq_eoi[i] = 0; + } + } +} + +bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) +{ + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + smp_rmb(); + return test_bit(vector, ioapic->handled_vectors); +} + +void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode) +{ + struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; + + spin_lock(&ioapic->lock); + __kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode); + spin_unlock(&ioapic->lock); +} + +static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) +{ + return container_of(dev, struct kvm_ioapic, dev); +} + +static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr) +{ + return ((addr >= ioapic->base_address && + (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); +} + +static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, + void *val) +{ + struct kvm_ioapic *ioapic = to_ioapic(this); + u32 result; + if (!ioapic_in_range(ioapic, addr)) + return -EOPNOTSUPP; + + ioapic_debug("addr %lx\n", (unsigned long)addr); + ASSERT(!(addr & 0xf)); /* check alignment */ + + addr &= 0xff; + spin_lock(&ioapic->lock); + switch (addr) { + case IOAPIC_REG_SELECT: + result = ioapic->ioregsel; + break; + + case IOAPIC_REG_WINDOW: + result = ioapic_read_indirect(ioapic, addr, len); + break; + + default: + result = 0; + break; + } + spin_unlock(&ioapic->lock); + + switch (len) { + case 8: + *(u64 *) val = result; + break; + case 1: + case 2: + case 4: + memcpy(val, (char *)&result, len); + break; + default: + printk(KERN_WARNING "ioapic: wrong length %d\n", len); + } + return 0; +} + +static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, + const void *val) +{ + struct kvm_ioapic *ioapic = to_ioapic(this); + u32 data; + if (!ioapic_in_range(ioapic, addr)) + return -EOPNOTSUPP; + + ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n", + (void*)addr, len, val); + ASSERT(!(addr & 0xf)); /* check alignment */ + + switch (len) { + case 8: + case 4: + data = *(u32 *) val; + break; + case 2: + data = *(u16 *) val; + break; + case 1: + data = *(u8 *) val; + break; + default: + printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); + return 0; + } + + addr &= 0xff; + spin_lock(&ioapic->lock); + switch (addr) { + case IOAPIC_REG_SELECT: + ioapic->ioregsel = data & 0xFF; /* 8-bit register */ + break; + + case IOAPIC_REG_WINDOW: + ioapic_write_indirect(ioapic, data); + break; + + default: + break; + } + spin_unlock(&ioapic->lock); + return 0; +} + +static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) +{ + int i; + + cancel_delayed_work_sync(&ioapic->eoi_inject); + for (i = 0; i < IOAPIC_NUM_PINS; i++) + ioapic->redirtbl[i].fields.mask = 1; + ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; + ioapic->ioregsel = 0; + ioapic->irr = 0; + ioapic->id = 0; + memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS); + rtc_irq_eoi_tracking_reset(ioapic); + update_handled_vectors(ioapic); +} + +static const struct kvm_io_device_ops ioapic_mmio_ops = { + .read = ioapic_mmio_read, + .write = ioapic_mmio_write, +}; + +int kvm_ioapic_init(struct kvm *kvm) +{ + struct kvm_ioapic *ioapic; + int ret; + + ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); + if (!ioapic) + return -ENOMEM; + spin_lock_init(&ioapic->lock); + INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work); + kvm->arch.vioapic = ioapic; + kvm_ioapic_reset(ioapic); + kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); + ioapic->kvm = kvm; + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address, + IOAPIC_MEM_LENGTH, &ioapic->dev); + mutex_unlock(&kvm->slots_lock); + if (ret < 0) { + kvm->arch.vioapic = NULL; + kfree(ioapic); + } + + return ret; +} + +void kvm_ioapic_destroy(struct kvm *kvm) +{ + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + + cancel_delayed_work_sync(&ioapic->eoi_inject); + if (ioapic) { + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); + kvm->arch.vioapic = NULL; + kfree(ioapic); + } +} + +int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) +{ + struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); + if (!ioapic) + return -EINVAL; + + spin_lock(&ioapic->lock); + memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); + spin_unlock(&ioapic->lock); + return 0; +} + +int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) +{ + struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); + if (!ioapic) + return -EINVAL; + + spin_lock(&ioapic->lock); + memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); + ioapic->irr = 0; + update_handled_vectors(ioapic); + kvm_vcpu_request_scan_ioapic(kvm); + kvm_ioapic_inject_all(ioapic, state->irr); + spin_unlock(&ioapic->lock); + return 0; +} diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h new file mode 100644 index 000000000000..deac8d509f2a --- /dev/null +++ b/arch/x86/kvm/ioapic.h @@ -0,0 +1,102 @@ +#ifndef __KVM_IO_APIC_H +#define __KVM_IO_APIC_H + +#include <linux/kvm_host.h> + +#include "iodev.h" + +struct kvm; +struct kvm_vcpu; + +#define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS +#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ +#define IOAPIC_EDGE_TRIG 0 +#define IOAPIC_LEVEL_TRIG 1 + +#define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000 +#define IOAPIC_MEM_LENGTH 0x100 + +/* Direct registers. */ +#define IOAPIC_REG_SELECT 0x00 +#define IOAPIC_REG_WINDOW 0x10 + +/* Indirect registers. */ +#define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */ +#define IOAPIC_REG_VERSION 0x01 +#define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */ + +/*ioapic delivery mode*/ +#define IOAPIC_FIXED 0x0 +#define IOAPIC_LOWEST_PRIORITY 0x1 +#define IOAPIC_PMI 0x2 +#define IOAPIC_NMI 0x4 +#define IOAPIC_INIT 0x5 +#define IOAPIC_EXTINT 0x7 + +#ifdef CONFIG_X86 +#define RTC_GSI 8 +#else +#define RTC_GSI -1U +#endif + +struct rtc_status { + int pending_eoi; + DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS); +}; + +struct kvm_ioapic { + u64 base_address; + u32 ioregsel; + u32 id; + u32 irr; + u32 pad; + union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; + unsigned long irq_states[IOAPIC_NUM_PINS]; + struct kvm_io_device dev; + struct kvm *kvm; + void (*ack_notifier)(void *opaque, int irq); + spinlock_t lock; + DECLARE_BITMAP(handled_vectors, 256); + struct rtc_status rtc_status; + struct delayed_work eoi_inject; + u32 irq_eoi[IOAPIC_NUM_PINS]; +}; + +#ifdef DEBUG +#define ASSERT(x) \ +do { \ + if (!(x)) { \ + printk(KERN_EMERG "assertion failed %s: %d: %s\n", \ + __FILE__, __LINE__, #x); \ + BUG(); \ + } \ +} while (0) +#else +#define ASSERT(x) do { } while (0) +#endif + +static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) +{ + return kvm->arch.vioapic; +} + +void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); +int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, + int short_hand, unsigned int dest, int dest_mode); +int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); +void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, + int trigger_mode); +bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector); +int kvm_ioapic_init(struct kvm *kvm); +void kvm_ioapic_destroy(struct kvm *kvm); +int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, + int level, bool line_status); +void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); +int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, unsigned long *dest_map); +int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); +int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap, + u32 *tmr); + +#endif diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c new file mode 100644 index 000000000000..e9c135b639aa --- /dev/null +++ b/arch/x86/kvm/irq_comm.c @@ -0,0 +1,347 @@ +/* + * irq_comm.c: Common API for in kernel interrupt controller + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * Authors: + * Yaozu (Eddie) Dong <Eddie.dong@intel.com> + * + * Copyright 2010 Red Hat, Inc. and/or its affiliates. + */ + +#include <linux/kvm_host.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <trace/events/kvm.h> + +#include <asm/msidef.h> + +#include "irq.h" + +#include "ioapic.h" + +static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ +#ifdef CONFIG_X86 + struct kvm_pic *pic = pic_irqchip(kvm); + return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level); +#else + return -1; +#endif +} + +static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level, + line_status); +} + +inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) +{ + return irq->delivery_mode == APIC_DM_LOWEST; +} + +int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq, unsigned long *dest_map) +{ + int i, r = -1; + struct kvm_vcpu *vcpu, *lowest = NULL; + + if (irq->dest_mode == 0 && irq->dest_id == 0xff && + kvm_is_dm_lowest_prio(irq)) { + printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); + irq->delivery_mode = APIC_DM_FIXED; + } + + if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map)) + return r; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!kvm_apic_present(vcpu)) + continue; + + if (!kvm_apic_match_dest(vcpu, src, irq->shorthand, + irq->dest_id, irq->dest_mode)) + continue; + + if (!kvm_is_dm_lowest_prio(irq)) { + if (r < 0) + r = 0; + r += kvm_apic_set_irq(vcpu, irq, dest_map); + } else if (kvm_lapic_enabled(vcpu)) { + if (!lowest) + lowest = vcpu; + else if (kvm_apic_compare_prio(vcpu, lowest) < 0) + lowest = vcpu; + } + } + + if (lowest) + r = kvm_apic_set_irq(lowest, irq, dest_map); + + return r; +} + +static inline void kvm_set_msi_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm_lapic_irq *irq) +{ + trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); + + irq->dest_id = (e->msi.address_lo & + MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; + irq->vector = (e->msi.data & + MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; + irq->dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo; + irq->trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data; + irq->delivery_mode = e->msi.data & 0x700; + irq->level = 1; + irq->shorthand = 0; + /* TODO Deal with RH bit of MSI message address */ +} + +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, bool line_status) +{ + struct kvm_lapic_irq irq; + + if (!level) + return -1; + + kvm_set_msi_irq(e, &irq); + + return kvm_irq_delivery_to_apic(kvm, NULL, &irq, NULL); +} + + +static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm) +{ + struct kvm_lapic_irq irq; + int r; + + kvm_set_msi_irq(e, &irq); + + if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) + return r; + else + return -EWOULDBLOCK; +} + +/* + * Deliver an IRQ in an atomic context if we can, or return a failure, + * user can retry in a process context. + * Return value: + * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context. + * Other values - No need to retry. + */ +int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) +{ + struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; + struct kvm_kernel_irq_routing_entry *e; + int ret = -EINVAL; + int idx; + + trace_kvm_set_irq(irq, level, irq_source_id); + + /* + * Injection into either PIC or IOAPIC might need to scan all CPUs, + * which would need to be retried from thread context; when same GSI + * is connected to both PIC and IOAPIC, we'd have to report a + * partial failure here. + * Since there's no easy way to do this, we only support injecting MSI + * which is limited to 1:1 GSI mapping. + */ + idx = srcu_read_lock(&kvm->irq_srcu); + if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { + e = &entries[0]; + if (likely(e->type == KVM_IRQ_ROUTING_MSI)) + ret = kvm_set_msi_inatomic(e, kvm); + else + ret = -EWOULDBLOCK; + } + srcu_read_unlock(&kvm->irq_srcu, idx); + return ret; +} + +int kvm_request_irq_source_id(struct kvm *kvm) +{ + unsigned long *bitmap = &kvm->arch.irq_sources_bitmap; + int irq_source_id; + + mutex_lock(&kvm->irq_lock); + irq_source_id = find_first_zero_bit(bitmap, BITS_PER_LONG); + + if (irq_source_id >= BITS_PER_LONG) { + printk(KERN_WARNING "kvm: exhaust allocatable IRQ sources!\n"); + irq_source_id = -EFAULT; + goto unlock; + } + + ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); +#ifdef CONFIG_X86 + ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); +#endif + set_bit(irq_source_id, bitmap); +unlock: + mutex_unlock(&kvm->irq_lock); + + return irq_source_id; +} + +void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) +{ + ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID); +#ifdef CONFIG_X86 + ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID); +#endif + + mutex_lock(&kvm->irq_lock); + if (irq_source_id < 0 || + irq_source_id >= BITS_PER_LONG) { + printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); + goto unlock; + } + clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); + if (!irqchip_in_kernel(kvm)) + goto unlock; + + kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id); +#ifdef CONFIG_X86 + kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id); +#endif +unlock: + mutex_unlock(&kvm->irq_lock); +} + +void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn) +{ + mutex_lock(&kvm->irq_lock); + kimn->irq = irq; + hlist_add_head_rcu(&kimn->link, &kvm->arch.mask_notifier_list); + mutex_unlock(&kvm->irq_lock); +} + +void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, + struct kvm_irq_mask_notifier *kimn) +{ + mutex_lock(&kvm->irq_lock); + hlist_del_rcu(&kimn->link); + mutex_unlock(&kvm->irq_lock); + synchronize_srcu(&kvm->irq_srcu); +} + +void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, + bool mask) +{ + struct kvm_irq_mask_notifier *kimn; + int idx, gsi; + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) + hlist_for_each_entry_rcu(kimn, &kvm->arch.mask_notifier_list, link) + if (kimn->irq == gsi) + kimn->func(kimn, mask); + srcu_read_unlock(&kvm->irq_srcu, idx); +} + +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + int delta; + unsigned max_pin; + + switch (ue->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + delta = 0; + switch (ue->u.irqchip.irqchip) { + case KVM_IRQCHIP_PIC_MASTER: + e->set = kvm_set_pic_irq; + max_pin = PIC_NUM_PINS; + break; + case KVM_IRQCHIP_PIC_SLAVE: + e->set = kvm_set_pic_irq; + max_pin = PIC_NUM_PINS; + delta = 8; + break; + case KVM_IRQCHIP_IOAPIC: + max_pin = KVM_IOAPIC_NUM_PINS; + e->set = kvm_set_ioapic_irq; + break; + default: + goto out; + } + e->irqchip.irqchip = ue->u.irqchip.irqchip; + e->irqchip.pin = ue->u.irqchip.pin + delta; + if (e->irqchip.pin >= max_pin) + goto out; + break; + case KVM_IRQ_ROUTING_MSI: + e->set = kvm_set_msi; + e->msi.address_lo = ue->u.msi.address_lo; + e->msi.address_hi = ue->u.msi.address_hi; + e->msi.data = ue->u.msi.data; + break; + default: + goto out; + } + + r = 0; +out: + return r; +} + +#define IOAPIC_ROUTING_ENTRY(irq) \ + { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ + .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } +#define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq) + +#ifdef CONFIG_X86 +# define PIC_ROUTING_ENTRY(irq) \ + { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ + .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } } +# define ROUTING_ENTRY2(irq) \ + IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq) +#else +# define ROUTING_ENTRY2(irq) \ + IOAPIC_ROUTING_ENTRY(irq) +#endif + +static const struct kvm_irq_routing_entry default_routing[] = { + ROUTING_ENTRY2(0), ROUTING_ENTRY2(1), + ROUTING_ENTRY2(2), ROUTING_ENTRY2(3), + ROUTING_ENTRY2(4), ROUTING_ENTRY2(5), + ROUTING_ENTRY2(6), ROUTING_ENTRY2(7), + ROUTING_ENTRY2(8), ROUTING_ENTRY2(9), + ROUTING_ENTRY2(10), ROUTING_ENTRY2(11), + ROUTING_ENTRY2(12), ROUTING_ENTRY2(13), + ROUTING_ENTRY2(14), ROUTING_ENTRY2(15), + ROUTING_ENTRY1(16), ROUTING_ENTRY1(17), + ROUTING_ENTRY1(18), ROUTING_ENTRY1(19), + ROUTING_ENTRY1(20), ROUTING_ENTRY1(21), + ROUTING_ENTRY1(22), ROUTING_ENTRY1(23), +}; + +int kvm_setup_default_irq_routing(struct kvm *kvm) +{ + return kvm_set_irq_routing(kvm, default_routing, + ARRAY_SIZE(default_routing), 0); +} diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a8f53a6960fd..5337039427c8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7264,6 +7264,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (type) return -EINVAL; + INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |