diff options
Diffstat (limited to 'hw')
-rw-r--r-- | hw/acpi.c | 3 | ||||
-rw-r--r-- | hw/acpi_piix4.c | 67 | ||||
-rw-r--r-- | hw/apic.c | 112 | ||||
-rw-r--r-- | hw/apic.h | 1 | ||||
-rw-r--r-- | hw/cirrus_vga.c | 18 | ||||
-rw-r--r-- | hw/device-assignment.c | 1672 | ||||
-rw-r--r-- | hw/device-assignment.h | 119 | ||||
-rw-r--r-- | hw/extboot.c | 123 | ||||
-rw-r--r-- | hw/hpet.c | 5 | ||||
-rw-r--r-- | hw/hw.h | 13 | ||||
-rw-r--r-- | hw/i8254-kvm.c | 122 | ||||
-rw-r--r-- | hw/i8254.c | 135 | ||||
-rw-r--r-- | hw/i8254.h | 69 | ||||
-rw-r--r-- | hw/i8259.c | 145 | ||||
-rw-r--r-- | hw/ioapic.c | 94 | ||||
-rw-r--r-- | hw/ipf.c | 713 | ||||
-rw-r--r-- | hw/msix.c | 224 | ||||
-rw-r--r-- | hw/msix.h | 2 | ||||
-rw-r--r-- | hw/pc.c | 57 | ||||
-rw-r--r-- | hw/pc.h | 16 | ||||
-rw-r--r-- | hw/pc_piix.c | 33 | ||||
-rw-r--r-- | hw/pci-hotplug.c | 23 | ||||
-rw-r--r-- | hw/pci.c | 182 | ||||
-rw-r--r-- | hw/pci.h | 53 | ||||
-rw-r--r-- | hw/pci_regs.h | 7 | ||||
-rw-r--r-- | hw/pcspk.c | 48 | ||||
-rw-r--r-- | hw/piix_pci.c | 16 | ||||
-rw-r--r-- | hw/ppc440.c | 1 | ||||
-rw-r--r-- | hw/ppc440_bamboo.c | 1 | ||||
-rw-r--r-- | hw/ppce500_mpc8544ds.c | 1 | ||||
-rw-r--r-- | hw/testdev.c | 129 | ||||
-rw-r--r-- | hw/vga-pci.c | 2 | ||||
-rw-r--r-- | hw/vga.c | 47 | ||||
-rw-r--r-- | hw/vga_int.h | 6 | ||||
-rw-r--r-- | hw/virtio-balloon.c | 1 | ||||
-rw-r--r-- | hw/virtio-pci.c | 34 |
36 files changed, 4202 insertions, 92 deletions
@@ -18,6 +18,9 @@ #include "hw.h" #include "pc.h" #include "acpi.h" +#include "kvm.h" +#include "qemu-kvm.h" +#include "string.h" struct acpi_table_header { diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c index 8d1a62820..227aba8f2 100644 --- a/hw/acpi_piix4.c +++ b/hw/acpi_piix4.c @@ -34,12 +34,14 @@ #define ACPI_DBG_IO_ADDR 0xb044 #define GPE_BASE 0xafe0 +#define PROC_BASE 0xaf00 #define PCI_BASE 0xae00 #define PCI_EJ_BASE 0xae08 struct gpe_regs { uint16_t sts; /* status */ uint16_t en; /* enabled */ + uint8_t cpus_sts[32]; }; struct pci_status { @@ -356,11 +358,16 @@ static void piix4_powerdown(void *opaque, int irq, int power_failing) } } +static PIIX4PMState *global_piix4_pm_state; /* cpu hotadd */ + static int piix4_pm_initfn(PCIDevice *dev) { PIIX4PMState *s = DO_UPCAST(PIIX4PMState, dev, dev); uint8_t *pci_conf; + /* for cpu hotadd */ + global_piix4_pm_state = s; + pci_conf = s->dev.config; pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL); pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_INTEL_82371AB_3); @@ -374,6 +381,13 @@ static int piix4_pm_initfn(PCIDevice *dev) pci_conf[0x40] = 0x01; /* PM io base read only bit */ +#if defined(TARGET_IA64) + pci_conf[0x40] = 0x41; /* PM io base read only bit */ + pci_conf[0x41] = 0x1f; + pm_write_config(s, 0x80, 0x01, 1); /*Set default pm_io_base 0x1f40*/ + s->pmcntrl = SCI_EN; +#endif + /* APM */ apm_init(&s->apm, apm_ctrl_changed, s); @@ -462,6 +476,10 @@ static uint32_t gpe_readb(void *opaque, uint32_t addr) uint32_t val = 0; struct gpe_regs *g = opaque; switch (addr) { + case PROC_BASE ... 
PROC_BASE+31: + val = g->cpus_sts[addr - PROC_BASE]; + break; + case GPE_BASE: case GPE_BASE + 1: val = gpe_read_val(g->sts, addr); @@ -575,16 +593,27 @@ static void pciej_write(void *opaque, uint32_t addr, uint32_t val) PIIX4_DPRINTF("pciej write %x <== %d\n", addr, val); } +extern const char *global_cpu_model; + static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev, int state); static void piix4_acpi_system_hot_add_init(PCIBus *bus, PIIX4PMState *s) { struct gpe_regs *gpe = &s->gpe; struct pci_status *pci0_status = &s->pci0_status; + int i = 0, cpus = smp_cpus; + + while (cpus > 0) { + gpe->cpus_sts[i++] = (cpus < 8) ? (1 << cpus) - 1 : 0xff; + cpus -= 8; + } register_ioport_write(GPE_BASE, 4, 1, gpe_writeb, gpe); register_ioport_read(GPE_BASE, 4, 1, gpe_readb, gpe); + register_ioport_write(PROC_BASE, 32, 1, gpe_writeb, gpe); + register_ioport_read(PROC_BASE, 32, 1, gpe_readb, gpe); + register_ioport_write(PCI_BASE, 8, 4, pcihotplug_write, pci0_status); register_ioport_read(PCI_BASE, 8, 4, pcihotplug_read, pci0_status); @@ -594,6 +623,44 @@ static void piix4_acpi_system_hot_add_init(PCIBus *bus, PIIX4PMState *s) pci_bus_hotplug(bus, piix4_device_hotplug, &s->dev.qdev); } +#if defined(TARGET_I386) +static void enable_processor(struct gpe_regs *g, int cpu) +{ + g->sts |= 4; + g->cpus_sts[cpu/8] |= (1 << (cpu%8)); +} + +static void disable_processor(struct gpe_regs *g, int cpu) +{ + g->sts |= 4; + g->cpus_sts[cpu/8] &= ~(1 << (cpu%8)); +} + +void qemu_system_cpu_hot_add(int cpu, int state) +{ + CPUState *env; + PIIX4PMState *s = global_piix4_pm_state; + + if (state && !qemu_get_cpu(cpu)) { + env = pc_new_cpu(global_cpu_model); + if (!env) { + fprintf(stderr, "cpu %d creation failed\n", cpu); + return; + } + env->cpuid_apic_id = cpu; + } + + if (state) + enable_processor(&s->gpe, cpu); + else + disable_processor(&s->gpe, cpu); + if (s->gpe.en & 4) { + qemu_set_irq(s->irq, 1); + qemu_set_irq(s->irq, 0); + } +} +#endif + static void 
enable_device(PIIX4PMState *s, int slot) { s->gpe.sts |= 2; @@ -21,6 +21,7 @@ #include "qemu-timer.h" #include "host-utils.h" #include "sysbus.h" +#include "kvm.h" //#define DEBUG_APIC //#define DEBUG_COALESCING @@ -315,8 +316,11 @@ void cpu_set_apic_base(DeviceState *d, uint64_t val) DPRINTF("cpu_set_apic_base: %016" PRIx64 "\n", val); if (!s) return; - s->apicbase = (val & 0xfffff000) | - (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE)); + if (kvm_enabled() && kvm_irqchip_in_kernel()) + s->apicbase = val; + else + s->apicbase = (val & 0xfffff000) | + (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE)); /* if disabled, cannot be enabled again */ if (!(val & MSR_IA32_APICBASE_ENABLE)) { s->apicbase &= ~MSR_IA32_APICBASE_ENABLE; @@ -412,6 +416,11 @@ int apic_get_irq_delivered(void) return apic_irq_delivered; } +void apic_set_irq_delivered(void) +{ + apic_irq_delivered = 1; +} + static void apic_set_irq(APICState *s, int vector_num, int trigger_mode) { apic_irq_delivered += !get_bit(s->irr, vector_num); @@ -876,6 +885,105 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val) } } +#ifdef KVM_CAP_IRQCHIP + +static inline uint32_t kapic_reg(struct kvm_lapic_state *kapic, int reg_id) +{ + return *((uint32_t *) (kapic->regs + (reg_id << 4))); +} + +static inline void kapic_set_reg(struct kvm_lapic_state *kapic, + int reg_id, uint32_t val) +{ + *((uint32_t *) (kapic->regs + (reg_id << 4))) = val; +} + +static void kvm_kernel_lapic_save_to_user(APICState *s) +{ + struct kvm_lapic_state apic; + struct kvm_lapic_state *kapic = &apic; + int i, v; + + kvm_get_lapic(s->cpu_env, kapic); + + s->id = kapic_reg(kapic, 0x2) >> 24; + s->tpr = kapic_reg(kapic, 0x8); + s->arb_id = kapic_reg(kapic, 0x9); + s->log_dest = kapic_reg(kapic, 0xd) >> 24; + s->dest_mode = kapic_reg(kapic, 0xe) >> 28; + s->spurious_vec = kapic_reg(kapic, 0xf); + for (i = 0; i < 8; i++) { + s->isr[i] = kapic_reg(kapic, 0x10 + i); + s->tmr[i] = 
kapic_reg(kapic, 0x18 + i); + s->irr[i] = kapic_reg(kapic, 0x20 + i); + } + s->esr = kapic_reg(kapic, 0x28); + s->icr[0] = kapic_reg(kapic, 0x30); + s->icr[1] = kapic_reg(kapic, 0x31); + for (i = 0; i < APIC_LVT_NB; i++) + s->lvt[i] = kapic_reg(kapic, 0x32 + i); + s->initial_count = kapic_reg(kapic, 0x38); + s->divide_conf = kapic_reg(kapic, 0x3e); + + v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4); + s->count_shift = (v + 1) & 7; + + s->initial_count_load_time = qemu_get_clock(vm_clock); + apic_timer_update(s, s->initial_count_load_time); +} + +static void kvm_kernel_lapic_load_from_user(APICState *s) +{ + struct kvm_lapic_state apic; + struct kvm_lapic_state *klapic = &apic; + int i; + + memset(klapic, 0, sizeof apic); + kapic_set_reg(klapic, 0x2, s->id << 24); + kapic_set_reg(klapic, 0x8, s->tpr); + kapic_set_reg(klapic, 0xd, s->log_dest << 24); + kapic_set_reg(klapic, 0xe, s->dest_mode << 28 | 0x0fffffff); + kapic_set_reg(klapic, 0xf, s->spurious_vec); + for (i = 0; i < 8; i++) { + kapic_set_reg(klapic, 0x10 + i, s->isr[i]); + kapic_set_reg(klapic, 0x18 + i, s->tmr[i]); + kapic_set_reg(klapic, 0x20 + i, s->irr[i]); + } + kapic_set_reg(klapic, 0x28, s->esr); + kapic_set_reg(klapic, 0x30, s->icr[0]); + kapic_set_reg(klapic, 0x31, s->icr[1]); + for (i = 0; i < APIC_LVT_NB; i++) + kapic_set_reg(klapic, 0x32 + i, s->lvt[i]); + kapic_set_reg(klapic, 0x38, s->initial_count); + kapic_set_reg(klapic, 0x3e, s->divide_conf); + + kvm_set_lapic(s->cpu_env, klapic); +} + +#endif + +void kvm_load_lapic(CPUState *env) +{ + APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state); + +#ifdef KVM_CAP_IRQCHIP + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + kvm_kernel_lapic_load_from_user(s); + } +#endif +} + +void kvm_save_lapic(CPUState *env) +{ + APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state); + +#ifdef KVM_CAP_IRQCHIP + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + kvm_kernel_lapic_save_to_user(s); + } +#endif +} + /* This function is 
only used for old state version 1 and 2 */ static int apic_load_old(QEMUFile *f, void *opaque, int version_id) { @@ -13,6 +13,7 @@ void apic_deliver_pic_intr(DeviceState *s, int level); int apic_get_interrupt(DeviceState *s); void apic_reset_irq_delivered(void); int apic_get_irq_delivered(void); +void apic_set_irq_delivered(void); void cpu_set_apic_base(DeviceState *s, uint64_t val); uint64_t cpu_get_apic_base(DeviceState *s); void cpu_set_apic_tpr(DeviceState *s, uint8_t val); diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c index bbd4b082d..efa7a42f5 100644 --- a/hw/cirrus_vga.c +++ b/hw/cirrus_vga.c @@ -32,6 +32,7 @@ #include "console.h" #include "vga_int.h" #include "kvm.h" +#include "qemu-kvm.h" #include "loader.h" /* @@ -2552,6 +2553,7 @@ static CPUWriteMemoryFunc * const cirrus_linear_bitblt_write[3] = { static void map_linear_vram(CirrusVGAState *s) { + vga_dirty_log_stop(&s->vga); if (!s->vga.map_addr && s->vga.lfb_addr && s->vga.lfb_end) { s->vga.map_addr = s->vga.lfb_addr; s->vga.map_end = s->vga.lfb_end; @@ -2561,13 +2563,19 @@ static void map_linear_vram(CirrusVGAState *s) if (!s->vga.map_addr) return; +#ifndef TARGET_IA64 s->vga.lfb_vram_mapped = 0; + cpu_register_physical_memory(isa_mem_base + 0xa0000, 0x8000, + (s->vga.vram_offset + s->cirrus_bank_base[0]) | IO_MEM_UNASSIGNED); + cpu_register_physical_memory(isa_mem_base + 0xa8000, 0x8000, + (s->vga.vram_offset + s->cirrus_bank_base[1]) | IO_MEM_UNASSIGNED); if (!(s->cirrus_srcptr != s->cirrus_srcptr_end) && !((s->vga.sr[0x07] & 0x01) == 0) && !((s->vga.gr[0x0B] & 0x14) == 0x14) && !(s->vga.gr[0x0B] & 0x02)) { + vga_dirty_log_stop(&s->vga); cpu_register_physical_memory(isa_mem_base + 0xa0000, 0x8000, (s->vga.vram_offset + s->cirrus_bank_base[0]) | IO_MEM_RAM); cpu_register_physical_memory(isa_mem_base + 0xa8000, 0x8000, @@ -2579,12 +2587,14 @@ static void map_linear_vram(CirrusVGAState *s) cpu_register_physical_memory(isa_mem_base + 0xa0000, 0x20000, s->vga.vga_io_memory); } +#endif 
vga_dirty_log_start(&s->vga); } static void unmap_linear_vram(CirrusVGAState *s) { + vga_dirty_log_stop(&s->vga); if (s->vga.map_addr && s->vga.lfb_addr && s->vga.lfb_end) { s->vga.map_addr = s->vga.map_end = 0; cpu_register_physical_memory(s->vga.lfb_addr, s->vga.vram_size, @@ -2592,6 +2602,8 @@ static void unmap_linear_vram(CirrusVGAState *s) } cpu_register_physical_memory(isa_mem_base + 0xa0000, 0x20000, s->vga.vga_io_memory); + + vga_dirty_log_start(&s->vga); } /* Compute the memory access functions */ @@ -3144,6 +3156,8 @@ static void cirrus_pci_lfb_map(PCIDevice *d, int region_num, { CirrusVGAState *s = &DO_UPCAST(PCICirrusVGAState, dev, d)->cirrus_vga; + vga_dirty_log_stop(&s->vga); + /* XXX: add byte swapping apertures */ cpu_register_physical_memory(addr, s->vga.vram_size, s->cirrus_linear_io_addr); @@ -3175,10 +3189,14 @@ static void pci_cirrus_write_config(PCIDevice *d, PCICirrusVGAState *pvs = DO_UPCAST(PCICirrusVGAState, dev, d); CirrusVGAState *s = &pvs->cirrus_vga; + vga_dirty_log_stop(&s->vga); + pci_default_write_config(d, address, val, len); if (s->vga.map_addr && d->io_regions[0].addr == PCI_BAR_UNMAPPED) s->vga.map_addr = 0; cirrus_update_memory_access(s); + + vga_dirty_log_start(&s->vga); } static int pci_cirrus_vga_initfn(PCIDevice *dev) diff --git a/hw/device-assignment.c b/hw/device-assignment.c new file mode 100644 index 000000000..2bba22f83 --- /dev/null +++ b/hw/device-assignment.c @@ -0,0 +1,1672 @@ +/* + * Copyright (c) 2007, Neocleus Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * + * Assign a PCI device from the host to a guest VM. + * + * Adapted for KVM by Qumranet. + * + * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com) + * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com) + * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com) + * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com) + * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com) + */ +#include <stdio.h> +#include <unistd.h> +#include <sys/io.h> +#include <sys/types.h> +#include <sys/stat.h> +#include "qemu-kvm.h" +#include "hw.h" +#include "pc.h" +#include "qemu-error.h" +#include "console.h" +#include "device-assignment.h" +#include "loader.h" +#include "monitor.h" +#include <pci/header.h> + +/* From linux/ioport.h */ +#define IORESOURCE_IO 0x00000100 /* Resource type */ +#define IORESOURCE_MEM 0x00000200 +#define IORESOURCE_IRQ 0x00000400 +#define IORESOURCE_DMA 0x00000800 +#define IORESOURCE_PREFETCH 0x00001000 /* No side effects */ + +/* #define DEVICE_ASSIGNMENT_DEBUG 1 */ + +#ifdef DEVICE_ASSIGNMENT_DEBUG +#define DEBUG(fmt, ...) \ + do { \ + fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__); \ + } while (0) +#else +#define DEBUG(fmt, ...) 
do { } while(0) +#endif + +static void assigned_dev_load_option_rom(AssignedDevice *dev); + +static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev); + +static uint32_t guest_to_host_ioport(AssignedDevRegion *region, uint32_t addr) +{ + return region->u.r_baseport + (addr - region->e_physbase); +} + +static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr, + uint32_t value) +{ + AssignedDevRegion *r_access = opaque; + uint32_t r_pio = guest_to_host_ioport(r_access, addr); + + DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n", + r_pio, (int)r_access->e_physbase, + (unsigned long)r_access->u.r_baseport, value); + + outb(value, r_pio); +} + +static void assigned_dev_ioport_writew(void *opaque, uint32_t addr, + uint32_t value) +{ + AssignedDevRegion *r_access = opaque; + uint32_t r_pio = guest_to_host_ioport(r_access, addr); + + DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n", + r_pio, (int)r_access->e_physbase, + (unsigned long)r_access->u.r_baseport, value); + + outw(value, r_pio); +} + +static void assigned_dev_ioport_writel(void *opaque, uint32_t addr, + uint32_t value) +{ + AssignedDevRegion *r_access = opaque; + uint32_t r_pio = guest_to_host_ioport(r_access, addr); + + DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n", + r_pio, (int)r_access->e_physbase, + (unsigned long)r_access->u.r_baseport, value); + + outl(value, r_pio); +} + +static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr) +{ + AssignedDevRegion *r_access = opaque; + uint32_t r_pio = guest_to_host_ioport(r_access, addr); + uint32_t value; + + value = inb(r_pio); + + DEBUG("r_pio=%08x e_physbase=%08x r_=%08lx value=%08x\n", + r_pio, (int)r_access->e_physbase, + (unsigned long)r_access->u.r_baseport, value); + + return value; +} + +static uint32_t assigned_dev_ioport_readw(void *opaque, uint32_t addr) +{ + AssignedDevRegion *r_access = opaque; + uint32_t r_pio = guest_to_host_ioport(r_access, addr); + uint32_t value; 
+ + value = inw(r_pio); + + DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n", + r_pio, (int)r_access->e_physbase, + (unsigned long)r_access->u.r_baseport, value); + + return value; +} + +static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr) +{ + AssignedDevRegion *r_access = opaque; + uint32_t r_pio = guest_to_host_ioport(r_access, addr); + uint32_t value; + + value = inl(r_pio); + + DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n", + r_pio, (int)r_access->e_physbase, + (unsigned long)r_access->u.r_baseport, value); + + return value; +} + +static uint32_t slow_bar_readb(void *opaque, target_phys_addr_t addr) +{ + AssignedDevRegion *d = opaque; + uint8_t *in = d->u.r_virtbase + addr; + uint32_t r; + + r = *in; + DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r); + + return r; +} + +static uint32_t slow_bar_readw(void *opaque, target_phys_addr_t addr) +{ + AssignedDevRegion *d = opaque; + uint16_t *in = d->u.r_virtbase + addr; + uint32_t r; + + r = *in; + DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r); + + return r; +} + +static uint32_t slow_bar_readl(void *opaque, target_phys_addr_t addr) +{ + AssignedDevRegion *d = opaque; + uint32_t *in = d->u.r_virtbase + addr; + uint32_t r; + + r = *in; + DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r); + + return r; +} + +static void slow_bar_writeb(void *opaque, target_phys_addr_t addr, uint32_t val) +{ + AssignedDevRegion *d = opaque; + uint8_t *out = d->u.r_virtbase + addr; + + DEBUG("slow_bar_writeb addr=0x" TARGET_FMT_plx " val=0x%02x\n", addr, val); + *out = val; +} + +static void slow_bar_writew(void *opaque, target_phys_addr_t addr, uint32_t val) +{ + AssignedDevRegion *d = opaque; + uint16_t *out = d->u.r_virtbase + addr; + + DEBUG("slow_bar_writew addr=0x" TARGET_FMT_plx " val=0x%04x\n", addr, val); + *out = val; +} + +static void slow_bar_writel(void *opaque, target_phys_addr_t addr, uint32_t val) 
+{ + AssignedDevRegion *d = opaque; + uint32_t *out = d->u.r_virtbase + addr; + + DEBUG("slow_bar_writel addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, val); + *out = val; +} + +static CPUWriteMemoryFunc * const slow_bar_write[] = { + &slow_bar_writeb, + &slow_bar_writew, + &slow_bar_writel +}; + +static CPUReadMemoryFunc * const slow_bar_read[] = { + &slow_bar_readb, + &slow_bar_readw, + &slow_bar_readl +}; + +static void assigned_dev_iomem_map_slow(PCIDevice *pci_dev, int region_num, + pcibus_t e_phys, pcibus_t e_size, + int type) +{ + AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev); + AssignedDevRegion *region = &r_dev->v_addrs[region_num]; + PCIRegion *real_region = &r_dev->real_device.regions[region_num]; + int m; + + DEBUG("%s", "slow map\n"); + if (region_num == PCI_ROM_SLOT) + m = cpu_register_io_memory(slow_bar_read, NULL, region); + else + m = cpu_register_io_memory(slow_bar_read, slow_bar_write, region); + cpu_register_physical_memory(e_phys, e_size, m); + + /* MSI-X MMIO page */ + if ((e_size > 0) && + real_region->base_addr <= r_dev->msix_table_addr && + real_region->base_addr + real_region->size >= r_dev->msix_table_addr) { + int offset = r_dev->msix_table_addr - real_region->base_addr; + + cpu_register_physical_memory(e_phys + offset, + TARGET_PAGE_SIZE, r_dev->mmio_index); + } +} + +static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num, + pcibus_t e_phys, pcibus_t e_size, int type) +{ + AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev); + AssignedDevRegion *region = &r_dev->v_addrs[region_num]; + PCIRegion *real_region = &r_dev->real_device.regions[region_num]; + int ret = 0, flags = 0; + + DEBUG("e_phys=%08" FMT_PCIBUS " r_virt=%p type=%d len=%08" FMT_PCIBUS " region_num=%d \n", + e_phys, region->u.r_virtbase, type, e_size, region_num); + + region->e_physbase = e_phys; + region->e_size = e_size; + + if (e_size > 0) { + + if (region_num == PCI_ROM_SLOT) + flags |= IO_MEM_ROM; + + 
cpu_register_physical_memory(e_phys, e_size, region->memory_index | flags); + + /* deal with MSI-X MMIO page */ + if (real_region->base_addr <= r_dev->msix_table_addr && + real_region->base_addr + real_region->size >= + r_dev->msix_table_addr) { + int offset = r_dev->msix_table_addr - real_region->base_addr; + + cpu_register_physical_memory(e_phys + offset, + TARGET_PAGE_SIZE, r_dev->mmio_index); + } + } + + if (ret != 0) { + fprintf(stderr, "%s: Error: create new mapping failed\n", __func__); + exit(1); + } +} + +static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num, + pcibus_t addr, pcibus_t size, int type) +{ + AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev); + AssignedDevRegion *region = &r_dev->v_addrs[region_num]; + int first_map = (region->e_size == 0); + CPUState *env; + + region->e_physbase = addr; + region->e_size = size; + + DEBUG("e_phys=0x%" FMT_PCIBUS " r_baseport=%x type=0x%x len=%" FMT_PCIBUS " region_num=%d \n", + addr, region->u.r_baseport, type, size, region_num); + + if (first_map) { + struct ioperm_data *data; + + data = qemu_mallocz(sizeof(struct ioperm_data)); + if (data == NULL) { + fprintf(stderr, "%s: Out of memory\n", __func__); + exit(1); + } + + data->start_port = region->u.r_baseport; + data->num = region->r_size; + data->turn_on = 1; + + kvm_add_ioperm_data(data); + + for (env = first_cpu; env; env = env->next_cpu) + kvm_ioperm(env, data); + } + + register_ioport_read(addr, size, 1, assigned_dev_ioport_readb, + (r_dev->v_addrs + region_num)); + register_ioport_read(addr, size, 2, assigned_dev_ioport_readw, + (r_dev->v_addrs + region_num)); + register_ioport_read(addr, size, 4, assigned_dev_ioport_readl, + (r_dev->v_addrs + region_num)); + register_ioport_write(addr, size, 1, assigned_dev_ioport_writeb, + (r_dev->v_addrs + region_num)); + register_ioport_write(addr, size, 2, assigned_dev_ioport_writew, + (r_dev->v_addrs + region_num)); + register_ioport_write(addr, size, 4, 
assigned_dev_ioport_writel, + (r_dev->v_addrs + region_num)); +} + +static uint32_t assigned_dev_pci_read(PCIDevice *d, int pos, int len) +{ + AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev); + uint32_t val; + ssize_t ret; + int fd = pci_dev->real_device.config_fd; + +again: + ret = pread(fd, &val, len, pos); + if (ret != len) { + if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) + goto again; + + fprintf(stderr, "%s: pread failed, ret = %zd errno = %d\n", + __func__, ret, errno); + + exit(1); + } + + return val; +} + +static uint8_t assigned_dev_pci_read_byte(PCIDevice *d, int pos) +{ + return (uint8_t)assigned_dev_pci_read(d, pos, 1); +} + +static uint16_t assigned_dev_pci_read_word(PCIDevice *d, int pos) +{ + return (uint16_t)assigned_dev_pci_read(d, pos, 2); +} + +static uint32_t assigned_dev_pci_read_long(PCIDevice *d, int pos) +{ + return assigned_dev_pci_read(d, pos, 4); +} + +static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap) +{ + int id; + int max_cap = 48; + int pos = PCI_CAPABILITY_LIST; + int status; + + status = assigned_dev_pci_read_byte(d, PCI_STATUS); + if ((status & PCI_STATUS_CAP_LIST) == 0) + return 0; + + while (max_cap--) { + pos = assigned_dev_pci_read_byte(d, pos); + if (pos < 0x40) + break; + + pos &= ~3; + id = assigned_dev_pci_read_byte(d, pos + PCI_CAP_LIST_ID); + + if (id == 0xff) + break; + if (id == cap) + return pos; + + pos += PCI_CAP_LIST_NEXT; + } + return 0; +} + +static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address, + uint32_t val, int len) +{ + int fd; + ssize_t ret; + AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev); + + DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", + ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), + (uint16_t) address, val, len); + + if (address == 0x4) { + pci_default_write_config(d, address, val, len); + /* Continue to program the card */ + } + + if ((address >= 0x10 && address <= 0x24) || address == 0x30 || + address == 0x34 || address == 0x3c 
|| address == 0x3d || + pci_access_cap_config(d, address, len)) { + /* used for update-mappings (BAR emulation) */ + pci_default_write_config(d, address, val, len); + return; + } + + DEBUG("NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n", + ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), + (uint16_t) address, val, len); + + fd = pci_dev->real_device.config_fd; + +again: + ret = pwrite(fd, &val, len, address); + if (ret != len) { + if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) + goto again; + + fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n", + __func__, ret, errno); + + exit(1); + } +} + +static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address, + int len) +{ + uint32_t val = 0; + int fd; + ssize_t ret; + AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev); + + if (address < 0x4 || (pci_dev->need_emulate_cmd && address == 0x4) || + (address >= 0x10 && address <= 0x24) || address == 0x30 || + address == 0x34 || address == 0x3c || address == 0x3d || + pci_access_cap_config(d, address, len)) { + val = pci_default_read_config(d, address, len); + DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", + (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); + return val; + } + + /* vga specific, remove later */ + if (address == 0xFC) + goto do_log; + + fd = pci_dev->real_device.config_fd; + +again: + ret = pread(fd, &val, len, address); + if (ret != len) { + if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) + goto again; + + fprintf(stderr, "%s: pread failed, ret = %zd errno = %d\n", + __func__, ret, errno); + + exit(1); + } + +do_log: + DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", + (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); + + if (!pci_dev->cap.available) { + /* kill the special capabilities */ + if (address == 4 && len == 4) + val &= ~0x100000; + else if (address == 6) + val &= ~0x10; + } + + return val; +} + +static int assigned_dev_register_regions(PCIRegion *io_regions, + unsigned 
long regions_num, + AssignedDevice *pci_dev) +{ + uint32_t i; + PCIRegion *cur_region = io_regions; + + for (i = 0; i < regions_num; i++, cur_region++) { + if (!cur_region->valid) + continue; + pci_dev->v_addrs[i].num = i; + + /* handle memory io regions */ + if (cur_region->type & IORESOURCE_MEM) { + int slow_map = 0; + int t = cur_region->type & IORESOURCE_PREFETCH + ? PCI_BASE_ADDRESS_MEM_PREFETCH + : PCI_BASE_ADDRESS_SPACE_MEMORY; + + if (cur_region->size & 0xFFF) { + if (i != PCI_ROM_SLOT) { + fprintf(stderr, "PCI region %d at address 0x%llx " + "has size 0x%x, which is not a multiple of 4K. " + "You might experience some performance hit " + "due to that.\n", + i, (unsigned long long)cur_region->base_addr, + cur_region->size); + } + slow_map = 1; + } + + /* map physical memory */ + pci_dev->v_addrs[i].e_physbase = cur_region->base_addr; + if (i == PCI_ROM_SLOT) { + pci_dev->v_addrs[i].u.r_virtbase = + mmap(NULL, + cur_region->size, + PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, + 0, (off_t) 0); + + } else { + pci_dev->v_addrs[i].u.r_virtbase = + mmap(NULL, + cur_region->size, + PROT_WRITE | PROT_READ, MAP_SHARED, + cur_region->resource_fd, (off_t) 0); + } + + if (pci_dev->v_addrs[i].u.r_virtbase == MAP_FAILED) { + pci_dev->v_addrs[i].u.r_virtbase = NULL; + fprintf(stderr, "%s: Error: Couldn't mmap 0x%x!" 
+ "\n", __func__, + (uint32_t) (cur_region->base_addr)); + return -1; + } + + if (i == PCI_ROM_SLOT) { + memset(pci_dev->v_addrs[i].u.r_virtbase, 0, + (cur_region->size + 0xFFF) & 0xFFFFF000); + mprotect(pci_dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase, + (cur_region->size + 0xFFF) & 0xFFFFF000, PROT_READ); + } + + pci_dev->v_addrs[i].r_size = cur_region->size; + pci_dev->v_addrs[i].e_size = 0; + + /* add offset */ + pci_dev->v_addrs[i].u.r_virtbase += + (cur_region->base_addr & 0xFFF); + + + if (!slow_map) { + void *virtbase = pci_dev->v_addrs[i].u.r_virtbase; + char name[32]; + snprintf(name, sizeof(name), "%s.bar%d", + pci_dev->dev.qdev.info->name, i); + pci_dev->v_addrs[i].memory_index = + qemu_ram_map(&pci_dev->dev.qdev, + name, cur_region->size, + virtbase); + } else + pci_dev->v_addrs[i].memory_index = 0; + + pci_register_bar((PCIDevice *) pci_dev, i, + cur_region->size, t, + slow_map ? assigned_dev_iomem_map_slow + : assigned_dev_iomem_map); + continue; + } + /* handle port io regions */ + pci_dev->v_addrs[i].e_physbase = cur_region->base_addr; + pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr; + pci_dev->v_addrs[i].r_size = cur_region->size; + pci_dev->v_addrs[i].e_size = 0; + + pci_register_bar((PCIDevice *) pci_dev, i, + cur_region->size, PCI_BASE_ADDRESS_SPACE_IO, + assigned_dev_ioport_map); + + /* not relevant for port io */ + pci_dev->v_addrs[i].memory_index = 0; + } + + /* success */ + return 0; +} + +static int get_real_id(const char *devpath, const char *idname, uint16_t *val) +{ + FILE *f; + char name[128]; + long id; + + snprintf(name, sizeof(name), "%s%s", devpath, idname); + f = fopen(name, "r"); + if (f == NULL) { + fprintf(stderr, "%s: %s: %m\n", __func__, name); + return -1; + } + if (fscanf(f, "%li\n", &id) == 1) { + *val = id; + } else { + return -1; + } + fclose(f); + + return 0; +} + +static int get_real_vendor_id(const char *devpath, uint16_t *val) +{ + return get_real_id(devpath, "vendor", val); +} + +static int 
get_real_device_id(const char *devpath, uint16_t *val) +{ + return get_real_id(devpath, "device", val); +} + +static int get_real_device(AssignedDevice *pci_dev, uint16_t r_seg, + uint8_t r_bus, uint8_t r_dev, uint8_t r_func) +{ + char dir[128], name[128]; + int fd, r = 0, v; + FILE *f; + unsigned long long start, end, size, flags; + uint16_t id; + struct stat statbuf; + PCIRegion *rp; + PCIDevRegions *dev = &pci_dev->real_device; + + dev->region_number = 0; + + snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/", + r_seg, r_bus, r_dev, r_func); + + snprintf(name, sizeof(name), "%sconfig", dir); + + if (pci_dev->configfd_name && *pci_dev->configfd_name) { + if (qemu_isdigit(pci_dev->configfd_name[0])) { + dev->config_fd = strtol(pci_dev->configfd_name, NULL, 0); + } else { + dev->config_fd = monitor_get_fd(cur_mon, pci_dev->configfd_name); + if (dev->config_fd < 0) { + fprintf(stderr, "%s: (%s) unkown\n", __func__, + pci_dev->configfd_name); + return 1; + } + } + } else { + dev->config_fd = open(name, O_RDWR); + + if (dev->config_fd == -1) { + fprintf(stderr, "%s: %s: %m\n", __func__, name); + return 1; + } + } +again: + r = read(dev->config_fd, pci_dev->dev.config, + pci_config_size(&pci_dev->dev)); + if (r < 0) { + if (errno == EINTR || errno == EAGAIN) + goto again; + fprintf(stderr, "%s: read failed, errno = %d\n", __func__, errno); + } + + snprintf(name, sizeof(name), "%sresource", dir); + + f = fopen(name, "r"); + if (f == NULL) { + fprintf(stderr, "%s: %s: %m\n", __func__, name); + return 1; + } + + for (r = 0; r < PCI_NUM_REGIONS; r++) { + if (fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) != 3) + break; + + rp = dev->regions + r; + rp->valid = 0; + rp->resource_fd = -1; + size = end - start + 1; + flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH; + if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0) + continue; + if (flags & IORESOURCE_MEM) { + flags &= ~IORESOURCE_IO; + if (r != PCI_ROM_SLOT) { + snprintf(name, 
sizeof(name), "%sresource%d", dir, r); + fd = open(name, O_RDWR); + if (fd == -1) + continue; + rp->resource_fd = fd; + } + } else + flags &= ~IORESOURCE_PREFETCH; + + rp->type = flags; + rp->valid = 1; + rp->base_addr = start; + rp->size = size; + DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n", + r, rp->size, start, rp->type, rp->resource_fd); + } + + fclose(f); + + /* read and fill vendor ID */ + v = get_real_vendor_id(dir, &id); + if (v) { + return 1; + } + pci_dev->dev.config[0] = id & 0xff; + pci_dev->dev.config[1] = (id & 0xff00) >> 8; + + /* read and fill device ID */ + v = get_real_device_id(dir, &id); + if (v) { + return 1; + } + pci_dev->dev.config[2] = id & 0xff; + pci_dev->dev.config[3] = (id & 0xff00) >> 8; + + /* dealing with virtual function device */ + snprintf(name, sizeof(name), "%sphysfn/", dir); + if (!stat(name, &statbuf)) + pci_dev->need_emulate_cmd = 1; + else + pci_dev->need_emulate_cmd = 0; + + dev->region_number = r; + return 0; +} + +static QLIST_HEAD(, AssignedDevice) devs = QLIST_HEAD_INITIALIZER(devs); + +#ifdef KVM_CAP_IRQ_ROUTING +static void free_dev_irq_entries(AssignedDevice *dev) +{ + int i; + + for (i = 0; i < dev->irq_entries_nr; i++) + kvm_del_routing_entry(kvm_context, &dev->entry[i]); + free(dev->entry); + dev->entry = NULL; + dev->irq_entries_nr = 0; +} +#endif + +static void free_assigned_device(AssignedDevice *dev) +{ + if (dev) { + int i; + + for (i = 0; i < dev->real_device.region_number; i++) { + PCIRegion *pci_region = &dev->real_device.regions[i]; + AssignedDevRegion *region = &dev->v_addrs[i]; + + if (!pci_region->valid) + continue; + + if (pci_region->type & IORESOURCE_IO) { + kvm_remove_ioperm_data(region->u.r_baseport, region->r_size); + continue; + } else if (pci_region->type & IORESOURCE_MEM) { + if (region->u.r_virtbase) { + if (region->memory_index) { + cpu_register_physical_memory(region->e_physbase, + region->e_size, + IO_MEM_UNASSIGNED); + qemu_ram_unmap(region->memory_index); + } + if 
(munmap(region->u.r_virtbase, + (pci_region->size + 0xFFF) & 0xFFFFF000)) + fprintf(stderr, + "Failed to unmap assigned device region: %s\n", + strerror(errno)); + if (pci_region->resource_fd >= 0) { + close(pci_region->resource_fd); + } + } + } + } + + if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) + assigned_dev_unregister_msix_mmio(dev); + + if (dev->real_device.config_fd >= 0) { + close(dev->real_device.config_fd); + } + +#ifdef KVM_CAP_IRQ_ROUTING + free_dev_irq_entries(dev); +#endif + } +} + +static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn) +{ + return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn; +} + +static void assign_failed_examine(AssignedDevice *dev) +{ + char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns; + uint16_t vendor_id, device_id; + int r; + + sprintf(dir, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/", + dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func); + + sprintf(name, "%sdriver", dir); + + r = readlink(name, driver, sizeof(driver)); + if ((r <= 0) || r >= sizeof(driver) || !(ns = strrchr(driver, '/'))) { + goto fail; + } + + ns++; + + if (get_real_vendor_id(dir, &vendor_id) || + get_real_device_id(dir, &device_id)) { + goto fail; + } + + fprintf(stderr, "*** The driver '%s' is occupying your device " + "%04x:%02x:%02x.%x.\n", + ns, dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func); + fprintf(stderr, "***\n"); + fprintf(stderr, "*** You can try the following commands to free it:\n"); + fprintf(stderr, "***\n"); + fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub/" + "new_id\n", vendor_id, device_id); + fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/" + "%s/unbind\n", + dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func, ns); + fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/" + "pci-stub/bind\n", + dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func); + fprintf(stderr, 
"*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub" + "/remove_id\n", vendor_id, device_id); + fprintf(stderr, "***\n"); + + return; + +fail: + fprintf(stderr, "Couldn't find out why.\n"); +} + +static int assign_device(AssignedDevice *dev) +{ + struct kvm_assigned_pci_dev assigned_dev_data; + int r; + +#ifdef KVM_CAP_PCI_SEGMENT + /* Only pass non-zero PCI segment to capable module */ + if (!kvm_check_extension(kvm_state, KVM_CAP_PCI_SEGMENT) && + dev->h_segnr) { + fprintf(stderr, "Can't assign device inside non-zero PCI segment " + "as this KVM module doesn't support it.\n"); + return -ENODEV; + } +#endif + + memset(&assigned_dev_data, 0, sizeof(assigned_dev_data)); + assigned_dev_data.assigned_dev_id = + calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn); +#ifdef KVM_CAP_PCI_SEGMENT + assigned_dev_data.segnr = dev->h_segnr; +#endif + assigned_dev_data.busnr = dev->h_busnr; + assigned_dev_data.devfn = dev->h_devfn; + +#ifdef KVM_CAP_IOMMU + /* We always enable the IOMMU unless disabled on the command line */ + if (dev->use_iommu) { + if (!kvm_check_extension(kvm_state, KVM_CAP_IOMMU)) { + fprintf(stderr, "No IOMMU found. 
Unable to assign device \"%s\"\n", + dev->dev.qdev.id); + return -ENODEV; + } + assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU; + } +#else + dev->use_iommu = 0; +#endif + + r = kvm_assign_pci_device(kvm_context, &assigned_dev_data); + if (r < 0) { + fprintf(stderr, "Failed to assign device \"%s\" : %s\n", + dev->dev.qdev.id, strerror(-r)); + + switch (r) { + case -EBUSY: + assign_failed_examine(dev); + break; + default: + break; + } + } + return r; +} + +static int assign_irq(AssignedDevice *dev) +{ + struct kvm_assigned_irq assigned_irq_data; + int irq, r = 0; + + /* Interrupt PIN 0 means don't use INTx */ + if (assigned_dev_pci_read_byte(&dev->dev, PCI_INTERRUPT_PIN) == 0) + return 0; + + irq = pci_map_irq(&dev->dev, dev->intpin); + irq = piix_get_irq(irq); + +#ifdef TARGET_IA64 + irq = ipf_map_irq(&dev->dev, irq); +#endif + + if (dev->girq == irq) + return r; + + memset(&assigned_irq_data, 0, sizeof(assigned_irq_data)); + assigned_irq_data.assigned_dev_id = + calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn); + assigned_irq_data.guest_irq = irq; + assigned_irq_data.host_irq = dev->real_device.irq; +#ifdef KVM_CAP_ASSIGN_DEV_IRQ + if (dev->irq_requested_type) { + assigned_irq_data.flags = dev->irq_requested_type; + r = kvm_deassign_irq(kvm_context, &assigned_irq_data); + /* -ENXIO means no assigned irq */ + if (r && r != -ENXIO) + perror("assign_irq: deassign"); + } + + assigned_irq_data.flags = KVM_DEV_IRQ_GUEST_INTX; + if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) + assigned_irq_data.flags |= KVM_DEV_IRQ_HOST_MSI; + else + assigned_irq_data.flags |= KVM_DEV_IRQ_HOST_INTX; +#endif + + r = kvm_assign_irq(kvm_context, &assigned_irq_data); + if (r < 0) { + fprintf(stderr, "Failed to assign irq for \"%s\": %s\n", + dev->dev.qdev.id, strerror(-r)); + fprintf(stderr, "Perhaps you are assigning a device " + "that shares an IRQ with another device?\n"); + return r; + } + + dev->girq = irq; + dev->irq_requested_type = assigned_irq_data.flags; + 
return r; +} + +static void deassign_device(AssignedDevice *dev) +{ +#ifdef KVM_CAP_DEVICE_DEASSIGNMENT + struct kvm_assigned_pci_dev assigned_dev_data; + int r; + + memset(&assigned_dev_data, 0, sizeof(assigned_dev_data)); + assigned_dev_data.assigned_dev_id = + calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn); + + r = kvm_deassign_pci_device(kvm_context, &assigned_dev_data); + if (r < 0) + fprintf(stderr, "Failed to deassign device \"%s\" : %s\n", + dev->dev.qdev.id, strerror(-r)); +#endif +} + +#if 0 +AssignedDevInfo *get_assigned_device(int pcibus, int slot) +{ + AssignedDevice *assigned_dev = NULL; + AssignedDevInfo *adev = NULL; + + QLIST_FOREACH(adev, &adev_head, next) { + assigned_dev = adev->assigned_dev; + if (pci_bus_num(assigned_dev->dev.bus) == pcibus && + PCI_SLOT(assigned_dev->dev.devfn) == slot) + return adev; + } + + return NULL; +} +#endif + +/* The pci config space got updated. Check if irq numbers have changed + * for our devices + */ +void assigned_dev_update_irqs(void) +{ + AssignedDevice *dev, *next; + int r; + + dev = QLIST_FIRST(&devs); + while (dev) { + next = QLIST_NEXT(dev, next); + r = assign_irq(dev); + if (r < 0) + qdev_unplug(&dev->dev.qdev); + dev = next; + } +} + +#ifdef KVM_CAP_IRQ_ROUTING + +#ifdef KVM_CAP_DEVICE_MSI +static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos) +{ + struct kvm_assigned_irq assigned_irq_data; + AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev); + uint8_t ctrl_byte = pci_dev->config[ctrl_pos]; + int r; + + memset(&assigned_irq_data, 0, sizeof assigned_irq_data); + assigned_irq_data.assigned_dev_id = + calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr, + (uint8_t)assigned_dev->h_devfn); + + /* Some guests gratuitously disable MSI even if they're not using it, + * try to catch this by only deassigning irqs if the guest is using + * MSI or intends to start. 
*/ + if ((assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MSI) || + (ctrl_byte & PCI_MSI_FLAGS_ENABLE)) { + + assigned_irq_data.flags = assigned_dev->irq_requested_type; + free_dev_irq_entries(assigned_dev); + r = kvm_deassign_irq(kvm_context, &assigned_irq_data); + /* -ENXIO means no assigned irq */ + if (r && r != -ENXIO) + perror("assigned_dev_update_msi: deassign irq"); + + assigned_dev->irq_requested_type = 0; + } + + if (ctrl_byte & PCI_MSI_FLAGS_ENABLE) { + assigned_dev->entry = calloc(1, sizeof(struct kvm_irq_routing_entry)); + if (!assigned_dev->entry) { + perror("assigned_dev_update_msi: "); + return; + } + assigned_dev->entry->u.msi.address_lo = + *(uint32_t *)(pci_dev->config + pci_dev->cap.start + + PCI_MSI_ADDRESS_LO); + assigned_dev->entry->u.msi.address_hi = 0; + assigned_dev->entry->u.msi.data = *(uint16_t *)(pci_dev->config + + pci_dev->cap.start + PCI_MSI_DATA_32); + assigned_dev->entry->type = KVM_IRQ_ROUTING_MSI; + r = kvm_get_irq_route_gsi(kvm_context); + if (r < 0) { + perror("assigned_dev_update_msi: kvm_get_irq_route_gsi"); + return; + } + assigned_dev->entry->gsi = r; + + kvm_add_routing_entry(kvm_context, assigned_dev->entry); + if (kvm_commit_irq_routes(kvm_context) < 0) { + perror("assigned_dev_update_msi: kvm_commit_irq_routes"); + assigned_dev->cap.state &= ~ASSIGNED_DEVICE_MSI_ENABLED; + return; + } + assigned_dev->irq_entries_nr = 1; + + assigned_irq_data.guest_irq = assigned_dev->entry->gsi; + assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_MSI; + if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0) + perror("assigned_dev_enable_msi: assign irq"); + + assigned_dev->irq_requested_type = assigned_irq_data.flags; + } +} +#endif + +#ifdef KVM_CAP_DEVICE_MSIX +static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev) +{ + AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev); + uint16_t entries_nr = 0, entries_max_nr; + int pos = 0, i, r = 0; + uint32_t msg_addr, msg_upper_addr, msg_data, 
msg_ctrl; + struct kvm_assigned_msix_nr msix_nr; + struct kvm_assigned_msix_entry msix_entry; + void *va = adev->msix_table_page; + + if (adev->cap.available & ASSIGNED_DEVICE_CAP_MSI) + pos = pci_dev->cap.start + PCI_CAPABILITY_CONFIG_MSI_LENGTH; + else + pos = pci_dev->cap.start; + + entries_max_nr = *(uint16_t *)(pci_dev->config + pos + 2); + entries_max_nr &= PCI_MSIX_TABSIZE; + entries_max_nr += 1; + + /* Get the usable entry number for allocating */ + for (i = 0; i < entries_max_nr; i++) { + memcpy(&msg_ctrl, va + i * 16 + 12, 4); + memcpy(&msg_data, va + i * 16 + 8, 4); + /* Ignore unused entry even it's unmasked */ + if (msg_data == 0) + continue; + entries_nr ++; + } + + if (entries_nr == 0) { + fprintf(stderr, "MSI-X entry number is zero!\n"); + return -EINVAL; + } + msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_segnr, adev->h_busnr, + (uint8_t)adev->h_devfn); + msix_nr.entry_nr = entries_nr; + r = kvm_assign_set_msix_nr(kvm_context, &msix_nr); + if (r != 0) { + fprintf(stderr, "fail to set MSI-X entry number for MSIX! 
%s\n", + strerror(-r)); + return r; + } + + free_dev_irq_entries(adev); + adev->irq_entries_nr = entries_nr; + adev->entry = calloc(entries_nr, sizeof(struct kvm_irq_routing_entry)); + if (!adev->entry) { + perror("assigned_dev_update_msix_mmio: "); + return -errno; + } + + msix_entry.assigned_dev_id = msix_nr.assigned_dev_id; + entries_nr = 0; + for (i = 0; i < entries_max_nr; i++) { + if (entries_nr >= msix_nr.entry_nr) + break; + memcpy(&msg_ctrl, va + i * 16 + 12, 4); + memcpy(&msg_data, va + i * 16 + 8, 4); + if (msg_data == 0) + continue; + + memcpy(&msg_addr, va + i * 16, 4); + memcpy(&msg_upper_addr, va + i * 16 + 4, 4); + + r = kvm_get_irq_route_gsi(kvm_context); + if (r < 0) + return r; + + adev->entry[entries_nr].gsi = r; + adev->entry[entries_nr].type = KVM_IRQ_ROUTING_MSI; + adev->entry[entries_nr].flags = 0; + adev->entry[entries_nr].u.msi.address_lo = msg_addr; + adev->entry[entries_nr].u.msi.address_hi = msg_upper_addr; + adev->entry[entries_nr].u.msi.data = msg_data; + DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!", msg_data, msg_addr); + kvm_add_routing_entry(kvm_context, &adev->entry[entries_nr]); + + msix_entry.gsi = adev->entry[entries_nr].gsi; + msix_entry.entry = i; + r = kvm_assign_set_msix_entry(kvm_context, &msix_entry); + if (r) { + fprintf(stderr, "fail to set MSI-X entry! 
%s\n", strerror(-r)); + break; + } + DEBUG("MSI-X entry gsi 0x%x, entry %d\n!", + msix_entry.gsi, msix_entry.entry); + entries_nr ++; + } + + if (r == 0 && kvm_commit_irq_routes(kvm_context) < 0) { + perror("assigned_dev_update_msix_mmio: kvm_commit_irq_routes"); + return -EINVAL; + } + + return r; +} + +static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos) +{ + struct kvm_assigned_irq assigned_irq_data; + AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev); + uint16_t *ctrl_word = (uint16_t *)(pci_dev->config + ctrl_pos); + int r; + + memset(&assigned_irq_data, 0, sizeof assigned_irq_data); + assigned_irq_data.assigned_dev_id = + calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr, + (uint8_t)assigned_dev->h_devfn); + + /* Some guests gratuitously disable MSIX even if they're not using it, + * try to catch this by only deassigning irqs if the guest is using + * MSIX or intends to start. */ + if ((assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MSIX) || + (*ctrl_word & PCI_MSIX_ENABLE)) { + + assigned_irq_data.flags = assigned_dev->irq_requested_type; + free_dev_irq_entries(assigned_dev); + r = kvm_deassign_irq(kvm_context, &assigned_irq_data); + /* -ENXIO means no assigned irq */ + if (r && r != -ENXIO) + perror("assigned_dev_update_msix: deassign irq"); + + assigned_dev->irq_requested_type = 0; + } + + if (*ctrl_word & PCI_MSIX_ENABLE) { + assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX | + KVM_DEV_IRQ_GUEST_MSIX; + + if (assigned_dev_update_msix_mmio(pci_dev) < 0) { + perror("assigned_dev_update_msix_mmio"); + return; + } + if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0) { + perror("assigned_dev_enable_msix: assign irq"); + return; + } + assigned_dev->irq_requested_type = assigned_irq_data.flags; + } +} +#endif +#endif + +static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address, + uint32_t val, int len) +{ + AssignedDevice *assigned_dev = 
container_of(pci_dev, AssignedDevice, dev); + unsigned int pos = pci_dev->cap.start, ctrl_pos; + + pci_default_cap_write_config(pci_dev, address, val, len); +#ifdef KVM_CAP_IRQ_ROUTING +#ifdef KVM_CAP_DEVICE_MSI + if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { + ctrl_pos = pos + PCI_MSI_FLAGS; + if (address <= ctrl_pos && address + len > ctrl_pos) + assigned_dev_update_msi(pci_dev, ctrl_pos); + pos += PCI_CAPABILITY_CONFIG_MSI_LENGTH; + } +#endif +#ifdef KVM_CAP_DEVICE_MSIX + if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) { + ctrl_pos = pos + 3; + if (address <= ctrl_pos && address + len > ctrl_pos) { + ctrl_pos--; /* control is word long */ + assigned_dev_update_msix(pci_dev, ctrl_pos); + } + pos += PCI_CAPABILITY_CONFIG_MSIX_LENGTH; + } +#endif +#endif + return; +} + +static int assigned_device_pci_cap_init(PCIDevice *pci_dev) +{ + AssignedDevice *dev = container_of(pci_dev, AssignedDevice, dev); + PCIRegion *pci_region = dev->real_device.regions; + int next_cap_pt = 0; + + pci_dev->cap.length = 0; +#ifdef KVM_CAP_IRQ_ROUTING +#ifdef KVM_CAP_DEVICE_MSI + /* Expose MSI capability + * MSI capability is the 1st capability in capability config */ + if (pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSI)) { + dev->cap.available |= ASSIGNED_DEVICE_CAP_MSI; + memset(&pci_dev->config[pci_dev->cap.start + pci_dev->cap.length], + 0, PCI_CAPABILITY_CONFIG_MSI_LENGTH); + pci_dev->config[pci_dev->cap.start + pci_dev->cap.length] = + PCI_CAP_ID_MSI; + pci_dev->cap.length += PCI_CAPABILITY_CONFIG_MSI_LENGTH; + next_cap_pt = 1; + } +#endif +#ifdef KVM_CAP_DEVICE_MSIX + /* Expose MSI-X capability */ + if (pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSIX)) { + int pos, entry_nr, bar_nr; + uint32_t msix_table_entry; + dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX; + memset(&pci_dev->config[pci_dev->cap.start + pci_dev->cap.length], + 0, PCI_CAPABILITY_CONFIG_MSIX_LENGTH); + pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSIX); + entry_nr = 
assigned_dev_pci_read_word(pci_dev, pos + 2) & + PCI_MSIX_TABSIZE; + pci_dev->config[pci_dev->cap.start + pci_dev->cap.length] = 0x11; + *(uint16_t *)(pci_dev->config + pci_dev->cap.start + + pci_dev->cap.length + 2) = entry_nr; + msix_table_entry = assigned_dev_pci_read_long(pci_dev, + pos + PCI_MSIX_TABLE); + *(uint32_t *)(pci_dev->config + pci_dev->cap.start + + pci_dev->cap.length + PCI_MSIX_TABLE) = msix_table_entry; + *(uint32_t *)(pci_dev->config + pci_dev->cap.start + + pci_dev->cap.length + PCI_MSIX_PBA) = + assigned_dev_pci_read_long(pci_dev, pos + PCI_MSIX_PBA); + bar_nr = msix_table_entry & PCI_MSIX_BIR; + msix_table_entry &= ~PCI_MSIX_BIR; + dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry; + if (next_cap_pt != 0) { + pci_dev->config[pci_dev->cap.start + next_cap_pt] = + pci_dev->cap.start + pci_dev->cap.length; + next_cap_pt += PCI_CAPABILITY_CONFIG_MSI_LENGTH; + } else + next_cap_pt = 1; + pci_dev->cap.length += PCI_CAPABILITY_CONFIG_MSIX_LENGTH; + } +#endif +#endif + + return 0; +} + +static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr) +{ + AssignedDevice *adev = opaque; + unsigned int offset = addr & 0xfff; + void *page = adev->msix_table_page; + uint32_t val = 0; + + memcpy(&val, (void *)((char *)page + offset), 4); + + return val; +} + +static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr) +{ + return ((msix_mmio_readl(opaque, addr & ~3)) >> + (8 * (addr & 3))) & 0xff; +} + +static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr) +{ + return ((msix_mmio_readl(opaque, addr & ~3)) >> + (8 * (addr & 3))) & 0xffff; +} + +static void msix_mmio_writel(void *opaque, + target_phys_addr_t addr, uint32_t val) +{ + AssignedDevice *adev = opaque; + unsigned int offset = addr & 0xfff; + void *page = adev->msix_table_page; + + DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n", + addr, val); + memcpy((void *)((char *)page + offset), &val, 4); +} + +static void 
msix_mmio_writew(void *opaque, + target_phys_addr_t addr, uint32_t val) +{ + msix_mmio_writel(opaque, addr & ~3, + (val & 0xffff) << (8*(addr & 3))); +} + +static void msix_mmio_writeb(void *opaque, + target_phys_addr_t addr, uint32_t val) +{ + msix_mmio_writel(opaque, addr & ~3, + (val & 0xff) << (8*(addr & 3))); +} + +static CPUWriteMemoryFunc *msix_mmio_write[] = { + msix_mmio_writeb, msix_mmio_writew, msix_mmio_writel +}; + +static CPUReadMemoryFunc *msix_mmio_read[] = { + msix_mmio_readb, msix_mmio_readw, msix_mmio_readl +}; + +static int assigned_dev_register_msix_mmio(AssignedDevice *dev) +{ + dev->msix_table_page = mmap(NULL, 0x1000, + PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, 0, 0); + if (dev->msix_table_page == MAP_FAILED) { + fprintf(stderr, "fail allocate msix_table_page! %s\n", + strerror(errno)); + return -EFAULT; + } + memset(dev->msix_table_page, 0, 0x1000); + dev->mmio_index = cpu_register_io_memory( + msix_mmio_read, msix_mmio_write, dev); + return 0; +} + +static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev) +{ + if (!dev->msix_table_page) + return; + + cpu_unregister_io_memory(dev->mmio_index); + dev->mmio_index = 0; + + if (munmap(dev->msix_table_page, 0x1000) == -1) { + fprintf(stderr, "error unmapping msix_table_page! 
%s\n", + strerror(errno)); + } + dev->msix_table_page = NULL; +} + +static int assigned_initfn(struct PCIDevice *pci_dev) +{ + AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev); + uint8_t e_device, e_intx; + int r; + + if (!dev->host.seg && !dev->host.bus && !dev->host.dev && !dev->host.func) { + error_report("pci-assign: error: no host device specified"); + return -1; + } + + if (get_real_device(dev, dev->host.seg, dev->host.bus, + dev->host.dev, dev->host.func)) { + error_report("pci-assign: Error: Couldn't get real device (%s)!", + dev->dev.qdev.id); + goto out; + } + + /* handle real device's MMIO/PIO BARs */ + if (assigned_dev_register_regions(dev->real_device.regions, + dev->real_device.region_number, + dev)) + goto out; + + /* handle interrupt routing */ + e_device = (dev->dev.devfn >> 3) & 0x1f; + e_intx = dev->dev.config[0x3d] - 1; + dev->intpin = e_intx; + dev->run = 0; + dev->girq = -1; + dev->h_segnr = dev->host.seg; + dev->h_busnr = dev->host.bus; + dev->h_devfn = PCI_DEVFN(dev->host.dev, dev->host.func); + + if (pci_enable_capability_support(pci_dev, 0, NULL, + assigned_device_pci_cap_write_config, + assigned_device_pci_cap_init) < 0) + goto out; + + /* assign device to guest */ + r = assign_device(dev); + if (r < 0) + goto out; + + /* assign irq for the device */ + r = assign_irq(dev); + if (r < 0) + goto assigned_out; + + /* intercept MSI-X entry page in the MMIO */ + if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) + if (assigned_dev_register_msix_mmio(dev)) + goto assigned_out; + + assigned_dev_load_option_rom(dev); + QLIST_INSERT_HEAD(&devs, dev, next); + return 0; + +assigned_out: + deassign_device(dev); +out: + free_assigned_device(dev); + return -1; +} + +static int assigned_exitfn(struct PCIDevice *pci_dev) +{ + AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev); + + QLIST_REMOVE(dev, next); + deassign_device(dev); + free_assigned_device(dev); + return 0; +} + +static int parse_hostaddr(DeviceState *dev, Property 
*prop, const char *str) +{ + PCIHostDevice *ptr = qdev_get_prop_ptr(dev, prop); + int rc; + + rc = pci_parse_host_devaddr(str, &ptr->seg, &ptr->bus, &ptr->dev, &ptr->func); + if (rc != 0) + return -1; + return 0; +} + +static int print_hostaddr(DeviceState *dev, Property *prop, char *dest, size_t len) +{ + PCIHostDevice *ptr = qdev_get_prop_ptr(dev, prop); + + return snprintf(dest, len, "%02x:%02x.%x", ptr->bus, ptr->dev, ptr->func); +} + +PropertyInfo qdev_prop_hostaddr = { + .name = "pci-hostaddr", + .type = -1, + .size = sizeof(PCIHostDevice), + .parse = parse_hostaddr, + .print = print_hostaddr, +}; + +static PCIDeviceInfo assign_info = { + .qdev.name = "pci-assign", + .qdev.desc = "pass through host pci devices to the guest", + .qdev.size = sizeof(AssignedDevice), + .init = assigned_initfn, + .exit = assigned_exitfn, + .config_read = assigned_dev_pci_read_config, + .config_write = assigned_dev_pci_write_config, + .qdev.props = (Property[]) { + DEFINE_PROP("host", AssignedDevice, host, qdev_prop_hostaddr, PCIHostDevice), + DEFINE_PROP_UINT32("iommu", AssignedDevice, use_iommu, 1), + DEFINE_PROP_STRING("configfd", AssignedDevice, configfd_name), + DEFINE_PROP_END_OF_LIST(), + }, +}; + +static void assign_register_devices(void) +{ + pci_qdev_register(&assign_info); +} + +device_init(assign_register_devices) + + +/* + * Syntax to assign device: + * + * -pcidevice host=bus:dev.func[,dma=none][,name=Foo] + * + * Example: + * -pcidevice host=00:13.0,dma=pvdma + * + * dma can currently only be 'none' to disable iommu support. 
+ */ +QemuOpts *add_assigned_device(const char *arg) +{ + QemuOpts *opts = NULL; + char host[64], id[64], dma[8]; + int r; + + r = get_param_value(host, sizeof(host), "host", arg); + if (!r) + goto bad; + r = get_param_value(id, sizeof(id), "id", arg); + if (!r) + r = get_param_value(id, sizeof(id), "name", arg); + if (!r) + r = get_param_value(id, sizeof(id), "host", arg); + + opts = qemu_opts_create(&qemu_device_opts, id, 0); + if (!opts) + goto bad; + qemu_opt_set(opts, "driver", "pci-assign"); + qemu_opt_set(opts, "host", host); + +#ifdef KVM_CAP_IOMMU + r = get_param_value(dma, sizeof(dma), "dma", arg); + if (r && !strncmp(dma, "none", 4)) + qemu_opt_set(opts, "iommu", "0"); +#endif + qemu_opts_print(opts, NULL); + return opts; + +bad: + fprintf(stderr, "pcidevice argument parse error; " + "please check the help text for usage\n"); + if (opts) + qemu_opts_del(opts); + return NULL; +} + +void add_assigned_devices(PCIBus *bus, const char **devices, int n_devices) +{ + QemuOpts *opts; + int i; + + for (i = 0; i < n_devices; i++) { + opts = add_assigned_device(devices[i]); + if (opts == NULL) { + fprintf(stderr, "Could not add assigned device %s\n", devices[i]); + exit(1); + } + /* generic code will call qdev_device_add() for the device */ + } +} + +/* + * Scan the assigned devices for the devices that have an option ROM, and then + * load the corresponding ROM data to RAM. If an error occurs while loading an + * option ROM, we just ignore that option ROM and continue with the next one. 
+ */ +static void assigned_dev_load_option_rom(AssignedDevice *dev) +{ + int size, len, ret; + void *buf; + FILE *fp; + uint8_t i = 1; + char rom_file[64]; + + snprintf(rom_file, sizeof(rom_file), + "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/rom", + dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func); + + if (access(rom_file, F_OK)) + return; + + /* Write something to the ROM file to enable it */ + fp = fopen(rom_file, "wb"); + if (fp == NULL) + return; + len = fwrite(&i, 1, 1, fp); + fclose(fp); + if (len != 1) + return; + + /* The file has to be closed and reopened, otherwise it won't work */ + fp = fopen(rom_file, "rb"); + if (fp == NULL) + return; + + fseek(fp, 0, SEEK_END); + size = ftell(fp); + fseek(fp, 0, SEEK_SET); + + buf = malloc(size); + if (buf == NULL) { + fclose(fp); + return; + } + + ret = fread(buf, size, 1, fp); + if (!feof(fp) || ferror(fp) || ret != 1) { + free(buf); + fclose(fp); + return; + } + fclose(fp); + + /* Copy ROM contents into the space backing the ROM BAR */ + if (dev->v_addrs[PCI_ROM_SLOT].r_size >= size && + dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase) { + mprotect(dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase, + size, PROT_READ | PROT_WRITE); + memcpy(dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase, + buf, size); + mprotect(dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase, + size, PROT_READ); + } + + free(buf); +} diff --git a/hw/device-assignment.h b/hw/device-assignment.h new file mode 100644 index 000000000..4e7fe878c --- /dev/null +++ b/hw/device-assignment.h @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2007, Neocleus Corporation. + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Data structures for storing PCI state + * + * Adapted to kvm by Qumranet + * + * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com) + * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com) + * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com) + * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com) + */ + +#ifndef __DEVICE_ASSIGNMENT_H__ +#define __DEVICE_ASSIGNMENT_H__ + +#include <sys/mman.h> +#include "qemu-common.h" +#include "qemu-queue.h" +#include "pci.h" + +/* From include/linux/pci.h in the kernel sources */ +#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) + +typedef struct PCIHostDevice { + int seg; + int bus; + int dev; + int func; +} PCIHostDevice; + +typedef struct { + int type; /* Memory or port I/O */ + int valid; + uint32_t base_addr; + uint32_t size; /* size of the region */ + int resource_fd; +} PCIRegion; + +typedef struct { + uint8_t bus, dev, func; /* Bus inside domain, device and function */ + int irq; /* IRQ number */ + uint16_t region_number; /* number of active regions */ + + /* Port I/O or MMIO Regions */ + PCIRegion regions[PCI_NUM_REGIONS]; + int config_fd; +} PCIDevRegions; + +typedef struct { + pcibus_t e_physbase; + ram_addr_t memory_index; + union { + void *r_virtbase; /* mmapped access address for memory regions */ + uint32_t r_baseport; /* the base guest port for I/O regions */ + } u; + int num; /* our index within v_addrs[] */ + pcibus_t e_size; /* emulated size of region in bytes */ + pcibus_t r_size; /* 
real size of region in bytes */ +} AssignedDevRegion; + +typedef struct AssignedDevice { + PCIDevice dev; + PCIHostDevice host; + uint32_t use_iommu; + int intpin; + uint8_t debug_flags; + AssignedDevRegion v_addrs[PCI_NUM_REGIONS]; + PCIDevRegions real_device; + int run; + int girq; + unsigned int h_segnr; + unsigned char h_busnr; + unsigned int h_devfn; + int irq_requested_type; + int bound; + struct { +#define ASSIGNED_DEVICE_CAP_MSI (1 << 0) +#define ASSIGNED_DEVICE_CAP_MSIX (1 << 1) + uint32_t available; +#define ASSIGNED_DEVICE_MSI_ENABLED (1 << 0) +#define ASSIGNED_DEVICE_MSIX_ENABLED (1 << 1) +#define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2) + uint32_t state; + } cap; + int irq_entries_nr; + struct kvm_irq_routing_entry *entry; + void *msix_table_page; + target_phys_addr_t msix_table_addr; + int mmio_index; + int need_emulate_cmd; + char *configfd_name; + QLIST_ENTRY(AssignedDevice) next; +} AssignedDevice; + +QemuOpts *add_assigned_device(const char *arg); +void add_assigned_devices(PCIBus *bus, const char **devices, int n_devices); +void assigned_dev_update_irqs(void); + +#define MAX_DEV_ASSIGN_CMDLINE 8 + +extern const char *assigned_devices[MAX_DEV_ASSIGN_CMDLINE]; +extern int assigned_devices_index; + +#endif /* __DEVICE_ASSIGNMENT_H__ */ diff --git a/hw/extboot.c b/hw/extboot.c new file mode 100644 index 000000000..8ada21bf5 --- /dev/null +++ b/hw/extboot.c @@ -0,0 +1,123 @@ +/* + * Extended boot option ROM support. + * + * Copyright IBM, Corp. 2007 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. 
+ * + */ + +#include "hw.h" +#include "pc.h" +#include "isa.h" +#include "block.h" + +/* Extended Boot ROM support */ + +union extboot_cmd +{ + uint16_t type; + struct { + uint16_t type; + uint16_t cylinders; + uint16_t heads; + uint16_t sectors; + uint64_t nb_sectors; + } query_geometry; + struct { + uint16_t type; + uint16_t nb_sectors; + uint16_t segment; + uint16_t offset; + uint64_t sector; + } xfer; +}; + +static void get_translated_chs(BlockDriverState *bs, int *c, int *h, int *s) +{ + bdrv_get_geometry_hint(bs, c, h, s); + + if (*c <= 1024) { + *c >>= 0; + *h <<= 0; + } else if (*c <= 2048) { + *c >>= 1; + *h <<= 1; + } else if (*c <= 4096) { + *c >>= 2; + *h <<= 2; + } else if (*c <= 8192) { + *c >>= 3; + *h <<= 3; + } else { + *c >>= 4; + *h <<= 4; + } + + /* what is the correct algorithm for this?? */ + if (*h == 256) { + *h = 255; + *c = *c + 1; + } +} + +static void extboot_write_cmd(void *opaque, uint32_t addr, uint32_t value) +{ + union extboot_cmd cmd; + BlockDriverState *bs = opaque; + int cylinders, heads, sectors, err; + uint64_t nb_sectors; + target_phys_addr_t pa = 0; + int blen = 0; + void *buf = NULL; + + cpu_physical_memory_read((value & 0xFFFF) << 4, (uint8_t *)&cmd, + sizeof(cmd)); + + if (cmd.type == 0x01 || cmd.type == 0x02) { + pa = cmd.xfer.segment * 16 + cmd.xfer.offset; + blen = cmd.xfer.nb_sectors * 512; + buf = qemu_memalign(512, blen); + } + + switch (cmd.type) { + case 0x00: + get_translated_chs(bs, &cylinders, &heads, &sectors); + bdrv_get_geometry(bs, &nb_sectors); + cmd.query_geometry.cylinders = cylinders; + cmd.query_geometry.heads = heads; + cmd.query_geometry.sectors = sectors; + cmd.query_geometry.nb_sectors = nb_sectors; + break; + case 0x01: + err = bdrv_read(bs, cmd.xfer.sector, buf, cmd.xfer.nb_sectors); + if (err) + printf("Read failed\n"); + + cpu_physical_memory_write(pa, buf, blen); + + break; + case 0x02: + cpu_physical_memory_read(pa, buf, blen); + + err = bdrv_write(bs, cmd.xfer.sector, buf, cmd.xfer.nb_sectors); +
if (err) + printf("Write failed\n"); + + break; + } + + cpu_physical_memory_write((value & 0xFFFF) << 4, (uint8_t *)&cmd, + sizeof(cmd)); + if (buf) + qemu_free(buf); +} + +void extboot_init(BlockDriverState *bs) +{ + register_ioport_write(0x405, 1, 2, extboot_write_cmd, bs); +} @@ -238,6 +238,11 @@ static int hpet_post_load(void *opaque, int version_id) if (s->timer[0].config & HPET_TN_FSB_CAP) { s->flags |= 1 << HPET_MSI_SUPPORT; } + + if (hpet_in_legacy_mode(s)) { + hpet_pit_disable(); + } + return 0; } @@ -340,6 +340,10 @@ extern const VMStateInfo vmstate_info_uint16; extern const VMStateInfo vmstate_info_uint32; extern const VMStateInfo vmstate_info_uint64; +#ifdef __linux__ +extern const VMStateInfo vmstate_info_u64; +#endif + extern const VMStateInfo vmstate_info_timer; extern const VMStateInfo vmstate_info_ptimer; extern const VMStateInfo vmstate_info_buffer; @@ -630,6 +634,15 @@ extern const VMStateDescription vmstate_i2c_slave; #define VMSTATE_UINT64(_f, _s) \ VMSTATE_UINT64_V(_f, _s, 0) +/* This is needed because on linux __u64 is unsigned long long + and on glibc uint64_t is unsigned long on 64 bits */ +#ifdef __linux__ +#define VMSTATE_U64_V(_f, _s, _v) \ + VMSTATE_SINGLE(_f, _s, _v, vmstate_info_u64, __u64) +#define VMSTATE_U64(_f, _s) \ + VMSTATE_U64_V(_f, _s, 0) +#endif + #define VMSTATE_UINT8_EQUAL(_f, _s) \ VMSTATE_SINGLE(_f, _s, 0, vmstate_info_uint8_equal, uint8_t) diff --git a/hw/i8254-kvm.c b/hw/i8254-kvm.c new file mode 100644 index 000000000..6125213ce --- /dev/null +++ b/hw/i8254-kvm.c @@ -0,0 +1,122 @@ +/* + * QEMU 8253/8254 interval timer emulation + * + * Copyright (c) 2003-2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the 
Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "hw.h" +#include "pc.h" +#include "isa.h" +#include "qemu-timer.h" +#include "i8254.h" +#include "qemu-kvm.h" + +extern VMStateDescription vmstate_pit; + +static PITState pit_state; + +static void kvm_pit_pre_save(void *opaque) +{ + PITState *s = (void *)opaque; + struct kvm_pit_state2 pit2; + struct kvm_pit_channel_state *c; + struct PITChannelState *sc; + int i; + + if(qemu_kvm_has_pit_state2()) { + kvm_get_pit2(kvm_context, &pit2); + s->flags = pit2.flags; + } else { + /* pit2 is superset of pit struct so just cast it and use it */ + kvm_get_pit(kvm_context, (struct kvm_pit_state *)&pit2); + } + for (i = 0; i < 3; i++) { + c = &pit2.channels[i]; + sc = &s->channels[i]; + sc->count = c->count; + sc->latched_count = c->latched_count; + sc->count_latched = c->count_latched; + sc->status_latched = c->status_latched; + sc->status = c->status; + sc->read_state = c->read_state; + sc->write_state = c->write_state; + sc->write_latch = c->write_latch; + sc->rw_mode = c->rw_mode; + sc->mode = c->mode; + sc->bcd = c->bcd; + sc->gate = c->gate; + sc->count_load_time = c->count_load_time; + } +} + +static int kvm_pit_post_load(void *opaque, int version_id) +{ + PITState *s = opaque; + struct kvm_pit_state2 pit2; + struct 
kvm_pit_channel_state *c; + struct PITChannelState *sc; + int i; + + pit2.flags = s->flags; + for (i = 0; i < 3; i++) { + c = &pit2.channels[i]; + sc = &s->channels[i]; + c->count = sc->count; + c->latched_count = sc->latched_count; + c->count_latched = sc->count_latched; + c->status_latched = sc->status_latched; + c->status = sc->status; + c->read_state = sc->read_state; + c->write_state = sc->write_state; + c->write_latch = sc->write_latch; + c->rw_mode = sc->rw_mode; + c->mode = sc->mode; + c->bcd = sc->bcd; + c->gate = sc->gate; + c->count_load_time = sc->count_load_time; + } + + if(qemu_kvm_has_pit_state2()) { + kvm_set_pit2(kvm_context, &pit2); + } else { + kvm_set_pit(kvm_context, (struct kvm_pit_state *)&pit2); + } + return 0; +} + +static void dummy_timer(void *opaque) +{ +} + +PITState *kvm_pit_init(int base, qemu_irq irq) +{ + PITState *pit = &pit_state; + PITChannelState *s; + + s = &pit->channels[0]; + s->irq_timer = qemu_new_timer(vm_clock, dummy_timer, s); + vmstate_pit.pre_save = kvm_pit_pre_save; + vmstate_pit.post_load = kvm_pit_post_load; + vmstate_register(NULL, base, &vmstate_pit, pit); + qemu_register_reset(pit_reset, pit); + pit_reset(pit); + + return pit; +} diff --git a/hw/i8254.c b/hw/i8254.c index 06b225cf4..321a5d535 100644 --- a/hw/i8254.c +++ b/hw/i8254.c @@ -25,38 +25,11 @@ #include "pc.h" #include "isa.h" #include "qemu-timer.h" +#include "kvm.h" +#include "i8254.h" //#define DEBUG_PIT -#define RW_STATE_LSB 1 -#define RW_STATE_MSB 2 -#define RW_STATE_WORD0 3 -#define RW_STATE_WORD1 4 - -typedef struct PITChannelState { - int count; /* can be 65536 */ - uint16_t latched_count; - uint8_t count_latched; - uint8_t status_latched; - uint8_t status; - uint8_t read_state; - uint8_t write_state; - uint8_t write_latch; - uint8_t rw_mode; - uint8_t mode; - uint8_t bcd; /* not supported */ - uint8_t gate; /* timer start */ - int64_t count_load_time; - /* irq handling */ - int64_t next_transition_time; - QEMUTimer *irq_timer; - qemu_irq irq; -} 
PITChannelState; - -struct PITState { - PITChannelState channels[3]; -}; - static PITState pit_state; static void pit_irq_timer_update(PITChannelState *s, int64_t current_time); @@ -228,13 +201,18 @@ int pit_get_mode(PITState *pit, int channel) return s->mode; } -static inline void pit_load_count(PITChannelState *s, int val) +static inline void pit_load_count(PITState *s, int val, int chan) { if (val == 0) val = 0x10000; - s->count_load_time = qemu_get_clock(vm_clock); - s->count = val; - pit_irq_timer_update(s, s->count_load_time); + s->channels[chan].count_load_time = qemu_get_clock(vm_clock); + s->channels[chan].count = val; +#ifdef TARGET_I386 + if (chan == 0 && pit_state.flags & PIT_FLAGS_HPET_LEGACY) { + return; + } +#endif + pit_irq_timer_update(&s->channels[chan], s->channels[chan].count_load_time); } /* if already latched, do not latch again */ @@ -294,17 +272,17 @@ static void pit_ioport_write(void *opaque, uint32_t addr, uint32_t val) switch(s->write_state) { default: case RW_STATE_LSB: - pit_load_count(s, val); + pit_load_count(pit, val, addr); break; case RW_STATE_MSB: - pit_load_count(s, val << 8); + pit_load_count(pit, val << 8, addr); break; case RW_STATE_WORD0: s->write_latch = val; s->write_state = RW_STATE_WORD1; break; case RW_STATE_WORD1: - pit_load_count(s, s->write_latch | (val << 8)); + pit_load_count(pit, s->write_latch | (val << 8), addr); s->write_state = RW_STATE_WORD0; break; } @@ -364,6 +342,11 @@ static uint32_t pit_ioport_read(void *opaque, uint32_t addr) return ret; } +/* global counters for time-drift fix */ +int64_t timer_acks=0, timer_interrupts=0, timer_ints_to_push=0; + +extern int time_drift_fix; + static void pit_irq_timer_update(PITChannelState *s, int64_t current_time) { int64_t expire_time; @@ -374,16 +357,35 @@ static void pit_irq_timer_update(PITChannelState *s, int64_t current_time) expire_time = pit_get_next_transition_time(s, current_time); irq_level = pit_get_out1(s, current_time); qemu_set_irq(s->irq, irq_level); + 
if (time_drift_fix && irq_level==1) { + /* FIXME: fine tune timer_max_fix (max fix per tick). + * Should it be 1 (double time), 2 , 4, 10 ? + * Currently setting it to 5% of PIT-ticks-per-second (per PIT-tick) + */ + const long pit_ticks_per_sec = (s->count>0) ? (PIT_FREQ/s->count) : 0; + const long timer_max_fix = pit_ticks_per_sec/20; + const long delta = timer_interrupts - timer_acks; + const long max_delta = pit_ticks_per_sec * 60; /* one minute */ + if ((delta > max_delta) && (pit_ticks_per_sec > 0)) { + printf("time drift is too long, %ld seconds were lost\n", delta/pit_ticks_per_sec); + timer_acks = timer_interrupts; + timer_ints_to_push = 0; + } else if (delta > 0) { + timer_ints_to_push = MIN(delta, timer_max_fix); + } + timer_interrupts++; + } #ifdef DEBUG_PIT printf("irq_level=%d next_delay=%f\n", irq_level, (double)(expire_time - current_time) / get_ticks_per_sec()); #endif s->next_transition_time = expire_time; - if (expire_time != -1) + if (expire_time != -1) { qemu_mod_timer(s->irq_timer, expire_time); - else + } else { qemu_del_timer(s->irq_timer); + } } static void pit_irq_timer(void *opaque) @@ -423,9 +425,10 @@ static int pit_load_old(QEMUFile *f, void *opaque, int version_id) PITChannelState *s; int i; - if (version_id != 1) + if (version_id != PIT_SAVEVM_VERSION) return -EINVAL; + pit->flags = qemu_get_be32(f); for(i = 0; i < 3; i++) { s = &pit->channels[i]; s->count=qemu_get_be32(f); @@ -446,42 +449,61 @@ static int pit_load_old(QEMUFile *f, void *opaque, int version_id) qemu_get_timer(f, s->irq_timer); } } + return 0; } -static const VMStateDescription vmstate_pit = { +VMStateDescription vmstate_pit = { .name = "i8254", .version_id = 2, .minimum_version_id = 2, .minimum_version_id_old = 1, .load_state_old = pit_load_old, .fields = (VMStateField []) { + VMSTATE_UINT32(flags, PITState), VMSTATE_STRUCT_ARRAY(channels, PITState, 3, 2, vmstate_pit_channel, PITChannelState), VMSTATE_TIMER(channels[0].irq_timer, PITState), VMSTATE_END_OF_LIST() } }; 
-static void pit_reset(void *opaque) +void pit_reset(void *opaque) { PITState *pit = opaque; PITChannelState *s; int i; +#ifdef TARGET_I386 + pit->flags &= ~PIT_FLAGS_HPET_LEGACY; +#endif for(i = 0;i < 3; i++) { s = &pit->channels[i]; s->mode = 3; s->gate = (i != 2); - pit_load_count(s, 0); + pit_load_count(pit, 0, i); } } +#ifdef TARGET_I386 /* When HPET is operating in legacy mode, i8254 timer0 is disabled */ -void hpet_pit_disable(void) { - PITChannelState *s; - s = &pit_state.channels[0]; - if (s->irq_timer) - qemu_del_timer(s->irq_timer); + +void hpet_pit_disable(void) +{ + PITChannelState *s = &pit_state.channels[0]; + + if (kvm_enabled() && kvm_pit_in_kernel()) { + if (qemu_kvm_has_pit_state2()) { + kvm_hpet_disable_kpit(); + } else { + fprintf(stderr, "%s: kvm does not support pit_state2!\n", __FUNCTION__); + exit(1); + } + } else { + pit_state.flags |= PIT_FLAGS_HPET_LEGACY; + if (s->irq_timer) { + qemu_del_timer(s->irq_timer); + } + } } /* When HPET is reset or leaving legacy mode, it must reenable i8254 @@ -491,12 +513,21 @@ void hpet_pit_disable(void) { void hpet_pit_enable(void) { PITState *pit = &pit_state; - PITChannelState *s; - s = &pit->channels[0]; - s->mode = 3; - s->gate = 1; - pit_load_count(s, 0); + PITChannelState *s = &pit->channels[0]; + + if (kvm_enabled() && kvm_pit_in_kernel()) { + if (qemu_kvm_has_pit_state2()) { + kvm_hpet_enable_kpit(); + } else { + fprintf(stderr, "%s: kvm does not support pit_state2!\n", __FUNCTION__); + exit(1); + } + } else { + pit_state.flags &= ~PIT_FLAGS_HPET_LEGACY; + pit_load_count(pit, s->count, 0); + } } +#endif PITState *pit_init(int base, qemu_irq irq) { diff --git a/hw/i8254.h b/hw/i8254.h new file mode 100644 index 000000000..d23303a8b --- /dev/null +++ b/hw/i8254.h @@ -0,0 +1,69 @@ +/* + * QEMU 8253/8254 interval timer emulation + * + * Copyright (c) 2003-2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated 
documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef QEMU_I8254_H +#define QEMU_I8254_H + +#define PIT_SAVEVM_NAME "i8254" +#define PIT_SAVEVM_VERSION 2 + +#define RW_STATE_LSB 1 +#define RW_STATE_MSB 2 +#define RW_STATE_WORD0 3 +#define RW_STATE_WORD1 4 + +#define PIT_FLAGS_HPET_LEGACY 1 + +typedef struct PITChannelState { + int count; /* can be 65536 */ + uint16_t latched_count; + uint8_t count_latched; + uint8_t status_latched; + uint8_t status; + uint8_t read_state; + uint8_t write_state; + uint8_t write_latch; + uint8_t rw_mode; + uint8_t mode; + uint8_t bcd; /* not supported */ + uint8_t gate; /* timer start */ + int64_t count_load_time; + /* irq handling */ + int64_t next_transition_time; + QEMUTimer *irq_timer; + qemu_irq irq; +} PITChannelState; + +struct PITState { + PITChannelState channels[3]; + uint32_t flags; +}; + +void pit_save(QEMUFile *f, void *opaque); + +int pit_load(QEMUFile *f, void *opaque, int version_id); + +void pit_reset(void *opaque); + +#endif diff --git a/hw/i8259.c b/hw/i8259.c index a8dbee647..11060d3d1 100644 --- 
a/hw/i8259.c +++ b/hw/i8259.c @@ -23,10 +23,13 @@ */ #include "hw.h" #include "pc.h" +#include "apic.h" #include "isa.h" #include "monitor.h" #include "qemu-timer.h" +#include "kvm.h" + /* debug PIC */ //#define DEBUG_PIC @@ -189,7 +192,6 @@ int64_t irq_time[16]; static void i8259_set_irq(void *opaque, int irq, int level) { PicState2 *s = opaque; - #if defined(DEBUG_PIC) || defined(DEBUG_IRQ_COUNT) if (level != irq_level[irq]) { DPRINTF("i8259_set_irq: irq=%d level=%d\n", irq, level); @@ -218,18 +220,35 @@ static inline void pic_intack(PicState *s, int irq) } else { s->isr |= (1 << irq); } + /* We don't clear a level sensitive interrupt here */ if (!(s->elcr & (1 << irq))) s->irr &= ~(1 << irq); + } +extern int time_drift_fix; + int pic_read_irq(PicState2 *s) { int irq, irq2, intno; irq = pic_get_irq(&s->pics[0]); if (irq >= 0) { + pic_intack(&s->pics[0], irq); +#ifdef TARGET_I386 + if (time_drift_fix && irq == 0) { + extern int64_t timer_acks, timer_ints_to_push; + timer_acks++; + if (timer_ints_to_push > 0) { + timer_ints_to_push--; + /* simulate an edge irq0, like the one generated by i8254 */ + pic_set_irq1(&s->pics[0], 0, 0); + pic_set_irq1(&s->pics[0], 0, 1); + } + } +#endif if (irq == 2) { irq2 = pic_get_irq(&s->pics[1]); if (irq2 >= 0) { @@ -448,9 +467,33 @@ static uint32_t elcr_ioport_read(void *opaque, uint32_t addr1) return s->elcr; } +static void kvm_kernel_pic_save_to_user(PicState *s); +static int kvm_kernel_pic_load_from_user(PicState *s); + +static void pic_pre_save(void *opaque) +{ + PicState *s = opaque; + + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + kvm_kernel_pic_save_to_user(s); + } +} + +static int pic_post_load(void *opaque, int version_id) +{ + PicState *s = opaque; + + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + kvm_kernel_pic_load_from_user(s); + } + return 0; +} + static const VMStateDescription vmstate_pic = { .name = "i8259", .version_id = 1, + .pre_save = pic_pre_save, + .post_load = pic_post_load, .minimum_version_id = 1, 
.minimum_version_id_old = 1, .fields = (VMStateField []) { @@ -537,3 +580,103 @@ qemu_irq *i8259_init(qemu_irq parent_irq) isa_pic = s; return qemu_allocate_irqs(i8259_set_irq, s, 16); } + +static void kvm_kernel_pic_save_to_user(PicState *s) +{ +#ifdef KVM_CAP_IRQCHIP + struct kvm_irqchip chip; + struct kvm_pic_state *kpic; + + chip.chip_id = (&s->pics_state->pics[0] == s) ? + KVM_IRQCHIP_PIC_MASTER : + KVM_IRQCHIP_PIC_SLAVE; + kvm_get_irqchip(kvm_context, &chip); + kpic = &chip.chip.pic; + + s->last_irr = kpic->last_irr; + s->irr = kpic->irr; + s->imr = kpic->imr; + s->isr = kpic->isr; + s->priority_add = kpic->priority_add; + s->irq_base = kpic->irq_base; + s->read_reg_select = kpic->read_reg_select; + s->poll = kpic->poll; + s->special_mask = kpic->special_mask; + s->init_state = kpic->init_state; + s->auto_eoi = kpic->auto_eoi; + s->rotate_on_auto_eoi = kpic->rotate_on_auto_eoi; + s->special_fully_nested_mode = kpic->special_fully_nested_mode; + s->init4 = kpic->init4; + s->elcr = kpic->elcr; + s->elcr_mask = kpic->elcr_mask; +#endif +} + +static int kvm_kernel_pic_load_from_user(PicState *s) +{ +#ifdef KVM_CAP_IRQCHIP + struct kvm_irqchip chip; + struct kvm_pic_state *kpic; + + chip.chip_id = (&s->pics_state->pics[0] == s) ? 
+ KVM_IRQCHIP_PIC_MASTER : + KVM_IRQCHIP_PIC_SLAVE; + kpic = &chip.chip.pic; + + kpic->last_irr = s->last_irr; + kpic->irr = s->irr; + kpic->imr = s->imr; + kpic->isr = s->isr; + kpic->priority_add = s->priority_add; + kpic->irq_base = s->irq_base; + kpic->read_reg_select = s->read_reg_select; + kpic->poll = s->poll; + kpic->special_mask = s->special_mask; + kpic->init_state = s->init_state; + kpic->auto_eoi = s->auto_eoi; + kpic->rotate_on_auto_eoi = s->rotate_on_auto_eoi; + kpic->special_fully_nested_mode = s->special_fully_nested_mode; + kpic->init4 = s->init4; + kpic->elcr = s->elcr; + kpic->elcr_mask = s->elcr_mask; + + kvm_set_irqchip(kvm_context, &chip); +#endif + return 0; +} + +#ifdef KVM_CAP_IRQCHIP +static void kvm_i8259_set_irq(void *opaque, int irq, int level) +{ + int pic_ret; + if (kvm_set_irq(irq, level, &pic_ret)) { + if (pic_ret != 0) + apic_set_irq_delivered(); + return; + } +} + +static void kvm_pic_init1(int io_addr, PicState *s) +{ + vmstate_register(NULL, io_addr, &vmstate_pic, s); + qemu_register_reset(pic_reset, s); +} + +qemu_irq *kvm_i8259_init(qemu_irq parent_irq) +{ + PicState2 *s; + + s = qemu_mallocz(sizeof(PicState2)); + + kvm_pic_init1(0x20, &s->pics[0]); + kvm_pic_init1(0xa0, &s->pics[1]); + s->parent_irq = parent_irq; + s->pics[0].pics_state = s; + s->pics[1].pics_state = s; + isa_pic = s; + return qemu_allocate_irqs(kvm_i8259_set_irq, s, 24); +} +#endif + + + diff --git a/hw/ioapic.c b/hw/ioapic.c index 5ae21e910..276c72ed2 100644 --- a/hw/ioapic.c +++ b/hw/ioapic.c @@ -23,10 +23,14 @@ #include "hw.h" #include "pc.h" #include "apic.h" +#include "sysemu.h" +#include "apic.h" #include "qemu-timer.h" #include "host-utils.h" #include "sysbus.h" +#include "kvm.h" + //#define DEBUG_IOAPIC #ifdef DEBUG_IOAPIC @@ -36,6 +40,7 @@ #define DPRINTF(fmt, ...) 
#endif +#define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000 #define IOAPIC_LVT_MASKED (1<<16) #define IOAPIC_TRIGGER_EDGE 0 @@ -56,6 +61,7 @@ struct IOAPICState { SysBusDevice busdev; uint8_t id; uint8_t ioregsel; + uint64_t base_address; uint32_t irr; uint64_t ioredtbl[IOAPIC_NUM_PINS]; @@ -106,8 +112,9 @@ static void ioapic_set_irq(void *opaque, int vector, int level) * the cleanest way of doing it but it should work. */ DPRINTF("%s: %s vec %x\n", __func__, level? "raise" : "lower", vector); - if (vector == 0) + if (vector == 0 && irq0override) { vector = 2; + } if (vector >= 0 && vector < IOAPIC_NUM_PINS) { uint32_t mask = 1 << vector; @@ -199,14 +206,91 @@ static void ioapic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t va } } +static void kvm_kernel_ioapic_save_to_user(IOAPICState *s) +{ +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386) + struct kvm_irqchip chip; + struct kvm_ioapic_state *kioapic; + int i; + + chip.chip_id = KVM_IRQCHIP_IOAPIC; + kvm_get_irqchip(kvm_context, &chip); + kioapic = &chip.chip.ioapic; + + s->id = kioapic->id; + s->ioregsel = kioapic->ioregsel; + s->base_address = kioapic->base_address; + s->irr = kioapic->irr; + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + s->ioredtbl[i] = kioapic->redirtbl[i].bits; + } +#endif +} + +static void kvm_kernel_ioapic_load_from_user(IOAPICState *s) +{ +#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386) + struct kvm_irqchip chip; + struct kvm_ioapic_state *kioapic; + int i; + + chip.chip_id = KVM_IRQCHIP_IOAPIC; + kioapic = &chip.chip.ioapic; + + kioapic->id = s->id; + kioapic->ioregsel = s->ioregsel; + kioapic->base_address = s->base_address; + kioapic->irr = s->irr; + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + kioapic->redirtbl[i].bits = s->ioredtbl[i]; + } + + kvm_set_irqchip(kvm_context, &chip); +#endif +} + +static void ioapic_pre_save(void *opaque) +{ + IOAPICState *s = (void *)opaque; + + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + kvm_kernel_ioapic_save_to_user(s); + } +} + 
+static int ioapic_pre_load(void *opaque) +{ + IOAPICState *s = opaque; + + /* in case we are doing version 1, we just set these to sane values */ + s->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; + s->irr = 0; + return 0; +} + +static int ioapic_post_load(void *opaque, int version_id) +{ + IOAPICState *s = opaque; + + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + kvm_kernel_ioapic_load_from_user(s); + } + return 0; +} + static const VMStateDescription vmstate_ioapic = { .name = "ioapic", - .version_id = 1, + .version_id = 2, .minimum_version_id = 1, .minimum_version_id_old = 1, + .pre_load = ioapic_pre_load, + .post_load = ioapic_post_load, + .pre_save = ioapic_pre_save, .fields = (VMStateField []) { VMSTATE_UINT8(id, IOAPICState), VMSTATE_UINT8(ioregsel, IOAPICState), + VMSTATE_UINT64_V(base_address, IOAPICState, 2), + VMSTATE_UINT32_V(irr, IOAPICState, 2), VMSTATE_UINT64_ARRAY(ioredtbl, IOAPICState, IOAPIC_NUM_PINS), VMSTATE_END_OF_LIST() } @@ -217,11 +301,17 @@ static void ioapic_reset(DeviceState *d) IOAPICState *s = DO_UPCAST(IOAPICState, busdev.qdev, d); int i; + s->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; s->id = 0; s->ioregsel = 0; s->irr = 0; for(i = 0; i < IOAPIC_NUM_PINS; i++) s->ioredtbl[i] = 1 << 16; /* mask LVT */ +#ifdef KVM_CAP_IRQCHIP + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + kvm_kernel_ioapic_load_from_user(s); + } +#endif } static CPUReadMemoryFunc * const ioapic_mem_read[3] = { diff --git a/hw/ipf.c b/hw/ipf.c new file mode 100644 index 000000000..21cff72b7 --- /dev/null +++ b/hw/ipf.c @@ -0,0 +1,713 @@ +/* + * Itanium Platform Emulator derived from QEMU PC System Emulator + * + * Copyright (c) 2003-2004 Fabrice Bellard + * + * Copyright (c) 2007 Intel + * Ported for IA64 Platform Zhang Xiantao <xiantao.zhang@intel.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, 
including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "hw.h" +#include "pc.h" +#include "fdc.h" +#include "pci.h" +#include "block.h" +#include "sysemu.h" +#include "audio/audio.h" +#include "net.h" +#include "smbus.h" +#include "boards.h" +#include "firmware.h" +#include "ia64intrin.h" +#include <unistd.h> +#include "device-assignment.h" +#include "virtio-blk.h" + +#include "qemu-kvm.h" + +#define FW_FILENAME "Flash.fd" + +/* Leave a chunk of memory at the top of RAM for the BIOS ACPI tables. 
*/ +#define ACPI_DATA_SIZE 0x10000 + +#define MAX_IDE_BUS 2 + +static fdctrl_t *floppy_controller; +static RTCState *rtc_state; +static PCIDevice *i440fx_state; + +static uint32_t ipf_to_legacy_io(target_phys_addr_t addr) +{ + return (uint32_t)(((addr&0x3ffffff) >> 12 << 2)|((addr) & 0x3)); +} + +static void ipf_legacy_io_writeb(void *opaque, target_phys_addr_t addr, + uint32_t val) { + uint32_t port = ipf_to_legacy_io(addr); + + cpu_outb(0, port, val); +} + +static void ipf_legacy_io_writew(void *opaque, target_phys_addr_t addr, + uint32_t val) { + uint32_t port = ipf_to_legacy_io(addr); + + cpu_outw(0, port, val); +} + +static void ipf_legacy_io_writel(void *opaque, target_phys_addr_t addr, + uint32_t val) { + uint32_t port = ipf_to_legacy_io(addr); + + cpu_outl(0, port, val); +} + +static uint32_t ipf_legacy_io_readb(void *opaque, target_phys_addr_t addr) +{ + uint32_t port = ipf_to_legacy_io(addr); + + return cpu_inb(0, port); +} + +static uint32_t ipf_legacy_io_readw(void *opaque, target_phys_addr_t addr) +{ + uint32_t port = ipf_to_legacy_io(addr); + + return cpu_inw(0, port); +} + +static uint32_t ipf_legacy_io_readl(void *opaque, target_phys_addr_t addr) +{ + uint32_t port = ipf_to_legacy_io(addr); + + return cpu_inl(0, port); +} + +static CPUReadMemoryFunc *ipf_legacy_io_read[3] = { + ipf_legacy_io_readb, + ipf_legacy_io_readw, + ipf_legacy_io_readl, +}; + +static CPUWriteMemoryFunc *ipf_legacy_io_write[3] = { + ipf_legacy_io_writeb, + ipf_legacy_io_writew, + ipf_legacy_io_writel, +}; + +static void pic_irq_request(void *opaque, int irq, int level) +{ + fprintf(stderr,"pic_irq_request called!\n"); +} + +/* PC cmos mappings */ + +#define REG_EQUIPMENT_BYTE 0x14 + +static int cmos_get_fd_drive_type(int fd0) +{ + int val; + + switch (fd0) { + case 0: + /* 1.44 Mb 3"5 drive */ + val = 4; + break; + case 1: + /* 2.88 Mb 3"5 drive */ + val = 5; + break; + case 2: + /* 1.2 Mb 5"5 drive */ + val = 2; + break; + default: + val = 0; + break; + } + return val; +} + 
+static void cmos_init_hd(int type_ofs, int info_ofs, BlockDriverState *hd) +{ + RTCState *s = rtc_state; + int cylinders, heads, sectors; + + bdrv_get_geometry_hint(hd, &cylinders, &heads, &sectors); + rtc_set_memory(s, type_ofs, 47); + rtc_set_memory(s, info_ofs, cylinders); + rtc_set_memory(s, info_ofs + 1, cylinders >> 8); + rtc_set_memory(s, info_ofs + 2, heads); + rtc_set_memory(s, info_ofs + 3, 0xff); + rtc_set_memory(s, info_ofs + 4, 0xff); + rtc_set_memory(s, info_ofs + 5, 0xc0 | ((heads > 8) << 3)); + rtc_set_memory(s, info_ofs + 6, cylinders); + rtc_set_memory(s, info_ofs + 7, cylinders >> 8); + rtc_set_memory(s, info_ofs + 8, sectors); +} + +/* convert boot_device letter to something recognizable by the bios */ +static int boot_device2nibble(char boot_device) +{ + switch(boot_device) { + case 'a': + case 'b': + return 0x01; /* floppy boot */ + case 'c': + return 0x02; /* hard drive boot */ + case 'd': + return 0x03; /* CD-ROM boot */ + case 'n': + return 0x04; /* Network boot */ + } + return 0; +} + +/* hd_table must contain 4 block drivers */ +static void cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size, + const char *boot_device, BlockDriverState **hd_table) +{ + RTCState *s = rtc_state; + int nbds, bds[3] = { 0, }; + int val; + int fd0, fd1, nb; + int i; + + /* various important CMOS locations needed by PC/Bochs bios */ + + /* memory size */ + val = 640; /* base memory in K */ + rtc_set_memory(s, 0x15, val); + rtc_set_memory(s, 0x16, val >> 8); + + val = (ram_size / 1024) - 1024; + if (val > 65535) + val = 65535; + rtc_set_memory(s, 0x17, val); + rtc_set_memory(s, 0x18, val >> 8); + rtc_set_memory(s, 0x30, val); + rtc_set_memory(s, 0x31, val >> 8); + + if (above_4g_mem_size) { + rtc_set_memory(s, 0x5b, (unsigned int)above_4g_mem_size >> 16); + rtc_set_memory(s, 0x5c, (unsigned int)above_4g_mem_size >> 24); + rtc_set_memory(s, 0x5d, above_4g_mem_size >> 32); + } + rtc_set_memory(s, 0x5f, smp_cpus - 1); + + if (ram_size > (16 * 1024 * 1024)) +
val = (ram_size / 65536) - ((16 * 1024 * 1024) / 65536); + else + val = 0; + if (val > 65535) + val = 65535; + rtc_set_memory(s, 0x34, val); + rtc_set_memory(s, 0x35, val >> 8); + + /* set boot devices, and disable floppy signature check if requested */ +#define PC_MAX_BOOT_DEVICES 3 + nbds = strlen(boot_device); + + if (nbds > PC_MAX_BOOT_DEVICES) { + fprintf(stderr, "Too many boot devices for PC\n"); + exit(1); + } + + for (i = 0; i < nbds; i++) { + bds[i] = boot_device2nibble(boot_device[i]); + if (bds[i] == 0) { + fprintf(stderr, "Invalid boot device for PC: '%c'\n", + boot_device[i]); + exit(1); + } + } + + rtc_set_memory(s, 0x3d, (bds[1] << 4) | bds[0]); + rtc_set_memory(s, 0x38, (bds[2] << 4) | (fd_bootchk ? 0x0 : 0x1)); + + /* floppy type */ + + fd0 = fdctrl_get_drive_type(floppy_controller, 0); + fd1 = fdctrl_get_drive_type(floppy_controller, 1); + + val = (cmos_get_fd_drive_type(fd0) << 4) | cmos_get_fd_drive_type(fd1); + rtc_set_memory(s, 0x10, val); + + val = 0; + nb = 0; + if (fd0 < 3) + nb++; + if (fd1 < 3) + nb++; + + switch (nb) { + case 0: + break; + case 1: + val |= 0x01; /* 1 drive, ready for boot */ + break; + case 2: + val |= 0x41; /* 2 drives, ready for boot */ + break; + } + + val |= 0x02; /* FPU is there */ + val |= 0x04; /* PS/2 mouse installed */ + rtc_set_memory(s, REG_EQUIPMENT_BYTE, val); + + /* hard drives */ + + rtc_set_memory(s, 0x12, (hd_table[0] ? 0xf0 : 0) | (hd_table[1] ? 0x0f : 0)); + if (hd_table[0]) + cmos_init_hd(0x19, 0x1b, hd_table[0]); + if (hd_table[1]) + cmos_init_hd(0x1a, 0x24, hd_table[1]); + + val = 0; + for (i = 0; i < 4; i++) { + if (hd_table[i]) { + int cylinders, heads, sectors, translation; + /* NOTE: bdrv_get_geometry_hint() returns the physical + geometry. It is always such that: 1 <= sects <= 63, 1 + <= heads <= 16, 1 <= cylinders <= 16383. The BIOS + geometry can be different if a translation is done. 
*/ + translation = bdrv_get_translation_hint(hd_table[i]); + if (translation == BIOS_ATA_TRANSLATION_AUTO) { + bdrv_get_geometry_hint(hd_table[i], &cylinders, + &heads, &sectors); + if (cylinders <= 1024 && heads <= 16 && sectors <= 63) { + /* No translation. */ + translation = 0; + } else { + /* LBA translation. */ + translation = 1; + } + } else { + translation--; + } + val |= translation << (i * 2); + } + } + rtc_set_memory(s, 0x39, val); +} + +static void main_cpu_reset(void *opaque) +{ + CPUState *env = opaque; + cpu_reset(env); +} + +static const int ide_iobase[2] = { 0x1f0, 0x170 }; +static const int ide_iobase2[2] = { 0x3f6, 0x376 }; +static const int ide_irq[2] = { 14, 15 }; + +#define NE2000_NB_MAX 6 + +static int ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340, + 0x360, 0x280, 0x380 }; +static int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 }; + +static int serial_io[MAX_SERIAL_PORTS] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8 }; +static int serial_irq[MAX_SERIAL_PORTS] = { 4, 3, 4, 3 }; + +static int parallel_io[MAX_PARALLEL_PORTS] = { 0x378, 0x278, 0x3bc }; +static int parallel_irq[MAX_PARALLEL_PORTS] = { 7, 7, 7 }; + +#ifdef HAS_AUDIO +static void audio_init (PCIBus *pci_bus, qemu_irq *pic) +{ + struct soundhw *c; + int audio_enabled = 0; + + for (c = soundhw; !audio_enabled && c->name; ++c) { + audio_enabled = c->enabled; + } + + if (audio_enabled) { + AudioState *s; + + s = AUD_init (); + if (s) { + for (c = soundhw; c->name; ++c) { + if (c->enabled) { + if (c->isa) { + c->init.init_isa (s, pic); + } else { + if (pci_bus) { + c->init.init_pci (pci_bus, s); + } + } + } + } + } + } +} +#endif + +static void pc_init_ne2k_isa(NICInfo *nd, qemu_irq *pic) +{ + static int nb_ne2k = 0; + + if (nb_ne2k == NE2000_NB_MAX) + return; + isa_ne2000_init(ne2000_io[nb_ne2k], pic[ne2000_irq[nb_ne2k]], nd); + nb_ne2k++; +} + +/* Itanium hardware initialisation */ +static void ipf_init1(ram_addr_t ram_size, + const char *boot_device, DisplayState *ds, + const char
*kernel_filename, const char *kernel_cmdline, + const char *initrd_filename, + int pci_enabled, const char *cpu_model) +{ + char buf[1024]; + int i; + ram_addr_t ram_addr; + ram_addr_t above_4g_mem_size = 0; + PCIBus *pci_bus; + PCIDevice *pci_dev; + int piix3_devfn = -1; + CPUState *env; + qemu_irq *cpu_irq; + qemu_irq *i8259; + int page_size; + int index; + unsigned long ipf_legacy_io_base, ipf_legacy_io_mem; + BlockDriverState *hd[MAX_IDE_BUS * MAX_IDE_DEVS]; + BlockDriverState *fd[MAX_FD]; + + page_size = getpagesize(); + if (page_size != TARGET_PAGE_SIZE) { + fprintf(stderr,"Error! Host page size != qemu target page size," + " you may need to change TARGET_PAGE_BITS in qemu!" + "host page size:0x%x\n", page_size); + exit(-1); + }; + + if (ram_size >= 0xc0000000 ) { + above_4g_mem_size = ram_size - 0xc0000000; + ram_size = 0xc0000000; + } + + /* init CPUs */ + if (cpu_model == NULL) { + cpu_model = "IA64"; + } + + for(i = 0; i < smp_cpus; i++) { + env = cpu_init(cpu_model); + if (!env) { + fprintf(stderr, "Unable to find CPU definition\n"); + exit(1); + } + if (i != 0) + env->hflags |= HF_HALTED_MASK; + register_savevm("cpu", i, 4, cpu_save, cpu_load, env); + qemu_register_reset(main_cpu_reset, 0, env); + } + + /* allocate RAM */ + if (kvm_enabled()) { + ram_addr = qemu_ram_alloc(0xa0000); + cpu_register_physical_memory(0, 0xa0000, ram_addr); + + ram_addr = qemu_ram_alloc(0x20000); // Workaround 0xa0000-0xc0000 + + ram_addr = qemu_ram_alloc(0x40000); + cpu_register_physical_memory(0xc0000, 0x40000, ram_addr); + + ram_addr = qemu_ram_alloc(ram_size - 0x100000); + cpu_register_physical_memory(0x100000, ram_size - 0x100000, ram_addr); + } else { + ram_addr = qemu_ram_alloc(ram_size); + cpu_register_physical_memory(0, ram_size, ram_addr); + } + + /* above 4giga memory allocation */ + if (above_4g_mem_size > 0) { + ram_addr = qemu_ram_alloc(above_4g_mem_size); + cpu_register_physical_memory(0x100000000, above_4g_mem_size, ram_addr); + } + + /*Load firware to its 
proper position.*/ + if (kvm_enabled()) { + unsigned long image_size; + uint8_t *image = NULL; + unsigned long nvram_addr; + unsigned long nvram_fd = 0; + unsigned long type = READ_FROM_NVRAM; + unsigned long i = 0; + unsigned long fw_offset; + ram_addr_t fw_mem = qemu_ram_alloc(GFW_SIZE); + + snprintf(buf, sizeof(buf), "%s/%s", bios_dir, FW_FILENAME); + image = read_image(buf, &image_size ); + if (NULL == image || !image_size) { + fprintf(stderr, "Error when reading Guest Firmware!\n"); + fprintf(stderr, "Please check Guest firmware at %s\n", buf); + exit(1); + } + fw_offset = GFW_START + GFW_SIZE - image_size; + + cpu_register_physical_memory(GFW_START, GFW_SIZE, fw_mem); + cpu_physical_memory_write(fw_offset, image, image_size); + + free(image); + + if (nvram) { + nvram_addr = NVRAM_START; + nvram_fd = kvm_ia64_nvram_init(type); + if (nvram_fd != -1) { + kvm_ia64_copy_from_nvram_to_GFW(nvram_fd); + close(nvram_fd); + } + i = atexit((void *)kvm_ia64_copy_from_GFW_to_nvram); + if (i != 0) + fprintf(stderr, "cannot set exit function\n"); + } else + nvram_addr = 0; + + kvm_ia64_build_hob(ram_size + above_4g_mem_size, smp_cpus, nvram_addr); + } + + /*Register legacy io address space, size:64M*/ + ipf_legacy_io_base = 0xE0000000; + ipf_legacy_io_mem = cpu_register_io_memory(0, ipf_legacy_io_read, + ipf_legacy_io_write, NULL); + cpu_register_physical_memory(ipf_legacy_io_base, 64*1024*1024, + ipf_legacy_io_mem); + + cpu_irq = qemu_allocate_irqs(pic_irq_request, first_cpu, 1); + i8259 = kvm_i8259_init(cpu_irq[0]); + + if (pci_enabled) { + pci_bus = i440fx_init(&i440fx_state, i8259); + piix3_devfn = piix3_init(pci_bus, -1); + } else { + pci_bus = NULL; + } + + if (cirrus_vga_enabled) { + if (pci_enabled) + pci_cirrus_vga_init(pci_bus); + else + isa_cirrus_vga_init(); + } else { + if (pci_enabled) + pci_vga_init(pci_bus, 0, 0); + else + isa_vga_init(); + } + + rtc_state = rtc_init(0x70, i8259[8], 2000); + + if (pci_enabled) { + pic_set_alt_irq_func(isa_pic, NULL, NULL); + 
} + + for(i = 0; i < MAX_SERIAL_PORTS; i++) { + if (serial_hds[i]) { + serial_init(serial_io[i], i8259[serial_irq[i]], 115200, + serial_hds[i]); + } + } + + for(i = 0; i < MAX_PARALLEL_PORTS; i++) { + if (parallel_hds[i]) { + parallel_init(parallel_io[i], i8259[parallel_irq[i]], + parallel_hds[i]); + } + } + + for(i = 0; i < nb_nics; i++) { + NICInfo *nd = &nd_table[i]; + + if (!pci_enabled || (nd->model && strcmp(nd->model, "ne2k_isa") == 0)) + pc_init_ne2k_isa(nd, i8259); + else + pci_nic_init(nd, "e1000", NULL); + } + +#undef USE_HYPERCALL //Disable it now, need to implement later! +#ifdef USE_HYPERCALL + pci_hypercall_init(pci_bus); +#endif + + if (drive_get_max_bus(IF_IDE) >= MAX_IDE_BUS) { + fprintf(stderr, "qemu: too many IDE bus\n"); + exit(1); + } + + for(i = 0; i < MAX_IDE_BUS * MAX_IDE_DEVS; i++) { + index = drive_get_index(IF_IDE, i / MAX_IDE_DEVS, i % MAX_IDE_DEVS); + if (index != -1) + hd[i] = drives_table[index].bdrv; + else + hd[i] = NULL; + } + + if (pci_enabled) { + pci_piix3_ide_init(pci_bus, hd, piix3_devfn + 1, i8259); + } else { + for(i = 0; i < MAX_IDE_BUS; i++) { + isa_ide_init(ide_iobase[i], ide_iobase2[i], i8259[ide_irq[i]], + hd[MAX_IDE_DEVS * i], hd[MAX_IDE_DEVS * i + 1]); + } + } + + i8042_init(i8259[1], i8259[12], 0x60); + DMA_init(0); +#ifdef HAS_AUDIO + audio_init(pci_enabled ? pci_bus : NULL, i8259); +#endif + + for(i = 0; i < MAX_FD; i++) { + index = drive_get_index(IF_FLOPPY, 0, i); + if (index != -1) + fd[i] = drives_table[index].bdrv; + else + fd[i] = NULL; + } + floppy_controller = fdctrl_init(i8259[6], 2, 0, 0x3f0, fd); + + cmos_init(ram_size, above_4g_mem_size, boot_device, hd); + + if (pci_enabled && usb_enabled) { + usb_uhci_piix3_init(pci_bus, piix3_devfn + 2); + } + + if (pci_enabled && acpi_enabled) { + uint8_t *eeprom_buf = qemu_mallocz(8 * 256); /* XXX: make this persistent */ + i2c_bus *smbus; + + /* TODO: Populate SPD eeprom data. 
*/ + smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100, i8259[9]); + for (i = 0; i < 8; i++) { + DeviceState *eeprom; + eeprom = qdev_create((BusState *)smbus, "smbus-eeprom"); + qdev_set_prop_int(eeprom, "address", 0x50 + i); + qdev_set_prop_ptr(eeprom, "data", eeprom_buf + (i * 256)); + qdev_init(eeprom); + } + } + + if (i440fx_state) { + i440fx_init_memory_mappings(i440fx_state); + } + + if (pci_enabled) { + int max_bus; + int bus; + + max_bus = drive_get_max_bus(IF_SCSI); + for (bus = 0; bus <= max_bus; bus++) { + pci_create_simple(pci_bus, -1, "lsi53c895a"); + } + } + /* Add virtio block devices */ + if (pci_enabled) { + int index; + int unit_id = 0; + + while ((index = drive_get_index(IF_VIRTIO, 0, unit_id)) != -1) { + pci_dev = pci_create("virtio-blk-pci", + drives_table[index].devaddr); + qdev_init(&pci_dev->qdev); + unit_id++; + } + } + +#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT + if (kvm_enabled()) + add_assigned_devices(pci_bus, assigned_devices, assigned_devices_index); +#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */ + +} + +static void ipf_init_pci(ram_addr_t ram_size, + const char *boot_device, DisplayState *ds, + const char *kernel_filename, + const char *kernel_cmdline, + const char *initrd_filename, + const char *cpu_model) +{ + ipf_init1(ram_size, boot_device, ds, kernel_filename, + kernel_cmdline, initrd_filename, 1, cpu_model); +} + +QEMUMachine ipf_machine = { + .name = "itanium", + .desc = "Itanium Platform", + .init = (QEMUMachineInitFunc *)ipf_init_pci, + .max_cpus = 255, + .is_default = 1, +}; + +static void ipf_machine_init(void) +{ + qemu_register_machine(&ipf_machine); +} + +machine_init(ipf_machine_init); + +#define IOAPIC_NUM_PINS 48 + +static int ioapic_irq_count[IOAPIC_NUM_PINS]; + +static int ioapic_map_irq(int devfn, int irq_num) +{ + int irq, dev; + dev = devfn >> 3; + irq = ((((dev << 2) + (dev >> 3) + irq_num) & 31) + 16); + return irq; +} + +/* + * Dummy function to provide match for call from hw/apic.c + */ +void 
apic_set_irq_delivered(void) { +} + +void ioapic_set_irq(void *opaque, int irq_num, int level) +{ + int vector, pic_ret; + + PCIDevice *pci_dev = (PCIDevice *)opaque; + vector = ioapic_map_irq(pci_dev->devfn, irq_num); + + if (level) + ioapic_irq_count[vector] += 1; + else + ioapic_irq_count[vector] -= 1; + + if (kvm_enabled()) { + if (kvm_set_irq(vector, ioapic_irq_count[vector] == 0, &pic_ret)) + if (pic_ret != 0) + apic_set_irq_delivered(); + return; + } +} + +int ipf_map_irq(PCIDevice *pci_dev, int irq_num) +{ + return ioapic_map_irq(pci_dev->devfn, irq_num); +} @@ -14,6 +14,7 @@ #include "hw.h" #include "msix.h" #include "pci.h" +#include "kvm.h" /* MSI-X capability structure */ #define MSIX_TABLE_OFFSET 4 @@ -45,6 +46,117 @@ /* Flag for interrupt controller to declare MSI-X support */ int msix_supported; +#ifdef CONFIG_KVM +/* KVM specific MSIX helpers */ +static void kvm_msix_free(PCIDevice *dev) +{ + int vector, changed = 0; + for (vector = 0; vector < dev->msix_entries_nr; ++vector) { + if (dev->msix_entry_used[vector]) { + kvm_del_routing_entry(kvm_context, &dev->msix_irq_entries[vector]); + changed = 1; + } + } + if (changed) { + kvm_commit_irq_routes(kvm_context); + } +} + +static void kvm_msix_routing_entry(PCIDevice *dev, unsigned vector, + struct kvm_irq_routing_entry *entry) +{ + uint8_t *table_entry = dev->msix_table_page + vector * MSIX_ENTRY_SIZE; + entry->type = KVM_IRQ_ROUTING_MSI; + entry->flags = 0; + entry->u.msi.address_lo = pci_get_long(table_entry + MSIX_MSG_ADDR); + entry->u.msi.address_hi = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR); + entry->u.msi.data = pci_get_long(table_entry + MSIX_MSG_DATA); +} + +static void kvm_msix_update(PCIDevice *dev, int vector, + int was_masked, int is_masked) +{ + struct kvm_irq_routing_entry e = {}, *entry; + int mask_cleared = was_masked && !is_masked; + /* It is only legal to change an entry when it is masked. Therefore, it is + * enough to update the routing in kernel when mask is being cleared. 
*/ + if (!mask_cleared) { + return; + } + if (!dev->msix_entry_used[vector]) { + return; + } + entry = dev->msix_irq_entries + vector; + e.gsi = entry->gsi; + kvm_msix_routing_entry(dev, vector, &e); + if (memcmp(&entry->u.msi, &e.u.msi, sizeof entry->u.msi)) { + int r; + r = kvm_update_routing_entry(kvm_context, entry, &e); + if (r) { + fprintf(stderr, "%s: kvm_update_routing_entry failed: %s\n", __func__, + strerror(-r)); + exit(1); + } + memcpy(&entry->u.msi, &e.u.msi, sizeof entry->u.msi); + r = kvm_commit_irq_routes(kvm_context); + if (r) { + fprintf(stderr, "%s: kvm_commit_irq_routes failed: %s\n", __func__, + strerror(-r)); + exit(1); + } + } +} + +static int kvm_msix_add(PCIDevice *dev, unsigned vector) +{ + struct kvm_irq_routing_entry *entry = dev->msix_irq_entries + vector; + int r; + + if (!kvm_has_gsi_routing(kvm_context)) { + fprintf(stderr, "Warning: no MSI-X support found. " + "At least kernel 2.6.30 is required for MSI-X support.\n" + ); + return -EOPNOTSUPP; + } + + r = kvm_get_irq_route_gsi(kvm_context); + if (r < 0) { + fprintf(stderr, "%s: kvm_get_irq_route_gsi failed: %s\n", __func__, strerror(-r)); + return r; + } + entry->gsi = r; + kvm_msix_routing_entry(dev, vector, entry); + r = kvm_add_routing_entry(kvm_context, entry); + if (r < 0) { + fprintf(stderr, "%s: kvm_add_routing_entry failed: %s\n", __func__, strerror(-r)); + return r; + } + + r = kvm_commit_irq_routes(kvm_context); + if (r < 0) { + fprintf(stderr, "%s: kvm_commit_irq_routes failed: %s\n", __func__, strerror(-r)); + return r; + } + return 0; +} + +static void kvm_msix_del(PCIDevice *dev, unsigned vector) +{ + if (dev->msix_entry_used[vector]) { + return; + } + kvm_del_routing_entry(kvm_context, &dev->msix_irq_entries[vector]); + kvm_commit_irq_routes(kvm_context); +} +#else + +static void kvm_msix_free(PCIDevice *dev) {} +static void kvm_msix_update(PCIDevice *dev, int vector, + int was_masked, int is_masked) {} +static int kvm_msix_add(PCIDevice *dev, unsigned vector) { 
return -1; } +static void kvm_msix_del(PCIDevice *dev, unsigned vector) {} +#endif + /* Add MSI-X capability to the config space for the device. */ /* Given a bar and its size, add MSI-X table on top of it * and fill MSI-X capability in the config space. @@ -183,7 +295,18 @@ static void msix_mmio_writel(void *opaque, target_phys_addr_t addr, PCIDevice *dev = opaque; unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3; int vector = offset / MSIX_ENTRY_SIZE; + int was_masked = msix_is_masked(dev, vector); pci_set_long(dev->msix_table_page + offset, val); + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + kvm_msix_update(dev, vector, was_masked, msix_is_masked(dev, vector)); + } + if (was_masked != msix_is_masked(dev, vector) && + dev->msix_mask_notifier && dev->msix_mask_notifier_opaque[vector]) { + int r = dev->msix_mask_notifier(dev, vector, + dev->msix_mask_notifier_opaque[vector], + msix_is_masked(dev, vector)); + assert(r >= 0); + } msix_handle_mask_update(dev, vector); } @@ -222,10 +345,18 @@ void msix_mmio_map(PCIDevice *d, int region_num, static void msix_mask_all(struct PCIDevice *dev, unsigned nentries) { - int vector; + int vector, r; for (vector = 0; vector < nentries; ++vector) { unsigned offset = vector * MSIX_ENTRY_SIZE + MSIX_VECTOR_CTRL; + int was_masked = msix_is_masked(dev, vector); dev->msix_table_page[offset] |= MSIX_VECTOR_MASK; + if (was_masked != msix_is_masked(dev, vector) && + dev->msix_mask_notifier && dev->msix_mask_notifier_opaque[vector]) { + r = dev->msix_mask_notifier(dev, vector, + dev->msix_mask_notifier_opaque[vector], + msix_is_masked(dev, vector)); + assert(r >= 0); + } } } @@ -242,6 +373,15 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries, if (nentries > MSIX_MAX_ENTRIES) return -EINVAL; +#ifdef KVM_CAP_IRQCHIP + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + dev->msix_irq_entries = qemu_malloc(nentries * + sizeof *dev->msix_irq_entries); + } +#endif + dev->msix_mask_notifier_opaque = + 
qemu_mallocz(nentries * sizeof *dev->msix_mask_notifier_opaque); + dev->msix_mask_notifier = NULL; dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES * sizeof *dev->msix_entry_used); @@ -278,6 +418,10 @@ static void msix_free_irq_entries(PCIDevice *dev) { int vector; + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + kvm_msix_free(dev); + } + for (vector = 0; vector < dev->msix_entries_nr; ++vector) { dev->msix_entry_used[vector] = 0; msix_clr_pending(dev, vector); @@ -298,6 +442,10 @@ int msix_uninit(PCIDevice *dev) dev->msix_table_page = NULL; qemu_free(dev->msix_entry_used); dev->msix_entry_used = NULL; + qemu_free(dev->msix_irq_entries); + dev->msix_irq_entries = NULL; + qemu_free(dev->msix_mask_notifier_opaque); + dev->msix_mask_notifier_opaque = NULL; dev->cap_present &= ~QEMU_PCI_CAP_MSIX; return 0; } @@ -306,10 +454,13 @@ void msix_save(PCIDevice *dev, QEMUFile *f) { unsigned n = dev->msix_entries_nr; - if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) { + if (!msix_supported) { return; } + if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) { + return; + } qemu_put_buffer(f, dev->msix_table_page, n * MSIX_ENTRY_SIZE); qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8); } @@ -319,6 +470,9 @@ void msix_load(PCIDevice *dev, QEMUFile *f) { unsigned n = dev->msix_entries_nr; + if (!msix_supported) + return; + if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) { return; } @@ -363,6 +517,13 @@ void msix_notify(PCIDevice *dev, unsigned vector) return; } +#ifdef KVM_CAP_IRQCHIP + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + kvm_set_irq(dev->msix_irq_entries[vector].gsi, 1, NULL); + return; + } +#endif + address = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR); address = (address << 32) | pci_get_long(table_entry + MSIX_MSG_ADDR); data = pci_get_long(table_entry + MSIX_MSG_DATA); @@ -391,9 +552,19 @@ void msix_reset(PCIDevice *dev) /* Mark vector as used. 
*/ int msix_vector_use(PCIDevice *dev, unsigned vector) { + int ret; if (vector >= dev->msix_entries_nr) return -EINVAL; - dev->msix_entry_used[vector]++; + if (dev->msix_entry_used[vector]) { + return 0; + } + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + ret = kvm_msix_add(dev, vector); + if (ret) { + return ret; + } + } + ++dev->msix_entry_used[vector]; return 0; } @@ -406,6 +577,9 @@ void msix_vector_unuse(PCIDevice *dev, unsigned vector) if (--dev->msix_entry_used[vector]) { return; } + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + kvm_msix_del(dev, vector); + } msix_clr_pending(dev, vector); } @@ -415,3 +589,47 @@ void msix_unuse_all_vectors(PCIDevice *dev) return; msix_free_irq_entries(dev); } + +int msix_set_mask_notifier(PCIDevice *dev, unsigned vector, void *opaque) +{ + int r = 0; + if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) + return 0; + + assert(dev->msix_mask_notifier); + assert(opaque); + assert(!dev->msix_mask_notifier_opaque[vector]); + + /* Unmask the new notifier unless vector is masked. */ + if (!msix_is_masked(dev, vector)) { + r = dev->msix_mask_notifier(dev, vector, opaque, false); + if (r < 0) { + return r; + } + } + dev->msix_mask_notifier_opaque[vector] = opaque; + return r; +} + +int msix_unset_mask_notifier(PCIDevice *dev, unsigned vector) +{ + int r = 0; + void *opaque; + if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) + return 0; + + opaque = dev->msix_mask_notifier_opaque[vector]; + + assert(dev->msix_mask_notifier); + assert(opaque); + + /* Mask the old notifier unless it is already masked. 
*/ + if (!msix_is_masked(dev, vector)) { + r = dev->msix_mask_notifier(dev, vector, opaque, true); + if (r < 0) { + return r; + } + } + dev->msix_mask_notifier_opaque[vector] = NULL; + return r; +} @@ -33,4 +33,6 @@ void msix_reset(PCIDevice *dev); extern int msix_supported; +int msix_set_mask_notifier(PCIDevice *dev, unsigned vector, void *opaque); +int msix_unset_mask_notifier(PCIDevice *dev, unsigned vector); #endif @@ -39,6 +39,8 @@ #include "msix.h" #include "sysbus.h" #include "sysemu.h" +#include "device-assignment.h" +#include "kvm.h" /* output Bochs bios info messages */ //#define DEBUG_BIOS @@ -54,6 +56,8 @@ #endif #define BIOS_FILENAME "bios.bin" +#define EXTBOOT_FILENAME "extboot.bin" +#define VAPIC_FILENAME "vapic.bin" #define PC_MAX_BIOS_SIZE (4 * 1024 * 1024) @@ -842,10 +846,18 @@ static void pc_cpu_reset(void *opaque) env->halted = !cpu_is_bsp(env); } -static CPUState *pc_new_cpu(const char *cpu_model) +CPUState *pc_new_cpu(const char *cpu_model) { CPUState *env; + if (cpu_model == NULL) { +#ifdef TARGET_X86_64 + cpu_model = "qemu64"; +#else + cpu_model = "qemu32"; +#endif + } + env = cpu_init(cpu_model); if (!env) { fprintf(stderr, "Unable to find x86 CPU definition\n"); @@ -865,14 +877,6 @@ void pc_cpus_init(const char *cpu_model) int i; /* init CPUs */ - if (cpu_model == NULL) { -#ifdef TARGET_X86_64 - cpu_model = "qemu64"; -#else - cpu_model = "qemu32"; -#endif - } - for(i = 0; i < smp_cpus; i++) { pc_new_cpu(cpu_model); } @@ -947,9 +951,17 @@ void pc_memory_init(ram_addr_t ram_size, isa_bios_size = bios_size; if (isa_bios_size > (128 * 1024)) isa_bios_size = 128 * 1024; + cpu_register_physical_memory(0xd0000, (192 * 1024) - isa_bios_size, + IO_MEM_UNASSIGNED); + /* kvm tpr optimization needs the bios accessible for write, at least to qemu itself */ cpu_register_physical_memory(0x100000 - isa_bios_size, isa_bios_size, - (bios_offset + bios_size - isa_bios_size) | IO_MEM_ROM); + (bios_offset + bios_size - isa_bios_size) /* | IO_MEM_ROM */); + + 
if (extboot_drive) { + option_rom[nb_option_roms++] = qemu_strdup(EXTBOOT_FILENAME); + } + option_rom[nb_option_roms++] = qemu_strdup(VAPIC_FILENAME); option_rom_offset = qemu_ram_alloc(NULL, "pc.rom", PC_ROM_SIZE); cpu_register_physical_memory(PC_ROM_MIN_VGA, PC_ROM_SIZE, option_rom_offset); @@ -1034,7 +1046,14 @@ void pc_basic_device_init(qemu_irq *isa_irq, qemu_register_boot_set(pc_boot_set, *rtc_state); +#ifdef CONFIG_KVM_PIT + if (kvm_enabled() && kvm_pit_in_kernel()) + pit = kvm_pit_init(0x40, isa_reserve_irq(0)); + else +#endif + pit = pit_init(0x40, isa_reserve_irq(0)); + pcspk_init(pit); for(i = 0; i < MAX_SERIAL_PORTS; i++) { @@ -1072,4 +1091,22 @@ void pc_pci_device_init(PCIBus *pci_bus) for (bus = 0; bus <= max_bus; bus++) { pci_create_simple(pci_bus, -1, "lsi53c895a"); } + + if (extboot_drive) { + DriveInfo *info = extboot_drive; + int cyls, heads, secs; + + if (info->type != IF_IDE && info->type != IF_VIRTIO) { + bdrv_guess_geometry(info->bdrv, &cyls, &heads, &secs); + bdrv_set_geometry_hint(info->bdrv, cyls, heads, secs); + } + + extboot_init(info->bdrv); + } + +#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT + if (kvm_enabled()) { + add_assigned_devices(pci_bus, assigned_devices, assigned_devices_index); + } +#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */ } @@ -32,6 +32,7 @@ extern PicState2 *isa_pic; void pic_set_irq(int irq, int level); void pic_set_irq_new(void *opaque, int irq, int level); qemu_irq *i8259_init(qemu_irq parent_irq); +qemu_irq *kvm_i8259_init(qemu_irq parent_irq); int pic_read_irq(PicState2 *s); void pic_update_irq(PicState2 *s); uint32_t pic_intack_read(PicState2 *s); @@ -61,6 +62,10 @@ int pit_get_initial_count(PITState *pit, int channel); int pit_get_mode(PITState *pit, int channel); int pit_get_out(PITState *pit, int channel, int64_t current_time); +/* i8254-kvm.c */ + +PITState *kvm_pit_init(int base, qemu_irq irq); + void hpet_pit_disable(void); void hpet_pit_enable(void); @@ -135,6 +140,9 @@ void pcspk_init(PITState *); int 
pcspk_audio_init(qemu_irq *pic); /* piix_pci.c */ +/* config space register for IRQ routing */ +#define PIIX_CONFIG_IRQ_ROUTE 0x60 + struct PCII440FXState; typedef struct PCII440FXState PCII440FXState; @@ -145,6 +153,10 @@ void i440fx_init_memory_mappings(PCII440FXState *d); extern PCIDevice *piix4_dev; int piix4_init(PCIBus *bus, int devfn); +int piix_get_irq(int pin); + +int ipf_map_irq(PCIDevice *pci_dev, int irq_num); + /* vga.c */ enum vga_retrace_method { VGA_RETRACE_DUMB, @@ -167,6 +179,10 @@ void isa_cirrus_vga_init(void); void isa_ne2000_init(int base, int irq, NICInfo *nd); +/* extboot.c */ + +void extboot_init(BlockDriverState *bs); + /* e820 types */ #define E820_RAM 1 #define E820_RESERVED 2 diff --git a/hw/pc_piix.c b/hw/pc_piix.c index 519e8a5cc..3a1c67023 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -35,12 +35,16 @@ #include "sysemu.h" #include "sysbus.h" +qemu_irq *ioapic_irq_hack; + #define MAX_IDE_BUS 2 static const int ide_iobase[MAX_IDE_BUS] = { 0x1f0, 0x170 }; static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 }; static const int ide_irq[MAX_IDE_BUS] = { 14, 15 }; +const char *global_cpu_model; /* cpu hotadd */ + static void ioapic_init(IsaIrqState *isa_irq_state) { DeviceState *dev; @@ -82,6 +86,8 @@ static void pc_init1(ram_addr_t ram_size, BusState *idebus[MAX_IDE_BUS]; ISADevice *rtc_state; + global_cpu_model = cpu_model; + pc_cpus_init(cpu_model); vmport_init(); @@ -91,13 +97,25 @@ static void pc_init1(ram_addr_t ram_size, &below_4g_mem_size, &above_4g_mem_size); cpu_irq = pc_allocate_cpu_irq(); - i8259 = i8259_init(cpu_irq[0]); - isa_irq_state = qemu_mallocz(sizeof(*isa_irq_state)); - isa_irq_state->i8259 = i8259; - if (pci_enabled) { - ioapic_init(isa_irq_state); +#ifdef KVM_CAP_IRQCHIP + if (kvm_enabled() && kvm_irqchip_in_kernel()) { + isa_irq_state = qemu_mallocz(sizeof(*isa_irq_state)); + if (pci_enabled) { + ioapic_init(isa_irq_state); + } + isa_irq = i8259 = kvm_i8259_init(cpu_irq[0]); + ioapic_irq_hack = isa_irq; + } 
else +#endif + { + i8259 = i8259_init(cpu_irq[0]); + isa_irq_state = qemu_mallocz(sizeof(*isa_irq_state)); + isa_irq_state->i8259 = i8259; + if (pci_enabled) { + ioapic_init(isa_irq_state); + } + isa_irq = qemu_allocate_irqs(isa_irq_handler, isa_irq_state, 24); } - isa_irq = qemu_allocate_irqs(isa_irq_handler, isa_irq_state, 24); if (pci_enabled) { pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size); @@ -108,6 +126,7 @@ static void pc_init1(ram_addr_t ram_size, isa_bus_irqs(isa_irq); pc_register_ferr_irq(isa_reserve_irq(13)); + cpu_irq = pc_allocate_cpu_irq(); pc_vga_init(pci_enabled? pci_bus: NULL); @@ -120,7 +139,7 @@ static void pc_init1(ram_addr_t ram_size, if (!pci_enabled || (nd->model && strcmp(nd->model, "ne2k_isa") == 0)) pc_init_ne2k_isa(nd); else - pci_nic_init_nofail(nd, "e1000", NULL); + pci_nic_init_nofail(nd, "rtl8139", NULL); } if (drive_get_max_bus(IF_IDE) >= MAX_IDE_BUS) { diff --git a/hw/pci-hotplug.c b/hw/pci-hotplug.c index fe468d646..d2456adf1 100644 --- a/hw/pci-hotplug.c +++ b/hw/pci-hotplug.c @@ -31,6 +31,7 @@ #include "scsi.h" #include "virtio-blk.h" #include "qemu-config.h" +#include "device-assignment.h" #if defined(TARGET_I386) static PCIDevice *qemu_pci_hot_add_nic(Monitor *mon, @@ -228,6 +229,24 @@ static PCIDevice *qemu_pci_hot_add_storage(Monitor *mon, return dev; } +#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT +static PCIDevice *qemu_pci_hot_assign_device(Monitor *mon, + const char *devaddr, + const char *opts_str) +{ + QemuOpts *opts; + DeviceState *dev; + + opts = add_assigned_device(opts_str); + if (opts == NULL) { + monitor_printf(mon, "Error adding device; check syntax\n"); + return NULL; + } + dev = qdev_device_add(opts); + return DO_UPCAST(PCIDevice, qdev, dev); +} +#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */ + void pci_device_hot_add(Monitor *mon, const QDict *qdict) { PCIDevice *dev = NULL; @@ -251,6 +270,10 @@ void pci_device_hot_add(Monitor *mon, const QDict *qdict) dev = qemu_pci_hot_add_nic(mon, pci_addr, opts); 
} else if (strcmp(type, "storage") == 0) { dev = qemu_pci_hot_add_storage(mon, pci_addr, opts); +#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT + } else if (strcmp(type, "host") == 0) { + dev = qemu_pci_hot_assign_device(mon, pci_addr, opts); +#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */ } else { monitor_printf(mon, "invalid type: %s\n", type); } @@ -27,6 +27,9 @@ #include "net.h" #include "sysemu.h" #include "loader.h" +#include "qemu-kvm.h" +#include "hw/pc.h" +#include "device-assignment.h" #include "qemu-objects.h" //#define DEBUG_PCI @@ -450,6 +453,7 @@ static void pci_set_default_subsystem_id(PCIDevice *pci_dev) } /* + * Parse pci address in qemu command * Parse [[<domain>:]<bus>:]<slot>, return -1 on error */ static int pci_parse_devaddr(const char *addr, int *domp, int *busp, unsigned *slotp) @@ -498,6 +502,83 @@ static int pci_parse_devaddr(const char *addr, int *domp, int *busp, unsigned *s return 0; } +/* + * Parse device seg and bdf in device assignment command: + * + * -pcidevice host=[seg:]bus:dev.func + * + * Parse [seg:]<bus>:<slot>.<func> return -1 on error + */ +int pci_parse_host_devaddr(const char *addr, int *segp, int *busp, + int *slotp, int *funcp) +{ + const char *p; + char *e; + int val; + int seg = 0, bus = 0, slot = 0, func = 0; + + /* parse optional seg */ + p = addr; + val = 0; + while (1) { + p = strchr(p, ':'); + if (p) { + val++; + p++; + } else + break; + } + if (val <= 0 || val > 2) + return -1; + + p = addr; + if (val == 2) { + val = strtoul(p, &e, 16); + if (e == p) + return -1; + if (*e == ':') { + seg = val; + p = e + 1; + } + } else + seg = 0; + + + /* parse bdf */ + val = strtoul(p, &e, 16); + if (e == p) + return -1; + if (*e == ':') { + bus = val; + p = e + 1; + val = strtoul(p, &e, 16); + if (e == p) + return -1; + if (*e == '.') { + slot = val; + p = e + 1; + val = strtoul(p, &e, 16); + if (e == p) + return -1; + func = val; + } else + return -1; + } else + return -1; + + if (seg > 0xffff || bus > 0xff || slot > 0x1f || func > 0x7) + 
return -1; + + if (*e) + return -1; + + *segp = seg; + *busp = bus; + *slotp = slot; + *funcp = func; + return 0; +} + int pci_read_devaddr(Monitor *mon, const char *addr, int *domp, int *busp, unsigned *slotp) { @@ -1006,25 +1087,80 @@ static void pci_update_irq_disabled(PCIDevice *d, int was_irq_disabled) } } -uint32_t pci_default_read_config(PCIDevice *d, - uint32_t address, int len) +static uint32_t pci_read_config(PCIDevice *d, + uint32_t address, int len) { uint32_t val = 0; - assert(len == 1 || len == 2 || len == 4); + len = MIN(len, pci_config_size(d) - address); memcpy(&val, d->config + address, len); return le32_to_cpu(val); } +uint32_t pci_default_read_config(PCIDevice *d, + uint32_t address, int len) +{ + assert(len == 1 || len == 2 || len == 4); + + if (pci_access_cap_config(d, address, len)) { + return d->cap.config_read(d, address, len); + } + + return pci_read_config(d, address, len); +} + +static void pci_write_config(PCIDevice *pci_dev, + uint32_t address, uint32_t val, int len) +{ + int i; + for (i = 0; i < len; i++) { + pci_dev->config[address + i] = val & 0xff; + val >>= 8; + } +} + +int pci_access_cap_config(PCIDevice *pci_dev, uint32_t address, int len) +{ + if (pci_dev->cap.supported && address >= pci_dev->cap.start && + (address + len) < pci_dev->cap.start + pci_dev->cap.length) + return 1; + return 0; +} + +uint32_t pci_default_cap_read_config(PCIDevice *pci_dev, + uint32_t address, int len) +{ + return pci_read_config(pci_dev, address, len); +} + +void pci_default_cap_write_config(PCIDevice *pci_dev, + uint32_t address, uint32_t val, int len) +{ + pci_write_config(pci_dev, address, val, len); +} + void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l) { int i, was_irq_disabled = pci_irq_disabled(d); uint32_t config_size = pci_config_size(d); + if (pci_access_cap_config(d, addr, l)) { + d->cap.config_write(d, addr, val, l); + return; + } + for (i = 0; i < l && addr + i < config_size; val >>= 8, ++i) { uint8_t wmask 
= d->wmask[addr + i]; d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask); } + +#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT + if (kvm_enabled() && kvm_irqchip_in_kernel() && + addr >= PIIX_CONFIG_IRQ_ROUTE && + addr < PIIX_CONFIG_IRQ_ROUTE + 4) + assigned_dev_update_irqs(); +#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */ + if (ranges_overlap(addr, l, PCI_BASE_ADDRESS_0, 24) || ranges_overlap(addr, l, PCI_ROM_ADDRESS, 4) || ranges_overlap(addr, l, PCI_ROM_ADDRESS1, 4) || @@ -1048,6 +1184,10 @@ static void pci_set_irq(void *opaque, int irq_num, int level) if (!change) return; +#if defined(TARGET_IA64) + ioapic_set_irq(pci_dev, irq_num, level); +#endif + pci_set_irq_state(pci_dev, irq_num, level); pci_update_irq_status(pci_dev); if (pci_irq_disabled(pci_dev)) @@ -1055,6 +1195,11 @@ static void pci_set_irq(void *opaque, int irq_num, int level) pci_change_irq_level(pci_dev, irq_num, change); } +int pci_map_irq(PCIDevice *pci_dev, int pin) +{ + return pci_dev->bus->map_irq(pci_dev, pin); +} + /***********************************************************/ /* monitor info on PCI */ @@ -1671,6 +1816,37 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name) return dev; } +int pci_enable_capability_support(PCIDevice *pci_dev, + uint32_t config_start, + PCICapConfigReadFunc *config_read, + PCICapConfigWriteFunc *config_write, + PCICapConfigInitFunc *config_init) +{ + if (!pci_dev) + return -ENODEV; + + pci_dev->config[0x06] |= 0x10; // status = capabilities + + if (config_start == 0) + pci_dev->cap.start = PCI_CAPABILITY_CONFIG_DEFAULT_START_ADDR; + else if (config_start >= 0x40 && config_start < 0xff) + pci_dev->cap.start = config_start; + else + return -EINVAL; + + if (config_read) + pci_dev->cap.config_read = config_read; + else + pci_dev->cap.config_read = pci_default_cap_read_config; + if (config_write) + pci_dev->cap.config_write = config_write; + else + pci_dev->cap.config_write = pci_default_cap_write_config; + pci_dev->cap.supported = 1; + 
pci_dev->config[PCI_CAPABILITY_LIST] = pci_dev->cap.start; + return config_init(pci_dev); +} + static int pci_find_space(PCIDevice *pdev, uint8_t size) { int config_size = pci_config_size(pdev); @@ -6,6 +6,8 @@ #include "qdev.h" +struct kvm_irq_routing_entry; + /* PCI includes legacy ISA access. */ #include "isa.h" @@ -80,6 +82,12 @@ typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num, pcibus_t addr, pcibus_t size, int type); typedef int PCIUnregisterFunc(PCIDevice *pci_dev); +typedef void PCICapConfigWriteFunc(PCIDevice *pci_dev, + uint32_t address, uint32_t val, int len); +typedef uint32_t PCICapConfigReadFunc(PCIDevice *pci_dev, + uint32_t address, int len); +typedef int PCICapConfigInitFunc(PCIDevice *pci_dev); + typedef struct PCIIORegion { pcibus_t addr; /* current PCI mapping address. -1 means not mapped */ #define PCI_BAR_UNMAPPED (~(pcibus_t)0) @@ -112,6 +120,14 @@ enum { QEMU_PCI_CAP_EXPRESS = 0x2, }; +#define PCI_CAPABILITY_CONFIG_MAX_LENGTH 0x60 +#define PCI_CAPABILITY_CONFIG_DEFAULT_START_ADDR 0x40 +#define PCI_CAPABILITY_CONFIG_MSI_LENGTH 0x10 +#define PCI_CAPABILITY_CONFIG_MSIX_LENGTH 0x10 + +typedef int (*msix_mask_notifier_func)(PCIDevice *, unsigned vector, + void *opaque, int masked); + struct PCIDevice { DeviceState qdev; /* PCI config space */ @@ -167,6 +183,26 @@ struct PCIDevice { char *romfile; ram_addr_t rom_offset; uint32_t rom_bar; + + /* How much space does an MSIX table need. */ + /* The spec requires giving the table structure + * a 4K aligned region all by itself. Align it to + * target pages so that drivers can do passthrough + * on the rest of the region. 
*/ + target_phys_addr_t msix_page_size; + + struct kvm_irq_routing_entry *msix_irq_entries; + + void **msix_mask_notifier_opaque; + msix_mask_notifier_func msix_mask_notifier; + + /* Device capability configuration space */ + struct { + int supported; + unsigned int start, length; + PCICapConfigReadFunc *config_read; + PCICapConfigWriteFunc *config_write; + } cap; }; PCIDevice *pci_register_device(PCIBus *bus, const char *name, @@ -178,6 +214,14 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num, pcibus_t size, int type, PCIMapIORegionFunc *map_func); +int pci_enable_capability_support(PCIDevice *pci_dev, + uint32_t config_start, + PCICapConfigReadFunc *config_read, + PCICapConfigWriteFunc *config_write, + PCICapConfigInitFunc *config_init); + +int pci_map_irq(PCIDevice *pci_dev, int pin); + int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size); int pci_add_capability_at_offset(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_offset, uint8_t cap_size); @@ -188,13 +232,17 @@ void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size); uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id); - uint32_t pci_default_read_config(PCIDevice *d, uint32_t address, int len); void pci_default_write_config(PCIDevice *d, uint32_t address, uint32_t val, int len); void pci_device_save(PCIDevice *s, QEMUFile *f); int pci_device_load(PCIDevice *s, QEMUFile *f); +uint32_t pci_default_cap_read_config(PCIDevice *pci_dev, + uint32_t address, int len); +void pci_default_cap_write_config(PCIDevice *pci_dev, + uint32_t address, uint32_t val, int len); +int pci_access_cap_config(PCIDevice *pci_dev, uint32_t address, int len); typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level); typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num); @@ -226,6 +274,9 @@ PCIBus *pci_get_bus_devfn(int *devfnp, const char *devaddr); int pci_read_devaddr(Monitor *mon, const char *addr, int *domp, int *busp, unsigned *slotp); +int 
pci_parse_host_devaddr(const char *addr, int *segp, int *busp, + int *slotp, int *funcp); + void do_pci_info_print(Monitor *mon, const QObject *data); void do_pci_info(Monitor *mon, QObject **ret_data); PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did, diff --git a/hw/pci_regs.h b/hw/pci_regs.h index dd0bed4f1..1c675dc8b 100644 --- a/hw/pci_regs.h +++ b/hw/pci_regs.h @@ -44,9 +44,16 @@ #define PCI_STATUS 0x06 /* 16 bits */ #define PCI_STATUS_INTERRUPT 0x08 /* Interrupt status */ #define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */ + +#ifndef PCI_STATUS_66MHZ #define PCI_STATUS_66MHZ 0x20 /* Support 66 Mhz PCI 2.1 bus */ +#endif + #define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */ +#ifndef PCI_STATUS_FAST_BACK #define PCI_STATUS_FAST_BACK 0x80 /* Accept fast-back to back */ +#endif + #define PCI_STATUS_PARITY 0x100 /* Detected parity error */ #define PCI_STATUS_DEVSEL_MASK 0x600 /* DEVSEL timing */ #define PCI_STATUS_DEVSEL_FAST 0x000 diff --git a/hw/pcspk.c b/hw/pcspk.c index 26a0ecb9d..fb5f7637c 100644 --- a/hw/pcspk.c +++ b/hw/pcspk.c @@ -27,6 +27,8 @@ #include "isa.h" #include "audio/audio.h" #include "qemu-timer.h" +#include "i8254.h" +#include "qemu-kvm.h" #define PCSPK_BUF_LEN 1792 #define PCSPK_SAMPLE_RATE 32000 @@ -48,6 +50,43 @@ typedef struct { static const char *s_spk = "pcspk"; static PCSpkState pcspk_state; +#ifdef CONFIG_KVM_PIT +static void kvm_get_pit_ch2(PITState *pit, + struct kvm_pit_state *inkernel_state) +{ + struct kvm_pit_state pit_state; + + if (kvm_enabled() && kvm_pit_in_kernel()) { + kvm_get_pit(kvm_context, &pit_state); + pit->channels[2].mode = pit_state.channels[2].mode; + pit->channels[2].count = pit_state.channels[2].count; + pit->channels[2].count_load_time = pit_state.channels[2].count_load_time; + pit->channels[2].gate = pit_state.channels[2].gate; + if (inkernel_state) { + memcpy(inkernel_state, &pit_state, sizeof(*inkernel_state)); + } + } +} + +static void 
kvm_set_pit_ch2(PITState *pit, + struct kvm_pit_state *inkernel_state) +{ + if (kvm_enabled() && kvm_pit_in_kernel()) { + inkernel_state->channels[2].mode = pit->channels[2].mode; + inkernel_state->channels[2].count = pit->channels[2].count; + inkernel_state->channels[2].count_load_time = + pit->channels[2].count_load_time; + inkernel_state->channels[2].gate = pit->channels[2].gate; + kvm_set_pit(kvm_context, inkernel_state); + } +} +#else +static inline void kvm_get_pit_ch2(PITState *pit, + struct kvm_pit_state *inkernel_state) { } +static inline void kvm_set_pit_ch2(PITState *pit, + struct kvm_pit_state *inkernel_state) { } +#endif + static inline void generate_samples(PCSpkState *s) { unsigned int i; @@ -72,6 +111,8 @@ static void pcspk_callback(void *opaque, int free) PCSpkState *s = opaque; unsigned int n; + kvm_get_pit_ch2(s->pit, NULL); + if (pit_get_mode(s->pit, 2) != 3) return; @@ -117,6 +158,8 @@ static uint32_t pcspk_ioport_read(void *opaque, uint32_t addr) PCSpkState *s = opaque; int out; + kvm_get_pit_ch2(s->pit, NULL); + s->dummy_refresh_clock ^= (1 << 4); out = pit_get_out(s->pit, 2, qemu_get_clock(vm_clock)) << 5; @@ -125,9 +168,12 @@ static uint32_t pcspk_ioport_read(void *opaque, uint32_t addr) static void pcspk_ioport_write(void *opaque, uint32_t addr, uint32_t val) { + struct kvm_pit_state inkernel_state; PCSpkState *s = opaque; const int gate = val & 1; + kvm_get_pit_ch2(s->pit, &inkernel_state); + s->data_on = (val >> 1) & 1; pit_set_gate(s->pit, 2, gate); if (s->voice) { @@ -135,6 +181,8 @@ static void pcspk_ioport_write(void *opaque, uint32_t addr, uint32_t val) s->play_pos = 0; AUD_set_active_out(s->voice, gate & s->data_on); } + + kvm_set_pit_ch2(s->pit, &inkernel_state); } void pcspk_init(PITState *pit) diff --git a/hw/piix_pci.c b/hw/piix_pci.c index d14d05e1f..d01618f8c 100644 --- a/hw/piix_pci.c +++ b/hw/piix_pci.c @@ -28,6 +28,7 @@ #include "pci_host.h" #include "isa.h" #include "sysbus.h" +#include "kvm.h" /* * I440FX chipset data 
sheet. @@ -98,6 +99,10 @@ static void i440fx_update_memory_mappings(PCII440FXState *d) int i, r; uint32_t smram, addr; + if (kvm_enabled()) { + /* FIXME: Support remappings and protection changes. */ + return; + } update_pam(d, 0xf0000, 0x100000, (d->dev.config[I440FX_PAM] >> 4) & 3); for(i = 0; i < 12; i++) { r = (d->dev.config[(i >> 1) + (I440FX_PAM + 1)] >> ((i & 1) * 4)) & 3; @@ -216,6 +221,8 @@ static int i440fx_initfn(PCIDevice *dev) return 0; } +static PIIX3State *piix3_dev; + PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq *pic, ram_addr_t ram_size) { DeviceState *dev; @@ -246,6 +253,8 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq * ram_size = 255; (*pi440fx_state)->dev.config[0x57]=ram_size; + piix3_dev = piix3; + return b; } @@ -273,6 +282,13 @@ static void piix3_set_irq(void *opaque, int irq_num, int level) } } +int piix_get_irq(int pin) +{ + if (piix3_dev) + return piix3_dev->dev.config[0x60+pin]; + return 0; +} + static void piix3_reset(void *opaque) { PIIX3State *d = opaque; diff --git a/hw/ppc440.c b/hw/ppc440.c index d12cf7181..89c16c051 100644 --- a/hw/ppc440.c +++ b/hw/ppc440.c @@ -20,6 +20,7 @@ #include "ppc405.h" #include "sysemu.h" #include "kvm.h" +#include "qemu-kvm.h" #define PPC440EP_PCI_CONFIG 0xeec00000 #define PPC440EP_PCI_INTACK 0xeed00000 diff --git a/hw/ppc440_bamboo.c b/hw/ppc440_bamboo.c index 6ca873ee7..79eb0b357 100644 --- a/hw/ppc440_bamboo.c +++ b/hw/ppc440_bamboo.c @@ -24,6 +24,7 @@ #include "device_tree.h" #include "loader.h" #include "elf.h" +#include "qemu-kvm.h" #define BINARY_DEVICE_TREE_FILE "bamboo.dtb" diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c index 1422fad07..4eb9bec00 100644 --- a/hw/ppce500_mpc8544ds.c +++ b/hw/ppce500_mpc8544ds.c @@ -31,6 +31,7 @@ #include "ppce500.h" #include "loader.h" #include "elf.h" +#include "qemu-kvm.h" #define BINARY_DEVICE_TREE_FILE "mpc8544ds.dtb" #define UIMAGE_LOAD_BASE 0 diff --git a/hw/testdev.c 
b/hw/testdev.c new file mode 100644 index 000000000..a8c49a366 --- /dev/null +++ b/hw/testdev.c @@ -0,0 +1,129 @@ +#include "hw.h" +#include "qdev.h" +#include "isa.h" + +struct testdev { + ISADevice dev; + CharDriverState *chr; +}; + +static void test_device_serial_write(void *opaque, uint32_t addr, uint32_t data) +{ + struct testdev *dev = opaque; + uint8_t buf[1] = { data }; + + if (dev->chr) { + qemu_chr_write(dev->chr, buf, 1); + } +} + +static void test_device_exit(void *opaque, uint32_t addr, uint32_t data) +{ + exit(data); +} + +static uint32_t test_device_memsize_read(void *opaque, uint32_t addr) +{ + return ram_size; +} + +static void test_device_irq_line(void *opaque, uint32_t addr, uint32_t data) +{ + extern qemu_irq *ioapic_irq_hack; + + qemu_set_irq(ioapic_irq_hack[addr - 0x2000], !!data); +} + +static uint32 test_device_ioport_data; + +static void test_device_ioport_write(void *opaque, uint32_t addr, uint32_t data) +{ + test_device_ioport_data = data; +} + +static uint32_t test_device_ioport_read(void *opaque, uint32_t addr) +{ + return test_device_ioport_data; +} + +static char *iomem_buf; + +static uint32_t test_iomem_readb(void *opaque, target_phys_addr_t addr) +{ + return iomem_buf[addr]; +} + +static uint32_t test_iomem_readw(void *opaque, target_phys_addr_t addr) +{ + return *(uint16_t*)(iomem_buf + addr); +} + +static uint32_t test_iomem_readl(void *opaque, target_phys_addr_t addr) +{ + return *(uint32_t*)(iomem_buf + addr); +} + +static void test_iomem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val) +{ + iomem_buf[addr] = val; +} + +static void test_iomem_writew(void *opaque, target_phys_addr_t addr, uint32_t val) +{ + *(uint16_t*)(iomem_buf + addr) = val; +} + +static void test_iomem_writel(void *opaque, target_phys_addr_t addr, uint32_t val) +{ + *(uint32_t*)(iomem_buf + addr) = val; +} + +static CPUReadMemoryFunc * const test_iomem_read[3] = { + test_iomem_readb, + test_iomem_readw, + test_iomem_readl, +}; + +static 
CPUWriteMemoryFunc * const test_iomem_write[3] = { + test_iomem_writeb, + test_iomem_writew, + test_iomem_writel, +}; + +static int init_test_device(ISADevice *isa) +{ + struct testdev *dev = DO_UPCAST(struct testdev, dev, isa); + int iomem; + + register_ioport_write(0xf1, 1, 1, test_device_serial_write, dev); + register_ioport_write(0xf4, 1, 4, test_device_exit, dev); + register_ioport_read(0xd1, 1, 4, test_device_memsize_read, dev); + register_ioport_read(0xe0, 1, 1, test_device_ioport_read, dev); + register_ioport_write(0xe0, 1, 1, test_device_ioport_write, dev); + register_ioport_read(0xe0, 1, 2, test_device_ioport_read, dev); + register_ioport_write(0xe0, 1, 2, test_device_ioport_write, dev); + register_ioport_read(0xe0, 1, 4, test_device_ioport_read, dev); + register_ioport_write(0xe0, 1, 4, test_device_ioport_write, dev); + register_ioport_write(0x2000, 24, 1, test_device_irq_line, NULL); + iomem_buf = qemu_mallocz(0x10000); + iomem = cpu_register_io_memory(test_iomem_read, test_iomem_write, NULL); + cpu_register_physical_memory(0xff000000, 0x10000, iomem); + return 0; +} + +static ISADeviceInfo testdev_info = { + .qdev.name = "testdev", + .qdev.size = sizeof(struct testdev), + .init = init_test_device, + .qdev.props = (Property[]) { + DEFINE_PROP_CHR("chardev", struct testdev, chr), + DEFINE_PROP_END_OF_LIST(), + }, +}; + +static void testdev_register_devices(void) +{ + isa_qdev_register(&testdev_info); +} + +device_init(testdev_register_devices) diff --git a/hw/vga-pci.c b/hw/vga-pci.c index eef78ed08..9089c9f5d 100644 --- a/hw/vga-pci.c +++ b/hw/vga-pci.c @@ -68,9 +68,11 @@ static void pci_vga_write_config(PCIDevice *d, PCIVGAState *pvs = container_of(d, PCIVGAState, dev); VGACommonState *s = &pvs->vga; + vga_dirty_log_stop(s); pci_default_write_config(d, address, val, len); if (s->map_addr && pvs->dev.io_regions[0].addr == -1) s->map_addr = 0; + vga_dirty_log_start(s); } static int pci_vga_initfn(PCIDevice *dev) @@ -1282,6 +1282,8 @@ static void 
vga_draw_text(VGACommonState *s, int full_update) vga_draw_glyph8_func *vga_draw_glyph8; vga_draw_glyph9_func *vga_draw_glyph9; + vga_dirty_log_stop(s); + /* compute font data address (in plane 2) */ v = s->sr[3]; offset = (((v >> 4) & 1) | ((v << 1) & 6)) * 8192 * 4 + 2; @@ -1593,40 +1595,65 @@ static void vga_sync_dirty_bitmap(VGACommonState *s) } #endif + vga_dirty_log_start(s); +} + +static int s1, s2, s3; + +static void mark_dirty(target_phys_addr_t start, target_phys_addr_t len) +{ + target_phys_addr_t end = start + len; + + while (start < end) { + cpu_physical_memory_set_dirty(cpu_get_physical_page_desc(start)); + start += TARGET_PAGE_SIZE; + } } void vga_dirty_log_start(VGACommonState *s) { if (kvm_enabled() && s->map_addr) - kvm_log_start(s->map_addr, s->map_end - s->map_addr); - + if (!s1) { + kvm_log_start(s->map_addr, s->map_end - s->map_addr); + mark_dirty(s->map_addr, s->map_end - s->map_addr); + s1 = 1; + } if (kvm_enabled() && s->lfb_vram_mapped) { - kvm_log_start(isa_mem_base + 0xa0000, 0x8000); - kvm_log_start(isa_mem_base + 0xa8000, 0x8000); + if (!s2) { + kvm_log_start(isa_mem_base + 0xa0000, 0x8000); + kvm_log_start(isa_mem_base + 0xa8000, 0x8000); + mark_dirty(isa_mem_base + 0xa0000, 0x10000); + } + s2 = 1; } #ifdef CONFIG_BOCHS_VBE if (kvm_enabled() && s->vbe_mapped) { - kvm_log_start(VBE_DISPI_LFB_PHYSICAL_ADDRESS, s->vram_size); + if (!s3) { + kvm_log_start(VBE_DISPI_LFB_PHYSICAL_ADDRESS, s->vram_size); + } + s3 = 1; } #endif } void vga_dirty_log_stop(VGACommonState *s) { - if (kvm_enabled() && s->map_addr) + if (kvm_enabled() && s->map_addr && s1) kvm_log_stop(s->map_addr, s->map_end - s->map_addr); - if (kvm_enabled() && s->lfb_vram_mapped) { + if (kvm_enabled() && s->lfb_vram_mapped && s1) { kvm_log_stop(isa_mem_base + 0xa0000, 0x8000); kvm_log_stop(isa_mem_base + 0xa8000, 0x8000); } #ifdef CONFIG_BOCHS_VBE - if (kvm_enabled() && s->vbe_mapped) { + if (kvm_enabled() && s->vbe_mapped && s3) { kvm_log_stop(VBE_DISPI_LFB_PHYSICAL_ADDRESS, 
s->vram_size); } #endif + + s1 = s2 = s3 = 0; } void vga_dirty_log_restart(VGACommonState *s) @@ -1864,6 +1891,7 @@ static void vga_draw_blank(VGACommonState *s, int full_update) return; if (s->last_scr_width <= 0 || s->last_scr_height <= 0) return; + vga_dirty_log_stop(s); s->rgb_to_pixel = rgb_to_pixel_dup_table[get_depth_index(s->ds)]; @@ -1908,6 +1936,9 @@ static void vga_update_display(void *opaque) vga_draw_text(s, full_update); break; case GMODE_GRAPH: +#ifdef TARGET_IA64 + full_update = 1; +#endif vga_draw_graphic(s, full_update); break; case GMODE_BLANK: diff --git a/hw/vga_int.h b/hw/vga_int.h index 6a46a434f..70e0f19c4 100644 --- a/hw/vga_int.h +++ b/hw/vga_int.h @@ -33,8 +33,8 @@ /* bochs VBE support */ #define CONFIG_BOCHS_VBE -#define VBE_DISPI_MAX_XRES 1600 -#define VBE_DISPI_MAX_YRES 1200 +#define VBE_DISPI_MAX_XRES 2560 +#define VBE_DISPI_MAX_YRES 1600 #define VBE_DISPI_MAX_BPP 32 #define VBE_DISPI_INDEX_ID 0x0 @@ -226,7 +226,7 @@ void vga_init_vbe(VGACommonState *s); extern const uint8_t sr_mask[8]; extern const uint8_t gr_mask[16]; -#define VGA_RAM_SIZE (8192 * 1024) +#define VGA_RAM_SIZE (16 * 1024 * 1024) #define VGABIOS_FILENAME "vgabios.bin" #define VGABIOS_CIRRUS_FILENAME "vgabios-cirrus.bin" diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c index 9fe3886b0..1e4dfdd0e 100644 --- a/hw/virtio-balloon.c +++ b/hw/virtio-balloon.c @@ -21,6 +21,7 @@ #include "balloon.h" #include "virtio-balloon.h" #include "kvm.h" +#include "qemu-kvm.h" #include "qlist.h" #include "qint.h" #include "qstring.h" diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index c6ef8254e..a55c9af8c 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -427,6 +427,31 @@ static void virtio_pci_guest_notifier_read(void *opaque) } } +static int virtio_pci_mask_notifier(PCIDevice *dev, unsigned vector, + void *opaque, int masked) +{ +#ifdef CONFIG_KVM + VirtQueue *vq = opaque; + EventNotifier *notifier = virtio_queue_get_guest_notifier(vq); + int r = 
kvm_set_irqfd(dev->msix_irq_entries[vector].gsi, + event_notifier_get_fd(notifier), + !masked); + if (r < 0) { + return (r == -ENOSYS) ? 0 : r; + } + if (masked) { + qemu_set_fd_handler(event_notifier_get_fd(notifier), + virtio_pci_guest_notifier_read, NULL, vq); + } else { + qemu_set_fd_handler(event_notifier_get_fd(notifier), + NULL, NULL, NULL); + } + return 0; +#else + return -ENOSYS; +#endif +} + static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign) { VirtIOPCIProxy *proxy = opaque; @@ -440,9 +465,16 @@ static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign) } qemu_set_fd_handler(event_notifier_get_fd(notifier), virtio_pci_guest_notifier_read, NULL, vq); + msix_set_mask_notifier(&proxy->pci_dev, + virtio_queue_vector(proxy->vdev, n), vq); } else { + msix_unset_mask_notifier(&proxy->pci_dev, + virtio_queue_vector(proxy->vdev, n)); qemu_set_fd_handler(event_notifier_get_fd(notifier), NULL, NULL, NULL); + /* Test and clear notifier before closing it, + * in case poll callback didn't have time to run. */ + virtio_pci_guest_notifier_read(vq); event_notifier_cleanup(notifier); } @@ -525,6 +557,8 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev, proxy->pci_dev.config_write = virtio_write_config; + proxy->pci_dev.msix_mask_notifier = virtio_pci_mask_notifier; + size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev) + vdev->config_len; if (size & (size-1)) size = 1 << qemu_fls(size); |