Diffstat (limited to 'hw')
-rw-r--r--  hw/acpi.c                   3
-rw-r--r--  hw/acpi_piix4.c            67
-rw-r--r--  hw/apic.c                 112
-rw-r--r--  hw/apic.h                   1
-rw-r--r--  hw/cirrus_vga.c            18
-rw-r--r--  hw/device-assignment.c   1672
-rw-r--r--  hw/device-assignment.h    119
-rw-r--r--  hw/extboot.c              123
-rw-r--r--  hw/hpet.c                   5
-rw-r--r--  hw/hw.h                    13
-rw-r--r--  hw/i8254-kvm.c            122
-rw-r--r--  hw/i8254.c                135
-rw-r--r--  hw/i8254.h                 69
-rw-r--r--  hw/i8259.c                145
-rw-r--r--  hw/ioapic.c                94
-rw-r--r--  hw/ipf.c                  713
-rw-r--r--  hw/msix.c                 224
-rw-r--r--  hw/msix.h                   2
-rw-r--r--  hw/pc.c                    57
-rw-r--r--  hw/pc.h                    16
-rw-r--r--  hw/pc_piix.c               33
-rw-r--r--  hw/pci-hotplug.c           23
-rw-r--r--  hw/pci.c                  182
-rw-r--r--  hw/pci.h                   53
-rw-r--r--  hw/pci_regs.h               7
-rw-r--r--  hw/pcspk.c                 48
-rw-r--r--  hw/piix_pci.c              16
-rw-r--r--  hw/ppc440.c                 1
-rw-r--r--  hw/ppc440_bamboo.c          1
-rw-r--r--  hw/ppce500_mpc8544ds.c      1
-rw-r--r--  hw/testdev.c              129
-rw-r--r--  hw/vga-pci.c                2
-rw-r--r--  hw/vga.c                   47
-rw-r--r--  hw/vga_int.h                6
-rw-r--r--  hw/virtio-balloon.c         1
-rw-r--r--  hw/virtio-pci.c            34
36 files changed, 4202 insertions, 92 deletions
diff --git a/hw/acpi.c b/hw/acpi.c
index c7044b1fa..b2cc4b105 100644
--- a/hw/acpi.c
+++ b/hw/acpi.c
@@ -18,6 +18,9 @@
#include "hw.h"
#include "pc.h"
#include "acpi.h"
+#include "kvm.h"
+#include "qemu-kvm.h"
+#include "string.h"
struct acpi_table_header
{
diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
index 8d1a62820..227aba8f2 100644
--- a/hw/acpi_piix4.c
+++ b/hw/acpi_piix4.c
@@ -34,12 +34,14 @@
#define ACPI_DBG_IO_ADDR 0xb044
#define GPE_BASE 0xafe0
+#define PROC_BASE 0xaf00
#define PCI_BASE 0xae00
#define PCI_EJ_BASE 0xae08
struct gpe_regs {
uint16_t sts; /* status */
uint16_t en; /* enabled */
+ uint8_t cpus_sts[32];
};
struct pci_status {
@@ -356,11 +358,16 @@ static void piix4_powerdown(void *opaque, int irq, int power_failing)
}
}
+static PIIX4PMState *global_piix4_pm_state; /* cpu hotadd */
+
static int piix4_pm_initfn(PCIDevice *dev)
{
PIIX4PMState *s = DO_UPCAST(PIIX4PMState, dev, dev);
uint8_t *pci_conf;
+ /* for cpu hotadd */
+ global_piix4_pm_state = s;
+
pci_conf = s->dev.config;
pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL);
pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_INTEL_82371AB_3);
@@ -374,6 +381,13 @@ static int piix4_pm_initfn(PCIDevice *dev)
pci_conf[0x40] = 0x01; /* PM io base read only bit */
+#if defined(TARGET_IA64)
+ pci_conf[0x40] = 0x41; /* PM io base read only bit */
+ pci_conf[0x41] = 0x1f;
+    pm_write_config(s, 0x80, 0x01, 1); /* Set default pm_io_base 0x1f40 */
+ s->pmcntrl = SCI_EN;
+#endif
+
/* APM */
apm_init(&s->apm, apm_ctrl_changed, s);
@@ -462,6 +476,10 @@ static uint32_t gpe_readb(void *opaque, uint32_t addr)
uint32_t val = 0;
struct gpe_regs *g = opaque;
switch (addr) {
+ case PROC_BASE ... PROC_BASE+31:
+ val = g->cpus_sts[addr - PROC_BASE];
+ break;
+
case GPE_BASE:
case GPE_BASE + 1:
val = gpe_read_val(g->sts, addr);
@@ -575,16 +593,27 @@ static void pciej_write(void *opaque, uint32_t addr, uint32_t val)
PIIX4_DPRINTF("pciej write %x <== %d\n", addr, val);
}
+extern const char *global_cpu_model;
+
static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev, int state);
static void piix4_acpi_system_hot_add_init(PCIBus *bus, PIIX4PMState *s)
{
struct gpe_regs *gpe = &s->gpe;
struct pci_status *pci0_status = &s->pci0_status;
+ int i = 0, cpus = smp_cpus;
+
+ while (cpus > 0) {
+ gpe->cpus_sts[i++] = (cpus < 8) ? (1 << cpus) - 1 : 0xff;
+ cpus -= 8;
+ }
register_ioport_write(GPE_BASE, 4, 1, gpe_writeb, gpe);
register_ioport_read(GPE_BASE, 4, 1, gpe_readb, gpe);
+ register_ioport_write(PROC_BASE, 32, 1, gpe_writeb, gpe);
+ register_ioport_read(PROC_BASE, 32, 1, gpe_readb, gpe);
+
register_ioport_write(PCI_BASE, 8, 4, pcihotplug_write, pci0_status);
register_ioport_read(PCI_BASE, 8, 4, pcihotplug_read, pci0_status);
@@ -594,6 +623,44 @@ static void piix4_acpi_system_hot_add_init(PCIBus *bus, PIIX4PMState *s)
pci_bus_hotplug(bus, piix4_device_hotplug, &s->dev.qdev);
}
+#if defined(TARGET_I386)
+static void enable_processor(struct gpe_regs *g, int cpu)
+{
+ g->sts |= 4;
+ g->cpus_sts[cpu/8] |= (1 << (cpu%8));
+}
+
+static void disable_processor(struct gpe_regs *g, int cpu)
+{
+ g->sts |= 4;
+ g->cpus_sts[cpu/8] &= ~(1 << (cpu%8));
+}
+
+void qemu_system_cpu_hot_add(int cpu, int state)
+{
+ CPUState *env;
+ PIIX4PMState *s = global_piix4_pm_state;
+
+ if (state && !qemu_get_cpu(cpu)) {
+ env = pc_new_cpu(global_cpu_model);
+ if (!env) {
+ fprintf(stderr, "cpu %d creation failed\n", cpu);
+ return;
+ }
+ env->cpuid_apic_id = cpu;
+ }
+
+ if (state)
+ enable_processor(&s->gpe, cpu);
+ else
+ disable_processor(&s->gpe, cpu);
+ if (s->gpe.en & 4) {
+ qemu_set_irq(s->irq, 1);
+ qemu_set_irq(s->irq, 0);
+ }
+}
+#endif
+
static void enable_device(PIIX4PMState *s, int slot)
{
s->gpe.sts |= 2;
diff --git a/hw/apic.c b/hw/apic.c
index d686b510b..cf0b03cbc 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -21,6 +21,7 @@
#include "qemu-timer.h"
#include "host-utils.h"
#include "sysbus.h"
+#include "kvm.h"
//#define DEBUG_APIC
//#define DEBUG_COALESCING
@@ -315,8 +316,11 @@ void cpu_set_apic_base(DeviceState *d, uint64_t val)
DPRINTF("cpu_set_apic_base: %016" PRIx64 "\n", val);
if (!s)
return;
- s->apicbase = (val & 0xfffff000) |
- (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
+ if (kvm_enabled() && kvm_irqchip_in_kernel())
+ s->apicbase = val;
+ else
+ s->apicbase = (val & 0xfffff000) |
+ (s->apicbase & (MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE));
/* if disabled, cannot be enabled again */
if (!(val & MSR_IA32_APICBASE_ENABLE)) {
s->apicbase &= ~MSR_IA32_APICBASE_ENABLE;
@@ -412,6 +416,11 @@ int apic_get_irq_delivered(void)
return apic_irq_delivered;
}
+void apic_set_irq_delivered(void)
+{
+ apic_irq_delivered = 1;
+}
+
static void apic_set_irq(APICState *s, int vector_num, int trigger_mode)
{
apic_irq_delivered += !get_bit(s->irr, vector_num);
@@ -876,6 +885,105 @@ static void apic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
}
}
+#ifdef KVM_CAP_IRQCHIP
+
+static inline uint32_t kapic_reg(struct kvm_lapic_state *kapic, int reg_id)
+{
+ return *((uint32_t *) (kapic->regs + (reg_id << 4)));
+}
+
+static inline void kapic_set_reg(struct kvm_lapic_state *kapic,
+ int reg_id, uint32_t val)
+{
+ *((uint32_t *) (kapic->regs + (reg_id << 4))) = val;
+}
+
+static void kvm_kernel_lapic_save_to_user(APICState *s)
+{
+ struct kvm_lapic_state apic;
+ struct kvm_lapic_state *kapic = &apic;
+ int i, v;
+
+ kvm_get_lapic(s->cpu_env, kapic);
+
+ s->id = kapic_reg(kapic, 0x2) >> 24;
+ s->tpr = kapic_reg(kapic, 0x8);
+ s->arb_id = kapic_reg(kapic, 0x9);
+ s->log_dest = kapic_reg(kapic, 0xd) >> 24;
+ s->dest_mode = kapic_reg(kapic, 0xe) >> 28;
+ s->spurious_vec = kapic_reg(kapic, 0xf);
+ for (i = 0; i < 8; i++) {
+ s->isr[i] = kapic_reg(kapic, 0x10 + i);
+ s->tmr[i] = kapic_reg(kapic, 0x18 + i);
+ s->irr[i] = kapic_reg(kapic, 0x20 + i);
+ }
+ s->esr = kapic_reg(kapic, 0x28);
+ s->icr[0] = kapic_reg(kapic, 0x30);
+ s->icr[1] = kapic_reg(kapic, 0x31);
+ for (i = 0; i < APIC_LVT_NB; i++)
+ s->lvt[i] = kapic_reg(kapic, 0x32 + i);
+ s->initial_count = kapic_reg(kapic, 0x38);
+ s->divide_conf = kapic_reg(kapic, 0x3e);
+
+ v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4);
+ s->count_shift = (v + 1) & 7;
+
+ s->initial_count_load_time = qemu_get_clock(vm_clock);
+ apic_timer_update(s, s->initial_count_load_time);
+}
+
+static void kvm_kernel_lapic_load_from_user(APICState *s)
+{
+ struct kvm_lapic_state apic;
+ struct kvm_lapic_state *klapic = &apic;
+ int i;
+
+ memset(klapic, 0, sizeof apic);
+ kapic_set_reg(klapic, 0x2, s->id << 24);
+ kapic_set_reg(klapic, 0x8, s->tpr);
+ kapic_set_reg(klapic, 0xd, s->log_dest << 24);
+ kapic_set_reg(klapic, 0xe, s->dest_mode << 28 | 0x0fffffff);
+ kapic_set_reg(klapic, 0xf, s->spurious_vec);
+ for (i = 0; i < 8; i++) {
+ kapic_set_reg(klapic, 0x10 + i, s->isr[i]);
+ kapic_set_reg(klapic, 0x18 + i, s->tmr[i]);
+ kapic_set_reg(klapic, 0x20 + i, s->irr[i]);
+ }
+ kapic_set_reg(klapic, 0x28, s->esr);
+ kapic_set_reg(klapic, 0x30, s->icr[0]);
+ kapic_set_reg(klapic, 0x31, s->icr[1]);
+ for (i = 0; i < APIC_LVT_NB; i++)
+ kapic_set_reg(klapic, 0x32 + i, s->lvt[i]);
+ kapic_set_reg(klapic, 0x38, s->initial_count);
+ kapic_set_reg(klapic, 0x3e, s->divide_conf);
+
+ kvm_set_lapic(s->cpu_env, klapic);
+}
+
+#endif
+
+void kvm_load_lapic(CPUState *env)
+{
+ APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);
+
+#ifdef KVM_CAP_IRQCHIP
+ if (kvm_enabled() && kvm_irqchip_in_kernel()) {
+ kvm_kernel_lapic_load_from_user(s);
+ }
+#endif
+}
+
+void kvm_save_lapic(CPUState *env)
+{
+ APICState *s = DO_UPCAST(APICState, busdev.qdev, env->apic_state);
+
+#ifdef KVM_CAP_IRQCHIP
+ if (kvm_enabled() && kvm_irqchip_in_kernel()) {
+ kvm_kernel_lapic_save_to_user(s);
+ }
+#endif
+}
+
/* This function is only used for old state version 1 and 2 */
static int apic_load_old(QEMUFile *f, void *opaque, int version_id)
{
diff --git a/hw/apic.h b/hw/apic.h
index 8a0c9d0bf..c857d5254 100644
--- a/hw/apic.h
+++ b/hw/apic.h
@@ -13,6 +13,7 @@ void apic_deliver_pic_intr(DeviceState *s, int level);
int apic_get_interrupt(DeviceState *s);
void apic_reset_irq_delivered(void);
int apic_get_irq_delivered(void);
+void apic_set_irq_delivered(void);
void cpu_set_apic_base(DeviceState *s, uint64_t val);
uint64_t cpu_get_apic_base(DeviceState *s);
void cpu_set_apic_tpr(DeviceState *s, uint8_t val);
diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c
index bbd4b082d..efa7a42f5 100644
--- a/hw/cirrus_vga.c
+++ b/hw/cirrus_vga.c
@@ -32,6 +32,7 @@
#include "console.h"
#include "vga_int.h"
#include "kvm.h"
+#include "qemu-kvm.h"
#include "loader.h"
/*
@@ -2552,6 +2553,7 @@ static CPUWriteMemoryFunc * const cirrus_linear_bitblt_write[3] = {
static void map_linear_vram(CirrusVGAState *s)
{
+ vga_dirty_log_stop(&s->vga);
if (!s->vga.map_addr && s->vga.lfb_addr && s->vga.lfb_end) {
s->vga.map_addr = s->vga.lfb_addr;
s->vga.map_end = s->vga.lfb_end;
@@ -2561,13 +2563,19 @@ static void map_linear_vram(CirrusVGAState *s)
if (!s->vga.map_addr)
return;
+#ifndef TARGET_IA64
s->vga.lfb_vram_mapped = 0;
+ cpu_register_physical_memory(isa_mem_base + 0xa0000, 0x8000,
+ (s->vga.vram_offset + s->cirrus_bank_base[0]) | IO_MEM_UNASSIGNED);
+ cpu_register_physical_memory(isa_mem_base + 0xa8000, 0x8000,
+ (s->vga.vram_offset + s->cirrus_bank_base[1]) | IO_MEM_UNASSIGNED);
if (!(s->cirrus_srcptr != s->cirrus_srcptr_end)
&& !((s->vga.sr[0x07] & 0x01) == 0)
&& !((s->vga.gr[0x0B] & 0x14) == 0x14)
&& !(s->vga.gr[0x0B] & 0x02)) {
+ vga_dirty_log_stop(&s->vga);
cpu_register_physical_memory(isa_mem_base + 0xa0000, 0x8000,
(s->vga.vram_offset + s->cirrus_bank_base[0]) | IO_MEM_RAM);
cpu_register_physical_memory(isa_mem_base + 0xa8000, 0x8000,
@@ -2579,12 +2587,14 @@ static void map_linear_vram(CirrusVGAState *s)
cpu_register_physical_memory(isa_mem_base + 0xa0000, 0x20000,
s->vga.vga_io_memory);
}
+#endif
vga_dirty_log_start(&s->vga);
}
static void unmap_linear_vram(CirrusVGAState *s)
{
+ vga_dirty_log_stop(&s->vga);
if (s->vga.map_addr && s->vga.lfb_addr && s->vga.lfb_end) {
s->vga.map_addr = s->vga.map_end = 0;
cpu_register_physical_memory(s->vga.lfb_addr, s->vga.vram_size,
@@ -2592,6 +2602,8 @@ static void unmap_linear_vram(CirrusVGAState *s)
}
cpu_register_physical_memory(isa_mem_base + 0xa0000, 0x20000,
s->vga.vga_io_memory);
+
+ vga_dirty_log_start(&s->vga);
}
/* Compute the memory access functions */
@@ -3144,6 +3156,8 @@ static void cirrus_pci_lfb_map(PCIDevice *d, int region_num,
{
CirrusVGAState *s = &DO_UPCAST(PCICirrusVGAState, dev, d)->cirrus_vga;
+ vga_dirty_log_stop(&s->vga);
+
/* XXX: add byte swapping apertures */
cpu_register_physical_memory(addr, s->vga.vram_size,
s->cirrus_linear_io_addr);
@@ -3175,10 +3189,14 @@ static void pci_cirrus_write_config(PCIDevice *d,
PCICirrusVGAState *pvs = DO_UPCAST(PCICirrusVGAState, dev, d);
CirrusVGAState *s = &pvs->cirrus_vga;
+ vga_dirty_log_stop(&s->vga);
+
pci_default_write_config(d, address, val, len);
if (s->vga.map_addr && d->io_regions[0].addr == PCI_BAR_UNMAPPED)
s->vga.map_addr = 0;
cirrus_update_memory_access(s);
+
+ vga_dirty_log_start(&s->vga);
}
static int pci_cirrus_vga_initfn(PCIDevice *dev)
diff --git a/hw/device-assignment.c b/hw/device-assignment.c
new file mode 100644
index 000000000..2bba22f83
--- /dev/null
+++ b/hw/device-assignment.c
@@ -0,0 +1,1672 @@
+/*
+ * Copyright (c) 2007, Neocleus Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *
+ * Assign a PCI device from the host to a guest VM.
+ *
+ * Adapted for KVM by Qumranet.
+ *
+ * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
+ * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
+ * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
+ * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
+ * Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/io.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "qemu-kvm.h"
+#include "hw.h"
+#include "pc.h"
+#include "qemu-error.h"
+#include "console.h"
+#include "device-assignment.h"
+#include "loader.h"
+#include "monitor.h"
+#include <pci/header.h>
+
+/* From linux/ioport.h */
+#define IORESOURCE_IO 0x00000100 /* Resource type */
+#define IORESOURCE_MEM 0x00000200
+#define IORESOURCE_IRQ 0x00000400
+#define IORESOURCE_DMA 0x00000800
+#define IORESOURCE_PREFETCH 0x00001000 /* No side effects */
+
+/* #define DEVICE_ASSIGNMENT_DEBUG 1 */
+
+#ifdef DEVICE_ASSIGNMENT_DEBUG
+#define DEBUG(fmt, ...) \
+ do { \
+ fprintf(stderr, "%s: " fmt, __func__ , __VA_ARGS__); \
+ } while (0)
+#else
+#define DEBUG(fmt, ...) do { } while(0)
+#endif
+
+static void assigned_dev_load_option_rom(AssignedDevice *dev);
+
+static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev);
+
+static uint32_t guest_to_host_ioport(AssignedDevRegion *region, uint32_t addr)
+{
+ return region->u.r_baseport + (addr - region->e_physbase);
+}
+
+static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
+ uint32_t value)
+{
+ AssignedDevRegion *r_access = opaque;
+ uint32_t r_pio = guest_to_host_ioport(r_access, addr);
+
+ DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
+ r_pio, (int)r_access->e_physbase,
+ (unsigned long)r_access->u.r_baseport, value);
+
+ outb(value, r_pio);
+}
+
+static void assigned_dev_ioport_writew(void *opaque, uint32_t addr,
+ uint32_t value)
+{
+ AssignedDevRegion *r_access = opaque;
+ uint32_t r_pio = guest_to_host_ioport(r_access, addr);
+
+ DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
+ r_pio, (int)r_access->e_physbase,
+ (unsigned long)r_access->u.r_baseport, value);
+
+ outw(value, r_pio);
+}
+
+static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
+ uint32_t value)
+{
+ AssignedDevRegion *r_access = opaque;
+ uint32_t r_pio = guest_to_host_ioport(r_access, addr);
+
+ DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
+ r_pio, (int)r_access->e_physbase,
+ (unsigned long)r_access->u.r_baseport, value);
+
+ outl(value, r_pio);
+}
+
+static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr)
+{
+ AssignedDevRegion *r_access = opaque;
+ uint32_t r_pio = guest_to_host_ioport(r_access, addr);
+ uint32_t value;
+
+ value = inb(r_pio);
+
+    DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
+ r_pio, (int)r_access->e_physbase,
+ (unsigned long)r_access->u.r_baseport, value);
+
+ return value;
+}
+
+static uint32_t assigned_dev_ioport_readw(void *opaque, uint32_t addr)
+{
+ AssignedDevRegion *r_access = opaque;
+ uint32_t r_pio = guest_to_host_ioport(r_access, addr);
+ uint32_t value;
+
+ value = inw(r_pio);
+
+ DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
+ r_pio, (int)r_access->e_physbase,
+ (unsigned long)r_access->u.r_baseport, value);
+
+ return value;
+}
+
+static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr)
+{
+ AssignedDevRegion *r_access = opaque;
+ uint32_t r_pio = guest_to_host_ioport(r_access, addr);
+ uint32_t value;
+
+ value = inl(r_pio);
+
+ DEBUG("r_pio=%08x e_physbase=%08x r_baseport=%08lx value=%08x\n",
+ r_pio, (int)r_access->e_physbase,
+ (unsigned long)r_access->u.r_baseport, value);
+
+ return value;
+}
+
+static uint32_t slow_bar_readb(void *opaque, target_phys_addr_t addr)
+{
+ AssignedDevRegion *d = opaque;
+ uint8_t *in = d->u.r_virtbase + addr;
+ uint32_t r;
+
+ r = *in;
+    DEBUG("slow_bar_readb addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r);
+
+ return r;
+}
+
+static uint32_t slow_bar_readw(void *opaque, target_phys_addr_t addr)
+{
+ AssignedDevRegion *d = opaque;
+ uint16_t *in = d->u.r_virtbase + addr;
+ uint32_t r;
+
+ r = *in;
+    DEBUG("slow_bar_readw addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r);
+
+ return r;
+}
+
+static uint32_t slow_bar_readl(void *opaque, target_phys_addr_t addr)
+{
+ AssignedDevRegion *d = opaque;
+ uint32_t *in = d->u.r_virtbase + addr;
+ uint32_t r;
+
+ r = *in;
+ DEBUG("slow_bar_readl addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, r);
+
+ return r;
+}
+
+static void slow_bar_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+ AssignedDevRegion *d = opaque;
+ uint8_t *out = d->u.r_virtbase + addr;
+
+ DEBUG("slow_bar_writeb addr=0x" TARGET_FMT_plx " val=0x%02x\n", addr, val);
+ *out = val;
+}
+
+static void slow_bar_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+ AssignedDevRegion *d = opaque;
+ uint16_t *out = d->u.r_virtbase + addr;
+
+ DEBUG("slow_bar_writew addr=0x" TARGET_FMT_plx " val=0x%04x\n", addr, val);
+ *out = val;
+}
+
+static void slow_bar_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+ AssignedDevRegion *d = opaque;
+ uint32_t *out = d->u.r_virtbase + addr;
+
+ DEBUG("slow_bar_writel addr=0x" TARGET_FMT_plx " val=0x%08x\n", addr, val);
+ *out = val;
+}
+
+static CPUWriteMemoryFunc * const slow_bar_write[] = {
+ &slow_bar_writeb,
+ &slow_bar_writew,
+ &slow_bar_writel
+};
+
+static CPUReadMemoryFunc * const slow_bar_read[] = {
+ &slow_bar_readb,
+ &slow_bar_readw,
+ &slow_bar_readl
+};
+
+static void assigned_dev_iomem_map_slow(PCIDevice *pci_dev, int region_num,
+ pcibus_t e_phys, pcibus_t e_size,
+ int type)
+{
+ AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev);
+ AssignedDevRegion *region = &r_dev->v_addrs[region_num];
+ PCIRegion *real_region = &r_dev->real_device.regions[region_num];
+ int m;
+
+ DEBUG("%s", "slow map\n");
+ if (region_num == PCI_ROM_SLOT)
+ m = cpu_register_io_memory(slow_bar_read, NULL, region);
+ else
+ m = cpu_register_io_memory(slow_bar_read, slow_bar_write, region);
+ cpu_register_physical_memory(e_phys, e_size, m);
+
+ /* MSI-X MMIO page */
+ if ((e_size > 0) &&
+ real_region->base_addr <= r_dev->msix_table_addr &&
+ real_region->base_addr + real_region->size >= r_dev->msix_table_addr) {
+ int offset = r_dev->msix_table_addr - real_region->base_addr;
+
+ cpu_register_physical_memory(e_phys + offset,
+ TARGET_PAGE_SIZE, r_dev->mmio_index);
+ }
+}
+
+static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
+ pcibus_t e_phys, pcibus_t e_size, int type)
+{
+ AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev);
+ AssignedDevRegion *region = &r_dev->v_addrs[region_num];
+ PCIRegion *real_region = &r_dev->real_device.regions[region_num];
+ int ret = 0, flags = 0;
+
+ DEBUG("e_phys=%08" FMT_PCIBUS " r_virt=%p type=%d len=%08" FMT_PCIBUS " region_num=%d \n",
+ e_phys, region->u.r_virtbase, type, e_size, region_num);
+
+ region->e_physbase = e_phys;
+ region->e_size = e_size;
+
+ if (e_size > 0) {
+
+ if (region_num == PCI_ROM_SLOT)
+ flags |= IO_MEM_ROM;
+
+ cpu_register_physical_memory(e_phys, e_size, region->memory_index | flags);
+
+ /* deal with MSI-X MMIO page */
+ if (real_region->base_addr <= r_dev->msix_table_addr &&
+ real_region->base_addr + real_region->size >=
+ r_dev->msix_table_addr) {
+ int offset = r_dev->msix_table_addr - real_region->base_addr;
+
+ cpu_register_physical_memory(e_phys + offset,
+ TARGET_PAGE_SIZE, r_dev->mmio_index);
+ }
+ }
+
+ if (ret != 0) {
+ fprintf(stderr, "%s: Error: create new mapping failed\n", __func__);
+ exit(1);
+ }
+}
+
+static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
+ pcibus_t addr, pcibus_t size, int type)
+{
+ AssignedDevice *r_dev = container_of(pci_dev, AssignedDevice, dev);
+ AssignedDevRegion *region = &r_dev->v_addrs[region_num];
+ int first_map = (region->e_size == 0);
+ CPUState *env;
+
+ region->e_physbase = addr;
+ region->e_size = size;
+
+ DEBUG("e_phys=0x%" FMT_PCIBUS " r_baseport=%x type=0x%x len=%" FMT_PCIBUS " region_num=%d \n",
+ addr, region->u.r_baseport, type, size, region_num);
+
+ if (first_map) {
+ struct ioperm_data *data;
+
+ data = qemu_mallocz(sizeof(struct ioperm_data));
+ if (data == NULL) {
+ fprintf(stderr, "%s: Out of memory\n", __func__);
+ exit(1);
+ }
+
+ data->start_port = region->u.r_baseport;
+ data->num = region->r_size;
+ data->turn_on = 1;
+
+ kvm_add_ioperm_data(data);
+
+ for (env = first_cpu; env; env = env->next_cpu)
+ kvm_ioperm(env, data);
+ }
+
+ register_ioport_read(addr, size, 1, assigned_dev_ioport_readb,
+ (r_dev->v_addrs + region_num));
+ register_ioport_read(addr, size, 2, assigned_dev_ioport_readw,
+ (r_dev->v_addrs + region_num));
+ register_ioport_read(addr, size, 4, assigned_dev_ioport_readl,
+ (r_dev->v_addrs + region_num));
+ register_ioport_write(addr, size, 1, assigned_dev_ioport_writeb,
+ (r_dev->v_addrs + region_num));
+ register_ioport_write(addr, size, 2, assigned_dev_ioport_writew,
+ (r_dev->v_addrs + region_num));
+ register_ioport_write(addr, size, 4, assigned_dev_ioport_writel,
+ (r_dev->v_addrs + region_num));
+}
+
+static uint32_t assigned_dev_pci_read(PCIDevice *d, int pos, int len)
+{
+ AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev);
+ uint32_t val;
+ ssize_t ret;
+ int fd = pci_dev->real_device.config_fd;
+
+again:
+ ret = pread(fd, &val, len, pos);
+ if (ret != len) {
+ if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
+ goto again;
+
+ fprintf(stderr, "%s: pread failed, ret = %zd errno = %d\n",
+ __func__, ret, errno);
+
+ exit(1);
+ }
+
+ return val;
+}
+
+static uint8_t assigned_dev_pci_read_byte(PCIDevice *d, int pos)
+{
+ return (uint8_t)assigned_dev_pci_read(d, pos, 1);
+}
+
+static uint16_t assigned_dev_pci_read_word(PCIDevice *d, int pos)
+{
+ return (uint16_t)assigned_dev_pci_read(d, pos, 2);
+}
+
+static uint32_t assigned_dev_pci_read_long(PCIDevice *d, int pos)
+{
+ return assigned_dev_pci_read(d, pos, 4);
+}
+
+static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap)
+{
+ int id;
+ int max_cap = 48;
+ int pos = PCI_CAPABILITY_LIST;
+ int status;
+
+ status = assigned_dev_pci_read_byte(d, PCI_STATUS);
+ if ((status & PCI_STATUS_CAP_LIST) == 0)
+ return 0;
+
+ while (max_cap--) {
+ pos = assigned_dev_pci_read_byte(d, pos);
+ if (pos < 0x40)
+ break;
+
+ pos &= ~3;
+ id = assigned_dev_pci_read_byte(d, pos + PCI_CAP_LIST_ID);
+
+ if (id == 0xff)
+ break;
+ if (id == cap)
+ return pos;
+
+ pos += PCI_CAP_LIST_NEXT;
+ }
+ return 0;
+}
+
+static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
+ uint32_t val, int len)
+{
+ int fd;
+ ssize_t ret;
+ AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev);
+
+ DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
+ ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
+ (uint16_t) address, val, len);
+
+ if (address == 0x4) {
+ pci_default_write_config(d, address, val, len);
+ /* Continue to program the card */
+ }
+
+ if ((address >= 0x10 && address <= 0x24) || address == 0x30 ||
+ address == 0x34 || address == 0x3c || address == 0x3d ||
+ pci_access_cap_config(d, address, len)) {
+ /* used for update-mappings (BAR emulation) */
+ pci_default_write_config(d, address, val, len);
+ return;
+ }
+
+ DEBUG("NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n",
+ ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
+ (uint16_t) address, val, len);
+
+ fd = pci_dev->real_device.config_fd;
+
+again:
+ ret = pwrite(fd, &val, len, address);
+ if (ret != len) {
+ if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
+ goto again;
+
+ fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n",
+ __func__, ret, errno);
+
+ exit(1);
+ }
+}
+
+static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address,
+ int len)
+{
+ uint32_t val = 0;
+ int fd;
+ ssize_t ret;
+ AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev);
+
+ if (address < 0x4 || (pci_dev->need_emulate_cmd && address == 0x4) ||
+ (address >= 0x10 && address <= 0x24) || address == 0x30 ||
+ address == 0x34 || address == 0x3c || address == 0x3d ||
+ pci_access_cap_config(d, address, len)) {
+ val = pci_default_read_config(d, address, len);
+ DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
+ (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
+ return val;
+ }
+
+ /* vga specific, remove later */
+ if (address == 0xFC)
+ goto do_log;
+
+ fd = pci_dev->real_device.config_fd;
+
+again:
+ ret = pread(fd, &val, len, address);
+ if (ret != len) {
+ if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
+ goto again;
+
+ fprintf(stderr, "%s: pread failed, ret = %zd errno = %d\n",
+ __func__, ret, errno);
+
+ exit(1);
+ }
+
+do_log:
+ DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
+ (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
+
+ if (!pci_dev->cap.available) {
+ /* kill the special capabilities */
+ if (address == 4 && len == 4)
+ val &= ~0x100000;
+ else if (address == 6)
+ val &= ~0x10;
+ }
+
+ return val;
+}
+
+static int assigned_dev_register_regions(PCIRegion *io_regions,
+ unsigned long regions_num,
+ AssignedDevice *pci_dev)
+{
+ uint32_t i;
+ PCIRegion *cur_region = io_regions;
+
+ for (i = 0; i < regions_num; i++, cur_region++) {
+ if (!cur_region->valid)
+ continue;
+ pci_dev->v_addrs[i].num = i;
+
+ /* handle memory io regions */
+ if (cur_region->type & IORESOURCE_MEM) {
+ int slow_map = 0;
+ int t = cur_region->type & IORESOURCE_PREFETCH
+ ? PCI_BASE_ADDRESS_MEM_PREFETCH
+ : PCI_BASE_ADDRESS_SPACE_MEMORY;
+
+ if (cur_region->size & 0xFFF) {
+ if (i != PCI_ROM_SLOT) {
+ fprintf(stderr, "PCI region %d at address 0x%llx "
+ "has size 0x%x, which is not a multiple of 4K. "
+ "You might experience some performance hit "
+ "due to that.\n",
+ i, (unsigned long long)cur_region->base_addr,
+ cur_region->size);
+ }
+ slow_map = 1;
+ }
+
+ /* map physical memory */
+ pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
+ if (i == PCI_ROM_SLOT) {
+ pci_dev->v_addrs[i].u.r_virtbase =
+ mmap(NULL,
+ cur_region->size,
+ PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE,
+ 0, (off_t) 0);
+
+ } else {
+ pci_dev->v_addrs[i].u.r_virtbase =
+ mmap(NULL,
+ cur_region->size,
+ PROT_WRITE | PROT_READ, MAP_SHARED,
+ cur_region->resource_fd, (off_t) 0);
+ }
+
+ if (pci_dev->v_addrs[i].u.r_virtbase == MAP_FAILED) {
+ pci_dev->v_addrs[i].u.r_virtbase = NULL;
+ fprintf(stderr, "%s: Error: Couldn't mmap 0x%x!"
+ "\n", __func__,
+ (uint32_t) (cur_region->base_addr));
+ return -1;
+ }
+
+ if (i == PCI_ROM_SLOT) {
+ memset(pci_dev->v_addrs[i].u.r_virtbase, 0,
+ (cur_region->size + 0xFFF) & 0xFFFFF000);
+ mprotect(pci_dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase,
+ (cur_region->size + 0xFFF) & 0xFFFFF000, PROT_READ);
+ }
+
+ pci_dev->v_addrs[i].r_size = cur_region->size;
+ pci_dev->v_addrs[i].e_size = 0;
+
+ /* add offset */
+ pci_dev->v_addrs[i].u.r_virtbase +=
+ (cur_region->base_addr & 0xFFF);
+
+
+ if (!slow_map) {
+ void *virtbase = pci_dev->v_addrs[i].u.r_virtbase;
+ char name[32];
+ snprintf(name, sizeof(name), "%s.bar%d",
+ pci_dev->dev.qdev.info->name, i);
+ pci_dev->v_addrs[i].memory_index =
+ qemu_ram_map(&pci_dev->dev.qdev,
+ name, cur_region->size,
+ virtbase);
+ } else
+ pci_dev->v_addrs[i].memory_index = 0;
+
+ pci_register_bar((PCIDevice *) pci_dev, i,
+ cur_region->size, t,
+ slow_map ? assigned_dev_iomem_map_slow
+ : assigned_dev_iomem_map);
+ continue;
+ }
+ /* handle port io regions */
+ pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
+ pci_dev->v_addrs[i].u.r_baseport = cur_region->base_addr;
+ pci_dev->v_addrs[i].r_size = cur_region->size;
+ pci_dev->v_addrs[i].e_size = 0;
+
+ pci_register_bar((PCIDevice *) pci_dev, i,
+ cur_region->size, PCI_BASE_ADDRESS_SPACE_IO,
+ assigned_dev_ioport_map);
+
+ /* not relevant for port io */
+ pci_dev->v_addrs[i].memory_index = 0;
+ }
+
+ /* success */
+ return 0;
+}
+
+static int get_real_id(const char *devpath, const char *idname, uint16_t *val)
+{
+ FILE *f;
+ char name[128];
+ long id;
+
+ snprintf(name, sizeof(name), "%s%s", devpath, idname);
+ f = fopen(name, "r");
+ if (f == NULL) {
+ fprintf(stderr, "%s: %s: %m\n", __func__, name);
+ return -1;
+ }
+ if (fscanf(f, "%li\n", &id) == 1) {
+ *val = id;
+ } else {
+ return -1;
+ }
+ fclose(f);
+
+ return 0;
+}
+
+static int get_real_vendor_id(const char *devpath, uint16_t *val)
+{
+ return get_real_id(devpath, "vendor", val);
+}
+
+static int get_real_device_id(const char *devpath, uint16_t *val)
+{
+ return get_real_id(devpath, "device", val);
+}
+
+static int get_real_device(AssignedDevice *pci_dev, uint16_t r_seg,
+ uint8_t r_bus, uint8_t r_dev, uint8_t r_func)
+{
+ char dir[128], name[128];
+ int fd, r = 0, v;
+ FILE *f;
+ unsigned long long start, end, size, flags;
+ uint16_t id;
+ struct stat statbuf;
+ PCIRegion *rp;
+ PCIDevRegions *dev = &pci_dev->real_device;
+
+ dev->region_number = 0;
+
+ snprintf(dir, sizeof(dir), "/sys/bus/pci/devices/%04x:%02x:%02x.%x/",
+ r_seg, r_bus, r_dev, r_func);
+
+ snprintf(name, sizeof(name), "%sconfig", dir);
+
+ if (pci_dev->configfd_name && *pci_dev->configfd_name) {
+ if (qemu_isdigit(pci_dev->configfd_name[0])) {
+ dev->config_fd = strtol(pci_dev->configfd_name, NULL, 0);
+ } else {
+ dev->config_fd = monitor_get_fd(cur_mon, pci_dev->configfd_name);
+ if (dev->config_fd < 0) {
+                fprintf(stderr, "%s: (%s) unknown\n", __func__,
+ pci_dev->configfd_name);
+ return 1;
+ }
+ }
+ } else {
+ dev->config_fd = open(name, O_RDWR);
+
+ if (dev->config_fd == -1) {
+ fprintf(stderr, "%s: %s: %m\n", __func__, name);
+ return 1;
+ }
+ }
+again:
+ r = read(dev->config_fd, pci_dev->dev.config,
+ pci_config_size(&pci_dev->dev));
+ if (r < 0) {
+ if (errno == EINTR || errno == EAGAIN)
+ goto again;
+ fprintf(stderr, "%s: read failed, errno = %d\n", __func__, errno);
+ }
+
+ snprintf(name, sizeof(name), "%sresource", dir);
+
+ f = fopen(name, "r");
+ if (f == NULL) {
+ fprintf(stderr, "%s: %s: %m\n", __func__, name);
+ return 1;
+ }
+
+ for (r = 0; r < PCI_NUM_REGIONS; r++) {
+ if (fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) != 3)
+ break;
+
+ rp = dev->regions + r;
+ rp->valid = 0;
+ rp->resource_fd = -1;
+ size = end - start + 1;
+ flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH;
+ if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0)
+ continue;
+ if (flags & IORESOURCE_MEM) {
+ flags &= ~IORESOURCE_IO;
+ if (r != PCI_ROM_SLOT) {
+ snprintf(name, sizeof(name), "%sresource%d", dir, r);
+ fd = open(name, O_RDWR);
+ if (fd == -1)
+ continue;
+ rp->resource_fd = fd;
+ }
+ } else
+ flags &= ~IORESOURCE_PREFETCH;
+
+ rp->type = flags;
+ rp->valid = 1;
+ rp->base_addr = start;
+ rp->size = size;
+ DEBUG("region %d size %d start 0x%llx type %d resource_fd %d\n",
+ r, rp->size, start, rp->type, rp->resource_fd);
+ }
+
+ fclose(f);
+
+ /* read and fill vendor ID */
+ v = get_real_vendor_id(dir, &id);
+ if (v) {
+ return 1;
+ }
+ pci_dev->dev.config[0] = id & 0xff;
+ pci_dev->dev.config[1] = (id & 0xff00) >> 8;
+
+ /* read and fill device ID */
+ v = get_real_device_id(dir, &id);
+ if (v) {
+ return 1;
+ }
+ pci_dev->dev.config[2] = id & 0xff;
+ pci_dev->dev.config[3] = (id & 0xff00) >> 8;
+
+ /* dealing with virtual function device */
+ snprintf(name, sizeof(name), "%sphysfn/", dir);
+ if (!stat(name, &statbuf))
+ pci_dev->need_emulate_cmd = 1;
+ else
+ pci_dev->need_emulate_cmd = 0;
+
+ dev->region_number = r;
+ return 0;
+}
+
+static QLIST_HEAD(, AssignedDevice) devs = QLIST_HEAD_INITIALIZER(devs);
+
+#ifdef KVM_CAP_IRQ_ROUTING
+static void free_dev_irq_entries(AssignedDevice *dev)
+{
+ int i;
+
+ for (i = 0; i < dev->irq_entries_nr; i++)
+ kvm_del_routing_entry(kvm_context, &dev->entry[i]);
+ free(dev->entry);
+ dev->entry = NULL;
+ dev->irq_entries_nr = 0;
+}
+#endif
+
+static void free_assigned_device(AssignedDevice *dev)
+{
+ if (dev) {
+ int i;
+
+ for (i = 0; i < dev->real_device.region_number; i++) {
+ PCIRegion *pci_region = &dev->real_device.regions[i];
+ AssignedDevRegion *region = &dev->v_addrs[i];
+
+ if (!pci_region->valid)
+ continue;
+
+ if (pci_region->type & IORESOURCE_IO) {
+ kvm_remove_ioperm_data(region->u.r_baseport, region->r_size);
+ continue;
+ } else if (pci_region->type & IORESOURCE_MEM) {
+ if (region->u.r_virtbase) {
+ if (region->memory_index) {
+ cpu_register_physical_memory(region->e_physbase,
+ region->e_size,
+ IO_MEM_UNASSIGNED);
+ qemu_ram_unmap(region->memory_index);
+ }
+ if (munmap(region->u.r_virtbase,
+ (pci_region->size + 0xFFF) & 0xFFFFF000))
+ fprintf(stderr,
+ "Failed to unmap assigned device region: %s\n",
+ strerror(errno));
+ if (pci_region->resource_fd >= 0) {
+ close(pci_region->resource_fd);
+ }
+ }
+ }
+ }
+
+ if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX)
+ assigned_dev_unregister_msix_mmio(dev);
+
+ if (dev->real_device.config_fd >= 0) {
+ close(dev->real_device.config_fd);
+ }
+
+#ifdef KVM_CAP_IRQ_ROUTING
+ free_dev_irq_entries(dev);
+#endif
+ }
+}
+
+static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
+{
+ return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn;
+}
+
+static void assign_failed_examine(AssignedDevice *dev)
+{
+ char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns;
+ uint16_t vendor_id, device_id;
+ int r;
+
+ sprintf(dir, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
+ dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
+
+ sprintf(name, "%sdriver", dir);
+
+ r = readlink(name, driver, sizeof(driver));
+ if ((r <= 0) || r >= sizeof(driver) || !(ns = strrchr(driver, '/'))) {
+ goto fail;
+ }
+
+ ns++;
+
+ if (get_real_vendor_id(dir, &vendor_id) ||
+ get_real_device_id(dir, &device_id)) {
+ goto fail;
+ }
+
+ fprintf(stderr, "*** The driver '%s' is occupying your device "
+ "%04x:%02x:%02x.%x.\n",
+ ns, dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
+ fprintf(stderr, "***\n");
+ fprintf(stderr, "*** You can try the following commands to free it:\n");
+ fprintf(stderr, "***\n");
+ fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub/"
+ "new_id\n", vendor_id, device_id);
+ fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
+ "%s/unbind\n",
+ dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func, ns);
+ fprintf(stderr, "*** $ echo \"%04x:%02x:%02x.%x\" > /sys/bus/pci/drivers/"
+ "pci-stub/bind\n",
+ dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
+ fprintf(stderr, "*** $ echo \"%04x %04x\" > /sys/bus/pci/drivers/pci-stub"
+ "/remove_id\n", vendor_id, device_id);
+ fprintf(stderr, "***\n");
+
+ return;
+
+fail:
+ fprintf(stderr, "Couldn't find out why.\n");
+}
+
+static int assign_device(AssignedDevice *dev)
+{
+ struct kvm_assigned_pci_dev assigned_dev_data;
+ int r;
+
+#ifdef KVM_CAP_PCI_SEGMENT
+ /* Only pass non-zero PCI segment to capable module */
+ if (!kvm_check_extension(kvm_state, KVM_CAP_PCI_SEGMENT) &&
+ dev->h_segnr) {
+ fprintf(stderr, "Can't assign device inside non-zero PCI segment "
+ "as this KVM module doesn't support it.\n");
+ return -ENODEV;
+ }
+#endif
+
+ memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
+ assigned_dev_data.assigned_dev_id =
+ calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn);
+#ifdef KVM_CAP_PCI_SEGMENT
+ assigned_dev_data.segnr = dev->h_segnr;
+#endif
+ assigned_dev_data.busnr = dev->h_busnr;
+ assigned_dev_data.devfn = dev->h_devfn;
+
+#ifdef KVM_CAP_IOMMU
+ /* We always enable the IOMMU unless disabled on the command line */
+ if (dev->use_iommu) {
+ if (!kvm_check_extension(kvm_state, KVM_CAP_IOMMU)) {
+ fprintf(stderr, "No IOMMU found. Unable to assign device \"%s\"\n",
+ dev->dev.qdev.id);
+ return -ENODEV;
+ }
+ assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
+ }
+#else
+ dev->use_iommu = 0;
+#endif
+
+ r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
+ if (r < 0) {
+ fprintf(stderr, "Failed to assign device \"%s\" : %s\n",
+ dev->dev.qdev.id, strerror(-r));
+
+ switch (r) {
+ case -EBUSY:
+ assign_failed_examine(dev);
+ break;
+ default:
+ break;
+ }
+ }
+ return r;
+}
+
+static int assign_irq(AssignedDevice *dev)
+{
+ struct kvm_assigned_irq assigned_irq_data;
+ int irq, r = 0;
+
+ /* Interrupt PIN 0 means don't use INTx */
+ if (assigned_dev_pci_read_byte(&dev->dev, PCI_INTERRUPT_PIN) == 0)
+ return 0;
+
+ irq = pci_map_irq(&dev->dev, dev->intpin);
+ irq = piix_get_irq(irq);
+
+#ifdef TARGET_IA64
+ irq = ipf_map_irq(&dev->dev, irq);
+#endif
+
+ if (dev->girq == irq)
+ return r;
+
+ memset(&assigned_irq_data, 0, sizeof(assigned_irq_data));
+ assigned_irq_data.assigned_dev_id =
+ calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn);
+ assigned_irq_data.guest_irq = irq;
+ assigned_irq_data.host_irq = dev->real_device.irq;
+#ifdef KVM_CAP_ASSIGN_DEV_IRQ
+ if (dev->irq_requested_type) {
+ assigned_irq_data.flags = dev->irq_requested_type;
+ r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
+ /* -ENXIO means no assigned irq */
+ if (r && r != -ENXIO)
+ perror("assign_irq: deassign");
+ }
+
+ assigned_irq_data.flags = KVM_DEV_IRQ_GUEST_INTX;
+ if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSI)
+ assigned_irq_data.flags |= KVM_DEV_IRQ_HOST_MSI;
+ else
+ assigned_irq_data.flags |= KVM_DEV_IRQ_HOST_INTX;
+#endif
+
+ r = kvm_assign_irq(kvm_context, &assigned_irq_data);
+ if (r < 0) {
+ fprintf(stderr, "Failed to assign irq for \"%s\": %s\n",
+ dev->dev.qdev.id, strerror(-r));
+ fprintf(stderr, "Perhaps you are assigning a device "
+ "that shares an IRQ with another device?\n");
+ return r;
+ }
+
+ dev->girq = irq;
+ dev->irq_requested_type = assigned_irq_data.flags;
+ return r;
+}
+
+static void deassign_device(AssignedDevice *dev)
+{
+#ifdef KVM_CAP_DEVICE_DEASSIGNMENT
+ struct kvm_assigned_pci_dev assigned_dev_data;
+ int r;
+
+ memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
+ assigned_dev_data.assigned_dev_id =
+ calc_assigned_dev_id(dev->h_segnr, dev->h_busnr, dev->h_devfn);
+
+ r = kvm_deassign_pci_device(kvm_context, &assigned_dev_data);
+ if (r < 0)
+ fprintf(stderr, "Failed to deassign device \"%s\" : %s\n",
+ dev->dev.qdev.id, strerror(-r));
+#endif
+}
+
+#if 0
+AssignedDevInfo *get_assigned_device(int pcibus, int slot)
+{
+ AssignedDevice *assigned_dev = NULL;
+ AssignedDevInfo *adev = NULL;
+
+ QLIST_FOREACH(adev, &adev_head, next) {
+ assigned_dev = adev->assigned_dev;
+ if (pci_bus_num(assigned_dev->dev.bus) == pcibus &&
+ PCI_SLOT(assigned_dev->dev.devfn) == slot)
+ return adev;
+ }
+
+ return NULL;
+}
+#endif
+
+/* The pci config space got updated. Check if irq numbers have changed
+ * for our devices
+ */
+void assigned_dev_update_irqs(void)
+{
+ AssignedDevice *dev, *next;
+ int r;
+
+ dev = QLIST_FIRST(&devs);
+ while (dev) {
+ next = QLIST_NEXT(dev, next);
+ r = assign_irq(dev);
+ if (r < 0)
+ qdev_unplug(&dev->dev.qdev);
+ dev = next;
+ }
+}
+
+#ifdef KVM_CAP_IRQ_ROUTING
+
+#ifdef KVM_CAP_DEVICE_MSI
+static void assigned_dev_update_msi(PCIDevice *pci_dev, unsigned int ctrl_pos)
+{
+ struct kvm_assigned_irq assigned_irq_data;
+ AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
+ uint8_t ctrl_byte = pci_dev->config[ctrl_pos];
+ int r;
+
+ memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
+ assigned_irq_data.assigned_dev_id =
+ calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr,
+ (uint8_t)assigned_dev->h_devfn);
+
+ /* Some guests gratuitously disable MSI even if they're not using it,
+ * try to catch this by only deassigning irqs if the guest is using
+ * MSI or intends to start. */
+ if ((assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MSI) ||
+ (ctrl_byte & PCI_MSI_FLAGS_ENABLE)) {
+
+ assigned_irq_data.flags = assigned_dev->irq_requested_type;
+ free_dev_irq_entries(assigned_dev);
+ r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
+ /* -ENXIO means no assigned irq */
+ if (r && r != -ENXIO)
+ perror("assigned_dev_update_msi: deassign irq");
+
+ assigned_dev->irq_requested_type = 0;
+ }
+
+ if (ctrl_byte & PCI_MSI_FLAGS_ENABLE) {
+ assigned_dev->entry = calloc(1, sizeof(struct kvm_irq_routing_entry));
+ if (!assigned_dev->entry) {
+ perror("assigned_dev_update_msi: ");
+ return;
+ }
+ assigned_dev->entry->u.msi.address_lo =
+ *(uint32_t *)(pci_dev->config + pci_dev->cap.start +
+ PCI_MSI_ADDRESS_LO);
+ assigned_dev->entry->u.msi.address_hi = 0;
+ assigned_dev->entry->u.msi.data = *(uint16_t *)(pci_dev->config +
+ pci_dev->cap.start + PCI_MSI_DATA_32);
+ assigned_dev->entry->type = KVM_IRQ_ROUTING_MSI;
+ r = kvm_get_irq_route_gsi(kvm_context);
+ if (r < 0) {
+ perror("assigned_dev_update_msi: kvm_get_irq_route_gsi");
+ return;
+ }
+ assigned_dev->entry->gsi = r;
+
+ kvm_add_routing_entry(kvm_context, assigned_dev->entry);
+ if (kvm_commit_irq_routes(kvm_context) < 0) {
+ perror("assigned_dev_update_msi: kvm_commit_irq_routes");
+ assigned_dev->cap.state &= ~ASSIGNED_DEVICE_MSI_ENABLED;
+ return;
+ }
+ assigned_dev->irq_entries_nr = 1;
+
+ assigned_irq_data.guest_irq = assigned_dev->entry->gsi;
+ assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSI | KVM_DEV_IRQ_GUEST_MSI;
+ if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0)
+ perror("assigned_dev_enable_msi: assign irq");
+
+ assigned_dev->irq_requested_type = assigned_irq_data.flags;
+ }
+}
+#endif
+
+#ifdef KVM_CAP_DEVICE_MSIX
+static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
+{
+ AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
+ uint16_t entries_nr = 0, entries_max_nr;
+ int pos = 0, i, r = 0;
+ uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl;
+ struct kvm_assigned_msix_nr msix_nr;
+ struct kvm_assigned_msix_entry msix_entry;
+ void *va = adev->msix_table_page;
+
+ if (adev->cap.available & ASSIGNED_DEVICE_CAP_MSI)
+ pos = pci_dev->cap.start + PCI_CAPABILITY_CONFIG_MSI_LENGTH;
+ else
+ pos = pci_dev->cap.start;
+
+ entries_max_nr = *(uint16_t *)(pci_dev->config + pos + 2);
+ entries_max_nr &= PCI_MSIX_TABSIZE;
+ entries_max_nr += 1;
+
+ /* Get the usable entry number for allocating */
+ for (i = 0; i < entries_max_nr; i++) {
+ memcpy(&msg_ctrl, va + i * 16 + 12, 4);
+ memcpy(&msg_data, va + i * 16 + 8, 4);
+        /* Ignore unused entry even if it's unmasked */
+ if (msg_data == 0)
+ continue;
+ entries_nr ++;
+ }
+
+ if (entries_nr == 0) {
+ fprintf(stderr, "MSI-X entry number is zero!\n");
+ return -EINVAL;
+ }
+ msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_segnr, adev->h_busnr,
+ (uint8_t)adev->h_devfn);
+ msix_nr.entry_nr = entries_nr;
+ r = kvm_assign_set_msix_nr(kvm_context, &msix_nr);
+ if (r != 0) {
+        fprintf(stderr, "failed to set MSI-X entry number! %s\n",
+ strerror(-r));
+ return r;
+ }
+
+ free_dev_irq_entries(adev);
+ adev->irq_entries_nr = entries_nr;
+ adev->entry = calloc(entries_nr, sizeof(struct kvm_irq_routing_entry));
+ if (!adev->entry) {
+ perror("assigned_dev_update_msix_mmio: ");
+ return -errno;
+ }
+
+ msix_entry.assigned_dev_id = msix_nr.assigned_dev_id;
+ entries_nr = 0;
+ for (i = 0; i < entries_max_nr; i++) {
+ if (entries_nr >= msix_nr.entry_nr)
+ break;
+ memcpy(&msg_ctrl, va + i * 16 + 12, 4);
+ memcpy(&msg_data, va + i * 16 + 8, 4);
+ if (msg_data == 0)
+ continue;
+
+ memcpy(&msg_addr, va + i * 16, 4);
+ memcpy(&msg_upper_addr, va + i * 16 + 4, 4);
+
+ r = kvm_get_irq_route_gsi(kvm_context);
+ if (r < 0)
+ return r;
+
+ adev->entry[entries_nr].gsi = r;
+ adev->entry[entries_nr].type = KVM_IRQ_ROUTING_MSI;
+ adev->entry[entries_nr].flags = 0;
+ adev->entry[entries_nr].u.msi.address_lo = msg_addr;
+ adev->entry[entries_nr].u.msi.address_hi = msg_upper_addr;
+ adev->entry[entries_nr].u.msi.data = msg_data;
+        DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x!\n", msg_data, msg_addr);
+ kvm_add_routing_entry(kvm_context, &adev->entry[entries_nr]);
+
+ msix_entry.gsi = adev->entry[entries_nr].gsi;
+ msix_entry.entry = i;
+ r = kvm_assign_set_msix_entry(kvm_context, &msix_entry);
+ if (r) {
+            fprintf(stderr, "failed to set MSI-X entry! %s\n", strerror(-r));
+ break;
+ }
+        DEBUG("MSI-X entry gsi 0x%x, entry %d!\n",
+ msix_entry.gsi, msix_entry.entry);
+ entries_nr ++;
+ }
+
+ if (r == 0 && kvm_commit_irq_routes(kvm_context) < 0) {
+ perror("assigned_dev_update_msix_mmio: kvm_commit_irq_routes");
+ return -EINVAL;
+ }
+
+ return r;
+}
+
+static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
+{
+ struct kvm_assigned_irq assigned_irq_data;
+ AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
+ uint16_t *ctrl_word = (uint16_t *)(pci_dev->config + ctrl_pos);
+ int r;
+
+ memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
+ assigned_irq_data.assigned_dev_id =
+ calc_assigned_dev_id(assigned_dev->h_segnr, assigned_dev->h_busnr,
+ (uint8_t)assigned_dev->h_devfn);
+
+ /* Some guests gratuitously disable MSIX even if they're not using it,
+ * try to catch this by only deassigning irqs if the guest is using
+ * MSIX or intends to start. */
+ if ((assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MSIX) ||
+ (*ctrl_word & PCI_MSIX_ENABLE)) {
+
+ assigned_irq_data.flags = assigned_dev->irq_requested_type;
+ free_dev_irq_entries(assigned_dev);
+ r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
+ /* -ENXIO means no assigned irq */
+ if (r && r != -ENXIO)
+ perror("assigned_dev_update_msix: deassign irq");
+
+ assigned_dev->irq_requested_type = 0;
+ }
+
+ if (*ctrl_word & PCI_MSIX_ENABLE) {
+ assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX |
+ KVM_DEV_IRQ_GUEST_MSIX;
+
+ if (assigned_dev_update_msix_mmio(pci_dev) < 0) {
+ perror("assigned_dev_update_msix_mmio");
+ return;
+ }
+ if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0) {
+ perror("assigned_dev_enable_msix: assign irq");
+ return;
+ }
+ assigned_dev->irq_requested_type = assigned_irq_data.flags;
+ }
+}
+#endif
+#endif
+
+static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address,
+ uint32_t val, int len)
+{
+ AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice, dev);
+ unsigned int pos = pci_dev->cap.start, ctrl_pos;
+
+ pci_default_cap_write_config(pci_dev, address, val, len);
+#ifdef KVM_CAP_IRQ_ROUTING
+#ifdef KVM_CAP_DEVICE_MSI
+ if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
+ ctrl_pos = pos + PCI_MSI_FLAGS;
+ if (address <= ctrl_pos && address + len > ctrl_pos)
+ assigned_dev_update_msi(pci_dev, ctrl_pos);
+ pos += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
+ }
+#endif
+#ifdef KVM_CAP_DEVICE_MSIX
+ if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) {
+ ctrl_pos = pos + 3;
+ if (address <= ctrl_pos && address + len > ctrl_pos) {
+ ctrl_pos--; /* control is word long */
+ assigned_dev_update_msix(pci_dev, ctrl_pos);
+ }
+ pos += PCI_CAPABILITY_CONFIG_MSIX_LENGTH;
+ }
+#endif
+#endif
+ return;
+}
+
+static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
+{
+ AssignedDevice *dev = container_of(pci_dev, AssignedDevice, dev);
+ PCIRegion *pci_region = dev->real_device.regions;
+ int next_cap_pt = 0;
+
+ pci_dev->cap.length = 0;
+#ifdef KVM_CAP_IRQ_ROUTING
+#ifdef KVM_CAP_DEVICE_MSI
+ /* Expose MSI capability
+ * MSI capability is the 1st capability in capability config */
+ if (pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSI)) {
+ dev->cap.available |= ASSIGNED_DEVICE_CAP_MSI;
+ memset(&pci_dev->config[pci_dev->cap.start + pci_dev->cap.length],
+ 0, PCI_CAPABILITY_CONFIG_MSI_LENGTH);
+ pci_dev->config[pci_dev->cap.start + pci_dev->cap.length] =
+ PCI_CAP_ID_MSI;
+ pci_dev->cap.length += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
+ next_cap_pt = 1;
+ }
+#endif
+#ifdef KVM_CAP_DEVICE_MSIX
+ /* Expose MSI-X capability */
+ if (pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSIX)) {
+ int pos, entry_nr, bar_nr;
+ uint32_t msix_table_entry;
+ dev->cap.available |= ASSIGNED_DEVICE_CAP_MSIX;
+ memset(&pci_dev->config[pci_dev->cap.start + pci_dev->cap.length],
+ 0, PCI_CAPABILITY_CONFIG_MSIX_LENGTH);
+ pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_MSIX);
+ entry_nr = assigned_dev_pci_read_word(pci_dev, pos + 2) &
+ PCI_MSIX_TABSIZE;
+ pci_dev->config[pci_dev->cap.start + pci_dev->cap.length] = 0x11;
+ *(uint16_t *)(pci_dev->config + pci_dev->cap.start +
+ pci_dev->cap.length + 2) = entry_nr;
+ msix_table_entry = assigned_dev_pci_read_long(pci_dev,
+ pos + PCI_MSIX_TABLE);
+ *(uint32_t *)(pci_dev->config + pci_dev->cap.start +
+ pci_dev->cap.length + PCI_MSIX_TABLE) = msix_table_entry;
+ *(uint32_t *)(pci_dev->config + pci_dev->cap.start +
+ pci_dev->cap.length + PCI_MSIX_PBA) =
+ assigned_dev_pci_read_long(pci_dev, pos + PCI_MSIX_PBA);
+ bar_nr = msix_table_entry & PCI_MSIX_BIR;
+ msix_table_entry &= ~PCI_MSIX_BIR;
+ dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
+ if (next_cap_pt != 0) {
+ pci_dev->config[pci_dev->cap.start + next_cap_pt] =
+ pci_dev->cap.start + pci_dev->cap.length;
+ next_cap_pt += PCI_CAPABILITY_CONFIG_MSI_LENGTH;
+ } else
+ next_cap_pt = 1;
+ pci_dev->cap.length += PCI_CAPABILITY_CONFIG_MSIX_LENGTH;
+ }
+#endif
+#endif
+
+ return 0;
+}
+
+static uint32_t msix_mmio_readl(void *opaque, target_phys_addr_t addr)
+{
+ AssignedDevice *adev = opaque;
+ unsigned int offset = addr & 0xfff;
+ void *page = adev->msix_table_page;
+ uint32_t val = 0;
+
+ memcpy(&val, (void *)((char *)page + offset), 4);
+
+ return val;
+}
+
+static uint32_t msix_mmio_readb(void *opaque, target_phys_addr_t addr)
+{
+ return ((msix_mmio_readl(opaque, addr & ~3)) >>
+ (8 * (addr & 3))) & 0xff;
+}
+
+static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
+{
+ return ((msix_mmio_readl(opaque, addr & ~3)) >>
+ (8 * (addr & 3))) & 0xffff;
+}
+
+static void msix_mmio_writel(void *opaque,
+ target_phys_addr_t addr, uint32_t val)
+{
+ AssignedDevice *adev = opaque;
+ unsigned int offset = addr & 0xfff;
+ void *page = adev->msix_table_page;
+
+ DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
+ addr, val);
+ memcpy((void *)((char *)page + offset), &val, 4);
+}
+
+static void msix_mmio_writew(void *opaque,
+ target_phys_addr_t addr, uint32_t val)
+{
+ msix_mmio_writel(opaque, addr & ~3,
+ (val & 0xffff) << (8*(addr & 3)));
+}
+
+static void msix_mmio_writeb(void *opaque,
+ target_phys_addr_t addr, uint32_t val)
+{
+ msix_mmio_writel(opaque, addr & ~3,
+ (val & 0xff) << (8*(addr & 3)));
+}
+
+static CPUWriteMemoryFunc *msix_mmio_write[] = {
+ msix_mmio_writeb, msix_mmio_writew, msix_mmio_writel
+};
+
+static CPUReadMemoryFunc *msix_mmio_read[] = {
+ msix_mmio_readb, msix_mmio_readw, msix_mmio_readl
+};
+
+static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
+{
+ dev->msix_table_page = mmap(NULL, 0x1000,
+ PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
+ if (dev->msix_table_page == MAP_FAILED) {
+        fprintf(stderr, "failed to allocate msix_table_page! %s\n",
+ strerror(errno));
+ return -EFAULT;
+ }
+ memset(dev->msix_table_page, 0, 0x1000);
+ dev->mmio_index = cpu_register_io_memory(
+ msix_mmio_read, msix_mmio_write, dev);
+ return 0;
+}
+
+static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
+{
+ if (!dev->msix_table_page)
+ return;
+
+ cpu_unregister_io_memory(dev->mmio_index);
+ dev->mmio_index = 0;
+
+ if (munmap(dev->msix_table_page, 0x1000) == -1) {
+ fprintf(stderr, "error unmapping msix_table_page! %s\n",
+ strerror(errno));
+ }
+ dev->msix_table_page = NULL;
+}
+
+static int assigned_initfn(struct PCIDevice *pci_dev)
+{
+ AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
+ uint8_t e_device, e_intx;
+ int r;
+
+ if (!dev->host.seg && !dev->host.bus && !dev->host.dev && !dev->host.func) {
+ error_report("pci-assign: error: no host device specified");
+ return -1;
+ }
+
+ if (get_real_device(dev, dev->host.seg, dev->host.bus,
+ dev->host.dev, dev->host.func)) {
+ error_report("pci-assign: Error: Couldn't get real device (%s)!",
+ dev->dev.qdev.id);
+ goto out;
+ }
+
+ /* handle real device's MMIO/PIO BARs */
+ if (assigned_dev_register_regions(dev->real_device.regions,
+ dev->real_device.region_number,
+ dev))
+ goto out;
+
+ /* handle interrupt routing */
+ e_device = (dev->dev.devfn >> 3) & 0x1f;
+ e_intx = dev->dev.config[0x3d] - 1;
+ dev->intpin = e_intx;
+ dev->run = 0;
+ dev->girq = -1;
+ dev->h_segnr = dev->host.seg;
+ dev->h_busnr = dev->host.bus;
+ dev->h_devfn = PCI_DEVFN(dev->host.dev, dev->host.func);
+
+ if (pci_enable_capability_support(pci_dev, 0, NULL,
+ assigned_device_pci_cap_write_config,
+ assigned_device_pci_cap_init) < 0)
+ goto out;
+
+ /* assign device to guest */
+ r = assign_device(dev);
+ if (r < 0)
+ goto out;
+
+ /* assign irq for the device */
+ r = assign_irq(dev);
+ if (r < 0)
+ goto assigned_out;
+
+ /* intercept MSI-X entry page in the MMIO */
+ if (dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX)
+ if (assigned_dev_register_msix_mmio(dev))
+ goto assigned_out;
+
+ assigned_dev_load_option_rom(dev);
+ QLIST_INSERT_HEAD(&devs, dev, next);
+ return 0;
+
+assigned_out:
+ deassign_device(dev);
+out:
+ free_assigned_device(dev);
+ return -1;
+}
+
+static int assigned_exitfn(struct PCIDevice *pci_dev)
+{
+ AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
+
+ QLIST_REMOVE(dev, next);
+ deassign_device(dev);
+ free_assigned_device(dev);
+ return 0;
+}
+
+static int parse_hostaddr(DeviceState *dev, Property *prop, const char *str)
+{
+ PCIHostDevice *ptr = qdev_get_prop_ptr(dev, prop);
+ int rc;
+
+ rc = pci_parse_host_devaddr(str, &ptr->seg, &ptr->bus, &ptr->dev, &ptr->func);
+ if (rc != 0)
+ return -1;
+ return 0;
+}
+
+static int print_hostaddr(DeviceState *dev, Property *prop, char *dest, size_t len)
+{
+ PCIHostDevice *ptr = qdev_get_prop_ptr(dev, prop);
+
+ return snprintf(dest, len, "%02x:%02x.%x", ptr->bus, ptr->dev, ptr->func);
+}
+
+PropertyInfo qdev_prop_hostaddr = {
+ .name = "pci-hostaddr",
+ .type = -1,
+ .size = sizeof(PCIHostDevice),
+ .parse = parse_hostaddr,
+ .print = print_hostaddr,
+};
+
+static PCIDeviceInfo assign_info = {
+ .qdev.name = "pci-assign",
+ .qdev.desc = "pass through host pci devices to the guest",
+ .qdev.size = sizeof(AssignedDevice),
+ .init = assigned_initfn,
+ .exit = assigned_exitfn,
+ .config_read = assigned_dev_pci_read_config,
+ .config_write = assigned_dev_pci_write_config,
+ .qdev.props = (Property[]) {
+ DEFINE_PROP("host", AssignedDevice, host, qdev_prop_hostaddr, PCIHostDevice),
+ DEFINE_PROP_UINT32("iommu", AssignedDevice, use_iommu, 1),
+ DEFINE_PROP_STRING("configfd", AssignedDevice, configfd_name),
+ DEFINE_PROP_END_OF_LIST(),
+ },
+};
+
+static void assign_register_devices(void)
+{
+ pci_qdev_register(&assign_info);
+}
+
+device_init(assign_register_devices)
+
+
+/*
+ * Syntax to assign device:
+ *
+ * -pcidevice host=bus:dev.func[,dma=none][,name=Foo]
+ *
+ * Example:
+ * -pcidevice host=00:13.0,dma=pvdma
+ *
+ * dma can currently only be 'none' to disable iommu support.
+ */
+QemuOpts *add_assigned_device(const char *arg)
+{
+ QemuOpts *opts = NULL;
+ char host[64], id[64], dma[8];
+ int r;
+
+ r = get_param_value(host, sizeof(host), "host", arg);
+ if (!r)
+ goto bad;
+ r = get_param_value(id, sizeof(id), "id", arg);
+ if (!r)
+ r = get_param_value(id, sizeof(id), "name", arg);
+ if (!r)
+ r = get_param_value(id, sizeof(id), "host", arg);
+
+ opts = qemu_opts_create(&qemu_device_opts, id, 0);
+ if (!opts)
+ goto bad;
+ qemu_opt_set(opts, "driver", "pci-assign");
+ qemu_opt_set(opts, "host", host);
+
+#ifdef KVM_CAP_IOMMU
+ r = get_param_value(dma, sizeof(dma), "dma", arg);
+ if (r && !strncmp(dma, "none", 4))
+ qemu_opt_set(opts, "iommu", "0");
+#endif
+ qemu_opts_print(opts, NULL);
+ return opts;
+
+bad:
+ fprintf(stderr, "pcidevice argument parse error; "
+ "please check the help text for usage\n");
+ if (opts)
+ qemu_opts_del(opts);
+ return NULL;
+}
+
+void add_assigned_devices(PCIBus *bus, const char **devices, int n_devices)
+{
+ QemuOpts *opts;
+ int i;
+
+ for (i = 0; i < n_devices; i++) {
+ opts = add_assigned_device(devices[i]);
+ if (opts == NULL) {
+ fprintf(stderr, "Could not add assigned device %s\n", devices[i]);
+ exit(1);
+ }
+ /* generic code will call qdev_device_add() for the device */
+ }
+}
+
+/*
+ * If the assigned device has an option ROM, load the ROM data into RAM. If an
+ * error occurs while loading the option ROM, just ignore it and continue
+ * without it.
+ */
+static void assigned_dev_load_option_rom(AssignedDevice *dev)
+{
+ int size, len, ret;
+ void *buf;
+ FILE *fp;
+ uint8_t i = 1;
+ char rom_file[64];
+
+ snprintf(rom_file, sizeof(rom_file),
+ "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/rom",
+ dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func);
+
+ if (access(rom_file, F_OK))
+ return;
+
+ /* Write something to the ROM file to enable it */
+ fp = fopen(rom_file, "wb");
+ if (fp == NULL)
+ return;
+ len = fwrite(&i, 1, 1, fp);
+ fclose(fp);
+ if (len != 1)
+ return;
+
+ /* The file has to be closed and reopened, otherwise it won't work */
+ fp = fopen(rom_file, "rb");
+ if (fp == NULL)
+ return;
+
+ fseek(fp, 0, SEEK_END);
+ size = ftell(fp);
+ fseek(fp, 0, SEEK_SET);
+
+ buf = malloc(size);
+ if (buf == NULL) {
+ fclose(fp);
+ return;
+ }
+
+ ret = fread(buf, size, 1, fp);
+ if (!feof(fp) || ferror(fp) || ret != 1) {
+ free(buf);
+ fclose(fp);
+ return;
+ }
+ fclose(fp);
+
+ /* Copy ROM contents into the space backing the ROM BAR */
+ if (dev->v_addrs[PCI_ROM_SLOT].r_size >= size &&
+ dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase) {
+ mprotect(dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase,
+ size, PROT_READ | PROT_WRITE);
+ memcpy(dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase,
+ buf, size);
+ mprotect(dev->v_addrs[PCI_ROM_SLOT].u.r_virtbase,
+ size, PROT_READ);
+ }
+
+ free(buf);
+}
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
new file mode 100644
index 000000000..4e7fe878c
--- /dev/null
+++ b/hw/device-assignment.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2007, Neocleus Corporation.
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Data structures for storing PCI state
+ *
+ * Adapted to kvm by Qumranet
+ *
+ * Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
+ * Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
+ * Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
+ * Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
+ */
+
+#ifndef __DEVICE_ASSIGNMENT_H__
+#define __DEVICE_ASSIGNMENT_H__
+
+#include <sys/mman.h>
+#include "qemu-common.h"
+#include "qemu-queue.h"
+#include "pci.h"
+
+/* From include/linux/pci.h in the kernel sources */
+#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
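+/* e.g. PCI_DEVFN(0x13, 0) == 0x98: the slot goes in bits 7:3, the function in bits 2:0 */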
+
+typedef struct PCIHostDevice {
+ int seg;
+ int bus;
+ int dev;
+ int func;
+} PCIHostDevice;
+
+typedef struct {
+ int type; /* Memory or port I/O */
+ int valid;
+ uint32_t base_addr;
+ uint32_t size; /* size of the region */
+ int resource_fd;
+} PCIRegion;
+
+typedef struct {
+ uint8_t bus, dev, func; /* Bus inside domain, device and function */
+ int irq; /* IRQ number */
+ uint16_t region_number; /* number of active regions */
+
+ /* Port I/O or MMIO Regions */
+ PCIRegion regions[PCI_NUM_REGIONS];
+ int config_fd;
+} PCIDevRegions;
+
+typedef struct {
+ pcibus_t e_physbase;
+ ram_addr_t memory_index;
+ union {
+ void *r_virtbase; /* mmapped access address for memory regions */
+ uint32_t r_baseport; /* the base guest port for I/O regions */
+ } u;
+ int num; /* our index within v_addrs[] */
+ pcibus_t e_size; /* emulated size of region in bytes */
+ pcibus_t r_size; /* real size of region in bytes */
+} AssignedDevRegion;
+
+typedef struct AssignedDevice {
+ PCIDevice dev;
+ PCIHostDevice host;
+ uint32_t use_iommu;
+ int intpin;
+ uint8_t debug_flags;
+ AssignedDevRegion v_addrs[PCI_NUM_REGIONS];
+ PCIDevRegions real_device;
+ int run;
+ int girq;
+ unsigned int h_segnr;
+ unsigned char h_busnr;
+ unsigned int h_devfn;
+ int irq_requested_type;
+ int bound;
+ struct {
+#define ASSIGNED_DEVICE_CAP_MSI (1 << 0)
+#define ASSIGNED_DEVICE_CAP_MSIX (1 << 1)
+ uint32_t available;
+#define ASSIGNED_DEVICE_MSI_ENABLED (1 << 0)
+#define ASSIGNED_DEVICE_MSIX_ENABLED (1 << 1)
+#define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2)
+ uint32_t state;
+ } cap;
+ int irq_entries_nr;
+ struct kvm_irq_routing_entry *entry;
+ void *msix_table_page;
+ target_phys_addr_t msix_table_addr;
+ int mmio_index;
+ int need_emulate_cmd;
+ char *configfd_name;
+ QLIST_ENTRY(AssignedDevice) next;
+} AssignedDevice;
+
+QemuOpts *add_assigned_device(const char *arg);
+void add_assigned_devices(PCIBus *bus, const char **devices, int n_devices);
+void assigned_dev_update_irqs(void);
+
+#define MAX_DEV_ASSIGN_CMDLINE 8
+
+extern const char *assigned_devices[MAX_DEV_ASSIGN_CMDLINE];
+extern int assigned_devices_index;
+
+#endif /* __DEVICE_ASSIGNMENT_H__ */
diff --git a/hw/extboot.c b/hw/extboot.c
new file mode 100644
index 000000000..8ada21bf5
--- /dev/null
+++ b/hw/extboot.c
@@ -0,0 +1,123 @@
+/*
+ * Extended boot option ROM support.
+ *
+ * Copyright IBM, Corp. 2007
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include "hw.h"
+#include "pc.h"
+#include "isa.h"
+#include "block.h"
+
+/* Extended Boot ROM support */
+
+union extboot_cmd
+{
+ uint16_t type;
+ struct {
+ uint16_t type;
+ uint16_t cylinders;
+ uint16_t heads;
+ uint16_t sectors;
+ uint64_t nb_sectors;
+ } query_geometry;
+ struct {
+ uint16_t type;
+ uint16_t nb_sectors;
+ uint16_t segment;
+ uint16_t offset;
+ uint64_t sector;
+ } xfer;
+};
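+
+/*
+ * Protocol sketch (as handled below): the extboot option ROM writes a
+ * real-mode segment to port 0x405; type 0x00 queries the disk geometry,
+ * 0x01 reads sectors into guest memory, 0x02 writes sectors from it.
+ */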
+
+static void get_translated_chs(BlockDriverState *bs, int *c, int *h, int *s)
+{
+ bdrv_get_geometry_hint(bs, c, h, s);
+
+ if (*c <= 1024) {
+ *c >>= 0;
+ *h <<= 0;
+ } else if (*c <= 2048) {
+ *c >>= 1;
+ *h <<= 1;
+ } else if (*c <= 4096) {
+ *c >>= 2;
+ *h <<= 2;
+ } else if (*c <= 8192) {
+ *c >>= 3;
+ *h <<= 3;
+ } else {
+ *c >>= 4;
+ *h <<= 4;
+ }
+
+ /* what is the correct algorithm for this?? */
+ if (*h == 256) {
+ *h = 255;
+ *c = *c + 1;
+ }
+}
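+
+/*
+ * For illustration: a geometry hint of 2080 cylinders / 16 heads falls into
+ * the "<= 4096" bucket above, so the BIOS-visible geometry becomes
+ * 520 cylinders and 64 heads; the sector count is passed through unchanged.
+ */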
+
+static void extboot_write_cmd(void *opaque, uint32_t addr, uint32_t value)
+{
+ union extboot_cmd cmd;
+ BlockDriverState *bs = opaque;
+ int cylinders, heads, sectors, err;
+ uint64_t nb_sectors;
+ target_phys_addr_t pa = 0;
+ int blen = 0;
+ void *buf = NULL;
+
+ cpu_physical_memory_read((value & 0xFFFF) << 4, (uint8_t *)&cmd,
+ sizeof(cmd));
+
+ if (cmd.type == 0x01 || cmd.type == 0x02) {
+ pa = cmd.xfer.segment * 16 + cmd.xfer.offset;
+ blen = cmd.xfer.nb_sectors * 512;
+ buf = qemu_memalign(512, blen);
+ }
+
+ switch (cmd.type) {
+ case 0x00:
+ get_translated_chs(bs, &cylinders, &heads, &sectors);
+ bdrv_get_geometry(bs, &nb_sectors);
+ cmd.query_geometry.cylinders = cylinders;
+ cmd.query_geometry.heads = heads;
+ cmd.query_geometry.sectors = sectors;
+ cmd.query_geometry.nb_sectors = nb_sectors;
+ break;
+ case 0x01:
+ err = bdrv_read(bs, cmd.xfer.sector, buf, cmd.xfer.nb_sectors);
+ if (err)
+ printf("Read failed\n");
+
+ cpu_physical_memory_write(pa, buf, blen);
+
+ break;
+ case 0x02:
+ cpu_physical_memory_read(pa, buf, blen);
+
+ err = bdrv_write(bs, cmd.xfer.sector, buf, cmd.xfer.nb_sectors);
+ if (err)
+ printf("Write failed\n");
+
+ break;
+ }
+
+ cpu_physical_memory_write((value & 0xFFFF) << 4, (uint8_t *)&cmd,
+ sizeof(cmd));
+ if (buf)
+ qemu_free(buf);
+}
+
+void extboot_init(BlockDriverState *bs)
+{
+ register_ioport_write(0x405, 1, 2, extboot_write_cmd, bs);
+}
diff --git a/hw/hpet.c b/hw/hpet.c
index d5c406cca..ba80504c4 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -238,6 +238,11 @@ static int hpet_post_load(void *opaque, int version_id)
if (s->timer[0].config & HPET_TN_FSB_CAP) {
s->flags |= 1 << HPET_MSI_SUPPORT;
}
+
+ if (hpet_in_legacy_mode(s)) {
+ hpet_pit_disable();
+ }
+
return 0;
}
diff --git a/hw/hw.h b/hw/hw.h
index c2de6fe8c..cb553a6e8 100644
--- a/hw/hw.h
+++ b/hw/hw.h
@@ -340,6 +340,10 @@ extern const VMStateInfo vmstate_info_uint16;
extern const VMStateInfo vmstate_info_uint32;
extern const VMStateInfo vmstate_info_uint64;
+#ifdef __linux__
+extern const VMStateInfo vmstate_info_u64;
+#endif
+
extern const VMStateInfo vmstate_info_timer;
extern const VMStateInfo vmstate_info_ptimer;
extern const VMStateInfo vmstate_info_buffer;
@@ -630,6 +634,15 @@ extern const VMStateDescription vmstate_i2c_slave;
#define VMSTATE_UINT64(_f, _s) \
VMSTATE_UINT64_V(_f, _s, 0)
+/* This is needed because on Linux __u64 is unsigned long long,
+ while with glibc uint64_t is unsigned long on 64-bit hosts */
+#ifdef __linux__
+#define VMSTATE_U64_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_u64, __u64)
+#define VMSTATE_U64(_f, _s) \
+ VMSTATE_U64_V(_f, _s, 0)
+#endif
+
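+/* A __u64 field in a kernel-derived state struct would then be saved with,
+   for instance (hypothetical struct and field names):
+       VMSTATE_U64(msr_val, FooKVMState),
+   inside the .fields list of its VMStateDescription. */
+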
#define VMSTATE_UINT8_EQUAL(_f, _s) \
VMSTATE_SINGLE(_f, _s, 0, vmstate_info_uint8_equal, uint8_t)
diff --git a/hw/i8254-kvm.c b/hw/i8254-kvm.c
new file mode 100644
index 000000000..6125213ce
--- /dev/null
+++ b/hw/i8254-kvm.c
@@ -0,0 +1,122 @@
+/*
+ * QEMU 8253/8254 interval timer emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "hw.h"
+#include "pc.h"
+#include "isa.h"
+#include "qemu-timer.h"
+#include "i8254.h"
+#include "qemu-kvm.h"
+
+extern VMStateDescription vmstate_pit;
+
+static PITState pit_state;
+
+static void kvm_pit_pre_save(void *opaque)
+{
+ PITState *s = (void *)opaque;
+ struct kvm_pit_state2 pit2;
+ struct kvm_pit_channel_state *c;
+ struct PITChannelState *sc;
+ int i;
+
+ if(qemu_kvm_has_pit_state2()) {
+ kvm_get_pit2(kvm_context, &pit2);
+ s->flags = pit2.flags;
+ } else {
+ /* pit2 is a superset of the pit struct, so just cast it and use it */
+ kvm_get_pit(kvm_context, (struct kvm_pit_state *)&pit2);
+ }
+ for (i = 0; i < 3; i++) {
+ c = &pit2.channels[i];
+ sc = &s->channels[i];
+ sc->count = c->count;
+ sc->latched_count = c->latched_count;
+ sc->count_latched = c->count_latched;
+ sc->status_latched = c->status_latched;
+ sc->status = c->status;
+ sc->read_state = c->read_state;
+ sc->write_state = c->write_state;
+ sc->write_latch = c->write_latch;
+ sc->rw_mode = c->rw_mode;
+ sc->mode = c->mode;
+ sc->bcd = c->bcd;
+ sc->gate = c->gate;
+ sc->count_load_time = c->count_load_time;
+ }
+}
+
+static int kvm_pit_post_load(void *opaque, int version_id)
+{
+ PITState *s = opaque;
+ struct kvm_pit_state2 pit2;
+ struct kvm_pit_channel_state *c;
+ struct PITChannelState *sc;
+ int i;
+
+ pit2.flags = s->flags;
+ for (i = 0; i < 3; i++) {
+ c = &pit2.channels[i];
+ sc = &s->channels[i];
+ c->count = sc->count;
+ c->latched_count = sc->latched_count;
+ c->count_latched = sc->count_latched;
+ c->status_latched = sc->status_latched;
+ c->status = sc->status;
+ c->read_state = sc->read_state;
+ c->write_state = sc->write_state;
+ c->write_latch = sc->write_latch;
+ c->rw_mode = sc->rw_mode;
+ c->mode = sc->mode;
+ c->bcd = sc->bcd;
+ c->gate = sc->gate;
+ c->count_load_time = sc->count_load_time;
+ }
+
+ if(qemu_kvm_has_pit_state2()) {
+ kvm_set_pit2(kvm_context, &pit2);
+ } else {
+ kvm_set_pit(kvm_context, (struct kvm_pit_state *)&pit2);
+ }
+ return 0;
+}
+
+static void dummy_timer(void *opaque)
+{
+}
+
+PITState *kvm_pit_init(int base, qemu_irq irq)
+{
+ PITState *pit = &pit_state;
+ PITChannelState *s;
+
+ s = &pit->channels[0];
+ s->irq_timer = qemu_new_timer(vm_clock, dummy_timer, s);
+ vmstate_pit.pre_save = kvm_pit_pre_save;
+ vmstate_pit.post_load = kvm_pit_post_load;
+ vmstate_register(NULL, base, &vmstate_pit, pit);
+ qemu_register_reset(pit_reset, pit);
+ pit_reset(pit);
+
+ return pit;
+}
diff --git a/hw/i8254.c b/hw/i8254.c
index 06b225cf4..321a5d535 100644
--- a/hw/i8254.c
+++ b/hw/i8254.c
@@ -25,38 +25,11 @@
#include "pc.h"
#include "isa.h"
#include "qemu-timer.h"
+#include "kvm.h"
+#include "i8254.h"
//#define DEBUG_PIT
-#define RW_STATE_LSB 1
-#define RW_STATE_MSB 2
-#define RW_STATE_WORD0 3
-#define RW_STATE_WORD1 4
-
-typedef struct PITChannelState {
- int count; /* can be 65536 */
- uint16_t latched_count;
- uint8_t count_latched;
- uint8_t status_latched;
- uint8_t status;
- uint8_t read_state;
- uint8_t write_state;
- uint8_t write_latch;
- uint8_t rw_mode;
- uint8_t mode;
- uint8_t bcd; /* not supported */
- uint8_t gate; /* timer start */
- int64_t count_load_time;
- /* irq handling */
- int64_t next_transition_time;
- QEMUTimer *irq_timer;
- qemu_irq irq;
-} PITChannelState;
-
-struct PITState {
- PITChannelState channels[3];
-};
-
static PITState pit_state;
static void pit_irq_timer_update(PITChannelState *s, int64_t current_time);
@@ -228,13 +201,18 @@ int pit_get_mode(PITState *pit, int channel)
return s->mode;
}
-static inline void pit_load_count(PITChannelState *s, int val)
+static inline void pit_load_count(PITState *s, int val, int chan)
{
if (val == 0)
val = 0x10000;
- s->count_load_time = qemu_get_clock(vm_clock);
- s->count = val;
- pit_irq_timer_update(s, s->count_load_time);
+ s->channels[chan].count_load_time = qemu_get_clock(vm_clock);
+ s->channels[chan].count = val;
+#ifdef TARGET_I386
+ if (chan == 0 && pit_state.flags & PIT_FLAGS_HPET_LEGACY) {
+ return;
+ }
+#endif
+ pit_irq_timer_update(&s->channels[chan], s->channels[chan].count_load_time);
}
/* if already latched, do not latch again */
@@ -294,17 +272,17 @@ static void pit_ioport_write(void *opaque, uint32_t addr, uint32_t val)
switch(s->write_state) {
default:
case RW_STATE_LSB:
- pit_load_count(s, val);
+ pit_load_count(pit, val, addr);
break;
case RW_STATE_MSB:
- pit_load_count(s, val << 8);
+ pit_load_count(pit, val << 8, addr);
break;
case RW_STATE_WORD0:
s->write_latch = val;
s->write_state = RW_STATE_WORD1;
break;
case RW_STATE_WORD1:
- pit_load_count(s, s->write_latch | (val << 8));
+ pit_load_count(pit, s->write_latch | (val << 8), addr);
s->write_state = RW_STATE_WORD0;
break;
}
@@ -364,6 +342,11 @@ static uint32_t pit_ioport_read(void *opaque, uint32_t addr)
return ret;
}
+/* global counters for time-drift fix */
+int64_t timer_acks=0, timer_interrupts=0, timer_ints_to_push=0;
+
+extern int time_drift_fix;
+
static void pit_irq_timer_update(PITChannelState *s, int64_t current_time)
{
int64_t expire_time;
@@ -374,16 +357,35 @@ static void pit_irq_timer_update(PITChannelState *s, int64_t current_time)
expire_time = pit_get_next_transition_time(s, current_time);
irq_level = pit_get_out1(s, current_time);
qemu_set_irq(s->irq, irq_level);
+ if (time_drift_fix && irq_level==1) {
+ /* FIXME: fine tune timer_max_fix (max fix per tick).
+ * Should it be 1 (double time), 2, 4, or 10?
+ * Currently setting it to 5% of PIT-ticks-per-second (per PIT-tick)
+ */
+ const long pit_ticks_per_sec = (s->count>0) ? (PIT_FREQ/s->count) : 0;
+ const long timer_max_fix = pit_ticks_per_sec/20;
+ const long delta = timer_interrupts - timer_acks;
+ const long max_delta = pit_ticks_per_sec * 60; /* one minute */
+ if ((delta > max_delta) && (pit_ticks_per_sec > 0)) {
+ printf("time drift is too long, %ld seconds were lost\n", delta/pit_ticks_per_sec);
+ timer_acks = timer_interrupts;
+ timer_ints_to_push = 0;
+ } else if (delta > 0) {
+ timer_ints_to_push = MIN(delta, timer_max_fix);
+ }
+ timer_interrupts++;
+ }
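+ /*
+ * Rough illustration: a guest programming a 1000Hz PIT tick (count ~1193)
+ * yields pit_ticks_per_sec ~1000, so at most 50 lost ticks are queued for
+ * replay at a time, and a backlog older than ~60000 ticks (one minute)
+ * is dropped.
+ */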
#ifdef DEBUG_PIT
printf("irq_level=%d next_delay=%f\n",
irq_level,
(double)(expire_time - current_time) / get_ticks_per_sec());
#endif
s->next_transition_time = expire_time;
- if (expire_time != -1)
+ if (expire_time != -1) {
qemu_mod_timer(s->irq_timer, expire_time);
- else
+ } else {
qemu_del_timer(s->irq_timer);
+ }
}
static void pit_irq_timer(void *opaque)
@@ -423,9 +425,10 @@ static int pit_load_old(QEMUFile *f, void *opaque, int version_id)
PITChannelState *s;
int i;
- if (version_id != 1)
+ if (version_id != PIT_SAVEVM_VERSION)
return -EINVAL;
+ pit->flags = qemu_get_be32(f);
for(i = 0; i < 3; i++) {
s = &pit->channels[i];
s->count=qemu_get_be32(f);
@@ -446,42 +449,61 @@ static int pit_load_old(QEMUFile *f, void *opaque, int version_id)
qemu_get_timer(f, s->irq_timer);
}
}
+
return 0;
}
-static const VMStateDescription vmstate_pit = {
+VMStateDescription vmstate_pit = {
.name = "i8254",
.version_id = 2,
.minimum_version_id = 2,
.minimum_version_id_old = 1,
.load_state_old = pit_load_old,
.fields = (VMStateField []) {
+ VMSTATE_UINT32(flags, PITState),
VMSTATE_STRUCT_ARRAY(channels, PITState, 3, 2, vmstate_pit_channel, PITChannelState),
VMSTATE_TIMER(channels[0].irq_timer, PITState),
VMSTATE_END_OF_LIST()
}
};
-static void pit_reset(void *opaque)
+void pit_reset(void *opaque)
{
PITState *pit = opaque;
PITChannelState *s;
int i;
+#ifdef TARGET_I386
+ pit->flags &= ~PIT_FLAGS_HPET_LEGACY;
+#endif
for(i = 0;i < 3; i++) {
s = &pit->channels[i];
s->mode = 3;
s->gate = (i != 2);
- pit_load_count(s, 0);
+ pit_load_count(pit, 0, i);
}
}
+#ifdef TARGET_I386
/* When HPET is operating in legacy mode, i8254 timer0 is disabled */
-void hpet_pit_disable(void) {
- PITChannelState *s;
- s = &pit_state.channels[0];
- if (s->irq_timer)
- qemu_del_timer(s->irq_timer);
+
+void hpet_pit_disable(void)
+{
+ PITChannelState *s = &pit_state.channels[0];
+
+ if (kvm_enabled() && kvm_pit_in_kernel()) {
+ if (qemu_kvm_has_pit_state2()) {
+ kvm_hpet_disable_kpit();
+ } else {
+ fprintf(stderr, "%s: kvm does not support pit_state2!\n", __FUNCTION__);
+ exit(1);
+ }
+ } else {
+ pit_state.flags |= PIT_FLAGS_HPET_LEGACY;
+ if (s->irq_timer) {
+ qemu_del_timer(s->irq_timer);
+ }
+ }
}
/* When HPET is reset or leaving legacy mode, it must reenable i8254
@@ -491,12 +513,21 @@ void hpet_pit_disable(void) {
void hpet_pit_enable(void)
{
PITState *pit = &pit_state;
- PITChannelState *s;
- s = &pit->channels[0];
- s->mode = 3;
- s->gate = 1;
- pit_load_count(s, 0);
+ PITChannelState *s = &pit->channels[0];
+
+ if (kvm_enabled() && kvm_pit_in_kernel()) {
+ if (qemu_kvm_has_pit_state2()) {
+ kvm_hpet_enable_kpit();
+ } else {
+ fprintf(stderr, "%s: kvm does not support pit_state2!\n", __FUNCTION__);
+ exit(1);
+ }
+ } else {
+ pit_state.flags &= ~PIT_FLAGS_HPET_LEGACY;
+ pit_load_count(pit, s->count, 0);
+ }
}
+#endif
PITState *pit_init(int base, qemu_irq irq)
{
diff --git a/hw/i8254.h b/hw/i8254.h
new file mode 100644
index 000000000..d23303a8b
--- /dev/null
+++ b/hw/i8254.h
@@ -0,0 +1,69 @@
+/*
+ * QEMU 8253/8254 interval timer emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_I8254_H
+#define QEMU_I8254_H
+
+#define PIT_SAVEVM_NAME "i8254"
+#define PIT_SAVEVM_VERSION 2
+
+#define RW_STATE_LSB 1
+#define RW_STATE_MSB 2
+#define RW_STATE_WORD0 3
+#define RW_STATE_WORD1 4
+
+#define PIT_FLAGS_HPET_LEGACY 1
+
+typedef struct PITChannelState {
+ int count; /* can be 65536 */
+ uint16_t latched_count;
+ uint8_t count_latched;
+ uint8_t status_latched;
+ uint8_t status;
+ uint8_t read_state;
+ uint8_t write_state;
+ uint8_t write_latch;
+ uint8_t rw_mode;
+ uint8_t mode;
+ uint8_t bcd; /* not supported */
+ uint8_t gate; /* timer start */
+ int64_t count_load_time;
+ /* irq handling */
+ int64_t next_transition_time;
+ QEMUTimer *irq_timer;
+ qemu_irq irq;
+} PITChannelState;
+
+struct PITState {
+ PITChannelState channels[3];
+ uint32_t flags;
+};
+
+void pit_save(QEMUFile *f, void *opaque);
+
+int pit_load(QEMUFile *f, void *opaque, int version_id);
+
+void pit_reset(void *opaque);
+
+#endif
diff --git a/hw/i8259.c b/hw/i8259.c
index a8dbee647..11060d3d1 100644
--- a/hw/i8259.c
+++ b/hw/i8259.c
@@ -23,10 +23,13 @@
*/
#include "hw.h"
#include "pc.h"
+#include "apic.h"
#include "isa.h"
#include "monitor.h"
#include "qemu-timer.h"
+#include "kvm.h"
+
/* debug PIC */
//#define DEBUG_PIC
@@ -189,7 +192,6 @@ int64_t irq_time[16];
static void i8259_set_irq(void *opaque, int irq, int level)
{
PicState2 *s = opaque;
-
#if defined(DEBUG_PIC) || defined(DEBUG_IRQ_COUNT)
if (level != irq_level[irq]) {
DPRINTF("i8259_set_irq: irq=%d level=%d\n", irq, level);
@@ -218,18 +220,35 @@ static inline void pic_intack(PicState *s, int irq)
} else {
s->isr |= (1 << irq);
}
+
/* We don't clear a level sensitive interrupt here */
if (!(s->elcr & (1 << irq)))
s->irr &= ~(1 << irq);
+
}
+extern int time_drift_fix;
+
int pic_read_irq(PicState2 *s)
{
int irq, irq2, intno;
irq = pic_get_irq(&s->pics[0]);
if (irq >= 0) {
+
pic_intack(&s->pics[0], irq);
+#ifdef TARGET_I386
+ if (time_drift_fix && irq == 0) {
+ extern int64_t timer_acks, timer_ints_to_push;
+ timer_acks++;
+ if (timer_ints_to_push > 0) {
+ timer_ints_to_push--;
+ /* simulate an edge irq0, like the one generated by i8254 */
+ pic_set_irq1(&s->pics[0], 0, 0);
+ pic_set_irq1(&s->pics[0], 0, 1);
+ }
+ }
+#endif
if (irq == 2) {
irq2 = pic_get_irq(&s->pics[1]);
if (irq2 >= 0) {
@@ -448,9 +467,33 @@ static uint32_t elcr_ioport_read(void *opaque, uint32_t addr1)
return s->elcr;
}
+static void kvm_kernel_pic_save_to_user(PicState *s);
+static int kvm_kernel_pic_load_from_user(PicState *s);
+
+static void pic_pre_save(void *opaque)
+{
+ PicState *s = opaque;
+
+ if (kvm_enabled() && kvm_irqchip_in_kernel()) {
+ kvm_kernel_pic_save_to_user(s);
+ }
+}
+
+static int pic_post_load(void *opaque, int version_id)
+{
+ PicState *s = opaque;
+
+ if (kvm_enabled() && kvm_irqchip_in_kernel()) {
+ kvm_kernel_pic_load_from_user(s);
+ }
+ return 0;
+}
+
static const VMStateDescription vmstate_pic = {
.name = "i8259",
.version_id = 1,
+ .pre_save = pic_pre_save,
+ .post_load = pic_post_load,
.minimum_version_id = 1,
.minimum_version_id_old = 1,
.fields = (VMStateField []) {
@@ -537,3 +580,103 @@ qemu_irq *i8259_init(qemu_irq parent_irq)
isa_pic = s;
return qemu_allocate_irqs(i8259_set_irq, s, 16);
}
+
+static void kvm_kernel_pic_save_to_user(PicState *s)
+{
+#ifdef KVM_CAP_IRQCHIP
+ struct kvm_irqchip chip;
+ struct kvm_pic_state *kpic;
+
+ chip.chip_id = (&s->pics_state->pics[0] == s) ?
+ KVM_IRQCHIP_PIC_MASTER :
+ KVM_IRQCHIP_PIC_SLAVE;
+ kvm_get_irqchip(kvm_context, &chip);
+ kpic = &chip.chip.pic;
+
+ s->last_irr = kpic->last_irr;
+ s->irr = kpic->irr;
+ s->imr = kpic->imr;
+ s->isr = kpic->isr;
+ s->priority_add = kpic->priority_add;
+ s->irq_base = kpic->irq_base;
+ s->read_reg_select = kpic->read_reg_select;
+ s->poll = kpic->poll;
+ s->special_mask = kpic->special_mask;
+ s->init_state = kpic->init_state;
+ s->auto_eoi = kpic->auto_eoi;
+ s->rotate_on_auto_eoi = kpic->rotate_on_auto_eoi;
+ s->special_fully_nested_mode = kpic->special_fully_nested_mode;
+ s->init4 = kpic->init4;
+ s->elcr = kpic->elcr;
+ s->elcr_mask = kpic->elcr_mask;
+#endif
+}
+
+static int kvm_kernel_pic_load_from_user(PicState *s)
+{
+#ifdef KVM_CAP_IRQCHIP
+ struct kvm_irqchip chip;
+ struct kvm_pic_state *kpic;
+
+ chip.chip_id = (&s->pics_state->pics[0] == s) ?
+ KVM_IRQCHIP_PIC_MASTER :
+ KVM_IRQCHIP_PIC_SLAVE;
+ kpic = &chip.chip.pic;
+
+ kpic->last_irr = s->last_irr;
+ kpic->irr = s->irr;
+ kpic->imr = s->imr;
+ kpic->isr = s->isr;
+ kpic->priority_add = s->priority_add;
+ kpic->irq_base = s->irq_base;
+ kpic->read_reg_select = s->read_reg_select;
+ kpic->poll = s->poll;
+ kpic->special_mask = s->special_mask;
+ kpic->init_state = s->init_state;
+ kpic->auto_eoi = s->auto_eoi;
+ kpic->rotate_on_auto_eoi = s->rotate_on_auto_eoi;
+ kpic->special_fully_nested_mode = s->special_fully_nested_mode;
+ kpic->init4 = s->init4;
+ kpic->elcr = s->elcr;
+ kpic->elcr_mask = s->elcr_mask;
+
+ kvm_set_irqchip(kvm_context, &chip);
+#endif
+ return 0;
+}
+
+#ifdef KVM_CAP_IRQCHIP
+static void kvm_i8259_set_irq(void *opaque, int irq, int level)
+{
+ int pic_ret;
+ if (kvm_set_irq(irq, level, &pic_ret)) {
+ if (pic_ret != 0)
+ apic_set_irq_delivered();
+ return;
+ }
+}
+
+static void kvm_pic_init1(int io_addr, PicState *s)
+{
+ vmstate_register(NULL, io_addr, &vmstate_pic, s);
+ qemu_register_reset(pic_reset, s);
+}
+
+qemu_irq *kvm_i8259_init(qemu_irq parent_irq)
+{
+ PicState2 *s;
+
+ s = qemu_mallocz(sizeof(PicState2));
+
+ kvm_pic_init1(0x20, &s->pics[0]);
+ kvm_pic_init1(0xa0, &s->pics[1]);
+ s->parent_irq = parent_irq;
+ s->pics[0].pics_state = s;
+ s->pics[1].pics_state = s;
+ isa_pic = s;
+ return qemu_allocate_irqs(kvm_i8259_set_irq, s, 24);
+}
+#endif
diff --git a/hw/ioapic.c b/hw/ioapic.c
index 5ae21e910..276c72ed2 100644
--- a/hw/ioapic.c
+++ b/hw/ioapic.c
@@ -23,10 +23,14 @@
#include "hw.h"
#include "pc.h"
#include "apic.h"
+#include "sysemu.h"
#include "qemu-timer.h"
#include "host-utils.h"
#include "sysbus.h"
+#include "kvm.h"
+
//#define DEBUG_IOAPIC
#ifdef DEBUG_IOAPIC
@@ -36,6 +40,7 @@
#define DPRINTF(fmt, ...)
#endif
+#define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000
#define IOAPIC_LVT_MASKED (1<<16)
#define IOAPIC_TRIGGER_EDGE 0
@@ -56,6 +61,7 @@ struct IOAPICState {
SysBusDevice busdev;
uint8_t id;
uint8_t ioregsel;
+ uint64_t base_address;
uint32_t irr;
uint64_t ioredtbl[IOAPIC_NUM_PINS];
@@ -106,8 +112,9 @@ static void ioapic_set_irq(void *opaque, int vector, int level)
* the cleanest way of doing it but it should work. */
DPRINTF("%s: %s vec %x\n", __func__, level? "raise" : "lower", vector);
- if (vector == 0)
+ if (vector == 0 && irq0override) {
vector = 2;
+ }
if (vector >= 0 && vector < IOAPIC_NUM_PINS) {
uint32_t mask = 1 << vector;
@@ -199,14 +206,91 @@ static void ioapic_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t va
}
}
+static void kvm_kernel_ioapic_save_to_user(IOAPICState *s)
+{
+#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
+ struct kvm_irqchip chip;
+ struct kvm_ioapic_state *kioapic;
+ int i;
+
+ chip.chip_id = KVM_IRQCHIP_IOAPIC;
+ kvm_get_irqchip(kvm_context, &chip);
+ kioapic = &chip.chip.ioapic;
+
+ s->id = kioapic->id;
+ s->ioregsel = kioapic->ioregsel;
+ s->base_address = kioapic->base_address;
+ s->irr = kioapic->irr;
+ for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+ s->ioredtbl[i] = kioapic->redirtbl[i].bits;
+ }
+#endif
+}
+
+static void kvm_kernel_ioapic_load_from_user(IOAPICState *s)
+{
+#if defined(KVM_CAP_IRQCHIP) && defined(TARGET_I386)
+ struct kvm_irqchip chip;
+ struct kvm_ioapic_state *kioapic;
+ int i;
+
+ chip.chip_id = KVM_IRQCHIP_IOAPIC;
+ kioapic = &chip.chip.ioapic;
+
+ kioapic->id = s->id;
+ kioapic->ioregsel = s->ioregsel;
+ kioapic->base_address = s->base_address;
+ kioapic->irr = s->irr;
+ for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+ kioapic->redirtbl[i].bits = s->ioredtbl[i];
+ }
+
+ kvm_set_irqchip(kvm_context, &chip);
+#endif
+}
+
+static void ioapic_pre_save(void *opaque)
+{
+ IOAPICState *s = (void *)opaque;
+
+ if (kvm_enabled() && kvm_irqchip_in_kernel()) {
+ kvm_kernel_ioapic_save_to_user(s);
+ }
+}
+
+static int ioapic_pre_load(void *opaque)
+{
+ IOAPICState *s = opaque;
+
+ /* if we are loading a version 1 snapshot, just set these to sane values */
+ s->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
+ s->irr = 0;
+ return 0;
+}
+
+static int ioapic_post_load(void *opaque, int version_id)
+{
+ IOAPICState *s = opaque;
+
+ if (kvm_enabled() && kvm_irqchip_in_kernel()) {
+ kvm_kernel_ioapic_load_from_user(s);
+ }
+ return 0;
+}
+
static const VMStateDescription vmstate_ioapic = {
.name = "ioapic",
- .version_id = 1,
+ .version_id = 2,
.minimum_version_id = 1,
.minimum_version_id_old = 1,
+ .pre_load = ioapic_pre_load,
+ .post_load = ioapic_post_load,
+ .pre_save = ioapic_pre_save,
.fields = (VMStateField []) {
VMSTATE_UINT8(id, IOAPICState),
VMSTATE_UINT8(ioregsel, IOAPICState),
+ VMSTATE_UINT64_V(base_address, IOAPICState, 2),
+ VMSTATE_UINT32_V(irr, IOAPICState, 2),
VMSTATE_UINT64_ARRAY(ioredtbl, IOAPICState, IOAPIC_NUM_PINS),
VMSTATE_END_OF_LIST()
}
@@ -217,11 +301,17 @@ static void ioapic_reset(DeviceState *d)
IOAPICState *s = DO_UPCAST(IOAPICState, busdev.qdev, d);
int i;
+ s->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
s->id = 0;
s->ioregsel = 0;
s->irr = 0;
for(i = 0; i < IOAPIC_NUM_PINS; i++)
s->ioredtbl[i] = 1 << 16; /* mask LVT */
+#ifdef KVM_CAP_IRQCHIP
+ if (kvm_enabled() && kvm_irqchip_in_kernel()) {
+ kvm_kernel_ioapic_load_from_user(s);
+ }
+#endif
}
static CPUReadMemoryFunc * const ioapic_mem_read[3] = {
diff --git a/hw/ipf.c b/hw/ipf.c
new file mode 100644
index 000000000..21cff72b7
--- /dev/null
+++ b/hw/ipf.c
@@ -0,0 +1,713 @@
+/*
+ * Itanium Platform Emulator derived from QEMU PC System Emulator
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Copyright (c) 2007 Intel
+ * Ported for IA64 Platform Zhang Xiantao <xiantao.zhang@intel.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "hw.h"
+#include "pc.h"
+#include "fdc.h"
+#include "pci.h"
+#include "block.h"
+#include "sysemu.h"
+#include "audio/audio.h"
+#include "net.h"
+#include "smbus.h"
+#include "boards.h"
+#include "firmware.h"
+#include "ia64intrin.h"
+#include <unistd.h>
+#include "device-assignment.h"
+#include "virtio-blk.h"
+
+#include "qemu-kvm.h"
+
+#define FW_FILENAME "Flash.fd"
+
+/* Leave a chunk of memory at the top of RAM for the BIOS ACPI tables. */
+#define ACPI_DATA_SIZE 0x10000
+
+#define MAX_IDE_BUS 2
+
+static fdctrl_t *floppy_controller;
+static RTCState *rtc_state;
+static PCIDevice *i440fx_state;
+
+static uint32_t ipf_to_legacy_io(target_phys_addr_t addr)
+{
+ return (uint32_t)(((addr&0x3ffffff) >> 12 << 2)|((addr) & 0x3));
+}
+
+static void ipf_legacy_io_writeb(void *opaque, target_phys_addr_t addr,
+ uint32_t val) {
+ uint32_t port = ipf_to_legacy_io(addr);
+
+ cpu_outb(0, port, val);
+}
+
+static void ipf_legacy_io_writew(void *opaque, target_phys_addr_t addr,
+ uint32_t val) {
+ uint32_t port = ipf_to_legacy_io(addr);
+
+ cpu_outw(0, port, val);
+}
+
+static void ipf_legacy_io_writel(void *opaque, target_phys_addr_t addr,
+ uint32_t val) {
+ uint32_t port = ipf_to_legacy_io(addr);
+
+ cpu_outl(0, port, val);
+}
+
+static uint32_t ipf_legacy_io_readb(void *opaque, target_phys_addr_t addr)
+{
+ uint32_t port = ipf_to_legacy_io(addr);
+
+ return cpu_inb(0, port);
+}
+
+static uint32_t ipf_legacy_io_readw(void *opaque, target_phys_addr_t addr)
+{
+ uint32_t port = ipf_to_legacy_io(addr);
+
+ return cpu_inw(0, port);
+}
+
+static uint32_t ipf_legacy_io_readl(void *opaque, target_phys_addr_t addr)
+{
+ uint32_t port = ipf_to_legacy_io(addr);
+
+ return cpu_inl(0, port);
+}
+
+static CPUReadMemoryFunc *ipf_legacy_io_read[3] = {
+ ipf_legacy_io_readb,
+ ipf_legacy_io_readw,
+ ipf_legacy_io_readl,
+};
+
+static CPUWriteMemoryFunc *ipf_legacy_io_write[3] = {
+ ipf_legacy_io_writeb,
+ ipf_legacy_io_writew,
+ ipf_legacy_io_writel,
+};
+
+static void pic_irq_request(void *opaque, int irq, int level)
+{
+ fprintf(stderr,"pic_irq_request called!\n");
+}
+
+/* PC cmos mappings */
+
+#define REG_EQUIPMENT_BYTE 0x14
+
+static int cmos_get_fd_drive_type(int fd0)
+{
+ int val;
+
+ switch (fd0) {
+ case 0:
+ /* 1.44 MB 3.5" drive */
+ val = 4;
+ break;
+ case 1:
+ /* 2.88 MB 3.5" drive */
+ val = 5;
+ break;
+ case 2:
+ /* 1.2 MB 5.25" drive */
+ val = 2;
+ break;
+ default:
+ val = 0;
+ break;
+ }
+ return val;
+}
+
+static void cmos_init_hd(int type_ofs, int info_ofs, BlockDriverState *hd)
+{
+ RTCState *s = rtc_state;
+ int cylinders, heads, sectors;
+
+ bdrv_get_geometry_hint(hd, &cylinders, &heads, &sectors);
+ rtc_set_memory(s, type_ofs, 47);
+ rtc_set_memory(s, info_ofs, cylinders);
+ rtc_set_memory(s, info_ofs + 1, cylinders >> 8);
+ rtc_set_memory(s, info_ofs + 2, heads);
+ rtc_set_memory(s, info_ofs + 3, 0xff);
+ rtc_set_memory(s, info_ofs + 4, 0xff);
+ rtc_set_memory(s, info_ofs + 5, 0xc0 | ((heads > 8) << 3));
+ rtc_set_memory(s, info_ofs + 6, cylinders);
+ rtc_set_memory(s, info_ofs + 7, cylinders >> 8);
+ rtc_set_memory(s, info_ofs + 8, sectors);
+}
+
+/* convert boot_device letter to something recognizable by the bios */
+static int boot_device2nibble(char boot_device)
+{
+ switch(boot_device) {
+ case 'a':
+ case 'b':
+ return 0x01; /* floppy boot */
+ case 'c':
+ return 0x02; /* hard drive boot */
+ case 'd':
+ return 0x03; /* CD-ROM boot */
+ case 'n':
+ return 0x04; /* Network boot */
+ }
+ return 0;
+}
+
+/* hd_table must contain 4 block drivers */
+static void cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
+ const char *boot_device, BlockDriverState **hd_table)
+{
+ RTCState *s = rtc_state;
+ int nbds, bds[3] = { 0, };
+ int val;
+ int fd0, fd1, nb;
+ int i;
+
+ /* various important CMOS locations needed by PC/Bochs bios */
+
+ /* memory size */
+ val = 640; /* base memory in K */
+ rtc_set_memory(s, 0x15, val);
+ rtc_set_memory(s, 0x16, val >> 8);
+
+ val = (ram_size / 1024) - 1024;
+ if (val > 65535)
+ val = 65535;
+ rtc_set_memory(s, 0x17, val);
+ rtc_set_memory(s, 0x18, val >> 8);
+ rtc_set_memory(s, 0x30, val);
+ rtc_set_memory(s, 0x31, val >> 8);
+
+ if (above_4g_mem_size) {
+ rtc_set_memory(s, 0x5b, (unsigned int)above_4g_mem_size >> 16);
+ rtc_set_memory(s, 0x5c, (unsigned int)above_4g_mem_size >> 24);
+ rtc_set_memory(s, 0x5d, above_4g_mem_size >> 32);
+ }
+ rtc_set_memory(s, 0x5f, smp_cpus - 1);
+
+ if (ram_size > (16 * 1024 * 1024))
+ val = (ram_size / 65536) - ((16 * 1024 * 1024) / 65536);
+ else
+ val = 0;
+ if (val > 65535)
+ val = 65535;
+ rtc_set_memory(s, 0x34, val);
+ rtc_set_memory(s, 0x35, val >> 8);
+
+ /* set boot devices, and disable floppy signature check if requested */
+#define PC_MAX_BOOT_DEVICES 3
+ nbds = strlen(boot_device);
+
+ if (nbds > PC_MAX_BOOT_DEVICES) {
+ fprintf(stderr, "Too many boot devices for PC\n");
+ exit(1);
+ }
+
+ for (i = 0; i < nbds; i++) {
+ bds[i] = boot_device2nibble(boot_device[i]);
+ if (bds[i] == 0) {
+ fprintf(stderr, "Invalid boot device for PC: '%c'\n",
+ boot_device[i]);
+ exit(1);
+ }
+ }
+
+ rtc_set_memory(s, 0x3d, (bds[1] << 4) | bds[0]);
+ rtc_set_memory(s, 0x38, (bds[2] << 4) | (fd_bootchk ? 0x0 : 0x1));
+
+ /* floppy type */
+
+ fd0 = fdctrl_get_drive_type(floppy_controller, 0);
+ fd1 = fdctrl_get_drive_type(floppy_controller, 1);
+
+ val = (cmos_get_fd_drive_type(fd0) << 4) | cmos_get_fd_drive_type(fd1);
+ rtc_set_memory(s, 0x10, val);
+
+ val = 0;
+ nb = 0;
+ if (fd0 < 3)
+ nb++;
+ if (fd1 < 3)
+ nb++;
+
+ switch (nb) {
+ case 0:
+ break;
+ case 1:
+ val |= 0x01; /* 1 drive, ready for boot */
+ break;
+ case 2:
+ val |= 0x41; /* 2 drives, ready for boot */
+ break;
+ }
+
+ val |= 0x02; /* FPU is there */
+ val |= 0x04; /* PS/2 mouse installed */
+ rtc_set_memory(s, REG_EQUIPMENT_BYTE, val);
+
+ /* hard drives */
+
+ rtc_set_memory(s, 0x12, (hd_table[0] ? 0xf0 : 0) | (hd_table[1] ? 0x0f : 0));
+ if (hd_table[0])
+ cmos_init_hd(0x19, 0x1b, hd_table[0]);
+ if (hd_table[1])
+ cmos_init_hd(0x1a, 0x24, hd_table[1]);
+
+ val = 0;
+ for (i = 0; i < 4; i++) {
+ if (hd_table[i]) {
+ int cylinders, heads, sectors, translation;
+ /* NOTE: bdrv_get_geometry_hint() returns the physical
+ geometry. It is always such that: 1 <= sects <= 63, 1
+ <= heads <= 16, 1 <= cylinders <= 16383. The BIOS
+ geometry can be different if a translation is done. */
+ translation = bdrv_get_translation_hint(hd_table[i]);
+ if (translation == BIOS_ATA_TRANSLATION_AUTO) {
+ bdrv_get_geometry_hint(hd_table[i], &cylinders,
+ &heads, &sectors);
+ if (cylinders <= 1024 && heads <= 16 && sectors <= 63) {
+ /* No translation. */
+ translation = 0;
+ } else {
+ /* LBA translation. */
+ translation = 1;
+ }
+ } else {
+ translation--;
+ }
+ val |= translation << (i * 2);
+ }
+ }
+ rtc_set_memory(s, 0x39, val);
+}
+
+static void main_cpu_reset(void *opaque)
+{
+ CPUState *env = opaque;
+ cpu_reset(env);
+}
+
+static const int ide_iobase[2] = { 0x1f0, 0x170 };
+static const int ide_iobase2[2] = { 0x3f6, 0x376 };
+static const int ide_irq[2] = { 14, 15 };
+
+#define NE2000_NB_MAX 6
+
+static int ne2000_io[NE2000_NB_MAX] = { 0x300, 0x320, 0x340,
+ 0x360, 0x280, 0x380 };
+static int ne2000_irq[NE2000_NB_MAX] = { 9, 10, 11, 3, 4, 5 };
+
+static int serial_io[MAX_SERIAL_PORTS] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8 };
+static int serial_irq[MAX_SERIAL_PORTS] = { 4, 3, 4, 3 };
+
+static int parallel_io[MAX_PARALLEL_PORTS] = { 0x378, 0x278, 0x3bc };
+static int parallel_irq[MAX_PARALLEL_PORTS] = { 7, 7, 7 };
+
+#ifdef HAS_AUDIO
+static void audio_init (PCIBus *pci_bus, qemu_irq *pic)
+{
+ struct soundhw *c;
+ int audio_enabled = 0;
+
+ for (c = soundhw; !audio_enabled && c->name; ++c) {
+ audio_enabled = c->enabled;
+ }
+
+ if (audio_enabled) {
+ AudioState *s;
+
+ s = AUD_init ();
+ if (s) {
+ for (c = soundhw; c->name; ++c) {
+ if (c->enabled) {
+ if (c->isa) {
+ c->init.init_isa (s, pic);
+ } else {
+ if (pci_bus) {
+ c->init.init_pci (pci_bus, s);
+ }
+ }
+ }
+ }
+ }
+ }
+}
+#endif
+
+static void pc_init_ne2k_isa(NICInfo *nd, qemu_irq *pic)
+{
+ static int nb_ne2k = 0;
+
+ if (nb_ne2k == NE2000_NB_MAX)
+ return;
+ isa_ne2000_init(ne2000_io[nb_ne2k], pic[ne2000_irq[nb_ne2k]], nd);
+ nb_ne2k++;
+}
+
+/* Itanium hardware initialisation */
+static void ipf_init1(ram_addr_t ram_size,
+ const char *boot_device, DisplayState *ds,
+ const char *kernel_filename, const char *kernel_cmdline,
+ const char *initrd_filename,
+ int pci_enabled, const char *cpu_model)
+{
+ char buf[1024];
+ int i;
+ ram_addr_t ram_addr;
+ ram_addr_t above_4g_mem_size = 0;
+ PCIBus *pci_bus;
+ PCIDevice *pci_dev;
+ int piix3_devfn = -1;
+ CPUState *env;
+ qemu_irq *cpu_irq;
+ qemu_irq *i8259;
+ int page_size;
+ int index;
+ unsigned long ipf_legacy_io_base, ipf_legacy_io_mem;
+ BlockDriverState *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
+ BlockDriverState *fd[MAX_FD];
+
+ page_size = getpagesize();
+ if (page_size != TARGET_PAGE_SIZE) {
+ fprintf(stderr, "Error! Host page size != qemu target page size;"
+ " you may need to change TARGET_PAGE_BITS in qemu."
+ " Host page size: 0x%x\n", page_size);
+ exit(-1);
+ }
+
+ if (ram_size >= 0xc0000000 ) {
+ above_4g_mem_size = ram_size - 0xc0000000;
+ ram_size = 0xc0000000;
+ }
+
+ /* init CPUs */
+ if (cpu_model == NULL) {
+ cpu_model = "IA64";
+ }
+
+ for(i = 0; i < smp_cpus; i++) {
+ env = cpu_init(cpu_model);
+ if (!env) {
+ fprintf(stderr, "Unable to find CPU definition\n");
+ exit(1);
+ }
+ if (i != 0)
+ env->hflags |= HF_HALTED_MASK;
+ register_savevm("cpu", i, 4, cpu_save, cpu_load, env);
+ qemu_register_reset(main_cpu_reset, 0, env);
+ }
+
+ /* allocate RAM */
+ if (kvm_enabled()) {
+ ram_addr = qemu_ram_alloc(0xa0000);
+ cpu_register_physical_memory(0, 0xa0000, ram_addr);
+
+ ram_addr = qemu_ram_alloc(0x20000); // Workaround 0xa0000-0xc0000
+
+ ram_addr = qemu_ram_alloc(0x40000);
+ cpu_register_physical_memory(0xc0000, 0x40000, ram_addr);
+
+ ram_addr = qemu_ram_alloc(ram_size - 0x100000);
+ cpu_register_physical_memory(0x100000, ram_size - 0x100000, ram_addr);
+ } else {
+ ram_addr = qemu_ram_alloc(ram_size);
+ cpu_register_physical_memory(0, ram_size, ram_addr);
+ }
+
+ /* above 4giga memory allocation */
+ if (above_4g_mem_size > 0) {
+ ram_addr = qemu_ram_alloc(above_4g_mem_size);
+ cpu_register_physical_memory(0x100000000, above_4g_mem_size, ram_addr);
+ }
+
+ /* Load firmware to its proper position. */
+ if (kvm_enabled()) {
+ unsigned long image_size;
+ uint8_t *image = NULL;
+ unsigned long nvram_addr;
+ unsigned long nvram_fd = 0;
+ unsigned long type = READ_FROM_NVRAM;
+ unsigned long i = 0;
+ unsigned long fw_offset;
+ ram_addr_t fw_mem = qemu_ram_alloc(GFW_SIZE);
+
+ snprintf(buf, sizeof(buf), "%s/%s", bios_dir, FW_FILENAME);
+ image = read_image(buf, &image_size );
+ if (NULL == image || !image_size) {
+ fprintf(stderr, "Error when reading Guest Firmware!\n");
+ fprintf(stderr, "Please check Guest firmware at %s\n", buf);
+ exit(1);
+ }
+ fw_offset = GFW_START + GFW_SIZE - image_size;
+
+ cpu_register_physical_memory(GFW_START, GFW_SIZE, fw_mem);
+ cpu_physical_memory_write(fw_offset, image, image_size);
+
+ free(image);
+
+ if (nvram) {
+ nvram_addr = NVRAM_START;
+ nvram_fd = kvm_ia64_nvram_init(type);
+ if (nvram_fd != -1) {
+ kvm_ia64_copy_from_nvram_to_GFW(nvram_fd);
+ close(nvram_fd);
+ }
+ i = atexit((void *)kvm_ia64_copy_from_GFW_to_nvram);
+ if (i != 0)
+ fprintf(stderr, "cannot set exit function\n");
+ } else
+ nvram_addr = 0;
+
+ kvm_ia64_build_hob(ram_size + above_4g_mem_size, smp_cpus, nvram_addr);
+ }
+
+ /* Register legacy I/O address space, size: 64MB */
+ ipf_legacy_io_base = 0xE0000000;
+ ipf_legacy_io_mem = cpu_register_io_memory(0, ipf_legacy_io_read,
+ ipf_legacy_io_write, NULL);
+ cpu_register_physical_memory(ipf_legacy_io_base, 64*1024*1024,
+ ipf_legacy_io_mem);
+
+ cpu_irq = qemu_allocate_irqs(pic_irq_request, first_cpu, 1);
+ i8259 = kvm_i8259_init(cpu_irq[0]);
+
+ if (pci_enabled) {
+ pci_bus = i440fx_init(&i440fx_state, i8259);
+ piix3_devfn = piix3_init(pci_bus, -1);
+ } else {
+ pci_bus = NULL;
+ }
+
+ if (cirrus_vga_enabled) {
+ if (pci_enabled)
+ pci_cirrus_vga_init(pci_bus);
+ else
+ isa_cirrus_vga_init();
+ } else {
+ if (pci_enabled)
+ pci_vga_init(pci_bus, 0, 0);
+ else
+ isa_vga_init();
+ }
+
+ rtc_state = rtc_init(0x70, i8259[8], 2000);
+
+ if (pci_enabled) {
+ pic_set_alt_irq_func(isa_pic, NULL, NULL);
+ }
+
+ for(i = 0; i < MAX_SERIAL_PORTS; i++) {
+ if (serial_hds[i]) {
+ serial_init(serial_io[i], i8259[serial_irq[i]], 115200,
+ serial_hds[i]);
+ }
+ }
+
+ for(i = 0; i < MAX_PARALLEL_PORTS; i++) {
+ if (parallel_hds[i]) {
+ parallel_init(parallel_io[i], i8259[parallel_irq[i]],
+ parallel_hds[i]);
+ }
+ }
+
+ for(i = 0; i < nb_nics; i++) {
+ NICInfo *nd = &nd_table[i];
+
+ if (!pci_enabled || (nd->model && strcmp(nd->model, "ne2k_isa") == 0))
+ pc_init_ne2k_isa(nd, i8259);
+ else
+ pci_nic_init(nd, "e1000", NULL);
+ }
+
+#undef USE_HYPERCALL /* disabled for now; needs to be implemented later */
+#ifdef USE_HYPERCALL
+ pci_hypercall_init(pci_bus);
+#endif
+
+ if (drive_get_max_bus(IF_IDE) >= MAX_IDE_BUS) {
+ fprintf(stderr, "qemu: too many IDE buses\n");
+ exit(1);
+ }
+
+ for(i = 0; i < MAX_IDE_BUS * MAX_IDE_DEVS; i++) {
+ index = drive_get_index(IF_IDE, i / MAX_IDE_DEVS, i % MAX_IDE_DEVS);
+ if (index != -1)
+ hd[i] = drives_table[index].bdrv;
+ else
+ hd[i] = NULL;
+ }
+
+ if (pci_enabled) {
+ pci_piix3_ide_init(pci_bus, hd, piix3_devfn + 1, i8259);
+ } else {
+ for(i = 0; i < MAX_IDE_BUS; i++) {
+ isa_ide_init(ide_iobase[i], ide_iobase2[i], i8259[ide_irq[i]],
+ hd[MAX_IDE_DEVS * i], hd[MAX_IDE_DEVS * i + 1]);
+ }
+ }
+
+ i8042_init(i8259[1], i8259[12], 0x60);
+ DMA_init(0);
+#ifdef HAS_AUDIO
+ audio_init(pci_enabled ? pci_bus : NULL, i8259);
+#endif
+
+ for(i = 0; i < MAX_FD; i++) {
+ index = drive_get_index(IF_FLOPPY, 0, i);
+ if (index != -1)
+ fd[i] = drives_table[index].bdrv;
+ else
+ fd[i] = NULL;
+ }
+ floppy_controller = fdctrl_init(i8259[6], 2, 0, 0x3f0, fd);
+
+ cmos_init(ram_size, above_4g_mem_size, boot_device, hd);
+
+ if (pci_enabled && usb_enabled) {
+ usb_uhci_piix3_init(pci_bus, piix3_devfn + 2);
+ }
+
+ if (pci_enabled && acpi_enabled) {
+ uint8_t *eeprom_buf = qemu_mallocz(8 * 256); /* XXX: make this persistent */
+ i2c_bus *smbus;
+
+ /* TODO: Populate SPD eeprom data. */
+ smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100, i8259[9]);
+ for (i = 0; i < 8; i++) {
+ DeviceState *eeprom;
+ eeprom = qdev_create((BusState *)smbus, "smbus-eeprom");
+ qdev_set_prop_int(eeprom, "address", 0x50 + i);
+ qdev_set_prop_ptr(eeprom, "data", eeprom_buf + (i * 256));
+ qdev_init(eeprom);
+ }
+ }
+
+ if (i440fx_state) {
+ i440fx_init_memory_mappings(i440fx_state);
+ }
+
+ if (pci_enabled) {
+ int max_bus;
+ int bus;
+
+ max_bus = drive_get_max_bus(IF_SCSI);
+ for (bus = 0; bus <= max_bus; bus++) {
+ pci_create_simple(pci_bus, -1, "lsi53c895a");
+ }
+ }
+ /* Add virtio block devices */
+ if (pci_enabled) {
+ int index;
+ int unit_id = 0;
+
+ while ((index = drive_get_index(IF_VIRTIO, 0, unit_id)) != -1) {
+ pci_dev = pci_create("virtio-blk-pci",
+ drives_table[index].devaddr);
+ qdev_init(&pci_dev->qdev);
+ unit_id++;
+ }
+ }
+
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
+ if (kvm_enabled())
+ add_assigned_devices(pci_bus, assigned_devices, assigned_devices_index);
+#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */
+
+}
+
+static void ipf_init_pci(ram_addr_t ram_size,
+ const char *boot_device, DisplayState *ds,
+ const char *kernel_filename,
+ const char *kernel_cmdline,
+ const char *initrd_filename,
+ const char *cpu_model)
+{
+ ipf_init1(ram_size, boot_device, ds, kernel_filename,
+ kernel_cmdline, initrd_filename, 1, cpu_model);
+}
+
+QEMUMachine ipf_machine = {
+ .name = "itanium",
+ .desc = "Itanium Platform",
+ .init = (QEMUMachineInitFunc *)ipf_init_pci,
+ .max_cpus = 255,
+ .is_default = 1,
+};
+
+static void ipf_machine_init(void)
+{
+ qemu_register_machine(&ipf_machine);
+}
+
+machine_init(ipf_machine_init);
+
+#define IOAPIC_NUM_PINS 48
+
+static int ioapic_irq_count[IOAPIC_NUM_PINS];
+
+static int ioapic_map_irq(int devfn, int irq_num)
+{
+ int irq, dev;
+ dev = devfn >> 3;
+ irq = ((((dev << 2) + (dev >> 3) + irq_num) & 31) + 16);
+ return irq;
+}
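+
+/*
+ * For example, INTA (irq_num 0) of the device in slot 3 maps to
+ * (((3 << 2) + (3 >> 3) + 0) & 31) + 16 = 28.
+ */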
+
+/*
+ * Dummy function to satisfy the call from hw/apic.c
+ */
+void apic_set_irq_delivered(void) {
+}
+
+void ioapic_set_irq(void *opaque, int irq_num, int level)
+{
+ int vector, pic_ret;
+
+ PCIDevice *pci_dev = (PCIDevice *)opaque;
+ vector = ioapic_map_irq(pci_dev->devfn, irq_num);
+
+ if (level)
+ ioapic_irq_count[vector] += 1;
+ else
+ ioapic_irq_count[vector] -= 1;
+
+ if (kvm_enabled()) {
+ if (kvm_set_irq(vector, ioapic_irq_count[vector] == 0, &pic_ret))
+ if (pic_ret != 0)
+ apic_set_irq_delivered();
+ return;
+ }
+}
+
+int ipf_map_irq(PCIDevice *pci_dev, int irq_num)
+{
+ return ioapic_map_irq(pci_dev->devfn, irq_num);
+}
diff --git a/hw/msix.c b/hw/msix.c
index d99403a0e..3dd045689 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -14,6 +14,7 @@
#include "hw.h"
#include "msix.h"
#include "pci.h"
+#include "kvm.h"
/* MSI-X capability structure */
#define MSIX_TABLE_OFFSET 4
@@ -45,6 +46,117 @@
/* Flag for interrupt controller to declare MSI-X support */
int msix_supported;
+#ifdef CONFIG_KVM
+/* KVM specific MSIX helpers */
+static void kvm_msix_free(PCIDevice *dev)
+{
+ int vector, changed = 0;
+ for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
+ if (dev->msix_entry_used[vector]) {
+ kvm_del_routing_entry(kvm_context, &dev->msix_irq_entries[vector]);
+ changed = 1;
+ }
+ }
+ if (changed) {
+ kvm_commit_irq_routes(kvm_context);
+ }
+}
+
+static void kvm_msix_routing_entry(PCIDevice *dev, unsigned vector,
+ struct kvm_irq_routing_entry *entry)
+{
+ uint8_t *table_entry = dev->msix_table_page + vector * MSIX_ENTRY_SIZE;
+ entry->type = KVM_IRQ_ROUTING_MSI;
+ entry->flags = 0;
+ entry->u.msi.address_lo = pci_get_long(table_entry + MSIX_MSG_ADDR);
+ entry->u.msi.address_hi = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR);
+ entry->u.msi.data = pci_get_long(table_entry + MSIX_MSG_DATA);
+}
+
+static void kvm_msix_update(PCIDevice *dev, int vector,
+ int was_masked, int is_masked)
+{
+ struct kvm_irq_routing_entry e = {}, *entry;
+ int mask_cleared = was_masked && !is_masked;
+ /* It is only legal to change an entry when it is masked. Therefore, it is
+ * enough to update the routing in the kernel when the mask is being cleared. */
+ if (!mask_cleared) {
+ return;
+ }
+ if (!dev->msix_entry_used[vector]) {
+ return;
+ }
+ entry = dev->msix_irq_entries + vector;
+ e.gsi = entry->gsi;
+ kvm_msix_routing_entry(dev, vector, &e);
+ if (memcmp(&entry->u.msi, &e.u.msi, sizeof entry->u.msi)) {
+ int r;
+ r = kvm_update_routing_entry(kvm_context, entry, &e);
+ if (r) {
+ fprintf(stderr, "%s: kvm_update_routing_entry failed: %s\n", __func__,
+ strerror(-r));
+ exit(1);
+ }
+ memcpy(&entry->u.msi, &e.u.msi, sizeof entry->u.msi);
+ r = kvm_commit_irq_routes(kvm_context);
+ if (r) {
+ fprintf(stderr, "%s: kvm_commit_irq_routes failed: %s\n", __func__,
+ strerror(-r));
+ exit(1);
+ }
+ }
+}
+
+static int kvm_msix_add(PCIDevice *dev, unsigned vector)
+{
+ struct kvm_irq_routing_entry *entry = dev->msix_irq_entries + vector;
+ int r;
+
+ if (!kvm_has_gsi_routing(kvm_context)) {
+ fprintf(stderr, "Warning: no MSI-X support found. "
+ "At least kernel 2.6.30 is required for MSI-X support.\n"
+ );
+ return -EOPNOTSUPP;
+ }
+
+ r = kvm_get_irq_route_gsi(kvm_context);
+ if (r < 0) {
+ fprintf(stderr, "%s: kvm_get_irq_route_gsi failed: %s\n", __func__, strerror(-r));
+ return r;
+ }
+ entry->gsi = r;
+ kvm_msix_routing_entry(dev, vector, entry);
+ r = kvm_add_routing_entry(kvm_context, entry);
+ if (r < 0) {
+ fprintf(stderr, "%s: kvm_add_routing_entry failed: %s\n", __func__, strerror(-r));
+ return r;
+ }
+
+ r = kvm_commit_irq_routes(kvm_context);
+ if (r < 0) {
+ fprintf(stderr, "%s: kvm_commit_irq_routes failed: %s\n", __func__, strerror(-r));
+ return r;
+ }
+ return 0;
+}
+
+static void kvm_msix_del(PCIDevice *dev, unsigned vector)
+{
+ if (dev->msix_entry_used[vector]) {
+ return;
+ }
+ kvm_del_routing_entry(kvm_context, &dev->msix_irq_entries[vector]);
+ kvm_commit_irq_routes(kvm_context);
+}
+#else
+
+static void kvm_msix_free(PCIDevice *dev) {}
+static void kvm_msix_update(PCIDevice *dev, int vector,
+ int was_masked, int is_masked) {}
+static int kvm_msix_add(PCIDevice *dev, unsigned vector) { return -1; }
+static void kvm_msix_del(PCIDevice *dev, unsigned vector) {}
+#endif
+
/* Add MSI-X capability to the config space for the device. */
/* Given a bar and its size, add MSI-X table on top of it
* and fill MSI-X capability in the config space.
@@ -183,7 +295,18 @@ static void msix_mmio_writel(void *opaque, target_phys_addr_t addr,
PCIDevice *dev = opaque;
unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3;
int vector = offset / MSIX_ENTRY_SIZE;
+ int was_masked = msix_is_masked(dev, vector);
pci_set_long(dev->msix_table_page + offset, val);
+ if (kvm_enabled() && kvm_irqchip_in_kernel()) {
+ kvm_msix_update(dev, vector, was_masked, msix_is_masked(dev, vector));
+ }
+ if (was_masked != msix_is_masked(dev, vector) &&
+ dev->msix_mask_notifier && dev->msix_mask_notifier_opaque[vector]) {
+ int r = dev->msix_mask_notifier(dev, vector,
+ dev->msix_mask_notifier_opaque[vector],
+ msix_is_masked(dev, vector));
+ assert(r >= 0);
+ }
msix_handle_mask_update(dev, vector);
}
@@ -222,10 +345,18 @@ void msix_mmio_map(PCIDevice *d, int region_num,
static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
{
- int vector;
+ int vector, r;
for (vector = 0; vector < nentries; ++vector) {
unsigned offset = vector * MSIX_ENTRY_SIZE + MSIX_VECTOR_CTRL;
+ int was_masked = msix_is_masked(dev, vector);
dev->msix_table_page[offset] |= MSIX_VECTOR_MASK;
+ if (was_masked != msix_is_masked(dev, vector) &&
+ dev->msix_mask_notifier && dev->msix_mask_notifier_opaque[vector]) {
+ r = dev->msix_mask_notifier(dev, vector,
+ dev->msix_mask_notifier_opaque[vector],
+ msix_is_masked(dev, vector));
+ assert(r >= 0);
+ }
}
}
@@ -242,6 +373,15 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries,
if (nentries > MSIX_MAX_ENTRIES)
return -EINVAL;
+#ifdef KVM_CAP_IRQCHIP
+ if (kvm_enabled() && kvm_irqchip_in_kernel()) {
+ dev->msix_irq_entries = qemu_malloc(nentries *
+ sizeof *dev->msix_irq_entries);
+ }
+#endif
+ dev->msix_mask_notifier_opaque =
+ qemu_mallocz(nentries * sizeof *dev->msix_mask_notifier_opaque);
+ dev->msix_mask_notifier = NULL;
dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
sizeof *dev->msix_entry_used);
@@ -278,6 +418,10 @@ static void msix_free_irq_entries(PCIDevice *dev)
{
int vector;
+ if (kvm_enabled() && kvm_irqchip_in_kernel()) {
+ kvm_msix_free(dev);
+ }
+
for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
dev->msix_entry_used[vector] = 0;
msix_clr_pending(dev, vector);
@@ -298,6 +442,10 @@ int msix_uninit(PCIDevice *dev)
dev->msix_table_page = NULL;
qemu_free(dev->msix_entry_used);
dev->msix_entry_used = NULL;
+ qemu_free(dev->msix_irq_entries);
+ dev->msix_irq_entries = NULL;
+ qemu_free(dev->msix_mask_notifier_opaque);
+ dev->msix_mask_notifier_opaque = NULL;
dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
return 0;
}
@@ -306,10 +454,13 @@ void msix_save(PCIDevice *dev, QEMUFile *f)
{
unsigned n = dev->msix_entries_nr;
- if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
+ if (!msix_supported) {
return;
}
+ if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
+ return;
+ }
qemu_put_buffer(f, dev->msix_table_page, n * MSIX_ENTRY_SIZE);
qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
}
@@ -319,6 +470,9 @@ void msix_load(PCIDevice *dev, QEMUFile *f)
{
unsigned n = dev->msix_entries_nr;
+ if (!msix_supported)
+ return;
+
if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
return;
}
@@ -363,6 +517,13 @@ void msix_notify(PCIDevice *dev, unsigned vector)
return;
}
+#ifdef KVM_CAP_IRQCHIP
+ if (kvm_enabled() && kvm_irqchip_in_kernel()) {
+ kvm_set_irq(dev->msix_irq_entries[vector].gsi, 1, NULL);
+ return;
+ }
+#endif
+
address = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR);
address = (address << 32) | pci_get_long(table_entry + MSIX_MSG_ADDR);
data = pci_get_long(table_entry + MSIX_MSG_DATA);
@@ -391,9 +552,19 @@ void msix_reset(PCIDevice *dev)
/* Mark vector as used. */
int msix_vector_use(PCIDevice *dev, unsigned vector)
{
+ int ret;
if (vector >= dev->msix_entries_nr)
return -EINVAL;
- dev->msix_entry_used[vector]++;
+ if (dev->msix_entry_used[vector]) {
+ return 0;
+ }
+ if (kvm_enabled() && kvm_irqchip_in_kernel()) {
+ ret = kvm_msix_add(dev, vector);
+ if (ret) {
+ return ret;
+ }
+ }
+ ++dev->msix_entry_used[vector];
return 0;
}
@@ -406,6 +577,9 @@ void msix_vector_unuse(PCIDevice *dev, unsigned vector)
if (--dev->msix_entry_used[vector]) {
return;
}
+ if (kvm_enabled() && kvm_irqchip_in_kernel()) {
+ kvm_msix_del(dev, vector);
+ }
msix_clr_pending(dev, vector);
}
@@ -415,3 +589,47 @@ void msix_unuse_all_vectors(PCIDevice *dev)
return;
msix_free_irq_entries(dev);
}
+
+int msix_set_mask_notifier(PCIDevice *dev, unsigned vector, void *opaque)
+{
+ int r = 0;
+ if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
+ return 0;
+
+ assert(dev->msix_mask_notifier);
+ assert(opaque);
+ assert(!dev->msix_mask_notifier_opaque[vector]);
+
+ /* Unmask the new notifier unless vector is masked. */
+ if (!msix_is_masked(dev, vector)) {
+ r = dev->msix_mask_notifier(dev, vector, opaque, false);
+ if (r < 0) {
+ return r;
+ }
+ }
+ dev->msix_mask_notifier_opaque[vector] = opaque;
+ return r;
+}
+
+int msix_unset_mask_notifier(PCIDevice *dev, unsigned vector)
+{
+ int r = 0;
+ void *opaque;
+ if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector])
+ return 0;
+
+ opaque = dev->msix_mask_notifier_opaque[vector];
+
+ assert(dev->msix_mask_notifier);
+ assert(opaque);
+
+ /* Mask the old notifier unless it is already masked. */
+ if (!msix_is_masked(dev, vector)) {
+ r = dev->msix_mask_notifier(dev, vector, opaque, true);
+ if (r < 0) {
+ return r;
+ }
+ }
+ dev->msix_mask_notifier_opaque[vector] = NULL;
+ return r;
+}
diff --git a/hw/msix.h b/hw/msix.h
index a9f7993c3..6b21ffb5d 100644
--- a/hw/msix.h
+++ b/hw/msix.h
@@ -33,4 +33,6 @@ void msix_reset(PCIDevice *dev);
extern int msix_supported;
+int msix_set_mask_notifier(PCIDevice *dev, unsigned vector, void *opaque);
+int msix_unset_mask_notifier(PCIDevice *dev, unsigned vector);
#endif
diff --git a/hw/pc.c b/hw/pc.c
index a96187f5b..89bd4afe3 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -39,6 +39,8 @@
#include "msix.h"
#include "sysbus.h"
#include "sysemu.h"
+#include "device-assignment.h"
+#include "kvm.h"
/* output Bochs bios info messages */
//#define DEBUG_BIOS
@@ -54,6 +56,8 @@
#endif
#define BIOS_FILENAME "bios.bin"
+#define EXTBOOT_FILENAME "extboot.bin"
+#define VAPIC_FILENAME "vapic.bin"
#define PC_MAX_BIOS_SIZE (4 * 1024 * 1024)
@@ -842,10 +846,18 @@ static void pc_cpu_reset(void *opaque)
env->halted = !cpu_is_bsp(env);
}
-static CPUState *pc_new_cpu(const char *cpu_model)
+CPUState *pc_new_cpu(const char *cpu_model)
{
CPUState *env;
+ if (cpu_model == NULL) {
+#ifdef TARGET_X86_64
+ cpu_model = "qemu64";
+#else
+ cpu_model = "qemu32";
+#endif
+ }
+
env = cpu_init(cpu_model);
if (!env) {
fprintf(stderr, "Unable to find x86 CPU definition\n");
@@ -865,14 +877,6 @@ void pc_cpus_init(const char *cpu_model)
int i;
/* init CPUs */
- if (cpu_model == NULL) {
-#ifdef TARGET_X86_64
- cpu_model = "qemu64";
-#else
- cpu_model = "qemu32";
-#endif
- }
-
for(i = 0; i < smp_cpus; i++) {
pc_new_cpu(cpu_model);
}
@@ -947,9 +951,17 @@ void pc_memory_init(ram_addr_t ram_size,
isa_bios_size = bios_size;
if (isa_bios_size > (128 * 1024))
isa_bios_size = 128 * 1024;
+ cpu_register_physical_memory(0xd0000, (192 * 1024) - isa_bios_size,
+ IO_MEM_UNASSIGNED);
+ /* The KVM TPR optimization needs the BIOS to be writable, at least by qemu itself. */
cpu_register_physical_memory(0x100000 - isa_bios_size,
isa_bios_size,
- (bios_offset + bios_size - isa_bios_size) | IO_MEM_ROM);
+ (bios_offset + bios_size - isa_bios_size) /* | IO_MEM_ROM */);
+
+ if (extboot_drive) {
+ option_rom[nb_option_roms++] = qemu_strdup(EXTBOOT_FILENAME);
+ }
+ option_rom[nb_option_roms++] = qemu_strdup(VAPIC_FILENAME);
option_rom_offset = qemu_ram_alloc(NULL, "pc.rom", PC_ROM_SIZE);
cpu_register_physical_memory(PC_ROM_MIN_VGA, PC_ROM_SIZE, option_rom_offset);
@@ -1034,7 +1046,14 @@ void pc_basic_device_init(qemu_irq *isa_irq,
qemu_register_boot_set(pc_boot_set, *rtc_state);
+#ifdef CONFIG_KVM_PIT
+ if (kvm_enabled() && kvm_pit_in_kernel())
+ pit = kvm_pit_init(0x40, isa_reserve_irq(0));
+ else
+#endif
+
pit = pit_init(0x40, isa_reserve_irq(0));
+
pcspk_init(pit);
for(i = 0; i < MAX_SERIAL_PORTS; i++) {
@@ -1072,4 +1091,22 @@ void pc_pci_device_init(PCIBus *pci_bus)
for (bus = 0; bus <= max_bus; bus++) {
pci_create_simple(pci_bus, -1, "lsi53c895a");
}
+
+ if (extboot_drive) {
+ DriveInfo *info = extboot_drive;
+ int cyls, heads, secs;
+
+ if (info->type != IF_IDE && info->type != IF_VIRTIO) {
+ bdrv_guess_geometry(info->bdrv, &cyls, &heads, &secs);
+ bdrv_set_geometry_hint(info->bdrv, cyls, heads, secs);
+ }
+
+ extboot_init(info->bdrv);
+ }
+
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
+ if (kvm_enabled()) {
+ add_assigned_devices(pci_bus, assigned_devices, assigned_devices_index);
+ }
+#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */
}
diff --git a/hw/pc.h b/hw/pc.h
index 63b0249f2..f819e80e9 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -32,6 +32,7 @@ extern PicState2 *isa_pic;
void pic_set_irq(int irq, int level);
void pic_set_irq_new(void *opaque, int irq, int level);
qemu_irq *i8259_init(qemu_irq parent_irq);
+qemu_irq *kvm_i8259_init(qemu_irq parent_irq);
int pic_read_irq(PicState2 *s);
void pic_update_irq(PicState2 *s);
uint32_t pic_intack_read(PicState2 *s);
@@ -61,6 +62,10 @@ int pit_get_initial_count(PITState *pit, int channel);
int pit_get_mode(PITState *pit, int channel);
int pit_get_out(PITState *pit, int channel, int64_t current_time);
+/* i8254-kvm.c */
+
+PITState *kvm_pit_init(int base, qemu_irq irq);
+
void hpet_pit_disable(void);
void hpet_pit_enable(void);
@@ -135,6 +140,9 @@ void pcspk_init(PITState *);
int pcspk_audio_init(qemu_irq *pic);
/* piix_pci.c */
+/* config space register for IRQ routing */
+#define PIIX_CONFIG_IRQ_ROUTE 0x60
+
struct PCII440FXState;
typedef struct PCII440FXState PCII440FXState;
@@ -145,6 +153,10 @@ void i440fx_init_memory_mappings(PCII440FXState *d);
extern PCIDevice *piix4_dev;
int piix4_init(PCIBus *bus, int devfn);
+int piix_get_irq(int pin);
+
+int ipf_map_irq(PCIDevice *pci_dev, int irq_num);
+
/* vga.c */
enum vga_retrace_method {
VGA_RETRACE_DUMB,
@@ -167,6 +179,10 @@ void isa_cirrus_vga_init(void);
void isa_ne2000_init(int base, int irq, NICInfo *nd);
+/* extboot.c */
+
+void extboot_init(BlockDriverState *bs);
+
/* e820 types */
#define E820_RAM 1
#define E820_RESERVED 2
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 519e8a5cc..3a1c67023 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -35,12 +35,16 @@
#include "sysemu.h"
#include "sysbus.h"
+qemu_irq *ioapic_irq_hack;
+
#define MAX_IDE_BUS 2
static const int ide_iobase[MAX_IDE_BUS] = { 0x1f0, 0x170 };
static const int ide_iobase2[MAX_IDE_BUS] = { 0x3f6, 0x376 };
static const int ide_irq[MAX_IDE_BUS] = { 14, 15 };
+const char *global_cpu_model; /* cpu hotadd */
+
static void ioapic_init(IsaIrqState *isa_irq_state)
{
DeviceState *dev;
@@ -82,6 +86,8 @@ static void pc_init1(ram_addr_t ram_size,
BusState *idebus[MAX_IDE_BUS];
ISADevice *rtc_state;
+ global_cpu_model = cpu_model;
+
pc_cpus_init(cpu_model);
vmport_init();
@@ -91,13 +97,25 @@ static void pc_init1(ram_addr_t ram_size,
&below_4g_mem_size, &above_4g_mem_size);
cpu_irq = pc_allocate_cpu_irq();
- i8259 = i8259_init(cpu_irq[0]);
- isa_irq_state = qemu_mallocz(sizeof(*isa_irq_state));
- isa_irq_state->i8259 = i8259;
- if (pci_enabled) {
- ioapic_init(isa_irq_state);
+#ifdef KVM_CAP_IRQCHIP
+ if (kvm_enabled() && kvm_irqchip_in_kernel()) {
+ isa_irq_state = qemu_mallocz(sizeof(*isa_irq_state));
+ if (pci_enabled) {
+ ioapic_init(isa_irq_state);
+ }
+ isa_irq = i8259 = kvm_i8259_init(cpu_irq[0]);
+ ioapic_irq_hack = isa_irq;
+ } else
+#endif
+ {
+ i8259 = i8259_init(cpu_irq[0]);
+ isa_irq_state = qemu_mallocz(sizeof(*isa_irq_state));
+ isa_irq_state->i8259 = i8259;
+ if (pci_enabled) {
+ ioapic_init(isa_irq_state);
+ }
+ isa_irq = qemu_allocate_irqs(isa_irq_handler, isa_irq_state, 24);
}
- isa_irq = qemu_allocate_irqs(isa_irq_handler, isa_irq_state, 24);
if (pci_enabled) {
pci_bus = i440fx_init(&i440fx_state, &piix3_devfn, isa_irq, ram_size);
@@ -108,6 +126,7 @@ static void pc_init1(ram_addr_t ram_size,
isa_bus_irqs(isa_irq);
pc_register_ferr_irq(isa_reserve_irq(13));
+ cpu_irq = pc_allocate_cpu_irq();
pc_vga_init(pci_enabled? pci_bus: NULL);
@@ -120,7 +139,7 @@ static void pc_init1(ram_addr_t ram_size,
if (!pci_enabled || (nd->model && strcmp(nd->model, "ne2k_isa") == 0))
pc_init_ne2k_isa(nd);
else
- pci_nic_init_nofail(nd, "e1000", NULL);
+ pci_nic_init_nofail(nd, "rtl8139", NULL);
}
if (drive_get_max_bus(IF_IDE) >= MAX_IDE_BUS) {
diff --git a/hw/pci-hotplug.c b/hw/pci-hotplug.c
index fe468d646..d2456adf1 100644
--- a/hw/pci-hotplug.c
+++ b/hw/pci-hotplug.c
@@ -31,6 +31,7 @@
#include "scsi.h"
#include "virtio-blk.h"
#include "qemu-config.h"
+#include "device-assignment.h"
#if defined(TARGET_I386)
static PCIDevice *qemu_pci_hot_add_nic(Monitor *mon,
@@ -228,6 +229,24 @@ static PCIDevice *qemu_pci_hot_add_storage(Monitor *mon,
return dev;
}
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
+static PCIDevice *qemu_pci_hot_assign_device(Monitor *mon,
+ const char *devaddr,
+ const char *opts_str)
+{
+ QemuOpts *opts;
+ DeviceState *dev;
+
+ opts = add_assigned_device(opts_str);
+ if (opts == NULL) {
+ monitor_printf(mon, "Error adding device; check syntax\n");
+ return NULL;
+ }
+ dev = qdev_device_add(opts);
+ return DO_UPCAST(PCIDevice, qdev, dev);
+}
+#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */
+
void pci_device_hot_add(Monitor *mon, const QDict *qdict)
{
PCIDevice *dev = NULL;
@@ -251,6 +270,10 @@ void pci_device_hot_add(Monitor *mon, const QDict *qdict)
dev = qemu_pci_hot_add_nic(mon, pci_addr, opts);
} else if (strcmp(type, "storage") == 0) {
dev = qemu_pci_hot_add_storage(mon, pci_addr, opts);
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
+ } else if (strcmp(type, "host") == 0) {
+ dev = qemu_pci_hot_assign_device(mon, pci_addr, opts);
+#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */
} else {
monitor_printf(mon, "invalid type: %s\n", type);
}
diff --git a/hw/pci.c b/hw/pci.c
index fe7c5c3bc..56ad13afb 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -27,6 +27,9 @@
#include "net.h"
#include "sysemu.h"
#include "loader.h"
+#include "qemu-kvm.h"
+#include "hw/pc.h"
+#include "device-assignment.h"
#include "qemu-objects.h"
//#define DEBUG_PCI
@@ -450,6 +453,7 @@ static void pci_set_default_subsystem_id(PCIDevice *pci_dev)
}
/*
+ * Parse a PCI address given on the qemu command line.
* Parse [[<domain>:]<bus>:]<slot>, return -1 on error
*/
static int pci_parse_devaddr(const char *addr, int *domp, int *busp, unsigned *slotp)
@@ -498,6 +502,83 @@ static int pci_parse_devaddr(const char *addr, int *domp, int *busp, unsigned *s
return 0;
}
+/*
+ * Parse the segment and BDF of a host device in a device assignment command:
+ *
+ * -pcidevice host=[seg:]bus:dev.func
+ *
+ * Parses [seg:]<bus>:<slot>.<func>; returns -1 on error.
+ */
+int pci_parse_host_devaddr(const char *addr, int *segp, int *busp,
+ int *slotp, int *funcp)
+{
+ const char *p;
+ char *e;
+ int val;
+ int seg = 0, bus = 0, slot = 0, func = 0;
+
+ /* parse optional seg: count ':' separators to distinguish [seg:]bus:dev.func from bus:dev.func */
+ p = addr;
+ val = 0;
+ while (1) {
+ p = strchr(p, ':');
+ if (p) {
+ val++;
+ p++;
+ } else
+ break;
+ }
+ if (val <= 0 || val > 2)
+ return -1;
+
+ p = addr;
+ if (val == 2) {
+ val = strtoul(p, &e, 16);
+ if (e == p)
+ return -1;
+ if (*e == ':') {
+ seg = val;
+ p = e + 1;
+ }
+ } else
+ seg = 0;
+
+ /* parse bdf */
+ val = strtoul(p, &e, 16);
+ if (e == p)
+ return -1;
+ if (*e == ':') {
+ bus = val;
+ p = e + 1;
+ val = strtoul(p, &e, 16);
+ if (e == p)
+ return -1;
+ if (*e == '.') {
+ slot = val;
+ p = e + 1;
+ val = strtoul(p, &e, 16);
+ if (e == p)
+ return -1;
+ func = val;
+ } else
+ return -1;
+ } else
+ return -1;
+
+ if (seg > 0xffff || bus > 0xff || slot > 0x1f || func > 0x7)
+ return -1;
+
+ if (*e)
+ return -1;
+
+ *segp = seg;
+ *busp = bus;
+ *slotp = slot;
+ *funcp = func;
+ return 0;
+}
+
int pci_read_devaddr(Monitor *mon, const char *addr, int *domp, int *busp,
unsigned *slotp)
{
@@ -1006,25 +1087,80 @@ static void pci_update_irq_disabled(PCIDevice *d, int was_irq_disabled)
}
}
-uint32_t pci_default_read_config(PCIDevice *d,
- uint32_t address, int len)
+static uint32_t pci_read_config(PCIDevice *d,
+ uint32_t address, int len)
{
uint32_t val = 0;
- assert(len == 1 || len == 2 || len == 4);
+
len = MIN(len, pci_config_size(d) - address);
memcpy(&val, d->config + address, len);
return le32_to_cpu(val);
}
+uint32_t pci_default_read_config(PCIDevice *d,
+ uint32_t address, int len)
+{
+ assert(len == 1 || len == 2 || len == 4);
+
+ if (pci_access_cap_config(d, address, len)) {
+ return d->cap.config_read(d, address, len);
+ }
+
+ return pci_read_config(d, address, len);
+}
+
+static void pci_write_config(PCIDevice *pci_dev,
+ uint32_t address, uint32_t val, int len)
+{
+ int i;
+ for (i = 0; i < len; i++) {
+ pci_dev->config[address + i] = val & 0xff;
+ val >>= 8;
+ }
+}
+
+int pci_access_cap_config(PCIDevice *pci_dev, uint32_t address, int len)
+{
+ if (pci_dev->cap.supported && address >= pci_dev->cap.start &&
+ (address + len) < pci_dev->cap.start + pci_dev->cap.length)
+ return 1;
+ return 0;
+}
+
+uint32_t pci_default_cap_read_config(PCIDevice *pci_dev,
+ uint32_t address, int len)
+{
+ return pci_read_config(pci_dev, address, len);
+}
+
+void pci_default_cap_write_config(PCIDevice *pci_dev,
+ uint32_t address, uint32_t val, int len)
+{
+ pci_write_config(pci_dev, address, val, len);
+}
+
void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
{
int i, was_irq_disabled = pci_irq_disabled(d);
uint32_t config_size = pci_config_size(d);
+ if (pci_access_cap_config(d, addr, l)) {
+ d->cap.config_write(d, addr, val, l);
+ return;
+ }
+
for (i = 0; i < l && addr + i < config_size; val >>= 8, ++i) {
uint8_t wmask = d->wmask[addr + i];
d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
}
+
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
+ if (kvm_enabled() && kvm_irqchip_in_kernel() &&
+ addr >= PIIX_CONFIG_IRQ_ROUTE &&
+ addr < PIIX_CONFIG_IRQ_ROUTE + 4)
+ assigned_dev_update_irqs();
+#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */
+
if (ranges_overlap(addr, l, PCI_BASE_ADDRESS_0, 24) ||
ranges_overlap(addr, l, PCI_ROM_ADDRESS, 4) ||
ranges_overlap(addr, l, PCI_ROM_ADDRESS1, 4) ||
@@ -1048,6 +1184,10 @@ static void pci_set_irq(void *opaque, int irq_num, int level)
if (!change)
return;
+#if defined(TARGET_IA64)
+ ioapic_set_irq(pci_dev, irq_num, level);
+#endif
+
pci_set_irq_state(pci_dev, irq_num, level);
pci_update_irq_status(pci_dev);
if (pci_irq_disabled(pci_dev))
@@ -1055,6 +1195,11 @@ static void pci_set_irq(void *opaque, int irq_num, int level)
pci_change_irq_level(pci_dev, irq_num, change);
}
+int pci_map_irq(PCIDevice *pci_dev, int pin)
+{
+ return pci_dev->bus->map_irq(pci_dev, pin);
+}
+
/***********************************************************/
/* monitor info on PCI */
@@ -1671,6 +1816,37 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
return dev;
}
+int pci_enable_capability_support(PCIDevice *pci_dev,
+ uint32_t config_start,
+ PCICapConfigReadFunc *config_read,
+ PCICapConfigWriteFunc *config_write,
+ PCICapConfigInitFunc *config_init)
+{
+ if (!pci_dev)
+ return -ENODEV;
+
+ pci_dev->config[0x06] |= 0x10; // status = capabilities
+
+ if (config_start == 0)
+ pci_dev->cap.start = PCI_CAPABILITY_CONFIG_DEFAULT_START_ADDR;
+ else if (config_start >= 0x40 && config_start < 0xff)
+ pci_dev->cap.start = config_start;
+ else
+ return -EINVAL;
+
+ if (config_read)
+ pci_dev->cap.config_read = config_read;
+ else
+ pci_dev->cap.config_read = pci_default_cap_read_config;
+ if (config_write)
+ pci_dev->cap.config_write = config_write;
+ else
+ pci_dev->cap.config_write = pci_default_cap_write_config;
+ pci_dev->cap.supported = 1;
+ pci_dev->config[PCI_CAPABILITY_LIST] = pci_dev->cap.start;
+ return config_init(pci_dev);
+}
+
static int pci_find_space(PCIDevice *pdev, uint8_t size)
{
int config_size = pci_config_size(pdev);
diff --git a/hw/pci.h b/hw/pci.h
index 3a15bd401..4bd8a1a1b 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -6,6 +6,8 @@
#include "qdev.h"
+struct kvm_irq_routing_entry;
+
/* PCI includes legacy ISA access. */
#include "isa.h"
@@ -80,6 +82,12 @@ typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
pcibus_t addr, pcibus_t size, int type);
typedef int PCIUnregisterFunc(PCIDevice *pci_dev);
+typedef void PCICapConfigWriteFunc(PCIDevice *pci_dev,
+ uint32_t address, uint32_t val, int len);
+typedef uint32_t PCICapConfigReadFunc(PCIDevice *pci_dev,
+ uint32_t address, int len);
+typedef int PCICapConfigInitFunc(PCIDevice *pci_dev);
+
typedef struct PCIIORegion {
pcibus_t addr; /* current PCI mapping address. -1 means not mapped */
#define PCI_BAR_UNMAPPED (~(pcibus_t)0)
@@ -112,6 +120,14 @@ enum {
QEMU_PCI_CAP_EXPRESS = 0x2,
};
+#define PCI_CAPABILITY_CONFIG_MAX_LENGTH 0x60
+#define PCI_CAPABILITY_CONFIG_DEFAULT_START_ADDR 0x40
+#define PCI_CAPABILITY_CONFIG_MSI_LENGTH 0x10
+#define PCI_CAPABILITY_CONFIG_MSIX_LENGTH 0x10
+
+typedef int (*msix_mask_notifier_func)(PCIDevice *, unsigned vector,
+ void *opaque, int masked);
+
struct PCIDevice {
DeviceState qdev;
/* PCI config space */
@@ -167,6 +183,26 @@ struct PCIDevice {
char *romfile;
ram_addr_t rom_offset;
uint32_t rom_bar;
+
+ /* How much space does an MSI-X table need? */
+ /* The spec requires giving the table structure
+ * a 4K aligned region all by itself. Align it to
+ * target pages so that drivers can do passthrough
+ * on the rest of the region. */
+ target_phys_addr_t msix_page_size;
+
+ struct kvm_irq_routing_entry *msix_irq_entries;
+
+ void **msix_mask_notifier_opaque;
+ msix_mask_notifier_func msix_mask_notifier;
+
+ /* Device capability configuration space */
+ struct {
+ int supported;
+ unsigned int start, length;
+ PCICapConfigReadFunc *config_read;
+ PCICapConfigWriteFunc *config_write;
+ } cap;
};
PCIDevice *pci_register_device(PCIBus *bus, const char *name,
@@ -178,6 +214,14 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
pcibus_t size, int type,
PCIMapIORegionFunc *map_func);
+int pci_enable_capability_support(PCIDevice *pci_dev,
+ uint32_t config_start,
+ PCICapConfigReadFunc *config_read,
+ PCICapConfigWriteFunc *config_write,
+ PCICapConfigInitFunc *config_init);
+
+int pci_map_irq(PCIDevice *pci_dev, int pin);
+
int pci_add_capability(PCIDevice *pci_dev, uint8_t cap_id, uint8_t cap_size);
int pci_add_capability_at_offset(PCIDevice *pci_dev, uint8_t cap_id,
uint8_t cap_offset, uint8_t cap_size);
@@ -188,13 +232,17 @@ void pci_reserve_capability(PCIDevice *pci_dev, uint8_t offset, uint8_t size);
uint8_t pci_find_capability(PCIDevice *pci_dev, uint8_t cap_id);
-
uint32_t pci_default_read_config(PCIDevice *d,
uint32_t address, int len);
void pci_default_write_config(PCIDevice *d,
uint32_t address, uint32_t val, int len);
void pci_device_save(PCIDevice *s, QEMUFile *f);
int pci_device_load(PCIDevice *s, QEMUFile *f);
+uint32_t pci_default_cap_read_config(PCIDevice *pci_dev,
+ uint32_t address, int len);
+void pci_default_cap_write_config(PCIDevice *pci_dev,
+ uint32_t address, uint32_t val, int len);
+int pci_access_cap_config(PCIDevice *pci_dev, uint32_t address, int len);
typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level);
typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num);
@@ -226,6 +274,9 @@ PCIBus *pci_get_bus_devfn(int *devfnp, const char *devaddr);
int pci_read_devaddr(Monitor *mon, const char *addr, int *domp, int *busp,
unsigned *slotp);
+int pci_parse_host_devaddr(const char *addr, int *segp, int *busp,
+ int *slotp, int *funcp);
+
void do_pci_info_print(Monitor *mon, const QObject *data);
void do_pci_info(Monitor *mon, QObject **ret_data);
PCIBus *pci_bridge_init(PCIBus *bus, int devfn, uint16_t vid, uint16_t did,
diff --git a/hw/pci_regs.h b/hw/pci_regs.h
index dd0bed4f1..1c675dc8b 100644
--- a/hw/pci_regs.h
+++ b/hw/pci_regs.h
@@ -44,9 +44,16 @@
#define PCI_STATUS 0x06 /* 16 bits */
#define PCI_STATUS_INTERRUPT 0x08 /* Interrupt status */
#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */
+
+#ifndef PCI_STATUS_66MHZ
#define PCI_STATUS_66MHZ 0x20 /* Support 66 Mhz PCI 2.1 bus */
+#endif
+
#define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */
+#ifndef PCI_STATUS_FAST_BACK
#define PCI_STATUS_FAST_BACK 0x80 /* Accept fast-back to back */
+#endif
+
#define PCI_STATUS_PARITY 0x100 /* Detected parity error */
#define PCI_STATUS_DEVSEL_MASK 0x600 /* DEVSEL timing */
#define PCI_STATUS_DEVSEL_FAST 0x000
diff --git a/hw/pcspk.c b/hw/pcspk.c
index 26a0ecb9d..fb5f7637c 100644
--- a/hw/pcspk.c
+++ b/hw/pcspk.c
@@ -27,6 +27,8 @@
#include "isa.h"
#include "audio/audio.h"
#include "qemu-timer.h"
+#include "i8254.h"
+#include "qemu-kvm.h"
#define PCSPK_BUF_LEN 1792
#define PCSPK_SAMPLE_RATE 32000
@@ -48,6 +50,43 @@ typedef struct {
static const char *s_spk = "pcspk";
static PCSpkState pcspk_state;
+#ifdef CONFIG_KVM_PIT
+static void kvm_get_pit_ch2(PITState *pit,
+ struct kvm_pit_state *inkernel_state)
+{
+ struct kvm_pit_state pit_state;
+
+ if (kvm_enabled() && kvm_pit_in_kernel()) {
+ kvm_get_pit(kvm_context, &pit_state);
+ pit->channels[2].mode = pit_state.channels[2].mode;
+ pit->channels[2].count = pit_state.channels[2].count;
+ pit->channels[2].count_load_time = pit_state.channels[2].count_load_time;
+ pit->channels[2].gate = pit_state.channels[2].gate;
+ if (inkernel_state) {
+ memcpy(inkernel_state, &pit_state, sizeof(*inkernel_state));
+ }
+ }
+}
+
+static void kvm_set_pit_ch2(PITState *pit,
+ struct kvm_pit_state *inkernel_state)
+{
+ if (kvm_enabled() && kvm_pit_in_kernel()) {
+ inkernel_state->channels[2].mode = pit->channels[2].mode;
+ inkernel_state->channels[2].count = pit->channels[2].count;
+ inkernel_state->channels[2].count_load_time =
+ pit->channels[2].count_load_time;
+ inkernel_state->channels[2].gate = pit->channels[2].gate;
+ kvm_set_pit(kvm_context, inkernel_state);
+ }
+}
+#else
+static inline void kvm_get_pit_ch2(PITState *pit,
+ struct kvm_pit_state *inkernel_state) { }
+static inline void kvm_set_pit_ch2(PITState *pit,
+ struct kvm_pit_state *inkernel_state) { }
+#endif
+
static inline void generate_samples(PCSpkState *s)
{
unsigned int i;
@@ -72,6 +111,8 @@ static void pcspk_callback(void *opaque, int free)
PCSpkState *s = opaque;
unsigned int n;
+ kvm_get_pit_ch2(s->pit, NULL);
+
if (pit_get_mode(s->pit, 2) != 3)
return;
@@ -117,6 +158,8 @@ static uint32_t pcspk_ioport_read(void *opaque, uint32_t addr)
PCSpkState *s = opaque;
int out;
+ kvm_get_pit_ch2(s->pit, NULL);
+
s->dummy_refresh_clock ^= (1 << 4);
out = pit_get_out(s->pit, 2, qemu_get_clock(vm_clock)) << 5;
@@ -125,9 +168,12 @@ static uint32_t pcspk_ioport_read(void *opaque, uint32_t addr)
static void pcspk_ioport_write(void *opaque, uint32_t addr, uint32_t val)
{
+ struct kvm_pit_state inkernel_state;
PCSpkState *s = opaque;
const int gate = val & 1;
+ kvm_get_pit_ch2(s->pit, &inkernel_state);
+
s->data_on = (val >> 1) & 1;
pit_set_gate(s->pit, 2, gate);
if (s->voice) {
@@ -135,6 +181,8 @@ static void pcspk_ioport_write(void *opaque, uint32_t addr, uint32_t val)
s->play_pos = 0;
AUD_set_active_out(s->voice, gate & s->data_on);
}
+
+ kvm_set_pit_ch2(s->pit, &inkernel_state);
}
void pcspk_init(PITState *pit)
diff --git a/hw/piix_pci.c b/hw/piix_pci.c
index d14d05e1f..d01618f8c 100644
--- a/hw/piix_pci.c
+++ b/hw/piix_pci.c
@@ -28,6 +28,7 @@
#include "pci_host.h"
#include "isa.h"
#include "sysbus.h"
+#include "kvm.h"
/*
* I440FX chipset data sheet.
@@ -98,6 +99,10 @@ static void i440fx_update_memory_mappings(PCII440FXState *d)
int i, r;
uint32_t smram, addr;
+ if (kvm_enabled()) {
+ /* FIXME: Support remappings and protection changes. */
+ return;
+ }
update_pam(d, 0xf0000, 0x100000, (d->dev.config[I440FX_PAM] >> 4) & 3);
for(i = 0; i < 12; i++) {
r = (d->dev.config[(i >> 1) + (I440FX_PAM + 1)] >> ((i & 1) * 4)) & 3;
@@ -216,6 +221,8 @@ static int i440fx_initfn(PCIDevice *dev)
return 0;
}
+static PIIX3State *piix3_dev;
+
PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq *pic, ram_addr_t ram_size)
{
DeviceState *dev;
@@ -246,6 +253,8 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, int *piix3_devfn, qemu_irq *
ram_size = 255;
(*pi440fx_state)->dev.config[0x57]=ram_size;
+ piix3_dev = piix3;
+
return b;
}
@@ -273,6 +282,13 @@ static void piix3_set_irq(void *opaque, int irq_num, int level)
}
}
+int piix_get_irq(int pin)
+{
+ if (piix3_dev)
+ return piix3_dev->dev.config[0x60+pin];
+ return 0;
+}
+
static void piix3_reset(void *opaque)
{
PIIX3State *d = opaque;
diff --git a/hw/ppc440.c b/hw/ppc440.c
index d12cf7181..89c16c051 100644
--- a/hw/ppc440.c
+++ b/hw/ppc440.c
@@ -20,6 +20,7 @@
#include "ppc405.h"
#include "sysemu.h"
#include "kvm.h"
+#include "qemu-kvm.h"
#define PPC440EP_PCI_CONFIG 0xeec00000
#define PPC440EP_PCI_INTACK 0xeed00000
diff --git a/hw/ppc440_bamboo.c b/hw/ppc440_bamboo.c
index 6ca873ee7..79eb0b357 100644
--- a/hw/ppc440_bamboo.c
+++ b/hw/ppc440_bamboo.c
@@ -24,6 +24,7 @@
#include "device_tree.h"
#include "loader.h"
#include "elf.h"
+#include "qemu-kvm.h"
#define BINARY_DEVICE_TREE_FILE "bamboo.dtb"
diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c
index 1422fad07..4eb9bec00 100644
--- a/hw/ppce500_mpc8544ds.c
+++ b/hw/ppce500_mpc8544ds.c
@@ -31,6 +31,7 @@
#include "ppce500.h"
#include "loader.h"
#include "elf.h"
+#include "qemu-kvm.h"
#define BINARY_DEVICE_TREE_FILE "mpc8544ds.dtb"
#define UIMAGE_LOAD_BASE 0
diff --git a/hw/testdev.c b/hw/testdev.c
new file mode 100644
index 000000000..a8c49a366
--- /dev/null
+++ b/hw/testdev.c
@@ -0,0 +1,129 @@
+#include "hw.h"
+#include "qdev.h"
+#include "isa.h"
+
+struct testdev {
+ ISADevice dev;
+ CharDriverState *chr;
+};
+
+static void test_device_serial_write(void *opaque, uint32_t addr, uint32_t data)
+{
+ struct testdev *dev = opaque;
+ uint8_t buf[1] = { data };
+
+ if (dev->chr) {
+ qemu_chr_write(dev->chr, buf, 1);
+ }
+}
+
+static void test_device_exit(void *opaque, uint32_t addr, uint32_t data)
+{
+ exit(data);
+}
+
+static uint32_t test_device_memsize_read(void *opaque, uint32_t addr)
+{
+ return ram_size;
+}
+
+static void test_device_irq_line(void *opaque, uint32_t addr, uint32_t data)
+{
+ extern qemu_irq *ioapic_irq_hack;
+
+ qemu_set_irq(ioapic_irq_hack[addr - 0x2000], !!data);
+}
+
+static uint32_t test_device_ioport_data;
+
+static void test_device_ioport_write(void *opaque, uint32_t addr, uint32_t data)
+{
+ test_device_ioport_data = data;
+}
+
+static uint32_t test_device_ioport_read(void *opaque, uint32_t addr)
+{
+ return test_device_ioport_data;
+}
+
+static char *iomem_buf;
+
+static uint32_t test_iomem_readb(void *opaque, target_phys_addr_t addr)
+{
+ return iomem_buf[addr];
+}
+
+static uint32_t test_iomem_readw(void *opaque, target_phys_addr_t addr)
+{
+ return *(uint16_t*)(iomem_buf + addr);
+}
+
+static uint32_t test_iomem_readl(void *opaque, target_phys_addr_t addr)
+{
+ return *(uint32_t*)(iomem_buf + addr);
+}
+
+static void test_iomem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+ iomem_buf[addr] = val;
+}
+
+static void test_iomem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+ *(uint16_t*)(iomem_buf + addr) = val;
+}
+
+static void test_iomem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
+{
+ *(uint32_t*)(iomem_buf + addr) = val;
+}
+
+static CPUReadMemoryFunc * const test_iomem_read[3] = {
+ test_iomem_readb,
+ test_iomem_readw,
+ test_iomem_readl,
+};
+
+static CPUWriteMemoryFunc * const test_iomem_write[3] = {
+ test_iomem_writeb,
+ test_iomem_writew,
+ test_iomem_writel,
+};
+
+static int init_test_device(ISADevice *isa)
+{
+ struct testdev *dev = DO_UPCAST(struct testdev, dev, isa);
+ int iomem;
+
+ register_ioport_write(0xf1, 1, 1, test_device_serial_write, dev);
+ register_ioport_write(0xf4, 1, 4, test_device_exit, dev);
+ register_ioport_read(0xd1, 1, 4, test_device_memsize_read, dev);
+ register_ioport_read(0xe0, 1, 1, test_device_ioport_read, dev);
+ register_ioport_write(0xe0, 1, 1, test_device_ioport_write, dev);
+ register_ioport_read(0xe0, 1, 2, test_device_ioport_read, dev);
+ register_ioport_write(0xe0, 1, 2, test_device_ioport_write, dev);
+ register_ioport_read(0xe0, 1, 4, test_device_ioport_read, dev);
+ register_ioport_write(0xe0, 1, 4, test_device_ioport_write, dev);
+ register_ioport_write(0x2000, 24, 1, test_device_irq_line, NULL);
+ iomem_buf = qemu_mallocz(0x10000);
+ iomem = cpu_register_io_memory(test_iomem_read, test_iomem_write, NULL);
+ cpu_register_physical_memory(0xff000000, 0x10000, iomem);
+ return 0;
+}
+
+static ISADeviceInfo testdev_info = {
+ .qdev.name = "testdev",
+ .qdev.size = sizeof(struct testdev),
+ .init = init_test_device,
+ .qdev.props = (Property[]) {
+ DEFINE_PROP_CHR("chardev", struct testdev, chr),
+ DEFINE_PROP_END_OF_LIST(),
+ },
+};
+
+static void testdev_register_devices(void)
+{
+ isa_qdev_register(&testdev_info);
+}
+
+device_init(testdev_register_devices)
diff --git a/hw/vga-pci.c b/hw/vga-pci.c
index eef78ed08..9089c9f5d 100644
--- a/hw/vga-pci.c
+++ b/hw/vga-pci.c
@@ -68,9 +68,11 @@ static void pci_vga_write_config(PCIDevice *d,
PCIVGAState *pvs = container_of(d, PCIVGAState, dev);
VGACommonState *s = &pvs->vga;
+ vga_dirty_log_stop(s);
pci_default_write_config(d, address, val, len);
if (s->map_addr && pvs->dev.io_regions[0].addr == -1)
s->map_addr = 0;
+ vga_dirty_log_start(s);
}
static int pci_vga_initfn(PCIDevice *dev)
diff --git a/hw/vga.c b/hw/vga.c
index b5c7ee7fe..54fcce4a2 100644
--- a/hw/vga.c
+++ b/hw/vga.c
@@ -1282,6 +1282,8 @@ static void vga_draw_text(VGACommonState *s, int full_update)
vga_draw_glyph8_func *vga_draw_glyph8;
vga_draw_glyph9_func *vga_draw_glyph9;
+ vga_dirty_log_stop(s);
+
/* compute font data address (in plane 2) */
v = s->sr[3];
offset = (((v >> 4) & 1) | ((v << 1) & 6)) * 8192 * 4 + 2;
@@ -1593,40 +1595,65 @@ static void vga_sync_dirty_bitmap(VGACommonState *s)
}
#endif
+ vga_dirty_log_start(s);
+}
+
+/* dirty logging already enabled? s1: PCI-mapped LFB, s2: legacy 0xa0000 window, s3: VBE LFB */
+static int s1, s2, s3;
+
+static void mark_dirty(target_phys_addr_t start, target_phys_addr_t len)
+{
+ target_phys_addr_t end = start + len;
+
+ while (start < end) {
+ cpu_physical_memory_set_dirty(cpu_get_physical_page_desc(start));
+ start += TARGET_PAGE_SIZE;
+ }
}
void vga_dirty_log_start(VGACommonState *s)
{
if (kvm_enabled() && s->map_addr)
- kvm_log_start(s->map_addr, s->map_end - s->map_addr);
-
+ if (!s1) {
+ kvm_log_start(s->map_addr, s->map_end - s->map_addr);
+ mark_dirty(s->map_addr, s->map_end - s->map_addr);
+ s1 = 1;
+ }
if (kvm_enabled() && s->lfb_vram_mapped) {
- kvm_log_start(isa_mem_base + 0xa0000, 0x8000);
- kvm_log_start(isa_mem_base + 0xa8000, 0x8000);
+ if (!s2) {
+ kvm_log_start(isa_mem_base + 0xa0000, 0x8000);
+ kvm_log_start(isa_mem_base + 0xa8000, 0x8000);
+ mark_dirty(isa_mem_base + 0xa0000, 0x10000);
+ }
+ s2 = 1;
}
#ifdef CONFIG_BOCHS_VBE
if (kvm_enabled() && s->vbe_mapped) {
- kvm_log_start(VBE_DISPI_LFB_PHYSICAL_ADDRESS, s->vram_size);
+ if (!s3) {
+ kvm_log_start(VBE_DISPI_LFB_PHYSICAL_ADDRESS, s->vram_size);
+ }
+ s3 = 1;
}
#endif
}
void vga_dirty_log_stop(VGACommonState *s)
{
- if (kvm_enabled() && s->map_addr)
+ if (kvm_enabled() && s->map_addr && s1)
kvm_log_stop(s->map_addr, s->map_end - s->map_addr);
- if (kvm_enabled() && s->lfb_vram_mapped) {
+ if (kvm_enabled() && s->lfb_vram_mapped && s1) {
kvm_log_stop(isa_mem_base + 0xa0000, 0x8000);
kvm_log_stop(isa_mem_base + 0xa8000, 0x8000);
}
#ifdef CONFIG_BOCHS_VBE
- if (kvm_enabled() && s->vbe_mapped) {
+ if (kvm_enabled() && s->vbe_mapped && s3) {
kvm_log_stop(VBE_DISPI_LFB_PHYSICAL_ADDRESS, s->vram_size);
}
#endif
+
+ s1 = s2 = s3 = 0;
}
void vga_dirty_log_restart(VGACommonState *s)
@@ -1864,6 +1891,7 @@ static void vga_draw_blank(VGACommonState *s, int full_update)
return;
if (s->last_scr_width <= 0 || s->last_scr_height <= 0)
return;
+ vga_dirty_log_stop(s);
s->rgb_to_pixel =
rgb_to_pixel_dup_table[get_depth_index(s->ds)];
@@ -1908,6 +1936,9 @@ static void vga_update_display(void *opaque)
vga_draw_text(s, full_update);
break;
case GMODE_GRAPH:
+#ifdef TARGET_IA64
+ full_update = 1;
+#endif
vga_draw_graphic(s, full_update);
break;
case GMODE_BLANK:
diff --git a/hw/vga_int.h b/hw/vga_int.h
index 6a46a434f..70e0f19c4 100644
--- a/hw/vga_int.h
+++ b/hw/vga_int.h
@@ -33,8 +33,8 @@
/* bochs VBE support */
#define CONFIG_BOCHS_VBE
-#define VBE_DISPI_MAX_XRES 1600
-#define VBE_DISPI_MAX_YRES 1200
+#define VBE_DISPI_MAX_XRES 2560
+#define VBE_DISPI_MAX_YRES 1600
#define VBE_DISPI_MAX_BPP 32
#define VBE_DISPI_INDEX_ID 0x0
@@ -226,7 +226,7 @@ void vga_init_vbe(VGACommonState *s);
extern const uint8_t sr_mask[8];
extern const uint8_t gr_mask[16];
-#define VGA_RAM_SIZE (8192 * 1024)
+#define VGA_RAM_SIZE (16 * 1024 * 1024)
#define VGABIOS_FILENAME "vgabios.bin"
#define VGABIOS_CIRRUS_FILENAME "vgabios-cirrus.bin"
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index 9fe3886b0..1e4dfdd0e 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -21,6 +21,7 @@
#include "balloon.h"
#include "virtio-balloon.h"
#include "kvm.h"
+#include "qemu-kvm.h"
#include "qlist.h"
#include "qint.h"
#include "qstring.h"
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index c6ef8254e..a55c9af8c 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -427,6 +427,31 @@ static void virtio_pci_guest_notifier_read(void *opaque)
}
}
+static int virtio_pci_mask_notifier(PCIDevice *dev, unsigned vector,
+ void *opaque, int masked)
+{
+#ifdef CONFIG_KVM
+ VirtQueue *vq = opaque;
+ EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
+ int r = kvm_set_irqfd(dev->msix_irq_entries[vector].gsi,
+ event_notifier_get_fd(notifier),
+ !masked);
+ if (r < 0) {
+ return (r == -ENOSYS) ? 0 : r;
+ }
+ if (masked) {
+ qemu_set_fd_handler(event_notifier_get_fd(notifier),
+ virtio_pci_guest_notifier_read, NULL, vq);
+ } else {
+ qemu_set_fd_handler(event_notifier_get_fd(notifier),
+ NULL, NULL, NULL);
+ }
+ return 0;
+#else
+ return -ENOSYS;
+#endif
+}
+
static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign)
{
VirtIOPCIProxy *proxy = opaque;
@@ -440,9 +465,16 @@ static int virtio_pci_set_guest_notifier(void *opaque, int n, bool assign)
}
qemu_set_fd_handler(event_notifier_get_fd(notifier),
virtio_pci_guest_notifier_read, NULL, vq);
+ msix_set_mask_notifier(&proxy->pci_dev,
+ virtio_queue_vector(proxy->vdev, n), vq);
} else {
+ msix_unset_mask_notifier(&proxy->pci_dev,
+ virtio_queue_vector(proxy->vdev, n));
qemu_set_fd_handler(event_notifier_get_fd(notifier),
NULL, NULL, NULL);
+ /* Test and clear notifier before closing it,
+ * in case poll callback didn't have time to run. */
+ virtio_pci_guest_notifier_read(vq);
event_notifier_cleanup(notifier);
}
@@ -525,6 +557,8 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev,
proxy->pci_dev.config_write = virtio_write_config;
+ proxy->pci_dev.msix_mask_notifier = virtio_pci_mask_notifier;
+
size = VIRTIO_PCI_REGION_SIZE(&proxy->pci_dev) + vdev->config_len;
if (size & (size-1))
size = 1 << qemu_fls(size);