From 8175e5b79c38a1d85225da516fa1a0ecbf2fdbca Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 15 Apr 2013 10:42:33 +0200 Subject: KVM: Add KVM_IRQCHIP_NUM_PINS in addition to KVM_IOAPIC_NUM_PINS The concept of routing interrupt lines to an irqchip is nothing that is IOAPIC specific. Every irqchip has a maximum number of pins that can be linked to irq lines. So let's add a new define that allows us to reuse generic code for non-IOAPIC platforms. Signed-off-by: Alexander Graf Acked-by: Michael S. Tsirkin --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 93a50054d46c..bf3b1dcb8b3d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -307,7 +307,7 @@ struct kvm_kernel_irq_routing_entry { #ifdef __KVM_HAVE_IOAPIC struct kvm_irq_routing_table { - int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS]; + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; struct kvm_kernel_irq_routing_entry *rt_entries; u32 nr_rt_entries; /* -- cgit v1.2.3 From a725d56a02ec3582bb5b9756f261fdc6962c79ee Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 17 Apr 2013 13:29:30 +0200 Subject: KVM: Introduce CONFIG_HAVE_KVM_IRQ_ROUTING Quite a bit of code in KVM has been conditionalized on availability of IOAPIC emulation. However, most of it is generically applicable to platforms that don't have an IOPIC, but a different type of irq chip. Make code that only relies on IRQ routing, not an APIC itself, on CONFIG_HAVE_KVM_IRQ_ROUTING, so that we can reuse it later. Signed-off-by: Alexander Graf Acked-by: Michael S. Tsirkin --- arch/x86/kvm/Kconfig | 1 + include/linux/kvm_host.h | 6 +++--- virt/kvm/Kconfig | 3 +++ virt/kvm/eventfd.c | 6 +++--- virt/kvm/kvm_main.c | 2 +- 5 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 586f00059805..9d50efdb719d 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -29,6 +29,7 @@ config KVM select MMU_NOTIFIER select ANON_INODES select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_EVENTFD select KVM_APIC_ARCHITECTURE select KVM_ASYNC_PF diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bf3b1dcb8b3d..4215d4f3d86f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -304,7 +304,7 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING struct kvm_irq_routing_table { int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; @@ -432,7 +432,7 @@ void kvm_vcpu_uninit(struct kvm_vcpu *vcpu); int __must_check vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING int kvm_irqfd_init(void); void kvm_irqfd_exit(void); #else @@ -957,7 +957,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) } #endif -#ifdef KVM_CAP_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING #define KVM_MAX_IRQ_ROUTES 1024 diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index d01b24b72c61..779262f59e25 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -6,6 +6,9 @@ config HAVE_KVM config HAVE_KVM_IRQCHIP bool +config HAVE_KVM_IRQ_ROUTING + bool + config HAVE_KVM_EVENTFD bool select EVENTFD diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index c5d43ffbf1f3..64ee720b75c7 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -35,7 +35,7 @@ #include "iodev.h" -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING /* * -------------------------------------------------------------------- * irqfd: Allows an fd to be used to inject an interrupt to the guest @@ -433,7 +433,7 @@ fail: void kvm_eventfd_init(struct kvm *kvm) { -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING spin_lock_init(&kvm->irqfds.lock); INIT_LIST_HEAD(&kvm->irqfds.items); INIT_LIST_HEAD(&kvm->irqfds.resampler_list); @@ -442,7 +442,7 @@ kvm_eventfd_init(struct kvm *kvm) INIT_LIST_HEAD(&kvm->ioeventfds); } -#ifdef __KVM_HAVE_IOAPIC +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING /* * shutdown any irqfd's that match fd+gsi */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index aaac1a7a9ea8..2c3b226bc13b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2404,7 +2404,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) case KVM_CAP_SIGNAL_MSI: #endif return 1; -#ifdef KVM_CAP_IRQ_ROUTING +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING case KVM_CAP_IRQ_ROUTING: return KVM_MAX_IRQ_ROUTES; #endif -- cgit v1.2.3 From 7eee2efdc6978aa70ab4046394128c1a10bc2d80 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 15 Apr 2013 10:50:54 +0200 Subject: KVM: Remove kvm_get_intr_delivery_bitmask The prototype has been stale for a while, I can't spot any real function define behind it. Let's just remove it. Signed-off-by: Alexander Graf Acked-by: Michael S. Tsirkin --- include/linux/kvm_host.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4215d4f3d86f..a7bfe9d3aa5e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -719,11 +719,6 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask); -#ifdef __KVM_HAVE_IOAPIC -void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, - union kvm_ioapic_redirect_entry *entry, - unsigned long *deliver_bitmask); -#endif int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); -- cgit v1.2.3 From e8cde0939d8ebe9c8ebe5a4bdf71af51a0d56053 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 15 Apr 2013 23:23:21 +0200 Subject: KVM: Move irq routing setup to irqchip.c Setting up IRQ routes is nothing IOAPIC specific. Extract everything that really is generic code into irqchip.c and only leave the ioapic specific bits to irq_comm.c. Signed-off-by: Alexander Graf Acked-by: Michael S. Tsirkin --- include/linux/kvm_host.h | 3 ++ virt/kvm/irq_comm.c | 76 ++----------------------------------------- virt/kvm/irqchip.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 73 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a7bfe9d3aa5e..dcef724f4ba6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -961,6 +961,9 @@ int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, unsigned flags); +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue); void kvm_free_irq_routing(struct kvm *kvm); int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index d5008f4aade7..e2e6b4473a96 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -271,27 +271,14 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, rcu_read_unlock(); } -static int setup_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, - const struct kvm_irq_routing_entry *ue) +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; int delta; unsigned max_pin; - struct kvm_kernel_irq_routing_entry *ei; - /* - * Do not allow GSI to be mapped to the same irqchip more than once. - * Allow only one to one mapping between GSI and MSI. - */ - hlist_for_each_entry(ei, &rt->map[ue->gsi], link) - if (ei->type == KVM_IRQ_ROUTING_MSI || - ue->type == KVM_IRQ_ROUTING_MSI || - ue->u.irqchip.irqchip == ei->irqchip.irqchip) - return r; - - e->gsi = ue->gsi; - e->type = ue->type; switch (ue->type) { case KVM_IRQ_ROUTING_IRQCHIP: delta = 0; @@ -328,68 +315,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, goto out; } - hlist_add_head(&e->link, &rt->map[e->gsi]); r = 0; out: return r; } -int kvm_set_irq_routing(struct kvm *kvm, - const struct kvm_irq_routing_entry *ue, - unsigned nr, - unsigned flags) -{ - struct kvm_irq_routing_table *new, *old; - u32 i, j, nr_rt_entries = 0; - int r; - - for (i = 0; i < nr; ++i) { - if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES) - return -EINVAL; - nr_rt_entries = max(nr_rt_entries, ue[i].gsi); - } - - nr_rt_entries += 1; - - new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)) - + (nr * sizeof(struct kvm_kernel_irq_routing_entry)), - GFP_KERNEL); - - if (!new) - return -ENOMEM; - - new->rt_entries = (void *)&new->map[nr_rt_entries]; - - new->nr_rt_entries = nr_rt_entries; - for (i = 0; i < 3; i++) - for (j = 0; j < KVM_IRQCHIP_NUM_PINS; j++) - new->chip[i][j] = -1; - - for (i = 0; i < nr; ++i) { - r = -EINVAL; - if (ue->flags) - goto out; - r = setup_routing_entry(new, &new->rt_entries[i], ue); - if (r) - goto out; - ++ue; - } - - mutex_lock(&kvm->irq_lock); - old = kvm->irq_routing; - kvm_irq_routing_update(kvm, new); - mutex_unlock(&kvm->irq_lock); - - synchronize_rcu(); - - new = old; - r = 0; - -out: - kfree(new); - return r; -} - #define IOAPIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) } diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 12f7f26af4f8..20dc9e4a8f6c 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -150,3 +150,88 @@ void kvm_free_irq_routing(struct kvm *kvm) at this stage */ kfree(kvm->irq_routing); } + +static int setup_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + struct kvm_kernel_irq_routing_entry *ei; + + /* + * Do not allow GSI to be mapped to the same irqchip more than once. + * Allow only one to one mapping between GSI and MSI. + */ + hlist_for_each_entry(ei, &rt->map[ue->gsi], link) + if (ei->type == KVM_IRQ_ROUTING_MSI || + ue->type == KVM_IRQ_ROUTING_MSI || + ue->u.irqchip.irqchip == ei->irqchip.irqchip) + return r; + + e->gsi = ue->gsi; + e->type = ue->type; + r = kvm_set_routing_entry(rt, e, ue); + if (r) + goto out; + + hlist_add_head(&e->link, &rt->map[e->gsi]); + r = 0; +out: + return r; +} + +int kvm_set_irq_routing(struct kvm *kvm, + const struct kvm_irq_routing_entry *ue, + unsigned nr, + unsigned flags) +{ + struct kvm_irq_routing_table *new, *old; + u32 i, j, nr_rt_entries = 0; + int r; + + for (i = 0; i < nr; ++i) { + if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES) + return -EINVAL; + nr_rt_entries = max(nr_rt_entries, ue[i].gsi); + } + + nr_rt_entries += 1; + + new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)) + + (nr * sizeof(struct kvm_kernel_irq_routing_entry)), + GFP_KERNEL); + + if (!new) + return -ENOMEM; + + new->rt_entries = (void *)&new->map[nr_rt_entries]; + + new->nr_rt_entries = nr_rt_entries; + for (i = 0; i < KVM_NR_IRQCHIPS; i++) + for (j = 0; j < KVM_IRQCHIP_NUM_PINS; j++) + new->chip[i][j] = -1; + + for (i = 0; i < nr; ++i) { + r = -EINVAL; + if (ue->flags) + goto out; + r = setup_routing_entry(new, &new->rt_entries[i], ue); + if (r) + goto out; + ++ue; + } + + mutex_lock(&kvm->irq_lock); + old = kvm->irq_routing; + kvm_irq_routing_update(kvm, new); + mutex_unlock(&kvm->irq_lock); + + synchronize_rcu(); + + new = old; + r = 0; + +out: + kfree(new); + return r; +} -- cgit v1.2.3 From 852b6d57dc7fa378019786fa84727036e56839ea Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 12 Apr 2013 14:08:42 +0000 Subject: kvm: add device control API Currently, devices that are emulated inside KVM are configured in a hardcoded manner based on an assumption that any given architecture only has one way to do it. If there's any need to access device state, it is done through inflexible one-purpose-only IOCTLs (e.g. KVM_GET/SET_LAPIC). Defining new IOCTLs for every little thing is cumbersome and depletes a limited numberspace. This API provides a mechanism to instantiate a device of a certain type, returning an ID that can be used to set/get attributes of the device. Attributes may include configuration parameters (e.g. register base address), device state, operational commands, etc. It is similar to the ONE_REG API, except that it acts on devices rather than vcpus. Both device types and individual attributes can be tested without having to create the device or get/set the attribute, without the need for separately managing enumerated capabilities. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 70 +++++++++++++++++ Documentation/virtual/kvm/devices/README | 1 + include/linux/kvm_host.h | 35 +++++++++ include/uapi/linux/kvm.h | 27 +++++++ virt/kvm/kvm_main.c | 129 +++++++++++++++++++++++++++++++ 5 files changed, 262 insertions(+) create mode 100644 Documentation/virtual/kvm/devices/README (limited to 'include/linux/kvm_host.h') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a1f2200e43d0..66b58e494935 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2189,6 +2189,76 @@ header; first `n_valid' valid entries with contents from the data written, then `n_invalid' invalid entries, invalidating any previously valid entries found. +4.79 KVM_CREATE_DEVICE + +Capability: KVM_CAP_DEVICE_CTRL +Type: vm ioctl +Parameters: struct kvm_create_device (in/out) +Returns: 0 on success, -1 on error +Errors: + ENODEV: The device type is unknown or unsupported + EEXIST: Device already created, and this type of device may not + be instantiated multiple times + + Other error conditions may be defined by individual device types or + have their standard meanings. + +Creates an emulated device in the kernel. The file descriptor returned +in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR. + +If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the +device type is supported (not necessarily whether it can be created +in the current vm). + +Individual devices should not define flags. Attributes should be used +for specifying any behavior that is not implied by the device type +number. + +struct kvm_create_device { + __u32 type; /* in: KVM_DEV_TYPE_xxx */ + __u32 fd; /* out: device handle */ + __u32 flags; /* in: KVM_CREATE_DEVICE_xxx */ +}; + +4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR + +Capability: KVM_CAP_DEVICE_CTRL +Type: device ioctl +Parameters: struct kvm_device_attr +Returns: 0 on success, -1 on error +Errors: + ENXIO: The group or attribute is unknown/unsupported for this device + EPERM: The attribute cannot (currently) be accessed this way + (e.g. read-only attribute, or attribute that only makes + sense when the device is in a different state) + + Other error conditions may be defined by individual device types. + +Gets/sets a specified piece of device configuration and/or state. The +semantics are device-specific. See individual device documentation in +the "devices" directory. As with ONE_REG, the size of the data +transferred is defined by the particular attribute. + +struct kvm_device_attr { + __u32 flags; /* no flags currently defined */ + __u32 group; /* device-defined */ + __u64 attr; /* group-defined */ + __u64 addr; /* userspace address of attr data */ +}; + +4.81 KVM_HAS_DEVICE_ATTR + +Capability: KVM_CAP_DEVICE_CTRL +Type: device ioctl +Parameters: struct kvm_device_attr +Returns: 0 on success, -1 on error +Errors: + ENXIO: The group or attribute is unknown/unsupported for this device + +Tests whether a device supports a particular attribute. A successful +return indicates the attribute is implemented. It does not necessarily +indicate that the attribute can be read or written in the device's +current state. "addr" is ignored. 4.77 KVM_ARM_VCPU_INIT diff --git a/Documentation/virtual/kvm/devices/README b/Documentation/virtual/kvm/devices/README new file mode 100644 index 000000000000..34a69834124a --- /dev/null +++ b/Documentation/virtual/kvm/devices/README @@ -0,0 +1 @@ +This directory contains specific device bindings for KVM_CAP_DEVICE_CTRL. diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index dcef724f4ba6..6dab6b5b3e3b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1064,6 +1064,41 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) extern bool kvm_rebooting; +struct kvm_device_ops; + +struct kvm_device { + struct kvm_device_ops *ops; + struct kvm *kvm; + atomic_t users; + void *private; +}; + +/* create, destroy, and name are mandatory */ +struct kvm_device_ops { + const char *name; + int (*create)(struct kvm_device *dev, u32 type); + + /* + * Destroy is responsible for freeing dev. + * + * Destroy may be called before or after destructors are called + * on emulated I/O regions, depending on whether a reference is + * held by a vcpu or other kvm component that gets destroyed + * after the emulated I/O. + */ + void (*destroy)(struct kvm_device *dev); + + int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); + int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); + int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); + long (*ioctl)(struct kvm_device *dev, unsigned int ioctl, + unsigned long arg); +}; + +void kvm_device_get(struct kvm_device *dev); +void kvm_device_put(struct kvm_device *dev); +struct kvm_device *kvm_device_from_filp(struct file *filp); + #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index c741902c9e0b..38a0be0c199f 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -666,6 +666,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_EPR 86 #define KVM_CAP_ARM_PSCI 87 #define KVM_CAP_ARM_SET_DEVICE_ADDR 88 +#define KVM_CAP_DEVICE_CTRL 89 #ifdef KVM_CAP_IRQ_ROUTING @@ -818,6 +819,24 @@ struct kvm_arm_device_addr { __u64 addr; }; +/* + * Device control API, available with KVM_CAP_DEVICE_CTRL + */ +#define KVM_CREATE_DEVICE_TEST 1 + +struct kvm_create_device { + __u32 type; /* in: KVM_DEV_TYPE_xxx */ + __u32 fd; /* out: device handle */ + __u32 flags; /* in: KVM_CREATE_DEVICE_xxx */ +}; + +struct kvm_device_attr { + __u32 flags; /* no flags currently defined */ + __u32 group; /* device-defined */ + __u64 attr; /* group-defined */ + __u64 addr; /* userspace address of attr data */ +}; + /* * ioctls for VM fds */ @@ -906,6 +925,14 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_ARM_SET_DEVICE_ADDR */ #define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) +/* ioctl for vm fd */ +#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) + +/* ioctls for fds returned by KVM_CREATE_DEVICE */ +#define KVM_SET_DEVICE_ATTR _IOW(KVMIO, 0xe1, struct kvm_device_attr) +#define KVM_GET_DEVICE_ATTR _IOW(KVMIO, 0xe2, struct kvm_device_attr) +#define KVM_HAS_DEVICE_ATTR _IOW(KVMIO, 0xe3, struct kvm_device_attr) + /* * ioctls for vcpu fds */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f9492f3847d6..5f0d78c2537b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2159,6 +2159,117 @@ out: } #endif +static int kvm_device_ioctl_attr(struct kvm_device *dev, + int (*accessor)(struct kvm_device *dev, + struct kvm_device_attr *attr), + unsigned long arg) +{ + struct kvm_device_attr attr; + + if (!accessor) + return -EPERM; + + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) + return -EFAULT; + + return accessor(dev, &attr); +} + +static long kvm_device_ioctl(struct file *filp, unsigned int ioctl, + unsigned long arg) +{ + struct kvm_device *dev = filp->private_data; + + switch (ioctl) { + case KVM_SET_DEVICE_ATTR: + return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg); + case KVM_GET_DEVICE_ATTR: + return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg); + case KVM_HAS_DEVICE_ATTR: + return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg); + default: + if (dev->ops->ioctl) + return dev->ops->ioctl(dev, ioctl, arg); + + return -ENOTTY; + } +} + +void kvm_device_get(struct kvm_device *dev) +{ + atomic_inc(&dev->users); +} + +void kvm_device_put(struct kvm_device *dev) +{ + if (atomic_dec_and_test(&dev->users)) + dev->ops->destroy(dev); +} + +static int kvm_device_release(struct inode *inode, struct file *filp) +{ + struct kvm_device *dev = filp->private_data; + struct kvm *kvm = dev->kvm; + + kvm_device_put(dev); + kvm_put_kvm(kvm); + return 0; +} + +static const struct file_operations kvm_device_fops = { + .unlocked_ioctl = kvm_device_ioctl, + .release = kvm_device_release, +}; + +struct kvm_device *kvm_device_from_filp(struct file *filp) +{ + if (filp->f_op != &kvm_device_fops) + return NULL; + + return filp->private_data; +} + +static int kvm_ioctl_create_device(struct kvm *kvm, + struct kvm_create_device *cd) +{ + struct kvm_device_ops *ops = NULL; + struct kvm_device *dev; + bool test = cd->flags & KVM_CREATE_DEVICE_TEST; + int ret; + + switch (cd->type) { + default: + return -ENODEV; + } + + if (test) + return 0; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + dev->ops = ops; + dev->kvm = kvm; + atomic_set(&dev->users, 1); + + ret = ops->create(dev, cd->type); + if (ret < 0) { + kfree(dev); + return ret; + } + + ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR); + if (ret < 0) { + ops->destroy(dev); + return ret; + } + + kvm_get_kvm(kvm); + cd->fd = ret; + return 0; +} + static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2304,6 +2415,24 @@ static long kvm_vm_ioctl(struct file *filp, break; } #endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */ + case KVM_CREATE_DEVICE: { + struct kvm_create_device cd; + + r = -EFAULT; + if (copy_from_user(&cd, argp, sizeof(cd))) + goto out; + + r = kvm_ioctl_create_device(kvm, &cd); + if (r) + goto out; + + r = -EFAULT; + if (copy_to_user(argp, &cd, sizeof(cd))) + goto out; + + r = 0; + break; + } default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); if (r == -ENOTTY) -- cgit v1.2.3 From 5df554ad5b7522ea62b0ff9d5be35183494efc21 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 12 Apr 2013 14:08:46 +0000 Subject: kvm/ppc/mpic: in-kernel MPIC emulation Hook the MPIC code up to the KVM interfaces, add locking, etc. Signed-off-by: Scott Wood [agraf: add stub function for kvmppc_mpic_set_epr, non-booke, 64bit] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/devices/mpic.txt | 37 ++ arch/powerpc/include/asm/kvm_host.h | 8 +- arch/powerpc/include/asm/kvm_ppc.h | 17 + arch/powerpc/include/uapi/asm/kvm.h | 8 + arch/powerpc/kvm/Kconfig | 9 + arch/powerpc/kvm/Makefile | 2 + arch/powerpc/kvm/booke.c | 8 +- arch/powerpc/kvm/mpic.c | 762 ++++++++++++++++++++++------- arch/powerpc/kvm/powerpc.c | 12 +- include/linux/kvm_host.h | 2 + include/uapi/linux/kvm.h | 3 + virt/kvm/kvm_main.c | 6 + 12 files changed, 674 insertions(+), 200 deletions(-) create mode 100644 Documentation/virtual/kvm/devices/mpic.txt (limited to 'include/linux/kvm_host.h') diff --git a/Documentation/virtual/kvm/devices/mpic.txt b/Documentation/virtual/kvm/devices/mpic.txt new file mode 100644 index 000000000000..ce98e3264fa8 --- /dev/null +++ b/Documentation/virtual/kvm/devices/mpic.txt @@ -0,0 +1,37 @@ +MPIC interrupt controller +========================= + +Device types supported: + KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0 + KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2 + +Only one MPIC instance, of any type, may be instantiated. The created +MPIC will act as the system interrupt controller, connecting to each +vcpu's interrupt inputs. + +Groups: + KVM_DEV_MPIC_GRP_MISC + Attributes: + KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit) + Base address of the 256 KiB MPIC register space. Must be + naturally aligned. A value of zero disables the mapping. + Reset value is zero. + + KVM_DEV_MPIC_GRP_REGISTER (rw, 32-bit) + Access an MPIC register, as if the access were made from the guest. + "attr" is the byte offset into the MPIC register space. Accesses + must be 4-byte aligned. + + MSIs may be signaled by using this attribute group to write + to the relevant MSIIR. + + KVM_DEV_MPIC_GRP_IRQ_ACTIVE (rw, 32-bit) + IRQ input line for each standard openpic source. 0 is inactive and 1 + is active, regardless of interrupt sense. + + For edge-triggered interrupts: Writing 1 is considered an activating + edge, and writing 0 is ignored. Reading returns 1 if a previously + signaled edge has not been acknowledged, and 0 otherwise. + + "attr" is the IRQ number. IRQ numbers for standard sources are the + byte offset of the relevant IVPR from EIVPR0, divided by 32. diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 1443768a6588..153c8c2b0f88 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -361,6 +361,11 @@ struct kvmppc_slb { #define KVMPPC_BOOKE_MAX_IAC 4 #define KVMPPC_BOOKE_MAX_DAC 2 +/* KVMPPC_EPR_USER takes precedence over KVMPPC_EPR_KERNEL */ +#define KVMPPC_EPR_NONE 0 /* EPR not supported */ +#define KVMPPC_EPR_USER 1 /* exit to userspace to fill EPR */ +#define KVMPPC_EPR_KERNEL 2 /* in-kernel irqchip */ + struct kvmppc_booke_debug_reg { u32 dbcr0; u32 dbcr1; @@ -526,7 +531,7 @@ struct kvm_vcpu_arch { u8 sane; u8 cpu_type; u8 hcall_needed; - u8 epr_enabled; + u8 epr_flags; /* KVMPPC_EPR_xxx */ u8 epr_needed; u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ @@ -593,5 +598,6 @@ struct kvm_vcpu_arch { #define KVM_MMIO_REG_FQPR 0x0060 #define __KVM_HAVE_ARCH_WQP +#define __KVM_HAVE_CREATE_DEVICE #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index bcc68b1afc66..3810f9c7616c 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -164,6 +164,8 @@ extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu); extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); +int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. @@ -245,6 +247,9 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *); void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); +struct openpic; +void kvmppc_mpic_put(struct openpic *opp); + #ifdef CONFIG_KVM_BOOK3S_64_HV static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) { @@ -270,6 +275,18 @@ static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) #endif } +#ifdef CONFIG_KVM_MPIC + +void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu); + +#else + +static inline void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu) +{ +} + +#endif /* CONFIG_KVM_MPIC */ + int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu, struct kvm_config_tlb *cfg); int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 41d59d8bdfe8..02ad96606860 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -382,6 +382,14 @@ struct kvm_get_htab_header { __u16 n_invalid; }; +/* Device control API: PPC-specific devices */ +#define KVM_DEV_MPIC_GRP_MISC 1 +#define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */ + +#define KVM_DEV_MPIC_GRP_REGISTER 2 /* 32-bit */ +#define KVM_DEV_MPIC_GRP_IRQ_ACTIVE 3 /* 32-bit */ + +/* One-Reg API: PPC-specific registers */ #define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) #define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2) #define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3) diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 448952035b6e..f47e95e0b6de 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -151,6 +151,15 @@ config KVM_E500MC If unsure, say N. +config KVM_MPIC + bool "KVM in-kernel MPIC emulation" + depends on KVM + help + Enable support for emulating MPIC devices inside the + host kernel, rather than relying on userspace to emulate. + Currently, support is limited to certain versions of + Freescale's MPIC implementation. + source drivers/vhost/Kconfig endif # VIRTUALIZATION diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index b772eded8c26..4a2277a221bb 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -103,6 +103,8 @@ kvm-book3s_32-objs := \ book3s_32_mmu.o kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) +kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o + kvm-objs := $(kvm-objs-m) $(kvm-objs-y) obj-$(CONFIG_KVM_440) += kvm.o diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 02756537cd90..4da11ed48c59 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -346,7 +346,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, keep_irq = true; } - if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled) + if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_flags) update_epr = true; switch (priority) { @@ -427,8 +427,10 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, set_guest_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) set_guest_dear(vcpu, vcpu->arch.queued_dear); - if (update_epr == true) - kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); + if (update_epr == true) { + if (vcpu->arch.epr_flags & KVMPPC_EPR_USER) + kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); + } new_msr &= msr_mask; #if defined(CONFIG_64BIT) diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 1df67aed7a91..cb451b91e342 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -23,6 +23,19 @@ * THE SOFTWARE. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "iodev.h" + #define MAX_CPU 32 #define MAX_SRC 256 #define MAX_TMR 4 @@ -36,6 +49,7 @@ #define OPENPIC_FLAG_ILR (2 << 0) /* OpenPIC address map */ +#define OPENPIC_REG_SIZE 0x40000 #define OPENPIC_GLB_REG_START 0x0 #define OPENPIC_GLB_REG_SIZE 0x10F0 #define OPENPIC_TMR_REG_START 0x10F0 @@ -89,6 +103,7 @@ static struct fsl_mpic_info fsl_mpic_42 = { #define ILR_INTTGT_INT 0x00 #define ILR_INTTGT_CINT 0x01 /* critical */ #define ILR_INTTGT_MCP 0x02 /* machine check */ +#define NUM_OUTPUTS 3 #define MSIIR_OFFSET 0x140 #define MSIIR_SRS_SHIFT 29 @@ -98,18 +113,19 @@ static struct fsl_mpic_info fsl_mpic_42 = { static int get_current_cpu(void) { - CPUState *cpu_single_cpu; - - if (!cpu_single_env) - return -1; - - cpu_single_cpu = ENV_GET_CPU(cpu_single_env); - return cpu_single_cpu->cpu_index; +#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE) + struct kvm_vcpu *vcpu = current->thread.kvm_vcpu; + return vcpu ? vcpu->vcpu_id : -1; +#else + /* XXX */ + return -1; +#endif } -static uint32_t openpic_cpu_read_internal(void *opaque, gpa_t addr, int idx); -static void openpic_cpu_write_internal(void *opaque, gpa_t addr, - uint32_t val, int idx); +static int openpic_cpu_write_internal(void *opaque, gpa_t addr, + u32 val, int idx); +static int openpic_cpu_read_internal(void *opaque, gpa_t addr, + u32 *ptr, int idx); enum irq_type { IRQ_TYPE_NORMAL = 0, @@ -131,7 +147,7 @@ struct irq_source { uint32_t idr; /* IRQ destination register */ uint32_t destmask; /* bitmap of CPU destinations */ int last_cpu; - int output; /* IRQ level, e.g. OPENPIC_OUTPUT_INT */ + int output; /* IRQ level, e.g. ILR_INTTGT_INT */ int pending; /* TRUE if IRQ is pending */ enum irq_type type; bool level:1; /* level-triggered */ @@ -158,16 +174,27 @@ struct irq_source { #define IDR_CI 0x40000000 /* critical interrupt */ struct irq_dest { + struct kvm_vcpu *vcpu; + int32_t ctpr; /* CPU current task priority */ struct irq_queue raised; struct irq_queue servicing; - qemu_irq *irqs; /* Count of IRQ sources asserting on non-INT outputs */ - uint32_t outputs_active[OPENPIC_OUTPUT_NB]; + uint32_t outputs_active[NUM_OUTPUTS]; }; struct openpic { + struct kvm *kvm; + struct kvm_device *dev; + struct kvm_io_device mmio; + struct list_head mmio_regions; + atomic_t users; + bool mmio_mapped; + + gpa_t reg_base; + spinlock_t lock; + /* Behavior control */ struct fsl_mpic_info *fsl; uint32_t model; @@ -208,6 +235,47 @@ struct openpic { uint32_t irq_msi; }; + +static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst, + int output) +{ + struct kvm_interrupt irq = { + .irq = KVM_INTERRUPT_SET_LEVEL, + }; + + if (!dst->vcpu) { + pr_debug("%s: destination cpu %d does not exist\n", + __func__, (int)(dst - &opp->dst[0])); + return; + } + + pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->vcpu_id, + output); + + if (output != ILR_INTTGT_INT) /* TODO */ + return; + + kvm_vcpu_ioctl_interrupt(dst->vcpu, &irq); +} + +static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst, + int output) +{ + if (!dst->vcpu) { + pr_debug("%s: destination cpu %d does not exist\n", + __func__, (int)(dst - &opp->dst[0])); + return; + } + + pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->vcpu_id, + output); + + if (output != ILR_INTTGT_INT) /* TODO */ + return; + + kvmppc_core_dequeue_external(dst->vcpu); +} + static inline void IRQ_setbit(struct irq_queue *q, int n_IRQ) { set_bit(n_IRQ, q->queue); @@ -268,7 +336,7 @@ static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ, pr_debug("%s: IRQ %d active %d was %d\n", __func__, n_IRQ, active, was_active); - if (src->output != OPENPIC_OUTPUT_INT) { + if (src->output != ILR_INTTGT_INT) { pr_debug("%s: output %d irq %d active %d was %d count %d\n", __func__, src->output, n_IRQ, active, was_active, dst->outputs_active[src->output]); @@ -282,14 +350,14 @@ static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ, dst->outputs_active[src->output]++ == 0) { pr_debug("%s: Raise OpenPIC output %d cpu %d irq %d\n", __func__, src->output, n_CPU, n_IRQ); - qemu_irq_raise(dst->irqs[src->output]); + mpic_irq_raise(opp, dst, src->output); } } else { if (was_active && --dst->outputs_active[src->output] == 0) { pr_debug("%s: Lower OpenPIC output %d cpu %d irq %d\n", __func__, src->output, n_CPU, n_IRQ); - qemu_irq_lower(dst->irqs[src->output]); + mpic_irq_lower(opp, dst, src->output); } } @@ -322,8 +390,7 @@ static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ, } else { pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n", __func__, n_CPU, n_IRQ, dst->raised.next); - qemu_irq_raise(opp->dst[n_CPU]. - irqs[OPENPIC_OUTPUT_INT]); + mpic_irq_raise(opp, dst, ILR_INTTGT_INT); } } else { IRQ_get_next(opp, &dst->servicing); @@ -338,8 +405,7 @@ static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ, pr_debug("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n", __func__, n_IRQ, dst->ctpr, dst->servicing.priority, n_CPU); - qemu_irq_lower(opp->dst[n_CPU]. - irqs[OPENPIC_OUTPUT_INT]); + mpic_irq_lower(opp, dst, ILR_INTTGT_INT); } } } @@ -415,8 +481,8 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level) struct irq_source *src; if (n_IRQ >= MAX_IRQ) { - pr_err("%s: IRQ %d out of range\n", __func__, n_IRQ); - abort(); + WARN_ONCE(1, "%s: IRQ %d out of range\n", __func__, n_IRQ); + return; } src = &opp->src[n_IRQ]; @@ -433,7 +499,7 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level) openpic_update_irq(opp, n_IRQ); } - if (src->output != OPENPIC_OUTPUT_INT) { + if (src->output != ILR_INTTGT_INT) { /* Edge-triggered interrupts shouldn't be used * with non-INT delivery, but just in case, * try to make it do something sane rather than @@ -446,15 +512,13 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level) } } -static void openpic_reset(DeviceState *d) +static void openpic_reset(struct openpic *opp) { - struct openpic *opp = FROM_SYSBUS(typeof(*opp), SYS_BUS_DEVICE(d)); int i; opp->gcr = GCR_RESET; /* Initialise controller registers */ opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) | - ((opp->nb_cpus - 1) << FRR_NCPU_SHIFT) | (opp->vid << FRR_VID_SHIFT); opp->pir = 0; @@ -504,7 +568,7 @@ static inline uint32_t read_IRQreg_idr(struct openpic *opp, int n_IRQ) static inline uint32_t read_IRQreg_ilr(struct openpic *opp, int n_IRQ) { if (opp->flags & OPENPIC_FLAG_ILR) - return output_to_inttgt(opp->src[n_IRQ].output); + return opp->src[n_IRQ].output; return 0xffffffff; } @@ -539,7 +603,7 @@ static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ, __func__); } - src->output = OPENPIC_OUTPUT_CINT; + src->output = ILR_INTTGT_CINT; src->nomask = true; src->destmask = 0; @@ -550,7 +614,7 @@ static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ, src->destmask |= 1UL << i; } } else { - src->output = OPENPIC_OUTPUT_INT; + src->output = ILR_INTTGT_INT; src->nomask = false; src->destmask = src->idr & normal_mask; } @@ -565,7 +629,7 @@ static inline void write_IRQreg_ilr(struct openpic *opp, int n_IRQ, if (opp->flags & OPENPIC_FLAG_ILR) { struct irq_source *src = &opp->src[n_IRQ]; - src->output = inttgt_to_output(val & ILR_INTTGT_MASK); + src->output = val & ILR_INTTGT_MASK; pr_debug("Set ILR %d to 0x%08x, output %d\n", n_IRQ, src->idr, src->output); @@ -614,34 +678,23 @@ static inline void write_IRQreg_ivpr(struct openpic *opp, int n_IRQ, static void openpic_gcr_write(struct openpic *opp, uint64_t val) { - bool mpic_proxy = false; - if (val & GCR_RESET) { - openpic_reset(&opp->busdev.qdev); + openpic_reset(opp); return; } opp->gcr &= ~opp->mpic_mode_mask; opp->gcr |= val & opp->mpic_mode_mask; - - /* Set external proxy mode */ - if ((val & opp->mpic_mode_mask) == GCR_MODE_PROXY) - mpic_proxy = true; - - ppce500_set_mpic_proxy(mpic_proxy); } -static void openpic_gbl_write(void *opaque, gpa_t addr, uint64_t val, - unsigned len) +static int openpic_gbl_write(void *opaque, gpa_t addr, u32 val) { struct openpic *opp = opaque; - struct irq_dest *dst; - int idx; + int err = 0; - pr_debug("%s: addr %#" HWADDR_PRIx " <= %08" PRIx64 "\n", - __func__, addr, val); + pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val); if (addr & 0xF) - return; + return 0; switch (addr) { case 0x00: /* Block Revision Register1 (BRR1) is Readonly */ @@ -654,7 +707,8 @@ static void openpic_gbl_write(void *opaque, gpa_t addr, uint64_t val, case 0x90: case 0xA0: case 0xB0: - openpic_cpu_write_internal(opp, addr, val, get_current_cpu()); + err = openpic_cpu_write_internal(opp, addr, val, + get_current_cpu()); break; case 0x1000: /* FRR */ break; @@ -664,21 +718,11 @@ static void openpic_gbl_write(void *opaque, gpa_t addr, uint64_t val, case 0x1080: /* VIR */ break; case 0x1090: /* PIR */ - for (idx = 0; idx < opp->nb_cpus; idx++) { - if ((val & (1 << idx)) && !(opp->pir & (1 << idx))) { - pr_debug("Raise OpenPIC RESET output for CPU %d\n", - idx); - dst = &opp->dst[idx]; - qemu_irq_raise(dst->irqs[OPENPIC_OUTPUT_RESET]); - } else if (!(val & (1 << idx)) && - (opp->pir & (1 << idx))) { - pr_debug("Lower OpenPIC RESET output for CPU %d\n", - idx); - dst = &opp->dst[idx]; - qemu_irq_lower(dst->irqs[OPENPIC_OUTPUT_RESET]); - } - } - opp->pir = val; + /* + * This register is used to reset a CPU core -- + * let userspace handle it. + */ + err = -ENXIO; break; case 0x10A0: /* IPI_IVPR */ case 0x10B0: @@ -695,21 +739,25 @@ static void openpic_gbl_write(void *opaque, gpa_t addr, uint64_t val, default: break; } + + return err; } -static uint64_t openpic_gbl_read(void *opaque, gpa_t addr, unsigned len) +static int openpic_gbl_read(void *opaque, gpa_t addr, u32 *ptr) { struct openpic *opp = opaque; - uint32_t retval; + u32 retval; + int err = 0; - pr_debug("%s: addr %#" HWADDR_PRIx "\n", __func__, addr); + pr_debug("%s: addr %#llx\n", __func__, addr); retval = 0xFFFFFFFF; if (addr & 0xF) - return retval; + goto out; switch (addr) { case 0x1000: /* FRR */ retval = opp->frr; + retval |= (opp->nb_cpus - 1) << FRR_NCPU_SHIFT; break; case 0x1020: /* GCR */ retval = opp->gcr; @@ -731,8 +779,8 @@ static uint64_t openpic_gbl_read(void *opaque, gpa_t addr, unsigned len) case 0x90: case 0xA0: case 0xB0: - retval = - openpic_cpu_read_internal(opp, addr, get_current_cpu()); + err = openpic_cpu_read_internal(opp, addr, + &retval, get_current_cpu()); break; case 0x10A0: /* IPI_IVPR */ case 0x10B0: @@ -750,28 +798,28 @@ static uint64_t openpic_gbl_read(void *opaque, gpa_t addr, unsigned len) default: break; } - pr_debug("%s: => 0x%08x\n", __func__, retval); - return retval; +out: + pr_debug("%s: => 0x%08x\n", __func__, retval); + *ptr = retval; + return err; } -static void openpic_tmr_write(void *opaque, gpa_t addr, uint64_t val, - unsigned len) +static int openpic_tmr_write(void *opaque, gpa_t addr, u32 val) { struct openpic *opp = opaque; int idx; addr += 0x10f0; - pr_debug("%s: addr %#" HWADDR_PRIx " <= %08" PRIx64 "\n", - __func__, addr, val); + pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val); if (addr & 0xF) - return; + return 0; if (addr == 0x10f0) { /* TFRR */ opp->tfrr = val; - return; + return 0; } idx = (addr >> 6) & 0x3; @@ -795,15 +843,17 @@ static void openpic_tmr_write(void *opaque, gpa_t addr, uint64_t val, write_IRQreg_idr(opp, opp->irq_tim0 + idx, val); break; } + + return 0; } -static uint64_t openpic_tmr_read(void *opaque, gpa_t addr, unsigned len) +static int openpic_tmr_read(void *opaque, gpa_t addr, u32 *ptr) { struct openpic *opp = opaque; uint32_t retval = -1; int idx; - pr_debug("%s: addr %#" HWADDR_PRIx "\n", __func__, addr); + pr_debug("%s: addr %#llx\n", __func__, addr); if (addr & 0xF) goto out; @@ -813,6 +863,7 @@ static uint64_t openpic_tmr_read(void *opaque, gpa_t addr, unsigned len) retval = opp->tfrr; goto out; } + switch (addr & 0x30) { case 0x00: /* TCCR */ retval = opp->timers[idx].tccr; @@ -830,18 +881,16 @@ static uint64_t openpic_tmr_read(void *opaque, gpa_t addr, unsigned len) out: pr_debug("%s: => 0x%08x\n", __func__, retval); - - return retval; + *ptr = retval; + return 0; } -static void openpic_src_write(void *opaque, gpa_t addr, uint64_t val, - unsigned len) +static int openpic_src_write(void *opaque, gpa_t addr, u32 val) { struct openpic *opp = opaque; int idx; - pr_debug("%s: addr %#" HWADDR_PRIx " <= %08" PRIx64 "\n", - __func__, addr, val); + pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val); addr = addr & 0xffff; idx = addr >> 5; @@ -857,15 +906,17 @@ static void openpic_src_write(void *opaque, gpa_t addr, uint64_t val, write_IRQreg_ilr(opp, idx, val); break; } + + return 0; } -static uint64_t openpic_src_read(void *opaque, uint64_t addr, unsigned len) +static int openpic_src_read(void *opaque, gpa_t addr, u32 *ptr) { struct openpic *opp = opaque; uint32_t retval; int idx; - pr_debug("%s: addr %#" HWADDR_PRIx "\n", __func__, addr); + pr_debug("%s: addr %#llx\n", __func__, addr); retval = 0xFFFFFFFF; addr = addr & 0xffff; @@ -884,20 +935,19 @@ static uint64_t openpic_src_read(void *opaque, uint64_t addr, unsigned len) } pr_debug("%s: => 0x%08x\n", __func__, retval); - return retval; + *ptr = retval; + return 0; } -static void openpic_msi_write(void *opaque, gpa_t addr, uint64_t val, - unsigned size) +static int openpic_msi_write(void *opaque, gpa_t addr, u32 val) { struct openpic *opp = opaque; int idx = opp->irq_msi; int srs, ibs; - pr_debug("%s: addr %#" HWADDR_PRIx " <= 0x%08" PRIx64 "\n", - __func__, addr, val); + pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val); if (addr & 0xF) - return; + return 0; switch (addr) { case MSIIR_OFFSET: @@ -911,17 +961,19 @@ static void openpic_msi_write(void *opaque, gpa_t addr, uint64_t val, /* most registers are read-only, thus ignored */ break; } + + return 0; } -static uint64_t openpic_msi_read(void *opaque, gpa_t addr, unsigned size) +static int openpic_msi_read(void *opaque, gpa_t addr, u32 *ptr) { struct openpic *opp = opaque; - uint64_t r = 0; + uint32_t r = 0; int i, srs; - pr_debug("%s: addr %#" HWADDR_PRIx "\n", __func__, addr); + pr_debug("%s: addr %#llx\n", __func__, addr); if (addr & 0xF) - return -1; + return -ENXIO; srs = addr >> 4; @@ -945,45 +997,47 @@ static uint64_t openpic_msi_read(void *opaque, gpa_t addr, unsigned size) break; } - return r; + pr_debug("%s: => 0x%08x\n", __func__, r); + *ptr = r; + return 0; } -static uint64_t openpic_summary_read(void *opaque, gpa_t addr, unsigned size) +static int openpic_summary_read(void *opaque, gpa_t addr, u32 *ptr) { - uint64_t r = 0; + uint32_t r = 0; - pr_debug("%s: addr %#" HWADDR_PRIx "\n", __func__, addr); + pr_debug("%s: addr %#llx\n", __func__, addr); /* TODO: EISR/EIMR */ - return r; + *ptr = r; + return 0; } -static void openpic_summary_write(void *opaque, gpa_t addr, uint64_t val, - unsigned size) +static int openpic_summary_write(void *opaque, gpa_t addr, u32 val) { - pr_debug("%s: addr %#" HWADDR_PRIx " <= 0x%08" PRIx64 "\n", - __func__, addr, val); + pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val); /* TODO: EISR/EIMR */ + return 0; } -static void openpic_cpu_write_internal(void *opaque, gpa_t addr, - uint32_t val, int idx) +static int openpic_cpu_write_internal(void *opaque, gpa_t addr, + u32 val, int idx) { struct openpic *opp = opaque; struct irq_source *src; struct irq_dest *dst; int s_IRQ, n_IRQ; - pr_debug("%s: cpu %d addr %#" HWADDR_PRIx " <= 0x%08x\n", __func__, idx, + pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx, addr, val); if (idx < 0) - return; + return 0; if (addr & 0xF) - return; + return 0; dst = &opp->dst[idx]; addr &= 0xFF0; @@ -1008,11 +1062,11 @@ static void openpic_cpu_write_internal(void *opaque, gpa_t addr, if (dst->raised.priority <= dst->ctpr) { pr_debug("%s: Lower OpenPIC INT output cpu %d due to ctpr\n", __func__, idx); - qemu_irq_lower(dst->irqs[OPENPIC_OUTPUT_INT]); + mpic_irq_lower(opp, dst, ILR_INTTGT_INT); } else if (dst->raised.priority > dst->servicing.priority) { pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d\n", __func__, idx, dst->raised.next); - qemu_irq_raise(dst->irqs[OPENPIC_OUTPUT_INT]); + mpic_irq_raise(opp, dst, ILR_INTTGT_INT); } break; @@ -1043,18 +1097,22 @@ static void openpic_cpu_write_internal(void *opaque, gpa_t addr, IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) { pr_debug("Raise OpenPIC INT output cpu %d irq %d\n", idx, n_IRQ); - qemu_irq_raise(opp->dst[idx].irqs[OPENPIC_OUTPUT_INT]); + mpic_irq_raise(opp, dst, ILR_INTTGT_INT); } break; default: break; } + + return 0; } -static void openpic_cpu_write(void *opaque, gpa_t addr, uint64_t val, - unsigned len) +static int openpic_cpu_write(void *opaque, gpa_t addr, u32 val) { - openpic_cpu_write_internal(opaque, addr, val, (addr & 0x1f000) >> 12); + struct openpic *opp = opaque; + + return openpic_cpu_write_internal(opp, addr, val, + (addr & 0x1f000) >> 12); } static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst, @@ -1064,7 +1122,7 @@ static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst, int retval, irq; pr_debug("Lower OpenPIC INT output\n"); - qemu_irq_lower(dst->irqs[OPENPIC_OUTPUT_INT]); + mpic_irq_lower(opp, dst, ILR_INTTGT_INT); irq = IRQ_get_next(opp, &dst->raised); pr_debug("IACK: irq=%d\n", irq); @@ -1107,20 +1165,21 @@ static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst, return retval; } -static uint32_t openpic_cpu_read_internal(void *opaque, gpa_t addr, int idx) +static int openpic_cpu_read_internal(void *opaque, gpa_t addr, + u32 *ptr, int idx) { struct openpic *opp = opaque; struct irq_dest *dst; uint32_t retval; - pr_debug("%s: cpu %d addr %#" HWADDR_PRIx "\n", __func__, idx, addr); + pr_debug("%s: cpu %d addr %#llx\n", __func__, idx, addr); retval = 0xFFFFFFFF; if (idx < 0) - return retval; + goto out; if (addr & 0xF) - return retval; + goto out; dst = &opp->dst[idx]; addr &= 0xFF0; @@ -1142,49 +1201,67 @@ static uint32_t openpic_cpu_read_internal(void *opaque, gpa_t addr, int idx) } pr_debug("%s: => 0x%08x\n", __func__, retval); - return retval; +out: + *ptr = retval; + return 0; } -static uint64_t openpic_cpu_read(void *opaque, gpa_t addr, unsigned len) +static int openpic_cpu_read(void *opaque, gpa_t addr, u32 *ptr) { - return openpic_cpu_read_internal(opaque, addr, (addr & 0x1f000) >> 12); + struct openpic *opp = opaque; + + return openpic_cpu_read_internal(opp, addr, ptr, + (addr & 0x1f000) >> 12); } -static const struct kvm_io_device_ops openpic_glb_ops_be = { +struct mem_reg { + struct list_head list; + int (*read)(void *opaque, gpa_t addr, u32 *ptr); + int (*write)(void *opaque, gpa_t addr, u32 val); + gpa_t start_addr; + int size; +}; + +static struct mem_reg openpic_gbl_mmio = { .write = openpic_gbl_write, .read = openpic_gbl_read, + .start_addr = OPENPIC_GLB_REG_START, + .size = OPENPIC_GLB_REG_SIZE, }; -static const struct kvm_io_device_ops openpic_tmr_ops_be = { +static struct mem_reg openpic_tmr_mmio = { .write = openpic_tmr_write, .read = openpic_tmr_read, + .start_addr = OPENPIC_TMR_REG_START, + .size = OPENPIC_TMR_REG_SIZE, }; -static const struct kvm_io_device_ops openpic_cpu_ops_be = { +static struct mem_reg openpic_cpu_mmio = { .write = openpic_cpu_write, .read = openpic_cpu_read, + .start_addr = OPENPIC_CPU_REG_START, + .size = OPENPIC_CPU_REG_SIZE, }; -static const struct kvm_io_device_ops openpic_src_ops_be = { +static struct mem_reg openpic_src_mmio = { .write = openpic_src_write, .read = openpic_src_read, + .start_addr = OPENPIC_SRC_REG_START, + .size = OPENPIC_SRC_REG_SIZE, }; -static const struct kvm_io_device_ops openpic_msi_ops_be = { +static struct mem_reg openpic_msi_mmio = { .read = openpic_msi_read, .write = openpic_msi_write, + .start_addr = OPENPIC_MSI_REG_START, + .size = OPENPIC_MSI_REG_SIZE, }; -static const struct kvm_io_device_ops openpic_summary_ops_be = { +static struct mem_reg openpic_summary_mmio = { .read = openpic_summary_read, .write = openpic_summary_write, -}; - -struct mem_reg { - const char *name; - const struct kvm_io_device_ops *ops; - gpa_t start_addr; - int size; + .start_addr = OPENPIC_SUMMARY_REG_START, + .size = OPENPIC_SUMMARY_REG_SIZE, }; static void fsl_common_init(struct openpic *opp) @@ -1192,6 +1269,9 @@ static void fsl_common_init(struct openpic *opp) int i; int virq = MAX_SRC; + list_add(&openpic_msi_mmio.list, &opp->mmio_regions); + list_add(&openpic_summary_mmio.list, &opp->mmio_regions); + opp->vid = VID_REVISION_1_2; opp->vir = VIR_GENERIC; opp->vector_mask = 0xFFFF; @@ -1205,11 +1285,10 @@ static void fsl_common_init(struct openpic *opp) opp->irq_tim0 = virq; virq += MAX_TMR; - assert(virq <= MAX_IRQ); + BUG_ON(virq > MAX_IRQ); opp->irq_msi = 224; - msi_supported = true; for (i = 0; i < opp->fsl->max_ext; i++) opp->src[i].level = false; @@ -1226,63 +1305,352 @@ static void fsl_common_init(struct openpic *opp) } } -static void map_list(struct openpic *opp, const struct mem_reg *list, - int *count) +static int kvm_mpic_read_internal(struct openpic *opp, gpa_t addr, u32 *ptr) { - while (list->name) { - assert(*count < ARRAY_SIZE(opp->sub_io_mem)); + struct list_head *node; - memory_region_init_io(&opp->sub_io_mem[*count], list->ops, opp, - list->name, list->size); + list_for_each(node, &opp->mmio_regions) { + struct mem_reg *mr = list_entry(node, struct mem_reg, list); - memory_region_add_subregion(&opp->mem, list->start_addr, - &opp->sub_io_mem[*count]); + if (mr->start_addr > addr || addr >= mr->start_addr + mr->size) + continue; - (*count)++; - list++; + return mr->read(opp, addr - mr->start_addr, ptr); } + + return -ENXIO; } -static int openpic_init(SysBusDevice *dev) +static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val) { - struct openpic *opp = FROM_SYSBUS(typeof(*opp), dev); - int i, j; - int list_count = 0; - static const struct mem_reg list_le[] = { - {"glb", &openpic_glb_ops_le, - OPENPIC_GLB_REG_START, OPENPIC_GLB_REG_SIZE}, - {"tmr", &openpic_tmr_ops_le, - OPENPIC_TMR_REG_START, OPENPIC_TMR_REG_SIZE}, - {"src", &openpic_src_ops_le, - OPENPIC_SRC_REG_START, OPENPIC_SRC_REG_SIZE}, - {"cpu", &openpic_cpu_ops_le, - OPENPIC_CPU_REG_START, OPENPIC_CPU_REG_SIZE}, - {NULL} - }; - static const struct mem_reg list_be[] = { - {"glb", &openpic_glb_ops_be, - OPENPIC_GLB_REG_START, OPENPIC_GLB_REG_SIZE}, - {"tmr", &openpic_tmr_ops_be, - OPENPIC_TMR_REG_START, OPENPIC_TMR_REG_SIZE}, - {"src", &openpic_src_ops_be, - OPENPIC_SRC_REG_START, OPENPIC_SRC_REG_SIZE}, - {"cpu", &openpic_cpu_ops_be, - OPENPIC_CPU_REG_START, OPENPIC_CPU_REG_SIZE}, - {NULL} - }; - static const struct mem_reg list_fsl[] = { - {"msi", &openpic_msi_ops_be, - OPENPIC_MSI_REG_START, OPENPIC_MSI_REG_SIZE}, - {"summary", &openpic_summary_ops_be, - OPENPIC_SUMMARY_REG_START, OPENPIC_SUMMARY_REG_SIZE}, - {NULL} - }; + struct list_head *node; + + list_for_each(node, &opp->mmio_regions) { + struct mem_reg *mr = list_entry(node, struct mem_reg, list); + + if (mr->start_addr > addr || addr >= mr->start_addr + mr->size) + continue; - memory_region_init(&opp->mem, "openpic", 0x40000); + return mr->write(opp, addr - mr->start_addr, val); + } + + return -ENXIO; +} + +static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr, + int len, void *ptr) +{ + struct openpic *opp = container_of(this, struct openpic, mmio); + int ret; + union { + u32 val; + u8 bytes[4]; + } u; + + if (addr & (len - 1)) { + pr_debug("%s: bad alignment %llx/%d\n", + __func__, addr, len); + return -EINVAL; + } + + spin_lock_irq(&opp->lock); + ret = kvm_mpic_read_internal(opp, addr - opp->reg_base, &u.val); + spin_unlock_irq(&opp->lock); + + /* + * Technically only 32-bit accesses are allowed, but be nice to + * people dumping registers a byte at a time -- it works in real + * hardware (reads only, not writes). + */ + if (len == 4) { + *(u32 *)ptr = u.val; + pr_debug("%s: addr %llx ret %d len 4 val %x\n", + __func__, addr, ret, u.val); + } else if (len == 1) { + *(u8 *)ptr = u.bytes[addr & 3]; + pr_debug("%s: addr %llx ret %d len 1 val %x\n", + __func__, addr, ret, u.bytes[addr & 3]); + } else { + pr_debug("%s: bad length %d\n", __func__, len); + return -EINVAL; + } + + return ret; +} + +static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr, + int len, const void *ptr) +{ + struct openpic *opp = container_of(this, struct openpic, mmio); + int ret; + + if (len != 4) { + pr_debug("%s: bad length %d\n", __func__, len); + return -EOPNOTSUPP; + } + if (addr & 3) { + pr_debug("%s: bad alignment %llx/%d\n", __func__, addr, len); + return -EOPNOTSUPP; + } + + spin_lock_irq(&opp->lock); + ret = kvm_mpic_write_internal(opp, addr - opp->reg_base, + *(const u32 *)ptr); + spin_unlock_irq(&opp->lock); + + pr_debug("%s: addr %llx ret %d val %x\n", + __func__, addr, ret, *(const u32 *)ptr); + + return ret; +} + +static void kvm_mpic_dtor(struct kvm_io_device *this) +{ + struct openpic *opp = container_of(this, struct openpic, mmio); + + opp->mmio_mapped = false; +} + +static const struct kvm_io_device_ops mpic_mmio_ops = { + .read = kvm_mpic_read, + .write = kvm_mpic_write, + .destructor = kvm_mpic_dtor, +}; + +static void map_mmio(struct openpic *opp) +{ + BUG_ON(opp->mmio_mapped); + opp->mmio_mapped = true; + + kvm_iodevice_init(&opp->mmio, &mpic_mmio_ops); + + kvm_io_bus_register_dev(opp->kvm, KVM_MMIO_BUS, + opp->reg_base, OPENPIC_REG_SIZE, + &opp->mmio); +} + +static void unmap_mmio(struct openpic *opp) +{ + BUG_ON(opp->mmio_mapped); + opp->mmio_mapped = false; + + kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio); +} + +static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr) +{ + u64 base; + + if (copy_from_user(&base, (u64 __user *)(long)attr->addr, sizeof(u64))) + return -EFAULT; + + if (base & 0x3ffff) { + pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx not aligned\n", + __func__, base); + return -EINVAL; + } + + if (base == opp->reg_base) + return 0; + + mutex_lock(&opp->kvm->slots_lock); + + unmap_mmio(opp); + opp->reg_base = base; + + pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx\n", + __func__, base); + + if (base == 0) + goto out; + + map_mmio(opp); + + mutex_unlock(&opp->kvm->slots_lock); +out: + return 0; +} + +#define ATTR_SET 0 +#define ATTR_GET 1 + +static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type) +{ + int ret; + + if (addr & 3) + return -ENXIO; + + spin_lock_irq(&opp->lock); + + if (type == ATTR_SET) + ret = kvm_mpic_write_internal(opp, addr, *val); + else + ret = kvm_mpic_read_internal(opp, addr, val); + + spin_unlock_irq(&opp->lock); + + pr_debug("%s: type %d addr %llx val %x\n", __func__, type, addr, *val); + + return ret; +} + +static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + struct openpic *opp = dev->private; + u32 attr32; + + switch (attr->group) { + case KVM_DEV_MPIC_GRP_MISC: + switch (attr->attr) { + case KVM_DEV_MPIC_BASE_ADDR: + return set_base_addr(opp, attr); + } + + break; + + case KVM_DEV_MPIC_GRP_REGISTER: + if (get_user(attr32, (u32 __user *)(long)attr->addr)) + return -EFAULT; + + return access_reg(opp, attr->attr, &attr32, ATTR_SET); + + case KVM_DEV_MPIC_GRP_IRQ_ACTIVE: + if (attr->attr > MAX_SRC) + return -EINVAL; + + if (get_user(attr32, (u32 __user *)(long)attr->addr)) + return -EFAULT; + + if (attr32 != 0 && attr32 != 1) + return -EINVAL; + + spin_lock_irq(&opp->lock); + openpic_set_irq(opp, attr->attr, attr32); + spin_unlock_irq(&opp->lock); + return 0; + } + + return -ENXIO; +} + +static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + struct openpic *opp = dev->private; + u64 attr64; + u32 attr32; + int ret; + + switch (attr->group) { + case KVM_DEV_MPIC_GRP_MISC: + switch (attr->attr) { + case KVM_DEV_MPIC_BASE_ADDR: + mutex_lock(&opp->kvm->slots_lock); + attr64 = opp->reg_base; + mutex_unlock(&opp->kvm->slots_lock); + + if (copy_to_user((u64 __user *)(long)attr->addr, + &attr64, sizeof(u64))) + return -EFAULT; + + return 0; + } + + break; + + case KVM_DEV_MPIC_GRP_REGISTER: + ret = access_reg(opp, attr->attr, &attr32, ATTR_GET); + if (ret) + return ret; + + if (put_user(attr32, (u32 __user *)(long)attr->addr)) + return -EFAULT; + + return 0; + + case KVM_DEV_MPIC_GRP_IRQ_ACTIVE: + if (attr->attr > MAX_SRC) + return -EINVAL; + + spin_lock_irq(&opp->lock); + attr32 = opp->src[attr->attr].pending; + spin_unlock_irq(&opp->lock); + + if (put_user(attr32, (u32 __user *)(long)attr->addr)) + return -EFAULT; + + return 0; + } + + return -ENXIO; +} + +static int mpic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_MPIC_GRP_MISC: + switch (attr->attr) { + case KVM_DEV_MPIC_BASE_ADDR: + return 0; + } + + break; + + case KVM_DEV_MPIC_GRP_REGISTER: + return 0; + + case KVM_DEV_MPIC_GRP_IRQ_ACTIVE: + if (attr->attr > MAX_SRC) + break; + + return 0; + } + + return -ENXIO; +} + +static void mpic_destroy(struct kvm_device *dev) +{ + struct openpic *opp = dev->private; + + if (opp->mmio_mapped) { + /* + * Normally we get unmapped by kvm_io_bus_destroy(), + * which happens before the VCPUs release their references. + * + * Thus, we should only get here if no VCPUs took a reference + * to us in the first place. + */ + WARN_ON(opp->nb_cpus != 0); + unmap_mmio(opp); + } + + kfree(opp); +} + +static int mpic_create(struct kvm_device *dev, u32 type) +{ + struct openpic *opp; + int ret; + + opp = kzalloc(sizeof(struct openpic), GFP_KERNEL); + if (!opp) + return -ENOMEM; + + dev->private = opp; + opp->kvm = dev->kvm; + opp->dev = dev; + opp->model = type; + spin_lock_init(&opp->lock); + + INIT_LIST_HEAD(&opp->mmio_regions); + list_add(&openpic_gbl_mmio.list, &opp->mmio_regions); + list_add(&openpic_tmr_mmio.list, &opp->mmio_regions); + list_add(&openpic_src_mmio.list, &opp->mmio_regions); + list_add(&openpic_cpu_mmio.list, &opp->mmio_regions); switch (opp->model) { - case OPENPIC_MODEL_FSL_MPIC_20: - default: + case KVM_DEV_TYPE_FSL_MPIC_20: opp->fsl = &fsl_mpic_20; opp->brr1 = 0x00400200; opp->flags |= OPENPIC_FLAG_IDR_CRIT; @@ -1290,12 +1658,10 @@ static int openpic_init(SysBusDevice *dev) opp->mpic_mode_mask = GCR_MODE_MIXED; fsl_common_init(opp); - map_list(opp, list_be, &list_count); - map_list(opp, list_fsl, &list_count); break; - case OPENPIC_MODEL_FSL_MPIC_42: + case KVM_DEV_TYPE_FSL_MPIC_42: opp->fsl = &fsl_mpic_42; opp->brr1 = 0x00400402; opp->flags |= OPENPIC_FLAG_ILR; @@ -1303,11 +1669,27 @@ static int openpic_init(SysBusDevice *dev) opp->mpic_mode_mask = GCR_MODE_PROXY; fsl_common_init(opp); - map_list(opp, list_be, &list_count); - map_list(opp, list_fsl, &list_count); break; + + default: + ret = -ENODEV; + goto err; } + openpic_reset(opp); return 0; + +err: + kfree(opp); + return ret; } + +struct kvm_device_ops kvm_mpic_ops = { + .name = "kvm-mpic", + .create = mpic_create, + .destroy = mpic_destroy, + .set_attr = mpic_set_attr, + .get_attr = mpic_get_attr, + .has_attr = mpic_has_attr, +}; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6b8108624851..88d69cf1f953 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -317,6 +317,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_ENABLE_CAP: case KVM_CAP_ONE_REG: case KVM_CAP_IOEVENTFD: + case KVM_CAP_DEVICE_CTRL: r = 1; break; #ifndef CONFIG_KVM_BOOK3S_64_HV @@ -762,7 +763,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, break; case KVM_CAP_PPC_EPR: r = 0; - vcpu->arch.epr_enabled = cap->args[0]; + if (cap->args[0]) + vcpu->arch.epr_flags |= KVMPPC_EPR_USER; + else + vcpu->arch.epr_flags &= ~KVMPPC_EPR_USER; break; #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_WATCHDOG: @@ -908,6 +912,7 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { + struct kvm *kvm __maybe_unused = filp->private_data; void __user *argp = (void __user *)arg; long r; @@ -926,7 +931,6 @@ long kvm_arch_vm_ioctl(struct file *filp, #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CREATE_SPAPR_TCE: { struct kvm_create_spapr_tce create_tce; - struct kvm *kvm = filp->private_data; r = -EFAULT; if (copy_from_user(&create_tce, argp, sizeof(create_tce))) @@ -938,7 +942,6 @@ long kvm_arch_vm_ioctl(struct file *filp, #ifdef CONFIG_KVM_BOOK3S_64_HV case KVM_ALLOCATE_RMA: { - struct kvm *kvm = filp->private_data; struct kvm_allocate_rma rma; r = kvm_vm_ioctl_allocate_rma(kvm, &rma); @@ -948,7 +951,6 @@ long kvm_arch_vm_ioctl(struct file *filp, } case KVM_PPC_ALLOCATE_HTAB: { - struct kvm *kvm = filp->private_data; u32 htab_order; r = -EFAULT; @@ -965,7 +967,6 @@ long kvm_arch_vm_ioctl(struct file *filp, } case KVM_PPC_GET_HTAB_FD: { - struct kvm *kvm = filp->private_data; struct kvm_get_htab_fd ghf; r = -EFAULT; @@ -978,7 +979,6 @@ long kvm_arch_vm_ioctl(struct file *filp, #ifdef CONFIG_PPC_BOOK3S_64 case KVM_PPC_GET_SMMU_INFO: { - struct kvm *kvm = filp->private_data; struct kvm_ppc_smmu_info info; memset(&info, 0, sizeof(info)); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6dab6b5b3e3b..feffbdaf8986 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1099,6 +1099,8 @@ void kvm_device_get(struct kvm_device *dev); void kvm_device_put(struct kvm_device *dev); struct kvm_device *kvm_device_from_filp(struct file *filp); +extern struct kvm_device_ops kvm_mpic_ops; + #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 38a0be0c199f..4148becdc93f 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -837,6 +837,9 @@ struct kvm_device_attr { __u64 addr; /* userspace address of attr data */ }; +#define KVM_DEV_TYPE_FSL_MPIC_20 1 +#define KVM_DEV_TYPE_FSL_MPIC_42 2 + /* * ioctls for VM fds */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5f0d78c2537b..f6cd14d2f0d9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2238,6 +2238,12 @@ static int kvm_ioctl_create_device(struct kvm *kvm, int ret; switch (cd->type) { +#ifdef CONFIG_KVM_MPIC + case KVM_DEV_TYPE_FSL_MPIC_20: + case KVM_DEV_TYPE_FSL_MPIC_42: + ops = &kvm_mpic_ops; + break; +#endif default: return -ENODEV; } -- cgit v1.2.3 From 07f0a7bdec5c4039cfb9b836482c45004d4c21cc Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 25 Apr 2013 14:11:23 +0000 Subject: kvm: destroy emulated devices on VM exit The hassle of getting refcounting right was greater than the hassle of keeping a list of devices to destroy on VM exit. Signed-off-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kvm/mpic.c | 2 -- include/linux/kvm_host.h | 3 ++- virt/kvm/kvm_main.c | 29 ++++++++++++++++------------- 3 files changed, 18 insertions(+), 16 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 89fe1d66a7fb..795ca0c9ae69 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -1781,7 +1781,6 @@ int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, if (opp->mpic_mode_mask == GCR_MODE_PROXY) vcpu->arch.epr_flags |= KVMPPC_EPR_KERNEL; - kvm_device_get(dev); out: spin_unlock_irq(&opp->lock); return ret; @@ -1797,7 +1796,6 @@ void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu) BUG_ON(!opp->dst[vcpu->arch.irq_cpu_id].vcpu); opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL; - kvm_device_put(opp->dev); } /* diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index feffbdaf8986..36c977694741 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -393,6 +393,7 @@ struct kvm { long mmu_notifier_count; #endif long tlbs_dirty; + struct list_head devices; }; #define kvm_err(fmt, ...) \ @@ -1069,8 +1070,8 @@ struct kvm_device_ops; struct kvm_device { struct kvm_device_ops *ops; struct kvm *kvm; - atomic_t users; void *private; + struct list_head vm_node; }; /* create, destroy, and name are mandatory */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f6cd14d2f0d9..5da9f02a2a67 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -504,6 +504,7 @@ static struct kvm *kvm_create_vm(unsigned long type) mutex_init(&kvm->irq_lock); mutex_init(&kvm->slots_lock); atomic_set(&kvm->users_count, 1); + INIT_LIST_HEAD(&kvm->devices); r = kvm_init_mmu_notifier(kvm); if (r) @@ -581,6 +582,19 @@ void kvm_free_physmem(struct kvm *kvm) kfree(kvm->memslots); } +static void kvm_destroy_devices(struct kvm *kvm) +{ + struct list_head *node, *tmp; + + list_for_each_safe(node, tmp, &kvm->devices) { + struct kvm_device *dev = + list_entry(node, struct kvm_device, vm_node); + + list_del(node); + dev->ops->destroy(dev); + } +} + static void kvm_destroy_vm(struct kvm *kvm) { int i; @@ -600,6 +614,7 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_arch_flush_shadow_all(kvm); #endif kvm_arch_destroy_vm(kvm); + kvm_destroy_devices(kvm); kvm_free_physmem(kvm); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); @@ -2195,23 +2210,11 @@ static long kvm_device_ioctl(struct file *filp, unsigned int ioctl, } } -void kvm_device_get(struct kvm_device *dev) -{ - atomic_inc(&dev->users); -} - -void kvm_device_put(struct kvm_device *dev) -{ - if (atomic_dec_and_test(&dev->users)) - dev->ops->destroy(dev); -} - static int kvm_device_release(struct inode *inode, struct file *filp) { struct kvm_device *dev = filp->private_data; struct kvm *kvm = dev->kvm; - kvm_device_put(dev); kvm_put_kvm(kvm); return 0; } @@ -2257,7 +2260,6 @@ static int kvm_ioctl_create_device(struct kvm *kvm, dev->ops = ops; dev->kvm = kvm; - atomic_set(&dev->users, 1); ret = ops->create(dev, cd->type); if (ret < 0) { @@ -2271,6 +2273,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, return ret; } + list_add(&dev->vm_node, &kvm->devices); kvm_get_kvm(kvm); cd->fd = ret; return 0; -- cgit v1.2.3