summaryrefslogtreecommitdiff
path: root/virt/kvm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-15 10:18:16 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-15 10:18:16 -0700
commite37a07e0c29cd2cef4633b1e6db5579cc99ba4cd (patch)
treed968ca38ebb196c1cf55aa83554623b326592cf1 /virt/kvm
parenta80099a152d0719e2d8d750e07f4ffa991553d30 (diff)
parentd3457c877b14aaee8c52923eedf05a3b78af0476 (diff)
Merge tag 'kvm-4.13-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull more KVM updates from Radim Krčmář: "Second batch of KVM updates for v4.13 Common: - add uevents for VM creation/destruction - annotate and properly access RCU-protected objects s390: - rename IOCTL added in the first v4.13 merge x86: - emulate VMLOAD VMSAVE feature in SVM - support paravirtual asynchronous page fault while nested - add Hyper-V userspace interfaces for better migration - improve master clock corner cases - extend internal error reporting after EPT misconfig - correct single-stepping of emulated instructions in SVM - handle MCE during VM entry - fix nVMX VM entry checks and nVMX VMCS shadowing" * tag 'kvm-4.13-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (28 commits) kvm: x86: hyperv: make VP_INDEX managed by userspace KVM: async_pf: Let guest support delivery of async_pf from guest mode KVM: async_pf: Force a nested vmexit if the injected #PF is async_pf KVM: async_pf: Add L1 guest async_pf #PF vmexit handler KVM: x86: Simplify kvm_x86_ops->queue_exception parameter list kvm: x86: hyperv: add KVM_CAP_HYPERV_SYNIC2 KVM: x86: make backwards_tsc_observed a per-VM variable KVM: trigger uevents when creating or destroying a VM KVM: SVM: Enable Virtual VMLOAD VMSAVE feature KVM: SVM: Add Virtual VMLOAD VMSAVE feature definition KVM: SVM: Rename lbr_ctl field in the vmcb control area KVM: SVM: Prepare for new bit definition in lbr_ctl KVM: SVM: handle singlestep exception when skipping emulated instructions KVM: x86: take slots_lock in kvm_free_pit KVM: s390: Fix KVM_S390_GET_CMMA_BITS ioctl definition kvm: vmx: Properly handle machine check during VM-entry KVM: x86: update master clock before computing kvmclock_offset kvm: nVMX: Shadow "high" parts of shadowed 64-bit VMCS fields kvm: nVMX: Fix nested_vmx_check_msr_bitmap_controls kvm: nVMX: Validate the I/O bitmaps on nested VM-entry ...
Diffstat (limited to 'virt/kvm')
-rw-r--r--virt/kvm/eventfd.c8
-rw-r--r--virt/kvm/irqchip.c2
-rw-r--r--virt/kvm/kvm_main.c131
3 files changed, 115 insertions, 26 deletions
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 9120edf3c94b..f2ac53ab8243 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -825,7 +825,7 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
if (ret < 0)
goto unlock_fail;
- kvm->buses[bus_idx]->ioeventfd_count++;
+ kvm_get_bus(kvm, bus_idx)->ioeventfd_count++;
list_add_tail(&p->list, &kvm->ioeventfds);
mutex_unlock(&kvm->slots_lock);
@@ -848,6 +848,7 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
{
struct _ioeventfd *p, *tmp;
struct eventfd_ctx *eventfd;
+ struct kvm_io_bus *bus;
int ret = -ENOENT;
eventfd = eventfd_ctx_fdget(args->fd);
@@ -870,8 +871,9 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
continue;
kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
- if (kvm->buses[bus_idx])
- kvm->buses[bus_idx]->ioeventfd_count--;
+ bus = kvm_get_bus(kvm, bus_idx);
+ if (bus)
+ bus->ioeventfd_count--;
ioeventfd_release(p);
ret = 0;
break;
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 31e40c9e81df..b1286c4e0712 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -230,7 +230,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
}
mutex_lock(&kvm->irq_lock);
- old = kvm->irq_routing;
+ old = rcu_dereference_protected(kvm->irq_routing, 1);
rcu_assign_pointer(kvm->irq_routing, new);
kvm_irq_routing_update(kvm);
kvm_arch_irq_routing_update(kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 19f0ecb9b93e..82987d457b8b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -130,6 +130,12 @@ EXPORT_SYMBOL_GPL(kvm_rebooting);
static bool largepages_enabled = true;
+#define KVM_EVENT_CREATE_VM 0
+#define KVM_EVENT_DESTROY_VM 1
+static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
+static unsigned long long kvm_createvm_count;
+static unsigned long long kvm_active_vms;
+
bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
{
if (pfn_valid(pfn))
@@ -187,12 +193,23 @@ static void ack_flush(void *_completed)
{
}
+static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait)
+{
+ if (unlikely(!cpus))
+ cpus = cpu_online_mask;
+
+ if (cpumask_empty(cpus))
+ return false;
+
+ smp_call_function_many(cpus, ack_flush, NULL, wait);
+ return true;
+}
+
bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
{
int i, cpu, me;
cpumask_var_t cpus;
- bool called = true;
- bool wait = req & KVM_REQUEST_WAIT;
+ bool called;
struct kvm_vcpu *vcpu;
zalloc_cpumask_var(&cpus, GFP_ATOMIC);
@@ -207,14 +224,9 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
if (cpus != NULL && cpu != -1 && cpu != me &&
kvm_request_needs_ipi(vcpu, req))
- cpumask_set_cpu(cpu, cpus);
+ __cpumask_set_cpu(cpu, cpus);
}
- if (unlikely(cpus == NULL))
- smp_call_function_many(cpu_online_mask, ack_flush, NULL, wait);
- else if (!cpumask_empty(cpus))
- smp_call_function_many(cpus, ack_flush, NULL, wait);
- else
- called = false;
+ called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT));
put_cpu();
free_cpumask_var(cpus);
return called;
@@ -293,7 +305,12 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_init);
void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
{
- put_pid(vcpu->pid);
+ /*
+ * no need for rcu_read_lock as VCPU_RUN is the only place that
+ * will change the vcpu->pid pointer and on uninit all file
+ * descriptors are already gone.
+ */
+ put_pid(rcu_dereference_protected(vcpu->pid, 1));
kvm_arch_vcpu_uninit(vcpu);
free_page((unsigned long)vcpu->run);
}
@@ -674,8 +691,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
if (init_srcu_struct(&kvm->irq_srcu))
goto out_err_no_irq_srcu;
for (i = 0; i < KVM_NR_BUSES; i++) {
- kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
- GFP_KERNEL);
+ rcu_assign_pointer(kvm->buses[i],
+ kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL));
if (!kvm->buses[i])
goto out_err;
}
@@ -700,9 +717,10 @@ out_err_no_srcu:
hardware_disable_all();
out_err_no_disable:
for (i = 0; i < KVM_NR_BUSES; i++)
- kfree(kvm->buses[i]);
+ kfree(rcu_access_pointer(kvm->buses[i]));
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
- kvm_free_memslots(kvm, kvm->memslots[i]);
+ kvm_free_memslots(kvm,
+ rcu_dereference_protected(kvm->memslots[i], 1));
kvm_arch_free_vm(kvm);
mmdrop(current->mm);
return ERR_PTR(r);
@@ -728,6 +746,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
int i;
struct mm_struct *mm = kvm->mm;
+ kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
kvm_destroy_vm_debugfs(kvm);
kvm_arch_sync_events(kvm);
spin_lock(&kvm_lock);
@@ -735,8 +754,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
spin_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++) {
- if (kvm->buses[i])
- kvm_io_bus_destroy(kvm->buses[i]);
+ struct kvm_io_bus *bus;
+
+ bus = rcu_dereference_protected(kvm->buses[i], 1);
+ if (bus)
+ kvm_io_bus_destroy(bus);
kvm->buses[i] = NULL;
}
kvm_coalesced_mmio_free(kvm);
@@ -748,7 +770,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_arch_destroy_vm(kvm);
kvm_destroy_devices(kvm);
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
- kvm_free_memslots(kvm, kvm->memslots[i]);
+ kvm_free_memslots(kvm,
+ rcu_dereference_protected(kvm->memslots[i], 1));
cleanup_srcu_struct(&kvm->irq_srcu);
cleanup_srcu_struct(&kvm->srcu);
kvm_arch_free_vm(kvm);
@@ -2551,13 +2574,14 @@ static long kvm_vcpu_ioctl(struct file *filp,
if (r)
return r;
switch (ioctl) {
- case KVM_RUN:
+ case KVM_RUN: {
+ struct pid *oldpid;
r = -EINVAL;
if (arg)
goto out;
- if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
+ oldpid = rcu_access_pointer(vcpu->pid);
+ if (unlikely(oldpid != current->pids[PIDTYPE_PID].pid)) {
/* The thread running this VCPU changed. */
- struct pid *oldpid = vcpu->pid;
struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
rcu_assign_pointer(vcpu->pid, newpid);
@@ -2568,6 +2592,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
break;
+ }
case KVM_GET_REGS: {
struct kvm_regs *kvm_regs;
@@ -3202,6 +3227,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
fput(file);
return -ENOMEM;
}
+ kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm);
fd_install(r, file);
return r;
@@ -3563,7 +3589,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
{
struct kvm_io_bus *new_bus, *bus;
- bus = kvm->buses[bus_idx];
+ bus = kvm_get_bus(kvm, bus_idx);
if (!bus)
return -ENOMEM;
@@ -3592,7 +3618,7 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
int i;
struct kvm_io_bus *new_bus, *bus;
- bus = kvm->buses[bus_idx];
+ bus = kvm_get_bus(kvm, bus_idx);
if (!bus)
return;
@@ -3854,6 +3880,67 @@ static const struct file_operations *stat_fops[] = {
[KVM_STAT_VM] = &vm_stat_fops,
};
+static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
+{
+ struct kobj_uevent_env *env;
+ char *tmp, *pathbuf = NULL;
+ unsigned long long created, active;
+
+ if (!kvm_dev.this_device || !kvm)
+ return;
+
+ spin_lock(&kvm_lock);
+ if (type == KVM_EVENT_CREATE_VM) {
+ kvm_createvm_count++;
+ kvm_active_vms++;
+ } else if (type == KVM_EVENT_DESTROY_VM) {
+ kvm_active_vms--;
+ }
+ created = kvm_createvm_count;
+ active = kvm_active_vms;
+ spin_unlock(&kvm_lock);
+
+ env = kzalloc(sizeof(*env), GFP_KERNEL);
+ if (!env)
+ return;
+
+ add_uevent_var(env, "CREATED=%llu", created);
+ add_uevent_var(env, "COUNT=%llu", active);
+
+ if (type == KVM_EVENT_CREATE_VM)
+ add_uevent_var(env, "EVENT=create");
+ else if (type == KVM_EVENT_DESTROY_VM)
+ add_uevent_var(env, "EVENT=destroy");
+
+ if (kvm->debugfs_dentry) {
+ char p[ITOA_MAX_LEN];
+
+ snprintf(p, sizeof(p), "%s", kvm->debugfs_dentry->d_name.name);
+ tmp = strchrnul(p + 1, '-');
+ *tmp = '\0';
+ add_uevent_var(env, "PID=%s", p);
+ pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
+ if (pathbuf) {
+ /* sizeof counts the final '\0' */
+ int len = sizeof("STATS_PATH=") - 1;
+ const char *pvar = "STATS_PATH=";
+
+ tmp = dentry_path_raw(kvm->debugfs_dentry,
+ pathbuf + len,
+ PATH_MAX - len);
+ if (!IS_ERR(tmp)) {
+ memcpy(tmp - len, pvar, len);
+ env->envp[env->envp_idx++] = tmp - len;
+ }
+ }
+ }
+ /* no need for checks, since we are adding at most only 5 keys */
+ env->envp[env->envp_idx++] = NULL;
+ kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp);
+ kfree(env);
+ kfree(pathbuf);
+}
+
static int kvm_init_debug(void)
{
int r = -EEXIST;