Diffstat (limited to 'drivers/gpu/drm/i915/gvt/kvmgt.c')
-rw-r--r--  drivers/gpu/drm/i915/gvt/kvmgt.c | 1097
1 file changed, 519 insertions(+), 578 deletions(-)
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 057ec4490104..0787ba5c301f 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1,7 +1,7 @@ /* * KVMGT - the implementation of Intel mediated pass-through framework for KVM * - * Copyright(c) 2014-2016 Intel Corporation. All rights reserved. + * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -26,6 +26,11 @@ * Kevin Tian <kevin.tian@intel.com> * Jike Song <jike.song@intel.com> * Xiaoguang Chen <xiaoguang.chen@intel.com> + * Eddie Dong <eddie.dong@intel.com> + * + * Contributors: + * Niu Bing <bing.niu@intel.com> + * Zhi Wang <zhi.a.wang@intel.com> */ #include <linux/init.h> @@ -39,8 +44,6 @@ #include <linux/spinlock.h> #include <linux/eventfd.h> #include <linux/uuid.h> -#include <linux/kvm_host.h> -#include <linux/vfio.h> #include <linux/mdev.h> #include <linux/debugfs.h> @@ -49,9 +52,11 @@ #include <drm/drm_edid.h> #include "i915_drv.h" +#include "intel_gvt.h" #include "gvt.h" -static const struct intel_gvt_ops *intel_gvt_ops; +MODULE_IMPORT_NS(DMA_BUF); +MODULE_IMPORT_NS(I915_GVT); /* helper macros copied from vfio-pci */ #define VFIO_PCI_OFFSET_SHIFT 40 @@ -90,16 +95,6 @@ struct kvmgt_pgfn { struct hlist_node hnode; }; -#define KVMGT_DEBUGFS_FILENAME "kvmgt_nr_cache_entries" -struct kvmgt_guest_info { - struct kvm *kvm; - struct intel_vgpu *vgpu; - struct kvm_page_track_notifier_node track_node; -#define NR_BKT (1 << 18) - struct hlist_head ptable[NR_BKT]; -#undef NR_BKT -}; - struct gvt_dma { struct intel_vgpu *vgpu; struct rb_node gfn_node; @@ -110,41 +105,15 @@ struct gvt_dma { struct kref ref; }; -struct kvmgt_vdev { - struct intel_vgpu *vgpu; - struct mdev_device *mdev; - struct vfio_region *region; - int num_regions; - struct eventfd_ctx *intx_trigger; - struct eventfd_ctx *msi_trigger; +#define vfio_dev_to_vgpu(vfio_dev) \ + container_of((vfio_dev), struct intel_vgpu, vfio_device) - /* - * Two caches are used to avoid mapping duplicated pages (eg. - * scratch pages). This help to reduce dma setup overhead. 
- */ - struct rb_root gfn_cache; - struct rb_root dma_addr_cache; - unsigned long nr_cache_entries; - struct mutex cache_lock; - - struct notifier_block iommu_notifier; - struct notifier_block group_notifier; - struct kvm *kvm; - struct work_struct release_work; - atomic_t released; - struct vfio_device *vfio_device; - struct vfio_group *vfio_group; -}; - -static inline struct kvmgt_vdev *kvmgt_vdev(struct intel_vgpu *vgpu) -{ - return intel_vgpu_vdev(vgpu); -} - -static inline bool handle_valid(unsigned long handle) -{ - return !!(handle & ~0xff); -} +static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, + const u8 *val, int len, + struct kvm_page_track_notifier_node *node); +static void kvmgt_page_track_flush_slot(struct kvm *kvm, + struct kvm_memory_slot *slot, + struct kvm_page_track_notifier_node *node); static ssize_t available_instances_show(struct mdev_type *mtype, struct mdev_type_attribute *attr, @@ -259,15 +228,12 @@ static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) } } -static int kvmgt_guest_init(struct mdev_device *mdev); static void intel_vgpu_release_work(struct work_struct *work); -static bool kvmgt_guest_exit(struct kvmgt_guest_info *info); static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size) { struct drm_i915_private *i915 = vgpu->gvt->gt->i915; - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); int total_pages; int npage; int ret; @@ -277,7 +243,7 @@ static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, for (npage = 0; npage < total_pages; npage++) { unsigned long cur_gfn = gfn + npage; - ret = vfio_group_unpin_pages(vdev->vfio_group, &cur_gfn, 1); + ret = vfio_group_unpin_pages(vgpu->vfio_group, &cur_gfn, 1); drm_WARN_ON(&i915->drm, ret != 1); } } @@ -286,7 +252,6 @@ static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size, struct page **page) { - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); unsigned long base_pfn = 0; int total_pages; int npage; @@ -301,7 +266,7 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned long cur_gfn = gfn + npage; unsigned long pfn; - ret = vfio_group_pin_pages(vdev->vfio_group, &cur_gfn, 1, + ret = vfio_group_pin_pages(vgpu->vfio_group, &cur_gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn); if (ret != 1) { gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n", @@ -368,7 +333,7 @@ static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn, static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu, dma_addr_t dma_addr) { - struct rb_node *node = kvmgt_vdev(vgpu)->dma_addr_cache.rb_node; + struct rb_node *node = vgpu->dma_addr_cache.rb_node; struct gvt_dma *itr; while (node) { @@ -386,7 +351,7 @@ static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu, static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn) { - struct rb_node *node = kvmgt_vdev(vgpu)->gfn_cache.rb_node; + struct rb_node *node = vgpu->gfn_cache.rb_node; struct gvt_dma *itr; while (node) { @@ -407,7 +372,6 @@ static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, { struct gvt_dma *new, *itr; struct rb_node **link, *parent = NULL; - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL); if (!new) @@ -420,7 +384,7 @@ static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, kref_init(&new->ref); /* gfn_cache maps gfn to 
struct gvt_dma. */ - link = &vdev->gfn_cache.rb_node; + link = &vgpu->gfn_cache.rb_node; while (*link) { parent = *link; itr = rb_entry(parent, struct gvt_dma, gfn_node); @@ -431,11 +395,11 @@ static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, link = &parent->rb_right; } rb_link_node(&new->gfn_node, parent, link); - rb_insert_color(&new->gfn_node, &vdev->gfn_cache); + rb_insert_color(&new->gfn_node, &vgpu->gfn_cache); /* dma_addr_cache maps dma addr to struct gvt_dma. */ parent = NULL; - link = &vdev->dma_addr_cache.rb_node; + link = &vgpu->dma_addr_cache.rb_node; while (*link) { parent = *link; itr = rb_entry(parent, struct gvt_dma, dma_addr_node); @@ -446,59 +410,54 @@ static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn, link = &parent->rb_right; } rb_link_node(&new->dma_addr_node, parent, link); - rb_insert_color(&new->dma_addr_node, &vdev->dma_addr_cache); + rb_insert_color(&new->dma_addr_node, &vgpu->dma_addr_cache); - vdev->nr_cache_entries++; + vgpu->nr_cache_entries++; return 0; } static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu, struct gvt_dma *entry) { - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); - - rb_erase(&entry->gfn_node, &vdev->gfn_cache); - rb_erase(&entry->dma_addr_node, &vdev->dma_addr_cache); + rb_erase(&entry->gfn_node, &vgpu->gfn_cache); + rb_erase(&entry->dma_addr_node, &vgpu->dma_addr_cache); kfree(entry); - vdev->nr_cache_entries--; + vgpu->nr_cache_entries--; } static void gvt_cache_destroy(struct intel_vgpu *vgpu) { struct gvt_dma *dma; struct rb_node *node = NULL; - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); for (;;) { - mutex_lock(&vdev->cache_lock); - node = rb_first(&vdev->gfn_cache); + mutex_lock(&vgpu->cache_lock); + node = rb_first(&vgpu->gfn_cache); if (!node) { - mutex_unlock(&vdev->cache_lock); + mutex_unlock(&vgpu->cache_lock); break; } dma = rb_entry(node, struct gvt_dma, gfn_node); gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size); __gvt_cache_remove_entry(vgpu, dma); - mutex_unlock(&vdev->cache_lock); + mutex_unlock(&vgpu->cache_lock); } } static void gvt_cache_init(struct intel_vgpu *vgpu) { - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); - - vdev->gfn_cache = RB_ROOT; - vdev->dma_addr_cache = RB_ROOT; - vdev->nr_cache_entries = 0; - mutex_init(&vdev->cache_lock); + vgpu->gfn_cache = RB_ROOT; + vgpu->dma_addr_cache = RB_ROOT; + vgpu->nr_cache_entries = 0; + mutex_init(&vgpu->cache_lock); } -static void kvmgt_protect_table_init(struct kvmgt_guest_info *info) +static void kvmgt_protect_table_init(struct intel_vgpu *info) { hash_init(info->ptable); } -static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info) +static void kvmgt_protect_table_destroy(struct intel_vgpu *info) { struct kvmgt_pgfn *p; struct hlist_node *tmp; @@ -511,7 +470,7 @@ static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info) } static struct kvmgt_pgfn * -__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn) +__kvmgt_protect_table_find(struct intel_vgpu *info, gfn_t gfn) { struct kvmgt_pgfn *p, *res = NULL; @@ -525,8 +484,7 @@ __kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn) return res; } -static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info, - gfn_t gfn) +static bool kvmgt_gfn_is_write_protected(struct intel_vgpu *info, gfn_t gfn) { struct kvmgt_pgfn *p; @@ -534,7 +492,7 @@ static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info, return !!p; } -static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn) +static void 
kvmgt_protect_table_add(struct intel_vgpu *info, gfn_t gfn) { struct kvmgt_pgfn *p; @@ -549,8 +507,7 @@ static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn) hash_add(info->ptable, &p->hnode, gfn); } -static void kvmgt_protect_table_del(struct kvmgt_guest_info *info, - gfn_t gfn) +static void kvmgt_protect_table_del(struct intel_vgpu *info, gfn_t gfn) { struct kvmgt_pgfn *p; @@ -564,18 +521,17 @@ static void kvmgt_protect_table_del(struct kvmgt_guest_info *info, static size_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf, size_t count, loff_t *ppos, bool iswrite) { - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS; - void *base = vdev->region[i].data; + void *base = vgpu->region[i].data; loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; - if (pos >= vdev->region[i].size || iswrite) { + if (pos >= vgpu->region[i].size || iswrite) { gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n"); return -EINVAL; } - count = min(count, (size_t)(vdev->region[i].size - pos)); + count = min(count, (size_t)(vgpu->region[i].size - pos)); memcpy(buf, base + pos, count); return count; @@ -617,9 +573,9 @@ static int handle_edid_regs(struct intel_vgpu *vgpu, gvt_vgpu_err("invalid EDID blob\n"); return -EINVAL; } - intel_gvt_ops->emulate_hotplug(vgpu, true); + intel_vgpu_emulate_hotplug(vgpu, true); } else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN) - intel_gvt_ops->emulate_hotplug(vgpu, false); + intel_vgpu_emulate_hotplug(vgpu, false); else { gvt_vgpu_err("invalid EDID link state %d\n", regs->link_state); @@ -668,8 +624,7 @@ static size_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf, int ret; unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS; - struct vfio_edid_region *region = - (struct vfio_edid_region *)kvmgt_vdev(vgpu)->region[i].data; + struct vfio_edid_region *region = vgpu->region[i].data; loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; if (pos < region->vfio_edid_regs.edid_offset) { @@ -701,44 +656,27 @@ static int intel_vgpu_register_reg(struct intel_vgpu *vgpu, const struct intel_vgpu_regops *ops, size_t size, u32 flags, void *data) { - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); struct vfio_region *region; - region = krealloc(vdev->region, - (vdev->num_regions + 1) * sizeof(*region), + region = krealloc(vgpu->region, + (vgpu->num_regions + 1) * sizeof(*region), GFP_KERNEL); if (!region) return -ENOMEM; - vdev->region = region; - vdev->region[vdev->num_regions].type = type; - vdev->region[vdev->num_regions].subtype = subtype; - vdev->region[vdev->num_regions].ops = ops; - vdev->region[vdev->num_regions].size = size; - vdev->region[vdev->num_regions].flags = flags; - vdev->region[vdev->num_regions].data = data; - vdev->num_regions++; + vgpu->region = region; + vgpu->region[vgpu->num_regions].type = type; + vgpu->region[vgpu->num_regions].subtype = subtype; + vgpu->region[vgpu->num_regions].ops = ops; + vgpu->region[vgpu->num_regions].size = size; + vgpu->region[vgpu->num_regions].flags = flags; + vgpu->region[vgpu->num_regions].data = data; + vgpu->num_regions++; return 0; } -static int kvmgt_get_vfio_device(void *p_vgpu) +int intel_gvt_set_opregion(struct intel_vgpu *vgpu) { - struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); - - vdev->vfio_device = vfio_device_get_from_dev( - mdev_dev(vdev->mdev)); - if (!vdev->vfio_device) { - gvt_vgpu_err("failed to get vfio device\n"); - return -ENODEV; - } - return 0; -} - 
- -static int kvmgt_set_opregion(void *p_vgpu) -{ - struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; void *base; int ret; @@ -764,9 +702,8 @@ static int kvmgt_set_opregion(void *p_vgpu) return ret; } -static int kvmgt_set_edid(void *p_vgpu, int port_num) +int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num) { - struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); struct vfio_edid_region *base; int ret; @@ -794,71 +731,11 @@ static int kvmgt_set_edid(void *p_vgpu, int port_num) return ret; } -static void kvmgt_put_vfio_device(void *vgpu) -{ - struct kvmgt_vdev *vdev = kvmgt_vdev((struct intel_vgpu *)vgpu); - - if (WARN_ON(!vdev->vfio_device)) - return; - - vfio_device_put(vdev->vfio_device); -} - -static int intel_vgpu_create(struct mdev_device *mdev) -{ - struct intel_vgpu *vgpu = NULL; - struct intel_vgpu_type *type; - struct device *pdev; - struct intel_gvt *gvt; - int ret; - - pdev = mdev_parent_dev(mdev); - gvt = kdev_to_i915(pdev)->gvt; - - type = &gvt->types[mdev_get_type_group_id(mdev)]; - if (!type) { - ret = -EINVAL; - goto out; - } - - vgpu = intel_gvt_ops->vgpu_create(gvt, type); - if (IS_ERR_OR_NULL(vgpu)) { - ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu); - gvt_err("failed to create intel vgpu: %d\n", ret); - goto out; - } - - INIT_WORK(&kvmgt_vdev(vgpu)->release_work, intel_vgpu_release_work); - - kvmgt_vdev(vgpu)->mdev = mdev; - mdev_set_drvdata(mdev, vgpu); - - gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n", - dev_name(mdev_dev(mdev))); - ret = 0; - -out: - return ret; -} - -static int intel_vgpu_remove(struct mdev_device *mdev) -{ - struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); - - if (handle_valid(vgpu->handle)) - return -EBUSY; - - intel_gvt_ops->vgpu_destroy(vgpu); - return 0; -} - static int intel_vgpu_iommu_notifier(struct notifier_block *nb, unsigned long action, void *data) { - struct kvmgt_vdev *vdev = container_of(nb, - struct kvmgt_vdev, - iommu_notifier); - struct intel_vgpu *vgpu = vdev->vgpu; + struct intel_vgpu *vgpu = + container_of(nb, struct intel_vgpu, iommu_notifier); if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { struct vfio_iommu_type1_dma_unmap *unmap = data; @@ -868,7 +745,7 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb, iov_pfn = unmap->iova >> PAGE_SHIFT; end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE; - mutex_lock(&vdev->cache_lock); + mutex_lock(&vgpu->cache_lock); for (; iov_pfn < end_iov_pfn; iov_pfn++) { entry = __gvt_cache_find_gfn(vgpu, iov_pfn); if (!entry) @@ -878,7 +755,7 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb, entry->size); __gvt_cache_remove_entry(vgpu, entry); } - mutex_unlock(&vdev->cache_lock); + mutex_unlock(&vgpu->cache_lock); } return NOTIFY_OK; @@ -887,35 +764,54 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb, static int intel_vgpu_group_notifier(struct notifier_block *nb, unsigned long action, void *data) { - struct kvmgt_vdev *vdev = container_of(nb, - struct kvmgt_vdev, - group_notifier); + struct intel_vgpu *vgpu = + container_of(nb, struct intel_vgpu, group_notifier); /* the only action we care about */ if (action == VFIO_GROUP_NOTIFY_SET_KVM) { - vdev->kvm = data; + vgpu->kvm = data; if (!data) - schedule_work(&vdev->release_work); + schedule_work(&vgpu->release_work); } return NOTIFY_OK; } -static int intel_vgpu_open_device(struct mdev_device *mdev) +static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu) { - struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); - 
struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); + struct intel_vgpu *itr; + int id; + bool ret = false; + + mutex_lock(&vgpu->gvt->lock); + for_each_active_vgpu(vgpu->gvt, itr, id) { + if (!itr->attached) + continue; + + if (vgpu->kvm == itr->kvm) { + ret = true; + goto out; + } + } +out: + mutex_unlock(&vgpu->gvt->lock); + return ret; +} + +static int intel_vgpu_open_device(struct vfio_device *vfio_dev) +{ + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); unsigned long events; int ret; struct vfio_group *vfio_group; - vdev->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier; - vdev->group_notifier.notifier_call = intel_vgpu_group_notifier; + vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier; + vgpu->group_notifier.notifier_call = intel_vgpu_group_notifier; events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; - ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events, - &vdev->iommu_notifier); + ret = vfio_register_notifier(vfio_dev->dev, VFIO_IOMMU_NOTIFY, &events, + &vgpu->iommu_notifier); if (ret != 0) { gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n", ret); @@ -923,117 +819,129 @@ static int intel_vgpu_open_device(struct mdev_device *mdev) } events = VFIO_GROUP_NOTIFY_SET_KVM; - ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events, - &vdev->group_notifier); + ret = vfio_register_notifier(vfio_dev->dev, VFIO_GROUP_NOTIFY, &events, + &vgpu->group_notifier); if (ret != 0) { gvt_vgpu_err("vfio_register_notifier for group failed: %d\n", ret); goto undo_iommu; } - vfio_group = vfio_group_get_external_user_from_dev(mdev_dev(mdev)); + vfio_group = + vfio_group_get_external_user_from_dev(vgpu->vfio_device.dev); if (IS_ERR_OR_NULL(vfio_group)) { ret = !vfio_group ? -EFAULT : PTR_ERR(vfio_group); gvt_vgpu_err("vfio_group_get_external_user_from_dev failed\n"); goto undo_register; } - vdev->vfio_group = vfio_group; + vgpu->vfio_group = vfio_group; - /* Take a module reference as mdev core doesn't take - * a reference for vendor driver. 
- */ - if (!try_module_get(THIS_MODULE)) { - ret = -ENODEV; + ret = -EEXIST; + if (vgpu->attached) + goto undo_group; + + ret = -ESRCH; + if (!vgpu->kvm || vgpu->kvm->mm != current->mm) { + gvt_vgpu_err("KVM is required to use Intel vGPU\n"); goto undo_group; } - ret = kvmgt_guest_init(mdev); - if (ret) + ret = -EEXIST; + if (__kvmgt_vgpu_exist(vgpu)) goto undo_group; - intel_gvt_ops->vgpu_activate(vgpu); + vgpu->attached = true; + kvm_get_kvm(vgpu->kvm); - atomic_set(&vdev->released, 0); - return ret; + kvmgt_protect_table_init(vgpu); + gvt_cache_init(vgpu); + + vgpu->track_node.track_write = kvmgt_page_track_write; + vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot; + kvm_page_track_register_notifier(vgpu->kvm, &vgpu->track_node); + + debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs, + &vgpu->nr_cache_entries); + + intel_gvt_activate_vgpu(vgpu); + + atomic_set(&vgpu->released, 0); + return 0; undo_group: - vfio_group_put_external_user(vdev->vfio_group); - vdev->vfio_group = NULL; + vfio_group_put_external_user(vgpu->vfio_group); + vgpu->vfio_group = NULL; undo_register: - vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, - &vdev->group_notifier); + vfio_unregister_notifier(vfio_dev->dev, VFIO_GROUP_NOTIFY, + &vgpu->group_notifier); undo_iommu: - vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, - &vdev->iommu_notifier); + vfio_unregister_notifier(vfio_dev->dev, VFIO_IOMMU_NOTIFY, + &vgpu->iommu_notifier); out: return ret; } static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu) { - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); struct eventfd_ctx *trigger; - trigger = vdev->msi_trigger; + trigger = vgpu->msi_trigger; if (trigger) { eventfd_ctx_put(trigger); - vdev->msi_trigger = NULL; + vgpu->msi_trigger = NULL; } } static void __intel_vgpu_release(struct intel_vgpu *vgpu) { - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); struct drm_i915_private *i915 = vgpu->gvt->gt->i915; - struct kvmgt_guest_info *info; int ret; - if (!handle_valid(vgpu->handle)) + if (!vgpu->attached) return; - if (atomic_cmpxchg(&vdev->released, 0, 1)) + if (atomic_cmpxchg(&vgpu->released, 0, 1)) return; - intel_gvt_ops->vgpu_release(vgpu); + intel_gvt_release_vgpu(vgpu); - ret = vfio_unregister_notifier(mdev_dev(vdev->mdev), VFIO_IOMMU_NOTIFY, - &vdev->iommu_notifier); + ret = vfio_unregister_notifier(vgpu->vfio_device.dev, VFIO_IOMMU_NOTIFY, + &vgpu->iommu_notifier); drm_WARN(&i915->drm, ret, "vfio_unregister_notifier for iommu failed: %d\n", ret); - ret = vfio_unregister_notifier(mdev_dev(vdev->mdev), VFIO_GROUP_NOTIFY, - &vdev->group_notifier); + ret = vfio_unregister_notifier(vgpu->vfio_device.dev, VFIO_GROUP_NOTIFY, + &vgpu->group_notifier); drm_WARN(&i915->drm, ret, "vfio_unregister_notifier for group failed: %d\n", ret); - /* dereference module reference taken at open */ - module_put(THIS_MODULE); + debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs)); - info = (struct kvmgt_guest_info *)vgpu->handle; - kvmgt_guest_exit(info); + kvm_page_track_unregister_notifier(vgpu->kvm, &vgpu->track_node); + kvm_put_kvm(vgpu->kvm); + kvmgt_protect_table_destroy(vgpu); + gvt_cache_destroy(vgpu); intel_vgpu_release_msi_eventfd_ctx(vgpu); - vfio_group_put_external_user(vdev->vfio_group); + vfio_group_put_external_user(vgpu->vfio_group); - vdev->kvm = NULL; - vgpu->handle = 0; + vgpu->kvm = NULL; + vgpu->attached = false; } -static void intel_vgpu_close_device(struct mdev_device *mdev) +static void intel_vgpu_close_device(struct vfio_device 
*vfio_dev) { - struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); - - __intel_vgpu_release(vgpu); + __intel_vgpu_release(vfio_dev_to_vgpu(vfio_dev)); } static void intel_vgpu_release_work(struct work_struct *work) { - struct kvmgt_vdev *vdev = container_of(work, struct kvmgt_vdev, - release_work); + struct intel_vgpu *vgpu = + container_of(work, struct intel_vgpu, release_work); - __intel_vgpu_release(vdev->vgpu); + __intel_vgpu_release(vgpu); } static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) @@ -1070,10 +978,10 @@ static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, u64 off, int ret; if (is_write) - ret = intel_gvt_ops->emulate_mmio_write(vgpu, + ret = intel_vgpu_emulate_mmio_write(vgpu, bar_start + off, buf, count); else - ret = intel_gvt_ops->emulate_mmio_read(vgpu, + ret = intel_vgpu_emulate_mmio_read(vgpu, bar_start + off, buf, count); return ret; } @@ -1111,17 +1019,15 @@ static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off, return 0; } -static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, +static ssize_t intel_vgpu_rw(struct intel_vgpu *vgpu, char *buf, size_t count, loff_t *ppos, bool is_write) { - struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); u64 pos = *ppos & VFIO_PCI_OFFSET_MASK; int ret = -EINVAL; - if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) { + if (index >= VFIO_PCI_NUM_REGIONS + vgpu->num_regions) { gvt_vgpu_err("invalid index: %u\n", index); return -EINVAL; } @@ -1129,10 +1035,10 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, switch (index) { case VFIO_PCI_CONFIG_REGION_INDEX: if (is_write) - ret = intel_gvt_ops->emulate_cfg_write(vgpu, pos, + ret = intel_vgpu_emulate_cfg_write(vgpu, pos, buf, count); else - ret = intel_gvt_ops->emulate_cfg_read(vgpu, pos, + ret = intel_vgpu_emulate_cfg_read(vgpu, pos, buf, count); break; case VFIO_PCI_BAR0_REGION_INDEX: @@ -1150,20 +1056,19 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, case VFIO_PCI_ROM_REGION_INDEX: break; default: - if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) + if (index >= VFIO_PCI_NUM_REGIONS + vgpu->num_regions) return -EINVAL; index -= VFIO_PCI_NUM_REGIONS; - return vdev->region[index].ops->rw(vgpu, buf, count, + return vgpu->region[index].ops->rw(vgpu, buf, count, ppos, is_write); } return ret == 0 ? 
count : ret; } -static bool gtt_entry(struct mdev_device *mdev, loff_t *ppos) +static bool gtt_entry(struct intel_vgpu *vgpu, loff_t *ppos) { - struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); struct intel_gvt *gvt = vgpu->gvt; int offset; @@ -1180,9 +1085,10 @@ static bool gtt_entry(struct mdev_device *mdev, loff_t *ppos) true : false; } -static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf, +static ssize_t intel_vgpu_read(struct vfio_device *vfio_dev, char __user *buf, size_t count, loff_t *ppos) { + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); unsigned int done = 0; int ret; @@ -1191,10 +1097,10 @@ static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf, /* Only support GGTT entry 8 bytes read */ if (count >= 8 && !(*ppos % 8) && - gtt_entry(mdev, ppos)) { + gtt_entry(vgpu, ppos)) { u64 val; - ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val), + ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val), ppos, false); if (ret <= 0) goto read_err; @@ -1206,7 +1112,7 @@ static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf, } else if (count >= 4 && !(*ppos % 4)) { u32 val; - ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val), + ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val), ppos, false); if (ret <= 0) goto read_err; @@ -1218,7 +1124,7 @@ static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf, } else if (count >= 2 && !(*ppos % 2)) { u16 val; - ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val), + ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val), ppos, false); if (ret <= 0) goto read_err; @@ -1230,7 +1136,7 @@ static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf, } else { u8 val; - ret = intel_vgpu_rw(mdev, &val, sizeof(val), ppos, + ret = intel_vgpu_rw(vgpu, &val, sizeof(val), ppos, false); if (ret <= 0) goto read_err; @@ -1253,10 +1159,11 @@ read_err: return -EFAULT; } -static ssize_t intel_vgpu_write(struct mdev_device *mdev, +static ssize_t intel_vgpu_write(struct vfio_device *vfio_dev, const char __user *buf, size_t count, loff_t *ppos) { + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); unsigned int done = 0; int ret; @@ -1265,13 +1172,13 @@ static ssize_t intel_vgpu_write(struct mdev_device *mdev, /* Only support GGTT entry 8 bytes write */ if (count >= 8 && !(*ppos % 8) && - gtt_entry(mdev, ppos)) { + gtt_entry(vgpu, ppos)) { u64 val; if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val), + ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val), ppos, true); if (ret <= 0) goto write_err; @@ -1283,7 +1190,7 @@ static ssize_t intel_vgpu_write(struct mdev_device *mdev, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val), + ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val), ppos, true); if (ret <= 0) goto write_err; @@ -1295,7 +1202,7 @@ static ssize_t intel_vgpu_write(struct mdev_device *mdev, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = intel_vgpu_rw(mdev, (char *)&val, + ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val), ppos, true); if (ret <= 0) goto write_err; @@ -1307,7 +1214,7 @@ static ssize_t intel_vgpu_write(struct mdev_device *mdev, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = intel_vgpu_rw(mdev, &val, sizeof(val), + ret = intel_vgpu_rw(vgpu, &val, sizeof(val), ppos, true); if (ret <= 0) goto write_err; @@ -1326,13 +1233,14 @@ 
write_err: return -EFAULT; } -static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma) +static int intel_vgpu_mmap(struct vfio_device *vfio_dev, + struct vm_area_struct *vma) { + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); unsigned int index; u64 virtaddr; unsigned long req_size, pgoff, req_start; pgprot_t pg_prot; - struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); if (index >= VFIO_PCI_ROM_REGION_INDEX) @@ -1407,7 +1315,7 @@ static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu, gvt_vgpu_err("eventfd_ctx_fdget failed\n"); return PTR_ERR(trigger); } - kvmgt_vdev(vgpu)->msi_trigger = trigger; + vgpu->msi_trigger = trigger; } else if ((flags & VFIO_IRQ_SET_DATA_NONE) && !count) intel_vgpu_release_msi_eventfd_ctx(vgpu); @@ -1455,11 +1363,10 @@ static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, u32 flags, return func(vgpu, index, start, count, flags, data); } -static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, +static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd, unsigned long arg) { - struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); unsigned long minsz; gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd); @@ -1478,7 +1385,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, info.flags = VFIO_DEVICE_FLAGS_PCI; info.flags |= VFIO_DEVICE_FLAGS_RESET; info.num_regions = VFIO_PCI_NUM_REGIONS + - vdev->num_regions; + vgpu->num_regions; info.num_irqs = VFIO_PCI_NUM_IRQS; return copy_to_user((void __user *)arg, &info, minsz) ? @@ -1569,22 +1476,22 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, .header.version = 1 }; if (info.index >= VFIO_PCI_NUM_REGIONS + - vdev->num_regions) + vgpu->num_regions) return -EINVAL; info.index = array_index_nospec(info.index, VFIO_PCI_NUM_REGIONS + - vdev->num_regions); + vgpu->num_regions); i = info.index - VFIO_PCI_NUM_REGIONS; info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); - info.size = vdev->region[i].size; - info.flags = vdev->region[i].flags; + info.size = vgpu->region[i].size; + info.flags = vgpu->region[i].flags; - cap_type.type = vdev->region[i].type; - cap_type.subtype = vdev->region[i].subtype; + cap_type.type = vgpu->region[i].type; + cap_type.subtype = vgpu->region[i].subtype; ret = vfio_info_add_capability(&caps, &cap_type.header, @@ -1700,7 +1607,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, return ret; } else if (cmd == VFIO_DEVICE_RESET) { - intel_gvt_ops->vgpu_reset(vgpu); + intel_gvt_reset_vgpu(vgpu); return 0; } else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) { struct vfio_device_gfx_plane_info dmabuf; @@ -1713,7 +1620,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, if (dmabuf.argsz < minsz) return -EINVAL; - ret = intel_gvt_ops->vgpu_query_plane(vgpu, &dmabuf); + ret = intel_vgpu_query_plane(vgpu, &dmabuf); if (ret != 0) return ret; @@ -1721,14 +1628,10 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, -EFAULT : 0; } else if (cmd == VFIO_DEVICE_GET_GFX_DMABUF) { __u32 dmabuf_id; - __s32 dmabuf_fd; if (get_user(dmabuf_id, (__u32 __user *)arg)) return -EFAULT; - - dmabuf_fd = intel_gvt_ops->vgpu_get_dmabuf(vgpu, dmabuf_id); - return dmabuf_fd; - + return intel_vgpu_get_dmabuf(vgpu, dmabuf_id); } return -ENOTTY; @@ -1738,14 +1641,9 @@ static ssize_t 
vgpu_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct mdev_device *mdev = mdev_from_dev(dev); + struct intel_vgpu *vgpu = dev_get_drvdata(dev); - if (mdev) { - struct intel_vgpu *vgpu = (struct intel_vgpu *) - mdev_get_drvdata(mdev); - return sprintf(buf, "%d\n", vgpu->id); - } - return sprintf(buf, "\n"); + return sprintf(buf, "%d\n", vgpu->id); } static DEVICE_ATTR_RO(vgpu_id); @@ -1765,57 +1663,78 @@ static const struct attribute_group *intel_vgpu_groups[] = { NULL, }; -static struct mdev_parent_ops intel_vgpu_ops = { - .mdev_attr_groups = intel_vgpu_groups, - .create = intel_vgpu_create, - .remove = intel_vgpu_remove, - - .open_device = intel_vgpu_open_device, - .close_device = intel_vgpu_close_device, - - .read = intel_vgpu_read, - .write = intel_vgpu_write, - .mmap = intel_vgpu_mmap, - .ioctl = intel_vgpu_ioctl, +static const struct vfio_device_ops intel_vgpu_dev_ops = { + .open_device = intel_vgpu_open_device, + .close_device = intel_vgpu_close_device, + .read = intel_vgpu_read, + .write = intel_vgpu_write, + .mmap = intel_vgpu_mmap, + .ioctl = intel_vgpu_ioctl, }; -static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops) +static int intel_vgpu_probe(struct mdev_device *mdev) { + struct device *pdev = mdev_parent_dev(mdev); + struct intel_gvt *gvt = kdev_to_i915(pdev)->gvt; + struct intel_vgpu_type *type; + struct intel_vgpu *vgpu; int ret; - ret = intel_gvt_init_vgpu_type_groups((struct intel_gvt *)gvt); - if (ret) - return ret; + type = &gvt->types[mdev_get_type_group_id(mdev)]; + if (!type) + return -EINVAL; - intel_gvt_ops = ops; - intel_vgpu_ops.supported_type_groups = gvt_vgpu_type_groups; + vgpu = intel_gvt_create_vgpu(gvt, type); + if (IS_ERR(vgpu)) { + gvt_err("failed to create intel vgpu: %ld\n", PTR_ERR(vgpu)); + return PTR_ERR(vgpu); + } - ret = mdev_register_device(dev, &intel_vgpu_ops); - if (ret) - intel_gvt_cleanup_vgpu_type_groups((struct intel_gvt *)gvt); + INIT_WORK(&vgpu->release_work, intel_vgpu_release_work); + vfio_init_group_dev(&vgpu->vfio_device, &mdev->dev, + &intel_vgpu_dev_ops); - return ret; + dev_set_drvdata(&mdev->dev, vgpu); + ret = vfio_register_emulated_iommu_dev(&vgpu->vfio_device); + if (ret) { + intel_gvt_destroy_vgpu(vgpu); + return ret; + } + + gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n", + dev_name(mdev_dev(mdev))); + return 0; } -static void kvmgt_host_exit(struct device *dev, void *gvt) +static void intel_vgpu_remove(struct mdev_device *mdev) { - mdev_unregister_device(dev); - intel_gvt_cleanup_vgpu_type_groups((struct intel_gvt *)gvt); -} + struct intel_vgpu *vgpu = dev_get_drvdata(&mdev->dev); -static int kvmgt_page_track_add(unsigned long handle, u64 gfn) + if (WARN_ON_ONCE(vgpu->attached)) + return; + intel_gvt_destroy_vgpu(vgpu); +} + +static struct mdev_driver intel_vgpu_mdev_driver = { + .driver = { + .name = "intel_vgpu_mdev", + .owner = THIS_MODULE, + .dev_groups = intel_vgpu_groups, + }, + .probe = intel_vgpu_probe, + .remove = intel_vgpu_remove, + .supported_type_groups = gvt_vgpu_type_groups, +}; + +int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn) { - struct kvmgt_guest_info *info; - struct kvm *kvm; + struct kvm *kvm = info->kvm; struct kvm_memory_slot *slot; int idx; - if (!handle_valid(handle)) + if (!info->attached) return -ESRCH; - info = (struct kvmgt_guest_info *)handle; - kvm = info->kvm; - idx = srcu_read_lock(&kvm->srcu); slot = gfn_to_memslot(kvm, gfn); if (!slot) { @@ -1837,19 +1756,15 @@ out: return 0; } -static int 
kvmgt_page_track_remove(unsigned long handle, u64 gfn) +int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn) { - struct kvmgt_guest_info *info; - struct kvm *kvm; + struct kvm *kvm = info->kvm; struct kvm_memory_slot *slot; int idx; - if (!handle_valid(handle)) + if (!info->attached) return 0; - info = (struct kvmgt_guest_info *)handle; - kvm = info->kvm; - idx = srcu_read_lock(&kvm->srcu); slot = gfn_to_memslot(kvm, gfn); if (!slot) { @@ -1875,11 +1790,11 @@ static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *val, int len, struct kvm_page_track_notifier_node *node) { - struct kvmgt_guest_info *info = container_of(node, - struct kvmgt_guest_info, track_node); + struct intel_vgpu *info = + container_of(node, struct intel_vgpu, track_node); if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa))) - intel_gvt_ops->write_protect_handler(info->vgpu, gpa, + intel_vgpu_page_track_handler(info, gpa, (void *)val, len); } @@ -1889,8 +1804,8 @@ static void kvmgt_page_track_flush_slot(struct kvm *kvm, { int i; gfn_t gfn; - struct kvmgt_guest_info *info = container_of(node, - struct kvmgt_guest_info, track_node); + struct intel_vgpu *info = + container_of(node, struct intel_vgpu, track_node); write_lock(&kvm->mmu_lock); for (i = 0; i < slot->npages; i++) { @@ -1904,182 +1819,32 @@ static void kvmgt_page_track_flush_slot(struct kvm *kvm, write_unlock(&kvm->mmu_lock); } -static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm) -{ - struct intel_vgpu *itr; - struct kvmgt_guest_info *info; - int id; - bool ret = false; - - mutex_lock(&vgpu->gvt->lock); - for_each_active_vgpu(vgpu->gvt, itr, id) { - if (!handle_valid(itr->handle)) - continue; - - info = (struct kvmgt_guest_info *)itr->handle; - if (kvm && kvm == info->kvm) { - ret = true; - goto out; - } - } -out: - mutex_unlock(&vgpu->gvt->lock); - return ret; -} - -static int kvmgt_guest_init(struct mdev_device *mdev) -{ - struct kvmgt_guest_info *info; - struct intel_vgpu *vgpu; - struct kvmgt_vdev *vdev; - struct kvm *kvm; - - vgpu = mdev_get_drvdata(mdev); - if (handle_valid(vgpu->handle)) - return -EEXIST; - - vdev = kvmgt_vdev(vgpu); - kvm = vdev->kvm; - if (!kvm || kvm->mm != current->mm) { - gvt_vgpu_err("KVM is required to use Intel vGPU\n"); - return -ESRCH; - } - - if (__kvmgt_vgpu_exist(vgpu, kvm)) - return -EEXIST; - - info = vzalloc(sizeof(struct kvmgt_guest_info)); - if (!info) - return -ENOMEM; - - vgpu->handle = (unsigned long)info; - info->vgpu = vgpu; - info->kvm = kvm; - kvm_get_kvm(info->kvm); - - kvmgt_protect_table_init(info); - gvt_cache_init(vgpu); - - info->track_node.track_write = kvmgt_page_track_write; - info->track_node.track_flush_slot = kvmgt_page_track_flush_slot; - kvm_page_track_register_notifier(kvm, &info->track_node); - - debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs, - &vdev->nr_cache_entries); - return 0; -} - -static bool kvmgt_guest_exit(struct kvmgt_guest_info *info) -{ - debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, - info->vgpu->debugfs)); - - kvm_page_track_unregister_notifier(info->kvm, &info->track_node); - kvm_put_kvm(info->kvm); - kvmgt_protect_table_destroy(info); - gvt_cache_destroy(info->vgpu); - vfree(info); - - return true; -} - -static int kvmgt_attach_vgpu(void *p_vgpu, unsigned long *handle) -{ - struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; - - vgpu->vdev = kzalloc(sizeof(struct kvmgt_vdev), GFP_KERNEL); - - if (!vgpu->vdev) - return -ENOMEM; - - kvmgt_vdev(vgpu)->vgpu = vgpu; - - return 0; -} - -static void 
kvmgt_detach_vgpu(void *p_vgpu) +void intel_vgpu_detach_regions(struct intel_vgpu *vgpu) { int i; - struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; - struct kvmgt_vdev *vdev = kvmgt_vdev(vgpu); - if (!vdev->region) + if (!vgpu->region) return; - for (i = 0; i < vdev->num_regions; i++) - if (vdev->region[i].ops->release) - vdev->region[i].ops->release(vgpu, - &vdev->region[i]); - vdev->num_regions = 0; - kfree(vdev->region); - vdev->region = NULL; - - kfree(vdev); + for (i = 0; i < vgpu->num_regions; i++) + if (vgpu->region[i].ops->release) + vgpu->region[i].ops->release(vgpu, + &vgpu->region[i]); + vgpu->num_regions = 0; + kfree(vgpu->region); + vgpu->region = NULL; } -static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data) -{ - struct kvmgt_guest_info *info; - struct intel_vgpu *vgpu; - struct kvmgt_vdev *vdev; - - if (!handle_valid(handle)) - return -ESRCH; - - info = (struct kvmgt_guest_info *)handle; - vgpu = info->vgpu; - vdev = kvmgt_vdev(vgpu); - - /* - * When guest is poweroff, msi_trigger is set to NULL, but vgpu's - * config and mmio register isn't restored to default during guest - * poweroff. If this vgpu is still used in next vm, this vgpu's pipe - * may be enabled, then once this vgpu is active, it will get inject - * vblank interrupt request. But msi_trigger is null until msi is - * enabled by guest. so if msi_trigger is null, success is still - * returned and don't inject interrupt into guest. - */ - if (vdev->msi_trigger == NULL) - return 0; - - if (eventfd_signal(vdev->msi_trigger, 1) == 1) - return 0; - - return -EFAULT; -} - -static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn) -{ - struct kvmgt_guest_info *info; - kvm_pfn_t pfn; - - if (!handle_valid(handle)) - return INTEL_GVT_INVALID_ADDR; - - info = (struct kvmgt_guest_info *)handle; - - pfn = gfn_to_pfn(info->kvm, gfn); - if (is_error_noslot_pfn(pfn)) - return INTEL_GVT_INVALID_ADDR; - - return pfn; -} - -static int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, +int intel_gvt_dma_map_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size, dma_addr_t *dma_addr) { - struct intel_vgpu *vgpu; - struct kvmgt_vdev *vdev; struct gvt_dma *entry; int ret; - if (!handle_valid(handle)) + if (!vgpu->attached) return -EINVAL; - vgpu = ((struct kvmgt_guest_info *)handle)->vgpu; - vdev = kvmgt_vdev(vgpu); - - mutex_lock(&vdev->cache_lock); + mutex_lock(&vgpu->cache_lock); entry = __gvt_cache_find_gfn(vgpu, gfn); if (!entry) { @@ -2107,36 +1872,31 @@ static int kvmgt_dma_map_guest_page(unsigned long handle, unsigned long gfn, *dma_addr = entry->dma_addr; } - mutex_unlock(&vdev->cache_lock); + mutex_unlock(&vgpu->cache_lock); return 0; err_unmap: gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size); err_unlock: - mutex_unlock(&vdev->cache_lock); + mutex_unlock(&vgpu->cache_lock); return ret; } -static int kvmgt_dma_pin_guest_page(unsigned long handle, dma_addr_t dma_addr) +int intel_gvt_dma_pin_guest_page(struct intel_vgpu *vgpu, dma_addr_t dma_addr) { - struct kvmgt_guest_info *info; - struct kvmgt_vdev *vdev; struct gvt_dma *entry; int ret = 0; - if (!handle_valid(handle)) + if (!vgpu->attached) return -ENODEV; - info = (struct kvmgt_guest_info *)handle; - vdev = kvmgt_vdev(info->vgpu); - - mutex_lock(&vdev->cache_lock); - entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr); + mutex_lock(&vgpu->cache_lock); + entry = __gvt_cache_find_dma_addr(vgpu, dma_addr); if (entry) kref_get(&entry->ref); else ret = -ENOMEM; - mutex_unlock(&vdev->cache_lock); 
+ mutex_unlock(&vgpu->cache_lock); return ret; } @@ -2150,109 +1910,290 @@ static void __gvt_dma_release(struct kref *ref) __gvt_cache_remove_entry(entry->vgpu, entry); } -static void kvmgt_dma_unmap_guest_page(unsigned long handle, dma_addr_t dma_addr) +void intel_gvt_dma_unmap_guest_page(struct intel_vgpu *vgpu, + dma_addr_t dma_addr) { - struct intel_vgpu *vgpu; - struct kvmgt_vdev *vdev; struct gvt_dma *entry; - if (!handle_valid(handle)) + if (!vgpu->attached) return; - vgpu = ((struct kvmgt_guest_info *)handle)->vgpu; - vdev = kvmgt_vdev(vgpu); - - mutex_lock(&vdev->cache_lock); + mutex_lock(&vgpu->cache_lock); entry = __gvt_cache_find_dma_addr(vgpu, dma_addr); if (entry) kref_put(&entry->ref, __gvt_dma_release); - mutex_unlock(&vdev->cache_lock); + mutex_unlock(&vgpu->cache_lock); } -static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa, - void *buf, unsigned long len, bool write) +static void init_device_info(struct intel_gvt *gvt) { - struct kvmgt_guest_info *info; + struct intel_gvt_device_info *info = &gvt->device_info; + struct pci_dev *pdev = to_pci_dev(gvt->gt->i915->drm.dev); - if (!handle_valid(handle)) - return -ESRCH; + info->max_support_vgpus = 8; + info->cfg_space_size = PCI_CFG_SPACE_EXP_SIZE; + info->mmio_size = 2 * 1024 * 1024; + info->mmio_bar = 0; + info->gtt_start_offset = 8 * 1024 * 1024; + info->gtt_entry_size = 8; + info->gtt_entry_size_shift = 3; + info->gmadr_bytes_in_cmd = 8; + info->max_surface_size = 36 * 1024 * 1024; + info->msi_cap_offset = pdev->msi_cap; +} - info = (struct kvmgt_guest_info *)handle; +static void intel_gvt_test_and_emulate_vblank(struct intel_gvt *gvt) +{ + struct intel_vgpu *vgpu; + int id; - return vfio_dma_rw(kvmgt_vdev(info->vgpu)->vfio_group, - gpa, buf, len, write); + mutex_lock(&gvt->lock); + idr_for_each_entry((&(gvt)->vgpu_idr), (vgpu), (id)) { + if (test_and_clear_bit(INTEL_GVT_REQUEST_EMULATE_VBLANK + id, + (void *)&gvt->service_request)) { + if (vgpu->active) + intel_vgpu_emulate_vblank(vgpu); + } + } + mutex_unlock(&gvt->lock); } -static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa, - void *buf, unsigned long len) +static int gvt_service_thread(void *data) { - return kvmgt_rw_gpa(handle, gpa, buf, len, false); + struct intel_gvt *gvt = (struct intel_gvt *)data; + int ret; + + gvt_dbg_core("service thread start\n"); + + while (!kthread_should_stop()) { + ret = wait_event_interruptible(gvt->service_thread_wq, + kthread_should_stop() || gvt->service_request); + + if (kthread_should_stop()) + break; + + if (WARN_ONCE(ret, "service thread is waken up by signal.\n")) + continue; + + intel_gvt_test_and_emulate_vblank(gvt); + + if (test_bit(INTEL_GVT_REQUEST_SCHED, + (void *)&gvt->service_request) || + test_bit(INTEL_GVT_REQUEST_EVENT_SCHED, + (void *)&gvt->service_request)) { + intel_gvt_schedule(gvt); + } + } + + return 0; } -static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa, - void *buf, unsigned long len) +static void clean_service_thread(struct intel_gvt *gvt) { - return kvmgt_rw_gpa(handle, gpa, buf, len, true); + kthread_stop(gvt->service_thread); } -static unsigned long kvmgt_virt_to_pfn(void *addr) +static int init_service_thread(struct intel_gvt *gvt) { - return PFN_DOWN(__pa(addr)); + init_waitqueue_head(&gvt->service_thread_wq); + + gvt->service_thread = kthread_run(gvt_service_thread, + gvt, "gvt_service_thread"); + if (IS_ERR(gvt->service_thread)) { + gvt_err("fail to start service thread.\n"); + return PTR_ERR(gvt->service_thread); + } + return 0; } -static bool 
kvmgt_is_valid_gfn(unsigned long handle, unsigned long gfn) +/** + * intel_gvt_clean_device - clean a GVT device + * @i915: i915 private + * + * This function is called at the driver unloading stage, to free the + * resources owned by a GVT device. + * + */ +static void intel_gvt_clean_device(struct drm_i915_private *i915) { - struct kvmgt_guest_info *info; - struct kvm *kvm; - int idx; - bool ret; + struct intel_gvt *gvt = fetch_and_zero(&i915->gvt); - if (!handle_valid(handle)) - return false; + if (drm_WARN_ON(&i915->drm, !gvt)) + return; - info = (struct kvmgt_guest_info *)handle; - kvm = info->kvm; + mdev_unregister_device(i915->drm.dev); + intel_gvt_cleanup_vgpu_type_groups(gvt); + intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu); + intel_gvt_clean_vgpu_types(gvt); - idx = srcu_read_lock(&kvm->srcu); - ret = kvm_is_visible_gfn(kvm, gfn); - srcu_read_unlock(&kvm->srcu, idx); + intel_gvt_debugfs_clean(gvt); + clean_service_thread(gvt); + intel_gvt_clean_cmd_parser(gvt); + intel_gvt_clean_sched_policy(gvt); + intel_gvt_clean_workload_scheduler(gvt); + intel_gvt_clean_gtt(gvt); + intel_gvt_free_firmware(gvt); + intel_gvt_clean_mmio_info(gvt); + idr_destroy(&gvt->vgpu_idr); + + kfree(i915->gvt); +} + +/** + * intel_gvt_init_device - initialize a GVT device + * @i915: drm i915 private data + * + * This function is called at the initialization stage, to initialize + * necessary GVT components. + * + * Returns: + * Zero on success, negative error code if failed. + * + */ +static int intel_gvt_init_device(struct drm_i915_private *i915) +{ + struct intel_gvt *gvt; + struct intel_vgpu *vgpu; + int ret; + + if (drm_WARN_ON(&i915->drm, i915->gvt)) + return -EEXIST; + + gvt = kzalloc(sizeof(struct intel_gvt), GFP_KERNEL); + if (!gvt) + return -ENOMEM; + + gvt_dbg_core("init gvt device\n"); + + idr_init_base(&gvt->vgpu_idr, 1); + spin_lock_init(&gvt->scheduler.mmio_context_lock); + mutex_init(&gvt->lock); + mutex_init(&gvt->sched_lock); + gvt->gt = to_gt(i915); + i915->gvt = gvt; + + init_device_info(gvt); + + ret = intel_gvt_setup_mmio_info(gvt); + if (ret) + goto out_clean_idr; + + intel_gvt_init_engine_mmio_context(gvt); + + ret = intel_gvt_load_firmware(gvt); + if (ret) + goto out_clean_mmio_info; + + ret = intel_gvt_init_irq(gvt); + if (ret) + goto out_free_firmware; + + ret = intel_gvt_init_gtt(gvt); + if (ret) + goto out_free_firmware; + ret = intel_gvt_init_workload_scheduler(gvt); + if (ret) + goto out_clean_gtt; + + ret = intel_gvt_init_sched_policy(gvt); + if (ret) + goto out_clean_workload_scheduler; + + ret = intel_gvt_init_cmd_parser(gvt); + if (ret) + goto out_clean_sched_policy; + + ret = init_service_thread(gvt); + if (ret) + goto out_clean_cmd_parser; + + ret = intel_gvt_init_vgpu_types(gvt); + if (ret) + goto out_clean_thread; + + vgpu = intel_gvt_create_idle_vgpu(gvt); + if (IS_ERR(vgpu)) { + ret = PTR_ERR(vgpu); + gvt_err("failed to create idle vgpu\n"); + goto out_clean_types; + } + gvt->idle_vgpu = vgpu; + + intel_gvt_debugfs_init(gvt); + + ret = intel_gvt_init_vgpu_type_groups(gvt); + if (ret) + goto out_destroy_idle_vgpu; + + ret = mdev_register_device(i915->drm.dev, &intel_vgpu_mdev_driver); + if (ret) + goto out_cleanup_vgpu_type_groups; + + gvt_dbg_core("gvt device initialization is done\n"); + return 0; + +out_cleanup_vgpu_type_groups: + intel_gvt_cleanup_vgpu_type_groups(gvt); +out_destroy_idle_vgpu: + intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu); + intel_gvt_debugfs_clean(gvt); +out_clean_types: + intel_gvt_clean_vgpu_types(gvt); +out_clean_thread: + 
clean_service_thread(gvt); +out_clean_cmd_parser: + intel_gvt_clean_cmd_parser(gvt); +out_clean_sched_policy: + intel_gvt_clean_sched_policy(gvt); +out_clean_workload_scheduler: + intel_gvt_clean_workload_scheduler(gvt); +out_clean_gtt: + intel_gvt_clean_gtt(gvt); +out_free_firmware: + intel_gvt_free_firmware(gvt); +out_clean_mmio_info: + intel_gvt_clean_mmio_info(gvt); +out_clean_idr: + idr_destroy(&gvt->vgpu_idr); + kfree(gvt); + i915->gvt = NULL; return ret; } -static const struct intel_gvt_mpt kvmgt_mpt = { - .type = INTEL_GVT_HYPERVISOR_KVM, - .host_init = kvmgt_host_init, - .host_exit = kvmgt_host_exit, - .attach_vgpu = kvmgt_attach_vgpu, - .detach_vgpu = kvmgt_detach_vgpu, - .inject_msi = kvmgt_inject_msi, - .from_virt_to_mfn = kvmgt_virt_to_pfn, - .enable_page_track = kvmgt_page_track_add, - .disable_page_track = kvmgt_page_track_remove, - .read_gpa = kvmgt_read_gpa, - .write_gpa = kvmgt_write_gpa, - .gfn_to_mfn = kvmgt_gfn_to_pfn, - .dma_map_guest_page = kvmgt_dma_map_guest_page, - .dma_unmap_guest_page = kvmgt_dma_unmap_guest_page, - .dma_pin_guest_page = kvmgt_dma_pin_guest_page, - .set_opregion = kvmgt_set_opregion, - .set_edid = kvmgt_set_edid, - .get_vfio_device = kvmgt_get_vfio_device, - .put_vfio_device = kvmgt_put_vfio_device, - .is_valid_gfn = kvmgt_is_valid_gfn, +static void intel_gvt_pm_resume(struct drm_i915_private *i915) +{ + struct intel_gvt *gvt = i915->gvt; + + intel_gvt_restore_fence(gvt); + intel_gvt_restore_mmio(gvt); + intel_gvt_restore_ggtt(gvt); +} + +static const struct intel_vgpu_ops intel_gvt_vgpu_ops = { + .init_device = intel_gvt_init_device, + .clean_device = intel_gvt_clean_device, + .pm_resume = intel_gvt_pm_resume, }; static int __init kvmgt_init(void) { - if (intel_gvt_register_hypervisor(&kvmgt_mpt) < 0) - return -ENODEV; - return 0; + int ret; + + ret = intel_gvt_set_ops(&intel_gvt_vgpu_ops); + if (ret) + return ret; + + ret = mdev_register_driver(&intel_vgpu_mdev_driver); + if (ret) + intel_gvt_clear_ops(&intel_gvt_vgpu_ops); + return ret; } static void __exit kvmgt_exit(void) { - intel_gvt_unregister_hypervisor(); + mdev_unregister_driver(&intel_vgpu_mdev_driver); + intel_gvt_clear_ops(&intel_gvt_vgpu_ops); } module_init(kvmgt_init); |