From eed20c782aea57b7efb42af2905dc381268b21e9 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 24 Jun 2022 18:51:44 +0100 Subject: vfio/type1: Simplify bus_type determination Since IOMMU groups are mandatory for drivers to support, it stands to reason that any device which has been successfully added to a group must be on a bus supported by that IOMMU driver, and therefore a domain viable for any device in the group must be viable for all devices in the group. This already has to be the case for the IOMMU API's internal default domain, for instance. Thus even if the group contains devices on different buses, that can only mean that the IOMMU driver actually supports such an odd topology, and so without loss of generality we can expect the bus type of any device in a group to be suitable for IOMMU API calls. Furthermore, scrutiny reveals a lack of protection for the bus being removed while vfio_iommu_type1_attach_group() is using it; the reference that VFIO holds on the iommu_group ensures that data remains valid, but does not prevent the group's membership changing underfoot. We can address both concerns by recycling vfio_bus_type() into some superficially similar logic to indirect the IOMMU API calls themselves. Each call is thus protected from races by the IOMMU group's own locking, and we no longer need to hold group-derived pointers beyond that scope. It also gives us an easy path for the IOMMU API's migration of bus-based interfaces to device-based, of which we can already take the first step with device_iommu_capable(). As with domains, any capability must in practice be consistent for devices in a given group - and after all it's still the same capability which was expected to be consistent across an entire bus! - so there's no need for any complicated validation. 
Signed-off-by: Robin Murphy Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/194a12d3434d7b38f84fa96503c7664451c8c395.1656092606.git.robin.murphy@arm.com [aw: add comment to vfio_iommu_device_capable()] Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 43 ++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 20 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index c13b9290e357..c496b7d0b96f 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1679,18 +1679,6 @@ out_unlock: return ret; } -static int vfio_bus_type(struct device *dev, void *data) -{ - struct bus_type **bus = data; - - if (*bus && *bus != dev->bus) - return -EINVAL; - - *bus = dev->bus; - - return 0; -} - static int vfio_iommu_replay(struct vfio_iommu *iommu, struct vfio_domain *domain) { @@ -2153,13 +2141,26 @@ static void vfio_iommu_iova_insert_copy(struct vfio_iommu *iommu, list_splice_tail(iova_copy, iova); } +/* Redundantly walks non-present capabilities to simplify caller */ +static int vfio_iommu_device_capable(struct device *dev, void *data) +{ + return device_iommu_capable(dev, (enum iommu_cap)data); +} + +static int vfio_iommu_domain_alloc(struct device *dev, void *data) +{ + struct iommu_domain **domain = data; + + *domain = iommu_domain_alloc(dev->bus); + return 1; /* Don't iterate */ +} + static int vfio_iommu_type1_attach_group(void *iommu_data, struct iommu_group *iommu_group, enum vfio_group_type type) { struct vfio_iommu *iommu = iommu_data; struct vfio_iommu_group *group; struct vfio_domain *domain, *d; - struct bus_type *bus = NULL; bool resv_msi, msi_remap; phys_addr_t resv_msi_base = 0; struct iommu_domain_geometry *geo; @@ -2192,18 +2193,19 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, goto out_unlock; } - /* Determine bus_type in order to allocate a domain */ - ret = 
iommu_group_for_each_dev(iommu_group, &bus, vfio_bus_type); - if (ret) - goto out_free_group; - ret = -ENOMEM; domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) goto out_free_group; + /* + * Going via the iommu_group iterator avoids races, and trivially gives + * us a representative device for the IOMMU API call. We don't actually + * want to iterate beyond the first device (if any). + */ ret = -EIO; - domain->domain = iommu_domain_alloc(bus); + iommu_group_for_each_dev(iommu_group, &domain->domain, + vfio_iommu_domain_alloc); if (!domain->domain) goto out_free_domain; @@ -2258,7 +2260,8 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, list_add(&group->next, &domain->group_list); msi_remap = irq_domain_check_msi_remap() || - iommu_capable(bus, IOMMU_CAP_INTR_REMAP); + iommu_group_for_each_dev(iommu_group, (void *)IOMMU_CAP_INTR_REMAP, + vfio_iommu_device_capable); if (!allow_unsafe_interrupts && !msi_remap) { pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n", -- cgit v1.2.3 From 3b498b6656214d499d57f1e4935448821d0febf9 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 24 Jun 2022 18:59:35 +0100 Subject: vfio: Use device_iommu_capable() Use the new interface to check the capabilities for our device specifically. 
Reviewed-by: Lu Baolu Signed-off-by: Robin Murphy Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/4ea5eb64246f1ee188d1a61c3e93b37756932eb7.1656092606.git.robin.murphy@arm.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 61e71c1154be..4c06b571eaba 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -605,7 +605,7 @@ int vfio_register_group_dev(struct vfio_device *device) * VFIO always sets IOMMU_CACHE because we offer no way for userspace to * restore cache coherency. */ - if (!iommu_capable(device->dev->bus, IOMMU_CAP_CACHE_COHERENCY)) + if (!device_iommu_capable(device->dev, IOMMU_CAP_CACHE_COHERENCY)) return -EINVAL; return __vfio_register_dev(device, -- cgit v1.2.3 From a13b1e472b93f69c35976351e59831564ed6a376 Mon Sep 17 00:00:00 2001 From: Bo Liu Date: Wed, 22 Jun 2022 00:56:51 -0400 Subject: vfio: check vfio_register_iommu_driver() return value As vfio_register_iommu_driver() can fail, we should check the return value. 
Signed-off-by: Bo Liu Acked-by: Cornelia Huck Link: https://lore.kernel.org/r/20220622045651.5416-1-liubo03@inspur.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 61e71c1154be..8f435c0d7748 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -2156,13 +2156,17 @@ static int __init vfio_init(void) if (ret) goto err_alloc_chrdev; - pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); - #ifdef CONFIG_VFIO_NOIOMMU - vfio_register_iommu_driver(&vfio_noiommu_ops); + ret = vfio_register_iommu_driver(&vfio_noiommu_ops); #endif + if (ret) + goto err_driver_register; + + pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); return 0; +err_driver_register: + unregister_chrdev_region(vfio.group_devt, MINORMASK + 1); err_alloc_chrdev: class_destroy(vfio.class); vfio.class = NULL; -- cgit v1.2.3 From 1c61d51e9695d00535d28fc2e123ba9397378707 Mon Sep 17 00:00:00 2001 From: Liam Ni Date: Sat, 25 Jun 2022 19:42:39 +0800 Subject: vfio: check iommu_group_set_name() return value As iommu_group_set_name() can fail, we should check the return value. 
Signed-off-by: Liam Ni Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220625114239.9301-1-zhiguangni01@gmail.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 61e71c1154be..ca823eeac237 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -504,7 +504,9 @@ static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, if (IS_ERR(iommu_group)) return ERR_CAST(iommu_group); - iommu_group_set_name(iommu_group, "vfio-noiommu"); + ret = iommu_group_set_name(iommu_group, "vfio-noiommu"); + if (ret) + goto out_put_group; ret = iommu_group_add_device(iommu_group, dev); if (ret) goto out_put_group; -- cgit v1.2.3 From 6641085e8d7b3f061911517f79a2a15a0a21b97b Mon Sep 17 00:00:00 2001 From: Schspa Shi Date: Wed, 29 Jun 2022 10:29:48 +0800 Subject: vfio: Clear the caps->buf to NULL after free On buffer resize failure, vfio_info_cap_add() will free the buffer, report zero for the size, and return -ENOMEM. As additional hardening, also clear the buffer pointer to prevent any chance of a double free. 
Signed-off-by: Schspa Shi Reviewed-by: Cornelia Huck Link: https://lore.kernel.org/r/20220629022948.55608-1-schspa@gmail.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 61e71c1154be..a0fb93866f61 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1812,6 +1812,7 @@ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); if (!buf) { kfree(caps->buf); + caps->buf = NULL; caps->size = 0; return ERR_PTR(-ENOMEM); } -- cgit v1.2.3 From ffed0518d871482e26c5826c0875bea6775446da Mon Sep 17 00:00:00 2001 From: Li Zhe Date: Mon, 27 Jun 2022 11:51:09 +0800 Subject: vfio: remove useless judgement In function vfio_dma_do_unmap(), we currently prevent a process from unmapping a vfio dma region when its mm_struct is different from that of vfio_dma->task. In our virtual machine scenario, which uses kvm and qemu, this check stops us from live-upgrading qemu: the upgrade uses fork() && exec() to load the new binary, but the new process cannot perform the VFIO_IOMMU_UNMAP_DMA action during vm exit because of this check. This check was added in commit 8f0d5bb95f76 ("vfio iommu type1: Add task structure to vfio_dma") for security reasons. However, because of resource isolation, it seems that no task unrelated to the old and new processes can obtain the same vfio_dma struct here. So this patch deletes the check. 
Signed-off-by: Li Zhe Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20220627035109.73745-1-lizhe.67@bytedance.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index c13b9290e357..a8ff00dad834 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1377,12 +1377,6 @@ again: if (!iommu->v2 && iova > dma->iova) break; - /* - * Task with same address space who mapped this iova range is - * allowed to unmap the iova range. - */ - if (dma->task->mm != current->mm) - break; if (invalidate_vaddr) { if (dma->vaddr_invalid) { -- cgit v1.2.3 From 330c179976f3801526bf222b010b669bf6743098 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Mon, 27 Jun 2022 00:41:19 -0700 Subject: vfio: Move "device->open_count--" out of group_rwsem in vfio_device_open() We do not protect the vfio_device::open_count with group_rwsem elsewhere (see vfio_device_fops_release as a comparison, where we already drop group_rwsem before open_count--). So move the group_rwsem unlock prior to open_count--. This change now also drops group_rwsem before setting device->kvm = NULL, but that's also OK (again, just like vfio_device_fops_release). The setting of device->kvm before open_device is technically done while holding the group_rwsem; this is done to protect the group kvm value we are copying from, and we should not be relying on that to protect the contents of device->kvm; instead we assume this value will not change until after the device is closed and while under the dev_set->lock. 
Cc: Matthew Rosato Cc: Jason Gunthorpe Signed-off-by: Yi Liu Reviewed-by: Matthew Rosato Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20220627074119.523274-1-yi.l.liu@intel.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 61e71c1154be..44c3bf8023ac 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1146,10 +1146,10 @@ err_close_device: if (device->open_count == 1 && device->ops->close_device) device->ops->close_device(device); err_undo_count: + up_read(&device->group->group_rwsem); device->open_count--; if (device->open_count == 0 && device->kvm) device->kvm = NULL; - up_read(&device->group->group_rwsem); mutex_unlock(&device->dev_set->lock); module_put(device->dev->driver->owner); err_unassign_container: -- cgit v1.2.3