From f9fc049ef1e05da3e3d2a45d098ec89b89bf687e Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Wed, 20 Dec 2017 09:47:07 -0700 Subject: iommu/amd - Record more information about unknown events When an unknown type event occurs, the default information written to the syslog should dump raw event data. This could provide insight into the event that occurred. Signed-off-by: Gary R Hook Signed-off-by: Alex Williamson --- drivers/iommu/amd_iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 7d5eb004091d..6af7ae6eac5e 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -616,7 +616,9 @@ retry: address, flags); break; default: - printk(KERN_ERR "UNKNOWN type=0x%02x]\n", type); + printk(KERN_ERR "UNKNOWN type=0x%02x event[0]=0x%08x " + "event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n", + type, event[0], event[1], event[2], event[3]); } memset(__evt, 0, 4 * sizeof(u32)); -- cgit v1.2.3 From ff18c4e598de13af6503d6adb66f3ad768b6a53e Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Wed, 20 Dec 2017 09:47:08 -0700 Subject: iommu/amd: Set the device table entry PPR bit for IOMMU V2 devices The AMD IOMMU specification Rev 3.00 (December 2016) introduces a new Enhanced PPR Handling Support (EPHSup) bit in the MMIO register offset 0030h (IOMMU Extended Feature Register). When EPHSup=1, the IOMMU hardware requires the PPR bit of the device table entry (DTE) to be set in order to support PPR for a particular endpoint device. Please see https://support.amd.com/TechDocs/48882_IOMMU.pdf for this revision of the AMD IOMMU specification. Signed-off-by: Gary R Hook Signed-off-by: Alex Williamson --- drivers/iommu/amd_iommu.c | 20 +++++++++++++++----- drivers/iommu/amd_iommu_types.h | 2 ++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 6af7ae6eac5e..0c71442e331c 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1817,7 +1817,8 @@ static bool dma_ops_domain(struct protection_domain *domain) return domain->flags & PD_DMA_OPS_MASK; } -static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) +static void set_dte_entry(u16 devid, struct protection_domain *domain, + bool ats, bool ppr) { u64 pte_root = 0; u64 flags = 0; @@ -1834,6 +1835,13 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) if (ats) flags |= DTE_FLAG_IOTLB; + if (ppr) { + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + + if (iommu_feature(iommu, FEATURE_EPHSUP)) + pte_root |= 1ULL << DEV_ENTRY_PPR; + } + if (domain->flags & PD_IOMMUV2_MASK) { u64 gcr3 = iommu_virt_to_phys(domain->gcr3_tbl); u64 glx = domain->glx; @@ -1896,9 +1904,9 @@ static void do_attach(struct iommu_dev_data *dev_data, domain->dev_cnt += 1; /* Update device table */ - set_dte_entry(dev_data->devid, domain, ats); + set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2); if (alias != dev_data->devid) - set_dte_entry(alias, domain, ats); + set_dte_entry(alias, domain, ats, dev_data->iommu_v2); device_flush_dte(dev_data); } @@ -2277,13 +2285,15 @@ static void update_device_table(struct protection_domain *domain) struct iommu_dev_data *dev_data; list_for_each_entry(dev_data, &domain->dev_list, list) { - set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled); + set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled, + dev_data->iommu_v2); if (dev_data->devid == dev_data->alias) continue; /* There is an alias, update device table entry for it */ - set_dte_entry(dev_data->alias, domain, dev_data->ats.enabled); + set_dte_entry(dev_data->alias, domain, dev_data->ats.enabled, + dev_data->iommu_v2); } } diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index f6b24c7d8b70..6a877ebd058b 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -98,6 +98,7 @@ #define FEATURE_HE (1ULL<<8) #define FEATURE_PC (1ULL<<9) #define FEATURE_GAM_VAPIC (1ULL<<21) +#define FEATURE_EPHSUP (1ULL<<50) #define FEATURE_PASID_SHIFT 32 #define FEATURE_PASID_MASK (0x1fULL << FEATURE_PASID_SHIFT) @@ -192,6 +193,7 @@ /* macros and definitions for device table entries */ #define DEV_ENTRY_VALID 0x00 #define DEV_ENTRY_TRANSLATION 0x01 +#define DEV_ENTRY_PPR 0x34 #define DEV_ENTRY_IR 0x3d #define DEV_ENTRY_IW 0x3e #define DEV_ENTRY_NO_PAGE_FAULT 0x62 -- cgit v1.2.3 From ca84eaeb3e0520555ee031b5ed26ce1b4b1bbd84 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 20 Dec 2017 09:48:00 -0700 Subject: iommu/ipmmu-vmsa: Add r8a7796 DT binding Update the IPMMU DT binding documentation to include the r8a7796 compat string for R-Car M3-W. Signed-off-by: Magnus Damm Acked-by: Laurent Pinchart Acked-by: Rob Herring Acked-by: Simon Horman Acked-by: Geert Uytterhoeven Signed-off-by: Alex Williamson --- Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt index 857df929a654..43cff3e449d0 100644 --- a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt +++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt @@ -16,6 +16,7 @@ Required Properties: - "renesas,ipmmu-r8a7793" for the R8A7793 (R-Car M2-N) IPMMU. - "renesas,ipmmu-r8a7794" for the R8A7794 (R-Car E2) IPMMU. - "renesas,ipmmu-r8a7795" for the R8A7795 (R-Car H3) IPMMU. + - "renesas,ipmmu-r8a7796" for the R8A7796 (R-Car M3-W) IPMMU. - "renesas,ipmmu-vmsa" for generic R-Car Gen2 VMSA-compatible IPMMU. - reg: Base address and size of the IPMMU registers. -- cgit v1.2.3 From 9327b810bb7e1f3322fe8ed862a62fe0f2c9f14b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 20 Dec 2017 09:48:01 -0700 Subject: iommu/ipmmu-vmsa: Add r8a779(70|95) DT bindings Update the IPMMU DT binding documentation to include the r8a77970 (R-Car V3M) and r8a77995 (R-Car D3) compat strings. Based on work for r8a7796 by Magnus Damm. Signed-off-by: Simon Horman Reviewed-by: Geert Uytterhoeven Signed-off-by: Alex Williamson --- Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt index 43cff3e449d0..1fd5d69647ca 100644 --- a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt +++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt @@ -17,6 +17,8 @@ Required Properties: - "renesas,ipmmu-r8a7794" for the R8A7794 (R-Car E2) IPMMU. - "renesas,ipmmu-r8a7795" for the R8A7795 (R-Car H3) IPMMU. - "renesas,ipmmu-r8a7796" for the R8A7796 (R-Car M3-W) IPMMU. + - "renesas,ipmmu-r8a77970" for the R8A77970 (R-Car V3M) IPMMU. + - "renesas,ipmmu-r8a77995" for the R8A77995 (R-Car D3) IPMMU. - "renesas,ipmmu-vmsa" for generic R-Car Gen2 VMSA-compatible IPMMU. - reg: Base address and size of the IPMMU registers. -- cgit v1.2.3 From 9ae9df035c274c89b7fe3dbc74cbe2fa63386668 Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Wed, 20 Dec 2017 09:48:36 -0700 Subject: iommu: Check the result of iommu_group_get() for NULL The result of iommu_group_get() was being blindly used in both attach and detach which results in a dereference when trying to work with an unknown device. Signed-off-by: Jordan Crouse Signed-off-by: Alex Williamson --- drivers/iommu/iommu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3de5c0bcb5cc..69fef991c651 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1303,6 +1303,9 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev) int ret; group = iommu_group_get(dev); + if (!group) + return -ENODEV; + /* * Lock the group to make sure the device-count doesn't * change while we are attaching @@ -1341,6 +1344,8 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev) struct iommu_group *group; group = iommu_group_get(dev); + if (!group) + return; mutex_lock(&group->mutex); if (iommu_group_device_count(group) != 1) { -- cgit v1.2.3 From 72d548113881dd32bf7f0b221d031e6586468437 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Wed, 20 Dec 2017 09:48:56 -0700 Subject: iommu/vt-d: clean up pr_irq if request_threaded_irq fails It is unlikely request_threaded_irq will fail, but if it does for some reason we should clear iommu->pr_irq in the error path. Also intel_svm_finish_prq shouldn't try to clean up the page request interrupt if pr_irq is 0. Without these, if request_threaded_irq were to fail the following occurs: fail with no fixes: [ 0.683147] ------------[ cut here ]------------ [ 0.683148] NULL pointer, cannot free irq [ 0.683158] WARNING: CPU: 1 PID: 1 at kernel/irq/irqdomain.c:1632 irq_domain_free_irqs+0x126/0x140 [ 0.683160] Modules linked in: [ 0.683163] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.15.0-rc2 #3 [ 0.683165] Hardware name: /NUC7i3BNB, BIOS BNKBL357.86A.0036.2017.0105.1112 01/05/2017 [ 0.683168] RIP: 0010:irq_domain_free_irqs+0x126/0x140 [ 0.683169] RSP: 0000:ffffc90000037ce8 EFLAGS: 00010292 [ 0.683171] RAX: 000000000000001d RBX: ffff880276283c00 RCX: ffffffff81c5e5e8 [ 0.683172] RDX: 0000000000000001 RSI: 0000000000000096 RDI: 0000000000000246 [ 0.683174] RBP: ffff880276283c00 R08: 0000000000000000 R09: 000000000000023c [ 0.683175] R10: 0000000000000007 R11: 0000000000000000 R12: 000000000000007a [ 0.683176] R13: 0000000000000001 R14: 0000000000000000 R15: 0000010010000000 [ 0.683178] FS: 0000000000000000(0000) GS:ffff88027ec80000(0000) knlGS:0000000000000000 [ 0.683180] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 0.683181] CR2: 0000000000000000 CR3: 0000000001c09001 CR4: 00000000003606e0 [ 0.683182] Call Trace: [ 0.683189] intel_svm_finish_prq+0x3c/0x60 [ 0.683191] free_dmar_iommu+0x1ac/0x1b0 [ 0.683195] init_dmars+0xaaa/0xaea [ 0.683200] ? klist_next+0x19/0xc0 [ 0.683203] ? pci_do_find_bus+0x50/0x50 [ 0.683205] ? pci_get_dev_by_id+0x52/0x70 [ 0.683208] intel_iommu_init+0x498/0x5c7 [ 0.683211] pci_iommu_init+0x13/0x3c [ 0.683214] ? e820__memblock_setup+0x61/0x61 [ 0.683217] do_one_initcall+0x4d/0x1a0 [ 0.683220] kernel_init_freeable+0x186/0x20e [ 0.683222] ? set_debug_rodata+0x11/0x11 [ 0.683225] ? rest_init+0xb0/0xb0 [ 0.683226] kernel_init+0xa/0xff [ 0.683229] ret_from_fork+0x1f/0x30 [ 0.683259] Code: 89 ee 44 89 e7 e8 3b e8 ff ff 5b 5d 44 89 e7 44 89 ee 41 5c 41 5d 41 5e e9 a8 84 ff ff 48 c7 c7 a8 71 a7 81 31 c0 e8 6a d3 f9 ff <0f> ff 5b 5d 41 5c 41 5d 41 5 e c3 0f 1f 44 00 00 66 2e 0f 1f 84 [ 0.683285] ---[ end trace f7650e42792627ca ]--- with iommu->pr_irq = 0, but no check in intel_svm_finish_prq: [ 0.669561] ------------[ cut here ]------------ [ 0.669563] Trying to free already-free IRQ 0 [ 0.669573] WARNING: CPU: 3 PID: 1 at kernel/irq/manage.c:1546 __free_irq+0xa4/0x2c0 [ 0.669574] Modules linked in: [ 0.669577] CPU: 3 PID: 1 Comm: swapper/0 Not tainted 4.15.0-rc2 #4 [ 0.669579] Hardware name: /NUC7i3BNB, BIOS BNKBL357.86A.0036.2017.0105.1112 01/05/2017 [ 0.669581] RIP: 0010:__free_irq+0xa4/0x2c0 [ 0.669582] RSP: 0000:ffffc90000037cc0 EFLAGS: 00010082 [ 0.669584] RAX: 0000000000000021 RBX: 0000000000000000 RCX: ffffffff81c5e5e8 [ 0.669585] RDX: 0000000000000001 RSI: 0000000000000086 RDI: 0000000000000046 [ 0.669587] RBP: 0000000000000000 R08: 0000000000000000 R09: 000000000000023c [ 0.669588] R10: 0000000000000007 R11: 0000000000000000 R12: ffff880276253960 [ 0.669589] R13: ffff8802762538a4 R14: ffff880276253800 R15: ffff880276283600 [ 0.669593] FS: 0000000000000000(0000) GS:ffff88027ed80000(0000) knlGS:0000000000000000 [ 0.669594] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 0.669596] CR2: 0000000000000000 CR3: 0000000001c09001 CR4: 00000000003606e0 [ 0.669602] Call Trace: [ 0.669616] free_irq+0x30/0x60 [ 0.669620] intel_svm_finish_prq+0x34/0x60 [ 0.669623] free_dmar_iommu+0x1ac/0x1b0 [ 0.669627] init_dmars+0xaaa/0xaea [ 0.669631] ? klist_next+0x19/0xc0 [ 0.669634] ? pci_do_find_bus+0x50/0x50 [ 0.669637] ? pci_get_dev_by_id+0x52/0x70 [ 0.669639] intel_iommu_init+0x498/0x5c7 [ 0.669642] pci_iommu_init+0x13/0x3c [ 0.669645] ? e820__memblock_setup+0x61/0x61 [ 0.669648] do_one_initcall+0x4d/0x1a0 [ 0.669651] kernel_init_freeable+0x186/0x20e [ 0.669653] ? set_debug_rodata+0x11/0x11 [ 0.669656] ? rest_init+0xb0/0xb0 [ 0.669658] kernel_init+0xa/0xff [ 0.669661] ret_from_fork+0x1f/0x30 [ 0.669662] Code: 7a 08 75 0e e9 c3 01 00 00 4c 39 7b 08 74 57 48 89 da 48 8b 5a 18 48 85 db 75 ee 89 ee 48 c7 c7 78 67 a7 81 31 c0 e8 4c 37 fa ff <0f> ff 48 8b 34 24 4c 89 ef e 8 0e 4c 68 00 49 8b 46 40 48 8b 80 [ 0.669688] ---[ end trace 58a470248700f2fc ]--- Cc: Alex Williamson Cc: Joerg Roedel Cc: Ashok Raj Signed-off-by: Jerry Snitselaar Reviewed-by: Ashok Raj Signed-off-by: Alex Williamson --- drivers/iommu/intel-svm.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index ed1cf7c5a43b..6643277e321e 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -129,6 +129,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n", iommu->name); dmar_free_hwirq(irq); + iommu->pr_irq = 0; goto err; } dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); @@ -144,9 +145,11 @@ int intel_svm_finish_prq(struct intel_iommu *iommu) dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL); - free_irq(iommu->pr_irq, iommu); - dmar_free_hwirq(iommu->pr_irq); - iommu->pr_irq = 0; + if (iommu->pr_irq) { + free_irq(iommu->pr_irq, iommu); + dmar_free_hwirq(iommu->pr_irq); + iommu->pr_irq = 0; + } free_pages((unsigned long)iommu->prq, PRQ_ORDER); iommu->prq = NULL; -- cgit v1.2.3 From f18affbea8f7aebb235bfdaf3ad4c307aa5f3d64 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 2 Jan 2018 16:29:45 +0100 Subject: iommu/omap: Fix debugfs_create_*() usage When exposing data access through debugfs, the correct debugfs_create_*() functions must be used, depending on data type. Remove all casts from data pointers passed to debugfs_create_*() functions, as such casts prevent the compiler from flagging bugs. omap_iommu.nr_tlb_entries is "int", hence casting to "u8 *" exposes only a part of it. Fix this by using debugfs_create_u32() instead. Signed-off-by: Geert Uytterhoeven Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu-debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c index 505548aafeff..50217548c3b8 100644 --- a/drivers/iommu/omap-iommu-debug.c +++ b/drivers/iommu/omap-iommu-debug.c @@ -274,8 +274,8 @@ void omap_iommu_debugfs_add(struct omap_iommu *obj) if (!obj->debug_dir) return; - d = debugfs_create_u8("nr_tlb_entries", 0400, obj->debug_dir, - (u8 *)&obj->nr_tlb_entries); + d = debugfs_create_u32("nr_tlb_entries", 0400, obj->debug_dir, + &obj->nr_tlb_entries); if (!d) return; -- cgit v1.2.3 From dc98b8480d8a68c2ce9aa28b9f0d714fd258bc0b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 9 Jan 2018 15:34:07 +0000 Subject: iommu/exynos: Don't unconditionally steal bus ops Removing the early device registration hook overlooked the fact that it only ran conditionally on a compatible device being present in the DT. With exynos_iommu_init() now running as an unconditional initcall, problems arise on non-Exynos systems when other IOMMU drivers find themselves unable to install their ops on the platform bus, or at worst the Exynos ops get called with someone else's domain and all hell breaks loose. The global ops/cache setup could probably all now be triggered from the first IOMMU probe, as with dma_dev assigment, but for the time being the simplest fix is to resurrect the logic from commit a7b67cd5d9af ("iommu/exynos: Play nice in multi-platform builds") to explicitly check the DT for the presence of an Exynos IOMMU before trying anything. Fixes: 928055a01b3f ("iommu/exynos: Remove custom platform device registration code") Signed-off-by: Robin Murphy Acked-by: Marek Szyprowski Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 79c45650f8de..736d4552d96f 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1353,8 +1353,15 @@ static const struct iommu_ops exynos_iommu_ops = { static int __init exynos_iommu_init(void) { + struct device_node *np; int ret; + np = of_find_matching_node(NULL, sysmmu_of_match); + if (!np) + return 0; + + of_node_put(np); + lv2table_kmem_cache = kmem_cache_create("exynos-iommu-lv2table", LV2TABLE_SIZE, LV2TABLE_SIZE, 0, NULL); if (!lv2table_kmem_cache) { -- cgit v1.2.3 From 9d2e6505f6d6934e681aed502f566198cb25c74a Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 10 Jan 2018 13:51:37 +0800 Subject: iommu/vt-d: Use domain instead of cache fetching after commit a1ddcbe93010 ("iommu/vt-d: Pass dmar_domain directly into iommu_flush_iotlb_psi", 2015-08-12), we have domain pointer as parameter to iommu_flush_iotlb_psi(), so no need to fetch it from cache again. More importantly, a NULL reference pointer bug is reported on RHEL7 (and it can be reproduced on some old upstream kernels too, e.g., v4.13) by unplugging an 40g nic from a VM (hard to test unplug on real host, but it should be the same): https://bugzilla.redhat.com/show_bug.cgi?id=1531367 [ 24.391863] pciehp 0000:00:03.0:pcie004: Slot(0): Attention button pressed [ 24.393442] pciehp 0000:00:03.0:pcie004: Slot(0): Powering off due to button press [ 29.721068] i40evf 0000:01:00.0: Unable to send opcode 2 to PF, err I40E_ERR_QUEUE_EMPTY, aq_err OK [ 29.783557] iommu: Removing device 0000:01:00.0 from group 3 [ 29.784662] BUG: unable to handle kernel NULL pointer dereference at 0000000000000304 [ 29.785817] IP: iommu_flush_iotlb_psi+0xcf/0x120 [ 29.786486] PGD 0 [ 29.786487] P4D 0 [ 29.786812] [ 29.787390] Oops: 0000 [#1] SMP [ 29.787876] Modules linked in: ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ip_set nfnetlink ebtable_nat ebtable_broute bridge stp llc ip6table_ng [ 29.795371] CPU: 0 PID: 156 Comm: kworker/0:2 Not tainted 4.13.0 #14 [ 29.796366] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.11.0-1.el7 04/01/2014 [ 29.797593] Workqueue: pciehp-0 pciehp_power_thread [ 29.798328] task: ffff94f5745b4a00 task.stack: ffffb326805ac000 [ 29.799178] RIP: 0010:iommu_flush_iotlb_psi+0xcf/0x120 [ 29.799919] RSP: 0018:ffffb326805afbd0 EFLAGS: 00010086 [ 29.800666] RAX: ffff94f5bc56e800 RBX: 0000000000000000 RCX: 0000000200000025 [ 29.801667] RDX: ffff94f5bc56e000 RSI: 0000000000000082 RDI: 0000000000000000 [ 29.802755] RBP: ffffb326805afbf8 R08: 0000000000000000 R09: ffff94f5bc86bbf0 [ 29.803772] R10: ffffb326805afba8 R11: 00000000000ffdc4 R12: ffff94f5bc86a400 [ 29.804789] R13: 0000000000000000 R14: 00000000ffdc4000 R15: 0000000000000000 [ 29.805792] FS: 0000000000000000(0000) GS:ffff94f5bfc00000(0000) knlGS:0000000000000000 [ 29.806923] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 29.807736] CR2: 0000000000000304 CR3: 000000003499d000 CR4: 00000000000006f0 [ 29.808747] Call Trace: [ 29.809156] flush_unmaps_timeout+0x126/0x1c0 [ 29.809800] domain_exit+0xd6/0x100 [ 29.810322] device_notifier+0x6b/0x70 [ 29.810902] notifier_call_chain+0x4a/0x70 [ 29.812822] __blocking_notifier_call_chain+0x47/0x60 [ 29.814499] blocking_notifier_call_chain+0x16/0x20 [ 29.816137] device_del+0x233/0x320 [ 29.817588] pci_remove_bus_device+0x6f/0x110 [ 29.819133] pci_stop_and_remove_bus_device+0x1a/0x20 [ 29.820817] pciehp_unconfigure_device+0x7a/0x1d0 [ 29.822434] pciehp_disable_slot+0x52/0xe0 [ 29.823931] pciehp_power_thread+0x8a/0xa0 [ 29.825411] process_one_work+0x18c/0x3a0 [ 29.826875] worker_thread+0x4e/0x3b0 [ 29.828263] kthread+0x109/0x140 [ 29.829564] ? process_one_work+0x3a0/0x3a0 [ 29.831081] ? kthread_park+0x60/0x60 [ 29.832464] ret_from_fork+0x25/0x30 [ 29.833794] Code: 85 ed 74 0b 5b 41 5c 41 5d 41 5e 41 5f 5d c3 49 8b 54 24 60 44 89 f8 0f b6 c4 48 8b 04 c2 48 85 c0 74 49 45 0f b6 ff 4a 8b 3c f8 <80> bf [ 29.838514] RIP: iommu_flush_iotlb_psi+0xcf/0x120 RSP: ffffb326805afbd0 [ 29.840362] CR2: 0000000000000304 [ 29.841716] ---[ end trace b10ec0d6900868d3 ]--- This patch fixes that problem if applied to v4.13 kernel. The bug does not exist on latest upstream kernel since it's fixed as a side effect of commit 13cf01744608 ("iommu/vt-d: Make use of iova deferred flushing", 2017-08-15). But IMHO it's still good to have this patch upstream. CC: Alex Williamson Signed-off-by: Peter Xu Fixes: a1ddcbe93010 ("iommu/vt-d: Pass dmar_domain directly into iommu_flush_iotlb_psi") Reviewed-by: Alex Williamson Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 4a2de34895ec..869f37d1f3b7 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1601,8 +1601,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, * flush. However, device IOTLB doesn't need to be flushed in this case. */ if (!cap_caching_mode(iommu->cap) || !map) - iommu_flush_dev_iotlb(get_iommu_domain(iommu, did), - addr, mask); + iommu_flush_dev_iotlb(domain, addr, mask); } static void iommu_flush_iova(struct iova_domain *iovad) -- cgit v1.2.3 From 5e3b4a15dd8da6bded2092694c8db3b505cbf711 Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Wed, 20 Dec 2017 11:59:24 -0800 Subject: iommu/vt-d: Enable upto 57 bits of domain address width Update the IOMMU default domain address width to 57 bits. This would enable the IOMMU to do upto 5-levels of paging for second level translations - IOVA translation requests without PASID. Even though the maximum supported address width is being increased to 57, __iommu_calculate_agaw() would set the actual supported address width to the maximum support available in IOMMU hardware. Signed-off-by: Sohil Mehta Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 869f37d1f3b7..53227d6e911e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -64,7 +64,7 @@ #define IOAPIC_RANGE_END (0xfeefffff) #define IOVA_START_ADDR (0x1000) -#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 +#define DEFAULT_DOMAIN_ADDRESS_WIDTH 57 #define MAX_AGAW_WIDTH 64 #define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT) -- cgit v1.2.3 From 59103caa6839592788e7ad58b35863aac034631a Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Wed, 20 Dec 2017 11:59:25 -0800 Subject: iommu/vt-d: Add a check for 1GB page support Add a check to verify IOMMU 1GB page support. If the CPU supports 1GB pages but the IOMMU does not support it then disable SVM by not allocating PASID tables. Signed-off-by: Sohil Mehta Signed-off-by: Joerg Roedel --- drivers/iommu/intel-svm.c | 4 ++++ include/linux/intel-iommu.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 6643277e321e..e9a56ad09a76 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -41,6 +41,10 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) struct page *pages; int order; + if (cpu_feature_enabled(X86_FEATURE_GBPAGES) && + !cap_fl1gp_support(iommu->cap)) + return -EINVAL; + /* Start at 2 because it's defined as 2^(1+PSS) */ iommu->pasid_max = 2 << ecap_pss(iommu->ecap); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index f3274d9f46a2..a56bab114f39 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -84,6 +84,7 @@ * Decoding Capability Register */ #define cap_pi_support(c) (((c) >> 59) & 1) +#define cap_fl1gp_support(c) (((c) >> 56) & 1) #define cap_read_drain(c) (((c) >> 55) & 1) #define cap_write_drain(c) (((c) >> 54) & 1) #define cap_max_amask_val(c) (((c) >> 48) & 0x3f) -- cgit v1.2.3 From f1ac10c24efbbcba0f8dae37ee90d45847f5c5af Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Wed, 20 Dec 2017 11:59:26 -0800 Subject: iommu/vt-d: Add a check for 5-level paging support Add a check to verify IOMMU 5-level paging support. If the CPU supports supports 5-level paging but the IOMMU does not support it then disable SVM by not allocating PASID tables. Signed-off-by: Sohil Mehta Signed-off-by: Joerg Roedel --- drivers/iommu/intel-svm.c | 4 ++++ include/linux/intel-iommu.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index e9a56ad09a76..b43dc1edcef0 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -45,6 +45,10 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu) !cap_fl1gp_support(iommu->cap)) return -EINVAL; + if (cpu_feature_enabled(X86_FEATURE_LA57) && + !cap_5lp_support(iommu->cap)) + return -EINVAL; + /* Start at 2 because it's defined as 2^(1+PSS) */ iommu->pasid_max = 2 << ecap_pss(iommu->ecap); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index a56bab114f39..8dad3dd26eae 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -83,6 +83,7 @@ /* * Decoding Capability Register */ +#define cap_5lp_support(c) (((c) >> 60) & 1) #define cap_pi_support(c) (((c) >> 59) & 1) #define cap_fl1gp_support(c) (((c) >> 56) & 1) #define cap_read_drain(c) (((c) >> 55) & 1) -- cgit v1.2.3 From 2f13eb7c580fcbe3d73ebbe6fb1841381cad0a05 Mon Sep 17 00:00:00 2001 From: Sohil Mehta Date: Wed, 20 Dec 2017 11:59:27 -0800 Subject: iommu/vt-d: Enable 5-level paging mode in the PASID entry If the CPU has support for 5-level paging enabled and the IOMMU also supports 5-level paging then enable the 5-level paging mode for first- level translations - used when SVM is enabled. Signed-off-by: Sohil Mehta Signed-off-by: Joerg Roedel --- drivers/iommu/intel-svm.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index b43dc1edcef0..859b61ddd508 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -26,6 +26,10 @@ #include #include +#define PASID_ENTRY_P BIT_ULL(0) +#define PASID_ENTRY_FLPM_5LP BIT_ULL(9) +#define PASID_ENTRY_SRE BIT_ULL(11) + static irqreturn_t prq_event_thread(int irq, void *d); struct pasid_entry { @@ -300,6 +304,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ struct intel_svm_dev *sdev; struct intel_svm *svm = NULL; struct mm_struct *mm = NULL; + u64 pasid_entry_val; int pasid_max; int ret; @@ -406,9 +411,15 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ kfree(sdev); goto out; } - iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) | 1; + pasid_entry_val = (u64)__pa(mm->pgd) | PASID_ENTRY_P; } else - iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11); + pasid_entry_val = (u64)__pa(init_mm.pgd) | + PASID_ENTRY_P | PASID_ENTRY_SRE; + if (cpu_feature_enabled(X86_FEATURE_LA57)) + pasid_entry_val |= PASID_ENTRY_FLPM_5LP; + + iommu->pasid_table[svm->pasid].val = pasid_entry_val; + wmb(); /* In caching mode, we still have to flush with PASID 0 when * a PASID table entry becomes present. Not entirely clear -- cgit v1.2.3 From 892d7aaddb24b0d3eaf05534ed29a264d3b52646 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 9 Jan 2018 16:17:25 +0000 Subject: iommu/msm: Claim bus ops on probe Since the MSM IOMMU driver now probes via DT exclusively rather than platform data, dependent masters should be deferred until the IOMMU itself is ready. Thus we can do away with the early initialisation hook to unconditionally claim the bus ops, and instead do that only once an IOMMU is actually probed. Furthermore, this should also make the driver safe for multiplatform kernels on non-MSM SoCs. Reviewed-by: Sricharan R Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/msm_iommu.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 04f4d51ffacb..dda1ce87a070 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -823,6 +823,8 @@ static int msm_iommu_probe(struct platform_device *pdev) goto fail; } + bus_set_iommu(&platform_bus_type, &msm_iommu_ops); + pr_info("device mapped at %p, irq %d with %d ctx banks\n", iommu->base, iommu->irq, iommu->ncb); @@ -875,19 +877,7 @@ static void __exit msm_iommu_driver_exit(void) subsys_initcall(msm_iommu_driver_init); module_exit(msm_iommu_driver_exit); -static int __init msm_iommu_init(void) -{ - bus_set_iommu(&platform_bus_type, &msm_iommu_ops); - return 0; -} - -static int __init msm_iommu_of_setup(struct device_node *np) -{ - msm_iommu_init(); - return 0; -} - -IOMMU_OF_DECLARE(msm_iommu_of, "qcom,apq8064-iommu", msm_iommu_of_setup); +IOMMU_OF_DECLARE(msm_iommu_of, "qcom,apq8064-iommu", NULL); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Stepan Moskovchenko "); -- cgit v1.2.3 From e7747d88e05eabed6fd921c3636a9d1f5b4f754f Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 9 Jan 2018 16:17:26 +0000 Subject: iommu/ipmmu-vmsa: Remove redundant of_iommu_init_fn hook Having of_iommu_init() call ipmmu_init() via ipmmu_vmsa_iommu_of_setup() does nothing that the subsys_initcall wouldn't do slightly later anyway, since probe-deferral of masters means it is no longer critical to register the driver super-early. Clean it up. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 8dce3a9de9d8..331dad909301 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1108,18 +1108,8 @@ static void __exit ipmmu_exit(void) subsys_initcall(ipmmu_init); module_exit(ipmmu_exit); -#ifdef CONFIG_IOMMU_DMA -static int __init ipmmu_vmsa_iommu_of_setup(struct device_node *np) -{ - ipmmu_init(); - return 0; -} - -IOMMU_OF_DECLARE(ipmmu_vmsa_iommu_of, "renesas,ipmmu-vmsa", - ipmmu_vmsa_iommu_of_setup); -IOMMU_OF_DECLARE(ipmmu_r8a7795_iommu_of, "renesas,ipmmu-r8a7795", - ipmmu_vmsa_iommu_of_setup); -#endif +IOMMU_OF_DECLARE(ipmmu_vmsa_iommu_of, "renesas,ipmmu-vmsa", NULL); +IOMMU_OF_DECLARE(ipmmu_r8a7795_iommu_of, "renesas,ipmmu-r8a7795", NULL); MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU"); MODULE_AUTHOR("Laurent Pinchart "); -- cgit v1.2.3 From b0c560f7d8a4b333bcc18f692d0af0d5cca90fe2 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 9 Jan 2018 16:17:27 +0000 Subject: iommu: Clean up of_iommu_init_fn Now that no more drivers rely on arbitrary early initialisation via an of_iommu_init_fn hook, let's clean up the redundant remnants. The IOMMU_OF_DECLARE() macro needs to remain for now, as the probe-deferral mechanism has no other nice way to detect built-in drivers before they have registered themselves, such that it can make the right decision. Reviewed-by: Sricharan R Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/arm-smmu-v3.c | 2 +- drivers/iommu/arm-smmu.c | 12 ++++++------ drivers/iommu/exynos-iommu.c | 2 +- drivers/iommu/ipmmu-vmsa.c | 4 ++-- drivers/iommu/msm_iommu.c | 2 +- drivers/iommu/of_iommu.c | 16 ---------------- drivers/iommu/qcom_iommu.c | 2 +- include/linux/of_iommu.h | 5 +---- 8 files changed, 13 insertions(+), 32 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index f122071688fd..7f186beaa1a6 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2962,7 +2962,7 @@ static struct platform_driver arm_smmu_driver = { }; module_platform_driver(arm_smmu_driver); -IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3", NULL); +IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3"); MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations"); MODULE_AUTHOR("Will Deacon "); diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 78d4c6b8f1ba..69e7c60792a8 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -2211,12 +2211,12 @@ static struct platform_driver arm_smmu_driver = { }; module_platform_driver(arm_smmu_driver); -IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1", NULL); -IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2", NULL); -IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400", NULL); -IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401", NULL); -IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500", NULL); -IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2", NULL); +IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1"); +IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2"); +IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400"); +IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401"); +IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500"); +IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2"); MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations"); MODULE_AUTHOR("Will Deacon "); diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 79c45650f8de..1c7f926fad0e 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1394,4 +1394,4 @@ err_reg_driver: } core_initcall(exynos_iommu_init); -IOMMU_OF_DECLARE(exynos_iommu_of, "samsung,exynos-sysmmu", NULL); +IOMMU_OF_DECLARE(exynos_iommu_of, "samsung,exynos-sysmmu"); diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 331dad909301..40ae6e87cb88 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1108,8 +1108,8 @@ static void __exit ipmmu_exit(void) subsys_initcall(ipmmu_init); module_exit(ipmmu_exit); -IOMMU_OF_DECLARE(ipmmu_vmsa_iommu_of, "renesas,ipmmu-vmsa", NULL); -IOMMU_OF_DECLARE(ipmmu_r8a7795_iommu_of, "renesas,ipmmu-r8a7795", NULL); +IOMMU_OF_DECLARE(ipmmu_vmsa_iommu_of, "renesas,ipmmu-vmsa"); +IOMMU_OF_DECLARE(ipmmu_r8a7795_iommu_of, "renesas,ipmmu-r8a7795"); MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU"); MODULE_AUTHOR("Laurent Pinchart "); diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index dda1ce87a070..0d3350463a3f 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -877,7 +877,7 @@ static void __exit msm_iommu_driver_exit(void) subsys_initcall(msm_iommu_driver_init); module_exit(msm_iommu_driver_exit); -IOMMU_OF_DECLARE(msm_iommu_of, "qcom,apq8064-iommu", NULL); +IOMMU_OF_DECLARE(msm_iommu_of, "qcom,apq8064-iommu"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Stepan Moskovchenko "); diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 50947ebb6d17..5c36a8b7656a 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -231,19 +231,3 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, return ops; } - -static int __init of_iommu_init(void) -{ - struct device_node *np; - const struct of_device_id *match, *matches = &__iommu_of_table; - - for_each_matching_node_and_match(np, matches, &match) { - const of_iommu_init_fn init_fn = match->data; - - if (init_fn && init_fn(np)) - pr_err("Failed to initialise IOMMU %pOF\n", np); - } - - return 0; -} -postcore_initcall_sync(of_iommu_init); diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index e07f02d00c68..65b9c99707f8 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -947,7 +947,7 @@ static void __exit qcom_iommu_exit(void) module_init(qcom_iommu_init); module_exit(qcom_iommu_exit); -IOMMU_OF_DECLARE(qcom_iommu_dev, "qcom,msm-iommu-v1", NULL); +IOMMU_OF_DECLARE(qcom_iommu_dev, "qcom,msm-iommu-v1"); MODULE_DESCRIPTION("IOMMU API for QCOM IOMMU v1 implementations"); MODULE_LICENSE("GPL v2"); diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h index cddfaff4d0b7..4fa654e4b5a9 100644 --- a/include/linux/of_iommu.h +++ b/include/linux/of_iommu.h @@ -34,9 +34,6 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev, extern struct of_device_id __iommu_of_table; -typedef int (*of_iommu_init_fn)(struct device_node *); - -#define IOMMU_OF_DECLARE(name, compat, fn) \ - _OF_DECLARE(iommu, name, compat, fn, of_iommu_init_fn) +#define IOMMU_OF_DECLARE(name, compat) OF_DECLARE_1(iommu, name, compat, NULL) #endif /* __OF_IOMMU_H */ -- cgit v1.2.3