63 files changed, 2493 insertions, 646 deletions
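A recurring change throughout this series is the iommu_map() API: it now takes an explicit gfp_t argument for page-table allocations, and callers of the iommu_map_atomic()/iommu_map_sg_atomic() wrappers are converted to pass GFP_ATOMIC directly, which is why the arch/arm, nouveau, tegra, host1x, usnic and dma-iommu hunks below all gain a GFP_KERNEL or GFP_ATOMIC argument. A minimal sketch of the updated call pattern; the helper name and the atomic_ctx flag are illustrative, not part of the series:

/*
 * Illustrative helper (not from this series) showing the new call pattern:
 * the caller decides how IOMMU page-table memory may be allocated.
 */
#include <linux/gfp.h>
#include <linux/iommu.h>

static int example_map_buffer(struct iommu_domain *domain, unsigned long iova,
			      phys_addr_t paddr, size_t size, bool atomic_ctx)
{
	int prot = IOMMU_READ | IOMMU_WRITE;

	/* Sleepable context: page tables may be allocated with GFP_KERNEL. */
	if (!atomic_ctx)
		return iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL);

	/* Atomic context (e.g. under a spinlock): use GFP_ATOMIC instead. */
	return iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC);
}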
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu b/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu
new file mode 100644
index 000000000000..d7af4919302e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu
@@ -0,0 +1,37 @@
+What:		/sys/bus/event_source/devices/dmar*/format
+Date:		Jan 2023
+KernelVersion:	6.3
+Contact:	Kan Liang <kan.liang@linux.intel.com>
+Description:	Read-only. Attribute group to describe the magic bits
+		that go into perf_event_attr.config,
+		perf_event_attr.config1 or perf_event_attr.config2 for
+		the IOMMU pmu. (See also
+		ABI/testing/sysfs-bus-event_source-devices-format).
+
+		Each attribute in this group defines a bit range in
+		perf_event_attr.config, perf_event_attr.config1,
+		or perf_event_attr.config2. All supported attributes
+		are listed below (See the VT-d Spec 4.0 for possible
+		attribute values)::
+
+		  event = "config:0-27" - event ID
+		  event_group = "config:28-31" - event group ID
+
+		  filter_requester_en = "config1:0" - Enable Requester ID filter
+		  filter_domain_en = "config1:1" - Enable Domain ID filter
+		  filter_pasid_en = "config1:2" - Enable PASID filter
+		  filter_ats_en = "config1:3" - Enable Address Type filter
+		  filter_page_table_en= "config1:4" - Enable Page Table Level filter
+		  filter_requester_id = "config1:16-31" - Requester ID filter
+		  filter_domain = "config1:32-47" - Domain ID filter
+		  filter_pasid = "config2:0-21" - PASID filter
+		  filter_ats = "config2:24-28" - Address Type filter
+		  filter_page_table = "config2:32-36" - Page Table Level filter
+
+What:		/sys/bus/event_source/devices/dmar*/cpumask
+Date:		Jan 2023
+KernelVersion:	6.3
+Contact:	Kan Liang <kan.liang@linux.intel.com>
+Description:	Read-only. This file always returns the CPU to which the
+		IOMMU pmu is bound for access to all IOMMU pmu performance
+		monitoring events.
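For reference, a userspace sketch of how the format attributes above translate into a perf_event_attr: the PMU's dynamic type is read from .../dmar0/type and the event/group fields are packed into config as described. The event and event_group values below are placeholders (real encodings come from the VT-d 4.0 spec), and the counter is pinned to CPU 0 for brevity rather than to the CPU reported by the cpumask file. With the perf tool the same thing can be written symbolically, e.g. perf stat -e 'dmar0/event=0x1,event_group=0x1/' (placeholder values again).

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	unsigned int type;
	long long count = 0;
	FILE *f;
	int fd;

	/* The IOMMU PMU gets a dynamic type id; read it from sysfs. */
	f = fopen("/sys/bus/event_source/devices/dmar0/type", "r");
	if (!f || fscanf(f, "%u", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;
	/* event = config:0-27, event_group = config:28-31 (placeholder values). */
	attr.config = (1ULL << 28) | 0x1;
	attr.disabled = 1;

	/* Count system-wide on one CPU (CPU 0 assumed; see the cpumask file). */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	sleep(1);
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	read(fd, &count, sizeof(count));
	printf("count: %lld\n", count);
	close(fd);
	return 0;
}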
diff --git a/Documentation/devicetree/bindings/iommu/apple,dart.yaml b/Documentation/devicetree/bindings/iommu/apple,dart.yaml index 06af2bacbe97..903edf85d72e 100644 --- a/Documentation/devicetree/bindings/iommu/apple,dart.yaml +++ b/Documentation/devicetree/bindings/iommu/apple,dart.yaml @@ -24,6 +24,7 @@ properties: compatible: enum: - apple,t8103-dart + - apple,t8110-dart - apple,t6000-dart reg: diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index b28c5c2b0ff2..807cb511fe18 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -36,13 +36,17 @@ properties: - enum: - qcom,qcm2290-smmu-500 - qcom,qdu1000-smmu-500 + - qcom,sa8775p-smmu-500 - qcom,sc7180-smmu-500 - qcom,sc7280-smmu-500 - qcom,sc8180x-smmu-500 - qcom,sc8280xp-smmu-500 - qcom,sdm670-smmu-500 - qcom,sdm845-smmu-500 + - qcom,sdx55-smmu-500 + - qcom,sdx65-smmu-500 - qcom,sm6115-smmu-500 + - qcom,sm6125-smmu-500 - qcom,sm6350-smmu-500 - qcom,sm6375-smmu-500 - qcom,sm8150-smmu-500 @@ -52,14 +56,6 @@ properties: - const: qcom,smmu-500 - const: arm,mmu-500 - - description: Qcom SoCs implementing "arm,mmu-500" (non-qcom implementation) - deprecated: true - items: - - enum: - - qcom,sdx55-smmu-500 - - qcom,sdx65-smmu-500 - - const: arm,mmu-500 - - description: Qcom SoCs implementing "arm,mmu-500" (legacy binding) deprecated: true items: @@ -84,6 +80,7 @@ properties: items: - enum: - qcom,sc7280-smmu-500 + - qcom,sm8150-smmu-500 - qcom,sm8250-smmu-500 - const: qcom,adreno-smmu - const: arm,mmu-500 @@ -201,7 +198,8 @@ properties: maxItems: 7 power-domains: - maxItems: 1 + minItems: 1 + maxItems: 3 nvidia,memory-controller: description: | @@ -366,6 +364,56 @@ allOf: - description: interface clock required to access smmu's registers through the TCU's programming interface. + # Disallow clocks for all other platforms with specific compatibles + - if: + properties: + compatible: + contains: + enum: + - cavium,smmu-v2 + - marvell,ap806-smmu-500 + - nvidia,smmu-500 + - qcom,qcm2290-smmu-500 + - qcom,qdu1000-smmu-500 + - qcom,sa8775p-smmu-500 + - qcom,sc7180-smmu-500 + - qcom,sc8180x-smmu-500 + - qcom,sc8280xp-smmu-500 + - qcom,sdm670-smmu-500 + - qcom,sdm845-smmu-500 + - qcom,sdx55-smmu-500 + - qcom,sdx65-smmu-500 + - qcom,sm6115-smmu-500 + - qcom,sm6125-smmu-500 + - qcom,sm6350-smmu-500 + - qcom,sm6375-smmu-500 + - qcom,sm8350-smmu-500 + - qcom,sm8450-smmu-500 + then: + properties: + clock-names: false + clocks: false + + - if: + properties: + compatible: + contains: + const: qcom,sm6375-smmu-500 + then: + properties: + power-domains: + items: + - description: SNoC MMU TBU RT GDSC + - description: SNoC MMU TBU NRT GDSC + - description: SNoC TURING MMU TBU0 GDSC + + required: + - power-domains + else: + properties: + power-domains: + maxItems: 1 + examples: - |+ /* SMMU with stream matching or stream indexing */ diff --git a/Documentation/devicetree/bindings/iommu/qcom,iommu.txt b/Documentation/devicetree/bindings/iommu/qcom,iommu.txt index 059139abce35..e6cecfd360eb 100644 --- a/Documentation/devicetree/bindings/iommu/qcom,iommu.txt +++ b/Documentation/devicetree/bindings/iommu/qcom,iommu.txt @@ -10,6 +10,7 @@ to non-secure vs secure interrupt line. - compatible : Should be one of: "qcom,msm8916-iommu" + "qcom,msm8953-iommu" Followed by "qcom,msm-iommu-v1". 
diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml index 26d0a5121f02..72308a4c14e7 100644 --- a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml +++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.yaml @@ -49,6 +49,7 @@ properties: - enum: - renesas,ipmmu-r8a779a0 # R-Car V3U - renesas,ipmmu-r8a779f0 # R-Car S4-8 + - renesas,ipmmu-r8a779g0 # R-Car V4H - const: renesas,rcar-gen4-ipmmu-vmsa # R-Car Gen4 reg: diff --git a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.yaml b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.yaml index 44f72bcf1782..010219138858 100644 --- a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.yaml +++ b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.yaml @@ -52,6 +52,30 @@ properties: Address and Length pairs. Specifies regions of memory that are acceptable to allocate from. + iommu-addresses: + $ref: /schemas/types.yaml#/definitions/phandle-array + description: > + A list of phandle and specifier pairs that describe static IO virtual + address space mappings and carveouts associated with a given reserved + memory region. The phandle in the first cell refers to the device for + which the mapping or carveout is to be created. + + The specifier consists of an address/size pair and denotes the IO + virtual address range of the region for the given device. The exact + format depends on the values of the "#address-cells" and "#size-cells" + properties of the device referenced via the phandle. + + When used in combination with a "reg" property, an IOVA mapping is to + be established for this memory region. One example where this can be + useful is to create an identity mapping for physical memory that the + firmware has configured some hardware to access (such as a bootsplash + framebuffer). + + If no "reg" property is specified, the "iommu-addresses" property + defines carveout regions in the IOVA space for the given device. This + can be useful if a certain memory region should not be mapped through + the IOMMU. + no-map: type: boolean description: > @@ -89,12 +113,69 @@ allOf: - no-map oneOf: - - required: - - reg + - oneOf: + - required: + - reg + + - required: + - size + + - oneOf: + # IOMMU reservations + - required: + - iommu-addresses - - required: - - size + # IOMMU mappings + - required: + - reg + - iommu-addresses additionalProperties: true +examples: + - | + / { + compatible = "foo"; + model = "foo"; + + #address-cells = <2>; + #size-cells = <2>; + + reserved-memory { + #address-cells = <2>; + #size-cells = <2>; + ranges; + + adsp_resv: reservation-adsp { + /* + * Restrict IOVA mappings for ADSP buffers to the 512 MiB region + * from 0x40000000 - 0x5fffffff. Anything outside is reserved by + * the ADSP for I/O memory and private memory allocations. + */ + iommu-addresses = <&adsp 0x0 0x00000000 0x00 0x40000000>, + <&adsp 0x0 0x60000000 0xff 0xa0000000>; + }; + + fb: framebuffer@90000000 { + reg = <0x0 0x90000000 0x0 0x00800000>; + iommu-addresses = <&dc0 0x0 0x90000000 0x0 0x00800000>; + }; + }; + + bus@0 { + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x0 0x0 0x40000000>; + + adsp: adsp@2990000 { + reg = <0x2990000 0x2000>; + memory-region = <&adsp_resv>; + }; + + dc0: display@15200000 { + reg = <0x15200000 0x10000>; + memory-region = <&fb>; + }; + }; + }; ... 
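The binding itself only describes the regions; drivers do not parse "iommu-addresses" directly. Once of_iommu_get_resv_regions() is hooked into iommu_dma_get_resv_regions() (see the drivers/iommu/dma-iommu.c hunk further down), the mappings and carveouts surface through the generic reserved-region API. A rough sketch of what a consumer sees; the helper below is made up for illustration:

#include <linux/device.h>
#include <linux/iommu.h>
#include <linux/list.h>

static void example_dump_resv_regions(struct device *dev)
{
	struct iommu_resv_region *region;
	LIST_HEAD(resv_regions);

	iommu_get_resv_regions(dev, &resv_regions);

	list_for_each_entry(region, &resv_regions, list) {
		/*
		 * "reg" + "iommu-addresses" pairs are expected to appear as
		 * direct-mapping regions (IOMMU_RESV_DIRECT*), plain
		 * "iommu-addresses" carveouts as IOMMU_RESV_RESERVED.
		 */
		dev_info(dev, "reserved: start %pa size %zu type %d\n",
			 &region->start, region->length, region->type);
	}

	iommu_put_resv_regions(dev, &resv_regions);
}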
diff --git a/MAINTAINERS b/MAINTAINERS index 39ff1a717625..a265f634e41f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10466,7 +10466,6 @@ L: iommu@lists.linux.dev S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: drivers/iommu/intel/ -F: include/linux/intel-svm.h INTEL IPU3 CSI-2 CIO2 DRIVER M: Yong Zhi <yong.zhi@intel.com> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index c135f6e37a00..8bc01071474a 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -984,7 +984,8 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size, len = (j - i) << PAGE_SHIFT; ret = iommu_map(mapping->domain, iova, phys, len, - __dma_info_to_prot(DMA_BIDIRECTIONAL, attrs)); + __dma_info_to_prot(DMA_BIDIRECTIONAL, attrs), + GFP_KERNEL); if (ret < 0) goto fail; iova += len; @@ -1207,7 +1208,8 @@ static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, prot = __dma_info_to_prot(dir, attrs); - ret = iommu_map(mapping->domain, iova, phys, len, prot); + ret = iommu_map(mapping->domain, iova, phys, len, prot, + GFP_KERNEL); if (ret < 0) goto fail; count += len >> PAGE_SHIFT; @@ -1379,7 +1381,8 @@ static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, prot = __dma_info_to_prot(dir, attrs); - ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot); + ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, + prot, GFP_KERNEL); if (ret < 0) goto fail; @@ -1443,7 +1446,7 @@ static dma_addr_t arm_iommu_map_resource(struct device *dev, prot = __dma_info_to_prot(dir, attrs) | IOMMU_MMIO; - ret = iommu_map(mapping->domain, dma_addr, addr, len, prot); + ret = iommu_map(mapping->domain, dma_addr, addr, len, prot, GFP_KERNEL); if (ret < 0) goto fail; diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index 91e63426bdc5..7119c04c51c5 100644 --- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -186,9 +186,10 @@ static inline unsigned long *get_st_pto(unsigned long entry) /* Prototypes */ void dma_free_seg_table(unsigned long); -unsigned long *dma_alloc_cpu_table(void); +unsigned long *dma_alloc_cpu_table(gfp_t gfp); void dma_cleanup_tables(unsigned long *); -unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr); +unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, + gfp_t gfp); void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags); extern const struct dma_map_ops s390_pci_dma_ops; diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index ea478d11fbd1..2d9b01d7ca4c 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -27,11 +27,11 @@ static int zpci_refresh_global(struct zpci_dev *zdev) zdev->iommu_pages * PAGE_SIZE); } -unsigned long *dma_alloc_cpu_table(void) +unsigned long *dma_alloc_cpu_table(gfp_t gfp) { unsigned long *table, *entry; - table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC); + table = kmem_cache_alloc(dma_region_table_cache, gfp); if (!table) return NULL; @@ -45,11 +45,11 @@ static void dma_free_cpu_table(void *table) kmem_cache_free(dma_region_table_cache, table); } -static unsigned long *dma_alloc_page_table(void) +static unsigned long *dma_alloc_page_table(gfp_t gfp) { unsigned long *table, *entry; - table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC); + table = kmem_cache_alloc(dma_page_table_cache, gfp); if (!table) return NULL; @@ -63,7 +63,7 @@ static void dma_free_page_table(void 
*table) kmem_cache_free(dma_page_table_cache, table); } -static unsigned long *dma_get_seg_table_origin(unsigned long *rtep) +static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp) { unsigned long old_rte, rte; unsigned long *sto; @@ -72,7 +72,7 @@ static unsigned long *dma_get_seg_table_origin(unsigned long *rtep) if (reg_entry_isvalid(rte)) { sto = get_rt_sto(rte); } else { - sto = dma_alloc_cpu_table(); + sto = dma_alloc_cpu_table(gfp); if (!sto) return NULL; @@ -90,7 +90,7 @@ static unsigned long *dma_get_seg_table_origin(unsigned long *rtep) return sto; } -static unsigned long *dma_get_page_table_origin(unsigned long *step) +static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp) { unsigned long old_ste, ste; unsigned long *pto; @@ -99,7 +99,7 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step) if (reg_entry_isvalid(ste)) { pto = get_st_pto(ste); } else { - pto = dma_alloc_page_table(); + pto = dma_alloc_page_table(gfp); if (!pto) return NULL; set_st_pto(&ste, virt_to_phys(pto)); @@ -116,18 +116,19 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step) return pto; } -unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr) +unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, + gfp_t gfp) { unsigned long *sto, *pto; unsigned int rtx, sx, px; rtx = calc_rtx(dma_addr); - sto = dma_get_seg_table_origin(&rto[rtx]); + sto = dma_get_seg_table_origin(&rto[rtx], gfp); if (!sto) return NULL; sx = calc_sx(dma_addr); - pto = dma_get_page_table_origin(&sto[sx]); + pto = dma_get_page_table_origin(&sto[sx], gfp); if (!pto) return NULL; @@ -170,7 +171,8 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, return -EINVAL; for (i = 0; i < nr_pages; i++) { - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); + entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr, + GFP_ATOMIC); if (!entry) { rc = -ENOMEM; goto undo_cpu_trans; @@ -186,7 +188,8 @@ undo_cpu_trans: while (i-- > 0) { page_addr -= PAGE_SIZE; dma_addr -= PAGE_SIZE; - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr); + entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr, + GFP_ATOMIC); if (!entry) break; dma_update_cpu_trans(entry, page_addr, flags); @@ -576,7 +579,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) spin_lock_init(&zdev->iommu_bitmap_lock); - zdev->dma_table = dma_alloc_cpu_table(); + zdev->dma_table = dma_alloc_cpu_table(GFP_KERNEL); if (!zdev->dma_table) { rc = -ENOMEM; goto out; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c index 648ecf5a8fbc..a4ac94a2ab57 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -475,7 +475,8 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, u32 offset = (r->offset + i) << imem->iommu_pgshift; ret = iommu_map(imem->domain, offset, node->dma_addrs[i], - PAGE_SIZE, IOMMU_READ | IOMMU_WRITE); + PAGE_SIZE, IOMMU_READ | IOMMU_WRITE, + GFP_KERNEL); if (ret < 0) { nvkm_error(subdev, "IOMMU mapping failure: %d\n", ret); diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 7bd2e65c2a16..6ca9f396e55b 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1057,7 +1057,7 @@ void *tegra_drm_alloc(struct tegra_drm *tegra, size_t size, dma_addr_t *dma) *dma = iova_dma_addr(&tegra->carveout.domain, alloc); err = iommu_map(tegra->domain, 
*dma, virt_to_phys(virt), - size, IOMMU_READ | IOMMU_WRITE); + size, IOMMU_READ | IOMMU_WRITE, GFP_KERNEL); if (err < 0) goto free_iova; diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index 103fda055394..4ddfcd2138c9 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -105,7 +105,7 @@ static int host1x_pushbuffer_init(struct push_buffer *pb) pb->dma = iova_dma_addr(&host1x->iova, alloc); err = iommu_map(host1x->domain, pb->dma, pb->phys, size, - IOMMU_READ); + IOMMU_READ, GFP_KERNEL); if (err) goto iommu_free_iova; } else { diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index a2857accc427..aeeaca65ace9 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -276,8 +276,8 @@ iter_chunk: size = pa_end - pa_start + PAGE_SIZE; usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x", va_start, &pa_start, size, flags); - err = iommu_map_atomic(pd->domain, va_start, - pa_start, size, flags); + err = iommu_map(pd->domain, va_start, pa_start, + size, flags, GFP_KERNEL); if (err) { usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", va_start, &pa_start, size, err); @@ -293,8 +293,8 @@ iter_chunk: size = pa - pa_start + PAGE_SIZE; usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n", va_start, &pa_start, size, flags); - err = iommu_map_atomic(pd->domain, va_start, - pa_start, size, flags); + err = iommu_map(pd->domain, va_start, pa_start, + size, flags, GFP_KERNEL); if (err) { usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", va_start, &pa_start, size, err); diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 79707685d54a..889c7efd050b 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -32,7 +32,8 @@ config IOMMU_IO_PGTABLE config IOMMU_IO_PGTABLE_LPAE bool "ARMv7/v8 Long Descriptor Format" select IOMMU_IO_PGTABLE - depends on ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64) + depends on ARM || ARM64 || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for cpmxchg64() help Enable support for the ARM long descriptor pagetable format. This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page @@ -70,7 +71,8 @@ config IOMMU_IO_PGTABLE_ARMV7S_SELFTEST config IOMMU_IO_PGTABLE_DART bool "Apple DART Formats" select IOMMU_IO_PGTABLE - depends on ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64) + depends on ARM64 || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for cpmxchg64() help Enable support for the Apple DART pagetable formats. These include the t8020 and t6000/t8110 DART formats used in Apple M1/M2 family @@ -284,7 +286,8 @@ config EXYNOS_IOMMU_DEBUG config IPMMU_VMSA bool "Renesas VMSA-compatible IPMMU" - depends on ARCH_RENESAS || (COMPILE_TEST && !GENERIC_ATOMIC64) + depends on ARCH_RENESAS || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE select IOMMU_API select IOMMU_IO_PGTABLE_LPAE select ARM_DMA_USE_IOMMU @@ -304,7 +307,8 @@ config SPAPR_TCE_IOMMU config APPLE_DART tristate "Apple DART IOMMU Support" - depends on ARCH_APPLE || (COMPILE_TEST && !GENERIC_ATOMIC64) + depends on ARCH_APPLE || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_DART select IOMMU_API select IOMMU_IO_PGTABLE_DART default ARCH_APPLE @@ -319,7 +323,8 @@ config APPLE_DART # ARM IOMMU support config ARM_SMMU tristate "ARM Ltd. 
System MMU (SMMU) Support" - depends on ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64) + depends on ARM64 || ARM || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE select IOMMU_API select IOMMU_IO_PGTABLE_LPAE select ARM_DMA_USE_IOMMU if ARM @@ -466,7 +471,8 @@ config MTK_IOMMU_V1 config QCOM_IOMMU # Note: iommu drivers cannot (yet?) be built as modules bool "Qualcomm IOMMU Support" - depends on ARCH_QCOM || (COMPILE_TEST && !GENERIC_ATOMIC64) + depends on ARCH_QCOM || COMPILE_TEST + depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE select QCOM_SCM select IOMMU_API select IOMMU_IO_PGTABLE_LPAE diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 467b194975b3..19a46b9f7357 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3475,15 +3475,26 @@ found: return 1; } +#define ACPIID_LEN (ACPIHID_UID_LEN + ACPIHID_HID_LEN) + static int __init parse_ivrs_acpihid(char *str) { u32 seg = 0, bus, dev, fn; char *hid, *uid, *p, *addr; - char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0}; + char acpiid[ACPIID_LEN] = {0}; int i; addr = strchr(str, '@'); if (!addr) { + addr = strchr(str, '='); + if (!addr) + goto not_found; + + ++addr; + + if (strlen(addr) > ACPIID_LEN) + goto not_found; + if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 || sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) { pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n", @@ -3496,6 +3507,9 @@ static int __init parse_ivrs_acpihid(char *str) /* We have the '@', make it the terminator to get just the acpiid */ *addr++ = 0; + if (strlen(str) > ACPIID_LEN + 1) + goto not_found; + if (sscanf(str, "=%s", acpiid) != 1) goto not_found; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index cbeaab55c0db..c20c41dd9c91 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -558,6 +558,15 @@ static void amd_iommu_report_page_fault(struct amd_iommu *iommu, * prevent logging it. */ if (IS_IOMMU_MEM_TRANSACTION(flags)) { + /* Device not attached to domain properly */ + if (dev_data->domain == NULL) { + pr_err_ratelimited("Event logged [Device not attached to domain properly]\n"); + pr_err_ratelimited(" device=%04x:%02x:%02x.%x domain=0x%04x\n", + iommu->pci_seg->id, PCI_BUS_NUM(devid), PCI_SLOT(devid), + PCI_FUNC(devid), domain_id); + goto out; + } + if (!report_iommu_fault(&dev_data->domain->domain, &pdev->dev, address, IS_WRITE_REQUEST(flags) ? @@ -667,7 +676,14 @@ retry: event[0], event[1], event[2], event[3]); } - memset(__evt, 0, 4 * sizeof(u32)); + /* + * To detect the hardware errata 732 we need to clear the + * entry back to zero. This issue does not exist on SNP + * enabled system. Also this buffer is not writeable on + * SNP enabled system. + */ + if (!amd_iommu_snp_en) + memset(__evt, 0, 4 * sizeof(u32)); } static void iommu_poll_events(struct amd_iommu *iommu) @@ -736,10 +752,13 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu) entry[1] = raw[1]; /* - * To detect the hardware bug we need to clear the entry - * back to zero. + * To detect the hardware errata 733 we need to clear the + * entry back to zero. This issue does not exist on SNP + * enabled system. Also this buffer is not writeable on + * SNP enabled system. 
*/ - raw[0] = raw[1] = 0UL; + if (!amd_iommu_snp_en) + raw[0] = raw[1] = 0UL; /* Update head pointer of hardware ring-buffer */ head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; @@ -1702,27 +1721,29 @@ static int pdev_pri_ats_enable(struct pci_dev *pdev) /* Only allow access to user-accessible pages */ ret = pci_enable_pasid(pdev, 0); if (ret) - goto out_err; + return ret; /* First reset the PRI state of the device */ ret = pci_reset_pri(pdev); if (ret) - goto out_err; + goto out_err_pasid; /* Enable PRI */ /* FIXME: Hardcode number of outstanding requests for now */ ret = pci_enable_pri(pdev, 32); if (ret) - goto out_err; + goto out_err_pasid; ret = pci_enable_ats(pdev, PAGE_SHIFT); if (ret) - goto out_err; + goto out_err_pri; return 0; -out_err: +out_err_pri: pci_disable_pri(pdev); + +out_err_pasid: pci_disable_pasid(pdev); return ret; @@ -2072,6 +2093,10 @@ static struct protection_domain *protection_domain_alloc(unsigned int type) if (ret) goto out_err; + /* No need to allocate io pgtable ops in passthrough mode */ + if (type == IOMMU_DOMAIN_IDENTITY) + return domain; + pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain); if (!pgtbl_ops) { domain_id_free(domain->id); @@ -2126,31 +2151,6 @@ static void amd_iommu_domain_free(struct iommu_domain *dom) protection_domain_free(domain); } -static void amd_iommu_detach_device(struct iommu_domain *dom, - struct device *dev) -{ - struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); - struct amd_iommu *iommu; - - if (!check_device(dev)) - return; - - if (dev_data->domain != NULL) - detach_device(dev); - - iommu = rlookup_amd_iommu(dev); - if (!iommu) - return; - -#ifdef CONFIG_IRQ_REMAP - if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && - (dom->type == IOMMU_DOMAIN_UNMANAGED)) - dev_data->use_vapic = 0; -#endif - - iommu_completion_wait(iommu); -} - static int amd_iommu_attach_device(struct iommu_domain *dom, struct device *dev) { @@ -2159,6 +2159,13 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, struct amd_iommu *iommu = rlookup_amd_iommu(dev); int ret; + /* + * Skip attach device to domain if new domain is same as + * devices current domain + */ + if (dev_data->domain == domain) + return 0; + dev_data->defer_attach = false; if (dev_data->domain) @@ -2387,12 +2394,17 @@ static int amd_iommu_def_domain_type(struct device *dev) return 0; /* - * Do not identity map IOMMUv2 capable devices when memory encryption is - * active, because some of those devices (AMD GPUs) don't have the - * encryption bit in their DMA-mask and require remapping. + * Do not identity map IOMMUv2 capable devices when: + * - memory encryption is active, because some of those devices + * (AMD GPUs) don't have the encryption bit in their DMA-mask + * and require remapping. + * - SNP is enabled, because it prohibits DTE[Mode]=0. 
*/ - if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT) && dev_data->iommu_v2) + if (dev_data->iommu_v2 && + !cc_platform_has(CC_ATTR_MEM_ENCRYPT) && + !amd_iommu_snp_en) { return IOMMU_DOMAIN_IDENTITY; + } return 0; } @@ -2416,7 +2428,6 @@ const struct iommu_ops amd_iommu_ops = { .def_domain_type = amd_iommu_def_domain_type, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = amd_iommu_attach_device, - .detach_dev = amd_iommu_detach_device, .map_pages = amd_iommu_map_pages, .unmap_pages = amd_iommu_unmap_pages, .iotlb_sync_map = amd_iommu_iotlb_sync_map, diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 4f4a323be0d0..06169d36eab8 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -34,57 +34,154 @@ #include "dma-iommu.h" -#define DART_MAX_STREAMS 16 +#define DART_MAX_STREAMS 256 #define DART_MAX_TTBR 4 #define MAX_DARTS_PER_DEVICE 2 -#define DART_STREAM_ALL 0xffff +/* Common registers */ #define DART_PARAMS1 0x00 -#define DART_PARAMS_PAGE_SHIFT GENMASK(27, 24) +#define DART_PARAMS1_PAGE_SHIFT GENMASK(27, 24) #define DART_PARAMS2 0x04 -#define DART_PARAMS_BYPASS_SUPPORT BIT(0) +#define DART_PARAMS2_BYPASS_SUPPORT BIT(0) -#define DART_STREAM_COMMAND 0x20 -#define DART_STREAM_COMMAND_BUSY BIT(2) -#define DART_STREAM_COMMAND_INVALIDATE BIT(20) +/* T8020/T6000 registers */ -#define DART_STREAM_SELECT 0x34 +#define DART_T8020_STREAM_COMMAND 0x20 +#define DART_T8020_STREAM_COMMAND_BUSY BIT(2) +#define DART_T8020_STREAM_COMMAND_INVALIDATE BIT(20) -#define DART_ERROR 0x40 -#define DART_ERROR_STREAM GENMASK(27, 24) -#define DART_ERROR_CODE GENMASK(11, 0) -#define DART_ERROR_FLAG BIT(31) +#define DART_T8020_STREAM_SELECT 0x34 -#define DART_ERROR_READ_FAULT BIT(4) -#define DART_ERROR_WRITE_FAULT BIT(3) -#define DART_ERROR_NO_PTE BIT(2) -#define DART_ERROR_NO_PMD BIT(1) -#define DART_ERROR_NO_TTBR BIT(0) +#define DART_T8020_ERROR 0x40 +#define DART_T8020_ERROR_STREAM GENMASK(27, 24) +#define DART_T8020_ERROR_CODE GENMASK(11, 0) +#define DART_T8020_ERROR_FLAG BIT(31) -#define DART_CONFIG 0x60 -#define DART_CONFIG_LOCK BIT(15) +#define DART_T8020_ERROR_READ_FAULT BIT(4) +#define DART_T8020_ERROR_WRITE_FAULT BIT(3) +#define DART_T8020_ERROR_NO_PTE BIT(2) +#define DART_T8020_ERROR_NO_PMD BIT(1) +#define DART_T8020_ERROR_NO_TTBR BIT(0) -#define DART_STREAM_COMMAND_BUSY_TIMEOUT 100 - -#define DART_ERROR_ADDR_HI 0x54 -#define DART_ERROR_ADDR_LO 0x50 - -#define DART_STREAMS_ENABLE 0xfc +#define DART_T8020_CONFIG 0x60 +#define DART_T8020_CONFIG_LOCK BIT(15) -#define DART_TCR(sid) (0x100 + 4 * (sid)) -#define DART_TCR_TRANSLATE_ENABLE BIT(7) -#define DART_TCR_BYPASS0_ENABLE BIT(8) -#define DART_TCR_BYPASS1_ENABLE BIT(12) +#define DART_STREAM_COMMAND_BUSY_TIMEOUT 100 -#define DART_TTBR(sid, idx) (0x200 + 16 * (sid) + 4 * (idx)) -#define DART_TTBR_VALID BIT(31) -#define DART_TTBR_SHIFT 12 +#define DART_T8020_ERROR_ADDR_HI 0x54 +#define DART_T8020_ERROR_ADDR_LO 0x50 + +#define DART_T8020_STREAMS_ENABLE 0xfc + +#define DART_T8020_TCR 0x100 +#define DART_T8020_TCR_TRANSLATE_ENABLE BIT(7) +#define DART_T8020_TCR_BYPASS_DART BIT(8) +#define DART_T8020_TCR_BYPASS_DAPF BIT(12) + +#define DART_T8020_TTBR 0x200 +#define DART_T8020_TTBR_VALID BIT(31) +#define DART_T8020_TTBR_ADDR_FIELD_SHIFT 0 +#define DART_T8020_TTBR_SHIFT 12 + +/* T8110 registers */ + +#define DART_T8110_PARAMS3 0x08 +#define DART_T8110_PARAMS3_PA_WIDTH GENMASK(29, 24) +#define DART_T8110_PARAMS3_VA_WIDTH GENMASK(21, 16) +#define DART_T8110_PARAMS3_VER_MAJ GENMASK(15, 8) +#define 
DART_T8110_PARAMS3_VER_MIN GENMASK(7, 0) + +#define DART_T8110_PARAMS4 0x0c +#define DART_T8110_PARAMS4_NUM_CLIENTS GENMASK(24, 16) +#define DART_T8110_PARAMS4_NUM_SIDS GENMASK(8, 0) + +#define DART_T8110_TLB_CMD 0x80 +#define DART_T8110_TLB_CMD_BUSY BIT(31) +#define DART_T8110_TLB_CMD_OP GENMASK(10, 8) +#define DART_T8110_TLB_CMD_OP_FLUSH_ALL 0 +#define DART_T8110_TLB_CMD_OP_FLUSH_SID 1 +#define DART_T8110_TLB_CMD_STREAM GENMASK(7, 0) + +#define DART_T8110_ERROR 0x100 +#define DART_T8110_ERROR_STREAM GENMASK(27, 20) +#define DART_T8110_ERROR_CODE GENMASK(14, 0) +#define DART_T8110_ERROR_FLAG BIT(31) + +#define DART_T8110_ERROR_MASK 0x104 + +#define DART_T8110_ERROR_READ_FAULT BIT(5) +#define DART_T8110_ERROR_WRITE_FAULT BIT(4) +#define DART_T8110_ERROR_NO_PTE BIT(3) +#define DART_T8110_ERROR_NO_PMD BIT(2) +#define DART_T8110_ERROR_NO_PGD BIT(1) +#define DART_T8110_ERROR_NO_TTBR BIT(0) + +#define DART_T8110_ERROR_ADDR_LO 0x170 +#define DART_T8110_ERROR_ADDR_HI 0x174 + +#define DART_T8110_PROTECT 0x200 +#define DART_T8110_UNPROTECT 0x204 +#define DART_T8110_PROTECT_LOCK 0x208 +#define DART_T8110_PROTECT_TTBR_TCR BIT(0) + +#define DART_T8110_ENABLE_STREAMS 0xc00 +#define DART_T8110_DISABLE_STREAMS 0xc20 + +#define DART_T8110_TCR 0x1000 +#define DART_T8110_TCR_REMAP GENMASK(11, 8) +#define DART_T8110_TCR_REMAP_EN BIT(7) +#define DART_T8110_TCR_BYPASS_DAPF BIT(2) +#define DART_T8110_TCR_BYPASS_DART BIT(1) +#define DART_T8110_TCR_TRANSLATE_ENABLE BIT(0) + +#define DART_T8110_TTBR 0x1400 +#define DART_T8110_TTBR_VALID BIT(0) +#define DART_T8110_TTBR_ADDR_FIELD_SHIFT 2 +#define DART_T8110_TTBR_SHIFT 14 + +#define DART_TCR(dart, sid) ((dart)->hw->tcr + ((sid) << 2)) + +#define DART_TTBR(dart, sid, idx) ((dart)->hw->ttbr + \ + (((dart)->hw->ttbr_count * (sid)) << 2) + \ + ((idx) << 2)) + +struct apple_dart_stream_map; + +enum dart_type { + DART_T8020, + DART_T6000, + DART_T8110, +}; struct apple_dart_hw { + enum dart_type type; + irqreturn_t (*irq_handler)(int irq, void *dev); + int (*invalidate_tlb)(struct apple_dart_stream_map *stream_map); + u32 oas; enum io_pgtable_fmt fmt; + + int max_sid_count; + + u64 lock; + u64 lock_bit; + + u64 error; + + u64 enable_streams; + + u64 tcr; + u64 tcr_enabled; + u64 tcr_disabled; + u64 tcr_bypass; + + u64 ttbr; + u64 ttbr_valid; + u64 ttbr_addr_field_shift; + u64 ttbr_shift; + int ttbr_count; }; /* @@ -115,12 +212,18 @@ struct apple_dart { spinlock_t lock; + u32 ias; + u32 oas; u32 pgsize; + u32 num_streams; u32 supports_bypass : 1; u32 force_bypass : 1; struct iommu_group *sid2group[DART_MAX_STREAMS]; struct iommu_device iommu; + + u32 save_tcr[DART_MAX_STREAMS]; + u32 save_ttbr[DART_MAX_STREAMS][DART_MAX_TTBR]; }; /* @@ -140,11 +243,11 @@ struct apple_dart { */ struct apple_dart_stream_map { struct apple_dart *dart; - unsigned long sidmap; + DECLARE_BITMAP(sidmap, DART_MAX_STREAMS); }; struct apple_dart_atomic_stream_map { struct apple_dart *dart; - atomic64_t sidmap; + atomic_long_t sidmap[BITS_TO_LONGS(DART_MAX_STREAMS)]; }; /* @@ -202,50 +305,55 @@ static struct apple_dart_domain *to_dart_domain(struct iommu_domain *dom) static void apple_dart_hw_enable_translation(struct apple_dart_stream_map *stream_map) { + struct apple_dart *dart = stream_map->dart; int sid; - for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) - writel(DART_TCR_TRANSLATE_ENABLE, - stream_map->dart->regs + DART_TCR(sid)); + for_each_set_bit(sid, stream_map->sidmap, dart->num_streams) + writel(dart->hw->tcr_enabled, dart->regs + DART_TCR(dart, sid)); } static void 
apple_dart_hw_disable_dma(struct apple_dart_stream_map *stream_map) { + struct apple_dart *dart = stream_map->dart; int sid; - for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) - writel(0, stream_map->dart->regs + DART_TCR(sid)); + for_each_set_bit(sid, stream_map->sidmap, dart->num_streams) + writel(dart->hw->tcr_disabled, dart->regs + DART_TCR(dart, sid)); } static void apple_dart_hw_enable_bypass(struct apple_dart_stream_map *stream_map) { + struct apple_dart *dart = stream_map->dart; int sid; WARN_ON(!stream_map->dart->supports_bypass); - for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) - writel(DART_TCR_BYPASS0_ENABLE | DART_TCR_BYPASS1_ENABLE, - stream_map->dart->regs + DART_TCR(sid)); + for_each_set_bit(sid, stream_map->sidmap, dart->num_streams) + writel(dart->hw->tcr_bypass, + dart->regs + DART_TCR(dart, sid)); } static void apple_dart_hw_set_ttbr(struct apple_dart_stream_map *stream_map, u8 idx, phys_addr_t paddr) { + struct apple_dart *dart = stream_map->dart; int sid; - WARN_ON(paddr & ((1 << DART_TTBR_SHIFT) - 1)); - for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) - writel(DART_TTBR_VALID | (paddr >> DART_TTBR_SHIFT), - stream_map->dart->regs + DART_TTBR(sid, idx)); + WARN_ON(paddr & ((1 << dart->hw->ttbr_shift) - 1)); + for_each_set_bit(sid, stream_map->sidmap, dart->num_streams) + writel(dart->hw->ttbr_valid | + (paddr >> dart->hw->ttbr_shift) << dart->hw->ttbr_addr_field_shift, + dart->regs + DART_TTBR(dart, sid, idx)); } static void apple_dart_hw_clear_ttbr(struct apple_dart_stream_map *stream_map, u8 idx) { + struct apple_dart *dart = stream_map->dart; int sid; - for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) - writel(0, stream_map->dart->regs + DART_TTBR(sid, idx)); + for_each_set_bit(sid, stream_map->sidmap, dart->num_streams) + writel(0, dart->regs + DART_TTBR(dart, sid, idx)); } static void @@ -253,12 +361,12 @@ apple_dart_hw_clear_all_ttbrs(struct apple_dart_stream_map *stream_map) { int i; - for (i = 0; i < DART_MAX_TTBR; ++i) + for (i = 0; i < stream_map->dart->hw->ttbr_count; ++i) apple_dart_hw_clear_ttbr(stream_map, i); } static int -apple_dart_hw_stream_command(struct apple_dart_stream_map *stream_map, +apple_dart_t8020_hw_stream_command(struct apple_dart_stream_map *stream_map, u32 command) { unsigned long flags; @@ -267,12 +375,12 @@ apple_dart_hw_stream_command(struct apple_dart_stream_map *stream_map, spin_lock_irqsave(&stream_map->dart->lock, flags); - writel(stream_map->sidmap, stream_map->dart->regs + DART_STREAM_SELECT); - writel(command, stream_map->dart->regs + DART_STREAM_COMMAND); + writel(stream_map->sidmap[0], stream_map->dart->regs + DART_T8020_STREAM_SELECT); + writel(command, stream_map->dart->regs + DART_T8020_STREAM_COMMAND); ret = readl_poll_timeout_atomic( - stream_map->dart->regs + DART_STREAM_COMMAND, command_reg, - !(command_reg & DART_STREAM_COMMAND_BUSY), 1, + stream_map->dart->regs + DART_T8020_STREAM_COMMAND, command_reg, + !(command_reg & DART_T8020_STREAM_COMMAND_BUSY), 1, DART_STREAM_COMMAND_BUSY_TIMEOUT); spin_unlock_irqrestore(&stream_map->dart->lock, flags); @@ -280,7 +388,45 @@ apple_dart_hw_stream_command(struct apple_dart_stream_map *stream_map, if (ret) { dev_err(stream_map->dart->dev, "busy bit did not clear after command %x for streams %lx\n", - command, stream_map->sidmap); + command, stream_map->sidmap[0]); + return ret; + } + + return 0; +} + +static int +apple_dart_t8110_hw_tlb_command(struct apple_dart_stream_map *stream_map, + u32 command) +{ + struct apple_dart *dart 
= stream_map->dart; + unsigned long flags; + int ret = 0; + int sid; + + spin_lock_irqsave(&dart->lock, flags); + + for_each_set_bit(sid, stream_map->sidmap, dart->num_streams) { + u32 val = FIELD_PREP(DART_T8110_TLB_CMD_OP, command) | + FIELD_PREP(DART_T8110_TLB_CMD_STREAM, sid); + writel(val, dart->regs + DART_T8110_TLB_CMD); + + ret = readl_poll_timeout_atomic( + dart->regs + DART_T8110_TLB_CMD, val, + !(val & DART_T8110_TLB_CMD_BUSY), 1, + DART_STREAM_COMMAND_BUSY_TIMEOUT); + + if (ret) + break; + + } + + spin_unlock_irqrestore(&dart->lock, flags); + + if (ret) { + dev_err(stream_map->dart->dev, + "busy bit did not clear after command %x for stream %d\n", + command, sid); return ret; } @@ -288,48 +434,64 @@ apple_dart_hw_stream_command(struct apple_dart_stream_map *stream_map, } static int -apple_dart_hw_invalidate_tlb(struct apple_dart_stream_map *stream_map) +apple_dart_t8020_hw_invalidate_tlb(struct apple_dart_stream_map *stream_map) +{ + return apple_dart_t8020_hw_stream_command( + stream_map, DART_T8020_STREAM_COMMAND_INVALIDATE); +} + +static int +apple_dart_t8110_hw_invalidate_tlb(struct apple_dart_stream_map *stream_map) { - return apple_dart_hw_stream_command(stream_map, - DART_STREAM_COMMAND_INVALIDATE); + return apple_dart_t8110_hw_tlb_command( + stream_map, DART_T8110_TLB_CMD_OP_FLUSH_SID); } static int apple_dart_hw_reset(struct apple_dart *dart) { u32 config; struct apple_dart_stream_map stream_map; + int i; - config = readl(dart->regs + DART_CONFIG); - if (config & DART_CONFIG_LOCK) { + config = readl(dart->regs + dart->hw->lock); + if (config & dart->hw->lock_bit) { dev_err(dart->dev, "DART is locked down until reboot: %08x\n", config); return -EINVAL; } stream_map.dart = dart; - stream_map.sidmap = DART_STREAM_ALL; + bitmap_zero(stream_map.sidmap, DART_MAX_STREAMS); + bitmap_set(stream_map.sidmap, 0, dart->num_streams); apple_dart_hw_disable_dma(&stream_map); apple_dart_hw_clear_all_ttbrs(&stream_map); /* enable all streams globally since TCR is used to control isolation */ - writel(DART_STREAM_ALL, dart->regs + DART_STREAMS_ENABLE); + for (i = 0; i < BITS_TO_U32(dart->num_streams); i++) + writel(U32_MAX, dart->regs + dart->hw->enable_streams + 4 * i); /* clear any pending errors before the interrupt is unmasked */ - writel(readl(dart->regs + DART_ERROR), dart->regs + DART_ERROR); + writel(readl(dart->regs + dart->hw->error), dart->regs + dart->hw->error); + + if (dart->hw->type == DART_T8110) + writel(0, dart->regs + DART_T8110_ERROR_MASK); - return apple_dart_hw_invalidate_tlb(&stream_map); + return dart->hw->invalidate_tlb(&stream_map); } static void apple_dart_domain_flush_tlb(struct apple_dart_domain *domain) { - int i; + int i, j; struct apple_dart_atomic_stream_map *domain_stream_map; struct apple_dart_stream_map stream_map; for_each_stream_map(i, domain, domain_stream_map) { stream_map.dart = domain_stream_map->dart; - stream_map.sidmap = atomic64_read(&domain_stream_map->sidmap); - apple_dart_hw_invalidate_tlb(&stream_map); + + for (j = 0; j < BITS_TO_LONGS(stream_map.dart->num_streams); j++) + stream_map.sidmap[j] = atomic_long_read(&domain_stream_map->sidmap[j]); + + stream_map.dart->hw->invalidate_tlb(&stream_map); } } @@ -399,11 +561,11 @@ apple_dart_setup_translation(struct apple_dart_domain *domain, for (i = 0; i < pgtbl_cfg->apple_dart_cfg.n_ttbrs; ++i) apple_dart_hw_set_ttbr(stream_map, i, pgtbl_cfg->apple_dart_cfg.ttbr[i]); - for (; i < DART_MAX_TTBR; ++i) + for (; i < stream_map->dart->hw->ttbr_count; ++i) apple_dart_hw_clear_ttbr(stream_map, i); 
apple_dart_hw_enable_translation(stream_map); - apple_dart_hw_invalidate_tlb(stream_map); + stream_map->dart->hw->invalidate_tlb(stream_map); } static int apple_dart_finalize_domain(struct iommu_domain *domain, @@ -413,7 +575,7 @@ static int apple_dart_finalize_domain(struct iommu_domain *domain, struct apple_dart *dart = cfg->stream_maps[0].dart; struct io_pgtable_cfg pgtbl_cfg; int ret = 0; - int i; + int i, j; mutex_lock(&dart_domain->init_lock); @@ -422,14 +584,15 @@ static int apple_dart_finalize_domain(struct iommu_domain *domain, for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) { dart_domain->stream_maps[i].dart = cfg->stream_maps[i].dart; - atomic64_set(&dart_domain->stream_maps[i].sidmap, - cfg->stream_maps[i].sidmap); + for (j = 0; j < BITS_TO_LONGS(dart->num_streams); j++) + atomic_long_set(&dart_domain->stream_maps[i].sidmap[j], + cfg->stream_maps[i].sidmap[j]); } pgtbl_cfg = (struct io_pgtable_cfg){ .pgsize_bitmap = dart->pgsize, - .ias = 32, - .oas = dart->hw->oas, + .ias = dart->ias, + .oas = dart->oas, .coherent_walk = 1, .iommu_dev = dart->dev, }; @@ -443,7 +606,7 @@ static int apple_dart_finalize_domain(struct iommu_domain *domain, domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; domain->geometry.aperture_start = 0; - domain->geometry.aperture_end = DMA_BIT_MASK(32); + domain->geometry.aperture_end = (dma_addr_t)DMA_BIT_MASK(dart->ias); domain->geometry.force_aperture = true; dart_domain->finalized = true; @@ -458,7 +621,7 @@ apple_dart_mod_streams(struct apple_dart_atomic_stream_map *domain_maps, struct apple_dart_stream_map *master_maps, bool add_streams) { - int i; + int i, j; for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) { if (domain_maps[i].dart != master_maps[i].dart) @@ -468,12 +631,14 @@ apple_dart_mod_streams(struct apple_dart_atomic_stream_map *domain_maps, for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) { if (!domain_maps[i].dart) break; - if (add_streams) - atomic64_or(master_maps[i].sidmap, - &domain_maps[i].sidmap); - else - atomic64_and(~master_maps[i].sidmap, - &domain_maps[i].sidmap); + for (j = 0; j < BITS_TO_LONGS(domain_maps[i].dart->num_streams); j++) { + if (add_streams) + atomic_long_or(master_maps[i].sidmap[j], + &domain_maps[i].sidmap[j]); + else + atomic_long_and(~master_maps[i].sidmap[j], + &domain_maps[i].sidmap[j]); + } } return 0; @@ -486,13 +651,6 @@ static int apple_dart_domain_add_streams(struct apple_dart_domain *domain, true); } -static int apple_dart_domain_remove_streams(struct apple_dart_domain *domain, - struct apple_dart_master_cfg *cfg) -{ - return apple_dart_mod_streams(domain->stream_maps, cfg->stream_maps, - false); -} - static int apple_dart_attach_dev(struct iommu_domain *domain, struct device *dev) { @@ -535,22 +693,6 @@ static int apple_dart_attach_dev(struct iommu_domain *domain, return ret; } -static void apple_dart_detach_dev(struct iommu_domain *domain, - struct device *dev) -{ - int i; - struct apple_dart_stream_map *stream_map; - struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev); - struct apple_dart_domain *dart_domain = to_dart_domain(domain); - - for_each_stream_map(i, cfg, stream_map) - apple_dart_hw_disable_dma(stream_map); - - if (domain->type == IOMMU_DOMAIN_DMA || - domain->type == IOMMU_DOMAIN_UNMANAGED) - apple_dart_domain_remove_streams(dart_domain, cfg); -} - static struct iommu_device *apple_dart_probe_device(struct device *dev) { struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev); @@ -637,14 +779,14 @@ static int apple_dart_of_xlate(struct device *dev, struct of_phandle_args *args) for (i = 0; i < 
MAX_DARTS_PER_DEVICE; ++i) { if (cfg->stream_maps[i].dart == dart) { - cfg->stream_maps[i].sidmap |= 1 << sid; + set_bit(sid, cfg->stream_maps[i].sidmap); return 0; } } for (i = 0; i < MAX_DARTS_PER_DEVICE; ++i) { if (!cfg->stream_maps[i].dart) { cfg->stream_maps[i].dart = dart; - cfg->stream_maps[i].sidmap = 1 << sid; + set_bit(sid, cfg->stream_maps[i].sidmap); return 0; } } @@ -663,13 +805,36 @@ static void apple_dart_release_group(void *iommu_data) mutex_lock(&apple_dart_groups_lock); for_each_stream_map(i, group_master_cfg, stream_map) - for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) + for_each_set_bit(sid, stream_map->sidmap, stream_map->dart->num_streams) stream_map->dart->sid2group[sid] = NULL; kfree(iommu_data); mutex_unlock(&apple_dart_groups_lock); } +static int apple_dart_merge_master_cfg(struct apple_dart_master_cfg *dst, + struct apple_dart_master_cfg *src) +{ + /* + * We know that this function is only called for groups returned from + * pci_device_group and that all Apple Silicon platforms never spread + * PCIe devices from the same bus across multiple DARTs such that we can + * just assume that both src and dst only have the same single DART. + */ + if (src->stream_maps[1].dart) + return -EINVAL; + if (dst->stream_maps[1].dart) + return -EINVAL; + if (src->stream_maps[0].dart != dst->stream_maps[0].dart) + return -EINVAL; + + bitmap_or(dst->stream_maps[0].sidmap, + dst->stream_maps[0].sidmap, + src->stream_maps[0].sidmap, + dst->stream_maps[0].dart->num_streams); + return 0; +} + static struct iommu_group *apple_dart_device_group(struct device *dev) { int i, sid; @@ -682,7 +847,7 @@ static struct iommu_group *apple_dart_device_group(struct device *dev) mutex_lock(&apple_dart_groups_lock); for_each_stream_map(i, cfg, stream_map) { - for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) { + for_each_set_bit(sid, stream_map->sidmap, stream_map->dart->num_streams) { struct iommu_group *stream_group = stream_map->dart->sid2group[sid]; @@ -711,17 +876,31 @@ static struct iommu_group *apple_dart_device_group(struct device *dev) if (!group) goto out; - group_master_cfg = kmemdup(cfg, sizeof(*group_master_cfg), GFP_KERNEL); - if (!group_master_cfg) { - iommu_group_put(group); - goto out; - } + group_master_cfg = iommu_group_get_iommudata(group); + if (group_master_cfg) { + int ret; + + ret = apple_dart_merge_master_cfg(group_master_cfg, cfg); + if (ret) { + dev_err(dev, "Failed to merge DART IOMMU grups.\n"); + iommu_group_put(group); + res = ERR_PTR(ret); + goto out; + } + } else { + group_master_cfg = kmemdup(cfg, sizeof(*group_master_cfg), + GFP_KERNEL); + if (!group_master_cfg) { + iommu_group_put(group); + goto out; + } - iommu_group_set_iommudata(group, group_master_cfg, - apple_dart_release_group); + iommu_group_set_iommudata(group, group_master_cfg, + apple_dart_release_group); + } for_each_stream_map(i, cfg, stream_map) - for_each_set_bit(sid, &stream_map->sidmap, DART_MAX_STREAMS) + for_each_set_bit(sid, stream_map->sidmap, stream_map->dart->num_streams) stream_map->dart->sid2group[sid] = group; res = group; @@ -780,7 +959,6 @@ static const struct iommu_ops apple_dart_iommu_ops = { .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = apple_dart_attach_dev, - .detach_dev = apple_dart_detach_dev, .map_pages = apple_dart_map_pages, .unmap_pages = apple_dart_unmap_pages, .flush_iotlb_all = apple_dart_flush_iotlb_all, @@ -791,30 +969,69 @@ static const struct iommu_ops apple_dart_iommu_ops = { } }; -static irqreturn_t 
apple_dart_irq(int irq, void *dev) +static irqreturn_t apple_dart_t8020_irq(int irq, void *dev) +{ + struct apple_dart *dart = dev; + const char *fault_name = NULL; + u32 error = readl(dart->regs + DART_T8020_ERROR); + u32 error_code = FIELD_GET(DART_T8020_ERROR_CODE, error); + u32 addr_lo = readl(dart->regs + DART_T8020_ERROR_ADDR_LO); + u32 addr_hi = readl(dart->regs + DART_T8020_ERROR_ADDR_HI); + u64 addr = addr_lo | (((u64)addr_hi) << 32); + u8 stream_idx = FIELD_GET(DART_T8020_ERROR_STREAM, error); + + if (!(error & DART_T8020_ERROR_FLAG)) + return IRQ_NONE; + + /* there should only be a single bit set but let's use == to be sure */ + if (error_code == DART_T8020_ERROR_READ_FAULT) + fault_name = "READ FAULT"; + else if (error_code == DART_T8020_ERROR_WRITE_FAULT) + fault_name = "WRITE FAULT"; + else if (error_code == DART_T8020_ERROR_NO_PTE) + fault_name = "NO PTE FOR IOVA"; + else if (error_code == DART_T8020_ERROR_NO_PMD) + fault_name = "NO PMD FOR IOVA"; + else if (error_code == DART_T8020_ERROR_NO_TTBR) + fault_name = "NO TTBR FOR IOVA"; + else + fault_name = "unknown"; + + dev_err_ratelimited( + dart->dev, + "translation fault: status:0x%x stream:%d code:0x%x (%s) at 0x%llx", + error, stream_idx, error_code, fault_name, addr); + + writel(error, dart->regs + DART_T8020_ERROR); + return IRQ_HANDLED; +} + +static irqreturn_t apple_dart_t8110_irq(int irq, void *dev) { struct apple_dart *dart = dev; const char *fault_name = NULL; - u32 error = readl(dart->regs + DART_ERROR); - u32 error_code = FIELD_GET(DART_ERROR_CODE, error); - u32 addr_lo = readl(dart->regs + DART_ERROR_ADDR_LO); - u32 addr_hi = readl(dart->regs + DART_ERROR_ADDR_HI); + u32 error = readl(dart->regs + DART_T8110_ERROR); + u32 error_code = FIELD_GET(DART_T8110_ERROR_CODE, error); + u32 addr_lo = readl(dart->regs + DART_T8110_ERROR_ADDR_LO); + u32 addr_hi = readl(dart->regs + DART_T8110_ERROR_ADDR_HI); u64 addr = addr_lo | (((u64)addr_hi) << 32); - u8 stream_idx = FIELD_GET(DART_ERROR_STREAM, error); + u8 stream_idx = FIELD_GET(DART_T8110_ERROR_STREAM, error); - if (!(error & DART_ERROR_FLAG)) + if (!(error & DART_T8110_ERROR_FLAG)) return IRQ_NONE; /* there should only be a single bit set but let's use == to be sure */ - if (error_code == DART_ERROR_READ_FAULT) + if (error_code == DART_T8110_ERROR_READ_FAULT) fault_name = "READ FAULT"; - else if (error_code == DART_ERROR_WRITE_FAULT) + else if (error_code == DART_T8110_ERROR_WRITE_FAULT) fault_name = "WRITE FAULT"; - else if (error_code == DART_ERROR_NO_PTE) + else if (error_code == DART_T8110_ERROR_NO_PTE) fault_name = "NO PTE FOR IOVA"; - else if (error_code == DART_ERROR_NO_PMD) + else if (error_code == DART_T8110_ERROR_NO_PMD) fault_name = "NO PMD FOR IOVA"; - else if (error_code == DART_ERROR_NO_TTBR) + else if (error_code == DART_T8110_ERROR_NO_PGD) + fault_name = "NO PGD FOR IOVA"; + else if (error_code == DART_T8110_ERROR_NO_TTBR) fault_name = "NO TTBR FOR IOVA"; else fault_name = "unknown"; @@ -824,14 +1041,14 @@ static irqreturn_t apple_dart_irq(int irq, void *dev) "translation fault: status:0x%x stream:%d code:0x%x (%s) at 0x%llx", error, stream_idx, error_code, fault_name, addr); - writel(error, dart->regs + DART_ERROR); + writel(error, dart->regs + DART_T8110_ERROR); return IRQ_HANDLED; } static int apple_dart_probe(struct platform_device *pdev) { int ret; - u32 dart_params[2]; + u32 dart_params[4]; struct resource *res; struct apple_dart *dart; struct device *dev = &pdev->dev; @@ -866,17 +1083,42 @@ static int apple_dart_probe(struct platform_device 
*pdev) if (ret) return ret; - ret = apple_dart_hw_reset(dart); - if (ret) - goto err_clk_disable; - dart_params[0] = readl(dart->regs + DART_PARAMS1); dart_params[1] = readl(dart->regs + DART_PARAMS2); - dart->pgsize = 1 << FIELD_GET(DART_PARAMS_PAGE_SHIFT, dart_params[0]); - dart->supports_bypass = dart_params[1] & DART_PARAMS_BYPASS_SUPPORT; + dart->pgsize = 1 << FIELD_GET(DART_PARAMS1_PAGE_SHIFT, dart_params[0]); + dart->supports_bypass = dart_params[1] & DART_PARAMS2_BYPASS_SUPPORT; + + switch (dart->hw->type) { + case DART_T8020: + case DART_T6000: + dart->ias = 32; + dart->oas = dart->hw->oas; + dart->num_streams = dart->hw->max_sid_count; + break; + + case DART_T8110: + dart_params[2] = readl(dart->regs + DART_T8110_PARAMS3); + dart_params[3] = readl(dart->regs + DART_T8110_PARAMS4); + dart->ias = FIELD_GET(DART_T8110_PARAMS3_VA_WIDTH, dart_params[2]); + dart->oas = FIELD_GET(DART_T8110_PARAMS3_PA_WIDTH, dart_params[2]); + dart->num_streams = FIELD_GET(DART_T8110_PARAMS4_NUM_SIDS, dart_params[3]); + break; + } + + if (dart->num_streams > DART_MAX_STREAMS) { + dev_err(&pdev->dev, "Too many streams (%d > %d)\n", + dart->num_streams, DART_MAX_STREAMS); + ret = -EINVAL; + goto err_clk_disable; + } + dart->force_bypass = dart->pgsize > PAGE_SIZE; - ret = request_irq(dart->irq, apple_dart_irq, IRQF_SHARED, + ret = apple_dart_hw_reset(dart); + if (ret) + goto err_clk_disable; + + ret = request_irq(dart->irq, dart->hw->irq_handler, IRQF_SHARED, "apple-dart fault handler", dart); if (ret) goto err_clk_disable; @@ -894,8 +1136,8 @@ static int apple_dart_probe(struct platform_device *pdev) dev_info( &pdev->dev, - "DART [pagesize %x, bypass support: %d, bypass forced: %d] initialized\n", - dart->pgsize, dart->supports_bypass, dart->force_bypass); + "DART [pagesize %x, %d streams, bypass support: %d, bypass forced: %d] initialized\n", + dart->pgsize, dart->num_streams, dart->supports_bypass, dart->force_bypass); return 0; err_sysfs_remove: @@ -924,16 +1166,123 @@ static int apple_dart_remove(struct platform_device *pdev) } static const struct apple_dart_hw apple_dart_hw_t8103 = { + .type = DART_T8020, + .irq_handler = apple_dart_t8020_irq, + .invalidate_tlb = apple_dart_t8020_hw_invalidate_tlb, .oas = 36, .fmt = APPLE_DART, + .max_sid_count = 16, + + .enable_streams = DART_T8020_STREAMS_ENABLE, + .lock = DART_T8020_CONFIG, + .lock_bit = DART_T8020_CONFIG_LOCK, + + .error = DART_T8020_ERROR, + + .tcr = DART_T8020_TCR, + .tcr_enabled = DART_T8020_TCR_TRANSLATE_ENABLE, + .tcr_disabled = 0, + .tcr_bypass = DART_T8020_TCR_BYPASS_DAPF | DART_T8020_TCR_BYPASS_DART, + + .ttbr = DART_T8020_TTBR, + .ttbr_valid = DART_T8020_TTBR_VALID, + .ttbr_addr_field_shift = DART_T8020_TTBR_ADDR_FIELD_SHIFT, + .ttbr_shift = DART_T8020_TTBR_SHIFT, + .ttbr_count = 4, }; static const struct apple_dart_hw apple_dart_hw_t6000 = { + .type = DART_T6000, + .irq_handler = apple_dart_t8020_irq, + .invalidate_tlb = apple_dart_t8020_hw_invalidate_tlb, .oas = 42, .fmt = APPLE_DART2, + .max_sid_count = 16, + + .enable_streams = DART_T8020_STREAMS_ENABLE, + .lock = DART_T8020_CONFIG, + .lock_bit = DART_T8020_CONFIG_LOCK, + + .error = DART_T8020_ERROR, + + .tcr = DART_T8020_TCR, + .tcr_enabled = DART_T8020_TCR_TRANSLATE_ENABLE, + .tcr_disabled = 0, + .tcr_bypass = DART_T8020_TCR_BYPASS_DAPF | DART_T8020_TCR_BYPASS_DART, + + .ttbr = DART_T8020_TTBR, + .ttbr_valid = DART_T8020_TTBR_VALID, + .ttbr_addr_field_shift = DART_T8020_TTBR_ADDR_FIELD_SHIFT, + .ttbr_shift = DART_T8020_TTBR_SHIFT, + .ttbr_count = 4, }; +static const struct 
apple_dart_hw apple_dart_hw_t8110 = { + .type = DART_T8110, + .irq_handler = apple_dart_t8110_irq, + .invalidate_tlb = apple_dart_t8110_hw_invalidate_tlb, + .fmt = APPLE_DART2, + .max_sid_count = 256, + + .enable_streams = DART_T8110_ENABLE_STREAMS, + .lock = DART_T8110_PROTECT, + .lock_bit = DART_T8110_PROTECT_TTBR_TCR, + + .error = DART_T8110_ERROR, + + .tcr = DART_T8110_TCR, + .tcr_enabled = DART_T8110_TCR_TRANSLATE_ENABLE, + .tcr_disabled = 0, + .tcr_bypass = DART_T8110_TCR_BYPASS_DAPF | DART_T8110_TCR_BYPASS_DART, + + .ttbr = DART_T8110_TTBR, + .ttbr_valid = DART_T8110_TTBR_VALID, + .ttbr_addr_field_shift = DART_T8110_TTBR_ADDR_FIELD_SHIFT, + .ttbr_shift = DART_T8110_TTBR_SHIFT, + .ttbr_count = 1, +}; + +static __maybe_unused int apple_dart_suspend(struct device *dev) +{ + struct apple_dart *dart = dev_get_drvdata(dev); + unsigned int sid, idx; + + for (sid = 0; sid < dart->num_streams; sid++) { + dart->save_tcr[sid] = readl_relaxed(dart->regs + DART_TCR(dart, sid)); + for (idx = 0; idx < dart->hw->ttbr_count; idx++) + dart->save_ttbr[sid][idx] = + readl(dart->regs + DART_TTBR(dart, sid, idx)); + } + + return 0; +} + +static __maybe_unused int apple_dart_resume(struct device *dev) +{ + struct apple_dart *dart = dev_get_drvdata(dev); + unsigned int sid, idx; + int ret; + + ret = apple_dart_hw_reset(dart); + if (ret) { + dev_err(dev, "Failed to reset DART on resume\n"); + return ret; + } + + for (sid = 0; sid < dart->num_streams; sid++) { + for (idx = 0; idx < dart->hw->ttbr_count; idx++) + writel(dart->save_ttbr[sid][idx], + dart->regs + DART_TTBR(dart, sid, idx)); + writel(dart->save_tcr[sid], dart->regs + DART_TCR(dart, sid)); + } + + return 0; +} + +DEFINE_SIMPLE_DEV_PM_OPS(apple_dart_pm_ops, apple_dart_suspend, apple_dart_resume); + static const struct of_device_id apple_dart_of_match[] = { { .compatible = "apple,t8103-dart", .data = &apple_dart_hw_t8103 }, + { .compatible = "apple,t8110-dart", .data = &apple_dart_hw_t8110 }, { .compatible = "apple,t6000-dart", .data = &apple_dart_hw_t6000 }, {}, }; @@ -944,6 +1293,7 @@ static struct platform_driver apple_dart_driver = { .name = "apple-dart", .of_match_table = apple_dart_of_match, .suppress_bind_attrs = true, + .pm = pm_sleep_ptr(&apple_dart_pm_ops), }, .probe = apple_dart_probe, .remove = apple_dart_remove, diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 91d404deb115..d7ad49aa997e 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -250,6 +250,8 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,sc7280-mdss" }, { .compatible = "qcom,sc7280-mss-pil" }, { .compatible = "qcom,sc8180x-mdss" }, + { .compatible = "qcom,sc8280xp-mdss" }, + { .compatible = "qcom,sm8150-mdss" }, { .compatible = "qcom,sm8250-mdss" }, { .compatible = "qcom,sdm845-mdss" }, { .compatible = "qcom,sdm845-mss-pil" }, diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 270c3d9128ba..d7be3adee426 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -387,28 +387,6 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev return 0; } -static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *dev) -{ - struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct qcom_iommu_dev 
*qcom_iommu = to_iommu(dev); - unsigned i; - - if (WARN_ON(!qcom_domain->iommu)) - return; - - pm_runtime_get_sync(qcom_iommu->dev); - for (i = 0; i < fwspec->num_ids; i++) { - struct qcom_iommu_ctx *ctx = to_ctx(qcom_domain, fwspec->ids[i]); - - /* Disable the context bank: */ - iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0); - - ctx->domain = NULL; - } - pm_runtime_put_sync(qcom_iommu->dev); -} - static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -583,7 +561,6 @@ static const struct iommu_ops qcom_iommu_ops = { .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = qcom_iommu_attach_dev, - .detach_dev = qcom_iommu_detach_dev, .map_pages = qcom_iommu_map, .unmap_pages = qcom_iommu_unmap, .flush_iotlb_all = qcom_iommu_flush_iotlb_all, diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index f798c44e0903..99b2646cb5c7 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -23,6 +23,7 @@ #include <linux/memremap.h> #include <linux/mm.h> #include <linux/mutex.h> +#include <linux/of_iommu.h> #include <linux/pci.h> #include <linux/scatterlist.h> #include <linux/spinlock.h> @@ -391,6 +392,8 @@ void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) if (!is_of_node(dev_iommu_fwspec_get(dev)->iommu_fwnode)) iort_iommu_get_resv_regions(dev, list); + if (dev->of_node) + of_iommu_get_resv_regions(dev, list); } EXPORT_SYMBOL(iommu_dma_get_resv_regions); @@ -713,7 +716,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, if (!iova) return DMA_MAPPING_ERROR; - if (iommu_map_atomic(domain, iova, phys - iova_off, size, prot)) { + if (iommu_map(domain, iova, phys - iova_off, size, prot, GFP_ATOMIC)) { iommu_dma_free_iova(cookie, iova, size, NULL); return DMA_MAPPING_ERROR; } @@ -822,7 +825,14 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev, if (!iova) goto out_free_pages; - if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL)) + /* + * Remove the zone/policy flags from the GFP - these are applied to the + * __iommu_dma_alloc_pages() but are not used for the supporting + * internal allocations that follow. + */ + gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM | __GFP_COMP); + + if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, gfp)) goto out_free_iova; if (!(ioprot & IOMMU_CACHE)) { @@ -833,7 +843,8 @@ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev, arch_dma_prep_coherent(sg_page(sg), sg->length); } - ret = iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot); + ret = iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, ioprot, + gfp); if (ret < 0 || ret < size) goto out_free_sg; @@ -1281,7 +1292,7 @@ static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, * We'll leave any physical concatenation to the IOMMU driver's * implementation - it knows better than we do. 
*/ - ret = iommu_map_sg_atomic(domain, iova, sg, nents, prot); + ret = iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC); if (ret < 0 || ret < iova_len) goto out_free_iova; @@ -1615,7 +1626,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, if (!iova) goto out_free_page; - if (iommu_map(domain, iova, msi_addr, size, prot)) + if (iommu_map(domain, iova, msi_addr, size, prot, GFP_KERNEL)) goto out_free_iova; INIT_LIST_HEAD(&msi_page->list); diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index b0cde2211987..483aaaeb6dae 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -185,38 +185,43 @@ static sysmmu_pte_t *page_entry(sysmmu_pte_t *sent, sysmmu_iova_t iova) lv2table_base(sent)) + lv2ent_offset(iova); } -/* - * IOMMU fault information register - */ -struct sysmmu_fault_info { - unsigned int bit; /* bit number in STATUS register */ - unsigned short addr_reg; /* register to read VA fault address */ +struct sysmmu_fault { + sysmmu_iova_t addr; /* IOVA address that caused fault */ + const char *name; /* human readable fault name */ + unsigned int type; /* fault type for report_iommu_fault() */ +}; + +struct sysmmu_v1_fault_info { + unsigned short addr_reg; /* register to read IOVA fault address */ const char *name; /* human readable fault name */ unsigned int type; /* fault type for report_iommu_fault */ }; -static const struct sysmmu_fault_info sysmmu_faults[] = { - { 0, REG_PAGE_FAULT_ADDR, "PAGE", IOMMU_FAULT_READ }, - { 1, REG_AR_FAULT_ADDR, "AR MULTI-HIT", IOMMU_FAULT_READ }, - { 2, REG_AW_FAULT_ADDR, "AW MULTI-HIT", IOMMU_FAULT_WRITE }, - { 3, REG_DEFAULT_SLAVE_ADDR, "BUS ERROR", IOMMU_FAULT_READ }, - { 4, REG_AR_FAULT_ADDR, "AR SECURITY PROTECTION", IOMMU_FAULT_READ }, - { 5, REG_AR_FAULT_ADDR, "AR ACCESS PROTECTION", IOMMU_FAULT_READ }, - { 6, REG_AW_FAULT_ADDR, "AW SECURITY PROTECTION", IOMMU_FAULT_WRITE }, - { 7, REG_AW_FAULT_ADDR, "AW ACCESS PROTECTION", IOMMU_FAULT_WRITE }, +static const struct sysmmu_v1_fault_info sysmmu_v1_faults[] = { + { REG_PAGE_FAULT_ADDR, "PAGE", IOMMU_FAULT_READ }, + { REG_AR_FAULT_ADDR, "MULTI-HIT", IOMMU_FAULT_READ }, + { REG_AW_FAULT_ADDR, "MULTI-HIT", IOMMU_FAULT_WRITE }, + { REG_DEFAULT_SLAVE_ADDR, "BUS ERROR", IOMMU_FAULT_READ }, + { REG_AR_FAULT_ADDR, "SECURITY PROTECTION", IOMMU_FAULT_READ }, + { REG_AR_FAULT_ADDR, "ACCESS PROTECTION", IOMMU_FAULT_READ }, + { REG_AW_FAULT_ADDR, "SECURITY PROTECTION", IOMMU_FAULT_WRITE }, + { REG_AW_FAULT_ADDR, "ACCESS PROTECTION", IOMMU_FAULT_WRITE }, +}; + +/* SysMMU v5 has the same faults for AR (0..4 bits) and AW (16..20 bits) */ +static const char * const sysmmu_v5_fault_names[] = { + "PTW", + "PAGE", + "MULTI-HIT", + "ACCESS PROTECTION", + "SECURITY PROTECTION" }; -static const struct sysmmu_fault_info sysmmu_v5_faults[] = { - { 0, REG_V5_FAULT_AR_VA, "AR PTW", IOMMU_FAULT_READ }, - { 1, REG_V5_FAULT_AR_VA, "AR PAGE", IOMMU_FAULT_READ }, - { 2, REG_V5_FAULT_AR_VA, "AR MULTI-HIT", IOMMU_FAULT_READ }, - { 3, REG_V5_FAULT_AR_VA, "AR ACCESS PROTECTION", IOMMU_FAULT_READ }, - { 4, REG_V5_FAULT_AR_VA, "AR SECURITY PROTECTION", IOMMU_FAULT_READ }, - { 16, REG_V5_FAULT_AW_VA, "AW PTW", IOMMU_FAULT_WRITE }, - { 17, REG_V5_FAULT_AW_VA, "AW PAGE", IOMMU_FAULT_WRITE }, - { 18, REG_V5_FAULT_AW_VA, "AW MULTI-HIT", IOMMU_FAULT_WRITE }, - { 19, REG_V5_FAULT_AW_VA, "AW ACCESS PROTECTION", IOMMU_FAULT_WRITE }, - { 20, REG_V5_FAULT_AW_VA, "AW SECURITY PROTECTION", IOMMU_FAULT_WRITE }, +static const char * const sysmmu_v7_fault_names[] = { + 
"PTW", + "PAGE", + "ACCESS PROTECTION", + "RESERVED" }; /* @@ -246,9 +251,12 @@ struct exynos_iommu_domain { struct iommu_domain domain; /* generic domain data structure */ }; +struct sysmmu_drvdata; + /* * SysMMU version specific data. Contains offsets for the registers which can * be found in different SysMMU variants, but have different offset values. + * Also contains version specific callbacks to abstract the hardware. */ struct sysmmu_variant { u32 pt_base; /* page table base address (physical) */ @@ -259,6 +267,11 @@ struct sysmmu_variant { u32 flush_end; /* end address of range invalidation */ u32 int_status; /* interrupt status information */ u32 int_clear; /* clear the interrupt */ + u32 fault_va; /* IOVA address that caused fault */ + u32 fault_info; /* fault transaction info */ + + int (*get_fault_info)(struct sysmmu_drvdata *data, unsigned int itype, + struct sysmmu_fault *fault); }; /* @@ -293,6 +306,59 @@ struct sysmmu_drvdata { #define SYSMMU_REG(data, reg) ((data)->sfrbase + (data)->variant->reg) +static int exynos_sysmmu_v1_get_fault_info(struct sysmmu_drvdata *data, + unsigned int itype, + struct sysmmu_fault *fault) +{ + const struct sysmmu_v1_fault_info *finfo; + + if (itype >= ARRAY_SIZE(sysmmu_v1_faults)) + return -ENXIO; + + finfo = &sysmmu_v1_faults[itype]; + fault->addr = readl(data->sfrbase + finfo->addr_reg); + fault->name = finfo->name; + fault->type = finfo->type; + + return 0; +} + +static int exynos_sysmmu_v5_get_fault_info(struct sysmmu_drvdata *data, + unsigned int itype, + struct sysmmu_fault *fault) +{ + unsigned int addr_reg; + + if (itype < ARRAY_SIZE(sysmmu_v5_fault_names)) { + fault->type = IOMMU_FAULT_READ; + addr_reg = REG_V5_FAULT_AR_VA; + } else if (itype >= 16 && itype <= 20) { + fault->type = IOMMU_FAULT_WRITE; + addr_reg = REG_V5_FAULT_AW_VA; + itype -= 16; + } else { + return -ENXIO; + } + + fault->name = sysmmu_v5_fault_names[itype]; + fault->addr = readl(data->sfrbase + addr_reg); + + return 0; +} + +static int exynos_sysmmu_v7_get_fault_info(struct sysmmu_drvdata *data, + unsigned int itype, + struct sysmmu_fault *fault) +{ + u32 info = readl(SYSMMU_REG(data, fault_info)); + + fault->addr = readl(SYSMMU_REG(data, fault_va)); + fault->name = sysmmu_v7_fault_names[itype % 4]; + fault->type = (info & BIT(20)) ? 
IOMMU_FAULT_WRITE : IOMMU_FAULT_READ; + + return 0; +} + /* SysMMU v1..v3 */ static const struct sysmmu_variant sysmmu_v1_variant = { .flush_all = 0x0c, @@ -300,9 +366,11 @@ static const struct sysmmu_variant sysmmu_v1_variant = { .pt_base = 0x14, .int_status = 0x18, .int_clear = 0x1c, + + .get_fault_info = exynos_sysmmu_v1_get_fault_info, }; -/* SysMMU v5 and v7 (non-VM capable) */ +/* SysMMU v5 */ static const struct sysmmu_variant sysmmu_v5_variant = { .pt_base = 0x0c, .flush_all = 0x10, @@ -312,9 +380,27 @@ static const struct sysmmu_variant sysmmu_v5_variant = { .flush_end = 0x24, .int_status = 0x60, .int_clear = 0x64, + + .get_fault_info = exynos_sysmmu_v5_get_fault_info, }; -/* SysMMU v7: VM capable register set */ +/* SysMMU v7: non-VM capable register layout */ +static const struct sysmmu_variant sysmmu_v7_variant = { + .pt_base = 0x0c, + .flush_all = 0x10, + .flush_entry = 0x14, + .flush_range = 0x18, + .flush_start = 0x20, + .flush_end = 0x24, + .int_status = 0x60, + .int_clear = 0x64, + .fault_va = 0x70, + .fault_info = 0x78, + + .get_fault_info = exynos_sysmmu_v7_get_fault_info, +}; + +/* SysMMU v7: VM capable register layout */ static const struct sysmmu_variant sysmmu_v7_vm_variant = { .pt_base = 0x800c, .flush_all = 0x8010, @@ -324,6 +410,10 @@ static const struct sysmmu_variant sysmmu_v7_vm_variant = { .flush_end = 0x8024, .int_status = 0x60, .int_clear = 0x64, + .fault_va = 0x1000, + .fault_info = 0x1004, + + .get_fault_info = exynos_sysmmu_v7_get_fault_info, }; static struct exynos_iommu_domain *to_exynos_domain(struct iommu_domain *dom) @@ -446,75 +536,63 @@ static void __sysmmu_get_version(struct sysmmu_drvdata *data) if (data->has_vcr) data->variant = &sysmmu_v7_vm_variant; else - data->variant = &sysmmu_v5_variant; + data->variant = &sysmmu_v7_variant; } __sysmmu_disable_clocks(data); } static void show_fault_information(struct sysmmu_drvdata *data, - const struct sysmmu_fault_info *finfo, - sysmmu_iova_t fault_addr) + const struct sysmmu_fault *fault) { sysmmu_pte_t *ent; - dev_err(data->sysmmu, "%s: %s FAULT occurred at %#x\n", - dev_name(data->master), finfo->name, fault_addr); + dev_err(data->sysmmu, "%s: [%s] %s FAULT occurred at %#x\n", + dev_name(data->master), + fault->type == IOMMU_FAULT_READ ? "READ" : "WRITE", + fault->name, fault->addr); dev_dbg(data->sysmmu, "Page table base: %pa\n", &data->pgtable); - ent = section_entry(phys_to_virt(data->pgtable), fault_addr); + ent = section_entry(phys_to_virt(data->pgtable), fault->addr); dev_dbg(data->sysmmu, "\tLv1 entry: %#x\n", *ent); if (lv1ent_page(ent)) { - ent = page_entry(ent, fault_addr); + ent = page_entry(ent, fault->addr); dev_dbg(data->sysmmu, "\t Lv2 entry: %#x\n", *ent); } } static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id) { - /* SYSMMU is in blocked state when interrupt occurred. 
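As an aside on the new per-variant get_fault_info() callbacks above: for the v5 layout, interrupt status bits 0..4 are read-side (AR) faults and bits 16..20 the matching write-side (AW) faults, with the fault name shared between the two halves. A minimal user-space sketch of that decode (the function and sample below are ours, not driver code):

#include <stdio.h>
#include <stdint.h>

static const char *const v5_names[] = {
    "PTW", "PAGE", "MULTI-HIT", "ACCESS PROTECTION", "SECURITY PROTECTION",
};

static int v5_decode(uint32_t int_status, const char **name, int *is_write)
{
    unsigned int itype = __builtin_ctz(int_status); /* lowest set bit, as __ffs() in the handler */

    if (itype < 5) {
        *is_write = 0;
    } else if (itype >= 16 && itype <= 20) {
        *is_write = 1;
        itype -= 16;
    } else {
        return -1;  /* unknown bit; the driver returns -ENXIO here */
    }
    *name = v5_names[itype];
    return 0;
}

int main(void)
{
    const char *name;
    int is_write;

    if (!v5_decode(1u << 18, &name, &is_write))
        printf("%s %s fault\n", is_write ? "WRITE" : "READ", name);
    return 0;
}

Feeding it 1u << 18 reports a WRITE MULTI-HIT fault, which matches the entry the old sysmmu_v5_faults[] table carried for bit 18.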
*/ struct sysmmu_drvdata *data = dev_id; - const struct sysmmu_fault_info *finfo; - unsigned int i, n, itype; - sysmmu_iova_t fault_addr; + unsigned int itype; + struct sysmmu_fault fault; int ret = -ENOSYS; WARN_ON(!data->active); - if (MMU_MAJ_VER(data->version) < 5) { - finfo = sysmmu_faults; - n = ARRAY_SIZE(sysmmu_faults); - } else { - finfo = sysmmu_v5_faults; - n = ARRAY_SIZE(sysmmu_v5_faults); - } - spin_lock(&data->lock); - clk_enable(data->clk_master); itype = __ffs(readl(SYSMMU_REG(data, int_status))); - for (i = 0; i < n; i++, finfo++) - if (finfo->bit == itype) - break; - /* unknown/unsupported fault */ - BUG_ON(i == n); - - /* print debug message */ - fault_addr = readl(data->sfrbase + finfo->addr_reg); - show_fault_information(data, finfo, fault_addr); - - if (data->domain) - ret = report_iommu_fault(&data->domain->domain, - data->master, fault_addr, finfo->type); - /* fault is not recovered by fault handler */ - BUG_ON(ret != 0); + ret = data->variant->get_fault_info(data, itype, &fault); + if (ret) { + dev_err(data->sysmmu, "Unhandled interrupt bit %u\n", itype); + goto out; + } + show_fault_information(data, &fault); + if (data->domain) { + ret = report_iommu_fault(&data->domain->domain, data->master, + fault.addr, fault.type); + } + if (ret) + panic("Unrecoverable System MMU Fault!"); + +out: writel(1 << itype, SYSMMU_REG(data, int_clear)); + /* SysMMU is in blocked state when interrupt occurred */ sysmmu_unblock(data); - clk_disable(data->clk_master); - spin_unlock(&data->lock); return IRQ_HANDLED; @@ -1346,8 +1424,10 @@ static void exynos_iommu_release_device(struct device *dev) struct iommu_group *group = iommu_group_get(dev); if (group) { +#ifndef CONFIG_ARM WARN_ON(owner->domain != iommu_group_default_domain(group)); +#endif exynos_iommu_detach_device(owner->domain, dev); iommu_group_put(group); } @@ -1398,13 +1478,15 @@ static int exynos_iommu_of_xlate(struct device *dev, static const struct iommu_ops exynos_iommu_ops = { .domain_alloc = exynos_iommu_domain_alloc, .device_group = generic_device_group, +#ifdef CONFIG_ARM + .set_platform_dma_ops = exynos_iommu_release_device, +#endif .probe_device = exynos_iommu_probe_device, .release_device = exynos_iommu_release_device, .pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE, .of_xlate = exynos_iommu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = exynos_iommu_attach_device, - .detach_dev = exynos_iommu_detach_device, .map = exynos_iommu_map, .unmap = exynos_iommu_unmap, .iova_to_phys = exynos_iommu_iova_to_phys, @@ -1446,7 +1528,7 @@ static int __init exynos_iommu_init(void) return 0; err_reg_driver: - platform_driver_unregister(&exynos_sysmmu_driver); + kmem_cache_free(lv2table_kmem_cache, zero_lv2_table); err_zero_lv2: kmem_cache_destroy(lv2table_kmem_cache); return ret; diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 4408ac3c49b6..bce372297099 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -283,9 +283,9 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain, return ret; } -static void fsl_pamu_detach_device(struct iommu_domain *domain, - struct device *dev) +static void fsl_pamu_set_platform_dma(struct device *dev) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain); const u32 *prop; int len; @@ -452,9 +452,9 @@ static const struct iommu_ops fsl_pamu_ops = { .domain_alloc = fsl_pamu_domain_alloc, .probe_device = 
fsl_pamu_probe_device, .device_group = fsl_pamu_device_group, + .set_platform_dma_ops = fsl_pamu_set_platform_dma, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = fsl_pamu_attach_device, - .detach_dev = fsl_pamu_detach_device, .iova_to_phys = fsl_pamu_iova_to_phys, .free = fsl_pamu_domain_free, } diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index b7dff5092fd2..12e1e90fdae1 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -96,4 +96,15 @@ config INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON passing intel_iommu=sm_on to the kernel. If not sure, please use the default value. +config INTEL_IOMMU_PERF_EVENTS + def_bool y + bool "Intel IOMMU performance events" + depends on INTEL_IOMMU && PERF_EVENTS + help + Selecting this option will enable the performance monitoring + infrastructure in the Intel IOMMU. It collects information about + key events occurring during operation of the remapping hardware, + to aid performance tuning and debug. These are available on modern + processors which support Intel VT-d 4.0 and later. + endif # INTEL_IOMMU diff --git a/drivers/iommu/intel/Makefile b/drivers/iommu/intel/Makefile index fa0dae16441c..7af3b8a4f2a0 100644 --- a/drivers/iommu/intel/Makefile +++ b/drivers/iommu/intel/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_DMAR_PERF) += perf.o obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o +obj-$(CONFIG_INTEL_IOMMU_PERF_EVENTS) += perfmon.o diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index b00a0ceb2d13..6acfe879589c 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -34,6 +34,7 @@ #include "../irq_remapping.h" #include "perf.h" #include "trace.h" +#include "perfmon.h" typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *); struct dmar_res_callback { @@ -427,6 +428,8 @@ static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg) memcpy(dmaru->hdr, header, header->length); dmaru->reg_base_addr = drhd->address; dmaru->segment = drhd->segment; + /* The size of the register set is 2 ^ N 4 KB pages. */ + dmaru->reg_size = 1UL << (drhd->size + 12); dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1), ((void *)drhd) + drhd->header.length, @@ -956,17 +959,18 @@ static void unmap_iommu(struct intel_iommu *iommu) /** * map_iommu: map the iommu's registers * @iommu: the iommu to map - * @phys_addr: the physical address of the base resgister + * @drhd: DMA remapping hardware definition structure * * Memory map the iommu's registers. Start w/ a single page, and * possibly expand if that turns out to be insufficent. 
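The new dmaru->reg_size computation above follows the DMAR table encoding, where the size field gives the register set as 2^N 4 KB pages; map_iommu() below then maps the whole set instead of assuming a single VTD page. A throwaway standalone check of that arithmetic (not kernel code):

#include <stdio.h>

int main(void)
{
    for (unsigned int n = 0; n <= 4; n++) {
        unsigned long reg_size = 1UL << (n + 12); /* 2^N 4 KB pages */
        printf("size field N=%u -> %lu KiB (%lu 4K pages)\n",
               n, reg_size >> 10, reg_size >> 12);
    }
    return 0;
}

N = 0 gives the old single 4 KiB page; N = 3 gives 32 KiB, i.e. eight pages.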
*/ -static int map_iommu(struct intel_iommu *iommu, u64 phys_addr) +static int map_iommu(struct intel_iommu *iommu, struct dmar_drhd_unit *drhd) { + u64 phys_addr = drhd->reg_base_addr; int map_size, err=0; iommu->reg_phys = phys_addr; - iommu->reg_size = VTD_PAGE_SIZE; + iommu->reg_size = drhd->reg_size; if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) { pr_err("Can't reserve memory\n"); @@ -1013,6 +1017,16 @@ static int map_iommu(struct intel_iommu *iommu, u64 phys_addr) goto release; } } + + if (cap_ecmds(iommu->cap)) { + int i; + + for (i = 0; i < DMA_MAX_NUM_ECMDCAP; i++) { + iommu->ecmdcap[i] = dmar_readq(iommu->reg + DMAR_ECCAP_REG + + i * DMA_ECMD_REG_STEP); + } + } + err = 0; goto out; @@ -1050,7 +1064,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) } sprintf(iommu->name, "dmar%d", iommu->seq_id); - err = map_iommu(iommu, drhd->reg_base_addr); + err = map_iommu(iommu, drhd); if (err) { pr_err("Failed to map %s\n", iommu->name); goto error_free_seq_id; @@ -1103,6 +1117,9 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) if (sts & DMA_GSTS_QIES) iommu->gcmd |= DMA_GCMD_QIE; + if (alloc_iommu_pmu(iommu)) + pr_debug("Cannot alloc PMU for iommu (seq_id = %d)\n", iommu->seq_id); + raw_spin_lock_init(&iommu->register_lock); /* @@ -1127,6 +1144,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); if (err) goto err_sysfs; + + iommu_pmu_register(iommu); } drhd->iommu = iommu; @@ -1137,6 +1156,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) err_sysfs: iommu_device_sysfs_remove(&iommu->iommu); err_unmap: + free_iommu_pmu(iommu); unmap_iommu(iommu); error_free_seq_id: ida_free(&dmar_seq_ids, iommu->seq_id); @@ -1148,10 +1168,13 @@ error: static void free_iommu(struct intel_iommu *iommu) { if (intel_iommu_enabled && !iommu->drhd->ignored) { + iommu_pmu_unregister(iommu); iommu_device_unregister(&iommu->iommu); iommu_device_sysfs_remove(&iommu->iommu); } + free_iommu_pmu(iommu); + if (iommu->irq) { if (iommu->pr_irq) { free_irq(iommu->pr_irq, iommu); @@ -1859,6 +1882,8 @@ static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq) return DMAR_FECTL_REG; else if (iommu->pr_irq == irq) return DMAR_PECTL_REG; + else if (iommu->perf_irq == irq) + return DMAR_PERFINTRCTL_REG; else BUG(); } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 59df7e42fd53..86ee06ac058b 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -16,7 +16,6 @@ #include <linux/crash_dump.h> #include <linux/dma-direct.h> #include <linux/dmi.h> -#include <linux/intel-svm.h> #include <linux/memory.h> #include <linux/pci.h> #include <linux/pci-ats.h> @@ -30,6 +29,7 @@ #include "../iommu-sva.h" #include "pasid.h" #include "cap_audit.h" +#include "perfmon.h" #define ROOT_SIZE VTD_PAGE_SIZE #define CONTEXT_SIZE VTD_PAGE_SIZE @@ -362,12 +362,12 @@ static int __init intel_iommu_setup(char *str) } __setup("intel_iommu=", intel_iommu_setup); -void *alloc_pgtable_page(int node) +void *alloc_pgtable_page(int node, gfp_t gfp) { struct page *page; void *vaddr = NULL; - page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0); + page = alloc_pages_node(node, gfp | __GFP_ZERO, 0); if (page) vaddr = page_address(page); return vaddr; @@ -612,7 +612,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, if (!alloc) return NULL; - context = alloc_pgtable_page(iommu->node); + context = alloc_pgtable_page(iommu->node, GFP_ATOMIC); if (!context) return 
NULL; @@ -908,7 +908,8 @@ pgtable_walk: #endif static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, - unsigned long pfn, int *target_level) + unsigned long pfn, int *target_level, + gfp_t gfp) { struct dma_pte *parent, *pte; int level = agaw_to_level(domain->agaw); @@ -935,7 +936,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, if (!dma_pte_present(pte)) { uint64_t pteval; - tmp_page = alloc_pgtable_page(domain->nid); + tmp_page = alloc_pgtable_page(domain->nid, gfp); if (!tmp_page) return NULL; @@ -1186,7 +1187,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) { struct root_entry *root; - root = (struct root_entry *)alloc_pgtable_page(iommu->node); + root = (struct root_entry *)alloc_pgtable_page(iommu->node, GFP_ATOMIC); if (!root) { pr_err("Allocating root entry for %s failed\n", iommu->name); @@ -2150,7 +2151,8 @@ static void switch_to_super_page(struct dmar_domain *domain, while (start_pfn <= end_pfn) { if (!pte) - pte = pfn_to_dma_pte(domain, start_pfn, &level); + pte = pfn_to_dma_pte(domain, start_pfn, &level, + GFP_ATOMIC); if (dma_pte_present(pte)) { dma_pte_free_pagetable(domain, start_pfn, @@ -2172,7 +2174,8 @@ static void switch_to_super_page(struct dmar_domain *domain, static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, - unsigned long phys_pfn, unsigned long nr_pages, int prot) + unsigned long phys_pfn, unsigned long nr_pages, int prot, + gfp_t gfp) { struct dma_pte *first_pte = NULL, *pte = NULL; unsigned int largepage_lvl = 0; @@ -2202,7 +2205,8 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, nr_pages); - pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); + pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl, + gfp); if (!pte) return -ENOMEM; first_pte = pte; @@ -2368,7 +2372,7 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, return __domain_mapping(domain, first_vpfn, first_vpfn, last_vpfn - first_vpfn + 1, - DMA_PTE_READ|DMA_PTE_WRITE); + DMA_PTE_READ|DMA_PTE_WRITE, GFP_KERNEL); } static int md_domain_init(struct dmar_domain *domain, int guest_width); @@ -2676,7 +2680,7 @@ static int copy_context_table(struct intel_iommu *iommu, if (!old_ce) goto out; - new_ce = alloc_pgtable_page(iommu->node); + new_ce = alloc_pgtable_page(iommu->node, GFP_KERNEL); if (!new_ce) goto out_unmap; @@ -4005,7 +4009,8 @@ int __init intel_iommu_init(void) * is likely to be much lower than the overhead of synchronizing * the virtual and physical IOMMU page-tables. 
*/ - if (cap_caching_mode(iommu->cap)) { + if (cap_caching_mode(iommu->cap) && + !first_level_by_default(IOMMU_DOMAIN_DMA)) { pr_info_once("IOMMU batching disallowed due to virtualization\n"); iommu_set_dma_strict(); } @@ -4013,6 +4018,8 @@ int __init intel_iommu_init(void) intel_iommu_groups, "%s", iommu->name); iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); + + iommu_pmu_register(iommu); } up_read(&dmar_global_lock); @@ -4136,7 +4143,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->max_addr = 0; /* always allocate the top pgd */ - domain->pgd = alloc_pgtable_page(domain->nid); + domain->pgd = alloc_pgtable_page(domain->nid, GFP_ATOMIC); if (!domain->pgd) return -ENOMEM; domain_flush_cache(domain, domain->pgd, PAGE_SIZE); @@ -4298,7 +4305,7 @@ static int intel_iommu_map(struct iommu_domain *domain, the low bits of hpa would take us onto the next page */ size = aligned_nrpages(hpa, size); return __domain_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, - hpa >> VTD_PAGE_SHIFT, size, prot); + hpa >> VTD_PAGE_SHIFT, size, prot, gfp); } static int intel_iommu_map_pages(struct iommu_domain *domain, @@ -4333,7 +4340,8 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, /* Cope with horrid API which requires us to unmap more than the size argument if it happens to be a large-page mapping. */ - BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level)); + BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level, + GFP_ATOMIC)); if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) size = VTD_PAGE_SIZE << level_to_offset_bits(level); @@ -4346,7 +4354,12 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, if (dmar_domain->max_addr == iova + size) dmar_domain->max_addr = iova; - iommu_iotlb_gather_add_page(domain, gather, iova, size); + /* + * We do not use page-selective IOTLB invalidation in flush queue, + * so there is no need to track page and sync iotlb. + */ + if (!iommu_iotlb_gather_queued(gather)) + iommu_iotlb_gather_add_page(domain, gather, iova, size); return size; } @@ -4392,7 +4405,8 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, int level = 0; u64 phys = 0; - pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level); + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level, + GFP_ATOMIC); if (pte && dma_pte_present(pte)) phys = dma_pte_addr(pte) + (iova & (BIT_MASK(level_to_offset_bits(level) + @@ -4642,8 +4656,12 @@ static int intel_iommu_enable_sva(struct device *dev) return -EINVAL; ret = iopf_queue_add_device(iommu->iopf_queue, dev); - if (!ret) - ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); + if (ret) + return ret; + + ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); + if (ret) + iopf_queue_remove_device(iommu->iopf_queue, dev); return ret; } @@ -4655,8 +4673,12 @@ static int intel_iommu_disable_sva(struct device *dev) int ret; ret = iommu_unregister_device_fault_handler(dev); - if (!ret) - ret = iopf_queue_remove_device(iommu->iopf_queue, dev); + if (ret) + return ret; + + ret = iopf_queue_remove_device(iommu->iopf_queue, dev); + if (ret) + iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev); return ret; } @@ -5023,3 +5045,59 @@ void quirk_extra_dev_tlb_flush(struct device_domain_info *info, pasid, qdep, address, mask); } } + +#define ecmd_get_status_code(res) (((res) & 0xff) >> 1) + +/* + * Function to submit a command to the enhanced command interface. 
The + * valid enhanced command descriptions are defined in Table 47 of the + * VT-d spec. The VT-d hardware implementation may support some but not + * all commands, which can be determined by checking the Enhanced + * Command Capability Register. + * + * Return values: + * - 0: Command successful without any error; + * - Negative: software error value; + * - Nonzero positive: failure status code defined in Table 48. + */ +int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob) +{ + unsigned long flags; + u64 res; + int ret; + + if (!cap_ecmds(iommu->cap)) + return -ENODEV; + + raw_spin_lock_irqsave(&iommu->register_lock, flags); + + res = dmar_readq(iommu->reg + DMAR_ECRSP_REG); + if (res & DMA_ECMD_ECRSP_IP) { + ret = -EBUSY; + goto err; + } + + /* + * Unconditionally write the operand B, because + * - There is no side effect if an ecmd doesn't require an + * operand B, but we set the register to some value. + * - It's not invoked in any critical path. The extra MMIO + * write doesn't bring any performance concerns. + */ + dmar_writeq(iommu->reg + DMAR_ECEO_REG, ob); + dmar_writeq(iommu->reg + DMAR_ECMD_REG, ecmd | (oa << DMA_ECMD_OA_SHIFT)); + + IOMMU_WAIT_OP(iommu, DMAR_ECRSP_REG, dmar_readq, + !(res & DMA_ECMD_ECRSP_IP), res); + + if (res & DMA_ECMD_ECRSP_IP) { + ret = -ETIMEDOUT; + goto err; + } + + ret = ecmd_get_status_code(res); +err: + raw_spin_unlock_irqrestore(&iommu->register_lock, flags); + + return ret; +} diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 06e61e474856..d6df3b865812 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -22,6 +22,7 @@ #include <linux/ioasid.h> #include <linux/bitfield.h> #include <linux/xarray.h> +#include <linux/perf_event.h> #include <asm/cacheflush.h> #include <asm/iommu.h> @@ -125,6 +126,17 @@ #define DMAR_MTRR_PHYSMASK8_REG 0x208 #define DMAR_MTRR_PHYSBASE9_REG 0x210 #define DMAR_MTRR_PHYSMASK9_REG 0x218 +#define DMAR_PERFCAP_REG 0x300 +#define DMAR_PERFCFGOFF_REG 0x310 +#define DMAR_PERFOVFOFF_REG 0x318 +#define DMAR_PERFCNTROFF_REG 0x31c +#define DMAR_PERFINTRSTS_REG 0x324 +#define DMAR_PERFINTRCTL_REG 0x328 +#define DMAR_PERFEVNTCAP_REG 0x380 +#define DMAR_ECMD_REG 0x400 +#define DMAR_ECEO_REG 0x408 +#define DMAR_ECRSP_REG 0x410 +#define DMAR_ECCAP_REG 0x430 #define DMAR_VCCAP_REG 0xe30 /* Virtual command capability register */ #define DMAR_VCMD_REG 0xe00 /* Virtual command register */ #define DMAR_VCRSP_REG 0xe10 /* Virtual command response register */ @@ -148,6 +160,7 @@ */ #define cap_esrtps(c) (((c) >> 63) & 1) #define cap_esirtps(c) (((c) >> 62) & 1) +#define cap_ecmds(c) (((c) >> 61) & 1) #define cap_fl5lp_support(c) (((c) >> 60) & 1) #define cap_pi_support(c) (((c) >> 59) & 1) #define cap_fl1gp_support(c) (((c) >> 56) & 1) @@ -179,7 +192,8 @@ * Extended Capability Register */ -#define ecap_rps(e) (((e) >> 49) & 0x1) +#define ecap_pms(e) (((e) >> 51) & 0x1) +#define ecap_rps(e) (((e) >> 49) & 0x1) #define ecap_smpwc(e) (((e) >> 48) & 0x1) #define ecap_flts(e) (((e) >> 47) & 0x1) #define ecap_slts(e) (((e) >> 46) & 0x1) @@ -210,6 +224,22 @@ #define ecap_max_handle_mask(e) (((e) >> 20) & 0xf) #define ecap_sc_support(e) (((e) >> 7) & 0x1) /* Snooping Control */ +/* + * Decoding Perf Capability Register + */ +#define pcap_num_cntr(p) ((p) & 0xffff) +#define pcap_cntr_width(p) (((p) >> 16) & 0x7f) +#define pcap_num_event_group(p) (((p) >> 24) & 0x1f) +#define pcap_filters_mask(p) (((p) >> 32) & 0x1f) +#define pcap_interrupt(p) (((p) >> 50) & 0x1) +/* The counter 
stride is calculated as 2 ^ (x+10) bytes */ +#define pcap_cntr_stride(p) (1ULL << ((((p) >> 52) & 0x7) + 10)) + +/* + * Decoding Perf Event Capability Register + */ +#define pecap_es(p) ((p) & 0xfffffff) + /* Virtual command interface capability */ #define vccap_pasid(v) (((v) & DMA_VCS_PAS)) /* PASID allocation */ @@ -281,6 +311,26 @@ #define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16) #define DMA_CCMD_DID(d) ((u64)((d) & 0xffff)) +/* ECMD_REG */ +#define DMA_MAX_NUM_ECMD 256 +#define DMA_MAX_NUM_ECMDCAP (DMA_MAX_NUM_ECMD / 64) +#define DMA_ECMD_REG_STEP 8 +#define DMA_ECMD_ENABLE 0xf0 +#define DMA_ECMD_DISABLE 0xf1 +#define DMA_ECMD_FREEZE 0xf4 +#define DMA_ECMD_UNFREEZE 0xf5 +#define DMA_ECMD_OA_SHIFT 16 +#define DMA_ECMD_ECRSP_IP 0x1 +#define DMA_ECMD_ECCAP3 3 +#define DMA_ECMD_ECCAP3_ECNTS BIT_ULL(48) +#define DMA_ECMD_ECCAP3_DCNTS BIT_ULL(49) +#define DMA_ECMD_ECCAP3_FCNTS BIT_ULL(52) +#define DMA_ECMD_ECCAP3_UFCNTS BIT_ULL(53) +#define DMA_ECMD_ECCAP3_ESSENTIAL (DMA_ECMD_ECCAP3_ECNTS | \ + DMA_ECMD_ECCAP3_DCNTS | \ + DMA_ECMD_ECCAP3_FCNTS | \ + DMA_ECMD_ECCAP3_UFCNTS) + /* FECTL_REG */ #define DMA_FECTL_IM (((u32)1) << 31) @@ -309,6 +359,9 @@ #define DMA_VCS_PAS ((u64)1) +/* PERFINTRSTS_REG */ +#define DMA_PERFINTRSTS_PIS ((u32)1) + #define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \ do { \ cycles_t start_time = get_cycles(); \ @@ -438,6 +491,11 @@ struct q_inval { int free_cnt; }; +/* Page Request Queue depth */ +#define PRQ_ORDER 4 +#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20) +#define PRQ_DEPTH ((0x1000 << PRQ_ORDER) >> 5) + struct dmar_pci_notify_info; #ifdef CONFIG_IRQ_REMAP @@ -554,6 +612,40 @@ struct dmar_domain { iommu core */ }; +/* + * In theory, the VT-d 4.0 spec can support up to 2 ^ 16 counters. + * But in practice, there are only 14 counters for the existing + * platform. Setting the max number of counters to 64 should be good + * enough for a long time. Also, supporting more than 64 counters + * requires more extras, e.g., extra freeze and overflow registers, + * which is not necessary for now. + */ +#define IOMMU_PMU_IDX_MAX 64 + +struct iommu_pmu { + struct intel_iommu *iommu; + u32 num_cntr; /* Number of counters */ + u32 num_eg; /* Number of event group */ + u32 cntr_width; /* Counter width */ + u32 cntr_stride; /* Counter Stride */ + u32 filter; /* Bitmask of filter support */ + void __iomem *base; /* the PerfMon base address */ + void __iomem *cfg_reg; /* counter configuration base address */ + void __iomem *cntr_reg; /* counter 0 address*/ + void __iomem *overflow; /* overflow status register */ + + u64 *evcap; /* Indicates all supported events */ + u32 **cntr_evcap; /* Supported events of each counter. */ + + struct pmu pmu; + DECLARE_BITMAP(used_mask, IOMMU_PMU_IDX_MAX); + struct perf_event *event_list[IOMMU_PMU_IDX_MAX]; + unsigned char irq_name[16]; +}; + +#define IOMMU_IRQ_ID_OFFSET_PRQ (DMAR_UNITS_SUPPORTED) +#define IOMMU_IRQ_ID_OFFSET_PERF (2 * DMAR_UNITS_SUPPORTED) + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 reg_phys; /* physical address of hw register set */ @@ -561,12 +653,13 @@ struct intel_iommu { u64 cap; u64 ecap; u64 vccap; + u64 ecmdcap[DMA_MAX_NUM_ECMDCAP]; u32 gcmd; /* Holds TE, EAFL. 
Don't need SRTP, SFL, WBF */ raw_spinlock_t register_lock; /* protect register handling */ int seq_id; /* sequence id of the iommu */ int agaw; /* agaw of this iommu */ int msagaw; /* max sagaw of this iommu */ - unsigned int irq, pr_irq; + unsigned int irq, pr_irq, perf_irq; u16 segment; /* PCI segment# */ unsigned char name[13]; /* Device Name */ @@ -600,6 +693,8 @@ struct intel_iommu { struct dmar_drhd_unit *drhd; void *perf_statistic; + + struct iommu_pmu *pmu; }; /* PCI domain-device relationship */ @@ -737,7 +832,7 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, extern int dmar_ir_support(void); -void *alloc_pgtable_page(int node); +void *alloc_pgtable_page(int node, gfp_t gfp); void free_pgtable_page(void *vaddr); void iommu_flush_write_buffer(struct intel_iommu *iommu); struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); @@ -756,19 +851,13 @@ struct intel_svm_dev { struct rcu_head rcu; struct device *dev; struct intel_iommu *iommu; - struct iommu_sva sva; - u32 pasid; - int users; u16 did; - u16 dev_iotlb:1; u16 sid, qdep; }; struct intel_svm { struct mmu_notifier notifier; struct mm_struct *mm; - - unsigned int flags; u32 pasid; struct list_head devs; }; @@ -800,6 +889,14 @@ extern const struct iommu_ops intel_iommu_ops; extern int intel_iommu_sm; extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); +int ecmd_submit_sync(struct intel_iommu *iommu, u8 ecmd, u64 oa, u64 ob); + +static inline bool ecmd_has_pmu_essential(struct intel_iommu *iommu) +{ + return (iommu->ecmdcap[DMA_ECMD_ECCAP3] & DMA_ECMD_ECCAP3_ESSENTIAL) == + DMA_ECMD_ECCAP3_ESSENTIAL; +} + extern int dmar_disabled; extern int intel_iommu_enabled; #else diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index fb3c7020028d..633e0a4a01e7 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -128,6 +128,9 @@ int intel_pasid_alloc_table(struct device *dev) pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3); info->pasid_table = pasid_table; + if (!ecap_coherent(info->iommu->ecap)) + clflush_cache_range(pasid_table->table, size); + return 0; } @@ -200,7 +203,7 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) retry: entries = get_pasid_table_from_pde(&dir[dir_index]); if (!entries) { - entries = alloc_pgtable_page(info->iommu->node); + entries = alloc_pgtable_page(info->iommu->node, GFP_ATOMIC); if (!entries) return NULL; @@ -215,6 +218,10 @@ retry: free_pgtable_page(entries); goto retry; } + if (!ecap_coherent(info->iommu->ecap)) { + clflush_cache_range(entries, VTD_PAGE_SIZE); + clflush_cache_range(&dir[dir_index].val, sizeof(*dir)); + } } return &entries[index]; @@ -365,6 +372,16 @@ static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value) } /* + * Setup No Execute Enable bit (Bit 133) of a scalable mode PASID + * entry. It is required when XD bit of the first level page table + * entry is about to be set. + */ +static inline void pasid_set_nxe(struct pasid_entry *pe) +{ + pasid_set_bits(&pe->val[2], 1 << 5, 1 << 5); +} + +/* * Setup the Page Snoop (PGSNP) field (Bit 88) of a scalable mode * PASID entry. 
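pasid_set_nxe() above programs architectural bit 133 of the scalable-mode PASID entry, which is why it touches val[2] with a 1 << 5 mask: the entry is handled as an array of 64-bit words, so bit B lands in val[B / 64] at position B % 64. A small standalone check of that mapping for the two bits named in the comments (NXE at bit 133, PGSNP at bit 88); the helper name is ours:

#include <stdio.h>

static void locate(unsigned int bit, const char *what)
{
    printf("%-24s bit %3u -> val[%u], bit %u\n",
           what, bit, bit / 64, bit % 64);
}

int main(void)
{
    locate(133, "NXE (No Execute Enable)"); /* -> val[2], bit 5 */
    locate(88, "PGSNP (Page Snoop)");       /* -> val[1], bit 24 */
    return 0;
}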
*/ @@ -557,6 +574,7 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, pasid_set_domain_id(pte, did); pasid_set_address_width(pte, iommu->agaw); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); + pasid_set_nxe(pte); /* Setup Present and PASID Granular Transfer Type: */ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY); diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c new file mode 100644 index 000000000000..e17d9743a0d8 --- /dev/null +++ b/drivers/iommu/intel/perfmon.c @@ -0,0 +1,877 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Support Intel IOMMU PerfMon + * Copyright(c) 2023 Intel Corporation. + */ +#define pr_fmt(fmt) "DMAR: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) + +#include <linux/dmar.h> +#include "iommu.h" +#include "perfmon.h" + +PMU_FORMAT_ATTR(event, "config:0-27"); /* ES: Events Select */ +PMU_FORMAT_ATTR(event_group, "config:28-31"); /* EGI: Event Group Index */ + +static struct attribute *iommu_pmu_format_attrs[] = { + &format_attr_event_group.attr, + &format_attr_event.attr, + NULL +}; + +static struct attribute_group iommu_pmu_format_attr_group = { + .name = "format", + .attrs = iommu_pmu_format_attrs, +}; + +/* The available events are added in attr_update later */ +static struct attribute *attrs_empty[] = { + NULL +}; + +static struct attribute_group iommu_pmu_events_attr_group = { + .name = "events", + .attrs = attrs_empty, +}; + +static cpumask_t iommu_pmu_cpu_mask; + +static ssize_t +cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &iommu_pmu_cpu_mask); +} +static DEVICE_ATTR_RO(cpumask); + +static struct attribute *iommu_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL +}; + +static struct attribute_group iommu_pmu_cpumask_attr_group = { + .attrs = iommu_pmu_cpumask_attrs, +}; + +static const struct attribute_group *iommu_pmu_attr_groups[] = { + &iommu_pmu_format_attr_group, + &iommu_pmu_events_attr_group, + &iommu_pmu_cpumask_attr_group, + NULL +}; + +static inline struct iommu_pmu *dev_to_iommu_pmu(struct device *dev) +{ + /* + * The perf_event creates its own dev for each PMU. + * See pmu_dev_alloc() + */ + return container_of(dev_get_drvdata(dev), struct iommu_pmu, pmu); +} + +#define IOMMU_PMU_ATTR(_name, _format, _filter) \ + PMU_FORMAT_ATTR(_name, _format); \ + \ +static struct attribute *_name##_attr[] = { \ + &format_attr_##_name.attr, \ + NULL \ +}; \ + \ +static umode_t \ +_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i) \ +{ \ + struct device *dev = kobj_to_dev(kobj); \ + struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev); \ + \ + if (!iommu_pmu) \ + return 0; \ + return (iommu_pmu->filter & _filter) ? 
attr->mode : 0; \ +} \ + \ +static struct attribute_group _name = { \ + .name = "format", \ + .attrs = _name##_attr, \ + .is_visible = _name##_is_visible, \ +}; + +IOMMU_PMU_ATTR(filter_requester_id_en, "config1:0", IOMMU_PMU_FILTER_REQUESTER_ID); +IOMMU_PMU_ATTR(filter_domain_en, "config1:1", IOMMU_PMU_FILTER_DOMAIN); +IOMMU_PMU_ATTR(filter_pasid_en, "config1:2", IOMMU_PMU_FILTER_PASID); +IOMMU_PMU_ATTR(filter_ats_en, "config1:3", IOMMU_PMU_FILTER_ATS); +IOMMU_PMU_ATTR(filter_page_table_en, "config1:4", IOMMU_PMU_FILTER_PAGE_TABLE); +IOMMU_PMU_ATTR(filter_requester_id, "config1:16-31", IOMMU_PMU_FILTER_REQUESTER_ID); +IOMMU_PMU_ATTR(filter_domain, "config1:32-47", IOMMU_PMU_FILTER_DOMAIN); +IOMMU_PMU_ATTR(filter_pasid, "config2:0-21", IOMMU_PMU_FILTER_PASID); +IOMMU_PMU_ATTR(filter_ats, "config2:24-28", IOMMU_PMU_FILTER_ATS); +IOMMU_PMU_ATTR(filter_page_table, "config2:32-36", IOMMU_PMU_FILTER_PAGE_TABLE); + +#define iommu_pmu_en_requester_id(e) ((e) & 0x1) +#define iommu_pmu_en_domain(e) (((e) >> 1) & 0x1) +#define iommu_pmu_en_pasid(e) (((e) >> 2) & 0x1) +#define iommu_pmu_en_ats(e) (((e) >> 3) & 0x1) +#define iommu_pmu_en_page_table(e) (((e) >> 4) & 0x1) +#define iommu_pmu_get_requester_id(filter) (((filter) >> 16) & 0xffff) +#define iommu_pmu_get_domain(filter) (((filter) >> 32) & 0xffff) +#define iommu_pmu_get_pasid(filter) ((filter) & 0x3fffff) +#define iommu_pmu_get_ats(filter) (((filter) >> 24) & 0x1f) +#define iommu_pmu_get_page_table(filter) (((filter) >> 32) & 0x1f) + +#define iommu_pmu_set_filter(_name, _config, _filter, _idx, _econfig) \ +{ \ + if ((iommu_pmu->filter & _filter) && iommu_pmu_en_##_name(_econfig)) { \ + dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET + \ + IOMMU_PMU_CFG_SIZE + \ + (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET, \ + iommu_pmu_get_##_name(_config) | IOMMU_PMU_FILTER_EN);\ + } \ +} + +#define iommu_pmu_clear_filter(_filter, _idx) \ +{ \ + if (iommu_pmu->filter & _filter) { \ + dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET + \ + IOMMU_PMU_CFG_SIZE + \ + (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET, \ + 0); \ + } \ +} + +/* + * Define the event attr related functions + * Input: _name: event attr name + * _string: string of the event in sysfs + * _g_idx: event group encoding + * _event: event encoding + */ +#define IOMMU_PMU_EVENT_ATTR(_name, _string, _g_idx, _event) \ + PMU_EVENT_ATTR_STRING(_name, event_attr_##_name, _string) \ + \ +static struct attribute *_name##_attr[] = { \ + &event_attr_##_name.attr.attr, \ + NULL \ +}; \ + \ +static umode_t \ +_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i) \ +{ \ + struct device *dev = kobj_to_dev(kobj); \ + struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev); \ + \ + if (!iommu_pmu) \ + return 0; \ + return (iommu_pmu->evcap[_g_idx] & _event) ? 
attr->mode : 0; \ +} \ + \ +static struct attribute_group _name = { \ + .name = "events", \ + .attrs = _name##_attr, \ + .is_visible = _name##_is_visible, \ +}; + +IOMMU_PMU_EVENT_ATTR(iommu_clocks, "event_group=0x0,event=0x001", 0x0, 0x001) +IOMMU_PMU_EVENT_ATTR(iommu_requests, "event_group=0x0,event=0x002", 0x0, 0x002) +IOMMU_PMU_EVENT_ATTR(pw_occupancy, "event_group=0x0,event=0x004", 0x0, 0x004) +IOMMU_PMU_EVENT_ATTR(ats_blocked, "event_group=0x0,event=0x008", 0x0, 0x008) +IOMMU_PMU_EVENT_ATTR(iommu_mrds, "event_group=0x1,event=0x001", 0x1, 0x001) +IOMMU_PMU_EVENT_ATTR(iommu_mem_blocked, "event_group=0x1,event=0x020", 0x1, 0x020) +IOMMU_PMU_EVENT_ATTR(pg_req_posted, "event_group=0x1,event=0x040", 0x1, 0x040) +IOMMU_PMU_EVENT_ATTR(ctxt_cache_lookup, "event_group=0x2,event=0x001", 0x2, 0x001) +IOMMU_PMU_EVENT_ATTR(ctxt_cache_hit, "event_group=0x2,event=0x002", 0x2, 0x002) +IOMMU_PMU_EVENT_ATTR(pasid_cache_lookup, "event_group=0x2,event=0x004", 0x2, 0x004) +IOMMU_PMU_EVENT_ATTR(pasid_cache_hit, "event_group=0x2,event=0x008", 0x2, 0x008) +IOMMU_PMU_EVENT_ATTR(ss_nonleaf_lookup, "event_group=0x2,event=0x010", 0x2, 0x010) +IOMMU_PMU_EVENT_ATTR(ss_nonleaf_hit, "event_group=0x2,event=0x020", 0x2, 0x020) +IOMMU_PMU_EVENT_ATTR(fs_nonleaf_lookup, "event_group=0x2,event=0x040", 0x2, 0x040) +IOMMU_PMU_EVENT_ATTR(fs_nonleaf_hit, "event_group=0x2,event=0x080", 0x2, 0x080) +IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_lookup, "event_group=0x2,event=0x100", 0x2, 0x100) +IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_hit, "event_group=0x2,event=0x200", 0x2, 0x200) +IOMMU_PMU_EVENT_ATTR(iotlb_lookup, "event_group=0x3,event=0x001", 0x3, 0x001) +IOMMU_PMU_EVENT_ATTR(iotlb_hit, "event_group=0x3,event=0x002", 0x3, 0x002) +IOMMU_PMU_EVENT_ATTR(hpt_leaf_lookup, "event_group=0x3,event=0x004", 0x3, 0x004) +IOMMU_PMU_EVENT_ATTR(hpt_leaf_hit, "event_group=0x3,event=0x008", 0x3, 0x008) +IOMMU_PMU_EVENT_ATTR(int_cache_lookup, "event_group=0x4,event=0x001", 0x4, 0x001) +IOMMU_PMU_EVENT_ATTR(int_cache_hit_nonposted, "event_group=0x4,event=0x002", 0x4, 0x002) +IOMMU_PMU_EVENT_ATTR(int_cache_hit_posted, "event_group=0x4,event=0x004", 0x4, 0x004) + +static const struct attribute_group *iommu_pmu_attr_update[] = { + &filter_requester_id_en, + &filter_domain_en, + &filter_pasid_en, + &filter_ats_en, + &filter_page_table_en, + &filter_requester_id, + &filter_domain, + &filter_pasid, + &filter_ats, + &filter_page_table, + &iommu_clocks, + &iommu_requests, + &pw_occupancy, + &ats_blocked, + &iommu_mrds, + &iommu_mem_blocked, + &pg_req_posted, + &ctxt_cache_lookup, + &ctxt_cache_hit, + &pasid_cache_lookup, + &pasid_cache_hit, + &ss_nonleaf_lookup, + &ss_nonleaf_hit, + &fs_nonleaf_lookup, + &fs_nonleaf_hit, + &hpt_nonleaf_lookup, + &hpt_nonleaf_hit, + &iotlb_lookup, + &iotlb_hit, + &hpt_leaf_lookup, + &hpt_leaf_hit, + &int_cache_lookup, + &int_cache_hit_nonposted, + &int_cache_hit_posted, + NULL +}; + +static inline void __iomem * +iommu_event_base(struct iommu_pmu *iommu_pmu, int idx) +{ + return iommu_pmu->cntr_reg + idx * iommu_pmu->cntr_stride; +} + +static inline void __iomem * +iommu_config_base(struct iommu_pmu *iommu_pmu, int idx) +{ + return iommu_pmu->cfg_reg + idx * IOMMU_PMU_CFG_OFFSET; +} + +static inline struct iommu_pmu *iommu_event_to_pmu(struct perf_event *event) +{ + return container_of(event->pmu, struct iommu_pmu, pmu); +} + +static inline u64 iommu_event_config(struct perf_event *event) +{ + u64 config = event->attr.config; + + return (iommu_event_select(config) << IOMMU_EVENT_CFG_ES_SHIFT) | + (iommu_event_group(config) << 
IOMMU_EVENT_CFG_EGI_SHIFT) | + IOMMU_EVENT_CFG_INT; +} + +static inline bool is_iommu_pmu_event(struct iommu_pmu *iommu_pmu, + struct perf_event *event) +{ + return event->pmu == &iommu_pmu->pmu; +} + +static int iommu_pmu_validate_event(struct perf_event *event) +{ + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); + u32 event_group = iommu_event_group(event->attr.config); + + if (event_group >= iommu_pmu->num_eg) + return -EINVAL; + + return 0; +} + +static int iommu_pmu_validate_group(struct perf_event *event) +{ + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); + struct perf_event *sibling; + int nr = 0; + + /* + * All events in a group must be scheduled simultaneously. + * Check whether there is enough counters for all the events. + */ + for_each_sibling_event(sibling, event->group_leader) { + if (!is_iommu_pmu_event(iommu_pmu, sibling) || + sibling->state <= PERF_EVENT_STATE_OFF) + continue; + + if (++nr > iommu_pmu->num_cntr) + return -EINVAL; + } + + return 0; +} + +static int iommu_pmu_event_init(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* sampling not supported */ + if (event->attr.sample_period) + return -EINVAL; + + if (event->cpu < 0) + return -EINVAL; + + if (iommu_pmu_validate_event(event)) + return -EINVAL; + + hwc->config = iommu_event_config(event); + + return iommu_pmu_validate_group(event); +} + +static void iommu_pmu_event_update(struct perf_event *event) +{ + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); + struct hw_perf_event *hwc = &event->hw; + u64 prev_count, new_count, delta; + int shift = 64 - iommu_pmu->cntr_width; + +again: + prev_count = local64_read(&hwc->prev_count); + new_count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx)); + if (local64_xchg(&hwc->prev_count, new_count) != prev_count) + goto again; + + /* + * The counter width is enumerated. Always shift the counter + * before using it. + */ + delta = (new_count << shift) - (prev_count << shift); + delta >>= shift; + + local64_add(delta, &event->count); +} + +static void iommu_pmu_start(struct perf_event *event, int flags) +{ + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); + struct intel_iommu *iommu = iommu_pmu->iommu; + struct hw_perf_event *hwc = &event->hw; + u64 count; + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + if (WARN_ON_ONCE(hwc->idx < 0 || hwc->idx >= IOMMU_PMU_IDX_MAX)) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + hwc->state = 0; + + /* Always reprogram the period */ + count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx)); + local64_set((&hwc->prev_count), count); + + /* + * The error of ecmd will be ignored. + * - The existing perf_event subsystem doesn't handle the error. + * Only IOMMU PMU returns runtime HW error. We don't want to + * change the existing generic interfaces for the specific case. + * - It's a corner case caused by HW, which is very unlikely to + * happen. There is nothing SW can do. + * - The worst case is that the user will get <not count> with + * perf command, which can give the user some hints. 
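The "always shift the counter" comment in iommu_pmu_event_update() above is the usual trick for counters narrower than 64 bits: shifting both samples up by 64 - cntr_width lets a counter wrap cancel out in modular 64-bit arithmetic, and shifting the difference back down yields the true increment. A standalone sketch of the same arithmetic (helper name is ours):

#include <stdio.h>
#include <stdint.h>

static uint64_t counter_delta(uint64_t prev, uint64_t curr, unsigned int width)
{
    unsigned int shift = 64 - width;
    uint64_t d = (curr << shift) - (prev << shift); /* wrap falls out here */

    return d >> shift;
}

int main(void)
{
    /* A 36-bit counter that wraps from 2^36 - 2 to 3 really advanced by 5. */
    printf("delta = %llu\n",
           (unsigned long long)counter_delta((1ULL << 36) - 2, 3, 36));
    return 0;
}

A plain 64-bit subtraction of the raw register values would instead produce an enormous bogus delta.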
+ */ + ecmd_submit_sync(iommu, DMA_ECMD_ENABLE, hwc->idx, 0); + + perf_event_update_userpage(event); +} + +static void iommu_pmu_stop(struct perf_event *event, int flags) +{ + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); + struct intel_iommu *iommu = iommu_pmu->iommu; + struct hw_perf_event *hwc = &event->hw; + + if (!(hwc->state & PERF_HES_STOPPED)) { + ecmd_submit_sync(iommu, DMA_ECMD_DISABLE, hwc->idx, 0); + + iommu_pmu_event_update(event); + + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + } +} + +static inline int +iommu_pmu_validate_per_cntr_event(struct iommu_pmu *iommu_pmu, + int idx, struct perf_event *event) +{ + u32 event_group = iommu_event_group(event->attr.config); + u32 select = iommu_event_select(event->attr.config); + + if (!(iommu_pmu->cntr_evcap[idx][event_group] & select)) + return -EINVAL; + + return 0; +} + +static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx; + + /* + * The counters which support limited events are usually at the end. + * Schedule them first to accommodate more events. + */ + for (idx = iommu_pmu->num_cntr - 1; idx >= 0; idx--) { + if (test_and_set_bit(idx, iommu_pmu->used_mask)) + continue; + /* Check per-counter event capabilities */ + if (!iommu_pmu_validate_per_cntr_event(iommu_pmu, idx, event)) + break; + clear_bit(idx, iommu_pmu->used_mask); + } + if (idx < 0) + return -EINVAL; + + iommu_pmu->event_list[idx] = event; + hwc->idx = idx; + + /* config events */ + dmar_writeq(iommu_config_base(iommu_pmu, idx), hwc->config); + + iommu_pmu_set_filter(requester_id, event->attr.config1, + IOMMU_PMU_FILTER_REQUESTER_ID, idx, + event->attr.config1); + iommu_pmu_set_filter(domain, event->attr.config1, + IOMMU_PMU_FILTER_DOMAIN, idx, + event->attr.config1); + iommu_pmu_set_filter(pasid, event->attr.config1, + IOMMU_PMU_FILTER_PASID, idx, + event->attr.config1); + iommu_pmu_set_filter(ats, event->attr.config2, + IOMMU_PMU_FILTER_ATS, idx, + event->attr.config1); + iommu_pmu_set_filter(page_table, event->attr.config2, + IOMMU_PMU_FILTER_PAGE_TABLE, idx, + event->attr.config1); + + return 0; +} + +static int iommu_pmu_add(struct perf_event *event, int flags) +{ + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); + struct hw_perf_event *hwc = &event->hw; + int ret; + + ret = iommu_pmu_assign_event(iommu_pmu, event); + if (ret < 0) + return ret; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + if (flags & PERF_EF_START) + iommu_pmu_start(event, 0); + + return 0; +} + +static void iommu_pmu_del(struct perf_event *event, int flags) +{ + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); + int idx = event->hw.idx; + + iommu_pmu_stop(event, PERF_EF_UPDATE); + + iommu_pmu_clear_filter(IOMMU_PMU_FILTER_REQUESTER_ID, idx); + iommu_pmu_clear_filter(IOMMU_PMU_FILTER_DOMAIN, idx); + iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PASID, idx); + iommu_pmu_clear_filter(IOMMU_PMU_FILTER_ATS, idx); + iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PAGE_TABLE, idx); + + iommu_pmu->event_list[idx] = NULL; + event->hw.idx = -1; + clear_bit(idx, iommu_pmu->used_mask); + + perf_event_update_userpage(event); +} + +static void iommu_pmu_enable(struct pmu *pmu) +{ + struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu); + struct intel_iommu *iommu = iommu_pmu->iommu; + + ecmd_submit_sync(iommu, DMA_ECMD_UNFREEZE, 0, 0); +} + +static void iommu_pmu_disable(struct pmu *pmu) +{ + struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu); 
+ struct intel_iommu *iommu = iommu_pmu->iommu; + + ecmd_submit_sync(iommu, DMA_ECMD_FREEZE, 0, 0); +} + +static void iommu_pmu_counter_overflow(struct iommu_pmu *iommu_pmu) +{ + struct perf_event *event; + u64 status; + int i; + + /* + * Two counters may be overflowed very close. Always check + * whether there are more to handle. + */ + while ((status = dmar_readq(iommu_pmu->overflow))) { + for_each_set_bit(i, (unsigned long *)&status, iommu_pmu->num_cntr) { + /* + * Find the assigned event of the counter. + * Accumulate the value into the event->count. + */ + event = iommu_pmu->event_list[i]; + if (!event) { + pr_warn_once("Cannot find the assigned event for counter %d\n", i); + continue; + } + iommu_pmu_event_update(event); + } + + dmar_writeq(iommu_pmu->overflow, status); + } +} + +static irqreturn_t iommu_pmu_irq_handler(int irq, void *dev_id) +{ + struct intel_iommu *iommu = dev_id; + + if (!dmar_readl(iommu->reg + DMAR_PERFINTRSTS_REG)) + return IRQ_NONE; + + iommu_pmu_counter_overflow(iommu->pmu); + + /* Clear the status bit */ + dmar_writel(iommu->reg + DMAR_PERFINTRSTS_REG, DMA_PERFINTRSTS_PIS); + + return IRQ_HANDLED; +} + +static int __iommu_pmu_register(struct intel_iommu *iommu) +{ + struct iommu_pmu *iommu_pmu = iommu->pmu; + + iommu_pmu->pmu.name = iommu->name; + iommu_pmu->pmu.task_ctx_nr = perf_invalid_context; + iommu_pmu->pmu.event_init = iommu_pmu_event_init; + iommu_pmu->pmu.pmu_enable = iommu_pmu_enable; + iommu_pmu->pmu.pmu_disable = iommu_pmu_disable; + iommu_pmu->pmu.add = iommu_pmu_add; + iommu_pmu->pmu.del = iommu_pmu_del; + iommu_pmu->pmu.start = iommu_pmu_start; + iommu_pmu->pmu.stop = iommu_pmu_stop; + iommu_pmu->pmu.read = iommu_pmu_event_update; + iommu_pmu->pmu.attr_groups = iommu_pmu_attr_groups; + iommu_pmu->pmu.attr_update = iommu_pmu_attr_update; + iommu_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; + iommu_pmu->pmu.module = THIS_MODULE; + + return perf_pmu_register(&iommu_pmu->pmu, iommu_pmu->pmu.name, -1); +} + +static inline void __iomem * +get_perf_reg_address(struct intel_iommu *iommu, u32 offset) +{ + u32 off = dmar_readl(iommu->reg + offset); + + return iommu->reg + off; +} + +int alloc_iommu_pmu(struct intel_iommu *iommu) +{ + struct iommu_pmu *iommu_pmu; + int i, j, ret; + u64 perfcap; + u32 cap; + + if (!ecap_pms(iommu->ecap)) + return 0; + + /* The IOMMU PMU requires the ECMD support as well */ + if (!cap_ecmds(iommu->cap)) + return -ENODEV; + + perfcap = dmar_readq(iommu->reg + DMAR_PERFCAP_REG); + /* The performance monitoring is not supported. 
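alloc_iommu_pmu() sizes everything from DMAR_PERFCAP_REG using the pcap_*() accessors added to iommu.h earlier in this diff. The sketch below copies those bit definitions into a user-space program and decodes a made-up capability value; the field values are purely illustrative, not read from real hardware:

#include <stdio.h>
#include <stdint.h>

#define pcap_num_cntr(p)        ((p) & 0xffff)
#define pcap_cntr_width(p)      (((p) >> 16) & 0x7f)
#define pcap_num_event_group(p) (((p) >> 24) & 0x1f)
#define pcap_filters_mask(p)    (((p) >> 32) & 0x1f)
#define pcap_interrupt(p)       (((p) >> 50) & 0x1)
#define pcap_cntr_stride(p)     (1ULL << ((((p) >> 52) & 0x7) + 10))

int main(void)
{
    /* 14 counters, 48 bits wide, 5 event groups, all 5 filters,
     * overflow interrupt supported, stride encoding x = 2 (4 KiB). */
    uint64_t perfcap = 14 | (48ULL << 16) | (5ULL << 24) |
                       (0x1fULL << 32) | (1ULL << 50) | (2ULL << 52);

    printf("counters:     %llu\n", (unsigned long long)pcap_num_cntr(perfcap));
    printf("width (bits): %llu\n", (unsigned long long)pcap_cntr_width(perfcap));
    printf("event groups: %llu\n", (unsigned long long)pcap_num_event_group(perfcap));
    printf("stride:       %llu bytes\n", (unsigned long long)pcap_cntr_stride(perfcap));
    return 0;
}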
*/ + if (!perfcap) + return -ENODEV; + + /* Sanity check for the number of the counters and event groups */ + if (!pcap_num_cntr(perfcap) || !pcap_num_event_group(perfcap)) + return -ENODEV; + + /* The interrupt on overflow is required */ + if (!pcap_interrupt(perfcap)) + return -ENODEV; + + /* Check required Enhanced Command Capability */ + if (!ecmd_has_pmu_essential(iommu)) + return -ENODEV; + + iommu_pmu = kzalloc(sizeof(*iommu_pmu), GFP_KERNEL); + if (!iommu_pmu) + return -ENOMEM; + + iommu_pmu->num_cntr = pcap_num_cntr(perfcap); + if (iommu_pmu->num_cntr > IOMMU_PMU_IDX_MAX) { + pr_warn_once("The number of IOMMU counters %d > max(%d), clipping!", + iommu_pmu->num_cntr, IOMMU_PMU_IDX_MAX); + iommu_pmu->num_cntr = IOMMU_PMU_IDX_MAX; + } + + iommu_pmu->cntr_width = pcap_cntr_width(perfcap); + iommu_pmu->filter = pcap_filters_mask(perfcap); + iommu_pmu->cntr_stride = pcap_cntr_stride(perfcap); + iommu_pmu->num_eg = pcap_num_event_group(perfcap); + + iommu_pmu->evcap = kcalloc(iommu_pmu->num_eg, sizeof(u64), GFP_KERNEL); + if (!iommu_pmu->evcap) { + ret = -ENOMEM; + goto free_pmu; + } + + /* Parse event group capabilities */ + for (i = 0; i < iommu_pmu->num_eg; i++) { + u64 pcap; + + pcap = dmar_readq(iommu->reg + DMAR_PERFEVNTCAP_REG + + i * IOMMU_PMU_CAP_REGS_STEP); + iommu_pmu->evcap[i] = pecap_es(pcap); + } + + iommu_pmu->cntr_evcap = kcalloc(iommu_pmu->num_cntr, sizeof(u32 *), GFP_KERNEL); + if (!iommu_pmu->cntr_evcap) { + ret = -ENOMEM; + goto free_pmu_evcap; + } + for (i = 0; i < iommu_pmu->num_cntr; i++) { + iommu_pmu->cntr_evcap[i] = kcalloc(iommu_pmu->num_eg, sizeof(u32), GFP_KERNEL); + if (!iommu_pmu->cntr_evcap[i]) { + ret = -ENOMEM; + goto free_pmu_cntr_evcap; + } + /* + * Set to the global capabilities, will adjust according + * to per-counter capabilities later. + */ + for (j = 0; j < iommu_pmu->num_eg; j++) + iommu_pmu->cntr_evcap[i][j] = (u32)iommu_pmu->evcap[j]; + } + + iommu_pmu->cfg_reg = get_perf_reg_address(iommu, DMAR_PERFCFGOFF_REG); + iommu_pmu->cntr_reg = get_perf_reg_address(iommu, DMAR_PERFCNTROFF_REG); + iommu_pmu->overflow = get_perf_reg_address(iommu, DMAR_PERFOVFOFF_REG); + + /* + * Check per-counter capabilities. All counters should have the + * same capabilities on Interrupt on Overflow Support and Counter + * Width. + */ + for (i = 0; i < iommu_pmu->num_cntr; i++) { + cap = dmar_readl(iommu_pmu->cfg_reg + + i * IOMMU_PMU_CFG_OFFSET + + IOMMU_PMU_CFG_CNTRCAP_OFFSET); + if (!iommu_cntrcap_pcc(cap)) + continue; + + /* + * It's possible that some counters have a different + * capability because of e.g., HW bug. Check the corner + * case here and simply drop those counters. + */ + if ((iommu_cntrcap_cw(cap) != iommu_pmu->cntr_width) || + !iommu_cntrcap_ios(cap)) { + iommu_pmu->num_cntr = i; + pr_warn("PMU counter capability inconsistent, counter number reduced to %d\n", + iommu_pmu->num_cntr); + } + + /* Clear the pre-defined events group */ + for (j = 0; j < iommu_pmu->num_eg; j++) + iommu_pmu->cntr_evcap[i][j] = 0; + + /* Override with per-counter event capabilities */ + for (j = 0; j < iommu_cntrcap_egcnt(cap); j++) { + cap = dmar_readl(iommu_pmu->cfg_reg + i * IOMMU_PMU_CFG_OFFSET + + IOMMU_PMU_CFG_CNTREVCAP_OFFSET + + (j * IOMMU_PMU_OFF_REGS_STEP)); + iommu_pmu->cntr_evcap[i][iommu_event_group(cap)] = iommu_event_select(cap); + /* + * Some events may only be supported by a specific counter. + * Track them in the evcap as well. 
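The per-counter loop above reads one capability dword per counter and picks it apart with the iommu_cntrcap_*() helpers that perfmon.h introduces further down in this diff. A self-contained decode of a made-up capability word, using the same bit layout:

#include <stdio.h>
#include <stdint.h>

#define iommu_cntrcap_pcc(p)    ((p) & 0x1)
#define iommu_cntrcap_cw(p)     (((p) >> 8) & 0xff)
#define iommu_cntrcap_ios(p)    (((p) >> 16) & 0x1)
#define iommu_cntrcap_egcnt(p)  (((p) >> 28) & 0xf)

int main(void)
{
    /* present, 48 bits wide, interrupt-on-overflow, 3 event-group entries */
    uint32_t cap = 1 | (48u << 8) | (1u << 16) | (3u << 28);

    printf("pcc=%u cw=%u ios=%u egcnt=%u\n",
           (unsigned)iommu_cntrcap_pcc(cap), (unsigned)iommu_cntrcap_cw(cap),
           (unsigned)iommu_cntrcap_ios(cap), (unsigned)iommu_cntrcap_egcnt(cap));
    return 0;
}

Counters whose width or interrupt capability disagrees with the global PERFCAP values are simply dropped by the loop, as the "HW bug" comment above explains.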
+ */ + iommu_pmu->evcap[iommu_event_group(cap)] |= iommu_event_select(cap); + } + } + + iommu_pmu->iommu = iommu; + iommu->pmu = iommu_pmu; + + return 0; + +free_pmu_cntr_evcap: + for (i = 0; i < iommu_pmu->num_cntr; i++) + kfree(iommu_pmu->cntr_evcap[i]); + kfree(iommu_pmu->cntr_evcap); +free_pmu_evcap: + kfree(iommu_pmu->evcap); +free_pmu: + kfree(iommu_pmu); + + return ret; +} + +void free_iommu_pmu(struct intel_iommu *iommu) +{ + struct iommu_pmu *iommu_pmu = iommu->pmu; + + if (!iommu_pmu) + return; + + if (iommu_pmu->evcap) { + int i; + + for (i = 0; i < iommu_pmu->num_cntr; i++) + kfree(iommu_pmu->cntr_evcap[i]); + kfree(iommu_pmu->cntr_evcap); + } + kfree(iommu_pmu->evcap); + kfree(iommu_pmu); + iommu->pmu = NULL; +} + +static int iommu_pmu_set_interrupt(struct intel_iommu *iommu) +{ + struct iommu_pmu *iommu_pmu = iommu->pmu; + int irq, ret; + + irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PERF + iommu->seq_id, iommu->node, iommu); + if (irq <= 0) + return -EINVAL; + + snprintf(iommu_pmu->irq_name, sizeof(iommu_pmu->irq_name), "dmar%d-perf", iommu->seq_id); + + iommu->perf_irq = irq; + ret = request_threaded_irq(irq, NULL, iommu_pmu_irq_handler, + IRQF_ONESHOT, iommu_pmu->irq_name, iommu); + if (ret) { + dmar_free_hwirq(irq); + iommu->perf_irq = 0; + return ret; + } + return 0; +} + +static void iommu_pmu_unset_interrupt(struct intel_iommu *iommu) +{ + if (!iommu->perf_irq) + return; + + free_irq(iommu->perf_irq, iommu); + dmar_free_hwirq(iommu->perf_irq); + iommu->perf_irq = 0; +} + +static int iommu_pmu_cpu_online(unsigned int cpu) +{ + if (cpumask_empty(&iommu_pmu_cpu_mask)) + cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask); + + return 0; +} + +static int iommu_pmu_cpu_offline(unsigned int cpu) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + int target; + + if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask)) + return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + + if (target < nr_cpu_ids) + cpumask_set_cpu(target, &iommu_pmu_cpu_mask); + else + target = -1; + + rcu_read_lock(); + + for_each_iommu(iommu, drhd) { + if (!iommu->pmu) + continue; + perf_pmu_migrate_context(&iommu->pmu->pmu, cpu, target); + } + rcu_read_unlock(); + + return 0; +} + +static int nr_iommu_pmu; + +static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu) +{ + int ret; + + if (nr_iommu_pmu++) + return 0; + + ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE, + "driver/iommu/intel/perfmon:online", + iommu_pmu_cpu_online, + iommu_pmu_cpu_offline); + if (ret) + nr_iommu_pmu = 0; + + return ret; +} + +static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu) +{ + if (--nr_iommu_pmu) + return; + + cpuhp_remove_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE); +} + +void iommu_pmu_register(struct intel_iommu *iommu) +{ + struct iommu_pmu *iommu_pmu = iommu->pmu; + + if (!iommu_pmu) + return; + + if (__iommu_pmu_register(iommu)) + goto err; + + if (iommu_pmu_cpuhp_setup(iommu_pmu)) + goto unregister; + + /* Set interrupt for overflow */ + if (iommu_pmu_set_interrupt(iommu)) + goto cpuhp_free; + + return; + +cpuhp_free: + iommu_pmu_cpuhp_free(iommu_pmu); +unregister: + perf_pmu_unregister(&iommu_pmu->pmu); +err: + pr_err("Failed to register PMU for iommu (seq_id = %d)\n", iommu->seq_id); + free_iommu_pmu(iommu); +} + +void iommu_pmu_unregister(struct intel_iommu *iommu) +{ + struct iommu_pmu *iommu_pmu = iommu->pmu; + + if (!iommu_pmu) + return; + + iommu_pmu_unset_interrupt(iommu); + iommu_pmu_cpuhp_free(iommu_pmu); + perf_pmu_unregister(&iommu_pmu->pmu); +} diff --git 
a/drivers/iommu/intel/perfmon.h b/drivers/iommu/intel/perfmon.h new file mode 100644 index 000000000000..58606af9a2b9 --- /dev/null +++ b/drivers/iommu/intel/perfmon.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * PERFCFGOFF_REG, PERFFRZOFF_REG + * PERFOVFOFF_REG, PERFCNTROFF_REG + */ +#define IOMMU_PMU_NUM_OFF_REGS 4 +#define IOMMU_PMU_OFF_REGS_STEP 4 + +#define IOMMU_PMU_FILTER_REQUESTER_ID 0x01 +#define IOMMU_PMU_FILTER_DOMAIN 0x02 +#define IOMMU_PMU_FILTER_PASID 0x04 +#define IOMMU_PMU_FILTER_ATS 0x08 +#define IOMMU_PMU_FILTER_PAGE_TABLE 0x10 + +#define IOMMU_PMU_FILTER_EN BIT(31) + +#define IOMMU_PMU_CFG_OFFSET 0x100 +#define IOMMU_PMU_CFG_CNTRCAP_OFFSET 0x80 +#define IOMMU_PMU_CFG_CNTREVCAP_OFFSET 0x84 +#define IOMMU_PMU_CFG_SIZE 0x8 +#define IOMMU_PMU_CFG_FILTERS_OFFSET 0x4 + +#define IOMMU_PMU_CAP_REGS_STEP 8 + +#define iommu_cntrcap_pcc(p) ((p) & 0x1) +#define iommu_cntrcap_cw(p) (((p) >> 8) & 0xff) +#define iommu_cntrcap_ios(p) (((p) >> 16) & 0x1) +#define iommu_cntrcap_egcnt(p) (((p) >> 28) & 0xf) + +#define IOMMU_EVENT_CFG_EGI_SHIFT 8 +#define IOMMU_EVENT_CFG_ES_SHIFT 32 +#define IOMMU_EVENT_CFG_INT BIT_ULL(1) + +#define iommu_event_select(p) ((p) & 0xfffffff) +#define iommu_event_group(p) (((p) >> 28) & 0xf) + +#ifdef CONFIG_INTEL_IOMMU_PERF_EVENTS +int alloc_iommu_pmu(struct intel_iommu *iommu); +void free_iommu_pmu(struct intel_iommu *iommu); +void iommu_pmu_register(struct intel_iommu *iommu); +void iommu_pmu_unregister(struct intel_iommu *iommu); +#else +static inline int +alloc_iommu_pmu(struct intel_iommu *iommu) +{ + return 0; +} + +static inline void +free_iommu_pmu(struct intel_iommu *iommu) +{ +} + +static inline void +iommu_pmu_register(struct intel_iommu *iommu) +{ +} + +static inline void +iommu_pmu_unregister(struct intel_iommu *iommu) +{ +} +#endif /* CONFIG_INTEL_IOMMU_PERF_EVENTS */ diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index c76b66263467..7367f56c3bad 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -9,7 +9,6 @@ #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/slab.h> -#include <linux/intel-svm.h> #include <linux/rculist.h> #include <linux/pci.h> #include <linux/pci-ats.h> @@ -79,7 +78,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) } iommu->prq = page_address(pages); - irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu); + irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu); if (irq <= 0) { pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n", iommu->name); @@ -299,9 +298,8 @@ out: return 0; } -static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, - struct device *dev, - struct mm_struct *mm) +static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, + struct mm_struct *mm) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_svm_dev *sdev; @@ -313,7 +311,7 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, if (!svm) { svm = kzalloc(sizeof(*svm), GFP_KERNEL); if (!svm) - return ERR_PTR(-ENOMEM); + return -ENOMEM; svm->pasid = mm->pasid; svm->mm = mm; @@ -323,24 +321,17 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, ret = mmu_notifier_register(&svm->notifier, mm); if (ret) { kfree(svm); - return ERR_PTR(ret); + return ret; } ret = pasid_private_add(svm->pasid, svm); if (ret) { mmu_notifier_unregister(&svm->notifier, mm); kfree(svm); - return ERR_PTR(ret); + return ret; } } - 
/* Find the matching device in svm list */ - sdev = svm_lookup_device_by_dev(svm, dev); - if (sdev) { - sdev->users++; - goto success; - } - sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); if (!sdev) { ret = -ENOMEM; @@ -351,12 +342,8 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, sdev->iommu = iommu; sdev->did = FLPT_DEFAULT_DID; sdev->sid = PCI_DEVID(info->bus, info->devfn); - sdev->users = 1; - sdev->pasid = svm->pasid; - sdev->sva.dev = dev; init_rcu_head(&sdev->rcu); if (info->ats_enabled) { - sdev->dev_iotlb = 1; sdev->qdep = info->ats_qdep; if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS) sdev->qdep = 0; @@ -370,8 +357,8 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, goto free_sdev; list_add_rcu(&sdev->list, &svm->devs); -success: - return &sdev->sva; + + return 0; free_sdev: kfree(sdev); @@ -382,7 +369,7 @@ free_svm: kfree(svm); } - return ERR_PTR(ret); + return ret; } /* Caller must hold pasid_mutex */ @@ -404,32 +391,32 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) mm = svm->mm; if (sdev) { - sdev->users--; - if (!sdev->users) { - list_del_rcu(&sdev->list); - /* Flush the PASID cache and IOTLB for this device. - * Note that we do depend on the hardware *not* using - * the PASID any more. Just as we depend on other - * devices never using PASIDs that they have no right - * to use. We have a *shared* PASID table, because it's - * large and has to be physically contiguous. So it's - * hard to be as defensive as we might like. */ - intel_pasid_tear_down_entry(iommu, dev, - svm->pasid, false); - intel_svm_drain_prq(dev, svm->pasid); - kfree_rcu(sdev, rcu); - - if (list_empty(&svm->devs)) { - if (svm->notifier.ops) - mmu_notifier_unregister(&svm->notifier, mm); - pasid_private_remove(svm->pasid); - /* We mandate that no page faults may be outstanding - * for the PASID when intel_svm_unbind_mm() is called. - * If that is not obeyed, subtle errors will happen. - * Let's make them less subtle... */ - memset(svm, 0x6b, sizeof(*svm)); - kfree(svm); - } + list_del_rcu(&sdev->list); + /* + * Flush the PASID cache and IOTLB for this device. + * Note that we do depend on the hardware *not* using + * the PASID any more. Just as we depend on other + * devices never using PASIDs that they have no right + * to use. We have a *shared* PASID table, because it's + * large and has to be physically contiguous. So it's + * hard to be as defensive as we might like. + */ + intel_pasid_tear_down_entry(iommu, dev, svm->pasid, false); + intel_svm_drain_prq(dev, svm->pasid); + kfree_rcu(sdev, rcu); + + if (list_empty(&svm->devs)) { + if (svm->notifier.ops) + mmu_notifier_unregister(&svm->notifier, mm); + pasid_private_remove(svm->pasid); + /* + * We mandate that no page faults may be outstanding + * for the PASID when intel_svm_unbind_mm() is called. + * If that is not obeyed, subtle errors will happen. + * Let's make them less subtle... 
+ */ + memset(svm, 0x6b, sizeof(*svm)); + kfree(svm); } } out: @@ -854,13 +841,10 @@ static int intel_svm_set_dev_pasid(struct iommu_domain *domain, struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; struct mm_struct *mm = domain->mm; - struct iommu_sva *sva; - int ret = 0; + int ret; mutex_lock(&pasid_mutex); - sva = intel_svm_bind_mm(iommu, dev, mm); - if (IS_ERR(sva)) - ret = PTR_ERR(sva); + ret = intel_svm_bind_mm(iommu, dev, mm); mutex_unlock(&pasid_mutex); return ret; diff --git a/drivers/iommu/iommu-traces.c b/drivers/iommu/iommu-traces.c index 1e9ca7789de1..23416bf76df9 100644 --- a/drivers/iommu/iommu-traces.c +++ b/drivers/iommu/iommu-traces.c @@ -18,7 +18,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(remove_device_from_group); /* iommu_device_event */ EXPORT_TRACEPOINT_SYMBOL_GPL(attach_device_to_domain); -EXPORT_TRACEPOINT_SYMBOL_GPL(detach_device_from_domain); /* iommu_map_unmap */ EXPORT_TRACEPOINT_SYMBOL_GPL(map); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 5f6a85aea501..b3f847b25b4f 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -371,6 +371,30 @@ err_unlock: return ret; } +static bool iommu_is_attach_deferred(struct device *dev) +{ + const struct iommu_ops *ops = dev_iommu_ops(dev); + + if (ops->is_attach_deferred) + return ops->is_attach_deferred(dev); + + return false; +} + +static int iommu_group_do_dma_first_attach(struct device *dev, void *data) +{ + struct iommu_domain *domain = data; + + lockdep_assert_held(&dev->iommu_group->mutex); + + if (iommu_is_attach_deferred(dev)) { + dev->iommu->attach_deferred = 1; + return 0; + } + + return __iommu_attach_device(domain, dev); +} + int iommu_probe_device(struct device *dev) { const struct iommu_ops *ops; @@ -401,7 +425,7 @@ int iommu_probe_device(struct device *dev) * attach the default domain. 
*/ if (group->default_domain && !group->owner) { - ret = __iommu_attach_device(group->default_domain, dev); + ret = iommu_group_do_dma_first_attach(dev, group->default_domain); if (ret) { mutex_unlock(&group->mutex); iommu_group_put(group); @@ -774,12 +798,16 @@ struct iommu_group *iommu_group_alloc(void) ret = iommu_group_create_file(group, &iommu_group_attr_reserved_regions); - if (ret) + if (ret) { + kobject_put(group->devices_kobj); return ERR_PTR(ret); + } ret = iommu_group_create_file(group, &iommu_group_attr_type); - if (ret) + if (ret) { + kobject_put(group->devices_kobj); return ERR_PTR(ret); + } pr_debug("Allocated group %d\n", group->id); @@ -930,7 +958,7 @@ map_end: if (map_size) { ret = iommu_map(domain, addr - map_size, addr - map_size, map_size, - entry->prot); + entry->prot, GFP_KERNEL); if (ret) goto out; map_size = 0; @@ -947,16 +975,6 @@ out: return ret; } -static bool iommu_is_attach_deferred(struct device *dev) -{ - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (ops->is_attach_deferred) - return ops->is_attach_deferred(dev); - - return false; -} - /** * iommu_group_add_device - add a device to an iommu group * @group: the group into which to add the device (reference should be held) @@ -1009,8 +1027,8 @@ rename: mutex_lock(&group->mutex); list_add_tail(&device->list, &group->devices); - if (group->domain && !iommu_is_attach_deferred(dev)) - ret = __iommu_attach_device(group->domain, dev); + if (group->domain) + ret = iommu_group_do_dma_first_attach(dev, group->domain); mutex_unlock(&group->mutex); if (ret) goto err_put_group; @@ -1776,21 +1794,10 @@ static void probe_alloc_default_domain(struct bus_type *bus, } -static int iommu_group_do_dma_attach(struct device *dev, void *data) -{ - struct iommu_domain *domain = data; - int ret = 0; - - if (!iommu_is_attach_deferred(dev)) - ret = __iommu_attach_device(domain, dev); - - return ret; -} - -static int __iommu_group_dma_attach(struct iommu_group *group) +static int __iommu_group_dma_first_attach(struct iommu_group *group) { return __iommu_group_for_each_dev(group, group->default_domain, - iommu_group_do_dma_attach); + iommu_group_do_dma_first_attach); } static int iommu_group_do_probe_finalize(struct device *dev, void *data) @@ -1855,7 +1862,7 @@ int bus_iommu_probe(struct bus_type *bus) iommu_group_create_direct_mappings(group); - ret = __iommu_group_dma_attach(group); + ret = __iommu_group_dma_first_attach(group); mutex_unlock(&group->mutex); @@ -1987,9 +1994,11 @@ static int __iommu_attach_device(struct iommu_domain *domain, return -ENODEV; ret = domain->ops->attach_dev(domain, dev); - if (!ret) - trace_attach_device_to_domain(dev); - return ret; + if (ret) + return ret; + dev->iommu->attach_deferred = 0; + trace_attach_device_to_domain(dev); + return 0; } /** @@ -2034,22 +2043,12 @@ EXPORT_SYMBOL_GPL(iommu_attach_device); int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain) { - if (iommu_is_attach_deferred(dev)) + if (dev->iommu && dev->iommu->attach_deferred) return __iommu_attach_device(domain, dev); return 0; } -static void __iommu_detach_device(struct iommu_domain *domain, - struct device *dev) -{ - if (iommu_is_attach_deferred(dev)) - return; - - domain->ops->detach_dev(domain, dev); - trace_detach_device_from_domain(dev); -} - void iommu_detach_device(struct iommu_domain *domain, struct device *dev) { struct iommu_group *group; @@ -2124,8 +2123,22 @@ static int __iommu_attach_group(struct iommu_domain *domain, ret = __iommu_group_for_each_dev(group, domain, 
iommu_group_do_attach_device); - if (ret == 0) + if (ret == 0) { group->domain = domain; + } else { + /* + * To recover from the case when certain device within the + * group fails to attach to the new domain, we need force + * attaching all devices back to the old domain. The old + * domain is compatible for all devices in the group, + * hence the iommu driver should always return success. + */ + struct iommu_domain *old_domain = group->domain; + + group->domain = NULL; + WARN(__iommu_group_set_domain(group, old_domain), + "iommu driver failed to attach a compatible domain"); + } return ret; } @@ -2154,11 +2167,12 @@ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_attach_group); -static int iommu_group_do_detach_device(struct device *dev, void *data) +static int iommu_group_do_set_platform_dma(struct device *dev, void *data) { - struct iommu_domain *domain = data; + const struct iommu_ops *ops = dev_iommu_ops(dev); - __iommu_detach_device(domain, dev); + if (!WARN_ON(!ops->set_platform_dma_ops)) + ops->set_platform_dma_ops(dev); return 0; } @@ -2172,15 +2186,13 @@ static int __iommu_group_set_domain(struct iommu_group *group, return 0; /* - * New drivers should support default domains and so the detach_dev() op - * will never be called. Otherwise the NULL domain represents some + * New drivers should support default domains, so set_platform_dma() + * op will never be called. Otherwise the NULL domain represents some * platform specific behavior. */ if (!new_domain) { - if (WARN_ON(!group->domain->ops->detach_dev)) - return -EINVAL; - __iommu_group_for_each_dev(group, group->domain, - iommu_group_do_detach_device); + __iommu_group_for_each_dev(group, NULL, + iommu_group_do_set_platform_dma); group->domain = NULL; return 0; } @@ -2360,34 +2372,27 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, return ret; } -static int _iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +int iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { const struct iommu_domain_ops *ops = domain->ops; int ret; + might_sleep_if(gfpflags_allow_blocking(gfp)); + + /* Discourage passing strange GFP flags */ + if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | + __GFP_HIGHMEM))) + return -EINVAL; + ret = __iommu_map(domain, iova, paddr, size, prot, gfp); if (ret == 0 && ops->iotlb_sync_map) ops->iotlb_sync_map(domain, iova, size); return ret; } - -int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot) -{ - might_sleep(); - return _iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL); -} EXPORT_SYMBOL_GPL(iommu_map); -int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot) -{ - return _iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC); -} -EXPORT_SYMBOL_GPL(iommu_map_atomic); - static size_t __iommu_unmap_pages(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather) @@ -2477,9 +2482,9 @@ size_t iommu_unmap_fast(struct iommu_domain *domain, } EXPORT_SYMBOL_GPL(iommu_unmap_fast); -static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, - struct scatterlist *sg, unsigned int nents, int prot, - gfp_t gfp) +ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, + struct scatterlist *sg, unsigned int nents, int 
prot, + gfp_t gfp) { const struct iommu_domain_ops *ops = domain->ops; size_t len = 0, mapped = 0; @@ -2487,6 +2492,13 @@ static ssize_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, unsigned int i = 0; int ret; + might_sleep_if(gfpflags_allow_blocking(gfp)); + + /* Discourage passing strange GFP flags */ + if (WARN_ON_ONCE(gfp & (__GFP_COMP | __GFP_DMA | __GFP_DMA32 | + __GFP_HIGHMEM))) + return -EINVAL; + while (i <= nents) { phys_addr_t s_phys = sg_phys(sg); @@ -2526,21 +2538,8 @@ out_err: return ret; } - -ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, - struct scatterlist *sg, unsigned int nents, int prot) -{ - might_sleep(); - return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_KERNEL); -} EXPORT_SYMBOL_GPL(iommu_map_sg); -ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, unsigned long iova, - struct scatterlist *sg, unsigned int nents, int prot) -{ - return __iommu_map_sg(domain, iova, sg, nents, prot, GFP_ATOMIC); -} - /** * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework * @domain: the iommu domain where the fault has happened diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 1e1d3509efae..f8d92c9bb65b 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -456,7 +456,8 @@ static int batch_iommu_map_small(struct iommu_domain *domain, size % PAGE_SIZE); while (size) { - rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot); + rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot, + GFP_KERNEL_ACCOUNT); if (rc) goto err_unmap; iova += PAGE_SIZE; @@ -500,7 +501,8 @@ static int batch_to_domain(struct pfn_batch *batch, struct iommu_domain *domain, else rc = iommu_map(domain, iova, PFN_PHYS(batch->pfns[cur]) + page_offset, - next_iova - iova, area->iommu_prot); + next_iova - iova, area->iommu_prot, + GFP_KERNEL_ACCOUNT); if (rc) goto err_unmap; iova = next_iova; diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index a003bd5fc65c..bdf1a4e5eae0 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -299,18 +299,6 @@ static void ipmmu_utlb_enable(struct ipmmu_vmsa_domain *domain, mmu->utlb_ctx[utlb] = domain->context_id; } -/* - * Disable MMU translation for the microTLB. - */ -static void ipmmu_utlb_disable(struct ipmmu_vmsa_domain *domain, - unsigned int utlb) -{ - struct ipmmu_vmsa_device *mmu = domain->mmu; - - ipmmu_imuctr_write(mmu, utlb, 0); - mmu->utlb_ctx[utlb] = IPMMU_CTX_INVALID; -} - static void ipmmu_tlb_flush_all(void *cookie) { struct ipmmu_vmsa_domain *domain = cookie; @@ -643,21 +631,6 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, return 0; } -static void ipmmu_detach_device(struct iommu_domain *io_domain, - struct device *dev) -{ - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); - unsigned int i; - - for (i = 0; i < fwspec->num_ids; ++i) - ipmmu_utlb_disable(domain, fwspec->ids[i]); - - /* - * TODO: Optimize by disabling the context when no device is attached. 
- */ -} - static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -876,7 +849,6 @@ static const struct iommu_ops ipmmu_ops = { .of_xlate = ipmmu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = ipmmu_attach_device, - .detach_dev = ipmmu_detach_device, .map_pages = ipmmu_map, .unmap_pages = ipmmu_unmap, .flush_iotlb_all = ipmmu_flush_iotlb_all, diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index c60624910872..454f6331c889 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -443,9 +443,9 @@ fail: return ret; } -static void msm_iommu_detach_dev(struct iommu_domain *domain, - struct device *dev) +static void msm_iommu_set_platform_dma(struct device *dev) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct msm_priv *priv = to_msm_priv(domain); unsigned long flags; struct msm_iommu_dev *iommu; @@ -678,11 +678,11 @@ static struct iommu_ops msm_iommu_ops = { .domain_alloc = msm_iommu_domain_alloc, .probe_device = msm_iommu_probe_device, .device_group = generic_device_group, + .set_platform_dma_ops = msm_iommu_set_platform_dma, .pgsize_bitmap = MSM_IOMMU_PGSIZES, .of_xlate = qcom_iommu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = msm_iommu_attach_dev, - .detach_dev = msm_iommu_detach_dev, .map_pages = msm_iommu_map, .unmap_pages = msm_iommu_unmap, /* diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 2badd6acfb23..d5a4955910ff 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -710,14 +710,6 @@ err_unlock: return ret; } -static void mtk_iommu_detach_device(struct iommu_domain *domain, - struct device *dev) -{ - struct mtk_iommu_data *data = dev_iommu_priv_get(dev); - - mtk_iommu_config(data, dev, false, 0); -} - static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -946,7 +938,6 @@ static const struct iommu_ops mtk_iommu_ops = { .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = mtk_iommu_attach_device, - .detach_dev = mtk_iommu_detach_device, .map_pages = mtk_iommu_map, .unmap_pages = mtk_iommu_unmap, .flush_iotlb_all = mtk_iommu_flush_iotlb_all, diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index ca581ff1c769..43e4c8f89e23 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -319,7 +319,7 @@ static int mtk_iommu_v1_attach_device(struct iommu_domain *domain, struct device return 0; } -static void mtk_iommu_v1_detach_device(struct iommu_domain *domain, struct device *dev) +static void mtk_iommu_v1_set_platform_dma(struct device *dev) { struct mtk_iommu_v1_data *data = dev_iommu_priv_get(dev); @@ -585,10 +585,10 @@ static const struct iommu_ops mtk_iommu_v1_ops = { .def_domain_type = mtk_iommu_v1_def_domain_type, .device_group = generic_device_group, .pgsize_bitmap = MT2701_IOMMU_PAGE_SIZE, + .set_platform_dma_ops = mtk_iommu_v1_set_platform_dma, .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = mtk_iommu_v1_attach_device, - .detach_dev = mtk_iommu_v1_detach_device, .map_pages = mtk_iommu_v1_map, .unmap_pages = mtk_iommu_v1_unmap, .iova_to_phys = mtk_iommu_v1_iova_to_phys, diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 00d98f08732f..40f57d293a79 100644 
--- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -10,6 +10,7 @@ #include <linux/limits.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/of_address.h> #include <linux/of_iommu.h> #include <linux/of_pci.h> #include <linux/pci.h> @@ -171,3 +172,98 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, return ops; } + +static enum iommu_resv_type __maybe_unused +iommu_resv_region_get_type(struct device *dev, + struct resource *phys, + phys_addr_t start, size_t length) +{ + phys_addr_t end = start + length - 1; + + /* + * IOMMU regions without an associated physical region cannot be + * mapped and are simply reservations. + */ + if (phys->start >= phys->end) + return IOMMU_RESV_RESERVED; + + /* may be IOMMU_RESV_DIRECT_RELAXABLE for certain cases */ + if (start == phys->start && end == phys->end) + return IOMMU_RESV_DIRECT; + + dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n", &phys, + &start, &end); + return IOMMU_RESV_RESERVED; +} + +/** + * of_iommu_get_resv_regions - reserved region driver helper for device tree + * @dev: device for which to get reserved regions + * @list: reserved region list + * + * IOMMU drivers can use this to implement their .get_resv_regions() callback + * for memory regions attached to a device tree node. See the reserved-memory + * device tree bindings on how to use these: + * + * Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt + */ +void of_iommu_get_resv_regions(struct device *dev, struct list_head *list) +{ +#if IS_ENABLED(CONFIG_OF_ADDRESS) + struct of_phandle_iterator it; + int err; + + of_for_each_phandle(&it, err, dev->of_node, "memory-region", NULL, 0) { + const __be32 *maps, *end; + struct resource phys; + int size; + + memset(&phys, 0, sizeof(phys)); + + /* + * The "reg" property is optional and can be omitted by reserved-memory regions + * that represent reservations in the IOVA space, which are regions that should + * not be mapped. 
+ */ + if (of_find_property(it.node, "reg", NULL)) { + err = of_address_to_resource(it.node, 0, &phys); + if (err < 0) { + dev_err(dev, "failed to parse memory region %pOF: %d\n", + it.node, err); + continue; + } + } + + maps = of_get_property(it.node, "iommu-addresses", &size); + if (!maps) + continue; + + end = maps + size / sizeof(__be32); + + while (maps < end) { + struct device_node *np; + u32 phandle; + + phandle = be32_to_cpup(maps++); + np = of_find_node_by_phandle(phandle); + + if (np == dev->of_node) { + int prot = IOMMU_READ | IOMMU_WRITE; + struct iommu_resv_region *region; + enum iommu_resv_type type; + phys_addr_t iova; + size_t length; + + maps = of_translate_dma_region(np, maps, &iova, &length); + type = iommu_resv_region_get_type(dev, &phys, iova, length); + + region = iommu_alloc_resv_region(iova, length, prot, type, + GFP_KERNEL); + if (region) + list_add_tail(®ion->list, list); + } + } + } +#endif +} +EXPORT_SYMBOL(of_iommu_get_resv_regions); diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 2fd7702c6709..3ab078112a7c 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1556,9 +1556,9 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, omap_domain->dev = NULL; } -static void omap_iommu_detach_dev(struct iommu_domain *domain, - struct device *dev) +static void omap_iommu_set_platform_dma(struct device *dev) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct omap_iommu_domain *omap_domain = to_omap_domain(domain); spin_lock(&omap_domain->lock); @@ -1737,10 +1737,10 @@ static const struct iommu_ops omap_iommu_ops = { .probe_device = omap_iommu_probe_device, .release_device = omap_iommu_release_device, .device_group = omap_iommu_device_group, + .set_platform_dma_ops = omap_iommu_set_platform_dma, .pgsize_bitmap = OMAP_IOMMU_PGSIZES, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = omap_iommu_attach_dev, - .detach_dev = omap_iommu_detach_dev, .map = omap_iommu_map, .unmap = omap_iommu_unmap, .iova_to_phys = omap_iommu_iova_to_phys, diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index a68eadd64f38..f30db22ea5d7 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1192,7 +1192,6 @@ static const struct iommu_ops rk_iommu_ops = { .of_xlate = rk_iommu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = rk_iommu_attach_device, - .detach_dev = rk_iommu_detach_device, .map = rk_iommu_map, .unmap = rk_iommu_unmap, .iova_to_phys = rk_iommu_iova_to_phys, diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index ed33c6cce083..6849644e2892 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -52,7 +52,7 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type) if (!s390_domain) return NULL; - s390_domain->dma_table = dma_alloc_cpu_table(); + s390_domain->dma_table = dma_alloc_cpu_table(GFP_KERNEL); if (!s390_domain->dma_table) { kfree(s390_domain); return NULL; @@ -144,13 +144,10 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, return 0; } -static void s390_iommu_detach_device(struct iommu_domain *domain, - struct device *dev) +static void s390_iommu_set_platform_dma(struct device *dev) { struct zpci_dev *zdev = to_zpci_dev(dev); - WARN_ON(zdev->s390_domain != to_s390_domain(domain)); - __s390_iommu_detach_device(zdev); zpci_dma_init_device(zdev); } @@ -260,7 +257,8 @@ static void s390_iommu_iotlb_sync_map(struct 
iommu_domain *domain, static int s390_iommu_validate_trans(struct s390_domain *s390_domain, phys_addr_t pa, dma_addr_t dma_addr, - unsigned long nr_pages, int flags) + unsigned long nr_pages, int flags, + gfp_t gfp) { phys_addr_t page_addr = pa & PAGE_MASK; unsigned long *entry; @@ -268,7 +266,8 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain, int rc; for (i = 0; i < nr_pages; i++) { - entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr); + entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr, + gfp); if (unlikely(!entry)) { rc = -ENOMEM; goto undo_cpu_trans; @@ -284,7 +283,7 @@ undo_cpu_trans: while (i-- > 0) { dma_addr -= PAGE_SIZE; entry = dma_walk_cpu_trans(s390_domain->dma_table, - dma_addr); + dma_addr, gfp); if (!entry) break; dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID); @@ -301,7 +300,8 @@ static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain, int rc = 0; for (i = 0; i < nr_pages; i++) { - entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr); + entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr, + GFP_ATOMIC); if (unlikely(!entry)) { rc = -EINVAL; break; @@ -339,7 +339,7 @@ static int s390_iommu_map_pages(struct iommu_domain *domain, flags |= ZPCI_TABLE_PROTECTED; rc = s390_iommu_validate_trans(s390_domain, paddr, iova, - pgcount, flags); + pgcount, flags, gfp); if (!rc) *mapped = size; @@ -435,11 +435,11 @@ static const struct iommu_ops s390_iommu_ops = { .probe_device = s390_iommu_probe_device, .release_device = s390_iommu_release_device, .device_group = generic_device_group, + .set_platform_dma_ops = s390_iommu_set_platform_dma, .pgsize_bitmap = SZ_4K, .get_resv_regions = s390_iommu_get_resv_regions, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = s390_iommu_attach_device, - .detach_dev = s390_iommu_detach_device, .map_pages = s390_iommu_map_pages, .unmap_pages = s390_iommu_unmap_pages, .flush_iotlb_all = s390_iommu_flush_iotlb_all, diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index 219bfa11f7f4..ae94d74b73f4 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -255,21 +255,6 @@ static int sprd_iommu_attach_device(struct iommu_domain *domain, return 0; } -static void sprd_iommu_detach_device(struct iommu_domain *domain, - struct device *dev) -{ - struct sprd_iommu_domain *dom = to_sprd_domain(domain); - struct sprd_iommu_device *sdev = dom->sdev; - size_t pgt_size = sprd_iommu_pgt_size(domain); - - if (!sdev) - return; - - dma_free_coherent(sdev->dev, pgt_size, dom->pgt_va, dom->pgt_pa); - sprd_iommu_hw_en(sdev, false); - dom->sdev = NULL; -} - static int sprd_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -414,7 +399,6 @@ static const struct iommu_ops sprd_iommu_ops = { .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = sprd_iommu_attach_device, - .detach_dev = sprd_iommu_detach_device, .map_pages = sprd_iommu_map, .unmap_pages = sprd_iommu_unmap, .iotlb_sync_map = sprd_iommu_sync_map, diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index 5b585eace3d4..2d993d0cea7d 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -834,7 +834,6 @@ static const struct iommu_ops sun50i_iommu_ops = { .probe_device = sun50i_iommu_probe_device, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = sun50i_iommu_attach_device, - .detach_dev = 
sun50i_iommu_detach_device, .flush_iotlb_all = sun50i_iommu_flush_iotlb_all, .iotlb_sync_map = sun50i_iommu_iotlb_sync_map, .iotlb_sync = sun50i_iommu_iotlb_sync, diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index ed53279d1106..a482ff838b53 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -124,9 +124,9 @@ static int gart_iommu_attach_dev(struct iommu_domain *domain, return ret; } -static void gart_iommu_detach_dev(struct iommu_domain *domain, - struct device *dev) +static void gart_iommu_set_platform_dma(struct device *dev) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct gart_device *gart = gart_handle; spin_lock(&gart->dom_lock); @@ -270,11 +270,11 @@ static const struct iommu_ops gart_iommu_ops = { .domain_alloc = gart_iommu_domain_alloc, .probe_device = gart_iommu_probe_device, .device_group = generic_device_group, + .set_platform_dma_ops = gart_iommu_set_platform_dma, .pgsize_bitmap = GART_IOMMU_PGSIZES, .of_xlate = gart_iommu_of_xlate, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = gart_iommu_attach_dev, - .detach_dev = gart_iommu_detach_dev, .map = gart_iommu_map, .unmap = gart_iommu_unmap, .iova_to_phys = gart_iommu_iova_to_phys, diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 5b1af40221ec..4c4ac22d5fb1 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -511,8 +511,9 @@ disable: return err; } -static void tegra_smmu_detach_dev(struct iommu_domain *domain, struct device *dev) +static void tegra_smmu_set_platform_dma(struct device *dev) { + struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct tegra_smmu_as *as = to_smmu_as(domain); struct tegra_smmu *smmu = as->smmu; @@ -965,11 +966,11 @@ static const struct iommu_ops tegra_smmu_ops = { .domain_alloc = tegra_smmu_domain_alloc, .probe_device = tegra_smmu_probe_device, .device_group = tegra_smmu_device_group, + .set_platform_dma_ops = tegra_smmu_set_platform_dma, .of_xlate = tegra_smmu_of_xlate, .pgsize_bitmap = SZ_4K, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = tegra_smmu_attach_dev, - .detach_dev = tegra_smmu_detach_dev, .map = tegra_smmu_map, .unmap = tegra_smmu_unmap, .iova_to_phys = tegra_smmu_iova_to_phys, diff --git a/drivers/media/platform/qcom/venus/firmware.c b/drivers/media/platform/qcom/venus/firmware.c index 142d4c74017c..07d4dceb5e72 100644 --- a/drivers/media/platform/qcom/venus/firmware.c +++ b/drivers/media/platform/qcom/venus/firmware.c @@ -158,7 +158,7 @@ static int venus_boot_no_tz(struct venus_core *core, phys_addr_t mem_phys, core->fw.mapped_mem_size = mem_size; ret = iommu_map(iommu, VENUS_FW_START_ADDR, mem_phys, mem_size, - IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV); + IOMMU_READ | IOMMU_WRITE | IOMMU_PRIV, GFP_KERNEL); if (ret) { dev_err(dev, "could not map video firmware region\n"); return ret; diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c index 9ec5af323f73..991a7d39f066 100644 --- a/drivers/net/ipa/ipa_mem.c +++ b/drivers/net/ipa/ipa_mem.c @@ -466,7 +466,8 @@ static int ipa_imem_init(struct ipa *ipa, unsigned long addr, size_t size) size = PAGE_ALIGN(size + addr - phys); iova = phys; /* We just want a direct mapping */ - ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE); + ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE, + GFP_KERNEL); if (ret) return ret; @@ -574,7 +575,8 @@ static int ipa_smem_init(struct 
ipa *ipa, u32 item, size_t size) size = PAGE_ALIGN(size + addr - phys); iova = phys; /* We just want a direct mapping */ - ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE); + ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE, + GFP_KERNEL); if (ret) return ret; diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index cfcb759a87de..9a82f0336d95 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -1639,7 +1639,7 @@ static int ath10k_fw_init(struct ath10k *ar) ret = iommu_map(iommu_dom, ar_snoc->fw.fw_start_addr, ar->msa.paddr, ar->msa.mem_size, - IOMMU_READ | IOMMU_WRITE); + IOMMU_READ | IOMMU_WRITE, GFP_KERNEL); if (ret) { ath10k_err(ar, "failed to map firmware region: %d\n", ret); goto err_iommu_detach; diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c index d34a4d6325b2..df8fdc7067f9 100644 --- a/drivers/net/wireless/ath/ath11k/ahb.c +++ b/drivers/net/wireless/ath/ath11k/ahb.c @@ -1021,7 +1021,7 @@ static int ath11k_ahb_fw_resources_init(struct ath11k_base *ab) ret = iommu_map(iommu_dom, ab_ahb->fw.msa_paddr, ab_ahb->fw.msa_paddr, ab_ahb->fw.msa_size, - IOMMU_READ | IOMMU_WRITE); + IOMMU_READ | IOMMU_WRITE, GFP_KERNEL); if (ret) { ath11k_err(ab, "failed to map firmware region: %d\n", ret); goto err_iommu_detach; @@ -1029,7 +1029,7 @@ static int ath11k_ahb_fw_resources_init(struct ath11k_base *ab) ret = iommu_map(iommu_dom, ab_ahb->fw.ce_paddr, ab_ahb->fw.ce_paddr, ab_ahb->fw.ce_size, - IOMMU_READ | IOMMU_WRITE); + IOMMU_READ | IOMMU_WRITE, GFP_KERNEL); if (ret) { ath11k_err(ab, "failed to map firmware CE region: %d\n", ret); goto err_iommu_unmap; diff --git a/drivers/of/address.c b/drivers/of/address.c index 67763e5b8c0e..4c0b169ef9bf 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -626,6 +626,47 @@ u64 of_translate_dma_address(struct device_node *dev, const __be32 *in_addr) } EXPORT_SYMBOL(of_translate_dma_address); +/** + * of_translate_dma_region - Translate device tree address and size tuple + * @dev: device tree node for which to translate + * @prop: pointer into array of cells + * @start: return value for the start of the DMA range + * @length: return value for the length of the DMA range + * + * Returns a pointer to the cell immediately following the translated DMA region. 
+ */ +const __be32 *of_translate_dma_region(struct device_node *dev, const __be32 *prop, + phys_addr_t *start, size_t *length) +{ + struct device_node *parent; + u64 address, size; + int na, ns; + + parent = __of_get_dma_parent(dev); + if (!parent) + return NULL; + + na = of_bus_n_addr_cells(parent); + ns = of_bus_n_size_cells(parent); + + of_node_put(parent); + + address = of_translate_dma_address(dev, prop); + if (address == OF_BAD_ADDR) + return NULL; + + size = of_read_number(prop + na, ns); + + if (start) + *start = address; + + if (length) + *length = size; + + return prop + na + ns; +} +EXPORT_SYMBOL(of_translate_dma_region); + const __be32 *__of_get_address(struct device_node *dev, int index, int bar_no, u64 *size, unsigned int *flags) { diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 1cd4815a6dd1..80072b6b6283 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -643,7 +643,8 @@ static int rproc_handle_devmem(struct rproc *rproc, void *ptr, if (!mapping) return -ENOMEM; - ret = iommu_map(rproc->domain, rsc->da, rsc->pa, rsc->len, rsc->flags); + ret = iommu_map(rproc->domain, rsc->da, rsc->pa, rsc->len, rsc->flags, + GFP_KERNEL); if (ret) { dev_err(dev, "failed to map devmem: %d\n", ret); goto out; @@ -737,7 +738,7 @@ static int rproc_alloc_carveout(struct rproc *rproc, } ret = iommu_map(rproc->domain, mem->da, dma, mem->len, - mem->flags); + mem->flags, GFP_KERNEL); if (ret) { dev_err(dev, "iommu_map failed: %d\n", ret); goto free_mapping; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 2209372f236d..5600eb67ca30 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1480,7 +1480,8 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova, list_for_each_entry(d, &iommu->domain_list, next) { ret = iommu_map(d->domain, iova, (phys_addr_t)pfn << PAGE_SHIFT, - npage << PAGE_SHIFT, prot | IOMMU_CACHE); + npage << PAGE_SHIFT, prot | IOMMU_CACHE, + GFP_KERNEL); if (ret) goto unwind; @@ -1777,8 +1778,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, size = npage << PAGE_SHIFT; } - ret = iommu_map(domain->domain, iova, phys, - size, dma->prot | IOMMU_CACHE); + ret = iommu_map(domain->domain, iova, phys, size, + dma->prot | IOMMU_CACHE, GFP_KERNEL); if (ret) { if (!dma->iommu_mapped) { vfio_unpin_pages_remote(dma, iova, @@ -1873,7 +1874,7 @@ static void vfio_test_domain_fgsp(struct vfio_domain *domain, struct list_head * continue; ret = iommu_map(domain->domain, start, page_to_phys(pages), PAGE_SIZE * 2, - IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE); + IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE, GFP_KERNEL); if (!ret) { size_t unmapped = iommu_unmap(domain->domain, start, PAGE_SIZE); diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index ec32f785dfde..fd1536de5b1d 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -792,7 +792,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, r = ops->set_map(vdpa, asid, iotlb); } else { r = iommu_map(v->domain, iova, pa, size, - perm_to_iommu_flags(perm)); + perm_to_iommu_flags(perm), GFP_KERNEL); } if (r) { vhost_iotlb_del_range(iotlb, iova, iova + size - 1); diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 4175dce3967c..bdded0ac46eb 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -802,7 +802,7 @@ struct acpi_dmar_pci_path { struct acpi_dmar_hardware_unit { struct acpi_dmar_header header; u8 flags; - 
u8 reserved; + u8 size; /* Size of the register set */ u16 segment; u64 address; /* Register Base Address */ }; diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 6c6859bfc454..f2ea348ce3b0 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -221,6 +221,7 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_CQM_ONLINE, CPUHP_AP_PERF_X86_CSTATE_ONLINE, CPUHP_AP_PERF_X86_IDXD_ONLINE, + CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, diff --git a/include/linux/dmar.h b/include/linux/dmar.h index d81a51978d01..725d5e6acec0 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -39,6 +39,7 @@ struct dmar_drhd_unit { struct list_head list; /* list of drhd units */ struct acpi_dmar_header *hdr; /* ACPI header */ u64 reg_base_addr; /* register base address*/ + unsigned long reg_size; /* size of register set */ struct dmar_dev_scope *devices;/* target device array */ int devices_cnt; /* target device count */ u16 segment; /* PCI domain */ diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h deleted file mode 100644 index f9a0d44f6fdb..000000000000 --- a/include/linux/intel-svm.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright © 2015 Intel Corporation. - * - * Authors: David Woodhouse <David.Woodhouse@intel.com> - */ - -#ifndef __INTEL_SVM_H__ -#define __INTEL_SVM_H__ - -/* Page Request Queue depth */ -#define PRQ_ORDER 4 -#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20) -#define PRQ_DEPTH ((0x1000 << PRQ_ORDER) >> 5) - -#endif /* __INTEL_SVM_H__ */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 46e1347bfa22..3589d1b8f922 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -228,6 +228,9 @@ struct iommu_iotlb_gather { * @release_device: Remove device from iommu driver handling * @probe_finalize: Do final setup work after the device is added to an IOMMU * group and attached to the groups domain + * @set_platform_dma_ops: Returning control back to the platform DMA ops. This op + * is to support old IOMMU drivers, new drivers should use + * default domains, and the common IOMMU DMA ops. * @device_group: find iommu group for a particular device * @get_resv_regions: Request list of reserved regions for a device * @of_xlate: add OF master IDs to iommu grouping @@ -256,6 +259,7 @@ struct iommu_ops { struct iommu_device *(*probe_device)(struct device *dev); void (*release_device)(struct device *dev); void (*probe_finalize)(struct device *dev); + void (*set_platform_dma_ops)(struct device *dev); struct iommu_group *(*device_group)(struct device *dev); /* Request/Free a list of reserved regions for a device */ @@ -295,7 +299,6 @@ struct iommu_ops { * * EBUSY - device is attached to a domain and cannot be changed * * ENODEV - device specific errors, not able to be attached * * <others> - treated as ENODEV by the caller. 
Use is discouraged - * @detach_dev: detach an iommu domain from a device * @set_dev_pasid: set an iommu domain to a pasid of device * @map: map a physically contiguous memory region to an iommu domain * @map_pages: map a physically contiguous set of pages of the same size to @@ -316,7 +319,6 @@ struct iommu_ops { */ struct iommu_domain_ops { int (*attach_dev)(struct iommu_domain *domain, struct device *dev); - void (*detach_dev)(struct iommu_domain *domain, struct device *dev); int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev, ioasid_t pasid); @@ -401,6 +403,7 @@ struct iommu_fault_param { * @iommu_dev: IOMMU device this device is linked to * @priv: IOMMU Driver private data * @max_pasids: number of PASIDs this device can consume + * @attach_deferred: the dma domain attachment is deferred * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. * struct iommu_group *iommu_group; @@ -413,6 +416,7 @@ struct dev_iommu { struct iommu_device *iommu_dev; void *priv; u32 max_pasids; + u32 attach_deferred:1; }; int iommu_device_register(struct iommu_device *iommu, @@ -467,19 +471,15 @@ extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot); -extern int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot); + phys_addr_t paddr, size_t size, int prot, gfp_t gfp); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size); extern size_t iommu_unmap_fast(struct iommu_domain *domain, unsigned long iova, size_t size, struct iommu_iotlb_gather *iotlb_gather); extern ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, - struct scatterlist *sg, unsigned int nents, int prot); -extern ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, - unsigned long iova, struct scatterlist *sg, - unsigned int nents, int prot); + struct scatterlist *sg, unsigned int nents, + int prot, gfp_t gfp); extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova); extern void iommu_set_fault_handler(struct iommu_domain *domain, iommu_fault_handler_t handler, void *token); @@ -773,14 +773,7 @@ static inline struct iommu_domain *iommu_get_domain_for_dev(struct device *dev) } static inline int iommu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot) -{ - return -ENODEV; -} - -static inline int iommu_map_atomic(struct iommu_domain *domain, - unsigned long iova, phys_addr_t paddr, - size_t size, int prot) + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) { return -ENODEV; } @@ -800,14 +793,7 @@ static inline size_t iommu_unmap_fast(struct iommu_domain *domain, static inline ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, - unsigned int nents, int prot) -{ - return -ENODEV; -} - -static inline ssize_t iommu_map_sg_atomic(struct iommu_domain *domain, - unsigned long iova, struct scatterlist *sg, - unsigned int nents, int prot) + unsigned int nents, int prot, gfp_t gfp) { return -ENODEV; } @@ -1118,7 +1104,8 @@ iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid, static inline size_t iommu_map_sgtable(struct iommu_domain *domain, unsigned long iova, struct sg_table *sgt, int prot) { - return 
iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot); + return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot, + GFP_KERNEL); } #ifdef CONFIG_IOMMU_DEBUGFS diff --git a/include/linux/of_address.h b/include/linux/of_address.h index 265f26eeaf6b..376671594746 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -38,6 +38,8 @@ struct of_pci_range { /* Translate a DMA address from device space to CPU space */ extern u64 of_translate_dma_address(struct device_node *dev, const __be32 *in_addr); +extern const __be32 *of_translate_dma_region(struct device_node *dev, const __be32 *addr, + phys_addr_t *start, size_t *length); #ifdef CONFIG_OF_ADDRESS extern u64 of_translate_address(struct device_node *np, const __be32 *addr); diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h index 55c1eb300a86..9a5e6b410dd2 100644 --- a/include/linux/of_iommu.h +++ b/include/linux/of_iommu.h @@ -12,6 +12,9 @@ extern const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np, const u32 *id); +extern void of_iommu_get_resv_regions(struct device *dev, + struct list_head *list); + #else static inline const struct iommu_ops *of_iommu_configure(struct device *dev, @@ -21,6 +24,11 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev, return NULL; } +static inline void of_iommu_get_resv_regions(struct device *dev, + struct list_head *list) +{ +} + #endif /* CONFIG_OF_IOMMU */ #endif /* __OF_IOMMU_H */ diff --git a/include/trace/events/iommu.h b/include/trace/events/iommu.h index 29096fe12623..70743db1fb75 100644 --- a/include/trace/events/iommu.h +++ b/include/trace/events/iommu.h @@ -76,13 +76,6 @@ DEFINE_EVENT(iommu_device_event, attach_device_to_domain, TP_ARGS(dev) ); -DEFINE_EVENT(iommu_device_event, detach_device_from_domain, - - TP_PROTO(struct device *dev), - - TP_ARGS(dev) -); - TRACE_EVENT(map, TP_PROTO(unsigned long iova, phys_addr_t paddr, size_t size), |
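
The perfmon code above exposes each Intel IOMMU as a system-wide perf PMU: __iommu_pmu_register() publishes it under the IOMMU's name, the cpuhp callbacks keep one online CPU in iommu_pmu_cpu_mask, and counter overflow is serviced by a threaded interrupt. A minimal user-space sketch of reading one of its counters could look like the following; the "dmar0" instance name and the config value 0x1 are placeholders, and a real event encoding has to be built from the PMU's sysfs format/events directories::

    /* Sketch only: build with a plain C compiler on Linux. */
    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                                int cpu, int group_fd, unsigned long flags)
    {
            return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags);
    }

    int main(void)
    {
            struct perf_event_attr attr;
            long long count = 0;
            int type = -1;
            FILE *f;
            int fd;

            /* Dynamic PMU type; "dmar0" is a placeholder instance name. */
            f = fopen("/sys/bus/event_source/devices/dmar0/type", "r");
            if (!f || fscanf(f, "%d", &type) != 1)
                    return 1;
            fclose(f);

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = type;
            attr.config = 0x1;              /* placeholder event encoding */
            attr.disabled = 1;

            /* Uncore-style PMU: count system-wide (pid = -1) on one CPU. */
            fd = perf_event_open(&attr, -1, 0, -1, 0);
            if (fd < 0)
                    return 1;

            ioctl(fd, PERF_EVENT_IOC_RESET, 0);
            ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
            sleep(1);
            ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

            if (read(fd, &count, sizeof(count)) == sizeof(count))
                    printf("count: %lld\n", count);
            close(fd);
            return 0;
    }

Because iommu_pmu->pmu.task_ctx_nr is perf_invalid_context, the event is opened with pid = -1 on a single CPU rather than per task.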
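
The field extractors added in drivers/iommu/intel/perfmon.h pack the per-counter capability layout into one-liners. A small standalone sketch (macros copied from the header above, register value made up) shows how a raw capability dword decodes::

    #include <stdint.h>
    #include <stdio.h>

    /* Copied from drivers/iommu/intel/perfmon.h above. */
    #define iommu_cntrcap_pcc(p)            ((p) & 0x1)
    #define iommu_cntrcap_cw(p)             (((p) >> 8) & 0xff)
    #define iommu_cntrcap_ios(p)            (((p) >> 16) & 0x1)
    #define iommu_cntrcap_egcnt(p)          (((p) >> 28) & 0xf)

    int main(void)
    {
            uint32_t cap = 0x10012401;      /* made-up capability value */

            printf("per-counter caps valid:  %u\n", (unsigned)iommu_cntrcap_pcc(cap));
            printf("counter width (bits):    %u\n", (unsigned)iommu_cntrcap_cw(cap));
            printf("interrupt on overflow:   %u\n", (unsigned)iommu_cntrcap_ios(cap));
            printf("event group count:       %u\n", (unsigned)iommu_cntrcap_egcnt(cap));
            return 0;
    }

This mirrors what alloc_iommu_pmu() does when it validates that every counter reports the same width and interrupt-on-overflow support.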
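
With iommu_map(), iommu_map_sg() and iommu_map_sgtable() now taking an explicit gfp_t and the _atomic variants removed, callers pass GFP_KERNEL or GFP_ATOMIC as their context allows; the new WARN_ON_ONCE rejects __GFP_COMP, __GFP_DMA, __GFP_DMA32 and __GFP_HIGHMEM. A kernel-side sketch of a caller conversion, where my_map_window() and its arguments are invented for illustration::

    /* Hypothetical helper; only the iommu_map() call reflects the real API. */
    static int my_map_window(struct iommu_domain *domain, unsigned long iova,
                             phys_addr_t paddr, size_t size, bool atomic)
    {
            gfp_t gfp = atomic ? GFP_ATOMIC : GFP_KERNEL;

            /*
             * Before: iommu_map(domain, iova, paddr, size, prot) or
             * iommu_map_atomic(...); now the allocation context is explicit.
             */
            return iommu_map(domain, iova, paddr, size,
                             IOMMU_READ | IOMMU_WRITE, gfp);
    }

The iommufd conversion above shows the same idea with GFP_KERNEL_ACCOUNT, so page-table allocations are charged to the calling cgroup.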
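
Several drivers above (msm, mtk_iommu_v1, omap, s390, tegra-gart, tegra-smmu) replace the NULL-domain .detach_dev path with the new per-device .set_platform_dma_ops callback, recovering the current domain with iommu_get_domain_for_dev() where the teardown needs it. A sketch of that pattern for an invented driver; every foo_* symbol is hypothetical::

    static void foo_iommu_set_platform_dma(struct device *dev)
    {
            /* Domain the device is currently attached to, if teardown needs it. */
            struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
            struct foo_domain *fdom = to_foo_domain(domain);

            /* Same hardware teardown the old .detach_dev callback performed. */
            foo_domain_remove_device(fdom, dev);
    }

    static const struct iommu_ops foo_iommu_ops = {
            /* ... */
            .set_platform_dma_ops = foo_iommu_set_platform_dma,
            .default_domain_ops = &(const struct iommu_domain_ops) {
                    .attach_dev     = foo_iommu_attach_device,
                    /* .detach_dev is gone from iommu_domain_ops. */
            },
    };

Drivers that support default domains never see this callback; __iommu_group_set_domain() only falls back to it when the group is returned to platform DMA ops.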
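
The new of_iommu_get_resv_regions() helper walks a device's "memory-region" phandles, translates any "iommu-addresses" tuples with of_translate_dma_region(), and adds IOMMU_RESV_DIRECT or IOMMU_RESV_RESERVED entries to the list. Per its kerneldoc it is meant to back a driver's .get_resv_regions() callback; a minimal hypothetical wiring (driver name invented)::

    static void bar_iommu_get_resv_regions(struct device *dev,
                                           struct list_head *head)
    {
            /* Reserved/direct regions described in the device tree. */
            of_iommu_get_resv_regions(dev, head);

            /* Driver-specific regions (e.g. an MSI window) can still be appended. */
    }

    static const struct iommu_ops bar_iommu_ops = {
            /* ... */
            .get_resv_regions = bar_iommu_get_resv_regions,
    };

Regions collected this way feed iommu_group_create_direct_mappings(), so IOMMU_RESV_DIRECT entries are identity-mapped when the default domain is attached.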