41 files changed, 1567 insertions, 2050 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-iommu b/Documentation/ABI/testing/sysfs-class-iommu new file mode 100644 index 000000000000..6d0a1b4be82d --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-iommu @@ -0,0 +1,17 @@ +What: /sys/class/iommu/<iommu>/devices/ +Date: June 2014 +KernelVersion: 3.17 +Contact: Alex Williamson <alex.williamson@redhat.com> +Description: + IOMMU drivers are able to link devices managed by a + given IOMMU here to allow association of IOMMU to + device. + +What: /sys/devices/.../iommu +Date: June 2014 +KernelVersion: 3.17 +Contact: Alex Williamson <alex.williamson@redhat.com> +Description: + IOMMU drivers are able to link the IOMMU for a + given device here to allow association of device to + IOMMU. diff --git a/Documentation/ABI/testing/sysfs-class-iommu-amd-iommu b/Documentation/ABI/testing/sysfs-class-iommu-amd-iommu new file mode 100644 index 000000000000..d6ba8e8a4a97 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-iommu-amd-iommu @@ -0,0 +1,14 @@ +What: /sys/class/iommu/<iommu>/amd-iommu/cap +Date: June 2014 +KernelVersion: 3.17 +Contact: Alex Williamson <alex.williamson@redhat.com> +Description: + IOMMU capability header as documented in the AMD IOMMU + specification. Format: %x + +What: /sys/class/iommu/<iommu>/amd-iommu/features +Date: June 2014 +KernelVersion: 3.17 +Contact: Alex Williamson <alex.williamson@redhat.com> +Description: + Extended features of the IOMMU. Format: %llx diff --git a/Documentation/ABI/testing/sysfs-class-iommu-intel-iommu b/Documentation/ABI/testing/sysfs-class-iommu-intel-iommu new file mode 100644 index 000000000000..258cc246d98e --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-iommu-intel-iommu @@ -0,0 +1,32 @@ +What: /sys/class/iommu/<iommu>/intel-iommu/address +Date: June 2014 +KernelVersion: 3.17 +Contact: Alex Williamson <alex.williamson@redhat.com> +Description: + Physical address of the VT-d DRHD for this IOMMU. + Format: %llx. This allows association of a sysfs + intel-iommu with a DMAR DRHD table entry. + +What: /sys/class/iommu/<iommu>/intel-iommu/cap +Date: June 2014 +KernelVersion: 3.17 +Contact: Alex Williamson <alex.williamson@redhat.com> +Description: + The cached hardware capability register value + of this DRHD unit. Format: %llx. + +What: /sys/class/iommu/<iommu>/intel-iommu/ecap +Date: June 2014 +KernelVersion: 3.17 +Contact: Alex Williamson <alex.williamson@redhat.com> +Description: + The cached hardware extended capability register + value of this DRHD unit. Format: %llx. + +What: /sys/class/iommu/<iommu>/intel-iommu/version +Date: June 2014 +KernelVersion: 3.17 +Contact: Alex Williamson <alex.williamson@redhat.com> +Description: + The architecture version as reported from the + VT-d VER_REG. Format: %d:%d, major:minor diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt index f284b99402bc..2d0f7cd867ea 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt @@ -42,12 +42,6 @@ conditions. ** System MMU optional properties: -- smmu-parent : When multiple SMMUs are chained together, this - property can be used to provide a phandle to the - parent SMMU (that is the next SMMU on the path going - from the mmu-masters towards memory) node for this - SMMU. - - calxeda,smmu-secure-config-access : Enable proper handling of buggy implementations that always use secure access to SMMU configuration registers. 
In this case non-secure diff --git a/Documentation/devicetree/bindings/iommu/iommu.txt b/Documentation/devicetree/bindings/iommu/iommu.txt new file mode 100644 index 000000000000..5a8b4624defc --- /dev/null +++ b/Documentation/devicetree/bindings/iommu/iommu.txt @@ -0,0 +1,182 @@ +This document describes the generic device tree binding for IOMMUs and their +master(s). + + +IOMMU device node: +================== + +An IOMMU can provide the following services: + +* Remap address space to allow devices to access physical memory ranges that + they otherwise wouldn't be capable of accessing. + + Example: 32-bit DMA to 64-bit physical addresses + +* Implement scatter-gather at page level granularity so that the device does + not have to. + +* Provide system protection against "rogue" DMA by forcing all accesses to go + through the IOMMU and faulting when encountering accesses to unmapped + address regions. + +* Provide address space isolation between multiple contexts. + + Example: Virtualization + +Device nodes compatible with this binding represent hardware with some of the +above capabilities. + +IOMMUs can be single-master or multiple-master. Single-master IOMMU devices +typically have a fixed association to the master device, whereas multiple- +master IOMMU devices can translate accesses from more than one master. + +The device tree node of the IOMMU device's parent bus must contain a valid +"dma-ranges" property that describes how the physical address space of the +IOMMU maps to memory. An empty "dma-ranges" property means that there is a +1:1 mapping from IOMMU to memory. + +Required properties: +-------------------- +- #iommu-cells: The number of cells in an IOMMU specifier needed to encode an + address. + +The meaning of the IOMMU specifier is defined by the device tree binding of +the specific IOMMU. Below are a few examples of typical use-cases: + +- #iommu-cells = <0>: Single master IOMMU devices are not configurable and + therefore no additional information needs to be encoded in the specifier. + This may also apply to multiple master IOMMU devices that do not allow the + association of masters to be configured. Note that an IOMMU can by design + be multi-master yet only expose a single master in a given configuration. + In such cases the number of cells will usually be 1 as in the next case. +- #iommu-cells = <1>: Multiple master IOMMU devices may need to be configured + in order to enable translation for a given master. In such cases the single + address cell corresponds to the master device's ID. In some cases more than + one cell can be required to represent a single master ID. +- #iommu-cells = <4>: Some IOMMU devices allow the DMA window for masters to + be configured. The first cell of the address in this may contain the master + device's ID for example, while the second cell could contain the start of + the DMA window for the given device. The length of the DMA window is given + by the third and fourth cells. + +Note that these are merely examples and real-world use-cases may use different +definitions to represent their individual needs. Always refer to the specific +IOMMU binding for the exact meaning of the cells that make up the specifier. + + +IOMMU master node: +================== + +Devices that access memory through an IOMMU are called masters. A device can +have multiple master interfaces (to one or more IOMMU devices). 
+ +Required properties: +-------------------- +- iommus: A list of phandle and IOMMU specifier pairs that describe the IOMMU + master interfaces of the device. One entry in the list describes one master + interface of the device. + +When an "iommus" property is specified in a device tree node, the IOMMU will +be used for address translation. If a "dma-ranges" property exists in the +device's parent node it will be ignored. An exception to this rule is if the +referenced IOMMU is disabled, in which case the "dma-ranges" property of the +parent shall take effect. Note that merely disabling a device tree node does +not guarantee that the IOMMU is really disabled since the hardware may not +have a means to turn off translation. But it is invalid in such cases to +disable the IOMMU's device tree node in the first place because it would +prevent any driver from properly setting up the translations. + + +Notes: +====== + +One possible extension to the above is to use an "iommus" property along with +a "dma-ranges" property in a bus device node (such as PCI host bridges). This +can be useful to describe how children on the bus relate to the IOMMU if they +are not explicitly listed in the device tree (e.g. PCI devices). However, the +requirements of that use-case haven't been fully determined yet. Implementing +this is therefore not recommended without further discussion and extension of +this binding. + + +Examples: +========= + +Single-master IOMMU: +-------------------- + + iommu { + #iommu-cells = <0>; + }; + + master { + iommus = <&{/iommu}>; + }; + +Multiple-master IOMMU with fixed associations: +---------------------------------------------- + + /* multiple-master IOMMU */ + iommu { + /* + * Masters are statically associated with this IOMMU and share + * the same address translations because the IOMMU does not + * have sufficient information to distinguish between masters. + * + * Consequently address translation is always on or off for + * all masters at any given point in time. + */ + #iommu-cells = <0>; + }; + + /* static association with IOMMU */ + master@1 { + reg = <1>; + iommus = <&{/iommu}>; + }; + + /* static association with IOMMU */ + master@2 { + reg = <2>; + iommus = <&{/iommu}>; + }; + +Multiple-master IOMMU: +---------------------- + + iommu { + /* the specifier represents the ID of the master */ + #iommu-cells = <1>; + }; + + master@1 { + /* device has master ID 42 in the IOMMU */ + iommus = <&{/iommu} 42>; + }; + + master@2 { + /* device has master IDs 23 and 24 in the IOMMU */ + iommus = <&{/iommu} 23>, <&{/iommu} 24>; + }; + +Multiple-master IOMMU with configurable DMA window: +--------------------------------------------------- + + / { + iommu { + /* + * One cell for the master ID and one cell for the + * address of the DMA window. The length of the DMA + * window is encoded in two cells. + * + * The DMA window is the range addressable by the + * master (i.e. the I/O virtual address space). 
+ */ + #iommu-cells = <4>; + }; + + master { + /* master ID 42, 4 GiB DMA window starting at 0 */ + iommus = <&{/iommu} 42 0 0x1 0x0>; + }; + }; diff --git a/arch/arm/mach-omap2/omap-iommu.c b/arch/arm/mach-omap2/omap-iommu.c index f1fab5684a24..4068350f9059 100644 --- a/arch/arm/mach-omap2/omap-iommu.c +++ b/arch/arm/mach-omap2/omap-iommu.c @@ -34,8 +34,6 @@ static int __init omap_iommu_dev_init(struct omap_hwmod *oh, void *unused) pdata->name = oh->name; pdata->nr_tlb_entries = a->nr_tlb_entries; - pdata->da_start = a->da_start; - pdata->da_end = a->da_end; if (oh->rst_lines_cnt == 1) { pdata->reset_name = oh->rst_lines->name; diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index 1cd0cfdc03e0..e9516b454e76 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -2986,8 +2986,6 @@ static struct omap_hwmod_class omap3xxx_mmu_hwmod_class = { /* mmu isp */ static struct omap_mmu_dev_attr mmu_isp_dev_attr = { - .da_start = 0x0, - .da_end = 0xfffff000, .nr_tlb_entries = 8, }; @@ -3026,8 +3024,6 @@ static struct omap_hwmod omap3xxx_mmu_isp_hwmod = { /* mmu iva */ static struct omap_mmu_dev_attr mmu_iva_dev_attr = { - .da_start = 0x11000000, - .da_end = 0xfffff000, .nr_tlb_entries = 32, }; diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c index 41e54f759934..b4acc0a7576f 100644 --- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c @@ -2084,8 +2084,6 @@ static struct omap_hwmod_class omap44xx_mmu_hwmod_class = { /* mmu ipu */ static struct omap_mmu_dev_attr mmu_ipu_dev_attr = { - .da_start = 0x0, - .da_end = 0xfffff000, .nr_tlb_entries = 32, }; @@ -2133,8 +2131,6 @@ static struct omap_hwmod omap44xx_mmu_ipu_hwmod = { /* mmu dsp */ static struct omap_mmu_dev_attr mmu_dsp_dev_attr = { - .da_start = 0x0, - .da_end = 0xfffff000, .nr_tlb_entries = 32, }; diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index d260605e6d5f..dd5112265cc9 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -76,7 +76,7 @@ config AMD_IOMMU_STATS config AMD_IOMMU_V2 tristate "AMD IOMMU Version 2 driver" - depends on AMD_IOMMU && PROFILING + depends on AMD_IOMMU select MMU_NOTIFIER ---help--- This option enables support for the AMD IOMMUv2 features of the IOMMU @@ -143,16 +143,12 @@ config OMAP_IOMMU depends on ARCH_OMAP2PLUS select IOMMU_API -config OMAP_IOVMM - tristate "OMAP IO Virtual Memory Manager Support" - depends on OMAP_IOMMU - config OMAP_IOMMU_DEBUG - tristate "Export OMAP IOMMU/IOVMM internals in DebugFS" - depends on OMAP_IOVMM && DEBUG_FS + tristate "Export OMAP IOMMU internals in DebugFS" + depends on OMAP_IOMMU && DEBUG_FS help Select this to see extensive information about - the internal state of OMAP IOMMU/IOVMM in debugfs. + the internal state of OMAP IOMMU in debugfs. Say N unless you know you need this. @@ -180,6 +176,7 @@ config EXYNOS_IOMMU bool "Exynos IOMMU Support" depends on ARCH_EXYNOS select IOMMU_API + select ARM_DMA_USE_IOMMU help Support for the IOMMU (System MMU) of Samsung Exynos application processor family. 
This enables H/W multimedia accelerators to see diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 8893bad048e0..16edef74b8ee 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o +obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o obj-$(CONFIG_OF_IOMMU) += of_iommu.o obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o @@ -11,7 +12,6 @@ obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu2.o -obj-$(CONFIG_OMAP_IOVMM) += omap-iovmm.o obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 4aec6a29e316..18405314168b 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -46,7 +46,6 @@ #include "amd_iommu_proto.h" #include "amd_iommu_types.h" #include "irq_remapping.h" -#include "pci.h" #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) @@ -81,7 +80,7 @@ LIST_HEAD(hpet_map); */ static struct protection_domain *pt_domain; -static struct iommu_ops amd_iommu_ops; +static const struct iommu_ops amd_iommu_ops; static ATOMIC_NOTIFIER_HEAD(ppr_notifier); int amd_iommu_max_glx_val = -1; @@ -133,9 +132,6 @@ static void free_dev_data(struct iommu_dev_data *dev_data) list_del(&dev_data->dev_data_list); spin_unlock_irqrestore(&dev_data_list_lock, flags); - if (dev_data->group) - iommu_group_put(dev_data->group); - kfree(dev_data); } @@ -264,167 +260,79 @@ static bool check_device(struct device *dev) return true; } -static struct pci_bus *find_hosted_bus(struct pci_bus *bus) -{ - while (!bus->self) { - if (!pci_is_root_bus(bus)) - bus = bus->parent; - else - return ERR_PTR(-ENODEV); - } - - return bus; -} - -#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) - -static struct pci_dev *get_isolation_root(struct pci_dev *pdev) -{ - struct pci_dev *dma_pdev = pdev; - - /* Account for quirked devices */ - swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev)); - - /* - * If it's a multifunction device that does not support our - * required ACS flags, add to the same group as lowest numbered - * function that also does not suport the required ACS flags. - */ - if (dma_pdev->multifunction && - !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) { - u8 i, slot = PCI_SLOT(dma_pdev->devfn); - - for (i = 0; i < 8; i++) { - struct pci_dev *tmp; - - tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i)); - if (!tmp) - continue; - - if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) { - swap_pci_ref(&dma_pdev, tmp); - break; - } - pci_dev_put(tmp); - } - } - - /* - * Devices on the root bus go through the iommu. If that's not us, - * find the next upstream device and test ACS up to the root bus. - * Finding the next device may require skipping virtual buses. 
- */ - while (!pci_is_root_bus(dma_pdev->bus)) { - struct pci_bus *bus = find_hosted_bus(dma_pdev->bus); - if (IS_ERR(bus)) - break; - - if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) - break; - - swap_pci_ref(&dma_pdev, pci_dev_get(bus->self)); - } - - return dma_pdev; -} - -static int use_pdev_iommu_group(struct pci_dev *pdev, struct device *dev) +static int init_iommu_group(struct device *dev) { - struct iommu_group *group = iommu_group_get(&pdev->dev); - int ret; + struct iommu_group *group; - if (!group) { - group = iommu_group_alloc(); - if (IS_ERR(group)) - return PTR_ERR(group); + group = iommu_group_get_for_dev(dev); - WARN_ON(&pdev->dev != dev); - } + if (IS_ERR(group)) + return PTR_ERR(group); - ret = iommu_group_add_device(group, dev); iommu_group_put(group); - return ret; + return 0; } -static int use_dev_data_iommu_group(struct iommu_dev_data *dev_data, - struct device *dev) +static int __last_alias(struct pci_dev *pdev, u16 alias, void *data) { - if (!dev_data->group) { - struct iommu_group *group = iommu_group_alloc(); - if (IS_ERR(group)) - return PTR_ERR(group); - - dev_data->group = group; - } - - return iommu_group_add_device(dev_data->group, dev); + *(u16 *)data = alias; + return 0; } -static int init_iommu_group(struct device *dev) +static u16 get_alias(struct device *dev) { - struct iommu_dev_data *dev_data; - struct iommu_group *group; - struct pci_dev *dma_pdev; - int ret; - - group = iommu_group_get(dev); - if (group) { - iommu_group_put(group); - return 0; - } - - dev_data = find_dev_data(get_device_id(dev)); - if (!dev_data) - return -ENOMEM; + struct pci_dev *pdev = to_pci_dev(dev); + u16 devid, ivrs_alias, pci_alias; - if (dev_data->alias_data) { - u16 alias; - struct pci_bus *bus; + devid = get_device_id(dev); + ivrs_alias = amd_iommu_alias_table[devid]; + pci_for_each_dma_alias(pdev, __last_alias, &pci_alias); - if (dev_data->alias_data->group) - goto use_group; + if (ivrs_alias == pci_alias) + return ivrs_alias; - /* - * If the alias device exists, it's effectively just a first - * level quirk for finding the DMA source. - */ - alias = amd_iommu_alias_table[dev_data->devid]; - dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff); - if (dma_pdev) { - dma_pdev = get_isolation_root(dma_pdev); - goto use_pdev; + /* + * DMA alias showdown + * + * The IVRS is fairly reliable in telling us about aliases, but it + * can't know about every screwy device. If we don't have an IVRS + * reported alias, use the PCI reported alias. In that case we may + * still need to initialize the rlookup and dev_table entries if the + * alias is to a non-existent device. + */ + if (ivrs_alias == devid) { + if (!amd_iommu_rlookup_table[pci_alias]) { + amd_iommu_rlookup_table[pci_alias] = + amd_iommu_rlookup_table[devid]; + memcpy(amd_iommu_dev_table[pci_alias].data, + amd_iommu_dev_table[devid].data, + sizeof(amd_iommu_dev_table[pci_alias].data)); } - /* - * If the alias is virtual, try to find a parent device - * and test whether the IOMMU group is actualy rooted above - * the alias. Be careful to also test the parent device if - * we think the alias is the root of the group. 
- */ - bus = pci_find_bus(0, alias >> 8); - if (!bus) - goto use_group; - - bus = find_hosted_bus(bus); - if (IS_ERR(bus) || !bus->self) - goto use_group; + return pci_alias; + } - dma_pdev = get_isolation_root(pci_dev_get(bus->self)); - if (dma_pdev != bus->self || (dma_pdev->multifunction && - !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS))) - goto use_pdev; + pr_info("AMD-Vi: Using IVRS reported alias %02x:%02x.%d " + "for device %s[%04x:%04x], kernel reported alias " + "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias), + PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device, + PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias), + PCI_FUNC(pci_alias)); - pci_dev_put(dma_pdev); - goto use_group; + /* + * If we don't have a PCI DMA alias and the IVRS alias is on the same + * bus, then the IVRS table may know about a quirk that we don't. + */ + if (pci_alias == devid && + PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) { + pdev->dev_flags |= PCI_DEV_FLAGS_DMA_ALIAS_DEVFN; + pdev->dma_alias_devfn = ivrs_alias & 0xff; + pr_info("AMD-Vi: Added PCI DMA alias %02x.%d for %s\n", + PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias), + dev_name(dev)); } - dma_pdev = get_isolation_root(pci_dev_get(to_pci_dev(dev))); -use_pdev: - ret = use_pdev_iommu_group(dma_pdev, dev); - pci_dev_put(dma_pdev); - return ret; -use_group: - return use_dev_data_iommu_group(dev_data->alias_data, dev); + return ivrs_alias; } static int iommu_init_device(struct device *dev) @@ -441,7 +349,8 @@ static int iommu_init_device(struct device *dev) if (!dev_data) return -ENOMEM; - alias = amd_iommu_alias_table[dev_data->devid]; + alias = get_alias(dev); + if (alias != dev_data->devid) { struct iommu_dev_data *alias_data; @@ -470,6 +379,9 @@ static int iommu_init_device(struct device *dev) dev->archdata.iommu = dev_data; + iommu_device_link(amd_iommu_rlookup_table[dev_data->devid]->iommu_dev, + dev); + return 0; } @@ -489,12 +401,22 @@ static void iommu_ignore_device(struct device *dev) static void iommu_uninit_device(struct device *dev) { + struct iommu_dev_data *dev_data = search_dev_data(get_device_id(dev)); + + if (!dev_data) + return; + + iommu_device_unlink(amd_iommu_rlookup_table[dev_data->devid]->iommu_dev, + dev); + iommu_group_remove_device(dev); + /* Unlink from alias, it may change if another device is re-plugged */ + dev_data->alias_data = NULL; + /* - * Nothing to do here - we keep dev_data around for unplugged devices - * and reuse it when the device is re-plugged - not doing so would - * introduce a ton of races. + * We keep dev_data around for unplugged devices and reuse it when the + * device is re-plugged - not doing so would introduce a ton of races. 
*/ } @@ -3473,7 +3395,7 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain, return 0; } -static struct iommu_ops amd_iommu_ops = { +static const struct iommu_ops amd_iommu_ops = { .domain_init = amd_iommu_domain_init, .domain_destroy = amd_iommu_domain_destroy, .attach_dev = amd_iommu_attach_device, diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 0e08545d7298..3783e0b44df6 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -26,6 +26,7 @@ #include <linux/msi.h> #include <linux/amd-iommu.h> #include <linux/export.h> +#include <linux/iommu.h> #include <asm/pci-direct.h> #include <asm/iommu.h> #include <asm/gart.h> @@ -1197,6 +1198,39 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) iommu->max_counters = (u8) ((val >> 7) & 0xf); } +static ssize_t amd_iommu_show_cap(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amd_iommu *iommu = dev_get_drvdata(dev); + return sprintf(buf, "%x\n", iommu->cap); +} +static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL); + +static ssize_t amd_iommu_show_features(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amd_iommu *iommu = dev_get_drvdata(dev); + return sprintf(buf, "%llx\n", iommu->features); +} +static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL); + +static struct attribute *amd_iommu_attrs[] = { + &dev_attr_cap.attr, + &dev_attr_features.attr, + NULL, +}; + +static struct attribute_group amd_iommu_group = { + .name = "amd-iommu", + .attrs = amd_iommu_attrs, +}; + +static const struct attribute_group *amd_iommu_groups[] = { + &amd_iommu_group, + NULL, +}; static int iommu_init_pci(struct amd_iommu *iommu) { @@ -1297,6 +1331,10 @@ static int iommu_init_pci(struct amd_iommu *iommu) amd_iommu_erratum_746_workaround(iommu); + iommu->iommu_dev = iommu_device_create(&iommu->dev->dev, iommu, + amd_iommu_groups, "ivhd%d", + iommu->index); + return pci_enable_device(iommu->dev); } diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index f1a5abf11acf..8e43b7cba133 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -390,12 +390,6 @@ struct amd_iommu_fault { }; -#define PPR_FAULT_EXEC (1 << 1) -#define PPR_FAULT_READ (1 << 2) -#define PPR_FAULT_WRITE (1 << 5) -#define PPR_FAULT_USER (1 << 6) -#define PPR_FAULT_RSVD (1 << 7) -#define PPR_FAULT_GN (1 << 8) struct iommu_domain; @@ -432,7 +426,6 @@ struct iommu_dev_data { struct iommu_dev_data *alias_data;/* The alias dev_data */ struct protection_domain *domain; /* Domain the device is bound to */ atomic_t bind; /* Domain attach reference count */ - struct iommu_group *group; /* IOMMU group for virtual aliases */ u16 devid; /* PCI Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ bool passthrough; /* Default for device is pt_domain */ @@ -578,6 +571,9 @@ struct amd_iommu { /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; + /* IOMMU sysfs device */ + struct device *iommu_dev; + /* * We can't rely on the BIOS to restore all values on reinit, so we * need to stash them diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 499b4366a98d..5f578e850fc5 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -47,12 +47,13 @@ struct pasid_state { atomic_t count; /* Reference count */ unsigned mmu_notifier_count; /* Counting nested mmu_notifier calls */ - struct task_struct *task; /* Task bound to 
this PASID */ struct mm_struct *mm; /* mm_struct for the faults */ - struct mmu_notifier mn; /* mmu_otifier handle */ + struct mmu_notifier mn; /* mmu_notifier handle */ struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */ struct device_state *device_state; /* Link to our device_state */ int pasid; /* PASID index */ + bool invalid; /* Used during setup and + teardown of the pasid */ spinlock_t lock; /* Protect pri_queues and mmu_notifer_count */ wait_queue_head_t wq; /* To wait for count == 0 */ @@ -99,7 +100,6 @@ static struct workqueue_struct *iommu_wq; static u64 *empty_page_table; static void free_pasid_states(struct device_state *dev_state); -static void unbind_pasid(struct device_state *dev_state, int pasid); static u16 device_id(struct pci_dev *pdev) { @@ -297,37 +297,29 @@ static void put_pasid_state_wait(struct pasid_state *pasid_state) schedule(); finish_wait(&pasid_state->wq, &wait); - mmput(pasid_state->mm); free_pasid_state(pasid_state); } -static void __unbind_pasid(struct pasid_state *pasid_state) +static void unbind_pasid(struct pasid_state *pasid_state) { struct iommu_domain *domain; domain = pasid_state->device_state->domain; + /* + * Mark pasid_state as invalid, no more faults will we added to the + * work queue after this is visible everywhere. + */ + pasid_state->invalid = true; + + /* Make sure this is visible */ + smp_wmb(); + + /* After this the device/pasid can't access the mm anymore */ amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid); - clear_pasid_state(pasid_state->device_state, pasid_state->pasid); /* Make sure no more pending faults are in the queue */ flush_workqueue(iommu_wq); - - mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); - - put_pasid_state(pasid_state); /* Reference taken in bind() function */ -} - -static void unbind_pasid(struct device_state *dev_state, int pasid) -{ - struct pasid_state *pasid_state; - - pasid_state = get_pasid_state(dev_state, pasid); - if (pasid_state == NULL) - return; - - __unbind_pasid(pasid_state); - put_pasid_state_wait(pasid_state); /* Reference taken in this function */ } static void free_pasid_states_level1(struct pasid_state **tbl) @@ -373,6 +365,12 @@ static void free_pasid_states(struct device_state *dev_state) * unbind the PASID */ mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); + + put_pasid_state_wait(pasid_state); /* Reference taken in + amd_iommu_bind_pasid */ + + /* Drop reference taken in amd_iommu_bind_pasid */ + put_device_state(dev_state); } if (dev_state->pasid_levels == 2) @@ -411,14 +409,6 @@ static int mn_clear_flush_young(struct mmu_notifier *mn, return 0; } -static void mn_change_pte(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long address, - pte_t pte) -{ - __mn_flush_page(mn, address); -} - static void mn_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long address) @@ -472,22 +462,23 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct pasid_state *pasid_state; struct device_state *dev_state; + bool run_inv_ctx_cb; might_sleep(); - pasid_state = mn_to_state(mn); - dev_state = pasid_state->device_state; + pasid_state = mn_to_state(mn); + dev_state = pasid_state->device_state; + run_inv_ctx_cb = !pasid_state->invalid; - if (pasid_state->device_state->inv_ctx_cb) + if (run_inv_ctx_cb && pasid_state->device_state->inv_ctx_cb) dev_state->inv_ctx_cb(dev_state->pdev, pasid_state->pasid); - unbind_pasid(dev_state, pasid_state->pasid); + unbind_pasid(pasid_state); } static struct mmu_notifier_ops 
iommu_mn = { .release = mn_release, .clear_flush_young = mn_clear_flush_young, - .change_pte = mn_change_pte, .invalidate_page = mn_invalidate_page, .invalidate_range_start = mn_invalidate_range_start, .invalidate_range_end = mn_invalidate_range_end, @@ -529,7 +520,7 @@ static void do_fault(struct work_struct *work) write = !!(fault->flags & PPR_FAULT_WRITE); down_read(&fault->state->mm->mmap_sem); - npages = get_user_pages(fault->state->task, fault->state->mm, + npages = get_user_pages(NULL, fault->state->mm, fault->address, 1, write, 0, &page, NULL); up_read(&fault->state->mm->mmap_sem); @@ -587,7 +578,7 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) goto out; pasid_state = get_pasid_state(dev_state, iommu_fault->pasid); - if (pasid_state == NULL) { + if (pasid_state == NULL || pasid_state->invalid) { /* We know the device but not the PASID -> send INVALID */ amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid, PPR_INVALID, tag); @@ -612,6 +603,7 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) fault->state = pasid_state; fault->tag = tag; fault->finish = finish; + fault->pasid = iommu_fault->pasid; fault->flags = iommu_fault->flags; INIT_WORK(&fault->work, do_fault); @@ -620,6 +612,10 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) ret = NOTIFY_OK; out_drop_state: + + if (ret != NOTIFY_OK && pasid_state) + put_pasid_state(pasid_state); + put_device_state(dev_state); out: @@ -635,6 +631,7 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, { struct pasid_state *pasid_state; struct device_state *dev_state; + struct mm_struct *mm; u16 devid; int ret; @@ -658,20 +655,23 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, if (pasid_state == NULL) goto out; + atomic_set(&pasid_state->count, 1); init_waitqueue_head(&pasid_state->wq); spin_lock_init(&pasid_state->lock); - pasid_state->task = task; - pasid_state->mm = get_task_mm(task); + mm = get_task_mm(task); + pasid_state->mm = mm; pasid_state->device_state = dev_state; pasid_state->pasid = pasid; + pasid_state->invalid = true; /* Mark as valid only if we are + done with setting up the pasid */ pasid_state->mn.ops = &iommu_mn; if (pasid_state->mm == NULL) goto out_free; - mmu_notifier_register(&pasid_state->mn, pasid_state->mm); + mmu_notifier_register(&pasid_state->mn, mm); ret = set_pasid_state(dev_state, pasid_state, pasid); if (ret) @@ -682,15 +682,26 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid, if (ret) goto out_clear_state; + /* Now we are ready to handle faults */ + pasid_state->invalid = false; + + /* + * Drop the reference to the mm_struct here. We rely on the + * mmu_notifier release call-back to inform us when the mm + * is going away. 
+ */ + mmput(mm); + return 0; out_clear_state: clear_pasid_state(dev_state, pasid); out_unregister: - mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); + mmu_notifier_unregister(&pasid_state->mn, mm); out_free: + mmput(mm); free_pasid_state(pasid_state); out: @@ -728,10 +739,22 @@ void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid) */ put_pasid_state(pasid_state); - /* This will call the mn_release function and unbind the PASID */ + /* Clear the pasid state so that the pasid can be re-used */ + clear_pasid_state(dev_state, pasid_state->pasid); + + /* + * Call mmu_notifier_unregister to drop our reference + * to pasid_state->mm + */ mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); + put_pasid_state_wait(pasid_state); /* Reference taken in + amd_iommu_bind_pasid */ out: + /* Drop reference taken in this function */ + put_device_state(dev_state); + + /* Drop reference taken in amd_iommu_bind_pasid */ put_device_state(dev_state); } EXPORT_SYMBOL(amd_iommu_unbind_pasid); diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 1599354e974d..ca18d6d42a9b 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -39,6 +39,7 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/pci.h> #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -316,9 +317,9 @@ #define FSR_AFF (1 << 2) #define FSR_TF (1 << 1) -#define FSR_IGN (FSR_AFF | FSR_ASF | FSR_TLBMCF | \ - FSR_TLBLKF) -#define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \ +#define FSR_IGN (FSR_AFF | FSR_ASF | \ + FSR_TLBMCF | FSR_TLBLKF) +#define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \ FSR_EF | FSR_PF | FSR_TF | FSR_IGN) #define FSYNR0_WNR (1 << 4) @@ -329,27 +330,20 @@ struct arm_smmu_smr { u16 id; }; -struct arm_smmu_master { - struct device_node *of_node; - - /* - * The following is specific to the master's position in the - * SMMU chain. - */ - struct rb_node node; +struct arm_smmu_master_cfg { int num_streamids; u16 streamids[MAX_MASTER_STREAMIDS]; - - /* - * We only need to allocate these on the root SMMU, as we - * configure unmatched streams to bypass translation. - */ struct arm_smmu_smr *smrs; }; +struct arm_smmu_master { + struct device_node *of_node; + struct rb_node node; + struct arm_smmu_master_cfg cfg; +}; + struct arm_smmu_device { struct device *dev; - struct device_node *parent_of_node; void __iomem *base; unsigned long size; @@ -387,7 +381,6 @@ struct arm_smmu_device { }; struct arm_smmu_cfg { - struct arm_smmu_device *smmu; u8 cbndx; u8 irptndx; u32 cbar; @@ -399,15 +392,8 @@ struct arm_smmu_cfg { #define ARM_SMMU_CB_VMID(cfg) ((cfg)->cbndx + 1) struct arm_smmu_domain { - /* - * A domain can span across multiple, chained SMMUs and requires - * all devices within the domain to follow the same translation - * path. 
- */ - struct arm_smmu_device *leaf_smmu; - struct arm_smmu_cfg root_cfg; - phys_addr_t output_mask; - + struct arm_smmu_device *smmu; + struct arm_smmu_cfg cfg; spinlock_t lock; }; @@ -419,7 +405,7 @@ struct arm_smmu_option_prop { const char *prop; }; -static struct arm_smmu_option_prop arm_smmu_options [] = { +static struct arm_smmu_option_prop arm_smmu_options[] = { { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" }, { 0, NULL}, }; @@ -427,6 +413,7 @@ static struct arm_smmu_option_prop arm_smmu_options [] = { static void parse_driver_options(struct arm_smmu_device *smmu) { int i = 0; + do { if (of_property_read_bool(smmu->dev->of_node, arm_smmu_options[i].prop)) { @@ -437,6 +424,19 @@ static void parse_driver_options(struct arm_smmu_device *smmu) } while (arm_smmu_options[++i].opt); } +static struct device *dev_get_master_dev(struct device *dev) +{ + if (dev_is_pci(dev)) { + struct pci_bus *bus = to_pci_dev(dev)->bus; + + while (!pci_is_root_bus(bus)) + bus = bus->parent; + return bus->bridge->parent; + } + + return dev; +} + static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu, struct device_node *dev_node) { @@ -444,6 +444,7 @@ static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu, while (node) { struct arm_smmu_master *master; + master = container_of(node, struct arm_smmu_master, node); if (dev_node < master->of_node) @@ -457,6 +458,18 @@ static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu, return NULL; } +static struct arm_smmu_master_cfg * +find_smmu_master_cfg(struct arm_smmu_device *smmu, struct device *dev) +{ + struct arm_smmu_master *master; + + if (dev_is_pci(dev)) + return dev->archdata.iommu; + + master = find_smmu_master(smmu, dev->of_node); + return master ? 
&master->cfg : NULL; +} + static int insert_smmu_master(struct arm_smmu_device *smmu, struct arm_smmu_master *master) { @@ -465,8 +478,8 @@ static int insert_smmu_master(struct arm_smmu_device *smmu, new = &smmu->masters.rb_node; parent = NULL; while (*new) { - struct arm_smmu_master *this; - this = container_of(*new, struct arm_smmu_master, node); + struct arm_smmu_master *this + = container_of(*new, struct arm_smmu_master, node); parent = *new; if (master->of_node < this->of_node) @@ -508,33 +521,30 @@ static int register_smmu_master(struct arm_smmu_device *smmu, if (!master) return -ENOMEM; - master->of_node = masterspec->np; - master->num_streamids = masterspec->args_count; + master->of_node = masterspec->np; + master->cfg.num_streamids = masterspec->args_count; - for (i = 0; i < master->num_streamids; ++i) - master->streamids[i] = masterspec->args[i]; + for (i = 0; i < master->cfg.num_streamids; ++i) + master->cfg.streamids[i] = masterspec->args[i]; return insert_smmu_master(smmu, master); } -static struct arm_smmu_device *find_parent_smmu(struct arm_smmu_device *smmu) +static struct arm_smmu_device *find_smmu_for_device(struct device *dev) { - struct arm_smmu_device *parent; - - if (!smmu->parent_of_node) - return NULL; + struct arm_smmu_device *smmu; + struct arm_smmu_master *master = NULL; + struct device_node *dev_node = dev_get_master_dev(dev)->of_node; spin_lock(&arm_smmu_devices_lock); - list_for_each_entry(parent, &arm_smmu_devices, list) - if (parent->dev->of_node == smmu->parent_of_node) - goto out_unlock; - - parent = NULL; - dev_warn(smmu->dev, - "Failed to find SMMU parent despite parent in DT\n"); -out_unlock: + list_for_each_entry(smmu, &arm_smmu_devices, list) { + master = find_smmu_master(smmu, dev_node); + if (master) + break; + } spin_unlock(&arm_smmu_devices_lock); - return parent; + + return master ? 
smmu : NULL; } static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end) @@ -574,9 +584,10 @@ static void arm_smmu_tlb_sync(struct arm_smmu_device *smmu) } } -static void arm_smmu_tlb_inv_context(struct arm_smmu_cfg *cfg) +static void arm_smmu_tlb_inv_context(struct arm_smmu_domain *smmu_domain) { - struct arm_smmu_device *smmu = cfg->smmu; + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + struct arm_smmu_device *smmu = smmu_domain->smmu; void __iomem *base = ARM_SMMU_GR0(smmu); bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; @@ -600,11 +611,11 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) unsigned long iova; struct iommu_domain *domain = dev; struct arm_smmu_domain *smmu_domain = domain->priv; - struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; - struct arm_smmu_device *smmu = root_cfg->smmu; + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + struct arm_smmu_device *smmu = smmu_domain->smmu; void __iomem *cb_base; - cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, root_cfg->cbndx); + cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR); if (!(fsr & FSR_FAULT)) @@ -631,7 +642,7 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) } else { dev_err_ratelimited(smmu->dev, "Unhandled context fault: iova=0x%08lx, fsynr=0x%x, cb=%d\n", - iova, fsynr, root_cfg->cbndx); + iova, fsynr, cfg->cbndx); ret = IRQ_NONE; resume = RESUME_TERMINATE; } @@ -696,19 +707,19 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) { u32 reg; bool stage1; - struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; - struct arm_smmu_device *smmu = root_cfg->smmu; + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + struct arm_smmu_device *smmu = smmu_domain->smmu; void __iomem *cb_base, *gr0_base, *gr1_base; gr0_base = ARM_SMMU_GR0(smmu); gr1_base = ARM_SMMU_GR1(smmu); - stage1 = root_cfg->cbar != CBAR_TYPE_S2_TRANS; - cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, root_cfg->cbndx); + stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; + cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); /* CBAR */ - reg = root_cfg->cbar; + reg = cfg->cbar; if (smmu->version == 1) - reg |= root_cfg->irptndx << CBAR_IRPTNDX_SHIFT; + reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT; /* * Use the weakest shareability/memory types, so they are @@ -718,9 +729,9 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) | (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT); } else { - reg |= ARM_SMMU_CB_VMID(root_cfg) << CBAR_VMID_SHIFT; + reg |= ARM_SMMU_CB_VMID(cfg) << CBAR_VMID_SHIFT; } - writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(root_cfg->cbndx)); + writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx)); if (smmu->version > 1) { /* CBA2R */ @@ -730,7 +741,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) reg = CBA2R_RW64_32BIT; #endif writel_relaxed(reg, - gr1_base + ARM_SMMU_GR1_CBA2R(root_cfg->cbndx)); + gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx)); /* TTBCR2 */ switch (smmu->input_size) { @@ -780,13 +791,13 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) } /* TTBR0 */ - arm_smmu_flush_pgtable(smmu, root_cfg->pgd, + arm_smmu_flush_pgtable(smmu, cfg->pgd, PTRS_PER_PGD * sizeof(pgd_t)); - reg = __pa(root_cfg->pgd); + reg = __pa(cfg->pgd); writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); - reg = (phys_addr_t)__pa(root_cfg->pgd) >> 32; + reg = 
(phys_addr_t)__pa(cfg->pgd) >> 32; if (stage1) - reg |= ARM_SMMU_CB_ASID(root_cfg) << TTBRn_HI_ASID_SHIFT; + reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT; writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); /* @@ -800,6 +811,8 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) reg = TTBCR_TG0_64K; if (!stage1) { + reg |= (64 - smmu->s1_output_size) << TTBCR_T0SZ_SHIFT; + switch (smmu->s2_output_size) { case 32: reg |= (TTBCR2_ADDR_32 << TTBCR_PASIZE_SHIFT); @@ -821,7 +834,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) break; } } else { - reg |= (64 - smmu->s1_output_size) << TTBCR_T0SZ_SHIFT; + reg |= (64 - smmu->input_size) << TTBCR_T0SZ_SHIFT; } } else { reg = 0; @@ -853,44 +866,25 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) } static int arm_smmu_init_domain_context(struct iommu_domain *domain, - struct device *dev) + struct arm_smmu_device *smmu) { int irq, ret, start; struct arm_smmu_domain *smmu_domain = domain->priv; - struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; - struct arm_smmu_device *smmu, *parent; - - /* - * Walk the SMMU chain to find the root device for this chain. - * We assume that no masters have translations which terminate - * early, and therefore check that the root SMMU does indeed have - * a StreamID for the master in question. - */ - parent = dev->archdata.iommu; - smmu_domain->output_mask = -1; - do { - smmu = parent; - smmu_domain->output_mask &= (1ULL << smmu->s2_output_size) - 1; - } while ((parent = find_parent_smmu(smmu))); - - if (!find_smmu_master(smmu, dev->of_node)) { - dev_err(dev, "unable to find root SMMU for device\n"); - return -ENODEV; - } + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; if (smmu->features & ARM_SMMU_FEAT_TRANS_NESTED) { /* * We will likely want to change this if/when KVM gets * involved. 
*/ - root_cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS; + cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS; start = smmu->num_s2_context_banks; - } else if (smmu->features & ARM_SMMU_FEAT_TRANS_S2) { - root_cfg->cbar = CBAR_TYPE_S2_TRANS; - start = 0; - } else { - root_cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS; + } else if (smmu->features & ARM_SMMU_FEAT_TRANS_S1) { + cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS; start = smmu->num_s2_context_banks; + } else { + cfg->cbar = CBAR_TYPE_S2_TRANS; + start = 0; } ret = __arm_smmu_alloc_bitmap(smmu->context_map, start, @@ -898,38 +892,38 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, if (IS_ERR_VALUE(ret)) return ret; - root_cfg->cbndx = ret; + cfg->cbndx = ret; if (smmu->version == 1) { - root_cfg->irptndx = atomic_inc_return(&smmu->irptndx); - root_cfg->irptndx %= smmu->num_context_irqs; + cfg->irptndx = atomic_inc_return(&smmu->irptndx); + cfg->irptndx %= smmu->num_context_irqs; } else { - root_cfg->irptndx = root_cfg->cbndx; + cfg->irptndx = cfg->cbndx; } - irq = smmu->irqs[smmu->num_global_irqs + root_cfg->irptndx]; + irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; ret = request_irq(irq, arm_smmu_context_fault, IRQF_SHARED, "arm-smmu-context-fault", domain); if (IS_ERR_VALUE(ret)) { dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n", - root_cfg->irptndx, irq); - root_cfg->irptndx = INVALID_IRPTNDX; + cfg->irptndx, irq); + cfg->irptndx = INVALID_IRPTNDX; goto out_free_context; } - root_cfg->smmu = smmu; + smmu_domain->smmu = smmu; arm_smmu_init_context_bank(smmu_domain); - return ret; + return 0; out_free_context: - __arm_smmu_free_bitmap(smmu->context_map, root_cfg->cbndx); + __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); return ret; } static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = domain->priv; - struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; - struct arm_smmu_device *smmu = root_cfg->smmu; + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; void __iomem *cb_base; int irq; @@ -937,16 +931,16 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) return; /* Disable the context bank and nuke the TLB before freeing it. 
*/ - cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, root_cfg->cbndx); + cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); - arm_smmu_tlb_inv_context(root_cfg); + arm_smmu_tlb_inv_context(smmu_domain); - if (root_cfg->irptndx != INVALID_IRPTNDX) { - irq = smmu->irqs[smmu->num_global_irqs + root_cfg->irptndx]; + if (cfg->irptndx != INVALID_IRPTNDX) { + irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; free_irq(irq, domain); } - __arm_smmu_free_bitmap(smmu->context_map, root_cfg->cbndx); + __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); } static int arm_smmu_domain_init(struct iommu_domain *domain) @@ -963,10 +957,10 @@ static int arm_smmu_domain_init(struct iommu_domain *domain) if (!smmu_domain) return -ENOMEM; - pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); + pgd = kcalloc(PTRS_PER_PGD, sizeof(pgd_t), GFP_KERNEL); if (!pgd) goto out_free_domain; - smmu_domain->root_cfg.pgd = pgd; + smmu_domain->cfg.pgd = pgd; spin_lock_init(&smmu_domain->lock); domain->priv = smmu_domain; @@ -980,6 +974,7 @@ out_free_domain: static void arm_smmu_free_ptes(pmd_t *pmd) { pgtable_t table = pmd_pgtable(*pmd); + pgtable_page_dtor(table); __free_page(table); } @@ -1021,8 +1016,8 @@ static void arm_smmu_free_puds(pgd_t *pgd) static void arm_smmu_free_pgtables(struct arm_smmu_domain *smmu_domain) { int i; - struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; - pgd_t *pgd, *pgd_base = root_cfg->pgd; + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + pgd_t *pgd, *pgd_base = cfg->pgd; /* * Recursively free the page tables for this domain. We don't @@ -1054,7 +1049,7 @@ static void arm_smmu_domain_destroy(struct iommu_domain *domain) } static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu, - struct arm_smmu_master *master) + struct arm_smmu_master_cfg *cfg) { int i; struct arm_smmu_smr *smrs; @@ -1063,18 +1058,18 @@ static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu, if (!(smmu->features & ARM_SMMU_FEAT_STREAM_MATCH)) return 0; - if (master->smrs) + if (cfg->smrs) return -EEXIST; - smrs = kmalloc(sizeof(*smrs) * master->num_streamids, GFP_KERNEL); + smrs = kmalloc_array(cfg->num_streamids, sizeof(*smrs), GFP_KERNEL); if (!smrs) { - dev_err(smmu->dev, "failed to allocate %d SMRs for master %s\n", - master->num_streamids, master->of_node->name); + dev_err(smmu->dev, "failed to allocate %d SMRs\n", + cfg->num_streamids); return -ENOMEM; } - /* Allocate the SMRs on the root SMMU */ - for (i = 0; i < master->num_streamids; ++i) { + /* Allocate the SMRs on the SMMU */ + for (i = 0; i < cfg->num_streamids; ++i) { int idx = __arm_smmu_alloc_bitmap(smmu->smr_map, 0, smmu->num_mapping_groups); if (IS_ERR_VALUE(idx)) { @@ -1085,18 +1080,18 @@ static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu, smrs[i] = (struct arm_smmu_smr) { .idx = idx, .mask = 0, /* We don't currently share SMRs */ - .id = master->streamids[i], + .id = cfg->streamids[i], }; } /* It worked! 
Now, poke the actual hardware */ - for (i = 0; i < master->num_streamids; ++i) { + for (i = 0; i < cfg->num_streamids; ++i) { u32 reg = SMR_VALID | smrs[i].id << SMR_ID_SHIFT | smrs[i].mask << SMR_MASK_SHIFT; writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_SMR(smrs[i].idx)); } - master->smrs = smrs; + cfg->smrs = smrs; return 0; err_free_smrs: @@ -1107,68 +1102,55 @@ err_free_smrs: } static void arm_smmu_master_free_smrs(struct arm_smmu_device *smmu, - struct arm_smmu_master *master) + struct arm_smmu_master_cfg *cfg) { int i; void __iomem *gr0_base = ARM_SMMU_GR0(smmu); - struct arm_smmu_smr *smrs = master->smrs; + struct arm_smmu_smr *smrs = cfg->smrs; /* Invalidate the SMRs before freeing back to the allocator */ - for (i = 0; i < master->num_streamids; ++i) { + for (i = 0; i < cfg->num_streamids; ++i) { u8 idx = smrs[i].idx; + writel_relaxed(~SMR_VALID, gr0_base + ARM_SMMU_GR0_SMR(idx)); __arm_smmu_free_bitmap(smmu->smr_map, idx); } - master->smrs = NULL; + cfg->smrs = NULL; kfree(smrs); } static void arm_smmu_bypass_stream_mapping(struct arm_smmu_device *smmu, - struct arm_smmu_master *master) + struct arm_smmu_master_cfg *cfg) { int i; void __iomem *gr0_base = ARM_SMMU_GR0(smmu); - for (i = 0; i < master->num_streamids; ++i) { - u16 sid = master->streamids[i]; + for (i = 0; i < cfg->num_streamids; ++i) { + u16 sid = cfg->streamids[i]; + writel_relaxed(S2CR_TYPE_BYPASS, gr0_base + ARM_SMMU_GR0_S2CR(sid)); } } static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_master *master) + struct arm_smmu_master_cfg *cfg) { int i, ret; - struct arm_smmu_device *parent, *smmu = smmu_domain->root_cfg.smmu; + struct arm_smmu_device *smmu = smmu_domain->smmu; void __iomem *gr0_base = ARM_SMMU_GR0(smmu); - ret = arm_smmu_master_configure_smrs(smmu, master); + ret = arm_smmu_master_configure_smrs(smmu, cfg); if (ret) return ret; - /* Bypass the leaves */ - smmu = smmu_domain->leaf_smmu; - while ((parent = find_parent_smmu(smmu))) { - /* - * We won't have a StreamID match for anything but the root - * smmu, so we only need to worry about StreamID indexing, - * where we must install bypass entries in the S2CRs. - */ - if (smmu->features & ARM_SMMU_FEAT_STREAM_MATCH) - continue; - - arm_smmu_bypass_stream_mapping(smmu, master); - smmu = parent; - } - - /* Now we're at the root, time to point at our context bank */ - for (i = 0; i < master->num_streamids; ++i) { + for (i = 0; i < cfg->num_streamids; ++i) { u32 idx, s2cr; - idx = master->smrs ? master->smrs[i].idx : master->streamids[i]; + + idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i]; s2cr = S2CR_TYPE_TRANS | - (smmu_domain->root_cfg.cbndx << S2CR_CBNDX_SHIFT); + (smmu_domain->cfg.cbndx << S2CR_CBNDX_SHIFT); writel_relaxed(s2cr, gr0_base + ARM_SMMU_GR0_S2CR(idx)); } @@ -1176,58 +1158,57 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain, } static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_master *master) + struct arm_smmu_master_cfg *cfg) { - struct arm_smmu_device *smmu = smmu_domain->root_cfg.smmu; + struct arm_smmu_device *smmu = smmu_domain->smmu; /* * We *must* clear the S2CR first, because freeing the SMR means * that it can be re-allocated immediately. 
*/ - arm_smmu_bypass_stream_mapping(smmu, master); - arm_smmu_master_free_smrs(smmu, master); + arm_smmu_bypass_stream_mapping(smmu, cfg); + arm_smmu_master_free_smrs(smmu, cfg); } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) { int ret = -EINVAL; struct arm_smmu_domain *smmu_domain = domain->priv; - struct arm_smmu_device *device_smmu = dev->archdata.iommu; - struct arm_smmu_master *master; + struct arm_smmu_device *smmu; + struct arm_smmu_master_cfg *cfg; unsigned long flags; - if (!device_smmu) { + smmu = dev_get_master_dev(dev)->archdata.iommu; + if (!smmu) { dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n"); return -ENXIO; } /* - * Sanity check the domain. We don't currently support domains - * that cross between different SMMU chains. + * Sanity check the domain. We don't support domains across + * different SMMUs. */ spin_lock_irqsave(&smmu_domain->lock, flags); - if (!smmu_domain->leaf_smmu) { + if (!smmu_domain->smmu) { /* Now that we have a master, we can finalise the domain */ - ret = arm_smmu_init_domain_context(domain, dev); + ret = arm_smmu_init_domain_context(domain, smmu); if (IS_ERR_VALUE(ret)) goto err_unlock; - - smmu_domain->leaf_smmu = device_smmu; - } else if (smmu_domain->leaf_smmu != device_smmu) { + } else if (smmu_domain->smmu != smmu) { dev_err(dev, "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n", - dev_name(smmu_domain->leaf_smmu->dev), - dev_name(device_smmu->dev)); + dev_name(smmu_domain->smmu->dev), + dev_name(smmu->dev)); goto err_unlock; } spin_unlock_irqrestore(&smmu_domain->lock, flags); /* Looks ok, so add the device to the domain */ - master = find_smmu_master(smmu_domain->leaf_smmu, dev->of_node); - if (!master) + cfg = find_smmu_master_cfg(smmu_domain->smmu, dev); + if (!cfg) return -ENODEV; - return arm_smmu_domain_add_master(smmu_domain, master); + return arm_smmu_domain_add_master(smmu_domain, cfg); err_unlock: spin_unlock_irqrestore(&smmu_domain->lock, flags); @@ -1237,11 +1218,11 @@ err_unlock: static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device *dev) { struct arm_smmu_domain *smmu_domain = domain->priv; - struct arm_smmu_master *master; + struct arm_smmu_master_cfg *cfg; - master = find_smmu_master(smmu_domain->leaf_smmu, dev->of_node); - if (master) - arm_smmu_domain_remove_master(smmu_domain, master); + cfg = find_smmu_master_cfg(smmu_domain->smmu, dev); + if (cfg) + arm_smmu_domain_remove_master(smmu_domain, cfg); } static bool arm_smmu_pte_is_contiguous_range(unsigned long addr, @@ -1261,6 +1242,7 @@ static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd, if (pmd_none(*pmd)) { /* Allocate a new set of tables */ pgtable_t table = alloc_page(GFP_ATOMIC|__GFP_ZERO); + if (!table) return -ENOMEM; @@ -1326,6 +1308,7 @@ static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd, */ do { int i = 1; + pteval &= ~ARM_SMMU_PTE_CONT; if (arm_smmu_pte_is_contiguous_range(addr, end)) { @@ -1340,7 +1323,8 @@ static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd, idx &= ~(ARM_SMMU_PTE_CONT_ENTRIES - 1); cont_start = pmd_page_vaddr(*pmd) + idx; for (j = 0; j < ARM_SMMU_PTE_CONT_ENTRIES; ++j) - pte_val(*(cont_start + j)) &= ~ARM_SMMU_PTE_CONT; + pte_val(*(cont_start + j)) &= + ~ARM_SMMU_PTE_CONT; arm_smmu_flush_pgtable(smmu, cont_start, sizeof(*pte) * @@ -1429,12 +1413,12 @@ static int arm_smmu_handle_mapping(struct arm_smmu_domain *smmu_domain, int ret, stage; unsigned long end; phys_addr_t input_mask, 
output_mask; - struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; - pgd_t *pgd = root_cfg->pgd; - struct arm_smmu_device *smmu = root_cfg->smmu; + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + pgd_t *pgd = cfg->pgd; unsigned long flags; - if (root_cfg->cbar == CBAR_TYPE_S2_TRANS) { + if (cfg->cbar == CBAR_TYPE_S2_TRANS) { stage = 2; output_mask = (1ULL << smmu->s2_output_size) - 1; } else { @@ -1484,10 +1468,6 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, if (!smmu_domain) return -ENODEV; - /* Check for silent address truncation up the SMMU chain. */ - if ((phys_addr_t)iova & ~smmu_domain->output_mask) - return -ERANGE; - return arm_smmu_handle_mapping(smmu_domain, iova, paddr, size, prot); } @@ -1498,7 +1478,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, struct arm_smmu_domain *smmu_domain = domain->priv; ret = arm_smmu_handle_mapping(smmu_domain, iova, 0, size, 0); - arm_smmu_tlb_inv_context(&smmu_domain->root_cfg); + arm_smmu_tlb_inv_context(smmu_domain); return ret ? 0 : size; } @@ -1510,9 +1490,9 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, pmd_t pmd; pte_t pte; struct arm_smmu_domain *smmu_domain = domain->priv; - struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - pgdp = root_cfg->pgd; + pgdp = cfg->pgd; if (!pgdp) return 0; @@ -1538,19 +1518,29 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, static int arm_smmu_domain_has_cap(struct iommu_domain *domain, unsigned long cap) { - unsigned long caps = 0; struct arm_smmu_domain *smmu_domain = domain->priv; + struct arm_smmu_device *smmu = smmu_domain->smmu; + u32 features = smmu ? smmu->features : 0; + + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return features & ARM_SMMU_FEAT_COHERENT_WALK; + case IOMMU_CAP_INTR_REMAP: + return 1; /* MSIs are just memory writes */ + default: + return 0; + } +} - if (smmu_domain->root_cfg.smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) - caps |= IOMMU_CAP_CACHE_COHERENCY; - - return !!(cap & caps); +static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data) +{ + *((u16 *)data) = alias; + return 0; /* Continue walking */ } static int arm_smmu_add_device(struct device *dev) { - struct arm_smmu_device *child, *parent, *smmu; - struct arm_smmu_master *master = NULL; + struct arm_smmu_device *smmu; struct iommu_group *group; int ret; @@ -1559,35 +1549,8 @@ static int arm_smmu_add_device(struct device *dev) return -EINVAL; } - spin_lock(&arm_smmu_devices_lock); - list_for_each_entry(parent, &arm_smmu_devices, list) { - smmu = parent; - - /* Try to find a child of the current SMMU. */ - list_for_each_entry(child, &arm_smmu_devices, list) { - if (child->parent_of_node == parent->dev->of_node) { - /* Does the child sit above our master? */ - master = find_smmu_master(child, dev->of_node); - if (master) { - smmu = NULL; - break; - } - } - } - - /* We found some children, so keep searching. 
*/ - if (!smmu) { - master = NULL; - continue; - } - - master = find_smmu_master(smmu, dev->of_node); - if (master) - break; - } - spin_unlock(&arm_smmu_devices_lock); - - if (!master) + smmu = find_smmu_for_device(dev); + if (!smmu) return -ENODEV; group = iommu_group_alloc(); @@ -1596,20 +1559,45 @@ static int arm_smmu_add_device(struct device *dev) return PTR_ERR(group); } + if (dev_is_pci(dev)) { + struct arm_smmu_master_cfg *cfg; + struct pci_dev *pdev = to_pci_dev(dev); + + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) { + ret = -ENOMEM; + goto out_put_group; + } + + cfg->num_streamids = 1; + /* + * Assume Stream ID == Requester ID for now. + * We need a way to describe the ID mappings in FDT. + */ + pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, + &cfg->streamids[0]); + dev->archdata.iommu = cfg; + } else { + dev->archdata.iommu = smmu; + } + ret = iommu_group_add_device(group, dev); - iommu_group_put(group); - dev->archdata.iommu = smmu; +out_put_group: + iommu_group_put(group); return ret; } static void arm_smmu_remove_device(struct device *dev) { + if (dev_is_pci(dev)) + kfree(dev->archdata.iommu); + dev->archdata.iommu = NULL; iommu_group_remove_device(dev); } -static struct iommu_ops arm_smmu_ops = { +static const struct iommu_ops arm_smmu_ops = { .domain_init = arm_smmu_domain_init, .domain_destroy = arm_smmu_domain_destroy, .attach_dev = arm_smmu_attach_dev, @@ -1639,7 +1627,8 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) /* Mark all SMRn as invalid and all S2CRn as bypass */ for (i = 0; i < smmu->num_mapping_groups; ++i) { writel_relaxed(~SMR_VALID, gr0_base + ARM_SMMU_GR0_SMR(i)); - writel_relaxed(S2CR_TYPE_BYPASS, gr0_base + ARM_SMMU_GR0_S2CR(i)); + writel_relaxed(S2CR_TYPE_BYPASS, + gr0_base + ARM_SMMU_GR0_S2CR(i)); } /* Make sure all context banks are disabled and clear CB_FSR */ @@ -1779,11 +1768,13 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) smmu->pagesize = (id & ID1_PAGESIZE) ? SZ_64K : SZ_4K; /* Check for size mismatch of SMMU address space from mapped region */ - size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1); + size = 1 << + (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1); size *= (smmu->pagesize << 1); if (smmu->size != size) - dev_warn(smmu->dev, "SMMU address space size (0x%lx) differs " - "from mapped region size (0x%lx)!\n", size, smmu->size); + dev_warn(smmu->dev, + "SMMU address space size (0x%lx) differs from mapped region size (0x%lx)!\n", + size, smmu->size); smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK; @@ -1804,14 +1795,14 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) * allocation (PTRS_PER_PGD). 
*/ #ifdef CONFIG_64BIT - smmu->s1_output_size = min((unsigned long)VA_BITS, size); + smmu->s1_output_size = min_t(unsigned long, VA_BITS, size); #else smmu->s1_output_size = min(32UL, size); #endif /* The stage-2 output mask is also applied for bypass */ size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK); - smmu->s2_output_size = min((unsigned long)PHYS_MASK_SHIFT, size); + smmu->s2_output_size = min_t(unsigned long, PHYS_MASK_SHIFT, size); if (smmu->version == 1) { smmu->input_size = 32; @@ -1835,7 +1826,8 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) dev_notice(smmu->dev, "\t%lu-bit VA, %lu-bit IPA, %lu-bit PA\n", - smmu->input_size, smmu->s1_output_size, smmu->s2_output_size); + smmu->input_size, smmu->s1_output_size, + smmu->s2_output_size); return 0; } @@ -1843,7 +1835,6 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) { struct resource *res; struct arm_smmu_device *smmu; - struct device_node *dev_node; struct device *dev = &pdev->dev; struct rb_node *node; struct of_phandle_args masterspec; @@ -1890,6 +1881,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) for (i = 0; i < num_irqs; ++i) { int irq = platform_get_irq(pdev, i); + if (irq < 0) { dev_err(dev, "failed to get irq index %d\n", i); return -ENODEV; @@ -1913,12 +1905,9 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) } dev_notice(dev, "registered %d master devices\n", i); - if ((dev_node = of_parse_phandle(dev->of_node, "smmu-parent", 0))) - smmu->parent_of_node = dev_node; - err = arm_smmu_device_cfg_probe(smmu); if (err) - goto out_put_parent; + goto out_put_masters; parse_driver_options(smmu); @@ -1928,7 +1917,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) "found only %d context interrupt(s) but %d required\n", smmu->num_context_irqs, smmu->num_context_banks); err = -ENODEV; - goto out_put_parent; + goto out_put_masters; } for (i = 0; i < smmu->num_global_irqs; ++i) { @@ -1956,14 +1945,10 @@ out_free_irqs: while (i--) free_irq(smmu->irqs[i], smmu); -out_put_parent: - if (smmu->parent_of_node) - of_node_put(smmu->parent_of_node); - out_put_masters: for (node = rb_first(&smmu->masters); node; node = rb_next(node)) { - struct arm_smmu_master *master; - master = container_of(node, struct arm_smmu_master, node); + struct arm_smmu_master *master + = container_of(node, struct arm_smmu_master, node); of_node_put(master->of_node); } @@ -1990,12 +1975,9 @@ static int arm_smmu_device_remove(struct platform_device *pdev) if (!smmu) return -ENODEV; - if (smmu->parent_of_node) - of_node_put(smmu->parent_of_node); - for (node = rb_first(&smmu->masters); node; node = rb_next(node)) { - struct arm_smmu_master *master; - master = container_of(node, struct arm_smmu_master, node); + struct arm_smmu_master *master + = container_of(node, struct arm_smmu_master, node); of_node_put(master->of_node); } @@ -2006,7 +1988,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev) free_irq(smmu->irqs[i], smmu); /* Turn the thing off */ - writel(sCR0_CLIENTPD,ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); + writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); return 0; } @@ -2048,6 +2030,11 @@ static int __init arm_smmu_init(void) bus_set_iommu(&amba_bustype, &arm_smmu_ops); #endif +#ifdef CONFIG_PCI + if (!iommu_present(&pci_bus_type)) + bus_set_iommu(&pci_bus_type, &arm_smmu_ops); +#endif + return 0; } diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 9a4f05e5b23f..4306885f48b1 100644 --- 
a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -38,6 +38,7 @@ #include <linux/tboot.h> #include <linux/dmi.h> #include <linux/slab.h> +#include <linux/iommu.h> #include <asm/irq_remapping.h> #include <asm/iommu_table.h> @@ -980,6 +981,12 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) raw_spin_lock_init(&iommu->register_lock); drhd->iommu = iommu; + + if (intel_iommu_enabled) + iommu->iommu_dev = iommu_device_create(NULL, iommu, + intel_iommu_groups, + iommu->name); + return 0; err_unmap: @@ -991,6 +998,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) static void free_iommu(struct intel_iommu *iommu) { + iommu_device_destroy(iommu->iommu_dev); + if (iommu->irq) { free_irq(iommu->irq, iommu); irq_set_handler_data(iommu->irq, NULL); @@ -1339,9 +1348,6 @@ int dmar_enable_qi(struct intel_iommu *iommu) return -ENOMEM; } - qi->free_head = qi->free_tail = 0; - qi->free_cnt = QI_LENGTH; - raw_spin_lock_init(&qi->q_lock); __dmar_enable_qi(iommu); diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 99054d2c040d..d037e87a1fe5 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1170,7 +1170,7 @@ static void exynos_iommu_remove_device(struct device *dev) iommu_group_remove_device(dev); } -static struct iommu_ops exynos_iommu_ops = { +static const struct iommu_ops exynos_iommu_ops = { .domain_init = exynos_iommu_domain_init, .domain_destroy = exynos_iommu_domain_destroy, .attach_dev = exynos_iommu_attach_device, diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index bb446d742a2d..2b6ce9387af1 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -92,7 +92,7 @@ struct gen_pool *spaace_pool; * subwindow count per liodn. * */ -u32 pamu_get_max_subwin_cnt() +u32 pamu_get_max_subwin_cnt(void) { return max_subwindow_count; } diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index af47648301a9..61d1dafa242d 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -38,7 +38,6 @@ #include <sysdev/fsl_pci.h> #include "fsl_pamu_domain.h" -#include "pci.h" /* * Global spinlock that needs to be held while @@ -887,8 +886,6 @@ static int fsl_pamu_get_domain_attr(struct iommu_domain *domain, return ret; } -#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) - static struct iommu_group *get_device_iommu_group(struct device *dev) { struct iommu_group *group; @@ -945,74 +942,13 @@ static struct iommu_group *get_pci_device_group(struct pci_dev *pdev) struct pci_controller *pci_ctl; bool pci_endpt_partioning; struct iommu_group *group = NULL; - struct pci_dev *bridge, *dma_pdev = NULL; pci_ctl = pci_bus_to_host(pdev->bus); pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl); /* We can partition PCIe devices so assign device group to the device */ if (pci_endpt_partioning) { - bridge = pci_find_upstream_pcie_bridge(pdev); - if (bridge) { - if (pci_is_pcie(bridge)) - dma_pdev = pci_get_domain_bus_and_slot( - pci_domain_nr(pdev->bus), - bridge->subordinate->number, 0); - if (!dma_pdev) - dma_pdev = pci_dev_get(bridge); - } else - dma_pdev = pci_dev_get(pdev); - - /* Account for quirked devices */ - swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev)); - - /* - * If it's a multifunction device that does not support our - * required ACS flags, add to the same group as lowest numbered - * function that also does not suport the required ACS flags. 
- */ - if (dma_pdev->multifunction && - !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) { - u8 i, slot = PCI_SLOT(dma_pdev->devfn); - - for (i = 0; i < 8; i++) { - struct pci_dev *tmp; - - tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i)); - if (!tmp) - continue; - - if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) { - swap_pci_ref(&dma_pdev, tmp); - break; - } - pci_dev_put(tmp); - } - } - - /* - * Devices on the root bus go through the iommu. If that's not us, - * find the next upstream device and test ACS up to the root bus. - * Finding the next device may require skipping virtual buses. - */ - while (!pci_is_root_bus(dma_pdev->bus)) { - struct pci_bus *bus = dma_pdev->bus; - - while (!bus->self) { - if (!pci_is_root_bus(bus)) - bus = bus->parent; - else - goto root_bus; - } - - if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) - break; - - swap_pci_ref(&dma_pdev, pci_dev_get(bus->self)); - } + group = iommu_group_get_for_dev(&pdev->dev); -root_bus: - group = get_device_iommu_group(&dma_pdev->dev); - pci_dev_put(dma_pdev); /* * PCIe controller is not a paritionable entity * free the controller device iommu_group. @@ -1116,8 +1052,7 @@ static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count) ret = pamu_set_domain_geometry(dma_domain, &domain->geometry, ((w_count > 1) ? w_count : 0)); if (!ret) { - if (dma_domain->win_arr) - kfree(dma_domain->win_arr); + kfree(dma_domain->win_arr); dma_domain->win_arr = kzalloc(sizeof(struct dma_window) * w_count, GFP_ATOMIC); if (!dma_domain->win_arr) { @@ -1138,7 +1073,7 @@ static u32 fsl_pamu_get_windows(struct iommu_domain *domain) return dma_domain->win_cnt; } -static struct iommu_ops fsl_pamu_ops = { +static const struct iommu_ops fsl_pamu_ops = { .domain_init = fsl_pamu_domain_init, .domain_destroy = fsl_pamu_domain_destroy, .attach_dev = fsl_pamu_attach_device, @@ -1155,7 +1090,7 @@ static struct iommu_ops fsl_pamu_ops = { .remove_device = fsl_pamu_remove_device, }; -int pamu_domain_init() +int pamu_domain_init(void) { int ret = 0; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 51b6b77dc3e5..d1f5caad04f9 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -45,7 +45,6 @@ #include <asm/iommu.h> #include "irq_remapping.h" -#include "pci.h" #define ROOT_SIZE VTD_PAGE_SIZE #define CONTEXT_SIZE VTD_PAGE_SIZE @@ -304,7 +303,7 @@ static inline bool dma_pte_present(struct dma_pte *pte) static inline bool dma_pte_superpage(struct dma_pte *pte) { - return (pte->val & (1 << 7)); + return (pte->val & DMA_PTE_LARGE_PAGE); } static inline int first_pte_in_page(struct dma_pte *pte) @@ -321,16 +320,13 @@ static inline int first_pte_in_page(struct dma_pte *pte) static struct dmar_domain *si_domain; static int hw_pass_through = 1; -/* devices under the same p2p bridge are owned in one domain */ -#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0) - /* domain represents a virtual machine, more than one devices * across iommus may be owned in one domain, e.g. kvm guest. 
*/ -#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1) +#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0) /* si_domain contains mulitple devices */ -#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2) +#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1) /* define the limit of IOMMUs supported in each domain */ #ifdef CONFIG_X86 @@ -429,6 +425,8 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, struct device *dev); static void iommu_detach_dependent_devices(struct intel_iommu *iommu, struct device *dev); +static int domain_detach_iommu(struct dmar_domain *domain, + struct intel_iommu *iommu); #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON int dmar_disabled = 0; @@ -451,7 +449,7 @@ EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); static DEFINE_SPINLOCK(device_domain_lock); static LIST_HEAD(device_domain_list); -static struct iommu_ops intel_iommu_ops; +static const struct iommu_ops intel_iommu_ops; static int __init intel_iommu_setup(char *str) { @@ -540,6 +538,24 @@ void free_iova_mem(struct iova *iova) kmem_cache_free(iommu_iova_cache, iova); } +static inline int domain_type_is_vm(struct dmar_domain *domain) +{ + return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE; +} + +static inline int domain_type_is_vm_or_si(struct dmar_domain *domain) +{ + return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE | + DOMAIN_FLAG_STATIC_IDENTITY); +} + +static inline int domain_pfn_supported(struct dmar_domain *domain, + unsigned long pfn) +{ + int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + + return !(addr_width < BITS_PER_LONG && pfn >> addr_width); +} static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw) { @@ -580,9 +596,7 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) int iommu_id; /* si_domain and vm domain should not get here. 
*/ - BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE); - BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY); - + BUG_ON(domain_type_is_vm_or_si(domain)); iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus); if (iommu_id < 0 || iommu_id >= g_num_of_iommus) return NULL; @@ -619,50 +633,56 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain) rcu_read_unlock(); } -static void domain_update_iommu_snooping(struct dmar_domain *domain) +static int domain_update_iommu_snooping(struct intel_iommu *skip) { - int i; - - domain->iommu_snooping = 1; + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + int ret = 1; - for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) { - if (!ecap_sc_support(g_iommus[i]->ecap)) { - domain->iommu_snooping = 0; - break; + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (iommu != skip) { + if (!ecap_sc_support(iommu->ecap)) { + ret = 0; + break; + } } } + rcu_read_unlock(); + + return ret; } -static void domain_update_iommu_superpage(struct dmar_domain *domain) +static int domain_update_iommu_superpage(struct intel_iommu *skip) { struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu = NULL; + struct intel_iommu *iommu; int mask = 0xf; if (!intel_iommu_superpage) { - domain->iommu_superpage = 0; - return; + return 0; } /* set iommu_superpage to the smallest common denominator */ rcu_read_lock(); for_each_active_iommu(iommu, drhd) { - mask &= cap_super_page_val(iommu->cap); - if (!mask) { - break; + if (iommu != skip) { + mask &= cap_super_page_val(iommu->cap); + if (!mask) + break; } } rcu_read_unlock(); - domain->iommu_superpage = fls(mask); + return fls(mask); } /* Some capabilities may be different across iommus */ static void domain_update_iommu_cap(struct dmar_domain *domain) { domain_update_iommu_coherency(domain); - domain_update_iommu_snooping(domain); - domain_update_iommu_superpage(domain); + domain->iommu_snooping = domain_update_iommu_snooping(NULL); + domain->iommu_superpage = domain_update_iommu_superpage(NULL); } static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) @@ -671,7 +691,7 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf struct intel_iommu *iommu; struct device *tmp; struct pci_dev *ptmp, *pdev = NULL; - u16 segment; + u16 segment = 0; int i; if (dev_is_pci(dev)) { @@ -816,14 +836,13 @@ out: static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, unsigned long pfn, int *target_level) { - int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; struct dma_pte *parent, *pte = NULL; int level = agaw_to_level(domain->agaw); int offset; BUG_ON(!domain->pgd); - if (addr_width < BITS_PER_LONG && pfn >> addr_width) + if (!domain_pfn_supported(domain, pfn)) /* Address beyond IOMMU's addressing capabilities. */ return NULL; @@ -849,13 +868,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; - if (cmpxchg64(&pte->val, 0ULL, pteval)) { + if (cmpxchg64(&pte->val, 0ULL, pteval)) /* Someone else set it while we were thinking; use theirs. 
*/ free_pgtable_page(tmp_page); - } else { - dma_pte_addr(pte); + else domain_flush_cache(domain, pte, sizeof(*pte)); - } } if (level == 1) break; @@ -892,7 +909,7 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, break; } - if (pte->val & DMA_PTE_LARGE_PAGE) { + if (dma_pte_superpage(pte)) { *large_page = total; return pte; } @@ -908,12 +925,11 @@ static void dma_pte_clear_range(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn) { - int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; unsigned int large_page = 1; struct dma_pte *first_pte, *pte; - BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); - BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); + BUG_ON(!domain_pfn_supported(domain, start_pfn)); + BUG_ON(!domain_pfn_supported(domain, last_pfn)); BUG_ON(start_pfn > last_pfn); /* we don't need lock here; nobody else touches the iova range */ @@ -974,12 +990,12 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn) { - int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; - - BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); - BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); + BUG_ON(!domain_pfn_supported(domain, start_pfn)); + BUG_ON(!domain_pfn_supported(domain, last_pfn)); BUG_ON(start_pfn > last_pfn); + dma_pte_clear_range(domain, start_pfn, last_pfn); + /* We don't need lock here; nobody else touches the iova range */ dma_pte_free_level(domain, agaw_to_level(domain->agaw), domain->pgd, 0, start_pfn, last_pfn); @@ -1077,11 +1093,10 @@ struct page *domain_unmap(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn) { - int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; struct page *freelist = NULL; - BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); - BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); + BUG_ON(!domain_pfn_supported(domain, start_pfn)); + BUG_ON(!domain_pfn_supported(domain, last_pfn)); BUG_ON(start_pfn > last_pfn); /* we don't need lock here; nobody else touches the iova range */ @@ -1275,7 +1290,8 @@ iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu, spin_lock_irqsave(&device_domain_lock, flags); list_for_each_entry(info, &domain->devices, link) - if (info->bus == bus && info->devfn == devfn) { + if (info->iommu == iommu && info->bus == bus && + info->devfn == devfn) { found = 1; break; } @@ -1384,7 +1400,7 @@ static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) raw_spin_unlock_irqrestore(&iommu->register_lock, flags); } -static int iommu_enable_translation(struct intel_iommu *iommu) +static void iommu_enable_translation(struct intel_iommu *iommu) { u32 sts; unsigned long flags; @@ -1398,10 +1414,9 @@ static int iommu_enable_translation(struct intel_iommu *iommu) readl, (sts & DMA_GSTS_TES), sts); raw_spin_unlock_irqrestore(&iommu->register_lock, flags); - return 0; } -static int iommu_disable_translation(struct intel_iommu *iommu) +static void iommu_disable_translation(struct intel_iommu *iommu) { u32 sts; unsigned long flag; @@ -1415,7 +1430,6 @@ static int iommu_disable_translation(struct intel_iommu *iommu) readl, (!(sts & DMA_GSTS_TES)), sts); raw_spin_unlock_irqrestore(&iommu->register_lock, flag); - return 0; } @@ -1462,8 +1476,7 @@ static int iommu_init_domains(struct intel_iommu *iommu) static void free_dmar_iommu(struct intel_iommu *iommu) { struct dmar_domain *domain; - int i, 
count; - unsigned long flags; + int i; if ((iommu->domains) && (iommu->domain_ids)) { for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) { @@ -1476,11 +1489,8 @@ static void free_dmar_iommu(struct intel_iommu *iommu) domain = iommu->domains[i]; clear_bit(i, iommu->domain_ids); - - spin_lock_irqsave(&domain->iommu_lock, flags); - count = --domain->iommu_count; - spin_unlock_irqrestore(&domain->iommu_lock, flags); - if (count == 0) + if (domain_detach_iommu(domain, iommu) == 0 && + !domain_type_is_vm(domain)) domain_exit(domain); } } @@ -1499,7 +1509,7 @@ static void free_dmar_iommu(struct intel_iommu *iommu) free_context_table(iommu); } -static struct dmar_domain *alloc_domain(bool vm) +static struct dmar_domain *alloc_domain(int flags) { /* domain id for virtual machine, it won't be set in context */ static atomic_t vm_domid = ATOMIC_INIT(0); @@ -1509,46 +1519,62 @@ static struct dmar_domain *alloc_domain(bool vm) if (!domain) return NULL; + memset(domain, 0, sizeof(*domain)); domain->nid = -1; - domain->iommu_count = 0; - memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp)); - domain->flags = 0; + domain->flags = flags; spin_lock_init(&domain->iommu_lock); INIT_LIST_HEAD(&domain->devices); - if (vm) { + if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE) domain->id = atomic_inc_return(&vm_domid); - domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE; - } return domain; } -static int iommu_attach_domain(struct dmar_domain *domain, - struct intel_iommu *iommu) +static int __iommu_attach_domain(struct dmar_domain *domain, + struct intel_iommu *iommu) { int num; unsigned long ndomains; - unsigned long flags; ndomains = cap_ndoms(iommu->cap); - - spin_lock_irqsave(&iommu->lock, flags); - num = find_first_zero_bit(iommu->domain_ids, ndomains); - if (num >= ndomains) { - spin_unlock_irqrestore(&iommu->lock, flags); - printk(KERN_ERR "IOMMU: no free domain ids\n"); - return -ENOMEM; + if (num < ndomains) { + set_bit(num, iommu->domain_ids); + iommu->domains[num] = domain; + } else { + num = -ENOSPC; } - domain->id = num; - domain->iommu_count++; - set_bit(num, iommu->domain_ids); - set_bit(iommu->seq_id, domain->iommu_bmp); - iommu->domains[num] = domain; + return num; +} + +static int iommu_attach_domain(struct dmar_domain *domain, + struct intel_iommu *iommu) +{ + int num; + unsigned long flags; + + spin_lock_irqsave(&iommu->lock, flags); + num = __iommu_attach_domain(domain, iommu); spin_unlock_irqrestore(&iommu->lock, flags); + if (num < 0) + pr_err("IOMMU: no free domain ids\n"); - return 0; + return num; +} + +static int iommu_attach_vm_domain(struct dmar_domain *domain, + struct intel_iommu *iommu) +{ + int num; + unsigned long ndomains; + + ndomains = cap_ndoms(iommu->cap); + for_each_set_bit(num, iommu->domain_ids, ndomains) + if (iommu->domains[num] == domain) + return num; + + return __iommu_attach_domain(domain, iommu); } static void iommu_detach_domain(struct dmar_domain *domain, @@ -1558,17 +1584,53 @@ static void iommu_detach_domain(struct dmar_domain *domain, int num, ndomains; spin_lock_irqsave(&iommu->lock, flags); - ndomains = cap_ndoms(iommu->cap); - for_each_set_bit(num, iommu->domain_ids, ndomains) { - if (iommu->domains[num] == domain) { - clear_bit(num, iommu->domain_ids); - iommu->domains[num] = NULL; - break; + if (domain_type_is_vm_or_si(domain)) { + ndomains = cap_ndoms(iommu->cap); + for_each_set_bit(num, iommu->domain_ids, ndomains) { + if (iommu->domains[num] == domain) { + clear_bit(num, iommu->domain_ids); + iommu->domains[num] = NULL; + break; + } } + } else { + 
clear_bit(domain->id, iommu->domain_ids); + iommu->domains[domain->id] = NULL; } spin_unlock_irqrestore(&iommu->lock, flags); } +static void domain_attach_iommu(struct dmar_domain *domain, + struct intel_iommu *iommu) +{ + unsigned long flags; + + spin_lock_irqsave(&domain->iommu_lock, flags); + if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) { + domain->iommu_count++; + if (domain->iommu_count == 1) + domain->nid = iommu->node; + domain_update_iommu_cap(domain); + } + spin_unlock_irqrestore(&domain->iommu_lock, flags); +} + +static int domain_detach_iommu(struct dmar_domain *domain, + struct intel_iommu *iommu) +{ + unsigned long flags; + int count = INT_MAX; + + spin_lock_irqsave(&domain->iommu_lock, flags); + if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) { + count = --domain->iommu_count; + domain_update_iommu_cap(domain); + } + spin_unlock_irqrestore(&domain->iommu_lock, flags); + + return count; +} + static struct iova_domain reserved_iova_list; static struct lock_class_key reserved_rbtree_key; @@ -1706,9 +1768,7 @@ static void domain_exit(struct dmar_domain *domain) /* clear attached or cached domains */ rcu_read_lock(); for_each_active_iommu(iommu, drhd) - if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE || - test_bit(iommu->seq_id, domain->iommu_bmp)) - iommu_detach_domain(domain, iommu); + iommu_detach_domain(domain, iommu); rcu_read_unlock(); dma_free_pagelist(freelist); @@ -1723,8 +1783,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, struct context_entry *context; unsigned long flags; struct dma_pte *pgd; - unsigned long num; - unsigned long ndomains; int id; int agaw; struct device_domain_info *info = NULL; @@ -1748,31 +1806,14 @@ static int domain_context_mapping_one(struct dmar_domain *domain, id = domain->id; pgd = domain->pgd; - if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE || - domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) { - int found = 0; - - /* find an available domain id for this device in iommu */ - ndomains = cap_ndoms(iommu->cap); - for_each_set_bit(num, iommu->domain_ids, ndomains) { - if (iommu->domains[num] == domain) { - id = num; - found = 1; - break; - } - } - - if (found == 0) { - num = find_first_zero_bit(iommu->domain_ids, ndomains); - if (num >= ndomains) { + if (domain_type_is_vm_or_si(domain)) { + if (domain_type_is_vm(domain)) { + id = iommu_attach_vm_domain(domain, iommu); + if (id < 0) { spin_unlock_irqrestore(&iommu->lock, flags); - printk(KERN_ERR "IOMMU: no free domain ids\n"); + pr_err("IOMMU: no free domain ids\n"); return -EFAULT; } - - set_bit(num, iommu->domain_ids); - iommu->domains[num] = domain; - id = num; } /* Skip top levels of page tables for @@ -1824,72 +1865,68 @@ static int domain_context_mapping_one(struct dmar_domain *domain, (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); - iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH); + iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH); } else { iommu_flush_write_buffer(iommu); } iommu_enable_dev_iotlb(info); spin_unlock_irqrestore(&iommu->lock, flags); - spin_lock_irqsave(&domain->iommu_lock, flags); - if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) { - domain->iommu_count++; - if (domain->iommu_count == 1) - domain->nid = iommu->node; - domain_update_iommu_cap(domain); - } - spin_unlock_irqrestore(&domain->iommu_lock, flags); + domain_attach_iommu(domain, iommu); + return 0; } +struct domain_context_mapping_data { + struct dmar_domain *domain; + struct intel_iommu *iommu; + int 
translation; +}; + +static int domain_context_mapping_cb(struct pci_dev *pdev, + u16 alias, void *opaque) +{ + struct domain_context_mapping_data *data = opaque; + + return domain_context_mapping_one(data->domain, data->iommu, + PCI_BUS_NUM(alias), alias & 0xff, + data->translation); +} + static int domain_context_mapping(struct dmar_domain *domain, struct device *dev, int translation) { - int ret; - struct pci_dev *pdev, *tmp, *parent; struct intel_iommu *iommu; u8 bus, devfn; + struct domain_context_mapping_data data; iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) return -ENODEV; - ret = domain_context_mapping_one(domain, iommu, bus, devfn, - translation); - if (ret || !dev_is_pci(dev)) - return ret; - - /* dependent device mapping */ - pdev = to_pci_dev(dev); - tmp = pci_find_upstream_pcie_bridge(pdev); - if (!tmp) - return 0; - /* Secondary interface's bus number and devfn 0 */ - parent = pdev->bus->self; - while (parent != tmp) { - ret = domain_context_mapping_one(domain, iommu, - parent->bus->number, - parent->devfn, translation); - if (ret) - return ret; - parent = parent->bus->self; - } - if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */ - return domain_context_mapping_one(domain, iommu, - tmp->subordinate->number, 0, - translation); - else /* this is a legacy PCI bridge */ - return domain_context_mapping_one(domain, iommu, - tmp->bus->number, - tmp->devfn, + if (!dev_is_pci(dev)) + return domain_context_mapping_one(domain, iommu, bus, devfn, translation); + + data.domain = domain; + data.iommu = iommu; + data.translation = translation; + + return pci_for_each_dma_alias(to_pci_dev(dev), + &domain_context_mapping_cb, &data); +} + +static int domain_context_mapped_cb(struct pci_dev *pdev, + u16 alias, void *opaque) +{ + struct intel_iommu *iommu = opaque; + + return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff); } static int domain_context_mapped(struct device *dev) { - int ret; - struct pci_dev *pdev, *tmp, *parent; struct intel_iommu *iommu; u8 bus, devfn; @@ -1897,30 +1934,11 @@ static int domain_context_mapped(struct device *dev) if (!iommu) return -ENODEV; - ret = device_context_mapped(iommu, bus, devfn); - if (!ret || !dev_is_pci(dev)) - return ret; + if (!dev_is_pci(dev)) + return device_context_mapped(iommu, bus, devfn); - /* dependent device mapping */ - pdev = to_pci_dev(dev); - tmp = pci_find_upstream_pcie_bridge(pdev); - if (!tmp) - return ret; - /* Secondary interface's bus number and devfn 0 */ - parent = pdev->bus->self; - while (parent != tmp) { - ret = device_context_mapped(iommu, parent->bus->number, - parent->devfn); - if (!ret) - return ret; - parent = parent->bus->self; - } - if (pci_is_pcie(tmp)) - return device_context_mapped(iommu, tmp->subordinate->number, - 0); - else - return device_context_mapped(iommu, tmp->bus->number, - tmp->devfn); + return !pci_for_each_dma_alias(to_pci_dev(dev), + domain_context_mapped_cb, iommu); } /* Returns a number of VTD pages, but aligned to MM page size */ @@ -1965,12 +1983,11 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, { struct dma_pte *first_pte = NULL, *pte = NULL; phys_addr_t uninitialized_var(pteval); - int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; unsigned long sg_res; unsigned int largepage_lvl = 0; unsigned long lvl_pages = 0; - BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); + BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1)); if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) return -EINVAL; 
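
The conversions of domain_context_mapping() and domain_context_mapped() above follow the pattern used throughout this series: instead of hand-walking pci_find_upstream_pcie_bridge(), a short callback is handed to pci_for_each_dma_alias(), which invokes it once for every requester ID the device may present (the device itself, any quirked alias, and the aliases introduced by bridges on the way to the root bus); a non-zero return stops the walk and is propagated back. A minimal sketch of that callback shape (dump_dma_alias() and dump_dma_aliases() are hypothetical names used only for illustration, not part of the patch):

#include <linux/device.h>
#include <linux/pci.h>

/* Illustrative only: print every requester ID a device may use for DMA. */
static int dump_dma_alias(struct pci_dev *pdev, u16 alias, void *opaque)
{
	unsigned int *count = opaque;

	dev_info(&pdev->dev, "DMA alias %02x:%02x.%d\n",
		 PCI_BUS_NUM(alias), PCI_SLOT(alias & 0xff),
		 PCI_FUNC(alias & 0xff));
	(*count)++;
	return 0;	/* a non-zero return would terminate the walk early */
}

static void dump_dma_aliases(struct pci_dev *pdev)
{
	unsigned int count = 0;

	pci_for_each_dma_alias(pdev, dump_dma_alias, &count);
	dev_info(&pdev->dev, "%u DMA alias(es)\n", count);
}

domain_context_mapping_cb(), get_last_alias(), __arm_smmu_get_pci_sid() and set_msi_sid_cb() in this series are all instances of the same shape, differing only in what they record from each (pdev, alias) pair.
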
@@ -2004,12 +2021,14 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, /* It is large page*/ if (largepage_lvl > 1) { pteval |= DMA_PTE_LARGE_PAGE; - /* Ensure that old small page tables are removed to make room - for superpage, if they exist. */ - dma_pte_clear_range(domain, iov_pfn, - iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1); + lvl_pages = lvl_to_nr_pages(largepage_lvl); + /* + * Ensure that old small page tables are + * removed to make room for superpage, + * if they exist. + */ dma_pte_free_pagetable(domain, iov_pfn, - iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1); + iov_pfn + lvl_pages - 1); } else { pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; } @@ -2102,31 +2121,20 @@ static inline void unlink_domain_info(struct device_domain_info *info) static void domain_remove_dev_info(struct dmar_domain *domain) { - struct device_domain_info *info; - unsigned long flags, flags2; + struct device_domain_info *info, *tmp; + unsigned long flags; spin_lock_irqsave(&device_domain_lock, flags); - while (!list_empty(&domain->devices)) { - info = list_entry(domain->devices.next, - struct device_domain_info, link); + list_for_each_entry_safe(info, tmp, &domain->devices, link) { unlink_domain_info(info); spin_unlock_irqrestore(&device_domain_lock, flags); iommu_disable_dev_iotlb(info); iommu_detach_dev(info->iommu, info->bus, info->devfn); - if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) { + if (domain_type_is_vm(domain)) { iommu_detach_dependent_devices(info->iommu, info->dev); - /* clear this iommu in iommu_bmp, update iommu count - * and capabilities - */ - spin_lock_irqsave(&domain->iommu_lock, flags2); - if (test_and_clear_bit(info->iommu->seq_id, - domain->iommu_bmp)) { - domain->iommu_count--; - domain_update_iommu_cap(domain); - } - spin_unlock_irqrestore(&domain->iommu_lock, flags2); + domain_detach_iommu(domain, info->iommu); } free_devinfo_mem(info); @@ -2181,8 +2189,6 @@ static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu, info->dev = dev; info->domain = domain; info->iommu = iommu; - if (!dev) - domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES; spin_lock_irqsave(&device_domain_lock, flags); if (dev) @@ -2209,79 +2215,86 @@ static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu, return domain; } +static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque) +{ + *(u16 *)opaque = alias; + return 0; +} + /* domain is initialized */ static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw) { - struct dmar_domain *domain, *free = NULL; - struct intel_iommu *iommu = NULL; + struct dmar_domain *domain, *tmp; + struct intel_iommu *iommu; struct device_domain_info *info; - struct pci_dev *dev_tmp = NULL; + u16 dma_alias; unsigned long flags; - u8 bus, devfn, bridge_bus, bridge_devfn; + u8 bus, devfn; domain = find_domain(dev); if (domain) return domain; + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return NULL; + if (dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(dev); - u16 segment; - segment = pci_domain_nr(pdev->bus); - dev_tmp = pci_find_upstream_pcie_bridge(pdev); - if (dev_tmp) { - if (pci_is_pcie(dev_tmp)) { - bridge_bus = dev_tmp->subordinate->number; - bridge_devfn = 0; - } else { - bridge_bus = dev_tmp->bus->number; - bridge_devfn = dev_tmp->devfn; - } - spin_lock_irqsave(&device_domain_lock, flags); - info = dmar_search_domain_by_dev_info(segment, - bridge_bus, - bridge_devfn); - if (info) { - iommu = info->iommu; - domain = info->domain; - } - 
spin_unlock_irqrestore(&device_domain_lock, flags); - /* pcie-pci bridge already has a domain, uses it */ - if (info) - goto found_domain; + pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias); + + spin_lock_irqsave(&device_domain_lock, flags); + info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus), + PCI_BUS_NUM(dma_alias), + dma_alias & 0xff); + if (info) { + iommu = info->iommu; + domain = info->domain; } - } + spin_unlock_irqrestore(&device_domain_lock, flags); - iommu = device_to_iommu(dev, &bus, &devfn); - if (!iommu) - goto error; + /* DMA alias already has a domain, uses it */ + if (info) + goto found_domain; + } /* Allocate and initialize new domain for the device */ - domain = alloc_domain(false); + domain = alloc_domain(0); if (!domain) - goto error; - if (iommu_attach_domain(domain, iommu)) { + return NULL; + domain->id = iommu_attach_domain(domain, iommu); + if (domain->id < 0) { free_domain_mem(domain); - domain = NULL; - goto error; + return NULL; } - free = domain; - if (domain_init(domain, gaw)) - goto error; + domain_attach_iommu(domain, iommu); + if (domain_init(domain, gaw)) { + domain_exit(domain); + return NULL; + } + + /* register PCI DMA alias device */ + if (dev_is_pci(dev)) { + tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias), + dma_alias & 0xff, NULL, domain); + + if (!tmp || tmp != domain) { + domain_exit(domain); + domain = tmp; + } - /* register pcie-to-pci device */ - if (dev_tmp) { - domain = dmar_insert_dev_info(iommu, bridge_bus, bridge_devfn, - NULL, domain); if (!domain) - goto error; + return NULL; } found_domain: - domain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain); -error: - if (free != domain) - domain_exit(free); + tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain); + + if (!tmp || tmp != domain) { + domain_exit(domain); + domain = tmp; + } return domain; } @@ -2405,6 +2418,7 @@ static inline void iommu_prepare_isa(void) printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " "floppy might not work\n"); + pci_dev_put(pdev); } #else static inline void iommu_prepare_isa(void) @@ -2420,19 +2434,25 @@ static int __init si_domain_init(int hw) struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; int nid, ret = 0; + bool first = true; - si_domain = alloc_domain(false); + si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY); if (!si_domain) return -EFAULT; - si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; - for_each_active_iommu(iommu, drhd) { ret = iommu_attach_domain(si_domain, iommu); - if (ret) { + if (ret < 0) { + domain_exit(si_domain); + return -EFAULT; + } else if (first) { + si_domain->id = ret; + first = false; + } else if (si_domain->id != ret) { domain_exit(si_domain); return -EFAULT; } + domain_attach_iommu(si_domain, iommu); } if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { @@ -2523,22 +2543,46 @@ static bool device_has_rmrr(struct device *dev) return false; } +/* + * There are a couple cases where we need to restrict the functionality of + * devices associated with RMRRs. The first is when evaluating a device for + * identity mapping because problems exist when devices are moved in and out + * of domains and their respective RMRR information is lost. This means that + * a device with associated RMRRs will never be in a "passthrough" domain. + * The second is use of the device through the IOMMU API. This interface + * expects to have full control of the IOVA space for the device. 
We cannot + * satisfy both the requirement that RMRR access is maintained and have an + * unencumbered IOVA space. We also have no ability to quiesce the device's + * use of the RMRR space or even inform the IOMMU API user of the restriction. + * We therefore prevent devices associated with an RMRR from participating in + * the IOMMU API, which eliminates them from device assignment. + * + * In both cases we assume that PCI USB devices with RMRRs have them largely + * for historical reasons and that the RMRR space is not actively used post + * boot. This exclusion may change if vendors begin to abuse it. + */ +static bool device_is_rmrr_locked(struct device *dev) +{ + if (!device_has_rmrr(dev)) + return false; + + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + + if ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB) + return false; + } + + return true; +} + static int iommu_should_identity_map(struct device *dev, int startup) { if (dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(dev); - /* - * We want to prevent any device associated with an RMRR from - * getting placed into the SI Domain. This is done because - * problems exist when devices are moved in and out of domains - * and their respective RMRR info is lost. We exempt USB devices - * from this process due to their usage of RMRRs that are known - * to not be needed after BIOS hand-off to OS. - */ - if (device_has_rmrr(dev) && - (pdev->class >> 8) != PCI_CLASS_SERIAL_USB) + if (device_is_rmrr_locked(dev)) return 0; if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) @@ -2850,11 +2894,7 @@ static int __init init_dmars(void) iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); - - ret = iommu_enable_translation(iommu); - if (ret) - goto free_iommu; - + iommu_enable_translation(iommu); iommu_disable_protect_mem_regions(iommu); } @@ -3091,10 +3131,10 @@ static void flush_unmaps(void) /* On real hardware multiple invalidations are expensive */ if (cap_caching_mode(iommu->cap)) iommu_flush_iotlb_psi(iommu, domain->id, - iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, + iova->pfn_lo, iova_size(iova), !deferred_flush[i].freelist[j], 0); else { - mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1)); + mask = ilog2(mm_to_dma_pfn(iova_size(iova))); iommu_flush_dev_iotlb(deferred_flush[i].domain[j], (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask); } @@ -3144,9 +3184,7 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *f spin_unlock_irqrestore(&async_umap_flush_lock, flags); } -static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - struct dma_attrs *attrs) +static void intel_unmap(struct device *dev, dma_addr_t dev_addr) { struct dmar_domain *domain; unsigned long start_pfn, last_pfn; @@ -3190,6 +3228,13 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, } } +static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + intel_unmap(dev, dev_addr); +} + static void *intel_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, struct dma_attrs *attrs) @@ -3246,7 +3291,7 @@ static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, size = PAGE_ALIGN(size); order = get_order(size); - intel_unmap_page(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); + intel_unmap(dev, dma_handle); if 
(!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) __free_pages(page, order); } @@ -3255,43 +3300,7 @@ static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct dmar_domain *domain; - unsigned long start_pfn, last_pfn; - struct iova *iova; - struct intel_iommu *iommu; - struct page *freelist; - - if (iommu_no_mapping(dev)) - return; - - domain = find_domain(dev); - BUG_ON(!domain); - - iommu = domain_get_iommu(domain); - - iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); - if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n", - (unsigned long long)sglist[0].dma_address)) - return; - - start_pfn = mm_to_dma_pfn(iova->pfn_lo); - last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; - - freelist = domain_unmap(domain, start_pfn, last_pfn); - - if (intel_iommu_strict) { - iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, - last_pfn - start_pfn + 1, !freelist, 0); - /* free iova */ - __free_iova(&domain->iovad, iova); - dma_free_pagelist(freelist); - } else { - add_unmap(domain, iova, freelist); - /* - * queue up the release of the unmap to save the 1/6th of the - * cpu used up by the iotlb flush operation... - */ - } + intel_unmap(dev, sglist[0].dma_address); } static int intel_nontranslate_map_sg(struct device *hddev, @@ -3355,13 +3364,8 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot); if (unlikely(ret)) { - /* clear the page */ - dma_pte_clear_range(domain, start_vpfn, - start_vpfn + size - 1); - /* free page tables */ dma_pte_free_pagetable(domain, start_vpfn, start_vpfn + size - 1); - /* free iova */ __free_iova(&domain->iovad, iova); return 0; } @@ -3568,10 +3572,8 @@ static int init_iommu_hw(void) iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL); - iommu->flush.flush_iotlb(iommu, 0, 0, 0, - DMA_TLB_GLOBAL_FLUSH); - if (iommu_enable_translation(iommu)) - return 1; + iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); + iommu_enable_translation(iommu); iommu_disable_protect_mem_regions(iommu); } @@ -3873,9 +3875,7 @@ static int device_notifier(struct notifier_block *nb, down_read(&dmar_global_lock); domain_remove_one_dev_info(domain, dev); - if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && - !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) && - list_empty(&domain->devices)) + if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices)) domain_exit(domain); up_read(&dmar_global_lock); @@ -3935,8 +3935,7 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb, rcu_read_lock(); for_each_active_iommu(iommu, drhd) iommu_flush_iotlb_psi(iommu, si_domain->id, - iova->pfn_lo, - iova->pfn_hi - iova->pfn_lo + 1, + iova->pfn_lo, iova_size(iova), !freelist, 0); rcu_read_unlock(); dma_free_pagelist(freelist); @@ -3955,6 +3954,63 @@ static struct notifier_block intel_iommu_memory_nb = { .priority = 0 }; + +static ssize_t intel_iommu_show_version(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_iommu *iommu = dev_get_drvdata(dev); + u32 ver = readl(iommu->reg + DMAR_VER_REG); + return sprintf(buf, "%d:%d\n", + DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver)); +} +static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL); + +static ssize_t intel_iommu_show_address(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_iommu *iommu = dev_get_drvdata(dev); + 
return sprintf(buf, "%llx\n", iommu->reg_phys); +} +static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL); + +static ssize_t intel_iommu_show_cap(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_iommu *iommu = dev_get_drvdata(dev); + return sprintf(buf, "%llx\n", iommu->cap); +} +static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL); + +static ssize_t intel_iommu_show_ecap(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct intel_iommu *iommu = dev_get_drvdata(dev); + return sprintf(buf, "%llx\n", iommu->ecap); +} +static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL); + +static struct attribute *intel_iommu_attrs[] = { + &dev_attr_version.attr, + &dev_attr_address.attr, + &dev_attr_cap.attr, + &dev_attr_ecap.attr, + NULL, +}; + +static struct attribute_group intel_iommu_group = { + .name = "intel-iommu", + .attrs = intel_iommu_attrs, +}; + +const struct attribute_group *intel_iommu_groups[] = { + &intel_iommu_group, + NULL, +}; + int __init intel_iommu_init(void) { int ret = -ENODEV; @@ -4026,6 +4082,11 @@ int __init intel_iommu_init(void) init_iommu_pm_ops(); + for_each_active_iommu(iommu, drhd) + iommu->iommu_dev = iommu_device_create(NULL, iommu, + intel_iommu_groups, + iommu->name); + bus_set_iommu(&pci_bus_type, &intel_iommu_ops); bus_register_notifier(&pci_bus_type, &device_nb); if (si_domain && !hw_pass_through) @@ -4044,33 +4105,27 @@ out_free_dmar: return ret; } +static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque) +{ + struct intel_iommu *iommu = opaque; + + iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff); + return 0; +} + +/* + * NB - intel-iommu lacks any sort of reference counting for the users of + * dependent devices. If multiple endpoints have intersecting dependent + * devices, unbinding the driver from any one of them will possibly leave + * the others unable to operate. 
+ */ static void iommu_detach_dependent_devices(struct intel_iommu *iommu, struct device *dev) { - struct pci_dev *tmp, *parent, *pdev; - if (!iommu || !dev || !dev_is_pci(dev)) return; - pdev = to_pci_dev(dev); - - /* dependent device detach */ - tmp = pci_find_upstream_pcie_bridge(pdev); - /* Secondary interface's bus number and devfn 0 */ - if (tmp) { - parent = pdev->bus->self; - while (parent != tmp) { - iommu_detach_dev(iommu, parent->bus->number, - parent->devfn); - parent = parent->bus->self; - } - if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */ - iommu_detach_dev(iommu, - tmp->subordinate->number, 0); - else /* this is a legacy PCI bridge */ - iommu_detach_dev(iommu, tmp->bus->number, - tmp->devfn); - } + pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu); } static void domain_remove_one_dev_info(struct dmar_domain *domain, @@ -4117,20 +4172,9 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, spin_unlock_irqrestore(&device_domain_lock, flags); if (found == 0) { - unsigned long tmp_flags; - spin_lock_irqsave(&domain->iommu_lock, tmp_flags); - clear_bit(iommu->seq_id, domain->iommu_bmp); - domain->iommu_count--; - domain_update_iommu_cap(domain); - spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags); - - if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && - !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) { - spin_lock_irqsave(&iommu->lock, tmp_flags); - clear_bit(domain->id, iommu->domain_ids); - iommu->domains[domain->id] = NULL; - spin_unlock_irqrestore(&iommu->lock, tmp_flags); - } + domain_detach_iommu(domain, iommu); + if (!domain_type_is_vm_or_si(domain)) + iommu_detach_domain(domain, iommu); } } @@ -4150,7 +4194,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->iommu_snooping = 0; domain->iommu_superpage = 0; domain->max_addr = 0; - domain->nid = -1; /* always allocate the top pgd */ domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid); @@ -4164,7 +4207,7 @@ static int intel_iommu_domain_init(struct iommu_domain *domain) { struct dmar_domain *dmar_domain; - dmar_domain = alloc_domain(true); + dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE); if (!dmar_domain) { printk(KERN_ERR "intel_iommu_domain_init: dmar_domain == NULL\n"); @@ -4202,14 +4245,18 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, int addr_width; u8 bus, devfn; + if (device_is_rmrr_locked(dev)) { + dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. 
Contact your platform vendor.\n"); + return -EPERM; + } + /* normally dev is not mapped */ if (unlikely(domain_context_mapped(dev))) { struct dmar_domain *old_domain; old_domain = find_domain(dev); if (old_domain) { - if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE || - dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) + if (domain_type_is_vm_or_si(dmar_domain)) domain_remove_one_dev_info(old_domain, dev); else domain_remove_dev_info(old_domain); @@ -4373,99 +4420,42 @@ static int intel_iommu_domain_has_cap(struct iommu_domain *domain, return 0; } -#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) - static int intel_iommu_add_device(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - struct pci_dev *bridge, *dma_pdev = NULL; + struct intel_iommu *iommu; struct iommu_group *group; - int ret; u8 bus, devfn; - if (!device_to_iommu(dev, &bus, &devfn)) + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) return -ENODEV; - bridge = pci_find_upstream_pcie_bridge(pdev); - if (bridge) { - if (pci_is_pcie(bridge)) - dma_pdev = pci_get_domain_bus_and_slot( - pci_domain_nr(pdev->bus), - bridge->subordinate->number, 0); - if (!dma_pdev) - dma_pdev = pci_dev_get(bridge); - } else - dma_pdev = pci_dev_get(pdev); - - /* Account for quirked devices */ - swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev)); + iommu_device_link(iommu->iommu_dev, dev); - /* - * If it's a multifunction device that does not support our - * required ACS flags, add to the same group as lowest numbered - * function that also does not suport the required ACS flags. - */ - if (dma_pdev->multifunction && - !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) { - u8 i, slot = PCI_SLOT(dma_pdev->devfn); - - for (i = 0; i < 8; i++) { - struct pci_dev *tmp; + group = iommu_group_get_for_dev(dev); - tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i)); - if (!tmp) - continue; - - if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) { - swap_pci_ref(&dma_pdev, tmp); - break; - } - pci_dev_put(tmp); - } - } - - /* - * Devices on the root bus go through the iommu. If that's not us, - * find the next upstream device and test ACS up to the root bus. - * Finding the next device may require skipping virtual buses. 
- */ - while (!pci_is_root_bus(dma_pdev->bus)) { - struct pci_bus *bus = dma_pdev->bus; - - while (!bus->self) { - if (!pci_is_root_bus(bus)) - bus = bus->parent; - else - goto root_bus; - } - - if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) - break; - - swap_pci_ref(&dma_pdev, pci_dev_get(bus->self)); - } - -root_bus: - group = iommu_group_get(&dma_pdev->dev); - pci_dev_put(dma_pdev); - if (!group) { - group = iommu_group_alloc(); - if (IS_ERR(group)) - return PTR_ERR(group); - } - - ret = iommu_group_add_device(group, dev); + if (IS_ERR(group)) + return PTR_ERR(group); iommu_group_put(group); - return ret; + return 0; } static void intel_iommu_remove_device(struct device *dev) { + struct intel_iommu *iommu; + u8 bus, devfn; + + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + return; + iommu_group_remove_device(dev); + + iommu_device_unlink(iommu->iommu_dev, dev); } -static struct iommu_ops intel_iommu_ops = { +static const struct iommu_ops intel_iommu_ops = { .domain_init = intel_iommu_domain_init, .domain_destroy = intel_iommu_domain_destroy, .attach_dev = intel_iommu_attach_device, diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 9b174893f0f5..0df41f6264f5 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -70,6 +70,11 @@ static int get_irte(int irq, struct irte *entry) raw_spin_lock_irqsave(&irq_2_ir_lock, flags); + if (unlikely(!irq_iommu->iommu)) { + raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags); + return -1; + } + index = irq_iommu->irte_index + irq_iommu->sub_handle; *entry = *(irq_iommu->iommu->ir_table->base + index); @@ -369,29 +374,52 @@ static int set_hpet_sid(struct irte *irte, u8 id) return 0; } +struct set_msi_sid_data { + struct pci_dev *pdev; + u16 alias; +}; + +static int set_msi_sid_cb(struct pci_dev *pdev, u16 alias, void *opaque) +{ + struct set_msi_sid_data *data = opaque; + + data->pdev = pdev; + data->alias = alias; + + return 0; +} + static int set_msi_sid(struct irte *irte, struct pci_dev *dev) { - struct pci_dev *bridge; + struct set_msi_sid_data data; if (!irte || !dev) return -1; - /* PCIe device or Root Complex integrated PCI device */ - if (pci_is_pcie(dev) || !dev->bus->parent) { - set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, - (dev->bus->number << 8) | dev->devfn); - return 0; - } + pci_for_each_dma_alias(dev, set_msi_sid_cb, &data); - bridge = pci_find_upstream_pcie_bridge(dev); - if (bridge) { - if (pci_is_pcie(bridge))/* this is a PCIe-to-PCI/PCIX bridge */ - set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16, - (bridge->bus->number << 8) | dev->bus->number); - else /* this is a legacy PCI bridge */ - set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, - (bridge->bus->number << 8) | bridge->devfn); - } + /* + * DMA alias provides us with a PCI device and alias. The only case + * where the it will return an alias on a different bus than the + * device is the case of a PCIe-to-PCI bridge, where the alias is for + * the subordinate bus. In this case we can only verify the bus. + * + * If the alias device is on a different bus than our source device + * then we have a topology based alias, use it. + * + * Otherwise, the alias is for a device DMA quirk and we cannot + * assume that MSI uses the same requester ID. Therefore use the + * original device. 
+ */ + if (PCI_BUS_NUM(data.alias) != data.pdev->bus->number) + set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16, + PCI_DEVID(PCI_BUS_NUM(data.alias), + dev->bus->number)); + else if (data.pdev->bus->number != dev->bus->number) + set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, data.alias); + else + set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, + PCI_DEVID(dev->bus->number, dev->devfn)); return 0; } diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c new file mode 100644 index 000000000000..39b2d9127dbf --- /dev/null +++ b/drivers/iommu/iommu-sysfs.c @@ -0,0 +1,134 @@ +/* + * IOMMU sysfs class support + * + * Copyright (C) 2014 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson <alex.williamson@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/device.h> +#include <linux/iommu.h> +#include <linux/module.h> +#include <linux/slab.h> + +/* + * We provide a common class "devices" group which initially has no attributes. + * As devices are added to the IOMMU, we'll add links to the group. + */ +static struct attribute *devices_attr[] = { + NULL, +}; + +static const struct attribute_group iommu_devices_attr_group = { + .name = "devices", + .attrs = devices_attr, +}; + +static const struct attribute_group *iommu_dev_groups[] = { + &iommu_devices_attr_group, + NULL, +}; + +static void iommu_release_device(struct device *dev) +{ + kfree(dev); +} + +static struct class iommu_class = { + .name = "iommu", + .dev_release = iommu_release_device, + .dev_groups = iommu_dev_groups, +}; + +static int __init iommu_dev_init(void) +{ + return class_register(&iommu_class); +} +postcore_initcall(iommu_dev_init); + +/* + * Create an IOMMU device and return a pointer to it. IOMMU specific + * attributes can be provided as an attribute group, allowing a unique + * namespace per IOMMU type. + */ +struct device *iommu_device_create(struct device *parent, void *drvdata, + const struct attribute_group **groups, + const char *fmt, ...) +{ + struct device *dev; + va_list vargs; + int ret; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return ERR_PTR(-ENOMEM); + + device_initialize(dev); + + dev->class = &iommu_class; + dev->parent = parent; + dev->groups = groups; + dev_set_drvdata(dev, drvdata); + + va_start(vargs, fmt); + ret = kobject_set_name_vargs(&dev->kobj, fmt, vargs); + va_end(vargs); + if (ret) + goto error; + + ret = device_add(dev); + if (ret) + goto error; + + return dev; + +error: + put_device(dev); + return ERR_PTR(ret); +} + +void iommu_device_destroy(struct device *dev) +{ + if (!dev || IS_ERR(dev)) + return; + + device_unregister(dev); +} + +/* + * IOMMU drivers can indicate a device is managed by a given IOMMU using + * this interface. A link to the device will be created in the "devices" + * directory of the IOMMU device in sysfs and an "iommu" link will be + * created under the linked device, pointing back at the IOMMU device. 
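
The iommu-sysfs helpers added above (iommu_device_create(), iommu_device_destroy(), iommu_device_link() and iommu_device_unlink()) give drivers one common way to populate /sys/class/iommu/. A rough usage sketch follows; struct my_mmu and the my_mmu_* functions are invented for illustration, and only the iommu_device_* calls come from this patch:

#include <linux/device.h>
#include <linux/err.h>
#include <linux/iommu.h>

struct my_mmu {
	struct device *iommu_dev;	/* the /sys/class/iommu/<name> device */
};

static int my_mmu_probe(struct my_mmu *mmu, const char *name)
{
	/* Creates /sys/class/iommu/<name>; extra attribute groups are optional. */
	mmu->iommu_dev = iommu_device_create(NULL, mmu, NULL, "%s", name);
	return PTR_ERR_OR_ZERO(mmu->iommu_dev);
}

static int my_mmu_add_device(struct my_mmu *mmu, struct device *dev)
{
	/* "devices/<dev>" link under the IOMMU, "iommu" back-link under dev. */
	return iommu_device_link(mmu->iommu_dev, dev);
}

static void my_mmu_remove_device(struct my_mmu *mmu, struct device *dev)
{
	iommu_device_unlink(mmu->iommu_dev, dev);
}

static void my_mmu_remove(struct my_mmu *mmu)
{
	iommu_device_destroy(mmu->iommu_dev);
}

The dmar and intel-iommu hunks earlier in this patch use exactly this pairing: iommu_device_create() when the IOMMU is set up, iommu_device_destroy() in free_iommu(), and iommu_device_link()/iommu_device_unlink() from the add_device/remove_device callbacks.
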
+ */ +int iommu_device_link(struct device *dev, struct device *link) +{ + int ret; + + if (!dev || IS_ERR(dev)) + return -ENODEV; + + ret = sysfs_add_link_to_group(&dev->kobj, "devices", + &link->kobj, dev_name(link)); + if (ret) + return ret; + + ret = sysfs_create_link_nowarn(&link->kobj, &dev->kobj, "iommu"); + if (ret) + sysfs_remove_link_from_group(&dev->kobj, "devices", + dev_name(link)); + + return ret; +} + +void iommu_device_unlink(struct device *dev, struct device *link) +{ + if (!dev || IS_ERR(dev)) + return; + + sysfs_remove_link(&link->kobj, "iommu"); + sysfs_remove_link_from_group(&dev->kobj, "devices", dev_name(link)); +} diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index e5555fcfe703..169836020208 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -29,12 +29,17 @@ #include <linux/idr.h> #include <linux/notifier.h> #include <linux/err.h> +#include <linux/pci.h> #include <trace/events/iommu.h> static struct kset *iommu_group_kset; static struct ida iommu_group_ida; static struct mutex iommu_group_mutex; +struct iommu_callback_data { + const struct iommu_ops *ops; +}; + struct iommu_group { struct kobject kobj; struct kobject *devices_kobj; @@ -514,9 +519,191 @@ int iommu_group_id(struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_group_id); +/* + * To consider a PCI device isolated, we require ACS to support Source + * Validation, Request Redirection, Completer Redirection, and Upstream + * Forwarding. This effectively means that devices cannot spoof their + * requester ID, requests and completions cannot be redirected, and all + * transactions are forwarded upstream, even as it passes through a + * bridge where the target device is downstream. + */ +#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) + +struct group_for_pci_data { + struct pci_dev *pdev; + struct iommu_group *group; +}; + +/* + * DMA alias iterator callback, return the last seen device. Stop and return + * the IOMMU group if we find one along the way. + */ +static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque) +{ + struct group_for_pci_data *data = opaque; + + data->pdev = pdev; + data->group = iommu_group_get(&pdev->dev); + + return data->group != NULL; +} + +/* + * Use standard PCI bus topology, isolation features, and DMA alias quirks + * to find or create an IOMMU group for a device. + */ +static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev) +{ + struct group_for_pci_data data; + struct pci_bus *bus; + struct iommu_group *group = NULL; + struct pci_dev *tmp; + + /* + * Find the upstream DMA alias for the device. A device must not + * be aliased due to topology in order to have its own IOMMU group. + * If we find an alias along the way that already belongs to a + * group, use it. + */ + if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data)) + return data.group; + + pdev = data.pdev; + + /* + * Continue upstream from the point of minimum IOMMU granularity + * due to aliases to the point where devices are protected from + * peer-to-peer DMA by PCI ACS. Again, if we find an existing + * group, use it. + */ + for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) { + if (!bus->self) + continue; + + if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS)) + break; + + pdev = bus->self; + + group = iommu_group_get(&pdev->dev); + if (group) + return group; + } + + /* + * Next we need to consider DMA alias quirks. If one device aliases + * to another, they should be grouped together. 
It's theoretically + * possible that aliases could create chains of devices where each + * device aliases another device. If we then factor in multifunction + * ACS grouping requirements, each alias could incorporate a new slot + * with multiple functions, each with aliases. This is all extremely + * unlikely as DMA alias quirks are typically only used for PCIe + * devices where we usually have a single slot per bus. Furthermore, + * the alias quirk is usually to another function within the slot + * (and ACS multifunction is not supported) or to a different slot + * that doesn't physically exist. The likely scenario is therefore + * that everything on the bus gets grouped together. To reduce the + * problem space, share the IOMMU group for all devices on the bus + * if a DMA alias quirk is present on the bus. + */ + tmp = NULL; + for_each_pci_dev(tmp) { + if (tmp->bus != pdev->bus || + !(tmp->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN)) + continue; + + pci_dev_put(tmp); + tmp = NULL; + + /* We have an alias quirk, search for an existing group */ + for_each_pci_dev(tmp) { + struct iommu_group *group_tmp; + + if (tmp->bus != pdev->bus) + continue; + + group_tmp = iommu_group_get(&tmp->dev); + if (!group) { + group = group_tmp; + continue; + } + + if (group_tmp) { + WARN_ON(group != group_tmp); + iommu_group_put(group_tmp); + } + } + + return group ? group : iommu_group_alloc(); + } + + /* + * Non-multifunction devices or multifunction devices supporting + * ACS get their own group. + */ + if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS)) + return iommu_group_alloc(); + + /* + * Multifunction devices not supporting ACS share a group with other + * similar devices in the same slot. + */ + tmp = NULL; + for_each_pci_dev(tmp) { + if (tmp == pdev || tmp->bus != pdev->bus || + PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) || + pci_acs_enabled(tmp, REQ_ACS_FLAGS)) + continue; + + group = iommu_group_get(&tmp->dev); + if (group) { + pci_dev_put(tmp); + return group; + } + } + + /* No shared group found, allocate new */ + return iommu_group_alloc(); +} + +/** + * iommu_group_get_for_dev - Find or create the IOMMU group for a device + * @dev: target device + * + * This function is intended to be called by IOMMU drivers and extended to + * support common, bus-defined algorithms when determining or creating the + * IOMMU group for a device. On success, the caller will hold a reference + * to the returned IOMMU group, which will already include the provided + * device. The reference should be released with iommu_group_put(). 
+ */ +struct iommu_group *iommu_group_get_for_dev(struct device *dev) +{ + struct iommu_group *group = ERR_PTR(-EIO); + int ret; + + group = iommu_group_get(dev); + if (group) + return group; + + if (dev_is_pci(dev)) + group = iommu_group_get_for_pci_dev(to_pci_dev(dev)); + + if (IS_ERR(group)) + return group; + + ret = iommu_group_add_device(group, dev); + if (ret) { + iommu_group_put(group); + return ERR_PTR(ret); + } + + return group; +} + static int add_iommu_group(struct device *dev, void *data) { - struct iommu_ops *ops = data; + struct iommu_callback_data *cb = data; + const struct iommu_ops *ops = cb->ops; if (!ops->add_device) return -ENODEV; @@ -532,7 +719,7 @@ static int iommu_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct iommu_ops *ops = dev->bus->iommu_ops; + const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_group *group; unsigned long group_action = 0; @@ -585,10 +772,14 @@ static struct notifier_block iommu_bus_nb = { .notifier_call = iommu_bus_notifier, }; -static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops) +static void iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops) { + struct iommu_callback_data cb = { + .ops = ops, + }; + bus_register_notifier(bus, &iommu_bus_nb); - bus_for_each_dev(bus, NULL, ops, add_iommu_group); + bus_for_each_dev(bus, NULL, &cb, add_iommu_group); } /** @@ -604,7 +795,7 @@ static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops) * is set up. With this function the iommu-driver can set the iommu-ops * afterwards. */ -int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops) +int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops) { if (bus->iommu_ops != NULL) return -EBUSY; diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 53cde086e83b..7dab5cbcc775 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1120,7 +1120,7 @@ static void ipmmu_remove_device(struct device *dev) dev->archdata.iommu = NULL; } -static struct iommu_ops ipmmu_ops = { +static const struct iommu_ops ipmmu_ops = { .domain_init = ipmmu_domain_init, .domain_destroy = ipmmu_domain_destroy, .attach_dev = ipmmu_attach_device, diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index f5ff657f49fa..49f41d6e02f1 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -674,7 +674,7 @@ fail: return 0; } -static struct iommu_ops msm_iommu_ops = { +static const struct iommu_ops msm_iommu_ops = { .domain_init = msm_iommu_domain_init, .domain_destroy = msm_iommu_domain_destroy, .attach_dev = msm_iommu_attach_dev, diff --git a/drivers/iommu/omap-iommu-debug.c b/drivers/iommu/omap-iommu-debug.c index 80fffba7f12d..531658d17333 100644 --- a/drivers/iommu/omap-iommu-debug.c +++ b/drivers/iommu/omap-iommu-debug.c @@ -213,116 +213,6 @@ static ssize_t debug_read_pagetable(struct file *file, char __user *userbuf, return bytes; } -static ssize_t debug_read_mmap(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct device *dev = file->private_data; - struct omap_iommu *obj = dev_to_omap_iommu(dev); - char *p, *buf; - struct iovm_struct *tmp; - int uninitialized_var(i); - ssize_t bytes; - - buf = (char *)__get_free_page(GFP_KERNEL); - if (!buf) - return -ENOMEM; - p = buf; - - p += sprintf(p, "%-3s %-8s %-8s %6s %8s\n", - "No", "start", "end", "size", "flags"); - p += sprintf(p, "-------------------------------------------------\n"); - - 
mutex_lock(&iommu_debug_lock); - - list_for_each_entry(tmp, &obj->mmap, list) { - size_t len; - const char *str = "%3d %08x-%08x %6x %8x\n"; - const int maxcol = 39; - - len = tmp->da_end - tmp->da_start; - p += snprintf(p, maxcol, str, - i, tmp->da_start, tmp->da_end, len, tmp->flags); - - if (PAGE_SIZE - (p - buf) < maxcol) - break; - i++; - } - - bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); - - mutex_unlock(&iommu_debug_lock); - free_page((unsigned long)buf); - - return bytes; -} - -static ssize_t debug_read_mem(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct device *dev = file->private_data; - char *p, *buf; - struct iovm_struct *area; - ssize_t bytes; - - count = min_t(ssize_t, count, PAGE_SIZE); - - buf = (char *)__get_free_page(GFP_KERNEL); - if (!buf) - return -ENOMEM; - p = buf; - - mutex_lock(&iommu_debug_lock); - - area = omap_find_iovm_area(dev, (u32)ppos); - if (!area) { - bytes = -EINVAL; - goto err_out; - } - memcpy(p, area->va, count); - p += count; - - bytes = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); -err_out: - mutex_unlock(&iommu_debug_lock); - free_page((unsigned long)buf); - - return bytes; -} - -static ssize_t debug_write_mem(struct file *file, const char __user *userbuf, - size_t count, loff_t *ppos) -{ - struct device *dev = file->private_data; - struct iovm_struct *area; - char *p, *buf; - - count = min_t(size_t, count, PAGE_SIZE); - - buf = (char *)__get_free_page(GFP_KERNEL); - if (!buf) - return -ENOMEM; - p = buf; - - mutex_lock(&iommu_debug_lock); - - if (copy_from_user(p, userbuf, count)) { - count = -EFAULT; - goto err_out; - } - - area = omap_find_iovm_area(dev, (u32)ppos); - if (!area) { - count = -EINVAL; - goto err_out; - } - memcpy(area->va, p, count); -err_out: - mutex_unlock(&iommu_debug_lock); - free_page((unsigned long)buf); - - return count; -} - #define DEBUG_FOPS(name) \ static const struct file_operations debug_##name##_fops = { \ .open = simple_open, \ @@ -342,8 +232,6 @@ DEBUG_FOPS_RO(ver); DEBUG_FOPS_RO(regs); DEBUG_FOPS_RO(tlb); DEBUG_FOPS(pagetable); -DEBUG_FOPS_RO(mmap); -DEBUG_FOPS(mem); #define __DEBUG_ADD_FILE(attr, mode) \ { \ @@ -389,8 +277,6 @@ static int iommu_debug_register(struct device *dev, void *data) DEBUG_ADD_FILE_RO(regs); DEBUG_ADD_FILE_RO(tlb); DEBUG_ADD_FILE(pagetable); - DEBUG_ADD_FILE_RO(mmap); - DEBUG_ADD_FILE(mem); return 0; diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 895af06a667f..e202b0c24120 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -959,31 +959,18 @@ static int omap_iommu_probe(struct platform_device *pdev) return err; if (obj->nr_tlb_entries != 32 && obj->nr_tlb_entries != 8) return -EINVAL; - /* - * da_start and da_end are needed for omap-iovmm, so hardcode - * these values as used by OMAP3 ISP - the only user for - * omap-iovmm - */ - obj->da_start = 0; - obj->da_end = 0xfffff000; if (of_find_property(of, "ti,iommu-bus-err-back", NULL)) obj->has_bus_err_back = MMU_GP_REG_BUS_ERR_BACK_EN; } else { obj->nr_tlb_entries = pdata->nr_tlb_entries; obj->name = pdata->name; - obj->da_start = pdata->da_start; - obj->da_end = pdata->da_end; } - if (obj->da_end <= obj->da_start) - return -EINVAL; obj->dev = &pdev->dev; obj->ctx = (void *)obj + sizeof(*obj); spin_lock_init(&obj->iommu_lock); - mutex_init(&obj->mmap_lock); spin_lock_init(&obj->page_table_lock); - INIT_LIST_HEAD(&obj->mmap); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); obj->regbase = 
devm_ioremap_resource(obj->dev, res); @@ -1291,7 +1278,7 @@ static void omap_iommu_remove_device(struct device *dev) kfree(arch_data); } -static struct iommu_ops omap_iommu_ops = { +static const struct iommu_ops omap_iommu_ops = { .domain_init = omap_iommu_domain_init, .domain_destroy = omap_iommu_domain_destroy, .attach_dev = omap_iommu_attach_dev, diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index ea920c3e94ff..1275a822934b 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -46,12 +46,7 @@ struct omap_iommu { int nr_tlb_entries; - struct list_head mmap; - struct mutex mmap_lock; /* protect mmap */ - void *ctx; /* iommu context: registres saved area */ - u32 da_start; - u32 da_end; int has_bus_err_back; }; @@ -154,9 +149,12 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev) #define MMU_RAM_PADDR_MASK \ ((~0UL >> MMU_RAM_PADDR_SHIFT) << MMU_RAM_PADDR_SHIFT) +#define MMU_RAM_ENDIAN_SHIFT 9 #define MMU_RAM_ENDIAN_MASK (1 << MMU_RAM_ENDIAN_SHIFT) +#define MMU_RAM_ENDIAN_LITTLE (0 << MMU_RAM_ENDIAN_SHIFT) #define MMU_RAM_ENDIAN_BIG (1 << MMU_RAM_ENDIAN_SHIFT) +#define MMU_RAM_ELSZ_SHIFT 7 #define MMU_RAM_ELSZ_MASK (3 << MMU_RAM_ELSZ_SHIFT) #define MMU_RAM_ELSZ_8 (0 << MMU_RAM_ELSZ_SHIFT) #define MMU_RAM_ELSZ_16 (1 << MMU_RAM_ELSZ_SHIFT) diff --git a/drivers/iommu/omap-iovmm.c b/drivers/iommu/omap-iovmm.c deleted file mode 100644 index d14725984153..000000000000 --- a/drivers/iommu/omap-iovmm.c +++ /dev/null @@ -1,791 +0,0 @@ -/* - * omap iommu: simple virtual address space management - * - * Copyright (C) 2008-2009 Nokia Corporation - * - * Written by Hiroshi DOYU <Hiroshi.DOYU@nokia.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/err.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/device.h> -#include <linux/scatterlist.h> -#include <linux/iommu.h> -#include <linux/omap-iommu.h> -#include <linux/platform_data/iommu-omap.h> - -#include <asm/cacheflush.h> -#include <asm/mach/map.h> - -#include "omap-iopgtable.h" -#include "omap-iommu.h" - -/* - * IOVMF_FLAGS: attribute for iommu virtual memory area(iovma) - * - * lower 16 bit is used for h/w and upper 16 bit is for s/w. - */ -#define IOVMF_SW_SHIFT 16 - -/* - * iovma: h/w flags derived from cam and ram attribute - */ -#define IOVMF_CAM_MASK (~((1 << 10) - 1)) -#define IOVMF_RAM_MASK (~IOVMF_CAM_MASK) - -#define IOVMF_PGSZ_MASK (3 << 0) -#define IOVMF_PGSZ_1M MMU_CAM_PGSZ_1M -#define IOVMF_PGSZ_64K MMU_CAM_PGSZ_64K -#define IOVMF_PGSZ_4K MMU_CAM_PGSZ_4K -#define IOVMF_PGSZ_16M MMU_CAM_PGSZ_16M - -#define IOVMF_ENDIAN_MASK (1 << 9) -#define IOVMF_ENDIAN_BIG MMU_RAM_ENDIAN_BIG - -#define IOVMF_ELSZ_MASK (3 << 7) -#define IOVMF_ELSZ_16 MMU_RAM_ELSZ_16 -#define IOVMF_ELSZ_32 MMU_RAM_ELSZ_32 -#define IOVMF_ELSZ_NONE MMU_RAM_ELSZ_NONE - -#define IOVMF_MIXED_MASK (1 << 6) -#define IOVMF_MIXED MMU_RAM_MIXED - -/* - * iovma: s/w flags, used for mapping and umapping internally. 
- */ -#define IOVMF_MMIO (1 << IOVMF_SW_SHIFT) -#define IOVMF_ALLOC (2 << IOVMF_SW_SHIFT) -#define IOVMF_ALLOC_MASK (3 << IOVMF_SW_SHIFT) - -/* "superpages" is supported just with physically linear pages */ -#define IOVMF_DISCONT (1 << (2 + IOVMF_SW_SHIFT)) -#define IOVMF_LINEAR (2 << (2 + IOVMF_SW_SHIFT)) -#define IOVMF_LINEAR_MASK (3 << (2 + IOVMF_SW_SHIFT)) - -#define IOVMF_DA_FIXED (1 << (4 + IOVMF_SW_SHIFT)) - -static struct kmem_cache *iovm_area_cachep; - -/* return the offset of the first scatterlist entry in a sg table */ -static unsigned int sgtable_offset(const struct sg_table *sgt) -{ - if (!sgt || !sgt->nents) - return 0; - - return sgt->sgl->offset; -} - -/* return total bytes of sg buffers */ -static size_t sgtable_len(const struct sg_table *sgt) -{ - unsigned int i, total = 0; - struct scatterlist *sg; - - if (!sgt) - return 0; - - for_each_sg(sgt->sgl, sg, sgt->nents, i) { - size_t bytes; - - bytes = sg->length + sg->offset; - - if (!iopgsz_ok(bytes)) { - pr_err("%s: sg[%d] not iommu pagesize(%u %u)\n", - __func__, i, bytes, sg->offset); - return 0; - } - - if (i && sg->offset) { - pr_err("%s: sg[%d] offset not allowed in internal entries\n", - __func__, i); - return 0; - } - - total += bytes; - } - - return total; -} -#define sgtable_ok(x) (!!sgtable_len(x)) - -static unsigned max_alignment(u32 addr) -{ - int i; - unsigned pagesize[] = { SZ_16M, SZ_1M, SZ_64K, SZ_4K, }; - for (i = 0; i < ARRAY_SIZE(pagesize) && addr & (pagesize[i] - 1); i++) - ; - return (i < ARRAY_SIZE(pagesize)) ? pagesize[i] : 0; -} - -/* - * calculate the optimal number sg elements from total bytes based on - * iommu superpages - */ -static unsigned sgtable_nents(size_t bytes, u32 da, u32 pa) -{ - unsigned nr_entries = 0, ent_sz; - - if (!IS_ALIGNED(bytes, PAGE_SIZE)) { - pr_err("%s: wrong size %08x\n", __func__, bytes); - return 0; - } - - while (bytes) { - ent_sz = max_alignment(da | pa); - ent_sz = min_t(unsigned, ent_sz, iopgsz_max(bytes)); - nr_entries++; - da += ent_sz; - pa += ent_sz; - bytes -= ent_sz; - } - - return nr_entries; -} - -/* allocate and initialize sg_table header(a kind of 'superblock') */ -static struct sg_table *sgtable_alloc(const size_t bytes, u32 flags, - u32 da, u32 pa) -{ - unsigned int nr_entries; - int err; - struct sg_table *sgt; - - if (!bytes) - return ERR_PTR(-EINVAL); - - if (!IS_ALIGNED(bytes, PAGE_SIZE)) - return ERR_PTR(-EINVAL); - - if (flags & IOVMF_LINEAR) { - nr_entries = sgtable_nents(bytes, da, pa); - if (!nr_entries) - return ERR_PTR(-EINVAL); - } else - nr_entries = bytes / PAGE_SIZE; - - sgt = kzalloc(sizeof(*sgt), GFP_KERNEL); - if (!sgt) - return ERR_PTR(-ENOMEM); - - err = sg_alloc_table(sgt, nr_entries, GFP_KERNEL); - if (err) { - kfree(sgt); - return ERR_PTR(err); - } - - pr_debug("%s: sgt:%p(%d entries)\n", __func__, sgt, nr_entries); - - return sgt; -} - -/* free sg_table header(a kind of superblock) */ -static void sgtable_free(struct sg_table *sgt) -{ - if (!sgt) - return; - - sg_free_table(sgt); - kfree(sgt); - - pr_debug("%s: sgt:%p\n", __func__, sgt); -} - -/* map 'sglist' to a contiguous mpu virtual area and return 'va' */ -static void *vmap_sg(const struct sg_table *sgt) -{ - u32 va; - size_t total; - unsigned int i; - struct scatterlist *sg; - struct vm_struct *new; - const struct mem_type *mtype; - - mtype = get_mem_type(MT_DEVICE); - if (!mtype) - return ERR_PTR(-EINVAL); - - total = sgtable_len(sgt); - if (!total) - return ERR_PTR(-EINVAL); - - new = __get_vm_area(total, VM_IOREMAP, VMALLOC_START, VMALLOC_END); - if (!new) - return 
ERR_PTR(-ENOMEM); - va = (u32)new->addr; - - for_each_sg(sgt->sgl, sg, sgt->nents, i) { - size_t bytes; - u32 pa; - int err; - - pa = sg_phys(sg) - sg->offset; - bytes = sg->length + sg->offset; - - BUG_ON(bytes != PAGE_SIZE); - - err = ioremap_page(va, pa, mtype); - if (err) - goto err_out; - - va += bytes; - } - - flush_cache_vmap((unsigned long)new->addr, - (unsigned long)(new->addr + total)); - return new->addr; - -err_out: - WARN_ON(1); /* FIXME: cleanup some mpu mappings */ - vunmap(new->addr); - return ERR_PTR(-EAGAIN); -} - -static inline void vunmap_sg(const void *va) -{ - vunmap(va); -} - -static struct iovm_struct *__find_iovm_area(struct omap_iommu *obj, - const u32 da) -{ - struct iovm_struct *tmp; - - list_for_each_entry(tmp, &obj->mmap, list) { - if ((da >= tmp->da_start) && (da < tmp->da_end)) { - size_t len; - - len = tmp->da_end - tmp->da_start; - - dev_dbg(obj->dev, "%s: %08x-%08x-%08x(%x) %08x\n", - __func__, tmp->da_start, da, tmp->da_end, len, - tmp->flags); - - return tmp; - } - } - - return NULL; -} - -/** - * omap_find_iovm_area - find iovma which includes @da - * @dev: client device - * @da: iommu device virtual address - * - * Find the existing iovma starting at @da - */ -struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da) -{ - struct omap_iommu *obj = dev_to_omap_iommu(dev); - struct iovm_struct *area; - - mutex_lock(&obj->mmap_lock); - area = __find_iovm_area(obj, da); - mutex_unlock(&obj->mmap_lock); - - return area; -} -EXPORT_SYMBOL_GPL(omap_find_iovm_area); - -/* - * This finds the hole(area) which fits the requested address and len - * in iovmas mmap, and returns the new allocated iovma. - */ -static struct iovm_struct *alloc_iovm_area(struct omap_iommu *obj, u32 da, - size_t bytes, u32 flags) -{ - struct iovm_struct *new, *tmp; - u32 start, prev_end, alignment; - - if (!obj || !bytes) - return ERR_PTR(-EINVAL); - - start = da; - alignment = PAGE_SIZE; - - if (~flags & IOVMF_DA_FIXED) { - /* Don't map address 0 */ - start = obj->da_start ? 
obj->da_start : alignment; - - if (flags & IOVMF_LINEAR) - alignment = iopgsz_max(bytes); - start = roundup(start, alignment); - } else if (start < obj->da_start || start > obj->da_end || - obj->da_end - start < bytes) { - return ERR_PTR(-EINVAL); - } - - tmp = NULL; - if (list_empty(&obj->mmap)) - goto found; - - prev_end = 0; - list_for_each_entry(tmp, &obj->mmap, list) { - - if (prev_end > start) - break; - - if (tmp->da_start > start && (tmp->da_start - start) >= bytes) - goto found; - - if (tmp->da_end >= start && ~flags & IOVMF_DA_FIXED) - start = roundup(tmp->da_end + 1, alignment); - - prev_end = tmp->da_end; - } - - if ((start >= prev_end) && (obj->da_end - start >= bytes)) - goto found; - - dev_dbg(obj->dev, "%s: no space to fit %08x(%x) flags: %08x\n", - __func__, da, bytes, flags); - - return ERR_PTR(-EINVAL); - -found: - new = kmem_cache_zalloc(iovm_area_cachep, GFP_KERNEL); - if (!new) - return ERR_PTR(-ENOMEM); - - new->iommu = obj; - new->da_start = start; - new->da_end = start + bytes; - new->flags = flags; - - /* - * keep ascending order of iovmas - */ - if (tmp) - list_add_tail(&new->list, &tmp->list); - else - list_add(&new->list, &obj->mmap); - - dev_dbg(obj->dev, "%s: found %08x-%08x-%08x(%x) %08x\n", - __func__, new->da_start, start, new->da_end, bytes, flags); - - return new; -} - -static void free_iovm_area(struct omap_iommu *obj, struct iovm_struct *area) -{ - size_t bytes; - - BUG_ON(!obj || !area); - - bytes = area->da_end - area->da_start; - - dev_dbg(obj->dev, "%s: %08x-%08x(%x) %08x\n", - __func__, area->da_start, area->da_end, bytes, area->flags); - - list_del(&area->list); - kmem_cache_free(iovm_area_cachep, area); -} - -/** - * omap_da_to_va - convert (d) to (v) - * @dev: client device - * @da: iommu device virtual address - * @va: mpu virtual address - * - * Returns mpu virtual addr which corresponds to a given device virtual addr - */ -void *omap_da_to_va(struct device *dev, u32 da) -{ - struct omap_iommu *obj = dev_to_omap_iommu(dev); - void *va = NULL; - struct iovm_struct *area; - - mutex_lock(&obj->mmap_lock); - - area = __find_iovm_area(obj, da); - if (!area) { - dev_dbg(obj->dev, "%s: no da area(%08x)\n", __func__, da); - goto out; - } - va = area->va; -out: - mutex_unlock(&obj->mmap_lock); - - return va; -} -EXPORT_SYMBOL_GPL(omap_da_to_va); - -static void sgtable_fill_vmalloc(struct sg_table *sgt, void *_va) -{ - unsigned int i; - struct scatterlist *sg; - void *va = _va; - void *va_end; - - for_each_sg(sgt->sgl, sg, sgt->nents, i) { - struct page *pg; - const size_t bytes = PAGE_SIZE; - - /* - * iommu 'superpage' isn't supported with 'omap_iommu_vmalloc()' - */ - pg = vmalloc_to_page(va); - BUG_ON(!pg); - sg_set_page(sg, pg, bytes, 0); - - va += bytes; - } - - va_end = _va + PAGE_SIZE * i; -} - -static inline void sgtable_drain_vmalloc(struct sg_table *sgt) -{ - /* - * Actually this is not necessary at all, just exists for - * consistency of the code readability. 
- */ - BUG_ON(!sgt); -} - -/* create 'da' <-> 'pa' mapping from 'sgt' */ -static int map_iovm_area(struct iommu_domain *domain, struct iovm_struct *new, - const struct sg_table *sgt, u32 flags) -{ - int err; - unsigned int i, j; - struct scatterlist *sg; - u32 da = new->da_start; - - if (!domain || !sgt) - return -EINVAL; - - BUG_ON(!sgtable_ok(sgt)); - - for_each_sg(sgt->sgl, sg, sgt->nents, i) { - u32 pa; - size_t bytes; - - pa = sg_phys(sg) - sg->offset; - bytes = sg->length + sg->offset; - - flags &= ~IOVMF_PGSZ_MASK; - - if (bytes_to_iopgsz(bytes) < 0) - goto err_out; - - pr_debug("%s: [%d] %08x %08x(%x)\n", __func__, - i, da, pa, bytes); - - err = iommu_map(domain, da, pa, bytes, flags); - if (err) - goto err_out; - - da += bytes; - } - return 0; - -err_out: - da = new->da_start; - - for_each_sg(sgt->sgl, sg, i, j) { - size_t bytes; - - bytes = sg->length + sg->offset; - - /* ignore failures.. we're already handling one */ - iommu_unmap(domain, da, bytes); - - da += bytes; - } - return err; -} - -/* release 'da' <-> 'pa' mapping */ -static void unmap_iovm_area(struct iommu_domain *domain, struct omap_iommu *obj, - struct iovm_struct *area) -{ - u32 start; - size_t total = area->da_end - area->da_start; - const struct sg_table *sgt = area->sgt; - struct scatterlist *sg; - int i; - size_t unmapped; - - BUG_ON(!sgtable_ok(sgt)); - BUG_ON((!total) || !IS_ALIGNED(total, PAGE_SIZE)); - - start = area->da_start; - for_each_sg(sgt->sgl, sg, sgt->nents, i) { - size_t bytes; - - bytes = sg->length + sg->offset; - - unmapped = iommu_unmap(domain, start, bytes); - if (unmapped < bytes) - break; - - dev_dbg(obj->dev, "%s: unmap %08x(%x) %08x\n", - __func__, start, bytes, area->flags); - - BUG_ON(!IS_ALIGNED(bytes, PAGE_SIZE)); - - total -= bytes; - start += bytes; - } - BUG_ON(total); -} - -/* template function for all unmapping */ -static struct sg_table *unmap_vm_area(struct iommu_domain *domain, - struct omap_iommu *obj, const u32 da, - void (*fn)(const void *), u32 flags) -{ - struct sg_table *sgt = NULL; - struct iovm_struct *area; - - if (!IS_ALIGNED(da, PAGE_SIZE)) { - dev_err(obj->dev, "%s: alignment err(%08x)\n", __func__, da); - return NULL; - } - - mutex_lock(&obj->mmap_lock); - - area = __find_iovm_area(obj, da); - if (!area) { - dev_dbg(obj->dev, "%s: no da area(%08x)\n", __func__, da); - goto out; - } - - if ((area->flags & flags) != flags) { - dev_err(obj->dev, "%s: wrong flags(%08x)\n", __func__, - area->flags); - goto out; - } - sgt = (struct sg_table *)area->sgt; - - unmap_iovm_area(domain, obj, area); - - fn(area->va); - - dev_dbg(obj->dev, "%s: %08x-%08x-%08x(%x) %08x\n", __func__, - area->da_start, da, area->da_end, - area->da_end - area->da_start, area->flags); - - free_iovm_area(obj, area); -out: - mutex_unlock(&obj->mmap_lock); - - return sgt; -} - -static u32 map_iommu_region(struct iommu_domain *domain, struct omap_iommu *obj, - u32 da, const struct sg_table *sgt, void *va, - size_t bytes, u32 flags) -{ - int err = -ENOMEM; - struct iovm_struct *new; - - mutex_lock(&obj->mmap_lock); - - new = alloc_iovm_area(obj, da, bytes, flags); - if (IS_ERR(new)) { - err = PTR_ERR(new); - goto err_alloc_iovma; - } - new->va = va; - new->sgt = sgt; - - if (map_iovm_area(domain, new, sgt, new->flags)) - goto err_map; - - mutex_unlock(&obj->mmap_lock); - - dev_dbg(obj->dev, "%s: da:%08x(%x) flags:%08x va:%p\n", - __func__, new->da_start, bytes, new->flags, va); - - return new->da_start; - -err_map: - free_iovm_area(obj, new); -err_alloc_iovma: - mutex_unlock(&obj->mmap_lock); - return 
err; -} - -static inline u32 -__iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj, - u32 da, const struct sg_table *sgt, - void *va, size_t bytes, u32 flags) -{ - return map_iommu_region(domain, obj, da, sgt, va, bytes, flags); -} - -/** - * omap_iommu_vmap - (d)-(p)-(v) address mapper - * @domain: iommu domain - * @dev: client device - * @sgt: address of scatter gather table - * @flags: iovma and page property - * - * Creates 1-n-1 mapping with given @sgt and returns @da. - * All @sgt element must be io page size aligned. - */ -u32 omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da, - const struct sg_table *sgt, u32 flags) -{ - struct omap_iommu *obj = dev_to_omap_iommu(dev); - size_t bytes; - void *va = NULL; - - if (!obj || !obj->dev || !sgt) - return -EINVAL; - - bytes = sgtable_len(sgt); - if (!bytes) - return -EINVAL; - bytes = PAGE_ALIGN(bytes); - - if (flags & IOVMF_MMIO) { - va = vmap_sg(sgt); - if (IS_ERR(va)) - return PTR_ERR(va); - } - - flags |= IOVMF_DISCONT; - flags |= IOVMF_MMIO; - - da = __iommu_vmap(domain, obj, da, sgt, va, bytes, flags); - if (IS_ERR_VALUE(da)) - vunmap_sg(va); - - return da + sgtable_offset(sgt); -} -EXPORT_SYMBOL_GPL(omap_iommu_vmap); - -/** - * omap_iommu_vunmap - release virtual mapping obtained by 'omap_iommu_vmap()' - * @domain: iommu domain - * @dev: client device - * @da: iommu device virtual address - * - * Free the iommu virtually contiguous memory area starting at - * @da, which was returned by 'omap_iommu_vmap()'. - */ -struct sg_table * -omap_iommu_vunmap(struct iommu_domain *domain, struct device *dev, u32 da) -{ - struct omap_iommu *obj = dev_to_omap_iommu(dev); - struct sg_table *sgt; - /* - * 'sgt' is allocated before 'omap_iommu_vmalloc()' is called. - * Just returns 'sgt' to the caller to free - */ - da &= PAGE_MASK; - sgt = unmap_vm_area(domain, obj, da, vunmap_sg, - IOVMF_DISCONT | IOVMF_MMIO); - if (!sgt) - dev_dbg(obj->dev, "%s: No sgt\n", __func__); - return sgt; -} -EXPORT_SYMBOL_GPL(omap_iommu_vunmap); - -/** - * omap_iommu_vmalloc - (d)-(p)-(v) address allocator and mapper - * @dev: client device - * @da: contiguous iommu virtual memory - * @bytes: allocation size - * @flags: iovma and page property - * - * Allocate @bytes linearly and creates 1-n-1 mapping and returns - * @da again, which might be adjusted if 'IOVMF_DA_FIXED' is not set. - */ -u32 -omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev, u32 da, - size_t bytes, u32 flags) -{ - struct omap_iommu *obj = dev_to_omap_iommu(dev); - void *va; - struct sg_table *sgt; - - if (!obj || !obj->dev || !bytes) - return -EINVAL; - - bytes = PAGE_ALIGN(bytes); - - va = vmalloc(bytes); - if (!va) - return -ENOMEM; - - flags |= IOVMF_DISCONT; - flags |= IOVMF_ALLOC; - - sgt = sgtable_alloc(bytes, flags, da, 0); - if (IS_ERR(sgt)) { - da = PTR_ERR(sgt); - goto err_sgt_alloc; - } - sgtable_fill_vmalloc(sgt, va); - - da = __iommu_vmap(domain, obj, da, sgt, va, bytes, flags); - if (IS_ERR_VALUE(da)) - goto err_iommu_vmap; - - return da; - -err_iommu_vmap: - sgtable_drain_vmalloc(sgt); - sgtable_free(sgt); -err_sgt_alloc: - vfree(va); - return da; -} -EXPORT_SYMBOL_GPL(omap_iommu_vmalloc); - -/** - * omap_iommu_vfree - release memory allocated by 'omap_iommu_vmalloc()' - * @dev: client device - * @da: iommu device virtual address - * - * Frees the iommu virtually continuous memory area starting at - * @da, as obtained from 'omap_iommu_vmalloc()'. 
- */ -void omap_iommu_vfree(struct iommu_domain *domain, struct device *dev, - const u32 da) -{ - struct omap_iommu *obj = dev_to_omap_iommu(dev); - struct sg_table *sgt; - - sgt = unmap_vm_area(domain, obj, da, vfree, - IOVMF_DISCONT | IOVMF_ALLOC); - if (!sgt) - dev_dbg(obj->dev, "%s: No sgt\n", __func__); - sgtable_free(sgt); -} -EXPORT_SYMBOL_GPL(omap_iommu_vfree); - -static int __init iovmm_init(void) -{ - const unsigned long flags = SLAB_HWCACHE_ALIGN; - struct kmem_cache *p; - - p = kmem_cache_create("iovm_area_cache", sizeof(struct iovm_struct), 0, - flags, NULL); - if (!p) - return -ENOMEM; - iovm_area_cachep = p; - - return 0; -} -module_init(iovmm_init); - -static void __exit iovmm_exit(void) -{ - kmem_cache_destroy(iovm_area_cachep); -} -module_exit(iovmm_exit); - -MODULE_DESCRIPTION("omap iommu: simple virtual address space management"); -MODULE_AUTHOR("Hiroshi DOYU <Hiroshi.DOYU@nokia.com>"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/pci.h b/drivers/iommu/pci.h deleted file mode 100644 index 352d80ae7443..000000000000 --- a/drivers/iommu/pci.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright (C) 2013 Red Hat, Inc. - * Copyright (C) 2013 Freescale Semiconductor, Inc. 
- * - */ -#ifndef __IOMMU_PCI_H -#define __IOMMU_PCI_H - -/* Helper function for swapping pci device reference */ -static inline void swap_pci_ref(struct pci_dev **from, struct pci_dev *to) -{ - pci_dev_put(*from); - *from = to; -} - -#endif /* __IOMMU_PCI_H */ diff --git a/drivers/iommu/shmobile-iommu.c b/drivers/iommu/shmobile-iommu.c index 464acda0bbc4..1333e6fb3405 100644 --- a/drivers/iommu/shmobile-iommu.c +++ b/drivers/iommu/shmobile-iommu.c @@ -354,7 +354,7 @@ static int shmobile_iommu_add_device(struct device *dev) return 0; } -static struct iommu_ops shmobile_iommu_ops = { +static const struct iommu_ops shmobile_iommu_ops = { .domain_init = shmobile_iommu_domain_init, .domain_destroy = shmobile_iommu_domain_destroy, .attach_dev = shmobile_iommu_attach_device, diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index dba1a9fd5070..b10a8ecede8e 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -309,7 +309,7 @@ static int gart_iommu_domain_has_cap(struct iommu_domain *domain, return 0; } -static struct iommu_ops gart_iommu_ops = { +static const struct iommu_ops gart_iommu_ops = { .domain_init = gart_iommu_domain_init, .domain_destroy = gart_iommu_domain_destroy, .attach_dev = gart_iommu_attach_dev, diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 605b5b46a903..792da5ea6d12 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -947,7 +947,7 @@ static void smmu_iommu_domain_destroy(struct iommu_domain *domain) dev_dbg(smmu->dev, "smmu_as@%p\n", as); } -static struct iommu_ops smmu_iommu_ops = { +static const struct iommu_ops smmu_iommu_ops = { .domain_init = smmu_iommu_domain_init, .domain_destroy = smmu_iommu_domain_destroy, .attach_dev = smmu_iommu_attach_dev, diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 15f6b9edd0b1..2b08e79f5100 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -119,6 +119,13 @@ typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev, extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, amd_iommu_invalid_ppr_cb cb); +#define PPR_FAULT_EXEC (1 << 1) +#define PPR_FAULT_READ (1 << 2) +#define PPR_FAULT_WRITE (1 << 5) +#define PPR_FAULT_USER (1 << 6) +#define PPR_FAULT_RSVD (1 << 7) +#define PPR_FAULT_GN (1 << 8) + /** * amd_iommu_device_info() - Get information about IOMMUv2 support of a * PCI device diff --git a/include/linux/device.h b/include/linux/device.h index b0aab0d6be7e..43d183aeb25b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -124,7 +124,7 @@ struct bus_type { const struct dev_pm_ops *pm; - struct iommu_ops *iommu_ops; + const struct iommu_ops *iommu_ops; struct subsys_private *p; struct lock_class_key lock_key; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 23c8db129560..1deece46a0ca 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -114,22 +114,30 @@ extern int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, /* Intel IOMMU detection */ extern int detect_intel_iommu(void); extern int enable_drhd_fault_handling(void); -#else -struct dmar_pci_notify_info; -static inline int detect_intel_iommu(void) + +#ifdef CONFIG_INTEL_IOMMU +extern int iommu_detected, no_iommu; +extern int intel_iommu_init(void); +extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header); +extern int dmar_parse_one_atsr(struct acpi_dmar_header *header); +extern int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info); +#else /* !CONFIG_INTEL_IOMMU: */ 
+static inline int intel_iommu_init(void) { return -ENODEV; } +static inline int dmar_parse_one_rmrr(struct acpi_dmar_header *header) { - return -ENODEV; + return 0; } - -static inline int dmar_table_init(void) +static inline int dmar_parse_one_atsr(struct acpi_dmar_header *header) { - return -ENODEV; + return 0; } -static inline int enable_drhd_fault_handling(void) +static inline int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) { - return -1; + return 0; } -#endif /* !CONFIG_DMAR_TABLE */ +#endif /* CONFIG_INTEL_IOMMU */ + +#endif /* CONFIG_DMAR_TABLE */ struct irte { union { @@ -177,26 +185,4 @@ extern int dmar_set_interrupt(struct intel_iommu *iommu); extern irqreturn_t dmar_fault(int irq, void *dev_id); extern int arch_setup_dmar_msi(unsigned int irq); -#ifdef CONFIG_INTEL_IOMMU -extern int iommu_detected, no_iommu; -extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header); -extern int dmar_parse_one_atsr(struct acpi_dmar_header *header); -extern int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info); -extern int intel_iommu_init(void); -#else /* !CONFIG_INTEL_IOMMU: */ -static inline int intel_iommu_init(void) { return -ENODEV; } -static inline int dmar_parse_one_rmrr(struct acpi_dmar_header *header) -{ - return 0; -} -static inline int dmar_parse_one_atsr(struct acpi_dmar_header *header) -{ - return 0; -} -static inline int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) -{ - return 0; -} -#endif /* CONFIG_INTEL_IOMMU */ - #endif /* __DMAR_H__ */ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 0a2da5188217..a65208a8fe18 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -336,6 +336,7 @@ struct intel_iommu { #ifdef CONFIG_IRQ_REMAP struct ir_table *ir_table; /* Interrupt remapping info */ #endif + struct device *iommu_dev; /* IOMMU-sysfs device */ int node; }; @@ -365,4 +366,6 @@ extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern int dmar_ir_support(void); +extern const struct attribute_group *intel_iommu_groups[]; + #endif diff --git a/include/linux/iommu.h b/include/linux/iommu.h index b96a5b2136e4..20f9a527922a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -50,7 +50,7 @@ struct iommu_domain_geometry { }; struct iommu_domain { - struct iommu_ops *ops; + const struct iommu_ops *ops; void *priv; iommu_fault_handler_t handler; void *handler_token; @@ -140,7 +140,7 @@ struct iommu_ops { #define IOMMU_GROUP_NOTIFY_UNBIND_DRIVER 5 /* Pre Driver unbind */ #define IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER 6 /* Post Driver unbind */ -extern int bus_set_iommu(struct bus_type *bus, struct iommu_ops *ops); +extern int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops); extern bool iommu_present(struct bus_type *bus); extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus); extern struct iommu_group *iommu_group_get_by_id(int id); @@ -181,11 +181,18 @@ extern int iommu_group_register_notifier(struct iommu_group *group, extern int iommu_group_unregister_notifier(struct iommu_group *group, struct notifier_block *nb); extern int iommu_group_id(struct iommu_group *group); +extern struct iommu_group *iommu_group_get_for_dev(struct device *dev); extern int iommu_domain_get_attr(struct iommu_domain *domain, enum iommu_attr, void *data); extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr, void *data); +struct device *iommu_device_create(struct device *parent, void *drvdata, + const struct 
attribute_group **groups, + const char *fmt, ...); +void iommu_device_destroy(struct device *dev); +int iommu_device_link(struct device *dev, struct device *link); +void iommu_device_unlink(struct device *dev, struct device *link); /* Window handling function prototypes */ extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, @@ -396,6 +403,27 @@ static inline int iommu_domain_set_attr(struct iommu_domain *domain, return -EINVAL; } +static inline struct device *iommu_device_create(struct device *parent, + void *drvdata, + const struct attribute_group **groups, + const char *fmt, ...) +{ + return ERR_PTR(-ENODEV); +} + +static inline void iommu_device_destroy(struct device *dev) +{ +} + +static inline int iommu_device_link(struct device *dev, struct device *link) +{ + return -EINVAL; +} + +static inline void iommu_device_unlink(struct device *dev, struct device *link) +{ +} + #endif /* CONFIG_IOMMU_API */ #endif /* __LINUX_IOMMU_H */ diff --git a/include/linux/iova.h b/include/linux/iova.h index 3277f4711349..19e81d5ccb6d 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -34,6 +34,11 @@ struct iova_domain { unsigned long dma_32bit_pfn; }; +static inline unsigned long iova_size(struct iova *iova) +{ + return iova->pfn_hi - iova->pfn_lo + 1; +} + struct iova *alloc_iova_mem(void); void free_iova_mem(struct iova *iova); void free_iova(struct iova_domain *iovad, unsigned long pfn); diff --git a/include/linux/omap-iommu.h b/include/linux/omap-iommu.h index cac78de09c07..c1aede46718b 100644 --- a/include/linux/omap-iommu.h +++ b/include/linux/omap-iommu.h @@ -10,41 +10,8 @@ * published by the Free Software Foundation. */ -#ifndef _INTEL_IOMMU_H_ -#define _INTEL_IOMMU_H_ - -struct iovm_struct { - struct omap_iommu *iommu; /* iommu object which this belongs to */ - u32 da_start; /* area definition */ - u32 da_end; - u32 flags; /* IOVMF_: see below */ - struct list_head list; /* linked in ascending order */ - const struct sg_table *sgt; /* keep 'page' <-> 'da' mapping */ - void *va; /* mpu side mapped address */ -}; - -#define MMU_RAM_ENDIAN_SHIFT 9 -#define MMU_RAM_ENDIAN_LITTLE (0 << MMU_RAM_ENDIAN_SHIFT) -#define MMU_RAM_ELSZ_8 (0 << MMU_RAM_ELSZ_SHIFT) -#define IOVMF_ENDIAN_LITTLE MMU_RAM_ENDIAN_LITTLE -#define MMU_RAM_ELSZ_SHIFT 7 -#define IOVMF_ELSZ_8 MMU_RAM_ELSZ_8 - -struct iommu_domain; - -extern struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da); -extern u32 -omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da, - const struct sg_table *sgt, u32 flags); -extern struct sg_table *omap_iommu_vunmap(struct iommu_domain *domain, - struct device *dev, u32 da); -extern u32 -omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev, - u32 da, size_t bytes, u32 flags); -extern void -omap_iommu_vfree(struct iommu_domain *domain, struct device *dev, - const u32 da); -extern void *omap_da_to_va(struct device *dev, u32 da); +#ifndef _OMAP_IOMMU_H_ +#define _OMAP_IOMMU_H_ extern void omap_iommu_save_ctx(struct device *dev); extern void omap_iommu_restore_ctx(struct device *dev); diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h index 5b429c43a297..54a0a9582fad 100644 --- a/include/linux/platform_data/iommu-omap.h +++ b/include/linux/platform_data/iommu-omap.h @@ -31,14 +31,10 @@ struct omap_iommu_arch_data { /** * struct omap_mmu_dev_attr - OMAP mmu device attributes for omap_hwmod - * @da_start: device address where the va space starts. 
- * @da_end: device address where the va space ends. * @nr_tlb_entries: number of entries supported by the translation * look-aside buffer (TLB). */ struct omap_mmu_dev_attr { - u32 da_start; - u32 da_end; int nr_tlb_entries; }; @@ -46,8 +42,6 @@ struct iommu_platform_data { const char *name; const char *reset_name; int nr_tlb_entries; - u32 da_start; - u32 da_end; int (*assert_reset)(struct platform_device *pdev, const char *name); int (*deassert_reset)(struct platform_device *pdev, const char *name); |
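To illustrate the interfaces this series introduces (none of the code below is part of the patch itself): the grouping rework replaces the per-driver PCI topology/ACS/alias walking removed from intel-iommu above with the shared iommu_group_get_for_dev() helper. A minimal sketch of a converted driver's callbacks follows; my_iommu_add_device(), my_iommu_remove_device(), my_iommu_ops and my_iommu_init() are made-up names, and a real driver would also fill in the domain and map/unmap callbacks.

#include <linux/device.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/iommu.h>
#include <linux/pci.h>

static int my_iommu_add_device(struct device *dev)
{
	struct iommu_group *group;

	/*
	 * Find or create the group for this device.  On success the device
	 * has already been added to the group and we hold a reference that
	 * must be dropped.
	 */
	group = iommu_group_get_for_dev(dev);
	if (IS_ERR(group))
		return PTR_ERR(group);

	iommu_group_put(group);
	return 0;
}

static void my_iommu_remove_device(struct device *dev)
{
	iommu_group_remove_device(dev);
}

/* Ops tables can now live in .rodata, since bus_set_iommu() takes const. */
static const struct iommu_ops my_iommu_ops = {
	.add_device	= my_iommu_add_device,
	.remove_device	= my_iommu_remove_device,
};

static int __init my_iommu_init(void)
{
	return bus_set_iommu(&pci_bus_type, &my_iommu_ops);
}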
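The new iommu-sysfs helpers give each hardware IOMMU unit a device under /sys/class/iommu and let the driver cross-link it with the endpoints it translates, which is what the intel-iommu remove_device path above relies on. A rough usage sketch; my_unit and the "smmu%d" name are purely illustrative, and a driver with per-IOMMU attributes would pass its attribute groups (as intel_iommu_groups is, elsewhere in this series) instead of NULL.

#include <linux/device.h>
#include <linux/err.h>
#include <linux/iommu.h>

struct my_unit {
	struct device *iommu_dev;	/* handle from iommu_device_create() */
};

static int my_unit_register(struct my_unit *unit, struct device *parent)
{
	/* Appears as /sys/class/iommu/smmu0; no extra attribute groups. */
	unit->iommu_dev = iommu_device_create(parent, unit, NULL, "smmu%d", 0);

	return PTR_ERR_OR_ZERO(unit->iommu_dev);
}

static int my_unit_add_device(struct my_unit *unit, struct device *dev)
{
	/*
	 * Adds devices/<dev> under the IOMMU's sysfs node and an "iommu"
	 * link under <dev> pointing back at the IOMMU.
	 */
	return iommu_device_link(unit->iommu_dev, dev);
}

static void my_unit_remove_device(struct my_unit *unit, struct device *dev)
{
	iommu_device_unlink(unit->iommu_dev, dev);
}

static void my_unit_unregister(struct my_unit *unit)
{
	iommu_device_destroy(unit->iommu_dev);
}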
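Both set_msi_sid() and get_pci_alias_or_group() above build on pci_for_each_dma_alias(), which starts at the device and walks toward the root bus, invoking the callback once for each requester ID the device may present; a non-zero return stops the walk. A trimmed-down sketch of the pattern (record_last_alias() and show_topmost_alias() are only illustrative):

#include <linux/pci.h>

struct last_alias {
	struct pci_dev *pdev;
	u16 alias;
};

/* Remember the most recent callback; returning 0 continues the walk. */
static int record_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
{
	struct last_alias *last = opaque;

	last->pdev = pdev;
	last->alias = alias;

	return 0;
}

static void show_topmost_alias(struct pci_dev *pdev)
{
	struct last_alias last = { .pdev = pdev };

	pci_for_each_dma_alias(pdev, record_last_alias, &last);

	/*
	 * For a device behind a PCIe-to-PCI bridge this reports an alias on
	 * the bridge's subordinate bus, per the set_msi_sid() comment above.
	 */
	dev_info(&pdev->dev, "topmost DMA alias %02x:%02x.%d\n",
		 PCI_BUS_NUM(last.alias), PCI_SLOT(last.alias & 0xff),
		 PCI_FUNC(last.alias & 0xff));
}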
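amd-iommu.h now exports the PPR fault flag bits next to the invalid-PPR callback declaration, so callers can interpret why a request was reported. A small, purely illustrative decoder (my_log_ppr_flags() is a made-up helper, not part of the patch):

#include <linux/amd-iommu.h>
#include <linux/device.h>

/* Turn the PPR_FAULT_* bits into a readable log line. */
static void my_log_ppr_flags(struct device *dev, u16 flags)
{
	dev_err(dev, "invalid PPR:%s%s%s%s%s%s\n",
		(flags & PPR_FAULT_READ)  ? " read"  : "",
		(flags & PPR_FAULT_WRITE) ? " write" : "",
		(flags & PPR_FAULT_EXEC)  ? " exec"  : "",
		(flags & PPR_FAULT_USER)  ? " user"  : "",
		(flags & PPR_FAULT_RSVD)  ? " rsvd"  : "",
		(flags & PPR_FAULT_GN)    ? " gn"    : "");
}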
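Finally, the small iova_size() helper added to include/linux/iova.h composes with the existing allocator lookups; a sketch, where my_iova_pages() is a made-up wrapper:

#include <linux/iova.h>

/* Size, in pages, of the IOVA allocation covering @pfn, or 0 if none. */
static unsigned long my_iova_pages(struct iova_domain *iovad, unsigned long pfn)
{
	struct iova *iova = find_iova(iovad, pfn);

	return iova ? iova_size(iova) : 0;
}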