From 058c59a047d61601abf503563bbea818ee645c09 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 24 Feb 2020 17:58:42 +0100 Subject: iommu/arm-smmu-v3: Add support for PCI PASID Enable PASID for PCI devices that support it. Initialize PASID early in add_device() because it must be enabled before ATS. Tested-by: Zhangfei Gao Reviewed-by: Jonathan Cameron Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 62 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index aa3ac2a03807..6b76df37025e 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2628,6 +2628,53 @@ static void arm_smmu_disable_ats(struct arm_smmu_master *master) atomic_dec(&smmu_domain->nr_ats_masters); } +static int arm_smmu_enable_pasid(struct arm_smmu_master *master) +{ + int ret; + int features; + int num_pasids; + struct pci_dev *pdev; + + if (!dev_is_pci(master->dev)) + return -ENODEV; + + pdev = to_pci_dev(master->dev); + + features = pci_pasid_features(pdev); + if (features < 0) + return features; + + num_pasids = pci_max_pasids(pdev); + if (num_pasids <= 0) + return num_pasids; + + ret = pci_enable_pasid(pdev, features); + if (ret) { + dev_err(&pdev->dev, "Failed to enable PASID\n"); + return ret; + } + + master->ssid_bits = min_t(u8, ilog2(num_pasids), + master->smmu->ssid_bits); + return 0; +} + +static void arm_smmu_disable_pasid(struct arm_smmu_master *master) +{ + struct pci_dev *pdev; + + if (!dev_is_pci(master->dev)) + return; + + pdev = to_pci_dev(master->dev); + + if (!pdev->pasid_enabled) + return; + + master->ssid_bits = 0; + pci_disable_pasid(pdev); +} + static void arm_smmu_detach_dev(struct arm_smmu_master *master) { unsigned long flags; @@ -2831,13 +2878,23 @@ static int arm_smmu_add_device(struct device *dev) master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits); + /* + * Note that PASID must be enabled before, and disabled after ATS: + * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register + * + * Behavior is undefined if this bit is Set and the value of the PASID + * Enable, Execute Requested Enable, or Privileged Mode Requested bits + * are changed. + */ + arm_smmu_enable_pasid(master); + if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB)) master->ssid_bits = min_t(u8, master->ssid_bits, CTXDESC_LINEAR_CDMAX); ret = iommu_device_link(&smmu->iommu, dev); if (ret) - goto err_free_master; + goto err_disable_pasid; group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) { @@ -2850,6 +2907,8 @@ static int arm_smmu_add_device(struct device *dev) err_unlink: iommu_device_unlink(&smmu->iommu, dev); +err_disable_pasid: + arm_smmu_disable_pasid(master); err_free_master: kfree(master); fwspec->iommu_priv = NULL; @@ -2870,6 +2929,7 @@ static void arm_smmu_remove_device(struct device *dev) arm_smmu_detach_dev(master); iommu_group_remove_device(dev); iommu_device_unlink(&smmu->iommu, dev); + arm_smmu_disable_pasid(master); kfree(master); iommu_fwspec_free(dev); } -- cgit v1.2.3 From 87e5fe5b779a20fa02382aaf169015e68710b9ff Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 24 Feb 2020 17:58:43 +0100 Subject: iommu/arm-smmu-v3: Write level-1 descriptors atomically Use WRITE_ONCE() to make sure that the SMMU doesn't read incomplete stream table descriptors. Refer to the comment about 64-bit accesses, and add the comment to the equivalent context descriptor code. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 6b76df37025e..068a16d0eabe 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1531,6 +1531,7 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst, u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | CTXDESC_L1_DESC_V; + /* See comment in arm_smmu_write_ctx_desc() */ WRITE_ONCE(*dst, cpu_to_le64(val)); } @@ -1726,7 +1727,8 @@ arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc) val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span); val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK; - *dst = cpu_to_le64(val); + /* See comment in arm_smmu_write_ctx_desc() */ + WRITE_ONCE(*dst, cpu_to_le64(val)); } static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid) -- cgit v1.2.3 From 4ce8da453640147101bda418640394637c1a7cfc Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 24 Feb 2020 17:58:44 +0100 Subject: iommu/arm-smmu-v3: Add command queue batching helpers As more functions will implement command queue batching, add two helpers to simplify building a command list. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 068a16d0eabe..beeec366bc41 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -548,6 +548,11 @@ struct arm_smmu_cmdq { atomic_t lock; }; +struct arm_smmu_cmdq_batch { + u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS]; + int num; +}; + struct arm_smmu_evtq { struct arm_smmu_queue q; u32 max_stalls; @@ -1482,6 +1487,24 @@ static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true); } +static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq_batch *cmds, + struct arm_smmu_cmdq_ent *cmd) +{ + if (cmds->num == CMDQ_BATCH_ENTRIES) { + arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false); + cmds->num = 0; + } + arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd); + cmds->num++; +} + +static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq_batch *cmds) +{ + return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true); +} + /* Context descriptor manipulation functions */ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, int ssid, bool leaf) @@ -2220,10 +2243,9 @@ static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size, size_t granule, bool leaf, struct arm_smmu_domain *smmu_domain) { - u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS]; struct arm_smmu_device *smmu = smmu_domain->smmu; unsigned long start = iova, end = iova + size; - int i = 0; + struct arm_smmu_cmdq_batch cmds = {}; struct arm_smmu_cmdq_ent cmd = { .tlbi = { .leaf = leaf, @@ -2242,18 +2264,11 @@ static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size, } while (iova < end) { - if (i == CMDQ_BATCH_ENTRIES) { - arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, false); - i = 0; - } - cmd.tlbi.addr = iova; - arm_smmu_cmdq_build_cmd(&cmds[i * CMDQ_ENT_DWORDS], &cmd); + arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); iova += granule; - i++; } - - arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, true); + arm_smmu_cmdq_batch_submit(smmu, &cmds); /* * Unfortunately, this can't be leaf-only since we may have -- cgit v1.2.3 From edd0351e7bc49555d8b5ad8438a65a7ca262c9f0 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Mon, 24 Feb 2020 17:58:45 +0100 Subject: iommu/arm-smmu-v3: Batch context descriptor invalidation Rather than publishing one command at a time when invalidating a context descriptor, batch the commands for all SIDs in the domain. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index beeec366bc41..12b2a0fa747e 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1512,6 +1512,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, size_t i; unsigned long flags; struct arm_smmu_master *master; + struct arm_smmu_cmdq_batch cmds = {}; struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_cmdq_ent cmd = { .opcode = CMDQ_OP_CFGI_CD, @@ -1525,12 +1526,12 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, list_for_each_entry(master, &smmu_domain->devices, domain_head) { for (i = 0; i < master->num_sids; i++) { cmd.cfgi.sid = master->sids[i]; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); + arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); } } spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); - arm_smmu_cmdq_issue_sync(smmu); + arm_smmu_cmdq_batch_submit(smmu, &cmds); } static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu, -- cgit v1.2.3 From 9e773aee8c3e1b3ba019c5c7f8435aaa836c6130 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 24 Feb 2020 17:58:46 +0100 Subject: iommu/arm-smmu-v3: Batch ATC invalidation commands Similar to commit 2af2e72b18b4 ("iommu/arm-smmu-v3: Defer TLB invalidation until ->iotlb_sync()"), build up a list of ATC invalidation commands and submit them all at once to the command queue instead of one-by-one. As there is only one caller of arm_smmu_atc_inv_master() left, we can simplify it and avoid passing in struct arm_smmu_cmdq_ent. Cc: Jean-Philippe Brucker Cc: Will Deacon Cc: Robin Murphy Cc: Joerg Roedel Signed-off-by: Rob Herring Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 12b2a0fa747e..4f0a38dae6db 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2158,17 +2158,16 @@ arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size, cmd->atc.size = log2_span; } -static int arm_smmu_atc_inv_master(struct arm_smmu_master *master, - struct arm_smmu_cmdq_ent *cmd) +static int arm_smmu_atc_inv_master(struct arm_smmu_master *master) { int i; + struct arm_smmu_cmdq_ent cmd; - if (!master->ats_enabled) - return 0; + arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd); for (i = 0; i < master->num_sids; i++) { - cmd->atc.sid = master->sids[i]; - arm_smmu_cmdq_issue_cmd(master->smmu, cmd); + cmd.atc.sid = master->sids[i]; + arm_smmu_cmdq_issue_cmd(master->smmu, &cmd); } return arm_smmu_cmdq_issue_sync(master->smmu); @@ -2177,10 +2176,11 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master, static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, unsigned long iova, size_t size) { - int ret = 0; + int i; unsigned long flags; struct arm_smmu_cmdq_ent cmd; struct arm_smmu_master *master; + struct arm_smmu_cmdq_batch cmds = {}; if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS)) return 0; @@ -2205,11 +2205,18 @@ static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd); spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master, &smmu_domain->devices, domain_head) - ret |= arm_smmu_atc_inv_master(master, &cmd); + list_for_each_entry(master, &smmu_domain->devices, domain_head) { + if (!master->ats_enabled) + continue; + + for (i = 0; i < master->num_sids; i++) { + cmd.atc.sid = master->sids[i]; + arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd); + } + } spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); - return ret ? -ETIMEDOUT : 0; + return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds); } /* IO_PGTABLE API */ @@ -2629,7 +2636,6 @@ static void arm_smmu_enable_ats(struct arm_smmu_master *master) static void arm_smmu_disable_ats(struct arm_smmu_master *master) { - struct arm_smmu_cmdq_ent cmd; struct arm_smmu_domain *smmu_domain = master->domain; if (!master->ats_enabled) @@ -2641,8 +2647,7 @@ static void arm_smmu_disable_ats(struct arm_smmu_master *master) * ATC invalidation via the SMMU. */ wmb(); - arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd); - arm_smmu_atc_inv_master(master, &cmd); + arm_smmu_atc_inv_master(master); atomic_dec(&smmu_domain->nr_ats_masters); } -- cgit v1.2.3 From 6a481a95d4c198a2dd0a61f8877b92a375757db8 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 24 Feb 2020 16:31:29 -0600 Subject: iommu/arm-smmu-v3: Add SMMUv3.2 range invalidation support Arm SMMUv3.2 adds support for TLB range invalidate operations. Support for range invalidate is determined by the RIL bit in the IDR3 register. The range invalidate is in units of the leaf page size and operates on 1-32 chunks of a power of 2 multiple pages. First, we determine from the size what power of 2 multiple we can use. Then we calculate how many chunks (1-31) of the power of 2 size for the range on the iteration. On each iteration, we move up in size by at least 5 bits. Cc: Jean-Philippe Brucker Cc: Will Deacon Cc: Robin Murphy Cc: Joerg Roedel Reviewed-by: Eric Auger Signed-off-by: Rob Herring Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 69 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 2 deletions(-) (limited to 'drivers/iommu') diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 4f0a38dae6db..a7222dd5b117 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -69,6 +69,9 @@ #define IDR1_SSIDSIZE GENMASK(10, 6) #define IDR1_SIDSIZE GENMASK(5, 0) +#define ARM_SMMU_IDR3 0xc +#define IDR3_RIL (1 << 10) + #define ARM_SMMU_IDR5 0x14 #define IDR5_STALL_MAX GENMASK(31, 16) #define IDR5_GRAN64K (1 << 6) @@ -346,9 +349,14 @@ #define CMDQ_CFGI_1_LEAF (1UL << 0) #define CMDQ_CFGI_1_RANGE GENMASK_ULL(4, 0) +#define CMDQ_TLBI_0_NUM GENMASK_ULL(16, 12) +#define CMDQ_TLBI_RANGE_NUM_MAX 31 +#define CMDQ_TLBI_0_SCALE GENMASK_ULL(24, 20) #define CMDQ_TLBI_0_VMID GENMASK_ULL(47, 32) #define CMDQ_TLBI_0_ASID GENMASK_ULL(63, 48) #define CMDQ_TLBI_1_LEAF (1UL << 0) +#define CMDQ_TLBI_1_TTL GENMASK_ULL(9, 8) +#define CMDQ_TLBI_1_TG GENMASK_ULL(11, 10) #define CMDQ_TLBI_1_VA_MASK GENMASK_ULL(63, 12) #define CMDQ_TLBI_1_IPA_MASK GENMASK_ULL(51, 12) @@ -473,9 +481,13 @@ struct arm_smmu_cmdq_ent { #define CMDQ_OP_TLBI_S2_IPA 0x2a #define CMDQ_OP_TLBI_NSNH_ALL 0x30 struct { + u8 num; + u8 scale; u16 asid; u16 vmid; bool leaf; + u8 ttl; + u8 tg; u64 addr; } tlbi; @@ -632,6 +644,7 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_HYP (1 << 12) #define ARM_SMMU_FEAT_STALL_FORCE (1 << 13) #define ARM_SMMU_FEAT_VAX (1 << 14) +#define ARM_SMMU_FEAT_RANGE_INV (1 << 15) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) @@ -900,14 +913,22 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31); break; case CMDQ_OP_TLBI_NH_VA: + cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num); + cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale); cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf); + cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl); + cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg); cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK; break; case CMDQ_OP_TLBI_S2_IPA: + cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num); + cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale); cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf); + cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl); + cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg); cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK; break; case CMDQ_OP_TLBI_NH_ASID: @@ -2252,7 +2273,8 @@ static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size, struct arm_smmu_domain *smmu_domain) { struct arm_smmu_device *smmu = smmu_domain->smmu; - unsigned long start = iova, end = iova + size; + unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0; + size_t inv_range = granule; struct arm_smmu_cmdq_batch cmds = {}; struct arm_smmu_cmdq_ent cmd = { .tlbi = { @@ -2271,10 +2293,48 @@ static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size, cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; } + if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { + /* Get the leaf page size */ + tg = __ffs(smmu_domain->domain.pgsize_bitmap); + + /* Convert page size of 12,14,16 (log2) to 1,2,3 */ + cmd.tlbi.tg = (tg - 10) / 2; + + /* Determine what level the granule is at */ + cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3)); + + num_pages = size >> tg; + } + while (iova < end) { + if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { + /* + * On each iteration of the loop, the range is 5 bits + * worth of the aligned size remaining. + * The range in pages is: + * + * range = (num_pages & (0x1f << __ffs(num_pages))) + */ + unsigned long scale, num; + + /* Determine the power of 2 multiple number of pages */ + scale = __ffs(num_pages); + cmd.tlbi.scale = scale; + + /* Determine how many chunks of 2^scale size we have */ + num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX; + cmd.tlbi.num = num - 1; + + /* range is num * 2^scale * pgsize */ + inv_range = num << (scale + tg); + + /* Clear out the lower order bits for the next iteration */ + num_pages -= num << scale; + } + cmd.tlbi.addr = iova; arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); - iova += granule; + iova += inv_range; } arm_smmu_cmdq_batch_submit(smmu, &cmds); @@ -3783,6 +3843,11 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) if (smmu->sid_bits <= STRTAB_SPLIT) smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB; + /* IDR3 */ + reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3); + if (FIELD_GET(IDR3_RIL, reg)) + smmu->features |= ARM_SMMU_FEAT_RANGE_INV; + /* IDR5 */ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5); -- cgit v1.2.3