From fc10cca69ee8af49778ad76c1e5fddf83d5026c8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 9 Jan 2020 14:08:41 +0000 Subject: [PATCH 01/28] drivers/iommu: Initialise module 'owner' field in iommu_device_set_ops() Requiring each IOMMU driver to initialise the 'owner' field of their 'struct iommu_ops' is error-prone and easily forgotten. Follow the example set by PCI and USB by assigning THIS_MODULE automatically when registering the ops structure with IOMMU core. Reviewed-by: Greg Kroah-Hartman Suggested-by: Greg Kroah-Hartman Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 1 - drivers/iommu/arm-smmu.c | 1 - include/linux/iommu.h | 11 +++++++++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 03dc97842875..e82997a705a8 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2733,7 +2733,6 @@ static struct iommu_ops arm_smmu_ops = { .get_resv_regions = arm_smmu_get_resv_regions, .put_resv_regions = arm_smmu_put_resv_regions, .pgsize_bitmap = -1UL, /* Restricted during device attach */ - .owner = THIS_MODULE, }; /* Probing and initialisation functions */ diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 5ef1f2e100d7..93d332423f6f 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1623,7 +1623,6 @@ static struct iommu_ops arm_smmu_ops = { .get_resv_regions = arm_smmu_get_resv_regions, .put_resv_regions = arm_smmu_put_resv_regions, .pgsize_bitmap = -1UL, /* Restricted during device attach */ - .owner = THIS_MODULE, }; static void arm_smmu_device_reset(struct arm_smmu_device *smmu) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e9f94d3f7a04..90007c92ad2d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -388,12 +388,19 @@ void iommu_device_sysfs_remove(struct iommu_device *iommu); int iommu_device_link(struct iommu_device *iommu, struct device *link); void iommu_device_unlink(struct iommu_device *iommu, struct device *link); -static inline void iommu_device_set_ops(struct iommu_device *iommu, - const struct iommu_ops *ops) +static inline void __iommu_device_set_ops(struct iommu_device *iommu, + const struct iommu_ops *ops) { iommu->ops = ops; } +#define iommu_device_set_ops(iommu, ops) \ +do { \ + struct iommu_ops *__ops = (struct iommu_ops *)(ops); \ + __ops->owner = THIS_MODULE; \ + __iommu_device_set_ops(iommu, __ops); \ +} while (0) + static inline void iommu_device_set_fwnode(struct iommu_device *iommu, struct fwnode_handle *fwnode) { From 935d43ba272e0001f8ef446a3eff15d8175cb11b Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 13 Nov 2019 16:11:38 +0000 Subject: [PATCH 02/28] iommu/arm-smmu-v3: Populate VMID field for CMDQ_OP_TLBI_NH_VA CMDQ_OP_TLBI_NH_VA requires VMID and this was missing since commit 1c27df1c0a82 ("iommu/arm-smmu: Use correct address mask for CMD_TLBI_S2_IPA"). Add it back. Fixes: 1c27df1c0a82 ("iommu/arm-smmu: Use correct address mask for CMD_TLBI_S2_IPA") Signed-off-by: Shameer Kolothum Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index e82997a705a8..a4ee956b8968 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -851,6 +851,7 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31); break; case CMDQ_OP_TLBI_NH_VA: + cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid); cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid); cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf); cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK; From 322a9bbb720cd273d2967a375d1887199b361528 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 26 Dec 2019 18:50:56 +0900 Subject: [PATCH 03/28] iommu/arm-smmu-v3: Fix resource_size check This is an off-by-one mistake. resource_size() returns res->end - res->start + 1. Reviewed-by: Robin Murphy Signed-off-by: Masahiro Yamada Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index a4ee956b8968..5c42c686cffe 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -3632,7 +3632,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) /* Base address */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) { + if (resource_size(res) < arm_smmu_resource_size(smmu)) { dev_err(dev, "MMIO region too small (%pr)\n", res); return -EINVAL; } From 8efda06f83d65c015a99d99a195df533aec0fcf9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 24 Dec 2019 17:14:59 +0900 Subject: [PATCH 04/28] iommu/arm-smmu-v3: Remove useless of_match_ptr() The CONFIG option controlling this driver, ARM_SMMU_V3, depends on ARM64, which select's OF. So, CONFIG_OF is always defined when building this driver. of_match_ptr(arm_smmu_of_match) is the same as arm_smmu_of_match. Signed-off-by: Masahiro Yamada Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 5c42c686cffe..aa7e53023585 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -3722,7 +3722,7 @@ MODULE_DEVICE_TABLE(of, arm_smmu_of_match); static struct platform_driver arm_smmu_driver = { .driver = { .name = "arm-smmu-v3", - .of_match_table = of_match_ptr(arm_smmu_of_match), + .of_match_table = arm_smmu_of_match, .suppress_bind_attrs = true, }, .probe = arm_smmu_device_probe, From cd037ff2f98227148002084d588f03b348469fe4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 24 Dec 2019 17:15:00 +0900 Subject: [PATCH 05/28] iommu/arm-smmu: Fix -Wunused-const-variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For ARCH=arm builds, OF is not necessarily enabled, that is, you can build this driver without CONFIG_OF. When CONFIG_OF is unset, of_match_ptr() is NULL, and arm_smmu_of_match is left orphan. Building it with W=1 emits a warning: drivers/iommu/arm-smmu.c:1904:34: warning: ‘arm_smmu_of_match’ defined but not used [-Wunused-const-variable=] static const struct of_device_id arm_smmu_of_match[] = { ^~~~~~~~~~~~~~~~~ There are two ways to fix this: - annotate arm_smmu_of_match with __maybe_unused (or surround the code with #ifdef CONFIG_OF ... #endif) - stop using of_match_ptr() This commit took the latter solution. It slightly increases the object size, but it is probably not a big deal because arm_smmu_device_dt_probe() is also compiled irrespective of CONFIG_OF. Signed-off-by: Masahiro Yamada Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 93d332423f6f..46b87740d708 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -2310,7 +2310,7 @@ static const struct dev_pm_ops arm_smmu_pm_ops = { static struct platform_driver arm_smmu_driver = { .driver = { .name = "arm-smmu", - .of_match_table = of_match_ptr(arm_smmu_of_match), + .of_match_table = arm_smmu_of_match, .pm = &arm_smmu_pm_ops, .suppress_bind_attrs = true, }, From d1e5f26f14272b5039cc198569ec4fabed14e6db Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 25 Oct 2019 19:08:37 +0100 Subject: [PATCH 06/28] iommu/io-pgtable-arm: Rationalise TTBRn handling TTBR1 values have so far been redundant since no users implement any support for split address spaces. Crucially, though, one of the main reasons for wanting to do so is to be able to manage each half entirely independently, e.g. context-switching one set of mappings without disturbing the other. Thus it seems unlikely that tying two tables together in a single io_pgtable_cfg would ever be particularly desirable or useful. Streamline the configs to just a single conceptual TTBR value representing the allocated table. This paves the way for future users to support split address spaces by simply allocating a table and dealing with the detailed TTBRn logistics themselves. Tested-by: Jordan Crouse Signed-off-by: Robin Murphy [will: Drop change to ttbr value] Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 2 +- drivers/iommu/arm-smmu.c | 9 ++++----- drivers/iommu/io-pgtable-arm-v7s.c | 17 ++++++++--------- drivers/iommu/io-pgtable-arm.c | 5 ++--- drivers/iommu/ipmmu-vmsa.c | 2 +- drivers/iommu/msm_iommu.c | 4 ++-- drivers/iommu/mtk_iommu.c | 4 ++-- drivers/iommu/qcom_iommu.c | 3 +-- include/linux/io-pgtable.h | 4 ++-- 9 files changed, 23 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index aa7e53023585..cf2ae065a6c2 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2166,7 +2166,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, } cfg->cd.asid = (u16)asid; - cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; + cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr; cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair; return 0; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 46b87740d708..72640e045268 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -553,13 +553,12 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, /* TTBRs */ if (stage1) { if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { - cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0]; - cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1]; + cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr; + cb->ttbr[1] = 0; } else { - cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; + cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid); - cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; - cb->ttbr[1] |= FIELD_PREP(TTBRn_ASID, cfg->asid); + cb->ttbr[1] = FIELD_PREP(TTBRn_ASID, cfg->asid); } } else { cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 7c3bd2c3cdca..eac886f7619d 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -822,15 +822,14 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, /* Ensure the empty pgd is visible before any actual TTBR write */ wmb(); - /* TTBRs */ - cfg->arm_v7s_cfg.ttbr[0] = virt_to_phys(data->pgd) | - ARM_V7S_TTBR_S | ARM_V7S_TTBR_NOS | - (cfg->coherent_walk ? - (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) | - ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) : - (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) | - ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC))); - cfg->arm_v7s_cfg.ttbr[1] = 0; + /* TTBR */ + cfg->arm_v7s_cfg.ttbr = virt_to_phys(data->pgd) | + ARM_V7S_TTBR_S | ARM_V7S_TTBR_NOS | + (cfg->coherent_walk ? + (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) | + ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) : + (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) | + ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC))); return &data->iop; out_free_data: diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index bdf47f745268..7b422b9fe05b 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -872,9 +872,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) /* Ensure the empty pgd is visible before any actual TTBR write */ wmb(); - /* TTBRs */ - cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd); - cfg->arm_lpae_s1_cfg.ttbr[1] = 0; + /* TTBR */ + cfg->arm_lpae_s1_cfg.ttbr = virt_to_phys(data->pgd); return &data->iop; out_free_data: diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index d02edd2751f3..ecb3f9464dd5 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -374,7 +374,7 @@ static void ipmmu_domain_setup_context(struct ipmmu_vmsa_domain *domain) u32 tmp; /* TTBR0 */ - ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; + ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr; ipmmu_ctx_write_root(domain, IMTTLBR0, ttbr); ipmmu_ctx_write_root(domain, IMTTUBR0, ttbr >> 32); diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 93f14bca26ee..94a6df1bddd6 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -279,8 +279,8 @@ static void __program_context(void __iomem *base, int ctx, SET_V2PCFG(base, ctx, 0x3); SET_TTBCR(base, ctx, priv->cfg.arm_v7s_cfg.tcr); - SET_TTBR0(base, ctx, priv->cfg.arm_v7s_cfg.ttbr[0]); - SET_TTBR1(base, ctx, priv->cfg.arm_v7s_cfg.ttbr[1]); + SET_TTBR0(base, ctx, priv->cfg.arm_v7s_cfg.ttbr); + SET_TTBR1(base, ctx, 0); /* Set prrr and nmrr */ SET_PRRR(base, ctx, priv->cfg.arm_v7s_cfg.prrr); diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 6fc1f5ecf91e..95945f467c03 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -367,7 +367,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, /* Update the pgtable base address register of the M4U HW */ if (!data->m4u_dom) { data->m4u_dom = dom; - writel(dom->cfg.arm_v7s_cfg.ttbr[0] & MMU_PT_ADDR_MASK, + writel(dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, data->base + REG_MMU_PT_BASE_ADDR); } @@ -765,7 +765,7 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev) writel_relaxed(reg->ivrp_paddr, base + REG_MMU_IVRP_PADDR); writel_relaxed(reg->vld_pa_rng, base + REG_MMU_VLD_PA_RNG); if (m4u_dom) - writel(m4u_dom->cfg.arm_v7s_cfg.ttbr[0] & MMU_PT_ADDR_MASK, + writel(m4u_dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK, base + REG_MMU_PT_BASE_ADDR); return 0; } diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 52f38292df5b..c200bc066257 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -269,10 +269,9 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, /* TTBRs */ iommu_writeq(ctx, ARM_SMMU_CB_TTBR0, - pgtbl_cfg.arm_lpae_s1_cfg.ttbr[0] | + pgtbl_cfg.arm_lpae_s1_cfg.ttbr | FIELD_PREP(TTBRn_ASID, ctx->asid)); iommu_writeq(ctx, ARM_SMMU_CB_TTBR1, - pgtbl_cfg.arm_lpae_s1_cfg.ttbr[1] | FIELD_PREP(TTBRn_ASID, ctx->asid)); /* TCR */ diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index ee21eedafe98..53bca5343f52 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -100,7 +100,7 @@ struct io_pgtable_cfg { /* Low-level data specific to the table format */ union { struct { - u64 ttbr[2]; + u64 ttbr; u64 tcr; u64 mair; } arm_lpae_s1_cfg; @@ -111,7 +111,7 @@ struct io_pgtable_cfg { } arm_lpae_s2_cfg; struct { - u32 ttbr[2]; + u32 ttbr; u32 tcr; u32 nmrr; u32 prrr; From 30d2acb67348537dbf3180057c11f092f4603090 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 10 Jan 2020 11:40:33 +0000 Subject: [PATCH 07/28] iommu/io-pgtable-arm: Support non-coherent stage-2 page tables Commit 9e6ea59f3ff3 ("iommu/io-pgtable: Support non-coherent page tables") added support for non-coherent page-table walks to the Arm IOMMU page-table backends. Unfortunately, it left the stage-2 allocator unchanged, so let's hook that up in the same way. Cc: Bjorn Andersson Cc: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 7b422b9fe05b..ab440b52a5f4 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -910,10 +910,16 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) } /* VTCR */ - reg = ARM_64_LPAE_S2_TCR_RES1 | - (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); + reg = ARM_64_LPAE_S2_TCR_RES1; + if (cfg->coherent_walk) { + reg |= (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) | + (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | + (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); + } else { + reg |= (ARM_LPAE_TCR_SH_OS << ARM_LPAE_TCR_SH0_SHIFT) | + (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT) | + (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_ORGN0_SHIFT); + } sl = data->start_level; From 7618e479098226799207e021e8b0c2c28a23c96b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 10 Jan 2020 15:21:51 +0000 Subject: [PATCH 08/28] iommu/io-pgtable-arm: Improve attribute handling By VMSA rules, using Normal Non-Cacheable type with a shareability attribute of anything other than Outer Shareable is liable to lead into unpredictable territory: | Overlaying the shareability attribute (B3-1377, ARM DDI 0406C.c) | | A memory region with a resultant memory type attribute of Normal, and | a resultant cacheability attribute of Inner Non-cacheable, Outer | Non-cacheable, must have a resultant shareability attribute of Outer | Shareable, otherwise shareability is UNPREDICTABLE Although the SMMU architectures seem to give some slightly stronger guarantees of Non-Cacheable output types becoming implicitly Outer Shareable in most cases, we may as well be explicit and not take any chances. It's also weird that LPAE attribute handling is currently split between prot_to_pte() and init_pte() given that it can all be statically determined up-front. Thus, collect *all* the LPAE attributes into prot_to_pte() in order to logically pick the shareability based on the incoming IOMMU API prot value, and tweak the short-descriptor code to stop setting TTBR0.NOS for Non-Cacheable walks. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm-v7s.c | 7 +++---- drivers/iommu/io-pgtable-arm.c | 17 +++++++++++------ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index eac886f7619d..697ffffe34df 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -823,10 +823,9 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, wmb(); /* TTBR */ - cfg->arm_v7s_cfg.ttbr = virt_to_phys(data->pgd) | - ARM_V7S_TTBR_S | ARM_V7S_TTBR_NOS | - (cfg->coherent_walk ? - (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) | + cfg->arm_v7s_cfg.ttbr = virt_to_phys(data->pgd) | ARM_V7S_TTBR_S | + (cfg->coherent_walk ? (ARM_V7S_TTBR_NOS | + ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) | ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA)) : (ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_NC) | ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_NC))); diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index ab440b52a5f4..1da0d82444f9 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -293,17 +293,11 @@ static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, { arm_lpae_iopte pte = prot; - if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) - pte |= ARM_LPAE_PTE_NS; - if (data->iop.fmt != ARM_MALI_LPAE && lvl == ARM_LPAE_MAX_LEVELS - 1) pte |= ARM_LPAE_PTE_TYPE_PAGE; else pte |= ARM_LPAE_PTE_TYPE_BLOCK; - if (data->iop.fmt != ARM_MALI_LPAE) - pte |= ARM_LPAE_PTE_AF; - pte |= ARM_LPAE_PTE_SH_IS; pte |= paddr_to_iopte(paddr, data); __arm_lpae_set_pte(ptep, pte, &data->iop.cfg); @@ -460,9 +454,20 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, << ARM_LPAE_PTE_ATTRINDX_SHIFT); } + if (prot & IOMMU_CACHE) + pte |= ARM_LPAE_PTE_SH_IS; + else + pte |= ARM_LPAE_PTE_SH_OS; + if (prot & IOMMU_NOEXEC) pte |= ARM_LPAE_PTE_XN; + if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) + pte |= ARM_LPAE_PTE_NS; + + if (data->iop.fmt != ARM_MALI_LPAE) + pte |= ARM_LPAE_PTE_AF; + return pte; } From 6f932ad369a3c3f853ffc5d93de9a73420e862b1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 10 Jan 2020 12:22:16 +0000 Subject: [PATCH 09/28] iommu/io-pgtable-arm: Ensure ARM_64_LPAE_S2_TCR_RES1 is unsigned ARM_64_LPAE_S2_TCR_RES1 is intended to map to bit 31 of the VTCR register, which is required to be set to 1 by the architecture. Unfortunately, we accidentally treat this as a signed quantity which means we also set the upper 32 bits of the VTCR to one, and they are required to be zero. Treat ARM_64_LPAE_S2_TCR_RES1 as unsigned to avoid the unwanted sign-extension up to 64 bits. Cc: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 1da0d82444f9..1c0ec16effbb 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -101,7 +101,7 @@ /* Register bits */ #define ARM_32_LPAE_TCR_EAE (1 << 31) -#define ARM_64_LPAE_S2_TCR_RES1 (1 << 31) +#define ARM_64_LPAE_S2_TCR_RES1 (1U << 31) #define ARM_LPAE_TCR_EPD1 (1 << 23) From fb485eb18e632ff1071662122b9d9b7d40c23c73 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 25 Oct 2019 19:08:38 +0100 Subject: [PATCH 10/28] iommu/io-pgtable-arm: Rationalise TCR handling Although it's conceptually nice for the io_pgtable_cfg to provide a standard VMSA TCR value, the reality is that no VMSA-compliant IOMMU looks exactly like an Arm CPU, and they all have various other TCR controls which io-pgtable can't be expected to understand. Thus since there is an expectation that drivers will have to add to the given TCR value anyway, let's strip it down to just the essentials that are directly relevant to io-pgtable's inner workings - namely the various sizes and the walk attributes. Tested-by: Jordan Crouse Signed-off-by: Robin Murphy [will: Add missing include of bitfield.h] Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 41 +++---------- drivers/iommu/arm-smmu.c | 7 ++- drivers/iommu/arm-smmu.h | 28 +++++++++ drivers/iommu/io-pgtable-arm-v7s.c | 6 +- drivers/iommu/io-pgtable-arm.c | 98 ++++++++++++------------------ drivers/iommu/io-pgtable.c | 2 +- drivers/iommu/qcom_iommu.c | 8 +-- include/linux/io-pgtable.h | 9 ++- 8 files changed, 95 insertions(+), 104 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index cf2ae065a6c2..d127974afdb7 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -260,27 +260,18 @@ /* Context descriptor (stage-1 only) */ #define CTXDESC_CD_DWORDS 8 #define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0) -#define ARM64_TCR_T0SZ GENMASK_ULL(5, 0) #define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6) -#define ARM64_TCR_TG0 GENMASK_ULL(15, 14) #define CTXDESC_CD_0_TCR_IRGN0 GENMASK_ULL(9, 8) -#define ARM64_TCR_IRGN0 GENMASK_ULL(9, 8) #define CTXDESC_CD_0_TCR_ORGN0 GENMASK_ULL(11, 10) -#define ARM64_TCR_ORGN0 GENMASK_ULL(11, 10) #define CTXDESC_CD_0_TCR_SH0 GENMASK_ULL(13, 12) -#define ARM64_TCR_SH0 GENMASK_ULL(13, 12) #define CTXDESC_CD_0_TCR_EPD0 (1ULL << 14) -#define ARM64_TCR_EPD0 (1ULL << 7) #define CTXDESC_CD_0_TCR_EPD1 (1ULL << 30) -#define ARM64_TCR_EPD1 (1ULL << 23) #define CTXDESC_CD_0_ENDI (1UL << 15) #define CTXDESC_CD_0_V (1UL << 31) #define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32) -#define ARM64_TCR_IPS GENMASK_ULL(34, 32) #define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38) -#define ARM64_TCR_TBI0 (1ULL << 37) #define CTXDESC_CD_0_AA64 (1UL << 41) #define CTXDESC_CD_0_S (1UL << 44) @@ -291,10 +282,6 @@ #define CTXDESC_CD_1_TTB0_MASK GENMASK_ULL(51, 4) -/* Convert between AArch64 (CPU) TCR format and SMMU CD format */ -#define ARM_SMMU_TCR2CD(tcr, fld) FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \ - FIELD_GET(ARM64_TCR_##fld, tcr)) - /* Command queue */ #define CMDQ_ENT_SZ_SHIFT 4 #define CMDQ_ENT_DWORDS ((1 << CMDQ_ENT_SZ_SHIFT) >> 3) @@ -1439,23 +1426,6 @@ static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) } /* Context descriptor manipulation functions */ -static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr) -{ - u64 val = 0; - - /* Repack the TCR. Just care about TTBR0 for now */ - val |= ARM_SMMU_TCR2CD(tcr, T0SZ); - val |= ARM_SMMU_TCR2CD(tcr, TG0); - val |= ARM_SMMU_TCR2CD(tcr, IRGN0); - val |= ARM_SMMU_TCR2CD(tcr, ORGN0); - val |= ARM_SMMU_TCR2CD(tcr, SH0); - val |= ARM_SMMU_TCR2CD(tcr, EPD0); - val |= ARM_SMMU_TCR2CD(tcr, EPD1); - val |= ARM_SMMU_TCR2CD(tcr, IPS); - - return val; -} - static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu, struct arm_smmu_s1_cfg *cfg) { @@ -1465,7 +1435,7 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu, * We don't need to issue any invalidation here, as we'll invalidate * the STE when installing the new entry anyway. */ - val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) | + val = cfg->cd.tcr | #ifdef __BIG_ENDIAN CTXDESC_CD_0_ENDI | #endif @@ -2151,6 +2121,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, int asid; struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; + typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr; asid = arm_smmu_bitmap_alloc(smmu->asid_map, smmu->asid_bits); if (asid < 0) @@ -2167,7 +2138,13 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, cfg->cd.asid = (u16)asid; cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; - cfg->cd.tcr = pgtbl_cfg->arm_lpae_s1_cfg.tcr; + cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) | + FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) | + FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) | + FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) | + FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) | + FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) | + CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64; cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair; return 0; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 72640e045268..e65eb60812ec 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -540,11 +540,12 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr; } else { - cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr; - cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32; - cb->tcr[1] |= FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM); + cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg); + cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg); if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) cb->tcr[1] |= TCR2_AS; + else + cb->tcr[0] |= TCR_EAE; } } else { cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h index 62b9f0cec49b..aade2b0ae175 100644 --- a/drivers/iommu/arm-smmu.h +++ b/drivers/iommu/arm-smmu.h @@ -11,6 +11,7 @@ #define _ARM_SMMU_H #include +#include #include #include #include @@ -158,12 +159,24 @@ enum arm_smmu_cbar_type { #define TCR2_SEP GENMASK(17, 15) #define TCR2_SEP_UPSTREAM 0x7 #define TCR2_AS BIT(4) +#define TCR2_PASIZE GENMASK(3, 0) #define ARM_SMMU_CB_TTBR0 0x20 #define ARM_SMMU_CB_TTBR1 0x28 #define TTBRn_ASID GENMASK_ULL(63, 48) +/* arm64 headers leak this somehow :( */ +#undef TCR_T0SZ + #define ARM_SMMU_CB_TCR 0x30 +#define TCR_EAE BIT(31) +#define TCR_EPD1 BIT(23) +#define TCR_TG0 GENMASK(15, 14) +#define TCR_SH0 GENMASK(13, 12) +#define TCR_ORGN0 GENMASK(11, 10) +#define TCR_IRGN0 GENMASK(9, 8) +#define TCR_T0SZ GENMASK(5, 0) + #define ARM_SMMU_CB_CONTEXTIDR 0x34 #define ARM_SMMU_CB_S1_MAIR0 0x38 #define ARM_SMMU_CB_S1_MAIR1 0x3c @@ -318,6 +331,21 @@ struct arm_smmu_domain { struct iommu_domain domain; }; +static inline u32 arm_smmu_lpae_tcr(struct io_pgtable_cfg *cfg) +{ + return TCR_EPD1 | + FIELD_PREP(TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) | + FIELD_PREP(TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | + FIELD_PREP(TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | + FIELD_PREP(TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | + FIELD_PREP(TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); +} + +static inline u32 arm_smmu_lpae_tcr2(struct io_pgtable_cfg *cfg) +{ + return FIELD_PREP(TCR2_PASIZE, cfg->arm_lpae_s1_cfg.tcr.ips) | + FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM); +} /* Implementation details, yay! */ struct arm_smmu_impl { diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 697ffffe34df..4272fe4e17f4 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -149,8 +149,6 @@ #define ARM_V7S_TTBR_IRGN_ATTR(attr) \ ((((attr) & 0x1) << 6) | (((attr) & 0x2) >> 1)) -#define ARM_V7S_TCR_PD1 BIT(5) - #ifdef CONFIG_ZONE_DMA32 #define ARM_V7S_TABLE_GFP_DMA GFP_DMA32 #define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA32 @@ -798,8 +796,8 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, */ cfg->pgsize_bitmap &= SZ_4K | SZ_64K | SZ_1M | SZ_16M; - /* TCR: T0SZ=0, disable TTBR1 */ - cfg->arm_v7s_cfg.tcr = ARM_V7S_TCR_PD1; + /* TCR: T0SZ=0, EAE=0 (if applicable) */ + cfg->arm_v7s_cfg.tcr = 0; /* * TEX remap: the indices used map to the closest equivalent types diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 1c0ec16effbb..a868ce594871 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -100,40 +100,32 @@ #define ARM_LPAE_PTE_MEMATTR_DEV (((arm_lpae_iopte)0x1) << 2) /* Register bits */ -#define ARM_32_LPAE_TCR_EAE (1 << 31) -#define ARM_64_LPAE_S2_TCR_RES1 (1U << 31) +#define ARM_64_LPAE_VTCR_RES1 (1U << 31) -#define ARM_LPAE_TCR_EPD1 (1 << 23) - -#define ARM_LPAE_TCR_TG0_4K (0 << 14) -#define ARM_LPAE_TCR_TG0_64K (1 << 14) -#define ARM_LPAE_TCR_TG0_16K (2 << 14) +#define ARM_LPAE_VTCR_TG0_SHIFT 14 +#define ARM_LPAE_TCR_TG0_4K 0 +#define ARM_LPAE_TCR_TG0_64K 1 +#define ARM_LPAE_TCR_TG0_16K 2 #define ARM_LPAE_TCR_SH0_SHIFT 12 -#define ARM_LPAE_TCR_SH0_MASK 0x3 #define ARM_LPAE_TCR_SH_NS 0 #define ARM_LPAE_TCR_SH_OS 2 #define ARM_LPAE_TCR_SH_IS 3 #define ARM_LPAE_TCR_ORGN0_SHIFT 10 #define ARM_LPAE_TCR_IRGN0_SHIFT 8 -#define ARM_LPAE_TCR_RGN_MASK 0x3 #define ARM_LPAE_TCR_RGN_NC 0 #define ARM_LPAE_TCR_RGN_WBWA 1 #define ARM_LPAE_TCR_RGN_WT 2 #define ARM_LPAE_TCR_RGN_WB 3 -#define ARM_LPAE_TCR_SL0_SHIFT 6 -#define ARM_LPAE_TCR_SL0_MASK 0x3 +#define ARM_LPAE_VTCR_SL0_SHIFT 6 +#define ARM_LPAE_VTCR_SL0_MASK 0x3 #define ARM_LPAE_TCR_T0SZ_SHIFT 0 -#define ARM_LPAE_TCR_SZ_MASK 0xf -#define ARM_LPAE_TCR_PS_SHIFT 16 -#define ARM_LPAE_TCR_PS_MASK 0x7 - -#define ARM_LPAE_TCR_IPS_SHIFT 32 -#define ARM_LPAE_TCR_IPS_MASK 0x7 +#define ARM_LPAE_VTCR_PS_SHIFT 16 +#define ARM_LPAE_VTCR_PS_MASK 0x7 #define ARM_LPAE_TCR_PS_32_BIT 0x0ULL #define ARM_LPAE_TCR_PS_36_BIT 0x1ULL @@ -792,6 +784,7 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) { u64 reg; struct arm_lpae_io_pgtable *data; + typeof(&cfg->arm_lpae_s1_cfg.tcr) tcr = &cfg->arm_lpae_s1_cfg.tcr; if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NON_STRICT)) @@ -803,58 +796,54 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) /* TCR */ if (cfg->coherent_walk) { - reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); + tcr->sh = ARM_LPAE_TCR_SH_IS; + tcr->irgn = ARM_LPAE_TCR_RGN_WBWA; + tcr->orgn = ARM_LPAE_TCR_RGN_WBWA; } else { - reg = (ARM_LPAE_TCR_SH_OS << ARM_LPAE_TCR_SH0_SHIFT) | - (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT) | - (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_ORGN0_SHIFT); + tcr->sh = ARM_LPAE_TCR_SH_OS; + tcr->irgn = ARM_LPAE_TCR_RGN_NC; + tcr->orgn = ARM_LPAE_TCR_RGN_NC; } switch (ARM_LPAE_GRANULE(data)) { case SZ_4K: - reg |= ARM_LPAE_TCR_TG0_4K; + tcr->tg = ARM_LPAE_TCR_TG0_4K; break; case SZ_16K: - reg |= ARM_LPAE_TCR_TG0_16K; + tcr->tg = ARM_LPAE_TCR_TG0_16K; break; case SZ_64K: - reg |= ARM_LPAE_TCR_TG0_64K; + tcr->tg = ARM_LPAE_TCR_TG0_64K; break; } switch (cfg->oas) { case 32: - reg |= (ARM_LPAE_TCR_PS_32_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_32_BIT; break; case 36: - reg |= (ARM_LPAE_TCR_PS_36_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_36_BIT; break; case 40: - reg |= (ARM_LPAE_TCR_PS_40_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_40_BIT; break; case 42: - reg |= (ARM_LPAE_TCR_PS_42_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_42_BIT; break; case 44: - reg |= (ARM_LPAE_TCR_PS_44_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_44_BIT; break; case 48: - reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_48_BIT; break; case 52: - reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_TCR_IPS_SHIFT); + tcr->ips = ARM_LPAE_TCR_PS_52_BIT; break; default: goto out_free_data; } - reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT; - - /* Disable speculative walks through TTBR1 */ - reg |= ARM_LPAE_TCR_EPD1; - cfg->arm_lpae_s1_cfg.tcr = reg; + tcr->tsz = 64ULL - cfg->ias; /* MAIRs */ reg = (ARM_LPAE_MAIR_ATTR_NC @@ -915,7 +904,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) } /* VTCR */ - reg = ARM_64_LPAE_S2_TCR_RES1; + reg = ARM_64_LPAE_VTCR_RES1; if (cfg->coherent_walk) { reg |= (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) | (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | @@ -930,45 +919,45 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) switch (ARM_LPAE_GRANULE(data)) { case SZ_4K: - reg |= ARM_LPAE_TCR_TG0_4K; + reg |= (ARM_LPAE_TCR_TG0_4K << ARM_LPAE_VTCR_TG0_SHIFT); sl++; /* SL0 format is different for 4K granule size */ break; case SZ_16K: - reg |= ARM_LPAE_TCR_TG0_16K; + reg |= (ARM_LPAE_TCR_TG0_16K << ARM_LPAE_VTCR_TG0_SHIFT); break; case SZ_64K: - reg |= ARM_LPAE_TCR_TG0_64K; + reg |= (ARM_LPAE_TCR_TG0_64K << ARM_LPAE_VTCR_TG0_SHIFT); break; } switch (cfg->oas) { case 32: - reg |= (ARM_LPAE_TCR_PS_32_BIT << ARM_LPAE_TCR_PS_SHIFT); + reg |= (ARM_LPAE_TCR_PS_32_BIT << ARM_LPAE_VTCR_PS_SHIFT); break; case 36: - reg |= (ARM_LPAE_TCR_PS_36_BIT << ARM_LPAE_TCR_PS_SHIFT); + reg |= (ARM_LPAE_TCR_PS_36_BIT << ARM_LPAE_VTCR_PS_SHIFT); break; case 40: - reg |= (ARM_LPAE_TCR_PS_40_BIT << ARM_LPAE_TCR_PS_SHIFT); + reg |= (ARM_LPAE_TCR_PS_40_BIT << ARM_LPAE_VTCR_PS_SHIFT); break; case 42: - reg |= (ARM_LPAE_TCR_PS_42_BIT << ARM_LPAE_TCR_PS_SHIFT); + reg |= (ARM_LPAE_TCR_PS_42_BIT << ARM_LPAE_VTCR_PS_SHIFT); break; case 44: - reg |= (ARM_LPAE_TCR_PS_44_BIT << ARM_LPAE_TCR_PS_SHIFT); + reg |= (ARM_LPAE_TCR_PS_44_BIT << ARM_LPAE_VTCR_PS_SHIFT); break; case 48: - reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_PS_SHIFT); + reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_VTCR_PS_SHIFT); break; case 52: - reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_TCR_PS_SHIFT); + reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_VTCR_PS_SHIFT); break; default: goto out_free_data; } reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT; - reg |= (~sl & ARM_LPAE_TCR_SL0_MASK) << ARM_LPAE_TCR_SL0_SHIFT; + reg |= (~sl & ARM_LPAE_VTCR_SL0_MASK) << ARM_LPAE_VTCR_SL0_SHIFT; cfg->arm_lpae_s2_cfg.vtcr = reg; /* Allocate pgd pages */ @@ -992,19 +981,12 @@ out_free_data: static struct io_pgtable * arm_32_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) { - struct io_pgtable *iop; - if (cfg->ias > 32 || cfg->oas > 40) return NULL; cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); - iop = arm_64_lpae_alloc_pgtable_s1(cfg, cookie); - if (iop) { - cfg->arm_lpae_s1_cfg.tcr |= ARM_32_LPAE_TCR_EAE; - cfg->arm_lpae_s1_cfg.tcr &= 0xffffffff; - } - return iop; + return arm_64_lpae_alloc_pgtable_s1(cfg, cookie); } static struct io_pgtable * diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c index ced53e5b72b5..94394c81468f 100644 --- a/drivers/iommu/io-pgtable.c +++ b/drivers/iommu/io-pgtable.c @@ -63,7 +63,7 @@ void free_io_pgtable_ops(struct io_pgtable_ops *ops) if (!ops) return; - iop = container_of(ops, struct io_pgtable, ops); + iop = io_pgtable_ops_to_pgtable(ops); io_pgtable_tlb_flush_all(iop); io_pgtable_init_table[iop->fmt]->free(iop); } diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index c200bc066257..2eeaf2eec946 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -271,15 +271,13 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, iommu_writeq(ctx, ARM_SMMU_CB_TTBR0, pgtbl_cfg.arm_lpae_s1_cfg.ttbr | FIELD_PREP(TTBRn_ASID, ctx->asid)); - iommu_writeq(ctx, ARM_SMMU_CB_TTBR1, - FIELD_PREP(TTBRn_ASID, ctx->asid)); + iommu_writeq(ctx, ARM_SMMU_CB_TTBR1, 0); /* TCR */ iommu_writel(ctx, ARM_SMMU_CB_TCR2, - (pgtbl_cfg.arm_lpae_s1_cfg.tcr >> 32) | - FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM)); + arm_smmu_lpae_tcr2(&pgtbl_cfg)); iommu_writel(ctx, ARM_SMMU_CB_TCR, - pgtbl_cfg.arm_lpae_s1_cfg.tcr); + arm_smmu_lpae_tcr(&pgtbl_cfg) | TCR_EAE); /* MAIRs (stage-1 only) */ iommu_writel(ctx, ARM_SMMU_CB_S1_MAIR0, diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 53bca5343f52..6ae104cedfd7 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -101,7 +101,14 @@ struct io_pgtable_cfg { union { struct { u64 ttbr; - u64 tcr; + struct { + u32 ips:3; + u32 tg:2; + u32 sh:2; + u32 orgn:2; + u32 irgn:2; + u32 tsz:6; + } tcr; u64 mair; } arm_lpae_s1_cfg; From fba6e960772b7b68189168abc3259384b7a44388 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 10 Jan 2020 13:20:03 +0000 Subject: [PATCH 11/28] iommu/arm-smmu: Rename public #defines under ARM_SMMU_ namespace Now that we have arm-smmu.h defining various SMMU constants, ensure that they are namespaced with the ARM_SMMU_ prefix in order to avoid conflicts with the CPU, such as the one we're currently bodging around with the TCR. Cc: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-impl.c | 2 +- drivers/iommu/arm-smmu.c | 157 +++++++++++++------------ drivers/iommu/arm-smmu.h | 215 +++++++++++++++++----------------- drivers/iommu/qcom_iommu.c | 16 +-- 4 files changed, 203 insertions(+), 187 deletions(-) diff --git a/drivers/iommu/arm-smmu-impl.c b/drivers/iommu/arm-smmu-impl.c index b2fe72a8f019..74d97a886e93 100644 --- a/drivers/iommu/arm-smmu-impl.c +++ b/drivers/iommu/arm-smmu-impl.c @@ -119,7 +119,7 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu) * Secure has also cleared SACR.CACHE_LOCK for this to take effect... */ reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID7); - major = FIELD_GET(ID7_MAJOR, reg); + major = FIELD_GET(ARM_SMMU_ID7_MAJOR, reg); reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sACR); if (major >= 2) reg &= ~ARM_MMU500_ACR_CACHE_LOCK; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index e65eb60812ec..214be09f6ded 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -271,7 +271,7 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page, for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) { for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) { reg = arm_smmu_readl(smmu, page, status); - if (!(reg & sTLBGSTATUS_GSACTIVE)) + if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE)) return; cpu_relax(); } @@ -478,7 +478,7 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) int idx = smmu_domain->cfg.cbndx; fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if (!(fsr & FSR_FAULT)) + if (!(fsr & ARM_SMMU_FSR_FAULT)) return IRQ_NONE; fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); @@ -510,7 +510,7 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev) if (__ratelimit(&rs)) { if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) && - (gfsr & sGFSR_USF)) + (gfsr & ARM_SMMU_sGFSR_USF)) dev_err(smmu->dev, "Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n", (u16)gfsynr1); @@ -543,9 +543,9 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg); cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg); if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) - cb->tcr[1] |= TCR2_AS; + cb->tcr[1] |= ARM_SMMU_TCR2_AS; else - cb->tcr[0] |= TCR_EAE; + cb->tcr[0] |= ARM_SMMU_TCR_EAE; } } else { cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; @@ -558,8 +558,10 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, cb->ttbr[1] = 0; } else { cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; - cb->ttbr[0] |= FIELD_PREP(TTBRn_ASID, cfg->asid); - cb->ttbr[1] = FIELD_PREP(TTBRn_ASID, cfg->asid); + cb->ttbr[0] |= FIELD_PREP(ARM_SMMU_TTBRn_ASID, + cfg->asid); + cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID, + cfg->asid); } } else { cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; @@ -595,31 +597,33 @@ static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx) /* CBA2R */ if (smmu->version > ARM_SMMU_V1) { if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) - reg = CBA2R_VA64; + reg = ARM_SMMU_CBA2R_VA64; else reg = 0; /* 16-bit VMIDs live in CBA2R */ if (smmu->features & ARM_SMMU_FEAT_VMID16) - reg |= FIELD_PREP(CBA2R_VMID16, cfg->vmid); + reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid); arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg); } /* CBAR */ - reg = FIELD_PREP(CBAR_TYPE, cfg->cbar); + reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar); if (smmu->version < ARM_SMMU_V2) - reg |= FIELD_PREP(CBAR_IRPTNDX, cfg->irptndx); + reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx); /* * Use the weakest shareability/memory types, so they are * overridden by the ttbcr/pte. */ if (stage1) { - reg |= FIELD_PREP(CBAR_S1_BPSHCFG, CBAR_S1_BPSHCFG_NSH) | - FIELD_PREP(CBAR_S1_MEMATTR, CBAR_S1_MEMATTR_WB); + reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG, + ARM_SMMU_CBAR_S1_BPSHCFG_NSH) | + FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR, + ARM_SMMU_CBAR_S1_MEMATTR_WB); } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) { /* 8-bit VMIDs live in CBAR */ - reg |= FIELD_PREP(CBAR_VMID, cfg->vmid); + reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid); } arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg); @@ -651,11 +655,12 @@ static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx) } /* SCTLR */ - reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M; + reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE | + ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M; if (stage1) - reg |= SCTLR_S1_ASIDPNE; + reg |= ARM_SMMU_SCTLR_S1_ASIDPNE; if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) - reg |= SCTLR_E; + reg |= ARM_SMMU_SCTLR_E; arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg); } @@ -837,7 +842,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, if (ret < 0) { dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n", cfg->irptndx, irq); - cfg->irptndx = INVALID_IRPTNDX; + cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX; } mutex_unlock(&smmu_domain->init_mutex); @@ -875,7 +880,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) smmu->cbs[cfg->cbndx].cfg = NULL; arm_smmu_write_context_bank(smmu, cfg->cbndx); - if (cfg->irptndx != INVALID_IRPTNDX) { + if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) { irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; devm_free_irq(smmu->dev, irq, domain); } @@ -931,23 +936,24 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx) { struct arm_smmu_smr *smr = smmu->smrs + idx; - u32 reg = FIELD_PREP(SMR_ID, smr->id) | FIELD_PREP(SMR_MASK, smr->mask); + u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) | + FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask); if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid) - reg |= SMR_VALID; + reg |= ARM_SMMU_SMR_VALID; arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg); } static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx) { struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx; - u32 reg = FIELD_PREP(S2CR_TYPE, s2cr->type) | - FIELD_PREP(S2CR_CBNDX, s2cr->cbndx) | - FIELD_PREP(S2CR_PRIVCFG, s2cr->privcfg); + u32 reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) | + FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) | + FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg); if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs && smmu->smrs[idx].valid) - reg |= S2CR_EXIDVALID; + reg |= ARM_SMMU_S2CR_EXIDVALID; arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg); } @@ -974,15 +980,15 @@ static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu) * bits are set, so check each one separately. We can reject * masters later if they try to claim IDs outside these masks. */ - smr = FIELD_PREP(SMR_ID, smmu->streamid_mask); + smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask); arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr); smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0)); - smmu->streamid_mask = FIELD_GET(SMR_ID, smr); + smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr); - smr = FIELD_PREP(SMR_MASK, smmu->streamid_mask); + smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask); arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr); smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0)); - smmu->smr_mask_mask = FIELD_GET(SMR_MASK, smr); + smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr); } static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask) @@ -1051,8 +1057,8 @@ static int arm_smmu_master_alloc_smes(struct device *dev) mutex_lock(&smmu->stream_map_mutex); /* Figure out a viable stream map entry allocation */ for_each_cfg_sme(fwspec, i, idx) { - u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]); - u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]); + u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]); + u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]); if (idx != INVALID_SMENDX) { ret = -EEXIST; @@ -1296,7 +1302,8 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va); reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR; - if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ATSR_ACTIVE), 5, 50)) { + if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE), + 5, 50)) { spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); dev_err(dev, "iova to phys timed out on %pad. Falling back to software table walk.\n", @@ -1306,7 +1313,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR); spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); - if (phys & CB_PAR_F) { + if (phys & ARM_SMMU_CB_PAR_F) { dev_err(dev, "translation fault!\n"); dev_err(dev, "PAR = 0x%llx\n", phys); return 0; @@ -1387,8 +1394,8 @@ static int arm_smmu_add_device(struct device *dev) ret = -EINVAL; for (i = 0; i < fwspec->num_ids; i++) { - u16 sid = FIELD_GET(SMR_ID, fwspec->ids[i]); - u16 mask = FIELD_GET(SMR_MASK, fwspec->ids[i]); + u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]); + u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]); if (sid & ~smmu->streamid_mask) { dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n", @@ -1569,12 +1576,12 @@ static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args) u32 mask, fwid = 0; if (args->args_count > 0) - fwid |= FIELD_PREP(SMR_ID, args->args[0]); + fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]); if (args->args_count > 1) - fwid |= FIELD_PREP(SMR_MASK, args->args[1]); + fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]); else if (!of_property_read_u32(args->np, "stream-match-mask", &mask)) - fwid |= FIELD_PREP(SMR_MASK, mask); + fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask); return iommu_fwspec_add_ids(dev, &fwid, 1); } @@ -1644,7 +1651,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) /* Make sure all context banks are disabled and clear CB_FSR */ for (i = 0; i < smmu->num_context_banks; ++i) { arm_smmu_write_context_bank(smmu, i); - arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, FSR_FAULT); + arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT); } /* Invalidate the TLB, just in case */ @@ -1654,29 +1661,30 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0); /* Enable fault reporting */ - reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE); + reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE | + ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE); /* Disable TLB broadcasting. */ - reg |= (sCR0_VMIDPNE | sCR0_PTM); + reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM); /* Enable client access, handling unmatched streams as appropriate */ - reg &= ~sCR0_CLIENTPD; + reg &= ~ARM_SMMU_sCR0_CLIENTPD; if (disable_bypass) - reg |= sCR0_USFCFG; + reg |= ARM_SMMU_sCR0_USFCFG; else - reg &= ~sCR0_USFCFG; + reg &= ~ARM_SMMU_sCR0_USFCFG; /* Disable forced broadcasting */ - reg &= ~sCR0_FB; + reg &= ~ARM_SMMU_sCR0_FB; /* Don't upgrade barriers */ - reg &= ~(sCR0_BSU); + reg &= ~(ARM_SMMU_sCR0_BSU); if (smmu->features & ARM_SMMU_FEAT_VMID16) - reg |= sCR0_VMID16EN; + reg |= ARM_SMMU_sCR0_VMID16EN; if (smmu->features & ARM_SMMU_FEAT_EXIDS) - reg |= sCR0_EXIDENABLE; + reg |= ARM_SMMU_sCR0_EXIDENABLE; if (smmu->impl && smmu->impl->reset) smmu->impl->reset(smmu); @@ -1721,21 +1729,21 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) /* Restrict available stages based on module parameter */ if (force_stage == 1) - id &= ~(ID0_S2TS | ID0_NTS); + id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS); else if (force_stage == 2) - id &= ~(ID0_S1TS | ID0_NTS); + id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS); - if (id & ID0_S1TS) { + if (id & ARM_SMMU_ID0_S1TS) { smmu->features |= ARM_SMMU_FEAT_TRANS_S1; dev_notice(smmu->dev, "\tstage 1 translation\n"); } - if (id & ID0_S2TS) { + if (id & ARM_SMMU_ID0_S2TS) { smmu->features |= ARM_SMMU_FEAT_TRANS_S2; dev_notice(smmu->dev, "\tstage 2 translation\n"); } - if (id & ID0_NTS) { + if (id & ARM_SMMU_ID0_NTS) { smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED; dev_notice(smmu->dev, "\tnested translation\n"); } @@ -1746,8 +1754,8 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) return -ENODEV; } - if ((id & ID0_S1TS) && - ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) { + if ((id & ARM_SMMU_ID0_S1TS) && + ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) { smmu->features |= ARM_SMMU_FEAT_TRANS_OPS; dev_notice(smmu->dev, "\taddress translation ops\n"); } @@ -1758,7 +1766,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) * Fortunately, this also opens up a workaround for systems where the * ID register value has ended up configured incorrectly. */ - cttw_reg = !!(id & ID0_CTTW); + cttw_reg = !!(id & ARM_SMMU_ID0_CTTW); if (cttw_fw || cttw_reg) dev_notice(smmu->dev, "\t%scoherent table walk\n", cttw_fw ? "" : "non-"); @@ -1767,16 +1775,16 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) "\t(IDR0.CTTW overridden by FW configuration)\n"); /* Max. number of entries we have for stream matching/indexing */ - if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) { + if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) { smmu->features |= ARM_SMMU_FEAT_EXIDS; size = 1 << 16; } else { - size = 1 << FIELD_GET(ID0_NUMSIDB, id); + size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id); } smmu->streamid_mask = size - 1; - if (id & ID0_SMS) { + if (id & ARM_SMMU_ID0_SMS) { smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH; - size = FIELD_GET(ID0_NUMSMRG, id); + size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id); if (size == 0) { dev_err(smmu->dev, "stream-matching supported, but no SMRs present!\n"); @@ -1804,18 +1812,19 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) mutex_init(&smmu->stream_map_mutex); spin_lock_init(&smmu->global_sync_lock); - if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) { + if (smmu->version < ARM_SMMU_V2 || + !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) { smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L; - if (!(id & ID0_PTFS_NO_AARCH32S)) + if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S)) smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S; } /* ID1 */ id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1); - smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12; + smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12; /* Check for size mismatch of SMMU address space from mapped region */ - size = 1 << (FIELD_GET(ID1_NUMPAGENDXB, id) + 1); + size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1); if (smmu->numpage != 2 * size << smmu->pgshift) dev_warn(smmu->dev, "SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n", @@ -1823,8 +1832,8 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) /* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */ smmu->numpage = size; - smmu->num_s2_context_banks = FIELD_GET(ID1_NUMS2CB, id); - smmu->num_context_banks = FIELD_GET(ID1_NUMCB, id); + smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id); + smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id); if (smmu->num_s2_context_banks > smmu->num_context_banks) { dev_err(smmu->dev, "impossible number of S2 context banks!\n"); return -ENODEV; @@ -1838,14 +1847,14 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) /* ID2 */ id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2); - size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_IAS, id)); + size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id)); smmu->ipa_size = size; /* The output mask is also applied for bypass */ - size = arm_smmu_id_size_to_bits(FIELD_GET(ID2_OAS, id)); + size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id)); smmu->pa_size = size; - if (id & ID2_VMID16) + if (id & ARM_SMMU_ID2_VMID16) smmu->features |= ARM_SMMU_FEAT_VMID16; /* @@ -1862,13 +1871,13 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) if (smmu->version == ARM_SMMU_V1_64K) smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K; } else { - size = FIELD_GET(ID2_UBS, id); + size = FIELD_GET(ARM_SMMU_ID2_UBS, id); smmu->va_size = arm_smmu_id_size_to_bits(size); - if (id & ID2_PTFS_4K) + if (id & ARM_SMMU_ID2_PTFS_4K) smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K; - if (id & ID2_PTFS_16K) + if (id & ARM_SMMU_ID2_PTFS_16K) smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K; - if (id & ID2_PTFS_64K) + if (id & ARM_SMMU_ID2_PTFS_64K) smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K; } @@ -2245,7 +2254,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev) arm_smmu_rpm_get(smmu); /* Turn the thing off */ - arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, sCR0_CLIENTPD); + arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD); arm_smmu_rpm_put(smmu); if (pm_runtime_enabled(smmu->dev)) diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h index aade2b0ae175..6501f38a5966 100644 --- a/drivers/iommu/arm-smmu.h +++ b/drivers/iommu/arm-smmu.h @@ -24,51 +24,51 @@ /* Configuration registers */ #define ARM_SMMU_GR0_sCR0 0x0 -#define sCR0_VMID16EN BIT(31) -#define sCR0_BSU GENMASK(15, 14) -#define sCR0_FB BIT(13) -#define sCR0_PTM BIT(12) -#define sCR0_VMIDPNE BIT(11) -#define sCR0_USFCFG BIT(10) -#define sCR0_GCFGFIE BIT(5) -#define sCR0_GCFGFRE BIT(4) -#define sCR0_EXIDENABLE BIT(3) -#define sCR0_GFIE BIT(2) -#define sCR0_GFRE BIT(1) -#define sCR0_CLIENTPD BIT(0) +#define ARM_SMMU_sCR0_VMID16EN BIT(31) +#define ARM_SMMU_sCR0_BSU GENMASK(15, 14) +#define ARM_SMMU_sCR0_FB BIT(13) +#define ARM_SMMU_sCR0_PTM BIT(12) +#define ARM_SMMU_sCR0_VMIDPNE BIT(11) +#define ARM_SMMU_sCR0_USFCFG BIT(10) +#define ARM_SMMU_sCR0_GCFGFIE BIT(5) +#define ARM_SMMU_sCR0_GCFGFRE BIT(4) +#define ARM_SMMU_sCR0_EXIDENABLE BIT(3) +#define ARM_SMMU_sCR0_GFIE BIT(2) +#define ARM_SMMU_sCR0_GFRE BIT(1) +#define ARM_SMMU_sCR0_CLIENTPD BIT(0) /* Auxiliary Configuration register */ #define ARM_SMMU_GR0_sACR 0x10 /* Identification registers */ #define ARM_SMMU_GR0_ID0 0x20 -#define ID0_S1TS BIT(30) -#define ID0_S2TS BIT(29) -#define ID0_NTS BIT(28) -#define ID0_SMS BIT(27) -#define ID0_ATOSNS BIT(26) -#define ID0_PTFS_NO_AARCH32 BIT(25) -#define ID0_PTFS_NO_AARCH32S BIT(24) -#define ID0_NUMIRPT GENMASK(23, 16) -#define ID0_CTTW BIT(14) -#define ID0_NUMSIDB GENMASK(12, 9) -#define ID0_EXIDS BIT(8) -#define ID0_NUMSMRG GENMASK(7, 0) +#define ARM_SMMU_ID0_S1TS BIT(30) +#define ARM_SMMU_ID0_S2TS BIT(29) +#define ARM_SMMU_ID0_NTS BIT(28) +#define ARM_SMMU_ID0_SMS BIT(27) +#define ARM_SMMU_ID0_ATOSNS BIT(26) +#define ARM_SMMU_ID0_PTFS_NO_AARCH32 BIT(25) +#define ARM_SMMU_ID0_PTFS_NO_AARCH32S BIT(24) +#define ARM_SMMU_ID0_NUMIRPT GENMASK(23, 16) +#define ARM_SMMU_ID0_CTTW BIT(14) +#define ARM_SMMU_ID0_NUMSIDB GENMASK(12, 9) +#define ARM_SMMU_ID0_EXIDS BIT(8) +#define ARM_SMMU_ID0_NUMSMRG GENMASK(7, 0) #define ARM_SMMU_GR0_ID1 0x24 -#define ID1_PAGESIZE BIT(31) -#define ID1_NUMPAGENDXB GENMASK(30, 28) -#define ID1_NUMS2CB GENMASK(23, 16) -#define ID1_NUMCB GENMASK(7, 0) +#define ARM_SMMU_ID1_PAGESIZE BIT(31) +#define ARM_SMMU_ID1_NUMPAGENDXB GENMASK(30, 28) +#define ARM_SMMU_ID1_NUMS2CB GENMASK(23, 16) +#define ARM_SMMU_ID1_NUMCB GENMASK(7, 0) #define ARM_SMMU_GR0_ID2 0x28 -#define ID2_VMID16 BIT(15) -#define ID2_PTFS_64K BIT(14) -#define ID2_PTFS_16K BIT(13) -#define ID2_PTFS_4K BIT(12) -#define ID2_UBS GENMASK(11, 8) -#define ID2_OAS GENMASK(7, 4) -#define ID2_IAS GENMASK(3, 0) +#define ARM_SMMU_ID2_VMID16 BIT(15) +#define ARM_SMMU_ID2_PTFS_64K BIT(14) +#define ARM_SMMU_ID2_PTFS_16K BIT(13) +#define ARM_SMMU_ID2_PTFS_4K BIT(12) +#define ARM_SMMU_ID2_UBS GENMASK(11, 8) +#define ARM_SMMU_ID2_OAS GENMASK(7, 4) +#define ARM_SMMU_ID2_IAS GENMASK(3, 0) #define ARM_SMMU_GR0_ID3 0x2c #define ARM_SMMU_GR0_ID4 0x30 @@ -76,11 +76,11 @@ #define ARM_SMMU_GR0_ID6 0x38 #define ARM_SMMU_GR0_ID7 0x3c -#define ID7_MAJOR GENMASK(7, 4) -#define ID7_MINOR GENMASK(3, 0) +#define ARM_SMMU_ID7_MAJOR GENMASK(7, 4) +#define ARM_SMMU_ID7_MINOR GENMASK(3, 0) #define ARM_SMMU_GR0_sGFSR 0x48 -#define sGFSR_USF BIT(1) +#define ARM_SMMU_sGFSR_USF BIT(1) #define ARM_SMMU_GR0_sGFSYNR0 0x50 #define ARM_SMMU_GR0_sGFSYNR1 0x54 @@ -93,118 +93,123 @@ #define ARM_SMMU_GR0_sTLBGSYNC 0x70 #define ARM_SMMU_GR0_sTLBGSTATUS 0x74 -#define sTLBGSTATUS_GSACTIVE BIT(0) +#define ARM_SMMU_sTLBGSTATUS_GSACTIVE BIT(0) /* Stream mapping registers */ #define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2)) -#define SMR_VALID BIT(31) -#define SMR_MASK GENMASK(31, 16) -#define SMR_ID GENMASK(15, 0) +#define ARM_SMMU_SMR_VALID BIT(31) +#define ARM_SMMU_SMR_MASK GENMASK(31, 16) +#define ARM_SMMU_SMR_ID GENMASK(15, 0) #define ARM_SMMU_GR0_S2CR(n) (0xc00 + ((n) << 2)) -#define S2CR_PRIVCFG GENMASK(25, 24) +#define ARM_SMMU_S2CR_PRIVCFG GENMASK(25, 24) enum arm_smmu_s2cr_privcfg { S2CR_PRIVCFG_DEFAULT, S2CR_PRIVCFG_DIPAN, S2CR_PRIVCFG_UNPRIV, S2CR_PRIVCFG_PRIV, }; -#define S2CR_TYPE GENMASK(17, 16) +#define ARM_SMMU_S2CR_TYPE GENMASK(17, 16) enum arm_smmu_s2cr_type { S2CR_TYPE_TRANS, S2CR_TYPE_BYPASS, S2CR_TYPE_FAULT, }; -#define S2CR_EXIDVALID BIT(10) -#define S2CR_CBNDX GENMASK(7, 0) +#define ARM_SMMU_S2CR_EXIDVALID BIT(10) +#define ARM_SMMU_S2CR_CBNDX GENMASK(7, 0) /* Context bank attribute registers */ #define ARM_SMMU_GR1_CBAR(n) (0x0 + ((n) << 2)) -#define CBAR_IRPTNDX GENMASK(31, 24) -#define CBAR_TYPE GENMASK(17, 16) +#define ARM_SMMU_CBAR_IRPTNDX GENMASK(31, 24) +#define ARM_SMMU_CBAR_TYPE GENMASK(17, 16) enum arm_smmu_cbar_type { CBAR_TYPE_S2_TRANS, CBAR_TYPE_S1_TRANS_S2_BYPASS, CBAR_TYPE_S1_TRANS_S2_FAULT, CBAR_TYPE_S1_TRANS_S2_TRANS, }; -#define CBAR_S1_MEMATTR GENMASK(15, 12) -#define CBAR_S1_MEMATTR_WB 0xf -#define CBAR_S1_BPSHCFG GENMASK(9, 8) -#define CBAR_S1_BPSHCFG_NSH 3 -#define CBAR_VMID GENMASK(7, 0) +#define ARM_SMMU_CBAR_S1_MEMATTR GENMASK(15, 12) +#define ARM_SMMU_CBAR_S1_MEMATTR_WB 0xf +#define ARM_SMMU_CBAR_S1_BPSHCFG GENMASK(9, 8) +#define ARM_SMMU_CBAR_S1_BPSHCFG_NSH 3 +#define ARM_SMMU_CBAR_VMID GENMASK(7, 0) #define ARM_SMMU_GR1_CBFRSYNRA(n) (0x400 + ((n) << 2)) #define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2)) -#define CBA2R_VMID16 GENMASK(31, 16) -#define CBA2R_VA64 BIT(0) +#define ARM_SMMU_CBA2R_VMID16 GENMASK(31, 16) +#define ARM_SMMU_CBA2R_VA64 BIT(0) #define ARM_SMMU_CB_SCTLR 0x0 -#define SCTLR_S1_ASIDPNE BIT(12) -#define SCTLR_CFCFG BIT(7) -#define SCTLR_CFIE BIT(6) -#define SCTLR_CFRE BIT(5) -#define SCTLR_E BIT(4) -#define SCTLR_AFE BIT(2) -#define SCTLR_TRE BIT(1) -#define SCTLR_M BIT(0) +#define ARM_SMMU_SCTLR_S1_ASIDPNE BIT(12) +#define ARM_SMMU_SCTLR_CFCFG BIT(7) +#define ARM_SMMU_SCTLR_CFIE BIT(6) +#define ARM_SMMU_SCTLR_CFRE BIT(5) +#define ARM_SMMU_SCTLR_E BIT(4) +#define ARM_SMMU_SCTLR_AFE BIT(2) +#define ARM_SMMU_SCTLR_TRE BIT(1) +#define ARM_SMMU_SCTLR_M BIT(0) #define ARM_SMMU_CB_ACTLR 0x4 #define ARM_SMMU_CB_RESUME 0x8 -#define RESUME_TERMINATE BIT(0) +#define ARM_SMMU_RESUME_TERMINATE BIT(0) #define ARM_SMMU_CB_TCR2 0x10 -#define TCR2_SEP GENMASK(17, 15) -#define TCR2_SEP_UPSTREAM 0x7 -#define TCR2_AS BIT(4) -#define TCR2_PASIZE GENMASK(3, 0) +#define ARM_SMMU_TCR2_SEP GENMASK(17, 15) +#define ARM_SMMU_TCR2_SEP_UPSTREAM 0x7 +#define ARM_SMMU_TCR2_AS BIT(4) +#define ARM_SMMU_TCR2_PASIZE GENMASK(3, 0) #define ARM_SMMU_CB_TTBR0 0x20 #define ARM_SMMU_CB_TTBR1 0x28 -#define TTBRn_ASID GENMASK_ULL(63, 48) - -/* arm64 headers leak this somehow :( */ -#undef TCR_T0SZ +#define ARM_SMMU_TTBRn_ASID GENMASK_ULL(63, 48) #define ARM_SMMU_CB_TCR 0x30 -#define TCR_EAE BIT(31) -#define TCR_EPD1 BIT(23) -#define TCR_TG0 GENMASK(15, 14) -#define TCR_SH0 GENMASK(13, 12) -#define TCR_ORGN0 GENMASK(11, 10) -#define TCR_IRGN0 GENMASK(9, 8) -#define TCR_T0SZ GENMASK(5, 0) +#define ARM_SMMU_TCR_EAE BIT(31) +#define ARM_SMMU_TCR_EPD1 BIT(23) +#define ARM_SMMU_TCR_TG0 GENMASK(15, 14) +#define ARM_SMMU_TCR_SH0 GENMASK(13, 12) +#define ARM_SMMU_TCR_ORGN0 GENMASK(11, 10) +#define ARM_SMMU_TCR_IRGN0 GENMASK(9, 8) +#define ARM_SMMU_TCR_T0SZ GENMASK(5, 0) #define ARM_SMMU_CB_CONTEXTIDR 0x34 #define ARM_SMMU_CB_S1_MAIR0 0x38 #define ARM_SMMU_CB_S1_MAIR1 0x3c #define ARM_SMMU_CB_PAR 0x50 -#define CB_PAR_F BIT(0) +#define ARM_SMMU_CB_PAR_F BIT(0) #define ARM_SMMU_CB_FSR 0x58 -#define FSR_MULTI BIT(31) -#define FSR_SS BIT(30) -#define FSR_UUT BIT(8) -#define FSR_ASF BIT(7) -#define FSR_TLBLKF BIT(6) -#define FSR_TLBMCF BIT(5) -#define FSR_EF BIT(4) -#define FSR_PF BIT(3) -#define FSR_AFF BIT(2) -#define FSR_TF BIT(1) +#define ARM_SMMU_FSR_MULTI BIT(31) +#define ARM_SMMU_FSR_SS BIT(30) +#define ARM_SMMU_FSR_UUT BIT(8) +#define ARM_SMMU_FSR_ASF BIT(7) +#define ARM_SMMU_FSR_TLBLKF BIT(6) +#define ARM_SMMU_FSR_TLBMCF BIT(5) +#define ARM_SMMU_FSR_EF BIT(4) +#define ARM_SMMU_FSR_PF BIT(3) +#define ARM_SMMU_FSR_AFF BIT(2) +#define ARM_SMMU_FSR_TF BIT(1) -#define FSR_IGN (FSR_AFF | FSR_ASF | \ - FSR_TLBMCF | FSR_TLBLKF) -#define FSR_FAULT (FSR_MULTI | FSR_SS | FSR_UUT | \ - FSR_EF | FSR_PF | FSR_TF | FSR_IGN) +#define ARM_SMMU_FSR_IGN (ARM_SMMU_FSR_AFF | \ + ARM_SMMU_FSR_ASF | \ + ARM_SMMU_FSR_TLBMCF | \ + ARM_SMMU_FSR_TLBLKF) + +#define ARM_SMMU_FSR_FAULT (ARM_SMMU_FSR_MULTI | \ + ARM_SMMU_FSR_SS | \ + ARM_SMMU_FSR_UUT | \ + ARM_SMMU_FSR_EF | \ + ARM_SMMU_FSR_PF | \ + ARM_SMMU_FSR_TF | \ + ARM_SMMU_FSR_IGN) #define ARM_SMMU_CB_FAR 0x60 #define ARM_SMMU_CB_FSYNR0 0x68 -#define FSYNR0_WNR BIT(4) +#define ARM_SMMU_FSYNR0_WNR BIT(4) #define ARM_SMMU_CB_S1_TLBIVA 0x600 #define ARM_SMMU_CB_S1_TLBIASID 0x610 @@ -216,7 +221,7 @@ enum arm_smmu_cbar_type { #define ARM_SMMU_CB_ATS1PR 0x800 #define ARM_SMMU_CB_ATSR 0x8f0 -#define ATSR_ACTIVE BIT(0) +#define ARM_SMMU_ATSR_ACTIVE BIT(0) /* Maximum number of context banks per SMMU */ @@ -310,7 +315,7 @@ struct arm_smmu_cfg { enum arm_smmu_cbar_type cbar; enum arm_smmu_context_fmt fmt; }; -#define INVALID_IRPTNDX 0xff +#define ARM_SMMU_INVALID_IRPTNDX 0xff enum arm_smmu_domain_stage { ARM_SMMU_DOMAIN_S1 = 0, @@ -333,18 +338,18 @@ struct arm_smmu_domain { static inline u32 arm_smmu_lpae_tcr(struct io_pgtable_cfg *cfg) { - return TCR_EPD1 | - FIELD_PREP(TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) | - FIELD_PREP(TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | - FIELD_PREP(TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | - FIELD_PREP(TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | - FIELD_PREP(TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); + return ARM_SMMU_TCR_EPD1 | + FIELD_PREP(ARM_SMMU_TCR_TG0, cfg->arm_lpae_s1_cfg.tcr.tg) | + FIELD_PREP(ARM_SMMU_TCR_SH0, cfg->arm_lpae_s1_cfg.tcr.sh) | + FIELD_PREP(ARM_SMMU_TCR_ORGN0, cfg->arm_lpae_s1_cfg.tcr.orgn) | + FIELD_PREP(ARM_SMMU_TCR_IRGN0, cfg->arm_lpae_s1_cfg.tcr.irgn) | + FIELD_PREP(ARM_SMMU_TCR_T0SZ, cfg->arm_lpae_s1_cfg.tcr.tsz); } static inline u32 arm_smmu_lpae_tcr2(struct io_pgtable_cfg *cfg) { - return FIELD_PREP(TCR2_PASIZE, cfg->arm_lpae_s1_cfg.tcr.ips) | - FIELD_PREP(TCR2_SEP, TCR2_SEP_UPSTREAM); + return FIELD_PREP(ARM_SMMU_TCR2_PASIZE, cfg->arm_lpae_s1_cfg.tcr.ips) | + FIELD_PREP(ARM_SMMU_TCR2_SEP, ARM_SMMU_TCR2_SEP_UPSTREAM); } /* Implementation details, yay! */ diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 2eeaf2eec946..39759db4f003 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -201,7 +201,7 @@ static irqreturn_t qcom_iommu_fault(int irq, void *dev) fsr = iommu_readl(ctx, ARM_SMMU_CB_FSR); - if (!(fsr & FSR_FAULT)) + if (!(fsr & ARM_SMMU_FSR_FAULT)) return IRQ_NONE; fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0); @@ -215,7 +215,7 @@ static irqreturn_t qcom_iommu_fault(int irq, void *dev) } iommu_writel(ctx, ARM_SMMU_CB_FSR, fsr); - iommu_writel(ctx, ARM_SMMU_CB_RESUME, RESUME_TERMINATE); + iommu_writel(ctx, ARM_SMMU_CB_RESUME, ARM_SMMU_RESUME_TERMINATE); return IRQ_HANDLED; } @@ -270,14 +270,14 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, /* TTBRs */ iommu_writeq(ctx, ARM_SMMU_CB_TTBR0, pgtbl_cfg.arm_lpae_s1_cfg.ttbr | - FIELD_PREP(TTBRn_ASID, ctx->asid)); + FIELD_PREP(ARM_SMMU_TTBRn_ASID, ctx->asid)); iommu_writeq(ctx, ARM_SMMU_CB_TTBR1, 0); /* TCR */ iommu_writel(ctx, ARM_SMMU_CB_TCR2, arm_smmu_lpae_tcr2(&pgtbl_cfg)); iommu_writel(ctx, ARM_SMMU_CB_TCR, - arm_smmu_lpae_tcr(&pgtbl_cfg) | TCR_EAE); + arm_smmu_lpae_tcr(&pgtbl_cfg) | ARM_SMMU_TCR_EAE); /* MAIRs (stage-1 only) */ iommu_writel(ctx, ARM_SMMU_CB_S1_MAIR0, @@ -286,11 +286,13 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, pgtbl_cfg.arm_lpae_s1_cfg.mair >> 32); /* SCTLR */ - reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | - SCTLR_M | SCTLR_S1_ASIDPNE | SCTLR_CFCFG; + reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | + ARM_SMMU_SCTLR_AFE | ARM_SMMU_SCTLR_TRE | + ARM_SMMU_SCTLR_M | ARM_SMMU_SCTLR_S1_ASIDPNE | + ARM_SMMU_SCTLR_CFCFG; if (IS_ENABLED(CONFIG_BIG_ENDIAN)) - reg |= SCTLR_E; + reg |= ARM_SMMU_SCTLR_E; iommu_writel(ctx, ARM_SMMU_CB_SCTLR, reg); From ac4b80e5b9d0ecf906300d79e4dc4df5526579a8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 10 Jan 2020 14:51:59 +0000 Subject: [PATCH 12/28] iommu/io-pgtable-arm: Rationalise VTCR handling Commit 05a648cd2dd7 ("iommu/io-pgtable-arm: Rationalise TCR handling") reworked the way in which the TCR register value is returned from the io-pgtable code when targetting the Arm long-descriptor format, in preparation for allowing page-tables to target TTBR1. As it turns out, the new interface is a lot nicer to use, so do the same conversion for the VTCR register even though there is only a single base register for stage-2 translation. Cc: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 17 +++++++++- drivers/iommu/arm-smmu.c | 2 +- drivers/iommu/arm-smmu.h | 21 +++++++++++++ drivers/iommu/io-pgtable-arm.c | 57 +++++++++++++--------------------- include/linux/io-pgtable.h | 10 +++++- 5 files changed, 68 insertions(+), 39 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index d127974afdb7..4443e1890077 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -250,6 +250,13 @@ #define STRTAB_STE_2_S2VMID GENMASK_ULL(15, 0) #define STRTAB_STE_2_VTCR GENMASK_ULL(50, 32) +#define STRTAB_STE_2_VTCR_S2T0SZ GENMASK_ULL(5, 0) +#define STRTAB_STE_2_VTCR_S2SL0 GENMASK_ULL(7, 6) +#define STRTAB_STE_2_VTCR_S2IR0 GENMASK_ULL(9, 8) +#define STRTAB_STE_2_VTCR_S2OR0 GENMASK_ULL(11, 10) +#define STRTAB_STE_2_VTCR_S2SH0 GENMASK_ULL(13, 12) +#define STRTAB_STE_2_VTCR_S2TG GENMASK_ULL(15, 14) +#define STRTAB_STE_2_VTCR_S2PS GENMASK_ULL(18, 16) #define STRTAB_STE_2_S2AA64 (1UL << 51) #define STRTAB_STE_2_S2ENDI (1UL << 52) #define STRTAB_STE_2_S2PTW (1UL << 54) @@ -2159,14 +2166,22 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, int vmid; struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; + typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr; vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits); if (vmid < 0) return vmid; + vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr; cfg->vmid = (u16)vmid; cfg->vttbr = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; - cfg->vtcr = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; + cfg->vtcr = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) | + FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps); return 0; } diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 214be09f6ded..f067783ebd59 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -548,7 +548,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, cb->tcr[0] |= ARM_SMMU_TCR_EAE; } } else { - cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; + cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg); } /* TTBRs */ diff --git a/drivers/iommu/arm-smmu.h b/drivers/iommu/arm-smmu.h index 6501f38a5966..8d1cd54d82a6 100644 --- a/drivers/iommu/arm-smmu.h +++ b/drivers/iommu/arm-smmu.h @@ -174,6 +174,15 @@ enum arm_smmu_cbar_type { #define ARM_SMMU_TCR_IRGN0 GENMASK(9, 8) #define ARM_SMMU_TCR_T0SZ GENMASK(5, 0) +#define ARM_SMMU_VTCR_RES1 BIT(31) +#define ARM_SMMU_VTCR_PS GENMASK(18, 16) +#define ARM_SMMU_VTCR_TG0 ARM_SMMU_TCR_TG0 +#define ARM_SMMU_VTCR_SH0 ARM_SMMU_TCR_SH0 +#define ARM_SMMU_VTCR_ORGN0 ARM_SMMU_TCR_ORGN0 +#define ARM_SMMU_VTCR_IRGN0 ARM_SMMU_TCR_IRGN0 +#define ARM_SMMU_VTCR_SL0 GENMASK(7, 6) +#define ARM_SMMU_VTCR_T0SZ ARM_SMMU_TCR_T0SZ + #define ARM_SMMU_CB_CONTEXTIDR 0x34 #define ARM_SMMU_CB_S1_MAIR0 0x38 #define ARM_SMMU_CB_S1_MAIR1 0x3c @@ -352,6 +361,18 @@ static inline u32 arm_smmu_lpae_tcr2(struct io_pgtable_cfg *cfg) FIELD_PREP(ARM_SMMU_TCR2_SEP, ARM_SMMU_TCR2_SEP_UPSTREAM); } +static inline u32 arm_smmu_lpae_vtcr(struct io_pgtable_cfg *cfg) +{ + return ARM_SMMU_VTCR_RES1 | + FIELD_PREP(ARM_SMMU_VTCR_PS, cfg->arm_lpae_s2_cfg.vtcr.ps) | + FIELD_PREP(ARM_SMMU_VTCR_TG0, cfg->arm_lpae_s2_cfg.vtcr.tg) | + FIELD_PREP(ARM_SMMU_VTCR_SH0, cfg->arm_lpae_s2_cfg.vtcr.sh) | + FIELD_PREP(ARM_SMMU_VTCR_ORGN0, cfg->arm_lpae_s2_cfg.vtcr.orgn) | + FIELD_PREP(ARM_SMMU_VTCR_IRGN0, cfg->arm_lpae_s2_cfg.vtcr.irgn) | + FIELD_PREP(ARM_SMMU_VTCR_SL0, cfg->arm_lpae_s2_cfg.vtcr.sl) | + FIELD_PREP(ARM_SMMU_VTCR_T0SZ, cfg->arm_lpae_s2_cfg.vtcr.tsz); +} + /* Implementation details, yay! */ struct arm_smmu_impl { u32 (*read_reg)(struct arm_smmu_device *smmu, int page, int offset); diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index a868ce594871..2ae4da4dea5e 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -100,26 +100,19 @@ #define ARM_LPAE_PTE_MEMATTR_DEV (((arm_lpae_iopte)0x1) << 2) /* Register bits */ -#define ARM_64_LPAE_VTCR_RES1 (1U << 31) - -#define ARM_LPAE_VTCR_TG0_SHIFT 14 #define ARM_LPAE_TCR_TG0_4K 0 #define ARM_LPAE_TCR_TG0_64K 1 #define ARM_LPAE_TCR_TG0_16K 2 -#define ARM_LPAE_TCR_SH0_SHIFT 12 #define ARM_LPAE_TCR_SH_NS 0 #define ARM_LPAE_TCR_SH_OS 2 #define ARM_LPAE_TCR_SH_IS 3 -#define ARM_LPAE_TCR_ORGN0_SHIFT 10 -#define ARM_LPAE_TCR_IRGN0_SHIFT 8 #define ARM_LPAE_TCR_RGN_NC 0 #define ARM_LPAE_TCR_RGN_WBWA 1 #define ARM_LPAE_TCR_RGN_WT 2 #define ARM_LPAE_TCR_RGN_WB 3 -#define ARM_LPAE_VTCR_SL0_SHIFT 6 #define ARM_LPAE_VTCR_SL0_MASK 0x3 #define ARM_LPAE_TCR_T0SZ_SHIFT 0 @@ -878,8 +871,9 @@ out_free_data: static struct io_pgtable * arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) { - u64 reg, sl; + u64 sl; struct arm_lpae_io_pgtable *data; + typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr; /* The NS quirk doesn't apply at stage 2 */ if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NON_STRICT)) @@ -904,61 +898,59 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) } /* VTCR */ - reg = ARM_64_LPAE_VTCR_RES1; if (cfg->coherent_walk) { - reg |= (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | - (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); + vtcr->sh = ARM_LPAE_TCR_SH_IS; + vtcr->irgn = ARM_LPAE_TCR_RGN_WBWA; + vtcr->orgn = ARM_LPAE_TCR_RGN_WBWA; } else { - reg |= (ARM_LPAE_TCR_SH_OS << ARM_LPAE_TCR_SH0_SHIFT) | - (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN0_SHIFT) | - (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_ORGN0_SHIFT); + vtcr->sh = ARM_LPAE_TCR_SH_OS; + vtcr->irgn = ARM_LPAE_TCR_RGN_NC; + vtcr->orgn = ARM_LPAE_TCR_RGN_NC; } sl = data->start_level; switch (ARM_LPAE_GRANULE(data)) { case SZ_4K: - reg |= (ARM_LPAE_TCR_TG0_4K << ARM_LPAE_VTCR_TG0_SHIFT); + vtcr->tg = ARM_LPAE_TCR_TG0_4K; sl++; /* SL0 format is different for 4K granule size */ break; case SZ_16K: - reg |= (ARM_LPAE_TCR_TG0_16K << ARM_LPAE_VTCR_TG0_SHIFT); + vtcr->tg = ARM_LPAE_TCR_TG0_16K; break; case SZ_64K: - reg |= (ARM_LPAE_TCR_TG0_64K << ARM_LPAE_VTCR_TG0_SHIFT); + vtcr->tg = ARM_LPAE_TCR_TG0_64K; break; } switch (cfg->oas) { case 32: - reg |= (ARM_LPAE_TCR_PS_32_BIT << ARM_LPAE_VTCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_32_BIT; break; case 36: - reg |= (ARM_LPAE_TCR_PS_36_BIT << ARM_LPAE_VTCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_36_BIT; break; case 40: - reg |= (ARM_LPAE_TCR_PS_40_BIT << ARM_LPAE_VTCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_40_BIT; break; case 42: - reg |= (ARM_LPAE_TCR_PS_42_BIT << ARM_LPAE_VTCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_42_BIT; break; case 44: - reg |= (ARM_LPAE_TCR_PS_44_BIT << ARM_LPAE_VTCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_44_BIT; break; case 48: - reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_VTCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_48_BIT; break; case 52: - reg |= (ARM_LPAE_TCR_PS_52_BIT << ARM_LPAE_VTCR_PS_SHIFT); + vtcr->ps = ARM_LPAE_TCR_PS_52_BIT; break; default: goto out_free_data; } - reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT; - reg |= (~sl & ARM_LPAE_VTCR_SL0_MASK) << ARM_LPAE_VTCR_SL0_SHIFT; - cfg->arm_lpae_s2_cfg.vtcr = reg; + vtcr->tsz = 64ULL - cfg->ias; + vtcr->sl = ~sl & ARM_LPAE_VTCR_SL0_MASK; /* Allocate pgd pages */ data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), @@ -985,24 +977,17 @@ arm_32_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) return NULL; cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); - return arm_64_lpae_alloc_pgtable_s1(cfg, cookie); } static struct io_pgtable * arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) { - struct io_pgtable *iop; - if (cfg->ias > 40 || cfg->oas > 40) return NULL; cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); - iop = arm_64_lpae_alloc_pgtable_s2(cfg, cookie); - if (iop) - cfg->arm_lpae_s2_cfg.vtcr &= 0xffffffff; - - return iop; + return arm_64_lpae_alloc_pgtable_s2(cfg, cookie); } static struct io_pgtable * diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 6ae104cedfd7..40c1b7745fb6 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -114,7 +114,15 @@ struct io_pgtable_cfg { struct { u64 vttbr; - u64 vtcr; + struct { + u32 ps:3; + u32 tg:2; + u32 sh:2; + u32 orgn:2; + u32 irgn:2; + u32 sl:2; + u32 tsz:6; + } vtcr; } arm_lpae_s2_cfg; struct { From db6903010aa5b7691d8234a179f4cf71a119445a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 25 Oct 2019 19:08:39 +0100 Subject: [PATCH 13/28] iommu/io-pgtable-arm: Prepare for TTBR1 usage Now that we can correctly extract top-level indices without relying on the remaining upper bits being zero, the only remaining impediments to using a given table for TTBR1 are the address validation on map/unmap and the awkward TCR translation granule format. Add a quirk so that we can do the right thing at those points. Tested-by: Jordan Crouse Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 25 +++++++++++++++++++------ include/linux/io-pgtable.h | 4 ++++ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 2ae4da4dea5e..983b08477e64 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -104,6 +104,10 @@ #define ARM_LPAE_TCR_TG0_64K 1 #define ARM_LPAE_TCR_TG0_16K 2 +#define ARM_LPAE_TCR_TG1_16K 1 +#define ARM_LPAE_TCR_TG1_4K 2 +#define ARM_LPAE_TCR_TG1_64K 3 + #define ARM_LPAE_TCR_SH_NS 0 #define ARM_LPAE_TCR_SH_OS 2 #define ARM_LPAE_TCR_SH_IS 3 @@ -464,6 +468,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova, arm_lpae_iopte *ptep = data->pgd; int ret, lvl = data->start_level; arm_lpae_iopte prot; + long iaext = (long)iova >> cfg->ias; /* If no access, then nothing to do */ if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE))) @@ -472,7 +477,9 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova, if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size)) return -EINVAL; - if (WARN_ON(iova >> data->iop.cfg.ias || paddr >> data->iop.cfg.oas)) + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) + iaext = ~iaext; + if (WARN_ON(iaext || paddr >> cfg->oas)) return -ERANGE; prot = arm_lpae_prot_to_pte(data, iommu_prot); @@ -638,11 +645,14 @@ static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); struct io_pgtable_cfg *cfg = &data->iop.cfg; arm_lpae_iopte *ptep = data->pgd; + long iaext = (long)iova >> cfg->ias; if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size)) return 0; - if (WARN_ON(iova >> data->iop.cfg.ias)) + if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) + iaext = ~iaext; + if (WARN_ON(iaext)) return 0; return __arm_lpae_unmap(data, gather, iova, size, data->start_level, ptep); @@ -778,9 +788,11 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) u64 reg; struct arm_lpae_io_pgtable *data; typeof(&cfg->arm_lpae_s1_cfg.tcr) tcr = &cfg->arm_lpae_s1_cfg.tcr; + bool tg1; if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | - IO_PGTABLE_QUIRK_NON_STRICT)) + IO_PGTABLE_QUIRK_NON_STRICT | + IO_PGTABLE_QUIRK_ARM_TTBR1)) return NULL; data = arm_lpae_alloc_pgtable(cfg); @@ -798,15 +810,16 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) tcr->orgn = ARM_LPAE_TCR_RGN_NC; } + tg1 = cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1; switch (ARM_LPAE_GRANULE(data)) { case SZ_4K: - tcr->tg = ARM_LPAE_TCR_TG0_4K; + tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_4K : ARM_LPAE_TCR_TG0_4K; break; case SZ_16K: - tcr->tg = ARM_LPAE_TCR_TG0_16K; + tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_16K : ARM_LPAE_TCR_TG0_16K; break; case SZ_64K: - tcr->tg = ARM_LPAE_TCR_TG0_64K; + tcr->tg = tg1 ? ARM_LPAE_TCR_TG1_64K : ARM_LPAE_TCR_TG0_64K; break; } diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 40c1b7745fb6..53d53c6c2be9 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -83,12 +83,16 @@ struct io_pgtable_cfg { * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs * on unmap, for DMA domains using the flush queue mechanism for * delayed invalidation. + * + * IO_PGTABLE_QUIRK_ARM_TTBR1: (ARM LPAE format) Configure the table + * for use in the upper half of a split address space. */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) #define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2) #define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3) #define IO_PGTABLE_QUIRK_NON_STRICT BIT(4) + #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; From 79f7a5cb87baa229b321246b3f33af875d8a1ef7 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 10 Jan 2020 15:25:02 +0000 Subject: [PATCH 14/28] iommu/arm-smmu: Improve SMR mask test Make the SMR mask test more robust against SMR0 being live at probe time, which might happen once we start supporting firmware reservations for framebuffers and suchlike. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index f067783ebd59..cca94c30b301 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -971,23 +971,36 @@ static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx) static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu) { u32 smr; + int i; if (!smmu->smrs) return; - + /* + * If we've had to accommodate firmware memory regions, we may + * have live SMRs by now; tread carefully... + * + * Somewhat perversely, not having a free SMR for this test implies we + * can get away without it anyway, as we'll only be able to 'allocate' + * these SMRs for the ID/mask values we're already trusting to be OK. + */ + for (i = 0; i < smmu->num_mapping_groups; i++) + if (!smmu->smrs[i].valid) + goto smr_ok; + return; +smr_ok: /* * SMR.ID bits may not be preserved if the corresponding MASK * bits are set, so check each one separately. We can reject * masters later if they try to claim IDs outside these masks. */ smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask); - arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr); - smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0)); + arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr); + smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i)); smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr); smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask); - arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(0), smr); - smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(0)); + arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr); + smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i)); smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr); } From 9bb9069cfba5ac9b867bebbd4c2b1fa9fa20936f Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 15 Jan 2020 13:52:27 +0100 Subject: [PATCH 15/28] iommu/arm-smmu-v3: Drop __GFP_ZERO flag from DMA allocation Since commit 518a2f1925c3 ("dma-mapping: zero memory returned from dma_alloc_*"), dma_alloc_* always initializes memory to zero, so there is no need to use dma_zalloc_* or pass the __GFP_ZERO flag anymore. The flag was introduced by commit 04fa26c71be5 ("iommu/arm-smmu: Convert DMA buffer allocations to the managed API"), since the managed API didn't provide a dmam_zalloc_coherent() function. Reviewed-by: Eric Auger Reviewed-by: Jonathan Cameron Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 4443e1890077..4a9442f004ca 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1648,7 +1648,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) desc->span = STRTAB_SPLIT + 1; desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma, - GFP_KERNEL | __GFP_ZERO); + GFP_KERNEL); if (!desc->l2ptr) { dev_err(smmu->dev, "failed to allocate l2 stream table for SID %u\n", @@ -2135,8 +2135,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, return asid; cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3, - &cfg->cdptr_dma, - GFP_KERNEL | __GFP_ZERO); + &cfg->cdptr_dma, GFP_KERNEL); if (!cfg->cdptr) { dev_warn(smmu->dev, "failed to allocate context descriptor\n"); ret = -ENOMEM; @@ -2871,7 +2870,7 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3); strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma, - GFP_KERNEL | __GFP_ZERO); + GFP_KERNEL); if (!strtab) { dev_err(smmu->dev, "failed to allocate l1 stream table (%u bytes)\n", @@ -2898,7 +2897,7 @@ static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3); strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma, - GFP_KERNEL | __GFP_ZERO); + GFP_KERNEL); if (!strtab) { dev_err(smmu->dev, "failed to allocate linear stream table (%u bytes)\n", From 2e981b9468e670b76bd1048fa939ad1f9653bd79 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 15 Jan 2020 13:52:28 +0100 Subject: [PATCH 16/28] dt-bindings: document PASID property for IOMMU masters On Arm systems, some platform devices behind an SMMU may support the PASID feature, which offers multiple address space. Let the firmware tell us when a device supports PASID. Reviewed-by: Eric Auger Reviewed-by: Jonathan Cameron Reviewed-by: Rob Herring Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/iommu.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/iommu.txt b/Documentation/devicetree/bindings/iommu/iommu.txt index 5a8b4624defc..3c36334e4f94 100644 --- a/Documentation/devicetree/bindings/iommu/iommu.txt +++ b/Documentation/devicetree/bindings/iommu/iommu.txt @@ -86,6 +86,12 @@ have a means to turn off translation. But it is invalid in such cases to disable the IOMMU's device tree node in the first place because it would prevent any driver from properly setting up the translations. +Optional properties: +-------------------- +- pasid-num-bits: Some masters support multiple address spaces for DMA, by + tagging DMA transactions with an address space identifier. By default, + this is 0, which means that the device only has one address space. + Notes: ====== From 89535821c04256964e266bf585cf224f65e08983 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 15 Jan 2020 13:52:29 +0100 Subject: [PATCH 17/28] iommu/arm-smmu-v3: Parse PASID devicetree property of platform devices For platform devices that support SubstreamID (SSID), firmware provides the number of supported SSID bits. Restrict it to what the SMMU supports and cache it into master->ssid_bits, which will also be used for PCI PASID. Reviewed-by: Eric Auger Reviewed-by: Jonathan Cameron Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 13 +++++++++++++ drivers/iommu/of_iommu.c | 6 +++++- include/linux/iommu.h | 2 ++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 4a9442f004ca..2717c2b211d2 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -289,6 +289,12 @@ #define CTXDESC_CD_1_TTB0_MASK GENMASK_ULL(51, 4) +/* + * When the SMMU only supports linear context descriptor tables, pick a + * reasonable size limit (64kB). + */ +#define CTXDESC_LINEAR_CDMAX ilog2(SZ_64K / (CTXDESC_CD_DWORDS << 3)) + /* Command queue */ #define CMDQ_ENT_SZ_SHIFT 4 #define CMDQ_ENT_DWORDS ((1 << CMDQ_ENT_SZ_SHIFT) >> 3) @@ -627,6 +633,7 @@ struct arm_smmu_master { u32 *sids; unsigned int num_sids; bool ats_enabled; + unsigned int ssid_bits; }; /* SMMU private data for an IOMMU domain */ @@ -2559,6 +2566,12 @@ static int arm_smmu_add_device(struct device *dev) } } + master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits); + + if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB)) + master->ssid_bits = min_t(u8, master->ssid_bits, + CTXDESC_LINEAR_CDMAX); + group = iommu_group_get_for_dev(dev); if (!IS_ERR(group)) { iommu_group_put(group); diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index e7bc8b721947..20738aacac89 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -203,8 +203,12 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, if (err) break; } - } + fwspec = dev_iommu_fwspec_get(dev); + if (!err && fwspec) + of_property_read_u32(master_np, "pasid-num-bits", + &fwspec->num_pasid_bits); + } /* * Two success conditions can be represented by non-negative err here: diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 90007c92ad2d..c1ad15228447 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -579,6 +579,7 @@ struct iommu_group *fsl_mc_device_group(struct device *dev); * @ops: ops for this device's IOMMU * @iommu_fwnode: firmware handle for this device's IOMMU * @iommu_priv: IOMMU driver private data for this device + * @num_pasid_bits: number of PASID bits supported by this device * @num_ids: number of associated device IDs * @ids: IDs which this device may present to the IOMMU */ @@ -587,6 +588,7 @@ struct iommu_fwspec { struct fwnode_handle *iommu_fwnode; void *iommu_priv; u32 flags; + u32 num_pasid_bits; unsigned int num_ids; u32 ids[1]; }; From da22565d1d733e87e3e57127caab1bab250dcb70 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 15 Jan 2020 13:52:30 +0100 Subject: [PATCH 18/28] ACPI/IORT: Parse SSID property of named component node Named component nodes in the IORT tables describe the number of Substream ID bits (aka. PASID) supported by the device. Propagate this value to the fwspec structure in order to enable PASID for platform devices. Acked-by: Hanjun Guo Reviewed-by: Eric Auger Reviewed-by: Jonathan Cameron Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 4a560fdf7386..9f0a9f9339f9 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) "ACPI: IORT: " fmt #include +#include #include #include #include @@ -924,6 +925,20 @@ static int iort_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data) return iort_iommu_xlate(info->dev, parent, streamid); } +static void iort_named_component_init(struct device *dev, + struct acpi_iort_node *node) +{ + struct acpi_iort_named_component *nc; + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + + if (!fwspec) + return; + + nc = (struct acpi_iort_named_component *)node->node_data; + fwspec->num_pasid_bits = FIELD_GET(ACPI_IORT_NC_PASID_BITS, + nc->node_flags); +} + /** * iort_iommu_configure - Set-up IOMMU configuration for a device. * @@ -978,6 +993,9 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) if (parent) err = iort_iommu_xlate(dev, parent, streamid); } while (parent && !err); + + if (!err) + iort_named_component_init(dev, node); } /* From 7bc4f3fae9537d17be7d029afcb08e3dfd0f2801 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 15 Jan 2020 13:52:31 +0100 Subject: [PATCH 19/28] iommu/arm-smmu-v3: Prepare arm_smmu_s1_cfg for SSID support When adding SSID support to the SMMUv3 driver, we'll need to manipulate leaf pasid tables and context descriptors. Extract the context descriptor structure and align with the way stream tables are handled. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 46 ++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 2717c2b211d2..17231374eda6 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -542,16 +542,21 @@ struct arm_smmu_strtab_l1_desc { dma_addr_t l2ptr_dma; }; -struct arm_smmu_s1_cfg { - __le64 *cdptr; - dma_addr_t cdptr_dma; +struct arm_smmu_ctx_desc { + u16 asid; + u64 ttbr; + u64 tcr; + u64 mair; +}; - struct arm_smmu_ctx_desc { - u16 asid; - u64 ttbr; - u64 tcr; - u64 mair; - } cd; +struct arm_smmu_ctx_desc_cfg { + __le64 *cdtab; + dma_addr_t cdtab_dma; +}; + +struct arm_smmu_s1_cfg { + struct arm_smmu_ctx_desc_cfg cdcfg; + struct arm_smmu_ctx_desc cd; }; struct arm_smmu_s2_cfg { @@ -1444,6 +1449,7 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu, struct arm_smmu_s1_cfg *cfg) { u64 val; + __le64 *cdptr = cfg->cdcfg.cdtab; /* * We don't need to issue any invalidation here, as we'll invalidate @@ -1461,12 +1467,12 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu, if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE) val |= CTXDESC_CD_0_S; - cfg->cdptr[0] = cpu_to_le64(val); + cdptr[0] = cpu_to_le64(val); val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK; - cfg->cdptr[1] = cpu_to_le64(val); + cdptr[1] = cpu_to_le64(val); - cfg->cdptr[3] = cpu_to_le64(cfg->cd.mair); + cdptr[3] = cpu_to_le64(cfg->cd.mair); } /* Stream table manipulation functions */ @@ -1597,7 +1603,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE)) dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); - val |= (s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) | + val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS); } @@ -2111,11 +2117,11 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; - if (cfg->cdptr) { + if (cfg->cdcfg.cdtab) { dmam_free_coherent(smmu_domain->smmu->dev, CTXDESC_CD_DWORDS << 3, - cfg->cdptr, - cfg->cdptr_dma); + cfg->cdcfg.cdtab, + cfg->cdcfg.cdtab_dma); arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid); } @@ -2141,9 +2147,11 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, if (asid < 0) return asid; - cfg->cdptr = dmam_alloc_coherent(smmu->dev, CTXDESC_CD_DWORDS << 3, - &cfg->cdptr_dma, GFP_KERNEL); - if (!cfg->cdptr) { + cfg->cdcfg.cdtab = dmam_alloc_coherent(smmu->dev, + CTXDESC_CD_DWORDS << 3, + &cfg->cdcfg.cdtab_dma, + GFP_KERNEL); + if (!cfg->cdcfg.cdtab) { dev_warn(smmu->dev, "failed to allocate context descriptor\n"); ret = -ENOMEM; goto out_free_asid; From a557aff0c72b62e5ca859eab376f932d24e36cea Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 15 Jan 2020 13:52:32 +0100 Subject: [PATCH 20/28] iommu/arm-smmu-v3: Add context descriptor tables allocators Support for SSID will require allocating context descriptor tables. Move the context descriptor allocation to separate functions. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 47 +++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 17231374eda6..3f7248d98e72 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -552,11 +552,13 @@ struct arm_smmu_ctx_desc { struct arm_smmu_ctx_desc_cfg { __le64 *cdtab; dma_addr_t cdtab_dma; + unsigned int num_l1_ents; }; struct arm_smmu_s1_cfg { struct arm_smmu_ctx_desc_cfg cdcfg; struct arm_smmu_ctx_desc cd; + u8 s1cdmax; }; struct arm_smmu_s2_cfg { @@ -1475,6 +1477,35 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu, cdptr[3] = cpu_to_le64(cfg->cd.mair); } +static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain) +{ + size_t l1size; + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; + struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg; + + cdcfg->num_l1_ents = 1UL << cfg->s1cdmax; + l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3); + cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma, + GFP_KERNEL); + if (!cdcfg->cdtab) { + dev_warn(smmu->dev, "failed to allocate context descriptor\n"); + return -ENOMEM; + } + return 0; +} + +static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain) +{ + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg; + size_t l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3); + + dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma); + cdcfg->cdtab_dma = 0; + cdcfg->cdtab = NULL; +} + /* Stream table manipulation functions */ static void arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc) @@ -2118,11 +2149,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; if (cfg->cdcfg.cdtab) { - dmam_free_coherent(smmu_domain->smmu->dev, - CTXDESC_CD_DWORDS << 3, - cfg->cdcfg.cdtab, - cfg->cdcfg.cdtab_dma); - + arm_smmu_free_cd_tables(smmu_domain); arm_smmu_bitmap_free(smmu->asid_map, cfg->cd.asid); } } else { @@ -2147,15 +2174,9 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, if (asid < 0) return asid; - cfg->cdcfg.cdtab = dmam_alloc_coherent(smmu->dev, - CTXDESC_CD_DWORDS << 3, - &cfg->cdcfg.cdtab_dma, - GFP_KERNEL); - if (!cfg->cdcfg.cdtab) { - dev_warn(smmu->dev, "failed to allocate context descriptor\n"); - ret = -ENOMEM; + ret = arm_smmu_alloc_cd_tables(smmu_domain); + if (ret) goto out_free_asid; - } cfg->cd.asid = (u16)asid; cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; From 87f42391f6a57c252453aff2005fe2ec74f6873b Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 15 Jan 2020 13:52:33 +0100 Subject: [PATCH 21/28] iommu/arm-smmu-v3: Add support for Substream IDs At the moment, the SMMUv3 driver implements only one stage-1 or stage-2 page directory per device. However SMMUv3 allows more than one address space for some devices, by providing multiple stage-1 page directories. In addition to the Stream ID (SID), that identifies a device, we can now have Substream IDs (SSID) identifying an address space. In PCIe, SID is called Requester ID (RID) and SSID is called Process Address-Space ID (PASID). A complete stage-1 walk goes through the context descriptor table: Stream tables Ctx. Desc. tables Page tables +--------+ ,------->+-------+ ,------->+-------+ : : | : : | : : +--------+ | +-------+ | +-------+ SID->| STE |---' SSID->| CD |---' IOVA->| PTE |--> IPA +--------+ +-------+ +-------+ : : : : : : +--------+ +-------+ +-------+ Rewrite arm_smmu_write_ctx_desc() to modify context descriptor table entries. To keep things simple we only implement one level of context descriptor tables here, but as with stream and page tables, an SSID can be split to index multiple levels of tables. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 139 +++++++++++++++++++++++++++++------- 1 file changed, 114 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 3f7248d98e72..b0e63de9009e 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -226,6 +226,11 @@ #define STRTAB_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6) #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59) +#define STRTAB_STE_1_S1DSS GENMASK_ULL(1, 0) +#define STRTAB_STE_1_S1DSS_TERMINATE 0x0 +#define STRTAB_STE_1_S1DSS_BYPASS 0x1 +#define STRTAB_STE_1_S1DSS_SSID0 0x2 + #define STRTAB_STE_1_S1C_CACHE_NC 0UL #define STRTAB_STE_1_S1C_CACHE_WBRA 1UL #define STRTAB_STE_1_S1C_CACHE_WT 2UL @@ -322,6 +327,7 @@ #define CMDQ_PREFETCH_1_SIZE GENMASK_ULL(4, 0) #define CMDQ_PREFETCH_1_ADDR_MASK GENMASK_ULL(63, 12) +#define CMDQ_CFGI_0_SSID GENMASK_ULL(31, 12) #define CMDQ_CFGI_0_SID GENMASK_ULL(63, 32) #define CMDQ_CFGI_1_LEAF (1UL << 0) #define CMDQ_CFGI_1_RANGE GENMASK_ULL(4, 0) @@ -435,8 +441,11 @@ struct arm_smmu_cmdq_ent { #define CMDQ_OP_CFGI_STE 0x3 #define CMDQ_OP_CFGI_ALL 0x4 + #define CMDQ_OP_CFGI_CD 0x5 + #define CMDQ_OP_CFGI_CD_ALL 0x6 struct { u32 sid; + u32 ssid; union { bool leaf; u8 span; @@ -558,6 +567,7 @@ struct arm_smmu_ctx_desc_cfg { struct arm_smmu_s1_cfg { struct arm_smmu_ctx_desc_cfg cdcfg; struct arm_smmu_ctx_desc cd; + u8 s1fmt; u8 s1cdmax; }; @@ -850,10 +860,16 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size); cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK; break; + case CMDQ_OP_CFGI_CD: + cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid); + /* Fallthrough */ case CMDQ_OP_CFGI_STE: cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid); cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf); break; + case CMDQ_OP_CFGI_CD_ALL: + cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid); + break; case CMDQ_OP_CFGI_ALL: /* Cover the entire SID range */ cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31); @@ -1447,34 +1463,101 @@ static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) } /* Context descriptor manipulation functions */ -static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu, - struct arm_smmu_s1_cfg *cfg) +static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, + int ssid, bool leaf) { + size_t i; + unsigned long flags; + struct arm_smmu_master *master; + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_cmdq_ent cmd = { + .opcode = CMDQ_OP_CFGI_CD, + .cfgi = { + .ssid = ssid, + .leaf = leaf, + }, + }; + + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_for_each_entry(master, &smmu_domain->devices, domain_head) { + for (i = 0; i < master->num_sids; i++) { + cmd.cfgi.sid = master->sids[i]; + arm_smmu_cmdq_issue_cmd(smmu, &cmd); + } + } + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + + arm_smmu_cmdq_issue_sync(smmu); +} + +static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, + int ssid, struct arm_smmu_ctx_desc *cd) +{ + /* + * This function handles the following cases: + * + * (1) Install primary CD, for normal DMA traffic (SSID = 0). + * (2) Install a secondary CD, for SID+SSID traffic. + * (3) Update ASID of a CD. Atomically write the first 64 bits of the + * CD, then invalidate the old entry and mappings. + * (4) Remove a secondary CD. + */ u64 val; - __le64 *cdptr = cfg->cdcfg.cdtab; + bool cd_live; + struct arm_smmu_device *smmu = smmu_domain->smmu; + __le64 *cdptr = smmu_domain->s1_cfg.cdcfg.cdtab + ssid * + CTXDESC_CD_DWORDS; + + val = le64_to_cpu(cdptr[0]); + cd_live = !!(val & CTXDESC_CD_0_V); + + if (!cd) { /* (4) */ + val = 0; + } else if (cd_live) { /* (3) */ + val &= ~CTXDESC_CD_0_ASID; + val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid); + /* + * Until CD+TLB invalidation, both ASIDs may be used for tagging + * this substream's traffic + */ + } else { /* (1) and (2) */ + cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK); + cdptr[2] = 0; + cdptr[3] = cpu_to_le64(cd->mair); + + /* + * STE is live, and the SMMU might read dwords of this CD in any + * order. Ensure that it observes valid values before reading + * V=1. + */ + arm_smmu_sync_cd(smmu_domain, ssid, true); + + val = cd->tcr | +#ifdef __BIG_ENDIAN + CTXDESC_CD_0_ENDI | +#endif + CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET | + CTXDESC_CD_0_AA64 | + FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) | + CTXDESC_CD_0_V; + + /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */ + if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE) + val |= CTXDESC_CD_0_S; + } /* - * We don't need to issue any invalidation here, as we'll invalidate - * the STE when installing the new entry anyway. + * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3 + * "Configuration structures and configuration invalidation completion" + * + * The size of single-copy atomic reads made by the SMMU is + * IMPLEMENTATION DEFINED but must be at least 64 bits. Any single + * field within an aligned 64-bit span of a structure can be altered + * without first making the structure invalid. */ - val = cfg->cd.tcr | -#ifdef __BIG_ENDIAN - CTXDESC_CD_0_ENDI | -#endif - CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET | - CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) | - CTXDESC_CD_0_V; - - /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */ - if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE) - val |= CTXDESC_CD_0_S; - - cdptr[0] = cpu_to_le64(val); - - val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK; - cdptr[1] = cpu_to_le64(val); - - cdptr[3] = cpu_to_le64(cfg->cd.mair); + WRITE_ONCE(cdptr[0], cpu_to_le64(val)); + arm_smmu_sync_cd(smmu_domain, ssid, true); + return 0; } static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain) @@ -1484,6 +1567,8 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain) struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg; + cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR; + cdcfg->num_l1_ents = 1UL << cfg->s1cdmax; l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3); cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma, @@ -1625,6 +1710,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, if (s1_cfg) { BUG_ON(ste_live); dst[1] = cpu_to_le64( + FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) | FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) | @@ -1635,7 +1721,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | - FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS); + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | + FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) | + FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt); } if (s2_cfg) { @@ -2455,7 +2543,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) master->ats_enabled = arm_smmu_ats_supported(master); if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) - arm_smmu_write_ctx_desc(smmu, &smmu_domain->s1_cfg); + arm_smmu_write_ctx_desc(smmu_domain, 0, + &smmu_domain->s1_cfg.cd); arm_smmu_install_ste_for_dev(master); From 2505ec6f85ad864b76127f9662c6f3f315dff14f Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 15 Jan 2020 13:52:34 +0100 Subject: [PATCH 22/28] iommu/arm-smmu-v3: Propagate ssid_bits Now that we support substream IDs, initialize s1cdmax with the number of SSID bits supported by a master and the SMMU. Context descriptor tables are allocated once for the first master attached to a domain. Therefore attaching multiple devices with different SSID sizes is tricky, and we currently don't support it. As a future improvement it would be nice to at least support attaching a SSID-capable device to a domain that isn't using SSID, by reallocating the SSID table. This would allow supporting a SSID-capable device that is in the same IOMMU group as a bridge, for example. Varying SSID size is less of a concern, since the PCIe specification "highly recommends" that devices supporting PASID implement all 20 bits of it. Tested-by: Zhangfei Gao Reviewed-by: Eric Auger Reviewed-by: Jonathan Cameron Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index b0e63de9009e..88a48256c11e 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2250,6 +2250,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) } static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, + struct arm_smmu_master *master, struct io_pgtable_cfg *pgtbl_cfg) { int ret; @@ -2262,6 +2263,8 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, if (asid < 0) return asid; + cfg->s1cdmax = master->ssid_bits; + ret = arm_smmu_alloc_cd_tables(smmu_domain); if (ret) goto out_free_asid; @@ -2284,6 +2287,7 @@ out_free_asid: } static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, + struct arm_smmu_master *master, struct io_pgtable_cfg *pgtbl_cfg) { int vmid; @@ -2308,7 +2312,8 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, return 0; } -static int arm_smmu_domain_finalise(struct iommu_domain *domain) +static int arm_smmu_domain_finalise(struct iommu_domain *domain, + struct arm_smmu_master *master) { int ret; unsigned long ias, oas; @@ -2316,6 +2321,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) struct io_pgtable_cfg pgtbl_cfg; struct io_pgtable_ops *pgtbl_ops; int (*finalise_stage_fn)(struct arm_smmu_domain *, + struct arm_smmu_master *, struct io_pgtable_cfg *); struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; @@ -2370,7 +2376,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1; domain->geometry.force_aperture = true; - ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg); + ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg); if (ret < 0) { free_io_pgtable_ops(pgtbl_ops); return ret; @@ -2523,7 +2529,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) if (!smmu_domain->smmu) { smmu_domain->smmu = smmu; - ret = arm_smmu_domain_finalise(domain); + ret = arm_smmu_domain_finalise(domain, master); if (ret) { smmu_domain->smmu = NULL; goto out_unlock; @@ -2535,6 +2541,13 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) dev_name(smmu->dev)); ret = -ENXIO; goto out_unlock; + } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && + master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) { + dev_err(dev, + "cannot attach to incompatible domain (%u SSID bits != %u)\n", + smmu_domain->s1_cfg.s1cdmax, master->ssid_bits); + ret = -EINVAL; + goto out_unlock; } master->domain = smmu_domain; From 492ddc79e07ed16a4ec4b6f57787273b2e4c26c1 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 15 Jan 2020 13:52:35 +0100 Subject: [PATCH 23/28] iommu/arm-smmu-v3: Prepare for handling arm_smmu_write_ctx_desc() failure Second-level context descriptor tables will be allocated lazily in arm_smmu_write_ctx_desc(). Help with handling allocation failure by moving the CD write into arm_smmu_domain_finalise_s1(). Reviewed-by: Eric Auger Reviewed-by: Jonathan Cameron Signed-off-by: Jean-Philippe Brucker [will: Add comment per discussion on list] Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 88a48256c11e..a224b915fc59 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2279,8 +2279,20 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) | CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64; cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair; + + /* + * Note that this will end up calling arm_smmu_sync_cd() before + * the master has been added to the devices list for this domain. + * This isn't an issue because the STE hasn't been installed yet. + */ + ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd); + if (ret) + goto out_free_cd_tables; + return 0; +out_free_cd_tables: + arm_smmu_free_cd_tables(smmu_domain); out_free_asid: arm_smmu_bitmap_free(smmu->asid_map, asid); return ret; @@ -2555,10 +2567,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS) master->ats_enabled = arm_smmu_ats_supported(master); - if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) - arm_smmu_write_ctx_desc(smmu_domain, 0, - &smmu_domain->s1_cfg.cd); - arm_smmu_install_ste_for_dev(master); spin_lock_irqsave(&smmu_domain->devices_lock, flags); From 73af06f589fe5656b07baa92e390d2d48464b18a Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 15 Jan 2020 13:52:36 +0100 Subject: [PATCH 24/28] iommu/arm-smmu-v3: Add second level of context descriptor table The SMMU can support up to 20 bits of SSID. Add a second level of page tables to accommodate this. Devices that support more than 1024 SSIDs now have a table of 1024 L1 entries (8kB), pointing to tables of 1024 context descriptors (64kB), allocated on demand. Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 142 ++++++++++++++++++++++++++++++++++-- 1 file changed, 134 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index a224b915fc59..c2d1325c9d39 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -223,6 +223,7 @@ #define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4) #define STRTAB_STE_0_S1FMT_LINEAR 0 +#define STRTAB_STE_0_S1FMT_64K_L2 2 #define STRTAB_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6) #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59) @@ -269,7 +270,20 @@ #define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4) -/* Context descriptor (stage-1 only) */ +/* + * Context descriptors. + * + * Linear: when less than 1024 SSIDs are supported + * 2lvl: at most 1024 L1 entries, + * 1024 lazy entries per table. + */ +#define CTXDESC_SPLIT 10 +#define CTXDESC_L2_ENTRIES (1 << CTXDESC_SPLIT) + +#define CTXDESC_L1_DESC_DWORDS 1 +#define CTXDESC_L1_DESC_V (1UL << 0) +#define CTXDESC_L1_DESC_L2PTR_MASK GENMASK_ULL(51, 12) + #define CTXDESC_CD_DWORDS 8 #define CTXDESC_CD_0_TCR_T0SZ GENMASK_ULL(5, 0) #define CTXDESC_CD_0_TCR_TG0 GENMASK_ULL(7, 6) @@ -558,9 +572,15 @@ struct arm_smmu_ctx_desc { u64 mair; }; +struct arm_smmu_l1_ctx_desc { + __le64 *l2ptr; + dma_addr_t l2ptr_dma; +}; + struct arm_smmu_ctx_desc_cfg { __le64 *cdtab; dma_addr_t cdtab_dma; + struct arm_smmu_l1_ctx_desc *l1_desc; unsigned int num_l1_ents; }; @@ -1490,6 +1510,57 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, arm_smmu_cmdq_issue_sync(smmu); } +static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu, + struct arm_smmu_l1_ctx_desc *l1_desc) +{ + size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3); + + l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, + &l1_desc->l2ptr_dma, GFP_KERNEL); + if (!l1_desc->l2ptr) { + dev_warn(smmu->dev, + "failed to allocate context descriptor table\n"); + return -ENOMEM; + } + return 0; +} + +static void arm_smmu_write_cd_l1_desc(__le64 *dst, + struct arm_smmu_l1_ctx_desc *l1_desc) +{ + u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | + CTXDESC_L1_DESC_V; + + WRITE_ONCE(*dst, cpu_to_le64(val)); +} + +static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain, + u32 ssid) +{ + __le64 *l1ptr; + unsigned int idx; + struct arm_smmu_l1_ctx_desc *l1_desc; + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg; + + if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR) + return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS; + + idx = ssid >> CTXDESC_SPLIT; + l1_desc = &cdcfg->l1_desc[idx]; + if (!l1_desc->l2ptr) { + if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc)) + return NULL; + + l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS; + arm_smmu_write_cd_l1_desc(l1ptr, l1_desc); + /* An invalid L1CD can be cached */ + arm_smmu_sync_cd(smmu_domain, ssid, false); + } + idx = ssid & (CTXDESC_L2_ENTRIES - 1); + return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS; +} + static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, struct arm_smmu_ctx_desc *cd) { @@ -1504,9 +1575,15 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, */ u64 val; bool cd_live; + __le64 *cdptr; struct arm_smmu_device *smmu = smmu_domain->smmu; - __le64 *cdptr = smmu_domain->s1_cfg.cdcfg.cdtab + ssid * - CTXDESC_CD_DWORDS; + + if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax))) + return -E2BIG; + + cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid); + if (!cdptr) + return -ENOMEM; val = le64_to_cpu(cdptr[0]); cd_live = !!(val & CTXDESC_CD_0_V); @@ -1562,29 +1639,78 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain) { + int ret; size_t l1size; + size_t max_contexts; struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg; - cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR; + max_contexts = 1 << cfg->s1cdmax; + + if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) || + max_contexts <= CTXDESC_L2_ENTRIES) { + cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR; + cdcfg->num_l1_ents = max_contexts; + + l1size = max_contexts * (CTXDESC_CD_DWORDS << 3); + } else { + cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2; + cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts, + CTXDESC_L2_ENTRIES); + + cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents, + sizeof(*cdcfg->l1_desc), + GFP_KERNEL); + if (!cdcfg->l1_desc) + return -ENOMEM; + + l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); + } - cdcfg->num_l1_ents = 1UL << cfg->s1cdmax; - l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3); cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma, GFP_KERNEL); if (!cdcfg->cdtab) { dev_warn(smmu->dev, "failed to allocate context descriptor\n"); - return -ENOMEM; + ret = -ENOMEM; + goto err_free_l1; } + return 0; + +err_free_l1: + if (cdcfg->l1_desc) { + devm_kfree(smmu->dev, cdcfg->l1_desc); + cdcfg->l1_desc = NULL; + } + return ret; } static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain) { + int i; + size_t size, l1size; struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg; - size_t l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3); + + if (cdcfg->l1_desc) { + size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3); + + for (i = 0; i < cdcfg->num_l1_ents; i++) { + if (!cdcfg->l1_desc[i].l2ptr) + continue; + + dmam_free_coherent(smmu->dev, size, + cdcfg->l1_desc[i].l2ptr, + cdcfg->l1_desc[i].l2ptr_dma); + } + devm_kfree(smmu->dev, cdcfg->l1_desc); + cdcfg->l1_desc = NULL; + + l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); + } else { + l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3); + } dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma); cdcfg->cdtab_dma = 0; From d71e01716b3606a6648df7e5646ae12c75babde4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 15 Jan 2020 15:21:47 +0000 Subject: [PATCH 25/28] iommu/arm-smmu-v3: Use WRITE_ONCE() when changing validity of an STE If, for some bizarre reason, the compiler decided to split up the write of STE DWORD 0, we could end up making a partial structure valid. Although this probably won't happen, follow the example of the context-descriptor code and use WRITE_ONCE() to ensure atomicity of the write. Reported-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index c2d1325c9d39..3aa6538d85e2 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1873,7 +1873,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, STRTAB_STE_1_EATS_TRANS)); arm_smmu_sync_ste_for_sid(smmu, sid); - dst[0] = cpu_to_le64(val); + /* See comment in arm_smmu_write_ctx_desc() */ + WRITE_ONCE(dst[0], cpu_to_le64(val)); arm_smmu_sync_ste_for_sid(smmu, sid); /* It's likely that we'll want to use the new STE soon */ From a2be6218e649fb6e444e468dae5d177eea64d328 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 15 Jan 2020 13:52:37 +0100 Subject: [PATCH 26/28] iommu/arm-smmu-v3: Improve add_device() error handling Let add_device() clean up after itself. The iommu_bus_init() function does call remove_device() on error, but other sites (e.g. of_iommu) do not. Don't free level-2 stream tables because we'd have to track if we allocated each of them or if they are used by other endpoints. It's not worth the hassle since they are managed resources. Reviewed-by: Eric Auger Reviewed-by: Jonathan Cameron Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 3aa6538d85e2..59dbd63a2aae 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2821,14 +2821,16 @@ static int arm_smmu_add_device(struct device *dev) for (i = 0; i < master->num_sids; i++) { u32 sid = master->sids[i]; - if (!arm_smmu_sid_in_range(smmu, sid)) - return -ERANGE; + if (!arm_smmu_sid_in_range(smmu, sid)) { + ret = -ERANGE; + goto err_free_master; + } /* Ensure l2 strtab is initialised */ if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) { ret = arm_smmu_init_l2_strtab(smmu, sid); if (ret) - return ret; + goto err_free_master; } } @@ -2838,13 +2840,25 @@ static int arm_smmu_add_device(struct device *dev) master->ssid_bits = min_t(u8, master->ssid_bits, CTXDESC_LINEAR_CDMAX); + ret = iommu_device_link(&smmu->iommu, dev); + if (ret) + goto err_free_master; + group = iommu_group_get_for_dev(dev); - if (!IS_ERR(group)) { - iommu_group_put(group); - iommu_device_link(&smmu->iommu, dev); + if (IS_ERR(group)) { + ret = PTR_ERR(group); + goto err_unlink; } - return PTR_ERR_OR_ZERO(group); + iommu_group_put(group); + return 0; + +err_unlink: + iommu_device_unlink(&smmu->iommu, dev); +err_free_master: + kfree(master); + fwspec->iommu_priv = NULL; + return ret; } static void arm_smmu_remove_device(struct device *dev) From 92c1d360dcda0ebd49253c0899bfeeb117be91fe Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 15 Jan 2020 15:35:16 +0000 Subject: [PATCH 27/28] iommu/arm-smmu-v3: Return -EBUSY when trying to re-add a device Although we WARN in arm_smmu_add_device() if the device being added has been added already without a subsequent call to arm_smmu_remove_device(), we still continue half-heartedly, initialising the stream-table for any new StreamIDs that may have magically appeared and re-establishing device links that should still be there from last time. Given that calling ->add_device() twice without removing the device in the meantime is indicative of an error in the caller, just return -EBUSY after warning. Cc: Robin Murphy Cc: Jean Philippe-Brucker Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 59dbd63a2aae..5e04c1f3992a 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2794,28 +2794,23 @@ static int arm_smmu_add_device(struct device *dev) if (!fwspec || fwspec->ops != &arm_smmu_ops) return -ENODEV; - /* - * We _can_ actually withstand dodgy bus code re-calling add_device() - * without an intervening remove_device()/of_xlate() sequence, but - * we're not going to do so quietly... - */ - if (WARN_ON_ONCE(fwspec->iommu_priv)) { - master = fwspec->iommu_priv; - smmu = master->smmu; - } else { - smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode); - if (!smmu) - return -ENODEV; - master = kzalloc(sizeof(*master), GFP_KERNEL); - if (!master) - return -ENOMEM; - master->dev = dev; - master->smmu = smmu; - master->sids = fwspec->ids; - master->num_sids = fwspec->num_ids; - fwspec->iommu_priv = master; - } + if (WARN_ON_ONCE(fwspec->iommu_priv)) + return -EBUSY; + + smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode); + if (!smmu) + return -ENODEV; + + master = kzalloc(sizeof(*master), GFP_KERNEL); + if (!master) + return -ENOMEM; + + master->dev = dev; + master->smmu = smmu; + master->sids = fwspec->ids; + master->num_sids = fwspec->num_ids; + fwspec->iommu_priv = master; /* Check the SIDs are in range of the SMMU and our stream table */ for (i = 0; i < master->num_sids; i++) { From 5a4549fd790500d7db94b7d2af6d60cee42110c3 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 15 Jan 2020 13:52:38 +0100 Subject: [PATCH 28/28] PCI/ATS: Add PASID stubs The SMMUv3 driver, which may be built without CONFIG_PCI, will soon gain PASID support. Partially revert commit c6e9aefbf9db ("PCI/ATS: Remove unused PRI and PASID stubs") to re-introduce the PASID stubs, and avoid adding more #ifdefs to the SMMU driver. Acked-by: Bjorn Helgaas Reviewed-by: Eric Auger Reviewed-by: Jonathan Cameron Signed-off-by: Jean-Philippe Brucker Signed-off-by: Will Deacon --- include/linux/pci-ats.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index 5d62e78946a3..d08f0869f121 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -33,6 +33,9 @@ void pci_disable_pasid(struct pci_dev *pdev); int pci_pasid_features(struct pci_dev *pdev); int pci_max_pasids(struct pci_dev *pdev); #else /* CONFIG_PCI_PASID */ +static inline int pci_enable_pasid(struct pci_dev *pdev, int features) +{ return -EINVAL; } +static inline void pci_disable_pasid(struct pci_dev *pdev) { } static inline int pci_pasid_features(struct pci_dev *pdev) { return -EINVAL; } static inline int pci_max_pasids(struct pci_dev *pdev)