From d3867e7148318e12b5d69b64950622f5ed06fe86 Mon Sep 17 00:00:00 2001 From: Mostafa Saleh Date: Tue, 4 Jun 2024 18:52:18 +0000 Subject: [PATCH 01/72] iommu/arm-smmu-v3: Avoid uninitialized asid in case of error Static checker is complaining about the ASID possibly set uninitialized. This only happens in case of error and this value would be ignored anyway. A simple fix would be just to initialize the local variable to zero, this path will only be reached on the first attach to a domain where the CD is already initialized to zero. This avoids having to bloat the function with an error path. Closes: https://lore.kernel.org/linux-iommu/849e3d77-0a3c-43c4-878d-a0e061c8cd61@moroto.mountain/T/#u Reported-by: Dan Carpenter Signed-off-by: Mostafa Saleh Fixes: 04905c17f648 ("iommu/arm-smmu-v3: Build the whole CD in arm_smmu_make_s1_cd()") Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240604185218.2602058-1-smostafa@google.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index ab415e107054..f456bcf1890b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2302,7 +2302,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu, struct arm_smmu_domain *smmu_domain) { int ret; - u32 asid; + u32 asid = 0; struct arm_smmu_ctx_desc *cd = &smmu_domain->cd; refcount_set(&cd->refs, 1); From 16c0bad7ae04e4a1e7361fbb91573248de06a008 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 8 May 2024 18:15:55 +0300 Subject: [PATCH 02/72] iommu/arm-smmu-v3: Use *-y instead of *-objs in Makefile *-objs suffix is reserved rather for (user-space) host programs while usually *-y suffix is used for kernel drivers (although *-objs works for that purpose for now). Let's correct the old usages of *-objs in Makefiles. 
Signed-off-by: Andy Shevchenko Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240508151611.1444352-1-andriy.shevchenko@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile index 014a997753a8..355173d1441d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/Makefile +++ b/drivers/iommu/arm/arm-smmu-v3/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o -arm_smmu_v3-objs-y += arm-smmu-v3.o -arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o -arm_smmu_v3-objs := $(arm_smmu_v3-objs-y) +arm_smmu_v3-y := arm-smmu-v3.o +arm_smmu_v3-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o obj-$(CONFIG_ARM_SMMU_V3_KUNIT_TEST) += arm-smmu-v3-test.o From e5af06b7cfb3c7541bcbd52a4b090b7e976f9270 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Sat, 25 May 2024 10:58:52 -0700 Subject: [PATCH 03/72] dt-bindings: arm-smmu: Fix Qualcomm SC8180X binding Update the Qualcomm SC8180X SMMU binding to allow describing the Adreno SMMU, with its three clocks. 
Signed-off-by: Bjorn Andersson Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240525-sc8180x-adreno-smmu-binding-fix-v1-1-e3c00aa9b9d4@quicinc.com Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 5c130cf06a21..7f584ce4bb22 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -86,6 +86,7 @@ properties: - qcom,qcm2290-smmu-500 - qcom,sa8775p-smmu-500 - qcom,sc7280-smmu-500 + - qcom,sc8180x-smmu-500 - qcom,sc8280xp-smmu-500 - qcom,sm6115-smmu-500 - qcom,sm6125-smmu-500 @@ -415,6 +416,7 @@ allOf: compatible: contains: enum: + - qcom,sc8180x-smmu-500 - qcom,sm6350-smmu-v2 - qcom,sm7150-smmu-v2 - qcom,sm8150-smmu-500 @@ -550,7 +552,6 @@ allOf: - nvidia,smmu-500 - qcom,qdu1000-smmu-500 - qcom,sc7180-smmu-500 - - qcom,sc8180x-smmu-500 - qcom,sdm670-smmu-500 - qcom,sdm845-smmu-500 - qcom,sdx55-smmu-500 From b5c29fba72a6c950655d1cb0f6aa16b60dc83be7 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 28 May 2024 12:54:58 +0800 Subject: [PATCH 04/72] iommu: Make iommu_sva_domain_alloc() static iommu_sva_domain_alloc() is only called in iommu-sva.c, hence make it static. On the other hand, iommu_sva_domain_alloc() should not return NULL anymore after commit <80af5a452024> ("iommu: Add ops->domain_alloc_sva()"), the removal of inline code avoids potential confusion. 
Fixes: 80af5a452024 ("iommu: Add ops->domain_alloc_sva()") Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240528045458.81458-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu-sva.c | 6 ++++-- include/linux/iommu.h | 8 -------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 18a35e798b72..25e581299226 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -10,6 +10,8 @@ #include "iommu-priv.h" static DEFINE_MUTEX(iommu_sva_lock); +static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm); /* Allocate a PASID for the mm within range (inclusive) */ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct device *dev) @@ -277,8 +279,8 @@ static int iommu_sva_iopf_handler(struct iopf_group *group) return 0; } -struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, - struct mm_struct *mm) +static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm) { const struct iommu_ops *ops = dev_iommu_ops(dev); struct iommu_domain *domain; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 17b3f36ad843..1cd19b903354 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1527,8 +1527,6 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm); void iommu_sva_unbind_device(struct iommu_sva *handle); u32 iommu_sva_get_pasid(struct iommu_sva *handle); -struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, - struct mm_struct *mm); #else static inline struct iommu_sva * iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) @@ -1553,12 +1551,6 @@ static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm) } static inline void mm_pasid_drop(struct mm_struct *mm) {} - -static inline struct iommu_domain * -iommu_sva_domain_alloc(struct 
device *dev, struct mm_struct *mm) -{ - return NULL; -} #endif /* CONFIG_IOMMU_SVA */ #ifdef CONFIG_IOMMU_IOPF From 8d485a69603f667032d61daf4f1cb9464f315e1c Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 4 Jun 2024 13:39:09 +0100 Subject: [PATCH 05/72] iommu/dma: Prune redundant pgprot arguments Somewhere amongst previous refactorings, the pgprot value in __iommu_dma_alloc_noncontiguous() became entirely unused, and the one used in iommu_dma_alloc_remap() can be computed locally rather than by its one remaining caller. Clean 'em up. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/c2a81b72df59a71a13f8bad94f834e627c4c93dd.1717504749.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 43520e7275cc..18603d63ad3f 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -939,8 +939,7 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev, * but an IOMMU which supports smaller pages might not map the whole thing. 
*/ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev, - size_t size, struct sg_table *sgt, gfp_t gfp, pgprot_t prot, - unsigned long attrs) + size_t size, struct sg_table *sgt, gfp_t gfp, unsigned long attrs) { struct iommu_domain *domain = iommu_get_dma_domain(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; @@ -1014,15 +1013,14 @@ out_free_pages: } static void *iommu_dma_alloc_remap(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot, - unsigned long attrs) + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { struct page **pages; struct sg_table sgt; void *vaddr; + pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs); - pages = __iommu_dma_alloc_noncontiguous(dev, size, &sgt, gfp, prot, - attrs); + pages = __iommu_dma_alloc_noncontiguous(dev, size, &sgt, gfp, attrs); if (!pages) return NULL; *dma_handle = sgt.sgl->dma_address; @@ -1049,8 +1047,7 @@ static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, if (!sh) return NULL; - sh->pages = __iommu_dma_alloc_noncontiguous(dev, size, &sh->sgt, gfp, - PAGE_KERNEL, attrs); + sh->pages = __iommu_dma_alloc_noncontiguous(dev, size, &sh->sgt, gfp, attrs); if (!sh->pages) { kfree(sh); return NULL; @@ -1619,8 +1616,7 @@ static void *iommu_dma_alloc(struct device *dev, size_t size, if (gfpflags_allow_blocking(gfp) && !(attrs & DMA_ATTR_FORCE_CONTIGUOUS)) { - return iommu_dma_alloc_remap(dev, size, handle, gfp, - dma_pgprot(dev, PAGE_KERNEL, attrs), attrs); + return iommu_dma_alloc_remap(dev, size, handle, gfp, attrs); } if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && From c94ad1d5e3885bd4fa6abb695baf5a8f5c3c309c Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Thu, 13 Jun 2024 18:14:36 -0700 Subject: [PATCH 06/72] iommu/iova: Add missing MODULE_DESCRIPTION() macro With ARCH=arm, make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/iommu/iova.o Add the missing invocation of the 
MODULE_DESCRIPTION() macro. Signed-off-by: Jeff Johnson Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20240613-md-arm-drivers-iommu-v1-1-1fe0bd953119@quicinc.com Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index d59d0ea2fd21..16c6adff3eb7 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -1000,4 +1000,5 @@ void iova_cache_put(void) EXPORT_SYMBOL_GPL(iova_cache_put); MODULE_AUTHOR("Anil S Keshavamurthy "); +MODULE_DESCRIPTION("IOMMU I/O Virtual Address management"); MODULE_LICENSE("GPL"); From 927c70c93d929f4c2dcaf72f51b31bb7d118a51a Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sun, 16 Jun 2024 23:40:52 +0100 Subject: [PATCH 07/72] iommu: sun50i: clear bypass register The Allwinner H6 IOMMU has a bypass register, which allows circumventing the page tables for each possible master. The reset value for this register is 0, which disables the bypass. The Allwinner H616 IOMMU resets this register to 0x7f, which activates the bypass for all masters, which is not what we want. Always clear this register to 0, to enforce the usage of page tables, and make this driver compatible with the H616 in this respect. 
Signed-off-by: Jernej Skrabec Signed-off-by: Andre Przywara Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20240616224056.29159-2-andre.przywara@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/sun50i-iommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index c519b991749d..dd3f07384624 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -452,6 +452,7 @@ static int sun50i_iommu_enable(struct sun50i_iommu *iommu) IOMMU_TLB_PREFETCH_MASTER_ENABLE(3) | IOMMU_TLB_PREFETCH_MASTER_ENABLE(4) | IOMMU_TLB_PREFETCH_MASTER_ENABLE(5)); + iommu_write(iommu, IOMMU_BYPASS_REG, 0); iommu_write(iommu, IOMMU_INT_ENABLE_REG, IOMMU_INT_MASK); iommu_write(iommu, IOMMU_DM_AUT_CTRL_REG(SUN50I_IOMMU_ACI_NONE), IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 0) | From 7b9331a3ae93adfae54c6a56d23513e1f7db5dcb Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Sun, 16 Jun 2024 23:40:53 +0100 Subject: [PATCH 08/72] iommu: sun50i: allocate page tables from below 4 GiB The Allwinner IOMMU is a strict 32-bit device, with its input addresses, the page table root pointer as well as both levels' page tables and also the target addresses all required to be below 4GB. The Allwinner H6 SoC only supports 32-bit worth of physical addresses anyway, so this isn't a problem so far, but the H616 and later SoCs extend the PA space beyond 32 bit to accommodate more DRAM. To make sure we stay within the 32-bit PA range required by the IOMMU, force the memory for the page tables to come from below 4GB by using allocations with the DMA32 flag. Also reject any attempt to map target addresses beyond 4GB, and print a warning to give users a hint when this fails. 
Signed-off-by: Andre Przywara Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20240616224056.29159-3-andre.przywara@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/sun50i-iommu.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index dd3f07384624..20a07f829085 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -602,6 +602,14 @@ static int sun50i_iommu_map(struct iommu_domain *domain, unsigned long iova, u32 *page_table, *pte_addr; int ret = 0; + /* the IOMMU can only handle 32-bit addresses, both input and output */ + if ((uint64_t)paddr >> 32) { + ret = -EINVAL; + dev_warn_once(iommu->dev, + "attempt to map address beyond 4GB\n"); + goto out; + } + page_table = sun50i_dte_get_page_table(sun50i_domain, iova, gfp); if (IS_ERR(page_table)) { ret = PTR_ERR(page_table); @@ -682,7 +690,8 @@ sun50i_iommu_domain_alloc_paging(struct device *dev) if (!sun50i_domain) return NULL; - sun50i_domain->dt = iommu_alloc_pages(GFP_KERNEL, get_order(DT_SIZE)); + sun50i_domain->dt = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32, + get_order(DT_SIZE)); if (!sun50i_domain->dt) goto err_free_domain; @@ -997,7 +1006,7 @@ static int sun50i_iommu_probe(struct platform_device *pdev) iommu->pt_pool = kmem_cache_create(dev_name(&pdev->dev), PT_SIZE, PT_SIZE, - SLAB_HWCACHE_ALIGN, + SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA32, NULL); if (!iommu->pt_pool) return -ENOMEM; From 2d1d1969a7e5b0dd043df4a6ca02156717401856 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Sun, 16 Jun 2024 23:40:54 +0100 Subject: [PATCH 09/72] dt-bindings: iommu: add new compatible strings The Allwinner H616 and A523 contain IOMMU IP very similar to the H6, but use a different reset value for the bypass register, which makes them strictly speaking incompatible. Add a new compatible string for the H616, and a version for the A523, falling back to the H616. 
Signed-off-by: Andre Przywara Acked-by: Krzysztof Kozlowski Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20240616224056.29159-4-andre.przywara@arm.com Signed-off-by: Joerg Roedel --- .../bindings/iommu/allwinner,sun50i-h6-iommu.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iommu/allwinner,sun50i-h6-iommu.yaml b/Documentation/devicetree/bindings/iommu/allwinner,sun50i-h6-iommu.yaml index e20016f12017..a8409db4a3e3 100644 --- a/Documentation/devicetree/bindings/iommu/allwinner,sun50i-h6-iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/allwinner,sun50i-h6-iommu.yaml @@ -17,7 +17,12 @@ properties: The content of the cell is the master ID. compatible: - const: allwinner,sun50i-h6-iommu + oneOf: + - const: allwinner,sun50i-h6-iommu + - const: allwinner,sun50i-h616-iommu + - items: + - const: allwinner,sun55i-a523-iommu + - const: allwinner,sun50i-h616-iommu reg: maxItems: 1 From 8db07ce532c0b51fea974613002cdc6a27732929 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Sun, 16 Jun 2024 23:40:55 +0100 Subject: [PATCH 10/72] iommu: sun50i: Add H616 compatible string The IOMMU IP in the Allwinner H616 SoC is *almost* compatible with the H6, but uses a different reset value for the bypass register, and adds some more registers. While a driver *can* be written to support both variants (which we in fact do), the hardware itself is not fully compatible, so we require a separate compatible string. Add the new compatible string to the list, but without changing the behaviour, since the driver already supports both variants. 
Signed-off-by: Andre Przywara Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20240616224056.29159-5-andre.przywara@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/sun50i-iommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index 20a07f829085..8d8f11854676 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -1067,6 +1067,7 @@ err_free_cache: static const struct of_device_id sun50i_iommu_dt[] = { { .compatible = "allwinner,sun50i-h6-iommu", }, + { .compatible = "allwinner,sun50i-h616-iommu", }, { /* sentinel */ }, }; MODULE_DEVICE_TABLE(of, sun50i_iommu_dt); From 7f7f41c33bd400ab9975151e4a51f4e41a5db9ef Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 6 Jun 2024 15:15:12 +0200 Subject: [PATCH 11/72] dt-bindings: iommu: qcom,iommu: Add MSM8953 GPU IOMMU to SMMUv2 compatibles Add MSM8953 compatible string with "qcom,msm-iommu-v2" as fallback for the MSM8953 GPU IOMMU which is compatible with Qualcomm's secure fw "SMMU v2" implementation. 
Reviewed-by: Krzysztof Kozlowski Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20240606-topic-sm8953-upstream-smmu-gpu-v2-1-67be88007d87@linaro.org Signed-off-by: Joerg Roedel --- Documentation/devicetree/bindings/iommu/qcom,iommu.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml index a74eb899c381..571e5746d177 100644 --- a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml @@ -25,6 +25,7 @@ properties: - const: qcom,msm-iommu-v1 - items: - enum: + - qcom,msm8953-iommu - qcom,msm8976-iommu - const: qcom,msm-iommu-v2 From 9a448e453151ec4e4b98a914b463539e790dd198 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 22 May 2024 10:26:47 +0200 Subject: [PATCH 12/72] iommu/amd: Use try_cmpxchg64() in v2_alloc_pte() Use try_cmpxchg64() instead of cmpxchg64 (*ptr, old, new) != old in v2_alloc_pte(). cmpxchg returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). This is the same improvement as implemented for alloc_pte() in: commit 0d10fe759117 ("iommu/amd: Use try_cmpxchg64 in alloc_pte and free_clear_pte") Signed-off-by: Uros Bizjak Cc: Joerg Roedel Cc: Suravee Suthikulpanit Cc: Will Deacon Cc: Robin Murphy Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20240522082729.971123-1-ubizjak@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/io_pgtable_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index 78ac37c5ccc1..664e91c88748 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -158,7 +158,7 @@ static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova, __npte = set_pgtable_attr(page); /* pte could have been changed somewhere. 
*/ - if (cmpxchg64(pte, __pte, __npte) != __pte) + if (!try_cmpxchg64(pte, &__pte, __npte)) iommu_free_page(page); else if (IOMMU_PTE_PRESENT(__pte)) *updated = true; From 5c555f1f1c31f7dd60a7697be9bb0e98706bb10a Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 22 May 2024 10:26:48 +0200 Subject: [PATCH 13/72] iommu/vt-d: Use try_cmpxchg64() in intel_pasid_get_entry() Use try_cmpxchg64() instead of cmpxchg64 (*ptr, old, new) != old in intel_pasid_get_entry(). cmpxchg returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). Signed-off-by: Uros Bizjak Cc: David Woodhouse Cc: Lu Baolu Cc: Joerg Roedel Cc: Will Deacon Cc: Robin Murphy Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20240522082729.971123-2-ubizjak@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/pasid.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index abce19e2ad6f..9bf45bc4b967 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -146,6 +146,8 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) retry: entries = get_pasid_table_from_pde(&dir[dir_index]); if (!entries) { + u64 tmp; + entries = iommu_alloc_page_node(info->iommu->node, GFP_ATOMIC); if (!entries) return NULL; @@ -156,8 +158,9 @@ retry: * clear. However, this entry might be populated by others * while we are preparing it. Use theirs with a retry. 
*/ - if (cmpxchg64(&dir[dir_index].val, 0ULL, - (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) { + tmp = 0ULL; + if (!try_cmpxchg64(&dir[dir_index].val, &tmp, + (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) { iommu_free_page(entries); goto retry; } From b95a40122a8183873736e0506df8e3a881178099 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 22 May 2024 10:26:49 +0200 Subject: [PATCH 14/72] iommufd: Use atomic_long_try_cmpxchg() in incr_user_locked_vm() Use atomic_long_try_cmpxchg() instead of atomic_long_cmpxchg (*ptr, old, new) != old in incr_user_locked_vm(). cmpxchg returns success in ZF flag, so this change saves a compare after cmpxchg (and related move instruction in front of cmpxchg). Also, atomic_long_try_cmpxchg() implicitly assigns old *ptr value to "old" when cmpxchg fails. There is no need to re-read the value in the loop. Signed-off-by: Uros Bizjak Cc: Jason Gunthorpe Cc: Kevin Tian Cc: Joerg Roedel Cc: Will Deacon Cc: Robin Murphy Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240522082729.971123-3-ubizjak@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommufd/pages.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 528f356238b3..117f644a0c5b 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -809,13 +809,14 @@ static int incr_user_locked_vm(struct iopt_pages *pages, unsigned long npages) lock_limit = task_rlimit(pages->source_task, RLIMIT_MEMLOCK) >> PAGE_SHIFT; + + cur_pages = atomic_long_read(&pages->source_user->locked_vm); do { - cur_pages = atomic_long_read(&pages->source_user->locked_vm); new_pages = cur_pages + npages; if (new_pages > lock_limit) return -ENOMEM; - } while (atomic_long_cmpxchg(&pages->source_user->locked_vm, cur_pages, - new_pages) != cur_pages); + } while (!atomic_long_try_cmpxchg(&pages->source_user->locked_vm, + &cur_pages, new_pages)); return 0; } From 
678d79b98028ce2365b30e35479bea0e555c23d3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:32 -0300 Subject: [PATCH 15/72] iommu/arm-smmu-v3: Convert to domain_alloc_sva() This allows the driver to receive the mm and always a device during allocation. Later patches need this to properly set up the notifier when the domain is first allocated. Remove ops->domain_alloc() as SVA was the only remaining purpose. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/1-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 6 ++++-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 10 +--------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 8 +++----- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index e490ffb38015..28f8bf4327f6 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -656,13 +656,15 @@ static const struct iommu_domain_ops arm_smmu_sva_domain_ops = { .free = arm_smmu_sva_domain_free }; -struct iommu_domain *arm_smmu_sva_domain_alloc(void) +struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm) { struct iommu_domain *domain; domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) - return NULL; + return ERR_PTR(-ENOMEM); + domain->type = IOMMU_DOMAIN_SVA; domain->ops = &arm_smmu_sva_domain_ops; return domain; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index f456bcf1890b..05939f483098 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2237,14 +2237,6 @@ static bool 
arm_smmu_capable(struct device *dev, enum iommu_cap cap) } } -static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) -{ - - if (type == IOMMU_DOMAIN_SVA) - return arm_smmu_sva_domain_alloc(); - return ERR_PTR(-EOPNOTSUPP); -} - static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) { struct arm_smmu_domain *smmu_domain; @@ -3097,8 +3089,8 @@ static struct iommu_ops arm_smmu_ops = { .identity_domain = &arm_smmu_identity_domain, .blocked_domain = &arm_smmu_blocked_domain, .capable = arm_smmu_capable, - .domain_alloc = arm_smmu_domain_alloc, .domain_alloc_paging = arm_smmu_domain_alloc_paging, + .domain_alloc_sva = arm_smmu_sva_domain_alloc, .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, .device_group = arm_smmu_device_group, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 1242a086c9f9..b10712d3de66 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -802,7 +802,8 @@ int arm_smmu_master_enable_sva(struct arm_smmu_master *master); int arm_smmu_master_disable_sva(struct arm_smmu_master *master); bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master); void arm_smmu_sva_notifier_synchronize(void); -struct iommu_domain *arm_smmu_sva_domain_alloc(void); +struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm); void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t id); #else /* CONFIG_ARM_SMMU_V3_SVA */ @@ -838,10 +839,7 @@ static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master static inline void arm_smmu_sva_notifier_synchronize(void) {} -static inline struct iommu_domain *arm_smmu_sva_domain_alloc(void) -{ - return NULL; -} +#define arm_smmu_sva_domain_alloc NULL static inline void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, struct device *dev, From 
85f2fb6ef4137c631c9d2663716d998d7e4f164f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:33 -0300 Subject: [PATCH 16/72] iommu/arm-smmu-v3: Start building a generic PASID layer Add arm_smmu_set_pasid()/arm_smmu_remove_pasid() which are to be used by callers that already constructed the arm_smmu_cd they wish to program. These functions will encapsulate the shared logic to set up a CD entry that will be shared by SVA and S1 domain cases. Prior fixes had already moved most of this logic up into __arm_smmu_sva_bind(), move it to its final home. Following patches will relieve some of the remaining SVA restrictions: - The RID domain is an S1 domain and has already set up the STE to point to the CD table - The programmed PASID is the mm_get_enqcmd_pasid() - Nothing changes while SVA is running (sva_enable) SVA invalidation will still iterate over the S1 domain's master list, later patches will resolve that. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/2-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 57 ++++++++++--------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 32 ++++++++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 9 ++- 3 files changed, 67 insertions(+), 31 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 28f8bf4327f6..71ca87c2c5c3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -417,29 +417,27 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) arm_smmu_free_shared_cd(cd); } -static int __arm_smmu_sva_bind(struct device *dev, ioasid_t pasid, - struct mm_struct *mm) +static struct arm_smmu_bond *__arm_smmu_sva_bind(struct device *dev, + struct 
mm_struct *mm) { int ret; - struct arm_smmu_cd target; - struct arm_smmu_cd *cdptr; struct arm_smmu_bond *bond; struct arm_smmu_master *master = dev_iommu_priv_get(dev); struct iommu_domain *domain = iommu_get_domain_for_dev(dev); struct arm_smmu_domain *smmu_domain; if (!(domain->type & __IOMMU_DOMAIN_PAGING)) - return -ENODEV; + return ERR_PTR(-ENODEV); smmu_domain = to_smmu_domain(domain); if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) - return -ENODEV; + return ERR_PTR(-ENODEV); if (!master || !master->sva_enabled) - return -ENODEV; + return ERR_PTR(-ENODEV); bond = kzalloc(sizeof(*bond), GFP_KERNEL); if (!bond) - return -ENOMEM; + return ERR_PTR(-ENOMEM); bond->mm = mm; @@ -449,22 +447,12 @@ static int __arm_smmu_sva_bind(struct device *dev, ioasid_t pasid, goto err_free_bond; } - cdptr = arm_smmu_alloc_cd_ptr(master, mm_get_enqcmd_pasid(mm)); - if (!cdptr) { - ret = -ENOMEM; - goto err_put_notifier; - } - arm_smmu_make_sva_cd(&target, master, mm, bond->smmu_mn->cd->asid); - arm_smmu_write_cd_entry(master, pasid, cdptr, &target); - list_add(&bond->list, &master->bonds); - return 0; + return bond; -err_put_notifier: - arm_smmu_mmu_notifier_put(bond->smmu_mn); err_free_bond: kfree(bond); - return ret; + return ERR_PTR(ret); } bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) @@ -611,10 +599,9 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, struct arm_smmu_bond *bond = NULL, *t; struct arm_smmu_master *master = dev_iommu_priv_get(dev); + arm_smmu_remove_pasid(master, to_smmu_domain(domain), id); + mutex_lock(&sva_lock); - - arm_smmu_clear_cd(master, id); - list_for_each_entry(t, &master->bonds, list) { if (t->mm == mm) { bond = t; @@ -633,17 +620,33 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t id) { - int ret = 0; + struct arm_smmu_master *master = dev_iommu_priv_get(dev); struct mm_struct *mm = domain->mm; + struct 
arm_smmu_bond *bond; + struct arm_smmu_cd target; + int ret; if (mm_get_enqcmd_pasid(mm) != id) return -EINVAL; mutex_lock(&sva_lock); - ret = __arm_smmu_sva_bind(dev, id, mm); - mutex_unlock(&sva_lock); + bond = __arm_smmu_sva_bind(dev, mm); + if (IS_ERR(bond)) { + mutex_unlock(&sva_lock); + return PTR_ERR(bond); + } - return ret; + arm_smmu_make_sva_cd(&target, master, mm, bond->smmu_mn->cd->asid); + ret = arm_smmu_set_pasid(master, NULL, id, &target); + if (ret) { + list_del(&bond->list); + arm_smmu_mmu_notifier_put(bond->smmu_mn); + kfree(bond); + mutex_unlock(&sva_lock); + return ret; + } + mutex_unlock(&sva_lock); + return 0; } static void arm_smmu_sva_domain_free(struct iommu_domain *domain) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 05939f483098..fc78156f9d97 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1211,8 +1211,8 @@ struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, return &l1_desc->l2ptr[ssid % CTXDESC_L2_ENTRIES]; } -struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, - u32 ssid) +static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, + u32 ssid) { struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; struct arm_smmu_device *smmu = master->smmu; @@ -2412,6 +2412,10 @@ static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master, int i, j; struct arm_smmu_device *smmu = master->smmu; + master->cd_table.in_ste = + FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(target->data[0])) == + STRTAB_STE_0_CFG_S1_TRANS; + for (i = 0; i < master->num_streams; ++i) { u32 sid = master->streams[i].id; struct arm_smmu_ste *step = @@ -2632,6 +2636,30 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return 0; } +int arm_smmu_set_pasid(struct arm_smmu_master *master, + struct arm_smmu_domain *smmu_domain, ioasid_t pasid, + const 
struct arm_smmu_cd *cd) +{ + struct arm_smmu_cd *cdptr; + + /* The core code validates pasid */ + + if (!master->cd_table.in_ste) + return -ENODEV; + + cdptr = arm_smmu_alloc_cd_ptr(master, pasid); + if (!cdptr) + return -ENOMEM; + arm_smmu_write_cd_entry(master, pasid, cdptr, cd); + return 0; +} + +void arm_smmu_remove_pasid(struct arm_smmu_master *master, + struct arm_smmu_domain *smmu_domain, ioasid_t pasid) +{ + arm_smmu_clear_cd(master, pasid); +} + static int arm_smmu_attach_dev_ste(struct device *dev, struct arm_smmu_ste *ste) { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index b10712d3de66..6a74d3d884fe 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -602,6 +602,7 @@ struct arm_smmu_ctx_desc_cfg { dma_addr_t cdtab_dma; struct arm_smmu_l1_ctx_desc *l1_desc; unsigned int num_l1_ents; + u8 in_ste; u8 s1fmt; /* log2 of the maximum number of CDs supported by this table */ u8 s1cdmax; @@ -777,8 +778,6 @@ extern struct mutex arm_smmu_asid_lock; void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid); struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid); -struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, - u32 ssid); void arm_smmu_make_s1_cd(struct arm_smmu_cd *target, struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain); @@ -786,6 +785,12 @@ void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, struct arm_smmu_cd *cdptr, const struct arm_smmu_cd *target); +int arm_smmu_set_pasid(struct arm_smmu_master *master, + struct arm_smmu_domain *smmu_domain, ioasid_t pasid, + const struct arm_smmu_cd *cd); +void arm_smmu_remove_pasid(struct arm_smmu_master *master, + struct arm_smmu_domain *smmu_domain, ioasid_t pasid); + void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid); void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int 
asid, size_t granule, bool leaf, From ad10dce61303d82f7bdd2dbb116e02146778f728 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:34 -0300 Subject: [PATCH 17/72] iommu/arm-smmu-v3: Make smmu_domain->devices into an allocated list The next patch will need to store the same master twice (with different SSIDs), so allocate memory for each list element. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/3-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 11 ++++-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 39 ++++++++++++++++--- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 7 +++- 3 files changed, 47 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 71ca87c2c5c3..cb3a0e4143c8 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -38,12 +38,13 @@ static DEFINE_MUTEX(sva_lock); static void arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain) { - struct arm_smmu_master *master; + struct arm_smmu_master_domain *master_domain; struct arm_smmu_cd target_cd; unsigned long flags; spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master, &smmu_domain->devices, domain_head) { + list_for_each_entry(master_domain, &smmu_domain->devices, devices_elm) { + struct arm_smmu_master *master = master_domain->master; struct arm_smmu_cd *cdptr; /* S1 domains only support RID attachment right now */ @@ -301,7 +302,7 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn); struct arm_smmu_domain *smmu_domain = smmu_mn->domain; - struct arm_smmu_master 
*master; + struct arm_smmu_master_domain *master_domain; unsigned long flags; mutex_lock(&sva_lock); @@ -315,7 +316,9 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) * but disable translation. */ spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master, &smmu_domain->devices, domain_head) { + list_for_each_entry(master_domain, &smmu_domain->devices, + devices_elm) { + struct arm_smmu_master *master = master_domain->master; struct arm_smmu_cd target; struct arm_smmu_cd *cdptr; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index fc78156f9d97..cee97372af0d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2015,10 +2015,10 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master) int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, unsigned long iova, size_t size) { + struct arm_smmu_master_domain *master_domain; int i; unsigned long flags; struct arm_smmu_cmdq_ent cmd; - struct arm_smmu_master *master; struct arm_smmu_cmdq_batch cmds; if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS)) @@ -2046,7 +2046,10 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, cmds.num = 0; spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master, &smmu_domain->devices, domain_head) { + list_for_each_entry(master_domain, &smmu_domain->devices, + devices_elm) { + struct arm_smmu_master *master = master_domain->master; + if (!master->ats_enabled) continue; @@ -2534,9 +2537,26 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master) pci_disable_pasid(pdev); } +static struct arm_smmu_master_domain * +arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, + struct arm_smmu_master *master) +{ + struct arm_smmu_master_domain *master_domain; + + lockdep_assert_held(&smmu_domain->devices_lock); + + 
list_for_each_entry(master_domain, &smmu_domain->devices, + devices_elm) { + if (master_domain->master == master) + return master_domain; + } + return NULL; +} + static void arm_smmu_detach_dev(struct arm_smmu_master *master) { struct iommu_domain *domain = iommu_get_domain_for_dev(master->dev); + struct arm_smmu_master_domain *master_domain; struct arm_smmu_domain *smmu_domain; unsigned long flags; @@ -2547,7 +2567,11 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master) arm_smmu_disable_ats(master, smmu_domain); spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_del_init(&master->domain_head); + master_domain = arm_smmu_find_master_domain(smmu_domain, master); + if (master_domain) { + list_del(&master_domain->devices_elm); + kfree(master_domain); + } spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); master->ats_enabled = false; @@ -2561,6 +2585,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct arm_smmu_device *smmu; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_master_domain *master_domain; struct arm_smmu_master *master; struct arm_smmu_cd *cdptr; @@ -2597,6 +2622,11 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return -ENOMEM; } + master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL); + if (!master_domain) + return -ENOMEM; + master_domain->master = master; + /* * Prevent arm_smmu_share_asid() from trying to change the ASID * of either the old or new domain while we are working on it. 
@@ -2610,7 +2640,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) master->ats_enabled = arm_smmu_ats_supported(master); spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_add(&master->domain_head, &smmu_domain->devices); + list_add(&master_domain->devices_elm, &smmu_domain->devices); spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); switch (smmu_domain->stage) { @@ -2925,7 +2955,6 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) master->dev = dev; master->smmu = smmu; INIT_LIST_HEAD(&master->bonds); - INIT_LIST_HEAD(&master->domain_head); dev_iommu_priv_set(dev, master); ret = arm_smmu_insert_master(smmu, master); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 6a74d3d884fe..01769b5286a8 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -697,7 +697,6 @@ struct arm_smmu_stream { struct arm_smmu_master { struct arm_smmu_device *smmu; struct device *dev; - struct list_head domain_head; struct arm_smmu_stream *streams; /* Locked by the iommu core using the group mutex */ struct arm_smmu_ctx_desc_cfg cd_table; @@ -731,6 +730,7 @@ struct arm_smmu_domain { struct iommu_domain domain; + /* List of struct arm_smmu_master_domain */ struct list_head devices; spinlock_t devices_lock; @@ -767,6 +767,11 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, u16 asid); #endif +struct arm_smmu_master_domain { + struct list_head devices_elm; + struct arm_smmu_master *master; +}; + static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) { return container_of(dom, struct arm_smmu_domain, domain); From 7497f4211f4fbdcec5fc5bb4df7f6ccd345966e8 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:35 -0300 Subject: [PATCH 18/72] iommu/arm-smmu-v3: Make changing domains be hitless for ATS The core code allows the domain to be changed on the fly 
without a forced stop in BLOCKED/IDENTITY. In this flow the driver should just continually maintain the ATS with no change while the STE is updated. ATS relies on a linked list smmu_domain->devices to keep track of which masters have the domain programmed, but this list is also used by arm_smmu_share_asid(), unrelated to ATS. Create two new functions to encapsulate this combined logic: arm_smmu_attach_prepare() arm_smmu_attach_commit() The two functions can sequence both enabling ATS and disabling across the STE store. Have every update of the STE use this sequence. Installing a S1/S2 domain always enables the ATS if the PCIe device supports it. The enable flow is now ordered differently to allow it to be hitless: 1) Add the master to the new smmu_domain->devices list 2) Program the STE 3) Enable ATS at PCIe 4) Remove the master from the old smmu_domain This flow ensures that invalidations to either domain will generate an ATC invalidation to the device while the STE is being switched. Thus we don't need to turn off the ATS anymore for correctness. The disable flow is the reverse: 1) Disable ATS at PCIe 2) Program the STE 3) Invalidate the ATC 4) Remove the master from the old smmu_domain Move the nr_ats_masters adjustments to be close to the list manipulations. It is a count of the number of ATS enabled masters currently in the list. This is strictly before and after the STE/CD are revised, and done under the list's spin_lock. This is part of the bigger picture to allow changing the RID domain while a PASID is in use. If a SVA PASID is relying on ATS to function then changing the RID domain cannot just temporarily toggle ATS off without also wrecking the SVA PASID. The new infrastructure here is organized so that the PASID attach/detach flows will make use of it as well in following patches.
Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Nicolin Chen Reviewed-by: Michael Shavit Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/4-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c | 5 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 237 +++++++++++++----- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 6 +- 3 files changed, 177 insertions(+), 71 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c index 315e487fd990..a460b71f5857 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c @@ -164,7 +164,7 @@ static void arm_smmu_test_make_cdtable_ste(struct arm_smmu_ste *ste, .smmu = &smmu, }; - arm_smmu_make_cdtable_ste(ste, &master); + arm_smmu_make_cdtable_ste(ste, &master, true); } static void arm_smmu_v3_write_ste_test_bypass_to_abort(struct kunit *test) @@ -231,7 +231,6 @@ static void arm_smmu_test_make_s2_ste(struct arm_smmu_ste *ste, { struct arm_smmu_master master = { .smmu = &smmu, - .ats_enabled = ats_enabled, }; struct io_pgtable io_pgtable = {}; struct arm_smmu_domain smmu_domain = { @@ -247,7 +246,7 @@ static void arm_smmu_test_make_s2_ste(struct arm_smmu_ste *ste, io_pgtable.cfg.arm_lpae_s2_cfg.vtcr.sl = 3; io_pgtable.cfg.arm_lpae_s2_cfg.vtcr.tsz = 4; - arm_smmu_make_s2_domain_ste(ste, &master, &smmu_domain); + arm_smmu_make_s2_domain_ste(ste, &master, &smmu_domain, ats_enabled); } static void arm_smmu_v3_write_ste_test_s2_to_abort(struct kunit *test) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index cee97372af0d..bb5647110d01 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1538,7 +1538,7 @@ EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste); 
VISIBLE_IF_KUNIT void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, - struct arm_smmu_master *master) + struct arm_smmu_master *master, bool ats_enabled) { struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; struct arm_smmu_device *smmu = master->smmu; @@ -1561,7 +1561,7 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, STRTAB_STE_1_S1STALLD : 0) | FIELD_PREP(STRTAB_STE_1_EATS, - master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); + ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); if (smmu->features & ARM_SMMU_FEAT_E2H) { /* @@ -1591,7 +1591,8 @@ EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste); VISIBLE_IF_KUNIT void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, struct arm_smmu_master *master, - struct arm_smmu_domain *smmu_domain) + struct arm_smmu_domain *smmu_domain, + bool ats_enabled) { struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg; const struct io_pgtable_cfg *pgtbl_cfg = @@ -1608,7 +1609,7 @@ void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, target->data[1] = cpu_to_le64( FIELD_PREP(STRTAB_STE_1_EATS, - master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); + ats_enabled ? 
STRTAB_STE_1_EATS_TRANS : 0)); if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, @@ -2450,22 +2451,16 @@ static bool arm_smmu_ats_supported(struct arm_smmu_master *master) return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)); } -static void arm_smmu_enable_ats(struct arm_smmu_master *master, - struct arm_smmu_domain *smmu_domain) +static void arm_smmu_enable_ats(struct arm_smmu_master *master) { size_t stu; struct pci_dev *pdev; struct arm_smmu_device *smmu = master->smmu; - /* Don't enable ATS at the endpoint if it's not enabled in the STE */ - if (!master->ats_enabled) - return; - /* Smallest Translation Unit: log2 of the smallest supported granule */ stu = __ffs(smmu->pgsize_bitmap); pdev = to_pci_dev(master->dev); - atomic_inc(&smmu_domain->nr_ats_masters); /* * ATC invalidation of PASID 0 causes the entire ATC to be flushed. */ @@ -2474,22 +2469,6 @@ static void arm_smmu_enable_ats(struct arm_smmu_master *master, dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu); } -static void arm_smmu_disable_ats(struct arm_smmu_master *master, - struct arm_smmu_domain *smmu_domain) -{ - if (!master->ats_enabled) - return; - - pci_disable_ats(to_pci_dev(master->dev)); - /* - * Ensure ATS is disabled at the endpoint before we issue the - * ATC invalidation via the SMMU. - */ - wmb(); - arm_smmu_atc_inv_master(master); - atomic_dec(&smmu_domain->nr_ats_masters); -} - static int arm_smmu_enable_pasid(struct arm_smmu_master *master) { int ret; @@ -2553,46 +2532,181 @@ arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, return NULL; } -static void arm_smmu_detach_dev(struct arm_smmu_master *master) +/* + * If the domain uses the smmu_domain->devices list return the arm_smmu_domain + * structure, otherwise NULL. These domains track attached devices so they can + * issue invalidations. 
+ */ +static struct arm_smmu_domain * +to_smmu_domain_devices(struct iommu_domain *domain) { - struct iommu_domain *domain = iommu_get_domain_for_dev(master->dev); + /* The domain can be NULL only when processing the first attach */ + if (!domain) + return NULL; + if (domain->type & __IOMMU_DOMAIN_PAGING) + return to_smmu_domain(domain); + return NULL; +} + +static void arm_smmu_remove_master_domain(struct arm_smmu_master *master, + struct iommu_domain *domain) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain); struct arm_smmu_master_domain *master_domain; - struct arm_smmu_domain *smmu_domain; unsigned long flags; - if (!domain || !(domain->type & __IOMMU_DOMAIN_PAGING)) + if (!smmu_domain) return; - smmu_domain = to_smmu_domain(domain); - arm_smmu_disable_ats(master, smmu_domain); - spin_lock_irqsave(&smmu_domain->devices_lock, flags); master_domain = arm_smmu_find_master_domain(smmu_domain, master); if (master_domain) { list_del(&master_domain->devices_elm); kfree(master_domain); + if (master->ats_enabled) + atomic_dec(&smmu_domain->nr_ats_masters); } spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); +} - master->ats_enabled = false; +struct arm_smmu_attach_state { + /* Inputs */ + struct iommu_domain *old_domain; + struct arm_smmu_master *master; + /* Resulting state */ + bool ats_enabled; +}; + +/* + * Start the sequence to attach a domain to a master. The sequence contains three + * steps: + * arm_smmu_attach_prepare() + * arm_smmu_install_ste_for_dev() + * arm_smmu_attach_commit() + * + * If prepare succeeds then the sequence must be completed. The STE installed + * must set the STE.EATS field according to state.ats_enabled. + * + * If the device supports ATS then this determines if EATS should be enabled + * in the STE, and starts sequencing EATS disable if required. 
+ * + * The change of the EATS in the STE and the PCI ATS config space is managed by + * this sequence to be in the right order so that if PCI ATS is enabled then + * STE.ETAS is enabled. + * + * new_domain can be a non-paging domain. In this case ATS will not be enabled, + * and invalidations won't be tracked. + */ +static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, + struct iommu_domain *new_domain) +{ + struct arm_smmu_master *master = state->master; + struct arm_smmu_master_domain *master_domain; + struct arm_smmu_domain *smmu_domain = + to_smmu_domain_devices(new_domain); + unsigned long flags; + + /* + * arm_smmu_share_asid() must not see two domains pointing to the same + * arm_smmu_master_domain contents otherwise it could randomly write one + * or the other to the CD. + */ + lockdep_assert_held(&arm_smmu_asid_lock); + + if (smmu_domain) { + /* + * The SMMU does not support enabling ATS with bypass/abort. + * When the STE is in bypass (STE.Config[2:0] == 0b100), ATS + * Translation Requests and Translated transactions are denied + * as though ATS is disabled for the stream (STE.EATS == 0b00), + * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events + * (IHI0070Ea 5.2 Stream Table Entry). Thus ATS can only be + * enabled if we have arm_smmu_domain, those always have page + * tables. + */ + state->ats_enabled = arm_smmu_ats_supported(master); + + master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL); + if (!master_domain) + return -ENOMEM; + master_domain->master = master; + + /* + * During prepare we want the current smmu_domain and new + * smmu_domain to be in the devices list before we change any + * HW. This ensures that both domains will send ATS + * invalidations to the master until we are done. + * + * It is tempting to make this list only track masters that are + * using ATS, but arm_smmu_share_asid() also uses this to change + * the ASID of a domain, unrelated to ATS. 
+ * + * Notice if we are re-attaching the same domain then the list + * will have two identical entries and commit will remove only + * one of them. + */ + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + if (state->ats_enabled) + atomic_inc(&smmu_domain->nr_ats_masters); + list_add(&master_domain->devices_elm, &smmu_domain->devices); + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + } + + if (!state->ats_enabled && master->ats_enabled) { + pci_disable_ats(to_pci_dev(master->dev)); + /* + * This is probably overkill, but the config write for disabling + * ATS should complete before the STE is configured to generate + * UR to avoid AER noise. + */ + wmb(); + } + return 0; +} + +/* + * Commit is done after the STE/CD are configured with the EATS setting. It + * completes synchronizing the PCI device's ATC and finishes manipulating the + * smmu_domain->devices list. + */ +static void arm_smmu_attach_commit(struct arm_smmu_attach_state *state) +{ + struct arm_smmu_master *master = state->master; + + lockdep_assert_held(&arm_smmu_asid_lock); + + if (state->ats_enabled && !master->ats_enabled) { + arm_smmu_enable_ats(master); + } else if (master->ats_enabled) { + /* + * The translation has changed, flush the ATC. At this point the + * SMMU is translating for the new domain and both the old&new + * domain will issue invalidations. 
+ */ + arm_smmu_atc_inv_master(master); + } + master->ats_enabled = state->ats_enabled; + + arm_smmu_remove_master_domain(master, state->old_domain); } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) { int ret = 0; - unsigned long flags; struct arm_smmu_ste target; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct arm_smmu_device *smmu; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - struct arm_smmu_master_domain *master_domain; + struct arm_smmu_attach_state state = { + .old_domain = iommu_get_domain_for_dev(dev), + }; struct arm_smmu_master *master; struct arm_smmu_cd *cdptr; if (!fwspec) return -ENOENT; - master = dev_iommu_priv_get(dev); + state.master = master = dev_iommu_priv_get(dev); smmu = master->smmu; /* @@ -2622,11 +2736,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return -ENOMEM; } - master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL); - if (!master_domain) - return -ENOMEM; - master_domain->master = master; - /* * Prevent arm_smmu_share_asid() from trying to change the ASID * of either the old or new domain while we are working on it. 
@@ -2635,13 +2744,11 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) */ mutex_lock(&arm_smmu_asid_lock); - arm_smmu_detach_dev(master); - - master->ats_enabled = arm_smmu_ats_supported(master); - - spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_add(&master_domain->devices_elm, &smmu_domain->devices); - spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + ret = arm_smmu_attach_prepare(&state, domain); + if (ret) { + mutex_unlock(&arm_smmu_asid_lock); + return ret; + } switch (smmu_domain->stage) { case ARM_SMMU_DOMAIN_S1: { @@ -2650,18 +2757,19 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) arm_smmu_make_s1_cd(&target_cd, master, smmu_domain); arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr, &target_cd); - arm_smmu_make_cdtable_ste(&target, master); + arm_smmu_make_cdtable_ste(&target, master, state.ats_enabled); arm_smmu_install_ste_for_dev(master, &target); break; } case ARM_SMMU_DOMAIN_S2: - arm_smmu_make_s2_domain_ste(&target, master, smmu_domain); + arm_smmu_make_s2_domain_ste(&target, master, smmu_domain, + state.ats_enabled); arm_smmu_install_ste_for_dev(master, &target); arm_smmu_clear_cd(master, IOMMU_NO_PASID); break; } - arm_smmu_enable_ats(master, smmu_domain); + arm_smmu_attach_commit(&state); mutex_unlock(&arm_smmu_asid_lock); return 0; } @@ -2690,10 +2798,14 @@ void arm_smmu_remove_pasid(struct arm_smmu_master *master, arm_smmu_clear_cd(master, pasid); } -static int arm_smmu_attach_dev_ste(struct device *dev, - struct arm_smmu_ste *ste) +static int arm_smmu_attach_dev_ste(struct iommu_domain *domain, + struct device *dev, struct arm_smmu_ste *ste) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_attach_state state = { + .master = master, + .old_domain = iommu_get_domain_for_dev(dev), + }; if (arm_smmu_master_sva_enabled(master)) return -EBUSY; @@ -2704,16 +2816,9 @@ static int arm_smmu_attach_dev_ste(struct device *dev, */ 
mutex_lock(&arm_smmu_asid_lock); - /* - * The SMMU does not support enabling ATS with bypass/abort. When the - * STE is in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests - * and Translated transactions are denied as though ATS is disabled for - * the stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and - * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry). - */ - arm_smmu_detach_dev(master); - + arm_smmu_attach_prepare(&state, domain); arm_smmu_install_ste_for_dev(master, ste); + arm_smmu_attach_commit(&state); mutex_unlock(&arm_smmu_asid_lock); /* @@ -2732,7 +2837,7 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, struct arm_smmu_master *master = dev_iommu_priv_get(dev); arm_smmu_make_bypass_ste(master->smmu, &ste); - return arm_smmu_attach_dev_ste(dev, &ste); + return arm_smmu_attach_dev_ste(domain, dev, &ste); } static const struct iommu_domain_ops arm_smmu_identity_ops = { @@ -2750,7 +2855,7 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain, struct arm_smmu_ste ste; arm_smmu_make_abort_ste(&ste); - return arm_smmu_attach_dev_ste(dev, &ste); + return arm_smmu_attach_dev_ste(domain, dev, &ste); } static const struct iommu_domain_ops arm_smmu_blocked_ops = { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 01769b5286a8..f9b4bfb2e6b7 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -758,10 +758,12 @@ void arm_smmu_make_abort_ste(struct arm_smmu_ste *target); void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu, struct arm_smmu_ste *target); void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, - struct arm_smmu_master *master); + struct arm_smmu_master *master, + bool ats_enabled); void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, struct arm_smmu_master *master, - struct arm_smmu_domain *smmu_domain); + struct arm_smmu_domain *smmu_domain, + bool 
ats_enabled); void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, struct arm_smmu_master *master, struct mm_struct *mm, u16 asid); From 64efb3def3a53effe01fa750eec6e7369f65e386 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:36 -0300 Subject: [PATCH 19/72] iommu/arm-smmu-v3: Add ssid to struct arm_smmu_master_domain Prepare to allow a S1 domain to be attached to a PASID as well. Keep track of the SSID the domain is using on each master in the arm_smmu_master_domain. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/5-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 15 ++++--- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 42 +++++++++++++++---- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 5 ++- 3 files changed, 43 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index cb3a0e4143c8..d31caceb5849 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -47,13 +47,12 @@ arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain) struct arm_smmu_master *master = master_domain->master; struct arm_smmu_cd *cdptr; - /* S1 domains only support RID attachment right now */ - cdptr = arm_smmu_get_cd_ptr(master, IOMMU_NO_PASID); + cdptr = arm_smmu_get_cd_ptr(master, master_domain->ssid); if (WARN_ON(!cdptr)) continue; arm_smmu_make_s1_cd(&target_cd, master, smmu_domain); - arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr, + arm_smmu_write_cd_entry(master, master_domain->ssid, cdptr, &target_cd); } spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); @@ -294,8 +293,8 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier 
*mn, smmu_domain); } - arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), start, - size); + arm_smmu_atc_inv_domain_sva(smmu_domain, mm_get_enqcmd_pasid(mm), start, + size); } static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) @@ -332,7 +331,7 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid); - arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), 0, 0); + arm_smmu_atc_inv_domain_sva(smmu_domain, mm_get_enqcmd_pasid(mm), 0, 0); smmu_mn->cleared = true; mutex_unlock(&sva_lock); @@ -411,8 +410,8 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) */ if (!smmu_mn->cleared) { arm_smmu_tlb_inv_asid(smmu_domain->smmu, cd->asid); - arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), 0, - 0); + arm_smmu_atc_inv_domain_sva(smmu_domain, + mm_get_enqcmd_pasid(mm), 0, 0); } /* Frees smmu_mn */ diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index bb5647110d01..f07fabca1f03 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2013,8 +2013,8 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master) return arm_smmu_cmdq_batch_submit(master->smmu, &cmds); } -int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, - unsigned long iova, size_t size) +static int __arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, + ioasid_t ssid, unsigned long iova, size_t size) { struct arm_smmu_master_domain *master_domain; int i; @@ -2042,8 +2042,6 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, if (!atomic_read(&smmu_domain->nr_ats_masters)) return 0; - arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd); - cmds.num = 0; spin_lock_irqsave(&smmu_domain->devices_lock, flags); @@ -2054,6 
+2052,16 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, if (!master->ats_enabled) continue; + /* + * Non-zero ssid means SVA is co-opting the S1 domain to issue + * invalidations for SVA PASIDs. + */ + if (ssid != IOMMU_NO_PASID) + arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd); + else + arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size, + &cmd); + for (i = 0; i < master->num_streams; i++) { cmd.atc.sid = master->streams[i].id; arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd); @@ -2064,6 +2072,19 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds); } +static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, + unsigned long iova, size_t size) +{ + return __arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, + size); +} + +int arm_smmu_atc_inv_domain_sva(struct arm_smmu_domain *smmu_domain, + ioasid_t ssid, unsigned long iova, size_t size) +{ + return __arm_smmu_atc_inv_domain(smmu_domain, ssid, iova, size); +} + /* IO_PGTABLE API */ static void arm_smmu_tlb_inv_context(void *cookie) { @@ -2085,7 +2106,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); } - arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0); + arm_smmu_atc_inv_domain(smmu_domain, 0, 0); } static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, @@ -2183,7 +2204,7 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, * Unfortunately, this can't be leaf-only since we may have * zapped an entire table. 
*/ - arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size); + arm_smmu_atc_inv_domain(smmu_domain, iova, size); } void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, @@ -2518,7 +2539,8 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master) static struct arm_smmu_master_domain * arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_master *master) + struct arm_smmu_master *master, + ioasid_t ssid) { struct arm_smmu_master_domain *master_domain; @@ -2526,7 +2548,8 @@ arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, list_for_each_entry(master_domain, &smmu_domain->devices, devices_elm) { - if (master_domain->master == master) + if (master_domain->master == master && + master_domain->ssid == ssid) return master_domain; } return NULL; @@ -2559,7 +2582,8 @@ static void arm_smmu_remove_master_domain(struct arm_smmu_master *master, return; spin_lock_irqsave(&smmu_domain->devices_lock, flags); - master_domain = arm_smmu_find_master_domain(smmu_domain, master); + master_domain = arm_smmu_find_master_domain(smmu_domain, master, + IOMMU_NO_PASID); if (master_domain) { list_del(&master_domain->devices_elm); kfree(master_domain); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index f9b4bfb2e6b7..f4061ffc1e61 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -772,6 +772,7 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, struct arm_smmu_master_domain { struct list_head devices_elm; struct arm_smmu_master *master; + ioasid_t ssid; }; static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) @@ -803,8 +804,8 @@ void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, size_t granule, bool leaf, struct arm_smmu_domain *smmu_domain); bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd); -int arm_smmu_atc_inv_domain(struct 
arm_smmu_domain *smmu_domain, int ssid, - unsigned long iova, size_t size); +int arm_smmu_atc_inv_domain_sva(struct arm_smmu_domain *smmu_domain, + ioasid_t ssid, unsigned long iova, size_t size); #ifdef CONFIG_ARM_SMMU_V3_SVA bool arm_smmu_sva_supported(struct arm_smmu_device *smmu); From be7c90de39fdebdba4f9cce7575b71c6b2506ea0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:37 -0300 Subject: [PATCH 20/72] iommu/arm-smmu-v3: Do not use master->sva_enable to restrict attaches We no longer need a master->sva_enable to control what attaches are allowed. Instead we can tell if the attach is legal based on the current configuration of the master. Keep track of the number of valid CD entries for SSID's in the cd_table and if the cd_table has been installed in the STE directly so we know what the configuration is. The attach logic is then made into: - SVA bind, check if the CD is installed - RID attach of S2, block if SSIDs are used - RID attach of IDENTITY/BLOCKING, block if SSIDs are used arm_smmu_set_pasid() is already checking if it is possible to setup a CD entry, at this patch it means the RID path already set a STE pointing at the CD table. 
Tested-by: Nicolin Chen Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/6-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 24 ++++++++++----------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 7 ++++++ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index f07fabca1f03..6466ce062b48 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1289,6 +1289,8 @@ void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, struct arm_smmu_cd *cdptr, const struct arm_smmu_cd *target) { + bool target_valid = target->data[0] & cpu_to_le64(CTXDESC_CD_0_V); + bool cur_valid = cdptr->data[0] & cpu_to_le64(CTXDESC_CD_0_V); struct arm_smmu_cd_writer cd_writer = { .writer = { .ops = &arm_smmu_cd_writer_ops, @@ -1297,6 +1299,13 @@ void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, .ssid = ssid, }; + if (ssid != IOMMU_NO_PASID && cur_valid != target_valid) { + if (cur_valid) + master->cd_table.used_ssids--; + else + master->cd_table.used_ssids++; + } + arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data); } @@ -2733,16 +2742,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) state.master = master = dev_iommu_priv_get(dev); smmu = master->smmu; - /* - * Checking that SVA is disabled ensures that this device isn't bound to - * any mm, and can be safely detached from its old domain. Bonds cannot - * be removed concurrently since we're holding the group mutex. 
- */ - if (arm_smmu_master_sva_enabled(master)) { - dev_err(dev, "cannot attach - SVA enabled\n"); - return -EBUSY; - } - mutex_lock(&smmu_domain->init_mutex); if (!smmu_domain->smmu) { @@ -2758,7 +2757,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID); if (!cdptr) return -ENOMEM; - } + } else if (arm_smmu_ssids_in_use(&master->cd_table)) + return -EBUSY; /* * Prevent arm_smmu_share_asid() from trying to change the ASID @@ -2831,7 +2831,7 @@ static int arm_smmu_attach_dev_ste(struct iommu_domain *domain, .old_domain = iommu_get_domain_for_dev(dev), }; - if (arm_smmu_master_sva_enabled(master)) + if (arm_smmu_ssids_in_use(&master->cd_table)) return -EBUSY; /* diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index f4061ffc1e61..65b75dbfd159 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -602,12 +602,19 @@ struct arm_smmu_ctx_desc_cfg { dma_addr_t cdtab_dma; struct arm_smmu_l1_ctx_desc *l1_desc; unsigned int num_l1_ents; + unsigned int used_ssids; u8 in_ste; u8 s1fmt; /* log2 of the maximum number of CDs supported by this table */ u8 s1cdmax; }; +/* True if the cd table has SSIDS > 0 in use. */ +static inline bool arm_smmu_ssids_in_use(struct arm_smmu_ctx_desc_cfg *cd_table) +{ + return cd_table->used_ssids; +} + struct arm_smmu_s2_cfg { u16 vmid; }; From 1d5f34f0002f9f56d0ca153022cfdead07d45dc6 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:38 -0300 Subject: [PATCH 21/72] iommu/arm-smmu-v3: Thread SSID through the arm_smmu_attach_*() interface Allow creating and managing arm_smmu_master_domain's with a non-zero SSID through the arm_smmu_attach_*() family of functions. This triggers ATC invalidation for the correct SSID in PASID cases and tracks the per-attachment SSID in the struct arm_smmu_master_domain.
Generalize arm_smmu_attach_remove() to be able to remove SSID's as well by ensuring the ATC for the PASID is flushed properly. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/7-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 26 ++++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 6466ce062b48..7794f4ee3431 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2005,13 +2005,14 @@ arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size, cmd->atc.size = log2_span; } -static int arm_smmu_atc_inv_master(struct arm_smmu_master *master) +static int arm_smmu_atc_inv_master(struct arm_smmu_master *master, + ioasid_t ssid) { int i; struct arm_smmu_cmdq_ent cmd; struct arm_smmu_cmdq_batch cmds; - arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd); + arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd); cmds.num = 0; for (i = 0; i < master->num_streams; i++) { @@ -2494,7 +2495,7 @@ static void arm_smmu_enable_ats(struct arm_smmu_master *master) /* * ATC invalidation of PASID 0 causes the entire ATC to be flushed. 
*/ - arm_smmu_atc_inv_master(master); + arm_smmu_atc_inv_master(master, IOMMU_NO_PASID); if (pci_enable_ats(pdev, stu)) dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu); } @@ -2581,7 +2582,8 @@ to_smmu_domain_devices(struct iommu_domain *domain) } static void arm_smmu_remove_master_domain(struct arm_smmu_master *master, - struct iommu_domain *domain) + struct iommu_domain *domain, + ioasid_t ssid) { struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain); struct arm_smmu_master_domain *master_domain; @@ -2591,8 +2593,7 @@ static void arm_smmu_remove_master_domain(struct arm_smmu_master *master, return; spin_lock_irqsave(&smmu_domain->devices_lock, flags); - master_domain = arm_smmu_find_master_domain(smmu_domain, master, - IOMMU_NO_PASID); + master_domain = arm_smmu_find_master_domain(smmu_domain, master, ssid); if (master_domain) { list_del(&master_domain->devices_elm); kfree(master_domain); @@ -2606,6 +2607,7 @@ struct arm_smmu_attach_state { /* Inputs */ struct iommu_domain *old_domain; struct arm_smmu_master *master; + ioasid_t ssid; /* Resulting state */ bool ats_enabled; }; @@ -2663,6 +2665,7 @@ static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, if (!master_domain) return -ENOMEM; master_domain->master = master; + master_domain->ssid = state->ssid; /* * During prepare we want the current smmu_domain and new @@ -2710,17 +2713,20 @@ static void arm_smmu_attach_commit(struct arm_smmu_attach_state *state) if (state->ats_enabled && !master->ats_enabled) { arm_smmu_enable_ats(master); - } else if (master->ats_enabled) { + } else if (state->ats_enabled && master->ats_enabled) { /* * The translation has changed, flush the ATC. At this point the * SMMU is translating for the new domain and both the old&new * domain will issue invalidations. 
*/ - arm_smmu_atc_inv_master(master); + arm_smmu_atc_inv_master(master, state->ssid); + } else if (!state->ats_enabled && master->ats_enabled) { + /* ATS is being switched off, invalidate the entire ATC */ + arm_smmu_atc_inv_master(master, IOMMU_NO_PASID); } master->ats_enabled = state->ats_enabled; - arm_smmu_remove_master_domain(master, state->old_domain); + arm_smmu_remove_master_domain(master, state->old_domain, state->ssid); } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) @@ -2732,6 +2738,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_attach_state state = { .old_domain = iommu_get_domain_for_dev(dev), + .ssid = IOMMU_NO_PASID, }; struct arm_smmu_master *master; struct arm_smmu_cd *cdptr; @@ -2829,6 +2836,7 @@ static int arm_smmu_attach_dev_ste(struct iommu_domain *domain, struct arm_smmu_attach_state state = { .master = master, .old_domain = iommu_get_domain_for_dev(dev), + .ssid = IOMMU_NO_PASID, }; if (arm_smmu_ssids_in_use(&master->cd_table)) From d7b2d2ba1b84f4ae7cd94de22f74d6c6c5419de6 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:39 -0300 Subject: [PATCH 22/72] iommu/arm-smmu-v3: Make SVA allocate a normal arm_smmu_domain Currently the SVA domain is a naked struct iommu_domain, allocate a struct arm_smmu_domain instead. This is necessary to be able to use the struct arm_smmu_master_domain mechanism.
Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Michael Shavit Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/8-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 21 +++++++------ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 31 +++++++++++++------ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 2 ++ 3 files changed, 35 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index d31caceb5849..aa033cd65adc 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -639,7 +639,7 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, } arm_smmu_make_sva_cd(&target, master, mm, bond->smmu_mn->cd->asid); - ret = arm_smmu_set_pasid(master, NULL, id, &target); + ret = arm_smmu_set_pasid(master, to_smmu_domain(domain), id, &target); if (ret) { list_del(&bond->list); arm_smmu_mmu_notifier_put(bond->smmu_mn); @@ -653,7 +653,7 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, static void arm_smmu_sva_domain_free(struct iommu_domain *domain) { - kfree(domain); + kfree(to_smmu_domain(domain)); } static const struct iommu_domain_ops arm_smmu_sva_domain_ops = { @@ -664,13 +664,16 @@ static const struct iommu_domain_ops arm_smmu_sva_domain_ops = { struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) { - struct iommu_domain *domain; + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_device *smmu = master->smmu; + struct arm_smmu_domain *smmu_domain; - domain = kzalloc(sizeof(*domain), GFP_KERNEL); - if (!domain) - return ERR_PTR(-ENOMEM); - domain->type = IOMMU_DOMAIN_SVA; - domain->ops = &arm_smmu_sva_domain_ops; + smmu_domain = arm_smmu_domain_alloc(); + if 
(IS_ERR(smmu_domain)) + return ERR_CAST(smmu_domain); + smmu_domain->domain.type = IOMMU_DOMAIN_SVA; + smmu_domain->domain.ops = &arm_smmu_sva_domain_ops; + smmu_domain->smmu = smmu; - return domain; + return &smmu_domain->domain; } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 7794f4ee3431..0d6142d80112 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2272,6 +2272,22 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) } } +struct arm_smmu_domain *arm_smmu_domain_alloc(void) +{ + struct arm_smmu_domain *smmu_domain; + + smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL); + if (!smmu_domain) + return ERR_PTR(-ENOMEM); + + mutex_init(&smmu_domain->init_mutex); + INIT_LIST_HEAD(&smmu_domain->devices); + spin_lock_init(&smmu_domain->devices_lock); + INIT_LIST_HEAD(&smmu_domain->mmu_notifiers); + + return smmu_domain; +} + static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) { struct arm_smmu_domain *smmu_domain; @@ -2281,14 +2297,9 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) * We can't really do anything meaningful until we've added a * master. 
*/ - smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL); - if (!smmu_domain) - return ERR_PTR(-ENOMEM); - - mutex_init(&smmu_domain->init_mutex); - INIT_LIST_HEAD(&smmu_domain->devices); - spin_lock_init(&smmu_domain->devices_lock); - INIT_LIST_HEAD(&smmu_domain->mmu_notifiers); + smmu_domain = arm_smmu_domain_alloc(); + if (IS_ERR(smmu_domain)) + return ERR_CAST(smmu_domain); if (dev) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); @@ -2303,7 +2314,7 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) return &smmu_domain->domain; } -static void arm_smmu_domain_free(struct iommu_domain *domain) +static void arm_smmu_domain_free_paging(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; @@ -3305,7 +3316,7 @@ static struct iommu_ops arm_smmu_ops = { .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, .enable_nesting = arm_smmu_enable_nesting, - .free = arm_smmu_domain_free, + .free = arm_smmu_domain_free_paging, } }; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 65b75dbfd159..212c18c70fa0 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -790,6 +790,8 @@ static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) extern struct xarray arm_smmu_asid_xa; extern struct mutex arm_smmu_asid_lock; +struct arm_smmu_domain *arm_smmu_domain_alloc(void); + void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid); struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid); From 49db2ed23c52f8371c12ab8646df23fa1daad4b2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:40 -0300 Subject: [PATCH 23/72] iommu/arm-smmu-v3: Keep track of arm_smmu_master_domain for SVA Fill in the smmu_domain->devices list in the new struct 
arm_smmu_domain that SVA allocates. Keep track of every SSID and master that is using the domain reusing the logic for the RID attach. This is the first step to making the SVA invalidation follow the same design as S1/S2 invalidation. At present nothing will read this list. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/9-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 30 +++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 0d6142d80112..0c2ffd25419d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2587,7 +2587,8 @@ to_smmu_domain_devices(struct iommu_domain *domain) /* The domain can be NULL only when processing the first attach */ if (!domain) return NULL; - if (domain->type & __IOMMU_DOMAIN_PAGING) + if ((domain->type & __IOMMU_DOMAIN_PAGING) || + domain->type == IOMMU_DOMAIN_SVA) return to_smmu_domain(domain); return NULL; } @@ -2820,7 +2821,16 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain, ioasid_t pasid, const struct arm_smmu_cd *cd) { + struct arm_smmu_attach_state state = { + .master = master, + /* + * For now the core code prevents calling this when a domain is + * already attached, no need to set old_domain. 
+ */ + .ssid = pasid, + }; struct arm_smmu_cd *cdptr; + int ret; /* The core code validates pasid */ @@ -2830,14 +2840,30 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master, cdptr = arm_smmu_alloc_cd_ptr(master, pasid); if (!cdptr) return -ENOMEM; + + mutex_lock(&arm_smmu_asid_lock); + ret = arm_smmu_attach_prepare(&state, &smmu_domain->domain); + if (ret) + goto out_unlock; + arm_smmu_write_cd_entry(master, pasid, cdptr, cd); - return 0; + + arm_smmu_attach_commit(&state); + +out_unlock: + mutex_unlock(&arm_smmu_asid_lock); + return ret; } void arm_smmu_remove_pasid(struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain, ioasid_t pasid) { + mutex_lock(&arm_smmu_asid_lock); arm_smmu_clear_cd(master, pasid); + if (master->ats_enabled) + arm_smmu_atc_inv_master(master, pasid); + arm_smmu_remove_master_domain(master, &smmu_domain->domain, pasid); + mutex_unlock(&arm_smmu_asid_lock); } static int arm_smmu_attach_dev_ste(struct iommu_domain *domain, From d38c28dbefeee03d7dd02004ad80d9676ac54d86 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:41 -0300 Subject: [PATCH 24/72] iommu/arm-smmu-v3: Put the SVA mmu notifier in the smmu_domain This removes all the notifier de-duplication logic in the driver and relies on the core code to de-duplicate and allocate only one SVA domain per mm per smmu instance. This naturally gives a 1:1 relationship between SVA domain and mmu notifier. It is a significant simplification of the flow, as we end up with a single struct arm_smmu_domain for each MM and the invalidation can then be shifted to properly use the masters list like S1/S2 do. Remove all of the previous mmu_notifier, bond, shared cd, and cd refcount logic entirely. The logic here is tightly wound together with the unused BTM support.
Since the BTM logic requires holding all the iommu_domains in a global ASID xarray it conflicts with the design to have a single SVA domain per PASID, as multiple SMMU instances will need to have different domains. Following patches resolve this by making the ASID xarray per-instance instead of global. However, converting the BTM code over to this methodology requires many changes. Thus, since ARM_SMMU_FEAT_BTM is never enabled, remove the parts of the BTM support for ASID sharing that interact with SVA as well. A followup series is already working on fully enabling the BTM support, that requires iommufd's VIOMMU feature to bring in the KVM's VMID as well. It will come with an already written patch to bring back the ASID sharing using a per-instance ASID xarray. https://lore.kernel.org/linux-iommu/20240208151837.35068-1-shameerali.kolothum.thodi@huawei.com/ https://lore.kernel.org/linux-iommu/26-v6-228e7adf25eb+4155-smmuv3_newapi_p2_jgg@nvidia.com/ Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Nicolin Chen Reviewed-by: Michael Shavit Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/10-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 395 +++--------------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 69 +-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 15 +- 3 files changed, 86 insertions(+), 393 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index aa033cd65adc..a7c36654dee5 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -13,29 +13,9 @@ #include "arm-smmu-v3.h" #include "../../io-pgtable-arm.h" -struct arm_smmu_mmu_notifier { - struct mmu_notifier mn; - struct arm_smmu_ctx_desc *cd; - bool cleared; - refcount_t refs; - struct list_head list; - struct arm_smmu_domain *domain; 
-}; - -#define mn_to_smmu(mn) container_of(mn, struct arm_smmu_mmu_notifier, mn) - -struct arm_smmu_bond { - struct mm_struct *mm; - struct arm_smmu_mmu_notifier *smmu_mn; - struct list_head list; -}; - -#define sva_to_bond(handle) \ - container_of(handle, struct arm_smmu_bond, sva) - static DEFINE_MUTEX(sva_lock); -static void +static void __maybe_unused arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain) { struct arm_smmu_master_domain *master_domain; @@ -58,58 +38,6 @@ arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain) spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); } -/* - * Check if the CPU ASID is available on the SMMU side. If a private context - * descriptor is using it, try to replace it. - */ -static struct arm_smmu_ctx_desc * -arm_smmu_share_asid(struct mm_struct *mm, u16 asid) -{ - int ret; - u32 new_asid; - struct arm_smmu_ctx_desc *cd; - struct arm_smmu_device *smmu; - struct arm_smmu_domain *smmu_domain; - - cd = xa_load(&arm_smmu_asid_xa, asid); - if (!cd) - return NULL; - - if (cd->mm) { - if (WARN_ON(cd->mm != mm)) - return ERR_PTR(-EINVAL); - /* All devices bound to this mm use the same cd struct. */ - refcount_inc(&cd->refs); - return cd; - } - - smmu_domain = container_of(cd, struct arm_smmu_domain, cd); - smmu = smmu_domain->smmu; - - ret = xa_alloc(&arm_smmu_asid_xa, &new_asid, cd, - XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); - if (ret) - return ERR_PTR(-ENOSPC); - /* - * Race with unmap: TLB invalidations will start targeting the new ASID, - * which isn't assigned yet. We'll do an invalidate-all on the old ASID - * later, so it doesn't matter. - */ - cd->asid = new_asid; - /* - * Update ASID and invalidate CD in all associated masters. There will - * be some overlap between use of both ASIDs, until we invalidate the - * TLB. 
- */ - arm_smmu_update_s1_domain_cd_entry(smmu_domain); - - /* Invalidate TLB entries previously associated with that context */ - arm_smmu_tlb_inv_asid(smmu, asid); - - xa_erase(&arm_smmu_asid_xa, asid); - return NULL; -} - static u64 page_size_to_cd(void) { static_assert(PAGE_SIZE == SZ_4K || PAGE_SIZE == SZ_16K || @@ -187,69 +115,6 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, } EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_sva_cd); -static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm) -{ - u16 asid; - int err = 0; - struct arm_smmu_ctx_desc *cd; - struct arm_smmu_ctx_desc *ret = NULL; - - /* Don't free the mm until we release the ASID */ - mmgrab(mm); - - asid = arm64_mm_context_get(mm); - if (!asid) { - err = -ESRCH; - goto out_drop_mm; - } - - cd = kzalloc(sizeof(*cd), GFP_KERNEL); - if (!cd) { - err = -ENOMEM; - goto out_put_context; - } - - refcount_set(&cd->refs, 1); - - mutex_lock(&arm_smmu_asid_lock); - ret = arm_smmu_share_asid(mm, asid); - if (ret) { - mutex_unlock(&arm_smmu_asid_lock); - goto out_free_cd; - } - - err = xa_insert(&arm_smmu_asid_xa, asid, cd, GFP_KERNEL); - mutex_unlock(&arm_smmu_asid_lock); - - if (err) - goto out_free_asid; - - cd->asid = asid; - cd->mm = mm; - - return cd; - -out_free_asid: - arm_smmu_free_asid(cd); -out_free_cd: - kfree(cd); -out_put_context: - arm64_mm_context_put(mm); -out_drop_mm: - mmdrop(mm); - return err < 0 ? 
ERR_PTR(err) : ret; -} - -static void arm_smmu_free_shared_cd(struct arm_smmu_ctx_desc *cd) -{ - if (arm_smmu_free_asid(cd)) { - /* Unpin ASID */ - arm64_mm_context_put(cd->mm); - mmdrop(cd->mm); - kfree(cd); - } -} - /* * Cloned from the MAX_TLBI_OPS in arch/arm64/include/asm/tlbflush.h, this * is used as a threshold to replace per-page TLBI commands to issue in the @@ -264,8 +129,8 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, unsigned long start, unsigned long end) { - struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn); - struct arm_smmu_domain *smmu_domain = smmu_mn->domain; + struct arm_smmu_domain *smmu_domain = + container_of(mn, struct arm_smmu_domain, mmu_notifier); size_t size; /* @@ -282,34 +147,22 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, size = 0; } - if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) { - if (!size) - arm_smmu_tlb_inv_asid(smmu_domain->smmu, - smmu_mn->cd->asid); - else - arm_smmu_tlb_inv_range_asid(start, size, - smmu_mn->cd->asid, - PAGE_SIZE, false, - smmu_domain); - } + if (!size) + arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid); + else + arm_smmu_tlb_inv_range_asid(start, size, smmu_domain->cd.asid, + PAGE_SIZE, false, smmu_domain); - arm_smmu_atc_inv_domain_sva(smmu_domain, mm_get_enqcmd_pasid(mm), start, - size); + arm_smmu_atc_inv_domain(smmu_domain, start, size); } static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) { - struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn); - struct arm_smmu_domain *smmu_domain = smmu_mn->domain; + struct arm_smmu_domain *smmu_domain = + container_of(mn, struct arm_smmu_domain, mmu_notifier); struct arm_smmu_master_domain *master_domain; unsigned long flags; - mutex_lock(&sva_lock); - if (smmu_mn->cleared) { - mutex_unlock(&sva_lock); - return; - } - /* * DMA may still be running. Keep the cd valid to avoid C_BAD_CD events, * but disable translation. 
@@ -321,25 +174,23 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) struct arm_smmu_cd target; struct arm_smmu_cd *cdptr; - cdptr = arm_smmu_get_cd_ptr(master, mm_get_enqcmd_pasid(mm)); + cdptr = arm_smmu_get_cd_ptr(master, master_domain->ssid); if (WARN_ON(!cdptr)) continue; - arm_smmu_make_sva_cd(&target, master, NULL, smmu_mn->cd->asid); - arm_smmu_write_cd_entry(master, mm_get_enqcmd_pasid(mm), cdptr, + arm_smmu_make_sva_cd(&target, master, NULL, + smmu_domain->cd.asid); + arm_smmu_write_cd_entry(master, master_domain->ssid, cdptr, &target); } spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); - arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid); - arm_smmu_atc_inv_domain_sva(smmu_domain, mm_get_enqcmd_pasid(mm), 0, 0); - - smmu_mn->cleared = true; - mutex_unlock(&sva_lock); + arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid); + arm_smmu_atc_inv_domain(smmu_domain, 0, 0); } static void arm_smmu_mmu_notifier_free(struct mmu_notifier *mn) { - kfree(mn_to_smmu(mn)); + kfree(container_of(mn, struct arm_smmu_domain, mmu_notifier)); } static const struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = { @@ -348,115 +199,6 @@ static const struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = { .free_notifier = arm_smmu_mmu_notifier_free, }; -/* Allocate or get existing MMU notifier for this {domain, mm} pair */ -static struct arm_smmu_mmu_notifier * -arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain, - struct mm_struct *mm) -{ - int ret; - struct arm_smmu_ctx_desc *cd; - struct arm_smmu_mmu_notifier *smmu_mn; - - list_for_each_entry(smmu_mn, &smmu_domain->mmu_notifiers, list) { - if (smmu_mn->mn.mm == mm) { - refcount_inc(&smmu_mn->refs); - return smmu_mn; - } - } - - cd = arm_smmu_alloc_shared_cd(mm); - if (IS_ERR(cd)) - return ERR_CAST(cd); - - smmu_mn = kzalloc(sizeof(*smmu_mn), GFP_KERNEL); - if (!smmu_mn) { - ret = -ENOMEM; - goto err_free_cd; - } - - refcount_set(&smmu_mn->refs, 1); - 
smmu_mn->cd = cd; - smmu_mn->domain = smmu_domain; - smmu_mn->mn.ops = &arm_smmu_mmu_notifier_ops; - - ret = mmu_notifier_register(&smmu_mn->mn, mm); - if (ret) { - kfree(smmu_mn); - goto err_free_cd; - } - - list_add(&smmu_mn->list, &smmu_domain->mmu_notifiers); - return smmu_mn; - -err_free_cd: - arm_smmu_free_shared_cd(cd); - return ERR_PTR(ret); -} - -static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) -{ - struct mm_struct *mm = smmu_mn->mn.mm; - struct arm_smmu_ctx_desc *cd = smmu_mn->cd; - struct arm_smmu_domain *smmu_domain = smmu_mn->domain; - - if (!refcount_dec_and_test(&smmu_mn->refs)) - return; - - list_del(&smmu_mn->list); - - /* - * If we went through clear(), we've already invalidated, and no - * new TLB entry can have been formed. - */ - if (!smmu_mn->cleared) { - arm_smmu_tlb_inv_asid(smmu_domain->smmu, cd->asid); - arm_smmu_atc_inv_domain_sva(smmu_domain, - mm_get_enqcmd_pasid(mm), 0, 0); - } - - /* Frees smmu_mn */ - mmu_notifier_put(&smmu_mn->mn); - arm_smmu_free_shared_cd(cd); -} - -static struct arm_smmu_bond *__arm_smmu_sva_bind(struct device *dev, - struct mm_struct *mm) -{ - int ret; - struct arm_smmu_bond *bond; - struct arm_smmu_master *master = dev_iommu_priv_get(dev); - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct arm_smmu_domain *smmu_domain; - - if (!(domain->type & __IOMMU_DOMAIN_PAGING)) - return ERR_PTR(-ENODEV); - smmu_domain = to_smmu_domain(domain); - if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) - return ERR_PTR(-ENODEV); - - if (!master || !master->sva_enabled) - return ERR_PTR(-ENODEV); - - bond = kzalloc(sizeof(*bond), GFP_KERNEL); - if (!bond) - return ERR_PTR(-ENOMEM); - - bond->mm = mm; - - bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm); - if (IS_ERR(bond->smmu_mn)) { - ret = PTR_ERR(bond->smmu_mn); - goto err_free_bond; - } - - list_add(&bond->list, &master->bonds); - return bond; - -err_free_bond: - kfree(bond); - return ERR_PTR(ret); -} - bool 
arm_smmu_sva_supported(struct arm_smmu_device *smmu) { unsigned long reg, fld; @@ -573,11 +315,6 @@ int arm_smmu_master_enable_sva(struct arm_smmu_master *master) int arm_smmu_master_disable_sva(struct arm_smmu_master *master) { mutex_lock(&sva_lock); - if (!list_empty(&master->bonds)) { - dev_err(master->dev, "cannot disable SVA, device is bound\n"); - mutex_unlock(&sva_lock); - return -EBUSY; - } arm_smmu_master_sva_disable_iopf(master); master->sva_enabled = false; mutex_unlock(&sva_lock); @@ -594,66 +331,51 @@ void arm_smmu_sva_notifier_synchronize(void) mmu_notifier_synchronize(); } -void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, - struct device *dev, ioasid_t id) -{ - struct mm_struct *mm = domain->mm; - struct arm_smmu_bond *bond = NULL, *t; - struct arm_smmu_master *master = dev_iommu_priv_get(dev); - - arm_smmu_remove_pasid(master, to_smmu_domain(domain), id); - - mutex_lock(&sva_lock); - list_for_each_entry(t, &master->bonds, list) { - if (t->mm == mm) { - bond = t; - break; - } - } - - if (!WARN_ON(!bond)) { - list_del(&bond->list); - arm_smmu_mmu_notifier_put(bond->smmu_mn); - kfree(bond); - } - mutex_unlock(&sva_lock); -} - static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t id) { + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_master *master = dev_iommu_priv_get(dev); - struct mm_struct *mm = domain->mm; - struct arm_smmu_bond *bond; struct arm_smmu_cd target; int ret; - if (mm_get_enqcmd_pasid(mm) != id) + /* Prevent arm_smmu_mm_release from being called while we are attaching */ + if (!mmget_not_zero(domain->mm)) return -EINVAL; - mutex_lock(&sva_lock); - bond = __arm_smmu_sva_bind(dev, mm); - if (IS_ERR(bond)) { - mutex_unlock(&sva_lock); - return PTR_ERR(bond); - } + /* + * This does not need the arm_smmu_asid_lock because SVA domains never + * get reassigned + */ + arm_smmu_make_sva_cd(&target, master, domain->mm, smmu_domain->cd.asid); + ret = 
arm_smmu_set_pasid(master, smmu_domain, id, &target); - arm_smmu_make_sva_cd(&target, master, mm, bond->smmu_mn->cd->asid); - ret = arm_smmu_set_pasid(master, to_smmu_domain(domain), id, &target); - if (ret) { - list_del(&bond->list); - arm_smmu_mmu_notifier_put(bond->smmu_mn); - kfree(bond); - mutex_unlock(&sva_lock); - return ret; - } - mutex_unlock(&sva_lock); - return 0; + mmput(domain->mm); + return ret; } static void arm_smmu_sva_domain_free(struct iommu_domain *domain) { - kfree(to_smmu_domain(domain)); + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + + /* + * Ensure the ASID is empty in the iommu cache before allowing reuse. + */ + arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid); + + /* + * Notice that the arm_smmu_mm_arch_invalidate_secondary_tlbs op can + * still be called/running at this point. We allow the ASID to be + * reused, and if there is a race then it just suffers harmless + * unnecessary invalidation. + */ + xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid); + + /* + * Actual free is deferred to the SRCU callback + * arm_smmu_mmu_notifier_free() + */ + mmu_notifier_put(&smmu_domain->mmu_notifier); } static const struct iommu_domain_ops arm_smmu_sva_domain_ops = { @@ -667,6 +389,8 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, struct arm_smmu_master *master = dev_iommu_priv_get(dev); struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_domain *smmu_domain; + u32 asid; + int ret; smmu_domain = arm_smmu_domain_alloc(); if (IS_ERR(smmu_domain)) @@ -675,5 +399,22 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, smmu_domain->domain.ops = &arm_smmu_sva_domain_ops; smmu_domain->smmu = smmu; + ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain, + XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); + if (ret) + goto err_free; + + smmu_domain->cd.asid = asid; + smmu_domain->mmu_notifier.ops = &arm_smmu_mmu_notifier_ops; + ret = 
mmu_notifier_register(&smmu_domain->mmu_notifier, mm); + if (ret) + goto err_asid; + return &smmu_domain->domain; + +err_asid: + xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid); +err_free: + kfree(smmu_domain); + return ERR_PTR(ret); } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 0c2ffd25419d..148690f5b624 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1439,22 +1439,6 @@ static void arm_smmu_free_cd_tables(struct arm_smmu_master *master) cd_table->cdtab = NULL; } -bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd) -{ - bool free; - struct arm_smmu_ctx_desc *old_cd; - - if (!cd->asid) - return false; - - free = refcount_dec_and_test(&cd->refs); - if (free) { - old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid); - WARN_ON(old_cd != cd); - } - return free; -} - /* Stream table manipulation functions */ static void arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc) @@ -2023,8 +2007,8 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master, return arm_smmu_cmdq_batch_submit(master->smmu, &cmds); } -static int __arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, - ioasid_t ssid, unsigned long iova, size_t size) +int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, + unsigned long iova, size_t size) { struct arm_smmu_master_domain *master_domain; int i; @@ -2062,15 +2046,7 @@ static int __arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, if (!master->ats_enabled) continue; - /* - * Non-zero ssid means SVA is co-opting the S1 domain to issue - * invalidations for SVA PASIDs. 
- */ - if (ssid != IOMMU_NO_PASID) - arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd); - else - arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size, - &cmd); + arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size, &cmd); for (i = 0; i < master->num_streams; i++) { cmd.atc.sid = master->streams[i].id; @@ -2082,19 +2058,6 @@ static int __arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds); } -static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, - unsigned long iova, size_t size) -{ - return __arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, - size); -} - -int arm_smmu_atc_inv_domain_sva(struct arm_smmu_domain *smmu_domain, - ioasid_t ssid, unsigned long iova, size_t size) -{ - return __arm_smmu_atc_inv_domain(smmu_domain, ssid, iova, size); -} - /* IO_PGTABLE API */ static void arm_smmu_tlb_inv_context(void *cookie) { @@ -2283,7 +2246,6 @@ struct arm_smmu_domain *arm_smmu_domain_alloc(void) mutex_init(&smmu_domain->init_mutex); INIT_LIST_HEAD(&smmu_domain->devices); spin_lock_init(&smmu_domain->devices_lock); - INIT_LIST_HEAD(&smmu_domain->mmu_notifiers); return smmu_domain; } @@ -2325,7 +2287,7 @@ static void arm_smmu_domain_free_paging(struct iommu_domain *domain) if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { /* Prevent SVA from touching the CD while we're freeing it */ mutex_lock(&arm_smmu_asid_lock); - arm_smmu_free_asid(&smmu_domain->cd); + xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid); mutex_unlock(&arm_smmu_asid_lock); } else { struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; @@ -2343,11 +2305,9 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu, u32 asid = 0; struct arm_smmu_ctx_desc *cd = &smmu_domain->cd; - refcount_set(&cd->refs, 1); - /* Prevent SVA from modifying the ASID until it is written to the CD */ mutex_lock(&arm_smmu_asid_lock); - ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd, + ret = xa_alloc(&arm_smmu_asid_xa, 
&asid, smmu_domain, XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); cd->asid = (u16)asid; mutex_unlock(&arm_smmu_asid_lock); @@ -2834,6 +2794,9 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master, /* The core code validates pasid */ + if (smmu_domain->smmu != master->smmu) + return -EINVAL; + if (!master->cd_table.in_ste) return -ENODEV; @@ -2855,9 +2818,14 @@ out_unlock: return ret; } -void arm_smmu_remove_pasid(struct arm_smmu_master *master, - struct arm_smmu_domain *smmu_domain, ioasid_t pasid) +static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid, + struct iommu_domain *domain) { + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_domain *smmu_domain; + + smmu_domain = to_smmu_domain(domain); + mutex_lock(&arm_smmu_asid_lock); arm_smmu_clear_cd(master, pasid); if (master->ats_enabled) @@ -3128,7 +3096,6 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) master->dev = dev; master->smmu = smmu; - INIT_LIST_HEAD(&master->bonds); dev_iommu_priv_set(dev, master); ret = arm_smmu_insert_master(smmu, master); @@ -3310,12 +3277,6 @@ static int arm_smmu_def_domain_type(struct device *dev) return 0; } -static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid, - struct iommu_domain *domain) -{ - arm_smmu_sva_remove_dev_pasid(domain, dev, pasid); -} - static struct iommu_ops arm_smmu_ops = { .identity_domain = &arm_smmu_identity_domain, .blocked_domain = &arm_smmu_blocked_domain, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 212c18c70fa0..d175d9eee6c6 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -587,9 +587,6 @@ struct arm_smmu_strtab_l1_desc { struct arm_smmu_ctx_desc { u16 asid; - - refcount_t refs; - struct mm_struct *mm; }; struct arm_smmu_l1_ctx_desc { @@ -712,7 +709,6 @@ struct arm_smmu_master { bool stall_enabled; bool sva_enabled; bool 
iopf_enabled; - struct list_head bonds; unsigned int ssid_bits; }; @@ -741,7 +737,7 @@ struct arm_smmu_domain { struct list_head devices; spinlock_t devices_lock; - struct list_head mmu_notifiers; + struct mmu_notifier mmu_notifier; }; /* The following are exposed for testing purposes. */ @@ -805,16 +801,13 @@ void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, int arm_smmu_set_pasid(struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain, ioasid_t pasid, const struct arm_smmu_cd *cd); -void arm_smmu_remove_pasid(struct arm_smmu_master *master, - struct arm_smmu_domain *smmu_domain, ioasid_t pasid); void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid); void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, size_t granule, bool leaf, struct arm_smmu_domain *smmu_domain); -bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd); -int arm_smmu_atc_inv_domain_sva(struct arm_smmu_domain *smmu_domain, - ioasid_t ssid, unsigned long iova, size_t size); +int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, + unsigned long iova, size_t size); #ifdef CONFIG_ARM_SMMU_V3_SVA bool arm_smmu_sva_supported(struct arm_smmu_device *smmu); @@ -826,8 +819,6 @@ bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master); void arm_smmu_sva_notifier_synchronize(void); struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, struct mm_struct *mm); -void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, - struct device *dev, ioasid_t id); #else /* CONFIG_ARM_SMMU_V3_SVA */ static inline bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) { From ce26ea9e6e12df01432bd2a1cb8cbfa025b8a977 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:42 -0300 Subject: [PATCH 25/72] iommu/arm-smmu-v3: Allow IDENTITY/BLOCKED to be set while PASID is used The HW supports this, use the S1DSS bits to configure the behavior of SSID=0 which is the RID's translation. 
If SSID's are currently being used in the CD table then just update the S1DSS bits in the STE, remove the master_domain and leave ATS alone. For iommufd the driver design has a small problem that all the unused CD table entries are set with V=0 which will generate an event if VFIO userspace tries to use the CD entry. This patch extends this problem to include the RID as well if PASID is being used. For BLOCKED with used PASIDs the F_STREAM_DISABLED (STRTAB_STE_1_S1DSS_TERMINATE) event is generated on untagged traffic and a substream CD table entry with V=0 (removed pasid) will generate C_BAD_CD. Arguably there is no advantage to using S1DSS over the CD entry 0 with V=0. As we don't yet support PASID in iommufd this is a problem to resolve later, possibly by using EPD0 for unused CD table entries instead of V=0, and not using S1DSS for BLOCKED. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/11-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c | 2 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 60 +++++++++++++++---- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 4 +- 3 files changed, 50 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c index a460b71f5857..d7e022bb9df5 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c @@ -164,7 +164,7 @@ static void arm_smmu_test_make_cdtable_ste(struct arm_smmu_ste *ste, .smmu = &smmu, }; - arm_smmu_make_cdtable_ste(ste, &master, true); + arm_smmu_make_cdtable_ste(ste, &master, true, STRTAB_STE_1_S1DSS_SSID0); } static void arm_smmu_v3_write_ste_test_bypass_to_abort(struct kunit *test) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c 
b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 148690f5b624..391deb899170 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -991,6 +991,14 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW | STRTAB_STE_1_EATS); used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID); + + /* + * See 13.5 Summary of attribute/permission configuration fields + * for the SHCFG behavior. + */ + if (FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) == + STRTAB_STE_1_S1DSS_BYPASS) + used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG); } /* S2 translates */ @@ -1531,7 +1539,8 @@ EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste); VISIBLE_IF_KUNIT void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, - struct arm_smmu_master *master, bool ats_enabled) + struct arm_smmu_master *master, bool ats_enabled, + unsigned int s1dss) { struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; struct arm_smmu_device *smmu = master->smmu; @@ -1545,7 +1554,7 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax)); target->data[1] = cpu_to_le64( - FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) | + FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) | FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) | @@ -1556,6 +1565,11 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, FIELD_PREP(STRTAB_STE_1_EATS, ats_enabled ? 
STRTAB_STE_1_EATS_TRANS : 0)); + if ((smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) && + s1dss == STRTAB_STE_1_S1DSS_BYPASS) + target->data[1] |= cpu_to_le64(FIELD_PREP( + STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING)); + if (smmu->features & ARM_SMMU_FEAT_E2H) { /* * To support BTM the streamworld needs to match the @@ -2579,6 +2593,7 @@ struct arm_smmu_attach_state { /* Inputs */ struct iommu_domain *old_domain; struct arm_smmu_master *master; + bool cd_needs_ats; ioasid_t ssid; /* Resulting state */ bool ats_enabled; @@ -2620,7 +2635,7 @@ static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, */ lockdep_assert_held(&arm_smmu_asid_lock); - if (smmu_domain) { + if (smmu_domain || state->cd_needs_ats) { /* * The SMMU does not support enabling ATS with bypass/abort. * When the STE is in bypass (STE.Config[2:0] == 0b100), ATS @@ -2632,7 +2647,9 @@ static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, * tables. */ state->ats_enabled = arm_smmu_ats_supported(master); + } + if (smmu_domain) { master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL); if (!master_domain) return -ENOMEM; @@ -2760,7 +2777,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) arm_smmu_make_s1_cd(&target_cd, master, smmu_domain); arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr, &target_cd); - arm_smmu_make_cdtable_ste(&target, master, state.ats_enabled); + arm_smmu_make_cdtable_ste(&target, master, state.ats_enabled, + STRTAB_STE_1_S1DSS_SSID0); arm_smmu_install_ste_for_dev(master, &target); break; } @@ -2834,8 +2852,10 @@ static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid, mutex_unlock(&arm_smmu_asid_lock); } -static int arm_smmu_attach_dev_ste(struct iommu_domain *domain, - struct device *dev, struct arm_smmu_ste *ste) +static void arm_smmu_attach_dev_ste(struct iommu_domain *domain, + struct device *dev, + struct arm_smmu_ste *ste, + unsigned int s1dss) { struct arm_smmu_master *master = 
dev_iommu_priv_get(dev); struct arm_smmu_attach_state state = { @@ -2844,16 +2864,28 @@ static int arm_smmu_attach_dev_ste(struct iommu_domain *domain, .ssid = IOMMU_NO_PASID, }; - if (arm_smmu_ssids_in_use(&master->cd_table)) - return -EBUSY; - /* * Do not allow any ASID to be changed while are working on the STE, * otherwise we could miss invalidations. */ mutex_lock(&arm_smmu_asid_lock); - arm_smmu_attach_prepare(&state, domain); + /* + * If the CD table is not in use we can use the provided STE, otherwise + * we use a cdtable STE with the provided S1DSS. + */ + if (arm_smmu_ssids_in_use(&master->cd_table)) { + /* + * If a CD table has to be present then we need to run with ATS + * on even though the RID will fail ATS queries with UR. This is + * because we have no idea what the PASID's need. + */ + state.cd_needs_ats = true; + arm_smmu_attach_prepare(&state, domain); + arm_smmu_make_cdtable_ste(ste, master, state.ats_enabled, s1dss); + } else { + arm_smmu_attach_prepare(&state, domain); + } arm_smmu_install_ste_for_dev(master, ste); arm_smmu_attach_commit(&state); mutex_unlock(&arm_smmu_asid_lock); @@ -2864,7 +2896,6 @@ static int arm_smmu_attach_dev_ste(struct iommu_domain *domain, * descriptor from arm_smmu_share_asid(). 
*/ arm_smmu_clear_cd(master, IOMMU_NO_PASID); - return 0; } static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, @@ -2874,7 +2905,8 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, struct arm_smmu_master *master = dev_iommu_priv_get(dev); arm_smmu_make_bypass_ste(master->smmu, &ste); - return arm_smmu_attach_dev_ste(domain, dev, &ste); + arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_BYPASS); + return 0; } static const struct iommu_domain_ops arm_smmu_identity_ops = { @@ -2892,7 +2924,9 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain, struct arm_smmu_ste ste; arm_smmu_make_abort_ste(&ste); - return arm_smmu_attach_dev_ste(domain, dev, &ste); + arm_smmu_attach_dev_ste(domain, dev, &ste, + STRTAB_STE_1_S1DSS_TERMINATE); + return 0; } static const struct iommu_domain_ops arm_smmu_blocked_ops = { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index d175d9eee6c6..30459a800c7b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -761,8 +761,8 @@ void arm_smmu_make_abort_ste(struct arm_smmu_ste *target); void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu, struct arm_smmu_ste *target); void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, - struct arm_smmu_master *master, - bool ats_enabled); + struct arm_smmu_master *master, bool ats_enabled, + unsigned int s1dss); void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain, From 3b5302cbb06af6b62022360066944a1ff6aea0d1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:43 -0300 Subject: [PATCH 26/72] iommu/arm-smmu-v3: Test the STE S1DSS functionality S1DSS brings in quite a few new transition pairs that are interesting. Test to/from S1DSS_BYPASS <-> S1DSS_SSID0, and BYPASS <-> S1DSS_SSID0. 
Test a contrived non-hitless flow to make sure that the logic works. Tested-by: Nicolin Chen Signed-off-by: Michael Shavit Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/12-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon --- .../iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c | 113 +++++++++++++++++- 1 file changed, 108 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c index d7e022bb9df5..e0fce31eba54 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c @@ -144,6 +144,14 @@ static void arm_smmu_v3_test_ste_expect_transition( KUNIT_EXPECT_MEMEQ(test, target->data, cur_copy.data, sizeof(cur_copy)); } +static void arm_smmu_v3_test_ste_expect_non_hitless_transition( + struct kunit *test, const struct arm_smmu_ste *cur, + const struct arm_smmu_ste *target, unsigned int num_syncs_expected) +{ + arm_smmu_v3_test_ste_expect_transition(test, cur, target, + num_syncs_expected, false); +} + static void arm_smmu_v3_test_ste_expect_hitless_transition( struct kunit *test, const struct arm_smmu_ste *cur, const struct arm_smmu_ste *target, unsigned int num_syncs_expected) @@ -155,6 +163,7 @@ static void arm_smmu_v3_test_ste_expect_hitless_transition( static const dma_addr_t fake_cdtab_dma_addr = 0xF0F0F0F0F0F0; static void arm_smmu_test_make_cdtable_ste(struct arm_smmu_ste *ste, + unsigned int s1dss, const dma_addr_t dma_addr) { struct arm_smmu_master master = { @@ -164,7 +173,7 @@ static void arm_smmu_test_make_cdtable_ste(struct arm_smmu_ste *ste, .smmu = &smmu, }; - arm_smmu_make_cdtable_ste(ste, &master, true, STRTAB_STE_1_S1DSS_SSID0); + arm_smmu_make_cdtable_ste(ste, &master, true, s1dss); } static void arm_smmu_v3_write_ste_test_bypass_to_abort(struct kunit *test) @@ -194,7 +203,8 @@ static void 
arm_smmu_v3_write_ste_test_cdtable_to_abort(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &abort_ste, NUM_EXPECTED_SYNCS(2)); } @@ -203,7 +213,8 @@ static void arm_smmu_v3_write_ste_test_abort_to_cdtable(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); arm_smmu_v3_test_ste_expect_hitless_transition(test, &abort_ste, &ste, NUM_EXPECTED_SYNCS(2)); } @@ -212,7 +223,8 @@ static void arm_smmu_v3_write_ste_test_cdtable_to_bypass(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &bypass_ste, NUM_EXPECTED_SYNCS(3)); } @@ -221,11 +233,54 @@ static void arm_smmu_v3_write_ste_test_bypass_to_cdtable(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); arm_smmu_v3_test_ste_expect_hitless_transition(test, &bypass_ste, &ste, NUM_EXPECTED_SYNCS(3)); } +static void arm_smmu_v3_write_ste_test_cdtable_s1dss_change(struct kunit *test) +{ + struct arm_smmu_ste ste; + struct arm_smmu_ste s1dss_bypass; + + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS, + fake_cdtab_dma_addr); + + /* + * Flipping s1dss on a CD table STE only involves changes to the second + * qword of an STE and can be done in a single write. 
+ */ + arm_smmu_v3_test_ste_expect_hitless_transition( + test, &ste, &s1dss_bypass, NUM_EXPECTED_SYNCS(1)); + arm_smmu_v3_test_ste_expect_hitless_transition( + test, &s1dss_bypass, &ste, NUM_EXPECTED_SYNCS(1)); +} + +static void +arm_smmu_v3_write_ste_test_s1dssbypass_to_stebypass(struct kunit *test) +{ + struct arm_smmu_ste s1dss_bypass; + + arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS, + fake_cdtab_dma_addr); + arm_smmu_v3_test_ste_expect_hitless_transition( + test, &s1dss_bypass, &bypass_ste, NUM_EXPECTED_SYNCS(2)); +} + +static void +arm_smmu_v3_write_ste_test_stebypass_to_s1dssbypass(struct kunit *test) +{ + struct arm_smmu_ste s1dss_bypass; + + arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS, + fake_cdtab_dma_addr); + arm_smmu_v3_test_ste_expect_hitless_transition( + test, &bypass_ste, &s1dss_bypass, NUM_EXPECTED_SYNCS(2)); +} + static void arm_smmu_test_make_s2_ste(struct arm_smmu_ste *ste, bool ats_enabled) { @@ -285,6 +340,48 @@ static void arm_smmu_v3_write_ste_test_bypass_to_s2(struct kunit *test) NUM_EXPECTED_SYNCS(2)); } +static void arm_smmu_v3_write_ste_test_s1_to_s2(struct kunit *test) +{ + struct arm_smmu_ste s1_ste; + struct arm_smmu_ste s2_ste; + + arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); + arm_smmu_test_make_s2_ste(&s2_ste, true); + arm_smmu_v3_test_ste_expect_hitless_transition(test, &s1_ste, &s2_ste, + NUM_EXPECTED_SYNCS(3)); +} + +static void arm_smmu_v3_write_ste_test_s2_to_s1(struct kunit *test) +{ + struct arm_smmu_ste s1_ste; + struct arm_smmu_ste s2_ste; + + arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); + arm_smmu_test_make_s2_ste(&s2_ste, true); + arm_smmu_v3_test_ste_expect_hitless_transition(test, &s2_ste, &s1_ste, + NUM_EXPECTED_SYNCS(3)); +} + +static void arm_smmu_v3_write_ste_test_non_hitless(struct kunit *test) +{ + struct arm_smmu_ste ste; + struct arm_smmu_ste ste_2; + + /* + * Although 
no flow resembles this in practice, one way to force an STE + * update to be non-hitless is to change its CD table pointer as well as + * s1 dss field in the same update. + */ + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste_2, STRTAB_STE_1_S1DSS_BYPASS, + 0x4B4B4b4B4B); + arm_smmu_v3_test_ste_expect_non_hitless_transition( + test, &ste, &ste_2, NUM_EXPECTED_SYNCS(3)); +} + static void arm_smmu_v3_test_cd_expect_transition( struct kunit *test, const struct arm_smmu_cd *cur, const struct arm_smmu_cd *target, unsigned int num_syncs_expected, @@ -438,10 +535,16 @@ static struct kunit_case arm_smmu_v3_test_cases[] = { KUNIT_CASE(arm_smmu_v3_write_ste_test_abort_to_cdtable), KUNIT_CASE(arm_smmu_v3_write_ste_test_cdtable_to_bypass), KUNIT_CASE(arm_smmu_v3_write_ste_test_bypass_to_cdtable), + KUNIT_CASE(arm_smmu_v3_write_ste_test_cdtable_s1dss_change), + KUNIT_CASE(arm_smmu_v3_write_ste_test_s1dssbypass_to_stebypass), + KUNIT_CASE(arm_smmu_v3_write_ste_test_stebypass_to_s1dssbypass), KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_abort), KUNIT_CASE(arm_smmu_v3_write_ste_test_abort_to_s2), KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_bypass), KUNIT_CASE(arm_smmu_v3_write_ste_test_bypass_to_s2), + KUNIT_CASE(arm_smmu_v3_write_ste_test_s1_to_s2), + KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_s1), + KUNIT_CASE(arm_smmu_v3_write_ste_test_non_hitless), KUNIT_CASE(arm_smmu_v3_write_cd_test_s1_clear), KUNIT_CASE(arm_smmu_v3_write_cd_test_s1_change_asid), KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_clear), From 8ee9175c25827240dd84a7adffbfa9c16938ac5d Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:44 -0300 Subject: [PATCH 27/72] iommu/arm-smmu-v3: Allow a PASID to be set when RID is IDENTITY/BLOCKED If the STE doesn't point to the CD table we can upgrade it by reprogramming the STE with the appropriate S1DSS. We may also need to turn on ATS at the same time. 
Keep track if the installed STE is pointing at the cd_table and the ATS state to trigger this path. Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/13-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 49 ++++++++++++++++++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 391deb899170..8e661442b532 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2435,6 +2435,9 @@ static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master, master->cd_table.in_ste = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(target->data[0])) == STRTAB_STE_0_CFG_S1_TRANS; + master->ste_ats_enabled = + FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(target->data[1])) == + STRTAB_STE_1_EATS_TRANS; for (i = 0; i < master->num_streams; ++i) { u32 sid = master->streams[i].id; @@ -2795,10 +2798,36 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return 0; } +static void arm_smmu_update_ste(struct arm_smmu_master *master, + struct iommu_domain *sid_domain, + bool ats_enabled) +{ + unsigned int s1dss = STRTAB_STE_1_S1DSS_TERMINATE; + struct arm_smmu_ste ste; + + if (master->cd_table.in_ste && master->ste_ats_enabled == ats_enabled) + return; + + if (sid_domain->type == IOMMU_DOMAIN_IDENTITY) + s1dss = STRTAB_STE_1_S1DSS_BYPASS; + else + WARN_ON(sid_domain->type != IOMMU_DOMAIN_BLOCKED); + + /* + * Change the STE into a cdtable one with SID IDENTITY/BLOCKED behavior + * using s1dss if necessary. If the cd_table is already installed then + * the S1DSS is correct and this will just update the EATS. Otherwise it + * installs the entire thing. 
This will be hitless. + */ + arm_smmu_make_cdtable_ste(&ste, master, ats_enabled, s1dss); + arm_smmu_install_ste_for_dev(master, &ste); +} + int arm_smmu_set_pasid(struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain, ioasid_t pasid, const struct arm_smmu_cd *cd) { + struct iommu_domain *sid_domain = iommu_get_domain_for_dev(master->dev); struct arm_smmu_attach_state state = { .master = master, /* @@ -2815,8 +2844,10 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master, if (smmu_domain->smmu != master->smmu) return -EINVAL; - if (!master->cd_table.in_ste) - return -ENODEV; + if (!master->cd_table.in_ste && + sid_domain->type != IOMMU_DOMAIN_IDENTITY && + sid_domain->type != IOMMU_DOMAIN_BLOCKED) + return -EINVAL; cdptr = arm_smmu_alloc_cd_ptr(master, pasid); if (!cdptr) @@ -2828,6 +2859,7 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master, goto out_unlock; arm_smmu_write_cd_entry(master, pasid, cdptr, cd); + arm_smmu_update_ste(master, sid_domain, state.ats_enabled); arm_smmu_attach_commit(&state); @@ -2850,6 +2882,19 @@ static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid, arm_smmu_atc_inv_master(master, pasid); arm_smmu_remove_master_domain(master, &smmu_domain->domain, pasid); mutex_unlock(&arm_smmu_asid_lock); + + /* + * When the last user of the CD table goes away downgrade the STE back + * to a non-cd_table one. 
+ */ + if (!arm_smmu_ssids_in_use(&master->cd_table)) { + struct iommu_domain *sid_domain = + iommu_get_domain_for_dev(master->dev); + + if (sid_domain->type == IOMMU_DOMAIN_IDENTITY || + sid_domain->type == IOMMU_DOMAIN_BLOCKED) + sid_domain->ops->attach_dev(sid_domain, dev); + } } static void arm_smmu_attach_dev_ste(struct iommu_domain *domain, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 30459a800c7b..cdd426efb384 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -705,7 +705,8 @@ struct arm_smmu_master { /* Locked by the iommu core using the group mutex */ struct arm_smmu_ctx_desc_cfg cd_table; unsigned int num_streams; - bool ats_enabled; + bool ats_enabled : 1; + bool ste_ats_enabled : 1; bool stall_enabled; bool sva_enabled; bool iopf_enabled; From f3b273b7c7e42ff7ef5b6063834d768d33c7ba79 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 25 Jun 2024 09:37:45 -0300 Subject: [PATCH 28/72] iommu/arm-smmu-v3: Allow setting a S1 domain to a PASID The SVA cleanup made the SSID logic entirely general so all we need to do is call it with the correct cd table entry for a S1 domain. This is slightly tricky because of the ASID and how the locking works, the simple fix is to just update the ASID once we get the right locks. 
Tested-by: Nicolin Chen Tested-by: Shameer Kolothum Reviewed-by: Nicolin Chen Reviewed-by: Jerry Snitselaar Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/14-v9-5cd718286059+79186-smmuv3_newapi_p2b_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 41 ++++++++++++++++++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 2 +- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 8e661442b532..8c3031fe542b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2798,6 +2798,36 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return 0; } +static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t id) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_device *smmu = master->smmu; + struct arm_smmu_cd target_cd; + int ret = 0; + + mutex_lock(&smmu_domain->init_mutex); + if (!smmu_domain->smmu) + ret = arm_smmu_domain_finalise(smmu_domain, smmu); + else if (smmu_domain->smmu != smmu) + ret = -EINVAL; + mutex_unlock(&smmu_domain->init_mutex); + if (ret) + return ret; + + if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) + return -EINVAL; + + /* + * We can read cd.asid outside the lock because arm_smmu_set_pasid() + * will fix it + */ + arm_smmu_make_s1_cd(&target_cd, master, smmu_domain); + return arm_smmu_set_pasid(master, to_smmu_domain(domain), id, + &target_cd); +} + static void arm_smmu_update_ste(struct arm_smmu_master *master, struct iommu_domain *sid_domain, bool ats_enabled) @@ -2825,7 +2855,7 @@ static void arm_smmu_update_ste(struct arm_smmu_master *master, int arm_smmu_set_pasid(struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain, ioasid_t pasid, - const struct arm_smmu_cd 
*cd) + struct arm_smmu_cd *cd) { struct iommu_domain *sid_domain = iommu_get_domain_for_dev(master->dev); struct arm_smmu_attach_state state = { @@ -2858,6 +2888,14 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master, if (ret) goto out_unlock; + /* + * We don't want to obtain to the asid_lock too early, so fix up the + * caller set ASID under the lock in case it changed. + */ + cd->data[0] &= ~cpu_to_le64(CTXDESC_CD_0_ASID); + cd->data[0] |= cpu_to_le64( + FIELD_PREP(CTXDESC_CD_0_ASID, smmu_domain->cd.asid)); + arm_smmu_write_cd_entry(master, pasid, cdptr, cd); arm_smmu_update_ste(master, sid_domain, state.ats_enabled); @@ -3376,6 +3414,7 @@ static struct iommu_ops arm_smmu_ops = { .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = arm_smmu_attach_dev, + .set_dev_pasid = arm_smmu_s1_set_dev_pasid, .map_pages = arm_smmu_map_pages, .unmap_pages = arm_smmu_unmap_pages, .flush_iotlb_all = arm_smmu_flush_iotlb_all, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index cdd426efb384..91ec2d49ecbf 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -801,7 +801,7 @@ void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, int arm_smmu_set_pasid(struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain, ioasid_t pasid, - const struct arm_smmu_cd *cd); + struct arm_smmu_cd *cd); void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid); void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, From c84c5ab76c9c04b5f1c8cc66ee9313198e89bb11 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 10 Jun 2024 21:31:10 -0300 Subject: [PATCH 29/72] iommu/arm-smmu-v3: Do not zero the strtab twice dmam_alloc_coherent() already returns zero'd memory so cfg->strtab.l1_desc (the list of DMA addresses for the L2 entries) is already zero'd. 
arm_smmu_init_l1_strtab() goes through and calls arm_smmu_write_strtab_l1_desc() on the newly allocated (and zero'd) struct arm_smmu_strtab_l1_desc, which ends up computing 'val = 0' and zeroing it again. Remove arm_smmu_init_l1_strtab() and just call devm_kcalloc() from arm_smmu_init_strtab_2lvl to allocate the companion struct. Tested-by: Nicolin Chen Reviewed-by: Mostafa Saleh Signed-off-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Link: https://lore.kernel.org/r/1-v2-318ed5f6983b+198f-smmuv3_tidy_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 26 +++++---------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 8c3031fe542b..bb2d9b0c0f63 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3523,25 +3523,6 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) PRIQ_ENT_DWORDS, "priq"); } -static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu) -{ - unsigned int i; - struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; - void *strtab = smmu->strtab_cfg.strtab; - - cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents, - sizeof(*cfg->l1_desc), GFP_KERNEL); - if (!cfg->l1_desc) - return -ENOMEM; - - for (i = 0; i < cfg->num_l1_ents; ++i) { - arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]); - strtab += STRTAB_L1_DESC_DWORDS << 3; - } - - return 0; -} - static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) { void *strtab; @@ -3577,7 +3558,12 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT); cfg->strtab_base_cfg = reg; - return arm_smmu_init_l1_strtab(smmu); + cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents, + sizeof(*cfg->l1_desc), GFP_KERNEL); + if (!cfg->l1_desc) + return -ENOMEM; + + return 0; } static int 
arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) From a4d75360f7a6d979edd66af577847b0f4dbf4377 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 10 Jun 2024 21:31:11 -0300 Subject: [PATCH 30/72] iommu/arm-smmu-v3: Shrink the strtab l1_desc array The top of the 2 level stream table is (at most) 128k entries big, and two high order allocations are required. One of __le64 which is programmed into the HW (1M), and one of struct arm_smmu_strtab_l1_desc which holds the CPU pointer (3M). There is no reason to store the l2ptr_dma as nothing reads it. devm stores a copy of it and the DMA memory will be freed via devm mechanisms. span is a constant of 8+1. Remove both. This removes 16 bytes from each arm_smmu_strtab_l1_desc and saves up to 2M of memory per iommu instance. Tested-by: Nicolin Chen Reviewed-by: Mostafa Saleh Signed-off-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Link: https://lore.kernel.org/r/2-v2-318ed5f6983b+198f-smmuv3_tidy_jgg@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 13 ++++++------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 --- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index bb2d9b0c0f63..fbb7eb152d82 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1448,13 +1448,12 @@ static void arm_smmu_free_cd_tables(struct arm_smmu_master *master) } /* Stream table manipulation functions */ -static void -arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc) +static void arm_smmu_write_strtab_l1_desc(__le64 *dst, dma_addr_t l2ptr_dma) { u64 val = 0; - val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span); - val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK; + val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, STRTAB_SPLIT + 1); + val |= l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK; /* The HW has 64 bit atomicity with stores
to the L2 STE table */ WRITE_ONCE(*dst, cpu_to_le64(val)); @@ -1663,6 +1662,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) { size_t size; void *strtab; + dma_addr_t l2ptr_dma; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT]; @@ -1672,8 +1672,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3); strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS]; - desc->span = STRTAB_SPLIT + 1; - desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma, + desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &l2ptr_dma, GFP_KERNEL); if (!desc->l2ptr) { dev_err(smmu->dev, @@ -1683,7 +1682,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) } arm_smmu_init_initial_stes(desc->l2ptr, 1 << STRTAB_SPLIT); - arm_smmu_write_strtab_l1_desc(strtab, desc); + arm_smmu_write_strtab_l1_desc(strtab, l2ptr_dma); return 0; } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 91ec2d49ecbf..a05e02d6afd1 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -579,10 +579,7 @@ struct arm_smmu_priq { /* High-level stream table and context descriptor structures */ struct arm_smmu_strtab_l1_desc { - u8 span; - struct arm_smmu_ste *l2ptr; - dma_addr_t l2ptr_dma; }; struct arm_smmu_ctx_desc { From a35f443d837ffcd5e73b64c13a46d12701839213 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Thu, 13 Jun 2024 12:44:17 -0700 Subject: [PATCH 31/72] iommu/arm-smmu-v3: add missing MODULE_DESCRIPTION() macro With ARCH=arm64, make allmodconfig && make W=1 C=1 reports: WARNING: modpost: missing MODULE_DESCRIPTION() in drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.o Add the missing invocation of the MODULE_DESCRIPTION() macro. 
Signed-off-by: Jeff Johnson Fixes: da55da5a42d4 ("iommu/arm-smmu-v3: Make the kunit into a module") Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240613-md-arm64-drivers-iommu-arm-arm-smmu-v3-v1-1-0e9f7584a5c8@quicinc.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c index e0fce31eba54..cceb737a7001 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c @@ -567,4 +567,5 @@ static struct kunit_suite arm_smmu_v3_test_module = { kunit_test_suites(&arm_smmu_v3_test_module); MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); +MODULE_DESCRIPTION("KUnit tests for arm-smmu-v3 driver"); MODULE_LICENSE("GPL v2"); From d6c102881b302697802547d6abb6f24a49c79f2b Mon Sep 17 00:00:00 2001 From: Akhil P Oommen Date: Sat, 29 Jun 2024 07:19:37 +0530 Subject: [PATCH 32/72] dt-bindings: arm-smmu: Add X1E80100 GPU SMMU Update the devicetree bindings to support the GPU present in the X1E80100 platform.
Signed-off-by: Akhil P Oommen Link: https://lore.kernel.org/r/20240629015111.264564-5-quic_akhilpo@quicinc.com Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 7f584ce4bb22..280b4e49f219 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -96,6 +96,7 @@ properties: - qcom,sm8450-smmu-500 - qcom,sm8550-smmu-500 - qcom,sm8650-smmu-500 + - qcom,x1e80100-smmu-500 - const: qcom,adreno-smmu - const: qcom,smmu-500 - const: arm,mmu-500 @@ -522,6 +523,7 @@ allOf: - enum: - qcom,sm8550-smmu-500 - qcom,sm8650-smmu-500 + - qcom,x1e80100-smmu-500 - const: qcom,adreno-smmu - const: qcom,smmu-500 - const: arm,mmu-500 @@ -558,7 +560,6 @@ allOf: - qcom,sdx65-smmu-500 - qcom,sm6350-smmu-500 - qcom,sm6375-smmu-500 - - qcom,x1e80100-smmu-500 then: properties: clock-names: false From d0166022be375ce72e7b220d688740b1c4424ad5 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 1 Jul 2024 09:20:10 -0700 Subject: [PATCH 33/72] iommu/arm-smmu: Add CB prefix to register bitfields For consistency, add the "CB" prefix to the bitfield defines for context registers. 
Signed-off-by: Rob Clark Reviewed-by: Pranjal Shrivastava Link: https://lore.kernel.org/r/20240701162025.375134-2-robdclark@gmail.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c | 2 +- .../iommu/arm/arm-smmu/arm-smmu-qcom-debug.c | 18 ++++---- drivers/iommu/arm/arm-smmu/arm-smmu.c | 8 ++-- drivers/iommu/arm/arm-smmu/arm-smmu.h | 46 +++++++++---------- drivers/iommu/arm/arm-smmu/qcom_iommu.c | 4 +- 5 files changed, 39 insertions(+), 39 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c index 957d988b6d83..4b2994b6126d 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c @@ -200,7 +200,7 @@ static irqreturn_t nvidia_smmu_context_fault_bank(int irq, void __iomem *cb_base = nvidia_smmu_page(smmu, inst, smmu->numpage + idx); fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR); - if (!(fsr & ARM_SMMU_FSR_FAULT)) + if (!(fsr & ARM_SMMU_CB_FSR_FAULT)) return IRQ_NONE; fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0); diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c index 552199cbd9e2..e4ee78fb6a66 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -141,7 +141,7 @@ static int qcom_tbu_halt(struct qcom_tbu *tbu, struct arm_smmu_domain *smmu_doma writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG); fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if ((fsr & ARM_SMMU_FSR_FAULT) && (fsr & ARM_SMMU_FSR_SS)) { + if ((fsr & ARM_SMMU_CB_FSR_FAULT) && (fsr & ARM_SMMU_CB_FSR_SS)) { u32 sctlr_orig, sctlr; /* @@ -298,7 +298,7 @@ static phys_addr_t qcom_iova_to_phys(struct arm_smmu_domain *smmu_domain, arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr); fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if (fsr & ARM_SMMU_FSR_FAULT) { + if (fsr & ARM_SMMU_CB_FSR_FAULT) { /* Clear pending 
interrupts */ arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); @@ -306,7 +306,7 @@ static phys_addr_t qcom_iova_to_phys(struct arm_smmu_domain *smmu_domain, * TBU halt takes care of resuming any stalled transcation. * Kept it here for completeness sake. */ - if (fsr & ARM_SMMU_FSR_SS) + if (fsr & ARM_SMMU_CB_FSR_SS) arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, ARM_SMMU_RESUME_TERMINATE); } @@ -320,11 +320,11 @@ static phys_addr_t qcom_iova_to_phys(struct arm_smmu_domain *smmu_domain, phys = qcom_tbu_trigger_atos(smmu_domain, tbu, iova, sid); fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if (fsr & ARM_SMMU_FSR_FAULT) { + if (fsr & ARM_SMMU_CB_FSR_FAULT) { /* Clear pending interrupts */ arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); - if (fsr & ARM_SMMU_FSR_SS) + if (fsr & ARM_SMMU_CB_FSR_SS) arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, ARM_SMMU_RESUME_TERMINATE); } @@ -394,7 +394,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) DEFAULT_RATELIMIT_BURST); fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if (!(fsr & ARM_SMMU_FSR_FAULT)) + if (!(fsr & ARM_SMMU_CB_FSR_FAULT)) return IRQ_NONE; fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); @@ -403,7 +403,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) if (list_empty(&tbu_list)) { ret = report_iommu_fault(&smmu_domain->domain, NULL, iova, - fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); + fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); if (ret == -ENOSYS) dev_err_ratelimited(smmu->dev, @@ -417,7 +417,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) phys_soft = ops->iova_to_phys(ops, iova); tmp = report_iommu_fault(&smmu_domain->domain, NULL, iova, - fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); + fsynr & ARM_SMMU_CB_FSYNR0_WNR ? 
IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); if (!tmp || tmp == -EBUSY) { dev_dbg(smmu->dev, "Context fault handled by client: iova=0x%08lx, fsr=0x%x, fsynr=0x%x, cb=%d\n", @@ -481,7 +481,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); /* Retry or terminate any stalled transactions */ - if (fsr & ARM_SMMU_FSR_SS) + if (fsr & ARM_SMMU_CB_FSR_SS) arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, resume); } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 87c81f75cf84..23cf91ac409b 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -415,7 +415,7 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) int ret; fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if (!(fsr & ARM_SMMU_FSR_FAULT)) + if (!(fsr & ARM_SMMU_CB_FSR_FAULT)) return IRQ_NONE; fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); @@ -423,7 +423,7 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx)); ret = report_iommu_fault(&smmu_domain->domain, NULL, iova, - fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); + fsynr & ARM_SMMU_CB_FSYNR0_WNR ? 
IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); if (ret == -ENOSYS) dev_err_ratelimited(smmu->dev, @@ -1306,7 +1306,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va); reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR; - if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE), + if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_CB_ATSR_ACTIVE), 5, 50)) { spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); dev_err(dev, @@ -1642,7 +1642,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) /* Make sure all context banks are disabled and clear CB_FSR */ for (i = 0; i < smmu->num_context_banks; ++i) { arm_smmu_write_context_bank(smmu, i); - arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT); + arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_CB_FSR_FAULT); } /* Invalidate the TLB, just in case */ diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h index 4765c6945c34..b04a00126a12 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h @@ -196,34 +196,34 @@ enum arm_smmu_cbar_type { #define ARM_SMMU_CB_PAR_F BIT(0) #define ARM_SMMU_CB_FSR 0x58 -#define ARM_SMMU_FSR_MULTI BIT(31) -#define ARM_SMMU_FSR_SS BIT(30) -#define ARM_SMMU_FSR_UUT BIT(8) -#define ARM_SMMU_FSR_ASF BIT(7) -#define ARM_SMMU_FSR_TLBLKF BIT(6) -#define ARM_SMMU_FSR_TLBMCF BIT(5) -#define ARM_SMMU_FSR_EF BIT(4) -#define ARM_SMMU_FSR_PF BIT(3) -#define ARM_SMMU_FSR_AFF BIT(2) -#define ARM_SMMU_FSR_TF BIT(1) +#define ARM_SMMU_CB_FSR_MULTI BIT(31) +#define ARM_SMMU_CB_FSR_SS BIT(30) +#define ARM_SMMU_CB_FSR_UUT BIT(8) +#define ARM_SMMU_CB_FSR_ASF BIT(7) +#define ARM_SMMU_CB_FSR_TLBLKF BIT(6) +#define ARM_SMMU_CB_FSR_TLBMCF BIT(5) +#define ARM_SMMU_CB_FSR_EF BIT(4) +#define ARM_SMMU_CB_FSR_PF BIT(3) +#define ARM_SMMU_CB_FSR_AFF BIT(2) +#define ARM_SMMU_CB_FSR_TF BIT(1) -#define 
ARM_SMMU_FSR_IGN (ARM_SMMU_FSR_AFF | \ - ARM_SMMU_FSR_ASF | \ - ARM_SMMU_FSR_TLBMCF | \ - ARM_SMMU_FSR_TLBLKF) +#define ARM_SMMU_CB_FSR_IGN (ARM_SMMU_CB_FSR_AFF | \ + ARM_SMMU_CB_FSR_ASF | \ + ARM_SMMU_CB_FSR_TLBMCF | \ + ARM_SMMU_CB_FSR_TLBLKF) -#define ARM_SMMU_FSR_FAULT (ARM_SMMU_FSR_MULTI | \ - ARM_SMMU_FSR_SS | \ - ARM_SMMU_FSR_UUT | \ - ARM_SMMU_FSR_EF | \ - ARM_SMMU_FSR_PF | \ - ARM_SMMU_FSR_TF | \ - ARM_SMMU_FSR_IGN) +#define ARM_SMMU_CB_FSR_FAULT (ARM_SMMU_CB_FSR_MULTI | \ + ARM_SMMU_CB_FSR_SS | \ + ARM_SMMU_CB_FSR_UUT | \ + ARM_SMMU_CB_FSR_EF | \ + ARM_SMMU_CB_FSR_PF | \ + ARM_SMMU_CB_FSR_TF | \ + ARM_SMMU_CB_FSR_IGN) #define ARM_SMMU_CB_FAR 0x60 #define ARM_SMMU_CB_FSYNR0 0x68 -#define ARM_SMMU_FSYNR0_WNR BIT(4) +#define ARM_SMMU_CB_FSYNR0_WNR BIT(4) #define ARM_SMMU_CB_FSYNR1 0x6c @@ -237,7 +237,7 @@ enum arm_smmu_cbar_type { #define ARM_SMMU_CB_ATS1PR 0x800 #define ARM_SMMU_CB_ATSR 0x8f0 -#define ARM_SMMU_ATSR_ACTIVE BIT(0) +#define ARM_SMMU_CB_ATSR_ACTIVE BIT(0) #define ARM_SMMU_RESUME_TERMINATE BIT(0) diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index e079bb7a993e..b98a7a598b89 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -194,7 +194,7 @@ static irqreturn_t qcom_iommu_fault(int irq, void *dev) fsr = iommu_readl(ctx, ARM_SMMU_CB_FSR); - if (!(fsr & ARM_SMMU_FSR_FAULT)) + if (!(fsr & ARM_SMMU_CB_FSR_FAULT)) return IRQ_NONE; fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0); @@ -274,7 +274,7 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, /* Clear context bank fault address fault status registers */ iommu_writel(ctx, ARM_SMMU_CB_FAR, 0); - iommu_writel(ctx, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT); + iommu_writel(ctx, ARM_SMMU_CB_FSR, ARM_SMMU_CB_FSR_FAULT); /* TTBRs */ iommu_writeq(ctx, ARM_SMMU_CB_TTBR0, From 55089781ff7724dd10040231a6d8b791cf24afcd Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 1 Jul 2024 09:20:11 -0700 Subject: 
[PATCH 34/72] iommu/arm-smmu-qcom-debug: Do not print for handled faults Handled faults can be "normal", don't spam dmesg about them. Signed-off-by: Rob Clark Reviewed-by: Pranjal Shrivastava Link: https://lore.kernel.org/r/20240701162025.375134-3-robdclark@gmail.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c index e4ee78fb6a66..681fbdfc325d 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -419,10 +419,6 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) tmp = report_iommu_fault(&smmu_domain->domain, NULL, iova, fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); if (!tmp || tmp == -EBUSY) { - dev_dbg(smmu->dev, - "Context fault handled by client: iova=0x%08lx, fsr=0x%x, fsynr=0x%x, cb=%d\n", - iova, fsr, fsynr, idx); - dev_dbg(smmu->dev, "soft iova-to-phys=%pa\n", &phys_soft); ret = IRQ_HANDLED; resume = ARM_SMMU_RESUME_TERMINATE; } else { From d525b0af0c3b8275e6f83fa0c0640338ed90661a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 1 Jul 2024 09:20:12 -0700 Subject: [PATCH 35/72] iommu/arm-smmu: Pretty-print context fault related regs Parse out the bitfields for easier-to-read fault messages. 
Signed-off-by: Rob Clark Reviewed-by: Pranjal Shrivastava Link: https://lore.kernel.org/r/20240701162025.375134-4-robdclark@gmail.com Signed-off-by: Will Deacon --- .../iommu/arm/arm-smmu/arm-smmu-qcom-debug.c | 52 +++++--------- drivers/iommu/arm/arm-smmu/arm-smmu.c | 70 +++++++++++++++---- drivers/iommu/arm/arm-smmu/arm-smmu.h | 21 ++++++ 3 files changed, 92 insertions(+), 51 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c index 681fbdfc325d..ef93f825f11f 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -383,64 +383,44 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) struct arm_smmu_domain *smmu_domain = dev; struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; struct arm_smmu_device *smmu = smmu_domain->smmu; - u32 fsr, fsynr, cbfrsynra, resume = 0; + struct arm_smmu_context_fault_info cfi; + u32 resume = 0; int idx = smmu_domain->cfg.cbndx; phys_addr_t phys_soft; - unsigned long iova; int ret, tmp; static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); - fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if (!(fsr & ARM_SMMU_CB_FSR_FAULT)) + arm_smmu_read_context_fault_info(smmu, idx, &cfi); + + if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT)) return IRQ_NONE; - fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); - iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR); - cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx)); - if (list_empty(&tbu_list)) { - ret = report_iommu_fault(&smmu_domain->domain, NULL, iova, - fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); + ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova, + cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? 
IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); if (ret == -ENOSYS) - dev_err_ratelimited(smmu->dev, - "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", - fsr, iova, fsynr, cbfrsynra, idx); + arm_smmu_print_context_fault_info(smmu, idx, &cfi); - arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr); return IRQ_HANDLED; } - phys_soft = ops->iova_to_phys(ops, iova); + phys_soft = ops->iova_to_phys(ops, cfi.iova); - tmp = report_iommu_fault(&smmu_domain->domain, NULL, iova, - fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); + tmp = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova, + cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); if (!tmp || tmp == -EBUSY) { ret = IRQ_HANDLED; resume = ARM_SMMU_RESUME_TERMINATE; } else { - phys_addr_t phys_atos = qcom_smmu_verify_fault(smmu_domain, iova, fsr); + phys_addr_t phys_atos = qcom_smmu_verify_fault(smmu_domain, cfi.iova, cfi.fsr); if (__ratelimit(&_rs)) { - dev_err(smmu->dev, - "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", - fsr, iova, fsynr, cbfrsynra, idx); - dev_err(smmu->dev, - "FSR = %08x [%s%s%s%s%s%s%s%s%s], SID=0x%x\n", - fsr, - (fsr & 0x02) ? "TF " : "", - (fsr & 0x04) ? "AFF " : "", - (fsr & 0x08) ? "PF " : "", - (fsr & 0x10) ? "EF " : "", - (fsr & 0x20) ? "TLBMCF " : "", - (fsr & 0x40) ? "TLBLKF " : "", - (fsr & 0x80) ? "MHF " : "", - (fsr & 0x40000000) ? "SS " : "", - (fsr & 0x80000000) ? 
"MULTI " : "", - cbfrsynra); + arm_smmu_print_context_fault_info(smmu, idx, &cfi); dev_err(smmu->dev, "soft iova-to-phys=%pa\n", &phys_soft); @@ -474,10 +454,10 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) */ if (tmp != -EBUSY) { /* Clear the faulting FSR */ - arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr); /* Retry or terminate any stalled transactions */ - if (fsr & ARM_SMMU_CB_FSR_SS) + if (cfi.fsr & ARM_SMMU_CB_FSR_SS) arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, resume); } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 23cf91ac409b..79ec911ae151 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -405,32 +405,72 @@ static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = { .tlb_add_page = arm_smmu_tlb_add_page_s2_v1, }; + +void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx, + struct arm_smmu_context_fault_info *cfi) +{ + cfi->iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR); + cfi->fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); + cfi->fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); + cfi->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx)); +} + +void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx, + const struct arm_smmu_context_fault_info *cfi) +{ + dev_dbg(smmu->dev, + "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", + cfi->fsr, cfi->iova, cfi->fsynr, cfi->cbfrsynra, idx); + + dev_err(smmu->dev, "FSR = %08x [%s%sFormat=%u%s%s%s%s%s%s%s%s], SID=0x%x\n", + cfi->fsr, + (cfi->fsr & ARM_SMMU_CB_FSR_MULTI) ? "MULTI " : "", + (cfi->fsr & ARM_SMMU_CB_FSR_SS) ? "SS " : "", + (u32)FIELD_GET(ARM_SMMU_CB_FSR_FORMAT, cfi->fsr), + (cfi->fsr & ARM_SMMU_CB_FSR_UUT) ? " UUT" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_ASF) ? " ASF" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_TLBLKF) ? 
" TLBLKF" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_TLBMCF) ? " TLBMCF" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_EF) ? " EF" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_PF) ? " PF" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_AFF) ? " AFF" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_TF) ? " TF" : "", + cfi->cbfrsynra); + + dev_err(smmu->dev, "FSYNR0 = %08x [S1CBNDX=%u%s%s%s%s%s%s PLVL=%u]\n", + cfi->fsynr, + (u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_S1CBNDX, cfi->fsynr), + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_AFR) ? " AFR" : "", + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_PTWF) ? " PTWF" : "", + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_NSATTR) ? " NSATTR" : "", + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_IND) ? " IND" : "", + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_PNU) ? " PNU" : "", + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_WNR) ? " WNR" : "", + (u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_PLVL, cfi->fsynr)); +} + static irqreturn_t arm_smmu_context_fault(int irq, void *dev) { - u32 fsr, fsynr, cbfrsynra; - unsigned long iova; + struct arm_smmu_context_fault_info cfi; struct arm_smmu_domain *smmu_domain = dev; struct arm_smmu_device *smmu = smmu_domain->smmu; + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); int idx = smmu_domain->cfg.cbndx; int ret; - fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if (!(fsr & ARM_SMMU_CB_FSR_FAULT)) + arm_smmu_read_context_fault_info(smmu, idx, &cfi); + + if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT)) return IRQ_NONE; - fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); - iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR); - cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx)); + ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova, + cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); - ret = report_iommu_fault(&smmu_domain->domain, NULL, iova, - fsynr & ARM_SMMU_CB_FSYNR0_WNR ? 
IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); + if (ret == -ENOSYS && __ratelimit(&rs)) + arm_smmu_print_context_fault_info(smmu, idx, &cfi); - if (ret == -ENOSYS) - dev_err_ratelimited(smmu->dev, - "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", - fsr, iova, fsynr, cbfrsynra, idx); - - arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr); return IRQ_HANDLED; } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h index b04a00126a12..e2aeb511ae90 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h @@ -198,6 +198,7 @@ enum arm_smmu_cbar_type { #define ARM_SMMU_CB_FSR 0x58 #define ARM_SMMU_CB_FSR_MULTI BIT(31) #define ARM_SMMU_CB_FSR_SS BIT(30) +#define ARM_SMMU_CB_FSR_FORMAT GENMASK(10, 9) #define ARM_SMMU_CB_FSR_UUT BIT(8) #define ARM_SMMU_CB_FSR_ASF BIT(7) #define ARM_SMMU_CB_FSR_TLBLKF BIT(6) @@ -223,7 +224,14 @@ enum arm_smmu_cbar_type { #define ARM_SMMU_CB_FAR 0x60 #define ARM_SMMU_CB_FSYNR0 0x68 +#define ARM_SMMU_CB_FSYNR0_PLVL GENMASK(1, 0) #define ARM_SMMU_CB_FSYNR0_WNR BIT(4) +#define ARM_SMMU_CB_FSYNR0_PNU BIT(5) +#define ARM_SMMU_CB_FSYNR0_IND BIT(6) +#define ARM_SMMU_CB_FSYNR0_NSATTR BIT(8) +#define ARM_SMMU_CB_FSYNR0_PTWF BIT(10) +#define ARM_SMMU_CB_FSYNR0_AFR BIT(11) +#define ARM_SMMU_CB_FSYNR0_S1CBNDX GENMASK(23, 16) #define ARM_SMMU_CB_FSYNR1 0x6c @@ -533,4 +541,17 @@ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu); void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx); int arm_mmu500_reset(struct arm_smmu_device *smmu); +struct arm_smmu_context_fault_info { + unsigned long iova; + u32 fsr; + u32 fsynr; + u32 cbfrsynra; +}; + +void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx, + struct arm_smmu_context_fault_info *cfi); + +void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx, + const struct 
arm_smmu_context_fault_info *cfi); + #endif /* _ARM_SMMU_H */ From 9796cf9b3eb9a0b9502dfe0b3acf63610090ef44 Mon Sep 17 00:00:00 2001 From: Zhenhua Huang Date: Tue, 2 Jul 2024 17:01:10 +0800 Subject: [PATCH 36/72] iommu/arm-smmu-qcom: record reason for deferring probe To avoid deferring probe smmu driver silently, record reason for it. It can be checked through ../debugfs/devices_deferred as well: /sys/kernel/debug# cat devices_deferred 15000000.iommu arm-smmu: qcom_scm not ready Signed-off-by: Zhenhua Huang Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/1719910870-25079-1-git-send-email-quic_zhenhuah@quicinc.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 25f034677f56..971c6a2e592b 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -469,7 +469,8 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, /* Check to make sure qcom_scm has finished probing */ if (!qcom_scm_is_available()) - return ERR_PTR(-EPROBE_DEFER); + return ERR_PTR(dev_err_probe(smmu->dev, -EPROBE_DEFER, + "qcom_scm not ready\n")); qsmmu = devm_krealloc(smmu->dev, smmu, sizeof(*qsmmu), GFP_KERNEL); if (!qsmmu) From 52acd7d8a4130ad4dda6540dbbb821a92e1c0138 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 3 Jul 2024 11:16:00 +0100 Subject: [PATCH 37/72] iommu/arm-smmu-v3: Add support for domain_alloc_user fn This will be used by iommufd for allocating user managed domains and is also required when we add iommufd based dirty tracking support. 
Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Shameer Kolothum Link: https://lore.kernel.org/r/20240703101604.2576-2-shameerali.kolothum.thodi@huawei.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 33 +++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index fbb7eb152d82..c616a38ecf28 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -36,6 +36,8 @@ module_param(disable_msipolling, bool, 0444); MODULE_PARM_DESC(disable_msipolling, "Disable MSI-based polling for CMD_SYNC completion."); +static struct iommu_ops arm_smmu_ops; + enum arm_smmu_msi_index { EVTQ_MSI_INDEX, GERROR_MSI_INDEX, @@ -3020,6 +3022,34 @@ static struct iommu_domain arm_smmu_blocked_domain = { .ops = &arm_smmu_blocked_ops, }; +static struct iommu_domain * +arm_smmu_domain_alloc_user(struct device *dev, u32 flags, + struct iommu_domain *parent, + const struct iommu_user_data *user_data) +{ + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_domain *smmu_domain; + int ret; + + if (flags || parent || user_data) + return ERR_PTR(-EOPNOTSUPP); + + smmu_domain = arm_smmu_domain_alloc(); + if (!smmu_domain) + return ERR_PTR(-ENOMEM); + + smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED; + smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops; + ret = arm_smmu_domain_finalise(smmu_domain, master->smmu); + if (ret) + goto err_free; + return &smmu_domain->domain; + +err_free: + kfree(smmu_domain); + return ERR_PTR(ret); +} + static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -3190,8 +3220,6 @@ static void arm_smmu_remove_master(struct arm_smmu_master *master) kfree(master->streams); } -static struct 
iommu_ops arm_smmu_ops; - static struct iommu_device *arm_smmu_probe_device(struct device *dev) { int ret; @@ -3399,6 +3427,7 @@ static struct iommu_ops arm_smmu_ops = { .capable = arm_smmu_capable, .domain_alloc_paging = arm_smmu_domain_alloc_paging, .domain_alloc_sva = arm_smmu_sva_domain_alloc, + .domain_alloc_user = arm_smmu_domain_alloc_user, .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, .device_group = arm_smmu_device_group, From 2f8d6178b4fe3e2f50782fa640921a9ee46b6d6f Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Wed, 3 Jul 2024 11:16:01 +0100 Subject: [PATCH 38/72] iommu/arm-smmu-v3: Add feature detection for HTTU If the SMMU supports it and the kernel was built with HTTU support, probe support for Hardware Translation Table Update (HTTU), which essentially enables hardware update of access and dirty flags. Probe and set the smmu::features for Hardware Dirty and Hardware Access bits. This is in preparation, to enable it on the context descriptors of stage 1 format. 
Signed-off-by: Jean-Philippe Brucker Signed-off-by: Joao Martins Reviewed-by: Jason Gunthorpe Reviewed-by: Ryan Roberts Reviewed-by: Kevin Tian Reviewed-by: Nicolin Chen Signed-off-by: Shameer Kolothum Link: https://lore.kernel.org/r/20240703101604.2576-3-shameerali.kolothum.thodi@huawei.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 32 +++++++++++++++++++++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 5 ++++ 2 files changed, 37 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index c616a38ecf28..6b35954940b8 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -4014,6 +4014,28 @@ static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu) } } +static void arm_smmu_get_httu(struct arm_smmu_device *smmu, u32 reg) +{ + u32 fw_features = smmu->features & (ARM_SMMU_FEAT_HA | ARM_SMMU_FEAT_HD); + u32 hw_features = 0; + + switch (FIELD_GET(IDR0_HTTU, reg)) { + case IDR0_HTTU_ACCESS_DIRTY: + hw_features |= ARM_SMMU_FEAT_HD; + fallthrough; + case IDR0_HTTU_ACCESS: + hw_features |= ARM_SMMU_FEAT_HA; + } + + if (smmu->dev->of_node) + smmu->features |= hw_features; + else if (hw_features != fw_features) + /* ACPI IORT sets the HTTU bits */ + dev_warn(smmu->dev, + "IDR0.HTTU features(0x%x) overridden by FW configuration (0x%x)\n", + hw_features, fw_features); +} + static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) { u32 reg; @@ -4074,6 +4096,8 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) smmu->features |= ARM_SMMU_FEAT_E2H; } + arm_smmu_get_httu(smmu, reg); + /* * The coherency feature as set by FW is used in preference to the ID * register, but warn on mismatch. 
@@ -4269,6 +4293,14 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev, if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE) smmu->features |= ARM_SMMU_FEAT_COHERENCY; + switch (FIELD_GET(ACPI_IORT_SMMU_V3_HTTU_OVERRIDE, iort_smmu->flags)) { + case IDR0_HTTU_ACCESS_DIRTY: + smmu->features |= ARM_SMMU_FEAT_HD; + fallthrough; + case IDR0_HTTU_ACCESS: + smmu->features |= ARM_SMMU_FEAT_HA; + } + return 0; } #else diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index a05e02d6afd1..af74b59032b5 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -33,6 +33,9 @@ #define IDR0_ASID16 (1 << 12) #define IDR0_ATS (1 << 10) #define IDR0_HYP (1 << 9) +#define IDR0_HTTU GENMASK(7, 6) +#define IDR0_HTTU_ACCESS 1 +#define IDR0_HTTU_ACCESS_DIRTY 2 #define IDR0_COHACC (1 << 4) #define IDR0_TTF GENMASK(3, 2) #define IDR0_TTF_AARCH64 2 @@ -650,6 +653,8 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_E2H (1 << 18) #define ARM_SMMU_FEAT_NESTING (1 << 19) #define ARM_SMMU_FEAT_ATTR_TYPES_OVR (1 << 20) +#define ARM_SMMU_FEAT_HA (1 << 21) +#define ARM_SMMU_FEAT_HD (1 << 22) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) From 4fe88fd8b4aecb7f9680bf898811db76b94095a9 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Wed, 3 Jul 2024 11:16:02 +0100 Subject: [PATCH 39/72] iommu/io-pgtable-arm: Add read_and_clear_dirty() support .read_and_clear_dirty() IOMMU domain op takes care of reading the dirty bits (i.e. PTE has DBM set and AP[2] clear) and marshalling into a bitmap of a given page size. While reading the dirty bits we also set the PTE AP[2] bit to mark it as writeable-clean depending on read_and_clear_dirty() flags. PTE states with respect to DBM bit: DBM bit AP[2]("RDONLY" bit) 1. writable_clean 1 1 2. writable_dirty 1 0 3. 
read-only 0 1 Reviewed-by: Ryan Roberts Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Signed-off-by: Shameer Kolothum Link: https://lore.kernel.org/r/20240703101604.2576-4-shameerali.kolothum.thodi@huawei.com Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 114 ++++++++++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 3d23b924cec1..2e57e8616387 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -76,6 +76,7 @@ #define ARM_LPAE_PTE_NSTABLE (((arm_lpae_iopte)1) << 63) #define ARM_LPAE_PTE_XN (((arm_lpae_iopte)3) << 53) +#define ARM_LPAE_PTE_DBM (((arm_lpae_iopte)1) << 51) #define ARM_LPAE_PTE_AF (((arm_lpae_iopte)1) << 10) #define ARM_LPAE_PTE_SH_NS (((arm_lpae_iopte)0) << 8) #define ARM_LPAE_PTE_SH_OS (((arm_lpae_iopte)2) << 8) @@ -85,7 +86,7 @@ #define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << 2) /* Ignore the contiguous bit for block splitting */ -#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)6) << 52) +#define ARM_LPAE_PTE_ATTR_HI_MASK (ARM_LPAE_PTE_XN | ARM_LPAE_PTE_DBM) #define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK | \ ARM_LPAE_PTE_ATTR_HI_MASK) /* Software bit for solving coherency races */ @@ -93,7 +94,11 @@ /* Stage-1 PTE */ #define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6) -#define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)2) << 6) +#define ARM_LPAE_PTE_AP_RDONLY_BIT 7 +#define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)1) << \ + ARM_LPAE_PTE_AP_RDONLY_BIT) +#define ARM_LPAE_PTE_AP_WR_CLEAN_MASK (ARM_LPAE_PTE_AP_RDONLY | \ + ARM_LPAE_PTE_DBM) #define ARM_LPAE_PTE_ATTRINDX_SHIFT 2 #define ARM_LPAE_PTE_nG (((arm_lpae_iopte)1) << 11) @@ -139,6 +144,12 @@ #define iopte_prot(pte) ((pte) & ARM_LPAE_PTE_ATTR_MASK) +#define iopte_writeable_dirty(pte) \ + (((pte) & ARM_LPAE_PTE_AP_WR_CLEAN_MASK) == ARM_LPAE_PTE_DBM) + +#define iopte_set_writeable_clean(ptep) \ + 
set_bit(ARM_LPAE_PTE_AP_RDONLY_BIT, (unsigned long *)(ptep)) + struct arm_lpae_io_pgtable { struct io_pgtable iop; @@ -160,6 +171,13 @@ static inline bool iopte_leaf(arm_lpae_iopte pte, int lvl, return iopte_type(pte) == ARM_LPAE_PTE_TYPE_BLOCK; } +static inline bool iopte_table(arm_lpae_iopte pte, int lvl) +{ + if (lvl == (ARM_LPAE_MAX_LEVELS - 1)) + return false; + return iopte_type(pte) == ARM_LPAE_PTE_TYPE_TABLE; +} + static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr, struct arm_lpae_io_pgtable *data) { @@ -726,6 +744,97 @@ found_translation: return iopte_to_paddr(pte, data) | iova; } +struct io_pgtable_walk_data { + struct iommu_dirty_bitmap *dirty; + unsigned long flags; + u64 addr; + const u64 end; +}; + +static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, + int lvl); + +static int io_pgtable_visit_dirty(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, int lvl) +{ + struct io_pgtable *iop = &data->iop; + arm_lpae_iopte pte = READ_ONCE(*ptep); + + if (iopte_leaf(pte, lvl, iop->fmt)) { + size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data); + + if (iopte_writeable_dirty(pte)) { + iommu_dirty_bitmap_record(walk_data->dirty, + walk_data->addr, size); + if (!(walk_data->flags & IOMMU_DIRTY_NO_CLEAR)) + iopte_set_writeable_clean(ptep); + } + walk_data->addr += size; + return 0; + } + + if (WARN_ON(!iopte_table(pte, lvl))) + return -EINVAL; + + ptep = iopte_deref(pte, data); + return __arm_lpae_iopte_walk_dirty(data, walk_data, ptep, lvl + 1); +} + +static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, + int lvl) +{ + u32 idx; + int max_entries, ret; + + if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) + return -EINVAL; + + if (lvl == data->start_level) + max_entries = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte); + else + max_entries = 
ARM_LPAE_PTES_PER_TABLE(data); + + for (idx = ARM_LPAE_LVL_IDX(walk_data->addr, lvl, data); + (idx < max_entries) && (walk_data->addr < walk_data->end); ++idx) { + ret = io_pgtable_visit_dirty(data, walk_data, ptep + idx, lvl); + if (ret) + return ret; + } + + return 0; +} + +static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &data->iop.cfg; + struct io_pgtable_walk_data walk_data = { + .dirty = dirty, + .flags = flags, + .addr = iova, + .end = iova + size, + }; + arm_lpae_iopte *ptep = data->pgd; + int lvl = data->start_level; + + if (WARN_ON(!size)) + return -EINVAL; + if (WARN_ON((iova + size - 1) & ~(BIT(cfg->ias) - 1))) + return -EINVAL; + if (data->iop.fmt != ARM_64_LPAE_S1) + return -EINVAL; + + return __arm_lpae_iopte_walk_dirty(data, &walk_data, ptep, lvl); +} + static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg) { unsigned long granule, page_sizes; @@ -804,6 +913,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) .map_pages = arm_lpae_map_pages, .unmap_pages = arm_lpae_unmap_pages, .iova_to_phys = arm_lpae_iova_to_phys, + .read_and_clear_dirty = arm_lpae_read_and_clear_dirty, }; return data; From eb054d67b21a53f6ccf3af49a62fb99397b48fc2 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Wed, 3 Jul 2024 11:16:03 +0100 Subject: [PATCH 40/72] iommu/arm-smmu-v3: Add support for dirty tracking in domain alloc This provides all the infrastructure to enable dirty tracking if the hardware has the capability and domain alloc request for it. Also, add a device_iommu_capable() check in iommufd core for IOMMU_CAP_DIRTY_TRACKING before we request a user domain with dirty tracking support. Please note, we still report no support for IOMMU_CAP_DIRTY_TRACKING as it will finally be enabled in a subsequent patch. 
Signed-off-by: Joao Martins Reviewed-by: Ryan Roberts Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Shameer Kolothum Link: https://lore.kernel.org/r/20240703101604.2576-5-shameerali.kolothum.thodi@huawei.com Signed-off-by: Will Deacon --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 84 +++++++++++++++------ drivers/iommu/iommufd/hw_pagetable.c | 3 + include/linux/io-pgtable.h | 3 + 3 files changed, 67 insertions(+), 23 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 6b35954940b8..c1e32a2ea3f8 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "arm-smmu-v3.h" #include "../../dma-iommu.h" @@ -37,6 +38,7 @@ MODULE_PARM_DESC(disable_msipolling, "Disable MSI-based polling for CMD_SYNC completion."); static struct iommu_ops arm_smmu_ops; +static struct iommu_dirty_ops arm_smmu_dirty_ops; enum arm_smmu_msi_index { EVTQ_MSI_INDEX, @@ -82,7 +84,7 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { }; static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_device *smmu); + struct arm_smmu_device *smmu, u32 flags); static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master); static void parse_driver_options(struct arm_smmu_device *smmu) @@ -2282,7 +2284,7 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) struct arm_smmu_master *master = dev_iommu_priv_get(dev); int ret; - ret = arm_smmu_domain_finalise(smmu_domain, master->smmu); + ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, 0); if (ret) { kfree(smmu_domain); return ERR_PTR(ret); @@ -2346,15 +2348,15 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu, } static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_device *smmu) + struct 
arm_smmu_device *smmu, u32 flags) { int ret; - unsigned long ias, oas; enum io_pgtable_fmt fmt; struct io_pgtable_cfg pgtbl_cfg; struct io_pgtable_ops *pgtbl_ops; int (*finalise_stage_fn)(struct arm_smmu_device *smmu, struct arm_smmu_domain *smmu_domain); + bool enable_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; /* Restrict the stage to what we can actually support */ if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) @@ -2362,17 +2364,31 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2)) smmu_domain->stage = ARM_SMMU_DOMAIN_S1; + pgtbl_cfg = (struct io_pgtable_cfg) { + .pgsize_bitmap = smmu->pgsize_bitmap, + .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY, + .tlb = &arm_smmu_flush_ops, + .iommu_dev = smmu->dev, + }; + switch (smmu_domain->stage) { - case ARM_SMMU_DOMAIN_S1: - ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48; - ias = min_t(unsigned long, ias, VA_BITS); - oas = smmu->ias; + case ARM_SMMU_DOMAIN_S1: { + unsigned long ias = (smmu->features & + ARM_SMMU_FEAT_VAX) ? 
52 : 48; + + pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS); + pgtbl_cfg.oas = smmu->ias; + if (enable_dirty) + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD; fmt = ARM_64_LPAE_S1; finalise_stage_fn = arm_smmu_domain_finalise_s1; break; + } case ARM_SMMU_DOMAIN_S2: - ias = smmu->ias; - oas = smmu->oas; + if (enable_dirty) + return -EOPNOTSUPP; + pgtbl_cfg.ias = smmu->ias; + pgtbl_cfg.oas = smmu->oas; fmt = ARM_64_LPAE_S2; finalise_stage_fn = arm_smmu_domain_finalise_s2; break; @@ -2380,15 +2396,6 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, return -EINVAL; } - pgtbl_cfg = (struct io_pgtable_cfg) { - .pgsize_bitmap = smmu->pgsize_bitmap, - .ias = ias, - .oas = oas, - .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY, - .tlb = &arm_smmu_flush_ops, - .iommu_dev = smmu->dev, - }; - pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops) return -ENOMEM; @@ -2396,6 +2403,8 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1; smmu_domain->domain.geometry.force_aperture = true; + if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1) + smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops; ret = finalise_stage_fn(smmu, smmu_domain); if (ret < 0) { @@ -2745,7 +2754,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) mutex_lock(&smmu_domain->init_mutex); if (!smmu_domain->smmu) { - ret = arm_smmu_domain_finalise(smmu_domain, smmu); + ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0); } else if (smmu_domain->smmu != smmu) ret = -EINVAL; @@ -2810,7 +2819,7 @@ static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain, mutex_lock(&smmu_domain->init_mutex); if (!smmu_domain->smmu) - ret = arm_smmu_domain_finalise(smmu_domain, smmu); + ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0); else if (smmu_domain->smmu 
!= smmu) ret = -EINVAL; mutex_unlock(&smmu_domain->init_mutex); @@ -3028,10 +3037,13 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags, const struct iommu_user_data *user_data) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); + const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING; struct arm_smmu_domain *smmu_domain; int ret; - if (flags || parent || user_data) + if (flags & ~PAGING_FLAGS) + return ERR_PTR(-EOPNOTSUPP); + if (parent || user_data) return ERR_PTR(-EOPNOTSUPP); smmu_domain = arm_smmu_domain_alloc(); @@ -3040,7 +3052,7 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags, smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED; smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops; - ret = arm_smmu_domain_finalise(smmu_domain, master->smmu); + ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, flags); if (ret) goto err_free; return &smmu_domain->domain; @@ -3295,6 +3307,27 @@ static void arm_smmu_release_device(struct device *dev) kfree(master); } +static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; + + return ops->read_and_clear_dirty(ops, iova, size, flags, dirty); +} + +static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain, + bool enabled) +{ + /* + * Always enabled and the dirty bitmap is cleared prior to + * set_dirty_tracking(). 
+ */ + return 0; +} + static struct iommu_group *arm_smmu_device_group(struct device *dev) { struct iommu_group *group; @@ -3453,6 +3486,11 @@ static struct iommu_ops arm_smmu_ops = { } }; +static struct iommu_dirty_ops arm_smmu_dirty_ops = { + .read_and_clear_dirty = arm_smmu_read_and_clear_dirty, + .set_dirty_tracking = arm_smmu_set_dirty_tracking, +}; + /* Probing and initialisation functions */ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, struct arm_smmu_queue *q, diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 33d142f8057d..6d5b2fffeea0 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -114,6 +114,9 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, return ERR_PTR(-EOPNOTSUPP); if (flags & ~valid_flags) return ERR_PTR(-EOPNOTSUPP); + if ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) && + !device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING)) + return ERR_PTR(-EOPNOTSUPP); hwpt_paging = __iommufd_object_alloc( ictx, hwpt_paging, IOMMUFD_OBJ_HWPT_PAGING, common.obj); diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 86cf1f7ae389..f9a81761bfce 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -85,6 +85,8 @@ struct io_pgtable_cfg { * * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability * attributes set in the TCR for a non-coherent page-table walker. + * + * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable. 
*/ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) @@ -92,6 +94,7 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT BIT(4) #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) + #define IO_PGTABLE_QUIRK_ARM_HD BIT(7) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; From 25c776dd03b3e3ee16ad3402feabe20d811c7cb2 Mon Sep 17 00:00:00 2001 From: Kunkun Jiang Date: Wed, 3 Jul 2024 11:16:04 +0100 Subject: [PATCH 41/72] iommu/arm-smmu-v3: Enable HTTU for stage1 with io-pgtable mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If io-pgtable quirk flag indicates support for hardware update of dirty state, enable HA/HD bits in the SMMU CD and also set the DBM bit in the page descriptor. Now report the dirty page tracking capability of SMMUv3 and select IOMMUFD_DRIVER for ARM_SMMU_V3 if IOMMUFD is enabled. Co-developed-by: Keqian Zhu Signed-off-by: Keqian Zhu Signed-off-by: Kunkun Jiang Signed-off-by: Joao Martins Reviewed-by: Ryan Roberts Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Signed-off-by: Shameer Kolothum Link: https://lore.kernel.org/r/20240703101604.2576-6-shameerali.kolothum.thodi@huawei.com Signed-off-by: Will Deacon --- drivers/iommu/Kconfig | 1 + drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 15 +++++++++++++++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +++ drivers/iommu/io-pgtable-arm.c | 5 ++++- 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index c04584be3089..a82f10054aec 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -394,6 +394,7 @@ config ARM_SMMU_V3 select IOMMU_API select IOMMU_IO_PGTABLE_LPAE select GENERIC_MSI_IRQ + select IOMMUFD_DRIVER if IOMMUFD help Support for implementations of the ARM System MMU architecture version 3 providing translation support to a PCIe root complex. 
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index c1e32a2ea3f8..a31460f9f3d4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1352,6 +1352,12 @@ void arm_smmu_make_s1_cd(struct arm_smmu_cd *target, CTXDESC_CD_0_ASET | FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) ); + + /* To enable dirty flag update, set both Access flag and dirty state update */ + if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD) + target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA | + CTXDESC_CD_0_TCR_HD); + target->data[1] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.ttbr & CTXDESC_CD_1_TTB0_MASK); target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.mair); @@ -2235,6 +2241,13 @@ static const struct iommu_flush_ops arm_smmu_flush_ops = { .tlb_add_page = arm_smmu_tlb_inv_page_nosync, }; +static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu) +{ + u32 features = (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY); + + return (smmu->features & features) == features; +} + /* IOMMU API */ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) { @@ -2247,6 +2260,8 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) case IOMMU_CAP_NOEXEC: case IOMMU_CAP_DEFERRED_FLUSH: return true; + case IOMMU_CAP_DIRTY_TRACKING: + return arm_smmu_dbm_capable(master->smmu); default: return false; } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index af74b59032b5..14bca41a981b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -304,6 +304,9 @@ struct arm_smmu_cd { #define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32) #define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38) +#define CTXDESC_CD_0_TCR_HA (1UL << 43) +#define CTXDESC_CD_0_TCR_HD (1UL << 42) + #define CTXDESC_CD_0_AA64 (1UL << 41) #define CTXDESC_CD_0_S (1UL << 44) #define CTXDESC_CD_0_R (1UL << 45) diff --git 
a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 2e57e8616387..f5d9fd1f45bf 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -440,6 +440,8 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, pte = ARM_LPAE_PTE_nG; if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) pte |= ARM_LPAE_PTE_AP_RDONLY; + else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD) + pte |= ARM_LPAE_PTE_DBM; if (!(prot & IOMMU_PRIV)) pte |= ARM_LPAE_PTE_AP_UNPRIV; } else { @@ -929,7 +931,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_ARM_TTBR1 | - IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)) + IO_PGTABLE_QUIRK_ARM_OUTER_WBWA | + IO_PGTABLE_QUIRK_ARM_HD)) return NULL; data = arm_lpae_alloc_pgtable(cfg); From b5e86a95541cea737394a1da967df4cd4d8f7182 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Tue, 2 Jul 2024 21:08:33 +0800 Subject: [PATCH 42/72] iommu/vt-d: Handle volatile descriptor status read Queued invalidation wait descriptor status is volatile in that IOMMU hardware writes the data upon completion. Use READ_ONCE() to prevent compiler optimizations, which ensures the status is read from memory every time. As a side effect, READ_ONCE() also enforces strict types and may add an extra instruction. But it should not have negative performance impact since we use cpu_relax anyway and the extra time (from the added instruction) may make it easier for the IOMMU HW to request cacheline ownership. e.g. 
gcc 12.3 BEFORE: 81 38 ad de 00 00 cmpl $0x2,(%rax) AFTER (with READ_ONCE()) 772f: 8b 00 mov (%rax),%eax 7731: 3d ad de 00 00 cmp $0x2,%eax //status data is 32 bit Signed-off-by: Jacob Pan Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Link: https://lore.kernel.org/r/20240607173817.3914600-1-jacob.jun.pan@linux.intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20240702130839.108139-2-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 304e84949ca7..1c8d3141cb55 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1446,7 +1446,7 @@ restart: */ writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG); - while (qi->desc_status[wait_index] != QI_DONE) { + while (READ_ONCE(qi->desc_status[wait_index]) != QI_DONE) { /* * We will leave the interrupts disabled, to prevent interrupt * context to queue another cmd while a cmd is already submitted From 5fbf97371dc0a48794280445bc94e2e15dd81a63 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 21:08:34 +0800 Subject: [PATCH 43/72] iommu/vt-d: Remove comment for def_domain_type The comment for def_domain_type is outdated. Part of it is irrelevant. Furthermore, it could just be deleted since the iommu_ops::def_domain_type callback is properly documented in iommu.h, so individual implementations shouldn't need to repeat that. Remove it to avoid confusion. 
Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240624024327.234979-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20240702130839.108139-3-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/iommu.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 2e9811bf2a4e..abf0097f899d 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2177,17 +2177,6 @@ static bool device_rmrr_is_relaxable(struct device *dev) return false; } -/* - * Return the required default domain type for a specific device. - * - * @dev: the device in query - * @startup: true if this is during early boot - * - * Returns: - * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain - * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain - * - 0: both identity and dynamic domains work for this device - */ static int device_def_domain_type(struct device *dev) { if (dev_is_pci(dev)) { From e995fcde6070f0981e083c1e2e17e401e6c17ad9 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 21:08:35 +0800 Subject: [PATCH 44/72] iommu/vt-d: Remove control over Execute-Requested requests The VT-d specification has removed architectural support of the requests with pasid with a value of 1 for Execute-Requested (ER). And the NXE bit in the pasid table entry and XD bit in the first-stage paging Entries are deprecated accordingly. Remove the programming of these bits to make it consistent with the spec. 
Suggested-by: Jacob Pan Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240624032351.249858-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20240702130839.108139-4-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/iommu.c | 4 ++-- drivers/iommu/intel/iommu.h | 6 ++---- drivers/iommu/intel/pasid.c | 1 - drivers/iommu/intel/pasid.h | 10 ---------- 4 files changed, 4 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index abf0097f899d..1b5519dfa085 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -854,7 +854,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; if (domain->use_first_level) - pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; + pteval |= DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; tmp = 0ULL; if (!try_cmpxchg64(&pte->val, &tmp, pteval)) @@ -1872,7 +1872,7 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP); attr |= DMA_FL_PTE_PRESENT; if (domain->use_first_level) { - attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; + attr |= DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; if (prot & DMA_PTE_WRITE) attr |= DMA_FL_PTE_DIRTY; } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index eaf015b4353b..9a3b064126de 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -49,7 +49,6 @@ #define DMA_FL_PTE_US BIT_ULL(2) #define DMA_FL_PTE_ACCESS BIT_ULL(5) #define DMA_FL_PTE_DIRTY BIT_ULL(6) -#define DMA_FL_PTE_XD BIT_ULL(63) #define DMA_SL_PTE_DIRTY_BIT 9 #define DMA_SL_PTE_DIRTY BIT_ULL(DMA_SL_PTE_DIRTY_BIT) @@ -831,11 +830,10 @@ static inline void dma_clear_pte(struct dma_pte *pte) static inline u64 dma_pte_addr(struct dma_pte 
*pte) { #ifdef CONFIG_64BIT - return pte->val & VTD_PAGE_MASK & (~DMA_FL_PTE_XD); + return pte->val & VTD_PAGE_MASK; #else /* Must have a full atomic 64-bit read */ - return __cmpxchg64(&pte->val, 0ULL, 0ULL) & - VTD_PAGE_MASK & (~DMA_FL_PTE_XD); + return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK; #endif } diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 9bf45bc4b967..ffac7a75be95 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -336,7 +336,6 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, pasid_set_domain_id(pte, did); pasid_set_address_width(pte, iommu->agaw); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); - pasid_set_nxe(pte); /* Setup Present and PASID Granular Transfer Type: */ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY); diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index da9978fef7ac..dde6d3ba5ae0 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -247,16 +247,6 @@ static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value) pasid_set_bits(&pe->val[1], 1 << 23, value << 23); } -/* - * Setup No Execute Enable bit (Bit 133) of a scalable mode PASID - * entry. It is required when XD bit of the first level page table - * entry is about to be set. - */ -static inline void pasid_set_nxe(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[2], 1 << 5, 1 << 5); -} - /* * Setup the Page Snoop (PGSNP) field (Bit 88) of a scalable mode * PASID entry. From 804f98e224e41c16e3b70f97790f84894745a392 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 21:08:36 +0800 Subject: [PATCH 45/72] iommu/vt-d: Downgrade warning for pre-enabled IR Emitting a warning is overkill in intel_setup_irq_remapping() since the interrupt remapping is pre-enabled. For example, there's no guarantee that kexec will explicitly disable interrupt remapping before booting a new kernel. 
As a result, users are seeing warning messages like below when they kexec boot a kernel, though there is nothing wrong: DMAR-IR: IRQ remapping was enabled on dmar18 but we are not in kdump mode DMAR-IR: IRQ remapping was enabled on dmar17 but we are not in kdump mode DMAR-IR: IRQ remapping was enabled on dmar16 but we are not in kdump mode ... ... Downgrade the severity of this message to avoid user confusion. CC: Paul Menzel Link: https://lore.kernel.org/linux-iommu/5517f76a-94ad-452c-bae6-34ecc0ec4831@molgen.mpg.de/ Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240625043912.258036-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20240702130839.108139-5-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/irq_remapping.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index e4a70886678c..e090ca07364b 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -597,8 +597,8 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) if (ir_pre_enabled(iommu)) { if (!is_kdump_kernel()) { - pr_warn("IRQ remapping was enabled on %s but we are not in kdump mode\n", - iommu->name); + pr_info_once("IRQ remapping was enabled on %s but we are not in kdump mode\n", + iommu->name); clear_ir_pre_enabled(iommu); iommu_disable_irq_remapping(iommu); } else if (iommu_load_old_irte(iommu)) From 2b989ab9bc89b29dd4b5509408b8fa42337eda56 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 21:08:37 +0800 Subject: [PATCH 46/72] iommu/vt-d: Add helper to allocate paging domain The domain_alloc_user operation is currently implemented by allocating a paging domain using iommu_domain_alloc(). This is because it needs to fully initialize the domain before return. Add a helper to do this to avoid using iommu_domain_alloc(). 
Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240610085555.88197-16-baolu.lu@linux.intel.com Reviewed-by: Yi Liu Link: https://lore.kernel.org/r/20240702130839.108139-6-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/iommu.c | 90 +++++++++++++++++++++++++++++++++---- 1 file changed, 81 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 1b5519dfa085..1f0d6892a0b6 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3622,6 +3622,79 @@ static struct iommu_domain blocking_domain = { } }; +static int iommu_superpage_capability(struct intel_iommu *iommu, bool first_stage) +{ + if (!intel_iommu_superpage) + return 0; + + if (first_stage) + return cap_fl1gp_support(iommu->cap) ? 2 : 1; + + return fls(cap_super_page_val(iommu->cap)); +} + +static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_stage) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + struct dmar_domain *domain; + int addr_width; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&domain->devices); + INIT_LIST_HEAD(&domain->dev_pasids); + INIT_LIST_HEAD(&domain->cache_tags); + spin_lock_init(&domain->lock); + spin_lock_init(&domain->cache_lock); + xa_init(&domain->iommu_array); + + domain->nid = dev_to_node(dev); + domain->has_iotlb_device = info->ats_enabled; + domain->use_first_level = first_stage; + + /* calculate the address width */ + addr_width = agaw_to_width(iommu->agaw); + if (addr_width > cap_mgaw(iommu->cap)) + addr_width = cap_mgaw(iommu->cap); + domain->gaw = addr_width; + domain->agaw = iommu->agaw; + domain->max_addr = __DOMAIN_MAX_ADDR(addr_width); + + /* iommu memory access coherency */ + domain->iommu_coherency = iommu_paging_structure_coherency(iommu); + + /* pagesize bitmap */ + 
domain->domain.pgsize_bitmap = SZ_4K; + domain->iommu_superpage = iommu_superpage_capability(iommu, first_stage); + domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain); + + /* + * IOVA aperture: First-level translation restricts the input-address + * to a canonical address (i.e., address bits 63:N have the same value + * as address bit [N-1], where N is 48-bits with 4-level paging and + * 57-bits with 5-level paging). Hence, skip bit [N-1]. + */ + domain->domain.geometry.force_aperture = true; + domain->domain.geometry.aperture_start = 0; + if (first_stage) + domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1); + else + domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw); + + /* always allocate the top pgd */ + domain->pgd = iommu_alloc_page_node(domain->nid, GFP_KERNEL); + if (!domain->pgd) { + kfree(domain); + return ERR_PTR(-ENOMEM); + } + domain_flush_cache(domain, domain->pgd, PAGE_SIZE); + + return domain; +} + static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) { struct dmar_domain *dmar_domain; @@ -3684,15 +3757,14 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags, if (user_data || (dirty_tracking && !ssads_supported(iommu))) return ERR_PTR(-EOPNOTSUPP); - /* - * domain_alloc_user op needs to fully initialize a domain before - * return, so uses iommu_domain_alloc() here for simple. - */ - domain = iommu_domain_alloc(dev->bus); - if (!domain) - return ERR_PTR(-ENOMEM); - - dmar_domain = to_dmar_domain(domain); + /* Do not use first stage for user domain translation. 
*/ + dmar_domain = paging_domain_alloc(dev, false); + if (IS_ERR(dmar_domain)) + return ERR_CAST(dmar_domain); + domain = &dmar_domain->domain; + domain->type = IOMMU_DOMAIN_UNMANAGED; + domain->owner = &intel_iommu_ops; + domain->ops = intel_iommu_ops.default_domain_ops; if (nested_parent) { dmar_domain->nested_parent = true; From f90584f4beb84211c4d21b319cc13f391fe9f3c2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 21:08:38 +0800 Subject: [PATCH 47/72] iommu/vt-d: Add helper to flush caches for context change This helper is used to flush the related caches following a change in a context table entry that was previously present. The VT-d specification provides guidance for such invalidations in section 6.5.3.3. This helper replaces the existing open code in the code paths where a present context entry is being torn down. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240701112317.94022-2-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20240702130839.108139-7-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/iommu.c | 32 +---------- drivers/iommu/intel/iommu.h | 4 ++ drivers/iommu/intel/pasid.c | 106 +++++++++++++++++++++++++++++------- 3 files changed, 92 insertions(+), 50 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 1f0d6892a0b6..e84b0fdca107 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1359,21 +1359,6 @@ static void iommu_disable_pci_caps(struct device_domain_info *info) } } -static void __iommu_flush_dev_iotlb(struct device_domain_info *info, - u64 addr, unsigned int mask) -{ - u16 sid, qdep; - - if (!info || !info->ats_enabled) - return; - - sid = info->bus << 8 | info->devfn; - qdep = info->ats_qdep; - qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, - qdep, addr, mask); - quirk_extra_dev_tlb_flush(info, addr, mask, IOMMU_NO_PASID, qdep); -} - static void intel_flush_iotlb_all(struct iommu_domain 
*domain) { cache_tag_flush_all(to_dmar_domain(domain)); @@ -1959,7 +1944,6 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 { struct intel_iommu *iommu = info->iommu; struct context_entry *context; - u16 did_old; spin_lock(&iommu->lock); context = iommu_context_addr(iommu, bus, devfn, 0); @@ -1968,24 +1952,10 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 return; } - did_old = context_domain_id(context); - context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); spin_unlock(&iommu->lock); - iommu->flush.flush_context(iommu, - did_old, - (((u16)bus) << 8) | devfn, - DMA_CCMD_MASK_NOBIT, - DMA_CCMD_DEVICE_INVL); - - iommu->flush.flush_iotlb(iommu, - did_old, - 0, - 0, - DMA_TLB_DSI_FLUSH); - - __iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH); + intel_context_flush_present(info, context, true); } static int domain_setup_first_level(struct intel_iommu *iommu, diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 9a3b064126de..63eb3306c025 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1143,6 +1143,10 @@ void cache_tag_flush_all(struct dmar_domain *domain); void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, unsigned long end); +void intel_context_flush_present(struct device_domain_info *info, + struct context_entry *context, + bool affect_domains); + #ifdef CONFIG_INTEL_IOMMU_SVM void intel_svm_check(struct intel_iommu *iommu); int intel_svm_enable_prq(struct intel_iommu *iommu); diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index ffac7a75be95..3d23cc6d3214 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -694,25 +694,7 @@ static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn) context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); spin_unlock(&iommu->lock); - - /* - * Cache 
invalidation for changes to a scalable-mode context table - * entry. - * - * Section 6.5.3.3 of the VT-d spec: - * - Device-selective context-cache invalidation; - * - Domain-selective PASID-cache invalidation to affected domains - * (can be skipped if all PASID entries were not-present); - * - Domain-selective IOTLB invalidation to affected domains; - * - Global Device-TLB invalidation to affected functions. - * - * The iommu has been parked in the blocking state. All domains have - * been detached from the device or PASID. The PASID and IOTLB caches - * have been invalidated during the domain detach path. - */ - iommu->flush.flush_context(iommu, 0, PCI_DEVID(bus, devfn), - DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); - devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID); + intel_context_flush_present(info, context, false); } static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data) @@ -874,3 +856,89 @@ int intel_pasid_setup_sm_context(struct device *dev) return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev); } + +/* + * Global Device-TLB invalidation following changes in a context entry which + * was present. + */ +static void __context_flush_dev_iotlb(struct device_domain_info *info) +{ + if (!info->ats_enabled) + return; + + qi_flush_dev_iotlb(info->iommu, PCI_DEVID(info->bus, info->devfn), + info->pfsid, info->ats_qdep, 0, MAX_AGAW_PFN_WIDTH); + + /* + * There is no guarantee that the device DMA is stopped when it reaches + * here. Therefore, always attempt the extra device TLB invalidation + * quirk. The impact on performance is acceptable since this is not a + * performance-critical path. + */ + quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH, IOMMU_NO_PASID, + info->ats_qdep); +} + +/* + * Cache invalidations after change in a context table entry that was present + * according to the Spec 6.5.3.3 (Guidance to Software for Invalidations). 
If + * IOMMU is in scalable mode and all PASID table entries of the device were + * non-present, set flush_domains to false. Otherwise, true. + */ +void intel_context_flush_present(struct device_domain_info *info, + struct context_entry *context, + bool flush_domains) +{ + struct intel_iommu *iommu = info->iommu; + u16 did = context_domain_id(context); + struct pasid_entry *pte; + int i; + + /* + * Device-selective context-cache invalidation. The Domain-ID field + * of the Context-cache Invalidate Descriptor is ignored by hardware + * when operating in scalable mode. Therefore the @did value doesn't + * matter in scalable mode. + */ + iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus, info->devfn), + DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); + + /* + * For legacy mode: + * - Domain-selective IOTLB invalidation + * - Global Device-TLB invalidation to all affected functions + */ + if (!sm_supported(iommu)) { + iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); + __context_flush_dev_iotlb(info); + + return; + } + + /* + * For scalable mode: + * - Domain-selective PASID-cache invalidation to affected domains + * - Domain-selective IOTLB invalidation to affected domains + * - Global Device-TLB invalidation to affected functions + */ + if (flush_domains) { + /* + * If the IOMMU is running in scalable mode and there might + * be potential PASID translations, the caller should hold + * the lock to ensure that context changes and cache flushes + * are atomic. 
+ */ + assert_spin_locked(&iommu->lock); + for (i = 0; i < info->pasid_table->max_pasid; i++) { + pte = intel_pasid_get_entry(info->dev, i); + if (!pte || !pasid_pte_is_present(pte)) + continue; + + did = pasid_get_domain_id(pte); + qi_flush_pasid_cache(iommu, did, QI_PC_ALL_PASIDS, 0); + iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); + } + } + + __context_flush_dev_iotlb(info); +} From 3753311c9190f833963fb47336dfe17221d93706 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 2 Jul 2024 21:08:39 +0800 Subject: [PATCH 48/72] iommu/vt-d: Refactor PCI PRI enabling/disabling callbacks Commit 0095bf83554f8 ("iommu: Improve iopf_queue_remove_device()") specified the flow for disabling the PRI on a device. Refactor the PRI callbacks in the intel iommu driver to better manage PRI enabling and disabling and align it with the device queue interfaces in the iommu core. Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240701112317.94022-3-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20240702130839.108139-8-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/iommu.c | 57 +++++++++++++++++++++++++++++++++---- drivers/iommu/intel/iommu.h | 9 ++++++ drivers/iommu/intel/pasid.c | 2 -- 3 files changed, 61 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index e84b0fdca107..523407f6f6b2 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4244,6 +4244,37 @@ static int intel_iommu_enable_sva(struct device *dev) return 0; } +static int context_flip_pri(struct device_domain_info *info, bool enable) +{ + struct intel_iommu *iommu = info->iommu; + u8 bus = info->bus, devfn = info->devfn; + struct context_entry *context; + + spin_lock(&iommu->lock); + if (context_copied(iommu, bus, devfn)) { + spin_unlock(&iommu->lock); + return -EINVAL; + } + + context = iommu_context_addr(iommu, bus, devfn, false); + if (!context || 
!context_present(context)) { + spin_unlock(&iommu->lock); + return -ENODEV; + } + + if (enable) + context_set_sm_pre(context); + else + context_clear_sm_pre(context); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(context, sizeof(*context)); + intel_context_flush_present(info, context, true); + spin_unlock(&iommu->lock); + + return 0; +} + static int intel_iommu_enable_iopf(struct device *dev) { struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL; @@ -4273,15 +4304,23 @@ static int intel_iommu_enable_iopf(struct device *dev) if (ret) return ret; + ret = context_flip_pri(info, true); + if (ret) + goto err_remove_device; + ret = pci_enable_pri(pdev, PRQ_DEPTH); - if (ret) { - iopf_queue_remove_device(iommu->iopf_queue, dev); - return ret; - } + if (ret) + goto err_clear_pri; info->pri_enabled = 1; return 0; +err_clear_pri: + context_flip_pri(info, false); +err_remove_device: + iopf_queue_remove_device(iommu->iopf_queue, dev); + + return ret; } static int intel_iommu_disable_iopf(struct device *dev) @@ -4292,6 +4331,15 @@ static int intel_iommu_disable_iopf(struct device *dev) if (!info->pri_enabled) return -EINVAL; + /* Disable new PRI reception: */ + context_flip_pri(info, false); + + /* + * Remove device from fault queue and acknowledge all outstanding + * PRQs to the device: + */ + iopf_queue_remove_device(iommu->iopf_queue, dev); + /* * PCIe spec states that by clearing PRI enable bit, the Page * Request Interface will not issue new page requests, but has @@ -4302,7 +4350,6 @@ static int intel_iommu_disable_iopf(struct device *dev) */ pci_disable_pri(to_pci_dev(dev)); info->pri_enabled = 0; - iopf_queue_remove_device(iommu->iopf_queue, dev); return 0; } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 63eb3306c025..b67c14da1240 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1045,6 +1045,15 @@ static inline void context_set_sm_pre(struct context_entry *context) context->lo |= 
BIT_ULL(4); } +/* + * Clear the PRE(Page Request Enable) field of a scalable mode context + * entry. + */ +static inline void context_clear_sm_pre(struct context_entry *context) +{ + context->lo &= ~BIT_ULL(4); +} + /* Returns a number of VTD pages, but aligned to MM page size */ static inline unsigned long aligned_nrpages(unsigned long host_addr, size_t size) { diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 3d23cc6d3214..5792c817cefa 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -752,8 +752,6 @@ static int context_entry_set_pasid_table(struct context_entry *context, if (info->ats_supported) context_set_sm_dte(context); - if (info->pri_supported) - context_set_sm_pre(context); if (info->pasid_supported) context_set_pasid(context); From 0b4eeee2876f2b08442eb32081451bf130e01a4c Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Wed, 3 Jul 2024 18:07:59 -0700 Subject: [PATCH 49/72] iommu/arm-smmu-qcom: Register the TBU driver in qcom_smmu_impl_init Currently the TBU driver will only probe when CONFIG_ARM_SMMU_QCOM_DEBUG is enabled. The driver not probing would prevent the platform from reaching sync_state, and the system will remain in a sub-optimal power consumption mode while waiting for all consumer drivers to probe. To address this, let's register the TBU driver in qcom_smmu_impl_init(), so that it can probe, but still enable its functionality only when the debug option in Kconfig is enabled.
Reported-by: Dmitry Baryshkov Closes: https://lore.kernel.org/r/CAA8EJppcXVu72OSo+OiYEiC1HQjP3qCwKMumOsUhcn6Czj0URg@mail.gmail.com Fixes: 414ecb030870 ("iommu/arm-smmu-qcom-debug: Add support for TBUs") Signed-off-by: Georgi Djakov Link: https://lore.kernel.org/r/20240704010759.507798-1-quic_c_gdjako@quicinc.com Signed-off-by: Will Deacon --- .../iommu/arm/arm-smmu/arm-smmu-qcom-debug.c | 17 +------- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 39 +++++++++++++++++++ drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h | 2 + 3 files changed, 42 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c index ef93f825f11f..548783f3f8e8 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -464,7 +464,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) return ret; } -static int qcom_tbu_probe(struct platform_device *pdev) +int qcom_tbu_probe(struct platform_device *pdev) { struct of_phandle_args args = { .args_count = 2 }; struct device_node *np = pdev->dev.of_node; @@ -506,18 +506,3 @@ static int qcom_tbu_probe(struct platform_device *pdev) return 0; } - -static const struct of_device_id qcom_tbu_of_match[] = { - { .compatible = "qcom,sc7280-tbu" }, - { .compatible = "qcom,sdm845-tbu" }, - { } -}; - -static struct platform_driver qcom_tbu_driver = { - .driver = { - .name = "qcom_tbu", - .of_match_table = qcom_tbu_of_match, - }, - .probe = qcom_tbu_probe, -}; -builtin_platform_driver(qcom_tbu_driver); diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 971c6a2e592b..36c6b36ad4ff 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include "arm-smmu.h" #include "arm-smmu-qcom.h" @@ -562,10 +564,47 @@ static struct acpi_platform_list qcom_acpi_platlist[] 
= { }; #endif +static int qcom_smmu_tbu_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + int ret; + + if (IS_ENABLED(CONFIG_ARM_SMMU_QCOM_DEBUG)) { + ret = qcom_tbu_probe(pdev); + if (ret) + return ret; + } + + if (dev->pm_domain) { + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + } + + return 0; +} + +static const struct of_device_id qcom_smmu_tbu_of_match[] = { + { .compatible = "qcom,sc7280-tbu" }, + { .compatible = "qcom,sdm845-tbu" }, + { } +}; + +static struct platform_driver qcom_smmu_tbu_driver = { + .driver = { + .name = "qcom_tbu", + .of_match_table = qcom_smmu_tbu_of_match, + }, + .probe = qcom_smmu_tbu_probe, +}; + struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) { const struct device_node *np = smmu->dev->of_node; const struct of_device_id *match; + static u8 tbu_registered; + + if (!tbu_registered++) + platform_driver_register(&qcom_smmu_tbu_driver); #ifdef CONFIG_ACPI if (np == NULL) { diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h index 9bb3ae7d62da..3c134d1a6277 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h @@ -34,8 +34,10 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev); #ifdef CONFIG_ARM_SMMU_QCOM_DEBUG void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu); +int qcom_tbu_probe(struct platform_device *pdev); #else static inline void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu) { } +static inline int qcom_tbu_probe(struct platform_device *pdev) { return -EINVAL; } #endif #endif /* _ARM_SMMU_QCOM_H */ From a27bf2743cb80d3b36b5b43e8e2e702412c41668 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Jun 2024 16:55:35 +0800 Subject: [PATCH 50/72] iommu: Add iommu_paging_domain_alloc() interface Commit <17de3f5fdd35> ("iommu: Retire bus ops") removes iommu ops from bus. The iommu subsystem no longer relies on bus for operations. 
So the bus parameter in iommu_domain_alloc() is no longer relevant. Add a new interface named iommu_paging_domain_alloc(), which explicitly indicates the allocation of a paging domain for DMA managed by a kernel driver. The new interface takes a device pointer as its parameter, that better aligns with the current iommu subsystem. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20240610085555.88197-2-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/iommu.c | 20 ++++++++++++++++++++ include/linux/iommu.h | 6 ++++++ 2 files changed, 26 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9df7cc75c1bc..e03c71a34347 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2016,6 +2016,10 @@ static int __iommu_domain_alloc_dev(struct device *dev, void *data) return 0; } +/* + * The iommu ops in bus has been retired. Do not use this interface in + * new drivers. + */ struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) { const struct iommu_ops *ops = NULL; @@ -2032,6 +2036,22 @@ struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) } EXPORT_SYMBOL_GPL(iommu_domain_alloc); +/** + * iommu_paging_domain_alloc() - Allocate a paging domain + * @dev: device for which the domain is allocated + * + * Allocate a paging domain which will be managed by a kernel driver. Return + * allocated domain if successful, or a ERR pointer for failure. 
+ */ +struct iommu_domain *iommu_paging_domain_alloc(struct device *dev) +{ + if (!dev_has_iommu(dev)) + return ERR_PTR(-ENODEV); + + return __iommu_domain_alloc(dev_iommu_ops(dev), dev, IOMMU_DOMAIN_UNMANAGED); +} +EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc); + void iommu_domain_free(struct iommu_domain *domain) { if (domain->type == IOMMU_DOMAIN_SVA) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 17b3f36ad843..58ea0935d355 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -780,6 +780,7 @@ extern bool iommu_present(const struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); extern bool iommu_group_has_isolated_msi(struct iommu_group *group); extern struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus); +struct iommu_domain *iommu_paging_domain_alloc(struct device *dev); extern void iommu_domain_free(struct iommu_domain *domain); extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); @@ -1086,6 +1087,11 @@ static inline struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus return NULL; } +static inline struct iommu_domain *iommu_paging_domain_alloc(struct device *dev) +{ + return ERR_PTR(-ENODEV); +} + static inline void iommu_domain_free(struct iommu_domain *domain) { } From 26a581606fab44ff76b394f0ba44cd19c6ec0a6e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Jun 2024 16:55:36 +0800 Subject: [PATCH 51/72] iommufd: Use iommu_paging_domain_alloc() If the iommu driver doesn't implement its domain_alloc_user callback, iommufd_hwpt_paging_alloc() falls back to allocating an iommu paging domain. Replace iommu_domain_alloc() with iommu_paging_domain_alloc() to pass the device pointer along the path.
Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240610085555.88197-3-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/iommufd/hw_pagetable.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 33d142f8057d..cbddfa3ca95a 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -137,9 +137,10 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, } hwpt->domain->owner = ops; } else { - hwpt->domain = iommu_domain_alloc(idev->dev->bus); - if (!hwpt->domain) { - rc = -ENOMEM; + hwpt->domain = iommu_paging_domain_alloc(idev->dev); + if (IS_ERR(hwpt->domain)) { + rc = PTR_ERR(hwpt->domain); + hwpt->domain = NULL; goto out_abort; } } From 60ffc45017229ee8288ba139ee12c5ebf07c6f6a Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Jun 2024 16:55:37 +0800 Subject: [PATCH 52/72] vfio/type1: Use iommu_paging_domain_alloc() Replace iommu_domain_alloc() with iommu_paging_domain_alloc(). Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240610085555.88197-4-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/vfio/vfio_iommu_type1.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 3a0218171cfa..0960699e7554 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -2135,7 +2135,7 @@ static int vfio_iommu_domain_alloc(struct device *dev, void *data) { struct iommu_domain **domain = data; - *domain = iommu_domain_alloc(dev->bus); + *domain = iommu_paging_domain_alloc(dev); return 1; /* Don't iterate */ } @@ -2192,11 +2192,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, * us a representative device for the IOMMU API call. 
We don't actually * want to iterate beyond the first device (if any). */ - ret = -EIO; iommu_group_for_each_dev(iommu_group, &domain->domain, vfio_iommu_domain_alloc); - if (!domain->domain) + if (IS_ERR(domain->domain)) { + ret = PTR_ERR(domain->domain); goto out_free_domain; + } if (iommu->nesting) { ret = iommu_enable_nesting(domain->domain); From 9c159f6de1aedf200ac94eace47f6082399b561c Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Jun 2024 16:55:38 +0800 Subject: [PATCH 53/72] vhost-vdpa: Use iommu_paging_domain_alloc() Replace iommu_domain_alloc() with iommu_paging_domain_alloc(). Signed-off-by: Lu Baolu Acked-by: Michael S. Tsirkin Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240610085555.88197-5-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/vhost/vdpa.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 63a53680a85c..e31ec9ebc4ce 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1312,26 +1312,24 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v) struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; struct device *dma_dev = vdpa_get_dma_dev(vdpa); - const struct bus_type *bus; int ret; /* Device want to do DMA by itself */ if (ops->set_map || ops->dma_map) return 0; - bus = dma_dev->bus; - if (!bus) - return -EFAULT; - if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) { dev_warn_once(&v->dev, "Failed to allocate domain, device is not IOMMU cache coherent capable\n"); return -ENOTSUPP; } - v->domain = iommu_domain_alloc(bus); - if (!v->domain) - return -EIO; + v->domain = iommu_paging_domain_alloc(dma_dev); + if (IS_ERR(v->domain)) { + ret = PTR_ERR(v->domain); + v->domain = NULL; + return ret; + } ret = iommu_attach_device(v->domain, dma_dev); if (ret) From 45acf35af200b305d1e6119ca9de47aa4c3c45b9 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Jun 2024 16:55:39 
+0800 Subject: [PATCH 54/72] drm/msm: Use iommu_paging_domain_alloc() The domain allocated in msm_iommu_new() is for the @dev. Replace iommu_domain_alloc() with iommu_paging_domain_alloc() to make it explicit. Signed-off-by: Lu Baolu Acked-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240610085555.88197-6-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/gpu/drm/msm/msm_iommu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index d5512037c38b..2a94e82316f9 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -407,10 +407,13 @@ struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks) struct msm_iommu *iommu; int ret; - domain = iommu_domain_alloc(dev->bus); - if (!domain) + if (!device_iommu_mapped(dev)) return NULL; + domain = iommu_paging_domain_alloc(dev); + if (IS_ERR(domain)) + return ERR_CAST(domain); + iommu_set_pgtable_quirks(domain, quirks); iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); From d5b7485588dffb39c5687e965623124ab7ebcd51 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Jun 2024 16:55:44 +0800 Subject: [PATCH 55/72] wifi: ath10k: Use iommu_paging_domain_alloc() An iommu domain is allocated in ath10k_fw_init() and is attached to ar_snoc->fw.dev in the same function. Use iommu_paging_domain_alloc() to make it explicit. 
Signed-off-by: Lu Baolu Acked-by: Jeff Johnson Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240610085555.88197-11-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/net/wireless/ath/ath10k/snoc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index 8530550cf5df..0fe47d51013c 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -1635,10 +1635,10 @@ static int ath10k_fw_init(struct ath10k *ar) ar_snoc->fw.dev = &pdev->dev; - iommu_dom = iommu_domain_alloc(&platform_bus_type); - if (!iommu_dom) { + iommu_dom = iommu_paging_domain_alloc(ar_snoc->fw.dev); + if (IS_ERR(iommu_dom)) { ath10k_err(ar, "failed to allocate iommu domain\n"); - ret = -ENOMEM; + ret = PTR_ERR(iommu_dom); goto err_unregister; } From ef50d41fbf1c95b07f636f4e268c53488a39284b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Jun 2024 16:55:45 +0800 Subject: [PATCH 56/72] wifi: ath11k: Use iommu_paging_domain_alloc() An iommu domain is allocated in ath11k_ahb_fw_resources_init() and is attached to ab_ahb->fw.dev in the same function. Use iommu_paging_domain_alloc() to make it explicit. 
Signed-off-by: Lu Baolu Acked-by: Jeff Johnson Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240610085555.88197-12-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/net/wireless/ath/ath11k/ahb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c index ca0f17ddebba..a469647719f9 100644 --- a/drivers/net/wireless/ath/ath11k/ahb.c +++ b/drivers/net/wireless/ath/ath11k/ahb.c @@ -1001,10 +1001,10 @@ static int ath11k_ahb_fw_resources_init(struct ath11k_base *ab) ab_ahb->fw.dev = &pdev->dev; - iommu_dom = iommu_domain_alloc(&platform_bus_type); - if (!iommu_dom) { + iommu_dom = iommu_paging_domain_alloc(ab_ahb->fw.dev); + if (IS_ERR(iommu_dom)) { ath11k_err(ab, "failed to allocate iommu domain\n"); - ret = -ENOMEM; + ret = PTR_ERR(iommu_dom); goto err_unregister; } From 3b10f25704beefd0534f89db8323398c89b720e1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 10 Jun 2024 16:55:48 +0800 Subject: [PATCH 57/72] RDMA/usnic: Use iommu_paging_domain_alloc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit usnic_uiom_alloc_pd() allocates a paging domain for a given device. In this case, iommu_domain_alloc(dev->bus) is equivalent to iommu_paging_domain_alloc(dev). Replace it as iommu_domain_alloc() has been deprecated. 
Signed-off-by: Lu Baolu Acked-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240610085555.88197-15-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/infiniband/hw/usnic/usnic_uiom.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 84e0f41e7dfa..f948b76f984d 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -443,11 +443,11 @@ struct usnic_uiom_pd *usnic_uiom_alloc_pd(struct device *dev) if (!pd) return ERR_PTR(-ENOMEM); - pd->domain = domain = iommu_domain_alloc(dev->bus); - if (!domain) { + pd->domain = domain = iommu_paging_domain_alloc(dev); + if (IS_ERR(domain)) { usnic_err("Failed to allocate IOMMU domain"); kfree(pd); - return ERR_PTR(-ENOMEM); + return ERR_CAST(domain); } iommu_set_fault_handler(pd->domain, usnic_uiom_dma_fault, NULL); From e7acc36f26b0b1a71dd068f4afd33d871352d67d Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 2 Jul 2024 12:40:47 +0100 Subject: [PATCH 58/72] iommu/mediatek-v1: Clean up redundant fwspec checks The driver explicitly clears any existing fwspec before calling mtk_iommu_v1_create_mapping(), but even if it didn't, the checks it's doing there duplicate what iommu_fwspec_init() would do anyway. Clean them up. 
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202407020415.KKnhPTUj-lkp@intel.com/ Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/7d6ebec135483f889af00eb376aa31c012efc3b2.1719919669.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/iommu/mtk_iommu_v1.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index d6e4002200bd..2b64ea46318f 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -401,7 +401,6 @@ static const struct iommu_ops mtk_iommu_v1_ops; static int mtk_iommu_v1_create_mapping(struct device *dev, const struct of_phandle_args *args) { - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct mtk_iommu_v1_data *data; struct platform_device *m4updev; struct dma_iommu_mapping *mtk_mapping; @@ -413,14 +412,9 @@ static int mtk_iommu_v1_create_mapping(struct device *dev, return -EINVAL; } - if (!fwspec) { - ret = iommu_fwspec_init(dev, &args->np->fwnode, &mtk_iommu_v1_ops); - if (ret) - return ret; - fwspec = dev_iommu_fwspec_get(dev); - } else if (dev_iommu_fwspec_get(dev)->ops != &mtk_iommu_v1_ops) { - return -EINVAL; - } + ret = iommu_fwspec_init(dev, &args->np->fwnode, &mtk_iommu_v1_ops); + if (ret) + return ret; if (!dev_iommu_priv_get(dev)) { /* Get the m4u device */ From 3f7c320916282c26812d70cfe8830abb9e4dc696 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 2 Jul 2024 12:40:48 +0100 Subject: [PATCH 59/72] iommu: Resolve fwspec ops automatically There's no real need for callers to resolve ops from a fwnode in order to then pass both to iommu_fwspec_init() - it's simpler and more sensible for that to resolve the ops itself. This in turn means we can centralise the notion of checking for a present driver, and enforce that fwspecs aren't allocated unless and until we know they will be usable. 
Also use this opportunity to modernise with some "new" helpers that arrived shortly after this code was first written; the generic fwnode_handle_get() clears up that ugly get/put mismatch, while of_fwnode_handle() can now abstract those open-coded dereferences. Tested-by: Jean-Philippe Brucker Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/0e2727adeb8cd73274425322f2f793561bdc927e.1719919669.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/acpi/arm64/iort.c | 19 +++++-------------- drivers/acpi/scan.c | 8 +++----- drivers/acpi/viot.c | 11 ++--------- drivers/iommu/arm/arm-smmu/arm-smmu.c | 3 +-- drivers/iommu/iommu-priv.h | 2 ++ drivers/iommu/iommu.c | 9 ++++++--- drivers/iommu/mtk_iommu_v1.c | 2 +- drivers/iommu/of_iommu.c | 19 ++++++------------- drivers/iommu/tegra-smmu.c | 2 +- include/acpi/acpi_bus.h | 3 +-- include/linux/iommu.h | 13 ++----------- 11 files changed, 30 insertions(+), 61 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index c0b1c2c19444..1b39e9ae7ac1 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1221,10 +1221,10 @@ static bool iort_pci_rc_supports_ats(struct acpi_iort_node *node) static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node, u32 streamid) { - const struct iommu_ops *ops; struct fwnode_handle *iort_fwnode; - if (!node) + /* If there's no SMMU driver at all, give up now */ + if (!node || !iort_iommu_driver_enabled(node->type)) return -ENODEV; iort_fwnode = iort_get_fwnode(node); @@ -1232,19 +1232,10 @@ static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node, return -ENODEV; /* - * If the ops look-up fails, this means that either - * the SMMU drivers have not been probed yet or that - * the SMMU drivers are not built in the kernel; - * Depending on whether the SMMU drivers are built-in - * in the kernel or not, defer the IOMMU configuration - * or just abort it. 
+ * If the SMMU drivers are enabled but not loaded/probed + * yet, this will defer. */ - ops = iommu_ops_from_fwnode(iort_fwnode); - if (!ops) - return iort_iommu_driver_enabled(node->type) ? - -EPROBE_DEFER : -ENODEV; - - return acpi_iommu_fwspec_init(dev, streamid, iort_fwnode, ops); + return acpi_iommu_fwspec_init(dev, streamid, iort_fwnode); } struct iort_pci_alias_info { diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 503773707e01..8d5a589db141 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1577,12 +1577,11 @@ int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map) #ifdef CONFIG_IOMMU_API int acpi_iommu_fwspec_init(struct device *dev, u32 id, - struct fwnode_handle *fwnode, - const struct iommu_ops *ops) + struct fwnode_handle *fwnode) { int ret; - ret = iommu_fwspec_init(dev, fwnode, ops); + ret = iommu_fwspec_init(dev, fwnode); if (ret) return ret; @@ -1639,8 +1638,7 @@ static int acpi_iommu_configure_id(struct device *dev, const u32 *id_in) #else /* !CONFIG_IOMMU_API */ int acpi_iommu_fwspec_init(struct device *dev, u32 id, - struct fwnode_handle *fwnode, - const struct iommu_ops *ops) + struct fwnode_handle *fwnode) { return -ENODEV; } diff --git a/drivers/acpi/viot.c b/drivers/acpi/viot.c index c8025921c129..2aa69a2fba73 100644 --- a/drivers/acpi/viot.c +++ b/drivers/acpi/viot.c @@ -307,21 +307,14 @@ void __init acpi_viot_init(void) static int viot_dev_iommu_init(struct device *dev, struct viot_iommu *viommu, u32 epid) { - const struct iommu_ops *ops; - - if (!viommu) + if (!viommu || !IS_ENABLED(CONFIG_VIRTIO_IOMMU)) return -ENODEV; /* We're not translating ourself */ if (device_match_fwnode(dev, viommu->fwnode)) return -EINVAL; - ops = iommu_ops_from_fwnode(viommu->fwnode); - if (!ops) - return IS_ENABLED(CONFIG_VIRTIO_IOMMU) ? 
- -EPROBE_DEFER : -ENODEV; - - return acpi_iommu_fwspec_init(dev, epid, viommu->fwnode, ops); + return acpi_iommu_fwspec_init(dev, epid, viommu->fwnode); } static int viot_pci_dev_iommu_init(struct pci_dev *pdev, u16 dev_id, void *data) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 87c81f75cf84..c200e6d3aed5 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -178,8 +178,7 @@ static int arm_smmu_register_legacy_master(struct device *dev, it.cur_count = 1; } - err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode, - &arm_smmu_ops); + err = iommu_fwspec_init(dev, NULL); if (err) return err; diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h index 5f731d994803..078cafcf49b4 100644 --- a/drivers/iommu/iommu-priv.h +++ b/drivers/iommu/iommu-priv.h @@ -17,6 +17,8 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) return dev->iommu->iommu_dev->ops; } +const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode); + int iommu_group_replace_domain(struct iommu_group *group, struct iommu_domain *new_domain); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9df7cc75c1bc..7618c4285cf9 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2822,11 +2822,14 @@ const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode return ops; } -int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, - const struct iommu_ops *ops) +int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode) { + const struct iommu_ops *ops = iommu_ops_from_fwnode(iommu_fwnode); struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + if (!ops) + return -EPROBE_DEFER; + if (fwspec) return ops == fwspec->ops ? 
0 : -EINVAL; @@ -2838,7 +2841,7 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, if (!fwspec) return -ENOMEM; - of_node_get(to_of_node(iommu_fwnode)); + fwnode_handle_get(iommu_fwnode); fwspec->iommu_fwnode = iommu_fwnode; fwspec->ops = ops; dev_iommu_fwspec_set(dev, fwspec); diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 2b64ea46318f..c6ea5b4baff3 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -412,7 +412,7 @@ static int mtk_iommu_v1_create_mapping(struct device *dev, return -EINVAL; } - ret = iommu_fwspec_init(dev, &args->np->fwnode, &mtk_iommu_v1_ops); + ret = iommu_fwspec_init(dev, of_fwnode_handle(args->np)); if (ret) return ret; diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 3afe0b48a48d..08c523ad55ad 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -21,26 +21,19 @@ static int of_iommu_xlate(struct device *dev, struct of_phandle_args *iommu_spec) { const struct iommu_ops *ops; - struct fwnode_handle *fwnode = &iommu_spec->np->fwnode; int ret; - ops = iommu_ops_from_fwnode(fwnode); - if ((ops && !ops->of_xlate) || - !of_device_is_available(iommu_spec->np)) + if (!of_device_is_available(iommu_spec->np)) return -ENODEV; - ret = iommu_fwspec_init(dev, fwnode, ops); + ret = iommu_fwspec_init(dev, of_fwnode_handle(iommu_spec->np)); + if (ret == -EPROBE_DEFER) + return driver_deferred_probe_check_state(dev); if (ret) return ret; - /* - * The otherwise-empty fwspec handily serves to indicate the specific - * IOMMU device we're waiting for, which will be useful if we ever get - * a proper probe-ordering dependency mechanism in future. 
- */ - if (!ops) - return driver_deferred_probe_check_state(dev); - if (!try_module_get(ops->owner)) + ops = dev_iommu_fwspec_get(dev)->ops; + if (!ops->of_xlate || !try_module_get(ops->owner)) return -ENODEV; ret = ops->of_xlate(dev, iommu_spec); diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index f86c7ae91814..4365d9936e68 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -837,7 +837,7 @@ static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev, const struct iommu_ops *ops = smmu->iommu.ops; int err; - err = iommu_fwspec_init(dev, &dev->of_node->fwnode, ops); + err = iommu_fwspec_init(dev, of_fwnode_handle(dev->of_node)); if (err < 0) { dev_err(dev, "failed to initialize fwspec: %d\n", err); return err; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 1a4dfd7a1c4a..9d815837e297 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -736,8 +736,7 @@ struct iommu_ops; bool acpi_dma_supported(const struct acpi_device *adev); enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); int acpi_iommu_fwspec_init(struct device *dev, u32 id, - struct fwnode_handle *fwnode, - const struct iommu_ops *ops); + struct fwnode_handle *fwnode); int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map); int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, const u32 *input_id); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 17b3f36ad843..81893aad9ee4 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1005,11 +1005,9 @@ struct iommu_mm_data { struct list_head sva_handles; }; -int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, - const struct iommu_ops *ops); +int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode); void iommu_fwspec_free(struct device *dev); int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids); -const struct iommu_ops 
*iommu_ops_from_fwnode(const struct fwnode_handle *fwnode); static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { @@ -1315,8 +1313,7 @@ static inline void iommu_device_unlink(struct device *dev, struct device *link) } static inline int iommu_fwspec_init(struct device *dev, - struct fwnode_handle *iommu_fwnode, - const struct iommu_ops *ops) + struct fwnode_handle *iommu_fwnode) { return -ENODEV; } @@ -1331,12 +1328,6 @@ static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids, return -ENODEV; } -static inline -const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) -{ - return NULL; -} - static inline int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) { From 78596b5c321c9d74eeef1ad51c964563a4081f79 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 2 Jul 2024 12:40:49 +0100 Subject: [PATCH 60/72] ACPI: Retire acpi_iommu_fwspec_ops() Now that iommu_fwspec_init() can signal for probe deferral directly, acpi_iommu_fwspec_ops() is unneeded and can be cleaned up. Acked-by: Rafael J. Wysocki Tested-by: Jean-Philippe Brucker Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/011e39e275aba3ad451c5a1965ca8ddf20ed36c2.1719919669.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/acpi/scan.c | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 8d5a589db141..2cfbb365c4ab 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1588,26 +1588,14 @@ int acpi_iommu_fwspec_init(struct device *dev, u32 id, return iommu_fwspec_add_ids(dev, &id, 1); } -static inline const struct iommu_ops *acpi_iommu_fwspec_ops(struct device *dev) -{ - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - - return fwspec ? 
fwspec->ops : NULL; -} - static int acpi_iommu_configure_id(struct device *dev, const u32 *id_in) { int err; - const struct iommu_ops *ops; /* Serialise to make dev->iommu stable under our potential fwspec */ mutex_lock(&iommu_probe_device_lock); - /* - * If we already translated the fwspec there is nothing left to do, - * return the iommu_ops. - */ - ops = acpi_iommu_fwspec_ops(dev); - if (ops) { + /* If we already translated the fwspec there is nothing left to do */ + if (dev_iommu_fwspec_get(dev)) { mutex_unlock(&iommu_probe_device_lock); return 0; } @@ -1624,15 +1612,7 @@ static int acpi_iommu_configure_id(struct device *dev, const u32 *id_in) if (!err && dev->bus) err = iommu_probe_device(dev); - if (err == -EPROBE_DEFER) - return err; - if (err) { - dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); - return err; - } - if (!acpi_iommu_fwspec_ops(dev)) - return -ENODEV; - return 0; + return err; } #else /* !CONFIG_IOMMU_API */ @@ -1672,6 +1652,8 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, ret = acpi_iommu_configure_id(dev, input_id); if (ret == -EPROBE_DEFER) return -EPROBE_DEFER; + if (ret) + dev_dbg(dev, "Adding to IOMMU failed: %d\n", ret); arch_setup_dma_ops(dev, attr == DEV_DMA_COHERENT); From 5f937bc48a6aa63970648c54fb40ea8f96b633dc Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 2 Jul 2024 12:40:50 +0100 Subject: [PATCH 61/72] OF: Simplify of_iommu_configure() We no longer have a notion of partially-initialised fwspecs existing, and we also no longer need to use an iommu_ops pointer to return status to of_dma_configure(). Clean up the remains of those, which lends itself to clarifying the logic around the dma_range_map allocation as well. 
Acked-by: Rob Herring (Arm) Tested-by: Jean-Philippe Brucker Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/61972f88e31a6eda8bf5852f0853951164279a3c.1719919669.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/iommu/of_iommu.c | 29 ++++++++++------------------- drivers/of/device.c | 30 +++++++++++------------------- 2 files changed, 21 insertions(+), 38 deletions(-) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 08c523ad55ad..c946521a5906 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -108,7 +108,6 @@ static int of_iommu_configure_device(struct device_node *master_np, int of_iommu_configure(struct device *dev, struct device_node *master_np, const u32 *id) { - struct iommu_fwspec *fwspec; int err; if (!master_np) @@ -116,14 +115,9 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np, /* Serialise to make dev->iommu stable under our potential fwspec */ mutex_lock(&iommu_probe_device_lock); - fwspec = dev_iommu_fwspec_get(dev); - if (fwspec) { - if (fwspec->ops) { - mutex_unlock(&iommu_probe_device_lock); - return 0; - } - /* In the deferred case, start again from scratch */ - iommu_fwspec_free(dev); + if (dev_iommu_fwspec_get(dev)) { + mutex_unlock(&iommu_probe_device_lock); + return 0; } /* @@ -143,20 +137,17 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np, } else { err = of_iommu_configure_device(master_np, dev, id); } + + if (err) + iommu_fwspec_free(dev); mutex_unlock(&iommu_probe_device_lock); - if (err == -ENODEV || err == -EPROBE_DEFER) - return err; - if (err) - goto err_log; + if (!err && dev->bus) + err = iommu_probe_device(dev); - err = iommu_probe_device(dev); - if (err) - goto err_log; - return 0; + if (err && err != -EPROBE_DEFER) + dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); -err_log: - dev_dbg(dev, "Adding to IOMMU failed: %pe\n", ERR_PTR(err)); return err; } diff --git a/drivers/of/device.c b/drivers/of/device.c index 
312c63361211..edf3be197265 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -96,8 +96,7 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, const struct bus_dma_region *map = NULL; struct device_node *bus_np; u64 mask, end = 0; - bool coherent; - int iommu_ret; + bool coherent, set_map = false; int ret; if (np == dev->of_node) @@ -118,6 +117,7 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, } else { /* Determine the overall bounds of all DMA regions */ end = dma_range_map_max(map); + set_map = true; } /* @@ -144,7 +144,7 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, dev->coherent_dma_mask &= mask; *dev->dma_mask &= mask; /* ...but only set bus limit and range map if we found valid dma-ranges earlier */ - if (!ret) { + if (set_map) { dev->bus_dma_limit = end; dev->dma_range_map = map; } @@ -153,29 +153,21 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, dev_dbg(dev, "device is%sdma coherent\n", coherent ? " " : " not "); - iommu_ret = of_iommu_configure(dev, np, id); - if (iommu_ret == -EPROBE_DEFER) { + ret = of_iommu_configure(dev, np, id); + if (ret == -EPROBE_DEFER) { /* Don't touch range map if it wasn't set from a valid dma-ranges */ - if (!ret) + if (set_map) dev->dma_range_map = NULL; kfree(map); return -EPROBE_DEFER; - } else if (iommu_ret == -ENODEV) { - dev_dbg(dev, "device is not behind an iommu\n"); - } else if (iommu_ret) { - dev_err(dev, "iommu configuration for device failed with %pe\n", - ERR_PTR(iommu_ret)); - - /* - * Historically this routine doesn't fail driver probing - * due to errors in of_iommu_configure() - */ - } else - dev_dbg(dev, "device is behind an iommu\n"); + } + /* Take all other IOMMU errors to mean we'll just carry on without it */ + dev_dbg(dev, "device is%sbehind an iommu\n", + !ret ? 
" " : " not "); arch_setup_dma_ops(dev, coherent); - if (iommu_ret) + if (ret) of_dma_set_restricted_buffer(dev, np); return 0; From 3e36c15fc1cce65cccc93ed16f86d8ff9d2f9992 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 2 Jul 2024 12:40:51 +0100 Subject: [PATCH 62/72] iommu: Remove iommu_fwspec ops The ops in iommu_fwspec are only needed for the early configuration and probe process, and by now are easy enough to derive on-demand in those couple of places which need them, so remove the redundant stored copy. Tested-by: Jean-Philippe Brucker Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/55c1410b2cd09531eab4f8e2f18f92a0faa0ea75.1719919669.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- drivers/iommu/iommu-priv.h | 5 +++++ drivers/iommu/iommu.c | 11 ++--------- drivers/iommu/of_iommu.c | 4 +++- include/linux/iommu.h | 2 -- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h index 078cafcf49b4..a34efed2884b 100644 --- a/drivers/iommu/iommu-priv.h +++ b/drivers/iommu/iommu-priv.h @@ -19,6 +19,11 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode); +static inline const struct iommu_ops *iommu_fwspec_ops(struct iommu_fwspec *fwspec) +{ + return iommu_ops_from_fwnode(fwspec ? 
fwspec->iommu_fwnode : NULL); +} + int iommu_group_replace_domain(struct iommu_group *group, struct iommu_domain *new_domain); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 7618c4285cf9..e15ae1dd494b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -510,7 +510,6 @@ DEFINE_MUTEX(iommu_probe_device_lock); static int __iommu_probe_device(struct device *dev, struct list_head *group_list) { const struct iommu_ops *ops; - struct iommu_fwspec *fwspec; struct iommu_group *group; struct group_device *gdev; int ret; @@ -523,12 +522,7 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list * be present, and that any of their registered instances has suitable * ops for probing, and thus cheekily co-opt the same mechanism. */ - fwspec = dev_iommu_fwspec_get(dev); - if (fwspec && fwspec->ops) - ops = fwspec->ops; - else - ops = iommu_ops_from_fwnode(NULL); - + ops = iommu_fwspec_ops(dev_iommu_fwspec_get(dev)); if (!ops) return -ENODEV; /* @@ -2831,7 +2825,7 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode) return -EPROBE_DEFER; if (fwspec) - return ops == fwspec->ops ? 0 : -EINVAL; + return ops == iommu_fwspec_ops(fwspec) ? 
0 : -EINVAL; if (!dev_iommu_get(dev)) return -ENOMEM; @@ -2843,7 +2837,6 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode) fwnode_handle_get(iommu_fwnode); fwspec->iommu_fwnode = iommu_fwnode; - fwspec->ops = ops; dev_iommu_fwspec_set(dev, fwspec); return 0; } diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index c946521a5906..559c5db78edb 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -17,6 +17,8 @@ #include #include +#include "iommu-priv.h" + static int of_iommu_xlate(struct device *dev, struct of_phandle_args *iommu_spec) { @@ -32,7 +34,7 @@ static int of_iommu_xlate(struct device *dev, if (ret) return ret; - ops = dev_iommu_fwspec_get(dev)->ops; + ops = iommu_ops_from_fwnode(&iommu_spec->np->fwnode); if (!ops->of_xlate || !try_module_get(ops->owner)) return -ENODEV; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 81893aad9ee4..11ae1750cb1d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -968,7 +968,6 @@ extern struct iommu_group *generic_single_device_group(struct device *dev); /** * struct iommu_fwspec - per-device IOMMU instance data - * @ops: ops for this device's IOMMU * @iommu_fwnode: firmware handle for this device's IOMMU * @flags: IOMMU_FWSPEC_* flags * @num_ids: number of associated device IDs @@ -979,7 +978,6 @@ extern struct iommu_group *generic_single_device_group(struct device *dev); * consumers. */ struct iommu_fwspec { - const struct iommu_ops *ops; struct fwnode_handle *iommu_fwnode; u32 flags; unsigned int num_ids; From 40929e8e5449a18bc98baf7a907dd6674bd60049 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 7 Jun 2024 11:54:14 +0100 Subject: [PATCH 63/72] dt-bindings: PCI: generic: Add ats-supported property Add a way for firmware to tell the OS that ATS is supported by the PCI root complex. 
An endpoint with ATS enabled may send Translation Requests and Translated Memory Requests, which look just like Normal Memory Requests with a non-zero AT field. So a root controller that ignores the AT field may simply forward the request to the IOMMU as a Normal Memory Request, which could end badly. In any case, the endpoint will be unusable. The ats-supported property allows the OS to only enable ATS in endpoints if the root controller can handle ATS requests. Only add the property to pcie-host-ecam-generic for the moment. For non-generic root controllers, availability of ATS can be inferred from the compatible string. Reviewed-by: Rob Herring Reviewed-by: Liviu Dudau Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20240607105415.2501934-3-jean-philippe@linaro.org Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/pci/host-generic-pci.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/pci/host-generic-pci.yaml b/Documentation/devicetree/bindings/pci/host-generic-pci.yaml index 3484e0b4b412..bcfbaf5582cc 100644 --- a/Documentation/devicetree/bindings/pci/host-generic-pci.yaml +++ b/Documentation/devicetree/bindings/pci/host-generic-pci.yaml @@ -110,6 +110,12 @@ properties: iommu-map-mask: true msi-parent: true + ats-supported: + description: + Indicates that a PCIe host controller supports ATS, and can handle Memory + Requests with Address Type (AT). + type: boolean + required: - compatible - reg From 86e02a88bedc1072beb5445d408e379674b0b7f3 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 7 Jun 2024 11:54:15 +0100 Subject: [PATCH 64/72] iommu/of: Support ats-supported device-tree property Device-tree declares whether a PCI root-complex supports ATS by setting the "ats-supported" property. Copy this flag into device fwspec to let IOMMU drivers quickly check if they can enable ATS for a device. 
Tested-by: Ketan Patil Reviewed-by: Jason Gunthorpe Reviewed-by: Liviu Dudau Reviewed-by: Robin Murphy Reviewed-by: Nicolin Chen Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20240607105415.2501934-4-jean-philippe@linaro.org Signed-off-by: Will Deacon --- drivers/iommu/of_iommu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 3afe0b48a48d..082b94c2b329 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -105,6 +105,14 @@ static int of_iommu_configure_device(struct device_node *master_np, of_iommu_configure_dev(master_np, dev); } +static void of_pci_check_device_ats(struct device *dev, struct device_node *np) +{ + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + + if (fwspec && of_property_read_bool(np, "ats-supported")) + fwspec->flags |= IOMMU_FWSPEC_PCI_RC_ATS; +} + /* * Returns: * 0 on success, an iommu was configured @@ -147,6 +155,7 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np, pci_request_acs(); err = pci_for_each_dma_alias(to_pci_dev(dev), of_pci_iommu_init, &info); + of_pci_check_device_ats(dev, master_np); } else { err = of_iommu_configure_device(master_np, dev, id); } From 6bac3388889cec379ecb06e5557dd6f31a31544e Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 7 Jun 2024 11:54:16 +0100 Subject: [PATCH 65/72] arm64: dts: fvp: Enable PCIe ATS for Base RevC FVP Declare that the host controller supports ATS, so the OS can enable it for ATS-capable PCIe endpoints. 
Acked-by: Sudeep Holla Signed-off-by: Jean-Philippe Brucker Link: https://lore.kernel.org/r/20240607105415.2501934-5-jean-philippe@linaro.org Signed-off-by: Will Deacon --- arch/arm64/boot/dts/arm/fvp-base-revc.dts | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/arm/fvp-base-revc.dts b/arch/arm64/boot/dts/arm/fvp-base-revc.dts index 60472d65a355..85f1c15cc65d 100644 --- a/arch/arm64/boot/dts/arm/fvp-base-revc.dts +++ b/arch/arm64/boot/dts/arm/fvp-base-revc.dts @@ -243,6 +243,7 @@ iommu-map = <0x0 &smmu 0x0 0x10000>; dma-coherent; + ats-supported; }; smmu: iommu@2b400000 { From 228159802bcebd95438b54b0bd7c97798582178b Mon Sep 17 00:00:00 2001 From: Mark-PK Tsai Date: Tue, 2 Jul 2024 20:05:39 +0800 Subject: [PATCH 66/72] docs: iommu: Remove outdated Documentation/userspace-api/iommu.rst The Documentation/userspace-api/iommu.rst file has become outdated due to the removal of associated structures and APIs. Specifically, structs such as iommu_cache_invalidate_info and guest pasid related uapi were removed in commit 0c9f17877891 ("iommu: Remove guest pasid related interfaces and definitions"). And the corresponding uapi/linux/iommu.h file was removed in commit 00a9bc607043 ("iommu: Move iommu fault data to linux/iommu.h").
Signed-off-by: Mark-PK Tsai Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/20240702120617.26882-1-mark-pk.tsai@mediatek.com [will: Remove stale reference to 'iommu' from index.rst] Signed-off-by: Will Deacon --- Documentation/userspace-api/index.rst | 1 - Documentation/userspace-api/iommu.rst | 209 -------------------------- MAINTAINERS | 1 - 3 files changed, 211 deletions(-) delete mode 100644 Documentation/userspace-api/iommu.rst diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst index 5926115ec0ed..2e0bb6068583 100644 --- a/Documentation/userspace-api/index.rst +++ b/Documentation/userspace-api/index.rst @@ -44,7 +44,6 @@ Devices and I/O accelerators/ocxl dma-buf-alloc-exchange gpio/index - iommu iommufd media/index dcdbas diff --git a/Documentation/userspace-api/iommu.rst b/Documentation/userspace-api/iommu.rst deleted file mode 100644 index d3108c1519d5..000000000000 --- a/Documentation/userspace-api/iommu.rst +++ /dev/null @@ -1,209 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 -.. iommu: - -===================================== -IOMMU Userspace API -===================================== - -IOMMU UAPI is used for virtualization cases where communications are -needed between physical and virtual IOMMU drivers. For baremetal -usage, the IOMMU is a system device which does not need to communicate -with userspace directly. - -The primary use cases are guest Shared Virtual Address (SVA) and -guest IO virtual address (IOVA), wherein the vIOMMU implementation -relies on the physical IOMMU and for this reason requires interactions -with the host driver. - -.. contents:: :local: - -Functionalities -=============== -Communications of user and kernel involve both directions. The -supported user-kernel APIs are as follows: - -1. Bind/Unbind guest PASID (e.g. Intel VT-d) -2. Bind/Unbind guest PASID table (e.g. ARM SMMU) -3. Invalidate IOMMU caches upon guest requests -4. 
Report errors to the guest and serve page requests - -Requirements -============ -The IOMMU UAPIs are generic and extensible to meet the following -requirements: - -1. Emulated and para-virtualised vIOMMUs -2. Multiple vendors (Intel VT-d, ARM SMMU, etc.) -3. Extensions to the UAPI shall not break existing userspace - -Interfaces -========== -Although the data structures defined in IOMMU UAPI are self-contained, -there are no user API functions introduced. Instead, IOMMU UAPI is -designed to work with existing user driver frameworks such as VFIO. - -Extension Rules & Precautions ------------------------------ -When IOMMU UAPI gets extended, the data structures can *only* be -modified in two ways: - -1. Adding new fields by re-purposing the padding[] field. No size change. -2. Adding new union members at the end. May increase the structure sizes. - -No new fields can be added *after* the variable sized union in that it -will break backward compatibility when offset moves. A new flag must -be introduced whenever a change affects the structure using either -method. The IOMMU driver processes the data based on flags which -ensures backward compatibility. - -Version field is only reserved for the unlikely event of UAPI upgrade -at its entirety. - -It's *always* the caller's responsibility to indicate the size of the -structure passed by setting argsz appropriately. -Though at the same time, argsz is user provided data which is not -trusted. The argsz field allows the user app to indicate how much data -it is providing; it's still the kernel's responsibility to validate -whether it's correct and sufficient for the requested operation. - -Compatibility Checking ----------------------- -When IOMMU UAPI extension results in some structure size increase, -IOMMU UAPI code shall handle the following cases: - -1. User and kernel has exact size match -2. An older user with older kernel header (smaller UAPI size) running on a - newer kernel (larger UAPI size) -3. 
A newer user with newer kernel header (larger UAPI size) running - on an older kernel. -4. A malicious/misbehaving user passing illegal/invalid size but within - range. The data may contain garbage. - -Feature Checking ----------------- -While launching a guest with vIOMMU, it is strongly advised to check -the compatibility upfront, as some subsequent errors happening during -vIOMMU operation, such as cache invalidation failures cannot be nicely -escalated to the guest due to IOMMU specifications. This can lead to -catastrophic failures for the users. - -User applications such as QEMU are expected to import kernel UAPI -headers. Backward compatibility is supported per feature flags. -For example, an older QEMU (with older kernel header) can run on newer -kernel. Newer QEMU (with new kernel header) may refuse to initialize -on an older kernel if new feature flags are not supported by older -kernel. Simply recompiling existing code with newer kernel header should -not be an issue in that only existing flags are used. - -IOMMU vendor driver should report the below features to IOMMU UAPI -consumers (e.g. via VFIO). - -1. IOMMU_NESTING_FEAT_SYSWIDE_PASID -2. IOMMU_NESTING_FEAT_BIND_PGTBL -3. IOMMU_NESTING_FEAT_BIND_PASID_TABLE -4. IOMMU_NESTING_FEAT_CACHE_INVLD -5. IOMMU_NESTING_FEAT_PAGE_REQUEST - -Take VFIO as example, upon request from VFIO userspace (e.g. QEMU), -VFIO kernel code shall query IOMMU vendor driver for the support of -the above features. Query result can then be reported back to the -userspace caller. Details can be found in -Documentation/driver-api/vfio.rst. - - -Data Passing Example with VFIO ------------------------------- -As the ubiquitous userspace driver framework, VFIO is already IOMMU -aware and shares many key concepts such as device model, group, and -protection domain. Other user driver frameworks can also be extended -to support IOMMU UAPI but it is outside the scope of this document. 
- -In this tight-knit VFIO-IOMMU interface, the ultimate consumer of the -IOMMU UAPI data is the host IOMMU driver. VFIO facilitates user-kernel -transport, capability checking, security, and life cycle management of -process address space ID (PASID). - -VFIO layer conveys the data structures down to the IOMMU driver. It -follows the pattern below:: - - struct { - __u32 argsz; - __u32 flags; - __u8 data[]; - }; - -Here data[] contains the IOMMU UAPI data structures. VFIO has the -freedom to bundle the data as well as parse data size based on its own flags. - -In order to determine the size and feature set of the user data, argsz -and flags (or the equivalent) are also embedded in the IOMMU UAPI data -structures. - -A "__u32 argsz" field is *always* at the beginning of each structure. - -For example: -:: - - struct iommu_cache_invalidate_info { - __u32 argsz; - #define IOMMU_CACHE_INVALIDATE_INFO_VERSION_1 1 - __u32 version; - /* IOMMU paging structure cache */ - #define IOMMU_CACHE_INV_TYPE_IOTLB (1 << 0) /* IOMMU IOTLB */ - #define IOMMU_CACHE_INV_TYPE_DEV_IOTLB (1 << 1) /* Device IOTLB */ - #define IOMMU_CACHE_INV_TYPE_PASID (1 << 2) /* PASID cache */ - #define IOMMU_CACHE_INV_TYPE_NR (3) - __u8 cache; - __u8 granularity; - __u8 padding[6]; - union { - struct iommu_inv_pasid_info pasid_info; - struct iommu_inv_addr_info addr_info; - } granu; - }; - -VFIO is responsible for checking its own argsz and flags. It then -invokes appropriate IOMMU UAPI functions. The user pointers are passed -to the IOMMU layer for further processing. The responsibilities are -divided as follows: - -- Generic IOMMU layer checks argsz range based on UAPI data in the - current kernel version. - -- Generic IOMMU layer checks content of the UAPI data for non-zero - reserved bits in flags, padding fields, and unsupported version. - This is to ensure not breaking userspace in the future when these - fields or flags are used. - -- Vendor IOMMU driver checks argsz based on vendor flags. 
UAPI data - is consumed based on flags. Vendor driver has access to - unadulterated argsz value in case of vendor specific future - extensions. Currently, it does not perform the copy_from_user() - itself. A __user pointer can be provided in some future scenarios - where there's vendor data outside of the structure definition. - -IOMMU code treats UAPI data in two categories: - -- structure contains vendor data - (Example: iommu_uapi_cache_invalidate()) - -- structure contains only generic data - (Example: iommu_uapi_sva_bind_gpasid()) - - - -Sharing UAPI with in-kernel users ---------------------------------- -For UAPIs that are shared with in-kernel users, a wrapper function is -provided to distinguish the callers. For example, - -Userspace caller :: - - int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, - void __user *udata) - -In-kernel caller :: - - int iommu_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, ioasid_t ioasid); diff --git a/MAINTAINERS b/MAINTAINERS index aacccb376c28..59392d80a7e4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11543,7 +11543,6 @@ L: iommu@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: Documentation/devicetree/bindings/iommu/ -F: Documentation/userspace-api/iommu.rst F: drivers/iommu/ F: include/linux/iommu.h F: include/linux/iova.h From c420a2b4e8be06f16f3305472bd25a1dd12059ec Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 9 Jul 2024 23:26:42 +0800 Subject: [PATCH 67/72] iommu/vt-d: Limit max address mask to MAX_AGAW_PFN_WIDTH Address mask specifies the number of low order bits of the address field that must be masked for the invalidation operation. Since address bits masked start from bit 12, the max address mask should be MAX_AGAW_PFN_WIDTH, as defined in Table 19 ("Invalidate Descriptor Address Mask Encodings") of the spec. 
Limit the max address mask returned from calculate_psi_aligned_address() to MAX_AGAW_PFN_WIDTH to prevent potential integer overflow in the following code: qi_flush_dev_iotlb(): ... addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1; ... Fixes: c4d27ffaa8eb ("iommu/vt-d: Add cache tag invalidation helpers") Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240709152643.28109-2-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index e8418cdd8331..0a3bb38a5289 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -245,7 +245,7 @@ static unsigned long calculate_psi_aligned_address(unsigned long start, * shared_bits are all equal in both pfn and end_pfn. */ shared_bits = ~(pfn ^ end_pfn) & ~bitmask; - mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG; + mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH; } *_pages = aligned_pages; From 0a3f6b3463014b03f6ad10eacc4d1d9af75d54a1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 9 Jul 2024 23:26:43 +0800 Subject: [PATCH 68/72] iommu/vt-d: Fix aligned pages in calculate_psi_aligned_address() The helper calculate_psi_aligned_address() is used to convert an arbitrary range into a size-aligned one. The aligned_pages variable is calculated from input start and end, but is not adjusted when the start pfn is not aligned and the mask is adjusted, which results in an incorrect number of pages returned. The number of pages is used by qi_flush_piotlb() to flush caches for the first-stage translation. With the wrong number of pages, the cache is not synchronized, leading to inconsistencies in some cases. 
Fixes: c4d27ffaa8eb ("iommu/vt-d: Add cache tag invalidation helpers") Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240709152643.28109-3-baolu.lu@linux.intel.com Signed-off-by: Will Deacon --- drivers/iommu/intel/cache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index 0a3bb38a5289..44e92638c0cd 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -246,6 +246,7 @@ static unsigned long calculate_psi_aligned_address(unsigned long start, */ shared_bits = ~(pfn ^ end_pfn) & ~bitmask; mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH; + aligned_pages = 1UL << mask; } *_pages = aligned_pages; From b577060ac7ba6085d54f2066d185de4a318c913c Mon Sep 17 00:00:00 2001 From: David Heidelberg Date: Fri, 5 Jul 2024 15:14:54 -0700 Subject: [PATCH 69/72] dt-bindings: iommu: Convert msm,iommu-v0 to yaml Convert Qualcomm IOMMU v0 implementation to yaml format. iommus part being omitted for the other bindings, as mdp4 one. Signed-off-by: David Heidelberg Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20240705221520.109540-1-david@ixit.cz Signed-off-by: Will Deacon --- .../bindings/iommu/msm,iommu-v0.txt | 64 --------------- .../bindings/iommu/qcom,apq8064-iommu.yaml | 78 +++++++++++++++++++ 2 files changed, 78 insertions(+), 64 deletions(-) delete mode 100644 Documentation/devicetree/bindings/iommu/msm,iommu-v0.txt create mode 100644 Documentation/devicetree/bindings/iommu/qcom,apq8064-iommu.yaml diff --git a/Documentation/devicetree/bindings/iommu/msm,iommu-v0.txt b/Documentation/devicetree/bindings/iommu/msm,iommu-v0.txt deleted file mode 100644 index 20236385f26e..000000000000 --- a/Documentation/devicetree/bindings/iommu/msm,iommu-v0.txt +++ /dev/null @@ -1,64 +0,0 @@ -* QCOM IOMMU - -The MSM IOMMU is an implementation compatible with the ARM VMSA short -descriptor page tables.
It provides address translation for bus masters outside -of the CPU, each connected to the IOMMU through a port called micro-TLB. - -Required Properties: - - - compatible: Must contain "qcom,apq8064-iommu". - - reg: Base address and size of the IOMMU registers. - - interrupts: Specifiers for the MMU fault interrupts. For instances that - support secure mode two interrupts must be specified, for non-secure and - secure mode, in that order. For instances that don't support secure mode a - single interrupt must be specified. - - #iommu-cells: The number of cells needed to specify the stream id. This - is always 1. - - qcom,ncb: The total number of context banks in the IOMMU. - - clocks : List of clocks to be used during SMMU register access. See - Documentation/devicetree/bindings/clock/clock-bindings.txt - for information about the format. For each clock specified - here, there must be a corresponding entry in clock-names - (see below). - - - clock-names : List of clock names corresponding to the clocks specified in - the "clocks" property (above). - Should be "smmu_pclk" for specifying the interface clock - required for iommu's register accesses. - Should be "smmu_clk" for specifying the functional clock - required by iommu for bus accesses. - -Each bus master connected to an IOMMU must reference the IOMMU in its device -node with the following property: - - - iommus: A reference to the IOMMU in multiple cells. The first cell is a - phandle to the IOMMU and the second cell is the stream id. - A single master device can be connected to more than one iommu - and multiple contexts in each of the iommu. So multiple entries - are required to list all the iommus and the stream ids that the - master is connected to. 
- -Example: mdp iommu and its bus master - - mdp_port0: iommu@7500000 { - compatible = "qcom,apq8064-iommu"; - #iommu-cells = <1>; - clock-names = - "smmu_pclk", - "smmu_clk"; - clocks = - <&mmcc SMMU_AHB_CLK>, - <&mmcc MDP_AXI_CLK>; - reg = <0x07500000 0x100000>; - interrupts = - , - ; - qcom,ncb = <2>; - }; - - mdp: qcom,mdp@5100000 { - compatible = "qcom,mdp"; - ... - iommus = <&mdp_port0 0 - &mdp_port0 2>; - }; diff --git a/Documentation/devicetree/bindings/iommu/qcom,apq8064-iommu.yaml b/Documentation/devicetree/bindings/iommu/qcom,apq8064-iommu.yaml new file mode 100644 index 000000000000..9f83f851e61a --- /dev/null +++ b/Documentation/devicetree/bindings/iommu/qcom,apq8064-iommu.yaml @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- + +$id: http://devicetree.org/schemas/iommu/qcom,apq8064-iommu.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm APQ8064 IOMMU + +maintainers: + - David Heidelberg + +description: + The MSM IOMMU is an implementation compatible with the ARM VMSA short + descriptor page tables. It provides address translation for bus masters + outside of the CPU, each connected to the IOMMU through a port called micro-TLB. + +properties: + compatible: + const: qcom,apq8064-iommu + + clocks: + items: + - description: interface clock for register accesses + - description: functional clock for bus accesses + + clock-names: + items: + - const: smmu_pclk + - const: iommu_clk + + reg: + maxItems: 1 + + interrupts: + description: Specifiers for the MMU fault interrupts. + minItems: 1 + items: + - description: non-secure mode interrupt + - description: secure mode interrupt (for instances which supports it) + + "#iommu-cells": + const: 1 + description: Each IOMMU specifier describes a single Stream ID. + + qcom,ncb: + $ref: /schemas/types.yaml#/definitions/uint32 + description: The total number of context banks in the IOMMU. 
+ minimum: 1 + maximum: 4 + +required: + - reg + - interrupts + - clocks + - clock-names + - qcom,ncb + +additionalProperties: false + +examples: + - | + #include + #include + #include + + iommu@7500000 { + compatible = "qcom,apq8064-iommu"; + reg = <0x07500000 0x100000>; + interrupts = , + ; + clocks = <&clk SMMU_AHB_CLK>, + <&clk MDP_AXI_CLK>; + clock-names = "smmu_pclk", + "iommu_clk"; + #iommu-cells = <1>; + qcom,ncb = <2>; + }; From 9b2bc6b9a264b863a2273c02db5ee9e214e0a526 Mon Sep 17 00:00:00 2001 From: Shameer Kolothum Date: Fri, 12 Jul 2024 12:31:32 +0100 Subject: [PATCH 70/72] iommu: Move IOMMU_DIRTY_NO_CLEAR define Fixes the compile issue when CONFIG_IOMMU_API is not set. Fixes: 4fe88fd8b4ae ("iommu/io-pgtable-arm: Add read_and_clear_dirty() support") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202407121602.HL9ih1it-lkp@intel.com/ Signed-off-by: Shameer Kolothum Reviewed-by: Joao Martins Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240712113132.45100-1-shameerali.kolothum.thodi@huawei.com Signed-off-by: Will Deacon --- include/linux/iommu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 7bc8dff7cf6d..104ce84647d4 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -317,6 +317,9 @@ enum iommu_dev_features { #define IOMMU_PASID_INVALID (-1U) typedef unsigned int ioasid_t; +/* Read but do not clear any dirty bits */ +#define IOMMU_DIRTY_NO_CLEAR (1 << 0) + #ifdef CONFIG_IOMMU_API /** @@ -353,9 +356,6 @@ struct iommu_dirty_bitmap { struct iommu_iotlb_gather *gather; }; -/* Read but do not clear any dirty bits */ -#define IOMMU_DIRTY_NO_CLEAR (1 << 0) - /** * struct iommu_dirty_ops - domain specific dirty tracking operations * @set_dirty_tracking: Enable or Disable dirty tracking on the iommu domain From 31000732d56b43765d51e08cccb68818fbc0032c Mon Sep 17 00:00:00 2001 From: Jon Pan-Doh Date: Tue, 9 Jul 2024 16:49:13 
-0700 Subject: [PATCH 71/72] iommu/vt-d: Fix identity map bounds in si_domain_init() Intel IOMMU operates on inclusive bounds (both generally as well as iommu_domain_identity_map()). Meanwhile, for_each_mem_pfn_range() uses exclusive bounds for end_pfn. This creates an off-by-one error when switching between the two. Fixes: c5395d5c4a82 ("intel-iommu: Clean up iommu_domain_identity_map()") Signed-off-by: Jon Pan-Doh Tested-by: Sudheer Dantuluri Suggested-by: Gary Zibrat Reviewed-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240709234913.2749386-1-pandoh@google.com Signed-off-by: Will Deacon --- drivers/iommu/intel/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 523407f6f6b2..3917279afca7 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2041,7 +2041,7 @@ static int __init si_domain_init(int hw) for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { ret = iommu_domain_identity_map(si_domain, mm_to_dma_pfn_start(start_pfn), - mm_to_dma_pfn_end(end_pfn)); + mm_to_dma_pfn_end(end_pfn-1)); if (ret) return ret; } From eac93f4d4ec63423704657895ce9a4ddac7b023b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 12 Jul 2024 16:35:25 +0100 Subject: [PATCH 72/72] iommu/tegra-smmu: Pass correct fwnode to iommu_fwspec_init() iommu_fwspec_init() expects to receive the fwnode corresponding to the IOMMU device, not the fwnode corresponding to the client device being probed. Fix tegra_smmu_configure() to pass the correct fwnode to iommu_fwspec_init().
Reported-by: Jon Hunter Suggested-by: Robin Murphy Link: https://lore.kernel.org/r/0eec5f84-6b39-43ba-ab2f-914688a5cf45@nvidia.com Signed-off-by: Will Deacon --- drivers/iommu/tegra-smmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index f86c7ae91814..c0c6dbd87fca 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -837,7 +837,7 @@ static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev, const struct iommu_ops *ops = smmu->iommu.ops; int err; - err = iommu_fwspec_init(dev, &dev->of_node->fwnode, ops); + err = iommu_fwspec_init(dev, &smmu->dev->of_node->fwnode, ops); if (err < 0) { dev_err(dev, "failed to initialize fwspec: %d\n", err); return err;