From fa3c109a6d302b56437a6412c5f3044c3e12de03 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Sat, 10 Aug 2024 00:59:11 +0000 Subject: [PATCH 01/18] dma-mapping: use bit masking to check VM_DMA_COHERENT In dma_common_find_pages(), area->flags are compared directly with VM_DMA_COHERENT. This works because VM_DMA_COHERENT is the only set flag. During development of a new feature (ASI [1]), a new VM flag is introduced, and that flag can be injected into VM_DMA_COHERENT mappings (among others). The presence of that flag caused dma_common_find_pages() to return NULL for VM_DMA_COHERENT addresses, leading to a lot of problems ending in crashing during boot. It took a bit of time to figure this problem out. It was a mistake to inject a VM flag to begin with, but it took a significant amount of debugging to figure out the problem. Most users of area->flags use bitmasking rather than equivalency to check for flags. Update dma_common_find_pages() and dma_common_free_remap() to do the same, which would have avoided the boot crashing. Instead, add a warning in dma_common_find_pages() if any extra VM flags are set to catch such problems more easily during development. No functional change intended. 
[1]https://lore.kernel.org/lkml/20240712-asi-rfc-24-v1-0-144b319a40d8@google.com/ Signed-off-by: Yosry Ahmed Signed-off-by: Christoph Hellwig --- kernel/dma/remap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c index 27596f3b4aef..9e2afad1c615 100644 --- a/kernel/dma/remap.c +++ b/kernel/dma/remap.c @@ -10,8 +10,10 @@ struct page **dma_common_find_pages(void *cpu_addr) { struct vm_struct *area = find_vm_area(cpu_addr); - if (!area || area->flags != VM_DMA_COHERENT) + if (!area || !(area->flags & VM_DMA_COHERENT)) return NULL; + WARN(area->flags != VM_DMA_COHERENT, + "unexpected flags in area: %p\n", cpu_addr); return area->pages; } @@ -61,7 +63,7 @@ void dma_common_free_remap(void *cpu_addr, size_t size) { struct vm_struct *area = find_vm_area(cpu_addr); - if (!area || area->flags != VM_DMA_COHERENT) { + if (!area || !(area->flags & VM_DMA_COHERENT)) { WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); return; } From ba0fb44aed47693cc2482427f63ba6cd19051327 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Sun, 11 Aug 2024 10:09:35 +0300 Subject: [PATCH 02/18] dma-mapping: replace zone_dma_bits by zone_dma_limit The hardware DMA limit might not be power of 2. When RAM range starts above 0, say 4GB, DMA limit of 30 bits should end at 5GB. A single high bit can not encode this limit. Use a plain address for the DMA zone limit instead. Since the DMA zone can now potentially span beyond 4GB physical limit of DMA32, make sure to use DMA zone for GFP_DMA32 allocations in that case. 
Signed-off-by: Catalin Marinas Co-developed-by: Baruch Siach Signed-off-by: Baruch Siach Reviewed-by: Catalin Marinas Reviewed-by: Petr Tesarik Signed-off-by: Christoph Hellwig --- arch/arm64/mm/init.c | 30 +++++++++++++++--------------- arch/powerpc/mm/mem.c | 5 ++++- arch/s390/mm/init.c | 2 +- include/linux/dma-direct.h | 2 +- kernel/dma/direct.c | 6 +++--- kernel/dma/pool.c | 4 ++-- kernel/dma/swiotlb.c | 6 +++--- 7 files changed, 29 insertions(+), 26 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 9b5ab6818f7f..c45e2152ca9e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -115,35 +115,35 @@ static void __init arch_reserve_crashkernel(void) } /* - * Return the maximum physical address for a zone accessible by the given bits - * limit. If DRAM starts above 32-bit, expand the zone to the maximum + * Return the maximum physical address for a zone given its limit. + * If DRAM starts above 32-bit, expand the zone to the maximum * available memory, otherwise cap it at 32-bit. 
*/ -static phys_addr_t __init max_zone_phys(unsigned int zone_bits) +static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit) { - phys_addr_t zone_mask = DMA_BIT_MASK(zone_bits); phys_addr_t phys_start = memblock_start_of_DRAM(); if (phys_start > U32_MAX) - zone_mask = PHYS_ADDR_MAX; - else if (phys_start > zone_mask) - zone_mask = U32_MAX; + zone_limit = PHYS_ADDR_MAX; + else if (phys_start > zone_limit) + zone_limit = U32_MAX; - return min(zone_mask, memblock_end_of_DRAM() - 1) + 1; + return min(zone_limit, memblock_end_of_DRAM() - 1) + 1; } static void __init zone_sizes_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; - unsigned int __maybe_unused acpi_zone_dma_bits; - unsigned int __maybe_unused dt_zone_dma_bits; - phys_addr_t __maybe_unused dma32_phys_limit = max_zone_phys(32); + phys_addr_t __maybe_unused acpi_zone_dma_limit; + phys_addr_t __maybe_unused dt_zone_dma_limit; + phys_addr_t __maybe_unused dma32_phys_limit = + max_zone_phys(DMA_BIT_MASK(32)); #ifdef CONFIG_ZONE_DMA - acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address()); - dt_zone_dma_bits = fls64(of_dma_get_max_cpu_address(NULL)); - zone_dma_bits = min3(32U, dt_zone_dma_bits, acpi_zone_dma_bits); - arm64_dma_phys_limit = max_zone_phys(zone_dma_bits); + acpi_zone_dma_limit = acpi_iort_dma_get_max_cpu_address(); + dt_zone_dma_limit = of_dma_get_max_cpu_address(NULL); + zone_dma_limit = min(dt_zone_dma_limit, acpi_zone_dma_limit); + arm64_dma_phys_limit = max_zone_phys(zone_dma_limit); max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit); #endif #ifdef CONFIG_ZONE_DMA32 diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index da21cb018984..7e217aa4a274 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -216,7 +216,7 @@ static int __init mark_nonram_nosave(void) * everything else. GFP_DMA32 page allocations automatically fall back to * ZONE_DMA. 
* - * By using 31-bit unconditionally, we can exploit zone_dma_bits to inform the + * By using 31-bit unconditionally, we can exploit zone_dma_limit to inform the * generic DMA mapping code. 32-bit only devices (if not handled by an IOMMU * anyway) will take a first dip into ZONE_NORMAL and get otherwise served by * ZONE_DMA. @@ -230,6 +230,7 @@ void __init paging_init(void) { unsigned long long total_ram = memblock_phys_mem_size(); phys_addr_t top_of_ram = memblock_end_of_DRAM(); + int zone_dma_bits; #ifdef CONFIG_HIGHMEM unsigned long v = __fix_to_virt(FIX_KMAP_END); @@ -256,6 +257,8 @@ void __init paging_init(void) else zone_dma_bits = 31; + zone_dma_limit = DMA_BIT_MASK(zone_dma_bits); + #ifdef CONFIG_ZONE_DMA max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 1UL << (zone_dma_bits - PAGE_SHIFT)); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index e3d258f9e726..688abc65c79e 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -97,7 +97,7 @@ void __init paging_init(void) vmem_map_init(); sparse_init(); - zone_dma_bits = 31; + zone_dma_limit = DMA_BIT_MASK(31); memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_DMA] = virt_to_pfn(MAX_DMA_ADDRESS); max_zone_pfns[ZONE_NORMAL] = max_low_pfn; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index edbe13d00776..d7e30d4f7503 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -12,7 +12,7 @@ #include #include -extern unsigned int zone_dma_bits; +extern u64 zone_dma_limit; /* * Record the mapping of CPU physical to DMA addresses for a given region. diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 4480a3cd92e0..f2ba074a6a54 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -20,7 +20,7 @@ * it for entirely different regions. In that case the arch code needs to * override the variable below for dma-direct to work properly. 
*/ -unsigned int zone_dma_bits __ro_after_init = 24; +u64 zone_dma_limit __ro_after_init = DMA_BIT_MASK(24); static inline dma_addr_t phys_to_dma_direct(struct device *dev, phys_addr_t phys) @@ -59,7 +59,7 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 *phys_limit) * zones. */ *phys_limit = dma_to_phys(dev, dma_limit); - if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits)) + if (*phys_limit <= zone_dma_limit) return GFP_DMA; if (*phys_limit <= DMA_BIT_MASK(32)) return GFP_DMA32; @@ -580,7 +580,7 @@ int dma_direct_supported(struct device *dev, u64 mask) * part of the check. */ if (IS_ENABLED(CONFIG_ZONE_DMA)) - min_mask = min_t(u64, min_mask, DMA_BIT_MASK(zone_dma_bits)); + min_mask = min_t(u64, min_mask, zone_dma_limit); return mask >= phys_to_dma_unencrypted(dev, min_mask); } diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index d10613eb0f63..7b04f7575796 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -70,9 +70,9 @@ static bool cma_in_zone(gfp_t gfp) /* CMA can't cross zone boundaries, see cma_activate_area() */ end = cma_get_base(cma) + size - 1; if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA)) - return end <= DMA_BIT_MASK(zone_dma_bits); + return end <= zone_dma_limit; if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32)) - return end <= DMA_BIT_MASK(32); + return end <= max(DMA_BIT_MASK(32), zone_dma_limit); return true; } diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index df68d29740a0..abcf3fa63a56 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -450,9 +450,9 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, if (!remap) io_tlb_default_mem.can_grow = true; if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp_mask & __GFP_DMA)) - io_tlb_default_mem.phys_limit = DMA_BIT_MASK(zone_dma_bits); + io_tlb_default_mem.phys_limit = zone_dma_limit; else if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp_mask & __GFP_DMA32)) - io_tlb_default_mem.phys_limit = DMA_BIT_MASK(32); + io_tlb_default_mem.phys_limit = 
max(DMA_BIT_MASK(32), zone_dma_limit); else io_tlb_default_mem.phys_limit = virt_to_phys(high_memory - 1); #endif @@ -629,7 +629,7 @@ static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes, } gfp &= ~GFP_ZONEMASK; - if (phys_limit <= DMA_BIT_MASK(zone_dma_bits)) + if (phys_limit <= zone_dma_limit) gfp |= __GFP_DMA; else if (phys_limit <= DMA_BIT_MASK(32)) gfp |= __GFP_DMA32; From 3be9b846896d1d8b66040ece6e216cd58d03242e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Sun, 11 Aug 2024 10:09:36 +0300 Subject: [PATCH 03/18] arm64: support DMA zone above 4GB Commit 791ab8b2e3db ("arm64: Ignore any DMA offsets in the max_zone_phys() calculation") made arm64 DMA/DMA32 zones span the entire RAM when RAM starts above 32-bits. This breaks hardware with a DMA area that starts above 32-bits. But the commit log says that "we haven't noticed any such hardware". It turns out that such hardware does exist. One such platform has RAM starting at 32GB with an internal bus that has the following DMA limits: #address-cells = <2>; #size-cells = <2>; dma-ranges = <0x00 0xc0000000 0x08 0x00000000 0x00 0x40000000>; That is, devices under this bus see 1GB of DMA range between 3GB-4GB in their address space. This range is mapped to CPU memory at 32GB-33GB. With the current code, DMA allocations for devices under this bus are not limited to the DMA area, leading to run-time allocation failures. This commit reinstates the DMA zone at the bottom of RAM. 
The result is a DMA zone that properly reflects the hardware constraints as follows: [ 0.000000] Zone ranges: [ 0.000000] DMA [mem 0x0000000800000000-0x000000083fffffff] [ 0.000000] DMA32 empty [ 0.000000] Normal [mem 0x0000000840000000-0x0000000bffffffff] Signed-off-by: Catalin Marinas [baruch: split off the original patch] Signed-off-by: Baruch Siach Reviewed-by: Petr Tesarik Signed-off-by: Christoph Hellwig --- arch/arm64/mm/init.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index c45e2152ca9e..bfb10969cbf0 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -114,20 +114,8 @@ static void __init arch_reserve_crashkernel(void) low_size, high); } -/* - * Return the maximum physical address for a zone given its limit. - * If DRAM starts above 32-bit, expand the zone to the maximum - * available memory, otherwise cap it at 32-bit. - */ static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit) { - phys_addr_t phys_start = memblock_start_of_DRAM(); - - if (phys_start > U32_MAX) - zone_limit = PHYS_ADDR_MAX; - else if (phys_start > zone_limit) - zone_limit = U32_MAX; - return min(zone_limit, memblock_end_of_DRAM() - 1) + 1; } From f69e342eec008e1bab772d3963c3dd9979293e13 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 24 Jul 2024 21:04:48 +0300 Subject: [PATCH 04/18] dma-mapping: call ->unmap_page and ->unmap_sg unconditionally Almost all instances of the dma_map_ops ->map_page()/map_sg() methods implement ->unmap_page()/unmap_sg() too. The one instance which doesn't is dma_dummy_ops, which is used to fail the DMA mapping, and thus there won't be any calls to ->unmap_page()/unmap_sg(). Remove the checks for ->unmap_page()/unmap_sg() and call them directly to create an interface that is symmetrical to ->map_page()/map_sg(). 
Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Reviewed-by: Robin Murphy Signed-off-by: Christoph Hellwig --- kernel/dma/dummy.c | 21 +++++++++++++++++++++ kernel/dma/mapping.c | 4 ++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/kernel/dma/dummy.c b/kernel/dma/dummy.c index b492d59ac77e..92de80e5b057 100644 --- a/kernel/dma/dummy.c +++ b/kernel/dma/dummy.c @@ -17,6 +17,15 @@ static dma_addr_t dma_dummy_map_page(struct device *dev, struct page *page, { return DMA_MAPPING_ERROR; } +static void dma_dummy_unmap_page(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + /* + * Dummy ops doesn't support map_page, so unmap_page should never be + * called. + */ + WARN_ON_ONCE(true); +} static int dma_dummy_map_sg(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, @@ -25,6 +34,16 @@ static int dma_dummy_map_sg(struct device *dev, struct scatterlist *sgl, return -EINVAL; } +static void dma_dummy_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + unsigned long attrs) +{ + /* + * Dummy ops doesn't support map_sg, so unmap_sg should never be called. 
+ */ + WARN_ON_ONCE(true); +} + static int dma_dummy_supported(struct device *hwdev, u64 mask) { return 0; @@ -33,6 +52,8 @@ static int dma_dummy_supported(struct device *hwdev, u64 mask) const struct dma_map_ops dma_dummy_ops = { .mmap = dma_dummy_mmap, .map_page = dma_dummy_map_page, + .unmap_page = dma_dummy_unmap_page, .map_sg = dma_dummy_map_sg, + .unmap_sg = dma_dummy_unmap_sg, .dma_supported = dma_dummy_supported, }; diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index b1c18058d55f..71416b156bb5 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -177,7 +177,7 @@ void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, if (dma_map_direct(dev, ops) || arch_dma_unmap_page_direct(dev, addr + size)) dma_direct_unmap_page(dev, addr, size, dir, attrs); - else if (ops->unmap_page) + else ops->unmap_page(dev, addr, size, dir, attrs); debug_dma_unmap_page(dev, addr, size, dir); } @@ -291,7 +291,7 @@ void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, if (dma_map_direct(dev, ops) || arch_dma_unmap_sg_direct(dev, sg, nents)) dma_direct_unmap_sg(dev, sg, nents, dir, attrs); - else if (ops->unmap_sg) + else ops->unmap_sg(dev, sg, nents, dir, attrs); } EXPORT_SYMBOL(dma_unmap_sg_attrs); From b5c58b2fdc427e7958412ecb2de2804a1f7c1572 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 24 Jul 2024 21:04:49 +0300 Subject: [PATCH 05/18] dma-mapping: direct calls for dma-iommu Directly call into dma-iommu just like we have been doing for dma-direct for a while. This avoids the indirect call overhead for IOMMU ops and removes the need to have DMA ops entirely for many common configurations. 
Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Acked-by: Greg Kroah-Hartman Acked-by: Robin Murphy Signed-off-by: Christoph Hellwig --- MAINTAINERS | 1 + drivers/iommu/Kconfig | 2 +- drivers/iommu/dma-iommu.c | 104 +++++++++---------------- drivers/iommu/intel/Kconfig | 1 - include/linux/device.h | 5 ++ include/linux/dma-map-ops.h | 13 ---- include/linux/iommu-dma.h | 147 ++++++++++++++++++++++++++++++++++++ kernel/dma/Kconfig | 4 + kernel/dma/Makefile | 2 +- kernel/dma/mapping.c | 83 +++++++++++++++++--- 10 files changed, 269 insertions(+), 93 deletions(-) create mode 100644 include/linux/iommu-dma.h diff --git a/MAINTAINERS b/MAINTAINERS index f328373463b0..6e653684efe9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11722,6 +11722,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git F: drivers/iommu/dma-iommu.c F: drivers/iommu/dma-iommu.h F: drivers/iommu/iova.c +F: include/linux/iommu-dma.h F: include/linux/iova.h IOMMU SUBSYSTEM diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index a82f10054aec..61a6889f4dee 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -151,7 +151,7 @@ config OF_IOMMU # IOMMU-agnostic DMA-mapping layer config IOMMU_DMA def_bool ARM64 || X86 || S390 - select DMA_OPS + select DMA_OPS_HELPERS select IOMMU_API select IOMMU_IOVA select IRQ_MSI_IOMMU diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 7b1dfa0665df..3672d619bcb6 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -1037,9 +1038,8 @@ out_unmap: return NULL; } -static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, - size_t size, enum dma_data_direction dir, gfp_t gfp, - unsigned long attrs) +struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs) { struct dma_sgt_handle *sh; @@ -1055,7 
+1055,7 @@ static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, return &sh->sgt; } -static void iommu_dma_free_noncontiguous(struct device *dev, size_t size, +void iommu_dma_free_noncontiguous(struct device *dev, size_t size, struct sg_table *sgt, enum dma_data_direction dir) { struct dma_sgt_handle *sh = sgt_handle(sgt); @@ -1066,8 +1066,8 @@ static void iommu_dma_free_noncontiguous(struct device *dev, size_t size, kfree(sh); } -static void iommu_dma_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) +void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir) { phys_addr_t phys; @@ -1081,8 +1081,8 @@ static void iommu_dma_sync_single_for_cpu(struct device *dev, swiotlb_sync_single_for_cpu(dev, phys, size, dir); } -static void iommu_dma_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) +void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir) { phys_addr_t phys; @@ -1096,9 +1096,8 @@ static void iommu_dma_sync_single_for_device(struct device *dev, arch_sync_dma_for_device(phys, size, dir); } -static void iommu_dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) +void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir) { struct scatterlist *sg; int i; @@ -1112,9 +1111,8 @@ static void iommu_dma_sync_sg_for_cpu(struct device *dev, arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir); } -static void iommu_dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) +void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir) { struct scatterlist *sg; int i; @@ -1129,9 +1127,9 
@@ static void iommu_dma_sync_sg_for_device(struct device *dev, arch_sync_dma_for_device(sg_phys(sg), sg->length, dir); } -static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) +dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) { phys_addr_t phys = page_to_phys(page) + offset; bool coherent = dev_is_dma_coherent(dev); @@ -1189,7 +1187,7 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, return iova; } -static void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, +void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, unsigned long attrs) { struct iommu_domain *domain = iommu_get_dma_domain(dev); @@ -1342,8 +1340,8 @@ out_unmap: * impedance-matching, to be able to hand off a suitably-aligned list, * but still preserve the original offsets and sizes for the caller. 
*/ -static int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) +int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs) { struct iommu_domain *domain = iommu_get_dma_domain(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; @@ -1462,8 +1460,8 @@ out: return ret; } -static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) +void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs) { dma_addr_t end = 0, start; struct scatterlist *tmp; @@ -1512,7 +1510,7 @@ static void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, __iommu_dma_unmap(dev, start, end - start); } -static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, +dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, size_t size, enum dma_data_direction dir, unsigned long attrs) { return __iommu_dma_map(dev, phys, size, @@ -1520,7 +1518,7 @@ static dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, dma_get_mask(dev)); } -static void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, +void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, unsigned long attrs) { __iommu_dma_unmap(dev, handle, size); @@ -1557,7 +1555,7 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr) dma_free_contiguous(dev, page, alloc_size); } -static void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, +void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, unsigned long attrs) { __iommu_dma_unmap(dev, handle, size); @@ -1601,8 +1599,8 @@ out_free_pages: return NULL; } -static void *iommu_dma_alloc(struct device *dev, 
size_t size, - dma_addr_t *handle, gfp_t gfp, unsigned long attrs) +void *iommu_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, unsigned long attrs) { bool coherent = dev_is_dma_coherent(dev); int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs); @@ -1635,7 +1633,7 @@ static void *iommu_dma_alloc(struct device *dev, size_t size, return cpu_addr; } -static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, +int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { @@ -1666,7 +1664,7 @@ static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, vma->vm_page_prot); } -static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, +int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { @@ -1693,19 +1691,19 @@ static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, return ret; } -static unsigned long iommu_dma_get_merge_boundary(struct device *dev) +unsigned long iommu_dma_get_merge_boundary(struct device *dev) { struct iommu_domain *domain = iommu_get_dma_domain(dev); return (1UL << __ffs(domain->pgsize_bitmap)) - 1; } -static size_t iommu_dma_opt_mapping_size(void) +size_t iommu_dma_opt_mapping_size(void) { return iova_rcache_range(); } -static size_t iommu_dma_max_mapping_size(struct device *dev) +size_t iommu_dma_max_mapping_size(struct device *dev) { if (dev_is_untrusted(dev)) return swiotlb_max_mapping_size(dev); @@ -1713,32 +1711,6 @@ static size_t iommu_dma_max_mapping_size(struct device *dev) return SIZE_MAX; } -static const struct dma_map_ops iommu_dma_ops = { - .flags = DMA_F_PCI_P2PDMA_SUPPORTED | - DMA_F_CAN_SKIP_SYNC, - .alloc = iommu_dma_alloc, - .free = iommu_dma_free, - .alloc_pages_op = dma_common_alloc_pages, - .free_pages = dma_common_free_pages, - .alloc_noncontiguous = 
iommu_dma_alloc_noncontiguous, - .free_noncontiguous = iommu_dma_free_noncontiguous, - .mmap = iommu_dma_mmap, - .get_sgtable = iommu_dma_get_sgtable, - .map_page = iommu_dma_map_page, - .unmap_page = iommu_dma_unmap_page, - .map_sg = iommu_dma_map_sg, - .unmap_sg = iommu_dma_unmap_sg, - .sync_single_for_cpu = iommu_dma_sync_single_for_cpu, - .sync_single_for_device = iommu_dma_sync_single_for_device, - .sync_sg_for_cpu = iommu_dma_sync_sg_for_cpu, - .sync_sg_for_device = iommu_dma_sync_sg_for_device, - .map_resource = iommu_dma_map_resource, - .unmap_resource = iommu_dma_unmap_resource, - .get_merge_boundary = iommu_dma_get_merge_boundary, - .opt_mapping_size = iommu_dma_opt_mapping_size, - .max_mapping_size = iommu_dma_max_mapping_size, -}; - void iommu_setup_dma_ops(struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); @@ -1746,19 +1718,15 @@ void iommu_setup_dma_ops(struct device *dev) if (dev_is_pci(dev)) dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac; - if (iommu_is_dma_domain(domain)) { - if (iommu_dma_init_domain(domain, dev)) - goto out_err; - dev->dma_ops = &iommu_dma_ops; - } else if (dev->dma_ops == &iommu_dma_ops) { - /* Clean up if we've switched *from* a DMA domain */ - dev->dma_ops = NULL; - } + dev->dma_iommu = iommu_is_dma_domain(domain); + if (dev->dma_iommu && iommu_dma_init_domain(domain, dev)) + goto out_err; return; out_err: - pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", - dev_name(dev)); + pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", + dev_name(dev)); + dev->dma_iommu = false; } static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index f52fb39c968e..88fd32a9323c 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -12,7 +12,6 @@ config DMAR_DEBUG config INTEL_IOMMU bool "Support for Intel IOMMU using DMA Remapping 
Devices" depends on PCI_MSI && ACPI && X86 - select DMA_OPS select IOMMU_API select IOMMU_IOVA select IOMMUFD_DRIVER if IOMMUFD diff --git a/include/linux/device.h b/include/linux/device.h index 34eb20f5966f..1c5280d28bc3 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -707,6 +707,8 @@ struct device_physical_location { * for dma allocations. This flag is managed by the dma ops * instance from ->dma_supported. * @dma_skip_sync: DMA sync operations can be skipped for coherent buffers. + * @dma_iommu: Device is using default IOMMU implementation for DMA and + * doesn't rely on dma_ops structure. * * At the lowest level, every device in a Linux system is represented by an * instance of struct device. The device structure contains the information @@ -822,6 +824,9 @@ struct device { #ifdef CONFIG_DMA_NEED_SYNC bool dma_skip_sync:1; #endif +#ifdef CONFIG_IOMMU_DMA + bool dma_iommu:1; +#endif }; /** diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 02a1c825896b..077b15c93bb8 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -13,20 +13,7 @@ struct cma; struct iommu_ops; -/* - * Values for struct dma_map_ops.flags: - * - * DMA_F_PCI_P2PDMA_SUPPORTED: Indicates the dma_map_ops implementation can - * handle PCI P2PDMA pages in the map_sg/unmap_sg operation. - * DMA_F_CAN_SKIP_SYNC: DMA sync operations can be skipped if the device is - * coherent and it's not an SWIOTLB buffer. - */ -#define DMA_F_PCI_P2PDMA_SUPPORTED (1 << 0) -#define DMA_F_CAN_SKIP_SYNC (1 << 1) - struct dma_map_ops { - unsigned int flags; - void *(*alloc)(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); diff --git a/include/linux/iommu-dma.h b/include/linux/iommu-dma.h new file mode 100644 index 000000000000..d30a58bf00fd --- /dev/null +++ b/include/linux/iommu-dma.h @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. 
All rights reserved + * + * DMA operations that map physical memory through IOMMU. + */ +#ifndef _LINUX_IOMMU_DMA_H +#define _LINUX_IOMMU_DMA_H + +#include + +#ifdef CONFIG_IOMMU_DMA +dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs); +void iommu_dma_unmap_page(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, unsigned long attrs); +int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); +void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); +void *iommu_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, unsigned long attrs); +int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +unsigned long iommu_dma_get_merge_boundary(struct device *dev); +size_t iommu_dma_opt_mapping_size(void); +size_t iommu_dma_max_mapping_size(struct device *dev); +void iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, unsigned long attrs); +dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs); +void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, unsigned long attrs); +struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); +void iommu_dma_free_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir); +void 
iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir); +void iommu_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir); +void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir); +void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir); +#else +static inline dma_addr_t iommu_dma_map_page(struct device *dev, + struct page *page, unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + return DMA_MAPPING_ERROR; +} +static inline void iommu_dma_unmap_page(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + return -EINVAL; +} +static inline void iommu_dma_unmap_sg(struct device *dev, + struct scatterlist *sg, int nents, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline void *iommu_dma_alloc(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, unsigned long attrs) +{ + return NULL; +} +static inline int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + return -EINVAL; +} +static inline int iommu_dma_get_sgtable(struct device *dev, + struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, + size_t size, unsigned long attrs) +{ + return -EINVAL; +} +static inline unsigned long iommu_dma_get_merge_boundary(struct device *dev) +{ + return 0; +} +static inline size_t iommu_dma_opt_mapping_size(void) +{ + return 0; +} +static inline size_t iommu_dma_max_mapping_size(struct device *dev) +{ + return 0; +} +static inline void iommu_dma_free(struct device 
*dev, size_t size, + void *cpu_addr, dma_addr_t handle, unsigned long attrs) +{ +} +static inline dma_addr_t iommu_dma_map_resource(struct device *dev, + phys_addr_t phys, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + return DMA_MAPPING_ERROR; +} +static inline void iommu_dma_unmap_resource(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline struct sg_table * +iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, + enum dma_data_direction dir, gfp_t gfp, unsigned long attrs) +{ + return NULL; +} +static inline void iommu_dma_free_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ +} +static inline void iommu_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ +} +#endif /* CONFIG_IOMMU_DMA */ +#endif /* _LINUX_IOMMU_DMA_H */ diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index c06e56be0ca1..21bae1700836 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -8,8 +8,12 @@ config HAS_DMA depends on !NO_DMA default y +config DMA_OPS_HELPERS + bool + config DMA_OPS depends on HAS_DMA + select DMA_OPS_HELPERS bool # diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index 21926e46ef4f..2e6e933cf7f3 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_HAS_DMA) += mapping.o direct.o -obj-$(CONFIG_DMA_OPS) += ops_helpers.o 
+obj-$(CONFIG_DMA_OPS_HELPERS) += ops_helpers.o obj-$(CONFIG_DMA_OPS) += dummy.o obj-$(CONFIG_DMA_CMA) += contiguous.o obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 71416b156bb5..b50ae3d198a6 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -113,11 +114,27 @@ void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, } EXPORT_SYMBOL(dmam_alloc_attrs); +#ifdef CONFIG_IOMMU_DMA +static bool use_dma_iommu(struct device *dev) +{ + return dev->dma_iommu; +} +#else +static bool use_dma_iommu(struct device *dev) +{ + return false; +} +#endif + static bool dma_go_direct(struct device *dev, dma_addr_t mask, const struct dma_map_ops *ops) { + if (use_dma_iommu(dev)) + return false; + if (likely(!ops)) return true; + #ifdef CONFIG_DMA_OPS_BYPASS if (dev->dma_ops_bypass) return min_not_zero(mask, dev->bus_dma_limit) >= @@ -159,6 +176,8 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, if (dma_map_direct(dev, ops) || arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size)) addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); + else if (use_dma_iommu(dev)) + addr = iommu_dma_map_page(dev, page, offset, size, dir, attrs); else addr = ops->map_page(dev, page, offset, size, dir, attrs); kmsan_handle_dma(page, offset, size, dir); @@ -177,6 +196,8 @@ void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, if (dma_map_direct(dev, ops) || arch_dma_unmap_page_direct(dev, addr + size)) dma_direct_unmap_page(dev, addr, size, dir, attrs); + else if (use_dma_iommu(dev)) + iommu_dma_unmap_page(dev, addr, size, dir, attrs); else ops->unmap_page(dev, addr, size, dir, attrs); debug_dma_unmap_page(dev, addr, size, dir); @@ -197,6 +218,8 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, if (dma_map_direct(dev, ops) || 
arch_dma_map_sg_direct(dev, sg, nents)) ents = dma_direct_map_sg(dev, sg, nents, dir, attrs); + else if (use_dma_iommu(dev)) + ents = iommu_dma_map_sg(dev, sg, nents, dir, attrs); else ents = ops->map_sg(dev, sg, nents, dir, attrs); @@ -291,7 +314,9 @@ void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, if (dma_map_direct(dev, ops) || arch_dma_unmap_sg_direct(dev, sg, nents)) dma_direct_unmap_sg(dev, sg, nents, dir, attrs); - else + else if (use_dma_iommu(dev)) + iommu_dma_unmap_sg(dev, sg, nents, dir, attrs); + else if (ops->unmap_sg) ops->unmap_sg(dev, sg, nents, dir, attrs); } EXPORT_SYMBOL(dma_unmap_sg_attrs); @@ -309,6 +334,8 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, if (dma_map_direct(dev, ops)) addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs); + else if (use_dma_iommu(dev)) + addr = iommu_dma_map_resource(dev, phys_addr, size, dir, attrs); else if (ops->map_resource) addr = ops->map_resource(dev, phys_addr, size, dir, attrs); @@ -323,7 +350,11 @@ void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); - if (!dma_map_direct(dev, ops) && ops->unmap_resource) + if (dma_map_direct(dev, ops)) + ; /* nothing to do: uncached and no swiotlb */ + else if (use_dma_iommu(dev)) + iommu_dma_unmap_resource(dev, addr, size, dir, attrs); + else if (ops->unmap_resource) ops->unmap_resource(dev, addr, size, dir, attrs); debug_dma_unmap_resource(dev, addr, size, dir); } @@ -338,6 +369,8 @@ void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops)) dma_direct_sync_single_for_cpu(dev, addr, size, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_single_for_cpu(dev, addr, size, dir); else if (ops->sync_single_for_cpu) ops->sync_single_for_cpu(dev, addr, size, dir); debug_dma_sync_single_for_cpu(dev, addr, size, dir); @@ -352,6 
+385,8 @@ void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr, BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops)) dma_direct_sync_single_for_device(dev, addr, size, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_single_for_device(dev, addr, size, dir); else if (ops->sync_single_for_device) ops->sync_single_for_device(dev, addr, size, dir); debug_dma_sync_single_for_device(dev, addr, size, dir); @@ -366,6 +401,8 @@ void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops)) dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_sg_for_cpu(dev, sg, nelems, dir); else if (ops->sync_sg_for_cpu) ops->sync_sg_for_cpu(dev, sg, nelems, dir); debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); @@ -380,6 +417,8 @@ void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, BUG_ON(!valid_dma_direction(dir)); if (dma_map_direct(dev, ops)) dma_direct_sync_sg_for_device(dev, sg, nelems, dir); + else if (use_dma_iommu(dev)) + iommu_dma_sync_sg_for_device(dev, sg, nelems, dir); else if (ops->sync_sg_for_device) ops->sync_sg_for_device(dev, sg, nelems, dir); debug_dma_sync_sg_for_device(dev, sg, nelems, dir); @@ -405,7 +444,7 @@ static void dma_setup_need_sync(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); - if (dma_map_direct(dev, ops) || (ops->flags & DMA_F_CAN_SKIP_SYNC)) + if (dma_map_direct(dev, ops) || use_dma_iommu(dev)) /* * dma_skip_sync will be reset to %false on first SWIOTLB buffer * mapping, if any. 
During the device initialization, it's @@ -446,6 +485,9 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, if (dma_alloc_direct(dev, ops)) return dma_direct_get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); + if (use_dma_iommu(dev)) + return iommu_dma_get_sgtable(dev, sgt, cpu_addr, dma_addr, + size, attrs); if (!ops->get_sgtable) return -ENXIO; return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); @@ -482,6 +524,8 @@ bool dma_can_mmap(struct device *dev) if (dma_alloc_direct(dev, ops)) return dma_direct_can_mmap(dev); + if (use_dma_iommu(dev)) + return true; return ops->mmap != NULL; } EXPORT_SYMBOL_GPL(dma_can_mmap); @@ -508,6 +552,9 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, if (dma_alloc_direct(dev, ops)) return dma_direct_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); + if (use_dma_iommu(dev)) + return iommu_dma_mmap(dev, vma, cpu_addr, dma_addr, size, + attrs); if (!ops->mmap) return -ENXIO; return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); @@ -559,6 +606,8 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, if (dma_alloc_direct(dev, ops)) cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs); + else if (use_dma_iommu(dev)) + cpu_addr = iommu_dma_alloc(dev, size, dma_handle, flag, attrs); else if (ops->alloc) cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); else @@ -591,6 +640,8 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); if (dma_alloc_direct(dev, ops)) dma_direct_free(dev, size, cpu_addr, dma_handle, attrs); + else if (use_dma_iommu(dev)) + iommu_dma_free(dev, size, cpu_addr, dma_handle, attrs); else if (ops->free) ops->free(dev, size, cpu_addr, dma_handle, attrs); } @@ -611,6 +662,8 @@ static struct page *__dma_alloc_pages(struct device *dev, size_t size, size = PAGE_ALIGN(size); if (dma_alloc_direct(dev, ops)) return dma_direct_alloc_pages(dev, 
size, dma_handle, dir, gfp); + if (use_dma_iommu(dev)) + return dma_common_alloc_pages(dev, size, dma_handle, dir, gfp); if (!ops->alloc_pages_op) return NULL; return ops->alloc_pages_op(dev, size, dma_handle, dir, gfp); @@ -635,6 +688,8 @@ static void __dma_free_pages(struct device *dev, size_t size, struct page *page, size = PAGE_ALIGN(size); if (dma_alloc_direct(dev, ops)) dma_direct_free_pages(dev, size, page, dma_handle, dir); + else if (use_dma_iommu(dev)) + dma_common_free_pages(dev, size, page, dma_handle, dir); else if (ops->free_pages) ops->free_pages(dev, size, page, dma_handle, dir); } @@ -697,6 +752,8 @@ struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, if (ops && ops->alloc_noncontiguous) sgt = ops->alloc_noncontiguous(dev, size, dir, gfp, attrs); + else if (use_dma_iommu(dev)) + sgt = iommu_dma_alloc_noncontiguous(dev, size, dir, gfp, attrs); else sgt = alloc_single_sgt(dev, size, dir, gfp); @@ -725,6 +782,8 @@ void dma_free_noncontiguous(struct device *dev, size_t size, debug_dma_unmap_sg(dev, sgt->sgl, sgt->orig_nents, dir); if (ops && ops->free_noncontiguous) ops->free_noncontiguous(dev, size, sgt, dir); + else if (use_dma_iommu(dev)) + iommu_dma_free_noncontiguous(dev, size, sgt, dir); else free_single_sgt(dev, size, sgt, dir); } @@ -772,6 +831,8 @@ static int dma_supported(struct device *dev, u64 mask) { const struct dma_map_ops *ops = get_dma_ops(dev); + if (WARN_ON(ops && use_dma_iommu(dev))) + return false; /* * ->dma_supported sets the bypass flag, so we must always call * into the method here unless the device is truly direct mapped. 
@@ -787,17 +848,14 @@ bool dma_pci_p2pdma_supported(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); - /* if ops is not set, dma direct will be used which supports P2PDMA */ - if (!ops) - return true; - /* * Note: dma_ops_bypass is not checked here because P2PDMA should * not be used with dma mapping ops that do not have support even * if the specific device is bypassing them. */ - return ops->flags & DMA_F_PCI_P2PDMA_SUPPORTED; + /* if ops is not set, dma direct and default IOMMU support P2PDMA */ + return !ops; } EXPORT_SYMBOL_GPL(dma_pci_p2pdma_supported); @@ -865,6 +923,8 @@ size_t dma_max_mapping_size(struct device *dev) if (dma_map_direct(dev, ops)) size = dma_direct_max_mapping_size(dev); + else if (use_dma_iommu(dev)) + size = iommu_dma_max_mapping_size(dev); else if (ops && ops->max_mapping_size) size = ops->max_mapping_size(dev); @@ -877,7 +937,9 @@ size_t dma_opt_mapping_size(struct device *dev) const struct dma_map_ops *ops = get_dma_ops(dev); size_t size = SIZE_MAX; - if (ops && ops->opt_mapping_size) + if (use_dma_iommu(dev)) + size = iommu_dma_opt_mapping_size(); + else if (ops && ops->opt_mapping_size) size = ops->opt_mapping_size(); return min(dma_max_mapping_size(dev), size); @@ -888,6 +950,9 @@ unsigned long dma_get_merge_boundary(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); + if (use_dma_iommu(dev)) + return iommu_dma_get_merge_boundary(dev); + if (!ops || !ops->get_merge_boundary) return 0; /* can't merge */ From 833bd284a45448f04ab9fcd4fdf7ef594d458eed Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Wed, 28 Aug 2024 12:51:24 +0300 Subject: [PATCH 06/18] arm64: mm: fix DMA zone when dma-ranges is missing Some platforms, like Rockchip RK3568 based Odroid M1, do not provide DMA limits information in device-tree dma-ranges property. Still some device drivers set DMA limit that relies on DMA zone at low 4GB memory area. 
Until commit ba0fb44aed47 ("dma-mapping: replace zone_dma_bits by zone_dma_limit"), zone_sizes_init() restricted DMA zone to low 32-bit. Restore DMA zone 32-bit limit when the platform provides no DMA bus limit information. Fixes: ba0fb44aed47 ("dma-mapping: replace zone_dma_bits by zone_dma_limit") Reported-by: Marek Szyprowski Link: https://lore.kernel.org/r/53d988b1-bdce-422a-ae4e-158f305ad703@samsung.com Suggested-by: Robin Murphy Signed-off-by: Baruch Siach Reviewed-by: Catalin Marinas Tested-by: Marek Szyprowski Signed-off-by: Christoph Hellwig --- arch/arm64/mm/init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index bfb10969cbf0..5710129b8033 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -116,6 +116,9 @@ static void __init arch_reserve_crashkernel(void) static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit) { + if (zone_limit == PHYS_ADDR_MAX) + zone_limit = U32_MAX; + return min(zone_limit, memblock_end_of_DRAM() - 1) + 1; } From b5a73bf4d1de95e620bf5f592557b81f71c76f0e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 22 Aug 2024 06:56:31 +0200 Subject: [PATCH 07/18] scsi: check that busses support the DMA API before setting dma parameters We'll start throwing warnings soon when dma_set_seg_boundary and dma_set_max_seg_size are called on devices for buses that don't fully support the DMA API. Prepare for that by making the calls in the SCSI midlayer conditional. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Reviewed-by: Martin K. 
Petersen --- drivers/scsi/scsi_lib.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 3958a6d14bf4..7f0394c44920 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1988,8 +1988,15 @@ void scsi_init_limits(struct Scsi_Host *shost, struct queue_limits *lim) if (shost->no_highmem) lim->features |= BLK_FEAT_BOUNCE_HIGH; - dma_set_seg_boundary(dev, shost->dma_boundary); - dma_set_max_seg_size(dev, shost->max_segment_size); + /* + * Propagate the DMA formation properties to the dma-mapping layer as + * a courtesy service to the LLDDs. This needs to check that the buses + * actually support the DMA API first, though. + */ + if (dev->dma_parms) { + dma_set_seg_boundary(dev, shost->dma_boundary); + dma_set_max_seg_size(dev, shost->max_segment_size); + } } EXPORT_SYMBOL_GPL(scsi_init_limits); From c42a01264ba1497eb3193c08ff3c2656d98250a6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Jul 2024 06:10:24 +0200 Subject: [PATCH 08/18] dma-mapping: don't return errors from dma_set_min_align_mask A NULL dev->dma_parms indicates either a bus that is not DMA capable or a grave bug in the implementation of the bus code. There isn't much the driver can do in terms of error handling for either case, so just warn and continue as DMA operations will fail anyway. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Reviewed-by: Martin K. 
Petersen --- include/linux/dma-mapping.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index f693aafe221f..cfd6bafec3f9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -575,13 +575,12 @@ static inline unsigned int dma_get_min_align_mask(struct device *dev) return 0; } -static inline int dma_set_min_align_mask(struct device *dev, +static inline void dma_set_min_align_mask(struct device *dev, unsigned int min_align_mask) { if (WARN_ON_ONCE(!dev->dma_parms)) - return -EIO; + return; dev->dma_parms->min_align_mask = min_align_mask; - return 0; } #ifndef dma_get_cache_alignment From 560a861ab4174b42240157ab5cebe36b8c7bc418 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Jul 2024 06:06:28 +0200 Subject: [PATCH 09/18] dma-mapping: don't return errors from dma_set_seg_boundary A NULL dev->dma_parms indicates either a bus that is not DMA capable or a grave bug in the implementation of the bus code. There isn't much the driver can do in terms of error handling for either case, so just warn and continue as DMA operations will fail anyway. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Reviewed-by: Martin K. 
Petersen --- include/linux/dma-mapping.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index cfd6bafec3f9..6bd1333dbacb 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -559,13 +559,11 @@ static inline unsigned long dma_get_seg_boundary_nr_pages(struct device *dev, return (dma_get_seg_boundary(dev) >> page_shift) + 1; } -static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) +static inline void dma_set_seg_boundary(struct device *dev, unsigned long mask) { - if (dev->dma_parms) { - dev->dma_parms->segment_boundary_mask = mask; - return 0; - } - return -EIO; + if (WARN_ON_ONCE(!dev->dma_parms)) + return; + dev->dma_parms->segment_boundary_mask = mask; } static inline unsigned int dma_get_min_align_mask(struct device *dev) From 334304ac2baca7f3e821c47cf5129d90e7a6b1e6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Jul 2024 06:07:38 +0200 Subject: [PATCH 10/18] dma-mapping: don't return errors from dma_set_max_seg_size A NULL dev->dma_parms indicates either a bus that is not DMA capable or a grave bug in the implementation of the bus code. There isn't much the driver can do in terms of error handling for either case, so just warn and continue as DMA operations will fail anyway. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Reviewed-by: Martin K. 
Petersen Acked-by: Ulf Hansson # For MMC --- drivers/accel/qaic/qaic_drv.c | 4 +--- drivers/dma/idma64.c | 4 +--- drivers/dma/pl330.c | 5 +---- drivers/dma/qcom/bam_dma.c | 6 +----- drivers/dma/sh/rcar-dmac.c | 4 +--- drivers/dma/ste_dma40.c | 6 +----- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 6 +----- drivers/media/common/videobuf2/videobuf2-dma-contig.c | 3 +-- drivers/media/pci/intel/ipu6/ipu6.c | 4 +--- drivers/mmc/host/mmci_stm32_sdmmc.c | 3 ++- drivers/net/ethernet/microsoft/mana/gdma_main.c | 6 +----- drivers/scsi/lpfc/lpfc_init.c | 7 +------ include/linux/dma-mapping.h | 10 ++++------ 13 files changed, 17 insertions(+), 51 deletions(-) diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c index 580b29ed1902..bf10156c334e 100644 --- a/drivers/accel/qaic/qaic_drv.c +++ b/drivers/accel/qaic/qaic_drv.c @@ -447,9 +447,7 @@ static int init_pci(struct qaic_device *qdev, struct pci_dev *pdev) ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (ret) return ret; - ret = dma_set_max_seg_size(&pdev->dev, UINT_MAX); - if (ret) - return ret; + dma_set_max_seg_size(&pdev->dev, UINT_MAX); qdev->bar_0 = devm_ioremap_resource(&pdev->dev, &pdev->resource[0]); if (IS_ERR(qdev->bar_0)) diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c index e3505e56784b..1398814d8fbb 100644 --- a/drivers/dma/idma64.c +++ b/drivers/dma/idma64.c @@ -598,9 +598,7 @@ static int idma64_probe(struct idma64_chip *chip) idma64->dma.dev = chip->sysdev; - ret = dma_set_max_seg_size(idma64->dma.dev, IDMA64C_CTLH_BLOCK_TS_MASK); - if (ret) - return ret; + dma_set_max_seg_size(idma64->dma.dev, IDMA64C_CTLH_BLOCK_TS_MASK); ret = dma_async_device_register(&idma64->dma); if (ret) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 60c4de8dac1d..82a9fe88ad54 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -3163,10 +3163,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id) * This is the limit for transfers with a buswidth of 1, 
larger * buswidths will have larger limits. */ - ret = dma_set_max_seg_size(&adev->dev, 1900800); - if (ret) - dev_err(&adev->dev, "unable to set the seg size\n"); - + dma_set_max_seg_size(&adev->dev, 1900800); init_pl330_debugfs(pl330); dev_info(&adev->dev, diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index 5e7d332731e0..368ffaa40037 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -1325,11 +1325,7 @@ static int bam_dma_probe(struct platform_device *pdev) /* set max dma segment size */ bdev->common.dev = bdev->dev; - ret = dma_set_max_seg_size(bdev->common.dev, BAM_FIFO_SIZE); - if (ret) { - dev_err(bdev->dev, "cannot set maximum segment size\n"); - goto err_bam_channel_exit; - } + dma_set_max_seg_size(bdev->common.dev, BAM_FIFO_SIZE); platform_set_drvdata(pdev, bdev); diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 40482cb73d79..1094a2f82164 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -1868,9 +1868,7 @@ static int rcar_dmac_probe(struct platform_device *pdev) dmac->dev = &pdev->dev; platform_set_drvdata(pdev, dmac); - ret = dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK); - if (ret) - return ret; + dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK); ret = dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40)); if (ret) diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 2c489299148e..d52e1685aed5 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -3632,11 +3632,7 @@ static int __init d40_probe(struct platform_device *pdev) if (ret) goto destroy_cache; - ret = dma_set_max_seg_size(base->dev, STEDMA40_MAX_SEG_SIZE); - if (ret) { - d40_err(dev, "Failed to set dma max seg size\n"); - goto destroy_cache; - } + dma_set_max_seg_size(base->dev, STEDMA40_MAX_SEG_SIZE); d40_hw_init(base); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 77b50c56c124..3e807195a0d0 100644 --- 
a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -559,11 +559,7 @@ static int mtk_drm_kms_init(struct drm_device *drm) * Configure the DMA segment size to make sure we get contiguous IOVA * when importing PRIME buffers. */ - ret = dma_set_max_seg_size(dma_dev, UINT_MAX); - if (ret) { - dev_err(dma_dev, "Failed to set DMA segment size\n"); - goto err_component_unbind; - } + dma_set_max_seg_size(dma_dev, UINT_MAX); ret = drm_vblank_init(drm, MAX_CRTC); if (ret < 0) diff --git a/drivers/media/common/videobuf2/videobuf2-dma-contig.c b/drivers/media/common/videobuf2/videobuf2-dma-contig.c index 3d4fd4ef5310..bb0b7fa67b53 100644 --- a/drivers/media/common/videobuf2/videobuf2-dma-contig.c +++ b/drivers/media/common/videobuf2/videobuf2-dma-contig.c @@ -854,8 +854,7 @@ int vb2_dma_contig_set_max_seg_size(struct device *dev, unsigned int size) return -ENODEV; } if (dma_get_max_seg_size(dev) < size) - return dma_set_max_seg_size(dev, size); - + dma_set_max_seg_size(dev, size); return 0; } EXPORT_SYMBOL_GPL(vb2_dma_contig_set_max_seg_size); diff --git a/drivers/media/pci/intel/ipu6/ipu6.c b/drivers/media/pci/intel/ipu6/ipu6.c index bbd646378ab3..83e70c692d95 100644 --- a/drivers/media/pci/intel/ipu6/ipu6.c +++ b/drivers/media/pci/intel/ipu6/ipu6.c @@ -576,9 +576,7 @@ static int ipu6_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) return dev_err_probe(dev, ret, "Failed to set DMA mask\n"); - ret = dma_set_max_seg_size(dev, UINT_MAX); - if (ret) - return dev_err_probe(dev, ret, "Failed to set max_seg_size\n"); + dma_set_max_seg_size(dev, UINT_MAX); ret = ipu6_pci_config_setup(pdev, isp->hw_ver); if (ret) diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index f5da7f9baa52..9dc51859c2e5 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -213,7 +213,8 @@ static int sdmmc_idma_setup(struct mmci_host *host) host->mmc->max_seg_size = 
host->mmc->max_req_size; } - return dma_set_max_seg_size(dev, host->mmc->max_seg_size); + dma_set_max_seg_size(dev, host->mmc->max_seg_size); + return 0; } static int sdmmc_idma_start(struct mmci_host *host, unsigned int *datactrl) diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index ddb8f68d80a2..ca4ed58f1206 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -1496,11 +1496,7 @@ static int mana_gd_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto release_region; - err = dma_set_max_seg_size(&pdev->dev, UINT_MAX); - if (err) { - dev_err(&pdev->dev, "Failed to set dma device segment size\n"); - goto release_region; - } + dma_set_max_seg_size(&pdev->dev, UINT_MAX); err = -ENOMEM; gc = vzalloc(sizeof(*gc)); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index e1dfa96c2a55..50620918becd 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -13861,12 +13861,7 @@ fcponly: if (sli4_params->sge_supp_len > LPFC_MAX_SGE_SIZE) sli4_params->sge_supp_len = LPFC_MAX_SGE_SIZE; - rc = dma_set_max_seg_size(&phba->pcidev->dev, sli4_params->sge_supp_len); - if (unlikely(rc)) { - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "6400 Can't set dma maximum segment size\n"); - return rc; - } + dma_set_max_seg_size(&phba->pcidev->dev, sli4_params->sge_supp_len); /* * Check whether the adapter supports an embedded copy of the diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 6bd1333dbacb..1524da363734 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -524,13 +524,11 @@ static inline unsigned int dma_get_max_seg_size(struct device *dev) return SZ_64K; } -static inline int dma_set_max_seg_size(struct device *dev, unsigned int size) +static inline void dma_set_max_seg_size(struct device *dev, unsigned int size) { - if 
(dev->dma_parms) { - dev->dma_parms->max_segment_size = size; - return 0; - } - return -EIO; + if (WARN_ON_ONCE(!dev->dma_parms)) + return; + dev->dma_parms->max_segment_size = size; } static inline unsigned long dma_get_seg_boundary(struct device *dev) From 122c234ef4e182440f8a60490e9344bfc4e2b5e9 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Sun, 1 Sep 2024 09:12:34 +0300 Subject: [PATCH 11/18] arm64: mm: keep low RAM dma zone Commit ba0fb44aed47 ("dma-mapping: replace zone_dma_bits by zone_dma_limit") optimistically assumed that device-tree dma-ranges property describes the system DMA limits. That assumption ignores DMA limits of individual devices that are not encoded in device tree. Commit 833bd284a45 ("arm64: mm: fix DMA zone when dma-ranges is missing") fixed part of the problem for platforms that do not provide dma-ranges at all. However platforms like SM8550-HDK provide DMA bus limit, but have devices with stronger DMA limits. of_dma_get_max_cpu_address() does not take device limitations into account. These platforms implicitly rely on DMA zone in low 32-bit RAM area. Until we find a better way to figure out the optimal DMA zone range, restore the low RAM DMA zone we had before commit ba0fb44aed47. 
Fixes: ba0fb44aed47 ("dma-mapping: replace zone_dma_bits by zone_dma_limit") Closes: https://lore.kernel.org/r/1a0c7282-63e0-4add-8e38-3abe3e0a8e2f@linaro.org Reported-by: Neil Armstrong Reviewed-by: Catalin Marinas Suggested-by: Robin Murphy Signed-off-by: Baruch Siach Tested-by: Neil Armstrong # on SM8550-HDK Tested-by: Marek Szyprowski Signed-off-by: Christoph Hellwig --- arch/arm64/mm/init.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 5710129b8033..eb76ac6d87c0 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -116,8 +116,14 @@ static void __init arch_reserve_crashkernel(void) static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit) { - if (zone_limit == PHYS_ADDR_MAX) - zone_limit = U32_MAX; + /* + * Information we get from firmware (e.g. DT dma-ranges) describes DMA + * bus constraints. Devices using DMA might have their own limitations. + * Some of them rely on DMA zone in low 32-bit memory. Keep low RAM + * DMA zone on platforms that have RAM there. + */ + if (memblock_start_of_DRAM() < U32_MAX) + zone_limit = min(zone_limit, U32_MAX); return min(zone_limit, memblock_end_of_DRAM() - 1) + 1; } From 92193b3569ade4968011007356c8606def0657a1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Aug 2024 09:00:08 +0300 Subject: [PATCH 12/18] vdpa_sim: don't select DMA_OPS vdpa_sim has been fixed to not override the dma_map_ops in commit 6c3d329e6486 ("vdpa_sim: get rid of DMA ops"), so don't select the symbol and don't depend on HAS_DMA. Signed-off-by: Christoph Hellwig Acked-by: Michael S. 
Tsirkin --- drivers/vdpa/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index 5265d09fc1c4..b08de3b77061 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -11,8 +11,7 @@ if VDPA config VDPA_SIM tristate "vDPA device simulator core" - depends on RUNTIME_TESTING_MENU && HAS_DMA - select DMA_OPS + depends on RUNTIME_TESTING_MENU select VHOST_RING select IOMMU_IOVA help From de6c85bf918ea52d5c680f0d130b37ee2ff152d6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Aug 2024 09:02:47 +0300 Subject: [PATCH 13/18] dma-mapping: clearly mark DMA ops as an architecture feature DMA ops are a helper for architectures and not for drivers to override the DMA implementation. Unfortunately driver authors keep ignoring this. Make the fact more clear by renaming the symbol to ARCH_HAS_DMA_OPS and having the two drivers overriding their dma_ops depend on that. These drivers should probably be marked broken, but we can give them a bit of a grace period for that. 
Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Acked-by: Sakari Ailus # for IPU6 Acked-by: Robin Murphy --- arch/Kconfig | 9 +++++++++ arch/alpha/Kconfig | 2 +- arch/arm/Kconfig | 2 +- arch/arm64/Kconfig | 1 + arch/mips/Kconfig | 2 +- arch/parisc/Kconfig | 2 +- arch/powerpc/Kconfig | 2 +- arch/s390/Kconfig | 2 +- arch/sparc/Kconfig | 2 +- arch/x86/Kconfig | 2 +- drivers/macintosh/macio_asic.c | 4 ++-- drivers/media/pci/intel/ipu6/Kconfig | 7 ++++++- drivers/vdpa/Kconfig | 7 ++++++- drivers/xen/Kconfig | 4 ++-- include/linux/device.h | 2 +- include/linux/dma-map-ops.h | 6 +++--- kernel/dma/Kconfig | 9 ++------- kernel/dma/Makefile | 2 +- 18 files changed, 41 insertions(+), 26 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 975dd22a2dbd..61c4ec048787 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -17,6 +17,15 @@ config CPU_MITIGATIONS def_bool y endif +# +# Selected by architectures that need custom DMA operations for e.g. legacy +# IOMMUs not handled by dma-iommu. Drivers must never select this symbol. 
+# +config ARCH_HAS_DMA_OPS + depends on HAS_DMA + select DMA_OPS_HELPERS + bool + menu "General architecture-dependent options" config ARCH_HAS_SUBPAGE_FAULTS diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 50ff06d5b799..109a4cddcd13 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -4,12 +4,12 @@ config ALPHA default y select ARCH_32BIT_USTAT_F_TINODE select ARCH_HAS_CURRENT_STACK_POINTER + select ARCH_HAS_DMA_OPS if PCI select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ARCH_NO_PREEMPT select ARCH_NO_SG_CHAIN select ARCH_USE_CMPXCHG_LOCKREF - select DMA_OPS if PCI select FORCE_PCI select PCI_DOMAINS if PCI select PCI_SYSCALL if PCI diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 54b2bb817a7f..f5f7995a2f8f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -10,6 +10,7 @@ config ARM select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DMA_ALLOC if MMU + select ARCH_HAS_DMA_OPS select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORTIFY_SOURCE @@ -54,7 +55,6 @@ config ARM select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS select DMA_DECLARE_COHERENT select DMA_GLOBAL_POOL if !MMU - select DMA_OPS select DMA_NONCOHERENT_MMAP if MMU select EDAC_SUPPORT select EDAC_ATOMIC_SCRUB diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a2f8ff354ca6..40940cbde435 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -24,6 +24,7 @@ config ARM64 select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE + select ARCH_HAS_DMA_OPS if XEN select ARCH_HAS_DMA_PREP_COHERENT select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_FAST_MULTIPLIER diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 60077e576935..023ad33a7e94 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -8,6 +8,7 @@ config MIPS select ARCH_HAS_CPU_FINALIZE_INIT select 
ARCH_HAS_CURRENT_STACK_POINTER if !CC_IS_CLANG || CLANG_VERSION >= 140000 select ARCH_HAS_DEBUG_VIRTUAL if !64BIT + select ARCH_HAS_DMA_OPS if MACH_JAZZ select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_KCOV select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE if !EVA @@ -393,7 +394,6 @@ config MACH_JAZZ select ARC_PROMLIB select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO - select DMA_OPS select FW_ARC select FW_ARC32 select ARCH_MAY_HAVE_PC_FDC diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index b0a2ac3ba916..859835a0692c 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -10,6 +10,7 @@ config PARISC select ARCH_WANT_FRAME_POINTERS select ARCH_HAS_CPU_CACHE_ALIASING select ARCH_HAS_DMA_ALLOC if PA11 + select ARCH_HAS_DMA_OPS select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX @@ -23,7 +24,6 @@ config PARISC select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_DEBUG_VM_PGTABLE select HAVE_RELIABLE_STACKTRACE - select DMA_OPS select RTC_CLASS select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d7b09b064a8a..f87fc0375a92 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -133,6 +133,7 @@ config PPC select ARCH_HAS_DEBUG_WX if STRICT_KERNEL_RWX select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_DMA_MAP_DIRECT if PPC_PSERIES + select ARCH_HAS_DMA_OPS if PPC64 select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV @@ -185,7 +186,6 @@ config PPC select CPUMASK_OFFSTACK if NR_CPUS >= 8192 select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN select DMA_OPS_BYPASS if PPC64 - select DMA_OPS if PPC64 select DYNAMIC_FTRACE if FUNCTION_TRACER select EDAC_ATOMIC_SCRUB select EDAC_SUPPORT diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a822f952f64a..f31f6b85c25c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -70,6 +70,7 @@ config S390 select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX 
select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_DMA_OPS if PCI select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FORCE_DMA_UNENCRYPTED select ARCH_HAS_FORTIFY_SOURCE @@ -137,7 +138,6 @@ config S390 select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 select DCACHE_WORD_ACCESS if !KMSAN - select DMA_OPS if PCI select DYNAMIC_FTRACE if FUNCTION_TRACER select FUNCTION_ALIGNMENT_8B if CC_IS_GCC select FUNCTION_ALIGNMENT_16B if !CC_IS_GCC diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 11bf9d312318..dcfdb7f1dae9 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -14,9 +14,9 @@ config SPARC bool default y select ARCH_HAS_CPU_CACHE_ALIASING + select ARCH_HAS_DMA_OPS select ARCH_MIGHT_HAVE_PC_PARPORT if SPARC64 && PCI select ARCH_MIGHT_HAVE_PC_SERIO - select DMA_OPS select OF select OF_PROMTREE select HAVE_ASM_MODVERSIONS diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 007bab9f2a0e..9e2e7d361019 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -79,6 +79,7 @@ config X86 select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN select ARCH_HAS_EARLY_DEBUG if KGDB select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER @@ -943,7 +944,6 @@ config DMI config GART_IOMMU bool "Old AMD GART IOMMU support" - select DMA_OPS select IOMMU_HELPER select SWIOTLB depends on X86_64 && PCI && AMD_NB diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c index 13626205530d..bede200e32e8 100644 --- a/drivers/macintosh/macio_asic.c +++ b/drivers/macintosh/macio_asic.c @@ -387,7 +387,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip, dma_set_max_seg_size(&dev->ofdev.dev, 65536); dma_set_seg_boundary(&dev->ofdev.dev, 0xffffffff); -#if defined(CONFIG_PCI) && defined(CONFIG_DMA_OPS) +#if defined(CONFIG_PCI) && defined(CONFIG_ARCH_HAS_DMA_OPS) /* Set the DMA ops to the ones from the PCI device, this could be * fishy 
if we didn't know that on PowerMac it's always direct ops * or iommu ops that will work fine @@ -396,7 +396,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip, */ dev->ofdev.dev.archdata = chip->lbus.pdev->dev.archdata; dev->ofdev.dev.dma_ops = chip->lbus.pdev->dev.dma_ops; -#endif /* CONFIG_PCI && CONFIG_DMA_OPS */ +#endif /* CONFIG_PCI && CONFIG_ARCH_HAS_DMA_OPS */ #ifdef DEBUG printk("preparing mdev @%p, ofdev @%p, dev @%p, kobj @%p\n", diff --git a/drivers/media/pci/intel/ipu6/Kconfig b/drivers/media/pci/intel/ipu6/Kconfig index 40e20f0aa5ae..49e4fb696573 100644 --- a/drivers/media/pci/intel/ipu6/Kconfig +++ b/drivers/media/pci/intel/ipu6/Kconfig @@ -4,8 +4,13 @@ config VIDEO_INTEL_IPU6 depends on VIDEO_DEV depends on X86 && X86_64 && HAS_DMA depends on IPU_BRIDGE || !IPU_BRIDGE + # + # This driver incorrectly tries to override the dma_ops. It should + # never have done that, but for now keep it working on architectures + # that use dma ops + # + depends on ARCH_HAS_DMA_OPS select AUXILIARY_BUS - select DMA_OPS select IOMMU_IOVA select VIDEO_V4L2_SUBDEV_API select MEDIA_CONTROLLER diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index b08de3b77061..559fb9d3271f 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -35,7 +35,12 @@ config VDPA_SIM_BLOCK config VDPA_USER tristate "VDUSE (vDPA Device in Userspace) support" depends on EVENTFD && MMU && HAS_DMA - select DMA_OPS + # + # This driver incorrectly tries to override the dma_ops. 
It should + # never have done that, but for now keep it working on architectures + # that use dma ops + # + depends on ARCH_HAS_DMA_OPS select VHOST_IOTLB select IOMMU_IOVA help diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index d5989871dd5d..f7d6f47971fd 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -177,8 +177,8 @@ config XEN_GRANT_DMA_ALLOC config SWIOTLB_XEN def_bool y + depends on ARCH_HAS_DMA_OPS depends on XEN_PV || ARM || ARM64 - select DMA_OPS select SWIOTLB config XEN_PCI_STUB @@ -348,10 +348,10 @@ config XEN_GRANT_DMA_IOMMU config XEN_GRANT_DMA_OPS bool - select DMA_OPS config XEN_VIRTIO bool "Xen virtio support" + depends on ARCH_HAS_DMA_OPS depends on VIRTIO select XEN_GRANT_DMA_OPS select XEN_GRANT_DMA_IOMMU if OF diff --git a/include/linux/device.h b/include/linux/device.h index 1c5280d28bc3..b4bde8d22697 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -750,7 +750,7 @@ struct device { struct dev_pin_info *pins; #endif struct dev_msi_info msi; -#ifdef CONFIG_DMA_OPS +#ifdef CONFIG_ARCH_HAS_DMA_OPS const struct dma_map_ops *dma_ops; #endif u64 *dma_mask; /* dma mask (if dma'able device) */ diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 077b15c93bb8..9668ddf3696e 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -75,7 +75,7 @@ struct dma_map_ops { unsigned long (*get_merge_boundary)(struct device *dev); }; -#ifdef CONFIG_DMA_OPS +#ifdef CONFIG_ARCH_HAS_DMA_OPS #include static inline const struct dma_map_ops *get_dma_ops(struct device *dev) @@ -90,7 +90,7 @@ static inline void set_dma_ops(struct device *dev, { dev->dma_ops = dma_ops; } -#else /* CONFIG_DMA_OPS */ +#else /* CONFIG_ARCH_HAS_DMA_OPS */ static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return NULL; @@ -99,7 +99,7 @@ static inline void set_dma_ops(struct device *dev, const struct dma_map_ops *dma_ops) { } -#endif /* CONFIG_DMA_OPS */ +#endif /* 
CONFIG_ARCH_HAS_DMA_OPS */ #ifdef CONFIG_DMA_CMA extern struct cma *dma_contiguous_default_area; diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 21bae1700836..4c0dcd909121 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -11,11 +11,6 @@ config HAS_DMA config DMA_OPS_HELPERS bool -config DMA_OPS - depends on HAS_DMA - select DMA_OPS_HELPERS - bool - # # IOMMU drivers that can bypass the IOMMU code and optionally use the direct # mapping fast path should select this option and set the dma_ops_bypass @@ -113,8 +108,8 @@ config DMA_BOUNCE_UNALIGNED_KMALLOC config DMA_NEED_SYNC def_bool ARCH_HAS_SYNC_DMA_FOR_DEVICE || ARCH_HAS_SYNC_DMA_FOR_CPU || \ - ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_API_DEBUG || DMA_OPS || \ - SWIOTLB + ARCH_HAS_SYNC_DMA_FOR_CPU_ALL || DMA_API_DEBUG || \ + ARCH_HAS_DMA_OPS || SWIOTLB config DMA_RESTRICTED_POOL bool "DMA Restricted Pool" diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index 2e6e933cf7f3..6977033444a3 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_HAS_DMA) += mapping.o direct.o obj-$(CONFIG_DMA_OPS_HELPERS) += ops_helpers.o -obj-$(CONFIG_DMA_OPS) += dummy.o +obj-$(CONFIG_ARCH_HAS_DMA_OPS) += dummy.o obj-$(CONFIG_DMA_CMA) += contiguous.o obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o obj-$(CONFIG_DMA_API_DEBUG) += debug.o From f689a3ab7b8ece9e5787ff058b96b8630e4931ad Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Sat, 31 Aug 2024 19:01:19 +0800 Subject: [PATCH 14/18] dma-direct: optimize page freeing when it is not addressable When the CMA allocation succeeds but isn't addressable, its buffer has already been released and the page is set to NULL. So later when the normal page allocation succeeds but isn't addressable, __free_pages() can be used to free that normal page rather than using dma_free_contiguous that does extra checks that are not needed. 
Signed-off-by: Chen Yu Signed-off-by: Christoph Hellwig --- kernel/dma/direct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index f2ba074a6a54..5b4e6d3bf7bc 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -140,7 +140,7 @@ again: if (!page) page = alloc_pages_node(node, gfp, get_order(size)); if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { - dma_free_contiguous(dev, page, size); + __free_pages(page, get_order(size)); page = NULL; if (IS_ENABLED(CONFIG_ZONE_DMA32) && From 19156263cb1f24128a9ba6ef7340be5cbacc3d22 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 5 Sep 2024 10:14:05 +0300 Subject: [PATCH 15/18] dma-mapping: use IOMMU DMA calls for common alloc/free page calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Common alloc and free pages routines are called when IOMMU DMA is used, and internally they call into the DMA ops structure, which is not available for the default IOMMU. This patch adds the necessary if checks to call IOMMU DMA instead.
It fixes the following crash: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000040 Mem abort info: ESR = 0x0000000096000006 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x06: level 2 translation fault Data abort info: ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=00000000d20bb000 [0000000000000040] pgd=08000000d20c1003 , p4d=08000000d20c1003 , pud=08000000d20c2003, pmd=0000000000000000 Internal error: Oops: 0000000096000006 [#1] PREEMPT SMP Modules linked in: ipv6 hci_uart venus_core btqca v4l2_mem2mem btrtl qcom_spmi_adc5 sbs_battery btbcm qcom_vadc_common cros_ec_typec videobuf2_v4l2 leds_cros_ec cros_kbd_led_backlight cros_ec_chardev videodev elan_i2c videobuf2_common qcom_stats mc bluetooth coresight_stm stm_core ecdh_generic ecc pwrseq_core panel_edp icc_bwmon ath10k_snoc ath10k_core ath mac80211 phy_qcom_qmp_combo aux_bridge libarc4 coresight_replicator coresight_etm4x coresight_tmc coresight_funnel cfg80211 rfkill coresight qcom_wdt cbmem ramoops reed_solomon pwm_bl coreboot_table backlight crct10dif_ce CPU: 7 UID: 0 PID: 70 Comm: kworker/u32:4 Not tainted 6.11.0-rc6-next-20240903-00003-gdfc6015d0711 #660 Hardware name: Google Lazor Limozeen without Touchscreen (rev5 - rev8) (DT) Workqueue: events_unbound deferred_probe_work_func hub 2-1:1.0: 4 ports detected pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : dma_common_alloc_pages+0x54/0x1b4 lr : dma_common_alloc_pages+0x4c/0x1b4 sp : ffff8000807d3730 x29: ffff8000807d3730 x28: ffff02a7d312f880 x27: 0000000000000001 x26: 000000000000c000 x25: 0000000000000000 x24: 0000000000000001 x23: ffff02a7d23b6898 x22: 0000000000006cc0 x21: 000000000000c000 x20: ffff02a7858bf410 x19: fffffe0a60006000 x18: 0000000000000001 x17: 00000000000000d5 x16: 1fffe054f0bcc261 x15: 0000000000000001 x14: ffff02a7844dc680 
x13: 0000000000100180 x12: dead000000000100 x11: dead000000000122 x10: 00000000001001ff x9 : ffff02a87f7b7b00 x8 : ffff02a87f7b7b00 x7 : ffff405977d6b000 x6 : ffff8000807d3310 x5 : ffff02a87f6b6398 x4 : 0000000000000001 x3 : ffff405977d6b000 x2 : ffff02a7844dc600 x1 : 0000000100000000 x0 : fffffe0a60006000 Call trace: dma_common_alloc_pages+0x54/0x1b4 __dma_alloc_pages+0x68/0x90 dma_alloc_pages+0x10/0x1c snd_dma_noncoherent_alloc+0x28/0x8c __snd_dma_alloc_pages+0x30/0x50 snd_dma_alloc_dir_pages+0x40/0x80 do_alloc_pages+0xb8/0x13c preallocate_pcm_pages+0x6c/0xf8 preallocate_pages+0x160/0x1a4 snd_pcm_set_managed_buffer_all+0x64/0xb0 lpass_platform_pcm_new+0xc0/0xe8 snd_soc_pcm_component_new+0x3c/0xc8 soc_new_pcm+0x4fc/0x668 snd_soc_bind_card+0xabc/0xbac snd_soc_register_card+0xf0/0x108 devm_snd_soc_register_card+0x4c/0xa4 sc7180_snd_platform_probe+0x180/0x224 platform_probe+0x68/0xc0 really_probe+0xbc/0x298 __driver_probe_device+0x78/0x12c driver_probe_device+0x3c/0x15c __device_attach_driver+0xb8/0x134 bus_for_each_drv+0x84/0xe0 __device_attach+0x9c/0x188 device_initial_probe+0x14/0x20 bus_probe_device+0xac/0xb0 deferred_probe_work_func+0x88/0xc0 process_one_work+0x14c/0x28c worker_thread+0x2cc/0x3d4 kthread+0x114/0x118 ret_from_fork+0x10/0x20 Code: f9411c19 940000c9 aa0003f3 b4000460 (f9402326) ---[ end trace 0000000000000000 ]--- Fixes: b5c58b2fdc42 ("dma-mapping: direct calls for dma-iommu") Closes: https://lore.kernel.org/all/10431dfd-ce04-4e0f-973b-c78477303c18@notapiano Reported-by: Nícolas F. R. A. Prado #KernelCI Signed-off-by: Leon Romanovsky Tested-by: Nícolas F. R. A. 
Prado Signed-off-by: Christoph Hellwig --- include/linux/iommu-dma.h | 8 ++++++++ kernel/dma/mapping.c | 12 ------------ kernel/dma/ops_helpers.c | 14 +++++++++++--- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/include/linux/iommu-dma.h b/include/linux/iommu-dma.h index d30a58bf00fd..1bb55ca1ab79 100644 --- a/include/linux/iommu-dma.h +++ b/include/linux/iommu-dma.h @@ -10,6 +10,10 @@ #include #ifdef CONFIG_IOMMU_DMA +static inline bool use_dma_iommu(struct device *dev) +{ + return dev->dma_iommu; +} dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs); @@ -49,6 +53,10 @@ void iommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, void iommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir); #else +static inline bool use_dma_iommu(struct device *dev) +{ + return false; +} static inline dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, unsigned long attrs) diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index b50ae3d198a6..056f27962f69 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -114,18 +114,6 @@ void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, } EXPORT_SYMBOL(dmam_alloc_attrs); -#ifdef CONFIG_IOMMU_DMA -static bool use_dma_iommu(struct device *dev) -{ - return dev->dma_iommu; -} -#else -static bool use_dma_iommu(struct device *dev) -{ - return false; -} -#endif - static bool dma_go_direct(struct device *dev, dma_addr_t mask, const struct dma_map_ops *ops) { diff --git a/kernel/dma/ops_helpers.c b/kernel/dma/ops_helpers.c index af4a6ef48ce0..9afd569eadb9 100644 --- a/kernel/dma/ops_helpers.c +++ b/kernel/dma/ops_helpers.c @@ -4,6 +4,7 @@ * the allocated memory contains normal pages in the direct kernel mapping. 
*/ #include +#include static struct page *dma_common_vaddr_to_page(void *cpu_addr) { @@ -70,8 +71,12 @@ struct page *dma_common_alloc_pages(struct device *dev, size_t size, if (!page) return NULL; - *dma_handle = ops->map_page(dev, page, 0, size, dir, - DMA_ATTR_SKIP_CPU_SYNC); + if (use_dma_iommu(dev)) + *dma_handle = iommu_dma_map_page(dev, page, 0, size, dir, + DMA_ATTR_SKIP_CPU_SYNC); + else + *dma_handle = ops->map_page(dev, page, 0, size, dir, + DMA_ATTR_SKIP_CPU_SYNC); if (*dma_handle == DMA_MAPPING_ERROR) { dma_free_contiguous(dev, page, size); return NULL; @@ -86,7 +91,10 @@ void dma_common_free_pages(struct device *dev, size_t size, struct page *page, { const struct dma_map_ops *ops = get_dma_ops(dev); - if (ops->unmap_page) + if (use_dma_iommu(dev)) + iommu_dma_unmap_page(dev, dma_handle, size, dir, + DMA_ATTR_SKIP_CPU_SYNC); + else if (ops->unmap_page) ops->unmap_page(dev, dma_handle, size, dir, DMA_ATTR_SKIP_CPU_SYNC); dma_free_contiguous(dev, page, size); From 038eb433dc1474c4bc7d33188294e3d4778efdfd Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 6 Sep 2024 17:54:34 -0400 Subject: [PATCH 16/18] dma-mapping: add tracing for dma-mapping API calls When debugging drivers, it can often be useful to trace when memory gets (un)mapped for DMA (and can be accessed by the device). Add some tracepoints for this purpose. Use u64 instead of phys_addr_t and dma_addr_t (and similarly %llx instead of %pa) because libtraceevent can't handle typedefs in all cases. 
Signed-off-by: Sean Anderson Signed-off-by: Christoph Hellwig --- include/trace/events/dma.h | 341 +++++++++++++++++++++++++++++++++++++ kernel/dma/mapping.c | 24 ++- 2 files changed, 364 insertions(+), 1 deletion(-) create mode 100644 include/trace/events/dma.h diff --git a/include/trace/events/dma.h b/include/trace/events/dma.h new file mode 100644 index 000000000000..f57f05331d73 --- /dev/null +++ b/include/trace/events/dma.h @@ -0,0 +1,341 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM dma + +#if !defined(_TRACE_DMA_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_DMA_H + +#include +#include +#include +#include + +TRACE_DEFINE_ENUM(DMA_BIDIRECTIONAL); +TRACE_DEFINE_ENUM(DMA_TO_DEVICE); +TRACE_DEFINE_ENUM(DMA_FROM_DEVICE); +TRACE_DEFINE_ENUM(DMA_NONE); + +#define decode_dma_data_direction(dir) \ + __print_symbolic(dir, \ + { DMA_BIDIRECTIONAL, "BIDIRECTIONAL" }, \ + { DMA_TO_DEVICE, "TO_DEVICE" }, \ + { DMA_FROM_DEVICE, "FROM_DEVICE" }, \ + { DMA_NONE, "NONE" }) + +#define decode_dma_attrs(attrs) \ + __print_flags(attrs, "|", \ + { DMA_ATTR_WEAK_ORDERING, "WEAK_ORDERING" }, \ + { DMA_ATTR_WRITE_COMBINE, "WRITE_COMBINE" }, \ + { DMA_ATTR_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING" }, \ + { DMA_ATTR_SKIP_CPU_SYNC, "SKIP_CPU_SYNC" }, \ + { DMA_ATTR_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS" }, \ + { DMA_ATTR_ALLOC_SINGLE_PAGES, "ALLOC_SINGLE_PAGES" }, \ + { DMA_ATTR_NO_WARN, "NO_WARN" }, \ + { DMA_ATTR_PRIVILEGED, "PRIVILEGED" }) + +DECLARE_EVENT_CLASS(dma_map, + TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, phys_addr) + __field(u64, dma_addr) + __field(size_t, size) + __field(enum dma_data_direction, dir) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + __assign_str(device); + __entry->phys_addr = 
phys_addr; + __entry->dma_addr = dma_addr; + __entry->size = size; + __entry->dir = dir; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s dma_addr=%llx size=%zu phys_addr=%llx attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __entry->dma_addr, + __entry->size, + __entry->phys_addr, + decode_dma_attrs(__entry->attrs)) +); + +DEFINE_EVENT(dma_map, dma_map_page, + TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs)); + +DEFINE_EVENT(dma_map, dma_map_resource, + TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr, + size_t size, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs)); + +DECLARE_EVENT_CLASS(dma_unmap, + TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, addr, size, dir, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, addr) + __field(size_t, size) + __field(enum dma_data_direction, dir) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + __assign_str(device); + __entry->addr = addr; + __entry->size = size; + __entry->dir = dir; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s dma_addr=%llx size=%zu attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __entry->addr, + __entry->size, + decode_dma_attrs(__entry->attrs)) +); + +DEFINE_EVENT(dma_unmap, dma_unmap_page, + TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, addr, size, dir, attrs)); + +DEFINE_EVENT(dma_unmap, dma_unmap_resource, + TP_PROTO(struct device *dev, dma_addr_t addr, size_t size, + enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, addr, size, dir, attrs)); + +TRACE_EVENT(dma_alloc, + TP_PROTO(struct device *dev, 
void *virt_addr, dma_addr_t dma_addr, + size_t size, gfp_t flags, unsigned long attrs), + TP_ARGS(dev, virt_addr, dma_addr, size, flags, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, phys_addr) + __field(u64, dma_addr) + __field(size_t, size) + __field(gfp_t, flags) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + __assign_str(device); + __entry->phys_addr = virt_to_phys(virt_addr); + __entry->dma_addr = dma_addr; + __entry->size = size; + __entry->flags = flags; + __entry->attrs = attrs; + ), + + TP_printk("%s dma_addr=%llx size=%zu phys_addr=%llx flags=%s attrs=%s", + __get_str(device), + __entry->dma_addr, + __entry->size, + __entry->phys_addr, + show_gfp_flags(__entry->flags), + decode_dma_attrs(__entry->attrs)) +); + +TRACE_EVENT(dma_free, + TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr, + size_t size, unsigned long attrs), + TP_ARGS(dev, virt_addr, dma_addr, size, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, phys_addr) + __field(u64, dma_addr) + __field(size_t, size) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + __assign_str(device); + __entry->phys_addr = virt_to_phys(virt_addr); + __entry->dma_addr = dma_addr; + __entry->size = size; + __entry->attrs = attrs; + ), + + TP_printk("%s dma_addr=%llx size=%zu phys_addr=%llx attrs=%s", + __get_str(device), + __entry->dma_addr, + __entry->size, + __entry->phys_addr, + decode_dma_attrs(__entry->attrs)) +); + +TRACE_EVENT(dma_map_sg, + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + int ents, enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, sg, nents, ents, dir, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __dynamic_array(u64, phys_addrs, nents) + __dynamic_array(u64, dma_addrs, ents) + __dynamic_array(unsigned int, lengths, ents) + __field(enum dma_data_direction, dir) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + int i; + + 
__assign_str(device); + for (i = 0; i < nents; i++) + ((u64 *)__get_dynamic_array(phys_addrs))[i] = + sg_phys(sg + i); + for (i = 0; i < ents; i++) { + ((u64 *)__get_dynamic_array(dma_addrs))[i] = + sg_dma_address(sg + i); + ((unsigned int *)__get_dynamic_array(lengths))[i] = + sg_dma_len(sg + i); + } + __entry->dir = dir; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s dma_addrs=%s sizes=%s phys_addrs=%s attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __print_array(__get_dynamic_array(dma_addrs), + __get_dynamic_array_len(dma_addrs) / + sizeof(u64), sizeof(u64)), + __print_array(__get_dynamic_array(lengths), + __get_dynamic_array_len(lengths) / + sizeof(unsigned int), sizeof(unsigned int)), + __print_array(__get_dynamic_array(phys_addrs), + __get_dynamic_array_len(phys_addrs) / + sizeof(u64), sizeof(u64)), + decode_dma_attrs(__entry->attrs)) +); + +TRACE_EVENT(dma_unmap_sg, + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs), + TP_ARGS(dev, sg, nents, dir, attrs), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __dynamic_array(u64, addrs, nents) + __field(enum dma_data_direction, dir) + __field(unsigned long, attrs) + ), + + TP_fast_assign( + int i; + + __assign_str(device); + for (i = 0; i < nents; i++) + ((u64 *)__get_dynamic_array(addrs))[i] = + sg_phys(sg + i); + __entry->dir = dir; + __entry->attrs = attrs; + ), + + TP_printk("%s dir=%s phys_addrs=%s attrs=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __print_array(__get_dynamic_array(addrs), + __get_dynamic_array_len(addrs) / + sizeof(u64), sizeof(u64)), + decode_dma_attrs(__entry->attrs)) +); + +DECLARE_EVENT_CLASS(dma_sync_single, + TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir), + TP_ARGS(dev, dma_addr, size, dir), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __field(u64, dma_addr) + __field(size_t, size) + 
__field(enum dma_data_direction, dir) + ), + + TP_fast_assign( + __assign_str(device); + __entry->dma_addr = dma_addr; + __entry->size = size; + __entry->dir = dir; + ), + + TP_printk("%s dir=%s dma_addr=%llx size=%zu", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __entry->dma_addr, + __entry->size) +); + +DEFINE_EVENT(dma_sync_single, dma_sync_single_for_cpu, + TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir), + TP_ARGS(dev, dma_addr, size, dir)); + +DEFINE_EVENT(dma_sync_single, dma_sync_single_for_device, + TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir), + TP_ARGS(dev, dma_addr, size, dir)); + +DECLARE_EVENT_CLASS(dma_sync_sg, + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir), + TP_ARGS(dev, sg, nents, dir), + + TP_STRUCT__entry( + __string(device, dev_name(dev)) + __dynamic_array(u64, dma_addrs, nents) + __dynamic_array(unsigned int, lengths, nents) + __field(enum dma_data_direction, dir) + ), + + TP_fast_assign( + int i; + + __assign_str(device); + for (i = 0; i < nents; i++) { + ((u64 *)__get_dynamic_array(dma_addrs))[i] = + sg_dma_address(sg + i); + ((unsigned int *)__get_dynamic_array(lengths))[i] = + sg_dma_len(sg + i); + } + __entry->dir = dir; + ), + + TP_printk("%s dir=%s dma_addrs=%s sizes=%s", + __get_str(device), + decode_dma_data_direction(__entry->dir), + __print_array(__get_dynamic_array(dma_addrs), + __get_dynamic_array_len(dma_addrs) / + sizeof(u64), sizeof(u64)), + __print_array(__get_dynamic_array(lengths), + __get_dynamic_array_len(lengths) / + sizeof(unsigned int), sizeof(unsigned int))) +); + +DEFINE_EVENT(dma_sync_sg, dma_sync_sg_for_cpu, + TP_PROTO(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir), + TP_ARGS(dev, sg, nents, dir)); + +DEFINE_EVENT(dma_sync_sg, dma_sync_sg_for_device, + TP_PROTO(struct device *dev, struct scatterlist *sg, int 
nents, + enum dma_data_direction dir), + TP_ARGS(dev, sg, nents, dir)); + +#endif /* _TRACE_DMA_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 056f27962f69..7550b5dc5e55 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -18,6 +18,9 @@ #include "debug.h" #include "direct.h" +#define CREATE_TRACE_POINTS +#include + #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) @@ -169,6 +172,8 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, else addr = ops->map_page(dev, page, offset, size, dir, attrs); kmsan_handle_dma(page, offset, size, dir); + trace_dma_map_page(dev, page_to_phys(page) + offset, addr, size, dir, + attrs); debug_dma_map_page(dev, page, offset, size, dir, addr, attrs); return addr; @@ -188,6 +193,7 @@ void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, iommu_dma_unmap_page(dev, addr, size, dir, attrs); else ops->unmap_page(dev, addr, size, dir, attrs); + trace_dma_unmap_page(dev, addr, size, dir, attrs); debug_dma_unmap_page(dev, addr, size, dir); } EXPORT_SYMBOL(dma_unmap_page_attrs); @@ -213,6 +219,7 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, if (ents > 0) { kmsan_handle_dma_sg(sg, nents, dir); + trace_dma_map_sg(dev, sg, nents, ents, dir, attrs); debug_dma_map_sg(dev, sg, nents, ents, dir, attrs); } else if (WARN_ON_ONCE(ents != -EINVAL && ents != -ENOMEM && ents != -EIO && ents != -EREMOTEIO)) { @@ -298,6 +305,7 @@ void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); + trace_dma_unmap_sg(dev, sg, nents, dir, attrs); debug_dma_unmap_sg(dev, sg, nents, dir); if (dma_map_direct(dev, ops) || arch_dma_unmap_sg_direct(dev, sg, nents)) @@ -327,6 +335,7 @@ dma_addr_t dma_map_resource(struct device 
*dev, phys_addr_t phys_addr, else if (ops->map_resource) addr = ops->map_resource(dev, phys_addr, size, dir, attrs); + trace_dma_map_resource(dev, phys_addr, addr, size, dir, attrs); debug_dma_map_resource(dev, phys_addr, size, dir, addr, attrs); return addr; } @@ -344,6 +353,7 @@ void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, iommu_dma_unmap_resource(dev, addr, size, dir, attrs); else if (ops->unmap_resource) ops->unmap_resource(dev, addr, size, dir, attrs); + trace_dma_unmap_resource(dev, addr, size, dir, attrs); debug_dma_unmap_resource(dev, addr, size, dir); } EXPORT_SYMBOL(dma_unmap_resource); @@ -361,6 +371,7 @@ void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, iommu_dma_sync_single_for_cpu(dev, addr, size, dir); else if (ops->sync_single_for_cpu) ops->sync_single_for_cpu(dev, addr, size, dir); + trace_dma_sync_single_for_cpu(dev, addr, size, dir); debug_dma_sync_single_for_cpu(dev, addr, size, dir); } EXPORT_SYMBOL(__dma_sync_single_for_cpu); @@ -377,6 +388,7 @@ void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr, iommu_dma_sync_single_for_device(dev, addr, size, dir); else if (ops->sync_single_for_device) ops->sync_single_for_device(dev, addr, size, dir); + trace_dma_sync_single_for_device(dev, addr, size, dir); debug_dma_sync_single_for_device(dev, addr, size, dir); } EXPORT_SYMBOL(__dma_sync_single_for_device); @@ -393,6 +405,7 @@ void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, iommu_dma_sync_sg_for_cpu(dev, sg, nelems, dir); else if (ops->sync_sg_for_cpu) ops->sync_sg_for_cpu(dev, sg, nelems, dir); + trace_dma_sync_sg_for_cpu(dev, sg, nelems, dir); debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); } EXPORT_SYMBOL(__dma_sync_sg_for_cpu); @@ -409,6 +422,7 @@ void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, iommu_dma_sync_sg_for_device(dev, sg, nelems, dir); else if (ops->sync_sg_for_device) ops->sync_sg_for_device(dev, sg, nelems, 
dir); + trace_dma_sync_sg_for_device(dev, sg, nelems, dir); debug_dma_sync_sg_for_device(dev, sg, nelems, dir); } EXPORT_SYMBOL(__dma_sync_sg_for_device); @@ -601,6 +615,7 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, else return NULL; + trace_dma_alloc(dev, cpu_addr, *dma_handle, size, flag, attrs); debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr, attrs); return cpu_addr; } @@ -625,6 +640,7 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, if (!cpu_addr) return; + trace_dma_free(dev, cpu_addr, dma_handle, size, attrs); debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); if (dma_alloc_direct(dev, ops)) dma_direct_free(dev, size, cpu_addr, dma_handle, attrs); @@ -662,8 +678,11 @@ struct page *dma_alloc_pages(struct device *dev, size_t size, { struct page *page = __dma_alloc_pages(dev, size, dma_handle, dir, gfp); - if (page) + if (page) { + trace_dma_map_page(dev, page_to_phys(page), *dma_handle, size, + dir, 0); debug_dma_map_page(dev, page, 0, size, dir, *dma_handle, 0); + } return page; } EXPORT_SYMBOL_GPL(dma_alloc_pages); @@ -685,6 +704,7 @@ static void __dma_free_pages(struct device *dev, size_t size, struct page *page, void dma_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_handle, enum dma_data_direction dir) { + trace_dma_unmap_page(dev, dma_handle, size, dir, 0); debug_dma_unmap_page(dev, dma_handle, size, dir); __dma_free_pages(dev, size, page, dma_handle, dir); } @@ -747,6 +767,7 @@ struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, if (sgt) { sgt->nents = 1; + trace_dma_map_sg(dev, sgt->sgl, sgt->orig_nents, 1, dir, attrs); debug_dma_map_sg(dev, sgt->sgl, sgt->orig_nents, 1, dir, attrs); } return sgt; @@ -767,6 +788,7 @@ void dma_free_noncontiguous(struct device *dev, size_t size, { const struct dma_map_ops *ops = get_dma_ops(dev); + trace_dma_unmap_sg(dev, sgt->sgl, sgt->orig_nents, dir, 0); debug_dma_unmap_sg(dev, sgt->sgl, 
sgt->orig_nents, dir); if (ops && ops->free_noncontiguous) ops->free_noncontiguous(dev, size, sgt, dir); From f45cfab28fcd5ac67a38750b6c68316b26d35ac8 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 11 Sep 2024 13:15:00 +0300 Subject: [PATCH 17/18] dma-mapping: reliably inform about DMA support for IOMMU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the DMA IOMMU path is going to be used, the appropriate check should return that DMA is supported. Fixes: b5c58b2fdc42 ("dma-mapping: direct calls for dma-iommu") Closes: https://lore.kernel.org/all/181e06ff-35a3-434f-b505-672f430bd1cb@notapiano Reported-by: Nícolas F. R. A. Prado #KernelCI Signed-off-by: Leon Romanovsky Reviewed-by: Robin Murphy Tested-by: Nícolas F. R. A. Prado Signed-off-by: Christoph Hellwig --- kernel/dma/mapping.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 7550b5dc5e55..022d670f8cad 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -843,6 +843,9 @@ static int dma_supported(struct device *dev, u64 mask) if (WARN_ON(ops && use_dma_iommu(dev))) return false; + + if (use_dma_iommu(dev)) + return true; /* * ->dma_supported sets the bypass flag, so we must always call * into the method here unless the device is truly direct mapped. From a5fb217f13f74b2af2ab366ffad522bae717f93c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 12 Sep 2024 09:21:18 +0200 Subject: [PATCH 18/18] dma-mapping: reflow dma_supported dma_supported has become too much spaghetti for my taste. Reflow it to remove the duplicate use_dma_iommu condition and make the main path more obvious. 
Signed-off-by: Christoph Hellwig Reviewed-by: Leon Romanovsky --- kernel/dma/mapping.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 022d670f8cad..b839683da0ba 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -841,20 +841,23 @@ static int dma_supported(struct device *dev, u64 mask) { const struct dma_map_ops *ops = get_dma_ops(dev); - if (WARN_ON(ops && use_dma_iommu(dev))) - return false; - - if (use_dma_iommu(dev)) + if (use_dma_iommu(dev)) { + if (WARN_ON(ops)) + return false; return true; + } + /* - * ->dma_supported sets the bypass flag, so we must always call - * into the method here unless the device is truly direct mapped. + * ->dma_supported sets and clears the bypass flag, so ignore it here + * and always call into the method if there is one. */ - if (!ops) - return dma_direct_supported(dev, mask); - if (!ops->dma_supported) - return 1; - return ops->dma_supported(dev, mask); + if (ops) { + if (!ops->dma_supported) + return true; + return ops->dma_supported(dev, mask); + } + + return dma_direct_supported(dev, mask); } bool dma_pci_p2pdma_supported(struct device *dev)