dma-mapping: add tracing for dma-mapping API calls

When debugging drivers, it can often be useful to trace when memory gets
(un)mapped for DMA (and can be accessed by the device). Add some
tracepoints for this purpose.

Use u64 instead of phys_addr_t and dma_addr_t (and similarly %llx instead
of %pa) because libtraceevent can't handle typedefs in all cases.

Signed-off-by: Sean Anderson <sean.anderson@linux.dev>
Signed-off-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
Sean Anderson 2024-09-06 17:54:34 -04:00 committed by Christoph Hellwig
parent 19156263cb
commit 038eb433dc
2 changed files with 364 additions and 1 deletions

341
include/trace/events/dma.h Normal file
View File

@ -0,0 +1,341 @@
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM dma
#if !defined(_TRACE_DMA_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_DMA_H
#include <linux/tracepoint.h>
#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
#include <trace/events/mmflags.h>
/*
 * Register the dma_data_direction enumerators with the trace event
 * infrastructure (TRACE_DEFINE_ENUM), so the names referenced by
 * decode_dma_data_direction() below can be resolved to their numeric values.
 */
TRACE_DEFINE_ENUM(DMA_BIDIRECTIONAL);
TRACE_DEFINE_ENUM(DMA_TO_DEVICE);
TRACE_DEFINE_ENUM(DMA_FROM_DEVICE);
TRACE_DEFINE_ENUM(DMA_NONE);
/*
 * decode_dma_data_direction - print a recorded DMA direction symbolically.
 * @dir: an enum dma_data_direction value stored in a trace entry.
 *
 * For use as a "%s" argument inside TP_printk(); maps each direction to its
 * name without the "DMA_" prefix.
 */
#define decode_dma_data_direction(dir) \
__print_symbolic(dir, \
{ DMA_BIDIRECTIONAL, "BIDIRECTIONAL" }, \
{ DMA_TO_DEVICE, "TO_DEVICE" }, \
{ DMA_FROM_DEVICE, "FROM_DEVICE" }, \
{ DMA_NONE, "NONE" })
/*
 * decode_dma_attrs - print a recorded DMA attribute bitmask symbolically.
 * @attrs: the DMA_ATTR_* flag word stored in a trace entry.
 *
 * For use as a "%s" argument inside TP_printk(). Set flags are printed by
 * name (without the "DMA_ATTR_" prefix), separated by "|".
 */
#define decode_dma_attrs(attrs) \
__print_flags(attrs, "|", \
{ DMA_ATTR_WEAK_ORDERING, "WEAK_ORDERING" }, \
{ DMA_ATTR_WRITE_COMBINE, "WRITE_COMBINE" }, \
{ DMA_ATTR_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING" }, \
{ DMA_ATTR_SKIP_CPU_SYNC, "SKIP_CPU_SYNC" }, \
{ DMA_ATTR_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS" }, \
{ DMA_ATTR_ALLOC_SINGLE_PAGES, "ALLOC_SINGLE_PAGES" }, \
{ DMA_ATTR_NO_WARN, "NO_WARN" }, \
{ DMA_ATTR_PRIVILEGED, "PRIVILEGED" })
/*
 * dma_map - event class shared by the single-range mapping events
 * (dma_map_page, dma_map_resource).
 * @dev:       device the mapping is created for; only its name is recorded
 * @phys_addr: physical address of the range being mapped
 * @dma_addr:  DMA address the range was mapped to
 * @size:      length of the mapping in bytes
 * @dir:       DMA transfer direction
 * @attrs:     DMA_ATTR_* flags passed to the mapping call
 *
 * Both addresses are stored as u64 and printed with %llx rather than using
 * phys_addr_t/dma_addr_t and %pa, because libtraceevent cannot handle
 * typedefs in all cases.
 */
DECLARE_EVENT_CLASS(dma_map,
TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,
size_t size, enum dma_data_direction dir, unsigned long attrs),
TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs),
TP_STRUCT__entry(
__string(device, dev_name(dev))
__field(u64, phys_addr)
__field(u64, dma_addr)
__field(size_t, size)
__field(enum dma_data_direction, dir)
__field(unsigned long, attrs)
),
TP_fast_assign(
__assign_str(device);
__entry->phys_addr = phys_addr;
__entry->dma_addr = dma_addr;
__entry->size = size;
__entry->dir = dir;
__entry->attrs = attrs;
),
TP_printk("%s dir=%s dma_addr=%llx size=%zu phys_addr=%llx attrs=%s",
__get_str(device),
decode_dma_data_direction(__entry->dir),
__entry->dma_addr,
__entry->size,
__entry->phys_addr,
decode_dma_attrs(__entry->attrs))
);
/* dma_map_page: instance of the dma_map class for page mappings. */
DEFINE_EVENT(dma_map, dma_map_page,
TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,
size_t size, enum dma_data_direction dir, unsigned long attrs),
TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs));
/* dma_map_resource: instance of the dma_map class for resource mappings. */
DEFINE_EVENT(dma_map, dma_map_resource,
TP_PROTO(struct device *dev, phys_addr_t phys_addr, dma_addr_t dma_addr,
size_t size, enum dma_data_direction dir, unsigned long attrs),
TP_ARGS(dev, phys_addr, dma_addr, size, dir, attrs));
/*
 * dma_unmap - event class shared by the single-range unmapping events
 * (dma_unmap_page, dma_unmap_resource).
 * @dev:   device the mapping belonged to; only its name is recorded
 * @addr:  DMA address being unmapped (printed as "dma_addr=")
 * @size:  length of the mapping in bytes
 * @dir:   DMA transfer direction
 * @attrs: DMA_ATTR_* flags passed to the unmapping call
 *
 * The address is stored as u64 and printed with %llx rather than using
 * dma_addr_t and %pa, because libtraceevent cannot handle typedefs in all
 * cases.
 */
DECLARE_EVENT_CLASS(dma_unmap,
TP_PROTO(struct device *dev, dma_addr_t addr, size_t size,
enum dma_data_direction dir, unsigned long attrs),
TP_ARGS(dev, addr, size, dir, attrs),
TP_STRUCT__entry(
__string(device, dev_name(dev))
__field(u64, addr)
__field(size_t, size)
__field(enum dma_data_direction, dir)
__field(unsigned long, attrs)
),
TP_fast_assign(
__assign_str(device);
__entry->addr = addr;
__entry->size = size;
__entry->dir = dir;
__entry->attrs = attrs;
),
TP_printk("%s dir=%s dma_addr=%llx size=%zu attrs=%s",
__get_str(device),
decode_dma_data_direction(__entry->dir),
__entry->addr,
__entry->size,
decode_dma_attrs(__entry->attrs))
);
/* dma_unmap_page: instance of the dma_unmap class for page mappings. */
DEFINE_EVENT(dma_unmap, dma_unmap_page,
TP_PROTO(struct device *dev, dma_addr_t addr, size_t size,
enum dma_data_direction dir, unsigned long attrs),
TP_ARGS(dev, addr, size, dir, attrs));
/* dma_unmap_resource: instance of the dma_unmap class for resource mappings. */
DEFINE_EVENT(dma_unmap, dma_unmap_resource,
TP_PROTO(struct device *dev, dma_addr_t addr, size_t size,
enum dma_data_direction dir, unsigned long attrs),
TP_ARGS(dev, addr, size, dir, attrs));
/*
 * dma_alloc - trace a DMA memory allocation.
 * @dev:       device the memory was allocated for; only its name is recorded
 * @virt_addr: CPU address returned by the allocator
 * @dma_addr:  DMA address of the allocation
 * @size:      size of the allocation in bytes
 * @flags:     GFP flags used for the allocation
 * @attrs:     DMA_ATTR_* flags passed to the allocation call
 *
 * The CPU address is recorded as-is and printed with %p.  It must not be
 * run through virt_to_phys(): that helper is only valid for addresses in
 * the kernel linear map, whereas the allocator may hand back a
 * vmalloc/remapped address (or, with DMA_ATTR_NO_KERNEL_MAPPING, an opaque
 * cookie), for which the translation yields a bogus value.
 *
 * dma_addr is stored as u64 and printed with %llx rather than dma_addr_t
 * and %pa, because libtraceevent cannot handle typedefs in all cases.
 */
TRACE_EVENT(dma_alloc,
	TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr,
		 size_t size, gfp_t flags, unsigned long attrs),
	TP_ARGS(dev, virt_addr, dma_addr, size, flags, attrs),

	TP_STRUCT__entry(
		__string(device, dev_name(dev))
		__field(void *, virt_addr)
		__field(u64, dma_addr)
		__field(size_t, size)
		__field(gfp_t, flags)
		__field(unsigned long, attrs)
	),

	TP_fast_assign(
		__assign_str(device);
		__entry->virt_addr = virt_addr;
		__entry->dma_addr = dma_addr;
		__entry->size = size;
		__entry->flags = flags;
		__entry->attrs = attrs;
	),

	TP_printk("%s dma_addr=%llx size=%zu virt_addr=%p flags=%s attrs=%s",
		__get_str(device),
		__entry->dma_addr,
		__entry->size,
		__entry->virt_addr,
		show_gfp_flags(__entry->flags),
		decode_dma_attrs(__entry->attrs))
);
/*
 * dma_free - trace the release of a DMA memory allocation.
 * @dev:       device the memory was allocated for; only its name is recorded
 * @virt_addr: CPU address of the allocation being freed
 * @dma_addr:  DMA address of the allocation
 * @size:      size of the allocation in bytes
 * @attrs:     DMA_ATTR_* flags passed to the free call
 *
 * The CPU address is recorded as-is and printed with %p.  It must not be
 * run through virt_to_phys(): that helper is only valid for addresses in
 * the kernel linear map, whereas the allocation may live at a
 * vmalloc/remapped address (or be an opaque cookie when
 * DMA_ATTR_NO_KERNEL_MAPPING was used), for which the translation yields a
 * bogus value.
 *
 * dma_addr is stored as u64 and printed with %llx rather than dma_addr_t
 * and %pa, because libtraceevent cannot handle typedefs in all cases.
 */
TRACE_EVENT(dma_free,
	TP_PROTO(struct device *dev, void *virt_addr, dma_addr_t dma_addr,
		 size_t size, unsigned long attrs),
	TP_ARGS(dev, virt_addr, dma_addr, size, attrs),

	TP_STRUCT__entry(
		__string(device, dev_name(dev))
		__field(void *, virt_addr)
		__field(u64, dma_addr)
		__field(size_t, size)
		__field(unsigned long, attrs)
	),

	TP_fast_assign(
		__assign_str(device);
		__entry->virt_addr = virt_addr;
		__entry->dma_addr = dma_addr;
		__entry->size = size;
		__entry->attrs = attrs;
	),

	TP_printk("%s dma_addr=%llx size=%zu virt_addr=%p attrs=%s",
		__get_str(device),
		__entry->dma_addr,
		__entry->size,
		__entry->virt_addr,
		decode_dma_attrs(__entry->attrs))
);
/*
 * dma_map_sg - trace the mapping of a scatterlist.
 * @dev:   device the scatterlist is mapped for; only its name is recorded
 * @sg:    first entry of the scatterlist
 * @nents: number of entries handed to the mapping call
 * @ents:  number of DMA segments the mapping produced
 * @dir:   DMA transfer direction
 * @attrs: DMA_ATTR_* flags passed to the mapping call
 *
 * Records the physical address of each of the @nents input entries and the
 * DMA address/length of each of the @ents mapped segments.
 *
 * The list is walked with for_each_sg() rather than indexed as "sg + i":
 * scatterlists may be chained, in which case consecutive entries are not
 * contiguous in memory and plain pointer arithmetic would read past the end
 * of a chunk (or interpret a chain link as a data entry).
 */
TRACE_EVENT(dma_map_sg,
	TP_PROTO(struct device *dev, struct scatterlist *sg, int nents,
		 int ents, enum dma_data_direction dir, unsigned long attrs),
	TP_ARGS(dev, sg, nents, ents, dir, attrs),

	TP_STRUCT__entry(
		__string(device, dev_name(dev))
		__dynamic_array(u64, phys_addrs, nents)
		__dynamic_array(u64, dma_addrs, ents)
		__dynamic_array(unsigned int, lengths, ents)
		__field(enum dma_data_direction, dir)
		__field(unsigned long, attrs)
	),

	TP_fast_assign(
		struct scatterlist *cur;
		int i;

		__assign_str(device);
		/* CPU-side view: one physical address per input entry. */
		for_each_sg(sg, cur, nents, i)
			((u64 *)__get_dynamic_array(phys_addrs))[i] =
				sg_phys(cur);
		/* Device-side view: one address/length per mapped segment. */
		for_each_sg(sg, cur, ents, i) {
			((u64 *)__get_dynamic_array(dma_addrs))[i] =
				sg_dma_address(cur);
			((unsigned int *)__get_dynamic_array(lengths))[i] =
				sg_dma_len(cur);
		}
		__entry->dir = dir;
		__entry->attrs = attrs;
	),

	TP_printk("%s dir=%s dma_addrs=%s sizes=%s phys_addrs=%s attrs=%s",
		__get_str(device),
		decode_dma_data_direction(__entry->dir),
		__print_array(__get_dynamic_array(dma_addrs),
			      __get_dynamic_array_len(dma_addrs) /
				sizeof(u64), sizeof(u64)),
		__print_array(__get_dynamic_array(lengths),
			      __get_dynamic_array_len(lengths) /
				sizeof(unsigned int), sizeof(unsigned int)),
		__print_array(__get_dynamic_array(phys_addrs),
			      __get_dynamic_array_len(phys_addrs) /
				sizeof(u64), sizeof(u64)),
		decode_dma_attrs(__entry->attrs))
);
/*
 * dma_unmap_sg - trace the unmapping of a scatterlist.
 * @dev:   device the scatterlist was mapped for; only its name is recorded
 * @sg:    first entry of the scatterlist
 * @nents: number of entries in the scatterlist
 * @dir:   DMA transfer direction
 * @attrs: DMA_ATTR_* flags passed to the unmapping call
 *
 * Records the physical address of each of the @nents entries.
 *
 * The list is walked with for_each_sg() rather than indexed as "sg + i":
 * scatterlists may be chained, in which case consecutive entries are not
 * contiguous in memory and plain pointer arithmetic would read past the end
 * of a chunk (or interpret a chain link as a data entry).
 */
TRACE_EVENT(dma_unmap_sg,
	TP_PROTO(struct device *dev, struct scatterlist *sg, int nents,
		 enum dma_data_direction dir, unsigned long attrs),
	TP_ARGS(dev, sg, nents, dir, attrs),

	TP_STRUCT__entry(
		__string(device, dev_name(dev))
		__dynamic_array(u64, addrs, nents)
		__field(enum dma_data_direction, dir)
		__field(unsigned long, attrs)
	),

	TP_fast_assign(
		struct scatterlist *cur;
		int i;

		__assign_str(device);
		for_each_sg(sg, cur, nents, i)
			((u64 *)__get_dynamic_array(addrs))[i] = sg_phys(cur);
		__entry->dir = dir;
		__entry->attrs = attrs;
	),

	TP_printk("%s dir=%s phys_addrs=%s attrs=%s",
		__get_str(device),
		decode_dma_data_direction(__entry->dir),
		__print_array(__get_dynamic_array(addrs),
			      __get_dynamic_array_len(addrs) /
				sizeof(u64), sizeof(u64)),
		decode_dma_attrs(__entry->attrs))
);
/*
 * dma_sync_single - event class shared by the single-range sync events
 * (dma_sync_single_for_cpu, dma_sync_single_for_device).
 * @dev:      device the mapping belongs to; only its name is recorded
 * @dma_addr: DMA address of the range being synchronised
 * @size:     length of the range in bytes
 * @dir:      DMA transfer direction
 *
 * dma_addr is stored as u64 and printed with %llx rather than dma_addr_t
 * and %pa, because libtraceevent cannot handle typedefs in all cases.
 */
DECLARE_EVENT_CLASS(dma_sync_single,
TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size,
enum dma_data_direction dir),
TP_ARGS(dev, dma_addr, size, dir),
TP_STRUCT__entry(
__string(device, dev_name(dev))
__field(u64, dma_addr)
__field(size_t, size)
__field(enum dma_data_direction, dir)
),
TP_fast_assign(
__assign_str(device);
__entry->dma_addr = dma_addr;
__entry->size = size;
__entry->dir = dir;
),
TP_printk("%s dir=%s dma_addr=%llx size=%zu",
__get_str(device),
decode_dma_data_direction(__entry->dir),
__entry->dma_addr,
__entry->size)
);
/* dma_sync_single_for_cpu: dma_sync_single instance for syncs towards the CPU. */
DEFINE_EVENT(dma_sync_single, dma_sync_single_for_cpu,
TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size,
enum dma_data_direction dir),
TP_ARGS(dev, dma_addr, size, dir));
/* dma_sync_single_for_device: dma_sync_single instance for syncs towards the device. */
DEFINE_EVENT(dma_sync_single, dma_sync_single_for_device,
TP_PROTO(struct device *dev, dma_addr_t dma_addr, size_t size,
enum dma_data_direction dir),
TP_ARGS(dev, dma_addr, size, dir));
/*
 * dma_sync_sg - event class shared by the scatterlist sync events
 * (dma_sync_sg_for_cpu, dma_sync_sg_for_device).
 * @dev:   device the scatterlist is mapped for; only its name is recorded
 * @sg:    first entry of the scatterlist
 * @nents: number of entries to record
 * @dir:   DMA transfer direction
 *
 * Records the DMA address and DMA length of each of the @nents entries.
 *
 * The list is walked with for_each_sg() rather than indexed as "sg + i":
 * scatterlists may be chained, in which case consecutive entries are not
 * contiguous in memory and plain pointer arithmetic would read past the end
 * of a chunk (or interpret a chain link as a data entry).
 */
DECLARE_EVENT_CLASS(dma_sync_sg,
	TP_PROTO(struct device *dev, struct scatterlist *sg, int nents,
		 enum dma_data_direction dir),
	TP_ARGS(dev, sg, nents, dir),

	TP_STRUCT__entry(
		__string(device, dev_name(dev))
		__dynamic_array(u64, dma_addrs, nents)
		__dynamic_array(unsigned int, lengths, nents)
		__field(enum dma_data_direction, dir)
	),

	TP_fast_assign(
		struct scatterlist *cur;
		int i;

		__assign_str(device);
		for_each_sg(sg, cur, nents, i) {
			((u64 *)__get_dynamic_array(dma_addrs))[i] =
				sg_dma_address(cur);
			((unsigned int *)__get_dynamic_array(lengths))[i] =
				sg_dma_len(cur);
		}
		__entry->dir = dir;
	),

	TP_printk("%s dir=%s dma_addrs=%s sizes=%s",
		__get_str(device),
		decode_dma_data_direction(__entry->dir),
		__print_array(__get_dynamic_array(dma_addrs),
			      __get_dynamic_array_len(dma_addrs) /
				sizeof(u64), sizeof(u64)),
		__print_array(__get_dynamic_array(lengths),
			      __get_dynamic_array_len(lengths) /
				sizeof(unsigned int), sizeof(unsigned int)))
);
/* dma_sync_sg_for_cpu: dma_sync_sg instance for syncs towards the CPU. */
DEFINE_EVENT(dma_sync_sg, dma_sync_sg_for_cpu,
TP_PROTO(struct device *dev, struct scatterlist *sg, int nents,
enum dma_data_direction dir),
TP_ARGS(dev, sg, nents, dir));
/* dma_sync_sg_for_device: dma_sync_sg instance for syncs towards the device. */
DEFINE_EVENT(dma_sync_sg, dma_sync_sg_for_device,
TP_PROTO(struct device *dev, struct scatterlist *sg, int nents,
enum dma_data_direction dir),
TP_ARGS(dev, sg, nents, dir));
#endif /* _TRACE_DMA_H */
/* This part must be outside protection */
#include <trace/define_trace.h>

View File

@ -18,6 +18,9 @@
#include "debug.h"
#include "direct.h"
#define CREATE_TRACE_POINTS
#include <trace/events/dma.h>
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
@ -169,6 +172,8 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page,
else
addr = ops->map_page(dev, page, offset, size, dir, attrs);
kmsan_handle_dma(page, offset, size, dir);
trace_dma_map_page(dev, page_to_phys(page) + offset, addr, size, dir,
attrs);
debug_dma_map_page(dev, page, offset, size, dir, addr, attrs);
return addr;
@ -188,6 +193,7 @@ void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size,
iommu_dma_unmap_page(dev, addr, size, dir, attrs);
else
ops->unmap_page(dev, addr, size, dir, attrs);
trace_dma_unmap_page(dev, addr, size, dir, attrs);
debug_dma_unmap_page(dev, addr, size, dir);
}
EXPORT_SYMBOL(dma_unmap_page_attrs);
@ -213,6 +219,7 @@ static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
if (ents > 0) {
kmsan_handle_dma_sg(sg, nents, dir);
trace_dma_map_sg(dev, sg, nents, ents, dir, attrs);
debug_dma_map_sg(dev, sg, nents, ents, dir, attrs);
} else if (WARN_ON_ONCE(ents != -EINVAL && ents != -ENOMEM &&
ents != -EIO && ents != -EREMOTEIO)) {
@ -298,6 +305,7 @@ void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
const struct dma_map_ops *ops = get_dma_ops(dev);
BUG_ON(!valid_dma_direction(dir));
trace_dma_unmap_sg(dev, sg, nents, dir, attrs);
debug_dma_unmap_sg(dev, sg, nents, dir);
if (dma_map_direct(dev, ops) ||
arch_dma_unmap_sg_direct(dev, sg, nents))
@ -327,6 +335,7 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr,
else if (ops->map_resource)
addr = ops->map_resource(dev, phys_addr, size, dir, attrs);
trace_dma_map_resource(dev, phys_addr, addr, size, dir, attrs);
debug_dma_map_resource(dev, phys_addr, size, dir, addr, attrs);
return addr;
}
@ -344,6 +353,7 @@ void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
iommu_dma_unmap_resource(dev, addr, size, dir, attrs);
else if (ops->unmap_resource)
ops->unmap_resource(dev, addr, size, dir, attrs);
trace_dma_unmap_resource(dev, addr, size, dir, attrs);
debug_dma_unmap_resource(dev, addr, size, dir);
}
EXPORT_SYMBOL(dma_unmap_resource);
@ -361,6 +371,7 @@ void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
iommu_dma_sync_single_for_cpu(dev, addr, size, dir);
else if (ops->sync_single_for_cpu)
ops->sync_single_for_cpu(dev, addr, size, dir);
trace_dma_sync_single_for_cpu(dev, addr, size, dir);
debug_dma_sync_single_for_cpu(dev, addr, size, dir);
}
EXPORT_SYMBOL(__dma_sync_single_for_cpu);
@ -377,6 +388,7 @@ void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
iommu_dma_sync_single_for_device(dev, addr, size, dir);
else if (ops->sync_single_for_device)
ops->sync_single_for_device(dev, addr, size, dir);
trace_dma_sync_single_for_device(dev, addr, size, dir);
debug_dma_sync_single_for_device(dev, addr, size, dir);
}
EXPORT_SYMBOL(__dma_sync_single_for_device);
@ -393,6 +405,7 @@ void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
iommu_dma_sync_sg_for_cpu(dev, sg, nelems, dir);
else if (ops->sync_sg_for_cpu)
ops->sync_sg_for_cpu(dev, sg, nelems, dir);
trace_dma_sync_sg_for_cpu(dev, sg, nelems, dir);
debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir);
}
EXPORT_SYMBOL(__dma_sync_sg_for_cpu);
@ -409,6 +422,7 @@ void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
iommu_dma_sync_sg_for_device(dev, sg, nelems, dir);
else if (ops->sync_sg_for_device)
ops->sync_sg_for_device(dev, sg, nelems, dir);
trace_dma_sync_sg_for_device(dev, sg, nelems, dir);
debug_dma_sync_sg_for_device(dev, sg, nelems, dir);
}
EXPORT_SYMBOL(__dma_sync_sg_for_device);
@ -601,6 +615,7 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
else
return NULL;
trace_dma_alloc(dev, cpu_addr, *dma_handle, size, flag, attrs);
debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr, attrs);
return cpu_addr;
}
@ -625,6 +640,7 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
if (!cpu_addr)
return;
trace_dma_free(dev, cpu_addr, dma_handle, size, attrs);
debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
if (dma_alloc_direct(dev, ops))
dma_direct_free(dev, size, cpu_addr, dma_handle, attrs);
@ -662,8 +678,11 @@ struct page *dma_alloc_pages(struct device *dev, size_t size,
{
struct page *page = __dma_alloc_pages(dev, size, dma_handle, dir, gfp);
if (page)
if (page) {
trace_dma_map_page(dev, page_to_phys(page), *dma_handle, size,
dir, 0);
debug_dma_map_page(dev, page, 0, size, dir, *dma_handle, 0);
}
return page;
}
EXPORT_SYMBOL_GPL(dma_alloc_pages);
@ -685,6 +704,7 @@ static void __dma_free_pages(struct device *dev, size_t size, struct page *page,
/*
 * Release pages together with their DMA mapping.
 *
 * The release is reported to tracing and to the DMA debug code as a page
 * unmap (with no attributes) before the pages are handed to
 * __dma_free_pages().
 */
void dma_free_pages(struct device *dev, size_t size, struct page *page,
dma_addr_t dma_handle, enum dma_data_direction dir)
{
trace_dma_unmap_page(dev, dma_handle, size, dir, 0);
debug_dma_unmap_page(dev, dma_handle, size, dir);
__dma_free_pages(dev, size, page, dma_handle, dir);
}
@ -747,6 +767,7 @@ struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size,
if (sgt) {
sgt->nents = 1;
trace_dma_map_sg(dev, sgt->sgl, sgt->orig_nents, 1, dir, attrs);
debug_dma_map_sg(dev, sgt->sgl, sgt->orig_nents, 1, dir, attrs);
}
return sgt;
@ -767,6 +788,7 @@ void dma_free_noncontiguous(struct device *dev, size_t size,
{
const struct dma_map_ops *ops = get_dma_ops(dev);
trace_dma_unmap_sg(dev, sgt->sgl, sgt->orig_nents, dir, 0);
debug_dma_unmap_sg(dev, sgt->sgl, sgt->orig_nents, dir);
if (ops && ops->free_noncontiguous)
ops->free_noncontiguous(dev, size, sgt, dir);