iommu/vt-d: Add common code for dmar latency performance monitors

The execution time of some operations is very performance critical, such as cache invalidation and PRQ processing time. This adds some common code to monitor the execution time range of those operations. The interfaces include enabling/disabling, checking status, updating sampling data and providing a common string format for users. Signed-off-by: Fenghua Yu <fenghua.yu@intel.com> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com> Link: https://lore.kernel.org/r/20210520031531.712333-1-baolu.lu@linux.intel.com Link: https://lore.kernel.org/r/20210610020115.1637656-14-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel <jroedel@suse.de>
2024-11-10 22:21:40 +00:00 · 2021-06-10 10:01:05 +08:00 · 2021-06-10 10:01:05 +08:00 · 55ee5e67a5
commit 55ee5e67a5
parent e93a67f5a0
5 changed files with 244 additions and 0 deletions
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@ -3,6 +3,9 @@
 config DMAR_TABLE
 	bool
 config DMAR_PERF
 	bool
 config INTEL_IOMMU
 	bool "Support for Intel IOMMU using DMA Remapping Devices"
 	depends on PCI_MSI && ACPI && (X86 || IA64)
--- a/drivers/iommu/intel/Makefile
+++ b/drivers/iommu/intel/Makefile
@ -2,6 +2,7 @@
 obj-$(CONFIG_DMAR_TABLE) += dmar.o
 obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o
 obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o
 obj-$(CONFIG_DMAR_PERF) += perf.o
 obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
 obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o
 obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o
--- a/drivers/iommu/intel/perf.c
+++ b/drivers/iommu/intel/perf.c
@ -0,0 +1,166 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
 * perf.c - performance monitor
 *
 * Copyright (C) 2021 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 *         Fenghua Yu <fenghua.yu@intel.com>
 */
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/math64.h>
 #include <linux/intel-iommu.h>
 #include "perf.h"
 /*
  * Single file-wide lock taken (with interrupts saved/disabled) by the
  * enable, disable, update and snapshot paths below; it is shared by the
  * statistics of every IOMMU rather than being per-IOMMU.
  */
 static DEFINE_SPINLOCK(latency_lock);
 bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type)
 {
 	struct latency_statistic *lstat = iommu->perf_statistic;
 	return lstat && lstat[type].enabled;
 }
 /**
  * dmar_latency_enable - start latency sampling of one type on an IOMMU
  * @iommu: IOMMU whose latency is to be sampled
  * @type: latency type to enable
  *
  * The per-IOMMU statistics array (DMAR_LATENCY_NUM entries, one per latency
  * type) is allocated zeroed on first use and stored in
  * @iommu->perf_statistic. Enabling a type that is already enabled is a
  * no-op, whether that is noticed by the lockless fast path or only after
  * taking the lock.
  *
  * Return: 0 if sampling of @type is enabled on return, -ENOMEM if the
  * statistics array could not be allocated.
  */
 int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type)
 {
 	struct latency_statistic *lstat;
 	unsigned long flags;
 	int ret = 0;
 	if (dmar_latency_enabled(iommu, type))
 		return 0;
 	spin_lock_irqsave(&latency_lock, flags);
 	if (!iommu->perf_statistic) {
 		/* GFP_ATOMIC: we hold a spinlock with interrupts disabled. */
 		iommu->perf_statistic = kcalloc(DMAR_LATENCY_NUM, sizeof(*lstat),
 						GFP_ATOMIC);
 		if (!iommu->perf_statistic) {
 			ret = -ENOMEM;
 			goto unlock_out;
 		}
 	}
 	lstat = iommu->perf_statistic;
 	/*
 	 * Re-check under the lock: another caller may have enabled this type
 	 * after the lockless check above. That is still success, so do not
 	 * reset its counters and do not report an error.
 	 */
 	if (!lstat[type].enabled) {
 		lstat[type].enabled = true;
 		/*
 		 * Seed the minimum with a sentinel so the first sample always
 		 * replaces it; the snapshot code reports UINT_MAX as 0.
 		 */
 		lstat[type].counter[COUNTS_MIN] = UINT_MAX;
 	}
 unlock_out:
 	spin_unlock_irqrestore(&latency_lock, flags);
 	return ret;
 }
 /**
  * dmar_latency_disable - stop latency sampling of one type on an IOMMU
  * @iommu: IOMMU whose sampling is to be stopped
  * @type: latency type to disable
  *
  * Zeroes the statistics entry of @type, which clears its counters, its
  * sample count and its enabled flag. Entries of other latency types are
  * left untouched, and the statistics array itself is not freed. Does
  * nothing if @type is not currently enabled.
  */
 void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type)
 {
 	struct latency_statistic *lstat = iommu->perf_statistic;
 	unsigned long flags;
 	if (!dmar_latency_enabled(iommu, type))
 		return;
 	spin_lock_irqsave(&latency_lock, flags);
 	/*
 	 * Clear exactly one entry. Clearing DMAR_LATENCY_NUM entries starting
 	 * at &lstat[type] would run past the end of the array for type > 0
 	 * and would wipe every other type for type == 0.
 	 */
 	memset(&lstat[type], 0, sizeof(*lstat));
 	spin_unlock_irqrestore(&latency_lock, flags);
 }
 /*
  * Record one latency sample of @type for @iommu. Samples are dropped when
  * @type is not enabled. Each sample bumps exactly one histogram bucket
  * (upper bounds 100, 1000, ... 10000000, then an open-ended last bucket)
  * and updates the running minimum, maximum, sum and sample count.
  */
 void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 latency)
 {
 	struct latency_statistic *stats = iommu->perf_statistic;
 	struct latency_statistic *slot;
 	unsigned long irq_state;
 	u64 bound = 100;
 	int bucket = COUNTS_10e2;
 	if (!dmar_latency_enabled(iommu, type))
 		return;
 	slot = &stats[type];
 	/*
 	 * The bucket enumerators COUNTS_10e2 .. COUNTS_10e8_plus are
 	 * consecutive and each bucket's upper bound is ten times the
 	 * previous one, so walk upwards until the sample fits or the
 	 * catch-all bucket is reached.
 	 */
 	while (bucket < COUNTS_10e8_plus && latency >= bound) {
 		bucket++;
 		bound *= 10;
 	}
 	spin_lock_irqsave(&latency_lock, irq_state);
 	slot->counter[bucket]++;
 	if (latency < slot->counter[COUNTS_MIN])
 		slot->counter[COUNTS_MIN] = latency;
 	if (latency > slot->counter[COUNTS_MAX])
 		slot->counter[COUNTS_MAX] = latency;
 	slot->counter[COUNTS_SUM] += latency;
 	slot->samples++;
 	spin_unlock_irqrestore(&latency_lock, irq_state);
 }
 /*
  * Column headings for the snapshot output, indexed by enum latency_count.
  * Each heading is right-aligned to the width of the "%12" value columns;
  * the first one is padded wider, presumably so that it also spans the
  * row-label column that precedes the values on every data row.
  * The table is read-only, hence const pointers to const strings.
  */
 static const char * const latency_counter_names[] = {
 	"                  <0.1us",
 	"   0.1us-1us", "    1us-10us", "  10us-100us",
 	"   100us-1ms", "    1ms-10ms", "      >=10ms",
 	"     min(us)", "     max(us)", " average(us)"
 };
 /*
  * Row labels for the snapshot output, indexed by enum latency_type.
  * Each label is right-aligned in a 12-character field. The table is
  * read-only, hence const pointers to const strings.
  */
 static const char * const latency_type_names[] = {
 	"   inv_iotlb", "  inv_devtlb", "     inv_iec",
 	"     svm_prq"
 };
 /**
  * dmar_latency_snapshot - format the latency statistics of an IOMMU as text
  * @iommu: IOMMU whose statistics are reported
  * @str: output buffer
  * @size: size of @str in bytes
  *
  * Writes a heading line with the counter names, followed by one row per
  * enabled latency type. Histogram buckets are printed as raw counts; the
  * minimum, maximum and average are divided by 1000 before printing (the
  * headings label them "us"). Output that does not fit in @str is silently
  * truncated; the buffer is always NUL-terminated when @size is non-zero.
  *
  * Return: number of characters actually stored in @str, not counting the
  * terminating NUL.
  */
 int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
 {
 	struct latency_statistic *lstat = iommu->perf_statistic;
 	unsigned long flags;
 	int bytes = 0, i, j;
 	memset(str, 0, size);
 	/*
 	 * scnprintf() returns what was really written, so @bytes can never
 	 * exceed @size and "size - bytes" cannot wrap around on truncation.
 	 */
 	for (i = 0; i < COUNTS_NUM; i++)
 		bytes += scnprintf(str + bytes, size - bytes,
 				   "%s", latency_counter_names[i]);
 	spin_lock_irqsave(&latency_lock, flags);
 	for (i = 0; i < DMAR_LATENCY_NUM; i++) {
 		if (!dmar_latency_enabled(iommu, i))
 			continue;
 		bytes += scnprintf(str + bytes, size - bytes,
 				   "\n%s", latency_type_names[i]);
 		for (j = 0; j < COUNTS_NUM; j++) {
 			u64 val = lstat[i].counter[j];
 			/* 64-bit divisions go through the math64 helpers so
 			 * that 32-bit builds do not need libgcc's __udivdi3.
 			 */
 			switch (j) {
 			case COUNTS_MIN:
 				/* UINT_MAX is the value seeded at enable time,
 				 * i.e. no sample has lowered the minimum yet.
 				 */
 				if (val == UINT_MAX)
 					val = 0;
 				else
 					val = div_u64(val, 1000);
 				break;
 			case COUNTS_MAX:
 				val = div_u64(val, 1000);
 				break;
 			case COUNTS_SUM:
 				/* Shown as the average: sum / samples / 1000. */
 				if (lstat[i].samples)
 					val = div64_u64(val,
 							lstat[i].samples * 1000);
 				else
 					val = 0;
 				break;
 			default:
 				break;
 			}
 			/* val is unsigned 64-bit, so print it as such. */
 			bytes += scnprintf(str + bytes, size - bytes,
 					   "%12llu", val);
 		}
 	}
 	spin_unlock_irqrestore(&latency_lock, flags);
 	return bytes;
 }
--- a/drivers/iommu/intel/perf.h
+++ b/drivers/iommu/intel/perf.h
@ -0,0 +1,73 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
 * perf.h - performance monitor header
 *
 * Copyright (C) 2021 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */
 /*
  * Kinds of operation whose latency can be sampled. The value is used as
  * the index into the per-IOMMU statistics array and into the row-label
  * table of the snapshot output, so the order here must match that table.
  */
 enum latency_type {
 	DMAR_LATENCY_INV_IOTLB = 0,
 	DMAR_LATENCY_INV_DEVTLB,
 	DMAR_LATENCY_INV_IEC,
 	DMAR_LATENCY_PRQ,
 	DMAR_LATENCY_NUM	/* number of latency types; not a type itself */
 };
 /*
  * Indices into latency_statistic.counter[]. The first seven entries are
  * histogram buckets; dmar_latency_update() selects one by comparing the
  * sample against 100, 1000, ... 10000000. The time ranges noted below
  * assume samples are in nanoseconds (NOTE(review): confirm with callers).
  * The remaining entries hold aggregate values rather than counts.
  */
 enum latency_count {
 	COUNTS_10e2 = 0,	/* < 0.1us	*/
 	COUNTS_10e3,		/* 0.1us ~ 1us	*/
 	COUNTS_10e4,		/* 1us ~ 10us	*/
 	COUNTS_10e5,		/* 10us ~ 100us	*/
 	COUNTS_10e6,		/* 100us ~ 1ms	*/
 	COUNTS_10e7,		/* 1ms ~ 10ms	*/
 	COUNTS_10e8_plus,	/* 10ms and plus*/
 	COUNTS_MIN,		/* smallest sample; UINT_MAX until first sample */
 	COUNTS_MAX,		/* largest sample */
 	COUNTS_SUM,		/* sum of all samples; snapshot shows the average */
 	COUNTS_NUM		/* number of counters; sizes counter[] */
 };
 /*
  * Statistics of one latency type on one IOMMU. An IOMMU's perf_statistic
  * pointer refers to an array of DMAR_LATENCY_NUM of these, indexed by
  * enum latency_type.
  */
 struct latency_statistic {
 	/* true while sampling of this type is switched on */
 	bool enabled;
 	/* histogram buckets and min/max/sum, indexed by enum latency_count */
 	u64 counter[COUNTS_NUM];
 	/* number of samples recorded; incremented on every update */
 	u64 samples;
 };
 /*
  * Latency monitor interface. The real implementations are only available
  * with CONFIG_DMAR_PERF; otherwise the inline stubs below are used so that
  * callers need no #ifdef of their own.
  */
 #ifdef CONFIG_DMAR_PERF
 /* Start sampling @type on @iommu; returns 0 or a negative errno. */
 int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type);
 /* Stop sampling @type on @iommu. */
 void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type);
 /* Report whether sampling of @type is currently enabled on @iommu. */
 bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type);
 /* Record one @latency sample of @type; ignored when @type is disabled. */
 void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type,
 			 u64 latency);
 /* Format the statistics of @iommu as text into @str (at most @size bytes). */
 int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size);
 #else
 /* Stub: sampling cannot be enabled without CONFIG_DMAR_PERF. */
 static inline int
 dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type)
 {
 	return -EINVAL;
 }
 /* Stub: nothing to disable. */
 static inline void
 dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type)
 {
 }
 /* Stub: no latency type is ever enabled. */
 static inline bool
 dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type)
 {
 	return false;
 }
 /* Stub: samples are discarded. */
 static inline void
 dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 latency)
 {
 }
 /* Stub: writes nothing to @str and reports zero bytes. */
 static inline int
 dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
 {
 	return 0;
 }
 #endif /* CONFIG_DMAR_PERF */
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@ -621,6 +621,7 @@ struct intel_iommu {
 	u32		flags;      /* Software defined flags */
 	struct dmar_drhd_unit *drhd;
 	void *perf_statistic;
 };
 /* Per subdevice private data */