mirror of
https://github.com/torvalds/linux.git
synced 2024-09-20 23:13:00 +00:00
perf metrics: Compute unmerged uncore metrics individually
When merging counts from multiple uncore PMUs the metric is only computed for the metric leader. When merging/aggregation is disabled, prior to this patch just the leader's metric would be computed. Fix this by computing the metric for each PMU. On a SkylakeX: Before: ``` $ perf stat -A -M memory_bandwidth_total -a sleep 1 Performance counter stats for 'system wide': CPU0 82,217 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 9.2 MB/s memory_bandwidth_total CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 0.0 MB/s memory_bandwidth_total CPU0 61,395 UNC_M_CAS_COUNT.WR [uncore_imc_0] CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_0] CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_1] CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_1] CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_1] CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_1] CPU0 81,570 UNC_M_CAS_COUNT.RD [uncore_imc_2] CPU18 113,886 UNC_M_CAS_COUNT.RD [uncore_imc_2] CPU0 62,330 UNC_M_CAS_COUNT.WR [uncore_imc_2] CPU18 66,942 UNC_M_CAS_COUNT.WR [uncore_imc_2] CPU0 75,489 UNC_M_CAS_COUNT.RD [uncore_imc_3] CPU18 27,958 UNC_M_CAS_COUNT.RD [uncore_imc_3] CPU0 55,864 UNC_M_CAS_COUNT.WR [uncore_imc_3] CPU18 38,727 UNC_M_CAS_COUNT.WR [uncore_imc_3] CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_4] CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_4] CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_4] CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_4] CPU0 75,423 UNC_M_CAS_COUNT.RD [uncore_imc_5] CPU18 104,527 UNC_M_CAS_COUNT.RD [uncore_imc_5] CPU0 57,596 UNC_M_CAS_COUNT.WR [uncore_imc_5] CPU18 56,777 UNC_M_CAS_COUNT.WR [uncore_imc_5] CPU0 1,003,440,851 ns duration_time 1.003440851 seconds time elapsed ``` After: ``` $ perf stat -A -M memory_bandwidth_total -a sleep 1 Performance counter stats for 'system wide': CPU0 88,968 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 9.5 MB/s memory_bandwidth_total CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_0] # 0.0 MB/s memory_bandwidth_total CPU0 59,498 UNC_M_CAS_COUNT.WR [uncore_imc_0] CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_0] CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_1] # 0.0 MB/s memory_bandwidth_total CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_1] # 0.0 MB/s memory_bandwidth_total CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_1] CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_1] CPU0 88,635 UNC_M_CAS_COUNT.RD [uncore_imc_2] # 9.5 MB/s memory_bandwidth_total CPU18 117,975 UNC_M_CAS_COUNT.RD [uncore_imc_2] # 11.5 MB/s memory_bandwidth_total CPU0 60,829 UNC_M_CAS_COUNT.WR [uncore_imc_2] CPU18 62,105 UNC_M_CAS_COUNT.WR [uncore_imc_2] CPU0 82,238 UNC_M_CAS_COUNT.RD [uncore_imc_3] # 8.7 MB/s memory_bandwidth_total CPU18 22,906 UNC_M_CAS_COUNT.RD [uncore_imc_3] # 3.6 MB/s memory_bandwidth_total CPU0 53,959 UNC_M_CAS_COUNT.WR [uncore_imc_3] CPU18 32,990 UNC_M_CAS_COUNT.WR [uncore_imc_3] CPU0 0 UNC_M_CAS_COUNT.RD [uncore_imc_4] # 0.0 MB/s memory_bandwidth_total CPU18 0 UNC_M_CAS_COUNT.RD [uncore_imc_4] # 0.0 MB/s memory_bandwidth_total CPU0 0 UNC_M_CAS_COUNT.WR [uncore_imc_4] CPU18 0 UNC_M_CAS_COUNT.WR [uncore_imc_4] CPU0 83,595 UNC_M_CAS_COUNT.RD [uncore_imc_5] # 8.9 MB/s memory_bandwidth_total CPU18 110,151 UNC_M_CAS_COUNT.RD [uncore_imc_5] # 10.5 MB/s memory_bandwidth_total CPU0 56,540 UNC_M_CAS_COUNT.WR [uncore_imc_5] CPU18 53,816 UNC_M_CAS_COUNT.WR [uncore_imc_5] CPU0 1,003,353,416 ns duration_time ``` Signed-off-by: Ian Rogers <irogers@google.com> | Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: K Prateek Nayak <kprateek.nayak@amd.com> Cc: Stephane Eranian <eranian@google.com> Cc: Kaige Ye <ye@kaige.org> Cc: Kajol Jain <kjain@linux.ibm.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: John Garry <john.g.garry@oracle.com> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Link: https://lore.kernel.org/r/20240221070754.4163916-2-irogers@google.com
This commit is contained in:
parent
eee41e6b28
commit
a59fb796a3
|
@ -44,6 +44,8 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events,
|
|||
if (!metric_events)
|
||||
return NULL;
|
||||
|
||||
if (evsel->metric_leader)
|
||||
me.evsel = evsel->metric_leader;
|
||||
nd = rblist__find(metric_events, &me);
|
||||
if (nd)
|
||||
return container_of(nd, struct metric_event, nd);
|
||||
|
|
|
@ -356,6 +356,7 @@ static void print_nsecs(struct perf_stat_config *config,
|
|||
}
|
||||
|
||||
static int prepare_metric(const struct metric_expr *mexp,
|
||||
const struct evsel *evsel,
|
||||
struct expr_parse_ctx *pctx,
|
||||
int aggr_idx)
|
||||
{
|
||||
|
@ -399,8 +400,29 @@ static int prepare_metric(const struct metric_expr *mexp,
|
|||
source_count = 1;
|
||||
} else {
|
||||
struct perf_stat_evsel *ps = metric_events[i]->stats;
|
||||
struct perf_stat_aggr *aggr = &ps->aggr[aggr_idx];
|
||||
struct perf_stat_aggr *aggr;
|
||||
|
||||
/*
|
||||
* If there are multiple uncore PMUs and we're not
|
||||
* reading the leader's stats, determine the stats for
|
||||
* the appropriate uncore PMU.
|
||||
*/
|
||||
if (evsel && evsel->metric_leader &&
|
||||
evsel->pmu != evsel->metric_leader->pmu &&
|
||||
mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) {
|
||||
struct evsel *pos;
|
||||
|
||||
evlist__for_each_entry(evsel->evlist, pos) {
|
||||
if (pos->pmu != evsel->pmu)
|
||||
continue;
|
||||
if (pos->metric_leader != mexp->metric_events[i])
|
||||
continue;
|
||||
ps = pos->stats;
|
||||
source_count = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
aggr = &ps->aggr[aggr_idx];
|
||||
if (!aggr)
|
||||
break;
|
||||
|
||||
|
@ -416,7 +438,8 @@ static int prepare_metric(const struct metric_expr *mexp,
|
|||
source_count = 0;
|
||||
} else {
|
||||
val = aggr->counts.val;
|
||||
source_count = evsel__source_count(metric_events[i]);
|
||||
if (!source_count)
|
||||
source_count = evsel__source_count(metric_events[i]);
|
||||
}
|
||||
}
|
||||
n = strdup(evsel__metric_id(metric_events[i]));
|
||||
|
@ -463,7 +486,7 @@ static void generic_metric(struct perf_stat_config *config,
|
|||
pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list);
|
||||
pctx->sctx.runtime = runtime;
|
||||
pctx->sctx.system_wide = config->system_wide;
|
||||
i = prepare_metric(mexp, pctx, aggr_idx);
|
||||
i = prepare_metric(mexp, evsel, pctx, aggr_idx);
|
||||
if (i < 0) {
|
||||
expr__ctx_free(pctx);
|
||||
return;
|
||||
|
@ -524,7 +547,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx)
|
|||
if (!pctx)
|
||||
return NAN;
|
||||
|
||||
if (prepare_metric(mexp, pctx, aggr_idx) < 0)
|
||||
if (prepare_metric(mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0)
|
||||
goto out;
|
||||
|
||||
if (expr__parse(&ratio, pctx, mexp->metric_expr))
|
||||
|
|
Loading…
Reference in New Issue
Block a user