perf mem: Add statistics for peer snooping
The flag PERF_MEM_SNOOPX_PEER was added to support cache snooping from a peer cache line; the data can come from a peer core, a peer cluster, or a remote NUMA node. This patch adds statistics for the flag PERF_MEM_SNOOPX_PEER. Note that PERF_MEM_SNOOPX_PEER is treated as auxiliary information that works together with the cache level statistics. Therefore, when the flag PERF_MEM_SNOOPX_PEER is set, load operations are accounted in both the cache level metrics (e.g. ld_l2hit, ld_llchit, etc.) and the peer related metrics. Three new metrics are introduced: 'lcl_peer' counts local cache accesses, 'rmt_peer' counts remote accesses (including remote DRAM and any caches in a remote node), and 'tot_peer' is the sum of 'lcl_peer' and 'rmt_peer'. Reviewed-by: Ali Saidi <alisaidi@amazon.com> Signed-off-by: Leo Yan <leo.yan@linaro.org> Tested-by: Ali Saidi <alisaidi@amazon.com> Acked-by: Ian Rogers <irogers@google.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Anshuman Khandual <anshuman.khandual@arm.com> Cc: German Gomez <german.gomez@arm.com> Cc: Gustavo A. R. Silva <gustavoars@kernel.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Clark <james.clark@arm.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: John Garry <john.garry@huawei.com> Cc: Kajol Jain <kjain@linux.ibm.com> Cc: Like Xu <likexu@tencent.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Mike Leach <mike.leach@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Timothy Hayes <timothy.hayes@arm.com> Cc: Will Deacon <will@kernel.org> Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20220811062451.435810-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
committed by
Arnaldo Carvalho de Melo
parent
4e6430cbb1
commit
e843dec53a
@@ -525,6 +525,7 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
|
|||||||
u64 op = data_src->mem_op;
|
u64 op = data_src->mem_op;
|
||||||
u64 lvl = data_src->mem_lvl;
|
u64 lvl = data_src->mem_lvl;
|
||||||
u64 snoop = data_src->mem_snoop;
|
u64 snoop = data_src->mem_snoop;
|
||||||
|
u64 snoopx = data_src->mem_snoopx;
|
||||||
u64 lock = data_src->mem_lock;
|
u64 lock = data_src->mem_lock;
|
||||||
u64 blk = data_src->mem_blk;
|
u64 blk = data_src->mem_blk;
|
||||||
/*
|
/*
|
||||||
@@ -544,6 +545,12 @@ do { \
|
|||||||
stats->tot_hitm++; \
|
stats->tot_hitm++; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
#define PEER_INC(__f) \
|
||||||
|
do { \
|
||||||
|
stats->__f++; \
|
||||||
|
stats->tot_peer++; \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
#define P(a, b) PERF_MEM_##a##_##b
|
#define P(a, b) PERF_MEM_##a##_##b
|
||||||
|
|
||||||
stats->nr_entries++;
|
stats->nr_entries++;
|
||||||
@@ -567,12 +574,20 @@ do { \
|
|||||||
if (lvl & P(LVL, IO)) stats->ld_io++;
|
if (lvl & P(LVL, IO)) stats->ld_io++;
|
||||||
if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
|
if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
|
||||||
if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
|
if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
|
||||||
if (lvl & P(LVL, L2 )) stats->ld_l2hit++;
|
if (lvl & P(LVL, L2)) {
|
||||||
|
stats->ld_l2hit++;
|
||||||
|
|
||||||
|
if (snoopx & P(SNOOPX, PEER))
|
||||||
|
PEER_INC(lcl_peer);
|
||||||
|
}
|
||||||
if (lvl & P(LVL, L3 )) {
|
if (lvl & P(LVL, L3 )) {
|
||||||
if (snoop & P(SNOOP, HITM))
|
if (snoop & P(SNOOP, HITM))
|
||||||
HITM_INC(lcl_hitm);
|
HITM_INC(lcl_hitm);
|
||||||
else
|
else
|
||||||
stats->ld_llchit++;
|
stats->ld_llchit++;
|
||||||
|
|
||||||
|
if (snoopx & P(SNOOPX, PEER))
|
||||||
|
PEER_INC(lcl_peer);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lvl & P(LVL, LOC_RAM)) {
|
if (lvl & P(LVL, LOC_RAM)) {
|
||||||
@@ -597,10 +612,14 @@ do { \
|
|||||||
if ((lvl & P(LVL, REM_CCE1)) ||
|
if ((lvl & P(LVL, REM_CCE1)) ||
|
||||||
(lvl & P(LVL, REM_CCE2)) ||
|
(lvl & P(LVL, REM_CCE2)) ||
|
||||||
mrem) {
|
mrem) {
|
||||||
if (snoop & P(SNOOP, HIT))
|
if (snoop & P(SNOOP, HIT)) {
|
||||||
stats->rmt_hit++;
|
stats->rmt_hit++;
|
||||||
else if (snoop & P(SNOOP, HITM))
|
} else if (snoop & P(SNOOP, HITM)) {
|
||||||
HITM_INC(rmt_hitm);
|
HITM_INC(rmt_hitm);
|
||||||
|
} else if (snoopx & P(SNOOPX, PEER)) {
|
||||||
|
stats->rmt_hit++;
|
||||||
|
PEER_INC(rmt_peer);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((lvl & P(LVL, MISS)))
|
if ((lvl & P(LVL, MISS)))
|
||||||
@@ -664,6 +683,9 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
|
|||||||
stats->lcl_hitm += add->lcl_hitm;
|
stats->lcl_hitm += add->lcl_hitm;
|
||||||
stats->rmt_hitm += add->rmt_hitm;
|
stats->rmt_hitm += add->rmt_hitm;
|
||||||
stats->tot_hitm += add->tot_hitm;
|
stats->tot_hitm += add->tot_hitm;
|
||||||
|
stats->lcl_peer += add->lcl_peer;
|
||||||
|
stats->rmt_peer += add->rmt_peer;
|
||||||
|
stats->tot_peer += add->tot_peer;
|
||||||
stats->rmt_hit += add->rmt_hit;
|
stats->rmt_hit += add->rmt_hit;
|
||||||
stats->lcl_dram += add->lcl_dram;
|
stats->lcl_dram += add->lcl_dram;
|
||||||
stats->rmt_dram += add->rmt_dram;
|
stats->rmt_dram += add->rmt_dram;
|
||||||
|
|||||||
@@ -78,6 +78,9 @@ struct c2c_stats {
|
|||||||
u32 lcl_hitm; /* count of loads with local HITM */
|
u32 lcl_hitm; /* count of loads with local HITM */
|
||||||
u32 rmt_hitm; /* count of loads with remote HITM */
|
u32 rmt_hitm; /* count of loads with remote HITM */
|
||||||
u32 tot_hitm; /* count of loads with local and remote HITM */
|
u32 tot_hitm; /* count of loads with local and remote HITM */
|
||||||
|
u32 lcl_peer; /* count of loads with local peer cache */
|
||||||
|
u32 rmt_peer; /* count of loads with remote peer cache */
|
||||||
|
u32 tot_peer; /* count of loads with local and remote peer cache */
|
||||||
u32 rmt_hit; /* count of loads with remote hit clean; */
|
u32 rmt_hit; /* count of loads with remote hit clean; */
|
||||||
u32 lcl_dram; /* count of loads miss to local DRAM */
|
u32 lcl_dram; /* count of loads miss to local DRAM */
|
||||||
u32 rmt_dram; /* count of loads miss to remote DRAM */
|
u32 rmt_dram; /* count of loads miss to remote DRAM */
|
||||||
|
|||||||
Reference in New Issue
Block a user