perf/core improvements and fixes:

- Support syscall name glob matching in 'perf trace' (Arnaldo Carvalho de Melo)
 
   e.g.:
 
    # perf trace -e pkey_*
    32.784 (0.006 ms): pkey/16018 pkey_alloc(init_val: DISABLE_WRITE) = -1 EINVAL Invalid argument
    32.795 (0.004 ms): pkey/16018 pkey_mprotect(start: 0x7f380d0a6000, len: 4096, prot: READ|WRITE, pkey: -1) = 0
    32.801 (0.002 ms): pkey/16018 pkey_free(pkey: -1                ) = -1 EINVAL Invalid argument
    ^C#
 
 - Do not auto merge counts for explicitely specified events in
   'perf stat' (Arnaldo Carvalho de Melo)
 
 - Fix syntax in documentation of .perfconfig intel-pt option (Jack Henschel)
 
 - Calculate the average cycles of iterations for loops detected by the
   branch history support in 'perf report' (Jin Yao)
 
 - Support PERF_SAMPLE_PHYS_ADDR as a sort key "phys_daddr" in the 'script', 'mem',
   'top' and 'report'. Also add a test entry for it in 'perf test' (Kan Liang)
 
 - Fix 'Object code reading' 'perf test' entry in PowerPC (Ravi Bangoria)
 
 - Remove some duplicate Power9 duplicate vendor events (described in JSON
   files) (Sukadev Bhattiprolu)
 
 - Add help entry in the TUI annotate browser about cycling thru hottest
   instructions with TAB/shift+TAB (Arnaldo Carvalho de Melo)
 
 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEELb9bqkb7Te0zijNb1lAW81NSqkAFAlmpouQACgkQ1lAW81NS
 qkAs/Q/+OyKNSxZKgptHaR8ZDxOKPLr7hKCEfCMzRuTX22xhFGHsa5YYACQuGqe+
 dLfsblxbhsR6kVP/bLpCg9/jtfCc8N0pKHJKnwqpMGC46qmeW9orduA0PI6j0Dpj
 Um2aSCCoG5gISO6ojCDFBUpCY8NK+rLLWzQB7jNids0x/cB2A+pvzxSQlIZVuGuD
 E4D2G7smAcWCOs4ZPHtKnqPeizLwKp8Idu1/lwU2N4ZpjTk20l3olI++NO/ZZPfP
 g4XPE7Uhs4zoEkb7wXOrAjsOhPgRgPPR0gvoz/EeVSZMtuL39aZeCDcArgOBynR2
 5BLYMAEXOAWt7K+JfR2VxVr/tRz0tAPJC0Pw4QssmLni50O/uLuoHSCg/+zBPwNP
 Y++rsXMqt5XnxeRejye+8onfITpXBX1FTAEe3T1u3wvbAwD3EWpBpQyco8ghT9Jc
 Qai85FrN47cAkrbqZtjt1qzqD4qujJMgnAXWccc3jjjcywPijKSZje5v2Oh0Qhg4
 sf2Zf+efhXWxvBfBuHpiC5eT2NPnzA1ryBmpVn5q6TbJGVIgFIwrc9/iE3Z+WWtr
 cy0lYGRmLXeugwvsZSu/82g5uO/dm9HJz1FeIAj5OBISK+RcbBzuuERBNXbQ186S
 KSxrYeUgIxYolA0W9N7kGGwyvntEmCeAgMuK4/9Arml2Mw9+3qQ=
 =pjbo
 -----END PGP SIGNATURE-----

Merge tag 'perf-core-for-mingo-4.14-20170901' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

- Support syscall name glob matching in 'perf trace' (Arnaldo Carvalho de Melo)

  e.g.:

   # perf trace -e pkey_*
   32.784 (0.006 ms): pkey/16018 pkey_alloc(init_val: DISABLE_WRITE) = -1 EINVAL Invalid argument
   32.795 (0.004 ms): pkey/16018 pkey_mprotect(start: 0x7f380d0a6000, len: 4096, prot: READ|WRITE, pkey: -1) = 0
   32.801 (0.002 ms): pkey/16018 pkey_free(pkey: -1                ) = -1 EINVAL Invalid argument
   ^C#

- Do not auto merge counts for explicitly specified events in
  'perf stat' (Arnaldo Carvalho de Melo)

- Fix syntax in documentation of .perfconfig intel-pt option (Jack Henschel)

- Calculate the average cycles of iterations for loops detected by the
  branch history support in 'perf report' (Jin Yao)

- Support PERF_SAMPLE_PHYS_ADDR as a sort key "phys_daddr" in the 'script', 'mem',
  'top' and 'report'. Also add a test entry for it in 'perf test' (Kan Liang)

- Fix 'Object code reading' 'perf test' entry in PowerPC (Ravi Bangoria)

- Remove some duplicate Power9 vendor events (described in JSON
  files) (Sukadev Bhattiprolu)

- Add help entry in the TUI annotate browser about cycling thru hottest
  instructions with TAB/shift+TAB (Arnaldo Carvalho de Melo)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar 2017-09-05 07:14:28 +02:00
commit 770e961255
37 changed files with 369 additions and 269 deletions

View File

@ -139,8 +139,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_IDENTIFIER = 1U << 16,
PERF_SAMPLE_TRANSACTION = 1U << 17,
PERF_SAMPLE_REGS_INTR = 1U << 18,
PERF_SAMPLE_PHYS_ADDR = 1U << 19,
PERF_SAMPLE_MAX = 1U << 19, /* non-ABI */
PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */
};
/*
@ -814,6 +815,7 @@ enum perf_event_type {
* { u64 transaction; } && PERF_SAMPLE_TRANSACTION
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
* { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR
* };
*/
PERF_RECORD_SAMPLE = 9,

View File

@ -873,7 +873,7 @@ amended to take the number of elements as a parameter.
$ cat ~/.perfconfig
[intel-pt]
mispred-all
mispred-all = on
$ perf record -e intel_pt//u ./sort 3000
Bubble sorting array of 3000 elements

View File

@ -59,6 +59,10 @@ OPTIONS
--ldload::
Specify desired latency for loads event.
-p::
--phys-data::
Record/Report sample physical addresses
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1]

View File

@ -249,7 +249,10 @@ OPTIONS
-d::
--data::
Record the sample addresses.
Record the sample virtual addresses.
--phys-data::
Record the sample physical addresses.
-T::
--timestamp::

View File

@ -137,6 +137,7 @@ OPTIONS
- mem: type of memory access for the data at the time of the sample
- snoop: type of snoop (if any) for the data at the time of the sample
- dcacheline: the cacheline the data address is on at the time of the sample
- phys_daddr: physical address of the data being accessed at the time of the sample
And the default sort keys are changed to local_weight, mem, sym, dso,
symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.

View File

@ -117,7 +117,7 @@ OPTIONS
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff,
callindent, insn, insnlen, synth.
callindent, insn, insnlen, synth, phys_addr.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace

View File

@ -37,7 +37,7 @@ OPTIONS
--expr::
--event::
List of syscalls and other perf events (tracepoints, HW cache events,
etc) to show.
etc) to show. Globbing is supported, e.g.: "epoll_*", "*msg*", etc.
See 'perf list' for a complete list of events.
Prefixing with ! shows all syscalls but the ones specified. You may
need to escape it.

View File

@ -23,6 +23,7 @@ struct perf_mem {
bool hide_unresolved;
bool dump_raw;
bool force;
bool phys_addr;
int operation;
const char *cpu_list;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@ -101,6 +102,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
rec_argv[i++] = "-d";
if (mem->phys_addr)
rec_argv[i++] = "--phys-data";
for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
if (!perf_mem_events[j].record)
continue;
@ -161,30 +165,60 @@ dump_raw_samples(struct perf_tool *tool,
if (al.map != NULL)
al.map->dso->hit = 1;
if (symbol_conf.field_sep) {
fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64
"%s0x%"PRIx64"%s%s:%s\n";
} else {
fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
"%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
symbol_conf.field_sep = " ";
}
if (mem->phys_addr) {
if (symbol_conf.field_sep) {
fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s0x%016"PRIx64
"%s%"PRIu64"%s0x%"PRIx64"%s%s:%s\n";
} else {
fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
"%s0x%016"PRIx64"%s%5"PRIu64"%s0x%06"PRIx64
"%s%s:%s\n";
symbol_conf.field_sep = " ";
}
printf(fmt,
sample->pid,
symbol_conf.field_sep,
sample->tid,
symbol_conf.field_sep,
sample->ip,
symbol_conf.field_sep,
sample->addr,
symbol_conf.field_sep,
sample->weight,
symbol_conf.field_sep,
sample->data_src,
symbol_conf.field_sep,
al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
al.sym ? al.sym->name : "???");
printf(fmt,
sample->pid,
symbol_conf.field_sep,
sample->tid,
symbol_conf.field_sep,
sample->ip,
symbol_conf.field_sep,
sample->addr,
symbol_conf.field_sep,
sample->phys_addr,
symbol_conf.field_sep,
sample->weight,
symbol_conf.field_sep,
sample->data_src,
symbol_conf.field_sep,
al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
al.sym ? al.sym->name : "???");
} else {
if (symbol_conf.field_sep) {
fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64
"%s0x%"PRIx64"%s%s:%s\n";
} else {
fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
"%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
symbol_conf.field_sep = " ";
}
printf(fmt,
sample->pid,
symbol_conf.field_sep,
sample->tid,
symbol_conf.field_sep,
sample->ip,
symbol_conf.field_sep,
sample->addr,
symbol_conf.field_sep,
sample->weight,
symbol_conf.field_sep,
sample->data_src,
symbol_conf.field_sep,
al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
al.sym ? al.sym->name : "???");
}
out_put:
addr_location__put(&al);
return 0;
@ -224,7 +258,10 @@ static int report_raw_events(struct perf_mem *mem)
if (ret < 0)
goto out_delete;
printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
if (mem->phys_addr)
printf("# PID, TID, IP, ADDR, PHYS ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
else
printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
ret = perf_session__process_events(session);
@ -254,9 +291,16 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
* there is no weight (cost) associated with stores, so don't print
* the column
*/
if (!(mem->operation & MEM_OPERATION_LOAD))
rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
"dso_daddr,tlb,locked";
if (!(mem->operation & MEM_OPERATION_LOAD)) {
if (mem->phys_addr)
rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
"dso_daddr,tlb,locked,phys_daddr";
else
rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
"dso_daddr,tlb,locked";
} else if (mem->phys_addr)
rep_argv[i++] = "--sort=local_weight,mem,sym,dso,symbol_daddr,"
"dso_daddr,snoop,tlb,locked,phys_daddr";
for (j = 1; j < argc; j++, i++)
rep_argv[i] = argv[j];
@ -373,6 +417,7 @@ int cmd_mem(int argc, const char **argv)
"separator for columns, no spaces will be added"
" between columns '.' is reserved."),
OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"),
OPT_BOOLEAN('p', "phys-data", &mem.phys_addr, "Record/Report sample physical addresses"),
OPT_END()
};
const char *const mem_subcommands[] = { "record", "report", NULL };

View File

@ -1604,6 +1604,8 @@ static struct option __record_options[] = {
OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
"per thread counts"),
OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
"Record the sample physical addresses"),
OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
&record.opts.sample_time_set,

View File

@ -87,6 +87,7 @@ enum perf_output_field {
PERF_OUTPUT_BRSTACKINSN = 1U << 23,
PERF_OUTPUT_BRSTACKOFF = 1U << 24,
PERF_OUTPUT_SYNTH = 1U << 25,
PERF_OUTPUT_PHYS_ADDR = 1U << 26,
};
struct output_option {
@ -119,6 +120,7 @@ struct output_option {
{.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN},
{.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
{.str = "synth", .field = PERF_OUTPUT_SYNTH},
{.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR},
};
enum {
@ -175,7 +177,8 @@ static struct {
PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR |
PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT,
PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT |
PERF_OUTPUT_PHYS_ADDR,
.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
},
@ -382,6 +385,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
PERF_OUTPUT_IREGS))
return -EINVAL;
if (PRINT_FIELD(PHYS_ADDR) &&
perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR",
PERF_OUTPUT_PHYS_ADDR))
return -EINVAL;
return 0;
}
@ -1446,6 +1454,9 @@ static void process_event(struct perf_script *script,
if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
print_sample_bpf_output(sample);
print_insn(sample, attr, thread, machine);
if (PRINT_FIELD(PHYS_ADDR))
printf("%16" PRIx64, sample->phys_addr);
printf("\n");
}
@ -2729,7 +2740,7 @@ int cmd_script(int argc, const char **argv)
"Valid types: hw,sw,trace,raw,synth. "
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
"addr,symoff,period,iregs,brstack,brstacksym,flags,"
"bpf-output,callindent,insn,insnlen,brstackinsn,synth",
"bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr",
parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),

View File

@ -1257,7 +1257,7 @@ static bool collect_data(struct perf_evsel *counter,
if (counter->merged_stat)
return false;
cb(counter, data, true);
if (!no_merge)
if (!no_merge && counter->auto_merge_stats)
collect_all_aliases(counter, cb, data);
return true;
}

View File

@ -1261,6 +1261,7 @@ static int trace__read_syscall_info(struct trace *trace, int id)
static int trace__validate_ev_qualifier(struct trace *trace)
{
int err = 0, i;
size_t nr_allocated;
struct str_node *pos;
trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
@ -1274,13 +1275,18 @@ static int trace__validate_ev_qualifier(struct trace *trace)
goto out;
}
nr_allocated = trace->ev_qualifier_ids.nr;
i = 0;
strlist__for_each_entry(pos, trace->ev_qualifier) {
const char *sc = pos->s;
int id = syscalltbl__id(trace->sctbl, sc);
int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
if (id < 0) {
id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
if (id >= 0)
goto matches;
if (err == 0) {
fputs("Error:\tInvalid syscall ", trace->output);
err = -EINVAL;
@ -1290,13 +1296,37 @@ static int trace__validate_ev_qualifier(struct trace *trace)
fputs(sc, trace->output);
}
matches:
trace->ev_qualifier_ids.entries[i++] = id;
if (match_next == -1)
continue;
while (1) {
id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
if (id < 0)
break;
if (nr_allocated == trace->ev_qualifier_ids.nr) {
void *entries;
nr_allocated += 8;
entries = realloc(trace->ev_qualifier_ids.entries,
nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
if (entries == NULL) {
err = -ENOMEM;
fputs("\nError:\t Not enough memory for parsing\n", trace->output);
goto out_free;
}
trace->ev_qualifier_ids.entries = entries;
}
trace->ev_qualifier_ids.nr++;
trace->ev_qualifier_ids.entries[i++] = id;
}
}
if (err < 0) {
fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
"\nHint:\tand: 'man syscalls'\n", trace->output);
out_free:
zfree(&trace->ev_qualifier_ids.entries);
trace->ev_qualifier_ids.nr = 0;
}
@ -2814,7 +2844,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
struct trace *trace = (struct trace *)opt->value;
const char *s = str;
char *sep = NULL, *lists[2] = { NULL, NULL, };
int len = strlen(str) + 1, err = -1, list;
int len = strlen(str) + 1, err = -1, list, idx;
char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
char group_name[PATH_MAX];
@ -2831,7 +2861,8 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
*sep = '\0';
list = 0;
if (syscalltbl__id(trace->sctbl, s) >= 0) {
if (syscalltbl__id(trace->sctbl, s) >= 0 ||
syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
list = 1;
} else {
path__join(group_name, sizeof(group_name), strace_groups_dir, s);

View File

@ -43,6 +43,7 @@ struct record_opts {
bool no_samples;
bool raw_samples;
bool sample_address;
bool sample_phys_addr;
bool sample_weight;
bool sample_time;
bool sample_time_set;

View File

@ -79,11 +79,6 @@
"EventName": "PM_LD_MISS_L1",
"BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
},
{,
"EventCode": "0x400F0",
"EventName": "PM_LD_MISS_L1",
"BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
},
{,
"EventCode": "0x2E01A",
"EventName": "PM_CMPLU_STALL_LSU_FLUSH_NEXT",
@ -374,4 +369,4 @@
"EventName": "PM_IPTEG_FROM_L31_ECO_MOD",
"BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request"
}
]
]

View File

@ -604,11 +604,6 @@
"EventName": "PM_L2_RTY_LD",
"BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)"
},
{,
"EventCode": "0x3689E",
"EventName": "PM_L2_RTY_LD",
"BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)"
},
{,
"EventCode": "0xE08C",
"EventName": "PM_LSU0_ERAT_HIT",
@ -714,11 +709,6 @@
"EventName": "PM_L3_RD0_BUSY",
"BriefDescription": "Lifetime, sample of RD machine 0 valid"
},
{,
"EventCode": "0x468B4",
"EventName": "PM_L3_RD0_BUSY",
"BriefDescription": "Lifetime, sample of RD machine 0 valid"
},
{,
"EventCode": "0x46080",
"EventName": "PM_L2_DISP_ALL_L2MISS",
@ -849,21 +839,11 @@
"EventName": "PM_RC0_BUSY",
"BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)"
},
{,
"EventCode": "0x2608C",
"EventName": "PM_RC0_BUSY",
"BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)"
},
{,
"EventCode": "0x36082",
"EventName": "PM_L2_LD_DISP",
"BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)."
},
{,
"EventCode": "0x1609E",
"EventName": "PM_L2_LD_DISP",
"BriefDescription": "All successful D side load dispatches for this thread (L2 miss + L2 hits)"
},
{,
"EventCode": "0xF8B0",
"EventName": "PM_L3_SW_PREF",
@ -1039,11 +1019,6 @@
"EventName": "PM_L3_CO_MEPF",
"BriefDescription": "L3 castouts in Mepf state for this thread"
},
{,
"EventCode": "0x168A0",
"EventName": "PM_L3_CO_MEPF",
"BriefDescription": "L3 CO of line in Mep state (includes casthrough to memory). The Mepf state indicates that a line was brought in to satisfy an L3 prefetch request"
},
{,
"EventCode": "0x460A2",
"EventName": "PM_L3_LAT_CI_HIT",
@ -1149,11 +1124,6 @@
"EventName": "PM_L2_RTY_ST",
"BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)"
},
{,
"EventCode": "0x4689E",
"EventName": "PM_L2_RTY_ST",
"BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)"
},
{,
"EventCode": "0x24040",
"EventName": "PM_INST_FROM_L2_MEPF",
@ -1254,11 +1224,6 @@
"EventName": "PM_CO0_BUSY",
"BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)"
},
{,
"EventCode": "0x4608C",
"EventName": "PM_CO0_BUSY",
"BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)"
},
{,
"EventCode": "0x2C122",
"EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC",
@ -1394,11 +1359,6 @@
"EventName": "PM_IPTEG_FROM_LMEM",
"BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request"
},
{,
"EventCode": "0x40006",
"EventName": "PM_ISLB_MISS",
"BriefDescription": "Number of ISLB misses for this thread"
},
{,
"EventCode": "0xD8A8",
"EventName": "PM_ISLB_MISS",
@ -1514,11 +1474,6 @@
"EventName": "PM_L2_INST",
"BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)."
},
{,
"EventCode": "0x3609E",
"EventName": "PM_L2_INST",
"BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)"
},
{,
"EventCode": "0x3504C",
"EventName": "PM_IPTEG_FROM_DL4",
@ -1689,11 +1644,6 @@
"EventName": "PM_L2_LD_HIT",
"BriefDescription": "All successful I-or-D side load dispatches for this thread that were L2 hits (excludes i_l2mru_tch_reqs)"
},
{,
"EventCode": "0x2609E",
"EventName": "PM_L2_LD_HIT",
"BriefDescription": "All successful D side load dispatches for this thread that were L2 hits for this thread"
},
{,
"EventCode": "0x168AC",
"EventName": "PM_L3_CI_USAGE",
@ -1794,21 +1744,11 @@
"EventName": "PM_L3_WI0_BUSY",
"BriefDescription": "Rotating sample of 8 WI valid"
},
{,
"EventCode": "0x260B6",
"EventName": "PM_L3_WI0_BUSY",
"BriefDescription": "Rotating sample of 8 WI valid (duplicate)"
},
{,
"EventCode": "0x368AC",
"EventName": "PM_L3_CO0_BUSY",
"BriefDescription": "Lifetime, sample of CO machine 0 valid"
},
{,
"EventCode": "0x468AC",
"EventName": "PM_L3_CO0_BUSY",
"BriefDescription": "Lifetime, sample of CO machine 0 valid"
},
{,
"EventCode": "0x2E040",
"EventName": "PM_DPTEG_FROM_L2_MEPF",
@ -1839,11 +1779,6 @@
"EventName": "PM_L3_P0_PF_RTY",
"BriefDescription": "L3 PF received retry port 0, every retry counted"
},
{,
"EventCode": "0x260AE",
"EventName": "PM_L3_P0_PF_RTY",
"BriefDescription": "L3 PF received retry port 0, every retry counted"
},
{,
"EventCode": "0x268B2",
"EventName": "PM_L3_LOC_GUESS_WRONG",
@ -1894,11 +1829,6 @@
"EventName": "PM_L3_SN0_BUSY",
"BriefDescription": "Lifetime, sample of snooper machine 0 valid"
},
{,
"EventCode": "0x460AC",
"EventName": "PM_L3_SN0_BUSY",
"BriefDescription": "Lifetime, sample of snooper machine 0 valid"
},
{,
"EventCode": "0x3005C",
"EventName": "PM_BFU_BUSY",
@ -1934,11 +1864,6 @@
"EventName": "PM_L3_PF0_BUSY",
"BriefDescription": "Lifetime, sample of PF machine 0 valid"
},
{,
"EventCode": "0x460B4",
"EventName": "PM_L3_PF0_BUSY",
"BriefDescription": "Lifetime, sample of PF machine 0 valid"
},
{,
"EventCode": "0xC0B0",
"EventName": "PM_LSU_FLUSH_UE",
@ -2084,11 +2009,6 @@
"EventName": "PM_L3_P1_CO_RTY",
"BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted"
},
{,
"EventCode": "0x468AE",
"EventName": "PM_L3_P1_CO_RTY",
"BriefDescription": "L3 CO received retry port 3 (memory only), every retry counted"
},
{,
"EventCode": "0xC0AC",
"EventName": "PM_LSU_FLUSH_EMSH",
@ -2194,11 +2114,6 @@
"EventName": "PM_L2_SN_M_WR_DONE",
"BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)"
},
{,
"EventCode": "0x46886",
"EventName": "PM_L2_SN_M_WR_DONE",
"BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)"
},
{,
"EventCode": "0x489C",
"EventName": "PM_BR_CORECT_PRED_TAKEN_CMPL",
@ -2289,21 +2204,11 @@
"EventName": "PM_SN0_BUSY",
"BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)"
},
{,
"EventCode": "0x26090",
"EventName": "PM_SN0_BUSY",
"BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)"
},
{,
"EventCode": "0x360AE",
"EventName": "PM_L3_P0_CO_RTY",
"BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted"
},
{,
"EventCode": "0x460AE",
"EventName": "PM_L3_P0_CO_RTY",
"BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted"
},
{,
"EventCode": "0x168A8",
"EventName": "PM_L3_WI_USAGE",
@ -2339,26 +2244,11 @@
"EventName": "PM_L3_P1_PF_RTY",
"BriefDescription": "L3 PF received retry port 1, every retry counted"
},
{,
"EventCode": "0x268AE",
"EventName": "PM_L3_P1_PF_RTY",
"BriefDescription": "L3 PF received retry port 3, every retry counted"
},
{,
"EventCode": "0x46082",
"EventName": "PM_L2_ST_DISP",
"BriefDescription": "All successful D-side store dispatches for this thread "
},
{,
"EventCode": "0x1689E",
"EventName": "PM_L2_ST_DISP",
"BriefDescription": "All successful D-side store dispatches for this thread (L2 miss + L2 hits)"
},
{,
"EventCode": "0x36880",
"EventName": "PM_L2_INST_MISS",
"BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)"
},
{,
"EventCode": "0x4609E",
"EventName": "PM_L2_INST_MISS",
@ -2429,11 +2319,6 @@
"EventName": "PM_INST_DISP",
"BriefDescription": "# PPC Dispatched"
},
{,
"EventCode": "0x300F2",
"EventName": "PM_INST_DISP",
"BriefDescription": "# PPC Dispatched"
},
{,
"EventCode": "0x4E05E",
"EventName": "PM_TM_OUTER_TBEGIN_DISP",
@ -2459,11 +2344,6 @@
"EventName": "PM_L2_ST_HIT",
"BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits"
},
{,
"EventCode": "0x2689E",
"EventName": "PM_L2_ST_HIT",
"BriefDescription": "All successful D-side store dispatches that were L2 hits for this thread"
},
{,
"EventCode": "0x360A8",
"EventName": "PM_L3_CO",

View File

@ -419,11 +419,6 @@
"EventName": "PM_INST_GRP_PUMP_MPRED_RTY",
"BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch"
},
{,
"EventCode": "0x10016",
"EventName": "PM_DSLB_MISS",
"BriefDescription": "Data SLB Miss - Total of all segment sizes"
},
{,
"EventCode": "0xD0A8",
"EventName": "PM_DSLB_MISS",
@ -554,4 +549,4 @@
"EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC",
"BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load"
}
]
]

View File

@ -4,11 +4,6 @@
"EventName": "PM_BR_2PATH",
"BriefDescription": "Branches that are not strongly biased"
},
{,
"EventCode": "0x40036",
"EventName": "PM_BR_2PATH",
"BriefDescription": "Branches that are not strongly biased"
},
{,
"EventCode": "0x40056",
"EventName": "PM_MEM_LOC_THRESH_LSU_HIGH",
@ -124,4 +119,4 @@
"EventName": "PM_1FLOP_CMPL",
"BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed"
}
]
]

View File

@ -237,6 +237,11 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al);
if (!al.map || !al.map->dso) {
if (cpumode == PERF_RECORD_MISC_HYPERVISOR) {
pr_debug("Hypervisor address can not be resolved - skipping\n");
return 0;
}
pr_debug("thread__find_addr_map failed\n");
return -1;
}

View File

@ -141,6 +141,9 @@ static bool samples_same(const struct perf_sample *s1,
}
}
if (type & PERF_SAMPLE_PHYS_ADDR)
COMP(phys_addr);
return true;
}
@ -206,6 +209,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
.mask = sample_regs,
.regs = regs,
},
.phys_addr = 113,
};
struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},};
struct perf_sample sample_out;
@ -305,7 +309,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u
* were added. Please actually update the test rather than just change
* the condition below.
*/
if (PERF_SAMPLE_MAX > PERF_SAMPLE_REGS_INTR << 1) {
if (PERF_SAMPLE_MAX > PERF_SAMPLE_PHYS_ADDR << 1) {
pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n");
return -1;
}

View File

@ -829,7 +829,8 @@ static int annotate_browser__run(struct annotate_browser *browser,
"q/ESC/CTRL+C Exit\n\n"
"ENTER Go to target\n"
"ESC Exit\n"
"H Cycle thru hottest instructions\n"
"H Go to hottest instruction\n"
"TAB/shift+TAB Cycle thru hottest instructions\n"
"j Toggle showing jump to target arrows\n"
"J Toggle showing number of jump sources on targets\n"
"n Search next string\n"

View File

@ -931,12 +931,8 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
browser->show_dso);
if (symbol_conf.show_branchflag_count) {
if (need_percent)
callchain_list_counts__printf_value(node, chain, NULL,
buf, sizeof(buf));
else
callchain_list_counts__printf_value(NULL, chain, NULL,
buf, sizeof(buf));
callchain_list_counts__printf_value(chain, NULL,
buf, sizeof(buf));
if (asprintf(&alloc_str2, "%s%s", str, buf) < 0)
str = "Not enough memory!";

View File

@ -124,12 +124,8 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
str = callchain_list__sym_name(chain, bf, sizeof(bf), false);
if (symbol_conf.show_branchflag_count) {
if (!period)
callchain_list_counts__printf_value(node, chain, NULL,
buf, sizeof(buf));
else
callchain_list_counts__printf_value(NULL, chain, NULL,
buf, sizeof(buf));
callchain_list_counts__printf_value(chain, NULL,
buf, sizeof(buf));
if (asprintf(&alloc_str, "%s%s", str, buf) < 0)
str = "Not enough memory!";
@ -313,7 +309,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
if (symbol_conf.show_branchflag_count)
ret += callchain_list_counts__printf_value(
NULL, chain, fp, NULL, 0);
chain, fp, NULL, 0);
ret += fprintf(fp, "\n");
if (++entries_printed == callchain_param.print_limit)

View File

@ -588,7 +588,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
call->cycles_count =
cursor_node->branch_flags.cycles;
call->iter_count = cursor_node->nr_loop_iter;
call->samples_count = cursor_node->samples;
call->iter_cycles = cursor_node->iter_cycles;
}
}
@ -722,7 +722,7 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
cnode->cycles_count +=
node->branch_flags.cycles;
cnode->iter_count += node->nr_loop_iter;
cnode->samples_count += node->samples;
cnode->iter_cycles += node->iter_cycles;
}
}
@ -998,7 +998,7 @@ int callchain_merge(struct callchain_cursor *cursor,
int callchain_cursor_append(struct callchain_cursor *cursor,
u64 ip, struct map *map, struct symbol *sym,
bool branch, struct branch_flags *flags,
int nr_loop_iter, int samples, u64 branch_from)
int nr_loop_iter, u64 iter_cycles, u64 branch_from)
{
struct callchain_cursor_node *node = *cursor->last;
@ -1016,7 +1016,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
node->sym = sym;
node->branch = branch;
node->nr_loop_iter = nr_loop_iter;
node->samples = samples;
node->iter_cycles = iter_cycles;
if (flags)
memcpy(&node->branch_flags, flags,
@ -1306,7 +1306,7 @@ static int branch_to_str(char *bf, int bfsize,
static int branch_from_str(char *bf, int bfsize,
u64 branch_count,
u64 cycles_count, u64 iter_count,
u64 samples_count)
u64 iter_cycles)
{
int printed = 0, i = 0;
u64 cycles;
@ -1318,9 +1318,13 @@ static int branch_from_str(char *bf, int bfsize,
bf + printed, bfsize - printed);
}
if (iter_count && samples_count) {
printed += count_pri64_printf(i++, "iterations",
iter_count / samples_count,
if (iter_count) {
printed += count_pri64_printf(i++, "iter",
iter_count,
bf + printed, bfsize - printed);
printed += count_pri64_printf(i++, "avg_cycles",
iter_cycles / iter_count,
bf + printed, bfsize - printed);
}
@ -1333,7 +1337,7 @@ static int branch_from_str(char *bf, int bfsize,
static int counts_str_build(char *bf, int bfsize,
u64 branch_count, u64 predicted_count,
u64 abort_count, u64 cycles_count,
u64 iter_count, u64 samples_count,
u64 iter_count, u64 iter_cycles,
struct branch_type_stat *brtype_stat)
{
int printed;
@ -1346,7 +1350,7 @@ static int counts_str_build(char *bf, int bfsize,
predicted_count, abort_count, brtype_stat);
} else {
printed = branch_from_str(bf, bfsize, branch_count,
cycles_count, iter_count, samples_count);
cycles_count, iter_count, iter_cycles);
}
if (!printed)
@ -1358,14 +1362,14 @@ static int counts_str_build(char *bf, int bfsize,
static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
u64 branch_count, u64 predicted_count,
u64 abort_count, u64 cycles_count,
u64 iter_count, u64 samples_count,
u64 iter_count, u64 iter_cycles,
struct branch_type_stat *brtype_stat)
{
char str[256];
counts_str_build(str, sizeof(str), branch_count,
predicted_count, abort_count, cycles_count,
iter_count, samples_count, brtype_stat);
iter_count, iter_cycles, brtype_stat);
if (fp)
return fprintf(fp, "%s", str);
@ -1373,31 +1377,23 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
return scnprintf(bf, bfsize, "%s", str);
}
int callchain_list_counts__printf_value(struct callchain_node *node,
struct callchain_list *clist,
int callchain_list_counts__printf_value(struct callchain_list *clist,
FILE *fp, char *bf, int bfsize)
{
u64 branch_count, predicted_count;
u64 abort_count, cycles_count;
u64 iter_count = 0, samples_count = 0;
u64 iter_count, iter_cycles;
branch_count = clist->branch_count;
predicted_count = clist->predicted_count;
abort_count = clist->abort_count;
cycles_count = clist->cycles_count;
if (node) {
struct callchain_list *call;
list_for_each_entry(call, &node->val, list) {
iter_count += call->iter_count;
samples_count += call->samples_count;
}
}
iter_count = clist->iter_count;
iter_cycles = clist->iter_cycles;
return callchain_counts_printf(fp, bf, bfsize, branch_count,
predicted_count, abort_count,
cycles_count, iter_count, samples_count,
cycles_count, iter_count, iter_cycles,
&clist->brtype_stat);
}
@ -1523,7 +1519,8 @@ int callchain_cursor__copy(struct callchain_cursor *dst,
rc = callchain_cursor_append(dst, node->ip, node->map, node->sym,
node->branch, &node->branch_flags,
node->nr_loop_iter, node->samples,
node->nr_loop_iter,
node->iter_cycles,
node->branch_from);
if (rc)
break;

View File

@ -119,7 +119,7 @@ struct callchain_list {
u64 abort_count;
u64 cycles_count;
u64 iter_count;
u64 samples_count;
u64 iter_cycles;
struct branch_type_stat brtype_stat;
char *srcline;
struct list_head list;
@ -139,7 +139,7 @@ struct callchain_cursor_node {
struct branch_flags branch_flags;
u64 branch_from;
int nr_loop_iter;
int samples;
u64 iter_cycles;
struct callchain_cursor_node *next;
};
@ -201,7 +201,7 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
struct map *map, struct symbol *sym,
bool branch, struct branch_flags *flags,
int nr_loop_iter, int samples, u64 branch_from);
int nr_loop_iter, u64 iter_cycles, u64 branch_from);
/* Close a cursor writing session. Initialize for the reader */
static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
@ -282,8 +282,7 @@ char *callchain_node__scnprintf_value(struct callchain_node *node,
int callchain_node__fprintf_value(struct callchain_node *node,
FILE *fp, u64 total);
int callchain_list_counts__printf_value(struct callchain_node *node,
struct callchain_list *clist,
int callchain_list_counts__printf_value(struct callchain_list *clist,
FILE *fp, char *bf, int bfsize);
void free_callchain(struct callchain_root *root);

View File

@ -200,6 +200,7 @@ struct perf_sample {
u32 cpu;
u32 raw_size;
u64 data_src;
u64 phys_addr;
u32 flags;
u16 insn_len;
u8 cpumode;

View File

@ -955,6 +955,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
if (opts->sample_address)
perf_evsel__set_sample_bit(evsel, DATA_SRC);
if (opts->sample_phys_addr)
perf_evsel__set_sample_bit(evsel, PHYS_ADDR);
if (opts->no_buffering) {
attr->watermark = 0;
attr->wakeup_events = 1;
@ -1464,7 +1467,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value)
bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
bit_name(WEIGHT),
bit_name(WEIGHT), bit_name(PHYS_ADDR),
{ .name = NULL, }
};
#undef bit_name
@ -2206,6 +2209,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
}
}
data->phys_addr = 0;
if (type & PERF_SAMPLE_PHYS_ADDR) {
data->phys_addr = *array;
array++;
}
return 0;
}
@ -2311,6 +2320,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
}
}
if (type & PERF_SAMPLE_PHYS_ADDR)
result += sizeof(u64);
return result;
}
@ -2500,6 +2512,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
}
}
if (type & PERF_SAMPLE_PHYS_ADDR) {
*array = sample->phys_addr;
array++;
}
return 0;
}

View File

@ -131,6 +131,7 @@ struct perf_evsel {
bool cmdline_group_boundary;
struct list_head config_terms;
int bpf_fd;
bool auto_merge_stats;
bool merged_stat;
const char * metric_expr;
const char * metric_name;

View File

@ -167,6 +167,10 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
symlen = unresolved_col_width + 4 + 2;
hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
}
hists__new_col_len(hists, HISTC_MEM_PHYS_DADDR,
unresolved_col_width + 4 + 2);
} else {
symlen = unresolved_col_width + 4 + 2;
hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen);

View File

@ -47,6 +47,7 @@ enum hist_column {
HISTC_GLOBAL_WEIGHT,
HISTC_MEM_DADDR_SYMBOL,
HISTC_MEM_DADDR_DSO,
HISTC_MEM_PHYS_DADDR,
HISTC_MEM_LOCKED,
HISTC_MEM_TLB,
HISTC_MEM_LVL,

View File

@ -1635,10 +1635,12 @@ static void ip__resolve_ams(struct thread *thread,
ams->al_addr = al.addr;
ams->sym = al.sym;
ams->map = al.map;
ams->phys_addr = 0;
}
static void ip__resolve_data(struct thread *thread,
u8 m, struct addr_map_symbol *ams, u64 addr)
u8 m, struct addr_map_symbol *ams,
u64 addr, u64 phys_addr)
{
struct addr_location al;
@ -1658,6 +1660,7 @@ static void ip__resolve_data(struct thread *thread,
ams->al_addr = al.addr;
ams->sym = al.sym;
ams->map = al.map;
ams->phys_addr = phys_addr;
}
struct mem_info *sample__resolve_mem(struct perf_sample *sample,
@ -1669,12 +1672,18 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
return NULL;
ip__resolve_ams(al->thread, &mi->iaddr, sample->ip);
ip__resolve_data(al->thread, al->cpumode, &mi->daddr, sample->addr);
ip__resolve_data(al->thread, al->cpumode, &mi->daddr,
sample->addr, sample->phys_addr);
mi->data_src.val = sample->data_src;
return mi;
}
/*
 * Summary of one loop, filled in by save_iterations().
 */
struct iterations {
/* Number of branch entries in the loop, as passed to save_iterations(). */
int nr_loop_iter;
/* Sum of flags.cycles over those branch entries. */
u64 cycles;
};
static int add_callchain_ip(struct thread *thread,
struct callchain_cursor *cursor,
struct symbol **parent,
@ -1683,11 +1692,12 @@ static int add_callchain_ip(struct thread *thread,
u64 ip,
bool branch,
struct branch_flags *flags,
int nr_loop_iter,
int samples,
struct iterations *iter,
u64 branch_from)
{
struct addr_location al;
int nr_loop_iter = 0;
u64 iter_cycles = 0;
al.filtered = 0;
al.sym = NULL;
@ -1737,9 +1747,15 @@ static int add_callchain_ip(struct thread *thread,
if (symbol_conf.hide_unresolved && al.sym == NULL)
return 0;
if (iter) {
nr_loop_iter = iter->nr_loop_iter;
iter_cycles = iter->cycles;
}
return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
branch, flags, nr_loop_iter, samples,
branch_from);
branch, flags, nr_loop_iter,
iter_cycles, branch_from);
}
struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
@ -1760,6 +1776,18 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
return bi;
}
static void save_iterations(struct iterations *iter,
struct branch_entry *be, int nr)
{
int i;
iter->nr_loop_iter = nr;
iter->cycles = 0;
for (i = 0; i < nr; i++)
iter->cycles += be[i].flags.cycles;
}
#define CHASHSZ 127
#define CHASHBITS 7
#define NO_ENTRY 0xff
@ -1767,7 +1795,8 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
#define PERF_MAX_BRANCH_DEPTH 127
/* Remove loops. */
static int remove_loops(struct branch_entry *l, int nr)
static int remove_loops(struct branch_entry *l, int nr,
struct iterations *iter)
{
int i, j, off;
unsigned char chash[CHASHSZ];
@ -1792,8 +1821,18 @@ static int remove_loops(struct branch_entry *l, int nr)
break;
}
if (is_loop) {
memmove(l + i, l + i + off,
(nr - (i + off)) * sizeof(*l));
j = nr - (i + off);
if (j > 0) {
save_iterations(iter + i + off,
l + i, off);
memmove(iter + i, iter + i + off,
j * sizeof(*iter));
memmove(l + i, l + i + off,
j * sizeof(*l));
}
nr -= off;
}
}
@ -1883,7 +1922,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
branch, flags, 0, 0,
branch, flags, NULL,
branch_from);
if (err)
return (err < 0) ? err : 0;
@ -1909,7 +1948,6 @@ static int thread__resolve_callchain_sample(struct thread *thread,
int i, j, err, nr_entries;
int skip_idx = -1;
int first_call = 0;
int nr_loop_iter;
if (chain)
chain_nr = chain->nr;
@ -1942,6 +1980,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
if (branch && callchain_param.branch_callstack) {
int nr = min(max_stack, (int)branch->nr);
struct branch_entry be[nr];
struct iterations iter[nr];
if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
pr_warning("corrupted branch chain. skipping...\n");
@ -1972,38 +2011,21 @@ static int thread__resolve_callchain_sample(struct thread *thread,
be[i] = branch->entries[branch->nr - i - 1];
}
nr_loop_iter = nr;
nr = remove_loops(be, nr);
/*
* Get the number of iterations.
* It's only approximation, but good enough in practice.
*/
if (nr_loop_iter > nr)
nr_loop_iter = nr_loop_iter - nr + 1;
else
nr_loop_iter = 0;
memset(iter, 0, sizeof(struct iterations) * nr);
nr = remove_loops(be, nr, iter);
for (i = 0; i < nr; i++) {
if (i == nr - 1)
err = add_callchain_ip(thread, cursor, parent,
root_al,
NULL, be[i].to,
true, &be[i].flags,
nr_loop_iter, 1,
be[i].from);
else
err = add_callchain_ip(thread, cursor, parent,
root_al,
NULL, be[i].to,
true, &be[i].flags,
0, 0, be[i].from);
err = add_callchain_ip(thread, cursor, parent,
root_al,
NULL, be[i].to,
true, &be[i].flags,
NULL, be[i].from);
if (!err)
err = add_callchain_ip(thread, cursor, parent, root_al,
NULL, be[i].from,
true, &be[i].flags,
0, 0, 0);
&iter[i], 0);
if (err == -EINVAL)
break;
if (err)
@ -2037,7 +2059,7 @@ check_calls:
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
false, NULL, 0, 0, 0);
false, NULL, NULL, 0);
if (err)
return (err < 0) ? err : 0;

View File

@ -310,7 +310,7 @@ static struct perf_evsel *
__add_event(struct list_head *list, int *idx,
struct perf_event_attr *attr,
char *name, struct cpu_map *cpus,
struct list_head *config_terms)
struct list_head *config_terms, bool auto_merge_stats)
{
struct perf_evsel *evsel;
@ -324,6 +324,7 @@ __add_event(struct list_head *list, int *idx,
evsel->cpus = cpu_map__get(cpus);
evsel->own_cpus = cpu_map__get(cpus);
evsel->system_wide = !!cpus;
evsel->auto_merge_stats = auto_merge_stats;
if (name)
evsel->name = strdup(name);
@ -339,7 +340,7 @@ static int add_event(struct list_head *list, int *idx,
struct perf_event_attr *attr, char *name,
struct list_head *config_terms)
{
return __add_event(list, idx, attr, name, NULL, config_terms) ? 0 : -ENOMEM;
return __add_event(list, idx, attr, name, NULL, config_terms, false) ? 0 : -ENOMEM;
}
static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)
@ -1209,9 +1210,9 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
get_config_name(head_config), &config_terms);
}
int parse_events_add_pmu(struct parse_events_state *parse_state,
static int __parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, char *name,
struct list_head *head_config)
struct list_head *head_config, bool auto_merge_stats)
{
struct perf_event_attr attr;
struct perf_pmu_info info;
@ -1232,7 +1233,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
if (!head_config) {
attr.type = pmu->type;
evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL);
evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL, auto_merge_stats);
return evsel ? 0 : -ENOMEM;
}
@ -1254,7 +1255,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
evsel = __add_event(list, &parse_state->idx, &attr,
get_config_name(head_config), pmu->cpus,
&config_terms);
&config_terms, auto_merge_stats);
if (evsel) {
evsel->unit = info.unit;
evsel->scale = info.scale;
@ -1267,6 +1268,13 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
return evsel ? 0 : -ENOMEM;
}
int parse_events_add_pmu(struct parse_events_state *parse_state,
struct list_head *list, char *name,
struct list_head *head_config)
{
return __parse_events_add_pmu(parse_state, list, name, head_config, false);
}
int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
char *str, struct list_head **listp)
{
@ -1296,8 +1304,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
return -1;
list_add_tail(&term->list, head);
if (!parse_events_add_pmu(parse_state, list,
pmu->name, head)) {
if (!__parse_events_add_pmu(parse_state, list,
pmu->name, head, true)) {
pr_debug("%s -> %s/%s/\n", str,
pmu->name, alias->str);
ok++;

View File

@ -1120,6 +1120,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
if (sample_type & PERF_SAMPLE_DATA_SRC)
printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
if (sample_type & PERF_SAMPLE_PHYS_ADDR)
printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr);
if (sample_type & PERF_SAMPLE_TRANSACTION)
printf("... transaction: %" PRIx64 "\n", sample->transaction);

View File

@ -1315,6 +1315,47 @@ struct sort_entry sort_mem_dcacheline = {
.se_width_idx = HISTC_MEM_DCACHELINE,
};
/*
 * Compare two hist entries by the physical address of their data access.
 *
 * An entry without mem_info is treated as physical address 0.  Returns a
 * positive value when @right's address is greater than @left's, a negative
 * value when it is smaller, and 0 when they are equal.
 *
 * The addresses are compared directly instead of returning the unsigned
 * difference cast to int64_t: that cast yields the wrong sign whenever the
 * difference does not fit in 63 bits.
 */
static int64_t
sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
{
	uint64_t l = 0, r = 0;

	if (left->mem_info)
		l = left->mem_info->daddr.phys_addr;
	if (right->mem_info)
		r = right->mem_info->daddr.phys_addr;

	if (r == l)
		return 0;

	return r > l ? 1 : -1;
}
/*
 * Format the "Data Physical Address" column for @he into @bf.
 *
 * Writes "[<level>] " followed by the physical data address in hex with
 * BITS_PER_LONG / 4 digits of precision, then pads with spaces up to
 * @width and cuts the string at @width if it grew longer.  An entry
 * without mem_info is printed as address 0, matching how
 * sort__phys_daddr_cmp() treats it.
 *
 * Returns @width.
 */
static int hist_entry__phys_daddr_snprintf(struct hist_entry *he, char *bf,
					   size_t size, unsigned int width)
{
	uint64_t addr = 0;
	size_t ret = 0;
	/* '*' precision/width arguments must be int, not size_t. */
	const int len = BITS_PER_LONG / 4;
	int pad;

	if (he->mem_info)
		addr = he->mem_info->daddr.phys_addr;

	ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", he->level);

	ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", len,
			       (unsigned long long)addr);

	/* Never hand printf an underflowed width once we are past @width. */
	pad = ret < width ? (int)(width - ret) : 0;
	ret += repsep_snprintf(bf + ret, size - ret, "%-*s", pad, "");

	if (ret > width)
		bf[width] = '\0';

	return width;
}
/*
 * Sort entry for the "Data Physical Address" column: compares with
 * sort__phys_daddr_cmp(), prints with hist_entry__phys_daddr_snprintf(),
 * and uses the HISTC_MEM_PHYS_DADDR column width slot.
 */
struct sort_entry sort_mem_phys_daddr = {
.se_header = "Data Physical Address",
.se_cmp = sort__phys_daddr_cmp,
.se_snprintf = hist_entry__phys_daddr_snprintf,
.se_width_idx = HISTC_MEM_PHYS_DADDR,
};
static int64_t
sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
{
@ -1547,6 +1588,7 @@ static struct sort_dimension memory_sort_dimensions[] = {
DIM(SORT_MEM_LVL, "mem", sort_mem_lvl),
DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop),
DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline),
DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr),
};
#undef DIM

View File

@ -245,6 +245,7 @@ enum sort_type {
SORT_MEM_SNOOP,
SORT_MEM_DCACHELINE,
SORT_MEM_IADDR_SYMBOL,
SORT_MEM_PHYS_DADDR,
};
/*

View File

@ -186,6 +186,7 @@ struct addr_map_symbol {
struct symbol *sym;
u64 addr;
u64 al_addr;
u64 phys_addr;
};
struct branch_info {

View File

@ -19,6 +19,7 @@
#ifdef HAVE_SYSCALL_TABLE
#include <linux/compiler.h>
#include <string.h>
#include "string2.h"
#include "util.h"
#if defined(__x86_64__)
@ -105,6 +106,27 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name)
return sc ? sc->id : -1;
}
/*
 * Find the next syscall table entry after position *idx whose name matches
 * the glob @syscall_glob.
 *
 * On a match, *idx is updated to the matching position and the entry's id
 * is returned.  Returns -1, leaving *idx untouched, when no further entry
 * matches.
 */
int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
{
	struct syscall *entries = tbl->syscalls.entries;
	int pos = *idx;

	while (++pos < tbl->syscalls.nr_entries) {
		if (!strglobmatch(entries[pos].name, syscall_glob))
			continue;

		*idx = pos;
		return entries[pos].id;
	}

	return -1;
}
/*
 * Start a glob search over the syscall table: reset the cursor *idx to -1
 * so that the scan in syscalltbl__strglobmatch_next() begins at entry 0,
 * and return its result.
 */
int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
{
	int id;

	*idx = -1;
	id = syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);

	return id;
}
#else /* HAVE_SYSCALL_TABLE */
#include <libaudit.h>
@ -131,4 +153,15 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name)
{
return audit_name_to_syscall(name, tbl->audit_machine);
}
/*
 * Variant built when HAVE_SYSCALL_TABLE is not defined: it ignores all of
 * its arguments and always returns -1.
 */
int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused,
const char *syscall_glob __maybe_unused, int *idx __maybe_unused)
{
return -1;
}
/*
 * Variant built when HAVE_SYSCALL_TABLE is not defined.
 *
 * Resets the cursor *idx to -1, exactly as the syscall-table based variant
 * does, so callers see the same cursor state in both builds, then defers
 * to syscalltbl__strglobmatch_next() and returns its result.
 */
int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
{
	*idx = -1;

	return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
}
#endif /* HAVE_SYSCALL_TABLE */

View File

@ -17,4 +17,7 @@ void syscalltbl__delete(struct syscalltbl *tbl);
const char *syscalltbl__name(const struct syscalltbl *tbl, int id);
int syscalltbl__id(struct syscalltbl *tbl, const char *name);
int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
#endif /* __PERF_SYSCALLTBL_H */