diff --git a/tools/objtool/Build b/tools/objtool/Build index 749becdf5b90..8dc4f0848362 100644 --- a/tools/objtool/Build +++ b/tools/objtool/Build @@ -9,6 +9,7 @@ objtool-y += special.o objtool-y += objtool.o objtool-y += libstring.o +objtool-y += libctype.o objtool-y += str_error_r.o CFLAGS += -I$(srctree)/tools/lib @@ -17,6 +18,10 @@ $(OUTPUT)libstring.o: ../lib/string.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) +$(OUTPUT)libctype.o: ../lib/ctype.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + $(OUTPUT)str_error_r.o: ../lib/str_error_r.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index facd91e4e945..d5cc15e651cf 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -90,9 +90,10 @@ OPTIONS -c:: --compute:: - Differential computation selection - delta, ratio, wdiff, delta-abs - (default is delta-abs). Default can be changed using diff.compute - config option. See COMPARISON METHODS section for more info. + Differential computation selection - delta, ratio, wdiff, cycles, + delta-abs (default is delta-abs). Default can be changed using + diff.compute config option. See COMPARISON METHODS section for + more info. -p:: --period:: @@ -280,6 +281,16 @@ If specified the 'Weighted diff' column is displayed with value 'd' computed as: - WEIGHT-A being the weight of the data file - WEIGHT-B being the weight of the baseline data file +cycles +~~~~~~ +If specified the '[Program Block Range] Cycles Diff' column is displayed. +It displays the cycles difference of same program basic block amongst +two perf.data. The program basic block is the code between two branches. + +'[Program Block Range]' indicates the range of a program basic block. +Source line is reported if it can be found otherwise uses symbol+offset +instead. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 8c4372819e11..987261d158d4 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -89,7 +89,7 @@ OPTIONS - socket: processor socket number the task ran at the time of sample - srcline: filename and line number executed at the time of sample. The DWARF debugging info must be provided. - - srcfile: file name of the source file of the same. Requires dwarf + - srcfile: file name of the source file of the samples. Requires dwarf information. - weight: Event specific weight, e.g. memory latency or transaction abort cost. This is the global weight. diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt index 869965d629ce..825745a645c1 100644 --- a/tools/perf/Documentation/tips.txt +++ b/tools/perf/Documentation/tips.txt @@ -38,6 +38,6 @@ To report cacheline events from previous recording: perf c2c report To browse sample contexts use perf report --sample 10 and select in context menu To separate samples by time use perf report --sort time,overhead,sym To set sample time separation other than 100ms with --sort time use --time-quantum -Add -I to perf report to sample register values visible in perf report context. +Add -I to perf record to sample register values, which will be visible in perf report sample context. To show IPC for sampling periods use perf record -e '{cycles,instructions}:S' and then browse context To show context switches in perf report sample context add --switch-events to perf record. diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 6e7920793729..f924b46910b5 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -20,6 +20,8 @@ #include "util/data.h" #include "util/config.h" #include "util/time-utils.h" +#include "util/annotate.h" +#include "util/map.h" #include #include @@ -32,6 +34,7 @@ struct perf_diff { struct perf_time_interval *ptime_range; int range_size; int range_num; + bool has_br_stack; }; /* Diff command specific HPP columns. */ @@ -44,6 +47,7 @@ enum { PERF_HPP_DIFF__WEIGHTED_DIFF, PERF_HPP_DIFF__FORMULA, PERF_HPP_DIFF__DELTA_ABS, + PERF_HPP_DIFF__CYCLES, PERF_HPP_DIFF__MAX_INDEX }; @@ -86,11 +90,14 @@ static s64 compute_wdiff_w2; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); +static struct addr_location dummy_al; + enum { COMPUTE_DELTA, COMPUTE_RATIO, COMPUTE_WEIGHTED_DIFF, COMPUTE_DELTA_ABS, + COMPUTE_CYCLES, COMPUTE_MAX, }; @@ -99,6 +106,7 @@ const char *compute_names[COMPUTE_MAX] = { [COMPUTE_DELTA_ABS] = "delta-abs", [COMPUTE_RATIO] = "ratio", [COMPUTE_WEIGHTED_DIFF] = "wdiff", + [COMPUTE_CYCLES] = "cycles", }; static int compute = COMPUTE_DELTA_ABS; @@ -108,6 +116,7 @@ static int compute_2_hpp[COMPUTE_MAX] = { [COMPUTE_DELTA_ABS] = PERF_HPP_DIFF__DELTA_ABS, [COMPUTE_RATIO] = PERF_HPP_DIFF__RATIO, [COMPUTE_WEIGHTED_DIFF] = PERF_HPP_DIFF__WEIGHTED_DIFF, + [COMPUTE_CYCLES] = PERF_HPP_DIFF__CYCLES, }; #define MAX_COL_WIDTH 70 @@ -146,6 +155,10 @@ static struct header_column { [PERF_HPP_DIFF__FORMULA] = { .name = "Formula", .width = MAX_COL_WIDTH, + }, + [PERF_HPP_DIFF__CYCLES] = { + .name = "[Program Block Range] Cycles Diff", + .width = 70, } }; @@ -335,6 +348,31 @@ static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair, return -1; } +static void *block_hist_zalloc(size_t size) +{ + struct block_hist *bh; + + bh = zalloc(size + sizeof(*bh)); + if (!bh) + return NULL; + + return &bh->he; +} + +static void block_hist_free(void *he) +{ + struct block_hist *bh; + + bh = container_of(he, struct block_hist, he); + hists__delete_entries(&bh->block_hists); + free(bh); +} + +struct hist_entry_ops block_hist_ops = { + .new = block_hist_zalloc, + .free = block_hist_free, +}; + static int diff__process_sample_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -362,9 +400,22 @@ static int diff__process_sample_event(struct perf_tool *tool, goto out_put; } - if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, true)) { - pr_warning("problem incrementing symbol period, skipping event\n"); - goto out_put; + if (compute != COMPUTE_CYCLES) { + if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, + true)) { + pr_warning("problem incrementing symbol period, " + "skipping event\n"); + goto out_put; + } + } else { + if (!hists__add_entry_ops(hists, &block_hist_ops, &al, NULL, + NULL, NULL, sample, true)) { + pr_warning("problem incrementing symbol period, " + "skipping event\n"); + goto out_put; + } + + hist__account_cycles(sample->branch_stack, &al, sample, false); } /* @@ -474,6 +525,203 @@ static void hists__baseline_only(struct hists *hists) } } +static int64_t block_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + struct block_info *bi_l = left->block_info; + struct block_info *bi_r = right->block_info; + int cmp; + + if (!bi_l->sym || !bi_r->sym) { + if (!bi_l->sym && !bi_r->sym) + return 0; + else if (!bi_l->sym) + return -1; + else + return 1; + } + + if (bi_l->sym == bi_r->sym) { + if (bi_l->start == bi_r->start) { + if (bi_l->end == bi_r->end) + return 0; + else + return (int64_t)(bi_r->end - bi_l->end); + } else + return (int64_t)(bi_r->start - bi_l->start); + } else { + cmp = strcmp(bi_l->sym->name, bi_r->sym->name); + return cmp; + } + + if (bi_l->sym->start != bi_r->sym->start) + return (int64_t)(bi_r->sym->start - bi_l->sym->start); + + return (int64_t)(bi_r->sym->end - bi_l->sym->end); +} + +static int64_t block_cycles_diff_cmp(struct hist_entry *left, + struct hist_entry *right) +{ + bool pairs_left = hist_entry__has_pairs(left); + bool pairs_right = hist_entry__has_pairs(right); + s64 l, r; + + if (!pairs_left && !pairs_right) + return 0; + + l = labs(left->diff.cycles); + r = labs(right->diff.cycles); + return r - l; +} + +static int64_t block_sort(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return block_cycles_diff_cmp(right, left); +} + +static void init_block_hist(struct block_hist *bh) +{ + __hists__init(&bh->block_hists, &bh->block_list); + perf_hpp_list__init(&bh->block_list); + + INIT_LIST_HEAD(&bh->block_fmt.list); + INIT_LIST_HEAD(&bh->block_fmt.sort_list); + bh->block_fmt.cmp = block_cmp; + bh->block_fmt.sort = block_sort; + perf_hpp_list__register_sort_field(&bh->block_list, + &bh->block_fmt); + bh->valid = true; +} + +static void init_block_info(struct block_info *bi, struct symbol *sym, + struct cyc_hist *ch, int offset) +{ + bi->sym = sym; + bi->start = ch->start; + bi->end = offset; + bi->cycles = ch->cycles; + bi->cycles_aggr = ch->cycles_aggr; + bi->num = ch->num; + bi->num_aggr = ch->num_aggr; +} + +static int process_block_per_sym(struct hist_entry *he) +{ + struct annotation *notes; + struct cyc_hist *ch; + struct block_hist *bh; + + if (!he->ms.map || !he->ms.sym) + return 0; + + notes = symbol__annotation(he->ms.sym); + if (!notes || !notes->src || !notes->src->cycles_hist) + return 0; + + bh = container_of(he, struct block_hist, he); + init_block_hist(bh); + + ch = notes->src->cycles_hist; + for (unsigned int i = 0; i < symbol__size(he->ms.sym); i++) { + if (ch[i].num_aggr) { + struct block_info *bi; + struct hist_entry *he_block; + + bi = block_info__new(); + if (!bi) + return -1; + + init_block_info(bi, he->ms.sym, &ch[i], i); + he_block = hists__add_entry_block(&bh->block_hists, + &dummy_al, bi); + if (!he_block) { + block_info__put(bi); + return -1; + } + } + } + + return 0; +} + +static int block_pair_cmp(struct hist_entry *a, struct hist_entry *b) +{ + struct block_info *bi_a = a->block_info; + struct block_info *bi_b = b->block_info; + int cmp; + + if (!bi_a->sym || !bi_b->sym) + return -1; + + cmp = strcmp(bi_a->sym->name, bi_b->sym->name); + + if ((!cmp) && (bi_a->start == bi_b->start) && (bi_a->end == bi_b->end)) + return 0; + + return -1; +} + +static struct hist_entry *get_block_pair(struct hist_entry *he, + struct hists *hists_pair) +{ + struct rb_root_cached *root = hists_pair->entries_in; + struct rb_node *next = rb_first_cached(root); + int cmp; + + while (next != NULL) { + struct hist_entry *he_pair = rb_entry(next, struct hist_entry, + rb_node_in); + + next = rb_next(&he_pair->rb_node_in); + + cmp = block_pair_cmp(he_pair, he); + if (!cmp) + return he_pair; + } + + return NULL; +} + +static void compute_cycles_diff(struct hist_entry *he, + struct hist_entry *pair) +{ + pair->diff.computed = true; + if (pair->block_info->num && he->block_info->num) { + pair->diff.cycles = + pair->block_info->cycles_aggr / pair->block_info->num_aggr - + he->block_info->cycles_aggr / he->block_info->num_aggr; + } +} + +static void block_hists_match(struct hists *hists_base, + struct hists *hists_pair) +{ + struct rb_root_cached *root = hists_base->entries_in; + struct rb_node *next = rb_first_cached(root); + + while (next != NULL) { + struct hist_entry *he = rb_entry(next, struct hist_entry, + rb_node_in); + struct hist_entry *pair = get_block_pair(he, hists_pair); + + next = rb_next(&he->rb_node_in); + + if (pair) { + hist_entry__add_pair(pair, he); + compute_cycles_diff(he, pair); + } + } +} + +static int filter_cb(struct hist_entry *he, void *arg __maybe_unused) +{ + /* Skip the calculation of column length in output_resort */ + he->filtered = true; + return 0; +} + static void hists__precompute(struct hists *hists) { struct rb_root_cached *root; @@ -486,6 +734,7 @@ static void hists__precompute(struct hists *hists) next = rb_first_cached(root); while (next != NULL) { + struct block_hist *bh, *pair_bh; struct hist_entry *he, *pair; struct data__file *d; int i; @@ -493,6 +742,9 @@ static void hists__precompute(struct hists *hists) he = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&he->rb_node_in); + if (compute == COMPUTE_CYCLES) + process_block_per_sym(he); + data__for_each_file_new(i, d) { pair = get_pair_data(he, d); if (!pair) @@ -509,6 +761,19 @@ static void hists__precompute(struct hists *hists) case COMPUTE_WEIGHTED_DIFF: compute_wdiff(he, pair); break; + case COMPUTE_CYCLES: + process_block_per_sym(pair); + bh = container_of(he, struct block_hist, he); + pair_bh = container_of(pair, struct block_hist, + he); + + if (bh->valid && pair_bh->valid) { + block_hists_match(&bh->block_hists, + &pair_bh->block_hists); + hists__output_resort_cb(&pair_bh->block_hists, + NULL, filter_cb); + } + break; default: BUG_ON(1); } @@ -720,6 +985,9 @@ static void hists__process(struct hists *hists) hists__precompute(hists); hists__output_resort(hists, NULL); + if (compute == COMPUTE_CYCLES) + symbol_conf.report_block = true; + hists__fprintf(hists, !quiet, 0, 0, 0, stdout, !symbol_conf.use_callchain); } @@ -873,6 +1141,31 @@ static int parse_time_str(struct data__file *d, char *abstime_ostr, return ret; } +static int check_file_brstack(void) +{ + struct data__file *d; + bool has_br_stack; + int i; + + data__for_each_file(i, d) { + d->session = perf_session__new(&d->data, false, &pdiff.tool); + if (!d->session) { + pr_err("Failed to open %s\n", d->data.path); + return -1; + } + + has_br_stack = perf_header__has_feat(&d->session->header, + HEADER_BRANCH_STACK); + perf_session__delete(d->session); + if (!has_br_stack) + return 0; + } + + /* Set only all files having branch stacks */ + pdiff.has_br_stack = true; + return 0; +} + static int __cmd_diff(void) { struct data__file *d; @@ -950,7 +1243,7 @@ static const struct option options[] = { OPT_BOOLEAN('b', "baseline-only", &show_baseline_only, "Show only items with match in baseline"), OPT_CALLBACK('c', "compute", &compute, - "delta,delta-abs,ratio,wdiff:w1,w2 (default delta-abs)", + "delta,delta-abs,ratio,wdiff:w1,w2 (default delta-abs),cycles", "Entries differential computation selection", setup_compute), OPT_BOOLEAN('p', "period", &show_period, @@ -1028,6 +1321,49 @@ static int hpp__entry_baseline(struct hist_entry *he, char *buf, size_t size) return ret; } +static int cycles_printf(struct hist_entry *he, struct hist_entry *pair, + struct perf_hpp *hpp, int width) +{ + struct block_hist *bh = container_of(he, struct block_hist, he); + struct block_hist *bh_pair = container_of(pair, struct block_hist, he); + struct hist_entry *block_he; + struct block_info *bi; + char buf[128]; + char *start_line, *end_line; + + block_he = hists__get_entry(&bh_pair->block_hists, bh->block_idx); + if (!block_he) { + hpp->skip = true; + return 0; + } + + /* + * Avoid printing the warning "addr2line_init failed for ..." + */ + symbol_conf.disable_add2line_warn = true; + + bi = block_he->block_info; + + start_line = map__srcline(he->ms.map, bi->sym->start + bi->start, + he->ms.sym); + + end_line = map__srcline(he->ms.map, bi->sym->start + bi->end, + he->ms.sym); + + if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { + scnprintf(buf, sizeof(buf), "[%s -> %s] %4ld", + start_line, end_line, block_he->diff.cycles); + } else { + scnprintf(buf, sizeof(buf), "[%7lx -> %7lx] %4ld", + bi->start, bi->end, block_he->diff.cycles); + } + + free_srcline(start_line); + free_srcline(end_line); + + return scnprintf(hpp->buf, hpp->size, "%*s", width, buf); +} + static int __hpp__color_compare(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he, int comparison_method) @@ -1039,8 +1375,17 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt, s64 wdiff; char pfmt[20] = " "; - if (!pair) + if (!pair) { + if (comparison_method == COMPUTE_CYCLES) { + struct block_hist *bh; + + bh = container_of(he, struct block_hist, he); + if (bh->block_idx) + hpp->skip = true; + } + goto no_print; + } switch (comparison_method) { case COMPUTE_DELTA: @@ -1075,6 +1420,8 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt, return color_snprintf(hpp->buf, hpp->size, get_percent_color(wdiff), pfmt, wdiff); + case COMPUTE_CYCLES: + return cycles_printf(he, pair, hpp, dfmt->header_width); default: BUG_ON(1); } @@ -1104,6 +1451,12 @@ static int hpp__color_wdiff(struct perf_hpp_fmt *fmt, return __hpp__color_compare(fmt, hpp, he, COMPUTE_WEIGHTED_DIFF); } +static int hpp__color_cycles(struct perf_hpp_fmt *fmt, + struct perf_hpp *hpp, struct hist_entry *he) +{ + return __hpp__color_compare(fmt, hpp, he, COMPUTE_CYCLES); +} + static void hpp__entry_unpair(struct hist_entry *he, int idx, char *buf, size_t size) { @@ -1305,6 +1658,10 @@ static void data__hpp_register(struct data__file *d, int idx) fmt->color = hpp__color_delta; fmt->sort = hist_entry__cmp_delta_abs; break; + case PERF_HPP_DIFF__CYCLES: + fmt->color = hpp__color_cycles; + fmt->sort = hist_entry__cmp_nop; + break; default: fmt->sort = hist_entry__cmp_nop; break; @@ -1385,6 +1742,13 @@ static int ui_init(void) case COMPUTE_DELTA_ABS: fmt->sort = hist_entry__cmp_delta_abs_idx; break; + case COMPUTE_CYCLES: + /* + * Should set since 'fmt->sort' is called without + * checking valid during sorting + */ + fmt->sort = hist_entry__cmp_nop; + break; default: BUG_ON(1); } @@ -1481,12 +1845,20 @@ int cmd_diff(int argc, const char **argv) if (quiet) perf_quiet_option(); + symbol__annotation_init(); + if (symbol__init(NULL) < 0) return -1; if (data_init(argc, argv) < 0) return -1; + if (check_file_brstack() < 0) + return -1; + + if (compute == COMPUTE_CYCLES && !pdiff.has_br_stack) + return -1; + if (ui_init() < 0) return -1; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 520e5b6b9ef9..2f6232f1bfdc 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3522,6 +3522,15 @@ int cmd_script(int argc, const char **argv) "Time span of interest (start,stop)"), OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name, "Show inline function"), + OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory", + "guest mount directory under which every guest os" + " instance has a subdir"), + OPT_STRING(0, "guestvmlinux", &symbol_conf.default_guest_vmlinux_name, + "file", "file saving guest os vmlinux"), + OPT_STRING(0, "guestkallsyms", &symbol_conf.default_guest_kallsyms, + "file", "file saving guest os /proc/kallsyms"), + OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules, + "file", "file saving guest os /proc/modules"), OPT_END() }; const char * const script_subcommands[] = { "record", "report", NULL }; @@ -3541,6 +3550,16 @@ int cmd_script(int argc, const char **argv) argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, PARSE_OPT_STOP_AT_NON_OPTION); + if (symbol_conf.guestmount || + symbol_conf.default_guest_vmlinux_name || + symbol_conf.default_guest_kallsyms || + symbol_conf.default_guest_modules) { + /* + * Enable guest sample processing. + */ + perf_guest = true; + } + data.path = input_name; data.force = symbol_conf.force; diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json new file mode 100644 index 000000000000..0d1556fcdffe --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json @@ -0,0 +1,44 @@ +[ + { + "EventCode": "0x02", + "EventName": "uncore_hisi_ddrc.flux_wcmd", + "BriefDescription": "DDRC write commands", + "PublicDescription": "DDRC write commands", + "Unit": "hisi_sccl,ddrc", + }, + { + "EventCode": "0x03", + "EventName": "uncore_hisi_ddrc.flux_rcmd", + "BriefDescription": "DDRC read commands", + "PublicDescription": "DDRC read commands", + "Unit": "hisi_sccl,ddrc", + }, + { + "EventCode": "0x04", + "EventName": "uncore_hisi_ddrc.flux_wr", + "BriefDescription": "DDRC precharge commands", + "PublicDescription": "DDRC precharge commands", + "Unit": "hisi_sccl,ddrc", + }, + { + "EventCode": "0x05", + "EventName": "uncore_hisi_ddrc.act_cmd", + "BriefDescription": "DDRC active commands", + "PublicDescription": "DDRC active commands", + "Unit": "hisi_sccl,ddrc", + }, + { + "EventCode": "0x06", + "EventName": "uncore_hisi_ddrc.rnk_chg", + "BriefDescription": "DDRC rank commands", + "PublicDescription": "DDRC rank commands", + "Unit": "hisi_sccl,ddrc", + }, + { + "EventCode": "0x07", + "EventName": "uncore_hisi_ddrc.rw_chg", + "BriefDescription": "DDRC read and write changes", + "PublicDescription": "DDRC read and write changes", + "Unit": "hisi_sccl,ddrc", + }, +] diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json new file mode 100644 index 000000000000..447d3064de90 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json @@ -0,0 +1,51 @@ +[ + { + "EventCode": "0x00", + "EventName": "uncore_hisi_hha.rx_ops_num", + "BriefDescription": "The number of all operations received by the HHA", + "PublicDescription": "The number of all operations received by the HHA", + "Unit": "hisi_sccl,hha", + }, + { + "EventCode": "0x01", + "EventName": "uncore_hisi_hha.rx_outer", + "BriefDescription": "The number of all operations received by the HHA from another socket", + "PublicDescription": "The number of all operations received by the HHA from another socket", + "Unit": "hisi_sccl,hha", + }, + { + "EventCode": "0x02", + "EventName": "uncore_hisi_hha.rx_sccl", + "BriefDescription": "The number of all operations received by the HHA from another SCCL in this socket", + "PublicDescription": "The number of all operations received by the HHA from another SCCL in this socket", + "Unit": "hisi_sccl,hha", + }, + { + "EventCode": "0x1c", + "EventName": "uncore_hisi_hha.rd_ddr_64b", + "BriefDescription": "The number of read operations sent by HHA to DDRC which size is 64 bytes", + "PublicDescription": "The number of read operations sent by HHA to DDRC which size is 64bytes", + "Unit": "hisi_sccl,hha", + }, + { + "EventCode": "0x1d", + "EventName": "uncore_hisi_hha.wr_dr_64b", + "BriefDescription": "The number of write operations sent by HHA to DDRC which size is 64 bytes", + "PublicDescription": "The number of write operations sent by HHA to DDRC which size is 64 bytes", + "Unit": "hisi_sccl,hha", + }, + { + "EventCode": "0x1e", + "EventName": "uncore_hisi_hha.rd_ddr_128b", + "BriefDescription": "The number of read operations sent by HHA to DDRC which size is 128 bytes", + "PublicDescription": "The number of read operations sent by HHA to DDRC which size is 128 bytes", + "Unit": "hisi_sccl,hha", + }, + { + "EventCode": "0x1f", + "EventName": "uncore_hisi_hha.wr_ddr_128b", + "BriefDescription": "The number of write operations sent by HHA to DDRC which size is 128 bytes", + "PublicDescription": "The number of write operations sent by HHA to DDRC which size is 128 bytes", + "Unit": "hisi_sccl,hha", + }, +] diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json new file mode 100644 index 000000000000..ca48747642e1 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json @@ -0,0 +1,37 @@ +[ + { + "EventCode": "0x00", + "EventName": "uncore_hisi_l3c.rd_cpipe", + "BriefDescription": "Total read accesses", + "PublicDescription": "Total read accesses", + "Unit": "hisi_sccl,l3c", + }, + { + "EventCode": "0x01", + "EventName": "uncore_hisi_l3c.wr_cpipe", + "BriefDescription": "Total write accesses", + "PublicDescription": "Total write accesses", + "Unit": "hisi_sccl,l3c", + }, + { + "EventCode": "0x02", + "EventName": "uncore_hisi_l3c.rd_hit_cpipe", + "BriefDescription": "Total read hits", + "PublicDescription": "Total read hits", + "Unit": "hisi_sccl,l3c", + }, + { + "EventCode": "0x03", + "EventName": "uncore_hisi_l3c.wr_hit_cpipe", + "BriefDescription": "Total write hits", + "PublicDescription": "Total write hits", + "Unit": "hisi_sccl,l3c", + }, + { + "EventCode": "0x04", + "EventName": "uncore_hisi_l3c.victim_num", + "BriefDescription": "l3c precharge commands", + "PublicDescription": "l3c precharge commands", + "Unit": "hisi_sccl,l3c", + }, +] diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index 1a1a3501180a..a382b115633d 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -314,13 +314,13 @@ "MetricName": "DRAM_BW_Use" }, { - "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", + "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x35\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )", "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", "MetricGroup": "Memory_Lat", "MetricName": "DRAM_Read_Latency" }, { - "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@", + "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1\\\\\\,config\\=0x40433@", "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", "MetricGroup": "Memory_BW", "MetricName": "DRAM_Parallel_Reads" diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index 56e03ba771f4..35b255fa6a79 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -314,35 +314,17 @@ "MetricName": "DRAM_BW_Use" }, { - "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", + "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x35\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )", "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", "MetricGroup": "Memory_Lat", "MetricName": "DRAM_Read_Latency" }, { - "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@", + "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1\\\\\\,config\\=0x40433@", "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", "MetricGroup": "Memory_BW", "MetricName": "DRAM_Parallel_Reads" }, - { - "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\\\\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ ) if 1 if 0 == 1 else 0 else 0", - "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches", - "MetricGroup": "Memory_Lat", - "MetricName": "MEM_PMM_Read_Latency" - }, - { - "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0", - "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]", - "MetricGroup": "Memory_BW", - "MetricName": "PMM_Read_BW" - }, - { - "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0", - "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]", - "MetricGroup": "Memory_BW", - "MetricName": "PMM_Write_BW" - }, { "MetricExpr": "cha_0@event\\=0x0@", "BriefDescription": "Socket actual clocks when any core is active on that socket", diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index a1184ea64cc6..287a6f10ca48 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -236,6 +236,9 @@ static struct map { { "CPU-M-CF", "cpum_cf" }, { "CPU-M-SF", "cpum_sf" }, { "UPI LL", "uncore_upi" }, + { "hisi_sccl,ddrc", "hisi_sccl,ddrc" }, + { "hisi_sccl,hha", "hisi_sccl,hha" }, + { "hisi_sccl,l3c", "hisi_sccl,l3c" }, {} }; diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 9eb0131c3ade..89393c79d870 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -531,6 +531,30 @@ out: return printed; } +static int hist_entry__block_fprintf(struct hist_entry *he, + char *bf, size_t size, + FILE *fp) +{ + struct block_hist *bh = container_of(he, struct block_hist, he); + int ret = 0; + + for (unsigned int i = 0; i < bh->block_hists.nr_entries; i++) { + struct perf_hpp hpp = { + .buf = bf, + .size = size, + .skip = false, + }; + + bh->block_idx = i; + hist_entry__snprintf(he, &hpp); + + if (!hpp.skip) + ret += fprintf(fp, "%s\n", bf); + } + + return ret; +} + static int hist_entry__fprintf(struct hist_entry *he, size_t size, char *bf, size_t bfsz, FILE *fp, bool ignore_callchains) @@ -550,6 +574,9 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, if (symbol_conf.report_hierarchy) return hist_entry__hierarchy_fprintf(he, &hpp, hists, fp); + if (symbol_conf.report_block) + return hist_entry__block_fprintf(he, bf, size, fp); + hist_entry__snprintf(he, &hpp); ret = fprintf(fp, "%s\n", bf); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index fb3271fd420c..27cecb59f866 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -376,6 +376,24 @@ void hists__delete_entries(struct hists *hists) } } +struct hist_entry *hists__get_entry(struct hists *hists, int idx) +{ + struct rb_node *next = rb_first_cached(&hists->entries); + struct hist_entry *n; + int i = 0; + + while (next) { + n = rb_entry(next, struct hist_entry, rb_node); + if (i == idx) + return n; + + next = rb_next(&n->rb_node); + i++; + } + + return NULL; +} + /* * histogram, sorted on item, collects periods */ @@ -574,6 +592,8 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, */ mem_info__zput(entry->mem_info); + block_info__zput(entry->block_info); + /* If the map of an existing hist_entry has * become out-of-date due to an exec() or * similar, update it. Otherwise we will @@ -645,6 +665,7 @@ __hists__add_entry(struct hists *hists, struct symbol *sym_parent, struct branch_info *bi, struct mem_info *mi, + struct block_info *block_info, struct perf_sample *sample, bool sample_self, struct hist_entry_ops *ops) @@ -677,6 +698,7 @@ __hists__add_entry(struct hists *hists, .hists = hists, .branch_info = bi, .mem_info = mi, + .block_info = block_info, .transaction = sample->transaction, .raw_data = sample->raw_data, .raw_size = sample->raw_size, @@ -699,7 +721,7 @@ struct hist_entry *hists__add_entry(struct hists *hists, struct perf_sample *sample, bool sample_self) { - return __hists__add_entry(hists, al, sym_parent, bi, mi, + return __hists__add_entry(hists, al, sym_parent, bi, mi, NULL, sample, sample_self, NULL); } @@ -712,10 +734,22 @@ struct hist_entry *hists__add_entry_ops(struct hists *hists, struct perf_sample *sample, bool sample_self) { - return __hists__add_entry(hists, al, sym_parent, bi, mi, + return __hists__add_entry(hists, al, sym_parent, bi, mi, NULL, sample, sample_self, ops); } +struct hist_entry *hists__add_entry_block(struct hists *hists, + struct addr_location *al, + struct block_info *block_info) +{ + struct hist_entry entry = { + .block_info = block_info, + .hists = hists, + }, *he = hists__findnew_entry(hists, &entry, al, false); + + return he; +} + static int iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused, struct addr_location *al __maybe_unused) @@ -1213,6 +1247,9 @@ void hist_entry__delete(struct hist_entry *he) mem_info__zput(he->mem_info); } + if (he->block_info) + block_info__zput(he->block_info); + zfree(&he->res_samples); zfree(&he->stat_acc); free_srcline(he->srcline); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 76ff6c6d03b8..24635f36148d 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -16,6 +16,7 @@ struct addr_location; struct map_symbol; struct mem_info; struct branch_info; +struct block_info; struct symbol; enum hist_filter { @@ -149,6 +150,10 @@ struct hist_entry *hists__add_entry_ops(struct hists *hists, struct perf_sample *sample, bool sample_self); +struct hist_entry *hists__add_entry_block(struct hists *hists, + struct addr_location *al, + struct block_info *bi); + int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, int max_stack_depth, void *arg); @@ -178,6 +183,8 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel); void hists__delete_entries(struct hists *hists); void hists__output_recalc_col_len(struct hists *hists, int max_rows); +struct hist_entry *hists__get_entry(struct hists *hists, int idx); + u64 hists__total_period(struct hists *hists); void hists__reset_stats(struct hists *hists); void hists__inc_stats(struct hists *hists, struct hist_entry *h); @@ -243,6 +250,7 @@ struct perf_hpp { size_t size; const char *sep; void *ptr; + bool skip; }; struct perf_hpp_fmt { diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index bc25995255ab..d8164574cb16 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -375,7 +375,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, struct mep *me = container_of(node, struct mep, nd); if (metricgroups) - printf("%s%s%s", me->name, metrics ? ":" : "", raw ? " " : "\n"); + printf("%s%s%s", me->name, metrics && !raw ? ":" : "", raw ? " " : "\n"); if (metrics) metricgroup__print_strlist(me->metrics, raw); next = rb_next(node); @@ -409,6 +409,7 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events, const char **ids; int idnum; struct egroup *eg; + bool no_group = false; pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); @@ -419,11 +420,25 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events, strbuf_addf(events, ","); for (j = 0; j < idnum; j++) { pr_debug("found event %s\n", ids[j]); + /* + * Duration time maps to a software event and can make + * groups not count. Always use it outside a + * group. + */ + if (!strcmp(ids[j], "duration_time")) { + if (j > 0) + strbuf_addf(events, "}:W,"); + strbuf_addf(events, "duration_time"); + no_group = true; + continue; + } strbuf_addf(events, "%s%s", - j == 0 ? "{" : ",", + j == 0 || no_group ? "{" : ",", ids[j]); + no_group = false; } - strbuf_addf(events, "}:W"); + if (!no_group) + strbuf_addf(events, "}:W"); eg = malloc(sizeof(struct egroup)); if (!eg) { diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 8139a1f3ed39..55f4de6442e3 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -701,6 +701,46 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) return map; } +static bool pmu_uncore_alias_match(const char *pmu_name, const char *name) +{ + char *tmp = NULL, *tok, *str; + bool res; + + str = strdup(pmu_name); + if (!str) + return false; + + /* + * uncore alias may be from different PMU with common prefix + */ + tok = strtok_r(str, ",", &tmp); + if (strncmp(pmu_name, tok, strlen(tok))) { + res = false; + goto out; + } + + /* + * Match more complex aliases where the alias name is a comma-delimited + * list of tokens, orderly contained in the matching PMU name. + * + * Example: For alias "socket,pmuname" and PMU "socketX_pmunameY", we + * match "socket" in "socketX_pmunameY" and then "pmuname" in + * "pmunameY". + */ + for (; tok; name += strlen(tok), tok = strtok_r(NULL, ",", &tmp)) { + name = strstr(name, tok); + if (!name) { + res = false; + goto out; + } + } + + res = true; +out: + free(str); + return res; +} + /* * From the pmu_events_map, find the table of PMU events that corresponds * to the current running CPU. Then, add all PMU events from that table @@ -731,12 +771,8 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) break; } - /* - * uncore alias may be from different PMU - * with common prefix - */ if (pmu_is_uncore(name) && - !strncmp(pname, name, strlen(pname))) + pmu_uncore_alias_match(pname, name)) goto new_alias; if (strcmp(pname, name)) diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index ce376a73f964..a0f232151d6f 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -79,6 +79,9 @@ struct hist_entry_diff { /* HISTC_WEIGHTED_DIFF */ s64 wdiff; + + /* PERF_HPP_DIFF__CYCLES */ + s64 cycles; }; }; @@ -144,6 +147,7 @@ struct hist_entry { long time; struct hists *hists; struct mem_info *mem_info; + struct block_info *block_info; void *raw_data; u32 raw_size; int num_res; @@ -285,6 +289,15 @@ struct sort_entry { u8 se_width_idx; }; +struct block_hist { + struct hists block_hists; + struct perf_hpp_list block_list; + struct perf_hpp_fmt block_fmt; + int block_idx; + bool valid; + struct hist_entry he; +}; + extern struct sort_entry sort_thread; extern struct list_head hist_entry__sort_list; diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 1824cabe3512..dcad75daf5e4 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -11,6 +11,7 @@ #include "util/util.h" #include "util/debug.h" #include "util/callchain.h" +#include "util/symbol_conf.h" #include "srcline.h" #include "string2.h" #include "symbol.h" @@ -288,7 +289,8 @@ static int addr2line(const char *dso_name, u64 addr, } if (a2l == NULL) { - pr_warning("addr2line_init failed for %s\n", dso_name); + if (!symbol_conf.disable_add2line_warn) + pr_warning("addr2line_init failed for %s\n", dso_name); return 0; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 46d2c03814a1..ae2ce255e848 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2351,3 +2351,25 @@ struct mem_info *mem_info__new(void) refcount_set(&mi->refcnt, 1); return mi; } + +struct block_info *block_info__get(struct block_info *bi) +{ + if (bi) + refcount_inc(&bi->refcnt); + return bi; +} + +void block_info__put(struct block_info *bi) +{ + if (bi && refcount_dec_and_test(&bi->refcnt)) + free(bi); +} + +struct block_info *block_info__new(void) +{ + struct block_info *bi = zalloc(sizeof(*bi)); + + if (bi) + refcount_set(&bi->refcnt, 1); + return bi; +} diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 9a8fe012910a..12755b42ea93 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -131,6 +131,17 @@ struct mem_info { refcount_t refcnt; }; +struct block_info { + struct symbol *sym; + u64 start; + u64 end; + u64 cycles; + u64 cycles_aggr; + int num; + int num_aggr; + refcount_t refcnt; +}; + struct addr_location { struct machine *machine; struct thread *thread; @@ -332,4 +343,16 @@ static inline void __mem_info__zput(struct mem_info **mi) #define mem_info__zput(mi) __mem_info__zput(&mi) +struct block_info *block_info__new(void); +struct block_info *block_info__get(struct block_info *bi); +void block_info__put(struct block_info *bi); + +static inline void __block_info__zput(struct block_info **bi) +{ + block_info__put(*bi); + *bi = NULL; +} + +#define block_info__zput(bi) __block_info__zput(&bi) + #endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index 382ba63fc554..e6880789864c 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -39,7 +39,9 @@ struct symbol_conf { hide_unresolved, raw_trace, report_hierarchy, - inline_name; + report_block, + inline_name, + disable_add2line_warn; const char *vmlinux_name, *kallsyms_name, *source_prefix,