Merge tag 'perf-tools-for-v5.17-2022-01-16' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tool updates from Arnaldo Carvalho de Melo:
 "New features:

   - Add 'trace' subcommand for 'perf ftrace', setting the stage for
     more 'perf ftrace' subcommands. Not using a subcommand yields the
     previous behaviour of 'perf ftrace'.

   - Add 'latency' subcommand to 'perf ftrace', that can use the
     function graph tracer or a BPF optimized one, via the -b/--use-bpf
     option.

     E.g.:

	$ sudo perf ftrace latency -a -T mutex_lock sleep 1
	#   DURATION     |      COUNT | GRAPH                          |
	     0 - 1    us |       4596 | ########################       |
	     1 - 2    us |       1680 | #########                      |
	     2 - 4    us |       1106 | #####                          |
	     4 - 8    us |        546 | ##                             |
	     8 - 16   us |        562 | ###                            |
	    16 - 32   us |          1 |                                |
	    32 - 64   us |          0 |                                |
	    64 - 128  us |          0 |                                |
	   128 - 256  us |          0 |                                |
	   256 - 512  us |          0 |                                |
	   512 - 1024 us |          0 |                                |
	     1 - 2    ms |          0 |                                |
	     2 - 4    ms |          0 |                                |
	     4 - 8    ms |          0 |                                |
	     8 - 16   ms |          0 |                                |
	    16 - 32   ms |          0 |                                |
	    32 - 64   ms |          0 |                                |
	    64 - 128  ms |          0 |                                |
	   128 - 256  ms |          0 |                                |
	   256 - 512  ms |          0 |                                |
	   512 - 1024 ms |          0 |                                |
	     1 - ...   s |          0 |                                |

     The original implementation of this command was in the bcc tool.
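
     The same measurement can also be collected with the BPF-based
     implementation via -b/--use-bpf; an illustrative invocation (same
     options as above, output format unchanged) would be:

	$ sudo perf ftrace latency -b -a -T mutex_lock sleep 1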

   - Support --cputype option for hybrid events in 'perf stat'.
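
     E.g., an illustrative invocation limiting the events to one of the
     hybrid PMU types (the documentation below mentions "core" and "atom"):

	$ perf stat --cputype core -e cycles -a sleep 1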

  Improvements:

   - Call chain improvements for ARM64.

   - No need to do any affinity setup when profiling pids.

   - Reduce multiplexing with duration_time in 'perf stat' metrics.

   - Improve error message for uncore events, stating that some event
     groups can only be used in system wide (-a) mode.

   - perf stat metric group leader fixes/improvements, including arch
     specific changes to better support Intel topdown events.

   - Probe the non-deprecated sysfs path first, i.e. try
     /sys/devices/system/cpu/cpuN/topology/core_cpus first, then the
     deprecated /sys/devices/system/cpu/cpuN/topology/thread_siblings.

   - Disable debuginfod by default in 'perf record', to avoid stalls on
     distros such as Fedora 35.
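
     To opt back in for a session, a URL can still be passed explicitly,
     e.g. (server address reused from the documentation example below):

	$ perf record --debuginfod=http://192.168.122.174:8002 -- sleep 1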

   - Use unbuffered output in 'perf bench' when pipe/tee'ing to a file.
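
     E.g., output now appears as it is produced in cases like:

	$ perf bench mem memcpy | tee bench.log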

   - Enable ignore_missing_thread in 'perf trace'.

  Fixes:

   - Avoid TUI crash when navigating in the annotation of recursive
     functions.

   - Fix hex dump character output in 'perf script'.

   - Fix JSON indentation to 4 spaces standard in the ARM vendor event
     files.

   - Fix use after free in metric__new().

   - Fix IS_ERR_OR_NULL() usage in the perf BPF loader.

   - Fix up cross-arch register support, i.e. when printing register
     names take into account the architecture where the perf.data file
     was collected.

   - Fix SMT fallback with large core counts.

   - Don't lower case MetricExpr when parsing JSON files so as not to
     lose info such as the ":G" event modifier in metrics.

  perf test:

   - Add basic stress test for sigtrap handling to 'perf test'.

   - Fix 'perf test' failures on s/390.

   - Enable system wide for metricgroups test in 'perf test'.

   - Use 3 digits for test numbering now we can have more tests.

  Arch specific:

   - Add events for Arm Neoverse N2 in the ARM JSON vendor event files.

   - Support PERF_MEM_LVLNUM encodings in powerpc. These came from a
     single patch series where I incorrectly merged the kernel bits,
     which were then reverted after coordination with Michael Ellerman
     and Stephen Rothwell.

   - Add ARM SPE total latency as PERF_SAMPLE_WEIGHT.

   - Update AMD documentation, with info on raw event encoding.
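
     E.g., from the updated documentation below: an event whose EventSelect
     does not fit in a byte has its upper nibble encoded in bits 32-35 of
     the raw value:

	$ perf stat -e r20000038f -a sleep 1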

   - Add support for global and local variants of the "p_stage_cyc" sort
     key, applicable to perf.data files collected on powerpc.

   - Remove duplicate and incorrect aux size checks in the ARM CoreSight
     ETM code.

  Refactorings:

   - Add a perf_cpu abstraction to disambiguate CPUs and CPU map
     indexes, fixing problems along the way.
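
     The wrapper and the accessors it touches, as declared in the libperf
     changes further below:

	/* A CPU number, distinct from an index into a perf_cpu_map. */
	struct perf_cpu {
		int cpu;
	};

	struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
	int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu);
	bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);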

   - Document CPU map methods.

  UAPI sync:

   - Update arch/x86/lib/mem{cpy,set}_64.S copies used in 'perf bench
     mem memcpy'

   - Sync UAPI files with the kernel sources: drm, msr-index,
     cpufeatures.

  Build system:

   - Enable warnings through HOSTCFLAGS.

   - Drop requirement for libstdc++.so for libopencsd check

  libperf:

   - Make libperf adopt perf_counts_values__scale() from tools/perf/util/.
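
     A minimal usage sketch, following the new stat multiplexing test
     mentioned below (evsel opened beforehand via perf_evsel__open()):

	struct perf_counts_values counts = { .val = 0 };
	s8 scaled;

	perf_evsel__read(evsel, 0, 0, &counts);
	perf_counts_values__scale(&counts, true, &scaled);
	/* scaled: 1 = value scaled by ena/run, 0 = no scaling, -1 = not running */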

   - Add a stat multiplexing test to libperf"

* tag 'perf-tools-for-v5.17-2022-01-16' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (115 commits)
  perf record: Disable debuginfod by default
  perf evlist: No need to do any affinity setup when profiling pids
  perf cpumap: Add is_dummy() method
  perf metric: Fix metric_leader
  perf cputopo: Fix CPU topology reading on s/390
  perf metricgroup: Fix use after free in metric__new()
  libperf tests: Update a use of the new cpumap API
  perf arm: Fix off-by-one directory path
  tools arch x86: Sync the msr-index.h copy with the kernel sources
  tools headers cpufeatures: Sync with the kernel sources
  tools headers UAPI: Update tools's copy of drm.h header
  tools arch: Update arch/x86/lib/mem{cpy,set}_64.S copies used in 'perf bench mem memcpy'
  perf pmu-events: Don't lower case MetricExpr
  perf expr: Add debug logging for literals
  perf tools: Probe non-deprecated sysfs path 1st
  perf tools: Fix SMT fallback with large core counts
  perf cpumap: Give CPUs their own type
  perf stat: Correct first_shadow_cpu to return index
  perf script: Fix flipped index and cpu
  perf c2c: Use more intention revealing iterator
  ...
Linus Torvalds 2022-01-18 06:32:11 +02:00
commit 57d17378a4
153 changed files with 4695 additions and 2185 deletions


@ -315,6 +315,7 @@
#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */


@ -486,6 +486,23 @@
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
/* AMD Collaborative Processor Performance Control MSRs */
#define MSR_AMD_CPPC_CAP1 0xc00102b0
#define MSR_AMD_CPPC_ENABLE 0xc00102b1
#define MSR_AMD_CPPC_CAP2 0xc00102b2
#define MSR_AMD_CPPC_REQ 0xc00102b3
#define MSR_AMD_CPPC_STATUS 0xc00102b4
#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff)
#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff)
#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff)
#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff)
#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0)
#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8)
#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16)
#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24)
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9


@ -39,7 +39,7 @@ SYM_FUNC_START_WEAK(memcpy)
rep movsq
movl %edx, %ecx
rep movsb
ret
RET
SYM_FUNC_END(memcpy)
SYM_FUNC_END_ALIAS(__memcpy)
EXPORT_SYMBOL(memcpy)
@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
ret
RET
SYM_FUNC_END(memcpy_erms)
SYM_FUNC_START_LOCAL(memcpy_orig)
@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movq %r9, 1*8(%rdi)
movq %r10, -2*8(%rdi, %rdx)
movq %r11, -1*8(%rdi, %rdx)
retq
RET
.p2align 4
.Lless_16bytes:
cmpl $8, %edx
@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movq -1*8(%rsi, %rdx), %r9
movq %r8, 0*8(%rdi)
movq %r9, -1*8(%rdi, %rdx)
retq
RET
.p2align 4
.Lless_8bytes:
cmpl $4, %edx
@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movl -4(%rsi, %rdx), %r8d
movl %ecx, (%rdi)
movl %r8d, -4(%rdi, %rdx)
retq
RET
.p2align 4
.Lless_3bytes:
subl $1, %edx
@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movb %cl, (%rdi)
.Lend:
retq
RET
SYM_FUNC_END(memcpy_orig)
.popsection


@ -40,7 +40,7 @@ SYM_FUNC_START(__memset)
movl %edx,%ecx
rep stosb
movq %r9,%rax
ret
RET
SYM_FUNC_END(__memset)
SYM_FUNC_END_ALIAS(memset)
EXPORT_SYMBOL(memset)
@ -63,7 +63,7 @@ SYM_FUNC_START_LOCAL(memset_erms)
movq %rdx,%rcx
rep stosb
movq %r9,%rax
ret
RET
SYM_FUNC_END(memset_erms)
SYM_FUNC_START_LOCAL(memset_orig)
@ -125,7 +125,7 @@ SYM_FUNC_START_LOCAL(memset_orig)
.Lende:
movq %r10,%rax
ret
RET
.Lbad_alignment:
cmpq $7,%rdx


@ -99,7 +99,7 @@ cxx_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXX
###
## HOSTCC C flags
host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(KBUILD_HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj))
host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj))
# output directory for tests below
TMPOUT = .tmp_$$$$


@ -1096,6 +1096,24 @@ extern "C" {
#define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer)
#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array)
/**
* DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata.
*
* This queries metadata about a framebuffer. User-space fills
* &drm_mode_fb_cmd2.fb_id as the input, and the kernels fills the rest of the
* struct as the output.
*
* If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles
* will be filled with GEM buffer handles. Planes are valid until one has a
* zero handle -- this can be used to compute the number of planes.
*
* Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid
* until one has a zero &drm_mode_fb_cmd2.pitches.
*
* If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set
* in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the
* modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier.
*/
#define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2)
/*


@ -1332,7 +1332,10 @@ union perf_mem_data_src {
/* hop level */
#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */
/* 2-7 available */
#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */
#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */
#define PERF_MEM_HOPS_3 0x04 /* remote board */
/* 5-7 available */
#define PERF_MEM_HOPS_SHIFT 43
#define PERF_MEM_S(a, s) \


@ -48,6 +48,7 @@ SYNOPSIS
int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
bool perf_cpu_map__empty(const struct perf_cpu_map *map);
int perf_cpu_map__max(struct perf_cpu_map *map);
bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu);
#define perf_cpu_map__for_each_cpu(cpu, idx, cpus)
--
@ -135,16 +136,16 @@ SYNOPSIS
int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
void perf_evsel__close(struct perf_evsel *evsel);
void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx);
int perf_evsel__mmap(struct perf_evsel *evsel, int pages);
void perf_evsel__munmap(struct perf_evsel *evsel);
void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread);
int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread);
int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count);
int perf_evsel__enable(struct perf_evsel *evsel);
int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
int perf_evsel__disable(struct perf_evsel *evsel);
int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);


@ -10,15 +10,24 @@
#include <ctype.h>
#include <limits.h>
struct perf_cpu_map *perf_cpu_map__dummy_new(void)
static struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)
{
struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int));
struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus);
if (cpus != NULL) {
cpus->nr = 1;
cpus->map[0] = -1;
cpus->nr = nr_cpus;
refcount_set(&cpus->refcnt, 1);
}
return cpus;
}
struct perf_cpu_map *perf_cpu_map__dummy_new(void)
{
struct perf_cpu_map *cpus = perf_cpu_map__alloc(1);
if (cpus)
cpus->map[0].cpu = -1;
return cpus;
}
@ -54,15 +63,12 @@ static struct perf_cpu_map *cpu_map__default_new(void)
if (nr_cpus < 0)
return NULL;
cpus = malloc(sizeof(*cpus) + nr_cpus * sizeof(int));
cpus = perf_cpu_map__alloc(nr_cpus);
if (cpus != NULL) {
int i;
for (i = 0; i < nr_cpus; ++i)
cpus->map[i] = i;
cpus->nr = nr_cpus;
refcount_set(&cpus->refcnt, 1);
cpus->map[i].cpu = i;
}
return cpus;
@ -73,31 +79,32 @@ struct perf_cpu_map *perf_cpu_map__default_new(void)
return cpu_map__default_new();
}
static int cmp_int(const void *a, const void *b)
static int cmp_cpu(const void *a, const void *b)
{
return *(const int *)a - *(const int*)b;
const struct perf_cpu *cpu_a = a, *cpu_b = b;
return cpu_a->cpu - cpu_b->cpu;
}
static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus)
{
size_t payload_size = nr_cpus * sizeof(int);
struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + payload_size);
size_t payload_size = nr_cpus * sizeof(struct perf_cpu);
struct perf_cpu_map *cpus = perf_cpu_map__alloc(nr_cpus);
int i, j;
if (cpus != NULL) {
memcpy(cpus->map, tmp_cpus, payload_size);
qsort(cpus->map, nr_cpus, sizeof(int), cmp_int);
qsort(cpus->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu);
/* Remove dups */
j = 0;
for (i = 0; i < nr_cpus; i++) {
if (i == 0 || cpus->map[i] != cpus->map[i - 1])
cpus->map[j++] = cpus->map[i];
if (i == 0 || cpus->map[i].cpu != cpus->map[i - 1].cpu)
cpus->map[j++].cpu = cpus->map[i].cpu;
}
cpus->nr = j;
assert(j <= nr_cpus);
refcount_set(&cpus->refcnt, 1);
}
return cpus;
}
@ -105,7 +112,7 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file)
{
struct perf_cpu_map *cpus = NULL;
int nr_cpus = 0;
int *tmp_cpus = NULL, *tmp;
struct perf_cpu *tmp_cpus = NULL, *tmp;
int max_entries = 0;
int n, cpu, prev;
char sep;
@ -124,24 +131,24 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file)
if (new_max >= max_entries) {
max_entries = new_max + MAX_NR_CPUS / 2;
tmp = realloc(tmp_cpus, max_entries * sizeof(int));
tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto out_free_tmp;
tmp_cpus = tmp;
}
while (++prev < cpu)
tmp_cpus[nr_cpus++] = prev;
tmp_cpus[nr_cpus++].cpu = prev;
}
if (nr_cpus == max_entries) {
max_entries += MAX_NR_CPUS;
tmp = realloc(tmp_cpus, max_entries * sizeof(int));
tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto out_free_tmp;
tmp_cpus = tmp;
}
tmp_cpus[nr_cpus++] = cpu;
tmp_cpus[nr_cpus++].cpu = cpu;
if (n == 2 && sep == '-')
prev = cpu;
else
@ -179,7 +186,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
unsigned long start_cpu, end_cpu = 0;
char *p = NULL;
int i, nr_cpus = 0;
int *tmp_cpus = NULL, *tmp;
struct perf_cpu *tmp_cpus = NULL, *tmp;
int max_entries = 0;
if (!cpu_list)
@ -220,17 +227,17 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
for (; start_cpu <= end_cpu; start_cpu++) {
/* check for duplicates */
for (i = 0; i < nr_cpus; i++)
if (tmp_cpus[i] == (int)start_cpu)
if (tmp_cpus[i].cpu == (int)start_cpu)
goto invalid;
if (nr_cpus == max_entries) {
max_entries += MAX_NR_CPUS;
tmp = realloc(tmp_cpus, max_entries * sizeof(int));
tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto invalid;
tmp_cpus = tmp;
}
tmp_cpus[nr_cpus++] = (int)start_cpu;
tmp_cpus[nr_cpus++].cpu = (int)start_cpu;
}
if (*p)
++p;
@ -250,12 +257,16 @@ out:
return cpus;
}
int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
{
struct perf_cpu result = {
.cpu = -1
};
if (cpus && idx < cpus->nr)
return cpus->map[idx];
return -1;
return result;
}
int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
@ -265,21 +276,26 @@ int perf_cpu_map__nr(const struct perf_cpu_map *cpus)
bool perf_cpu_map__empty(const struct perf_cpu_map *map)
{
return map ? map->map[0] == -1 : true;
return map ? map->map[0].cpu == -1 : true;
}
int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu)
int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
{
int low = 0, high = cpus->nr;
int low, high;
if (!cpus)
return -1;
low = 0;
high = cpus->nr;
while (low < high) {
int idx = (low + high) / 2,
cpu_at_idx = cpus->map[idx];
int idx = (low + high) / 2;
struct perf_cpu cpu_at_idx = cpus->map[idx];
if (cpu_at_idx == cpu)
if (cpu_at_idx.cpu == cpu.cpu)
return idx;
if (cpu_at_idx > cpu)
if (cpu_at_idx.cpu > cpu.cpu)
high = idx;
else
low = idx + 1;
@ -288,10 +304,19 @@ int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu)
return -1;
}
int perf_cpu_map__max(struct perf_cpu_map *map)
bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu)
{
return perf_cpu_map__idx(cpus, cpu) != -1;
}
struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map)
{
struct perf_cpu result = {
.cpu = -1
};
// cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well.
return map->nr > 0 ? map->map[map->nr - 1] : -1;
return map->nr > 0 ? map->map[map->nr - 1] : result;
}
/*
@ -305,7 +330,7 @@ int perf_cpu_map__max(struct perf_cpu_map *map)
struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other)
{
int *tmp_cpus;
struct perf_cpu *tmp_cpus;
int tmp_len;
int i, j, k;
struct perf_cpu_map *merged;
@ -319,19 +344,19 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
if (!other)
return orig;
if (orig->nr == other->nr &&
!memcmp(orig->map, other->map, orig->nr * sizeof(int)))
!memcmp(orig->map, other->map, orig->nr * sizeof(struct perf_cpu)))
return orig;
tmp_len = orig->nr + other->nr;
tmp_cpus = malloc(tmp_len * sizeof(int));
tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
if (!tmp_cpus)
return NULL;
/* Standard merge algorithm from wikipedia */
i = j = k = 0;
while (i < orig->nr && j < other->nr) {
if (orig->map[i] <= other->map[j]) {
if (orig->map[i] == other->map[j])
if (orig->map[i].cpu <= other->map[j].cpu) {
if (orig->map[i].cpu == other->map[j].cpu)
j++;
tmp_cpus[k++] = orig->map[i++];
} else


@ -407,7 +407,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx)
static int
perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
int output, int cpu)
int output, struct perf_cpu cpu)
{
return perf_mmap__mmap(map, mp, output, cpu);
}
@ -426,7 +426,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
int idx, struct perf_mmap_param *mp, int cpu_idx,
int thread, int *_output, int *_output_overwrite)
{
int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx);
struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx);
struct perf_evsel *evsel;
int revent;
@ -643,14 +643,14 @@ perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map,
return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first;
}
void __perf_evlist__set_leader(struct list_head *list)
void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader)
{
struct perf_evsel *evsel, *leader;
struct perf_evsel *first, *last, *evsel;
leader = list_entry(list->next, struct perf_evsel, node);
evsel = list_entry(list->prev, struct perf_evsel, node);
first = list_first_entry(list, struct perf_evsel, node);
last = list_last_entry(list, struct perf_evsel, node);
leader->nr_members = evsel->idx - leader->idx + 1;
leader->nr_members = last->idx - first->idx + 1;
__perf_evlist__for_each_entry(list, evsel)
evsel->leader = leader;
@ -659,7 +659,10 @@ void __perf_evlist__set_leader(struct list_head *list)
void perf_evlist__set_leader(struct perf_evlist *evlist)
{
if (evlist->nr_entries) {
struct perf_evsel *first = list_entry(evlist->entries.next,
struct perf_evsel, node);
evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
__perf_evlist__set_leader(&evlist->entries);
__perf_evlist__set_leader(&evlist->entries, first);
}
}


@ -43,18 +43,22 @@ void perf_evsel__delete(struct perf_evsel *evsel)
free(evsel);
}
#define FD(e, x, y) ((int *) xyarray__entry(e->fd, x, y))
#define MMAP(e, x, y) (e->mmap ? ((struct perf_mmap *) xyarray__entry(e->mmap, x, y)) : NULL)
#define FD(_evsel, _cpu_map_idx, _thread) \
((int *)xyarray__entry(_evsel->fd, _cpu_map_idx, _thread))
#define MMAP(_evsel, _cpu_map_idx, _thread) \
(_evsel->mmap ? ((struct perf_mmap *) xyarray__entry(_evsel->mmap, _cpu_map_idx, _thread)) \
: NULL)
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
if (evsel->fd) {
int cpu, thread;
for (cpu = 0; cpu < ncpus; cpu++) {
int idx, thread;
for (idx = 0; idx < ncpus; idx++) {
for (thread = 0; thread < nthreads; thread++) {
int *fd = FD(evsel, cpu, thread);
int *fd = FD(evsel, idx, thread);
if (fd)
*fd = -1;
@ -74,13 +78,13 @@ static int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthre
static int
sys_perf_event_open(struct perf_event_attr *attr,
pid_t pid, int cpu, int group_fd,
pid_t pid, struct perf_cpu cpu, int group_fd,
unsigned long flags)
{
return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
return syscall(__NR_perf_event_open, attr, pid, cpu.cpu, group_fd, flags);
}
static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *group_fd)
static int get_group_fd(struct perf_evsel *evsel, int cpu_map_idx, int thread, int *group_fd)
{
struct perf_evsel *leader = evsel->leader;
int *fd;
@ -97,7 +101,7 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou
if (!leader->fd)
return -ENOTCONN;
fd = FD(leader, cpu, thread);
fd = FD(leader, cpu_map_idx, thread);
if (fd == NULL || *fd == -1)
return -EBADF;
@ -109,7 +113,8 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou
int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads)
{
int cpu, thread, err = 0;
struct perf_cpu cpu;
int idx, thread, err = 0;
if (cpus == NULL) {
static struct perf_cpu_map *empty_cpu_map;
@ -139,21 +144,21 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
return -ENOMEM;
for (cpu = 0; cpu < cpus->nr; cpu++) {
perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
for (thread = 0; thread < threads->nr; thread++) {
int fd, group_fd, *evsel_fd;
evsel_fd = FD(evsel, cpu, thread);
evsel_fd = FD(evsel, idx, thread);
if (evsel_fd == NULL)
return -EINVAL;
err = get_group_fd(evsel, cpu, thread, &group_fd);
err = get_group_fd(evsel, idx, thread, &group_fd);
if (err < 0)
return err;
fd = sys_perf_event_open(&evsel->attr,
threads->map[thread].pid,
cpus->map[cpu], group_fd, 0);
cpu, group_fd, 0);
if (fd < 0)
return -errno;
@ -165,12 +170,12 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
return err;
}
static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)
static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
int thread;
for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
int *fd = FD(evsel, cpu, thread);
int *fd = FD(evsel, cpu_map_idx, thread);
if (fd && *fd >= 0) {
close(*fd);
@ -181,10 +186,8 @@ static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)
void perf_evsel__close_fd(struct perf_evsel *evsel)
{
int cpu;
for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++)
perf_evsel__close_fd_cpu(evsel, cpu);
for (int idx = 0; idx < xyarray__max_x(evsel->fd); idx++)
perf_evsel__close_fd_cpu(evsel, idx);
}
void perf_evsel__free_fd(struct perf_evsel *evsel)
@ -202,29 +205,29 @@ void perf_evsel__close(struct perf_evsel *evsel)
perf_evsel__free_fd(evsel);
}
void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu)
void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
if (evsel->fd == NULL)
return;
perf_evsel__close_fd_cpu(evsel, cpu);
perf_evsel__close_fd_cpu(evsel, cpu_map_idx);
}
void perf_evsel__munmap(struct perf_evsel *evsel)
{
int cpu, thread;
int idx, thread;
if (evsel->fd == NULL || evsel->mmap == NULL)
return;
for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) {
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
int *fd = FD(evsel, cpu, thread);
int *fd = FD(evsel, idx, thread);
if (fd == NULL || *fd < 0)
continue;
perf_mmap__munmap(MMAP(evsel, cpu, thread));
perf_mmap__munmap(MMAP(evsel, idx, thread));
}
}
@ -234,7 +237,7 @@ void perf_evsel__munmap(struct perf_evsel *evsel)
int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
{
int ret, cpu, thread;
int ret, idx, thread;
struct perf_mmap_param mp = {
.prot = PROT_READ | PROT_WRITE,
.mask = (pages * page_size) - 1,
@ -246,15 +249,16 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
if (perf_evsel__alloc_mmap(evsel, xyarray__max_x(evsel->fd), xyarray__max_y(evsel->fd)) < 0)
return -ENOMEM;
for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) {
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
int *fd = FD(evsel, cpu, thread);
int *fd = FD(evsel, idx, thread);
struct perf_mmap *map;
struct perf_cpu cpu = perf_cpu_map__cpu(evsel->cpus, idx);
if (fd == NULL || *fd < 0)
continue;
map = MMAP(evsel, cpu, thread);
map = MMAP(evsel, idx, thread);
perf_mmap__init(map, NULL, false, NULL);
ret = perf_mmap__mmap(map, &mp, *fd, cpu);
@ -268,14 +272,14 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages)
return 0;
}
void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread)
void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread)
{
int *fd = FD(evsel, cpu, thread);
int *fd = FD(evsel, cpu_map_idx, thread);
if (fd == NULL || *fd < 0 || MMAP(evsel, cpu, thread) == NULL)
if (fd == NULL || *fd < 0 || MMAP(evsel, cpu_map_idx, thread) == NULL)
return NULL;
return MMAP(evsel, cpu, thread)->base;
return MMAP(evsel, cpu_map_idx, thread)->base;
}
int perf_evsel__read_size(struct perf_evsel *evsel)
@ -303,19 +307,19 @@ int perf_evsel__read_size(struct perf_evsel *evsel)
return size;
}
int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count)
{
size_t size = perf_evsel__read_size(evsel);
int *fd = FD(evsel, cpu, thread);
int *fd = FD(evsel, cpu_map_idx, thread);
memset(count, 0, sizeof(*count));
if (fd == NULL || *fd < 0)
return -EINVAL;
if (MMAP(evsel, cpu, thread) &&
!perf_mmap__read_self(MMAP(evsel, cpu, thread), count))
if (MMAP(evsel, cpu_map_idx, thread) &&
!perf_mmap__read_self(MMAP(evsel, cpu_map_idx, thread), count))
return 0;
if (readn(*fd, count->values, size) <= 0)
@ -326,13 +330,13 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
int ioc, void *arg,
int cpu)
int cpu_map_idx)
{
int thread;
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
int err;
int *fd = FD(evsel, cpu, thread);
int *fd = FD(evsel, cpu_map_idx, thread);
if (fd == NULL || *fd < 0)
return -1;
@ -346,9 +350,9 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
return 0;
}
int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu)
int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu);
return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu_map_idx);
}
int perf_evsel__enable(struct perf_evsel *evsel)
@ -361,9 +365,9 @@ int perf_evsel__enable(struct perf_evsel *evsel)
return err;
}
int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu)
int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx)
{
return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu);
return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu_map_idx);
}
int perf_evsel__disable(struct perf_evsel *evsel)
@ -431,3 +435,22 @@ void perf_evsel__free_id(struct perf_evsel *evsel)
zfree(&evsel->id);
evsel->ids = 0;
}
void perf_counts_values__scale(struct perf_counts_values *count,
bool scale, __s8 *pscaled)
{
s8 scaled = 0;
if (scale) {
if (count->run == 0) {
scaled = -1;
count->val = 0;
} else if (count->run < count->ena) {
scaled = 1;
count->val = (u64)((double)count->val * count->ena / count->run);
}
}
if (pscaled)
*pscaled = scaled;
}


@ -4,16 +4,30 @@
#include <linux/refcount.h>
/** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
struct perf_cpu {
int cpu;
};
/**
* A sized, reference counted, sorted array of integers representing CPU
* numbers. This is commonly used to capture which CPUs a PMU is associated
* with. The indices into the cpumap are frequently used as they avoid having
* gaps if CPU numbers were used. For events associated with a pid, rather than
* a CPU, a single dummy map with an entry of -1 is used.
*/
struct perf_cpu_map {
refcount_t refcnt;
/** Length of the map array. */
int nr;
int map[];
/** The CPU values. */
struct perf_cpu map[];
};
#ifndef MAX_NR_CPUS
#define MAX_NR_CPUS 2048
#endif
int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu);
int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu);
#endif /* __LIBPERF_INTERNAL_CPUMAP_H */


@ -4,6 +4,7 @@
#include <linux/list.h>
#include <api/fd/array.h>
#include <internal/cpumap.h>
#include <internal/evsel.h>
#define PERF_EVLIST__HLIST_BITS 8
@ -36,7 +37,7 @@ typedef void
typedef struct perf_mmap*
(*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int);
typedef int
(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int);
(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, struct perf_cpu);
struct perf_evlist_mmap_ops {
perf_evlist_mmap__cb_idx_t idx;
@ -127,5 +128,5 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist,
void perf_evlist__reset_id_hash(struct perf_evlist *evlist);
void __perf_evlist__set_leader(struct list_head *list);
void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader);
#endif /* __LIBPERF_INTERNAL_EVLIST_H */


@ -6,8 +6,8 @@
#include <linux/perf_event.h>
#include <stdbool.h>
#include <sys/types.h>
#include <internal/cpumap.h>
struct perf_cpu_map;
struct perf_thread_map;
struct xyarray;
@ -27,7 +27,7 @@ struct perf_sample_id {
* queue number.
*/
int idx;
int cpu;
struct perf_cpu cpu;
pid_t tid;
/* Holds total ID period value for PERF_SAMPLE_READ processing. */


@ -6,6 +6,7 @@
#include <linux/refcount.h>
#include <linux/types.h>
#include <stdbool.h>
#include <internal/cpumap.h>
/* perf sample has 16 bits size limit */
#define PERF_SAMPLE_MAX_SIZE (1 << 16)
@ -24,7 +25,7 @@ struct perf_mmap {
void *base;
int mask;
int fd;
int cpu;
struct perf_cpu cpu;
refcount_t refcnt;
u64 prev;
u64 start;
@ -46,7 +47,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map);
void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,
bool overwrite, libperf_unmap_cb_t unmap_cb);
int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
int fd, int cpu);
int fd, struct perf_cpu cpu);
void perf_mmap__munmap(struct perf_mmap *map);
void perf_mmap__get(struct perf_mmap *map);
void perf_mmap__put(struct perf_mmap *map);


@ -3,11 +3,10 @@
#define __LIBPERF_CPUMAP_H
#include <perf/core.h>
#include <perf/cpumap.h>
#include <stdio.h>
#include <stdbool.h>
struct perf_cpu_map;
LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
@ -16,10 +15,11 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other);
LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map);
LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map);
LIBPERF_API struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map);
LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);
#define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \
for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \


@ -4,6 +4,8 @@
#include <stdint.h>
#include <perf/core.h>
#include <stdbool.h>
#include <linux/types.h>
struct perf_evsel;
struct perf_event_attr;
@ -26,18 +28,20 @@ LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel);
LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel);
LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx);
LIBPERF_API int perf_evsel__mmap(struct perf_evsel *evsel, int pages);
LIBPERF_API void perf_evsel__munmap(struct perf_evsel *evsel);
LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread);
LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread);
LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,
struct perf_counts_values *count);
LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel);
LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx);
LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
LIBPERF_API void perf_counts_values__scale(struct perf_counts_values *count,
bool scale, __s8 *pscaled);
#endif /* __LIBPERF_EVSEL_H */


@ -10,6 +10,7 @@ LIBPERF_0.0.1 {
perf_cpu_map__cpu;
perf_cpu_map__empty;
perf_cpu_map__max;
perf_cpu_map__has;
perf_thread_map__new_dummy;
perf_thread_map__set_pid;
perf_thread_map__comm;
@ -50,6 +51,7 @@ LIBPERF_0.0.1 {
perf_mmap__read_init;
perf_mmap__read_done;
perf_mmap__read_event;
perf_counts_values__scale;
local:
*;
};


@ -32,7 +32,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map)
}
int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp,
int fd, int cpu)
int fd, struct perf_cpu cpu)
{
map->prev = 0;
map->mask = mp->mask;
@ -353,8 +353,6 @@ int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count
count->ena += delta;
if (idx)
count->run += delta;
cnt = mul_u64_u64_div64(cnt, count->ena, count->run);
}
count->val = cnt;


@ -21,6 +21,9 @@
#include "tests.h"
#include <internal/evsel.h>
#define EVENT_NUM 15
#define WAIT_COUNT 100000000UL
static int libperf_print(enum libperf_print_level level,
const char *fmt, va_list ap)
{
@ -331,7 +334,8 @@ static int test_mmap_cpus(void)
};
cpu_set_t saved_mask;
char path[PATH_MAX];
int id, err, cpu, tmp;
int id, err, tmp;
struct perf_cpu cpu;
union perf_event *event;
int count = 0;
@ -374,7 +378,7 @@ static int test_mmap_cpus(void)
cpu_set_t mask;
CPU_ZERO(&mask);
CPU_SET(cpu, &mask);
CPU_SET(cpu.cpu, &mask);
err = sched_setaffinity(0, sizeof(mask), &mask);
__T("sched_setaffinity failed", err == 0);
@ -413,6 +417,159 @@ static int test_mmap_cpus(void)
return 0;
}
static double display_error(long long average,
long long high,
long long low,
long long expected)
{
double error;
error = (((double)average - expected) / expected) * 100.0;
__T_VERBOSE(" Expected: %lld\n", expected);
__T_VERBOSE(" High: %lld Low: %lld Average: %lld\n",
high, low, average);
__T_VERBOSE(" Average Error = %.2f%%\n", error);
return error;
}
static int test_stat_multiplexing(void)
{
struct perf_counts_values expected_counts = { .val = 0 };
struct perf_counts_values counts[EVENT_NUM] = {{ .val = 0 },};
struct perf_thread_map *threads;
struct perf_evlist *evlist;
struct perf_evsel *evsel;
struct perf_event_attr attr = {
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_INSTRUCTIONS,
.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING,
.disabled = 1,
};
int err, i, nonzero = 0;
unsigned long count;
long long max = 0, min = 0, avg = 0;
double error = 0.0;
s8 scaled = 0;
/* read for non-multiplexing event count */
threads = perf_thread_map__new_dummy();
__T("failed to create threads", threads);
perf_thread_map__set_pid(threads, 0, 0);
evsel = perf_evsel__new(&attr);
__T("failed to create evsel", evsel);
err = perf_evsel__open(evsel, NULL, threads);
__T("failed to open evsel", err == 0);
err = perf_evsel__enable(evsel);
__T("failed to enable evsel", err == 0);
/* wait loop */
count = WAIT_COUNT;
while (count--)
;
perf_evsel__read(evsel, 0, 0, &expected_counts);
__T("failed to read value for evsel", expected_counts.val != 0);
__T("failed to read non-multiplexing event count",
expected_counts.ena == expected_counts.run);
err = perf_evsel__disable(evsel);
__T("failed to enable evsel", err == 0);
perf_evsel__close(evsel);
perf_evsel__delete(evsel);
perf_thread_map__put(threads);
/* read for multiplexing event count */
threads = perf_thread_map__new_dummy();
__T("failed to create threads", threads);
perf_thread_map__set_pid(threads, 0, 0);
evlist = perf_evlist__new();
__T("failed to create evlist", evlist);
for (i = 0; i < EVENT_NUM; i++) {
evsel = perf_evsel__new(&attr);
__T("failed to create evsel", evsel);
perf_evlist__add(evlist, evsel);
}
perf_evlist__set_maps(evlist, NULL, threads);
err = perf_evlist__open(evlist);
__T("failed to open evsel", err == 0);
perf_evlist__enable(evlist);
/* wait loop */
count = WAIT_COUNT;
while (count--)
;
i = 0;
perf_evlist__for_each_evsel(evlist, evsel) {
perf_evsel__read(evsel, 0, 0, &counts[i]);
__T("failed to read value for evsel", counts[i].val != 0);
i++;
}
perf_evlist__disable(evlist);
min = counts[0].val;
for (i = 0; i < EVENT_NUM; i++) {
__T_VERBOSE("Event %2d -- Raw count = %lu, run = %lu, enable = %lu\n",
i, counts[i].val, counts[i].run, counts[i].ena);
perf_counts_values__scale(&counts[i], true, &scaled);
if (scaled == 1) {
__T_VERBOSE("\t Scaled count = %lu (%.2lf%%, %lu/%lu)\n",
counts[i].val,
(double)counts[i].run / (double)counts[i].ena * 100.0,
counts[i].run, counts[i].ena);
} else if (scaled == -1) {
__T_VERBOSE("\t Not Running\n");
} else {
__T_VERBOSE("\t Not Scaling\n");
}
if (counts[i].val > max)
max = counts[i].val;
if (counts[i].val < min)
min = counts[i].val;
avg += counts[i].val;
if (counts[i].val != 0)
nonzero++;
}
if (nonzero != 0)
avg = avg / nonzero;
else
avg = 0;
error = display_error(avg, max, min, expected_counts.val);
__T("Error out of range!", ((error <= 1.0) && (error >= -1.0)));
perf_evlist__close(evlist);
perf_evlist__delete(evlist);
perf_thread_map__put(threads);
return 0;
}
int test_evlist(int argc, char **argv)
{
__T_START;
@ -424,6 +581,7 @@ int test_evlist(int argc, char **argv)
test_stat_thread_enable();
test_mmap_thread();
test_mmap_cpus();
test_stat_multiplexing();
__T_END;
return tests_failed == 0 ? 0 : -1;


@ -74,12 +74,15 @@ OPTIONS
used when creating a uprobe for a process that resides in a
different mount namespace from the perf(1) utility.
--debuginfod=URLs::
--debuginfod[=URLs]::
Specify debuginfod URL to be used when retrieving perf.data binaries,
it follows the same syntax as the DEBUGINFOD_URLS variable, like:
buildid-cache.debuginfod=http://192.168.122.174:8002
If the URLs is not specified, the value of DEBUGINFOD_URLS
system environment variable is used.
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-buildid-list[1]


@ -587,6 +587,15 @@ record.*::
Use 'n' control blocks in asynchronous (Posix AIO) trace writing
mode ('n' default: 1, max: 4).
record.debuginfod::
Specify debuginfod URL to be used when cacheing perf.data binaries,
it follows the same syntax as the DEBUGINFOD_URLS variable, like:
http://192.168.122.174:8002
If the URLs is 'system', the value of DEBUGINFOD_URLS system environment
variable is used.
diff.*::
diff.order::
This option sets the number of columns to sort the result.


@ -81,7 +81,11 @@ On AMD systems it is implemented using IBS (up to precise-level 2).
The precise modifier works with event types 0x76 (cpu-cycles, CPU
clocks not halted) and 0xC1 (micro-ops retired). Both events map to
IBS execution sampling (IBS op) with the IBS Op Counter Control bit
(IbsOpCntCtl) set respectively (see AMD64 Architecture Programmers
(IbsOpCntCtl) set respectively (see the
Core Complex (CCX) -> Processor x86 Core -> Instruction Based Sampling (IBS)
section of the [AMD Processor Programming Reference (PPR)] relevant to the
family, model and stepping of the processor being used).
Manual Volume 2: System Programming, 13.3 Instruction-Based
Sampling). Examples to use IBS:
@ -94,10 +98,12 @@ RAW HARDWARE EVENT DESCRIPTOR
Even when an event is not available in a symbolic form within perf right now,
it can be encoded in a per processor specific way.
For instance For x86 CPUs NNN represents the raw register encoding with the
For instance on x86 CPUs, N is a hexadecimal value that represents the raw register encoding with the
layout of IA32_PERFEVTSELx MSRs (see [Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide] Figure 30-1 Layout
of IA32_PERFEVTSELx MSRs) or AMD's PerfEvtSeln (see [AMD64 Architecture Programmers Manual Volume 2: System Programming], Page 344,
Figure 13-7 Performance Event-Select Register (PerfEvtSeln)).
of IA32_PERFEVTSELx MSRs) or AMD's PERF_CTL MSRs (see the
Core Complex (CCX) -> Processor x86 Core -> MSR Registers section of the
[AMD Processor Programming Reference (PPR)] relevant to the family, model
and stepping of the processor being used).
Note: Only the following bit fields can be set in x86 counter
registers: event, umask, edge, inv, cmask. Esp. guest/host only and
@ -126,6 +132,38 @@ It's also possible to use pmu syntax:
perf record -e cpu/r1a8/ ...
perf record -e cpu/r0x1a8/ ...
Some processors, like those from AMD, support event codes and unit masks
larger than a byte. In such cases, the bits corresponding to the event
configuration parameters can be seen with:
cat /sys/bus/event_source/devices/<pmu>/format/<config>
Example:
If the AMD docs for an EPYC 7713 processor describe an event as:
Event Umask Event Mask
Num. Value Mnemonic Description
28FH 03H op_cache_hit_miss.op_cache_hit Counts Op Cache micro-tag
hit events.
raw encoding of 0x0328F cannot be used since the upper nibble of the
EventSelect bits have to be specified via bits 32-35 as can be seen with:
cat /sys/bus/event_source/devices/cpu/format/event
raw encoding of 0x20000038F should be used instead:
perf stat -e r20000038f -a sleep 1
perf record -e r20000038f ...
It's also possible to use pmu syntax:
perf record -e r20000038f -a sleep 1
perf record -e cpu/r20000038f/ ...
perf record -e cpu/r0x20000038f/ ...
You should refer to the processor specific documentation for getting these
details. Some of them are referenced in the SEE ALSO section below.
@ -316,4 +354,4 @@ SEE ALSO
linkperf:perf-stat[1], linkperf:perf-top[1],
linkperf:perf-record[1],
http://www.intel.com/sdm/[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmers Manual Volume 2: System Programming]
https://bugzilla.kernel.org/show_bug.cgi?id=206537[AMD Processor Programming Reference (PPR)]


@ -30,8 +30,10 @@ OPTIONS
- a symbolic event name (use 'perf list' to list all events)
- a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
hexadecimal event descriptor.
- a raw PMU event in the form of rN where N is a hexadecimal value
that represents the raw register encoding with the layout of the
event control registers as described by entries in
/sys/bus/event_sources/devices/cpu/format/*.
- a symbolic or raw PMU event followed by an optional colon
and a list of event modifiers, e.g., cpu-cycles:p. See the
@ -713,6 +715,15 @@ measurements:
include::intel-hybrid.txt[]
--debuginfod[=URLs]::
Specify debuginfod URL to be used when cacheing perf.data binaries,
it follows the same syntax as the DEBUGINFOD_URLS variable, like:
http://192.168.122.174:8002
If the URLs is not specified, the value of DEBUGINFOD_URLS
system environment variable is used.
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]


@ -36,8 +36,10 @@ report::
- a symbolic event name (use 'perf list' to list all events)
- a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
hexadecimal event descriptor.
- a raw PMU event in the form of rN where N is a hexadecimal value
that represents the raw register encoding with the layout of the
event control registers as described by entries in
/sys/bus/event_sources/devices/cpu/format/*.
- a symbolic or raw PMU event followed by an optional colon
and a list of event modifiers, e.g., cpu-cycles:p. See the
@ -493,6 +495,10 @@ This option can be enabled in perf config by setting the variable
$ perf config stat.no-csv-summary=true
--cputype::
Only enable events on applying cpu with this type for hybrid platform
(e.g. core or atom)"
EXAMPLES
--------


@ -38,9 +38,10 @@ Default is to monitor all CPUS.
-e <event>::
--event=<event>::
Select the PMU event. Selection can be a symbolic event name
(use 'perf list' to list all events) or a raw PMU
event (eventsel+umask) in the form of rNNN where NNN is a
hexadecimal event descriptor.
(use 'perf list' to list all events) or a raw PMU event in the form
of rN where N is a hexadecimal value that represents the raw register
encoding with the layout of the event control registers as described
by entries in /sys/bus/event_sources/devices/cpu/format/*.
-E <entries>::
--entries=<entries>::


@ -17,6 +17,7 @@ detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected)
detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected)
CFLAGS := $(EXTRA_CFLAGS) $(filter-out -Wnested-externs,$(EXTRA_WARNINGS))
HOSTCFLAGS := $(filter-out -Wnested-externs,$(EXTRA_WARNINGS))
include $(srctree)/tools/scripts/Makefile.arch
@ -143,7 +144,10 @@ FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto
ifdef CSINCLUDES
LIBOPENCSD_CFLAGS := -I$(CSINCLUDES)
endif
OPENCSDLIBS := -lopencsd_c_api -lopencsd -lstdc++
OPENCSDLIBS := -lopencsd_c_api
ifeq ($(findstring -static,${LDFLAGS}),-static)
OPENCSDLIBS += -lopencsd -lstdc++
endif
ifdef CSLIBS
LIBOPENCSD_LDFLAGS := -L$(CSLIBS)
endif
@ -211,6 +215,7 @@ endif
ifneq ($(WERROR),0)
CORE_CFLAGS += -Werror
CXXFLAGS += -Werror
HOSTCFLAGS += -Werror
endif
ifndef DEBUG
@ -290,6 +295,9 @@ CXXFLAGS += -ggdb3
CXXFLAGS += -funwind-tables
CXXFLAGS += -Wno-strict-aliasing
HOSTCFLAGS += -Wall
HOSTCFLAGS += -Wextra
# Enforce a non-executable stack, as we may regress (again) in the future by
# adding assembler files missing the .GNU-stack linker note.
LDFLAGS += -Wl,-z,noexecstack


@ -226,7 +226,7 @@ else
endif
export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK
export HOSTCC HOSTLD HOSTAR
export HOSTCC HOSTLD HOSTAR HOSTCFLAGS
include $(srctree)/tools/build/Makefile.include
@ -1041,7 +1041,7 @@ SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel)
SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp)
SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h
SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT):
$(Q)$(MKDIR) -p $@


@ -15,46 +15,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_ARM_PC
#define PERF_REG_SP PERF_REG_ARM_SP
static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_ARM_R0:
return "r0";
case PERF_REG_ARM_R1:
return "r1";
case PERF_REG_ARM_R2:
return "r2";
case PERF_REG_ARM_R3:
return "r3";
case PERF_REG_ARM_R4:
return "r4";
case PERF_REG_ARM_R5:
return "r5";
case PERF_REG_ARM_R6:
return "r6";
case PERF_REG_ARM_R7:
return "r7";
case PERF_REG_ARM_R8:
return "r8";
case PERF_REG_ARM_R9:
return "r9";
case PERF_REG_ARM_R10:
return "r10";
case PERF_REG_ARM_FP:
return "fp";
case PERF_REG_ARM_IP:
return "ip";
case PERF_REG_ARM_SP:
return "sp";
case PERF_REG_ARM_LR:
return "lr";
case PERF_REG_ARM_PC:
return "pc";
default:
return NULL;
}
return NULL;
}
#endif /* ARCH_PERF_REGS_H */


@ -203,9 +203,11 @@ static int cs_etm_set_option(struct auxtrace_record *itr,
struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL);
/* Set option of each CPU we have */
for (i = 0; i < cpu__max_cpu(); i++) {
if (!cpu_map__has(event_cpus, i) ||
!cpu_map__has(online_cpus, i))
for (i = 0; i < cpu__max_cpu().cpu; i++) {
struct perf_cpu cpu = { .cpu = i, };
if (!perf_cpu_map__has(event_cpus, cpu) ||
!perf_cpu_map__has(online_cpus, cpu))
continue;
if (option & BIT(ETM_OPT_CTXTID)) {
@ -407,25 +409,6 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
}
/* Validate auxtrace_mmap_pages provided by user */
if (opts->auxtrace_mmap_pages) {
unsigned int max_page = (KiB(128) / page_size);
size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
if (!privileged &&
opts->auxtrace_mmap_pages > max_page) {
opts->auxtrace_mmap_pages = max_page;
pr_err("auxtrace too big, truncating to %d\n",
max_page);
}
if (!is_power_of_2(sz)) {
pr_err("Invalid mmap size for %s: must be a power of 2\n",
CORESIGHT_ETM_PMU_NAME);
return -EINVAL;
}
}
if (opts->auxtrace_snapshot_mode)
pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME,
opts->auxtrace_snapshot_size);
@ -541,9 +524,11 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
/* cpu map is not empty, we have specific CPUs to work with */
if (!perf_cpu_map__empty(event_cpus)) {
for (i = 0; i < cpu__max_cpu(); i++) {
if (!cpu_map__has(event_cpus, i) ||
!cpu_map__has(online_cpus, i))
for (i = 0; i < cpu__max_cpu().cpu; i++) {
struct perf_cpu cpu = { .cpu = i, };
if (!perf_cpu_map__has(event_cpus, cpu) ||
!perf_cpu_map__has(online_cpus, cpu))
continue;
if (cs_etm_is_ete(itr, i))
@ -555,8 +540,10 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
}
} else {
/* get configuration for all CPUs in the system */
for (i = 0; i < cpu__max_cpu(); i++) {
if (!cpu_map__has(online_cpus, i))
for (i = 0; i < cpu__max_cpu().cpu; i++) {
struct perf_cpu cpu = { .cpu = i, };
if (!perf_cpu_map__has(online_cpus, cpu))
continue;
if (cs_etm_is_ete(itr, i))
@ -741,8 +728,10 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
} else {
/* Make sure all specified CPUs are online */
for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) {
if (cpu_map__has(event_cpus, i) &&
!cpu_map__has(online_cpus, i))
struct perf_cpu cpu = { .cpu = i, };
if (perf_cpu_map__has(event_cpus, cpu) &&
!perf_cpu_map__has(online_cpus, cpu))
return -EINVAL;
}
@ -762,9 +751,12 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
offset = CS_ETM_SNAPSHOT + 1;
for (i = 0; i < cpu__max_cpu() && offset < priv_size; i++)
if (cpu_map__has(cpu_map, i))
for (i = 0; i < cpu__max_cpu().cpu && offset < priv_size; i++) {
struct perf_cpu cpu = { .cpu = i, };
if (perf_cpu_map__has(cpu_map, cpu))
cs_etm_get_metadata(i, &offset, itr, info);
}
perf_cpu_map__put(online_cpus);
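
The cs_etm.c conversion above shows the cpu-map pattern repeated throughout this series: a bare 'int cpu' becomes a 'struct perf_cpu' wrapper, so CPU numbers and map indices can no longer be silently mixed up, and the cpu_map__has()/cpu_map__cpu() helpers become perf_cpu_map__*() calls taking the wrapper. The two idioms, reduced to a sketch (the loop bodies are placeholders):

    /* walk all possible CPU numbers, acting only on those present in 'map' */
    for (int i = 0; i < cpu__max_cpu().cpu; i++) {
            struct perf_cpu cpu = { .cpu = i, };

            if (!perf_cpu_map__has(map, cpu))
                    continue;
            /* ... per-CPU work keyed on cpu.cpu ... */
    }

    /* or iterate (cpu, index) pairs directly, as builtin-c2c.c does below */
    struct perf_cpu cpu;
    int idx;

    perf_cpu_map__for_each_cpu(cpu, idx, map) {
            /* ... cpu.cpu is the CPU number, idx its position in 'map' ... */
    }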

View File

@ -4,7 +4,9 @@
#include <stdlib.h>
#include <linux/types.h>
#define perf_event_arm_regs perf_event_arm64_regs
#include <asm/perf_regs.h>
#undef perf_event_arm_regs
void perf_regs_load(u64 *regs);
@ -15,80 +17,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_ARM64_PC
#define PERF_REG_SP PERF_REG_ARM64_SP
static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_ARM64_X0:
return "x0";
case PERF_REG_ARM64_X1:
return "x1";
case PERF_REG_ARM64_X2:
return "x2";
case PERF_REG_ARM64_X3:
return "x3";
case PERF_REG_ARM64_X4:
return "x4";
case PERF_REG_ARM64_X5:
return "x5";
case PERF_REG_ARM64_X6:
return "x6";
case PERF_REG_ARM64_X7:
return "x7";
case PERF_REG_ARM64_X8:
return "x8";
case PERF_REG_ARM64_X9:
return "x9";
case PERF_REG_ARM64_X10:
return "x10";
case PERF_REG_ARM64_X11:
return "x11";
case PERF_REG_ARM64_X12:
return "x12";
case PERF_REG_ARM64_X13:
return "x13";
case PERF_REG_ARM64_X14:
return "x14";
case PERF_REG_ARM64_X15:
return "x15";
case PERF_REG_ARM64_X16:
return "x16";
case PERF_REG_ARM64_X17:
return "x17";
case PERF_REG_ARM64_X18:
return "x18";
case PERF_REG_ARM64_X19:
return "x19";
case PERF_REG_ARM64_X20:
return "x20";
case PERF_REG_ARM64_X21:
return "x21";
case PERF_REG_ARM64_X22:
return "x22";
case PERF_REG_ARM64_X23:
return "x23";
case PERF_REG_ARM64_X24:
return "x24";
case PERF_REG_ARM64_X25:
return "x25";
case PERF_REG_ARM64_X26:
return "x26";
case PERF_REG_ARM64_X27:
return "x27";
case PERF_REG_ARM64_X28:
return "x28";
case PERF_REG_ARM64_X29:
return "x29";
case PERF_REG_ARM64_SP:
return "sp";
case PERF_REG_ARM64_LR:
return "lr";
case PERF_REG_ARM64_PC:
return "pc";
default:
return NULL;
}
return NULL;
}
#endif /* ARCH_PERF_REGS_H */

View File

@ -5,6 +5,8 @@
#include <string.h>
#include "debug.h"
#include "symbol.h"
#include "callchain.h"
#include "record.h"
/* On arm64, kernel text segment starts at high memory address,
* for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
@ -26,3 +28,8 @@ void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
p->end = c->start;
pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
}
void arch__add_leaf_frame_record_opts(struct record_opts *opts)
{
opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask;
}
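
arch__add_leaf_frame_record_opts() is the arm64 hook behind the leaf-frame handling: when frame-pointer call chains are recorded, the link register is added to the sampled user registers so the caller of a leaf function is not lost. The pieces are spread across this series; put next to each other (both fragments below appear verbatim in the builtin-record.c hunks further down):

    /* builtin-record.c: weak no-op default, overridden per architecture */
    void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
    {
    }

    /* cmd_record(): only frame-pointer call chains need the extra register */
    if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
            arch__add_leaf_frame_record_opts(&rec->opts);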

View File

@ -15,7 +15,7 @@ const struct pmu_events_map *pmu_events_map__find(void)
* The cpumap should cover all CPUs. Otherwise, some CPUs may
* not support some events or have different event IDs.
*/
if (pmu->cpus->nr != cpu__max_cpu())
if (pmu->cpus->nr != cpu__max_cpu().cpu)
return NULL;
return perf_pmu__find_map(pmu);

View File

@ -15,86 +15,4 @@
#define PERF_REG_IP PERF_REG_CSKY_PC
#define PERF_REG_SP PERF_REG_CSKY_SP
static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_CSKY_A0:
return "a0";
case PERF_REG_CSKY_A1:
return "a1";
case PERF_REG_CSKY_A2:
return "a2";
case PERF_REG_CSKY_A3:
return "a3";
case PERF_REG_CSKY_REGS0:
return "regs0";
case PERF_REG_CSKY_REGS1:
return "regs1";
case PERF_REG_CSKY_REGS2:
return "regs2";
case PERF_REG_CSKY_REGS3:
return "regs3";
case PERF_REG_CSKY_REGS4:
return "regs4";
case PERF_REG_CSKY_REGS5:
return "regs5";
case PERF_REG_CSKY_REGS6:
return "regs6";
case PERF_REG_CSKY_REGS7:
return "regs7";
case PERF_REG_CSKY_REGS8:
return "regs8";
case PERF_REG_CSKY_REGS9:
return "regs9";
case PERF_REG_CSKY_SP:
return "sp";
case PERF_REG_CSKY_LR:
return "lr";
case PERF_REG_CSKY_PC:
return "pc";
#if defined(__CSKYABIV2__)
case PERF_REG_CSKY_EXREGS0:
return "exregs0";
case PERF_REG_CSKY_EXREGS1:
return "exregs1";
case PERF_REG_CSKY_EXREGS2:
return "exregs2";
case PERF_REG_CSKY_EXREGS3:
return "exregs3";
case PERF_REG_CSKY_EXREGS4:
return "exregs4";
case PERF_REG_CSKY_EXREGS5:
return "exregs5";
case PERF_REG_CSKY_EXREGS6:
return "exregs6";
case PERF_REG_CSKY_EXREGS7:
return "exregs7";
case PERF_REG_CSKY_EXREGS8:
return "exregs8";
case PERF_REG_CSKY_EXREGS9:
return "exregs9";
case PERF_REG_CSKY_EXREGS10:
return "exregs10";
case PERF_REG_CSKY_EXREGS11:
return "exregs11";
case PERF_REG_CSKY_EXREGS12:
return "exregs12";
case PERF_REG_CSKY_EXREGS13:
return "exregs13";
case PERF_REG_CSKY_EXREGS14:
return "exregs14";
case PERF_REG_CSKY_TLS:
return "tls";
case PERF_REG_CSKY_HI:
return "hi";
case PERF_REG_CSKY_LO:
return "lo";
#endif
default:
return NULL;
}
return NULL;
}
#endif /* ARCH_PERF_REGS_H */

View File

@ -12,73 +12,4 @@
#define PERF_REGS_MASK ((1ULL << PERF_REG_MIPS_MAX) - 1)
static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_MIPS_PC:
return "PC";
case PERF_REG_MIPS_R1:
return "$1";
case PERF_REG_MIPS_R2:
return "$2";
case PERF_REG_MIPS_R3:
return "$3";
case PERF_REG_MIPS_R4:
return "$4";
case PERF_REG_MIPS_R5:
return "$5";
case PERF_REG_MIPS_R6:
return "$6";
case PERF_REG_MIPS_R7:
return "$7";
case PERF_REG_MIPS_R8:
return "$8";
case PERF_REG_MIPS_R9:
return "$9";
case PERF_REG_MIPS_R10:
return "$10";
case PERF_REG_MIPS_R11:
return "$11";
case PERF_REG_MIPS_R12:
return "$12";
case PERF_REG_MIPS_R13:
return "$13";
case PERF_REG_MIPS_R14:
return "$14";
case PERF_REG_MIPS_R15:
return "$15";
case PERF_REG_MIPS_R16:
return "$16";
case PERF_REG_MIPS_R17:
return "$17";
case PERF_REG_MIPS_R18:
return "$18";
case PERF_REG_MIPS_R19:
return "$19";
case PERF_REG_MIPS_R20:
return "$20";
case PERF_REG_MIPS_R21:
return "$21";
case PERF_REG_MIPS_R22:
return "$22";
case PERF_REG_MIPS_R23:
return "$23";
case PERF_REG_MIPS_R24:
return "$24";
case PERF_REG_MIPS_R25:
return "$25";
case PERF_REG_MIPS_R28:
return "$28";
case PERF_REG_MIPS_R29:
return "$29";
case PERF_REG_MIPS_R30:
return "$30";
case PERF_REG_MIPS_R31:
return "$31";
default:
break;
}
return NULL;
}
#endif /* ARCH_PERF_REGS_H */

View File

@ -19,70 +19,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_POWERPC_NIP
#define PERF_REG_SP PERF_REG_POWERPC_R1
static const char *reg_names[] = {
[PERF_REG_POWERPC_R0] = "r0",
[PERF_REG_POWERPC_R1] = "r1",
[PERF_REG_POWERPC_R2] = "r2",
[PERF_REG_POWERPC_R3] = "r3",
[PERF_REG_POWERPC_R4] = "r4",
[PERF_REG_POWERPC_R5] = "r5",
[PERF_REG_POWERPC_R6] = "r6",
[PERF_REG_POWERPC_R7] = "r7",
[PERF_REG_POWERPC_R8] = "r8",
[PERF_REG_POWERPC_R9] = "r9",
[PERF_REG_POWERPC_R10] = "r10",
[PERF_REG_POWERPC_R11] = "r11",
[PERF_REG_POWERPC_R12] = "r12",
[PERF_REG_POWERPC_R13] = "r13",
[PERF_REG_POWERPC_R14] = "r14",
[PERF_REG_POWERPC_R15] = "r15",
[PERF_REG_POWERPC_R16] = "r16",
[PERF_REG_POWERPC_R17] = "r17",
[PERF_REG_POWERPC_R18] = "r18",
[PERF_REG_POWERPC_R19] = "r19",
[PERF_REG_POWERPC_R20] = "r20",
[PERF_REG_POWERPC_R21] = "r21",
[PERF_REG_POWERPC_R22] = "r22",
[PERF_REG_POWERPC_R23] = "r23",
[PERF_REG_POWERPC_R24] = "r24",
[PERF_REG_POWERPC_R25] = "r25",
[PERF_REG_POWERPC_R26] = "r26",
[PERF_REG_POWERPC_R27] = "r27",
[PERF_REG_POWERPC_R28] = "r28",
[PERF_REG_POWERPC_R29] = "r29",
[PERF_REG_POWERPC_R30] = "r30",
[PERF_REG_POWERPC_R31] = "r31",
[PERF_REG_POWERPC_NIP] = "nip",
[PERF_REG_POWERPC_MSR] = "msr",
[PERF_REG_POWERPC_ORIG_R3] = "orig_r3",
[PERF_REG_POWERPC_CTR] = "ctr",
[PERF_REG_POWERPC_LINK] = "link",
[PERF_REG_POWERPC_XER] = "xer",
[PERF_REG_POWERPC_CCR] = "ccr",
[PERF_REG_POWERPC_SOFTE] = "softe",
[PERF_REG_POWERPC_TRAP] = "trap",
[PERF_REG_POWERPC_DAR] = "dar",
[PERF_REG_POWERPC_DSISR] = "dsisr",
[PERF_REG_POWERPC_SIER] = "sier",
[PERF_REG_POWERPC_MMCRA] = "mmcra",
[PERF_REG_POWERPC_MMCR0] = "mmcr0",
[PERF_REG_POWERPC_MMCR1] = "mmcr1",
[PERF_REG_POWERPC_MMCR2] = "mmcr2",
[PERF_REG_POWERPC_MMCR3] = "mmcr3",
[PERF_REG_POWERPC_SIER2] = "sier2",
[PERF_REG_POWERPC_SIER3] = "sier3",
[PERF_REG_POWERPC_PMC1] = "pmc1",
[PERF_REG_POWERPC_PMC2] = "pmc2",
[PERF_REG_POWERPC_PMC3] = "pmc3",
[PERF_REG_POWERPC_PMC4] = "pmc4",
[PERF_REG_POWERPC_PMC5] = "pmc5",
[PERF_REG_POWERPC_PMC6] = "pmc6",
[PERF_REG_POWERPC_SDAR] = "sdar",
[PERF_REG_POWERPC_SIAR] = "siar",
};
static inline const char *__perf_reg_name(int id)
{
return reg_names[id];
}
#endif /* ARCH_PERF_REGS_H */

View File

@ -40,8 +40,12 @@ const char *arch_perf_header_entry(const char *se_header)
{
if (!strcmp(se_header, "Local INSTR Latency"))
return "Finish Cyc";
else if (!strcmp(se_header, "Pipeline Stage Cycle"))
else if (!strcmp(se_header, "INSTR Latency"))
return "Global Finish_cyc";
else if (!strcmp(se_header, "Local Pipeline Stage Cycle"))
return "Dispatch Cyc";
else if (!strcmp(se_header, "Pipeline Stage Cycle"))
return "Global Dispatch_cyc";
return se_header;
}
@ -49,5 +53,7 @@ int arch_support_sort_key(const char *sort_key)
{
if (!strcmp(sort_key, "p_stage_cyc"))
return 1;
if (!strcmp(sort_key, "local_p_stage_cyc"))
return 1;
return 0;
}

View File

@ -19,78 +19,4 @@
#define PERF_REG_IP PERF_REG_RISCV_PC
#define PERF_REG_SP PERF_REG_RISCV_SP
static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_RISCV_PC:
return "pc";
case PERF_REG_RISCV_RA:
return "ra";
case PERF_REG_RISCV_SP:
return "sp";
case PERF_REG_RISCV_GP:
return "gp";
case PERF_REG_RISCV_TP:
return "tp";
case PERF_REG_RISCV_T0:
return "t0";
case PERF_REG_RISCV_T1:
return "t1";
case PERF_REG_RISCV_T2:
return "t2";
case PERF_REG_RISCV_S0:
return "s0";
case PERF_REG_RISCV_S1:
return "s1";
case PERF_REG_RISCV_A0:
return "a0";
case PERF_REG_RISCV_A1:
return "a1";
case PERF_REG_RISCV_A2:
return "a2";
case PERF_REG_RISCV_A3:
return "a3";
case PERF_REG_RISCV_A4:
return "a4";
case PERF_REG_RISCV_A5:
return "a5";
case PERF_REG_RISCV_A6:
return "a6";
case PERF_REG_RISCV_A7:
return "a7";
case PERF_REG_RISCV_S2:
return "s2";
case PERF_REG_RISCV_S3:
return "s3";
case PERF_REG_RISCV_S4:
return "s4";
case PERF_REG_RISCV_S5:
return "s5";
case PERF_REG_RISCV_S6:
return "s6";
case PERF_REG_RISCV_S7:
return "s7";
case PERF_REG_RISCV_S8:
return "s8";
case PERF_REG_RISCV_S9:
return "s9";
case PERF_REG_RISCV_S10:
return "s10";
case PERF_REG_RISCV_S11:
return "s11";
case PERF_REG_RISCV_T3:
return "t3";
case PERF_REG_RISCV_T4:
return "t4";
case PERF_REG_RISCV_T5:
return "t5";
case PERF_REG_RISCV_T6:
return "t6";
default:
return NULL;
}
return NULL;
}
#endif /* ARCH_PERF_REGS_H */

View File

@ -14,82 +14,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_S390_PC
#define PERF_REG_SP PERF_REG_S390_R15
static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_S390_R0:
return "R0";
case PERF_REG_S390_R1:
return "R1";
case PERF_REG_S390_R2:
return "R2";
case PERF_REG_S390_R3:
return "R3";
case PERF_REG_S390_R4:
return "R4";
case PERF_REG_S390_R5:
return "R5";
case PERF_REG_S390_R6:
return "R6";
case PERF_REG_S390_R7:
return "R7";
case PERF_REG_S390_R8:
return "R8";
case PERF_REG_S390_R9:
return "R9";
case PERF_REG_S390_R10:
return "R10";
case PERF_REG_S390_R11:
return "R11";
case PERF_REG_S390_R12:
return "R12";
case PERF_REG_S390_R13:
return "R13";
case PERF_REG_S390_R14:
return "R14";
case PERF_REG_S390_R15:
return "R15";
case PERF_REG_S390_FP0:
return "FP0";
case PERF_REG_S390_FP1:
return "FP1";
case PERF_REG_S390_FP2:
return "FP2";
case PERF_REG_S390_FP3:
return "FP3";
case PERF_REG_S390_FP4:
return "FP4";
case PERF_REG_S390_FP5:
return "FP5";
case PERF_REG_S390_FP6:
return "FP6";
case PERF_REG_S390_FP7:
return "FP7";
case PERF_REG_S390_FP8:
return "FP8";
case PERF_REG_S390_FP9:
return "FP9";
case PERF_REG_S390_FP10:
return "FP10";
case PERF_REG_S390_FP11:
return "FP11";
case PERF_REG_S390_FP12:
return "FP12";
case PERF_REG_S390_FP13:
return "FP13";
case PERF_REG_S390_FP14:
return "FP14";
case PERF_REG_S390_FP15:
return "FP15";
case PERF_REG_S390_MASK:
return "MASK";
case PERF_REG_S390_PC:
return "PC";
default:
return NULL;
}
return NULL;
}
#endif /* ARCH_PERF_REGS_H */

View File

@ -23,86 +23,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_X86_IP
#define PERF_REG_SP PERF_REG_X86_SP
static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_X86_AX:
return "AX";
case PERF_REG_X86_BX:
return "BX";
case PERF_REG_X86_CX:
return "CX";
case PERF_REG_X86_DX:
return "DX";
case PERF_REG_X86_SI:
return "SI";
case PERF_REG_X86_DI:
return "DI";
case PERF_REG_X86_BP:
return "BP";
case PERF_REG_X86_SP:
return "SP";
case PERF_REG_X86_IP:
return "IP";
case PERF_REG_X86_FLAGS:
return "FLAGS";
case PERF_REG_X86_CS:
return "CS";
case PERF_REG_X86_SS:
return "SS";
case PERF_REG_X86_DS:
return "DS";
case PERF_REG_X86_ES:
return "ES";
case PERF_REG_X86_FS:
return "FS";
case PERF_REG_X86_GS:
return "GS";
#ifdef HAVE_ARCH_X86_64_SUPPORT
case PERF_REG_X86_R8:
return "R8";
case PERF_REG_X86_R9:
return "R9";
case PERF_REG_X86_R10:
return "R10";
case PERF_REG_X86_R11:
return "R11";
case PERF_REG_X86_R12:
return "R12";
case PERF_REG_X86_R13:
return "R13";
case PERF_REG_X86_R14:
return "R14";
case PERF_REG_X86_R15:
return "R15";
#endif /* HAVE_ARCH_X86_64_SUPPORT */
#define XMM(x) \
case PERF_REG_X86_XMM ## x: \
case PERF_REG_X86_XMM ## x + 1: \
return "XMM" #x;
XMM(0)
XMM(1)
XMM(2)
XMM(3)
XMM(4)
XMM(5)
XMM(6)
XMM(7)
XMM(8)
XMM(9)
XMM(10)
XMM(11)
XMM(12)
XMM(13)
XMM(14)
XMM(15)
#undef XMM
default:
return NULL;
}
return NULL;
}
#endif /* ARCH_PERF_REGS_H */

View File

@ -17,3 +17,20 @@ int arch_evlist__add_default_attrs(struct evlist *evlist)
else
return parse_events(evlist, TOPDOWN_L1_EVENTS, NULL);
}
struct evsel *arch_evlist__leader(struct list_head *list)
{
struct evsel *evsel, *first;
first = list_first_entry(list, struct evsel, core.node);
if (!pmu_have_event("cpu", "slots"))
return first;
__evlist__for_each_entry(list, evsel) {
if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") &&
evsel->name && strstr(evsel->name, "slots"))
return evsel;
}
return first;
}
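
arch_evlist__leader() is the x86 side of the group-leader handling for topdown events: if a parsed group contains a core-PMU 'slots' event, that event is promoted to group leader (the perf-metrics counters require it), otherwise whichever event came first keeps the role. The generic counterpart is presumably nothing more than a weak default returning the first entry, along the lines of:

    /* util/evlist.c: assumed weak default, overridden by the x86 code above */
    struct evsel * __weak arch_evlist__leader(struct list_head *list)
    {
            return list_first_entry(list, struct evsel, core.node);
    }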

View File

@ -253,7 +253,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
if (!noaffinity) {
CPU_ZERO(&cpuset);
CPU_SET(cpu->map[i % cpu->nr], &cpuset);
CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
if (ret)

View File

@ -342,7 +342,7 @@ static int do_threads(struct worker *worker, struct perf_cpu_map *cpu)
if (!noaffinity) {
CPU_ZERO(&cpuset);
CPU_SET(cpu->map[i % cpu->nr], &cpuset);
CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
if (ret)

View File

@ -177,7 +177,7 @@ int bench_futex_hash(int argc, const char **argv)
goto errmem;
CPU_ZERO(&cpuset);
CPU_SET(cpu->map[i % cpu->nr], &cpuset);
CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
if (ret)

View File

@ -136,7 +136,7 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr,
worker[i].futex = &global_futex;
CPU_ZERO(&cpuset);
CPU_SET(cpu->map[i % cpu->nr], &cpuset);
CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");

View File

@ -131,7 +131,7 @@ static void block_threads(pthread_t *w,
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
CPU_ZERO(&cpuset);
CPU_SET(cpu->map[i % cpu->nr], &cpuset);
CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");

View File

@ -152,7 +152,7 @@ static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
CPU_ZERO(&cpuset);
CPU_SET(cpu->map[i % cpu->nr], &cpuset);
CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");

View File

@ -105,7 +105,7 @@ static void block_threads(pthread_t *w,
/* create and block all threads */
for (i = 0; i < params.nthreads; i++) {
CPU_ZERO(&cpuset);
CPU_SET(cpu->map[i % cpu->nr], &cpuset);
CPU_SET(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, &cpuset);
if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
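
Each of the bench hunks above replaces a direct 'cpu->map[...]' access with the perf_cpu_map__cpu()/perf_cpu_map__nr() accessors before pinning a worker thread. Stripped of the perf internals, the round-robin pinning they perform is ordinary pthread affinity handling; a standalone sketch (it assumes CPUs 0..n-1 are online, which is exactly the assumption the cpu map avoids):

    #define _GNU_SOURCE
    #include <err.h>
    #include <pthread.h>
    #include <sched.h>
    #include <unistd.h>

    /* pin the calling thread to online CPU (i mod n), round-robin */
    static void pin_to_cpu(int i)
    {
            long ncpus = sysconf(_SC_NPROCESSORS_ONLN);
            cpu_set_t cpuset;
            int ret;

            if (ncpus < 1)
                    ncpus = 1;
            CPU_ZERO(&cpuset);
            CPU_SET(i % ncpus, &cpuset);
            ret = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
            if (ret)
                    errx(1, "pthread_setaffinity_np: %d", ret);
    }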

View File

@ -226,7 +226,6 @@ static void run_collection(struct collection *coll)
if (!bench->fn)
break;
printf("# Running %s/%s benchmark...\n", coll->name, bench->name);
fflush(stdout);
argv[1] = bench->name;
run_bench(coll->name, bench->name, bench->fn, 1, argv);
@ -247,6 +246,9 @@ int cmd_bench(int argc, const char **argv)
struct collection *coll;
int ret = 0;
/* Unbuffered output */
setvbuf(stdout, NULL, _IONBF, 0);
if (argc < 2) {
/* No collection specified. */
print_usage();
@ -300,7 +302,6 @@ int cmd_bench(int argc, const char **argv)
if (bench_format == BENCH_FORMAT_DEFAULT)
printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name);
fflush(stdout);
ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1);
goto end;
}
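
cmd_bench() now switches stdout to unbuffered mode once, instead of chasing every progress printf() with fflush(); with a pipe or a redirect, stdio would otherwise hold the '# Running ...' lines in a full buffer until the (possibly long) benchmark finishes. The same effect in a standalone program:

    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            /* without this, the next line stays buffered until exit when
             * stdout is a pipe, e.g. "./a.out | tee log" */
            setvbuf(stdout, NULL, _IONBF, 0);

            printf("# Running benchmark...\n");
            sleep(5);               /* stand-in for long-running work */
            printf("done\n");
            return 0;
    }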

View File

@ -351,10 +351,14 @@ static int build_id_cache__show_all(void)
static int perf_buildid_cache_config(const char *var, const char *value, void *cb)
{
const char **debuginfod = cb;
struct perf_debuginfod *di = cb;
if (!strcmp(var, "buildid-cache.debuginfod"))
*debuginfod = strdup(value);
if (!strcmp(var, "buildid-cache.debuginfod")) {
di->urls = strdup(value);
if (!di->urls)
return -ENOMEM;
di->set = true;
}
return 0;
}
@ -373,8 +377,8 @@ int cmd_buildid_cache(int argc, const char **argv)
*purge_name_list_str = NULL,
*missing_filename = NULL,
*update_name_list_str = NULL,
*kcore_filename = NULL,
*debuginfod = NULL;
*kcore_filename = NULL;
struct perf_debuginfod debuginfod = { };
char sbuf[STRERR_BUFSIZE];
struct perf_data data = {
@ -399,8 +403,10 @@ int cmd_buildid_cache(int argc, const char **argv)
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
OPT_STRING('u', "update", &update_name_list_str, "file list",
"file(s) to update"),
OPT_STRING(0, "debuginfod", &debuginfod, "debuginfod url",
"set debuginfod url"),
OPT_STRING_OPTARG_SET(0, "debuginfod", &debuginfod.urls,
&debuginfod.set, "debuginfod urls",
"Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
"system"),
OPT_INCR('v', "verbose", &verbose, "be more verbose"),
OPT_INTEGER(0, "target-ns", &ns_id, "target pid for namespace context"),
OPT_END()
@ -425,10 +431,7 @@ int cmd_buildid_cache(int argc, const char **argv)
if (argc || !(list_files || opts_flag))
usage_with_options(buildid_cache_usage, buildid_cache_options);
if (debuginfod) {
pr_debug("DEBUGINFOD_URLS=%s\n", debuginfod);
setenv("DEBUGINFOD_URLS", debuginfod, 1);
}
perf_debuginfod_setup(&debuginfod);
/* -l is exclusive. It can not be used with other options. */
if (list_files && opts_flag) {
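
The buildid-cache option now fills a shared 'struct perf_debuginfod { urls, set }' and hands it to perf_debuginfod_setup(), the same helper the record changes below use for the record.debuginfod config key and --debuginfod option. Its implementation is not part of these hunks; judging by the inline code it replaces, it amounts to exporting DEBUGINFOD_URLS. A sketch under that assumption ('system', the bare-option default, is taken to mean "keep whatever the environment already provides"):

    /* sketch only; the real helper lives in util/ and is not shown here */
    void perf_debuginfod_setup(struct perf_debuginfod *di)
    {
            if (!di->set)
                    return;         /* assumption: nothing to do without the option/config */

            if (di->urls && strcmp(di->urls, "system")) {
                    pr_debug("DEBUGINFOD_URLS=%s\n", di->urls);
                    setenv("DEBUGINFOD_URLS", di->urls, 1);
            }
    }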

View File

@ -2015,7 +2015,8 @@ static int setup_nodes(struct perf_session *session)
{
struct numa_node *n;
unsigned long **nodes;
int node, cpu;
int node, idx;
struct perf_cpu cpu;
int *cpu2node;
if (c2c.node_info > 2)
@ -2038,8 +2039,8 @@ static int setup_nodes(struct perf_session *session)
if (!cpu2node)
return -ENOMEM;
for (cpu = 0; cpu < c2c.cpus_cnt; cpu++)
cpu2node[cpu] = -1;
for (idx = 0; idx < c2c.cpus_cnt; idx++)
cpu2node[idx] = -1;
c2c.cpu2node = cpu2node;
@ -2057,13 +2058,13 @@ static int setup_nodes(struct perf_session *session)
if (perf_cpu_map__empty(map))
continue;
for (cpu = 0; cpu < map->nr; cpu++) {
set_bit(map->map[cpu], set);
perf_cpu_map__for_each_cpu(cpu, idx, map) {
set_bit(cpu.cpu, set);
if (WARN_ONCE(cpu2node[map->map[cpu]] != -1, "node/cpu topology bug"))
if (WARN_ONCE(cpu2node[cpu.cpu] != -1, "node/cpu topology bug"))
return -EINVAL;
cpu2node[map->map[cpu]] = node;
cpu2node[cpu.cpu] = node;
}
}

View File

@ -13,7 +13,9 @@
#include <signal.h>
#include <stdlib.h>
#include <fcntl.h>
#include <math.h>
#include <poll.h>
#include <ctype.h>
#include <linux/capability.h>
#include <linux/string.h>
@ -28,36 +30,12 @@
#include "strfilter.h"
#include "util/cap.h"
#include "util/config.h"
#include "util/ftrace.h"
#include "util/units.h"
#include "util/parse-sublevel-options.h"
#define DEFAULT_TRACER "function_graph"
struct perf_ftrace {
struct evlist *evlist;
struct target target;
const char *tracer;
struct list_head filters;
struct list_head notrace;
struct list_head graph_funcs;
struct list_head nograph_funcs;
int graph_depth;
unsigned long percpu_buffer_size;
bool inherit;
int func_stack_trace;
int func_irq_info;
int graph_nosleep_time;
int graph_noirqs;
int graph_verbose;
int graph_thresh;
unsigned int initial_delay;
};
struct filter_entry {
struct list_head list;
char name[];
};
static volatile int workload_exec_errno;
static bool done;
@ -303,7 +281,7 @@ static int set_tracing_cpumask(struct perf_cpu_map *cpumap)
int ret;
int last_cpu;
last_cpu = cpu_map__cpu(cpumap, cpumap->nr - 1);
last_cpu = perf_cpu_map__cpu(cpumap, cpumap->nr - 1).cpu;
mask_size = last_cpu / 4 + 2; /* one more byte for EOS */
mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */
@ -565,7 +543,24 @@ static int set_tracing_options(struct perf_ftrace *ftrace)
return 0;
}
static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
static void select_tracer(struct perf_ftrace *ftrace)
{
bool graph = !list_empty(&ftrace->graph_funcs) ||
!list_empty(&ftrace->nograph_funcs);
bool func = !list_empty(&ftrace->filters) ||
!list_empty(&ftrace->notrace);
/* The function_graph has priority over function tracer. */
if (graph)
ftrace->tracer = "function_graph";
else if (func)
ftrace->tracer = "function";
/* Otherwise, the default tracer is used. */
pr_debug("%s tracer is used\n", ftrace->tracer);
}
static int __cmd_ftrace(struct perf_ftrace *ftrace)
{
char *trace_file;
int trace_fd;
@ -586,10 +581,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
return -1;
}
signal(SIGINT, sig_handler);
signal(SIGUSR1, sig_handler);
signal(SIGCHLD, sig_handler);
signal(SIGPIPE, sig_handler);
select_tracer(ftrace);
if (reset_tracing_files(ftrace) < 0) {
pr_err("failed to reset ftrace\n");
@ -600,11 +592,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
if (write_tracing_file("trace", "0") < 0)
goto out;
if (argc && evlist__prepare_workload(ftrace->evlist, &ftrace->target, argv, false,
ftrace__workload_exec_failed_signal) < 0) {
goto out;
}
if (set_tracing_options(ftrace) < 0)
goto out_reset;
@ -693,6 +680,270 @@ out:
return (done && !workload_exec_errno) ? 0 : -1;
}
static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
{
char *p, *q;
char *unit;
double num;
int i;
/* ensure NUL termination */
buf[len] = '\0';
/* handle data line by line */
for (p = buf; (q = strchr(p, '\n')) != NULL; p = q + 1) {
*q = '\0';
/* move it to the line buffer */
strcat(linebuf, p);
/*
* parse trace output to get function duration like in
*
* # tracer: function_graph
* #
* # CPU DURATION FUNCTION CALLS
* # | | | | | | |
* 1) + 10.291 us | do_filp_open();
* 1) 4.889 us | do_filp_open();
* 1) 6.086 us | do_filp_open();
*
*/
if (linebuf[0] == '#')
goto next;
/* ignore CPU */
p = strchr(linebuf, ')');
if (p == NULL)
p = linebuf;
while (*p && !isdigit(*p) && (*p != '|'))
p++;
/* no duration */
if (*p == '\0' || *p == '|')
goto next;
num = strtod(p, &unit);
if (!unit || strncmp(unit, " us", 3))
goto next;
i = log2(num);
if (i < 0)
i = 0;
if (i >= NUM_BUCKET)
i = NUM_BUCKET - 1;
buckets[i]++;
next:
/* empty the line buffer for the next output */
linebuf[0] = '\0';
}
/* preserve any remaining output (before newline) */
strcat(linebuf, p);
}
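
make_histogram() reduces every parsed duration to a power-of-two bucket index, so the whole histogram is just an int array. Worked through the '+ 10.291 us' line from the comment block above: num = 10.291, log2(10.291) is about 3.36, truncation gives bucket 3; sub-microsecond durations clamp to bucket 0 and anything past the last power of two falls into bucket NUM_BUCKET - 1. The same computation in isolation (NUM_BUCKET's value is not shown in these hunks and is assumed here):

    #include <math.h>

    #define NUM_BUCKET 22   /* assumed to match the perf ftrace definition */

    static int duration_to_bucket(double us)
    {
            int i = log2(us);       /* truncated toward zero */

            if (i < 0)
                    i = 0;
            if (i >= NUM_BUCKET)
                    i = NUM_BUCKET - 1;
            return i;       /* 10.291 us -> 3, 0.4 us -> 0, 5000 us -> 12 */
    }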
static void display_histogram(int buckets[])
{
int i;
int total = 0;
int bar_total = 46; /* to fit in 80 column */
char bar[] = "###############################################";
int bar_len;
for (i = 0; i < NUM_BUCKET; i++)
total += buckets[i];
if (total == 0) {
printf("No data found\n");
return;
}
printf("# %14s | %10s | %-*s |\n",
" DURATION ", "COUNT", bar_total, "GRAPH");
bar_len = buckets[0] * bar_total / total;
printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
0, 1, "us", buckets[0], bar_len, bar, bar_total - bar_len, "");
for (i = 1; i < NUM_BUCKET - 1; i++) {
int start = (1 << (i - 1));
int stop = 1 << i;
const char *unit = "us";
if (start >= 1024) {
start >>= 10;
stop >>= 10;
unit = "ms";
}
bar_len = buckets[i] * bar_total / total;
printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
start, stop, unit, buckets[i], bar_len, bar,
bar_total - bar_len, "");
}
bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
printf(" %4d - %-4s %s | %10d | %.*s%*s |\n",
1, "...", " s", buckets[NUM_BUCKET - 1], bar_len, bar,
bar_total - bar_len, "");
}
static int prepare_func_latency(struct perf_ftrace *ftrace)
{
char *trace_file;
int fd;
if (ftrace->target.use_bpf)
return perf_ftrace__latency_prepare_bpf(ftrace);
if (reset_tracing_files(ftrace) < 0) {
pr_err("failed to reset ftrace\n");
return -1;
}
/* reset ftrace buffer */
if (write_tracing_file("trace", "0") < 0)
return -1;
if (set_tracing_options(ftrace) < 0)
return -1;
/* force to use the function_graph tracer to track duration */
if (write_tracing_file("current_tracer", "function_graph") < 0) {
pr_err("failed to set current_tracer to function_graph\n");
return -1;
}
trace_file = get_tracing_file("trace_pipe");
if (!trace_file) {
pr_err("failed to open trace_pipe\n");
return -1;
}
fd = open(trace_file, O_RDONLY);
if (fd < 0)
pr_err("failed to open trace_pipe\n");
put_tracing_file(trace_file);
return fd;
}
static int start_func_latency(struct perf_ftrace *ftrace)
{
if (ftrace->target.use_bpf)
return perf_ftrace__latency_start_bpf(ftrace);
if (write_tracing_file("tracing_on", "1") < 0) {
pr_err("can't enable tracing\n");
return -1;
}
return 0;
}
static int stop_func_latency(struct perf_ftrace *ftrace)
{
if (ftrace->target.use_bpf)
return perf_ftrace__latency_stop_bpf(ftrace);
write_tracing_file("tracing_on", "0");
return 0;
}
static int read_func_latency(struct perf_ftrace *ftrace, int buckets[])
{
if (ftrace->target.use_bpf)
return perf_ftrace__latency_read_bpf(ftrace, buckets);
return 0;
}
static int cleanup_func_latency(struct perf_ftrace *ftrace)
{
if (ftrace->target.use_bpf)
return perf_ftrace__latency_cleanup_bpf(ftrace);
reset_tracing_files(ftrace);
return 0;
}
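
prepare/start/stop/read/cleanup_func_latency() each short-circuit into a perf_ftrace__latency_*_bpf() counterpart when -b/--use-bpf is in effect; the skeleton those counterparts load is the func_latency.skel.h added to SKELETONS in the Makefile hunk near the top of this series. The declarations themselves are not part of these hunks; in builds without BPF skeleton support they are presumably stubbed out, roughly:

    /* sketch of the assumed fallbacks when HAVE_BPF_SKEL is not defined */
    #ifndef HAVE_BPF_SKEL
    static inline int
    perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace __maybe_unused)
    {
            return -1;      /* the -b option is not offered in this build anyway */
    }

    static inline int
    perf_ftrace__latency_start_bpf(struct perf_ftrace *ftrace __maybe_unused)
    {
            return -1;
    }
    /* ...stop/read/cleanup follow the same pattern... */
    #endif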
static int __cmd_latency(struct perf_ftrace *ftrace)
{
int trace_fd;
char buf[4096];
char line[256];
struct pollfd pollfd = {
.events = POLLIN,
};
int buckets[NUM_BUCKET] = { };
if (!(perf_cap__capable(CAP_PERFMON) ||
perf_cap__capable(CAP_SYS_ADMIN))) {
pr_err("ftrace only works for %s!\n",
#ifdef HAVE_LIBCAP_SUPPORT
"users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
#else
"root"
#endif
);
return -1;
}
trace_fd = prepare_func_latency(ftrace);
if (trace_fd < 0)
goto out;
fcntl(trace_fd, F_SETFL, O_NONBLOCK);
pollfd.fd = trace_fd;
if (start_func_latency(ftrace) < 0)
goto out;
evlist__start_workload(ftrace->evlist);
line[0] = '\0';
while (!done) {
if (poll(&pollfd, 1, -1) < 0)
break;
if (pollfd.revents & POLLIN) {
int n = read(trace_fd, buf, sizeof(buf) - 1);
if (n < 0)
break;
make_histogram(buckets, buf, n, line);
}
}
stop_func_latency(ftrace);
if (workload_exec_errno) {
const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
pr_err("workload failed: %s\n", emsg);
goto out;
}
/* read remaining buffer contents */
while (!ftrace->target.use_bpf) {
int n = read(trace_fd, buf, sizeof(buf) - 1);
if (n <= 0)
break;
make_histogram(buckets, buf, n, line);
}
read_func_latency(ftrace, buckets);
display_histogram(buckets);
out:
close(trace_fd);
cleanup_func_latency(ftrace);
return (done && !workload_exec_errno) ? 0 : -1;
}
static int perf_ftrace_config(const char *var, const char *value, void *cb)
{
struct perf_ftrace *ftrace = cb;
@ -855,22 +1106,11 @@ static int parse_graph_tracer_opts(const struct option *opt,
return 0;
}
static void select_tracer(struct perf_ftrace *ftrace)
{
bool graph = !list_empty(&ftrace->graph_funcs) ||
!list_empty(&ftrace->nograph_funcs);
bool func = !list_empty(&ftrace->filters) ||
!list_empty(&ftrace->notrace);
/* The function_graph has priority over function tracer. */
if (graph)
ftrace->tracer = "function_graph";
else if (func)
ftrace->tracer = "function";
/* Otherwise, the default tracer is used. */
pr_debug("%s tracer is used\n", ftrace->tracer);
}
enum perf_ftrace_subcommand {
PERF_FTRACE_NONE,
PERF_FTRACE_TRACE,
PERF_FTRACE_LATENCY,
};
int cmd_ftrace(int argc, const char **argv)
{
@ -879,17 +1119,7 @@ int cmd_ftrace(int argc, const char **argv)
.tracer = DEFAULT_TRACER,
.target = { .uid = UINT_MAX, },
};
const char * const ftrace_usage[] = {
"perf ftrace [<options>] [<command>]",
"perf ftrace [<options>] -- <command> [<options>]",
NULL
};
const struct option ftrace_options[] = {
OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
"Tracer to use: function_graph(default) or function"),
OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
"Show available functions to filter",
opt_list_avail_functions, "*"),
const struct option common_options[] = {
OPT_STRING('p', "pid", &ftrace.target.pid, "pid",
"Trace on existing process id"),
/* TODO: Add short option -t after -t/--tracer can be removed. */
@ -901,6 +1131,14 @@ int cmd_ftrace(int argc, const char **argv)
"System-wide collection from all CPUs"),
OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu",
"List of cpus to monitor"),
OPT_END()
};
const struct option ftrace_options[] = {
OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
"Tracer to use: function_graph(default) or function"),
OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
"Show available functions to filter",
opt_list_avail_functions, "*"),
OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
"Trace given functions using function tracer",
parse_filter_func),
@ -923,24 +1161,65 @@ int cmd_ftrace(int argc, const char **argv)
"Trace children processes"),
OPT_UINTEGER('D', "delay", &ftrace.initial_delay,
"Number of milliseconds to wait before starting tracing after program start"),
OPT_END()
OPT_PARENT(common_options),
};
const struct option latency_options[] = {
OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
"Show latency of given function", parse_filter_func),
#ifdef HAVE_BPF_SKEL
OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
"Use BPF to measure function latency"),
#endif
OPT_PARENT(common_options),
};
const struct option *options = ftrace_options;
const char * const ftrace_usage[] = {
"perf ftrace [<options>] [<command>]",
"perf ftrace [<options>] -- [<command>] [<options>]",
"perf ftrace {trace|latency} [<options>] [<command>]",
"perf ftrace {trace|latency} [<options>] -- [<command>] [<options>]",
NULL
};
enum perf_ftrace_subcommand subcmd = PERF_FTRACE_NONE;
INIT_LIST_HEAD(&ftrace.filters);
INIT_LIST_HEAD(&ftrace.notrace);
INIT_LIST_HEAD(&ftrace.graph_funcs);
INIT_LIST_HEAD(&ftrace.nograph_funcs);
signal(SIGINT, sig_handler);
signal(SIGUSR1, sig_handler);
signal(SIGCHLD, sig_handler);
signal(SIGPIPE, sig_handler);
ret = perf_config(perf_ftrace_config, &ftrace);
if (ret < 0)
return -1;
argc = parse_options(argc, argv, ftrace_options, ftrace_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc && target__none(&ftrace.target))
ftrace.target.system_wide = true;
if (argc > 1) {
if (!strcmp(argv[1], "trace")) {
subcmd = PERF_FTRACE_TRACE;
} else if (!strcmp(argv[1], "latency")) {
subcmd = PERF_FTRACE_LATENCY;
options = latency_options;
}
select_tracer(&ftrace);
if (subcmd != PERF_FTRACE_NONE) {
argc--;
argv++;
}
}
/* for backward compatibility */
if (subcmd == PERF_FTRACE_NONE)
subcmd = PERF_FTRACE_TRACE;
argc = parse_options(argc, argv, options, ftrace_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
if (argc < 0) {
ret = -EINVAL;
goto out_delete_filters;
}
ret = target__validate(&ftrace.target);
if (ret) {
@ -961,7 +1240,35 @@ int cmd_ftrace(int argc, const char **argv)
if (ret < 0)
goto out_delete_evlist;
ret = __cmd_ftrace(&ftrace, argc, argv);
if (argc) {
ret = evlist__prepare_workload(ftrace.evlist, &ftrace.target,
argv, false,
ftrace__workload_exec_failed_signal);
if (ret < 0)
goto out_delete_evlist;
}
switch (subcmd) {
case PERF_FTRACE_TRACE:
if (!argc && target__none(&ftrace.target))
ftrace.target.system_wide = true;
ret = __cmd_ftrace(&ftrace);
break;
case PERF_FTRACE_LATENCY:
if (list_empty(&ftrace.filters)) {
pr_err("Should provide a function to measure\n");
parse_options_usage(ftrace_usage, options, "T", 1);
ret = -EINVAL;
goto out_delete_evlist;
}
ret = __cmd_latency(&ftrace);
break;
case PERF_FTRACE_NONE:
default:
pr_err("Invalid subcommand\n");
ret = -EINVAL;
break;
}
out_delete_evlist:
evlist__delete(ftrace.evlist);

View File

@ -192,7 +192,7 @@ static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_samp
int ret = evsel__process_alloc_event(evsel, sample);
if (!ret) {
int node1 = cpu__get_node(sample->cpu),
int node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}),
node2 = evsel__intval(evsel, sample, "node");
if (node1 != node2)

View File

@ -111,6 +111,7 @@ struct record {
unsigned long long samples;
struct mmap_cpu_mask affinity_mask;
unsigned long output_max_size; /* = 0: unlimited */
struct perf_debuginfod debuginfod;
};
static volatile int done;
@ -2177,6 +2178,12 @@ static int perf_record_config(const char *var, const char *value, void *cb)
rec->opts.nr_cblocks = nr_cblocks_default;
}
#endif
if (!strcmp(var, "record.debuginfod")) {
rec->debuginfod.urls = strdup(value);
if (!rec->debuginfod.urls)
return -ENOMEM;
rec->debuginfod.set = true;
}
return 0;
}
@ -2267,6 +2274,10 @@ out_free:
return ret;
}
void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
{
}
static int parse_control_option(const struct option *opt,
const char *str,
int unset __maybe_unused)
@ -2663,6 +2674,10 @@ static struct option __record_options[] = {
parse_control_option),
OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
"Fine-tune event synthesis: default=all", parse_record_synth_option),
OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
&record.debuginfod.set, "debuginfod urls",
"Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
"system"),
OPT_END()
};
@ -2716,6 +2731,8 @@ int cmd_record(int argc, const char **argv)
if (err)
return err;
perf_debuginfod_setup(&record.debuginfod);
/* Make system wide (-a) the default target. */
if (!argc && target__none(&rec->opts.target))
rec->opts.target.system_wide = true;
@ -2792,7 +2809,7 @@ int cmd_record(int argc, const char **argv)
symbol__init(NULL);
if (rec->opts.affinity != PERF_AFFINITY_SYS) {
rec->affinity_mask.nbits = cpu__max_cpu();
rec->affinity_mask.nbits = cpu__max_cpu().cpu;
rec->affinity_mask.bits = bitmap_zalloc(rec->affinity_mask.nbits);
if (!rec->affinity_mask.bits) {
pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
@ -2898,6 +2915,10 @@ int cmd_record(int argc, const char **argv)
}
rec->opts.target.hybrid = perf_pmu__has_hybrid();
if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
arch__add_leaf_frame_record_opts(&rec->opts);
err = -ENOMEM;
if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
usage_with_options(record_usage, record_options);

View File

@ -410,7 +410,7 @@ static int report__setup_sample_type(struct report *rep)
}
}
callchain_param_setup(sample_type);
callchain_param_setup(sample_type, perf_env__arch(&rep->session->header.env));
if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
@ -1127,7 +1127,7 @@ static int process_attr(struct perf_tool *tool __maybe_unused,
* on events sample_type.
*/
sample_type = evlist__combined_sample_type(*pevlist);
callchain_param_setup(sample_type);
callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env));
return 0;
}

View File

@ -167,7 +167,7 @@ struct trace_sched_handler {
struct perf_sched_map {
DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);
int *comp_cpus;
struct perf_cpu *comp_cpus;
bool comp;
struct perf_thread_map *color_pids;
const char *color_pids_str;
@ -191,7 +191,7 @@ struct perf_sched {
* Track the current task - that way we can know whether there's any
* weird events, such as a task being switched away that is not current.
*/
int max_cpu;
struct perf_cpu max_cpu;
u32 curr_pid[MAX_CPUS];
struct thread *curr_thread[MAX_CPUS];
char next_shortname1;
@ -1535,28 +1535,31 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
int new_shortname;
u64 timestamp0, timestamp = sample->time;
s64 delta;
int i, this_cpu = sample->cpu;
int i;
struct perf_cpu this_cpu = {
.cpu = sample->cpu,
};
int cpus_nr;
bool new_cpu = false;
const char *color = PERF_COLOR_NORMAL;
char stimestamp[32];
BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
BUG_ON(this_cpu.cpu >= MAX_CPUS || this_cpu.cpu < 0);
if (this_cpu > sched->max_cpu)
if (this_cpu.cpu > sched->max_cpu.cpu)
sched->max_cpu = this_cpu;
if (sched->map.comp) {
cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) {
if (!test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) {
sched->map.comp_cpus[cpus_nr++] = this_cpu;
new_cpu = true;
}
} else
cpus_nr = sched->max_cpu;
cpus_nr = sched->max_cpu.cpu;
timestamp0 = sched->cpu_last_switched[this_cpu];
sched->cpu_last_switched[this_cpu] = timestamp;
timestamp0 = sched->cpu_last_switched[this_cpu.cpu];
sched->cpu_last_switched[this_cpu.cpu] = timestamp;
if (timestamp0)
delta = timestamp - timestamp0;
else
@ -1577,7 +1580,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
return -1;
}
sched->curr_thread[this_cpu] = thread__get(sched_in);
sched->curr_thread[this_cpu.cpu] = thread__get(sched_in);
printf(" ");
@ -1608,8 +1611,10 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
}
for (i = 0; i < cpus_nr; i++) {
int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i;
struct thread *curr_thread = sched->curr_thread[cpu];
struct perf_cpu cpu = {
.cpu = sched->map.comp ? sched->map.comp_cpus[i].cpu : i,
};
struct thread *curr_thread = sched->curr_thread[cpu.cpu];
struct thread_runtime *curr_tr;
const char *pid_color = color;
const char *cpu_color = color;
@ -1617,19 +1622,19 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
if (curr_thread && thread__has_color(curr_thread))
pid_color = COLOR_PIDS;
if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu))
if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, cpu))
continue;
if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu))
if (sched->map.color_cpus && perf_cpu_map__has(sched->map.color_cpus, cpu))
cpu_color = COLOR_CPUS;
if (cpu != this_cpu)
if (cpu.cpu != this_cpu.cpu)
color_fprintf(stdout, color, " ");
else
color_fprintf(stdout, cpu_color, "*");
if (sched->curr_thread[cpu]) {
curr_tr = thread__get_runtime(sched->curr_thread[cpu]);
if (sched->curr_thread[cpu.cpu]) {
curr_tr = thread__get_runtime(sched->curr_thread[cpu.cpu]);
if (curr_tr == NULL) {
thread__put(sched_in);
return -1;
@ -1639,7 +1644,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
color_fprintf(stdout, color, " ");
}
if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu))
if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu))
goto out;
timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
@ -1929,7 +1934,7 @@ static char *timehist_get_commstr(struct thread *thread)
static void timehist_header(struct perf_sched *sched)
{
u32 ncpus = sched->max_cpu + 1;
u32 ncpus = sched->max_cpu.cpu + 1;
u32 i, j;
printf("%15s %6s ", "time", "cpu");
@ -2008,7 +2013,7 @@ static void timehist_print_sample(struct perf_sched *sched,
struct thread_runtime *tr = thread__priv(thread);
const char *next_comm = evsel__strval(evsel, sample, "next_comm");
const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
u32 max_cpus = sched->max_cpu + 1;
u32 max_cpus = sched->max_cpu.cpu + 1;
char tstr[64];
char nstr[30];
u64 wait_time;
@ -2389,7 +2394,7 @@ static void timehist_print_wakeup_event(struct perf_sched *sched,
timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
printf("%15s [%04d] ", tstr, sample->cpu);
if (sched->show_cpu_visual)
printf(" %*s ", sched->max_cpu + 1, "");
printf(" %*s ", sched->max_cpu.cpu + 1, "");
printf(" %-*s ", comm_width, timehist_get_commstr(thread));
@ -2449,13 +2454,13 @@ static void timehist_print_migration_event(struct perf_sched *sched,
{
struct thread *thread;
char tstr[64];
u32 max_cpus = sched->max_cpu + 1;
u32 max_cpus;
u32 ocpu, dcpu;
if (sched->summary_only)
return;
max_cpus = sched->max_cpu + 1;
max_cpus = sched->max_cpu.cpu + 1;
ocpu = evsel__intval(evsel, sample, "orig_cpu");
dcpu = evsel__intval(evsel, sample, "dest_cpu");
@ -2918,7 +2923,7 @@ static void timehist_print_summary(struct perf_sched *sched,
printf(" Total scheduling time (msec): ");
print_sched_time(hist_time, 2);
printf(" (x %d)\n", sched->max_cpu);
printf(" (x %d)\n", sched->max_cpu.cpu);
}
typedef int (*sched_handler)(struct perf_tool *tool,
@ -2935,9 +2940,11 @@ static int perf_timehist__process_sample(struct perf_tool *tool,
{
struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
int err = 0;
int this_cpu = sample->cpu;
struct perf_cpu this_cpu = {
.cpu = sample->cpu,
};
if (this_cpu > sched->max_cpu)
if (this_cpu.cpu > sched->max_cpu.cpu)
sched->max_cpu = this_cpu;
if (evsel->handler != NULL) {
@ -3054,10 +3061,10 @@ static int perf_sched__timehist(struct perf_sched *sched)
goto out;
/* pre-allocate struct for per-CPU idle stats */
sched->max_cpu = session->header.env.nr_cpus_online;
if (sched->max_cpu == 0)
sched->max_cpu = 4;
if (init_idle_threads(sched->max_cpu))
sched->max_cpu.cpu = session->header.env.nr_cpus_online;
if (sched->max_cpu.cpu == 0)
sched->max_cpu.cpu = 4;
if (init_idle_threads(sched->max_cpu.cpu))
goto out;
/* summary_only implies summary option, but don't overwrite summary if set */
@ -3209,10 +3216,10 @@ static int setup_map_cpus(struct perf_sched *sched)
{
struct perf_cpu_map *map;
sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
sched->max_cpu.cpu = sysconf(_SC_NPROCESSORS_CONF);
if (sched->map.comp) {
sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int));
sched->map.comp_cpus = zalloc(sched->max_cpu.cpu * sizeof(int));
if (!sched->map.comp_cpus)
return -1;
}

View File

@ -15,6 +15,7 @@
#include "util/symbol.h"
#include "util/thread.h"
#include "util/trace-event.h"
#include "util/env.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/evsel_fprintf.h"
@ -648,7 +649,7 @@ out:
return 0;
}
static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, const char *arch,
FILE *fp)
{
unsigned i = 0, r;
@ -661,7 +662,7 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
u64 val = regs->regs[i++];
printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val);
printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r, arch), val);
}
return printed;
@ -718,17 +719,17 @@ tod_scnprintf(struct perf_script *script, char *buf, int buflen,
}
static int perf_sample__fprintf_iregs(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
struct perf_event_attr *attr, const char *arch, FILE *fp)
{
return perf_sample__fprintf_regs(&sample->intr_regs,
attr->sample_regs_intr, fp);
attr->sample_regs_intr, arch, fp);
}
static int perf_sample__fprintf_uregs(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
struct perf_event_attr *attr, const char *arch, FILE *fp)
{
return perf_sample__fprintf_regs(&sample->user_regs,
attr->sample_regs_user, fp);
attr->sample_regs_user, arch, fp);
}
static int perf_sample__fprintf_start(struct perf_script *script,
@ -2000,6 +2001,7 @@ static void process_event(struct perf_script *script,
struct evsel_script *es = evsel->priv;
FILE *fp = es->fp;
char str[PAGE_SIZE_NAME_LEN];
const char *arch = perf_env__arch(machine->env);
if (output[type].fields == 0)
return;
@ -2066,10 +2068,10 @@ static void process_event(struct perf_script *script,
}
if (PRINT_FIELD(IREGS))
perf_sample__fprintf_iregs(sample, attr, fp);
perf_sample__fprintf_iregs(sample, attr, arch, fp);
if (PRINT_FIELD(UREGS))
perf_sample__fprintf_uregs(sample, attr, fp);
perf_sample__fprintf_uregs(sample, attr, arch, fp);
if (PRINT_FIELD(BRSTACK))
perf_sample__fprintf_brstack(sample, thread, attr, fp);
@ -2113,8 +2115,8 @@ static struct scripting_ops *scripting_ops;
static void __process_stat(struct evsel *counter, u64 tstamp)
{
int nthreads = perf_thread_map__nr(counter->core.threads);
int ncpus = evsel__nr_cpus(counter);
int cpu, thread;
int idx, thread;
struct perf_cpu cpu;
static int header_printed;
if (counter->core.system_wide)
@ -2127,13 +2129,13 @@ static void __process_stat(struct evsel *counter, u64 tstamp)
}
for (thread = 0; thread < nthreads; thread++) {
for (cpu = 0; cpu < ncpus; cpu++) {
perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) {
struct perf_counts_values *counts;
counts = perf_counts(counter->counts, cpu, thread);
counts = perf_counts(counter->counts, idx, thread);
printf("%3d %8d %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %s\n",
counter->core.cpus->map[cpu],
cpu.cpu,
perf_thread_map__pid(counter->core.threads, thread),
counts->val,
counts->ena,
@ -2316,7 +2318,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
* on events sample_type.
*/
sample_type = evlist__combined_sample_type(evlist);
callchain_param_setup(sample_type);
callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env));
/* Enable fields for callchain entries */
if (symbol_conf.use_callchain &&
@ -3466,16 +3468,7 @@ static void script__setup_sample_type(struct perf_script *script)
struct perf_session *session = script->session;
u64 sample_type = evlist__combined_sample_type(session->evlist);
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
(sample_type & PERF_SAMPLE_STACK_USER)) {
callchain_param.record_mode = CALLCHAIN_DWARF;
dwarf_callchain_users = true;
} else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
callchain_param.record_mode = CALLCHAIN_LBR;
else
callchain_param.record_mode = CALLCHAIN_FP;
}
callchain_param_setup(sample_type, perf_env__arch(session->machines.host.env));
if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"

View File

@ -234,7 +234,7 @@ static bool cpus_map_matched(struct evsel *a, struct evsel *b)
return false;
for (int i = 0; i < a->core.cpus->nr; i++) {
if (a->core.cpus->map[i] != b->core.cpus->map[i])
if (a->core.cpus->map[i].cpu != b->core.cpus->map[i].cpu)
return false;
}
@ -327,34 +327,35 @@ static int write_stat_round_event(u64 tm, u64 type)
#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
static int evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread,
static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 thread,
struct perf_counts_values *count)
{
struct perf_sample_id *sid = SID(counter, cpu, thread);
struct perf_sample_id *sid = SID(counter, cpu_map_idx, thread);
struct perf_cpu cpu = perf_cpu_map__cpu(evsel__cpus(counter), cpu_map_idx);
return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
process_synthesized_event, NULL);
}
static int read_single_counter(struct evsel *counter, int cpu,
static int read_single_counter(struct evsel *counter, int cpu_map_idx,
int thread, struct timespec *rs)
{
if (counter->tool_event == PERF_TOOL_DURATION_TIME) {
u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL;
struct perf_counts_values *count =
perf_counts(counter->counts, cpu, thread);
perf_counts(counter->counts, cpu_map_idx, thread);
count->ena = count->run = val;
count->val = val;
return 0;
}
return evsel__read_counter(counter, cpu, thread);
return evsel__read_counter(counter, cpu_map_idx, thread);
}
/*
* Read out the results of a single counter:
* do not aggregate counts across CPUs in system-wide mode
*/
static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_map_idx)
{
int nthreads = perf_thread_map__nr(evsel_list->core.threads);
int thread;
@ -368,24 +369,24 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
for (thread = 0; thread < nthreads; thread++) {
struct perf_counts_values *count;
count = perf_counts(counter->counts, cpu, thread);
count = perf_counts(counter->counts, cpu_map_idx, thread);
/*
* The leader's group read loads data into its group members
* (via evsel__read_counter()) and sets their count->loaded.
*/
if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
read_single_counter(counter, cpu, thread, rs)) {
if (!perf_counts__is_loaded(counter->counts, cpu_map_idx, thread) &&
read_single_counter(counter, cpu_map_idx, thread, rs)) {
counter->counts->scaled = -1;
perf_counts(counter->counts, cpu, thread)->ena = 0;
perf_counts(counter->counts, cpu, thread)->run = 0;
perf_counts(counter->counts, cpu_map_idx, thread)->ena = 0;
perf_counts(counter->counts, cpu_map_idx, thread)->run = 0;
return -1;
}
perf_counts__set_loaded(counter->counts, cpu, thread, false);
perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, false);
if (STAT_RECORD) {
if (evsel__write_stat_event(counter, cpu, thread, count)) {
if (evsel__write_stat_event(counter, cpu_map_idx, thread, count)) {
pr_err("failed to write stat event\n");
return -1;
}
@ -395,7 +396,8 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
fprintf(stat_config.output,
"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
evsel__name(counter),
cpu,
perf_cpu_map__cpu(evsel__cpus(counter),
cpu_map_idx).cpu,
count->val, count->ena, count->run);
}
}
@ -405,36 +407,33 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
static int read_affinity_counters(struct timespec *rs)
{
struct evsel *counter;
struct affinity affinity;
int i, ncpus, cpu;
struct evlist_cpu_iterator evlist_cpu_itr;
struct affinity saved_affinity, *affinity;
if (all_counters_use_bpf)
return 0;
if (affinity__setup(&affinity) < 0)
return -1;
ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
if (!target__has_cpu(&target) || target__has_per_thread(&target))
ncpus = 1;
evlist__for_each_cpu(evsel_list, i, cpu) {
if (i >= ncpus)
break;
affinity__set(&affinity, cpu);
affinity = NULL;
else if (affinity__setup(&saved_affinity) < 0)
return -1;
else
affinity = &saved_affinity;
evlist__for_each_entry(evsel_list, counter) {
if (evsel__cpu_iter_skip(counter, cpu))
continue;
if (evsel__is_bpf(counter))
continue;
if (!counter->err) {
counter->err = read_counter_cpu(counter, rs,
counter->cpu_iter - 1);
}
evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
struct evsel *counter = evlist_cpu_itr.evsel;
if (evsel__is_bpf(counter))
continue;
if (!counter->err) {
counter->err = read_counter_cpu(counter, rs,
evlist_cpu_itr.cpu_map_idx);
}
}
affinity__cleanup(&affinity);
if (affinity)
affinity__cleanup(&saved_affinity);
return 0;
}
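
The read_affinity_counters() rewrite introduces the iterator used throughout the stat changes below: instead of an outer loop over CPUs plus per-evsel cpu_iter bookkeeping, evlist__for_each_cpu() now walks an evlist_cpu_iterator that yields the evsel together with its CPU-map index, migrating thread affinity along the way when an affinity pointer is passed. The shape of a user, distilled from the hunks above and below (do_one() is a placeholder):

    struct evlist_cpu_iterator evlist_cpu_itr;
    struct affinity saved_affinity, *affinity = NULL;

    if (affinity__setup(&saved_affinity) == 0)
            affinity = &saved_affinity;     /* NULL skips the affinity handling */

    evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
            struct evsel *counter = evlist_cpu_itr.evsel;

            do_one(counter, evlist_cpu_itr.cpu_map_idx);
    }

    if (affinity)
            affinity__cleanup(&saved_affinity);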
@ -788,8 +787,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
int status = 0;
const bool forks = (argc > 0);
bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
struct evlist_cpu_iterator evlist_cpu_itr;
struct affinity affinity;
int i, cpu, err;
int err;
bool second_pass = false;
if (forks) {
@ -813,56 +813,53 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
all_counters_use_bpf = false;
}
evlist__for_each_cpu (evsel_list, i, cpu) {
evlist__for_each_cpu(evlist_cpu_itr, evsel_list, &affinity) {
counter = evlist_cpu_itr.evsel;
/*
* bperf calls evsel__open_per_cpu() in bperf__load(), so
* no need to call it again here.
*/
if (target.use_bpf)
break;
affinity__set(&affinity, cpu);
evlist__for_each_entry(evsel_list, counter) {
if (evsel__cpu_iter_skip(counter, cpu))
continue;
if (counter->reset_group || counter->errored)
continue;
if (evsel__is_bpf(counter))
continue;
if (counter->reset_group || counter->errored)
continue;
if (evsel__is_bpf(counter))
continue;
try_again:
if (create_perf_stat_counter(counter, &stat_config, &target,
counter->cpu_iter - 1) < 0) {
/*
* Weak group failed. We cannot just undo this here
* because earlier CPUs might be in group mode, and the kernel
* doesn't support mixing group and non group reads. Defer
* it to later.
* Don't close here because we're in the wrong affinity.
*/
if ((errno == EINVAL || errno == EBADF) &&
evsel__leader(counter) != counter &&
counter->weak_group) {
evlist__reset_weak_group(evsel_list, counter, false);
assert(counter->reset_group);
second_pass = true;
continue;
}
switch (stat_handle_error(counter)) {
case COUNTER_FATAL:
return -1;
case COUNTER_RETRY:
goto try_again;
case COUNTER_SKIP:
continue;
default:
break;
}
if (create_perf_stat_counter(counter, &stat_config, &target,
evlist_cpu_itr.cpu_map_idx) < 0) {
/*
* Weak group failed. We cannot just undo this here
* because earlier CPUs might be in group mode, and the kernel
* doesn't support mixing group and non group reads. Defer
* it to later.
* Don't close here because we're in the wrong affinity.
*/
if ((errno == EINVAL || errno == EBADF) &&
evsel__leader(counter) != counter &&
counter->weak_group) {
evlist__reset_weak_group(evsel_list, counter, false);
assert(counter->reset_group);
second_pass = true;
continue;
}
counter->supported = true;
switch (stat_handle_error(counter)) {
case COUNTER_FATAL:
return -1;
case COUNTER_RETRY:
goto try_again;
case COUNTER_SKIP:
continue;
default:
break;
}
}
counter->supported = true;
}
if (second_pass) {
@ -871,42 +868,40 @@ try_again:
* and also close errored counters.
*/
evlist__for_each_cpu(evsel_list, i, cpu) {
affinity__set(&affinity, cpu);
/* First close errored or weak retry */
evlist__for_each_entry(evsel_list, counter) {
if (!counter->reset_group && !counter->errored)
continue;
if (evsel__cpu_iter_skip_no_inc(counter, cpu))
continue;
perf_evsel__close_cpu(&counter->core, counter->cpu_iter);
}
/* Now reopen weak */
evlist__for_each_entry(evsel_list, counter) {
if (!counter->reset_group && !counter->errored)
continue;
if (evsel__cpu_iter_skip(counter, cpu))
continue;
if (!counter->reset_group)
continue;
try_again_reset:
pr_debug2("reopening weak %s\n", evsel__name(counter));
if (create_perf_stat_counter(counter, &stat_config, &target,
counter->cpu_iter - 1) < 0) {
/* First close errored or weak retry */
evlist__for_each_cpu(evlist_cpu_itr, evsel_list, &affinity) {
counter = evlist_cpu_itr.evsel;
switch (stat_handle_error(counter)) {
case COUNTER_FATAL:
return -1;
case COUNTER_RETRY:
goto try_again_reset;
case COUNTER_SKIP:
continue;
default:
break;
}
if (!counter->reset_group && !counter->errored)
continue;
perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx);
}
/* Now reopen weak */
evlist__for_each_cpu(evlist_cpu_itr, evsel_list, &affinity) {
counter = evlist_cpu_itr.evsel;
if (!counter->reset_group && !counter->errored)
continue;
if (!counter->reset_group)
continue;
try_again_reset:
pr_debug2("reopening weak %s\n", evsel__name(counter));
if (create_perf_stat_counter(counter, &stat_config, &target,
evlist_cpu_itr.cpu_map_idx) < 0) {
switch (stat_handle_error(counter)) {
case COUNTER_FATAL:
return -1;
case COUNTER_RETRY:
goto try_again_reset;
case COUNTER_SKIP:
continue;
default:
break;
}
counter->supported = true;
}
counter->supported = true;
}
}
affinity__cleanup(&affinity);
@ -1168,6 +1163,26 @@ static int parse_stat_cgroups(const struct option *opt,
return parse_cgroups(opt, str, unset);
}
static int parse_hybrid_type(const struct option *opt,
const char *str,
int unset __maybe_unused)
{
struct evlist *evlist = *(struct evlist **)opt->value;
if (!list_empty(&evlist->core.entries)) {
fprintf(stderr, "Must define cputype before events/metrics\n");
return -1;
}
evlist->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu(str);
if (!evlist->hybrid_pmu_name) {
fprintf(stderr, "--cputype %s is not supported!\n", str);
return -1;
}
return 0;
}
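A minimal usage sketch for the new option (the event list here is illustrative; the accepted type strings are whatever perf_pmu__hybrid_type_to_pmu() resolves, e.g. "core" or "atom"). Note that --cputype has to precede any -e/-M options, otherwise parse_hybrid_type() rejects it with "Must define cputype before events/metrics":

  $ perf stat --cputype atom -e cycles,instructions -- sleep 1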
static struct option stat_options[] = {
OPT_BOOLEAN('T', "transaction", &transaction_run,
"hardware transaction statistics"),
@ -1282,6 +1297,10 @@ static struct option stat_options[] = {
"don't print 'summary' for CSV summary output"),
OPT_BOOLEAN(0, "quiet", &stat_config.quiet,
"don't print output (useful with record)"),
OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type",
"Only enable events on applying cpu with this type "
"for hybrid platform (e.g. core or atom)",
parse_hybrid_type),
#ifdef HAVE_LIBPFM
OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
"libpfm4 event selector. use 'perf list' to list available events",
@ -1298,70 +1317,75 @@ static struct option stat_options[] = {
OPT_END()
};
static const char *const aggr_mode__string[] = {
[AGGR_CORE] = "core",
[AGGR_DIE] = "die",
[AGGR_GLOBAL] = "global",
[AGGR_NODE] = "node",
[AGGR_NONE] = "none",
[AGGR_SOCKET] = "socket",
[AGGR_THREAD] = "thread",
[AGGR_UNSET] = "unset",
};
static struct aggr_cpu_id perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
struct perf_cpu_map *map, int cpu)
struct perf_cpu cpu)
{
return cpu_map__get_socket(map, cpu, NULL);
return aggr_cpu_id__socket(cpu, /*data=*/NULL);
}
static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __maybe_unused,
struct perf_cpu_map *map, int cpu)
struct perf_cpu cpu)
{
return cpu_map__get_die(map, cpu, NULL);
return aggr_cpu_id__die(cpu, /*data=*/NULL);
}
static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
struct perf_cpu_map *map, int cpu)
struct perf_cpu cpu)
{
return cpu_map__get_core(map, cpu, NULL);
return aggr_cpu_id__core(cpu, /*data=*/NULL);
}
static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __maybe_unused,
struct perf_cpu_map *map, int cpu)
struct perf_cpu cpu)
{
return cpu_map__get_node(map, cpu, NULL);
return aggr_cpu_id__node(cpu, /*data=*/NULL);
}
static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config,
aggr_get_id_t get_id, struct perf_cpu_map *map, int idx)
aggr_get_id_t get_id, struct perf_cpu cpu)
{
int cpu;
struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
struct aggr_cpu_id id = aggr_cpu_id__empty();
if (idx >= map->nr)
return id;
if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu]))
config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu);
cpu = map->map[idx];
if (cpu_map__aggr_cpu_id_is_empty(config->cpus_aggr_map->map[cpu]))
config->cpus_aggr_map->map[cpu] = get_id(config, map, idx);
id = config->cpus_aggr_map->map[cpu];
id = config->cpus_aggr_map->map[cpu.cpu];
return id;
}
static struct aggr_cpu_id perf_stat__get_socket_cached(struct perf_stat_config *config,
struct perf_cpu_map *map, int idx)
struct perf_cpu cpu)
{
return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
return perf_stat__get_aggr(config, perf_stat__get_socket, cpu);
}
static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *config,
struct perf_cpu_map *map, int idx)
struct perf_cpu cpu)
{
return perf_stat__get_aggr(config, perf_stat__get_die, map, idx);
return perf_stat__get_aggr(config, perf_stat__get_die, cpu);
}
static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config,
struct perf_cpu_map *map, int idx)
struct perf_cpu cpu)
{
return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
return perf_stat__get_aggr(config, perf_stat__get_core, cpu);
}
static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *config,
struct perf_cpu_map *map, int idx)
struct perf_cpu cpu)
{
return perf_stat__get_aggr(config, perf_stat__get_node, map, idx);
return perf_stat__get_aggr(config, perf_stat__get_node, cpu);
}
static bool term_percore_set(void)
@ -1376,54 +1400,67 @@ static bool term_percore_set(void)
return false;
}
static int perf_stat_init_aggr_mode(void)
static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
{
int nr;
switch (stat_config.aggr_mode) {
switch (aggr_mode) {
case AGGR_SOCKET:
if (cpu_map__build_socket_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build socket map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_socket_cached;
break;
return aggr_cpu_id__socket;
case AGGR_DIE:
if (cpu_map__build_die_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build die map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_die_cached;
break;
return aggr_cpu_id__die;
case AGGR_CORE:
if (cpu_map__build_core_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build core map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_core_cached;
break;
return aggr_cpu_id__core;
case AGGR_NODE:
if (cpu_map__build_node_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build core map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_node_cached;
break;
return aggr_cpu_id__node;
case AGGR_NONE:
if (term_percore_set()) {
if (cpu_map__build_core_map(evsel_list->core.cpus,
&stat_config.aggr_map)) {
perror("cannot build core map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_core_cached;
}
break;
if (term_percore_set())
return aggr_cpu_id__core;
return NULL;
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
default:
break;
return NULL;
}
}
static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
{
switch (aggr_mode) {
case AGGR_SOCKET:
return perf_stat__get_socket_cached;
case AGGR_DIE:
return perf_stat__get_die_cached;
case AGGR_CORE:
return perf_stat__get_core_cached;
case AGGR_NODE:
return perf_stat__get_node_cached;
case AGGR_NONE:
if (term_percore_set()) {
return perf_stat__get_core_cached;
}
return NULL;
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
default:
return NULL;
}
}
static int perf_stat_init_aggr_mode(void)
{
int nr;
aggr_cpu_id_get_t get_id = aggr_mode__get_aggr(stat_config.aggr_mode);
if (get_id) {
stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.cpus,
get_id, /*data=*/NULL);
if (!stat_config.aggr_map) {
pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]);
return -1;
}
stat_config.aggr_get_id = aggr_mode__get_id(stat_config.aggr_mode);
}
/*
@ -1431,7 +1468,7 @@ static int perf_stat_init_aggr_mode(void)
* taking the highest cpu number to be the size of
* the aggregation translate cpumap.
*/
nr = perf_cpu_map__max(evsel_list->core.cpus);
nr = perf_cpu_map__max(evsel_list->core.cpus).cpu;
stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr + 1);
return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
}
@ -1459,169 +1496,139 @@ static void perf_stat__exit_aggr_mode(void)
stat_config.cpus_aggr_map = NULL;
}
static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *map, int idx)
{
int cpu;
if (idx > map->nr)
return -1;
cpu = map->map[idx];
if (cpu >= env->nr_cpus_avail)
return -1;
return cpu;
}
static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data)
static struct aggr_cpu_id perf_env__get_socket_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
struct perf_env *env = data;
int cpu = perf_env__get_cpu(env, map, idx);
struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
struct aggr_cpu_id id = aggr_cpu_id__empty();
if (cpu != -1)
id.socket = env->cpu[cpu].socket_id;
if (cpu.cpu != -1)
id.socket = env->cpu[cpu.cpu].socket_id;
return id;
}
static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, void *data)
static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
struct perf_env *env = data;
struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
int cpu = perf_env__get_cpu(env, map, idx);
struct aggr_cpu_id id = aggr_cpu_id__empty();
if (cpu != -1) {
if (cpu.cpu != -1) {
/*
* die_id is relative to socket, so start
* with the socket ID and then add die to
* make a unique ID.
*/
id.socket = env->cpu[cpu].socket_id;
id.die = env->cpu[cpu].die_id;
id.socket = env->cpu[cpu.cpu].socket_id;
id.die = env->cpu[cpu.cpu].die_id;
}
return id;
}
static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, void *data)
static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
struct perf_env *env = data;
struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
int cpu = perf_env__get_cpu(env, map, idx);
struct aggr_cpu_id id = aggr_cpu_id__empty();
if (cpu != -1) {
if (cpu.cpu != -1) {
/*
* core_id is relative to socket and die,
* we need a global id. So we set
* socket, die id and core id
*/
id.socket = env->cpu[cpu].socket_id;
id.die = env->cpu[cpu].die_id;
id.core = env->cpu[cpu].core_id;
id.socket = env->cpu[cpu.cpu].socket_id;
id.die = env->cpu[cpu.cpu].die_id;
id.core = env->cpu[cpu.cpu].core_id;
}
return id;
}
static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx, void *data)
static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
int cpu = perf_env__get_cpu(data, map, idx);
struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
struct aggr_cpu_id id = aggr_cpu_id__empty();
id.node = perf_env__numa_node(data, cpu);
return id;
}
static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus,
struct cpu_aggr_map **sockp)
{
return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
}
static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus,
struct cpu_aggr_map **diep)
{
return cpu_map__build_map(cpus, diep, perf_env__get_die, env);
}
static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus,
struct cpu_aggr_map **corep)
{
return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
}
static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus,
struct cpu_aggr_map **nodep)
{
return cpu_map__build_map(cpus, nodep, perf_env__get_node, env);
}
static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
struct perf_cpu_map *map, int idx)
struct perf_cpu cpu)
{
return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
return perf_env__get_socket_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused,
struct perf_cpu_map *map, int idx)
struct perf_cpu cpu)
{
return perf_env__get_die(map, idx, &perf_stat.session->header.env);
return perf_env__get_die_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
struct perf_cpu_map *map, int idx)
struct perf_cpu cpu)
{
return perf_env__get_core(map, idx, &perf_stat.session->header.env);
return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused,
struct perf_cpu_map *map, int idx)
struct perf_cpu cpu)
{
return perf_env__get_node(map, idx, &perf_stat.session->header.env);
return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
{
struct perf_env *env = &st->session->header.env;
switch (stat_config.aggr_mode) {
switch (aggr_mode) {
case AGGR_SOCKET:
if (perf_env__build_socket_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build socket map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_socket_file;
break;
return perf_env__get_socket_aggr_by_cpu;
case AGGR_DIE:
if (perf_env__build_die_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build die map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_die_file;
break;
return perf_env__get_die_aggr_by_cpu;
case AGGR_CORE:
if (perf_env__build_core_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build core map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_core_file;
break;
return perf_env__get_core_aggr_by_cpu;
case AGGR_NODE:
if (perf_env__build_node_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build core map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_node_file;
break;
return perf_env__get_node_aggr_by_cpu;
case AGGR_NONE:
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
default:
break;
return NULL;
}
}
static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode)
{
switch (aggr_mode) {
case AGGR_SOCKET:
return perf_stat__get_socket_file;
case AGGR_DIE:
return perf_stat__get_die_file;
case AGGR_CORE:
return perf_stat__get_core_file;
case AGGR_NODE:
return perf_stat__get_node_file;
case AGGR_NONE:
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
default:
return NULL;
}
}
static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
{
struct perf_env *env = &st->session->header.env;
aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode);
if (!get_id)
return 0;
stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.cpus, get_id, env);
if (!stat_config.aggr_map) {
pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]);
return -1;
}
stat_config.aggr_get_id = aggr_mode__get_id_file(stat_config.aggr_mode);
return 0;
}



@ -3964,6 +3964,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
evlist__add(evlist, pgfault_min);
}
/* Enable ignoring missing threads when -u/-p option is defined. */
trace->opts.ignore_missing_thread = trace->opts.target.uid != UINT_MAX || trace->opts.target.pid;
if (trace->sched &&
evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime))
goto out_error_sched_stat_runtime;


@ -308,8 +308,6 @@ int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, vo
int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
{
struct filter_data *d = data;
pr_debug("%s API\n", __func__);
return do_checks(data, sample, ctx, false);


@ -0,0 +1,8 @@
[
{
"ArchStdEvent": "BR_MIS_PRED"
},
{
"ArchStdEvent": "BR_PRED"
}
]


@ -0,0 +1,20 @@
[
{
"ArchStdEvent": "CPU_CYCLES"
},
{
"ArchStdEvent": "BUS_ACCESS"
},
{
"ArchStdEvent": "BUS_CYCLES"
},
{
"ArchStdEvent": "BUS_ACCESS_RD"
},
{
"ArchStdEvent": "BUS_ACCESS_WR"
},
{
"ArchStdEvent": "CNT_CYCLES"
}
]


@ -0,0 +1,155 @@
[
{
"ArchStdEvent": "L1I_CACHE_REFILL"
},
{
"ArchStdEvent": "L1I_TLB_REFILL"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL"
},
{
"ArchStdEvent": "L1D_CACHE"
},
{
"ArchStdEvent": "L1D_TLB_REFILL"
},
{
"ArchStdEvent": "L1I_CACHE"
},
{
"ArchStdEvent": "L1D_CACHE_WB"
},
{
"ArchStdEvent": "L2D_CACHE"
},
{
"ArchStdEvent": "L2D_CACHE_REFILL"
},
{
"ArchStdEvent": "L2D_CACHE_WB"
},
{
"ArchStdEvent": "L2D_CACHE_ALLOCATE"
},
{
"ArchStdEvent": "L1D_TLB"
},
{
"ArchStdEvent": "L1I_TLB"
},
{
"ArchStdEvent": "L3D_CACHE_ALLOCATE"
},
{
"ArchStdEvent": "L3D_CACHE_REFILL"
},
{
"ArchStdEvent": "L3D_CACHE"
},
{
"ArchStdEvent": "L2D_TLB_REFILL"
},
{
"ArchStdEvent": "L2D_TLB"
},
{
"ArchStdEvent": "DTLB_WALK"
},
{
"ArchStdEvent": "ITLB_WALK"
},
{
"ArchStdEvent": "LL_CACHE_RD"
},
{
"ArchStdEvent": "LL_CACHE_MISS_RD"
},
{
"ArchStdEvent": "L1D_CACHE_LMISS_RD"
},
{
"ArchStdEvent": "L1D_CACHE_RD"
},
{
"ArchStdEvent": "L1D_CACHE_WR"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL_RD"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL_WR"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL_INNER"
},
{
"ArchStdEvent": "L1D_CACHE_REFILL_OUTER"
},
{
"ArchStdEvent": "L1D_CACHE_WB_VICTIM"
},
{
"ArchStdEvent": "L1D_CACHE_WB_CLEAN"
},
{
"ArchStdEvent": "L1D_CACHE_INVAL"
},
{
"ArchStdEvent": "L1D_TLB_REFILL_RD"
},
{
"ArchStdEvent": "L1D_TLB_REFILL_WR"
},
{
"ArchStdEvent": "L1D_TLB_RD"
},
{
"ArchStdEvent": "L1D_TLB_WR"
},
{
"ArchStdEvent": "L2D_CACHE_RD"
},
{
"ArchStdEvent": "L2D_CACHE_WR"
},
{
"ArchStdEvent": "L2D_CACHE_REFILL_RD"
},
{
"ArchStdEvent": "L2D_CACHE_REFILL_WR"
},
{
"ArchStdEvent": "L2D_CACHE_WB_VICTIM"
},
{
"ArchStdEvent": "L2D_CACHE_WB_CLEAN"
},
{
"ArchStdEvent": "L2D_CACHE_INVAL"
},
{
"ArchStdEvent": "L2D_TLB_REFILL_RD"
},
{
"ArchStdEvent": "L2D_TLB_REFILL_WR"
},
{
"ArchStdEvent": "L2D_TLB_RD"
},
{
"ArchStdEvent": "L2D_TLB_WR"
},
{
"ArchStdEvent": "L3D_CACHE_RD"
},
{
"ArchStdEvent": "L1I_CACHE_LMISS"
},
{
"ArchStdEvent": "L2D_CACHE_LMISS_RD"
},
{
"ArchStdEvent": "L3D_CACHE_LMISS_RD"
}
]


@ -0,0 +1,47 @@
[
{
"ArchStdEvent": "EXC_TAKEN"
},
{
"ArchStdEvent": "MEMORY_ERROR"
},
{
"ArchStdEvent": "EXC_UNDEF"
},
{
"ArchStdEvent": "EXC_SVC"
},
{
"ArchStdEvent": "EXC_PABORT"
},
{
"ArchStdEvent": "EXC_DABORT"
},
{
"ArchStdEvent": "EXC_IRQ"
},
{
"ArchStdEvent": "EXC_FIQ"
},
{
"ArchStdEvent": "EXC_SMC"
},
{
"ArchStdEvent": "EXC_HVC"
},
{
"ArchStdEvent": "EXC_TRAP_PABORT"
},
{
"ArchStdEvent": "EXC_TRAP_DABORT"
},
{
"ArchStdEvent": "EXC_TRAP_OTHER"
},
{
"ArchStdEvent": "EXC_TRAP_IRQ"
},
{
"ArchStdEvent": "EXC_TRAP_FIQ"
}
]


@ -0,0 +1,143 @@
[
{
"ArchStdEvent": "SW_INCR"
},
{
"ArchStdEvent": "INST_RETIRED"
},
{
"ArchStdEvent": "EXC_RETURN"
},
{
"ArchStdEvent": "CID_WRITE_RETIRED"
},
{
"ArchStdEvent": "INST_SPEC"
},
{
"ArchStdEvent": "TTBR_WRITE_RETIRED"
},
{
"ArchStdEvent": "BR_RETIRED"
},
{
"ArchStdEvent": "BR_MIS_PRED_RETIRED"
},
{
"ArchStdEvent": "OP_RETIRED"
},
{
"ArchStdEvent": "OP_SPEC"
},
{
"ArchStdEvent": "LDREX_SPEC"
},
{
"ArchStdEvent": "STREX_PASS_SPEC"
},
{
"ArchStdEvent": "STREX_FAIL_SPEC"
},
{
"ArchStdEvent": "STREX_SPEC"
},
{
"ArchStdEvent": "LD_SPEC"
},
{
"ArchStdEvent": "ST_SPEC"
},
{
"ArchStdEvent": "DP_SPEC"
},
{
"ArchStdEvent": "ASE_SPEC"
},
{
"ArchStdEvent": "VFP_SPEC"
},
{
"ArchStdEvent": "PC_WRITE_SPEC"
},
{
"ArchStdEvent": "CRYPTO_SPEC"
},
{
"ArchStdEvent": "BR_IMMED_SPEC"
},
{
"ArchStdEvent": "BR_RETURN_SPEC"
},
{
"ArchStdEvent": "BR_INDIRECT_SPEC"
},
{
"ArchStdEvent": "ISB_SPEC"
},
{
"ArchStdEvent": "DSB_SPEC"
},
{
"ArchStdEvent": "DMB_SPEC"
},
{
"ArchStdEvent": "RC_LD_SPEC"
},
{
"ArchStdEvent": "RC_ST_SPEC"
},
{
"ArchStdEvent": "ASE_INST_SPEC"
},
{
"ArchStdEvent": "SVE_INST_SPEC"
},
{
"ArchStdEvent": "FP_HP_SPEC"
},
{
"ArchStdEvent": "FP_SP_SPEC"
},
{
"ArchStdEvent": "FP_DP_SPEC"
},
{
"ArchStdEvent": "SVE_PRED_SPEC"
},
{
"ArchStdEvent": "SVE_PRED_EMPTY_SPEC"
},
{
"ArchStdEvent": "SVE_PRED_FULL_SPEC"
},
{
"ArchStdEvent": "SVE_PRED_PARTIAL_SPEC"
},
{
"ArchStdEvent": "SVE_PRED_NOT_FULL_SPEC"
},
{
"ArchStdEvent": "SVE_LDFF_SPEC"
},
{
"ArchStdEvent": "SVE_LDFF_FAULT_SPEC"
},
{
"ArchStdEvent": "FP_SCALE_OPS_SPEC"
},
{
"ArchStdEvent": "FP_FIXED_OPS_SPEC"
},
{
"ArchStdEvent": "ASE_SVE_INT8_SPEC"
},
{
"ArchStdEvent": "ASE_SVE_INT16_SPEC"
},
{
"ArchStdEvent": "ASE_SVE_INT32_SPEC"
},
{
"ArchStdEvent": "ASE_SVE_INT64_SPEC"
}
]


@ -0,0 +1,38 @@
[
{
"ArchStdEvent": "MEM_ACCESS"
},
{
"ArchStdEvent": "MEM_ACCESS_RD"
},
{
"ArchStdEvent": "MEM_ACCESS_WR"
},
{
"ArchStdEvent": "UNALIGNED_LD_SPEC"
},
{
"ArchStdEvent": "UNALIGNED_ST_SPEC"
},
{
"ArchStdEvent": "UNALIGNED_LDST_SPEC"
},
{
"ArchStdEvent": "LDST_ALIGN_LAT"
},
{
"ArchStdEvent": "LD_ALIGN_LAT"
},
{
"ArchStdEvent": "ST_ALIGN_LAT"
},
{
"ArchStdEvent": "MEM_ACCESS_CHECKED"
},
{
"ArchStdEvent": "MEM_ACCESS_CHECKED_RD"
},
{
"ArchStdEvent": "MEM_ACCESS_CHECKED_WR"
}
]


@ -0,0 +1,5 @@
[
{
"ArchStdEvent": "REMOTE_ACCESS"
}
]


@ -0,0 +1,23 @@
[
{
"ArchStdEvent": "STALL_FRONTEND"
},
{
"ArchStdEvent": "STALL_BACKEND"
},
{
"ArchStdEvent": "STALL"
},
{
"ArchStdEvent": "STALL_SLOT_BACKEND"
},
{
"ArchStdEvent": "STALL_SLOT_FRONTEND"
},
{
"ArchStdEvent": "STALL_SLOT"
},
{
"ArchStdEvent": "STALL_BACKEND_MEM"
}
]


@ -0,0 +1,14 @@
[
{
"ArchStdEvent": "SAMPLE_POP"
},
{
"ArchStdEvent": "SAMPLE_FEED"
},
{
"ArchStdEvent": "SAMPLE_FILTRATE"
},
{
"ArchStdEvent": "SAMPLE_COLLISION"
}
]


@ -0,0 +1,29 @@
[
{
"ArchStdEvent": "TRB_WRAP"
},
{
"ArchStdEvent": "TRCEXTOUT0"
},
{
"ArchStdEvent": "TRCEXTOUT1"
},
{
"ArchStdEvent": "TRCEXTOUT2"
},
{
"ArchStdEvent": "TRCEXTOUT3"
},
{
"ArchStdEvent": "CTI_TRIGOUT4"
},
{
"ArchStdEvent": "CTI_TRIGOUT5"
},
{
"ArchStdEvent": "CTI_TRIGOUT6"
},
{
"ArchStdEvent": "CTI_TRIGOUT7"
}
]


@ -299,6 +299,30 @@
"EventName": "STALL_SLOT",
"BriefDescription": "No operation sent for execution on a slot"
},
{
"PublicDescription": "Sample Population",
"EventCode": "0x4000",
"EventName": "SAMPLE_POP",
"BriefDescription": "Sample Population"
},
{
"PublicDescription": "Sample Taken",
"EventCode": "0x4001",
"EventName": "SAMPLE_FEED",
"BriefDescription": "Sample Taken"
},
{
"PublicDescription": "Sample Taken and not removed by filtering",
"EventCode": "0x4002",
"EventName": "SAMPLE_FILTRATE",
"BriefDescription": "Sample Taken and not removed by filtering"
},
{
"PublicDescription": "Sample collided with previous sample",
"EventCode": "0x4003",
"EventName": "SAMPLE_COLLISION",
"BriefDescription": "Sample collided with previous sample"
},
{
"PublicDescription": "Constant frequency cycles. The counter increments at a constant frequency equal to the rate of increment of the system counter, CNTPCT_EL0.",
"EventCode": "0x4004",
@ -329,6 +353,96 @@
"EventName": "L3D_CACHE_LMISS_RD",
"BriefDescription": "Level 3 data cache long-latency read miss"
},
{
"PublicDescription": "Trace buffer current write pointer wrapped",
"EventCode": "0x400C",
"EventName": "TRB_WRAP",
"BriefDescription": "Trace buffer current write pointer wrapped"
},
{
"PublicDescription": "PE Trace Unit external output 0",
"EventCode": "0x4010",
"EventName": "TRCEXTOUT0",
"BriefDescription": "PE Trace Unit external output 0"
},
{
"PublicDescription": "PE Trace Unit external output 1",
"EventCode": "0x4011",
"EventName": "TRCEXTOUT1",
"BriefDescription": "PE Trace Unit external output 1"
},
{
"PublicDescription": "PE Trace Unit external output 2",
"EventCode": "0x4012",
"EventName": "TRCEXTOUT2",
"BriefDescription": "PE Trace Unit external output 2"
},
{
"PublicDescription": "PE Trace Unit external output 3",
"EventCode": "0x4013",
"EventName": "TRCEXTOUT3",
"BriefDescription": "PE Trace Unit external output 3"
},
{
"PublicDescription": "Cross-trigger Interface output trigger 4",
"EventCode": "0x4018",
"EventName": "CTI_TRIGOUT4",
"BriefDescription": "Cross-trigger Interface output trigger 4"
},
{
"PublicDescription": "Cross-trigger Interface output trigger 5 ",
"EventCode": "0x4019",
"EventName": "CTI_TRIGOUT5",
"BriefDescription": "Cross-trigger Interface output trigger 5 "
},
{
"PublicDescription": "Cross-trigger Interface output trigger 6",
"EventCode": "0x401A",
"EventName": "CTI_TRIGOUT6",
"BriefDescription": "Cross-trigger Interface output trigger 6"
},
{
"PublicDescription": "Cross-trigger Interface output trigger 7",
"EventCode": "0x401B",
"EventName": "CTI_TRIGOUT7",
"BriefDescription": "Cross-trigger Interface output trigger 7"
},
{
"PublicDescription": "Access with additional latency from alignment",
"EventCode": "0x4020",
"EventName": "LDST_ALIGN_LAT",
"BriefDescription": "Access with additional latency from alignment"
},
{
"PublicDescription": "Load with additional latency from alignment",
"EventCode": "0x4021",
"EventName": "LD_ALIGN_LAT",
"BriefDescription": "Load with additional latency from alignment"
},
{
"PublicDescription": "Store with additional latency from alignment",
"EventCode": "0x4022",
"EventName": "ST_ALIGN_LAT",
"BriefDescription": "Store with additional latency from alignment"
},
{
"PublicDescription": "Checked data memory access",
"EventCode": "0x4024",
"EventName": "MEM_ACCESS_CHECKED",
"BriefDescription": "Checked data memory access"
},
{
"PublicDescription": "Checked data memory access, read",
"EventCode": "0x4025",
"EventName": "MEM_ACCESS_CHECKED_RD",
"BriefDescription": "Checked data memory access, read"
},
{
"PublicDescription": "Checked data memory access, write",
"EventCode": "0x4026",
"EventName": "MEM_ACCESS_CHECKED_WR",
"BriefDescription": "Checked data memory access, write"
},
{
"PublicDescription": "SIMD Instruction architecturally executed.",
"EventCode": "0x8000",
@ -341,6 +455,18 @@
"EventName": "SVE_INST_RETIRED",
"BriefDescription": "Instruction architecturally executed, SVE."
},
{
"PublicDescription": "ASE operations speculatively executed",
"EventCode": "0x8005",
"EventName": "ASE_INST_SPEC",
"BriefDescription": "ASE operations speculatively executed"
},
{
"PublicDescription": "SVE operations speculatively executed",
"EventCode": "0x8006",
"EventName": "SVE_INST_SPEC",
"BriefDescription": "SVE operations speculatively executed"
},
{
"PublicDescription": "Microarchitectural operation, Operations speculatively executed.",
"EventCode": "0x8008",
@ -359,6 +485,24 @@
"EventName": "FP_SPEC",
"BriefDescription": "Floating-point Operations speculatively executed."
},
{
"PublicDescription": "Floating-point half-precision operations speculatively executed",
"EventCode": "0x8014",
"EventName": "FP_HP_SPEC",
"BriefDescription": "Floating-point half-precision operations speculatively executed"
},
{
"PublicDescription": "Floating-point single-precision operations speculatively executed",
"EventCode": "0x8018",
"EventName": "FP_SP_SPEC",
"BriefDescription": "Floating-point single-precision operations speculatively executed"
},
{
"PublicDescription": "Floating-point double-precision operations speculatively executed",
"EventCode": "0x801C",
"EventName": "FP_DP_SPEC",
"BriefDescription": "Floating-point double-precision operations speculatively executed"
},
{
"PublicDescription": "Floating-point FMA Operations speculatively executed.",
"EventCode": "0x8028",
@ -389,6 +533,30 @@
"EventName": "SVE_PRED_SPEC",
"BriefDescription": "SVE predicated Operations speculatively executed."
},
{
"PublicDescription": "SVE predicated operations with no active predicates speculatively executed",
"EventCode": "0x8075",
"EventName": "SVE_PRED_EMPTY_SPEC",
"BriefDescription": "SVE predicated operations with no active predicates speculatively executed"
},
{
"PublicDescription": "SVE predicated operations speculatively executed with all active predicates",
"EventCode": "0x8076",
"EventName": "SVE_PRED_FULL_SPEC",
"BriefDescription": "SVE predicated operations speculatively executed with all active predicates"
},
{
"PublicDescription": "SVE predicated operations speculatively executed with partially active predicates",
"EventCode": "0x8077",
"EventName": "SVE_PRED_PARTIAL_SPEC",
"BriefDescription": "SVE predicated operations speculatively executed with partially active predicates"
},
{
"PublicDescription": "SVE predicated operations with empty or partially active predicates",
"EventCode": "0x8079",
"EventName": "SVE_PRED_NOT_FULL_SPEC",
"BriefDescription": "SVE predicated operations with empty or partially active predicates"
},
{
"PublicDescription": "SVE MOVPRFX Operations speculatively executed.",
"EventCode": "0x807C",
@ -497,6 +665,12 @@
"EventName": "SVE_LDFF_SPEC",
"BriefDescription": "SVE First-fault load Operations speculatively executed."
},
{
"PublicDescription": "SVE first-fault load operations speculatively executed which set FFR bit to 0",
"EventCode": "0x80BD",
"EventName": "SVE_LDFF_FAULT_SPEC",
"BriefDescription": "SVE first-fault load operations speculatively executed which set FFR bit to 0"
},
{
"PublicDescription": "Scalable floating-point element Operations speculatively executed.",
"EventCode": "0x80C0",
@ -544,5 +718,29 @@
"EventCode": "0x80C7",
"EventName": "FP_DP_FIXED_OPS_SPEC",
"BriefDescription": "Non-scalable double-precision floating-point element Operations speculatively executed."
},
{
"PublicDescription": "Advanced SIMD and SVE 8-bit integer operations speculatively executed",
"EventCode": "0x80E3",
"EventName": "ASE_SVE_INT8_SPEC",
"BriefDescription": "Advanced SIMD and SVE 8-bit integer operations speculatively executed"
},
{
"PublicDescription": "Advanced SIMD and SVE 16-bit integer operations speculatively executed",
"EventCode": "0x80E7",
"EventName": "ASE_SVE_INT16_SPEC",
"BriefDescription": "Advanced SIMD and SVE 16-bit integer operations speculatively executed"
},
{
"PublicDescription": "Advanced SIMD and SVE 32-bit integer operations speculatively executed",
"EventCode": "0x80EB",
"EventName": "ASE_SVE_INT32_SPEC",
"BriefDescription": "Advanced SIMD and SVE 32-bit integer operations speculatively executed"
},
{
"PublicDescription": "Advanced SIMD and SVE 64-bit integer operations speculatively executed",
"EventCode": "0x80EF",
"EventName": "ASE_SVE_INT64_SPEC",
"BriefDescription": "Advanced SIMD and SVE 64-bit integer operations speculatively executed"
}
]


@ -19,6 +19,7 @@
0x00000000410fd0b0,v1,arm/cortex-a76-n1,core
0x00000000410fd0c0,v1,arm/cortex-a76-n1,core
0x00000000410fd400,v1,arm/neoverse-v1,core
0x00000000410fd490,v1,arm/neoverse-n2,core
0x00000000420f5160,v1,cavium/thunderx2,core
0x00000000430f0af0,v1,cavium/thunderx2,core
0x00000000460f0010,v1,fujitsu/a64fx,core



@ -148,305 +148,305 @@
"EventCode": "0x60",
"EventName": "BUS_ACCESS_RD",
"BriefDescription": "Bus access read"
},
{
},
{
"PublicDescription": "Bus access write",
"EventCode": "0x61",
"EventName": "BUS_ACCESS_WR",
"BriefDescription": "Bus access write"
},
{
},
{
"PublicDescription": "Bus access, Normal, Cacheable, Shareable",
"EventCode": "0x62",
"EventName": "BUS_ACCESS_SHARED",
"BriefDescription": "Bus access, Normal, Cacheable, Shareable"
},
{
},
{
"PublicDescription": "Bus access, not Normal, Cacheable, Shareable",
"EventCode": "0x63",
"EventName": "BUS_ACCESS_NOT_SHARED",
"BriefDescription": "Bus access, not Normal, Cacheable, Shareable"
},
{
},
{
"PublicDescription": "Bus access, Normal",
"EventCode": "0x64",
"EventName": "BUS_ACCESS_NORMAL",
"BriefDescription": "Bus access, Normal"
},
{
},
{
"PublicDescription": "Bus access, peripheral",
"EventCode": "0x65",
"EventName": "BUS_ACCESS_PERIPH",
"BriefDescription": "Bus access, peripheral"
},
{
},
{
"PublicDescription": "Data memory access, read",
"EventCode": "0x66",
"EventName": "MEM_ACCESS_RD",
"BriefDescription": "Data memory access, read"
},
{
},
{
"PublicDescription": "Data memory access, write",
"EventCode": "0x67",
"EventName": "MEM_ACCESS_WR",
"BriefDescription": "Data memory access, write"
},
{
},
{
"PublicDescription": "Unaligned access, read",
"EventCode": "0x68",
"EventName": "UNALIGNED_LD_SPEC",
"BriefDescription": "Unaligned access, read"
},
{
},
{
"PublicDescription": "Unaligned access, write",
"EventCode": "0x69",
"EventName": "UNALIGNED_ST_SPEC",
"BriefDescription": "Unaligned access, write"
},
{
},
{
"PublicDescription": "Unaligned access",
"EventCode": "0x6a",
"EventName": "UNALIGNED_LDST_SPEC",
"BriefDescription": "Unaligned access"
},
{
},
{
"PublicDescription": "Exclusive operation speculatively executed, LDREX or LDX",
"EventCode": "0x6c",
"EventName": "LDREX_SPEC",
"BriefDescription": "Exclusive operation speculatively executed, LDREX or LDX"
},
{
},
{
"PublicDescription": "Exclusive operation speculatively executed, STREX or STX pass",
"EventCode": "0x6d",
"EventName": "STREX_PASS_SPEC",
"BriefDescription": "Exclusive operation speculatively executed, STREX or STX pass"
},
{
},
{
"PublicDescription": "Exclusive operation speculatively executed, STREX or STX fail",
"EventCode": "0x6e",
"EventName": "STREX_FAIL_SPEC",
"BriefDescription": "Exclusive operation speculatively executed, STREX or STX fail"
},
{
},
{
"PublicDescription": "Exclusive operation speculatively executed, STREX or STX",
"EventCode": "0x6f",
"EventName": "STREX_SPEC",
"BriefDescription": "Exclusive operation speculatively executed, STREX or STX"
},
{
},
{
"PublicDescription": "Operation speculatively executed, load",
"EventCode": "0x70",
"EventName": "LD_SPEC",
"BriefDescription": "Operation speculatively executed, load"
},
{
},
{
"PublicDescription": "Operation speculatively executed, store",
"EventCode": "0x71",
"EventName": "ST_SPEC",
"BriefDescription": "Operation speculatively executed, store"
},
{
},
{
"PublicDescription": "Operation speculatively executed, load or store",
"EventCode": "0x72",
"EventName": "LDST_SPEC",
"BriefDescription": "Operation speculatively executed, load or store"
},
{
},
{
"PublicDescription": "Operation speculatively executed, integer data processing",
"EventCode": "0x73",
"EventName": "DP_SPEC",
"BriefDescription": "Operation speculatively executed, integer data processing"
},
{
},
{
"PublicDescription": "Operation speculatively executed, Advanced SIMD instruction",
"EventCode": "0x74",
"EventName": "ASE_SPEC",
"BriefDescription": "Operation speculatively executed, Advanced SIMD instruction"
},
{
},
{
"PublicDescription": "Operation speculatively executed, floating-point instruction",
"EventCode": "0x75",
"EventName": "VFP_SPEC",
"BriefDescription": "Operation speculatively executed, floating-point instruction"
},
{
},
{
"PublicDescription": "Operation speculatively executed, software change of the PC",
"EventCode": "0x76",
"EventName": "PC_WRITE_SPEC",
"BriefDescription": "Operation speculatively executed, software change of the PC"
},
{
},
{
"PublicDescription": "Operation speculatively executed, Cryptographic instruction",
"EventCode": "0x77",
"EventName": "CRYPTO_SPEC",
"BriefDescription": "Operation speculatively executed, Cryptographic instruction"
},
{
},
{
"PublicDescription": "Branch speculatively executed, immediate branch",
"EventCode": "0x78",
"EventName": "BR_IMMED_SPEC",
"BriefDescription": "Branch speculatively executed, immediate branch"
},
{
},
{
"PublicDescription": "Branch speculatively executed, procedure return",
"EventCode": "0x79",
"EventName": "BR_RETURN_SPEC",
"BriefDescription": "Branch speculatively executed, procedure return"
},
{
},
{
"PublicDescription": "Branch speculatively executed, indirect branch",
"EventCode": "0x7a",
"EventName": "BR_INDIRECT_SPEC",
"BriefDescription": "Branch speculatively executed, indirect branch"
},
{
},
{
"PublicDescription": "Barrier speculatively executed, ISB",
"EventCode": "0x7c",
"EventName": "ISB_SPEC",
"BriefDescription": "Barrier speculatively executed, ISB"
},
{
},
{
"PublicDescription": "Barrier speculatively executed, DSB",
"EventCode": "0x7d",
"EventName": "DSB_SPEC",
"BriefDescription": "Barrier speculatively executed, DSB"
},
{
},
{
"PublicDescription": "Barrier speculatively executed, DMB",
"EventCode": "0x7e",
"EventName": "DMB_SPEC",
"BriefDescription": "Barrier speculatively executed, DMB"
},
{
},
{
"PublicDescription": "Exception taken, Other synchronous",
"EventCode": "0x81",
"EventName": "EXC_UNDEF",
"BriefDescription": "Exception taken, Other synchronous"
},
{
},
{
"PublicDescription": "Exception taken, Supervisor Call",
"EventCode": "0x82",
"EventName": "EXC_SVC",
"BriefDescription": "Exception taken, Supervisor Call"
},
{
},
{
"PublicDescription": "Exception taken, Instruction Abort",
"EventCode": "0x83",
"EventName": "EXC_PABORT",
"BriefDescription": "Exception taken, Instruction Abort"
},
{
},
{
"PublicDescription": "Exception taken, Data Abort and SError",
"EventCode": "0x84",
"EventName": "EXC_DABORT",
"BriefDescription": "Exception taken, Data Abort and SError"
},
{
},
{
"PublicDescription": "Exception taken, IRQ",
"EventCode": "0x86",
"EventName": "EXC_IRQ",
"BriefDescription": "Exception taken, IRQ"
},
{
},
{
"PublicDescription": "Exception taken, FIQ",
"EventCode": "0x87",
"EventName": "EXC_FIQ",
"BriefDescription": "Exception taken, FIQ"
},
{
},
{
"PublicDescription": "Exception taken, Secure Monitor Call",
"EventCode": "0x88",
"EventName": "EXC_SMC",
"BriefDescription": "Exception taken, Secure Monitor Call"
},
{
},
{
"PublicDescription": "Exception taken, Hypervisor Call",
"EventCode": "0x8a",
"EventName": "EXC_HVC",
"BriefDescription": "Exception taken, Hypervisor Call"
},
{
},
{
"PublicDescription": "Exception taken, Instruction Abort not taken locally",
"EventCode": "0x8b",
"EventName": "EXC_TRAP_PABORT",
"BriefDescription": "Exception taken, Instruction Abort not taken locally"
},
{
},
{
"PublicDescription": "Exception taken, Data Abort or SError not taken locally",
"EventCode": "0x8c",
"EventName": "EXC_TRAP_DABORT",
"BriefDescription": "Exception taken, Data Abort or SError not taken locally"
},
{
},
{
"PublicDescription": "Exception taken, Other traps not taken locally",
"EventCode": "0x8d",
"EventName": "EXC_TRAP_OTHER",
"BriefDescription": "Exception taken, Other traps not taken locally"
},
{
},
{
"PublicDescription": "Exception taken, IRQ not taken locally",
"EventCode": "0x8e",
"EventName": "EXC_TRAP_IRQ",
"BriefDescription": "Exception taken, IRQ not taken locally"
},
{
},
{
"PublicDescription": "Exception taken, FIQ not taken locally",
"EventCode": "0x8f",
"EventName": "EXC_TRAP_FIQ",
"BriefDescription": "Exception taken, FIQ not taken locally"
},
{
},
{
"PublicDescription": "Release consistency operation speculatively executed, Load-Acquire",
"EventCode": "0x90",
"EventName": "RC_LD_SPEC",
"BriefDescription": "Release consistency operation speculatively executed, Load-Acquire"
},
{
},
{
"PublicDescription": "Release consistency operation speculatively executed, Store-Release",
"EventCode": "0x91",
"EventName": "RC_ST_SPEC",
"BriefDescription": "Release consistency operation speculatively executed, Store-Release"
},
{
},
{
"PublicDescription": "Attributable Level 3 data or unified cache access, read",
"EventCode": "0xa0",
"EventName": "L3D_CACHE_RD",
"BriefDescription": "Attributable Level 3 data or unified cache access, read"
},
{
},
{
"PublicDescription": "Attributable Level 3 data or unified cache access, write",
"EventCode": "0xa1",
"EventName": "L3D_CACHE_WR",
"BriefDescription": "Attributable Level 3 data or unified cache access, write"
},
{
},
{
"PublicDescription": "Attributable Level 3 data or unified cache refill, read",
"EventCode": "0xa2",
"EventName": "L3D_CACHE_REFILL_RD",
"BriefDescription": "Attributable Level 3 data or unified cache refill, read"
},
{
},
{
"PublicDescription": "Attributable Level 3 data or unified cache refill, write",
"EventCode": "0xa3",
"EventName": "L3D_CACHE_REFILL_WR",
"BriefDescription": "Attributable Level 3 data or unified cache refill, write"
},
{
},
{
"PublicDescription": "Attributable Level 3 data or unified cache Write-Back, victim",
"EventCode": "0xa6",
"EventName": "L3D_CACHE_WB_VICTIM",
"BriefDescription": "Attributable Level 3 data or unified cache Write-Back, victim"
},
{
},
{
"PublicDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean",
"EventCode": "0xa7",
"EventName": "L3D_CACHE_WB_CLEAN",
"BriefDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean"
},
{
},
{
"PublicDescription": "Attributable Level 3 data or unified cache access, invalidate",
"EventCode": "0xa8",
"EventName": "L3D_CACHE_INVAL",
"BriefDescription": "Attributable Level 3 data or unified cache access, invalidate"
}
}
]


@ -672,8 +672,6 @@ static int json_events(const char *fn,
addfield(map, &je.metric_constraint, "", "", val);
} else if (json_streq(map, field, "MetricExpr")) {
addfield(map, &je.metric_expr, "", "", val);
for (s = je.metric_expr; *s; s++)
*s = tolower(*s);
} else if (json_streq(map, field, "ArchStdEvent")) {
addfield(map, &arch_std, "", "", val);
for (s = arch_std; *s; s++)


@ -65,6 +65,7 @@ perf-y += pe-file-parsing.o
perf-y += expand-cgroup.o
perf-y += perf-time-to-tsc.o
perf-y += dlfilter-test.o
perf-y += sigtrap.o
$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
$(call rule_mkdir)


@ -65,7 +65,7 @@ do { \
#define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field)
static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu,
static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
int fd, int group_fd, unsigned long flags)
{
FILE *file;
@ -93,7 +93,7 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu,
/* syscall arguments */
__WRITE_ASS(fd, "d", fd);
__WRITE_ASS(group_fd, "d", group_fd);
__WRITE_ASS(cpu, "d", cpu);
__WRITE_ASS(cpu, "d", cpu.cpu);
__WRITE_ASS(pid, "d", pid);
__WRITE_ASS(flags, "lu", flags);
@ -144,7 +144,7 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu,
return 0;
}
void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu,
int fd, int group_fd, unsigned long flags)
{
int errno_saved = errno;


@ -18,7 +18,7 @@ static unsigned long *get_bitmap(const char *str, int nbits)
if (map && bm) {
for (i = 0; i < map->nr; i++)
set_bit(map->map[i], bm);
set_bit(map->map[i].cpu, bm);
}
if (map)


@ -107,6 +107,7 @@ static struct test_suite *generic_tests[] = {
&suite__expand_cgroup_events,
&suite__perf_time_to_tsc,
&suite__dlfilter,
&suite__sigtrap,
NULL,
};
@ -420,7 +421,7 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width,
continue;
st.file = ent->d_name;
pr_info("%2d: %-*s:", i, width, test_suite.desc);
pr_info("%3d: %-*s:", i, width, test_suite.desc);
if (intlist__find(skiplist, i)) {
color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
@ -470,7 +471,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
continue;
}
pr_info("%2d: %-*s:", i, width, test_description(t, -1));
pr_info("%3d: %-*s:", i, width, test_description(t, -1));
if (intlist__find(skiplist, i)) {
color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
@ -510,7 +511,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
curr, argc, argv))
continue;
pr_info("%2d.%1d: %-*s:", i, subi + 1, subw,
pr_info("%3d.%1d: %-*s:", i, subi + 1, subw,
test_description(t, subi));
test_and_print(t, subi);
}
@ -545,7 +546,7 @@ static int perf_test__list_shell(int argc, const char **argv, int i)
if (!perf_test__matches(t.desc, curr, argc, argv))
continue;
pr_info("%2d: %s\n", i, t.desc);
pr_info("%3d: %s\n", i, t.desc);
}
@ -567,14 +568,14 @@ static int perf_test__list(int argc, const char **argv)
if (!perf_test__matches(test_description(t, -1), curr, argc, argv))
continue;
pr_info("%2d: %s\n", i, test_description(t, -1));
pr_info("%3d: %s\n", i, test_description(t, -1));
if (has_subtests(t)) {
int subn = num_subtests(t);
int subi;
for (subi = 0; subi < subn; subi++)
pr_info("%2d:%1d: %s\n", i, subi + 1,
pr_info("%3d:%1d: %s\n", i, subi + 1,
test_description(t, subi));
}
}
@ -606,6 +607,9 @@ int cmd_test(int argc, const char **argv)
if (ret < 0)
return ret;
/* Unbuffered output */
setvbuf(stdout, NULL, _IONBF, 0);
argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0);
if (argc >= 1 && !strcmp(argv[0], "list"))
return perf_test__list(argc - 1, argv + 1);


@ -38,7 +38,7 @@ static int process_event_mask(struct perf_tool *tool __maybe_unused,
TEST_ASSERT_VAL("wrong nr", map->nr == 20);
for (i = 0; i < 20; i++) {
TEST_ASSERT_VAL("wrong cpu", map->map[i] == i);
TEST_ASSERT_VAL("wrong cpu", map->map[i].cpu == i);
}
perf_cpu_map__put(map);
@ -67,8 +67,8 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused,
map = cpu_map__new_data(data);
TEST_ASSERT_VAL("wrong nr", map->nr == 2);
TEST_ASSERT_VAL("wrong cpu", map->map[0] == 1);
TEST_ASSERT_VAL("wrong cpu", map->map[1] == 256);
TEST_ASSERT_VAL("wrong cpu", map->map[0].cpu == 1);
TEST_ASSERT_VAL("wrong cpu", map->map[1].cpu == 256);
TEST_ASSERT_VAL("wrong refcnt", refcount_read(&map->refcnt) == 1);
perf_cpu_map__put(map);
return 0;


@ -76,9 +76,9 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused,
TEST_ASSERT_VAL("wrong id", ev->id == 123);
TEST_ASSERT_VAL("wrong type", ev->type == PERF_EVENT_UPDATE__CPUS);
TEST_ASSERT_VAL("wrong cpus", map->nr == 3);
TEST_ASSERT_VAL("wrong cpus", map->map[0] == 1);
TEST_ASSERT_VAL("wrong cpus", map->map[1] == 2);
TEST_ASSERT_VAL("wrong cpus", map->map[2] == 3);
TEST_ASSERT_VAL("wrong cpus", map->map[0].cpu == 1);
TEST_ASSERT_VAL("wrong cpus", map->map[1].cpu == 2);
TEST_ASSERT_VAL("wrong cpus", map->map[2].cpu == 3);
perf_cpu_map__put(map);
return 0;
}


@ -31,7 +31,7 @@ static unsigned long *get_bitmap(const char *str, int nbits)
if (map && bm) {
for (i = 0; i < map->nr; i++) {
set_bit(map->map[i], bm);
set_bit(map->map[i].cpu, bm);
}
}


@ -59,11 +59,11 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest
}
CPU_ZERO(&cpu_set);
CPU_SET(cpus->map[0], &cpu_set);
CPU_SET(cpus->map[0].cpu, &cpu_set);
sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
pr_debug("sched_setaffinity() failed on CPU %d: %s ",
cpus->map[0], str_error_r(errno, sbuf, sizeof(sbuf)));
cpus->map[0].cpu, str_error_r(errno, sbuf, sizeof(sbuf)));
goto out_free_cpus;
}


@ -22,7 +22,8 @@
static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __maybe_unused,
int subtest __maybe_unused)
{
int err = -1, fd, cpu;
int err = -1, fd, idx;
struct perf_cpu cpu;
struct perf_cpu_map *cpus;
struct evsel *evsel;
unsigned int nr_openat_calls = 111, i;
@ -58,23 +59,23 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb
goto out_evsel_delete;
}
for (cpu = 0; cpu < cpus->nr; ++cpu) {
unsigned int ncalls = nr_openat_calls + cpu;
perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
unsigned int ncalls = nr_openat_calls + idx;
/*
* XXX eventually lift this restriction in a way that
* keeps perf building on older glibc installations
* without CPU_ALLOC. 1024 cpus in 2010 still seems
* a reasonable upper limit tho :-)
*/
if (cpus->map[cpu] >= CPU_SETSIZE) {
pr_debug("Ignoring CPU %d\n", cpus->map[cpu]);
if (cpu.cpu >= CPU_SETSIZE) {
pr_debug("Ignoring CPU %d\n", cpu.cpu);
continue;
}
CPU_SET(cpus->map[cpu], &cpu_set);
CPU_SET(cpu.cpu, &cpu_set);
if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
pr_debug("sched_setaffinity() failed on CPU %d: %s ",
cpus->map[cpu],
cpu.cpu,
str_error_r(errno, sbuf, sizeof(sbuf)));
goto out_close_fd;
}
@ -82,37 +83,29 @@ static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __mayb
fd = openat(0, "/etc/passwd", O_RDONLY);
close(fd);
}
CPU_CLR(cpus->map[cpu], &cpu_set);
CPU_CLR(cpu.cpu, &cpu_set);
}
/*
* Here we need to explicitly preallocate the counts, as if
* we use the auto allocation it will allocate just for 1 cpu,
* as we start by cpu 0.
*/
if (evsel__alloc_counts(evsel, cpus->nr, 1) < 0) {
pr_debug("evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
goto out_close_fd;
}
evsel->core.cpus = perf_cpu_map__get(cpus);
err = 0;
for (cpu = 0; cpu < cpus->nr; ++cpu) {
perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
unsigned int expected;
if (cpus->map[cpu] >= CPU_SETSIZE)
if (cpu.cpu >= CPU_SETSIZE)
continue;
if (evsel__read_on_cpu(evsel, cpu, 0) < 0) {
if (evsel__read_on_cpu(evsel, idx, 0) < 0) {
pr_debug("evsel__read_on_cpu\n");
err = -1;
break;
}
expected = nr_openat_calls + cpu;
if (perf_counts(evsel->counts, cpu, 0)->val != expected) {
expected = nr_openat_calls + idx;
if (perf_counts(evsel->counts, idx, 0)->val != expected) {
pr_debug("evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val);
expected, cpu.cpu, perf_counts(evsel->counts, idx, 0)->val);
err = -1;
}
}


@ -6,7 +6,7 @@ set -e
for m in $(perf list --raw-dump metricgroups); do
echo "Testing $m"
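# -a (system wide) is assumed to be needed so metrics built on
# uncore/system-wide-only events can still be opened.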
perf stat -M "$m" true
perf stat -M "$m" -a true
done
exit 0

tools/perf/tests/sigtrap.c

@ -0,0 +1,177 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Basic test for sigtrap support.
*
* Copyright (C) 2021, Google LLC.
*/
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <linux/hw_breakpoint.h>
#include <linux/string.h>
#include <pthread.h>
#include <signal.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include "cloexec.h"
#include "debug.h"
#include "event.h"
#include "tests.h"
#include "../perf-sys.h"
/*
* PowerPC and S390 do not support creation of instruction breakpoints using the
* perf_event interface.
*
* Just disable the test for these architectures until these issues are
* resolved.
*/
#if defined(__powerpc__) || defined(__s390x__)
#define BP_ACCOUNT_IS_SUPPORTED 0
#else
#define BP_ACCOUNT_IS_SUPPORTED 1
#endif
#define NUM_THREADS 5
static struct {
int tids_want_signal; /* Which threads still want a signal. */
int signal_count; /* Sanity check number of signals received. */
volatile int iterate_on; /* Variable to set breakpoint on. */
siginfo_t first_siginfo; /* First observed siginfo_t. */
} ctx;
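/*
 * Opaque cookie passed to the kernel via attr.sig_data; deriving it from
 * &ctx.iterate_on makes it easy for the (currently #if 0'd) si_perf_data
 * check below to verify the kernel handed it back in the siginfo.
 */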
#define TEST_SIG_DATA (~(unsigned long)(&ctx.iterate_on))
static struct perf_event_attr make_event_attr(void)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_BREAKPOINT,
.size = sizeof(attr),
.sample_period = 1,
.disabled = 1,
.bp_addr = (unsigned long)&ctx.iterate_on,
.bp_type = HW_BREAKPOINT_RW,
.bp_len = HW_BREAKPOINT_LEN_1,
.inherit = 1, /* Children inherit events ... */
.inherit_thread = 1, /* ... but only cloned with CLONE_THREAD. */
.remove_on_exec = 1, /* Required by sigtrap. */
.sigtrap = 1, /* Request synchronous SIGTRAP on event. */
.sig_data = TEST_SIG_DATA,
.exclude_kernel = 1, /* To allow */
.exclude_hv = 1, /* running as !root */
};
return attr;
}
static void
sigtrap_handler(int signum __maybe_unused, siginfo_t *info, void *ucontext __maybe_unused)
{
if (!__atomic_fetch_add(&ctx.signal_count, 1, __ATOMIC_RELAXED))
ctx.first_siginfo = *info;
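/*
 * test_thread() adds its tid to tids_want_signal once per expected trap, so
 * subtracting it here brings the counter back to zero only if every signal
 * was delivered to the thread that triggered it.
 */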
__atomic_fetch_sub(&ctx.tids_want_signal, syscall(SYS_gettid), __ATOMIC_RELAXED);
}
static void *test_thread(void *arg)
{
pthread_barrier_t *barrier = (pthread_barrier_t *)arg;
pid_t tid = syscall(SYS_gettid);
int i;
pthread_barrier_wait(barrier);
__atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED);
for (i = 0; i < ctx.iterate_on - 1; i++)
__atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED);
return NULL;
}
static int run_test_threads(pthread_t *threads, pthread_barrier_t *barrier)
{
int i;
pthread_barrier_wait(barrier);
for (i = 0; i < NUM_THREADS; i++)
TEST_ASSERT_EQUAL("pthread_join() failed", pthread_join(threads[i], NULL), 0);
return TEST_OK;
}
static int run_stress_test(int fd, pthread_t *threads, pthread_barrier_t *barrier)
{
int ret;
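/*
 * iterate_on is both the variable the breakpoint watches and the loop bound
 * in test_thread(), so each check of the loop condition reads it and should
 * raise one SIGTRAP.
 */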
ctx.iterate_on = 3000;
TEST_ASSERT_EQUAL("misfired signal?", ctx.signal_count, 0);
TEST_ASSERT_EQUAL("enable failed", ioctl(fd, PERF_EVENT_IOC_ENABLE, 0), 0);
ret = run_test_threads(threads, barrier);
TEST_ASSERT_EQUAL("disable failed", ioctl(fd, PERF_EVENT_IOC_DISABLE, 0), 0);
TEST_ASSERT_EQUAL("unexpected sigtraps", ctx.signal_count, NUM_THREADS * ctx.iterate_on);
TEST_ASSERT_EQUAL("missing signals or incorrectly delivered", ctx.tids_want_signal, 0);
TEST_ASSERT_VAL("unexpected si_addr", ctx.first_siginfo.si_addr == &ctx.iterate_on);
#if 0 /* FIXME: enable when libc's signal.h has si_perf_{type,data} */
TEST_ASSERT_EQUAL("unexpected si_perf_type", ctx.first_siginfo.si_perf_type,
PERF_TYPE_BREAKPOINT);
TEST_ASSERT_EQUAL("unexpected si_perf_data", ctx.first_siginfo.si_perf_data,
TEST_SIG_DATA);
#endif
return ret;
}
static int test__sigtrap(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
struct perf_event_attr attr = make_event_attr();
struct sigaction action = {};
struct sigaction oldact;
pthread_t threads[NUM_THREADS];
pthread_barrier_t barrier;
char sbuf[STRERR_BUFSIZE];
int i, fd, ret = TEST_FAIL;
if (!BP_ACCOUNT_IS_SUPPORTED) {
pr_debug("Test not supported on this architecture");
return TEST_SKIP;
}
pthread_barrier_init(&barrier, NULL, NUM_THREADS + 1);
action.sa_flags = SA_SIGINFO | SA_NODEFER;
action.sa_sigaction = sigtrap_handler;
sigemptyset(&action.sa_mask);
if (sigaction(SIGTRAP, &action, &oldact)) {
pr_debug("FAILED sigaction(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf)));
goto out;
}
fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag());
if (fd < 0) {
pr_debug("FAILED sys_perf_event_open(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf)));
goto out_restore_sigaction;
}
for (i = 0; i < NUM_THREADS; i++) {
if (pthread_create(&threads[i], NULL, test_thread, &barrier)) {
pr_debug("FAILED pthread_create(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf)));
goto out_close_perf_event;
}
}
ret = run_stress_test(fd, threads, &barrier);
out_close_perf_event:
close(fd);
out_restore_sigaction:
sigaction(SIGTRAP, &oldact, NULL);
out:
pthread_barrier_destroy(&barrier);
return ret;
}
DEFINE_SUITE("Sigtrap", sigtrap);


@ -87,7 +87,8 @@ static int test__synthesize_stat(struct test_suite *test __maybe_unused, int sub
count.run = 300;
TEST_ASSERT_VAL("failed to synthesize stat_config",
!perf_event__synthesize_stat(NULL, 1, 2, 3, &count, process_stat_event, NULL));
!perf_event__synthesize_stat(NULL, (struct perf_cpu){.cpu = 1}, 2, 3,
&count, process_stat_event, NULL));
return 0;
}


@ -146,6 +146,7 @@ DECLARE_SUITE(pe_file_parsing);
DECLARE_SUITE(expand_cgroup_events);
DECLARE_SUITE(perf_time_to_tsc);
DECLARE_SUITE(dlfilter);
DECLARE_SUITE(sigtrap);
/*
* PowerPC and S390 do not support creation of instruction breakpoints using the


@ -112,62 +112,83 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
TEST_ASSERT_VAL("Session header CPU map not set", session->header.env.cpu);
for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
if (!cpu_map__has(map, i))
struct perf_cpu cpu = { .cpu = i };
if (!perf_cpu_map__has(map, cpu))
continue;
pr_debug("CPU %d, core %d, socket %d\n", i,
session->header.env.cpu[i].core_id,
session->header.env.cpu[i].socket_id);
}
// Test that CPU ID contains socket, die, core and CPU
for (i = 0; i < map->nr; i++) {
id = aggr_cpu_id__cpu(perf_cpu_map__cpu(map, i), NULL);
TEST_ASSERT_VAL("Cpu map - CPU ID doesn't match", map->map[i].cpu == id.cpu.cpu);
TEST_ASSERT_VAL("Cpu map - Core ID doesn't match",
session->header.env.cpu[map->map[i].cpu].core_id == id.core);
TEST_ASSERT_VAL("Cpu map - Socket ID doesn't match",
session->header.env.cpu[map->map[i].cpu].socket_id == id.socket);
TEST_ASSERT_VAL("Cpu map - Die ID doesn't match",
session->header.env.cpu[map->map[i].cpu].die_id == id.die);
TEST_ASSERT_VAL("Cpu map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Cpu map - Thread is set", id.thread == -1);
}
// Test that core ID contains socket, die and core
for (i = 0; i < map->nr; i++) {
id = cpu_map__get_core(map, i, NULL);
id = aggr_cpu_id__core(perf_cpu_map__cpu(map, i), NULL);
TEST_ASSERT_VAL("Core map - Core ID doesn't match",
session->header.env.cpu[map->map[i]].core_id == id.core);
session->header.env.cpu[map->map[i].cpu].core_id == id.core);
TEST_ASSERT_VAL("Core map - Socket ID doesn't match",
session->header.env.cpu[map->map[i]].socket_id == id.socket);
session->header.env.cpu[map->map[i].cpu].socket_id == id.socket);
TEST_ASSERT_VAL("Core map - Die ID doesn't match",
session->header.env.cpu[map->map[i]].die_id == id.die);
session->header.env.cpu[map->map[i].cpu].die_id == id.die);
TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Core map - Thread is set", id.thread == -1);
}
// Test that die ID contains socket and die
for (i = 0; i < map->nr; i++) {
id = cpu_map__get_die(map, i, NULL);
id = aggr_cpu_id__die(perf_cpu_map__cpu(map, i), NULL);
TEST_ASSERT_VAL("Die map - Socket ID doesn't match",
session->header.env.cpu[map->map[i]].socket_id == id.socket);
session->header.env.cpu[map->map[i].cpu].socket_id == id.socket);
TEST_ASSERT_VAL("Die map - Die ID doesn't match",
session->header.env.cpu[map->map[i]].die_id == id.die);
session->header.env.cpu[map->map[i].cpu].die_id == id.die);
TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Die map - Core is set", id.core == -1);
TEST_ASSERT_VAL("Die map - CPU is set", id.cpu.cpu == -1);
TEST_ASSERT_VAL("Die map - Thread is set", id.thread == -1);
}
// Test that socket ID contains only socket
for (i = 0; i < map->nr; i++) {
-id = cpu_map__get_socket(map, i, NULL);
+id = aggr_cpu_id__socket(perf_cpu_map__cpu(map, i), NULL);
TEST_ASSERT_VAL("Socket map - Socket ID doesn't match",
-session->header.env.cpu[map->map[i]].socket_id == id.socket);
+session->header.env.cpu[map->map[i].cpu].socket_id == id.socket);
TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == -1);
TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1);
TEST_ASSERT_VAL("Socket map - Core is set", id.core == -1);
TEST_ASSERT_VAL("Socket map - CPU is set", id.cpu.cpu == -1);
TEST_ASSERT_VAL("Socket map - Thread is set", id.thread == -1);
}
// Test that node ID contains only node
for (i = 0; i < map->nr; i++) {
-id = cpu_map__get_node(map, i, NULL);
+id = aggr_cpu_id__node(perf_cpu_map__cpu(map, i), NULL);
TEST_ASSERT_VAL("Node map - Node ID doesn't match",
cpu__get_node(map->map[i]) == id.node);
TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1);
TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1);
TEST_ASSERT_VAL("Node map - Core is set", id.core == -1);
TEST_ASSERT_VAL("Node map - CPU is set", id.cpu.cpu == -1);
TEST_ASSERT_VAL("Node map - Thread is set", id.thread == -1);
}
perf_session__delete(session);
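
   Most of the churn in this test (and in many of the hunks below) comes from the
   libperf change that gives CPU numbers their own wrapper type, so a real CPU number
   can no longer be silently confused with a CPU map index. Roughly, and with the
   field layout shown only for illustration, the types used above look like this:

   /* Approximate shapes, for orientation only (see the libperf/perf tool headers). */
   struct perf_cpu {
           int cpu;                        /* real CPU number, -1 == "not set" */
   };

   struct aggr_cpu_id {
           int thread;
           int node;
           int socket;
           int die;
           int core;
           struct perf_cpu cpu;
   };

   /*
    * Each aggr_cpu_id__socket()/__die()/__core()/__cpu() helper fills in only the
    * fields that matter for its aggregation level; the topology test above asserts
    * that everything else stays at -1.
    */

   This is also why the stat test earlier passes a compound literal,
   (struct perf_cpu){ .cpu = 1 }, where a plain integer used to do.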


@ -966,6 +966,7 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
.opts = opts,
};
int ret = -1, err;
int not_annotated = list_empty(&notes->src->source);
if (sym == NULL)
return -1;
@ -973,13 +974,15 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
if (ms->map->dso->annotate_warned)
return -1;
-err = symbol__annotate2(ms, evsel, opts, &browser.arch);
-if (err) {
-char msg[BUFSIZ];
-ms->map->dso->annotate_warned = true;
-symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
-ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
-goto out_free_offsets;
+if (not_annotated) {
+err = symbol__annotate2(ms, evsel, opts, &browser.arch);
+if (err) {
+char msg[BUFSIZ];
+ms->map->dso->annotate_warned = true;
+symbol__strerror_disassemble(ms, err, msg, sizeof(msg));
+ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
+goto out_free_offsets;
+}
}
ui_helpline__push("Press ESC to exit");
@ -994,9 +997,11 @@ int symbol__tui_annotate(struct map_symbol *ms, struct evsel *evsel,
ret = annotate_browser__run(&browser, evsel, hbt);
-annotated_source__purge(notes->src);
+if(not_annotated)
+annotated_source__purge(notes->src);
out_free_offsets:
-zfree(&notes->offsets);
+if(not_annotated)
+zfree(&notes->offsets);
return ret;
}


@ -1,3 +1,4 @@
perf-y += arm64-frame-pointer-unwind-support.o
perf-y += annotate.o
perf-y += block-info.o
perf-y += block-range.o
@ -144,6 +145,7 @@ perf-$(CONFIG_LIBBPF) += bpf-loader.o
perf-$(CONFIG_LIBBPF) += bpf_map.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
perf-$(CONFIG_LIBELF) += symbol-elf.o
perf-$(CONFIG_LIBELF) += probe-file.o


@ -11,7 +11,7 @@
static int get_cpu_set_size(void)
{
-int sz = cpu__max_cpu() + 8 - 1;
+int sz = cpu__max_cpu().cpu + 8 - 1;
/*
* sched_getaffinity doesn't like masks smaller than the kernel.
* Hopefully that's big enough.


@ -179,6 +179,8 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.phys_addr = ip;
break;
case ARM_SPE_COUNTER:
if (idx == SPE_CNT_PKT_HDR_INDEX_TOTAL_LAT)
decoder->record.latency = payload;
break;
case ARM_SPE_CONTEXT:
decoder->record.context_id = payload;


@ -33,6 +33,7 @@ struct arm_spe_record {
enum arm_spe_sample_type type;
int err;
u32 op;
u32 latency;
u64 from_ip;
u64 to_ip;
u64 timestamp;


@ -58,6 +58,8 @@ struct arm_spe {
u8 sample_branch;
u8 sample_remote_access;
u8 sample_memory;
u8 sample_instructions;
u64 instructions_sample_period;
u64 l1d_miss_id;
u64 l1d_access_id;
@ -68,6 +70,7 @@ struct arm_spe {
u64 branch_miss_id;
u64 remote_access_id;
u64 memory_id;
u64 instructions_id;
u64 kernel_start;
@ -90,6 +93,7 @@ struct arm_spe_queue {
u64 time;
u64 timestamp;
struct thread *thread;
u64 period_instructions;
};
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
@ -202,6 +206,7 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
speq->pid = -1;
speq->tid = -1;
speq->cpu = -1;
speq->period_instructions = 0;
/* params set */
params.get_trace = arm_spe_get_trace;
@ -330,6 +335,7 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
sample.addr = record->virt_addr;
sample.phys_addr = record->phys_addr;
sample.data_src = data_src;
sample.weight = record->latency;
return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
@ -347,6 +353,36 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
sample.id = spe_events_id;
sample.stream_id = spe_events_id;
sample.addr = record->to_ip;
sample.weight = record->latency;
return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
u64 spe_events_id, u64 data_src)
{
struct arm_spe *spe = speq->spe;
struct arm_spe_record *record = &speq->decoder->record;
union perf_event *event = speq->event_buf;
struct perf_sample sample = { .ip = 0, };
/*
* Handles perf instruction sampling period.
*/
speq->period_instructions++;
if (speq->period_instructions < spe->instructions_sample_period)
return 0;
speq->period_instructions = 0;
arm_spe_prep_sample(spe, speq, event, &sample);
sample.id = spe_events_id;
sample.stream_id = spe_events_id;
sample.addr = record->virt_addr;
sample.phys_addr = record->phys_addr;
sample.data_src = data_src;
sample.period = spe->instructions_sample_period;
sample.weight = record->latency;
return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
@ -480,6 +516,12 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
return err;
}
if (spe->sample_instructions) {
err = arm_spe__synth_instruction_sample(speq, spe->instructions_id, data_src);
if (err)
return err;
}
return 0;
}
@ -993,7 +1035,8 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
attr.type = PERF_TYPE_HARDWARE;
attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
-PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
+PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
+PERF_SAMPLE_WEIGHT;
if (spe->timeless_decoding)
attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
else
@ -1107,8 +1150,30 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
return err;
spe->memory_id = id;
arm_spe_set_event_name(evlist, id, "memory");
id += 1;
}
if (spe->synth_opts.instructions) {
if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
goto synth_instructions_out;
}
if (spe->synth_opts.period > 1)
pr_warning("Arm SPE has a hardware-based sample period.\n"
"Additional instruction events will be discarded by --itrace\n");
spe->sample_instructions = true;
attr.config = PERF_COUNT_HW_INSTRUCTIONS;
attr.sample_period = spe->synth_opts.period;
spe->instructions_sample_period = attr.sample_period;
err = arm_spe_synth_event(session, &attr, id);
if (err)
return err;
spe->instructions_id = id;
arm_spe_set_event_name(evlist, id, "instructions");
}
synth_instructions_out:
return 0;
}
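
   With the hunks above, Arm SPE decoding can synthesize "instructions" samples at an
   instruction-based period, and the total latency taken from SPE counter packets is
   carried in the sample weight. A hypothetical usage example on an SPE-capable system;
   the PMU name arm_spe_0 and the period of 100 instructions are assumptions made for
   the example, not values from this series:

   $ perf record -e arm_spe_0// -a -- sleep 1
   $ perf report --itrace=i100i    # one instructions sample per 100 recorded instructions

   The per-sample latency is then available wherever the sample weight is reported.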


@ -0,0 +1,63 @@
// SPDX-License-Identifier: GPL-2.0
#include "arm64-frame-pointer-unwind-support.h"
#include "callchain.h"
#include "event.h"
#include "perf_regs.h" // SMPL_REG_MASK
#include "unwind.h"
#define perf_event_arm_regs perf_event_arm64_regs
#include "../../arch/arm64/include/uapi/asm/perf_regs.h"
#undef perf_event_arm_regs
struct entries {
u64 stack[2];
size_t length;
};
static bool get_leaf_frame_caller_enabled(struct perf_sample *sample)
{
return callchain_param.record_mode == CALLCHAIN_FP && sample->user_regs.regs
&& sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_LR);
}
static int add_entry(struct unwind_entry *entry, void *arg)
{
struct entries *entries = arg;
entries->stack[entries->length++] = entry->ip;
return 0;
}
u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int usr_idx)
{
int ret;
struct entries entries = {};
struct regs_dump old_regs = sample->user_regs;
if (!get_leaf_frame_caller_enabled(sample))
return 0;
/*
* If PC and SP are not recorded, get the value of PC from the stack
* and set its mask. SP is not used when doing the unwinding but it
* still needs to be set to prevent failures.
*/
if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_PC))) {
sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_PC);
sample->user_regs.cache_regs[PERF_REG_ARM64_PC] = sample->callchain->ips[usr_idx+1];
}
if (!(sample->user_regs.mask & SMPL_REG_MASK(PERF_REG_ARM64_SP))) {
sample->user_regs.cache_mask |= SMPL_REG_MASK(PERF_REG_ARM64_SP);
sample->user_regs.cache_regs[PERF_REG_ARM64_SP] = 0;
}
ret = unwind__get_entries(add_entry, &entries, thread, sample, 2);
sample->user_regs = old_regs;
if (ret || entries.length != 2)
return ret;
return callchain_param.order == ORDER_CALLER ? entries.stack[0] : entries.stack[1];
}


@ -0,0 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
#define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
#include "event.h"
#include "thread.h"
u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int user_idx);
#endif /* __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H */


@ -123,7 +123,7 @@ int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
mm->prev = 0;
mm->idx = mp->idx;
mm->tid = mp->tid;
-mm->cpu = mp->cpu;
+mm->cpu = mp->cpu.cpu;
if (!mp->len) {
mm->base = NULL;
@ -180,7 +180,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
else
mp->tid = -1;
} else {
-mp->cpu = -1;
+mp->cpu.cpu = -1;
mp->tid = perf_thread_map__pid(evlist->core.threads, idx);
}
}
@ -292,7 +292,7 @@ static int auxtrace_queues__queue_buffer(struct auxtrace_queues *queues,
if (!queue->set) {
queue->set = true;
queue->tid = buffer->tid;
-queue->cpu = buffer->cpu;
+queue->cpu = buffer->cpu.cpu;
}
buffer->buffer_nr = queues->next_buffer_nr++;
@ -339,11 +339,11 @@ static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
return 0;
}
-static bool filter_cpu(struct perf_session *session, int cpu)
+static bool filter_cpu(struct perf_session *session, struct perf_cpu cpu)
{
unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap;
-return cpu_bitmap && cpu != -1 && !test_bit(cpu, cpu_bitmap);
+return cpu_bitmap && cpu.cpu != -1 && !test_bit(cpu.cpu, cpu_bitmap);
}
static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
@ -399,7 +399,7 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues,
struct auxtrace_buffer buffer = {
.pid = -1,
.tid = event->auxtrace.tid,
-.cpu = event->auxtrace.cpu,
+.cpu = { event->auxtrace.cpu },
.data_offset = data_offset,
.offset = event->auxtrace.offset,
.reference = event->auxtrace.reference,


@ -15,6 +15,7 @@
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/types.h>
#include <internal/cpumap.h>
#include <asm/bitsperlong.h>
#include <asm/barrier.h>
@ -240,7 +241,7 @@ struct auxtrace_buffer {
size_t size;
pid_t pid;
pid_t tid;
-int cpu;
+struct perf_cpu cpu;
void *data;
off_t data_offset;
void *mmap_addr;
@ -350,7 +351,7 @@ struct auxtrace_mmap_params {
int prot;
int idx;
pid_t tid;
-int cpu;
+struct perf_cpu cpu;
};
/**


@ -424,7 +424,7 @@ preproc_gen_prologue(struct bpf_program *prog, int n,
size_t prologue_cnt = 0;
int i, err;
-if (IS_ERR(priv) || !priv || priv->is_tp)
+if (IS_ERR_OR_NULL(priv) || priv->is_tp)
goto errout;
pev = &priv->pev;
@ -573,7 +573,7 @@ static int hook_load_preprocessor(struct bpf_program *prog)
bool need_prologue = false;
int err, i;
-if (IS_ERR(priv) || !priv) {
+if (IS_ERR_OR_NULL(priv)) {
pr_debug("Internal error when hook preprocessor\n");
return -BPF_LOADER_ERRNO__INTERNAL;
}
@ -645,8 +645,11 @@ int bpf__probe(struct bpf_object *obj)
goto out;
priv = bpf_program__priv(prog);
-if (IS_ERR(priv) || !priv) {
-err = PTR_ERR(priv);
+if (IS_ERR_OR_NULL(priv)) {
+if (!priv)
+err = -BPF_LOADER_ERRNO__INTERNAL;
+else
+err = PTR_ERR(priv);
goto out;
}
@ -696,7 +699,7 @@ int bpf__unprobe(struct bpf_object *obj)
struct bpf_prog_priv *priv = bpf_program__priv(prog);
int i;
-if (IS_ERR(priv) || !priv || priv->is_tp)
+if (IS_ERR_OR_NULL(priv) || priv->is_tp)
continue;
for (i = 0; i < priv->pev.ntevs; i++) {
@ -754,7 +757,7 @@ int bpf__foreach_event(struct bpf_object *obj,
struct perf_probe_event *pev;
int i, fd;
-if (IS_ERR(priv) || !priv) {
+if (IS_ERR_OR_NULL(priv)) {
pr_debug("bpf: failed to get private field\n");
return -BPF_LOADER_ERRNO__INTERNAL;
}
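
   The bpf-loader hunks above are mostly a cleanup: IS_ERR_OR_NULL() from the tools
   copy of linux/err.h is the canonical form of the open-coded IS_ERR(p) || !p checks.
   The bpf__probe() hunk is also a small fix, since PTR_ERR(NULL) is 0 and would have
   been reported as success. A simplified sketch of the corrected pattern; this is not
   the perf code itself, and the function name and error value are chosen for
   illustration:

   #include <linux/err.h>
   #include <errno.h>

   static int check_priv(const void *priv)
   {
           if (IS_ERR_OR_NULL(priv)) {     /* equivalent to IS_ERR(priv) || !priv */
                   if (!priv)
                           return -EINVAL; /* PTR_ERR(NULL) would be 0, i.e. "no error" */
                   return PTR_ERR(priv);
           }
           return 0;
   }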


@ -265,7 +265,7 @@ static int bpf_program_profiler__read(struct evsel *evsel)
return 0;
}
-static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu,
+static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu_map_idx,
int fd)
{
struct bpf_prog_profiler_bpf *skel;
@ -277,7 +277,7 @@ static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu,
assert(skel != NULL);
ret = bpf_map_update_elem(bpf_map__fd(skel->maps.events),
-&cpu, &fd, BPF_ANY);
+&cpu_map_idx, &fd, BPF_ANY);
if (ret)
return ret;
}
@ -554,7 +554,7 @@ static int bperf__load(struct evsel *evsel, struct target *target)
filter_type == BPERF_FILTER_TGID)
key = evsel->core.threads->map[i].pid;
else if (filter_type == BPERF_FILTER_CPU)
-key = evsel->core.cpus->map[i];
+key = evsel->core.cpus->map[i].cpu;
else
break;
@ -580,12 +580,12 @@ out:
return err;
}
-static int bperf__install_pe(struct evsel *evsel, int cpu, int fd)
+static int bperf__install_pe(struct evsel *evsel, int cpu_map_idx, int fd)
{
struct bperf_leader_bpf *skel = evsel->leader_skel;
return bpf_map_update_elem(bpf_map__fd(skel->maps.events),
-&cpu, &fd, BPF_ANY);
+&cpu_map_idx, &fd, BPF_ANY);
}
/*
@ -598,7 +598,7 @@ static int bperf_sync_counters(struct evsel *evsel)
num_cpu = all_cpu_map->nr;
for (i = 0; i < num_cpu; i++) {
-cpu = all_cpu_map->map[i];
+cpu = all_cpu_map->map[i].cpu;
bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu);
}
return 0;
@ -619,15 +619,17 @@ static int bperf__disable(struct evsel *evsel)
static int bperf__read(struct evsel *evsel)
{
struct bperf_follower_bpf *skel = evsel->follower_skel;
-__u32 num_cpu_bpf = cpu__max_cpu();
+__u32 num_cpu_bpf = cpu__max_cpu().cpu;
struct bpf_perf_event_value values[num_cpu_bpf];
int reading_map_fd, err = 0;
-__u32 i, j, num_cpu;
+__u32 i;
+int j;
bperf_sync_counters(evsel);
reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) {
struct perf_cpu entry;
__u32 cpu;
err = bpf_map_lookup_elem(reading_map_fd, &i, values);
@ -637,16 +639,15 @@ static int bperf__read(struct evsel *evsel)
case BPERF_FILTER_GLOBAL:
assert(i == 0);
-num_cpu = all_cpu_map->nr;
-for (j = 0; j < num_cpu; j++) {
-cpu = all_cpu_map->map[j];
+perf_cpu_map__for_each_cpu(entry, j, all_cpu_map) {
+cpu = entry.cpu;
perf_counts(evsel->counts, cpu, 0)->val = values[cpu].counter;
perf_counts(evsel->counts, cpu, 0)->ena = values[cpu].enabled;
perf_counts(evsel->counts, cpu, 0)->run = values[cpu].running;
}
break;
case BPERF_FILTER_CPU:
-cpu = evsel->core.cpus->map[i];
+cpu = evsel->core.cpus->map[i].cpu;
perf_counts(evsel->counts, i, 0)->val = values[cpu].counter;
perf_counts(evsel->counts, i, 0)->ena = values[cpu].enabled;
perf_counts(evsel->counts, i, 0)->run = values[cpu].running;
@ -771,11 +772,11 @@ static inline bool bpf_counter_skip(struct evsel *evsel)
evsel->follower_skel == NULL;
}
-int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd)
+int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd)
{
if (bpf_counter_skip(evsel))
return 0;
-return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd);
+return evsel->bpf_counter_ops->install_pe(evsel, cpu_map_idx, fd);
}
int bpf_counter__load(struct evsel *evsel, struct target *target)
