The latest perf updates in this cycle are:

  - Optimize perf_sample_data layout
  - Prepare sample data handling for BPF integration
  - Update the x86 PMU driver for Intel Meteor Lake
  - Restructure the x86 uncore code to fix a SPR (Sapphire Rapids)
    discovery breakage
  - Fix the x86 Zhaoxin PMU driver
  - Cleanups
 
 Signed-off-by: Ingo Molnar <mingo@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmPzaHgRHG1pbmdvQGtl
 cm5lbC5vcmcACgkQEnMQ0APhK1jYQg/+KRfobCevMQlZVnz09T3SsJ4ahJ587BL6
 g2C6kobyUNfeChpFVroBkTR+yCb6Mq4xGr2nda9+2E978BYu9eanpx/u/bXNQ6NU
 6YhLwgRrlFXonYn07kFfUJeELZ0W+zpPvymEN1KhTQWcrgXDfXRt2VfMwNsVxGRF
 ZRyCWK+UOzSMU22FtW3I/xVLBB0vio9Y6wRC5QOpDVW5YtGwQGust7GJ53JPK43J
 m2soJvWORauT+v0aqc7ggOtKd6pahVoXrDrbktxtq9N0ZGI+PubVCGevex++cXm/
 B3QSf6VcMMuU6pfzxiEwRa8Whrc3XFeSDEfvMjC5v3becGNkdNBnGOJzYprwgRZJ
 irb6/dSrv5P2lj6WphsO1Wzcm7EoWh8M7DVOMh/13Y/oODRdOrv48112Don9UURC
 EPyvzAzizqdwdDopUmfiqUwuAXqb8uPZqCgmlz/NJkVz1/ijlfrmLgeDuf0vI7Aq
 HznzzRwjFHzyCH7D+rtonFh3JDaqgaouY76tpC5yTtzKbZPlFT8kzeCvqkTMnGgH
 czZnSNc/kBup0HDkNSlthK+TyrMXWKeVa8KQSY1E0NJHO4IBBCMzZywSoAaeofQK
 hqfQyofX9XHmuHhCA4yIfv1XkZGlBTxpPAyDdHjgs9iJTsodSYMs8ESY08eW8DXn
 Ld/35O6SylM=
 =ztUT
 -----END PGP SIGNATURE-----

Merge tag 'perf-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:

 - Optimize perf_sample_data layout

 - Prepare sample data handling for BPF integration

 - Update the x86 PMU driver for Intel Meteor Lake

 - Restructure the x86 uncore code to fix a SPR (Sapphire Rapids)
   discovery breakage

 - Fix the x86 Zhaoxin PMU driver

 - Cleanups

* tag 'perf-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (27 commits)
  perf/x86/intel/uncore: Add Meteor Lake support
  x86/perf/zhaoxin: Add stepping check for ZXC
  perf/x86/intel/ds: Fix the conversion from TSC to perf time
  perf/x86/uncore: Don't WARN_ON_ONCE() for a broken discovery table
  perf/x86/uncore: Add a quirk for UPI on SPR
  perf/x86/uncore: Ignore broken units in discovery table
  perf/x86/uncore: Fix potential NULL pointer in uncore_get_alias_name
  perf/x86/uncore: Factor out uncore_device_to_die()
  perf/core: Call perf_prepare_sample() before running BPF
  perf/core: Introduce perf_prepare_header()
  perf/core: Do not pass header for sample ID init
  perf/core: Set data->sample_flags in perf_prepare_sample()
  perf/core: Add perf_sample_save_brstack() helper
  perf/core: Add perf_sample_save_raw_data() helper
  perf/core: Add perf_sample_save_callchain() helper
  perf/core: Save the dynamic parts of sample data size
  x86/kprobes: Use switch-case for 0xFF opcodes in prepare_emulation
  perf/core: Change the layout of perf_sample_data
  perf/x86/msr: Add Meteor Lake support
  perf/x86/cstate: Add Meteor Lake support
  ...
This commit is contained in:
Linus Torvalds 2023-02-20 17:29:55 -08:00
commit a2f0e7eee1
25 changed files with 963 additions and 327 deletions

View File

@ -2313,8 +2313,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
struct cpu_hw_events *cpuhw;
cpuhw = this_cpu_ptr(&cpu_hw_events);
power_pmu_bhrb_read(event, cpuhw);
data.br_stack = &cpuhw->bhrb_stack;
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack);
}
if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&

View File

@ -662,9 +662,7 @@ static int cfdiag_push_sample(struct perf_event *event,
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = cpuhw->usedss;
raw.frag.data = cpuhw->stop;
raw.size = raw.frag.size;
data.raw = &raw;
data.sample_flags |= PERF_SAMPLE_RAW;
perf_sample_save_raw_data(&data, &raw);
}
overflow = perf_event_overflow(event, &data, &regs);

View File

@ -672,7 +672,8 @@ static void cpumsf_output_event_pid(struct perf_event *event,
/* Protect callchain buffers, tasks */
rcu_read_lock();
perf_prepare_sample(&header, data, event, regs);
perf_prepare_sample(data, event, regs);
perf_prepare_header(&header, data, event, regs);
if (perf_output_begin(&handle, data, event, header.size))
goto out;

View File

@ -362,9 +362,7 @@ static int paicrypt_push_sample(void)
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = rawsize;
raw.frag.data = cpump->save;
raw.size = raw.frag.size;
data.raw = &raw;
data.sample_flags |= PERF_SAMPLE_RAW;
perf_sample_save_raw_data(&data, &raw);
}
overflow = perf_event_overflow(event, &data, &regs);

View File

@ -451,9 +451,7 @@ static int paiext_push_sample(void)
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
raw.frag.size = rawsize;
raw.frag.data = cpump->save;
raw.size = raw.frag.size;
data.raw = &raw;
data.sample_flags |= PERF_SAMPLE_RAW;
perf_sample_save_raw_data(&data, &raw);
}
overflow = perf_event_overflow(event, &data, &regs);

View File

@ -928,10 +928,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!x86_perf_event_set_period(event))
continue;
if (has_branch_stack(event)) {
data.br_stack = &cpuc->lbr_stack;
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
}
if (has_branch_stack(event))
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);

View File

@ -1110,8 +1110,7 @@ fail:
.data = ibs_data.data,
},
};
data.raw = &raw;
data.sample_flags |= PERF_SAMPLE_RAW;
perf_sample_save_raw_data(&data, &raw);
}
if (perf_ibs == &perf_ibs_op)
@ -1122,10 +1121,8 @@ fail:
* recorded as part of interrupt regs. Thus we need to use rip from
* interrupt regs while unwinding call stack.
*/
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
data.callchain = perf_callchain(event, iregs);
data.sample_flags |= PERF_SAMPLE_CALLCHAIN;
}
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
perf_sample_save_callchain(&data, event, iregs);
throttle = perf_event_overflow(event, &data, &regs);
out:

View File

@ -2119,6 +2119,16 @@ static struct extra_reg intel_grt_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};
/*
 * Extra-register table terminated by EVENT_EXTRA_END. It maps the two
 * offcore-response events (0x01b7/0x02b7), the PEBS load-latency event
 * (0x5d0) and the two snoop-response events (0x0127/0x0227) to their MSRs
 * and valid-bit masks.
 * NOTE(review): "cmt" presumably names the Meteor Lake e-core
 * microarchitecture - confirm.
 */
static struct extra_reg intel_cmt_extra_regs[] __read_mostly = {
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff3ffffffffffull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xff3ffffffffffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x5d0),
INTEL_UEVENT_EXTRA_REG(0x0127, MSR_SNOOP_RSP_0, 0xffffffffffffffffull, SNOOP_0),
INTEL_UEVENT_EXTRA_REG(0x0227, MSR_SNOOP_RSP_1, 0xffffffffffffffffull, SNOOP_1),
EVENT_EXTRA_END
};
#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */
#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */
#define KNL_MCDRAM_LOCAL BIT_ULL(21)
@ -3026,10 +3036,8 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
perf_sample_data_init(&data, 0, event->hw.last_period);
if (has_branch_stack(event)) {
data.br_stack = &cpuc->lbr_stack;
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
}
if (has_branch_stack(event))
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
@ -4182,6 +4190,12 @@ static int hsw_hw_config(struct perf_event *event)
static struct event_constraint counter0_constraint =
INTEL_ALL_EVENT_CONSTRAINT(0, 0x1);
/* Constraint with counter mask 0x2 (presumably GP counter 1 only - confirm). */
static struct event_constraint counter1_constraint =
INTEL_ALL_EVENT_CONSTRAINT(0, 0x2);
/* Constraint with counter mask 0x3 (presumably GP counters 0 and 1 - confirm). */
static struct event_constraint counter0_1_constraint =
INTEL_ALL_EVENT_CONSTRAINT(0, 0x3);
static struct event_constraint counter2_constraint =
EVENT_CONSTRAINT(0, 0x4, 0);
@ -4191,6 +4205,12 @@ static struct event_constraint fixed0_constraint =
static struct event_constraint fixed0_counter0_constraint =
INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL);
/*
 * Counter mask 0x100000003: bit 32 plus bits 0-1.
 * NOTE(review): presumably fixed counter 0 together with GP counters 0
 * and 1 - confirm against the counter index layout.
 */
static struct event_constraint fixed0_counter0_1_constraint =
INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000003ULL);
/* Counter mask 0xfe: bits 1-7 (every one of the low eight counters except 0). */
static struct event_constraint counters_1_7_constraint =
INTEL_ALL_EVENT_CONSTRAINT(0, 0xfeULL);
static struct event_constraint *
hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
@ -4322,6 +4342,78 @@ adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
return &emptyconstraint;
}
/*
 * Constraint lookup used by mtl_get_event_constraints() for hybrid_small
 * PMUs.
 *
 * Events that do not request precise_ip == 3 (:ppp) simply get whatever
 * intel_get_event_constraints() returns.
 *
 * :ppp selects the Precise Distribution (PDist) facility, which is only
 * supported on GP counters 0 and 1 and on fixed counter 0. A :ppp event is
 * therefore narrowed to those counters, or rejected with the empty
 * constraint when none of them is eligible.
 */
static struct event_constraint *
cmt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			  struct perf_event *event)
{
	struct event_constraint *base;
	unsigned long long pdist_gp;

	base = intel_get_event_constraints(cpuc, idx, event);

	if (event->attr.precise_ip != 3)
		return base;

	/* Force instruction:ppp on PMC0, PMC1 and fixed counter 0. */
	if (constraint_match(&fixed0_constraint, event->hw.config))
		return &fixed0_counter0_1_constraint;

	/* Keep only the PDist-capable GP counters the event may use. */
	pdist_gp = base->idxmsk64 & 0x3ull;

	if (pdist_gp == 0x3)
		return &counter0_1_constraint;
	if (pdist_gp == 0x2)
		return &counter1_constraint;
	if (pdist_gp == 0x1)
		return &counter0_constraint;

	/* Neither GP counter 0 nor 1 is allowed for this event. */
	return &emptyconstraint;
}
/*
 * Constraint lookup used by mtl_get_event_constraints() for hybrid_big
 * PMUs. It starts from spr_get_event_constraints() and special-cases
 * precise events on fixed counter 0 that also sample a weight.
 */
static struct event_constraint *
rwc_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			  struct perf_event *event)
{
	struct event_constraint *base;

	base = spr_get_event_constraints(cpuc, idx, event);

	/* Only precise events that sample a weight need special handling. */
	if (!event->attr.precise_ip)
		return base;
	if (!(event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE))
		return base;

	/* The Retire Latency is not supported by the fixed counter 0. */
	if (!constraint_match(&fixed0_constraint, event->hw.config))
		return base;

	/*
	 * The Instruction PDIR is only available on the fixed counter 0,
	 * so a :ppp request cannot be moved elsewhere: error out. Any other
	 * precise level is steered to counters 1-7.
	 */
	return event->attr.precise_ip == 3 ? &emptyconstraint
					   : &counters_1_7_constraint;
}
/*
 * Meteor Lake constraint dispatcher: choose the per-core-type handler from
 * the cpu_type of the hybrid PMU that owns the event. Any cpu_type other
 * than hybrid_big or hybrid_small triggers a WARN and gets the empty
 * constraint.
 */
static struct event_constraint *
mtl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
			  struct perf_event *event)
{
	struct x86_hybrid_pmu *hpmu = hybrid_pmu(event->pmu);

	switch (hpmu->cpu_type) {
	case hybrid_big:
		return rwc_get_event_constraints(cpuc, idx, event);
	case hybrid_small:
		return cmt_get_event_constraints(cpuc, idx, event);
	default:
		break;
	}

	WARN_ON(1);
	return &emptyconstraint;
}
static int adl_hw_config(struct perf_event *event)
{
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
@ -4494,6 +4586,25 @@ static void flip_smm_bit(void *data)
}
}
static void intel_pmu_check_num_counters(int *num_counters,
int *num_counters_fixed,
u64 *intel_ctrl, u64 fixed_mask);
/*
 * Refresh the counter counts of a hybrid PMU from CPUID.
 *
 * EAX of leaf ARCH_PERFMON_EXT_LEAF is read as a bitmap of sub-leaves.
 * When the counter sub-leaf is advertised, its EAX/EBX are read and the
 * number of GP/fixed counters is set to the position of the highest set bit
 * of each (fls()). The result is then passed through
 * intel_pmu_check_num_counters(), with EBX as the fixed-counter mask.
 * Nothing is changed when the sub-leaf is not advertised.
 */
static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int sub_bitmaps;

	sub_bitmaps = cpuid_eax(ARCH_PERFMON_EXT_LEAF);
	if (!(sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT))
		return;

	cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
		    &eax, &ebx, &ecx, &edx);

	pmu->num_counters = fls(eax);
	pmu->num_counters_fixed = fls(ebx);

	intel_pmu_check_num_counters(&pmu->num_counters,
				     &pmu->num_counters_fixed,
				     &pmu->intel_ctrl, ebx);
}
static bool init_hybrid_pmu(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
@ -4519,6 +4630,9 @@ static bool init_hybrid_pmu(int cpu)
if (!cpumask_empty(&pmu->supported_cpus))
goto end;
if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
update_pmu_cap(pmu);
if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed))
return false;
@ -5463,6 +5577,12 @@ static struct attribute *adl_hybrid_mem_attrs[] = {
NULL,
};
/*
 * NULL-terminated memory event attribute list selected as mem_attr on
 * Meteor Lake. It reuses the ADL mem_ld/mem_st event attributes.
 */
static struct attribute *mtl_hybrid_mem_attrs[] = {
EVENT_PTR(mem_ld_adl),
EVENT_PTR(mem_st_adl),
NULL
};
EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big);
EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big);
EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big);
@ -5490,20 +5610,40 @@ FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small);
FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small);
FORMAT_ATTR_HYBRID(frontend, hybrid_big);
#define ADL_HYBRID_RTM_FORMAT_ATTR \
FORMAT_HYBRID_PTR(in_tx), \
FORMAT_HYBRID_PTR(in_tx_cp)
#define ADL_HYBRID_FORMAT_ATTR \
FORMAT_HYBRID_PTR(offcore_rsp), \
FORMAT_HYBRID_PTR(ldlat), \
FORMAT_HYBRID_PTR(frontend)
static struct attribute *adl_hybrid_extra_attr_rtm[] = {
FORMAT_HYBRID_PTR(in_tx),
FORMAT_HYBRID_PTR(in_tx_cp),
FORMAT_HYBRID_PTR(offcore_rsp),
FORMAT_HYBRID_PTR(ldlat),
FORMAT_HYBRID_PTR(frontend),
NULL,
ADL_HYBRID_RTM_FORMAT_ATTR,
ADL_HYBRID_FORMAT_ATTR,
NULL
};
static struct attribute *adl_hybrid_extra_attr[] = {
FORMAT_HYBRID_PTR(offcore_rsp),
FORMAT_HYBRID_PTR(ldlat),
FORMAT_HYBRID_PTR(frontend),
NULL,
ADL_HYBRID_FORMAT_ATTR,
NULL
};
PMU_FORMAT_ATTR_SHOW(snoop_rsp, "config1:0-63");
FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small);
/*
 * NULL-terminated format attribute list for Meteor Lake when RTM is
 * available: the ADL RTM and common format attributes plus snoop_rsp.
 */
static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
ADL_HYBRID_RTM_FORMAT_ATTR,
ADL_HYBRID_FORMAT_ATTR,
FORMAT_HYBRID_PTR(snoop_rsp),
NULL
};
/*
 * NULL-terminated format attribute list for Meteor Lake without RTM: the
 * common ADL format attributes plus snoop_rsp.
 */
static struct attribute *mtl_hybrid_extra_attr[] = {
ADL_HYBRID_FORMAT_ATTR,
FORMAT_HYBRID_PTR(snoop_rsp),
NULL
};
static bool is_attr_for_this_pmu(struct kobject *kobj, struct attribute *attr)
@ -5725,6 +5865,12 @@ static void intel_pmu_check_hybrid_pmus(u64 fixed_mask)
}
}
/* Return true when @x86_model is one of the Meteor Lake model numbers. */
static __always_inline bool is_mtl(u8 x86_model)
{
	switch (x86_model) {
	case INTEL_FAM6_METEORLAKE:
	case INTEL_FAM6_METEORLAKE_L:
		return true;
	default:
		return false;
	}
}
__init int intel_pmu_init(void)
{
struct attribute **extra_skl_attr = &empty_attrs;
@ -6382,6 +6528,8 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_RAPTORLAKE:
case INTEL_FAM6_RAPTORLAKE_P:
case INTEL_FAM6_RAPTORLAKE_S:
case INTEL_FAM6_METEORLAKE:
case INTEL_FAM6_METEORLAKE_L:
/*
* Alder Lake has 2 types of CPU, core and atom.
*
@ -6401,9 +6549,7 @@ __init int intel_pmu_init(void)
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
x86_pmu.lbr_pt_coexist = true;
intel_pmu_pebs_data_source_adl();
x86_pmu.pebs_latency_data = adl_latency_data_small;
x86_pmu.num_topdown_events = 8;
static_call_update(intel_pmu_update_topdown_event,
@ -6490,8 +6636,22 @@ __init int intel_pmu_init(void)
pmu->event_constraints = intel_slm_event_constraints;
pmu->pebs_constraints = intel_grt_pebs_event_constraints;
pmu->extra_regs = intel_grt_extra_regs;
pr_cont("Alderlake Hybrid events, ");
name = "alderlake_hybrid";
if (is_mtl(boot_cpu_data.x86_model)) {
x86_pmu.pebs_latency_data = mtl_latency_data_small;
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
mem_attr = mtl_hybrid_mem_attrs;
intel_pmu_pebs_data_source_mtl();
x86_pmu.get_event_constraints = mtl_get_event_constraints;
pmu->extra_regs = intel_cmt_extra_regs;
pr_cont("Meteorlake Hybrid events, ");
name = "meteorlake_hybrid";
} else {
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
intel_pmu_pebs_data_source_adl();
pr_cont("Alderlake Hybrid events, ");
name = "alderlake_hybrid";
}
break;
default:
@ -6606,6 +6766,9 @@ __init int intel_pmu_init(void)
if (is_hybrid())
intel_pmu_check_hybrid_pmus((u64)fixed_mask);
if (x86_pmu.intel_cap.pebs_timing_info)
x86_pmu.flags |= PMU_FL_RETIRE_LATENCY;
intel_aux_output_init();
return 0;

View File

@ -2,12 +2,14 @@
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/sched/clock.h>
#include <asm/cpu_entry_area.h>
#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/insn.h>
#include <asm/io.h>
#include <asm/timer.h>
#include "../perf_event.h"
@ -53,6 +55,13 @@ union intel_x86_pebs_dse {
unsigned int st_lat_locked:1;
unsigned int ld_reserved3:26;
};
struct {
unsigned int mtl_dse:5;
unsigned int mtl_locked:1;
unsigned int mtl_stlb_miss:1;
unsigned int mtl_fwd_blk:1;
unsigned int ld_reserved4:24;
};
};
@ -135,6 +144,29 @@ void __init intel_pmu_pebs_data_source_adl(void)
__intel_pmu_pebs_data_source_grt(data_source);
}
/*
 * Overwrite entries 0x07, 0x08 and 0x0a-0x0d of a PEBS data-source
 * translation table. All other entries of @data_source are left untouched,
 * so the caller is expected to have filled in the defaults first.
 *
 * @data_source: table to patch, indexed by the data-source encoding.
 */
static void __init intel_pmu_pebs_data_source_cmt(u64 *data_source)
{
data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
data_source[0x0a] = OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE);
data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
data_source[0x0c] = OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD);
data_source[0x0d] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM);
}
/*
 * Set up the per-PMU PEBS data-source tables for Meteor Lake.
 *
 * Both hybrid PMUs start from a copy of the default pebs_data_source
 * table. The core PMU copy is then adjusted by
 * __intel_pmu_pebs_data_source_skl() and the atom PMU copy by
 * intel_pmu_pebs_data_source_cmt().
 */
void __init intel_pmu_pebs_data_source_mtl(void)
{
	u64 *core_tbl = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
	u64 *atom_tbl = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;

	memcpy(core_tbl, pebs_data_source, sizeof(pebs_data_source));
	__intel_pmu_pebs_data_source_skl(false, core_tbl);

	memcpy(atom_tbl, pebs_data_source, sizeof(pebs_data_source));
	intel_pmu_pebs_data_source_cmt(atom_tbl);
}
static u64 precise_store_data(u64 status)
{
union intel_x86_pebs_dse dse;
@ -219,24 +251,19 @@ static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
}
/* Retrieve the latency data for e-core of ADL */
u64 adl_latency_data_small(struct perf_event *event, u64 status)
static u64 __adl_latency_data_small(struct perf_event *event, u64 status,
u8 dse, bool tlb, bool lock, bool blk)
{
union intel_x86_pebs_dse dse;
u64 val;
WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big);
dse.val = status;
dse &= PERF_PEBS_DATA_SOURCE_MASK;
val = hybrid_var(event->pmu, pebs_data_source)[dse];
val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse];
pebs_set_tlb_lock(&val, tlb, lock);
/*
* For the atom core on ADL,
* bit 4: lock, bit 5: TLB access.
*/
pebs_set_tlb_lock(&val, dse.ld_locked, dse.ld_stlb_miss);
if (dse.ld_data_blk)
if (blk)
val |= P(BLK, DATA);
else
val |= P(BLK, NA);
@ -244,6 +271,29 @@ u64 adl_latency_data_small(struct perf_event *event, u64 status)
return val;
}
/*
 * Retrieve the latency data for the ADL e-core: decode @status through the
 * ld_* view of union intel_x86_pebs_dse and hand the fields to
 * __adl_latency_data_small().
 *
 * The helper takes (dse, tlb, lock, blk). ld_locked is deliberately passed
 * as "tlb" and ld_stlb_miss as "lock".
 * NOTE(review): presumably because on the ADL atom core bit 4 is lock and
 * bit 5 is TLB access, the reverse of what the ld_* field names suggest -
 * confirm against the union layout.
 */
u64 adl_latency_data_small(struct perf_event *event, u64 status)
{
union intel_x86_pebs_dse dse;
dse.val = status;
return __adl_latency_data_small(event, status, dse.ld_dse,
dse.ld_locked, dse.ld_stlb_miss,
dse.ld_data_blk);
}
/*
 * Retrieve the latency data for e-core of MTL: decode @status through the
 * mtl_* view of union intel_x86_pebs_dse and hand the fields to
 * __adl_latency_data_small() as (dse, tlb, lock, blk).
 */
u64 mtl_latency_data_small(struct perf_event *event, u64 status)
{
union intel_x86_pebs_dse dse;
dse.val = status;
return __adl_latency_data_small(event, status, dse.mtl_dse,
dse.mtl_stlb_miss, dse.mtl_locked,
dse.mtl_fwd_blk);
}
static u64 load_latency_data(struct perf_event *event, u64 status)
{
union intel_x86_pebs_dse dse;
@ -759,7 +809,8 @@ int intel_pmu_drain_bts_buffer(void)
* the sample.
*/
rcu_read_lock();
perf_prepare_sample(&header, &data, event, &regs);
perf_prepare_sample(&data, event, &regs);
perf_prepare_header(&header, &data, event, &regs);
if (perf_output_begin(&handle, &data, event,
header.size * (top - base - skip)))
@ -1519,6 +1570,27 @@ static u64 get_data_src(struct perf_event *event, u64 aux)
return val;
}
/*
 * Fill data->time from the TSC value of a PEBS record and flag
 * PERF_SAMPLE_TIME as already provided.
 *
 * Nothing is written when the conversion cannot be done, in which case the
 * caller's sample keeps whatever time handling it had before.
 */
static void setup_pebs_time(struct perf_event *event,
			    struct perf_sample_data *data,
			    u64 tsc)
{
	/* Converting to a user-defined clock is not supported yet. */
	if (event->attr.use_clockid)
		return;

	/*
	 * The conversion is only valid while the native sched clock is in
	 * use and the TSC is stable. An unstable TSC is a corner case and
	 * very unlikely to happen; if it does, the TSC in the PEBS record is
	 * dropped and the sample falls back to perf_event_clock().
	 */
	if (!(using_native_sched_clock() && sched_clock_stable()))
		return;

	data->time = native_sched_clock_from_tsc(tsc) + __sched_clock_offset;
	data->sample_flags |= PERF_SAMPLE_TIME;
}
#define PERF_SAMPLE_ADDR_TYPE (PERF_SAMPLE_ADDR | \
PERF_SAMPLE_PHYS_ADDR | \
PERF_SAMPLE_DATA_PAGE_SIZE)
@ -1569,10 +1641,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
* previous PMI context or an (I)RET happened between the record and
* PMI.
*/
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
data->callchain = perf_callchain(event, iregs);
data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
}
if (sample_type & PERF_SAMPLE_CALLCHAIN)
perf_sample_save_callchain(data, event, iregs);
/*
* We use the interrupt regs as a base because the PEBS record does not
@ -1668,16 +1738,11 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
*
* We can only do this for the default trace clock.
*/
if (x86_pmu.intel_cap.pebs_format >= 3 &&
event->attr.use_clockid == 0) {
data->time = native_sched_clock_from_tsc(pebs->tsc);
data->sample_flags |= PERF_SAMPLE_TIME;
}
if (x86_pmu.intel_cap.pebs_format >= 3)
setup_pebs_time(event, data, pebs->tsc);
if (has_branch_stack(event)) {
data->br_stack = &cpuc->lbr_stack;
data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
}
if (has_branch_stack(event))
perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
}
static void adaptive_pebs_save_regs(struct pt_regs *regs,
@ -1705,6 +1770,7 @@ static void adaptive_pebs_save_regs(struct pt_regs *regs,
#define PEBS_LATENCY_MASK 0xffff
#define PEBS_CACHE_LATENCY_OFFSET 32
#define PEBS_RETIRE_LATENCY_OFFSET 32
/*
* With adaptive PEBS the layout depends on what fields are configured.
@ -1735,10 +1801,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
perf_sample_data_init(data, 0, event->hw.last_period);
data->period = event->hw.last_period;
if (event->attr.use_clockid == 0) {
data->time = native_sched_clock_from_tsc(basic->tsc);
data->sample_flags |= PERF_SAMPLE_TIME;
}
setup_pebs_time(event, data, basic->tsc);
/*
* We must however always use iregs for the unwinder to stay sane; the
@ -1746,16 +1809,17 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
* previous PMI context or an (I)RET happened between the record and
* PMI.
*/
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
data->callchain = perf_callchain(event, iregs);
data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
}
if (sample_type & PERF_SAMPLE_CALLCHAIN)
perf_sample_save_callchain(data, event, iregs);
*regs = *iregs;
/* The ip in basic is EventingIP */
set_linear_ip(regs, basic->ip);
regs->flags = PERF_EFLAGS_EXACT;
if ((sample_type & PERF_SAMPLE_WEIGHT_STRUCT) && (x86_pmu.flags & PMU_FL_RETIRE_LATENCY))
data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK;
/*
* The record for MEMINFO is in front of GP
* But PERF_SAMPLE_TRANSACTION needs gprs->ax.
@ -1835,8 +1899,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
if (has_branch_stack(event)) {
intel_pmu_store_pebs_lbrs(lbr);
data->br_stack = &cpuc->lbr_stack;
data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
}
}

View File

@ -1606,12 +1606,10 @@ clear_arch_lbr:
*/
void x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
{
int lbr_fmt = x86_pmu.intel_cap.lbr_format;
lbr->nr = x86_pmu.lbr_nr;
lbr->from = x86_pmu.lbr_from;
lbr->to = x86_pmu.lbr_to;
lbr->info = (lbr_fmt == LBR_FORMAT_INFO) ? x86_pmu.lbr_info : 0;
lbr->info = x86_pmu.lbr_info;
}
EXPORT_SYMBOL_GPL(x86_perf_get_lbr);

View File

@ -65,6 +65,21 @@ int uncore_die_to_segment(int die)
return bus ? pci_domain_nr(bus) : -EINVAL;
}
/*
 * Map a PCI device to a logical die ID.
 *
 * Walks the CPUs in the cpumask of the device's bus and returns the
 * logical_die_id of the first initialized CPU whose NUMA node equals the
 * node of the bus. Returns -1 when no such CPU is found.
 */
int uncore_device_to_die(struct pci_dev *dev)
{
	int bus_node = pcibus_to_node(dev->bus);
	int cpu;

	for_each_cpu(cpu, cpumask_of_pcibus(dev->bus)) {
		struct cpuinfo_x86 *info = &cpu_data(cpu);

		if (!info->initialized)
			continue;
		if (cpu_to_node(cpu) != bus_node)
			continue;

		return info->logical_die_id;
	}

	return -1;
}
static void uncore_free_pcibus_map(void)
{
struct pci2phy_map *map, *tmp;
@ -842,6 +857,12 @@ static const struct attribute_group uncore_pmu_attr_group = {
.attrs = uncore_pmu_attrs,
};
/*
 * Return the box ID of @pmu: the entry of type->box_ids[] for this PMU
 * index when the type has a box_ids table, otherwise the PMU index itself.
 */
static inline int uncore_get_box_id(struct intel_uncore_type *type,
				    struct intel_uncore_pmu *pmu)
{
	if (type->box_ids)
		return type->box_ids[pmu->pmu_idx];

	return pmu->pmu_idx;
}
void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
{
struct intel_uncore_type *type = pmu->type;
@ -850,7 +871,7 @@ void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
sprintf(pmu_name, "uncore_type_%u", type->type_id);
else {
sprintf(pmu_name, "uncore_type_%u_%d",
type->type_id, type->box_ids[pmu->pmu_idx]);
type->type_id, uncore_get_box_id(type, pmu));
}
}
@ -877,7 +898,7 @@ static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
* Use the box ID from the discovery table if applicable.
*/
sprintf(pmu->name, "uncore_%s_%d", type->name,
type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx);
uncore_get_box_id(type, pmu));
}
}
@ -1674,7 +1695,10 @@ struct intel_uncore_init_fun {
void (*cpu_init)(void);
int (*pci_init)(void);
void (*mmio_init)(void);
/* Discovery table is required */
bool use_discovery;
/* The units in the discovery table should be ignored. */
int *uncore_units_ignore;
};
static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
@ -1765,6 +1789,11 @@ static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
.mmio_init = adl_uncore_mmio_init,
};
/*
 * Uncore init hooks for Meteor Lake: an MTL-specific cpu_init combined with
 * the ADL mmio_init. No pci_init is set and use_discovery is left unset.
 */
static const struct intel_uncore_init_fun mtl_uncore_init __initconst = {
.cpu_init = mtl_uncore_cpu_init,
.mmio_init = adl_uncore_mmio_init,
};
static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
.cpu_init = icx_uncore_cpu_init,
.pci_init = icx_uncore_pci_init,
@ -1782,6 +1811,7 @@ static const struct intel_uncore_init_fun spr_uncore_init __initconst = {
.pci_init = spr_uncore_pci_init,
.mmio_init = spr_uncore_mmio_init,
.use_discovery = true,
.uncore_units_ignore = spr_uncore_units_ignore,
};
static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
@ -1832,6 +1862,8 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &mtl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &mtl_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
@ -1853,7 +1885,7 @@ static int __init intel_uncore_init(void)
id = x86_match_cpu(intel_uncore_match);
if (!id) {
if (!uncore_no_discover && intel_uncore_has_discovery_tables())
if (!uncore_no_discover && intel_uncore_has_discovery_tables(NULL))
uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init;
else
return -ENODEV;
@ -1861,7 +1893,8 @@ static int __init intel_uncore_init(void)
uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
if (uncore_no_discover && uncore_init->use_discovery)
return -ENODEV;
if (uncore_init->use_discovery && !intel_uncore_has_discovery_tables())
if (uncore_init->use_discovery &&
!intel_uncore_has_discovery_tables(uncore_init->uncore_units_ignore))
return -ENODEV;
}

View File

@ -34,6 +34,8 @@
#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
#define UNCORE_IGNORE_END -1
struct pci_extra_dev {
struct pci_dev *dev[UNCORE_EXTRA_PCI_DEV_MAX];
};
@ -208,6 +210,7 @@ struct pci2phy_map {
struct pci2phy_map *__find_pci2phy_map(int segment);
int uncore_pcibus_to_dieid(struct pci_bus *bus);
int uncore_die_to_segment(int die);
int uncore_device_to_die(struct pci_dev *dev);
ssize_t uncore_event_show(struct device *dev,
struct device_attribute *attr, char *buf);
@ -589,6 +592,7 @@ extern raw_spinlock_t pci2phy_map_lock;
extern struct list_head pci2phy_map_head;
extern struct pci_extra_dev *uncore_extra_pci_dev;
extern struct event_constraint uncore_constraint_empty;
extern int spr_uncore_units_ignore[];
/* uncore_snb.c */
int snb_uncore_pci_init(void);
@ -602,6 +606,7 @@ void skl_uncore_cpu_init(void);
void icl_uncore_cpu_init(void);
void tgl_uncore_cpu_init(void);
void adl_uncore_cpu_init(void);
void mtl_uncore_cpu_init(void);
void tgl_uncore_mmio_init(void);
void tgl_l_uncore_mmio_init(void);
void adl_uncore_mmio_init(void);

View File

@ -33,7 +33,7 @@ static int logical_die_id;
static int get_device_die_id(struct pci_dev *dev)
{
int cpu, node = pcibus_to_node(dev->bus);
int node = pcibus_to_node(dev->bus);
/*
* If the NUMA info is not available, assume that the logical die id is
@ -43,19 +43,7 @@ static int get_device_die_id(struct pci_dev *dev)
if (node < 0)
return logical_die_id++;
for_each_cpu(cpu, cpumask_of_node(node)) {
struct cpuinfo_x86 *c = &cpu_data(cpu);
if (c->initialized && cpu_to_node(cpu) == node)
return c->logical_die_id;
}
/*
* All CPUs of a node may be offlined. For this case,
* the PCI and MMIO type of uncore blocks which are
* enumerated by the device will be unavailable.
*/
return -1;
return uncore_device_to_die(dev);
}
#define __node_2_type(cur) \
@ -140,13 +128,21 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit,
unsigned int *box_offset, *ids;
int i;
if (WARN_ON_ONCE(!unit->ctl || !unit->ctl_offset || !unit->ctr_offset))
if (!unit->ctl || !unit->ctl_offset || !unit->ctr_offset) {
pr_info("Invalid address is detected for uncore type %d box %d, "
"Disable the uncore unit.\n",
unit->box_type, unit->box_id);
return;
}
if (parsed) {
type = search_uncore_discovery_type(unit->box_type);
if (WARN_ON_ONCE(!type))
if (!type) {
pr_info("A spurious uncore type %d is detected, "
"Disable the uncore type.\n",
unit->box_type);
return;
}
/* Store the first box of each die */
if (!type->box_ctrl_die[die])
type->box_ctrl_die[die] = unit->ctl;
@ -181,8 +177,12 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit,
ids[i] = type->ids[i];
box_offset[i] = type->box_offset[i];
if (WARN_ON_ONCE(unit->box_id == ids[i]))
if (unit->box_id == ids[i]) {
pr_info("Duplicate uncore type %d box ID %d is detected, "
"Drop the duplicate uncore unit.\n",
unit->box_type, unit->box_id);
goto free_ids;
}
}
ids[i] = unit->box_id;
box_offset[i] = unit->ctl - type->box_ctrl;
@ -202,8 +202,25 @@ free_box_offset:
}
/*
 * Tell whether a discovered unit should be skipped.
 *
 * @unit:   unit read from the discovery table.
 * @ignore: list of box types to skip, terminated by UNCORE_IGNORE_END, or
 *          NULL when nothing is to be ignored.
 *
 * Returns true when unit->box_type appears in @ignore.
 */
static bool
uncore_ignore_unit(struct uncore_unit_discovery *unit, int *ignore)
{
	int *entry;

	if (!ignore)
		return false;

	for (entry = ignore; *entry != UNCORE_IGNORE_END; entry++) {
		if (unit->box_type == *entry)
			return true;
	}

	return false;
}
static int parse_discovery_table(struct pci_dev *dev, int die,
u32 bar_offset, bool *parsed)
u32 bar_offset, bool *parsed,
int *ignore)
{
struct uncore_global_discovery global;
struct uncore_unit_discovery unit;
@ -258,6 +275,9 @@ static int parse_discovery_table(struct pci_dev *dev, int die,
if (unit.access_type >= UNCORE_ACCESS_MAX)
continue;
if (uncore_ignore_unit(&unit, ignore))
continue;
uncore_insert_box_info(&unit, die, *parsed);
}
@ -266,7 +286,7 @@ static int parse_discovery_table(struct pci_dev *dev, int die,
return 0;
}
bool intel_uncore_has_discovery_tables(void)
bool intel_uncore_has_discovery_tables(int *ignore)
{
u32 device, val, entry_id, bar_offset;
int die, dvsec = 0, ret = true;
@ -302,7 +322,7 @@ bool intel_uncore_has_discovery_tables(void)
if (die < 0)
continue;
parse_discovery_table(dev, die, bar_offset, &parsed);
parse_discovery_table(dev, die, bar_offset, &parsed, ignore);
}
}

View File

@ -21,9 +21,15 @@
/* Global discovery table size */
#define UNCORE_DISCOVERY_GLOBAL_MAP_SIZE 0x20
#define UNCORE_DISCOVERY_PCI_DOMAIN(data) ((data >> 28) & 0x7)
#define UNCORE_DISCOVERY_PCI_BUS(data) ((data >> 20) & 0xff)
#define UNCORE_DISCOVERY_PCI_DEVFN(data) ((data >> 12) & 0xff)
#define UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET 28
#define UNCORE_DISCOVERY_PCI_DOMAIN(data) \
((data >> UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET) & 0x7)
#define UNCORE_DISCOVERY_PCI_BUS_OFFSET 20
#define UNCORE_DISCOVERY_PCI_BUS(data) \
((data >> UNCORE_DISCOVERY_PCI_BUS_OFFSET) & 0xff)
#define UNCORE_DISCOVERY_PCI_DEVFN_OFFSET 12
#define UNCORE_DISCOVERY_PCI_DEVFN(data) \
((data >> UNCORE_DISCOVERY_PCI_DEVFN_OFFSET) & 0xff)
#define UNCORE_DISCOVERY_PCI_BOX_CTRL(data) (data & 0xfff)
@ -122,7 +128,7 @@ struct intel_uncore_discovery_type {
unsigned int *box_offset; /* Box offset */
};
bool intel_uncore_has_discovery_tables(void);
bool intel_uncore_has_discovery_tables(int *ignore);
void intel_uncore_clear_discovery_tables(void);
void intel_uncore_generic_uncore_cpu_init(void);
int intel_uncore_generic_uncore_pci_init(void);

View File

@ -109,6 +109,19 @@
#define PCI_DEVICE_ID_INTEL_RPL_23_IMC 0xA728
#define PCI_DEVICE_ID_INTEL_RPL_24_IMC 0xA729
#define PCI_DEVICE_ID_INTEL_RPL_25_IMC 0xA72A
/*
 * MTL IMC PCI device IDs (MTL_1 .. MTL_13). Each one is referenced by an
 * IMC_UNCORE_DEV(MTL_n) entry in the PCI id table further down.
 */
#define PCI_DEVICE_ID_INTEL_MTL_1_IMC 0x7d00
#define PCI_DEVICE_ID_INTEL_MTL_2_IMC 0x7d01
#define PCI_DEVICE_ID_INTEL_MTL_3_IMC 0x7d02
#define PCI_DEVICE_ID_INTEL_MTL_4_IMC 0x7d05
#define PCI_DEVICE_ID_INTEL_MTL_5_IMC 0x7d10
#define PCI_DEVICE_ID_INTEL_MTL_6_IMC 0x7d14
#define PCI_DEVICE_ID_INTEL_MTL_7_IMC 0x7d15
#define PCI_DEVICE_ID_INTEL_MTL_8_IMC 0x7d16
#define PCI_DEVICE_ID_INTEL_MTL_9_IMC 0x7d21
#define PCI_DEVICE_ID_INTEL_MTL_10_IMC 0x7d22
#define PCI_DEVICE_ID_INTEL_MTL_11_IMC 0x7d23
#define PCI_DEVICE_ID_INTEL_MTL_12_IMC 0x7d24
#define PCI_DEVICE_ID_INTEL_MTL_13_IMC 0x7d28
#define IMC_UNCORE_DEV(a) \
@ -205,6 +218,32 @@
#define ADL_UNC_ARB_PERFEVTSEL0 0x2FD0
#define ADL_UNC_ARB_MSR_OFFSET 0x8
/* MTL Cbo register */
#define MTL_UNC_CBO_0_PER_CTR0 0x2448
#define MTL_UNC_CBO_0_PERFEVTSEL0 0x2442
/* MTL HAC_ARB register */
#define MTL_UNC_HAC_ARB_CTR 0x2018
#define MTL_UNC_HAC_ARB_CTRL 0x2012
/* MTL ARB register */
#define MTL_UNC_ARB_CTR 0x2418
#define MTL_UNC_ARB_CTRL 0x2412
/* MTL cNCU register */
#define MTL_UNC_CNCU_FIXED_CTR 0x2408
#define MTL_UNC_CNCU_FIXED_CTRL 0x2402
#define MTL_UNC_CNCU_BOX_CTL 0x240e
/* MTL sNCU register */
#define MTL_UNC_SNCU_FIXED_CTR 0x2008
#define MTL_UNC_SNCU_FIXED_CTRL 0x2002
#define MTL_UNC_SNCU_BOX_CTL 0x200e
/* MTL HAC_CBO register */
#define MTL_UNC_HBO_CTR 0x2048
#define MTL_UNC_HBO_CTRL 0x2042
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(chmask, chmask, "config:8-11");
@ -598,6 +637,115 @@ void adl_uncore_cpu_init(void)
uncore_msr_uncores = adl_msr_uncores;
}
/*
 * MTL "cbox" uncore PMU type: two 48-bit counters per box, driven through
 * icl_uncore_msr_ops. The event mask and format group are the ADL ones and
 * the per-box MSR stride is SNB_UNC_CBO_MSR_OFFSET.
 *
 * .num_boxes is intentionally not set here; mtl_uncore_cpu_init() fills it
 * in from icl_get_cbox_num().
 */
static struct intel_uncore_type mtl_uncore_cbox = {
.name = "cbox",
.num_counters = 2,
.perf_ctr_bits = 48,
.perf_ctr = MTL_UNC_CBO_0_PER_CTR0,
.event_ctl = MTL_UNC_CBO_0_PERFEVTSEL0,
.event_mask = ADL_UNC_RAW_EVENT_MASK,
.msr_offset = SNB_UNC_CBO_MSR_OFFSET,
.ops = &icl_uncore_msr_ops,
.format_group = &adl_uncore_format_group,
};
/*
 * MTL "hac_arb" uncore PMU type: two boxes with two 48-bit counters each,
 * based at MTL_UNC_HAC_ARB_CTR / MTL_UNC_HAC_ARB_CTRL. Shares the ADL event
 * mask/format group and icl_uncore_msr_ops with the other MTL counter types.
 */
static struct intel_uncore_type mtl_uncore_hac_arb = {
.name = "hac_arb",
.num_counters = 2,
.num_boxes = 2,
.perf_ctr_bits = 48,
.perf_ctr = MTL_UNC_HAC_ARB_CTR,
.event_ctl = MTL_UNC_HAC_ARB_CTRL,
.event_mask = ADL_UNC_RAW_EVENT_MASK,
.msr_offset = SNB_UNC_CBO_MSR_OFFSET,
.ops = &icl_uncore_msr_ops,
.format_group = &adl_uncore_format_group,
};
/*
 * MTL "arb" uncore PMU type: two boxes with two 48-bit counters each,
 * based at MTL_UNC_ARB_CTR / MTL_UNC_ARB_CTRL. Shares the ADL event
 * mask/format group and icl_uncore_msr_ops with the other MTL counter types.
 */
static struct intel_uncore_type mtl_uncore_arb = {
.name = "arb",
.num_counters = 2,
.num_boxes = 2,
.perf_ctr_bits = 48,
.perf_ctr = MTL_UNC_ARB_CTR,
.event_ctl = MTL_UNC_ARB_CTRL,
.event_mask = ADL_UNC_RAW_EVENT_MASK,
.msr_offset = SNB_UNC_CBO_MSR_OFFSET,
.ops = &icl_uncore_msr_ops,
.format_group = &adl_uncore_format_group,
};
/*
 * MTL "hac_cbox" uncore PMU type: two boxes with two 48-bit counters each,
 * based at MTL_UNC_HBO_CTR / MTL_UNC_HBO_CTRL. Shares the ADL event
 * mask/format group and icl_uncore_msr_ops with the other MTL counter types.
 */
static struct intel_uncore_type mtl_uncore_hac_cbox = {
.name = "hac_cbox",
.num_counters = 2,
.num_boxes = 2,
.perf_ctr_bits = 48,
.perf_ctr = MTL_UNC_HBO_CTR,
.event_ctl = MTL_UNC_HBO_CTRL,
.event_mask = ADL_UNC_RAW_EVENT_MASK,
.msr_offset = SNB_UNC_CBO_MSR_OFFSET,
.ops = &icl_uncore_msr_ops,
.format_group = &adl_uncore_format_group,
};
/*
 * init_box hook used by mtl_uncore_msr_ops: write SNB_UNC_GLOBAL_CTL_EN to
 * the MSR that uncore_msr_box_ctl() reports for @box.
 */
static void mtl_uncore_msr_init_box(struct intel_uncore_box *box)
{
wrmsrl(uncore_msr_box_ctl(box), SNB_UNC_GLOBAL_CTL_EN);
}
/*
 * Operations for the MTL cNCU and sNCU types: an MTL-specific init_box
 * combined with the existing SNB enable/disable helpers and the common MSR
 * counter read.
 */
static struct intel_uncore_ops mtl_uncore_msr_ops = {
.init_box = mtl_uncore_msr_init_box,
.disable_event = snb_uncore_msr_disable_event,
.enable_event = snb_uncore_msr_enable_event,
.read_counter = uncore_msr_read_counter,
};
/*
 * MTL "cncu" uncore PMU type: one box exposing a single 48-bit fixed
 * counter (MTL_UNC_CNCU_FIXED_CTR / MTL_UNC_CNCU_FIXED_CTRL), with its box
 * control at MTL_UNC_CNCU_BOX_CTL. Reuses the ICL clock format group and
 * event descriptions; box setup goes through mtl_uncore_msr_ops.
 */
static struct intel_uncore_type mtl_uncore_cncu = {
.name = "cncu",
.num_counters = 1,
.num_boxes = 1,
.box_ctl = MTL_UNC_CNCU_BOX_CTL,
.fixed_ctr_bits = 48,
.fixed_ctr = MTL_UNC_CNCU_FIXED_CTR,
.fixed_ctl = MTL_UNC_CNCU_FIXED_CTRL,
.single_fixed = 1,
.event_mask = SNB_UNC_CTL_EV_SEL_MASK,
.format_group = &icl_uncore_clock_format_group,
.ops = &mtl_uncore_msr_ops,
.event_descs = icl_uncore_events,
};
/*
 * MTL "sncu" uncore PMU type: one box exposing a single 48-bit fixed
 * counter (MTL_UNC_SNCU_FIXED_CTR / MTL_UNC_SNCU_FIXED_CTRL), with its box
 * control at MTL_UNC_SNCU_BOX_CTL. Otherwise configured exactly like
 * mtl_uncore_cncu.
 */
static struct intel_uncore_type mtl_uncore_sncu = {
.name = "sncu",
.num_counters = 1,
.num_boxes = 1,
.box_ctl = MTL_UNC_SNCU_BOX_CTL,
.fixed_ctr_bits = 48,
.fixed_ctr = MTL_UNC_SNCU_FIXED_CTR,
.fixed_ctl = MTL_UNC_SNCU_FIXED_CTRL,
.single_fixed = 1,
.event_mask = SNB_UNC_CTL_EV_SEL_MASK,
.format_group = &icl_uncore_clock_format_group,
.ops = &mtl_uncore_msr_ops,
.event_descs = icl_uncore_events,
};
/*
 * NULL-terminated list of the MTL MSR uncore types. mtl_uncore_cpu_init()
 * installs this array as uncore_msr_uncores.
 */
static struct intel_uncore_type *mtl_msr_uncores[] = {
&mtl_uncore_cbox,
&mtl_uncore_hac_arb,
&mtl_uncore_arb,
&mtl_uncore_hac_cbox,
&mtl_uncore_cncu,
&mtl_uncore_sncu,
NULL
};
/*
 * MTL CPU-side uncore setup: size the cbox type from icl_get_cbox_num()
 * (its num_boxes is not set statically) and publish the MTL type list as
 * uncore_msr_uncores.
 */
void mtl_uncore_cpu_init(void)
{
mtl_uncore_cbox.num_boxes = icl_get_cbox_num();
uncore_msr_uncores = mtl_msr_uncores;
}
enum {
SNB_PCI_UNCORE_IMC,
};
@ -1264,6 +1412,19 @@ static const struct pci_device_id tgl_uncore_pci_ids[] = {
IMC_UNCORE_DEV(RPL_23),
IMC_UNCORE_DEV(RPL_24),
IMC_UNCORE_DEV(RPL_25),
IMC_UNCORE_DEV(MTL_1),
IMC_UNCORE_DEV(MTL_2),
IMC_UNCORE_DEV(MTL_3),
IMC_UNCORE_DEV(MTL_4),
IMC_UNCORE_DEV(MTL_5),
IMC_UNCORE_DEV(MTL_6),
IMC_UNCORE_DEV(MTL_7),
IMC_UNCORE_DEV(MTL_8),
IMC_UNCORE_DEV(MTL_9),
IMC_UNCORE_DEV(MTL_10),
IMC_UNCORE_DEV(MTL_11),
IMC_UNCORE_DEV(MTL_12),
IMC_UNCORE_DEV(MTL_13),
{ /* end: all zeroes */ }
};

View File

@ -1453,9 +1453,6 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
}
raw_spin_unlock(&pci2phy_map_lock);
} else {
int node = pcibus_to_node(ubox_dev->bus);
int cpu;
segment = pci_domain_nr(ubox_dev->bus);
raw_spin_lock(&pci2phy_map_lock);
map = __find_pci2phy_map(segment);
@ -1465,15 +1462,8 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
break;
}
die_id = -1;
for_each_cpu(cpu, cpumask_of_pcibus(ubox_dev->bus)) {
struct cpuinfo_x86 *c = &cpu_data(cpu);
map->pbus_to_dieid[bus] = die_id = uncore_device_to_die(ubox_dev);
if (c->initialized && cpu_to_node(cpu) == node) {
map->pbus_to_dieid[bus] = die_id = c->logical_die_id;
break;
}
}
raw_spin_unlock(&pci2phy_map_lock);
if (WARN_ON_ONCE(die_id == -1)) {
@ -6142,6 +6132,46 @@ static int spr_upi_get_topology(struct intel_uncore_type *type)
return discover_upi_topology(type, SPR_UBOX_DID, SPR_UPI_REGS_ADDR_DEVICE_LINK0);
}
static struct intel_uncore_type spr_uncore_mdf = {
SPR_UNCORE_COMMON_FORMAT(),
.name = "mdf",
};
#define UNCORE_SPR_NUM_UNCORE_TYPES 12
#define UNCORE_SPR_IIO 1
#define UNCORE_SPR_IMC 6
#define UNCORE_SPR_UPI 8
#define UNCORE_SPR_M3UPI 9
/*
* The uncore units, which are supported by the discovery table,
* are defined here.
*/
/*
 * Table of UNCORE_SPR_NUM_UNCORE_TYPES slots. The slot positions line up
 * with the UNCORE_SPR_* ids defined above (IIO = 1, IMC = 6, UPI = 8,
 * M3UPI = 9); NOTE(review): presumably the index is the discovery type id,
 * confirm against the code that consumes this table. A NULL slot means no
 * customized type is provided from this table for that id.
 */
static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = {
&spr_uncore_chabox,
&spr_uncore_iio, /* 1: UNCORE_SPR_IIO */
&spr_uncore_irp,
&spr_uncore_m2pcie,
&spr_uncore_pcu,
NULL,
&spr_uncore_imc, /* 6: UNCORE_SPR_IMC */
&spr_uncore_m2m,
NULL, /* 8: UNCORE_SPR_UPI, listed in spr_pci_uncores instead */
NULL, /* 9: UNCORE_SPR_M3UPI, listed in spr_pci_uncores instead */
NULL,
&spr_uncore_mdf,
};
/*
* The uncore units, which are not supported by the discovery table,
* are implemented from here.
*/
/*
 * Number of UPI (and M3UPI) boxes and their per-box offsets. Both
 * spr_uncore_upi and spr_uncore_m3upi point .pci_offsets at this table and
 * use SPR_UNCORE_UPI_NUM_BOXES for .num_boxes.
 */
#define SPR_UNCORE_UPI_NUM_BOXES 4
static unsigned int spr_upi_pci_offsets[SPR_UNCORE_UPI_NUM_BOXES] = {
0, 0x8000, 0x10000, 0x18000
};
static struct intel_uncore_type spr_uncore_upi = {
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
.event_mask_ext = SPR_RAW_EVENT_MASK_EXT,
@ -6152,38 +6182,30 @@ static struct intel_uncore_type spr_uncore_upi = {
.get_topology = spr_upi_get_topology,
.set_mapping = spr_upi_set_mapping,
.cleanup_mapping = spr_upi_cleanup_mapping,
.type_id = UNCORE_SPR_UPI,
.num_counters = 4,
.num_boxes = SPR_UNCORE_UPI_NUM_BOXES,
.perf_ctr_bits = 48,
.perf_ctr = ICX_UPI_PCI_PMON_CTR0,
.event_ctl = ICX_UPI_PCI_PMON_CTL0,
.box_ctl = ICX_UPI_PCI_PMON_BOX_CTL,
.pci_offsets = spr_upi_pci_offsets,
};
/*
 * Pre-defined SPR "m3upi" PCI uncore type. All parameters are given
 * statically here: type id UNCORE_SPR_M3UPI, SPR_UNCORE_UPI_NUM_BOXES
 * boxes of four 48-bit counters, ICX M3UPI register offsets, the shared
 * spr_upi_pci_offsets table and the ICX M3UPI constraints.
 * spr_update_device_location() later attaches the per-die box_ctls array.
 */
static struct intel_uncore_type spr_uncore_m3upi = {
SPR_UNCORE_PCI_COMMON_FORMAT(),
.name = "m3upi",
.type_id = UNCORE_SPR_M3UPI,
.num_counters = 4,
.num_boxes = SPR_UNCORE_UPI_NUM_BOXES,
.perf_ctr_bits = 48,
.perf_ctr = ICX_M3UPI_PCI_PMON_CTR0,
.event_ctl = ICX_M3UPI_PCI_PMON_CTL0,
.box_ctl = ICX_M3UPI_PCI_PMON_BOX_CTL,
.pci_offsets = spr_upi_pci_offsets,
.constraints = icx_uncore_m3upi_constraints,
};
static struct intel_uncore_type spr_uncore_mdf = {
SPR_UNCORE_COMMON_FORMAT(),
.name = "mdf",
};
#define UNCORE_SPR_NUM_UNCORE_TYPES 12
#define UNCORE_SPR_IIO 1
#define UNCORE_SPR_IMC 6
static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = {
&spr_uncore_chabox,
&spr_uncore_iio,
&spr_uncore_irp,
&spr_uncore_m2pcie,
&spr_uncore_pcu,
NULL,
&spr_uncore_imc,
&spr_uncore_m2m,
&spr_uncore_upi,
&spr_uncore_m3upi,
NULL,
&spr_uncore_mdf,
};
enum perf_uncore_spr_iio_freerunning_type_id {
SPR_IIO_MSR_IOCLK,
SPR_IIO_MSR_BW_IN,
@ -6314,6 +6336,7 @@ static struct intel_uncore_type spr_uncore_imc_free_running = {
#define UNCORE_SPR_MSR_EXTRA_UNCORES 1
#define UNCORE_SPR_MMIO_EXTRA_UNCORES 1
#define UNCORE_SPR_PCI_EXTRA_UNCORES 2
static struct intel_uncore_type *spr_msr_uncores[UNCORE_SPR_MSR_EXTRA_UNCORES] = {
&spr_uncore_iio_free_running,
@ -6323,6 +6346,17 @@ static struct intel_uncore_type *spr_mmio_uncores[UNCORE_SPR_MMIO_EXTRA_UNCORES]
&spr_uncore_imc_free_running,
};
/*
 * Extra PCI uncore types (UPI and M3UPI) that spr_uncore_pci_init() passes
 * to uncore_get_uncores() together with UNCORE_SPR_PCI_EXTRA_UNCORES.
 */
static struct intel_uncore_type *spr_pci_uncores[UNCORE_SPR_PCI_EXTRA_UNCORES] = {
&spr_uncore_upi,
&spr_uncore_m3upi
};
/*
 * List of uncore type ids (UPI and M3UPI), terminated by UNCORE_IGNORE_END.
 * NOTE(review): presumably handed to intel_uncore_has_discovery_tables() as
 * its "ignore" argument so these units are skipped during discovery;
 * confirm at the call site.
 */
int spr_uncore_units_ignore[] = {
UNCORE_SPR_UPI,
UNCORE_SPR_M3UPI,
UNCORE_IGNORE_END
};
static void uncore_type_customized_copy(struct intel_uncore_type *to_type,
struct intel_uncore_type *from_type)
{
@ -6423,9 +6457,69 @@ void spr_uncore_cpu_init(void)
spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO);
}
#define SPR_UNCORE_UPI_PCIID 0x3241
#define SPR_UNCORE_UPI0_DEVFN 0x9
#define SPR_UNCORE_M3UPI_PCIID 0x3246
#define SPR_UNCORE_M3UPI0_DEVFN 0x29
/*
 * Build the per-die box control table for the pre-defined UPI or M3UPI
 * uncore type.
 *
 * @type_id selects the type (UNCORE_SPR_UPI or UNCORE_SPR_M3UPI); any other
 * value is ignored. Every Intel PCI device with the matching device id and
 * devfn is looked up, mapped to its die, and its location is packed into
 * one word per die (domain, bus, devfn and the type's box_ctl) using the
 * UNCORE_DISCOVERY_PCI_*_OFFSET shifts. Dies without a matching device keep
 * the zero written by kcalloc().
 *
 * If the table cannot be allocated, the type is disabled by setting its
 * num_boxes to 0.
 */
static void spr_update_device_location(int type_id)
{
	struct intel_uncore_type *type;
	struct pci_dev *pdev = NULL;
	u32 device, devfn;
	u64 *box_ctls;
	int die;

	switch (type_id) {
	case UNCORE_SPR_UPI:
		type = &spr_uncore_upi;
		device = SPR_UNCORE_UPI_PCIID;
		devfn = SPR_UNCORE_UPI0_DEVFN;
		break;
	case UNCORE_SPR_M3UPI:
		type = &spr_uncore_m3upi;
		device = SPR_UNCORE_M3UPI_PCIID;
		devfn = SPR_UNCORE_M3UPI0_DEVFN;
		break;
	default:
		return;
	}

	box_ctls = kcalloc(__uncore_max_dies, sizeof(u64), GFP_KERNEL);
	if (!box_ctls) {
		type->num_boxes = 0;
		return;
	}

	/* pci_get_device() is fed the previous hit to continue the search. */
	while ((pdev = pci_get_device(PCI_VENDOR_ID_INTEL, device, pdev)) != NULL) {
		/* Only the first UPI/M3UPI function is of interest. */
		if (pdev->devfn != devfn)
			continue;

		die = uncore_device_to_die(pdev);
		if (die < 0)
			continue;

		box_ctls[die] = (pci_domain_nr(pdev->bus) << UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET) |
				(pdev->bus->number << UNCORE_DISCOVERY_PCI_BUS_OFFSET) |
				(devfn << UNCORE_DISCOVERY_PCI_DEVFN_OFFSET) |
				type->box_ctl;
	}

	type->box_ctls = box_ctls;
}
/*
 * Set up the SPR PCI uncore types.
 *
 * The UPI/M3UPI device locations are filled in first, then the PCI type
 * list is built in one uncore_get_uncores() call that also receives the
 * pre-defined UPI and M3UPI types from spr_pci_uncores.
 *
 * Always returns 0.
 */
int spr_uncore_pci_init(void)
{
	/*
	 * The discovery table of UPI on some SPR variant is broken,
	 * which impacts the detection of both UPI and M3UPI uncore PMON.
	 * Use the pre-defined UPI and M3UPI table to replace.
	 *
	 * The accurate location, e.g., domain and BUS number,
	 * can only be retrieved at load time.
	 * Update the location of UPI and M3UPI.
	 */
	spr_update_device_location(UNCORE_SPR_UPI);
	spr_update_device_location(UNCORE_SPR_M3UPI);

	/*
	 * uncore_pci_uncores is assigned exactly once, after the locations
	 * above are known.
	 */
	uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI,
						UNCORE_SPR_PCI_EXTRA_UNCORES,
						spr_pci_uncores);
	return 0;
}

View File

@ -35,15 +35,17 @@
* per-core reg tables.
*/
enum extra_reg_type {
EXTRA_REG_NONE = -1, /* not used */
EXTRA_REG_NONE = -1, /* not used */
EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
EXTRA_REG_LBR = 2, /* lbr_select */
EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */
EXTRA_REG_FE = 4, /* fe_* */
EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
EXTRA_REG_LBR = 2, /* lbr_select */
EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */
EXTRA_REG_FE = 4, /* fe_* */
EXTRA_REG_SNOOP_0 = 5, /* snoop response 0 */
EXTRA_REG_SNOOP_1 = 6, /* snoop response 1 */
EXTRA_REG_MAX /* number of entries needed */
EXTRA_REG_MAX /* number of entries needed */
};
struct event_constraint {
@ -606,6 +608,7 @@ union perf_capabilities {
u64 pebs_baseline:1;
u64 perf_metrics:1;
u64 pebs_output_pt_available:1;
u64 pebs_timing_info:1;
u64 anythread_deprecated:1;
};
u64 capabilities;
@ -647,6 +650,7 @@ enum {
};
#define PERF_PEBS_DATA_SOURCE_MAX 0x10
#define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1)
struct x86_hybrid_pmu {
struct pmu pmu;
@ -1000,6 +1004,7 @@ do { \
#define PMU_FL_PAIR 0x40 /* merge counters for large incr. events */
#define PMU_FL_INSTR_LATENCY 0x80 /* Support Instruction Latency in PEBS Memory Info Record */
#define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */
#define PMU_FL_RETIRE_LATENCY 0x200 /* Support Retire Latency in PEBS */
#define EVENT_VAR(_id) event_attr_##_id
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@ -1486,6 +1491,8 @@ int intel_pmu_drain_bts_buffer(void);
u64 adl_latency_data_small(struct perf_event *event, u64 status);
u64 mtl_latency_data_small(struct perf_event *event, u64 status);
extern struct event_constraint intel_core2_pebs_event_constraints[];
extern struct event_constraint intel_atom_pebs_event_constraints[];
@ -1597,6 +1604,8 @@ void intel_pmu_pebs_data_source_adl(void);
void intel_pmu_pebs_data_source_grt(void);
void intel_pmu_pebs_data_source_mtl(void);
int intel_pmu_setup_lbr_filter(struct perf_event *event);
void intel_pt_interrupt(void);

View File

@ -541,7 +541,13 @@ __init int zhaoxin_pmu_init(void)
switch (boot_cpu_data.x86) {
case 0x06:
if (boot_cpu_data.x86_model == 0x0f || boot_cpu_data.x86_model == 0x19) {
/*
* Support Zhaoxin CPU from ZXC series, exclude Nano series through FMS.
* Nano FMS: Family=6, Model=F, Stepping=[0-A][C-D]
* ZXC FMS: Family=6, Model=F, Stepping=E-F OR Family=6, Model=0x19, Stepping=0-3
*/
if ((boot_cpu_data.x86_model == 0x0f && boot_cpu_data.x86_stepping >= 0x0e) ||
boot_cpu_data.x86_model == 0x19) {
x86_pmu.max_period = x86_pmu.cntval_mask >> 1;

View File

@ -312,6 +312,7 @@
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */
#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */
#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */
#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */

View File

@ -189,6 +189,9 @@
#define MSR_TURBO_RATIO_LIMIT1 0x000001ae
#define MSR_TURBO_RATIO_LIMIT2 0x000001af
#define MSR_SNOOP_RSP_0 0x00001328
#define MSR_SNOOP_RSP_1 0x00001329
#define MSR_LBR_SELECT 0x000001c8
#define MSR_LBR_TOS 0x000001c9

View File

@ -159,6 +159,14 @@ union cpuid10_edx {
unsigned int full;
};
/*
* Intel "Architectural Performance Monitoring extension" CPUID
* detection/enumeration details:
*/
#define ARCH_PERFMON_EXT_LEAF 0x00000023
#define ARCH_PERFMON_NUM_COUNTER_LEAF_BIT 0x1
#define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1
/*
* Intel Architectural LBR CPUID detection/enumeration details:
*/

View File

@ -659,17 +659,19 @@ static int prepare_emulation(struct kprobe *p, struct insn *insn)
* is determined by the MOD/RM byte.
*/
opcode = insn->modrm.bytes[0];
if ((opcode & 0x30) == 0x10) {
if ((opcode & 0x8) == 0x8)
return -EOPNOTSUPP; /* far call */
/* call absolute, indirect */
switch (X86_MODRM_REG(opcode)) {
case 0b010: /* FF /2, call near, absolute indirect */
p->ainsn.emulate_op = kprobe_emulate_call_indirect;
} else if ((opcode & 0x30) == 0x20) {
if ((opcode & 0x8) == 0x8)
return -EOPNOTSUPP; /* far jmp */
/* jmp near absolute indirect */
break;
case 0b100: /* FF /4, jmp near, absolute indirect */
p->ainsn.emulate_op = kprobe_emulate_jmp_indirect;
} else
break;
case 0b011: /* FF /3, call far, absolute indirect */
case 0b101: /* FF /5, jmp far, absolute indirect */
return -EOPNOTSUPP;
}
if (!p->ainsn.emulate_op)
break;
if (insn->addr_bytes != sizeof(unsigned long))
@ -990,20 +992,6 @@ int kprobe_int3_handler(struct pt_regs *regs)
kprobe_post_process(p, regs, kcb);
return 1;
}
}
if (*addr != INT3_INSN_OPCODE) {
/*
* The breakpoint instruction was removed right
* after we hit it. Another cpu has removed
* either a probepoint or a debugger breakpoint
* at this address. In either case, no further
* handling of this interrupt is appropriate.
* Back up over the (now missing) int3 and run
* the original instruction.
*/
regs->ip = (unsigned long)addr;
return 1;
} /* else: not a kprobe fault; let the kernel handle it */
return 0;

View File

@ -95,6 +95,11 @@ struct perf_raw_record {
u32 size;
};
/*
 * Tell whether @frag is the final fragment of a raw record: a fragment is
 * treated as last when its pad field is smaller than sizeof(u64).
 */
static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
{
return frag->pad < sizeof(u64);
}
/*
* branch stack layout:
* nr: number of taken branches stored in entries[]
@ -1095,50 +1100,82 @@ int perf_event_read_local(struct perf_event *event, u64 *value,
extern u64 perf_event_read_value(struct perf_event *event,
u64 *enabled, u64 *running);
extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs);
/* True when @event's branch_sample_type has PERF_SAMPLE_BRANCH_NO_FLAGS set. */
static inline bool branch_sample_no_flags(const struct perf_event *event)
{
	return (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS) != 0;
}
/* True when @event's branch_sample_type has PERF_SAMPLE_BRANCH_NO_CYCLES set. */
static inline bool branch_sample_no_cycles(const struct perf_event *event)
{
	return (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES) != 0;
}
/* True when @event's branch_sample_type has PERF_SAMPLE_BRANCH_TYPE_SAVE set. */
static inline bool branch_sample_type(const struct perf_event *event)
{
	return (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE) != 0;
}
/* True when @event's branch_sample_type has PERF_SAMPLE_BRANCH_HW_INDEX set. */
static inline bool branch_sample_hw_index(const struct perf_event *event)
{
	return (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) != 0;
}
/* True when @event's branch_sample_type has PERF_SAMPLE_BRANCH_PRIV_SAVE set. */
static inline bool branch_sample_priv(const struct perf_event *event)
{
	return (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE) != 0;
}
struct perf_sample_data {
/*
* Fields set by perf_sample_data_init(), group so as to
* minimize the cachelines touched.
* Fields set by perf_sample_data_init() unconditionally,
* group so as to minimize the cachelines touched.
*/
u64 sample_flags;
u64 period;
u64 dyn_size;
/*
* The other fields, optionally {set,used} by
* perf_{prepare,output}_sample().
* Fields commonly set by __perf_event_header__init_id(),
* group so as to minimize the cachelines touched.
*/
struct perf_branch_stack *br_stack;
union perf_sample_weight weight;
union perf_mem_data_src data_src;
u64 txn;
u64 addr;
struct perf_raw_record *raw;
u64 type;
u64 ip;
struct {
u32 pid;
u32 tid;
} tid_entry;
u64 time;
u64 id;
u64 stream_id;
struct {
u32 cpu;
u32 reserved;
} cpu_entry;
/*
* The other fields, optionally {set,used} by
* perf_{prepare,output}_sample().
*/
u64 ip;
struct perf_callchain_entry *callchain;
u64 aux_size;
struct perf_raw_record *raw;
struct perf_branch_stack *br_stack;
union perf_sample_weight weight;
union perf_mem_data_src data_src;
u64 txn;
struct perf_regs regs_user;
struct perf_regs regs_intr;
u64 stack_user_size;
u64 phys_addr;
u64 stream_id;
u64 cgroup;
u64 addr;
u64 phys_addr;
u64 data_page_size;
u64 code_page_size;
u64 aux_size;
} ____cacheline_aligned;
/* default value for data source */
@ -1154,6 +1191,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
/* remaining struct members initialized in perf_prepare_sample() */
data->sample_flags = PERF_SAMPLE_PERIOD;
data->period = period;
data->dyn_size = 0;
if (addr) {
data->addr = addr;
@ -1161,6 +1199,68 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
}
}
static inline void perf_sample_save_callchain(struct perf_sample_data *data,
struct perf_event *event,
struct pt_regs *regs)
{
int size = 1;
data->callchain = perf_callchain(event, regs);
size += data->callchain->nr;
data->dyn_size += size * sizeof(u64);
data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
}
/*
 * Attach the raw record @raw to @data.
 *
 * Walks the fragment chain to total the payload size, rounds the payload
 * plus its u32 size field up to a u64 boundary, records the resulting size
 * in raw->size and the padding in the last fragment, then accounts the
 * bytes in data->dyn_size and marks PERF_SAMPLE_RAW in data->sample_flags.
 */
static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
					     struct perf_raw_record *raw)
{
	struct perf_raw_frag *last = &raw->frag;
	u32 payload = last->size;
	int total;

	/* Advance to the final fragment, summing sizes on the way. */
	while (!perf_raw_frag_last(last)) {
		last = last->next;
		payload += last->size;
	}

	total = round_up(payload + sizeof(u32), sizeof(u64));
	raw->size = total - sizeof(u32);
	/* The padding is stored on the last fragment only. */
	last->pad = raw->size - payload;

	data->raw = raw;
	data->dyn_size += total;
	data->sample_flags |= PERF_SAMPLE_RAW;
}
/*
 * Attach the branch stack @brs to @data.
 *
 * The accounted size is one u64 for nr, one more u64 when @event requests
 * the hardware index, plus brs->nr branch entries. Updates data->br_stack
 * and data->dyn_size and marks PERF_SAMPLE_BRANCH_STACK in
 * data->sample_flags.
 */
static inline void perf_sample_save_brstack(struct perf_sample_data *data,
					    struct perf_event *event,
					    struct perf_branch_stack *brs)
{
	int bytes = sizeof(u64); /* nr */

	if (branch_sample_hw_index(event))
		bytes += sizeof(u64);
	bytes += brs->nr * sizeof(struct perf_branch_entry);

	data->br_stack = brs;
	data->dyn_size += bytes;
	data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
}
/*
 * Size of the sample as currently accounted: the perf_event_header, the
 * event's header_size and id_header_size, and the dynamic part collected
 * in data->dyn_size. The sum is returned as a u32.
 */
static inline u32 perf_sample_data_size(struct perf_sample_data *data,
					struct perf_event *event)
{
	u32 fixed = sizeof(struct perf_event_header) +
		    event->header_size + event->id_header_size;

	return fixed + data->dyn_size;
}
/*
* Clear all bitfields in the perf_branch_entry.
* The to and from fields are not cleared because they are
@ -1182,7 +1282,10 @@ extern void perf_output_sample(struct perf_output_handle *handle,
struct perf_event_header *header,
struct perf_sample_data *data,
struct perf_event *event);
extern void perf_prepare_sample(struct perf_event_header *header,
extern void perf_prepare_sample(struct perf_sample_data *data,
struct perf_event *event,
struct pt_regs *regs);
extern void perf_prepare_header(struct perf_event_header *header,
struct perf_sample_data *data,
struct perf_event *event,
struct pt_regs *regs);
@ -1402,7 +1505,6 @@ extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct
extern struct perf_callchain_entry *
get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
u32 max_stack, bool crosstask, bool add_mark);
extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs);
extern int get_callchain_buffers(int max_stack);
extern void put_callchain_buffers(void);
extern struct perf_callchain_entry *get_callchain_entry(int *rctx);
@ -1670,11 +1772,6 @@ extern void perf_restore_debug_store(void);
static inline void perf_restore_debug_store(void) { }
#endif
static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
{
return frag->pad < sizeof(u64);
}
#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
struct perf_pmu_events_attr {
@ -1724,7 +1821,7 @@ static struct perf_pmu_events_attr _var = { \
.id = _id, } \
})[0].attr.attr)
#define PMU_FORMAT_ATTR(_name, _format) \
#define PMU_FORMAT_ATTR_SHOW(_name, _format) \
static ssize_t \
_name##_show(struct device *dev, \
struct device_attribute *attr, \
@ -1733,6 +1830,9 @@ _name##_show(struct device *dev, \
BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
return sprintf(page, _format "\n"); \
} \
#define PMU_FORMAT_ATTR(_name, _format) \
PMU_FORMAT_ATTR_SHOW(_name, _format) \
\
static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
@ -1781,30 +1881,4 @@ static inline void perf_lopwr_cb(bool mode)
}
#endif
#ifdef CONFIG_PERF_EVENTS
static inline bool branch_sample_no_flags(const struct perf_event *event)
{
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS;
}
static inline bool branch_sample_no_cycles(const struct perf_event *event)
{
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES;
}
static inline bool branch_sample_type(const struct perf_event *event)
{
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE;
}
static inline bool branch_sample_hw_index(const struct perf_event *event)
{
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
}
static inline bool branch_sample_priv(const struct perf_event *event)
{
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
}
#endif /* CONFIG_PERF_EVENTS */
#endif /* _LINUX_PERF_EVENT_H */

View File

@ -7041,13 +7041,20 @@ out_put:
ring_buffer_put(rb);
}
static void __perf_event_header__init_id(struct perf_event_header *header,
struct perf_sample_data *data,
/*
* A set of common sample data types saved even for non-sample records
* when event->attr.sample_id_all is set.
*/
#define PERF_SAMPLE_ID_ALL (PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \
PERF_SAMPLE_ID | PERF_SAMPLE_STREAM_ID | \
PERF_SAMPLE_CPU | PERF_SAMPLE_IDENTIFIER)
static void __perf_event_header__init_id(struct perf_sample_data *data,
struct perf_event *event,
u64 sample_type)
{
data->type = event->attr.sample_type;
header->size += event->id_header_size;
data->sample_flags |= data->type & PERF_SAMPLE_ID_ALL;
if (sample_type & PERF_SAMPLE_TID) {
/* namespace issues */
@ -7074,8 +7081,10 @@ void perf_event_header__init_id(struct perf_event_header *header,
struct perf_sample_data *data,
struct perf_event *event)
{
if (event->attr.sample_id_all)
__perf_event_header__init_id(header, data, event, event->attr.sample_type);
if (event->attr.sample_id_all) {
header->size += event->id_header_size;
__perf_event_header__init_id(data, event, event->attr.sample_type);
}
}
static void __perf_event__output_id_sample(struct perf_output_handle *handle,
@ -7305,7 +7314,7 @@ void perf_output_sample(struct perf_output_handle *handle,
}
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
if (data->sample_flags & PERF_SAMPLE_BRANCH_STACK) {
if (data->br_stack) {
size_t size;
size = data->br_stack->nr
@ -7549,83 +7558,68 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
return callchain ?: &__empty_callchain;
}
void perf_prepare_sample(struct perf_event_header *header,
struct perf_sample_data *data,
/*
 * Return @d when any bit of @s is set in @flags, otherwise 0.
 */
static __always_inline u64 __cond_set(u64 flags, u64 s, u64 d)
{
	return (flags & s) ? d : 0;
}
void perf_prepare_sample(struct perf_sample_data *data,
struct perf_event *event,
struct pt_regs *regs)
{
u64 sample_type = event->attr.sample_type;
u64 filtered_sample_type;
header->type = PERF_RECORD_SAMPLE;
header->size = sizeof(*header) + event->header_size;
header->misc = 0;
header->misc |= perf_misc_flags(regs);
/*
* Clear the sample flags that have already been done by the
* PMU driver.
* Add the sample flags that are dependent to others. And clear the
* sample flags that have already been done by the PMU driver.
*/
filtered_sample_type = sample_type & ~data->sample_flags;
__perf_event_header__init_id(header, data, event, filtered_sample_type);
filtered_sample_type = sample_type;
filtered_sample_type |= __cond_set(sample_type, PERF_SAMPLE_CODE_PAGE_SIZE,
PERF_SAMPLE_IP);
filtered_sample_type |= __cond_set(sample_type, PERF_SAMPLE_DATA_PAGE_SIZE |
PERF_SAMPLE_PHYS_ADDR, PERF_SAMPLE_ADDR);
filtered_sample_type |= __cond_set(sample_type, PERF_SAMPLE_STACK_USER,
PERF_SAMPLE_REGS_USER);
filtered_sample_type &= ~data->sample_flags;
if (sample_type & (PERF_SAMPLE_IP | PERF_SAMPLE_CODE_PAGE_SIZE))
if (filtered_sample_type == 0) {
/* Make sure it has the correct data->type for output */
data->type = event->attr.sample_type;
return;
}
__perf_event_header__init_id(data, event, filtered_sample_type);
if (filtered_sample_type & PERF_SAMPLE_IP) {
data->ip = perf_instruction_pointer(regs);
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
int size = 1;
if (filtered_sample_type & PERF_SAMPLE_CALLCHAIN)
data->callchain = perf_callchain(event, regs);
size += data->callchain->nr;
header->size += size * sizeof(u64);
data->sample_flags |= PERF_SAMPLE_IP;
}
if (sample_type & PERF_SAMPLE_RAW) {
struct perf_raw_record *raw = data->raw;
int size;
if (filtered_sample_type & PERF_SAMPLE_CALLCHAIN)
perf_sample_save_callchain(data, event, regs);
if (raw && (data->sample_flags & PERF_SAMPLE_RAW)) {
struct perf_raw_frag *frag = &raw->frag;
u32 sum = 0;
do {
sum += frag->size;
if (perf_raw_frag_last(frag))
break;
frag = frag->next;
} while (1);
size = round_up(sum + sizeof(u32), sizeof(u64));
raw->size = size - sizeof(u32);
frag->pad = raw->size - sum;
} else {
size = sizeof(u64);
data->raw = NULL;
}
header->size += size;
if (filtered_sample_type & PERF_SAMPLE_RAW) {
data->raw = NULL;
data->dyn_size += sizeof(u64);
data->sample_flags |= PERF_SAMPLE_RAW;
}
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
int size = sizeof(u64); /* nr */
if (data->sample_flags & PERF_SAMPLE_BRANCH_STACK) {
if (branch_sample_hw_index(event))
size += sizeof(u64);
size += data->br_stack->nr
* sizeof(struct perf_branch_entry);
}
header->size += size;
if (filtered_sample_type & PERF_SAMPLE_BRANCH_STACK) {
data->br_stack = NULL;
data->dyn_size += sizeof(u64);
data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
}
if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
if (filtered_sample_type & PERF_SAMPLE_REGS_USER)
perf_sample_regs_user(&data->regs_user, regs);
if (sample_type & PERF_SAMPLE_REGS_USER) {
/*
* It cannot use the filtered_sample_type here as REGS_USER can be set
* by STACK_USER (using __cond_set() above) and we don't want to update
* the dyn_size if it's not requested by users.
*/
if ((sample_type & ~data->sample_flags) & PERF_SAMPLE_REGS_USER) {
/* regs dump ABI info */
int size = sizeof(u64);
@ -7634,10 +7628,11 @@ void perf_prepare_sample(struct perf_event_header *header,
size += hweight64(mask) * sizeof(u64);
}
header->size += size;
data->dyn_size += size;
data->sample_flags |= PERF_SAMPLE_REGS_USER;
}
if (sample_type & PERF_SAMPLE_STACK_USER) {
if (filtered_sample_type & PERF_SAMPLE_STACK_USER) {
/*
* Either we need PERF_SAMPLE_STACK_USER bit to be always
* processed as the last one or have additional check added
@ -7645,9 +7640,10 @@ void perf_prepare_sample(struct perf_event_header *header,
* up the rest of the sample size.
*/
u16 stack_size = event->attr.sample_stack_user;
u16 header_size = perf_sample_data_size(data, event);
u16 size = sizeof(u64);
stack_size = perf_sample_ustack_size(stack_size, header->size,
stack_size = perf_sample_ustack_size(stack_size, header_size,
data->regs_user.regs);
/*
@ -7659,24 +7655,31 @@ void perf_prepare_sample(struct perf_event_header *header,
size += sizeof(u64) + stack_size;
data->stack_user_size = stack_size;
header->size += size;
data->dyn_size += size;
data->sample_flags |= PERF_SAMPLE_STACK_USER;
}
if (filtered_sample_type & PERF_SAMPLE_WEIGHT_TYPE)
if (filtered_sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
data->weight.full = 0;
if (filtered_sample_type & PERF_SAMPLE_DATA_SRC)
data->data_src.val = PERF_MEM_NA;
if (filtered_sample_type & PERF_SAMPLE_TRANSACTION)
data->txn = 0;
if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_DATA_PAGE_SIZE)) {
if (filtered_sample_type & PERF_SAMPLE_ADDR)
data->addr = 0;
data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
}
if (sample_type & PERF_SAMPLE_REGS_INTR) {
if (filtered_sample_type & PERF_SAMPLE_DATA_SRC) {
data->data_src.val = PERF_MEM_NA;
data->sample_flags |= PERF_SAMPLE_DATA_SRC;
}
if (filtered_sample_type & PERF_SAMPLE_TRANSACTION) {
data->txn = 0;
data->sample_flags |= PERF_SAMPLE_TRANSACTION;
}
if (filtered_sample_type & PERF_SAMPLE_ADDR) {
data->addr = 0;
data->sample_flags |= PERF_SAMPLE_ADDR;
}
if (filtered_sample_type & PERF_SAMPLE_REGS_INTR) {
/* regs dump ABI info */
int size = sizeof(u64);
@ -7688,20 +7691,23 @@ void perf_prepare_sample(struct perf_event_header *header,
size += hweight64(mask) * sizeof(u64);
}
header->size += size;
data->dyn_size += size;
data->sample_flags |= PERF_SAMPLE_REGS_INTR;
}
if (sample_type & PERF_SAMPLE_PHYS_ADDR &&
filtered_sample_type & PERF_SAMPLE_PHYS_ADDR)
if (filtered_sample_type & PERF_SAMPLE_PHYS_ADDR) {
data->phys_addr = perf_virt_to_phys(data->addr);
data->sample_flags |= PERF_SAMPLE_PHYS_ADDR;
}
#ifdef CONFIG_CGROUP_PERF
if (sample_type & PERF_SAMPLE_CGROUP) {
if (filtered_sample_type & PERF_SAMPLE_CGROUP) {
struct cgroup *cgrp;
/* protected by RCU */
cgrp = task_css_check(current, perf_event_cgrp_id, 1)->cgroup;
data->cgroup = cgroup_id(cgrp);
data->sample_flags |= PERF_SAMPLE_CGROUP;
}
#endif
@ -7710,16 +7716,21 @@ void perf_prepare_sample(struct perf_event_header *header,
* require PERF_SAMPLE_ADDR, kernel implicitly retrieve the data->addr,
* but the value will not dump to the userspace.
*/
if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
if (filtered_sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) {
data->data_page_size = perf_get_page_size(data->addr);
data->sample_flags |= PERF_SAMPLE_DATA_PAGE_SIZE;
}
if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
if (filtered_sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) {
data->code_page_size = perf_get_page_size(data->ip);
data->sample_flags |= PERF_SAMPLE_CODE_PAGE_SIZE;
}
if (sample_type & PERF_SAMPLE_AUX) {
if (filtered_sample_type & PERF_SAMPLE_AUX) {
u64 size;
u16 header_size = perf_sample_data_size(data, event);
header->size += sizeof(u64); /* size */
header_size += sizeof(u64); /* size */
/*
* Given the 16bit nature of header::size, an AUX sample can
@ -7727,14 +7738,26 @@ void perf_prepare_sample(struct perf_event_header *header,
* Make sure this doesn't happen by using up to U16_MAX bytes
* per sample in total (rounded down to 8 byte boundary).
*/
size = min_t(size_t, U16_MAX - header->size,
size = min_t(size_t, U16_MAX - header_size,
event->attr.aux_sample_size);
size = rounddown(size, 8);
size = perf_prepare_sample_aux(event, data, size);
WARN_ON_ONCE(size + header->size > U16_MAX);
header->size += size;
WARN_ON_ONCE(size + header_size > U16_MAX);
data->dyn_size += size + sizeof(u64); /* size above */
data->sample_flags |= PERF_SAMPLE_AUX;
}
}
void perf_prepare_header(struct perf_event_header *header,
struct perf_sample_data *data,
struct perf_event *event,
struct pt_regs *regs)
{
header->type = PERF_RECORD_SAMPLE;
header->size = perf_sample_data_size(data, event);
header->misc = perf_misc_flags(regs);
/*
* If you're adding more sample types here, you likely need to do
* something about the overflowing header::size, like repurpose the
@ -7762,7 +7785,8 @@ __perf_event_output(struct perf_event *event,
/* protect the callchain buffers */
rcu_read_lock();
perf_prepare_sample(&header, data, event, regs);
perf_prepare_sample(data, event, regs);
perf_prepare_header(&header, data, event, regs);
err = output_begin(&handle, data, event, header.size);
if (err)
@ -10120,8 +10144,7 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
};
perf_sample_data_init(&data, 0, 0);
data.raw = &raw;
data.sample_flags |= PERF_SAMPLE_RAW;
perf_sample_save_raw_data(&data, &raw);
perf_trace_buf_update(record, event_type);
@ -10328,13 +10351,7 @@ static void bpf_overflow_handler(struct perf_event *event,
rcu_read_lock();
prog = READ_ONCE(event->prog);
if (prog) {
if (prog->call_get_stack &&
(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
!(data->sample_flags & PERF_SAMPLE_CALLCHAIN)) {
data->callchain = perf_callchain(event, regs);
data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
}
perf_prepare_sample(data, event, regs);
ret = bpf_prog_run(prog, &ctx);
}
rcu_read_unlock();

View File

@ -687,8 +687,7 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
}
perf_sample_data_init(sd, 0, 0);
sd->raw = &raw;
sd->sample_flags |= PERF_SAMPLE_RAW;
perf_sample_save_raw_data(sd, &raw);
err = __bpf_perf_event_output(regs, map, flags, sd);
@ -746,8 +745,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
perf_fetch_caller_regs(regs);
perf_sample_data_init(sd, 0, 0);
sd->raw = &raw;
sd->sample_flags |= PERF_SAMPLE_RAW;
perf_sample_save_raw_data(sd, &raw);
ret = __bpf_perf_event_output(regs, map, flags, sd);
out: