mirror of
https://github.com/torvalds/linux.git
synced 2024-11-10 22:21:40 +00:00
The latest perf updates in this cycle are:
- Optimize perf_sample_data layout - Prepare sample data handling for BPF integration - Update the x86 PMU driver for Intel Meteor Lake - Restructure the x86 uncore code to fix a SPR (Sapphire Rapids) discovery breakage - Fix the x86 Zhaoxin PMU driver - Cleanups Signed-off-by: Ingo Molnar <mingo@kernel.org> -----BEGIN PGP SIGNATURE----- iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmPzaHgRHG1pbmdvQGtl cm5lbC5vcmcACgkQEnMQ0APhK1jYQg/+KRfobCevMQlZVnz09T3SsJ4ahJ587BL6 g2C6kobyUNfeChpFVroBkTR+yCb6Mq4xGr2nda9+2E978BYu9eanpx/u/bXNQ6NU 6YhLwgRrlFXonYn07kFfUJeELZ0W+zpPvymEN1KhTQWcrgXDfXRt2VfMwNsVxGRF ZRyCWK+UOzSMU22FtW3I/xVLBB0vio9Y6wRC5QOpDVW5YtGwQGust7GJ53JPK43J m2soJvWORauT+v0aqc7ggOtKd6pahVoXrDrbktxtq9N0ZGI+PubVCGevex++cXm/ B3QSf6VcMMuU6pfzxiEwRa8Whrc3XFeSDEfvMjC5v3becGNkdNBnGOJzYprwgRZJ irb6/dSrv5P2lj6WphsO1Wzcm7EoWh8M7DVOMh/13Y/oODRdOrv48112Don9UURC EPyvzAzizqdwdDopUmfiqUwuAXqb8uPZqCgmlz/NJkVz1/ijlfrmLgeDuf0vI7Aq HznzzRwjFHzyCH7D+rtonFh3JDaqgaouY76tpC5yTtzKbZPlFT8kzeCvqkTMnGgH czZnSNc/kBup0HDkNSlthK+TyrMXWKeVa8KQSY1E0NJHO4IBBCMzZywSoAaeofQK hqfQyofX9XHmuHhCA4yIfv1XkZGlBTxpPAyDdHjgs9iJTsodSYMs8ESY08eW8DXn Ld/35O6SylM= =ztUT -----END PGP SIGNATURE----- Merge tag 'perf-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull perf updates from Ingo Molnar: - Optimize perf_sample_data layout - Prepare sample data handling for BPF integration - Update the x86 PMU driver for Intel Meteor Lake - Restructure the x86 uncore code to fix a SPR (Sapphire Rapids) discovery breakage - Fix the x86 Zhaoxin PMU driver - Cleanups * tag 'perf-core-2023-02-20' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (27 commits) perf/x86/intel/uncore: Add Meteor Lake support x86/perf/zhaoxin: Add stepping check for ZXC perf/x86/intel/ds: Fix the conversion from TSC to perf time perf/x86/uncore: Don't WARN_ON_ONCE() for a broken discovery table perf/x86/uncore: Add a quirk for UPI on SPR perf/x86/uncore: Ignore broken units in discovery table perf/x86/uncore: Fix 
potential NULL pointer in uncore_get_alias_name perf/x86/uncore: Factor out uncore_device_to_die() perf/core: Call perf_prepare_sample() before running BPF perf/core: Introduce perf_prepare_header() perf/core: Do not pass header for sample ID init perf/core: Set data->sample_flags in perf_prepare_sample() perf/core: Add perf_sample_save_brstack() helper perf/core: Add perf_sample_save_raw_data() helper perf/core: Add perf_sample_save_callchain() helper perf/core: Save the dynamic parts of sample data size x86/kprobes: Use switch-case for 0xFF opcodes in prepare_emulation perf/core: Change the layout of perf_sample_data perf/x86/msr: Add Meteor Lake support perf/x86/cstate: Add Meteor Lake support ...
This commit is contained in:
commit
a2f0e7eee1
@ -2313,8 +2313,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
|
||||
struct cpu_hw_events *cpuhw;
|
||||
cpuhw = this_cpu_ptr(&cpu_hw_events);
|
||||
power_pmu_bhrb_read(event, cpuhw);
|
||||
data.br_stack = &cpuhw->bhrb_stack;
|
||||
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
|
||||
perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack);
|
||||
}
|
||||
|
||||
if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
|
||||
|
@ -662,9 +662,7 @@ static int cfdiag_push_sample(struct perf_event *event,
|
||||
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
|
||||
raw.frag.size = cpuhw->usedss;
|
||||
raw.frag.data = cpuhw->stop;
|
||||
raw.size = raw.frag.size;
|
||||
data.raw = &raw;
|
||||
data.sample_flags |= PERF_SAMPLE_RAW;
|
||||
perf_sample_save_raw_data(&data, &raw);
|
||||
}
|
||||
|
||||
overflow = perf_event_overflow(event, &data, &regs);
|
||||
|
@ -672,7 +672,8 @@ static void cpumsf_output_event_pid(struct perf_event *event,
|
||||
/* Protect callchain buffers, tasks */
|
||||
rcu_read_lock();
|
||||
|
||||
perf_prepare_sample(&header, data, event, regs);
|
||||
perf_prepare_sample(data, event, regs);
|
||||
perf_prepare_header(&header, data, event, regs);
|
||||
if (perf_output_begin(&handle, data, event, header.size))
|
||||
goto out;
|
||||
|
||||
|
@ -362,9 +362,7 @@ static int paicrypt_push_sample(void)
|
||||
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
|
||||
raw.frag.size = rawsize;
|
||||
raw.frag.data = cpump->save;
|
||||
raw.size = raw.frag.size;
|
||||
data.raw = &raw;
|
||||
data.sample_flags |= PERF_SAMPLE_RAW;
|
||||
perf_sample_save_raw_data(&data, &raw);
|
||||
}
|
||||
|
||||
overflow = perf_event_overflow(event, &data, &regs);
|
||||
|
@ -451,9 +451,7 @@ static int paiext_push_sample(void)
|
||||
if (event->attr.sample_type & PERF_SAMPLE_RAW) {
|
||||
raw.frag.size = rawsize;
|
||||
raw.frag.data = cpump->save;
|
||||
raw.size = raw.frag.size;
|
||||
data.raw = &raw;
|
||||
data.sample_flags |= PERF_SAMPLE_RAW;
|
||||
perf_sample_save_raw_data(&data, &raw);
|
||||
}
|
||||
|
||||
overflow = perf_event_overflow(event, &data, &regs);
|
||||
|
@ -928,10 +928,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
|
||||
if (!x86_perf_event_set_period(event))
|
||||
continue;
|
||||
|
||||
if (has_branch_stack(event)) {
|
||||
data.br_stack = &cpuc->lbr_stack;
|
||||
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
|
||||
}
|
||||
if (has_branch_stack(event))
|
||||
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
|
||||
|
||||
if (perf_event_overflow(event, &data, regs))
|
||||
x86_pmu_stop(event, 0);
|
||||
|
@ -1110,8 +1110,7 @@ fail:
|
||||
.data = ibs_data.data,
|
||||
},
|
||||
};
|
||||
data.raw = &raw;
|
||||
data.sample_flags |= PERF_SAMPLE_RAW;
|
||||
perf_sample_save_raw_data(&data, &raw);
|
||||
}
|
||||
|
||||
if (perf_ibs == &perf_ibs_op)
|
||||
@ -1122,10 +1121,8 @@ fail:
|
||||
* recorded as part of interrupt regs. Thus we need to use rip from
|
||||
* interrupt regs while unwinding call stack.
|
||||
*/
|
||||
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
|
||||
data.callchain = perf_callchain(event, iregs);
|
||||
data.sample_flags |= PERF_SAMPLE_CALLCHAIN;
|
||||
}
|
||||
if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
|
||||
perf_sample_save_callchain(&data, event, iregs);
|
||||
|
||||
throttle = perf_event_overflow(event, &data, &regs);
|
||||
out:
|
||||
|
@ -2119,6 +2119,16 @@ static struct extra_reg intel_grt_extra_regs[] __read_mostly = {
|
||||
EVENT_EXTRA_END
|
||||
};
|
||||
|
||||
static struct extra_reg intel_cmt_extra_regs[] __read_mostly = {
|
||||
/* must define OFFCORE_RSP_X first, see intel_fixup_er() */
|
||||
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff3ffffffffffull, RSP_0),
|
||||
INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xff3ffffffffffull, RSP_1),
|
||||
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x5d0),
|
||||
INTEL_UEVENT_EXTRA_REG(0x0127, MSR_SNOOP_RSP_0, 0xffffffffffffffffull, SNOOP_0),
|
||||
INTEL_UEVENT_EXTRA_REG(0x0227, MSR_SNOOP_RSP_1, 0xffffffffffffffffull, SNOOP_1),
|
||||
EVENT_EXTRA_END
|
||||
};
|
||||
|
||||
#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */
|
||||
#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */
|
||||
#define KNL_MCDRAM_LOCAL BIT_ULL(21)
|
||||
@ -3026,10 +3036,8 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
|
||||
|
||||
perf_sample_data_init(&data, 0, event->hw.last_period);
|
||||
|
||||
if (has_branch_stack(event)) {
|
||||
data.br_stack = &cpuc->lbr_stack;
|
||||
data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
|
||||
}
|
||||
if (has_branch_stack(event))
|
||||
perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
|
||||
|
||||
if (perf_event_overflow(event, &data, regs))
|
||||
x86_pmu_stop(event, 0);
|
||||
@ -4182,6 +4190,12 @@ static int hsw_hw_config(struct perf_event *event)
|
||||
static struct event_constraint counter0_constraint =
|
||||
INTEL_ALL_EVENT_CONSTRAINT(0, 0x1);
|
||||
|
||||
static struct event_constraint counter1_constraint =
|
||||
INTEL_ALL_EVENT_CONSTRAINT(0, 0x2);
|
||||
|
||||
static struct event_constraint counter0_1_constraint =
|
||||
INTEL_ALL_EVENT_CONSTRAINT(0, 0x3);
|
||||
|
||||
static struct event_constraint counter2_constraint =
|
||||
EVENT_CONSTRAINT(0, 0x4, 0);
|
||||
|
||||
@ -4191,6 +4205,12 @@ static struct event_constraint fixed0_constraint =
|
||||
static struct event_constraint fixed0_counter0_constraint =
|
||||
INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL);
|
||||
|
||||
static struct event_constraint fixed0_counter0_1_constraint =
|
||||
INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000003ULL);
|
||||
|
||||
static struct event_constraint counters_1_7_constraint =
|
||||
INTEL_ALL_EVENT_CONSTRAINT(0, 0xfeULL);
|
||||
|
||||
static struct event_constraint *
|
||||
hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
|
||||
struct perf_event *event)
|
||||
@ -4322,6 +4342,78 @@ adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
|
||||
return &emptyconstraint;
|
||||
}
|
||||
|
||||
static struct event_constraint *
|
||||
cmt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
|
||||
struct perf_event *event)
|
||||
{
|
||||
struct event_constraint *c;
|
||||
|
||||
c = intel_get_event_constraints(cpuc, idx, event);
|
||||
|
||||
/*
|
||||
* The :ppp indicates the Precise Distribution (PDist) facility, which
|
||||
* is only supported on the GP counter 0 & 1 and Fixed counter 0.
|
||||
* If a :ppp event which is not available on the above eligible counters,
|
||||
* error out.
|
||||
*/
|
||||
if (event->attr.precise_ip == 3) {
|
||||
/* Force instruction:ppp on PMC0, 1 and Fixed counter 0 */
|
||||
if (constraint_match(&fixed0_constraint, event->hw.config))
|
||||
return &fixed0_counter0_1_constraint;
|
||||
|
||||
switch (c->idxmsk64 & 0x3ull) {
|
||||
case 0x1:
|
||||
return &counter0_constraint;
|
||||
case 0x2:
|
||||
return &counter1_constraint;
|
||||
case 0x3:
|
||||
return &counter0_1_constraint;
|
||||
}
|
||||
return &emptyconstraint;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
static struct event_constraint *
|
||||
rwc_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
|
||||
struct perf_event *event)
|
||||
{
|
||||
struct event_constraint *c;
|
||||
|
||||
c = spr_get_event_constraints(cpuc, idx, event);
|
||||
|
||||
/* The Retire Latency is not supported by the fixed counter 0. */
|
||||
if (event->attr.precise_ip &&
|
||||
(event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE) &&
|
||||
constraint_match(&fixed0_constraint, event->hw.config)) {
|
||||
/*
|
||||
* The Instruction PDIR is only available
|
||||
* on the fixed counter 0. Error out for this case.
|
||||
*/
|
||||
if (event->attr.precise_ip == 3)
|
||||
return &emptyconstraint;
|
||||
return &counters_1_7_constraint;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
static struct event_constraint *
|
||||
mtl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
|
||||
struct perf_event *event)
|
||||
{
|
||||
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
|
||||
|
||||
if (pmu->cpu_type == hybrid_big)
|
||||
return rwc_get_event_constraints(cpuc, idx, event);
|
||||
if (pmu->cpu_type == hybrid_small)
|
||||
return cmt_get_event_constraints(cpuc, idx, event);
|
||||
|
||||
WARN_ON(1);
|
||||
return &emptyconstraint;
|
||||
}
|
||||
|
||||
static int adl_hw_config(struct perf_event *event)
|
||||
{
|
||||
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
|
||||
@ -4494,6 +4586,25 @@ static void flip_smm_bit(void *data)
|
||||
}
|
||||
}
|
||||
|
||||
static void intel_pmu_check_num_counters(int *num_counters,
|
||||
int *num_counters_fixed,
|
||||
u64 *intel_ctrl, u64 fixed_mask);
|
||||
|
||||
static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
|
||||
{
|
||||
unsigned int sub_bitmaps = cpuid_eax(ARCH_PERFMON_EXT_LEAF);
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
|
||||
if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) {
|
||||
cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
|
||||
&eax, &ebx, &ecx, &edx);
|
||||
pmu->num_counters = fls(eax);
|
||||
pmu->num_counters_fixed = fls(ebx);
|
||||
intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed,
|
||||
&pmu->intel_ctrl, ebx);
|
||||
}
|
||||
}
|
||||
|
||||
static bool init_hybrid_pmu(int cpu)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
|
||||
@ -4519,6 +4630,9 @@ static bool init_hybrid_pmu(int cpu)
|
||||
if (!cpumask_empty(&pmu->supported_cpus))
|
||||
goto end;
|
||||
|
||||
if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
|
||||
update_pmu_cap(pmu);
|
||||
|
||||
if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed))
|
||||
return false;
|
||||
|
||||
@ -5463,6 +5577,12 @@ static struct attribute *adl_hybrid_mem_attrs[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *mtl_hybrid_mem_attrs[] = {
|
||||
EVENT_PTR(mem_ld_adl),
|
||||
EVENT_PTR(mem_st_adl),
|
||||
NULL
|
||||
};
|
||||
|
||||
EVENT_ATTR_STR_HYBRID(tx-start, tx_start_adl, "event=0xc9,umask=0x1", hybrid_big);
|
||||
EVENT_ATTR_STR_HYBRID(tx-commit, tx_commit_adl, "event=0xc9,umask=0x2", hybrid_big);
|
||||
EVENT_ATTR_STR_HYBRID(tx-abort, tx_abort_adl, "event=0xc9,umask=0x4", hybrid_big);
|
||||
@ -5490,20 +5610,40 @@ FORMAT_ATTR_HYBRID(offcore_rsp, hybrid_big_small);
|
||||
FORMAT_ATTR_HYBRID(ldlat, hybrid_big_small);
|
||||
FORMAT_ATTR_HYBRID(frontend, hybrid_big);
|
||||
|
||||
#define ADL_HYBRID_RTM_FORMAT_ATTR \
|
||||
FORMAT_HYBRID_PTR(in_tx), \
|
||||
FORMAT_HYBRID_PTR(in_tx_cp)
|
||||
|
||||
#define ADL_HYBRID_FORMAT_ATTR \
|
||||
FORMAT_HYBRID_PTR(offcore_rsp), \
|
||||
FORMAT_HYBRID_PTR(ldlat), \
|
||||
FORMAT_HYBRID_PTR(frontend)
|
||||
|
||||
static struct attribute *adl_hybrid_extra_attr_rtm[] = {
|
||||
FORMAT_HYBRID_PTR(in_tx),
|
||||
FORMAT_HYBRID_PTR(in_tx_cp),
|
||||
FORMAT_HYBRID_PTR(offcore_rsp),
|
||||
FORMAT_HYBRID_PTR(ldlat),
|
||||
FORMAT_HYBRID_PTR(frontend),
|
||||
NULL,
|
||||
ADL_HYBRID_RTM_FORMAT_ATTR,
|
||||
ADL_HYBRID_FORMAT_ATTR,
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute *adl_hybrid_extra_attr[] = {
|
||||
FORMAT_HYBRID_PTR(offcore_rsp),
|
||||
FORMAT_HYBRID_PTR(ldlat),
|
||||
FORMAT_HYBRID_PTR(frontend),
|
||||
NULL,
|
||||
ADL_HYBRID_FORMAT_ATTR,
|
||||
NULL
|
||||
};
|
||||
|
||||
PMU_FORMAT_ATTR_SHOW(snoop_rsp, "config1:0-63");
|
||||
FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small);
|
||||
|
||||
static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
|
||||
ADL_HYBRID_RTM_FORMAT_ATTR,
|
||||
ADL_HYBRID_FORMAT_ATTR,
|
||||
FORMAT_HYBRID_PTR(snoop_rsp),
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute *mtl_hybrid_extra_attr[] = {
|
||||
ADL_HYBRID_FORMAT_ATTR,
|
||||
FORMAT_HYBRID_PTR(snoop_rsp),
|
||||
NULL
|
||||
};
|
||||
|
||||
static bool is_attr_for_this_pmu(struct kobject *kobj, struct attribute *attr)
|
||||
@ -5725,6 +5865,12 @@ static void intel_pmu_check_hybrid_pmus(u64 fixed_mask)
|
||||
}
|
||||
}
|
||||
|
||||
static __always_inline bool is_mtl(u8 x86_model)
|
||||
{
|
||||
return (x86_model == INTEL_FAM6_METEORLAKE) ||
|
||||
(x86_model == INTEL_FAM6_METEORLAKE_L);
|
||||
}
|
||||
|
||||
__init int intel_pmu_init(void)
|
||||
{
|
||||
struct attribute **extra_skl_attr = &empty_attrs;
|
||||
@ -6382,6 +6528,8 @@ __init int intel_pmu_init(void)
|
||||
case INTEL_FAM6_RAPTORLAKE:
|
||||
case INTEL_FAM6_RAPTORLAKE_P:
|
||||
case INTEL_FAM6_RAPTORLAKE_S:
|
||||
case INTEL_FAM6_METEORLAKE:
|
||||
case INTEL_FAM6_METEORLAKE_L:
|
||||
/*
|
||||
* Alder Lake has 2 types of CPU, core and atom.
|
||||
*
|
||||
@ -6401,9 +6549,7 @@ __init int intel_pmu_init(void)
|
||||
x86_pmu.flags |= PMU_FL_HAS_RSP_1;
|
||||
x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
|
||||
x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
|
||||
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
|
||||
x86_pmu.lbr_pt_coexist = true;
|
||||
intel_pmu_pebs_data_source_adl();
|
||||
x86_pmu.pebs_latency_data = adl_latency_data_small;
|
||||
x86_pmu.num_topdown_events = 8;
|
||||
static_call_update(intel_pmu_update_topdown_event,
|
||||
@ -6490,8 +6636,22 @@ __init int intel_pmu_init(void)
|
||||
pmu->event_constraints = intel_slm_event_constraints;
|
||||
pmu->pebs_constraints = intel_grt_pebs_event_constraints;
|
||||
pmu->extra_regs = intel_grt_extra_regs;
|
||||
pr_cont("Alderlake Hybrid events, ");
|
||||
name = "alderlake_hybrid";
|
||||
if (is_mtl(boot_cpu_data.x86_model)) {
|
||||
x86_pmu.pebs_latency_data = mtl_latency_data_small;
|
||||
extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
|
||||
mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr;
|
||||
mem_attr = mtl_hybrid_mem_attrs;
|
||||
intel_pmu_pebs_data_source_mtl();
|
||||
x86_pmu.get_event_constraints = mtl_get_event_constraints;
|
||||
pmu->extra_regs = intel_cmt_extra_regs;
|
||||
pr_cont("Meteorlake Hybrid events, ");
|
||||
name = "meteorlake_hybrid";
|
||||
} else {
|
||||
x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX;
|
||||
intel_pmu_pebs_data_source_adl();
|
||||
pr_cont("Alderlake Hybrid events, ");
|
||||
name = "alderlake_hybrid";
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
@ -6606,6 +6766,9 @@ __init int intel_pmu_init(void)
|
||||
if (is_hybrid())
|
||||
intel_pmu_check_hybrid_pmus((u64)fixed_mask);
|
||||
|
||||
if (x86_pmu.intel_cap.pebs_timing_info)
|
||||
x86_pmu.flags |= PMU_FL_RETIRE_LATENCY;
|
||||
|
||||
intel_aux_output_init();
|
||||
|
||||
return 0;
|
||||
|
@ -2,12 +2,14 @@
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sched/clock.h>
|
||||
|
||||
#include <asm/cpu_entry_area.h>
|
||||
#include <asm/perf_event.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/insn.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/timer.h>
|
||||
|
||||
#include "../perf_event.h"
|
||||
|
||||
@ -53,6 +55,13 @@ union intel_x86_pebs_dse {
|
||||
unsigned int st_lat_locked:1;
|
||||
unsigned int ld_reserved3:26;
|
||||
};
|
||||
struct {
|
||||
unsigned int mtl_dse:5;
|
||||
unsigned int mtl_locked:1;
|
||||
unsigned int mtl_stlb_miss:1;
|
||||
unsigned int mtl_fwd_blk:1;
|
||||
unsigned int ld_reserved4:24;
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
@ -135,6 +144,29 @@ void __init intel_pmu_pebs_data_source_adl(void)
|
||||
__intel_pmu_pebs_data_source_grt(data_source);
|
||||
}
|
||||
|
||||
static void __init intel_pmu_pebs_data_source_cmt(u64 *data_source)
|
||||
{
|
||||
data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
|
||||
data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
|
||||
data_source[0x0a] = OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE);
|
||||
data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE);
|
||||
data_source[0x0c] = OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD);
|
||||
data_source[0x0d] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM);
|
||||
}
|
||||
|
||||
void __init intel_pmu_pebs_data_source_mtl(void)
|
||||
{
|
||||
u64 *data_source;
|
||||
|
||||
data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
|
||||
memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
|
||||
__intel_pmu_pebs_data_source_skl(false, data_source);
|
||||
|
||||
data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
|
||||
memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
|
||||
intel_pmu_pebs_data_source_cmt(data_source);
|
||||
}
|
||||
|
||||
static u64 precise_store_data(u64 status)
|
||||
{
|
||||
union intel_x86_pebs_dse dse;
|
||||
@ -219,24 +251,19 @@ static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock)
|
||||
}
|
||||
|
||||
/* Retrieve the latency data for e-core of ADL */
|
||||
u64 adl_latency_data_small(struct perf_event *event, u64 status)
|
||||
static u64 __adl_latency_data_small(struct perf_event *event, u64 status,
|
||||
u8 dse, bool tlb, bool lock, bool blk)
|
||||
{
|
||||
union intel_x86_pebs_dse dse;
|
||||
u64 val;
|
||||
|
||||
WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big);
|
||||
|
||||
dse.val = status;
|
||||
dse &= PERF_PEBS_DATA_SOURCE_MASK;
|
||||
val = hybrid_var(event->pmu, pebs_data_source)[dse];
|
||||
|
||||
val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse];
|
||||
pebs_set_tlb_lock(&val, tlb, lock);
|
||||
|
||||
/*
|
||||
* For the atom core on ADL,
|
||||
* bit 4: lock, bit 5: TLB access.
|
||||
*/
|
||||
pebs_set_tlb_lock(&val, dse.ld_locked, dse.ld_stlb_miss);
|
||||
|
||||
if (dse.ld_data_blk)
|
||||
if (blk)
|
||||
val |= P(BLK, DATA);
|
||||
else
|
||||
val |= P(BLK, NA);
|
||||
@ -244,6 +271,29 @@ u64 adl_latency_data_small(struct perf_event *event, u64 status)
|
||||
return val;
|
||||
}
|
||||
|
||||
u64 adl_latency_data_small(struct perf_event *event, u64 status)
|
||||
{
|
||||
union intel_x86_pebs_dse dse;
|
||||
|
||||
dse.val = status;
|
||||
|
||||
return __adl_latency_data_small(event, status, dse.ld_dse,
|
||||
dse.ld_locked, dse.ld_stlb_miss,
|
||||
dse.ld_data_blk);
|
||||
}
|
||||
|
||||
/* Retrieve the latency data for e-core of MTL */
|
||||
u64 mtl_latency_data_small(struct perf_event *event, u64 status)
|
||||
{
|
||||
union intel_x86_pebs_dse dse;
|
||||
|
||||
dse.val = status;
|
||||
|
||||
return __adl_latency_data_small(event, status, dse.mtl_dse,
|
||||
dse.mtl_stlb_miss, dse.mtl_locked,
|
||||
dse.mtl_fwd_blk);
|
||||
}
|
||||
|
||||
static u64 load_latency_data(struct perf_event *event, u64 status)
|
||||
{
|
||||
union intel_x86_pebs_dse dse;
|
||||
@ -759,7 +809,8 @@ int intel_pmu_drain_bts_buffer(void)
|
||||
* the sample.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
perf_prepare_sample(&header, &data, event, &regs);
|
||||
perf_prepare_sample(&data, event, &regs);
|
||||
perf_prepare_header(&header, &data, event, &regs);
|
||||
|
||||
if (perf_output_begin(&handle, &data, event,
|
||||
header.size * (top - base - skip)))
|
||||
@ -1519,6 +1570,27 @@ static u64 get_data_src(struct perf_event *event, u64 aux)
|
||||
return val;
|
||||
}
|
||||
|
||||
static void setup_pebs_time(struct perf_event *event,
|
||||
struct perf_sample_data *data,
|
||||
u64 tsc)
|
||||
{
|
||||
/* Converting to a user-defined clock is not supported yet. */
|
||||
if (event->attr.use_clockid != 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Doesn't support the conversion when the TSC is unstable.
|
||||
* The TSC unstable case is a corner case and very unlikely to
|
||||
* happen. If it happens, the TSC in a PEBS record will be
|
||||
* dropped and fall back to perf_event_clock().
|
||||
*/
|
||||
if (!using_native_sched_clock() || !sched_clock_stable())
|
||||
return;
|
||||
|
||||
data->time = native_sched_clock_from_tsc(tsc) + __sched_clock_offset;
|
||||
data->sample_flags |= PERF_SAMPLE_TIME;
|
||||
}
|
||||
|
||||
#define PERF_SAMPLE_ADDR_TYPE (PERF_SAMPLE_ADDR | \
|
||||
PERF_SAMPLE_PHYS_ADDR | \
|
||||
PERF_SAMPLE_DATA_PAGE_SIZE)
|
||||
@ -1569,10 +1641,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
|
||||
* previous PMI context or an (I)RET happened between the record and
|
||||
* PMI.
|
||||
*/
|
||||
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
|
||||
data->callchain = perf_callchain(event, iregs);
|
||||
data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
|
||||
}
|
||||
if (sample_type & PERF_SAMPLE_CALLCHAIN)
|
||||
perf_sample_save_callchain(data, event, iregs);
|
||||
|
||||
/*
|
||||
* We use the interrupt regs as a base because the PEBS record does not
|
||||
@ -1668,16 +1738,11 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
|
||||
*
|
||||
* We can only do this for the default trace clock.
|
||||
*/
|
||||
if (x86_pmu.intel_cap.pebs_format >= 3 &&
|
||||
event->attr.use_clockid == 0) {
|
||||
data->time = native_sched_clock_from_tsc(pebs->tsc);
|
||||
data->sample_flags |= PERF_SAMPLE_TIME;
|
||||
}
|
||||
if (x86_pmu.intel_cap.pebs_format >= 3)
|
||||
setup_pebs_time(event, data, pebs->tsc);
|
||||
|
||||
if (has_branch_stack(event)) {
|
||||
data->br_stack = &cpuc->lbr_stack;
|
||||
data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
|
||||
}
|
||||
if (has_branch_stack(event))
|
||||
perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
|
||||
}
|
||||
|
||||
static void adaptive_pebs_save_regs(struct pt_regs *regs,
|
||||
@ -1705,6 +1770,7 @@ static void adaptive_pebs_save_regs(struct pt_regs *regs,
|
||||
|
||||
#define PEBS_LATENCY_MASK 0xffff
|
||||
#define PEBS_CACHE_LATENCY_OFFSET 32
|
||||
#define PEBS_RETIRE_LATENCY_OFFSET 32
|
||||
|
||||
/*
|
||||
* With adaptive PEBS the layout depends on what fields are configured.
|
||||
@ -1735,10 +1801,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||
perf_sample_data_init(data, 0, event->hw.last_period);
|
||||
data->period = event->hw.last_period;
|
||||
|
||||
if (event->attr.use_clockid == 0) {
|
||||
data->time = native_sched_clock_from_tsc(basic->tsc);
|
||||
data->sample_flags |= PERF_SAMPLE_TIME;
|
||||
}
|
||||
setup_pebs_time(event, data, basic->tsc);
|
||||
|
||||
/*
|
||||
* We must however always use iregs for the unwinder to stay sane; the
|
||||
@ -1746,16 +1809,17 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||
* previous PMI context or an (I)RET happened between the record and
|
||||
* PMI.
|
||||
*/
|
||||
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
|
||||
data->callchain = perf_callchain(event, iregs);
|
||||
data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
|
||||
}
|
||||
if (sample_type & PERF_SAMPLE_CALLCHAIN)
|
||||
perf_sample_save_callchain(data, event, iregs);
|
||||
|
||||
*regs = *iregs;
|
||||
/* The ip in basic is EventingIP */
|
||||
set_linear_ip(regs, basic->ip);
|
||||
regs->flags = PERF_EFLAGS_EXACT;
|
||||
|
||||
if ((sample_type & PERF_SAMPLE_WEIGHT_STRUCT) && (x86_pmu.flags & PMU_FL_RETIRE_LATENCY))
|
||||
data->weight.var3_w = format_size >> PEBS_RETIRE_LATENCY_OFFSET & PEBS_LATENCY_MASK;
|
||||
|
||||
/*
|
||||
* The record for MEMINFO is in front of GP
|
||||
* But PERF_SAMPLE_TRANSACTION needs gprs->ax.
|
||||
@ -1835,8 +1899,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||
|
||||
if (has_branch_stack(event)) {
|
||||
intel_pmu_store_pebs_lbrs(lbr);
|
||||
data->br_stack = &cpuc->lbr_stack;
|
||||
data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
|
||||
perf_sample_save_brstack(data, event, &cpuc->lbr_stack);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1606,12 +1606,10 @@ clear_arch_lbr:
|
||||
*/
|
||||
void x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
|
||||
{
|
||||
int lbr_fmt = x86_pmu.intel_cap.lbr_format;
|
||||
|
||||
lbr->nr = x86_pmu.lbr_nr;
|
||||
lbr->from = x86_pmu.lbr_from;
|
||||
lbr->to = x86_pmu.lbr_to;
|
||||
lbr->info = (lbr_fmt == LBR_FORMAT_INFO) ? x86_pmu.lbr_info : 0;
|
||||
lbr->info = x86_pmu.lbr_info;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(x86_perf_get_lbr);
|
||||
|
||||
|
@ -65,6 +65,21 @@ int uncore_die_to_segment(int die)
|
||||
return bus ? pci_domain_nr(bus) : -EINVAL;
|
||||
}
|
||||
|
||||
int uncore_device_to_die(struct pci_dev *dev)
|
||||
{
|
||||
int node = pcibus_to_node(dev->bus);
|
||||
int cpu;
|
||||
|
||||
for_each_cpu(cpu, cpumask_of_pcibus(dev->bus)) {
|
||||
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
||||
|
||||
if (c->initialized && cpu_to_node(cpu) == node)
|
||||
return c->logical_die_id;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void uncore_free_pcibus_map(void)
|
||||
{
|
||||
struct pci2phy_map *map, *tmp;
|
||||
@ -842,6 +857,12 @@ static const struct attribute_group uncore_pmu_attr_group = {
|
||||
.attrs = uncore_pmu_attrs,
|
||||
};
|
||||
|
||||
static inline int uncore_get_box_id(struct intel_uncore_type *type,
|
||||
struct intel_uncore_pmu *pmu)
|
||||
{
|
||||
return type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx;
|
||||
}
|
||||
|
||||
void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
|
||||
{
|
||||
struct intel_uncore_type *type = pmu->type;
|
||||
@ -850,7 +871,7 @@ void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
|
||||
sprintf(pmu_name, "uncore_type_%u", type->type_id);
|
||||
else {
|
||||
sprintf(pmu_name, "uncore_type_%u_%d",
|
||||
type->type_id, type->box_ids[pmu->pmu_idx]);
|
||||
type->type_id, uncore_get_box_id(type, pmu));
|
||||
}
|
||||
}
|
||||
|
||||
@ -877,7 +898,7 @@ static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
|
||||
* Use the box ID from the discovery table if applicable.
|
||||
*/
|
||||
sprintf(pmu->name, "uncore_%s_%d", type->name,
|
||||
type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx);
|
||||
uncore_get_box_id(type, pmu));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1674,7 +1695,10 @@ struct intel_uncore_init_fun {
|
||||
void (*cpu_init)(void);
|
||||
int (*pci_init)(void);
|
||||
void (*mmio_init)(void);
|
||||
/* Discovery table is required */
|
||||
bool use_discovery;
|
||||
/* The units in the discovery table should be ignored. */
|
||||
int *uncore_units_ignore;
|
||||
};
|
||||
|
||||
static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
|
||||
@ -1765,6 +1789,11 @@ static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
|
||||
.mmio_init = adl_uncore_mmio_init,
|
||||
};
|
||||
|
||||
static const struct intel_uncore_init_fun mtl_uncore_init __initconst = {
|
||||
.cpu_init = mtl_uncore_cpu_init,
|
||||
.mmio_init = adl_uncore_mmio_init,
|
||||
};
|
||||
|
||||
static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
|
||||
.cpu_init = icx_uncore_cpu_init,
|
||||
.pci_init = icx_uncore_pci_init,
|
||||
@ -1782,6 +1811,7 @@ static const struct intel_uncore_init_fun spr_uncore_init __initconst = {
|
||||
.pci_init = spr_uncore_pci_init,
|
||||
.mmio_init = spr_uncore_mmio_init,
|
||||
.use_discovery = true,
|
||||
.uncore_units_ignore = spr_uncore_units_ignore,
|
||||
};
|
||||
|
||||
static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
|
||||
@ -1832,6 +1862,8 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
|
||||
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &mtl_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &mtl_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
|
||||
@ -1853,7 +1885,7 @@ static int __init intel_uncore_init(void)
|
||||
|
||||
id = x86_match_cpu(intel_uncore_match);
|
||||
if (!id) {
|
||||
if (!uncore_no_discover && intel_uncore_has_discovery_tables())
|
||||
if (!uncore_no_discover && intel_uncore_has_discovery_tables(NULL))
|
||||
uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init;
|
||||
else
|
||||
return -ENODEV;
|
||||
@ -1861,7 +1893,8 @@ static int __init intel_uncore_init(void)
|
||||
uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
|
||||
if (uncore_no_discover && uncore_init->use_discovery)
|
||||
return -ENODEV;
|
||||
if (uncore_init->use_discovery && !intel_uncore_has_discovery_tables())
|
||||
if (uncore_init->use_discovery &&
|
||||
!intel_uncore_has_discovery_tables(uncore_init->uncore_units_ignore))
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
|
@ -34,6 +34,8 @@
|
||||
|
||||
#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
|
||||
|
||||
#define UNCORE_IGNORE_END -1
|
||||
|
||||
struct pci_extra_dev {
|
||||
struct pci_dev *dev[UNCORE_EXTRA_PCI_DEV_MAX];
|
||||
};
|
||||
@ -208,6 +210,7 @@ struct pci2phy_map {
|
||||
struct pci2phy_map *__find_pci2phy_map(int segment);
|
||||
int uncore_pcibus_to_dieid(struct pci_bus *bus);
|
||||
int uncore_die_to_segment(int die);
|
||||
int uncore_device_to_die(struct pci_dev *dev);
|
||||
|
||||
ssize_t uncore_event_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf);
|
||||
@ -589,6 +592,7 @@ extern raw_spinlock_t pci2phy_map_lock;
|
||||
extern struct list_head pci2phy_map_head;
|
||||
extern struct pci_extra_dev *uncore_extra_pci_dev;
|
||||
extern struct event_constraint uncore_constraint_empty;
|
||||
extern int spr_uncore_units_ignore[];
|
||||
|
||||
/* uncore_snb.c */
|
||||
int snb_uncore_pci_init(void);
|
||||
@ -602,6 +606,7 @@ void skl_uncore_cpu_init(void);
|
||||
void icl_uncore_cpu_init(void);
|
||||
void tgl_uncore_cpu_init(void);
|
||||
void adl_uncore_cpu_init(void);
|
||||
void mtl_uncore_cpu_init(void);
|
||||
void tgl_uncore_mmio_init(void);
|
||||
void tgl_l_uncore_mmio_init(void);
|
||||
void adl_uncore_mmio_init(void);
|
||||
|
@ -33,7 +33,7 @@ static int logical_die_id;
|
||||
|
||||
static int get_device_die_id(struct pci_dev *dev)
|
||||
{
|
||||
int cpu, node = pcibus_to_node(dev->bus);
|
||||
int node = pcibus_to_node(dev->bus);
|
||||
|
||||
/*
|
||||
* If the NUMA info is not available, assume that the logical die id is
|
||||
@ -43,19 +43,7 @@ static int get_device_die_id(struct pci_dev *dev)
|
||||
if (node < 0)
|
||||
return logical_die_id++;
|
||||
|
||||
for_each_cpu(cpu, cpumask_of_node(node)) {
|
||||
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
||||
|
||||
if (c->initialized && cpu_to_node(cpu) == node)
|
||||
return c->logical_die_id;
|
||||
}
|
||||
|
||||
/*
|
||||
* All CPUs of a node may be offlined. For this case,
|
||||
* the PCI and MMIO type of uncore blocks which are
|
||||
* enumerated by the device will be unavailable.
|
||||
*/
|
||||
return -1;
|
||||
return uncore_device_to_die(dev);
|
||||
}
|
||||
|
||||
#define __node_2_type(cur) \
|
||||
@ -140,13 +128,21 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit,
|
||||
unsigned int *box_offset, *ids;
|
||||
int i;
|
||||
|
||||
if (WARN_ON_ONCE(!unit->ctl || !unit->ctl_offset || !unit->ctr_offset))
|
||||
if (!unit->ctl || !unit->ctl_offset || !unit->ctr_offset) {
|
||||
pr_info("Invalid address is detected for uncore type %d box %d, "
|
||||
"Disable the uncore unit.\n",
|
||||
unit->box_type, unit->box_id);
|
||||
return;
|
||||
}
|
||||
|
||||
if (parsed) {
|
||||
type = search_uncore_discovery_type(unit->box_type);
|
||||
if (WARN_ON_ONCE(!type))
|
||||
if (!type) {
|
||||
pr_info("A spurious uncore type %d is detected, "
|
||||
"Disable the uncore type.\n",
|
||||
unit->box_type);
|
||||
return;
|
||||
}
|
||||
/* Store the first box of each die */
|
||||
if (!type->box_ctrl_die[die])
|
||||
type->box_ctrl_die[die] = unit->ctl;
|
||||
@ -181,8 +177,12 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit,
|
||||
ids[i] = type->ids[i];
|
||||
box_offset[i] = type->box_offset[i];
|
||||
|
||||
if (WARN_ON_ONCE(unit->box_id == ids[i]))
|
||||
if (unit->box_id == ids[i]) {
|
||||
pr_info("Duplicate uncore type %d box ID %d is detected, "
|
||||
"Drop the duplicate uncore unit.\n",
|
||||
unit->box_type, unit->box_id);
|
||||
goto free_ids;
|
||||
}
|
||||
}
|
||||
ids[i] = unit->box_id;
|
||||
box_offset[i] = unit->ctl - type->box_ctrl;
|
||||
@ -202,8 +202,25 @@ free_box_offset:
|
||||
|
||||
}
|
||||
|
||||
static bool
|
||||
uncore_ignore_unit(struct uncore_unit_discovery *unit, int *ignore)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!ignore)
|
||||
return false;
|
||||
|
||||
for (i = 0; ignore[i] != UNCORE_IGNORE_END ; i++) {
|
||||
if (unit->box_type == ignore[i])
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static int parse_discovery_table(struct pci_dev *dev, int die,
|
||||
u32 bar_offset, bool *parsed)
|
||||
u32 bar_offset, bool *parsed,
|
||||
int *ignore)
|
||||
{
|
||||
struct uncore_global_discovery global;
|
||||
struct uncore_unit_discovery unit;
|
||||
@ -258,6 +275,9 @@ static int parse_discovery_table(struct pci_dev *dev, int die,
|
||||
if (unit.access_type >= UNCORE_ACCESS_MAX)
|
||||
continue;
|
||||
|
||||
if (uncore_ignore_unit(&unit, ignore))
|
||||
continue;
|
||||
|
||||
uncore_insert_box_info(&unit, die, *parsed);
|
||||
}
|
||||
|
||||
@ -266,7 +286,7 @@ static int parse_discovery_table(struct pci_dev *dev, int die,
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool intel_uncore_has_discovery_tables(void)
|
||||
bool intel_uncore_has_discovery_tables(int *ignore)
|
||||
{
|
||||
u32 device, val, entry_id, bar_offset;
|
||||
int die, dvsec = 0, ret = true;
|
||||
@ -302,7 +322,7 @@ bool intel_uncore_has_discovery_tables(void)
|
||||
if (die < 0)
|
||||
continue;
|
||||
|
||||
parse_discovery_table(dev, die, bar_offset, &parsed);
|
||||
parse_discovery_table(dev, die, bar_offset, &parsed, ignore);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -21,9 +21,15 @@
|
||||
/* Global discovery table size */
|
||||
#define UNCORE_DISCOVERY_GLOBAL_MAP_SIZE 0x20
|
||||
|
||||
#define UNCORE_DISCOVERY_PCI_DOMAIN(data) ((data >> 28) & 0x7)
|
||||
#define UNCORE_DISCOVERY_PCI_BUS(data) ((data >> 20) & 0xff)
|
||||
#define UNCORE_DISCOVERY_PCI_DEVFN(data) ((data >> 12) & 0xff)
|
||||
#define UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET 28
|
||||
#define UNCORE_DISCOVERY_PCI_DOMAIN(data) \
|
||||
((data >> UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET) & 0x7)
|
||||
#define UNCORE_DISCOVERY_PCI_BUS_OFFSET 20
|
||||
#define UNCORE_DISCOVERY_PCI_BUS(data) \
|
||||
((data >> UNCORE_DISCOVERY_PCI_BUS_OFFSET) & 0xff)
|
||||
#define UNCORE_DISCOVERY_PCI_DEVFN_OFFSET 12
|
||||
#define UNCORE_DISCOVERY_PCI_DEVFN(data) \
|
||||
((data >> UNCORE_DISCOVERY_PCI_DEVFN_OFFSET) & 0xff)
|
||||
#define UNCORE_DISCOVERY_PCI_BOX_CTRL(data) (data & 0xfff)
|
||||
|
||||
|
||||
@ -122,7 +128,7 @@ struct intel_uncore_discovery_type {
|
||||
unsigned int *box_offset; /* Box offset */
|
||||
};
|
||||
|
||||
bool intel_uncore_has_discovery_tables(void);
|
||||
bool intel_uncore_has_discovery_tables(int *ignore);
|
||||
void intel_uncore_clear_discovery_tables(void);
|
||||
void intel_uncore_generic_uncore_cpu_init(void);
|
||||
int intel_uncore_generic_uncore_pci_init(void);
|
||||
|
@ -109,6 +109,19 @@
|
||||
#define PCI_DEVICE_ID_INTEL_RPL_23_IMC 0xA728
|
||||
#define PCI_DEVICE_ID_INTEL_RPL_24_IMC 0xA729
|
||||
#define PCI_DEVICE_ID_INTEL_RPL_25_IMC 0xA72A
|
||||
#define PCI_DEVICE_ID_INTEL_MTL_1_IMC 0x7d00
|
||||
#define PCI_DEVICE_ID_INTEL_MTL_2_IMC 0x7d01
|
||||
#define PCI_DEVICE_ID_INTEL_MTL_3_IMC 0x7d02
|
||||
#define PCI_DEVICE_ID_INTEL_MTL_4_IMC 0x7d05
|
||||
#define PCI_DEVICE_ID_INTEL_MTL_5_IMC 0x7d10
|
||||
#define PCI_DEVICE_ID_INTEL_MTL_6_IMC 0x7d14
|
||||
#define PCI_DEVICE_ID_INTEL_MTL_7_IMC 0x7d15
|
||||
#define PCI_DEVICE_ID_INTEL_MTL_8_IMC 0x7d16
|
||||
#define PCI_DEVICE_ID_INTEL_MTL_9_IMC 0x7d21
|
||||
#define PCI_DEVICE_ID_INTEL_MTL_10_IMC 0x7d22
|
||||
#define PCI_DEVICE_ID_INTEL_MTL_11_IMC 0x7d23
|
||||
#define PCI_DEVICE_ID_INTEL_MTL_12_IMC 0x7d24
|
||||
#define PCI_DEVICE_ID_INTEL_MTL_13_IMC 0x7d28
|
||||
|
||||
|
||||
#define IMC_UNCORE_DEV(a) \
|
||||
@ -205,6 +218,32 @@
|
||||
#define ADL_UNC_ARB_PERFEVTSEL0 0x2FD0
|
||||
#define ADL_UNC_ARB_MSR_OFFSET 0x8
|
||||
|
||||
/* MTL Cbo register */
|
||||
#define MTL_UNC_CBO_0_PER_CTR0 0x2448
|
||||
#define MTL_UNC_CBO_0_PERFEVTSEL0 0x2442
|
||||
|
||||
/* MTL HAC_ARB register */
|
||||
#define MTL_UNC_HAC_ARB_CTR 0x2018
|
||||
#define MTL_UNC_HAC_ARB_CTRL 0x2012
|
||||
|
||||
/* MTL ARB register */
|
||||
#define MTL_UNC_ARB_CTR 0x2418
|
||||
#define MTL_UNC_ARB_CTRL 0x2412
|
||||
|
||||
/* MTL cNCU register */
|
||||
#define MTL_UNC_CNCU_FIXED_CTR 0x2408
|
||||
#define MTL_UNC_CNCU_FIXED_CTRL 0x2402
|
||||
#define MTL_UNC_CNCU_BOX_CTL 0x240e
|
||||
|
||||
/* MTL sNCU register */
|
||||
#define MTL_UNC_SNCU_FIXED_CTR 0x2008
|
||||
#define MTL_UNC_SNCU_FIXED_CTRL 0x2002
|
||||
#define MTL_UNC_SNCU_BOX_CTL 0x200e
|
||||
|
||||
/* MTL HAC_CBO register */
|
||||
#define MTL_UNC_HBO_CTR 0x2048
|
||||
#define MTL_UNC_HBO_CTRL 0x2042
|
||||
|
||||
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(chmask, chmask, "config:8-11");
|
||||
@ -598,6 +637,115 @@ void adl_uncore_cpu_init(void)
|
||||
uncore_msr_uncores = adl_msr_uncores;
|
||||
}
|
||||
|
||||
static struct intel_uncore_type mtl_uncore_cbox = {
|
||||
.name = "cbox",
|
||||
.num_counters = 2,
|
||||
.perf_ctr_bits = 48,
|
||||
.perf_ctr = MTL_UNC_CBO_0_PER_CTR0,
|
||||
.event_ctl = MTL_UNC_CBO_0_PERFEVTSEL0,
|
||||
.event_mask = ADL_UNC_RAW_EVENT_MASK,
|
||||
.msr_offset = SNB_UNC_CBO_MSR_OFFSET,
|
||||
.ops = &icl_uncore_msr_ops,
|
||||
.format_group = &adl_uncore_format_group,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type mtl_uncore_hac_arb = {
|
||||
.name = "hac_arb",
|
||||
.num_counters = 2,
|
||||
.num_boxes = 2,
|
||||
.perf_ctr_bits = 48,
|
||||
.perf_ctr = MTL_UNC_HAC_ARB_CTR,
|
||||
.event_ctl = MTL_UNC_HAC_ARB_CTRL,
|
||||
.event_mask = ADL_UNC_RAW_EVENT_MASK,
|
||||
.msr_offset = SNB_UNC_CBO_MSR_OFFSET,
|
||||
.ops = &icl_uncore_msr_ops,
|
||||
.format_group = &adl_uncore_format_group,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type mtl_uncore_arb = {
|
||||
.name = "arb",
|
||||
.num_counters = 2,
|
||||
.num_boxes = 2,
|
||||
.perf_ctr_bits = 48,
|
||||
.perf_ctr = MTL_UNC_ARB_CTR,
|
||||
.event_ctl = MTL_UNC_ARB_CTRL,
|
||||
.event_mask = ADL_UNC_RAW_EVENT_MASK,
|
||||
.msr_offset = SNB_UNC_CBO_MSR_OFFSET,
|
||||
.ops = &icl_uncore_msr_ops,
|
||||
.format_group = &adl_uncore_format_group,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type mtl_uncore_hac_cbox = {
|
||||
.name = "hac_cbox",
|
||||
.num_counters = 2,
|
||||
.num_boxes = 2,
|
||||
.perf_ctr_bits = 48,
|
||||
.perf_ctr = MTL_UNC_HBO_CTR,
|
||||
.event_ctl = MTL_UNC_HBO_CTRL,
|
||||
.event_mask = ADL_UNC_RAW_EVENT_MASK,
|
||||
.msr_offset = SNB_UNC_CBO_MSR_OFFSET,
|
||||
.ops = &icl_uncore_msr_ops,
|
||||
.format_group = &adl_uncore_format_group,
|
||||
};
|
||||
|
||||
static void mtl_uncore_msr_init_box(struct intel_uncore_box *box)
|
||||
{
|
||||
wrmsrl(uncore_msr_box_ctl(box), SNB_UNC_GLOBAL_CTL_EN);
|
||||
}
|
||||
|
||||
static struct intel_uncore_ops mtl_uncore_msr_ops = {
|
||||
.init_box = mtl_uncore_msr_init_box,
|
||||
.disable_event = snb_uncore_msr_disable_event,
|
||||
.enable_event = snb_uncore_msr_enable_event,
|
||||
.read_counter = uncore_msr_read_counter,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type mtl_uncore_cncu = {
|
||||
.name = "cncu",
|
||||
.num_counters = 1,
|
||||
.num_boxes = 1,
|
||||
.box_ctl = MTL_UNC_CNCU_BOX_CTL,
|
||||
.fixed_ctr_bits = 48,
|
||||
.fixed_ctr = MTL_UNC_CNCU_FIXED_CTR,
|
||||
.fixed_ctl = MTL_UNC_CNCU_FIXED_CTRL,
|
||||
.single_fixed = 1,
|
||||
.event_mask = SNB_UNC_CTL_EV_SEL_MASK,
|
||||
.format_group = &icl_uncore_clock_format_group,
|
||||
.ops = &mtl_uncore_msr_ops,
|
||||
.event_descs = icl_uncore_events,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type mtl_uncore_sncu = {
|
||||
.name = "sncu",
|
||||
.num_counters = 1,
|
||||
.num_boxes = 1,
|
||||
.box_ctl = MTL_UNC_SNCU_BOX_CTL,
|
||||
.fixed_ctr_bits = 48,
|
||||
.fixed_ctr = MTL_UNC_SNCU_FIXED_CTR,
|
||||
.fixed_ctl = MTL_UNC_SNCU_FIXED_CTRL,
|
||||
.single_fixed = 1,
|
||||
.event_mask = SNB_UNC_CTL_EV_SEL_MASK,
|
||||
.format_group = &icl_uncore_clock_format_group,
|
||||
.ops = &mtl_uncore_msr_ops,
|
||||
.event_descs = icl_uncore_events,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type *mtl_msr_uncores[] = {
|
||||
&mtl_uncore_cbox,
|
||||
&mtl_uncore_hac_arb,
|
||||
&mtl_uncore_arb,
|
||||
&mtl_uncore_hac_cbox,
|
||||
&mtl_uncore_cncu,
|
||||
&mtl_uncore_sncu,
|
||||
NULL
|
||||
};
|
||||
|
||||
void mtl_uncore_cpu_init(void)
|
||||
{
|
||||
mtl_uncore_cbox.num_boxes = icl_get_cbox_num();
|
||||
uncore_msr_uncores = mtl_msr_uncores;
|
||||
}
|
||||
|
||||
enum {
|
||||
SNB_PCI_UNCORE_IMC,
|
||||
};
|
||||
@ -1264,6 +1412,19 @@ static const struct pci_device_id tgl_uncore_pci_ids[] = {
|
||||
IMC_UNCORE_DEV(RPL_23),
|
||||
IMC_UNCORE_DEV(RPL_24),
|
||||
IMC_UNCORE_DEV(RPL_25),
|
||||
IMC_UNCORE_DEV(MTL_1),
|
||||
IMC_UNCORE_DEV(MTL_2),
|
||||
IMC_UNCORE_DEV(MTL_3),
|
||||
IMC_UNCORE_DEV(MTL_4),
|
||||
IMC_UNCORE_DEV(MTL_5),
|
||||
IMC_UNCORE_DEV(MTL_6),
|
||||
IMC_UNCORE_DEV(MTL_7),
|
||||
IMC_UNCORE_DEV(MTL_8),
|
||||
IMC_UNCORE_DEV(MTL_9),
|
||||
IMC_UNCORE_DEV(MTL_10),
|
||||
IMC_UNCORE_DEV(MTL_11),
|
||||
IMC_UNCORE_DEV(MTL_12),
|
||||
IMC_UNCORE_DEV(MTL_13),
|
||||
{ /* end: all zeroes */ }
|
||||
};
|
||||
|
||||
|
@ -1453,9 +1453,6 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
|
||||
}
|
||||
raw_spin_unlock(&pci2phy_map_lock);
|
||||
} else {
|
||||
int node = pcibus_to_node(ubox_dev->bus);
|
||||
int cpu;
|
||||
|
||||
segment = pci_domain_nr(ubox_dev->bus);
|
||||
raw_spin_lock(&pci2phy_map_lock);
|
||||
map = __find_pci2phy_map(segment);
|
||||
@ -1465,15 +1462,8 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
|
||||
break;
|
||||
}
|
||||
|
||||
die_id = -1;
|
||||
for_each_cpu(cpu, cpumask_of_pcibus(ubox_dev->bus)) {
|
||||
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
||||
map->pbus_to_dieid[bus] = die_id = uncore_device_to_die(ubox_dev);
|
||||
|
||||
if (c->initialized && cpu_to_node(cpu) == node) {
|
||||
map->pbus_to_dieid[bus] = die_id = c->logical_die_id;
|
||||
break;
|
||||
}
|
||||
}
|
||||
raw_spin_unlock(&pci2phy_map_lock);
|
||||
|
||||
if (WARN_ON_ONCE(die_id == -1)) {
|
||||
@ -6142,6 +6132,46 @@ static int spr_upi_get_topology(struct intel_uncore_type *type)
|
||||
return discover_upi_topology(type, SPR_UBOX_DID, SPR_UPI_REGS_ADDR_DEVICE_LINK0);
|
||||
}
|
||||
|
||||
static struct intel_uncore_type spr_uncore_mdf = {
|
||||
SPR_UNCORE_COMMON_FORMAT(),
|
||||
.name = "mdf",
|
||||
};
|
||||
|
||||
#define UNCORE_SPR_NUM_UNCORE_TYPES 12
|
||||
#define UNCORE_SPR_IIO 1
|
||||
#define UNCORE_SPR_IMC 6
|
||||
#define UNCORE_SPR_UPI 8
|
||||
#define UNCORE_SPR_M3UPI 9
|
||||
|
||||
/*
|
||||
* The uncore units, which are supported by the discovery table,
|
||||
* are defined here.
|
||||
*/
|
||||
static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = {
|
||||
&spr_uncore_chabox,
|
||||
&spr_uncore_iio,
|
||||
&spr_uncore_irp,
|
||||
&spr_uncore_m2pcie,
|
||||
&spr_uncore_pcu,
|
||||
NULL,
|
||||
&spr_uncore_imc,
|
||||
&spr_uncore_m2m,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
&spr_uncore_mdf,
|
||||
};
|
||||
|
||||
/*
|
||||
* The uncore units, which are not supported by the discovery table,
|
||||
* are implemented from here.
|
||||
*/
|
||||
#define SPR_UNCORE_UPI_NUM_BOXES 4
|
||||
|
||||
static unsigned int spr_upi_pci_offsets[SPR_UNCORE_UPI_NUM_BOXES] = {
|
||||
0, 0x8000, 0x10000, 0x18000
|
||||
};
|
||||
|
||||
static struct intel_uncore_type spr_uncore_upi = {
|
||||
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
|
||||
.event_mask_ext = SPR_RAW_EVENT_MASK_EXT,
|
||||
@ -6152,38 +6182,30 @@ static struct intel_uncore_type spr_uncore_upi = {
|
||||
.get_topology = spr_upi_get_topology,
|
||||
.set_mapping = spr_upi_set_mapping,
|
||||
.cleanup_mapping = spr_upi_cleanup_mapping,
|
||||
.type_id = UNCORE_SPR_UPI,
|
||||
.num_counters = 4,
|
||||
.num_boxes = SPR_UNCORE_UPI_NUM_BOXES,
|
||||
.perf_ctr_bits = 48,
|
||||
.perf_ctr = ICX_UPI_PCI_PMON_CTR0,
|
||||
.event_ctl = ICX_UPI_PCI_PMON_CTL0,
|
||||
.box_ctl = ICX_UPI_PCI_PMON_BOX_CTL,
|
||||
.pci_offsets = spr_upi_pci_offsets,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type spr_uncore_m3upi = {
|
||||
SPR_UNCORE_PCI_COMMON_FORMAT(),
|
||||
.name = "m3upi",
|
||||
.type_id = UNCORE_SPR_M3UPI,
|
||||
.num_counters = 4,
|
||||
.num_boxes = SPR_UNCORE_UPI_NUM_BOXES,
|
||||
.perf_ctr_bits = 48,
|
||||
.perf_ctr = ICX_M3UPI_PCI_PMON_CTR0,
|
||||
.event_ctl = ICX_M3UPI_PCI_PMON_CTL0,
|
||||
.box_ctl = ICX_M3UPI_PCI_PMON_BOX_CTL,
|
||||
.pci_offsets = spr_upi_pci_offsets,
|
||||
.constraints = icx_uncore_m3upi_constraints,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type spr_uncore_mdf = {
|
||||
SPR_UNCORE_COMMON_FORMAT(),
|
||||
.name = "mdf",
|
||||
};
|
||||
|
||||
#define UNCORE_SPR_NUM_UNCORE_TYPES 12
|
||||
#define UNCORE_SPR_IIO 1
|
||||
#define UNCORE_SPR_IMC 6
|
||||
|
||||
static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = {
|
||||
&spr_uncore_chabox,
|
||||
&spr_uncore_iio,
|
||||
&spr_uncore_irp,
|
||||
&spr_uncore_m2pcie,
|
||||
&spr_uncore_pcu,
|
||||
NULL,
|
||||
&spr_uncore_imc,
|
||||
&spr_uncore_m2m,
|
||||
&spr_uncore_upi,
|
||||
&spr_uncore_m3upi,
|
||||
NULL,
|
||||
&spr_uncore_mdf,
|
||||
};
|
||||
|
||||
enum perf_uncore_spr_iio_freerunning_type_id {
|
||||
SPR_IIO_MSR_IOCLK,
|
||||
SPR_IIO_MSR_BW_IN,
|
||||
@ -6314,6 +6336,7 @@ static struct intel_uncore_type spr_uncore_imc_free_running = {
|
||||
|
||||
#define UNCORE_SPR_MSR_EXTRA_UNCORES 1
|
||||
#define UNCORE_SPR_MMIO_EXTRA_UNCORES 1
|
||||
#define UNCORE_SPR_PCI_EXTRA_UNCORES 2
|
||||
|
||||
static struct intel_uncore_type *spr_msr_uncores[UNCORE_SPR_MSR_EXTRA_UNCORES] = {
|
||||
&spr_uncore_iio_free_running,
|
||||
@ -6323,6 +6346,17 @@ static struct intel_uncore_type *spr_mmio_uncores[UNCORE_SPR_MMIO_EXTRA_UNCORES]
|
||||
&spr_uncore_imc_free_running,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type *spr_pci_uncores[UNCORE_SPR_PCI_EXTRA_UNCORES] = {
|
||||
&spr_uncore_upi,
|
||||
&spr_uncore_m3upi
|
||||
};
|
||||
|
||||
int spr_uncore_units_ignore[] = {
|
||||
UNCORE_SPR_UPI,
|
||||
UNCORE_SPR_M3UPI,
|
||||
UNCORE_IGNORE_END
|
||||
};
|
||||
|
||||
static void uncore_type_customized_copy(struct intel_uncore_type *to_type,
|
||||
struct intel_uncore_type *from_type)
|
||||
{
|
||||
@ -6423,9 +6457,69 @@ void spr_uncore_cpu_init(void)
|
||||
spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO);
|
||||
}
|
||||
|
||||
#define SPR_UNCORE_UPI_PCIID 0x3241
|
||||
#define SPR_UNCORE_UPI0_DEVFN 0x9
|
||||
#define SPR_UNCORE_M3UPI_PCIID 0x3246
|
||||
#define SPR_UNCORE_M3UPI0_DEVFN 0x29
|
||||
|
||||
static void spr_update_device_location(int type_id)
|
||||
{
|
||||
struct intel_uncore_type *type;
|
||||
struct pci_dev *dev = NULL;
|
||||
u32 device, devfn;
|
||||
u64 *ctls;
|
||||
int die;
|
||||
|
||||
if (type_id == UNCORE_SPR_UPI) {
|
||||
type = &spr_uncore_upi;
|
||||
device = SPR_UNCORE_UPI_PCIID;
|
||||
devfn = SPR_UNCORE_UPI0_DEVFN;
|
||||
} else if (type_id == UNCORE_SPR_M3UPI) {
|
||||
type = &spr_uncore_m3upi;
|
||||
device = SPR_UNCORE_M3UPI_PCIID;
|
||||
devfn = SPR_UNCORE_M3UPI0_DEVFN;
|
||||
} else
|
||||
return;
|
||||
|
||||
ctls = kcalloc(__uncore_max_dies, sizeof(u64), GFP_KERNEL);
|
||||
if (!ctls) {
|
||||
type->num_boxes = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, device, dev)) != NULL) {
|
||||
if (devfn != dev->devfn)
|
||||
continue;
|
||||
|
||||
die = uncore_device_to_die(dev);
|
||||
if (die < 0)
|
||||
continue;
|
||||
|
||||
ctls[die] = pci_domain_nr(dev->bus) << UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET |
|
||||
dev->bus->number << UNCORE_DISCOVERY_PCI_BUS_OFFSET |
|
||||
devfn << UNCORE_DISCOVERY_PCI_DEVFN_OFFSET |
|
||||
type->box_ctl;
|
||||
}
|
||||
|
||||
type->box_ctls = ctls;
|
||||
}
|
||||
|
||||
int spr_uncore_pci_init(void)
|
||||
{
|
||||
uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI, 0, NULL);
|
||||
/*
|
||||
* The discovery table of UPI on some SPR variant is broken,
|
||||
* which impacts the detection of both UPI and M3UPI uncore PMON.
|
||||
* Use the pre-defined UPI and M3UPI table to replace.
|
||||
*
|
||||
* The accurate location, e.g., domain and BUS number,
|
||||
* can only be retrieved at load time.
|
||||
* Update the location of UPI and M3UPI.
|
||||
*/
|
||||
spr_update_device_location(UNCORE_SPR_UPI);
|
||||
spr_update_device_location(UNCORE_SPR_M3UPI);
|
||||
uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI,
|
||||
UNCORE_SPR_PCI_EXTRA_UNCORES,
|
||||
spr_pci_uncores);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -35,15 +35,17 @@
|
||||
* per-core reg tables.
|
||||
*/
|
||||
enum extra_reg_type {
|
||||
EXTRA_REG_NONE = -1, /* not used */
|
||||
EXTRA_REG_NONE = -1, /* not used */
|
||||
|
||||
EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
|
||||
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
|
||||
EXTRA_REG_LBR = 2, /* lbr_select */
|
||||
EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */
|
||||
EXTRA_REG_FE = 4, /* fe_* */
|
||||
EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
|
||||
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
|
||||
EXTRA_REG_LBR = 2, /* lbr_select */
|
||||
EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */
|
||||
EXTRA_REG_FE = 4, /* fe_* */
|
||||
EXTRA_REG_SNOOP_0 = 5, /* snoop response 0 */
|
||||
EXTRA_REG_SNOOP_1 = 6, /* snoop response 1 */
|
||||
|
||||
EXTRA_REG_MAX /* number of entries needed */
|
||||
EXTRA_REG_MAX /* number of entries needed */
|
||||
};
|
||||
|
||||
struct event_constraint {
|
||||
@ -606,6 +608,7 @@ union perf_capabilities {
|
||||
u64 pebs_baseline:1;
|
||||
u64 perf_metrics:1;
|
||||
u64 pebs_output_pt_available:1;
|
||||
u64 pebs_timing_info:1;
|
||||
u64 anythread_deprecated:1;
|
||||
};
|
||||
u64 capabilities;
|
||||
@ -647,6 +650,7 @@ enum {
|
||||
};
|
||||
|
||||
#define PERF_PEBS_DATA_SOURCE_MAX 0x10
|
||||
#define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1)
|
||||
|
||||
struct x86_hybrid_pmu {
|
||||
struct pmu pmu;
|
||||
@ -1000,6 +1004,7 @@ do { \
|
||||
#define PMU_FL_PAIR 0x40 /* merge counters for large incr. events */
|
||||
#define PMU_FL_INSTR_LATENCY 0x80 /* Support Instruction Latency in PEBS Memory Info Record */
|
||||
#define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */
|
||||
#define PMU_FL_RETIRE_LATENCY 0x200 /* Support Retire Latency in PEBS */
|
||||
|
||||
#define EVENT_VAR(_id) event_attr_##_id
|
||||
#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
|
||||
@ -1486,6 +1491,8 @@ int intel_pmu_drain_bts_buffer(void);
|
||||
|
||||
u64 adl_latency_data_small(struct perf_event *event, u64 status);
|
||||
|
||||
u64 mtl_latency_data_small(struct perf_event *event, u64 status);
|
||||
|
||||
extern struct event_constraint intel_core2_pebs_event_constraints[];
|
||||
|
||||
extern struct event_constraint intel_atom_pebs_event_constraints[];
|
||||
@ -1597,6 +1604,8 @@ void intel_pmu_pebs_data_source_adl(void);
|
||||
|
||||
void intel_pmu_pebs_data_source_grt(void);
|
||||
|
||||
void intel_pmu_pebs_data_source_mtl(void);
|
||||
|
||||
int intel_pmu_setup_lbr_filter(struct perf_event *event);
|
||||
|
||||
void intel_pt_interrupt(void);
|
||||
|
@ -541,7 +541,13 @@ __init int zhaoxin_pmu_init(void)
|
||||
|
||||
switch (boot_cpu_data.x86) {
|
||||
case 0x06:
|
||||
if (boot_cpu_data.x86_model == 0x0f || boot_cpu_data.x86_model == 0x19) {
|
||||
/*
|
||||
* Support Zhaoxin CPU from ZXC series, exclude Nano series through FMS.
|
||||
* Nano FMS: Family=6, Model=F, Stepping=[0-A][C-D]
|
||||
* ZXC FMS: Family=6, Model=F, Stepping=E-F OR Family=6, Model=0x19, Stepping=0-3
|
||||
*/
|
||||
if ((boot_cpu_data.x86_model == 0x0f && boot_cpu_data.x86_stepping >= 0x0e) ||
|
||||
boot_cpu_data.x86_model == 0x19) {
|
||||
|
||||
x86_pmu.max_period = x86_pmu.cntval_mask >> 1;
|
||||
|
||||
|
@ -312,6 +312,7 @@
|
||||
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
|
||||
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
|
||||
#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */
|
||||
#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */
|
||||
#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */
|
||||
#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */
|
||||
|
||||
|
@ -189,6 +189,9 @@
|
||||
#define MSR_TURBO_RATIO_LIMIT1 0x000001ae
|
||||
#define MSR_TURBO_RATIO_LIMIT2 0x000001af
|
||||
|
||||
#define MSR_SNOOP_RSP_0 0x00001328
|
||||
#define MSR_SNOOP_RSP_1 0x00001329
|
||||
|
||||
#define MSR_LBR_SELECT 0x000001c8
|
||||
#define MSR_LBR_TOS 0x000001c9
|
||||
|
||||
|
@ -159,6 +159,14 @@ union cpuid10_edx {
|
||||
unsigned int full;
|
||||
};
|
||||
|
||||
/*
|
||||
* Intel "Architectural Performance Monitoring extension" CPUID
|
||||
* detection/enumeration details:
|
||||
*/
|
||||
#define ARCH_PERFMON_EXT_LEAF 0x00000023
|
||||
#define ARCH_PERFMON_NUM_COUNTER_LEAF_BIT 0x1
|
||||
#define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1
|
||||
|
||||
/*
|
||||
* Intel Architectural LBR CPUID detection/enumeration details:
|
||||
*/
|
||||
|
@ -659,17 +659,19 @@ static int prepare_emulation(struct kprobe *p, struct insn *insn)
|
||||
* is determined by the MOD/RM byte.
|
||||
*/
|
||||
opcode = insn->modrm.bytes[0];
|
||||
if ((opcode & 0x30) == 0x10) {
|
||||
if ((opcode & 0x8) == 0x8)
|
||||
return -EOPNOTSUPP; /* far call */
|
||||
/* call absolute, indirect */
|
||||
switch (X86_MODRM_REG(opcode)) {
|
||||
case 0b010: /* FF /2, call near, absolute indirect */
|
||||
p->ainsn.emulate_op = kprobe_emulate_call_indirect;
|
||||
} else if ((opcode & 0x30) == 0x20) {
|
||||
if ((opcode & 0x8) == 0x8)
|
||||
return -EOPNOTSUPP; /* far jmp */
|
||||
/* jmp near absolute indirect */
|
||||
break;
|
||||
case 0b100: /* FF /4, jmp near, absolute indirect */
|
||||
p->ainsn.emulate_op = kprobe_emulate_jmp_indirect;
|
||||
} else
|
||||
break;
|
||||
case 0b011: /* FF /3, call far, absolute indirect */
|
||||
case 0b101: /* FF /5, jmp far, absolute indirect */
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
if (!p->ainsn.emulate_op)
|
||||
break;
|
||||
|
||||
if (insn->addr_bytes != sizeof(unsigned long))
|
||||
@ -990,20 +992,6 @@ int kprobe_int3_handler(struct pt_regs *regs)
|
||||
kprobe_post_process(p, regs, kcb);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (*addr != INT3_INSN_OPCODE) {
|
||||
/*
|
||||
* The breakpoint instruction was removed right
|
||||
* after we hit it. Another cpu has removed
|
||||
* either a probepoint or a debugger breakpoint
|
||||
* at this address. In either case, no further
|
||||
* handling of this interrupt is appropriate.
|
||||
* Back up over the (now missing) int3 and run
|
||||
* the original instruction.
|
||||
*/
|
||||
regs->ip = (unsigned long)addr;
|
||||
return 1;
|
||||
} /* else: not a kprobe fault; let the kernel handle it */
|
||||
|
||||
return 0;
|
||||
|
@ -95,6 +95,11 @@ struct perf_raw_record {
|
||||
u32 size;
|
||||
};
|
||||
|
||||
static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
|
||||
{
|
||||
return frag->pad < sizeof(u64);
|
||||
}
|
||||
|
||||
/*
|
||||
* branch stack layout:
|
||||
* nr: number of taken branches stored in entries[]
|
||||
@ -1095,50 +1100,82 @@ int perf_event_read_local(struct perf_event *event, u64 *value,
|
||||
extern u64 perf_event_read_value(struct perf_event *event,
|
||||
u64 *enabled, u64 *running);
|
||||
|
||||
extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs);
|
||||
|
||||
static inline bool branch_sample_no_flags(const struct perf_event *event)
|
||||
{
|
||||
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS;
|
||||
}
|
||||
|
||||
static inline bool branch_sample_no_cycles(const struct perf_event *event)
|
||||
{
|
||||
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES;
|
||||
}
|
||||
|
||||
static inline bool branch_sample_type(const struct perf_event *event)
|
||||
{
|
||||
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE;
|
||||
}
|
||||
|
||||
static inline bool branch_sample_hw_index(const struct perf_event *event)
|
||||
{
|
||||
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
|
||||
}
|
||||
|
||||
static inline bool branch_sample_priv(const struct perf_event *event)
|
||||
{
|
||||
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
|
||||
}
|
||||
|
||||
|
||||
struct perf_sample_data {
|
||||
/*
|
||||
* Fields set by perf_sample_data_init(), group so as to
|
||||
* minimize the cachelines touched.
|
||||
* Fields set by perf_sample_data_init() unconditionally,
|
||||
* group so as to minimize the cachelines touched.
|
||||
*/
|
||||
u64 sample_flags;
|
||||
u64 period;
|
||||
u64 dyn_size;
|
||||
|
||||
/*
|
||||
* The other fields, optionally {set,used} by
|
||||
* perf_{prepare,output}_sample().
|
||||
* Fields commonly set by __perf_event_header__init_id(),
|
||||
* group so as to minimize the cachelines touched.
|
||||
*/
|
||||
struct perf_branch_stack *br_stack;
|
||||
union perf_sample_weight weight;
|
||||
union perf_mem_data_src data_src;
|
||||
u64 txn;
|
||||
u64 addr;
|
||||
struct perf_raw_record *raw;
|
||||
|
||||
u64 type;
|
||||
u64 ip;
|
||||
struct {
|
||||
u32 pid;
|
||||
u32 tid;
|
||||
} tid_entry;
|
||||
u64 time;
|
||||
u64 id;
|
||||
u64 stream_id;
|
||||
struct {
|
||||
u32 cpu;
|
||||
u32 reserved;
|
||||
} cpu_entry;
|
||||
|
||||
/*
|
||||
* The other fields, optionally {set,used} by
|
||||
* perf_{prepare,output}_sample().
|
||||
*/
|
||||
u64 ip;
|
||||
struct perf_callchain_entry *callchain;
|
||||
u64 aux_size;
|
||||
struct perf_raw_record *raw;
|
||||
struct perf_branch_stack *br_stack;
|
||||
union perf_sample_weight weight;
|
||||
union perf_mem_data_src data_src;
|
||||
u64 txn;
|
||||
|
||||
struct perf_regs regs_user;
|
||||
struct perf_regs regs_intr;
|
||||
u64 stack_user_size;
|
||||
|
||||
u64 phys_addr;
|
||||
u64 stream_id;
|
||||
u64 cgroup;
|
||||
u64 addr;
|
||||
u64 phys_addr;
|
||||
u64 data_page_size;
|
||||
u64 code_page_size;
|
||||
u64 aux_size;
|
||||
} ____cacheline_aligned;
|
||||
|
||||
/* default value for data source */
|
||||
@ -1154,6 +1191,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
|
||||
/* remaining struct members initialized in perf_prepare_sample() */
|
||||
data->sample_flags = PERF_SAMPLE_PERIOD;
|
||||
data->period = period;
|
||||
data->dyn_size = 0;
|
||||
|
||||
if (addr) {
|
||||
data->addr = addr;
|
||||
@ -1161,6 +1199,68 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
|
||||
}
|
||||
}
|
||||
|
||||
static inline void perf_sample_save_callchain(struct perf_sample_data *data,
|
||||
struct perf_event *event,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
int size = 1;
|
||||
|
||||
data->callchain = perf_callchain(event, regs);
|
||||
size += data->callchain->nr;
|
||||
|
||||
data->dyn_size += size * sizeof(u64);
|
||||
data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
|
||||
}
|
||||
|
||||
static inline void perf_sample_save_raw_data(struct perf_sample_data *data,
|
||||
struct perf_raw_record *raw)
|
||||
{
|
||||
struct perf_raw_frag *frag = &raw->frag;
|
||||
u32 sum = 0;
|
||||
int size;
|
||||
|
||||
do {
|
||||
sum += frag->size;
|
||||
if (perf_raw_frag_last(frag))
|
||||
break;
|
||||
frag = frag->next;
|
||||
} while (1);
|
||||
|
||||
size = round_up(sum + sizeof(u32), sizeof(u64));
|
||||
raw->size = size - sizeof(u32);
|
||||
frag->pad = raw->size - sum;
|
||||
|
||||
data->raw = raw;
|
||||
data->dyn_size += size;
|
||||
data->sample_flags |= PERF_SAMPLE_RAW;
|
||||
}
|
||||
|
||||
static inline void perf_sample_save_brstack(struct perf_sample_data *data,
|
||||
struct perf_event *event,
|
||||
struct perf_branch_stack *brs)
|
||||
{
|
||||
int size = sizeof(u64); /* nr */
|
||||
|
||||
if (branch_sample_hw_index(event))
|
||||
size += sizeof(u64);
|
||||
size += brs->nr * sizeof(struct perf_branch_entry);
|
||||
|
||||
data->br_stack = brs;
|
||||
data->dyn_size += size;
|
||||
data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
|
||||
}
|
||||
|
||||
static inline u32 perf_sample_data_size(struct perf_sample_data *data,
|
||||
struct perf_event *event)
|
||||
{
|
||||
u32 size = sizeof(struct perf_event_header);
|
||||
|
||||
size += event->header_size + event->id_header_size;
|
||||
size += data->dyn_size;
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear all bitfields in the perf_branch_entry.
|
||||
* The to and from fields are not cleared because they are
|
||||
@ -1182,7 +1282,10 @@ extern void perf_output_sample(struct perf_output_handle *handle,
|
||||
struct perf_event_header *header,
|
||||
struct perf_sample_data *data,
|
||||
struct perf_event *event);
|
||||
extern void perf_prepare_sample(struct perf_event_header *header,
|
||||
extern void perf_prepare_sample(struct perf_sample_data *data,
|
||||
struct perf_event *event,
|
||||
struct pt_regs *regs);
|
||||
extern void perf_prepare_header(struct perf_event_header *header,
|
||||
struct perf_sample_data *data,
|
||||
struct perf_event *event,
|
||||
struct pt_regs *regs);
|
||||
@ -1402,7 +1505,6 @@ extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct
|
||||
extern struct perf_callchain_entry *
|
||||
get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
|
||||
u32 max_stack, bool crosstask, bool add_mark);
|
||||
extern struct perf_callchain_entry *perf_callchain(struct perf_event *event, struct pt_regs *regs);
|
||||
extern int get_callchain_buffers(int max_stack);
|
||||
extern void put_callchain_buffers(void);
|
||||
extern struct perf_callchain_entry *get_callchain_entry(int *rctx);
|
||||
@ -1670,11 +1772,6 @@ extern void perf_restore_debug_store(void);
|
||||
static inline void perf_restore_debug_store(void) { }
|
||||
#endif
|
||||
|
||||
/*
 * Report whether @frag is the last fragment of a raw record: true when its
 * pad is smaller than sizeof(u64).
 *
 * NOTE(review): presumably non-final fragments keep something other than a
 * small pad value in this storage - confirm against struct perf_raw_frag.
 */
static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
{
	const bool has_more = frag->pad >= sizeof(u64);

	return !has_more;
}
|
||||
|
||||
#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
|
||||
|
||||
struct perf_pmu_events_attr {
|
||||
@ -1724,7 +1821,7 @@ static struct perf_pmu_events_attr _var = { \
|
||||
.id = _id, } \
|
||||
})[0].attr.attr)
|
||||
|
||||
#define PMU_FORMAT_ATTR(_name, _format) \
|
||||
#define PMU_FORMAT_ATTR_SHOW(_name, _format) \
|
||||
static ssize_t \
|
||||
_name##_show(struct device *dev, \
|
||||
struct device_attribute *attr, \
|
||||
@ -1733,6 +1830,9 @@ _name##_show(struct device *dev, \
|
||||
BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
|
||||
return sprintf(page, _format "\n"); \
|
||||
} \
|
||||
|
||||
#define PMU_FORMAT_ATTR(_name, _format) \
|
||||
PMU_FORMAT_ATTR_SHOW(_name, _format) \
|
||||
\
|
||||
static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
|
||||
|
||||
@ -1781,30 +1881,4 @@ static inline void perf_lopwr_cb(bool mode)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
static inline bool branch_sample_no_flags(const struct perf_event *event)
|
||||
{
|
||||
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_FLAGS;
|
||||
}
|
||||
|
||||
static inline bool branch_sample_no_cycles(const struct perf_event *event)
|
||||
{
|
||||
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_NO_CYCLES;
|
||||
}
|
||||
|
||||
static inline bool branch_sample_type(const struct perf_event *event)
|
||||
{
|
||||
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_TYPE_SAVE;
|
||||
}
|
||||
|
||||
static inline bool branch_sample_hw_index(const struct perf_event *event)
|
||||
{
|
||||
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
|
||||
}
|
||||
|
||||
static inline bool branch_sample_priv(const struct perf_event *event)
|
||||
{
|
||||
return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE;
|
||||
}
|
||||
#endif /* CONFIG_PERF_EVENTS */
|
||||
#endif /* _LINUX_PERF_EVENT_H */
|
||||
|
@ -7041,13 +7041,20 @@ out_put:
|
||||
ring_buffer_put(rb);
|
||||
}
|
||||
|
||||
static void __perf_event_header__init_id(struct perf_event_header *header,
|
||||
struct perf_sample_data *data,
|
||||
/*
|
||||
* A set of common sample data types saved even for non-sample records
|
||||
* when event->attr.sample_id_all is set.
|
||||
*/
|
||||
#define PERF_SAMPLE_ID_ALL (PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \
|
||||
PERF_SAMPLE_ID | PERF_SAMPLE_STREAM_ID | \
|
||||
PERF_SAMPLE_CPU | PERF_SAMPLE_IDENTIFIER)
|
||||
|
||||
static void __perf_event_header__init_id(struct perf_sample_data *data,
|
||||
struct perf_event *event,
|
||||
u64 sample_type)
|
||||
{
|
||||
data->type = event->attr.sample_type;
|
||||
header->size += event->id_header_size;
|
||||
data->sample_flags |= data->type & PERF_SAMPLE_ID_ALL;
|
||||
|
||||
if (sample_type & PERF_SAMPLE_TID) {
|
||||
/* namespace issues */
|
||||
@ -7074,8 +7081,10 @@ void perf_event_header__init_id(struct perf_event_header *header,
|
||||
struct perf_sample_data *data,
|
||||
struct perf_event *event)
|
||||
{
|
||||
if (event->attr.sample_id_all)
|
||||
__perf_event_header__init_id(header, data, event, event->attr.sample_type);
|
||||
if (event->attr.sample_id_all) {
|
||||
header->size += event->id_header_size;
|
||||
__perf_event_header__init_id(data, event, event->attr.sample_type);
|
||||
}
|
||||
}
|
||||
|
||||
static void __perf_event__output_id_sample(struct perf_output_handle *handle,
|
||||
@ -7305,7 +7314,7 @@ void perf_output_sample(struct perf_output_handle *handle,
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
|
||||
if (data->sample_flags & PERF_SAMPLE_BRANCH_STACK) {
|
||||
if (data->br_stack) {
|
||||
size_t size;
|
||||
|
||||
size = data->br_stack->nr
|
||||
@ -7549,83 +7558,68 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
|
||||
return callchain ?: &__empty_callchain;
|
||||
}
|
||||
|
||||
void perf_prepare_sample(struct perf_event_header *header,
|
||||
struct perf_sample_data *data,
|
||||
/*
 * Return @d when any bit of @s is set in @flags, otherwise 0.
 */
static __always_inline u64 __cond_set(u64 flags, u64 s, u64 d)
{
	return (flags & s) ? d : 0;
}
|
||||
|
||||
void perf_prepare_sample(struct perf_sample_data *data,
|
||||
struct perf_event *event,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
u64 sample_type = event->attr.sample_type;
|
||||
u64 filtered_sample_type;
|
||||
|
||||
header->type = PERF_RECORD_SAMPLE;
|
||||
header->size = sizeof(*header) + event->header_size;
|
||||
|
||||
header->misc = 0;
|
||||
header->misc |= perf_misc_flags(regs);
|
||||
|
||||
/*
|
||||
* Clear the sample flags that have already been done by the
|
||||
* PMU driver.
|
||||
* Add the sample flags that are dependent to others. And clear the
|
||||
* sample flags that have already been done by the PMU driver.
|
||||
*/
|
||||
filtered_sample_type = sample_type & ~data->sample_flags;
|
||||
__perf_event_header__init_id(header, data, event, filtered_sample_type);
|
||||
filtered_sample_type = sample_type;
|
||||
filtered_sample_type |= __cond_set(sample_type, PERF_SAMPLE_CODE_PAGE_SIZE,
|
||||
PERF_SAMPLE_IP);
|
||||
filtered_sample_type |= __cond_set(sample_type, PERF_SAMPLE_DATA_PAGE_SIZE |
|
||||
PERF_SAMPLE_PHYS_ADDR, PERF_SAMPLE_ADDR);
|
||||
filtered_sample_type |= __cond_set(sample_type, PERF_SAMPLE_STACK_USER,
|
||||
PERF_SAMPLE_REGS_USER);
|
||||
filtered_sample_type &= ~data->sample_flags;
|
||||
|
||||
if (sample_type & (PERF_SAMPLE_IP | PERF_SAMPLE_CODE_PAGE_SIZE))
|
||||
if (filtered_sample_type == 0) {
|
||||
/* Make sure it has the correct data->type for output */
|
||||
data->type = event->attr.sample_type;
|
||||
return;
|
||||
}
|
||||
|
||||
__perf_event_header__init_id(data, event, filtered_sample_type);
|
||||
|
||||
if (filtered_sample_type & PERF_SAMPLE_IP) {
|
||||
data->ip = perf_instruction_pointer(regs);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
|
||||
int size = 1;
|
||||
|
||||
if (filtered_sample_type & PERF_SAMPLE_CALLCHAIN)
|
||||
data->callchain = perf_callchain(event, regs);
|
||||
|
||||
size += data->callchain->nr;
|
||||
|
||||
header->size += size * sizeof(u64);
|
||||
data->sample_flags |= PERF_SAMPLE_IP;
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_RAW) {
|
||||
struct perf_raw_record *raw = data->raw;
|
||||
int size;
|
||||
if (filtered_sample_type & PERF_SAMPLE_CALLCHAIN)
|
||||
perf_sample_save_callchain(data, event, regs);
|
||||
|
||||
if (raw && (data->sample_flags & PERF_SAMPLE_RAW)) {
|
||||
struct perf_raw_frag *frag = &raw->frag;
|
||||
u32 sum = 0;
|
||||
|
||||
do {
|
||||
sum += frag->size;
|
||||
if (perf_raw_frag_last(frag))
|
||||
break;
|
||||
frag = frag->next;
|
||||
} while (1);
|
||||
|
||||
size = round_up(sum + sizeof(u32), sizeof(u64));
|
||||
raw->size = size - sizeof(u32);
|
||||
frag->pad = raw->size - sum;
|
||||
} else {
|
||||
size = sizeof(u64);
|
||||
data->raw = NULL;
|
||||
}
|
||||
|
||||
header->size += size;
|
||||
if (filtered_sample_type & PERF_SAMPLE_RAW) {
|
||||
data->raw = NULL;
|
||||
data->dyn_size += sizeof(u64);
|
||||
data->sample_flags |= PERF_SAMPLE_RAW;
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
|
||||
int size = sizeof(u64); /* nr */
|
||||
if (data->sample_flags & PERF_SAMPLE_BRANCH_STACK) {
|
||||
if (branch_sample_hw_index(event))
|
||||
size += sizeof(u64);
|
||||
|
||||
size += data->br_stack->nr
|
||||
* sizeof(struct perf_branch_entry);
|
||||
}
|
||||
header->size += size;
|
||||
if (filtered_sample_type & PERF_SAMPLE_BRANCH_STACK) {
|
||||
data->br_stack = NULL;
|
||||
data->dyn_size += sizeof(u64);
|
||||
data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
|
||||
}
|
||||
|
||||
if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER))
|
||||
if (filtered_sample_type & PERF_SAMPLE_REGS_USER)
|
||||
perf_sample_regs_user(&data->regs_user, regs);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_REGS_USER) {
|
||||
/*
|
||||
* It cannot use the filtered_sample_type here as REGS_USER can be set
|
||||
* by STACK_USER (using __cond_set() above) and we don't want to update
|
||||
* the dyn_size if it's not requested by users.
|
||||
*/
|
||||
if ((sample_type & ~data->sample_flags) & PERF_SAMPLE_REGS_USER) {
|
||||
/* regs dump ABI info */
|
||||
int size = sizeof(u64);
|
||||
|
||||
@ -7634,10 +7628,11 @@ void perf_prepare_sample(struct perf_event_header *header,
|
||||
size += hweight64(mask) * sizeof(u64);
|
||||
}
|
||||
|
||||
header->size += size;
|
||||
data->dyn_size += size;
|
||||
data->sample_flags |= PERF_SAMPLE_REGS_USER;
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_STACK_USER) {
|
||||
if (filtered_sample_type & PERF_SAMPLE_STACK_USER) {
|
||||
/*
|
||||
* Either we need PERF_SAMPLE_STACK_USER bit to be always
|
||||
* processed as the last one or have additional check added
|
||||
@ -7645,9 +7640,10 @@ void perf_prepare_sample(struct perf_event_header *header,
|
||||
* up the rest of the sample size.
|
||||
*/
|
||||
u16 stack_size = event->attr.sample_stack_user;
|
||||
u16 header_size = perf_sample_data_size(data, event);
|
||||
u16 size = sizeof(u64);
|
||||
|
||||
stack_size = perf_sample_ustack_size(stack_size, header->size,
|
||||
stack_size = perf_sample_ustack_size(stack_size, header_size,
|
||||
data->regs_user.regs);
|
||||
|
||||
/*
|
||||
@ -7659,24 +7655,31 @@ void perf_prepare_sample(struct perf_event_header *header,
|
||||
size += sizeof(u64) + stack_size;
|
||||
|
||||
data->stack_user_size = stack_size;
|
||||
header->size += size;
|
||||
data->dyn_size += size;
|
||||
data->sample_flags |= PERF_SAMPLE_STACK_USER;
|
||||
}
|
||||
|
||||
if (filtered_sample_type & PERF_SAMPLE_WEIGHT_TYPE)
|
||||
if (filtered_sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
|
||||
data->weight.full = 0;
|
||||
|
||||
if (filtered_sample_type & PERF_SAMPLE_DATA_SRC)
|
||||
data->data_src.val = PERF_MEM_NA;
|
||||
|
||||
if (filtered_sample_type & PERF_SAMPLE_TRANSACTION)
|
||||
data->txn = 0;
|
||||
|
||||
if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_DATA_PAGE_SIZE)) {
|
||||
if (filtered_sample_type & PERF_SAMPLE_ADDR)
|
||||
data->addr = 0;
|
||||
data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_REGS_INTR) {
|
||||
if (filtered_sample_type & PERF_SAMPLE_DATA_SRC) {
|
||||
data->data_src.val = PERF_MEM_NA;
|
||||
data->sample_flags |= PERF_SAMPLE_DATA_SRC;
|
||||
}
|
||||
|
||||
if (filtered_sample_type & PERF_SAMPLE_TRANSACTION) {
|
||||
data->txn = 0;
|
||||
data->sample_flags |= PERF_SAMPLE_TRANSACTION;
|
||||
}
|
||||
|
||||
if (filtered_sample_type & PERF_SAMPLE_ADDR) {
|
||||
data->addr = 0;
|
||||
data->sample_flags |= PERF_SAMPLE_ADDR;
|
||||
}
|
||||
|
||||
if (filtered_sample_type & PERF_SAMPLE_REGS_INTR) {
|
||||
/* regs dump ABI info */
|
||||
int size = sizeof(u64);
|
||||
|
||||
@ -7688,20 +7691,23 @@ void perf_prepare_sample(struct perf_event_header *header,
|
||||
size += hweight64(mask) * sizeof(u64);
|
||||
}
|
||||
|
||||
header->size += size;
|
||||
data->dyn_size += size;
|
||||
data->sample_flags |= PERF_SAMPLE_REGS_INTR;
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_PHYS_ADDR &&
|
||||
filtered_sample_type & PERF_SAMPLE_PHYS_ADDR)
|
||||
if (filtered_sample_type & PERF_SAMPLE_PHYS_ADDR) {
|
||||
data->phys_addr = perf_virt_to_phys(data->addr);
|
||||
data->sample_flags |= PERF_SAMPLE_PHYS_ADDR;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CGROUP_PERF
|
||||
if (sample_type & PERF_SAMPLE_CGROUP) {
|
||||
if (filtered_sample_type & PERF_SAMPLE_CGROUP) {
|
||||
struct cgroup *cgrp;
|
||||
|
||||
/* protected by RCU */
|
||||
cgrp = task_css_check(current, perf_event_cgrp_id, 1)->cgroup;
|
||||
data->cgroup = cgroup_id(cgrp);
|
||||
data->sample_flags |= PERF_SAMPLE_CGROUP;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -7710,16 +7716,21 @@ void perf_prepare_sample(struct perf_event_header *header,
|
||||
* require PERF_SAMPLE_ADDR, kernel implicitly retrieve the data->addr,
|
||||
* but the value will not dump to the userspace.
|
||||
*/
|
||||
if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
|
||||
if (filtered_sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) {
|
||||
data->data_page_size = perf_get_page_size(data->addr);
|
||||
data->sample_flags |= PERF_SAMPLE_DATA_PAGE_SIZE;
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
|
||||
if (filtered_sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) {
|
||||
data->code_page_size = perf_get_page_size(data->ip);
|
||||
data->sample_flags |= PERF_SAMPLE_CODE_PAGE_SIZE;
|
||||
}
|
||||
|
||||
if (sample_type & PERF_SAMPLE_AUX) {
|
||||
if (filtered_sample_type & PERF_SAMPLE_AUX) {
|
||||
u64 size;
|
||||
u16 header_size = perf_sample_data_size(data, event);
|
||||
|
||||
header->size += sizeof(u64); /* size */
|
||||
header_size += sizeof(u64); /* size */
|
||||
|
||||
/*
|
||||
* Given the 16bit nature of header::size, an AUX sample can
|
||||
@ -7727,14 +7738,26 @@ void perf_prepare_sample(struct perf_event_header *header,
|
||||
* Make sure this doesn't happen by using up to U16_MAX bytes
|
||||
* per sample in total (rounded down to 8 byte boundary).
|
||||
*/
|
||||
size = min_t(size_t, U16_MAX - header->size,
|
||||
size = min_t(size_t, U16_MAX - header_size,
|
||||
event->attr.aux_sample_size);
|
||||
size = rounddown(size, 8);
|
||||
size = perf_prepare_sample_aux(event, data, size);
|
||||
|
||||
WARN_ON_ONCE(size + header->size > U16_MAX);
|
||||
header->size += size;
|
||||
WARN_ON_ONCE(size + header_size > U16_MAX);
|
||||
data->dyn_size += size + sizeof(u64); /* size above */
|
||||
data->sample_flags |= PERF_SAMPLE_AUX;
|
||||
}
|
||||
}
|
||||
|
||||
void perf_prepare_header(struct perf_event_header *header,
|
||||
struct perf_sample_data *data,
|
||||
struct perf_event *event,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
header->type = PERF_RECORD_SAMPLE;
|
||||
header->size = perf_sample_data_size(data, event);
|
||||
header->misc = perf_misc_flags(regs);
|
||||
|
||||
/*
|
||||
* If you're adding more sample types here, you likely need to do
|
||||
* something about the overflowing header::size, like repurpose the
|
||||
@ -7762,7 +7785,8 @@ __perf_event_output(struct perf_event *event,
|
||||
/* protect the callchain buffers */
|
||||
rcu_read_lock();
|
||||
|
||||
perf_prepare_sample(&header, data, event, regs);
|
||||
perf_prepare_sample(data, event, regs);
|
||||
perf_prepare_header(&header, data, event, regs);
|
||||
|
||||
err = output_begin(&handle, data, event, header.size);
|
||||
if (err)
|
||||
@ -10120,8 +10144,7 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
|
||||
};
|
||||
|
||||
perf_sample_data_init(&data, 0, 0);
|
||||
data.raw = &raw;
|
||||
data.sample_flags |= PERF_SAMPLE_RAW;
|
||||
perf_sample_save_raw_data(&data, &raw);
|
||||
|
||||
perf_trace_buf_update(record, event_type);
|
||||
|
||||
@ -10328,13 +10351,7 @@ static void bpf_overflow_handler(struct perf_event *event,
|
||||
rcu_read_lock();
|
||||
prog = READ_ONCE(event->prog);
|
||||
if (prog) {
|
||||
if (prog->call_get_stack &&
|
||||
(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
|
||||
!(data->sample_flags & PERF_SAMPLE_CALLCHAIN)) {
|
||||
data->callchain = perf_callchain(event, regs);
|
||||
data->sample_flags |= PERF_SAMPLE_CALLCHAIN;
|
||||
}
|
||||
|
||||
perf_prepare_sample(data, event, regs);
|
||||
ret = bpf_prog_run(prog, &ctx);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
@ -687,8 +687,7 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
|
||||
}
|
||||
|
||||
perf_sample_data_init(sd, 0, 0);
|
||||
sd->raw = &raw;
|
||||
sd->sample_flags |= PERF_SAMPLE_RAW;
|
||||
perf_sample_save_raw_data(sd, &raw);
|
||||
|
||||
err = __bpf_perf_event_output(regs, map, flags, sd);
|
||||
|
||||
@ -746,8 +745,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
|
||||
|
||||
perf_fetch_caller_regs(regs);
|
||||
perf_sample_data_init(sd, 0, 0);
|
||||
sd->raw = &raw;
|
||||
sd->sample_flags |= PERF_SAMPLE_RAW;
|
||||
perf_sample_save_raw_data(sd, &raw);
|
||||
|
||||
ret = __bpf_perf_event_output(regs, map, flags, sd);
|
||||
out:
|
||||
|
Loading…
Reference in New Issue
Block a user