17e3162972
This patch updates PEBS event constraints for Intel Atom, Nehalem, Westmere. This patch also reorganizes the PEBS format/constraint detection code. It is now based on processor model and not PEBS format. Two processors may use the same PEBS format without have the same list of PEBS events. In this second version, we simplified the initialization of the PEBS constraints by leveraging the existing switch() statement in perf_event_intel.c. We also renamed the constraint tables to be more consistent with regular constraints. In this 3rd version, we drop BR_INST_RETIRED.MISPRED from Intel Atom as it does not seem to work. Use MISPREDICTED_BRANCH_RETIRED instead. Also add FP_ASSIST.* o both Intel Nehalem and Westmere. I misssed those in the earlier patches. Events were tested using libpfm4 perf_examples. Signed-off-by: Stephane Eranian <eranian@google.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <4d6e6b02.815bdf0a.637b.07a7@mx.google.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
782 lines
19 KiB
C
782 lines
19 KiB
C
#ifdef CONFIG_CPU_SUP_INTEL
|
|
|
|
/* The maximal number of PEBS events: */
|
|
#define MAX_PEBS_EVENTS 4
|
|
|
|
/* The size of a BTS record in bytes: */
|
|
#define BTS_RECORD_SIZE 24
|
|
|
|
#define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
|
|
#define PEBS_BUFFER_SIZE PAGE_SIZE
|
|
|
|
/*
|
|
* pebs_record_32 for p4 and core not supported
|
|
|
|
struct pebs_record_32 {
|
|
u32 flags, ip;
|
|
u32 ax, bc, cx, dx;
|
|
u32 si, di, bp, sp;
|
|
};
|
|
|
|
*/
|
|
|
|
struct pebs_record_core {
|
|
u64 flags, ip;
|
|
u64 ax, bx, cx, dx;
|
|
u64 si, di, bp, sp;
|
|
u64 r8, r9, r10, r11;
|
|
u64 r12, r13, r14, r15;
|
|
};
|
|
|
|
struct pebs_record_nhm {
|
|
u64 flags, ip;
|
|
u64 ax, bx, cx, dx;
|
|
u64 si, di, bp, sp;
|
|
u64 r8, r9, r10, r11;
|
|
u64 r12, r13, r14, r15;
|
|
u64 status, dla, dse, lat;
|
|
};
|
|
|
|
/*
|
|
* A debug store configuration.
|
|
*
|
|
* We only support architectures that use 64bit fields.
|
|
*/
|
|
struct debug_store {
|
|
u64 bts_buffer_base;
|
|
u64 bts_index;
|
|
u64 bts_absolute_maximum;
|
|
u64 bts_interrupt_threshold;
|
|
u64 pebs_buffer_base;
|
|
u64 pebs_index;
|
|
u64 pebs_absolute_maximum;
|
|
u64 pebs_interrupt_threshold;
|
|
u64 pebs_event_reset[MAX_PEBS_EVENTS];
|
|
};
|
|
|
|
static void init_debug_store_on_cpu(int cpu)
|
|
{
|
|
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
|
|
|
|
if (!ds)
|
|
return;
|
|
|
|
wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
|
|
(u32)((u64)(unsigned long)ds),
|
|
(u32)((u64)(unsigned long)ds >> 32));
|
|
}
|
|
|
|
static void fini_debug_store_on_cpu(int cpu)
|
|
{
|
|
if (!per_cpu(cpu_hw_events, cpu).ds)
|
|
return;
|
|
|
|
wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
|
|
}
|
|
|
|
static int alloc_pebs_buffer(int cpu)
|
|
{
|
|
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
|
|
int node = cpu_to_node(cpu);
|
|
int max, thresh = 1; /* always use a single PEBS record */
|
|
void *buffer;
|
|
|
|
if (!x86_pmu.pebs)
|
|
return 0;
|
|
|
|
buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
|
|
if (unlikely(!buffer))
|
|
return -ENOMEM;
|
|
|
|
max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
|
|
|
|
ds->pebs_buffer_base = (u64)(unsigned long)buffer;
|
|
ds->pebs_index = ds->pebs_buffer_base;
|
|
ds->pebs_absolute_maximum = ds->pebs_buffer_base +
|
|
max * x86_pmu.pebs_record_size;
|
|
|
|
ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
|
|
thresh * x86_pmu.pebs_record_size;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void release_pebs_buffer(int cpu)
|
|
{
|
|
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
|
|
|
|
if (!ds || !x86_pmu.pebs)
|
|
return;
|
|
|
|
kfree((void *)(unsigned long)ds->pebs_buffer_base);
|
|
ds->pebs_buffer_base = 0;
|
|
}
|
|
|
|
static int alloc_bts_buffer(int cpu)
|
|
{
|
|
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
|
|
int node = cpu_to_node(cpu);
|
|
int max, thresh;
|
|
void *buffer;
|
|
|
|
if (!x86_pmu.bts)
|
|
return 0;
|
|
|
|
buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
|
|
if (unlikely(!buffer))
|
|
return -ENOMEM;
|
|
|
|
max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
|
|
thresh = max / 16;
|
|
|
|
ds->bts_buffer_base = (u64)(unsigned long)buffer;
|
|
ds->bts_index = ds->bts_buffer_base;
|
|
ds->bts_absolute_maximum = ds->bts_buffer_base +
|
|
max * BTS_RECORD_SIZE;
|
|
ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
|
|
thresh * BTS_RECORD_SIZE;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void release_bts_buffer(int cpu)
|
|
{
|
|
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
|
|
|
|
if (!ds || !x86_pmu.bts)
|
|
return;
|
|
|
|
kfree((void *)(unsigned long)ds->bts_buffer_base);
|
|
ds->bts_buffer_base = 0;
|
|
}
|
|
|
|
static int alloc_ds_buffer(int cpu)
|
|
{
|
|
int node = cpu_to_node(cpu);
|
|
struct debug_store *ds;
|
|
|
|
ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node);
|
|
if (unlikely(!ds))
|
|
return -ENOMEM;
|
|
|
|
per_cpu(cpu_hw_events, cpu).ds = ds;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void release_ds_buffer(int cpu)
|
|
{
|
|
struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
|
|
|
|
if (!ds)
|
|
return;
|
|
|
|
per_cpu(cpu_hw_events, cpu).ds = NULL;
|
|
kfree(ds);
|
|
}
|
|
|
|
static void release_ds_buffers(void)
|
|
{
|
|
int cpu;
|
|
|
|
if (!x86_pmu.bts && !x86_pmu.pebs)
|
|
return;
|
|
|
|
get_online_cpus();
|
|
for_each_online_cpu(cpu)
|
|
fini_debug_store_on_cpu(cpu);
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
release_pebs_buffer(cpu);
|
|
release_bts_buffer(cpu);
|
|
release_ds_buffer(cpu);
|
|
}
|
|
put_online_cpus();
|
|
}
|
|
|
|
static void reserve_ds_buffers(void)
|
|
{
|
|
int bts_err = 0, pebs_err = 0;
|
|
int cpu;
|
|
|
|
x86_pmu.bts_active = 0;
|
|
x86_pmu.pebs_active = 0;
|
|
|
|
if (!x86_pmu.bts && !x86_pmu.pebs)
|
|
return;
|
|
|
|
if (!x86_pmu.bts)
|
|
bts_err = 1;
|
|
|
|
if (!x86_pmu.pebs)
|
|
pebs_err = 1;
|
|
|
|
get_online_cpus();
|
|
|
|
for_each_possible_cpu(cpu) {
|
|
if (alloc_ds_buffer(cpu)) {
|
|
bts_err = 1;
|
|
pebs_err = 1;
|
|
}
|
|
|
|
if (!bts_err && alloc_bts_buffer(cpu))
|
|
bts_err = 1;
|
|
|
|
if (!pebs_err && alloc_pebs_buffer(cpu))
|
|
pebs_err = 1;
|
|
|
|
if (bts_err && pebs_err)
|
|
break;
|
|
}
|
|
|
|
if (bts_err) {
|
|
for_each_possible_cpu(cpu)
|
|
release_bts_buffer(cpu);
|
|
}
|
|
|
|
if (pebs_err) {
|
|
for_each_possible_cpu(cpu)
|
|
release_pebs_buffer(cpu);
|
|
}
|
|
|
|
if (bts_err && pebs_err) {
|
|
for_each_possible_cpu(cpu)
|
|
release_ds_buffer(cpu);
|
|
} else {
|
|
if (x86_pmu.bts && !bts_err)
|
|
x86_pmu.bts_active = 1;
|
|
|
|
if (x86_pmu.pebs && !pebs_err)
|
|
x86_pmu.pebs_active = 1;
|
|
|
|
for_each_online_cpu(cpu)
|
|
init_debug_store_on_cpu(cpu);
|
|
}
|
|
|
|
put_online_cpus();
|
|
}
|
|
|
|
/*
|
|
* BTS
|
|
*/
|
|
|
|
static struct event_constraint bts_constraint =
|
|
EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
|
|
|
|
static void intel_pmu_enable_bts(u64 config)
|
|
{
|
|
unsigned long debugctlmsr;
|
|
|
|
debugctlmsr = get_debugctlmsr();
|
|
|
|
debugctlmsr |= DEBUGCTLMSR_TR;
|
|
debugctlmsr |= DEBUGCTLMSR_BTS;
|
|
debugctlmsr |= DEBUGCTLMSR_BTINT;
|
|
|
|
if (!(config & ARCH_PERFMON_EVENTSEL_OS))
|
|
debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
|
|
|
|
if (!(config & ARCH_PERFMON_EVENTSEL_USR))
|
|
debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
|
|
|
|
update_debugctlmsr(debugctlmsr);
|
|
}
|
|
|
|
static void intel_pmu_disable_bts(void)
|
|
{
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
unsigned long debugctlmsr;
|
|
|
|
if (!cpuc->ds)
|
|
return;
|
|
|
|
debugctlmsr = get_debugctlmsr();
|
|
|
|
debugctlmsr &=
|
|
~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
|
|
DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
|
|
|
|
update_debugctlmsr(debugctlmsr);
|
|
}
|
|
|
|
static int intel_pmu_drain_bts_buffer(void)
|
|
{
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
struct debug_store *ds = cpuc->ds;
|
|
struct bts_record {
|
|
u64 from;
|
|
u64 to;
|
|
u64 flags;
|
|
};
|
|
struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
|
|
struct bts_record *at, *top;
|
|
struct perf_output_handle handle;
|
|
struct perf_event_header header;
|
|
struct perf_sample_data data;
|
|
struct pt_regs regs;
|
|
|
|
if (!event)
|
|
return 0;
|
|
|
|
if (!x86_pmu.bts_active)
|
|
return 0;
|
|
|
|
at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
|
|
top = (struct bts_record *)(unsigned long)ds->bts_index;
|
|
|
|
if (top <= at)
|
|
return 0;
|
|
|
|
ds->bts_index = ds->bts_buffer_base;
|
|
|
|
perf_sample_data_init(&data, 0);
|
|
data.period = event->hw.last_period;
|
|
regs.ip = 0;
|
|
|
|
/*
|
|
* Prepare a generic sample, i.e. fill in the invariant fields.
|
|
* We will overwrite the from and to address before we output
|
|
* the sample.
|
|
*/
|
|
perf_prepare_sample(&header, &data, event, ®s);
|
|
|
|
if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
|
|
return 1;
|
|
|
|
for (; at < top; at++) {
|
|
data.ip = at->from;
|
|
data.addr = at->to;
|
|
|
|
perf_output_sample(&handle, &header, &data, event);
|
|
}
|
|
|
|
perf_output_end(&handle);
|
|
|
|
/* There's new data available. */
|
|
event->hw.interrupts++;
|
|
event->pending_kill = POLL_IN;
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* PEBS
|
|
*/
|
|
static struct event_constraint intel_core2_pebs_event_constraints[] = {
|
|
PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
|
|
PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
|
|
PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
|
|
PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
|
|
INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
|
|
EVENT_CONSTRAINT_END
|
|
};
|
|
|
|
static struct event_constraint intel_atom_pebs_event_constraints[] = {
|
|
PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */
|
|
PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
|
|
INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */
|
|
EVENT_CONSTRAINT_END
|
|
};
|
|
|
|
static struct event_constraint intel_nehalem_pebs_event_constraints[] = {
|
|
INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
|
|
INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
|
|
PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
|
|
INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */
|
|
INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
|
|
INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
|
|
PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
|
|
INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
|
|
PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
|
|
INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
|
|
INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
|
|
EVENT_CONSTRAINT_END
|
|
};
|
|
|
|
static struct event_constraint intel_westmere_pebs_event_constraints[] = {
|
|
INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */
|
|
INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */
|
|
PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */
|
|
INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */
|
|
INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */
|
|
|
|
INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
|
|
INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
|
|
INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */
|
|
PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */
|
|
INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */
|
|
INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */
|
|
EVENT_CONSTRAINT_END
|
|
};
|
|
|
|
static struct event_constraint intel_snb_pebs_events[] = {
|
|
PEBS_EVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
|
|
PEBS_EVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
|
|
PEBS_EVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
|
|
PEBS_EVENT_CONSTRAINT(0x01c4, 0xf), /* BR_INST_RETIRED.CONDITIONAL */
|
|
PEBS_EVENT_CONSTRAINT(0x02c4, 0xf), /* BR_INST_RETIRED.NEAR_CALL */
|
|
PEBS_EVENT_CONSTRAINT(0x04c4, 0xf), /* BR_INST_RETIRED.ALL_BRANCHES */
|
|
PEBS_EVENT_CONSTRAINT(0x08c4, 0xf), /* BR_INST_RETIRED.NEAR_RETURN */
|
|
PEBS_EVENT_CONSTRAINT(0x10c4, 0xf), /* BR_INST_RETIRED.NOT_TAKEN */
|
|
PEBS_EVENT_CONSTRAINT(0x20c4, 0xf), /* BR_INST_RETIRED.NEAR_TAKEN */
|
|
PEBS_EVENT_CONSTRAINT(0x40c4, 0xf), /* BR_INST_RETIRED.FAR_BRANCH */
|
|
PEBS_EVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
|
|
PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
|
|
PEBS_EVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
|
|
PEBS_EVENT_CONSTRAINT(0x10c5, 0xf), /* BR_MISP_RETIRED.NOT_TAKEN */
|
|
PEBS_EVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.TAKEN */
|
|
PEBS_EVENT_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
|
|
PEBS_EVENT_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORE */
|
|
PEBS_EVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */
|
|
PEBS_EVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
|
|
PEBS_EVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
|
|
PEBS_EVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
|
|
PEBS_EVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
|
|
PEBS_EVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
|
|
PEBS_EVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
|
|
PEBS_EVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
|
|
PEBS_EVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
|
|
PEBS_EVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
|
|
PEBS_EVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.LLC_HIT */
|
|
PEBS_EVENT_CONSTRAINT(0x40d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
|
|
PEBS_EVENT_CONSTRAINT(0x01d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
|
|
PEBS_EVENT_CONSTRAINT(0x02d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
|
|
PEBS_EVENT_CONSTRAINT(0x04d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM */
|
|
PEBS_EVENT_CONSTRAINT(0x08d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE */
|
|
PEBS_EVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
|
|
EVENT_CONSTRAINT_END
|
|
};
|
|
|
|
static struct event_constraint *
|
|
intel_pebs_constraints(struct perf_event *event)
|
|
{
|
|
struct event_constraint *c;
|
|
|
|
if (!event->attr.precise_ip)
|
|
return NULL;
|
|
|
|
if (x86_pmu.pebs_constraints) {
|
|
for_each_event_constraint(c, x86_pmu.pebs_constraints) {
|
|
if ((event->hw.config & c->cmask) == c->code)
|
|
return c;
|
|
}
|
|
}
|
|
|
|
return &emptyconstraint;
|
|
}
|
|
|
|
static void intel_pmu_pebs_enable(struct perf_event *event)
|
|
{
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
struct hw_perf_event *hwc = &event->hw;
|
|
|
|
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
|
|
|
|
cpuc->pebs_enabled |= 1ULL << hwc->idx;
|
|
WARN_ON_ONCE(cpuc->enabled);
|
|
|
|
if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
|
|
intel_pmu_lbr_enable(event);
|
|
}
|
|
|
|
static void intel_pmu_pebs_disable(struct perf_event *event)
|
|
{
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
struct hw_perf_event *hwc = &event->hw;
|
|
|
|
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
|
|
if (cpuc->enabled)
|
|
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
|
|
|
|
hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
|
|
|
|
if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
|
|
intel_pmu_lbr_disable(event);
|
|
}
|
|
|
|
static void intel_pmu_pebs_enable_all(void)
|
|
{
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
|
|
if (cpuc->pebs_enabled)
|
|
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
|
|
}
|
|
|
|
static void intel_pmu_pebs_disable_all(void)
|
|
{
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
|
|
if (cpuc->pebs_enabled)
|
|
wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
|
|
}
|
|
|
|
#include <asm/insn.h>
|
|
|
|
static inline bool kernel_ip(unsigned long ip)
|
|
{
|
|
#ifdef CONFIG_X86_32
|
|
return ip > PAGE_OFFSET;
|
|
#else
|
|
return (long)ip < 0;
|
|
#endif
|
|
}
|
|
|
|
static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
|
|
{
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
unsigned long from = cpuc->lbr_entries[0].from;
|
|
unsigned long old_to, to = cpuc->lbr_entries[0].to;
|
|
unsigned long ip = regs->ip;
|
|
|
|
/*
|
|
* We don't need to fixup if the PEBS assist is fault like
|
|
*/
|
|
if (!x86_pmu.intel_cap.pebs_trap)
|
|
return 1;
|
|
|
|
/*
|
|
* No LBR entry, no basic block, no rewinding
|
|
*/
|
|
if (!cpuc->lbr_stack.nr || !from || !to)
|
|
return 0;
|
|
|
|
/*
|
|
* Basic blocks should never cross user/kernel boundaries
|
|
*/
|
|
if (kernel_ip(ip) != kernel_ip(to))
|
|
return 0;
|
|
|
|
/*
|
|
* unsigned math, either ip is before the start (impossible) or
|
|
* the basic block is larger than 1 page (sanity)
|
|
*/
|
|
if ((ip - to) > PAGE_SIZE)
|
|
return 0;
|
|
|
|
/*
|
|
* We sampled a branch insn, rewind using the LBR stack
|
|
*/
|
|
if (ip == to) {
|
|
regs->ip = from;
|
|
return 1;
|
|
}
|
|
|
|
do {
|
|
struct insn insn;
|
|
u8 buf[MAX_INSN_SIZE];
|
|
void *kaddr;
|
|
|
|
old_to = to;
|
|
if (!kernel_ip(ip)) {
|
|
int bytes, size = MAX_INSN_SIZE;
|
|
|
|
bytes = copy_from_user_nmi(buf, (void __user *)to, size);
|
|
if (bytes != size)
|
|
return 0;
|
|
|
|
kaddr = buf;
|
|
} else
|
|
kaddr = (void *)to;
|
|
|
|
kernel_insn_init(&insn, kaddr);
|
|
insn_get_length(&insn);
|
|
to += insn.length;
|
|
} while (to < ip);
|
|
|
|
if (to == ip) {
|
|
regs->ip = old_to;
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* Even though we decoded the basic block, the instruction stream
|
|
* never matched the given IP, either the TO or the IP got corrupted.
|
|
*/
|
|
return 0;
|
|
}
|
|
|
|
static int intel_pmu_save_and_restart(struct perf_event *event);
|
|
|
|
static void __intel_pmu_pebs_event(struct perf_event *event,
|
|
struct pt_regs *iregs, void *__pebs)
|
|
{
|
|
/*
|
|
* We cast to pebs_record_core since that is a subset of
|
|
* both formats and we don't use the other fields in this
|
|
* routine.
|
|
*/
|
|
struct pebs_record_core *pebs = __pebs;
|
|
struct perf_sample_data data;
|
|
struct pt_regs regs;
|
|
|
|
if (!intel_pmu_save_and_restart(event))
|
|
return;
|
|
|
|
perf_sample_data_init(&data, 0);
|
|
data.period = event->hw.last_period;
|
|
|
|
/*
|
|
* We use the interrupt regs as a base because the PEBS record
|
|
* does not contain a full regs set, specifically it seems to
|
|
* lack segment descriptors, which get used by things like
|
|
* user_mode().
|
|
*
|
|
* In the simple case fix up only the IP and BP,SP regs, for
|
|
* PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
|
|
* A possible PERF_SAMPLE_REGS will have to transfer all regs.
|
|
*/
|
|
regs = *iregs;
|
|
regs.ip = pebs->ip;
|
|
regs.bp = pebs->bp;
|
|
regs.sp = pebs->sp;
|
|
|
|
if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s))
|
|
regs.flags |= PERF_EFLAGS_EXACT;
|
|
else
|
|
regs.flags &= ~PERF_EFLAGS_EXACT;
|
|
|
|
if (perf_event_overflow(event, 1, &data, ®s))
|
|
x86_pmu_stop(event, 0);
|
|
}
|
|
|
|
static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
|
|
{
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
struct debug_store *ds = cpuc->ds;
|
|
struct perf_event *event = cpuc->events[0]; /* PMC0 only */
|
|
struct pebs_record_core *at, *top;
|
|
int n;
|
|
|
|
if (!x86_pmu.pebs_active)
|
|
return;
|
|
|
|
at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
|
|
top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
|
|
|
|
/*
|
|
* Whatever else happens, drain the thing
|
|
*/
|
|
ds->pebs_index = ds->pebs_buffer_base;
|
|
|
|
if (!test_bit(0, cpuc->active_mask))
|
|
return;
|
|
|
|
WARN_ON_ONCE(!event);
|
|
|
|
if (!event->attr.precise_ip)
|
|
return;
|
|
|
|
n = top - at;
|
|
if (n <= 0)
|
|
return;
|
|
|
|
/*
|
|
* Should not happen, we program the threshold at 1 and do not
|
|
* set a reset value.
|
|
*/
|
|
WARN_ON_ONCE(n > 1);
|
|
at += n - 1;
|
|
|
|
__intel_pmu_pebs_event(event, iregs, at);
|
|
}
|
|
|
|
static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
|
|
{
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
struct debug_store *ds = cpuc->ds;
|
|
struct pebs_record_nhm *at, *top;
|
|
struct perf_event *event = NULL;
|
|
u64 status = 0;
|
|
int bit, n;
|
|
|
|
if (!x86_pmu.pebs_active)
|
|
return;
|
|
|
|
at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
|
|
top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
|
|
|
|
ds->pebs_index = ds->pebs_buffer_base;
|
|
|
|
n = top - at;
|
|
if (n <= 0)
|
|
return;
|
|
|
|
/*
|
|
* Should not happen, we program the threshold at 1 and do not
|
|
* set a reset value.
|
|
*/
|
|
WARN_ON_ONCE(n > MAX_PEBS_EVENTS);
|
|
|
|
for ( ; at < top; at++) {
|
|
for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
|
|
event = cpuc->events[bit];
|
|
if (!test_bit(bit, cpuc->active_mask))
|
|
continue;
|
|
|
|
WARN_ON_ONCE(!event);
|
|
|
|
if (!event->attr.precise_ip)
|
|
continue;
|
|
|
|
if (__test_and_set_bit(bit, (unsigned long *)&status))
|
|
continue;
|
|
|
|
break;
|
|
}
|
|
|
|
if (!event || bit >= MAX_PEBS_EVENTS)
|
|
continue;
|
|
|
|
__intel_pmu_pebs_event(event, iregs, at);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* BTS, PEBS probe and setup
|
|
*/
|
|
|
|
static void intel_ds_init(void)
|
|
{
|
|
/*
|
|
* No support for 32bit formats
|
|
*/
|
|
if (!boot_cpu_has(X86_FEATURE_DTES64))
|
|
return;
|
|
|
|
x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
|
|
x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
|
|
if (x86_pmu.pebs) {
|
|
char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
|
|
int format = x86_pmu.intel_cap.pebs_format;
|
|
|
|
switch (format) {
|
|
case 0:
|
|
printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
|
|
x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
|
|
x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
|
|
break;
|
|
|
|
case 1:
|
|
printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
|
|
x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
|
|
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
|
|
break;
|
|
|
|
default:
|
|
printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
|
|
x86_pmu.pebs = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
#else /* CONFIG_CPU_SUP_INTEL */
|
|
|
|
static void reserve_ds_buffers(void)
|
|
{
|
|
}
|
|
|
|
static void release_ds_buffers(void)
|
|
{
|
|
}
|
|
|
|
#endif /* CONFIG_CPU_SUP_INTEL */
|