perf, x86: Fix Intel-nhm PMU programming errata workaround
Fix the Errata AAK100/AAP53/BD53 workaround: the officially documented
workaround we implemented in

  11164cd: perf, x86: Add Nehelem PMU programming errata workaround

doesn't actually work fully and causes a stuck PMU state
under load and non-functioning perf profiling.
A functional workaround was found by trial & error.
Affects all Nehalem-class Intel PMUs.
Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1281073148.2125.63.camel@ymzhang.sh.intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <stable@kernel.org> # .35.x
Signed-off-by: Ingo Molnar <mingo@elte.hu>
commit 351af0725e
parent 9d5f3714e4
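For orientation before reading the diff: the core of the new workaround, condensed from the intel_pmu_nhm_workaround() introduced below, is to program all four magic event codes (not just three), briefly enable counters 0-3 via MSR_CORE_PERF_GLOBAL_CTRL, and then clear everything again. The sketch below is only an illustration: it assumes kernel context (wrmsrl() and the MSR_* constants from the x86 MSR/perf headers) and omits the saving and restoring of counters that are already in use, which the real function does with x86_perf_event_update() and x86_perf_event_set_period().

/*
 * Condensed sketch of the Nehalem PMU "reset" sequence performed by the
 * new intel_pmu_nhm_workaround() in the diff below. Hypothetical helper,
 * not part of the patch; assumes kernel context for wrmsrl() and the
 * MSR_* constants. Save/restore of live counters is omitted.
 */
static void nhm_workaround_sketch(void)
{
        static const unsigned long nhm_magic[4] = {
                0x4300B5, 0x4300D2, 0x4300B1, 0x4300B1
        };
        int i;

        /* program all four magic events and zero the corresponding PMCs */
        for (i = 0; i < 4; i++) {
                wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
                wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
        }

        /* pulse the global enable for counters 0-3, then disable again */
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);

        /* finally clear the event-select registers */
        for (i = 0; i < 4; i++)
                wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
}

The full version in the diff additionally re-enables any events that were active before the reset.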
@@ -491,33 +491,78 @@ static void intel_pmu_enable_all(int added)
  * Intel Errata AAP53 (model 30)
  * Intel Errata BD53 (model 44)
  *
- * These chips need to be 'reset' when adding counters by programming
- * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5
- * either in sequence on the same PMC or on different PMCs.
+ * The official story:
+ *   These chips need to be 'reset' when adding counters by programming the
+ *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
+ *   in sequence on the same PMC or on different PMCs.
+ *
+ * In practise it appears some of these events do in fact count, and
+ * we need to programm all 4 events.
  */
+static void intel_pmu_nhm_workaround(void)
+{
+        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+        static const unsigned long nhm_magic[4] = {
+                0x4300B5,
+                0x4300D2,
+                0x4300B1,
+                0x4300B1
+        };
+        struct perf_event *event;
+        int i;
+
+        /*
+         * The Errata requires below steps:
+         * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
+         * 2) Configure 4 PERFEVTSELx with the magic events and clear
+         *    the corresponding PMCx;
+         * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
+         * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
+         * 5) Clear 4 pairs of ERFEVTSELx and PMCx;
+         */
+
+        /*
+         * The real steps we choose are a little different from above.
+         * A) To reduce MSR operations, we don't run step 1) as they
+         *    are already cleared before this function is called;
+         * B) Call x86_perf_event_update to save PMCx before configuring
+         *    PERFEVTSELx with magic number;
+         * C) With step 5), we do clear only when the PERFEVTSELx is
+         *    not used currently.
+         * D) Call x86_perf_event_set_period to restore PMCx;
+         */
+
+        /* We always operate 4 pairs of PERF Counters */
+        for (i = 0; i < 4; i++) {
+                event = cpuc->events[i];
+                if (event)
+                        x86_perf_event_update(event);
+        }
+
+        for (i = 0; i < 4; i++) {
+                wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
+                wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
+        }
+
+        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
+        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+
+        for (i = 0; i < 4; i++) {
+                event = cpuc->events[i];
+
+                if (event) {
+                        x86_perf_event_set_period(event);
+                        __x86_pmu_enable_event(&event->hw,
+                                        ARCH_PERFMON_EVENTSEL_ENABLE);
+                } else
+                        wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
+        }
+}
+
 static void intel_pmu_nhm_enable_all(int added)
 {
-        if (added) {
-                struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-                int i;
-
-                wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2);
-                wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1);
-                wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5);
-
-                wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
-                wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
-
-                for (i = 0; i < 3; i++) {
-                        struct perf_event *event = cpuc->events[i];
-
-                        if (!event)
-                                continue;
-
-                        __x86_pmu_enable_event(&event->hw,
-                                        ARCH_PERFMON_EVENTSEL_ENABLE);
-                }
-        }
+        if (added)
+                intel_pmu_nhm_workaround();
         intel_pmu_enable_all(added);
 }