mirror of https://github.com/torvalds/linux.git
Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  kprobes/x86: Fix the return address of multiple kretprobes
  perf tools: Fix build error on read only source.
  perf, x86: Fix Intel-nhm PMU programming errata workaround
commit b3ea36b7a2
@@ -491,33 +491,78 @@ static void intel_pmu_enable_all(int added)
  * Intel Errata AAK100 (model 26)
  * Intel Errata AAP53  (model 30)
  * Intel Errata BD53   (model 44)
  *
- * These chips need to be 'reset' when adding counters by programming
- * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5
- * either in sequence on the same PMC or on different PMCs.
+ * The official story:
+ *   These chips need to be 'reset' when adding counters by programming the
+ *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
+ *   in sequence on the same PMC or on different PMCs.
+ *
+ * In practice it appears some of these events do in fact count, and
+ * we need to program all 4 events.
  */
-static void intel_pmu_nhm_enable_all(int added)
+static void intel_pmu_nhm_workaround(void)
 {
-	if (added) {
-		struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-		int i;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	static const unsigned long nhm_magic[4] = {
+		0x4300B5,
+		0x4300D2,
+		0x4300B1,
+		0x4300B1
+	};
+	struct perf_event *event;
+	int i;
 
-		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2);
-		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1);
-		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5);
+	/*
+	 * The errata requires the following steps:
+	 * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
+	 * 2) Configure 4 PERFEVTSELx with the magic events and clear
+	 *    the corresponding PMCx;
+	 * 3) Set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
+	 * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
+	 * 5) Clear the 4 pairs of PERFEVTSELx and PMCx.
+	 */
 
-		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
-		wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+	/*
+	 * The real steps we take are a little different from the above:
+	 * A) To reduce MSR operations, we don't run step 1) as the MSRs
+	 *    are already cleared before this function is called;
+	 * B) Call x86_perf_event_update to save PMCx before configuring
+	 *    PERFEVTSELx with the magic numbers;
+	 * C) For step 5), we clear a PERFEVTSELx only when it is not
+	 *    currently in use;
+	 * D) Call x86_perf_event_set_period to restore PMCx.
+	 */
 
-		for (i = 0; i < 3; i++) {
-			struct perf_event *event = cpuc->events[i];
+	/* We always operate on 4 pairs of PERF counters */
+	for (i = 0; i < 4; i++) {
+		event = cpuc->events[i];
+		if (event)
+			x86_perf_event_update(event);
+	}
 
-			if (!event)
-				continue;
+	for (i = 0; i < 4; i++) {
+		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
+		wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
+	}
 
-			__x86_pmu_enable_event(&event->hw,
-					ARCH_PERFMON_EVENTSEL_ENABLE);
-		}
-	}
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
+	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+
+	for (i = 0; i < 4; i++) {
+		event = cpuc->events[i];
+
+		if (event) {
+			x86_perf_event_set_period(event);
+			__x86_pmu_enable_event(&event->hw,
+					ARCH_PERFMON_EVENTSEL_ENABLE);
+		} else
+			wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
+	}
+}
+
+static void intel_pmu_nhm_enable_all(int added)
+{
+	if (added)
+		intel_pmu_nhm_workaround();
 	intel_pmu_enable_all(added);
 }
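For readers who want the errata procedure in isolation, here is a minimal sketch of the literal five-step sequence described in the comment above. It is an illustration only, not the in-tree code: the function name is made up, it assumes the kernel's wrmsrl() helper and the MSR_* constants used above, and it omits the save/restore of live counters that intel_pmu_nhm_workaround() performs.

/* Illustrative sketch only (not the patch): the literal errata sequence. */
static void nhm_errata_sequence_sketch(void)
{
	/* the four "magic" (mostly non-counting) events from the comment above */
	static const unsigned long magic[4] = {
		0x4300B5, 0x4300D2, 0x4300B1, 0x4300B1
	};
	int i;

	/* 1) clear PEBS enable and the global counter enable */
	wrmsrl(MSR_IA32_PEBS_ENABLE, 0x0);
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);

	/* 2) program the magic events and zero the corresponding counters */
	for (i = 0; i < 4; i++) {
		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, magic[i]);
		wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
	}

	/* 3) set bits 0-3 of the global control, then 4) clear it again */
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);

	/* 5) clear the four event-select/counter pairs */
	for (i = 0; i < 4; i++) {
		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
		wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
	}
}

The patched kernel code avoids step 1 (those MSRs are already clear when it runs) and replaces the unconditional clears of step 5 with x86_perf_event_update()/x86_perf_event_set_period(), so counters that are in use keep their state.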
@@ -709,6 +709,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 	struct hlist_node *node, *tmp;
 	unsigned long flags, orig_ret_address = 0;
 	unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+	kprobe_opcode_t *correct_ret_addr = NULL;
 
 	INIT_HLIST_HEAD(&empty_rp);
 	kretprobe_hash_lock(current, &head, &flags);
@@ -740,15 +741,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 			/* another task is sharing our hash bucket */
 			continue;
 
-		if (ri->rp && ri->rp->handler) {
-			__get_cpu_var(current_kprobe) = &ri->rp->kp;
-			get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
-			ri->rp->handler(ri, regs);
-			__get_cpu_var(current_kprobe) = NULL;
-		}
-
 		orig_ret_address = (unsigned long)ri->ret_addr;
-		recycle_rp_inst(ri, &empty_rp);
 
 		if (orig_ret_address != trampoline_address)
 			/*
@@ -761,6 +754,32 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 
 	kretprobe_assert(ri, orig_ret_address, trampoline_address);
 
+	correct_ret_addr = ri->ret_addr;
+	hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+		if (ri->task != current)
+			/* another task is sharing our hash bucket */
+			continue;
+
+		orig_ret_address = (unsigned long)ri->ret_addr;
+		if (ri->rp && ri->rp->handler) {
+			__get_cpu_var(current_kprobe) = &ri->rp->kp;
+			get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
+			ri->ret_addr = correct_ret_addr;
+			ri->rp->handler(ri, regs);
+			__get_cpu_var(current_kprobe) = NULL;
+		}
+
+		recycle_rp_inst(ri, &empty_rp);
+
+		if (orig_ret_address != trampoline_address)
+			/*
+			 * This is the real return address. Any other
+			 * instances associated with this task are for
+			 * other calls deeper on the call stack
+			 */
+			break;
+	}
+
 	kretprobe_hash_unlock(current, &flags);
 
 	hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
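The kretprobe change above matters to any return-probe handler that looks at ri->ret_addr: when several kretprobed functions were live on the same call stack, a handler could previously be handed the trampoline address instead of the caller's real return address. A minimal module sketch of such a handler follows; it is illustrative only, and the probed symbol "do_fork", the maxactive value, and the module boilerplate are assumptions for the example, not part of the patch.

#include <linux/module.h>
#include <linux/kprobes.h>

/* Print the return address handed to the handler; with the fix above this
 * is the real call site even when kretprobes are nested on the stack. */
static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	printk(KERN_INFO "probed function returned to %p\n", ri->ret_addr);
	return 0;
}

static struct kretprobe example_krp = {
	.handler	= ret_handler,
	.kp.symbol_name	= "do_fork",	/* arbitrary example symbol */
	.maxactive	= 16,		/* allow concurrent instances */
};

static int __init kret_example_init(void)
{
	return register_kretprobe(&example_krp);
}

static void __exit kret_example_exit(void)
{
	unregister_kretprobe(&example_krp);
}

module_init(kret_example_init);
module_exit(kret_example_exit);
MODULE_LICENSE("GPL");

With the patch, the second pass over the hash bucket temporarily sets ri->ret_addr to correct_ret_addr before invoking each handler, so the address printed here is the real return address rather than kretprobe_trampoline.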
@@ -5,6 +5,12 @@ endif
 # The default target of this Makefile is...
 all::
 
+ifneq ($(OUTPUT),)
+# check that the output directory actually exists
+OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd)
+$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist))
+endif
+
 # Define V=1 to have a more verbose compile.
 # Define V=2 to have an even more verbose compile.
 #
@@ -931,15 +937,15 @@ $(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
 	$(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@
 
 $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh
-	$(QUIET_GEN)$(RM) $@ $@+ && \
+	$(QUIET_GEN)$(RM) $(OUTPUT)$@ $(OUTPUT)$@+ && \
 	sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \
 	    -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \
 	    -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \
 	    -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
 	    -e 's/@@NO_CURL@@/$(NO_CURL)/g' \
-	    $@.sh >$@+ && \
-	chmod +x $@+ && \
-	mv $@+ $(OUTPUT)$@
+	    $@.sh > $(OUTPUT)$@+ && \
+	chmod +x $(OUTPUT)$@+ && \
+	mv $(OUTPUT)$@+ $(OUTPUT)$@
 
 configure: configure.ac
 	$(QUIET_GEN)$(RM) $@ $<+ && \
@@ -113,7 +113,7 @@ endef
 # try-cc
 # Usage: option = $(call try-cc, source-to-build, cc-options)
 try-cc = $(shell sh -c \
-	'TMP="$(TMPOUT).$$$$"; \
+	'TMP="$(OUTPUT)$(TMPOUT).$$$$"; \
	 echo "$(1)" | \
	 $(CC) -x c - $(2) -o "$$TMP" > /dev/null 2>&1 && echo y; \
	 rm -f "$$TMP"')