forked from Minki/linux
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar: "Fix merge window fallout and fix sleep profiling (this was always broken, so it's not a fix for the merge window - we can skip this one from the head of the tree)." * 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: perf/trace: Add ability to set a target task for events perf/x86: Fix USER/KERNEL tagging of samples properly perf/x86/intel/uncore: Make UNCORE_PMU_HRTIMER_INTERVAL 64-bit
This commit is contained in:
commit
bd463a0606
@ -196,11 +196,16 @@ static inline u32 get_ibs_caps(void) { return 0; }
|
||||
extern void perf_events_lapic_init(void);
|
||||
|
||||
/*
|
||||
* Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
|
||||
* This flag is otherwise unused and ABI specified to be 0, so nobody should
|
||||
* care what we do with it.
|
||||
* Abuse bits {3,5} of the cpu eflags register. These flags are otherwise
|
||||
* unused and ABI specified to be 0, so nobody should care what we do with
|
||||
* them.
|
||||
*
|
||||
* EXACT - the IP points to the exact instruction that triggered the
|
||||
* event (HW bugs exempt).
|
||||
* VM - original X86_VM_MASK; see set_linear_ip().
|
||||
*/
|
||||
#define PERF_EFLAGS_EXACT (1UL << 3)
|
||||
#define PERF_EFLAGS_VM (1UL << 5)
|
||||
|
||||
struct pt_regs;
|
||||
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
|
||||
|
@ -32,6 +32,8 @@
|
||||
#include <asm/smp.h>
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/timer.h>
|
||||
#include <asm/desc.h>
|
||||
#include <asm/ldt.h>
|
||||
|
||||
#include "perf_event.h"
|
||||
|
||||
@ -1738,6 +1740,29 @@ valid_user_frame(const void __user *fp, unsigned long size)
|
||||
return (__range_not_ok(fp, size, TASK_SIZE) == 0);
|
||||
}
|
||||
|
||||
static unsigned long get_segment_base(unsigned int segment)
|
||||
{
|
||||
struct desc_struct *desc;
|
||||
int idx = segment >> 3;
|
||||
|
||||
if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
|
||||
if (idx > LDT_ENTRIES)
|
||||
return 0;
|
||||
|
||||
if (idx > current->active_mm->context.size)
|
||||
return 0;
|
||||
|
||||
desc = current->active_mm->context.ldt;
|
||||
} else {
|
||||
if (idx > GDT_ENTRIES)
|
||||
return 0;
|
||||
|
||||
desc = __this_cpu_ptr(&gdt_page.gdt[0]);
|
||||
}
|
||||
|
||||
return get_desc_base(desc + idx);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
|
||||
#include <asm/compat.h>
|
||||
@ -1746,13 +1771,17 @@ static inline int
|
||||
perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
|
||||
{
|
||||
/* 32-bit process in 64-bit kernel. */
|
||||
unsigned long ss_base, cs_base;
|
||||
struct stack_frame_ia32 frame;
|
||||
const void __user *fp;
|
||||
|
||||
if (!test_thread_flag(TIF_IA32))
|
||||
return 0;
|
||||
|
||||
fp = compat_ptr(regs->bp);
|
||||
cs_base = get_segment_base(regs->cs);
|
||||
ss_base = get_segment_base(regs->ss);
|
||||
|
||||
fp = compat_ptr(ss_base + regs->bp);
|
||||
while (entry->nr < PERF_MAX_STACK_DEPTH) {
|
||||
unsigned long bytes;
|
||||
frame.next_frame = 0;
|
||||
@ -1765,8 +1794,8 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
|
||||
if (!valid_user_frame(fp, sizeof(frame)))
|
||||
break;
|
||||
|
||||
perf_callchain_store(entry, frame.return_address);
|
||||
fp = compat_ptr(frame.next_frame);
|
||||
perf_callchain_store(entry, cs_base + frame.return_address);
|
||||
fp = compat_ptr(ss_base + frame.next_frame);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
@ -1789,6 +1818,12 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* We don't know what to do with VM86 stacks.. ignore them for now.
|
||||
*/
|
||||
if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
|
||||
return;
|
||||
|
||||
fp = (void __user *)regs->bp;
|
||||
|
||||
perf_callchain_store(entry, regs->ip);
|
||||
@ -1816,16 +1851,50 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Deal with code segment offsets for the various execution modes:
|
||||
*
|
||||
* VM86 - the good olde 16 bit days, where the linear address is
|
||||
* 20 bits and we use regs->ip + 0x10 * regs->cs.
|
||||
*
|
||||
* IA32 - Where we need to look at GDT/LDT segment descriptor tables
|
||||
* to figure out what the 32bit base address is.
|
||||
*
|
||||
* X32 - has TIF_X32 set, but is running in x86_64
|
||||
*
|
||||
* X86_64 - CS,DS,SS,ES are all zero based.
|
||||
*/
|
||||
static unsigned long code_segment_base(struct pt_regs *regs)
|
||||
{
|
||||
/*
|
||||
* If we are in VM86 mode, add the segment offset to convert to a
|
||||
* linear address.
|
||||
*/
|
||||
if (regs->flags & X86_VM_MASK)
|
||||
return 0x10 * regs->cs;
|
||||
|
||||
/*
|
||||
* For IA32 we look at the GDT/LDT segment base to convert the
|
||||
* effective IP to a linear address.
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
if (user_mode(regs) && regs->cs != __USER_CS)
|
||||
return get_segment_base(regs->cs);
|
||||
#else
|
||||
if (test_thread_flag(TIF_IA32)) {
|
||||
if (user_mode(regs) && regs->cs != __USER32_CS)
|
||||
return get_segment_base(regs->cs);
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned long perf_instruction_pointer(struct pt_regs *regs)
|
||||
{
|
||||
unsigned long ip;
|
||||
|
||||
if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
|
||||
ip = perf_guest_cbs->get_guest_ip();
|
||||
else
|
||||
ip = instruction_pointer(regs);
|
||||
return perf_guest_cbs->get_guest_ip();
|
||||
|
||||
return ip;
|
||||
return regs->ip + code_segment_base(regs);
|
||||
}
|
||||
|
||||
unsigned long perf_misc_flags(struct pt_regs *regs)
|
||||
@ -1838,7 +1907,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
|
||||
else
|
||||
misc |= PERF_RECORD_MISC_GUEST_KERNEL;
|
||||
} else {
|
||||
if (!kernel_ip(regs->ip))
|
||||
if (user_mode(regs))
|
||||
misc |= PERF_RECORD_MISC_USER;
|
||||
else
|
||||
misc |= PERF_RECORD_MISC_KERNEL;
|
||||
|
@ -516,6 +516,26 @@ static inline bool kernel_ip(unsigned long ip)
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Not all PMUs provide the right context information to place the reported IP
|
||||
* into full context. Specifically segment registers are typically not
|
||||
* supplied.
|
||||
*
|
||||
* Assuming the address is a linear address (it is for IBS), we fake the CS and
|
||||
* vm86 mode using the known zero-based code segment and 'fix up' the registers
|
||||
* to reflect this.
|
||||
*
|
||||
* Intel PEBS/LBR appear to typically provide the effective address, nothing
|
||||
* much we can do about that but pray and treat it like a linear address.
|
||||
*/
|
||||
static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
|
||||
{
|
||||
regs->cs = kernel_ip(ip) ? __KERNEL_CS : __USER_CS;
|
||||
if (regs->flags & X86_VM_MASK)
|
||||
regs->flags ^= (PERF_EFLAGS_VM | X86_VM_MASK);
|
||||
regs->ip = ip;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CPU_SUP_AMD
|
||||
|
||||
int amd_pmu_init(void);
|
||||
|
@ -13,6 +13,8 @@
|
||||
|
||||
#include <asm/apic.h>
|
||||
|
||||
#include "perf_event.h"
|
||||
|
||||
static u32 ibs_caps;
|
||||
|
||||
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
|
||||
@ -536,7 +538,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
|
||||
if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
|
||||
regs.flags &= ~PERF_EFLAGS_EXACT;
|
||||
} else {
|
||||
instruction_pointer_set(®s, ibs_data.regs[1]);
|
||||
set_linear_ip(®s, ibs_data.regs[1]);
|
||||
regs.flags |= PERF_EFLAGS_EXACT;
|
||||
}
|
||||
|
||||
|
@ -499,7 +499,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
|
||||
* We sampled a branch insn, rewind using the LBR stack
|
||||
*/
|
||||
if (ip == to) {
|
||||
regs->ip = from;
|
||||
set_linear_ip(regs, from);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -529,7 +529,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
|
||||
} while (to < ip);
|
||||
|
||||
if (to == ip) {
|
||||
regs->ip = old_to;
|
||||
set_linear_ip(regs, old_to);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -569,7 +569,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
|
||||
* A possible PERF_SAMPLE_REGS will have to transfer all regs.
|
||||
*/
|
||||
regs = *iregs;
|
||||
regs.ip = pebs->ip;
|
||||
regs.flags = pebs->flags;
|
||||
set_linear_ip(®s, pebs->ip);
|
||||
regs.bp = pebs->bp;
|
||||
regs.sp = pebs->sp;
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
#include "perf_event.h"
|
||||
|
||||
#define UNCORE_PMU_NAME_LEN 32
|
||||
#define UNCORE_PMU_HRTIMER_INTERVAL (60 * NSEC_PER_SEC)
|
||||
#define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC)
|
||||
|
||||
#define UNCORE_FIXED_EVENT 0xff
|
||||
#define UNCORE_PMC_IDX_MAX_GENERIC 8
|
||||
|
@ -306,9 +306,10 @@ extern void *perf_trace_buf_prepare(int size, unsigned short type,
|
||||
|
||||
static inline void
|
||||
perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
|
||||
u64 count, struct pt_regs *regs, void *head)
|
||||
u64 count, struct pt_regs *regs, void *head,
|
||||
struct task_struct *task)
|
||||
{
|
||||
perf_tp_event(addr, count, raw_data, size, regs, head, rctx);
|
||||
perf_tp_event(addr, count, raw_data, size, regs, head, rctx, task);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1272,7 +1272,8 @@ static inline bool perf_paranoid_kernel(void)
|
||||
extern void perf_event_init(void);
|
||||
extern void perf_tp_event(u64 addr, u64 count, void *record,
|
||||
int entry_size, struct pt_regs *regs,
|
||||
struct hlist_head *head, int rctx);
|
||||
struct hlist_head *head, int rctx,
|
||||
struct task_struct *task);
|
||||
extern void perf_bp_event(struct perf_event *event, void *data);
|
||||
|
||||
#ifndef perf_misc_flags
|
||||
|
@ -73,6 +73,9 @@ DECLARE_EVENT_CLASS(sched_wakeup_template,
|
||||
__entry->prio = p->prio;
|
||||
__entry->success = success;
|
||||
__entry->target_cpu = task_cpu(p);
|
||||
)
|
||||
TP_perf_assign(
|
||||
__perf_task(p);
|
||||
),
|
||||
|
||||
TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
|
||||
@ -325,6 +328,7 @@ DECLARE_EVENT_CLASS(sched_stat_template,
|
||||
)
|
||||
TP_perf_assign(
|
||||
__perf_count(delay);
|
||||
__perf_task(tsk);
|
||||
),
|
||||
|
||||
TP_printk("comm=%s pid=%d delay=%Lu [ns]",
|
||||
|
@ -712,6 +712,9 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call
|
||||
#undef __perf_count
|
||||
#define __perf_count(c) __count = (c)
|
||||
|
||||
#undef __perf_task
|
||||
#define __perf_task(t) __task = (t)
|
||||
|
||||
#undef TP_perf_assign
|
||||
#define TP_perf_assign(args...) args
|
||||
|
||||
@ -725,6 +728,7 @@ perf_trace_##call(void *__data, proto) \
|
||||
struct ftrace_raw_##call *entry; \
|
||||
struct pt_regs __regs; \
|
||||
u64 __addr = 0, __count = 1; \
|
||||
struct task_struct *__task = NULL; \
|
||||
struct hlist_head *head; \
|
||||
int __entry_size; \
|
||||
int __data_size; \
|
||||
@ -752,7 +756,7 @@ perf_trace_##call(void *__data, proto) \
|
||||
\
|
||||
head = this_cpu_ptr(event_call->perf_events); \
|
||||
perf_trace_buf_submit(entry, __entry_size, rctx, __addr, \
|
||||
__count, &__regs, head); \
|
||||
__count, &__regs, head, __task); \
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -153,7 +153,8 @@ put_callchain_entry(int rctx)
|
||||
put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
|
||||
}
|
||||
|
||||
struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
|
||||
struct perf_callchain_entry *
|
||||
perf_callchain(struct perf_event *event, struct pt_regs *regs)
|
||||
{
|
||||
int rctx;
|
||||
struct perf_callchain_entry *entry;
|
||||
@ -178,6 +179,12 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
|
||||
}
|
||||
|
||||
if (regs) {
|
||||
/*
|
||||
* Disallow cross-task user callchains.
|
||||
*/
|
||||
if (event->ctx->task && event->ctx->task != current)
|
||||
goto exit_put;
|
||||
|
||||
perf_callchain_store(entry, PERF_CONTEXT_USER);
|
||||
perf_callchain_user(entry, regs);
|
||||
}
|
||||
|
@ -4039,7 +4039,7 @@ void perf_prepare_sample(struct perf_event_header *header,
|
||||
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
|
||||
int size = 1;
|
||||
|
||||
data->callchain = perf_callchain(regs);
|
||||
data->callchain = perf_callchain(event, regs);
|
||||
|
||||
if (data->callchain)
|
||||
size += data->callchain->nr;
|
||||
@ -5209,7 +5209,8 @@ static int perf_tp_event_match(struct perf_event *event,
|
||||
}
|
||||
|
||||
void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
|
||||
struct pt_regs *regs, struct hlist_head *head, int rctx)
|
||||
struct pt_regs *regs, struct hlist_head *head, int rctx,
|
||||
struct task_struct *task)
|
||||
{
|
||||
struct perf_sample_data data;
|
||||
struct perf_event *event;
|
||||
@ -5228,6 +5229,31 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
|
||||
perf_swevent_event(event, count, &data, regs);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we got specified a target task, also iterate its context and
|
||||
* deliver this event there too.
|
||||
*/
|
||||
if (task && task != current) {
|
||||
struct perf_event_context *ctx;
|
||||
struct trace_entry *entry = record;
|
||||
|
||||
rcu_read_lock();
|
||||
ctx = rcu_dereference(task->perf_event_ctxp[perf_sw_context]);
|
||||
if (!ctx)
|
||||
goto unlock;
|
||||
|
||||
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
|
||||
if (event->attr.type != PERF_TYPE_TRACEPOINT)
|
||||
continue;
|
||||
if (event->attr.config != entry->type)
|
||||
continue;
|
||||
if (perf_tp_event_match(event, &data, regs))
|
||||
perf_swevent_event(event, count, &data, regs);
|
||||
}
|
||||
unlock:
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
perf_swevent_put_recursion_context(rctx);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(perf_tp_event);
|
||||
|
@ -101,7 +101,8 @@ __output_copy(struct perf_output_handle *handle,
|
||||
}
|
||||
|
||||
/* Callchain handling */
|
||||
extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs);
|
||||
extern struct perf_callchain_entry *
|
||||
perf_callchain(struct perf_event *event, struct pt_regs *regs);
|
||||
extern int get_callchain_buffers(void);
|
||||
extern void put_callchain_buffers(void);
|
||||
|
||||
|
@ -281,7 +281,7 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip)
|
||||
|
||||
head = this_cpu_ptr(event_function.perf_events);
|
||||
perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
|
||||
1, ®s, head);
|
||||
1, ®s, head, NULL);
|
||||
|
||||
#undef ENTRY_SIZE
|
||||
}
|
||||
|
@ -1002,7 +1002,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
|
||||
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
|
||||
|
||||
head = this_cpu_ptr(call->perf_events);
|
||||
perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
|
||||
perf_trace_buf_submit(entry, size, rctx,
|
||||
entry->ip, 1, regs, head, NULL);
|
||||
}
|
||||
|
||||
/* Kretprobe profile handler */
|
||||
@ -1033,7 +1034,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
|
||||
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
|
||||
|
||||
head = this_cpu_ptr(call->perf_events);
|
||||
perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
|
||||
perf_trace_buf_submit(entry, size, rctx,
|
||||
entry->ret_ip, 1, regs, head, NULL);
|
||||
}
|
||||
#endif /* CONFIG_PERF_EVENTS */
|
||||
|
||||
|
@ -532,7 +532,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
|
||||
(unsigned long *)&rec->args);
|
||||
|
||||
head = this_cpu_ptr(sys_data->enter_event->perf_events);
|
||||
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
|
||||
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
|
||||
}
|
||||
|
||||
int perf_sysenter_enable(struct ftrace_event_call *call)
|
||||
@ -608,7 +608,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
|
||||
rec->ret = syscall_get_return_value(current, regs);
|
||||
|
||||
head = this_cpu_ptr(sys_data->exit_event->perf_events);
|
||||
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
|
||||
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
|
||||
}
|
||||
|
||||
int perf_sysexit_enable(struct ftrace_event_call *call)
|
||||
|
@ -670,7 +670,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
|
||||
call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
|
||||
|
||||
head = this_cpu_ptr(call->perf_events);
|
||||
perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
|
||||
perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL);
|
||||
|
||||
out:
|
||||
preempt_enable();
|
||||
|
Loading…
Reference in New Issue
Block a user