perf/core: Implement the 'perf_kprobe' PMU
A new PMU type, perf_kprobe is added. Based on attr from perf_event_open(), perf_kprobe creates a kprobe (or kretprobe) for the perf_event. This kprobe is private to this perf_event, and thus not added to global lists, and not available in tracefs. Two functions, create_local_trace_kprobe() and destroy_local_trace_kprobe() are added to create and destroy these local trace_kprobes. Signed-off-by: Song Liu <songliubraving@fb.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Yonghong Song <yhs@fb.com> Reviewed-by: Josef Bacik <jbacik@fb.com> Cc: <daniel@iogearbox.net> Cc: <davem@davemloft.net> Cc: <kernel-team@fb.com> Cc: <rostedt@goodmis.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/20171206224518.3598254-6-songliubraving@fb.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
		
							parent
							
								
									0d8dd67be0
								
							
						
					
					
						commit
						e12f03d703
					
				| @ -533,6 +533,10 @@ extern int  perf_trace_init(struct perf_event *event); | ||||
| extern void perf_trace_destroy(struct perf_event *event); | ||||
| extern int  perf_trace_add(struct perf_event *event, int flags); | ||||
| extern void perf_trace_del(struct perf_event *event, int flags); | ||||
#ifdef CONFIG_KPROBE_EVENTS
/* Create/destroy the private kprobe backing a perf_event (perf_kprobe PMU) */
extern int  perf_kprobe_init(struct perf_event *event, bool is_retprobe);
extern void perf_kprobe_destroy(struct perf_event *event);
#endif
| extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id, | ||||
| 				     char *filter_str); | ||||
| extern void ftrace_profile_free_filter(struct perf_event *event); | ||||
|  | ||||
| @ -7992,9 +7992,77 @@ static struct pmu perf_tracepoint = { | ||||
| 	.read		= perf_swevent_read, | ||||
| }; | ||||
| 
 | ||||
| #ifdef CONFIG_KPROBE_EVENTS | ||||
/*
 * Flags in config, used by dynamic PMU kprobe and uprobe
 * The flags should match following PMU_FORMAT_ATTR().
 *
 * PERF_PROBE_CONFIG_IS_RETPROBE if set, create kretprobe/uretprobe
 *                               if not set, create kprobe/uprobe
 */
enum perf_probe_config {
	PERF_PROBE_CONFIG_IS_RETPROBE = 1U << 0,  /* [k,u]retprobe */
};

/* Expose the retprobe bit via sysfs: .../perf_kprobe/format/retprobe */
PMU_FORMAT_ATTR(retprobe, "config:0");
| 
 | ||||
/* sysfs attributes describing the probe PMU's config format */
static struct attribute *probe_attrs[] = {
	&format_attr_retprobe.attr,
	NULL,
};

/* Published under the "format" directory of the PMU's sysfs node */
static struct attribute_group probe_format_group = {
	.name = "format",
	.attrs = probe_attrs,
};

/* NULL-terminated list of attribute groups for the probe PMU */
static const struct attribute_group *probe_attr_groups[] = {
	&probe_format_group,
	NULL,
};
| 
 | ||||
static int perf_kprobe_event_init(struct perf_event *event);

/*
 * PMU backing kprobe/kretprobe events created via perf_event_open().
 * Reuses the software-event add/del/start/stop/read callbacks; only
 * event_init is kprobe-specific.
 */
static struct pmu perf_kprobe = {
	.task_ctx_nr	= perf_sw_context,
	.event_init	= perf_kprobe_event_init,
	.add		= perf_trace_add,
	.del		= perf_trace_del,
	.start		= perf_swevent_start,
	.stop		= perf_swevent_stop,
	.read		= perf_swevent_read,
	.attr_groups	= probe_attr_groups,
};
| 
 | ||||
| static int perf_kprobe_event_init(struct perf_event *event) | ||||
| { | ||||
| 	int err; | ||||
| 	bool is_retprobe; | ||||
| 
 | ||||
| 	if (event->attr.type != perf_kprobe.type) | ||||
| 		return -ENOENT; | ||||
| 	/*
 | ||||
| 	 * no branch sampling for probe events | ||||
| 	 */ | ||||
| 	if (has_branch_stack(event)) | ||||
| 		return -EOPNOTSUPP; | ||||
| 
 | ||||
| 	is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE; | ||||
| 	err = perf_kprobe_init(event, is_retprobe); | ||||
| 	if (err) | ||||
| 		return err; | ||||
| 
 | ||||
| 	event->destroy = perf_kprobe_destroy; | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
| #endif /* CONFIG_KPROBE_EVENTS */ | ||||
| 
 | ||||
/* Register the tracepoint PMU and, when configured, the kprobe PMU. */
static inline void perf_tp_register(void)
{
	perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
#ifdef CONFIG_KPROBE_EVENTS
	/* type = -1: let the core dynamically allocate a PMU type id */
	perf_pmu_register(&perf_kprobe, "kprobe", -1);
#endif
}
| 
 | ||||
| static void perf_event_free_filter(struct perf_event *event) | ||||
| @ -8071,13 +8139,28 @@ static void perf_event_free_bpf_handler(struct perf_event *event) | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| /*
 | ||||
|  * returns true if the event is a tracepoint, or a kprobe/upprobe created | ||||
|  * with perf_event_open() | ||||
|  */ | ||||
| static inline bool perf_event_is_tracing(struct perf_event *event) | ||||
| { | ||||
| 	if (event->pmu == &perf_tracepoint) | ||||
| 		return true; | ||||
| #ifdef CONFIG_KPROBE_EVENTS | ||||
| 	if (event->pmu == &perf_kprobe) | ||||
| 		return true; | ||||
| #endif | ||||
| 	return false; | ||||
| } | ||||
| 
 | ||||
| static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) | ||||
| { | ||||
| 	bool is_kprobe, is_tracepoint, is_syscall_tp; | ||||
| 	struct bpf_prog *prog; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	if (event->attr.type != PERF_TYPE_TRACEPOINT) | ||||
| 	if (!perf_event_is_tracing(event)) | ||||
| 		return perf_event_set_bpf_handler(event, prog_fd); | ||||
| 
 | ||||
| 	is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE; | ||||
| @ -8116,7 +8199,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) | ||||
| 
 | ||||
| static void perf_event_free_bpf_prog(struct perf_event *event) | ||||
| { | ||||
| 	if (event->attr.type != PERF_TYPE_TRACEPOINT) { | ||||
| 	if (!perf_event_is_tracing(event)) { | ||||
| 		perf_event_free_bpf_handler(event); | ||||
| 		return; | ||||
| 	} | ||||
| @ -8535,47 +8618,36 @@ fail_clear_files: | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static int | ||||
| perf_tracepoint_set_filter(struct perf_event *event, char *filter_str) | ||||
| { | ||||
| 	struct perf_event_context *ctx = event->ctx; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Beware, here be dragons!! | ||||
| 	 * | ||||
| 	 * the tracepoint muck will deadlock against ctx->mutex, but the tracepoint | ||||
| 	 * stuff does not actually need it. So temporarily drop ctx->mutex. As per | ||||
| 	 * perf_event_ctx_lock() we already have a reference on ctx. | ||||
| 	 * | ||||
| 	 * This can result in event getting moved to a different ctx, but that | ||||
| 	 * does not affect the tracepoint state. | ||||
| 	 */ | ||||
| 	mutex_unlock(&ctx->mutex); | ||||
| 	ret = ftrace_profile_set_filter(event, event->attr.config, filter_str); | ||||
| 	mutex_lock(&ctx->mutex); | ||||
| 
 | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static int perf_event_set_filter(struct perf_event *event, void __user *arg) | ||||
| { | ||||
| 	char *filter_str; | ||||
| 	int ret = -EINVAL; | ||||
| 
 | ||||
| 	if ((event->attr.type != PERF_TYPE_TRACEPOINT || | ||||
| 	    !IS_ENABLED(CONFIG_EVENT_TRACING)) && | ||||
| 	    !has_addr_filter(event)) | ||||
| 		return -EINVAL; | ||||
| 	char *filter_str; | ||||
| 
 | ||||
| 	filter_str = strndup_user(arg, PAGE_SIZE); | ||||
| 	if (IS_ERR(filter_str)) | ||||
| 		return PTR_ERR(filter_str); | ||||
| 
 | ||||
| 	if (IS_ENABLED(CONFIG_EVENT_TRACING) && | ||||
| 	    event->attr.type == PERF_TYPE_TRACEPOINT) | ||||
| 		ret = perf_tracepoint_set_filter(event, filter_str); | ||||
| 	else if (has_addr_filter(event)) | ||||
| #ifdef CONFIG_EVENT_TRACING | ||||
| 	if (perf_event_is_tracing(event)) { | ||||
| 		struct perf_event_context *ctx = event->ctx; | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * Beware, here be dragons!! | ||||
| 		 * | ||||
| 		 * the tracepoint muck will deadlock against ctx->mutex, but | ||||
| 		 * the tracepoint stuff does not actually need it. So | ||||
| 		 * temporarily drop ctx->mutex. As per perf_event_ctx_lock() we | ||||
| 		 * already have a reference on ctx. | ||||
| 		 * | ||||
| 		 * This can result in event getting moved to a different ctx, | ||||
| 		 * but that does not affect the tracepoint state. | ||||
| 		 */ | ||||
| 		mutex_unlock(&ctx->mutex); | ||||
| 		ret = ftrace_profile_set_filter(event, event->attr.config, filter_str); | ||||
| 		mutex_lock(&ctx->mutex); | ||||
| 	} else | ||||
| #endif | ||||
| 	if (has_addr_filter(event)) | ||||
| 		ret = perf_event_set_addr_filter(event, filter_str); | ||||
| 
 | ||||
| 	kfree(filter_str); | ||||
|  | ||||
| @ -8,6 +8,7 @@ | ||||
| #include <linux/module.h> | ||||
| #include <linux/kprobes.h> | ||||
| #include "trace.h" | ||||
| #include "trace_probe.h" | ||||
| 
 | ||||
| static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS]; | ||||
| 
 | ||||
| @ -237,6 +238,54 @@ void perf_trace_destroy(struct perf_event *p_event) | ||||
| 	mutex_unlock(&event_mutex); | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_KPROBE_EVENTS | ||||
| int perf_kprobe_init(struct perf_event *p_event, bool is_retprobe) | ||||
| { | ||||
| 	int ret; | ||||
| 	char *func = NULL; | ||||
| 	struct trace_event_call *tp_event; | ||||
| 
 | ||||
| 	if (p_event->attr.kprobe_func) { | ||||
| 		func = kzalloc(KSYM_NAME_LEN, GFP_KERNEL); | ||||
| 		if (!func) | ||||
| 			return -ENOMEM; | ||||
| 		ret = strncpy_from_user( | ||||
| 			func, u64_to_user_ptr(p_event->attr.kprobe_func), | ||||
| 			KSYM_NAME_LEN); | ||||
| 		if (ret < 0) | ||||
| 			goto out; | ||||
| 
 | ||||
| 		if (func[0] == '\0') { | ||||
| 			kfree(func); | ||||
| 			func = NULL; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	tp_event = create_local_trace_kprobe( | ||||
| 		func, (void *)(unsigned long)(p_event->attr.kprobe_addr), | ||||
| 		p_event->attr.probe_offset, is_retprobe); | ||||
| 	if (IS_ERR(tp_event)) { | ||||
| 		ret = PTR_ERR(tp_event); | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	ret = perf_trace_event_init(tp_event, p_event); | ||||
| 	if (ret) | ||||
| 		destroy_local_trace_kprobe(tp_event); | ||||
| out: | ||||
| 	kfree(func); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
/*
 * Tear down a perf_kprobe event: close and unregister the perf side,
 * then destroy the private (local) trace_kprobe backing it.
 */
void perf_kprobe_destroy(struct perf_event *p_event)
{
	perf_trace_event_close(p_event);
	perf_trace_event_unreg(p_event);

	destroy_local_trace_kprobe(p_event->tp_event);
}
| #endif /* CONFIG_KPROBE_EVENTS */ | ||||
| 
 | ||||
| int perf_trace_add(struct perf_event *p_event, int flags) | ||||
| { | ||||
| 	struct trace_event_call *tp_event = p_event->tp_event; | ||||
|  | ||||
| @ -438,6 +438,14 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) | ||||
| 			disable_kprobe(&tk->rp.kp); | ||||
| 		wait = 1; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * if tk is not added to any list, it must be a local trace_kprobe | ||||
| 	 * created with perf_event_open. We don't need to wait for these | ||||
| 	 * trace_kprobes | ||||
| 	 */ | ||||
| 	if (list_empty(&tk->list)) | ||||
| 		wait = 0; | ||||
|  out: | ||||
| 	if (wait) { | ||||
| 		/*
 | ||||
| @ -1313,12 +1321,9 @@ static struct trace_event_functions kprobe_funcs = { | ||||
| 	.trace		= print_kprobe_event | ||||
| }; | ||||
| 
 | ||||
| static int register_kprobe_event(struct trace_kprobe *tk) | ||||
| static inline void init_trace_event_call(struct trace_kprobe *tk, | ||||
| 					 struct trace_event_call *call) | ||||
| { | ||||
| 	struct trace_event_call *call = &tk->tp.call; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	/* Initialize trace_event_call */ | ||||
| 	INIT_LIST_HEAD(&call->class->fields); | ||||
| 	if (trace_kprobe_is_return(tk)) { | ||||
| 		call->event.funcs = &kretprobe_funcs; | ||||
| @ -1327,6 +1332,19 @@ static int register_kprobe_event(struct trace_kprobe *tk) | ||||
| 		call->event.funcs = &kprobe_funcs; | ||||
| 		call->class->define_fields = kprobe_event_define_fields; | ||||
| 	} | ||||
| 
 | ||||
| 	call->flags = TRACE_EVENT_FL_KPROBE; | ||||
| 	call->class->reg = kprobe_register; | ||||
| 	call->data = tk; | ||||
| } | ||||
| 
 | ||||
| static int register_kprobe_event(struct trace_kprobe *tk) | ||||
| { | ||||
| 	struct trace_event_call *call = &tk->tp.call; | ||||
| 	int ret = 0; | ||||
| 
 | ||||
| 	init_trace_event_call(tk, call); | ||||
| 
 | ||||
| 	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) | ||||
| 		return -ENOMEM; | ||||
| 	ret = register_trace_event(&call->event); | ||||
| @ -1334,9 +1352,6 @@ static int register_kprobe_event(struct trace_kprobe *tk) | ||||
| 		kfree(call->print_fmt); | ||||
| 		return -ENODEV; | ||||
| 	} | ||||
| 	call->flags = TRACE_EVENT_FL_KPROBE; | ||||
| 	call->class->reg = kprobe_register; | ||||
| 	call->data = tk; | ||||
| 	ret = trace_add_event_call(call); | ||||
| 	if (ret) { | ||||
| 		pr_info("Failed to register kprobe event: %s\n", | ||||
| @ -1358,6 +1373,66 @@ static int unregister_kprobe_event(struct trace_kprobe *tk) | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_PERF_EVENTS | ||||
| /* create a trace_kprobe, but don't add it to global lists */ | ||||
| struct trace_event_call * | ||||
| create_local_trace_kprobe(char *func, void *addr, unsigned long offs, | ||||
| 			  bool is_return) | ||||
| { | ||||
| 	struct trace_kprobe *tk; | ||||
| 	int ret; | ||||
| 	char *event; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * local trace_kprobes are not added to probe_list, so they are never | ||||
| 	 * searched in find_trace_kprobe(). Therefore, there is no concern of | ||||
| 	 * duplicated name here. | ||||
| 	 */ | ||||
| 	event = func ? func : "DUMMY_EVENT"; | ||||
| 
 | ||||
| 	tk = alloc_trace_kprobe(KPROBE_EVENT_SYSTEM, event, (void *)addr, func, | ||||
| 				offs, 0 /* maxactive */, 0 /* nargs */, | ||||
| 				is_return); | ||||
| 
 | ||||
| 	if (IS_ERR(tk)) { | ||||
| 		pr_info("Failed to allocate trace_probe.(%d)\n", | ||||
| 			(int)PTR_ERR(tk)); | ||||
| 		return ERR_CAST(tk); | ||||
| 	} | ||||
| 
 | ||||
| 	init_trace_event_call(tk, &tk->tp.call); | ||||
| 
 | ||||
| 	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) { | ||||
| 		ret = -ENOMEM; | ||||
| 		goto error; | ||||
| 	} | ||||
| 
 | ||||
| 	ret = __register_trace_kprobe(tk); | ||||
| 	if (ret < 0) | ||||
| 		goto error; | ||||
| 
 | ||||
| 	return &tk->tp.call; | ||||
| error: | ||||
| 	free_trace_kprobe(tk); | ||||
| 	return ERR_PTR(ret); | ||||
| } | ||||
| 
 | ||||
| void destroy_local_trace_kprobe(struct trace_event_call *event_call) | ||||
| { | ||||
| 	struct trace_kprobe *tk; | ||||
| 
 | ||||
| 	tk = container_of(event_call, struct trace_kprobe, tp.call); | ||||
| 
 | ||||
| 	if (trace_probe_is_enabled(&tk->tp)) { | ||||
| 		WARN_ON(1); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	__unregister_trace_kprobe(tk); | ||||
| 	free_trace_kprobe(tk); | ||||
| } | ||||
| #endif /* CONFIG_PERF_EVENTS */ | ||||
| 
 | ||||
| /* Make a tracefs interface for controlling probe points */ | ||||
| static __init int init_kprobe_trace(void) | ||||
| { | ||||
|  | ||||
| @ -404,3 +404,10 @@ store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs, | ||||
| } | ||||
| 
 | ||||
| extern int set_print_fmt(struct trace_probe *tp, bool is_return); | ||||
| 
 | ||||
#ifdef CONFIG_PERF_EVENTS
/* Local (perf_event_open-private) trace_kprobe create/destroy helpers */
extern struct trace_event_call *
create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
			  bool is_return);
extern void destroy_local_trace_kprobe(struct trace_event_call *event_call);
#endif
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user