bpf: Introduce helper bpf_get_task_stack()
Introduce helper bpf_get_task_stack(), which dumps the stack trace of a given task. This is different from bpf_get_stack(), which gets the stack trace of the current task. One potential use case of bpf_get_task_stack() is to call it from bpf_iter__task and dump all /proc/<pid>/stack entries to a seq_file.

bpf_get_task_stack() uses stack_trace_save_tsk() instead of get_perf_callchain() for the kernel stack. The benefit of this choice is that stack_trace_save_tsk() doesn't require changes in arch/. The downside is that stack_trace_save_tsk() dumps the stack trace to an unsigned long array, so for 32-bit systems we need to translate it to a u64 array.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200630062846.664389-3-songliubraving@fb.com
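To make the use case mentioned above concrete, here is a minimal sketch (not part of this patch) of an iter/task program that dumps each task's kernel stack to the iterator's seq_file. The program name, section name, and buffer size are illustrative assumptions; it presumes a BTF-enabled kernel (vmlinux.h) and libbpf's bpf_helpers.h / bpf_tracing.h.

/* Hedged sketch: an iter/task program using the new helper.
 * MAX_ENTRIES and the program name are illustrative assumptions.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

#define MAX_ENTRIES 64

/* program-provided buffer; the helper fills it with u64 instruction pointers */
__u64 entries[MAX_ENTRIES];

SEC("iter/task")
int dump_task_stack(struct bpf_iter__task *ctx)
{
	struct seq_file *seq = ctx->meta->seq;
	struct task_struct *task = ctx->task;
	long retlen, i;

	if (!task)
		return 0;

	/* flags == 0: kernel stack, no frames skipped */
	retlen = bpf_get_task_stack(task, entries, sizeof(entries), 0);
	if (retlen < 0)
		return 0;

	BPF_SEQ_PRINTF(seq, "pid: %u num_entries: %lu\n",
		       task->pid, retlen / sizeof(__u64));
	for (i = 0; i < MAX_ENTRIES; i++) {
		if (retlen > i * sizeof(__u64))
			BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]);
	}
	return 0;
}

Reading the resulting iterator file then produces per-task output in the style of /proc/<pid>/stack; a matching userspace loader sketch appears after the diff below.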
commit fa28dcb82a
parent d141b8bc57
include/linux/bpf.h

@@ -1627,6 +1627,7 @@ extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
extern const struct bpf_func_proto bpf_get_current_comm_proto;
extern const struct bpf_func_proto bpf_get_stackid_proto;
extern const struct bpf_func_proto bpf_get_stack_proto;
extern const struct bpf_func_proto bpf_get_task_stack_proto;
extern const struct bpf_func_proto bpf_sock_map_update_proto;
extern const struct bpf_func_proto bpf_sock_hash_update_proto;
extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
include/uapi/linux/bpf.h

@@ -3285,6 +3285,39 @@ union bpf_attr {
 *		Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
 *	Return
 *		*sk* if casting is valid, or NULL otherwise.
 *
 * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
 *	Description
 *		Return a user or a kernel stack in bpf program provided buffer.
 *		To achieve this, the helper needs *task*, which is a valid
 *		pointer to struct task_struct. To store the stacktrace, the
 *		bpf program provides *buf* with a nonnegative *size*.
 *
 *		The last argument, *flags*, holds the number of stack frames to
 *		skip (from 0 to 255), masked with
 *		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
 *		the following flags:
 *
 *		**BPF_F_USER_STACK**
 *			Collect a user space stack instead of a kernel stack.
 *		**BPF_F_USER_BUILD_ID**
 *			Collect buildid+offset instead of ips for user stack,
 *			only valid if **BPF_F_USER_STACK** is also specified.
 *
 *		**bpf_get_task_stack**\ () can collect up to
 *		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
 *		to sufficient large buffer size. Note that
 *		this limit can be controlled with the **sysctl** program, and
 *		that it should be manually increased in order to profile long
 *		user stacks (such as stacks for Java programs). To do so, use:
 *
 *		::
 *
 *			# sysctl kernel.perf_event_max_stack=<new value>
 *	Return
 *		A non-negative value equal to or less than *size* on success,
 *		or a negative error in case of failure.
 *
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
@@ -3427,7 +3460,9 @@ union bpf_attr {
	FN(skc_to_tcp_sock),		\
	FN(skc_to_tcp_timewait_sock),	\
	FN(skc_to_tcp_request_sock),	\
	FN(skc_to_udp6_sock),
	FN(skc_to_udp6_sock),		\
	FN(get_task_stack),		\
	/* */

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
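The flags layout documented above can be illustrated with a short hedged sketch (not from this patch): the low byte selects how many frames to skip, and the remaining bits pick the user-stack and build-id modes. The section name, program name, and array size are assumptions for illustration.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

/* With BPF_F_USER_BUILD_ID set, each entry is a struct bpf_stack_build_id
 * (build id + offset) rather than a raw instruction pointer.
 */
struct bpf_stack_build_id build_ids[32];

SEC("iter/task")
int dump_user_stack(struct bpf_iter__task *ctx)
{
	struct task_struct *task = ctx->task;
	__u64 flags;
	long ret;

	if (!task)
		return 0;

	flags = (2 & BPF_F_SKIP_FIELD_MASK) |	/* skip the two innermost frames */
		BPF_F_USER_STACK |		/* user-space stack, not kernel  */
		BPF_F_USER_BUILD_ID;		/* build-id+offset entries       */

	ret = bpf_get_task_stack(task, build_ids, sizeof(build_ids), flags);
	if (ret < 0)
		return 0;

	/* the first ret bytes of build_ids[] are now valid */
	return 0;
}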
kernel/bpf/stackmap.c

@@ -348,6 +348,40 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
	}
}

static struct perf_callchain_entry *
get_callchain_entry_for_task(struct task_struct *task, u32 init_nr)
{
	struct perf_callchain_entry *entry;
	int rctx;

	entry = get_callchain_entry(&rctx);

	if (!entry)
		return NULL;

	entry->nr = init_nr +
		stack_trace_save_tsk(task, (unsigned long *)(entry->ip + init_nr),
				     sysctl_perf_event_max_stack - init_nr, 0);

	/* stack_trace_save_tsk() works on unsigned long array, while
	 * perf_callchain_entry uses u64 array. For 32-bit systems, it is
	 * necessary to fix this mismatch.
	 */
	if (__BITS_PER_LONG != 64) {
		unsigned long *from = (unsigned long *) entry->ip;
		u64 *to = entry->ip;
		int i;

		/* copy data from the end to avoid using extra buffer */
		for (i = entry->nr - 1; i >= (int)init_nr; i--)
			to[i] = (u64)(from[i]);
	}

	put_callchain_entry(rctx);

	return entry;
}

BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
	   u64, flags)
{
@@ -448,8 +482,8 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
	   u64, flags)
static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
			    void *buf, u32 size, u64 flags)
{
	u32 init_nr, trace_nr, copy_len, elem_size, num_elem;
	bool user_build_id = flags & BPF_F_USER_BUILD_ID;
@@ -471,13 +505,22 @@ BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
	if (unlikely(size % elem_size))
		goto clear;

	/* cannot get valid user stack for task without user_mode regs */
	if (task && user && !user_mode(regs))
		goto err_fault;

	num_elem = size / elem_size;
	if (sysctl_perf_event_max_stack < num_elem)
		init_nr = 0;
	else
		init_nr = sysctl_perf_event_max_stack - num_elem;
	trace = get_perf_callchain(regs, init_nr, kernel, user,
				   sysctl_perf_event_max_stack, false, false);

	if (kernel && task)
		trace = get_callchain_entry_for_task(task, init_nr);
	else
		trace = get_perf_callchain(regs, init_nr, kernel, user,
					   sysctl_perf_event_max_stack,
					   false, false);
	if (unlikely(!trace))
		goto err_fault;

@@ -505,6 +548,12 @@ clear:
	return err;
}

BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
	   u64, flags)
{
	return __bpf_get_stack(regs, NULL, buf, size, flags);
}

const struct bpf_func_proto bpf_get_stack_proto = {
	.func		= bpf_get_stack,
	.gpl_only	= true,
@@ -515,6 +564,26 @@ const struct bpf_func_proto bpf_get_stack_proto = {
	.arg4_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
	   u32, size, u64, flags)
{
	struct pt_regs *regs = task_pt_regs(task);

	return __bpf_get_stack(regs, task, buf, size, flags);
}

static int bpf_get_task_stack_btf_ids[5];
const struct bpf_func_proto bpf_get_task_stack_proto = {
	.func		= bpf_get_task_stack,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
	.btf_id		= bpf_get_task_stack_btf_ids,
};

/* Called from eBPF program */
static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
{
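The in-place conversion in get_callchain_entry_for_task() above relies on copying from the end of the array: the 8-byte destination slot i only overlaps the 4-byte source slots 2*i and 2*i+1, which sit at or beyond index i and have already been read once the loop runs downwards. The standalone userspace sketch below (an illustration, not kernel code) demonstrates the same trick of widening a 32-bit array to 64-bit in place without a scratch buffer.

/* Userspace demo of the copy-from-the-end widening used on 32-bit systems. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t storage[4];			/* big enough for the widened result   */
	uint32_t *from = (uint32_t *)storage;	/* plays the role of unsigned long[]   */
	uint64_t *to = storage;			/* plays the role of entry->ip (u64[]) */
	int nr = 4, i;

	for (i = 0; i < nr; i++)		/* pretend stack_trace_save_tsk() ran  */
		from[i] = 0x1000 + i;

	/* copy data from the end to avoid an extra buffer: destination slot i
	 * only overlaps source slots 2*i and 2*i+1, which were already read
	 * in earlier (higher) iterations.
	 */
	for (i = nr - 1; i >= 0; i--)
		to[i] = (uint64_t)from[i];

	for (i = 0; i < nr; i++)
		printf("to[%d] = 0x%" PRIx64 "\n", i, to[i]);
	return 0;
}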
kernel/bpf/verifier.c

@@ -4864,7 +4864,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
	if (err)
		return err;

	if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
	if ((func_id == BPF_FUNC_get_stack ||
	     func_id == BPF_FUNC_get_task_stack) &&
	    !env->prog->has_callchain_buf) {
		const char *err_str;

#ifdef CONFIG_PERF_EVENTS
kernel/trace/bpf_trace.c

@@ -1137,6 +1137,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
		return &bpf_ringbuf_query_proto;
	case BPF_FUNC_jiffies64:
		return &bpf_jiffies64_proto;
	case BPF_FUNC_get_task_stack:
		return &bpf_get_task_stack_proto;
	default:
		return NULL;
	}
scripts/bpf_helpers_doc.py

@@ -426,6 +426,7 @@ class PrinterHelpers(Printer):
            'struct tcp_timewait_sock',
            'struct tcp_request_sock',
            'struct udp6_sock',
            'struct task_struct',

            'struct __sk_buff',
            'struct sk_msg_md',
@@ -468,6 +469,7 @@ class PrinterHelpers(Printer):
            'struct tcp_timewait_sock',
            'struct tcp_request_sock',
            'struct udp6_sock',
            'struct task_struct',
    }
    mapped_types = {
            'u8': '__u8',
tools/include/uapi/linux/bpf.h

@@ -3285,6 +3285,39 @@ union bpf_attr {
 *		Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
 *	Return
 *		*sk* if casting is valid, or NULL otherwise.
 *
 * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
 *	Description
 *		Return a user or a kernel stack in bpf program provided buffer.
 *		To achieve this, the helper needs *task*, which is a valid
 *		pointer to struct task_struct. To store the stacktrace, the
 *		bpf program provides *buf* with a nonnegative *size*.
 *
 *		The last argument, *flags*, holds the number of stack frames to
 *		skip (from 0 to 255), masked with
 *		**BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
 *		the following flags:
 *
 *		**BPF_F_USER_STACK**
 *			Collect a user space stack instead of a kernel stack.
 *		**BPF_F_USER_BUILD_ID**
 *			Collect buildid+offset instead of ips for user stack,
 *			only valid if **BPF_F_USER_STACK** is also specified.
 *
 *		**bpf_get_task_stack**\ () can collect up to
 *		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
 *		to sufficient large buffer size. Note that
 *		this limit can be controlled with the **sysctl** program, and
 *		that it should be manually increased in order to profile long
 *		user stacks (such as stacks for Java programs). To do so, use:
 *
 *		::
 *
 *			# sysctl kernel.perf_event_max_stack=<new value>
 *	Return
 *		A non-negative value equal to or less than *size* on success,
 *		or a negative error in case of failure.
 *
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
@@ -3427,7 +3460,9 @@ union bpf_attr {
	FN(skc_to_tcp_sock),		\
	FN(skc_to_tcp_timewait_sock),	\
	FN(skc_to_tcp_request_sock),	\
	FN(skc_to_udp6_sock),
	FN(skc_to_udp6_sock),		\
	FN(get_task_stack),		\
	/* */

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
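For completeness, a hedged userspace sketch (not part of this patch) of how the iterator program from the first example could be attached and read with libbpf; the object file name "task_stack.bpf.o" and program name "dump_task_stack" are assumptions carried over from that sketch.

#include <stdio.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

int main(void)
{
	struct bpf_object *obj;
	struct bpf_program *prog;
	struct bpf_link *link;
	char buf[4096];
	ssize_t n;
	int iter_fd;

	obj = bpf_object__open_file("task_stack.bpf.o", NULL);
	if (libbpf_get_error(obj) || bpf_object__load(obj))
		return 1;

	prog = bpf_object__find_program_by_name(obj, "dump_task_stack");
	if (!prog)
		return 1;

	/* attach as a BPF iterator; reading the iterator fd runs the program
	 * once per task and returns the accumulated seq_file output
	 */
	link = bpf_program__attach_iter(prog, NULL);
	if (libbpf_get_error(link))
		return 1;

	iter_fd = bpf_iter_create(bpf_link__fd(link));
	if (iter_fd < 0)
		return 1;

	while ((n = read(iter_fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);

	close(iter_fd);
	bpf_link__destroy(link);
	bpf_object__close(obj);
	return 0;
}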