Function graph tracing recurses into itself when stackleak is enabled,
causing the ftrace graph selftest to run for up to 90 seconds and
trigger the softlockup watchdog.

Breakpoint 2, ftrace_graph_caller () at ../arch/arm64/kernel/entry-ftrace.S:200
200		mcount_get_lr_addr	x0	// pointer to function's saved lr
(gdb) bt
#0  ftrace_graph_caller () at ../arch/arm64/kernel/entry-ftrace.S:200
#1  0xffffff80081d5280 in ftrace_caller () at ../arch/arm64/kernel/entry-ftrace.S:153
#2  0xffffff8008555484 in stackleak_track_stack () at ../kernel/stackleak.c:106
#3  0xffffff8008421ff8 in ftrace_ops_test (ops=0xffffff8009eaa840 <graph_ops>, ip=18446743524091297036, regs=<optimized out>) at ../kernel/trace/ftrace.c:1507
#4  0xffffff8008428770 in __ftrace_ops_list_func (regs=<optimized out>, ignored=<optimized out>, parent_ip=<optimized out>, ip=<optimized out>) at ../kernel/trace/ftrace.c:6286
#5  ftrace_ops_no_ops (ip=18446743524091297036, parent_ip=18446743524091242824) at ../kernel/trace/ftrace.c:6321
#6  0xffffff80081d5280 in ftrace_caller () at ../arch/arm64/kernel/entry-ftrace.S:153
#7  0xffffff800832fd10 in irq_find_mapping (domain=0xffffffc03fc4bc80, hwirq=27) at ../kernel/irq/irqdomain.c:876
#8  0xffffff800832294c in __handle_domain_irq (domain=0xffffffc03fc4bc80, hwirq=27, lookup=true, regs=0xffffff800814b840) at ../kernel/irq/irqdesc.c:650
#9  0xffffff80081d52b4 in ftrace_graph_caller () at ../arch/arm64/kernel/entry-ftrace.S:205

Rework so that we mark stackleak_track_stack() as notrace.

Co-developed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
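For reference, a minimal sketch of what the notrace annotation in the fix
does, assuming the common GCC definition (the kernel's actual macro in
include/linux/compiler_types.h has compiler-specific variants):

	/*
	 * Functions marked notrace get no mcount/__fentry__ profiling call,
	 * so ftrace (and with it graph tracing) can never re-enter them.
	 */
	#define notrace __attribute__((no_instrument_function))

	void __used notrace stackleak_track_stack(void);

With that annotation, the plugin-inserted call chain seen in the backtrace
(ftrace_ops_test() -> stackleak_track_stack() -> ftrace_caller()) can no
longer loop back into the tracer.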
kernel/stackleak.c (135 lines, 3.9 KiB, C):
// SPDX-License-Identifier: GPL-2.0
/*
 * This code fills the used part of the kernel stack with a poison value
 * before returning to userspace. It's part of the STACKLEAK feature
 * ported from grsecurity/PaX.
 *
 * Author: Alexander Popov <alex.popov@linux.com>
 *
 * STACKLEAK reduces the information which kernel stack leak bugs can
 * reveal and blocks some uninitialized stack variable attacks.
 */

#include <linux/stackleak.h>
#include <linux/kprobes.h>

#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
#include <linux/jump_label.h>
#include <linux/sysctl.h>

static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass);

int stack_erasing_sysctl(struct ctl_table *table, int write,
			void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = 0;
	int state = !static_branch_unlikely(&stack_erasing_bypass);
	int prev_state = state;

	table->data = &state;
	table->maxlen = sizeof(int);
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	state = !!state;
	if (ret || !write || state == prev_state)
		return ret;

	if (state)
		static_branch_disable(&stack_erasing_bypass);
	else
		static_branch_enable(&stack_erasing_bypass);

	pr_warn("stackleak: kernel stack erasing is %s\n",
					state ? "enabled" : "disabled");
	return ret;
}

#define skip_erasing()	static_branch_unlikely(&stack_erasing_bypass)
#else
#define skip_erasing()	false
#endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */
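
/*
 * When CONFIG_STACKLEAK_RUNTIME_DISABLE=y, the bypass above is flipped at
 * runtime through the kernel.stack_erasing sysctl (its ctl_table entry
 * lives in kernel/sysctl.c), for example:
 *
 *	echo 0 > /proc/sys/kernel/stack_erasing	# skip erasing
 *	echo 1 > /proc/sys/kernel/stack_erasing	# erase again
 */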

asmlinkage void notrace stackleak_erase(void)
{
	/* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
	unsigned long kstack_ptr = current->lowest_stack;
	unsigned long boundary = (unsigned long)end_of_stack(current);
	unsigned int poison_count = 0;
	const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);

	if (skip_erasing())
		return;

	/* Check that 'lowest_stack' value is sane */
	if (unlikely(kstack_ptr - boundary >= THREAD_SIZE))
		kstack_ptr = boundary;

	/* Search for the poison value in the kernel stack */
	while (kstack_ptr > boundary && poison_count <= depth) {
		if (*(unsigned long *)kstack_ptr == STACKLEAK_POISON)
			poison_count++;
		else
			poison_count = 0;

		kstack_ptr -= sizeof(unsigned long);
	}

	/*
	 * One 'long int' at the bottom of the thread stack is reserved and
	 * should not be poisoned (see CONFIG_SCHED_STACK_END_CHECK=y).
	 */
	if (kstack_ptr == boundary)
		kstack_ptr += sizeof(unsigned long);

#ifdef CONFIG_STACKLEAK_METRICS
	current->prev_lowest_stack = kstack_ptr;
#endif

	/*
	 * Now write the poison value to the kernel stack. Start from
	 * 'kstack_ptr' and move up till the new 'boundary'. We assume that
	 * the stack pointer doesn't change when we write poison.
	 */
	if (on_thread_stack())
		boundary = current_stack_pointer;
	else
		boundary = current_top_of_stack();

	while (kstack_ptr < boundary) {
		*(unsigned long *)kstack_ptr = STACKLEAK_POISON;
		kstack_ptr += sizeof(unsigned long);
	}

	/* Reset the 'lowest_stack' value for the next syscall */
	current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
}
NOKPROBE_SYMBOL(stackleak_erase);
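
/*
 * stackleak_erase() is invoked from arch entry code just before returning
 * to userspace, e.g. via the STACKLEAK_ERASE macro on x86 or a direct call
 * in the arm64 entry path. It is notrace and NOKPROBE because tracing or
 * probing it at that point would touch the stack region being wiped.
 */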

void __used notrace stackleak_track_stack(void)
{
	/*
	 * N.B. stackleak_erase() fills the kernel stack with the poison value,
	 * which has the register width. That code assumes that the value
	 * of 'lowest_stack' is aligned on the register width boundary.
	 *
	 * That is true for x86 and x86_64 because of the kernel stack
	 * alignment on these platforms (for details, see 'cc_stack_align' in
	 * arch/x86/Makefile). Take care of that when you port STACKLEAK to
	 * new platforms.
	 */
	unsigned long sp = (unsigned long)&sp;

	/*
	 * Having CONFIG_STACKLEAK_TRACK_MIN_SIZE larger than
	 * STACKLEAK_SEARCH_DEPTH makes the poison search in
	 * stackleak_erase() unreliable. Let's prevent that.
	 */
	BUILD_BUG_ON(CONFIG_STACKLEAK_TRACK_MIN_SIZE > STACKLEAK_SEARCH_DEPTH);

	if (sp < current->lowest_stack &&
	    sp >= (unsigned long)task_stack_page(current) +
						sizeof(unsigned long)) {
		current->lowest_stack = sp;
	}
}
EXPORT_SYMBOL(stackleak_track_stack);
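
For context, stackleak_track_stack() is not called explicitly from C code:
the STACKLEAK gcc plugin (scripts/gcc-plugins/stackleak_plugin.c) inserts a
call to it into every function whose stack frame exceeds
CONFIG_STACKLEAK_TRACK_MIN_SIZE. The hypothetical function below (not from
the kernel tree) shows roughly what the instrumented code is equivalent to:

	#include <linux/string.h>

	long example_with_big_frame(void)
	{
		char buf[512];	/* frame > CONFIG_STACKLEAK_TRACK_MIN_SIZE */

		/* Inserted by the plugin at function entry: records how deep
		 * the stack grew, so stackleak_erase() knows how far down to
		 * poison later. */
		stackleak_track_stack();

		memset(buf, 0, sizeof(buf));	/* normal function body */
		return buf[0];
	}

Because the plugin instruments functions throughout the kernel, including
ones on the ftrace path itself (such as ftrace_ops_test() in the backtrace
above), stackleak_track_stack() has to be notrace, or each traced call
re-enters the tracer as described in the commit message.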