bpf: Fix accesses to uninit stack slots

Privileged programs are supposed to be able to read uninitialized stack
memory (ever since 6715df8d5) but, before this patch, these accesses
were permitted inconsistently. In particular, accesses were permitted
above state->allocated_stack, but not below it. In other words, if the
stack was already "large enough", the access was permitted, but
otherwise the access was rejected instead of being allowed to "grow the
stack". This undesired rejection was happening in two places:
- in check_stack_slot_within_bounds()
- in check_stack_range_initialized()
This patch arranges for these accesses to be permitted. A bunch of tests
that were relying on the old rejection had to change; all of them were
changed to also run unprivileged, in which case the old behavior
persists. One test couldn't be updated - global_func16 - because it
can't run unprivileged for other reasons.

This patch also fixes the tracking of the stack size for variable-offset
reads. This second fix is bundled in the same commit as the first one
because they're inter-related. Before this patch, writes to the stack
using registers containing a variable offset (as opposed to registers
with fixed, known values) were not properly contributing to the
function's needed stack size. As a result, it was possible for a program
to verify, but then to attempt to read out-of-bounds data at runtime
because a too small stack had been allocated for it.

Each function tracks the size of the stack it needs in
bpf_subprog_info.stack_depth, which is maintained by
update_stack_depth(). For regular memory accesses, check_mem_access()
was calling update_stack_depth() but it was passing in only the fixed
part of the offset register, ignoring the variable offset. This was
incorrect; the minimum possible value of that register should be used
instead.

This tracking is now fixed by centralizing the tracking of stack size in
grow_stack_state(), and by lifting the calls to grow_stack_state() to
check_stack_access_within_bounds() as suggested by Andrii. The code is
now simpler and more convincingly tracks the correct maximum stack size.
check_stack_range_initialized() can now rely on enough stack having been
allocated for the access; this helps with the fix for the first issue.

A few tests were changed to also check the stack depth computation. The
one that fails without this patch is verifier_var_off:stack_write_priv_vs_unpriv.

Fixes: 01f810ace9 ("bpf: Allow variable-offset stack access")
Reported-by: Hao Sun <sunhao.th@gmail.com>
Signed-off-by: Andrei Matei <andreimatei1@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20231208032519.260451-3-andreimatei1@gmail.com

Closes: https://lore.kernel.org/bpf/CABWLsev9g8UP_c3a=1qbuZUi20tGoUXoU07FPf-5FLvhOKOY+Q@mail.gmail.com/
This commit is contained in:
Andrei Matei 2023-12-07 22:25:18 -05:00 committed by Andrii Nakryiko
parent 92e1567ee3
commit 6b4a64bafd
9 changed files with 92 additions and 72 deletions

View File

@ -1259,7 +1259,10 @@ static int resize_reference_state(struct bpf_func_state *state, size_t n)
return 0; return 0;
} }
static int grow_stack_state(struct bpf_func_state *state, int size) /* Possibly update state->allocated_stack to be at least size bytes. Also
* possibly update the function's high-water mark in its bpf_subprog_info.
*/
static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size)
{ {
size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE; size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
@ -1271,6 +1274,11 @@ static int grow_stack_state(struct bpf_func_state *state, int size)
return -ENOMEM; return -ENOMEM;
state->allocated_stack = size; state->allocated_stack = size;
/* update known max for given subprogram */
if (env->subprog_info[state->subprogno].stack_depth < size)
env->subprog_info[state->subprogno].stack_depth = size;
return 0; return 0;
} }
@ -4440,9 +4448,6 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
struct bpf_reg_state *reg = NULL; struct bpf_reg_state *reg = NULL;
int insn_flags = insn_stack_access_flags(state->frameno, spi); int insn_flags = insn_stack_access_flags(state->frameno, spi);
err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
if (err)
return err;
/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
* so it's aligned access and [off, off + size) are within stack limits * so it's aligned access and [off, off + size) are within stack limits
*/ */
@ -4595,10 +4600,6 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
(!value_reg && is_bpf_st_mem(insn) && insn->imm == 0)) (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
writing_zero = true; writing_zero = true;
err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
if (err)
return err;
for (i = min_off; i < max_off; i++) { for (i = min_off; i < max_off; i++) {
int spi; int spi;
@ -5774,20 +5775,6 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
strict); strict);
} }
static int update_stack_depth(struct bpf_verifier_env *env,
const struct bpf_func_state *func,
int off)
{
u16 stack = env->subprog_info[func->subprogno].stack_depth;
if (stack >= -off)
return 0;
/* update known max for given subprogram */
env->subprog_info[func->subprogno].stack_depth = -off;
return 0;
}
/* starting from main bpf function walk all instructions of the function /* starting from main bpf function walk all instructions of the function
* and recursively walk all callees that given function can call. * and recursively walk all callees that given function can call.
* Ignore jump and exit insns. * Ignore jump and exit insns.
@ -6577,13 +6564,14 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env,
* The minimum valid offset is -MAX_BPF_STACK for writes, and * The minimum valid offset is -MAX_BPF_STACK for writes, and
* -state->allocated_stack for reads. * -state->allocated_stack for reads.
*/ */
static int check_stack_slot_within_bounds(s64 off, static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
struct bpf_func_state *state, s64 off,
enum bpf_access_type t) struct bpf_func_state *state,
enum bpf_access_type t)
{ {
int min_valid_off; int min_valid_off;
if (t == BPF_WRITE) if (t == BPF_WRITE || env->allow_uninit_stack)
min_valid_off = -MAX_BPF_STACK; min_valid_off = -MAX_BPF_STACK;
else else
min_valid_off = -state->allocated_stack; min_valid_off = -state->allocated_stack;
@ -6632,7 +6620,7 @@ static int check_stack_access_within_bounds(
max_off = reg->smax_value + off + access_size; max_off = reg->smax_value + off + access_size;
} }
err = check_stack_slot_within_bounds(min_off, state, type); err = check_stack_slot_within_bounds(env, min_off, state, type);
if (!err && max_off > 0) if (!err && max_off > 0)
err = -EINVAL; /* out of stack access into non-negative offsets */ err = -EINVAL; /* out of stack access into non-negative offsets */
@ -6647,8 +6635,10 @@ static int check_stack_access_within_bounds(
verbose(env, "invalid variable-offset%s stack R%d var_off=%s off=%d size=%d\n", verbose(env, "invalid variable-offset%s stack R%d var_off=%s off=%d size=%d\n",
err_extra, regno, tn_buf, off, access_size); err_extra, regno, tn_buf, off, access_size);
} }
return err;
} }
return err;
return grow_stack_state(env, state, round_up(-min_off, BPF_REG_SIZE));
} }
/* check whether memory at (regno + off) is accessible for t = (read | write) /* check whether memory at (regno + off) is accessible for t = (read | write)
@ -6663,7 +6653,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
{ {
struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *regs = cur_regs(env);
struct bpf_reg_state *reg = regs + regno; struct bpf_reg_state *reg = regs + regno;
struct bpf_func_state *state;
int size, err = 0; int size, err = 0;
size = bpf_size_to_bytes(bpf_size); size = bpf_size_to_bytes(bpf_size);
@ -6806,11 +6795,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (err) if (err)
return err; return err;
state = func(env, reg);
err = update_stack_depth(env, state, off);
if (err)
return err;
if (t == BPF_READ) if (t == BPF_READ)
err = check_stack_read(env, regno, off, size, err = check_stack_read(env, regno, off, size,
value_regno); value_regno);
@ -7004,7 +6988,8 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
/* When register 'regno' is used to read the stack (either directly or through /* When register 'regno' is used to read the stack (either directly or through
* a helper function) make sure that it's within stack boundary and, depending * a helper function) make sure that it's within stack boundary and, depending
* on the access type, that all elements of the stack are initialized. * on the access type and privileges, that all elements of the stack are
* initialized.
* *
* 'off' includes 'regno->off', but not its dynamic part (if any). * 'off' includes 'regno->off', but not its dynamic part (if any).
* *
@ -7112,8 +7097,11 @@ static int check_stack_range_initialized(
slot = -i - 1; slot = -i - 1;
spi = slot / BPF_REG_SIZE; spi = slot / BPF_REG_SIZE;
if (state->allocated_stack <= slot) if (state->allocated_stack <= slot) {
goto err; verbose(env, "verifier bug: allocated_stack too small");
return -EFAULT;
}
stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
if (*stype == STACK_MISC) if (*stype == STACK_MISC)
goto mark; goto mark;
@ -7137,7 +7125,6 @@ static int check_stack_range_initialized(
goto mark; goto mark;
} }
err:
if (tnum_is_const(reg->var_off)) { if (tnum_is_const(reg->var_off)) {
verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n", verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
err_extra, regno, min_off, i - min_off, access_size); err_extra, regno, min_off, i - min_off, access_size);
@ -7162,7 +7149,7 @@ mark:
* helper may write to the entire memory range. * helper may write to the entire memory range.
*/ */
} }
return update_stack_depth(env, state, min_off); return 0;
} }
static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,

View File

@ -846,7 +846,7 @@ __naked int delayed_precision_mark(void)
"call %[bpf_iter_num_next];" "call %[bpf_iter_num_next];"
"if r0 == 0 goto 2f;" "if r0 == 0 goto 2f;"
"if r6 != 42 goto 3f;" "if r6 != 42 goto 3f;"
"r7 = -32;" "r7 = -33;"
"call %[bpf_get_prandom_u32];" "call %[bpf_get_prandom_u32];"
"r6 = r0;" "r6 = r0;"
"goto 1b;\n" "goto 1b;\n"

View File

@ -13,7 +13,7 @@ __noinline int foo(int (*arr)[10])
} }
SEC("cgroup_skb/ingress") SEC("cgroup_skb/ingress")
__failure __msg("invalid indirect read from stack") __success
int global_func16(struct __sk_buff *skb) int global_func16(struct __sk_buff *skb)
{ {
int array[10]; int array[10];

View File

@ -27,8 +27,8 @@ __naked void stack_out_of_bounds(void)
SEC("socket") SEC("socket")
__description("uninitialized stack1") __description("uninitialized stack1")
__failure __msg("invalid indirect read from stack") __success __log_level(4) __msg("stack depth 8")
__failure_unpriv __failure_unpriv __msg_unpriv("invalid indirect read from stack")
__naked void uninitialized_stack1(void) __naked void uninitialized_stack1(void)
{ {
asm volatile (" \ asm volatile (" \
@ -45,8 +45,8 @@ __naked void uninitialized_stack1(void)
SEC("socket") SEC("socket")
__description("uninitialized stack2") __description("uninitialized stack2")
__failure __msg("invalid read from stack") __success __log_level(4) __msg("stack depth 8")
__failure_unpriv __failure_unpriv __msg_unpriv("invalid read from stack")
__naked void uninitialized_stack2(void) __naked void uninitialized_stack2(void)
{ {
asm volatile (" \ asm volatile (" \

View File

@ -5,9 +5,10 @@
#include <bpf/bpf_helpers.h> #include <bpf/bpf_helpers.h>
#include "bpf_misc.h" #include "bpf_misc.h"
SEC("cgroup/sysctl") SEC("socket")
__description("ARG_PTR_TO_LONG uninitialized") __description("ARG_PTR_TO_LONG uninitialized")
__failure __msg("invalid indirect read from stack R4 off -16+0 size 8") __success
__failure_unpriv __msg_unpriv("invalid indirect read from stack R4 off -16+0 size 8")
__naked void arg_ptr_to_long_uninitialized(void) __naked void arg_ptr_to_long_uninitialized(void)
{ {
asm volatile (" \ asm volatile (" \

View File

@ -5,9 +5,10 @@
#include <bpf/bpf_helpers.h> #include <bpf/bpf_helpers.h>
#include "bpf_misc.h" #include "bpf_misc.h"
SEC("tc") SEC("socket")
__description("raw_stack: no skb_load_bytes") __description("raw_stack: no skb_load_bytes")
__failure __msg("invalid read from stack R6 off=-8 size=8") __success
__failure_unpriv __msg_unpriv("invalid read from stack R6 off=-8 size=8")
__naked void stack_no_skb_load_bytes(void) __naked void stack_no_skb_load_bytes(void)
{ {
asm volatile (" \ asm volatile (" \

View File

@ -59,9 +59,10 @@ __naked void stack_read_priv_vs_unpriv(void)
" ::: __clobber_all); " ::: __clobber_all);
} }
SEC("lwt_in") SEC("cgroup/skb")
__description("variable-offset stack read, uninitialized") __description("variable-offset stack read, uninitialized")
__failure __msg("invalid variable-offset read from stack R2") __success
__failure_unpriv __msg_unpriv("R2 variable stack access prohibited for !root")
__naked void variable_offset_stack_read_uninitialized(void) __naked void variable_offset_stack_read_uninitialized(void)
{ {
asm volatile (" \ asm volatile (" \
@ -83,12 +84,55 @@ __naked void variable_offset_stack_read_uninitialized(void)
SEC("socket") SEC("socket")
__description("variable-offset stack write, priv vs unpriv") __description("variable-offset stack write, priv vs unpriv")
__success __failure_unpriv __success
/* Check that the maximum stack depth is correctly maintained according to the
* maximum possible variable offset.
*/
__log_level(4) __msg("stack depth 16")
__failure_unpriv
/* Variable stack access is rejected for unprivileged. /* Variable stack access is rejected for unprivileged.
*/ */
__msg_unpriv("R2 variable stack access prohibited for !root") __msg_unpriv("R2 variable stack access prohibited for !root")
__retval(0) __retval(0)
__naked void stack_write_priv_vs_unpriv(void) __naked void stack_write_priv_vs_unpriv(void)
{
asm volatile (" \
/* Get an unknown value */ \
r2 = *(u32*)(r1 + 0); \
/* Make it small and 8-byte aligned */ \
r2 &= 8; \
r2 -= 16; \
/* Add it to fp. We now have either fp-8 or \
* fp-16, but we don't know which \
*/ \
r2 += r10; \
/* Dereference it for a stack write */ \
r0 = 0; \
*(u64*)(r2 + 0) = r0; \
exit; \
" ::: __clobber_all);
}
/* Similar to the previous test, but this time also perform a read from the
* address written to with a variable offset. The read is allowed, showing that,
* after a variable-offset write, a priviledged program can read the slots that
* were in the range of that write (even if the verifier doesn't actually know if
* the slot being read was really written to or not.
*
* Despite this test being mostly a superset, the previous test is also kept for
* the sake of it checking the stack depth in the case where there is no read.
*/
SEC("socket")
__description("variable-offset stack write followed by read")
__success
/* Check that the maximum stack depth is correctly maintained according to the
* maximum possible variable offset.
*/
__log_level(4) __msg("stack depth 16")
__failure_unpriv
__msg_unpriv("R2 variable stack access prohibited for !root")
__retval(0)
__naked void stack_write_followed_by_read(void)
{ {
asm volatile (" \ asm volatile (" \
/* Get an unknown value */ \ /* Get an unknown value */ \
@ -103,12 +147,7 @@ __naked void stack_write_priv_vs_unpriv(void)
/* Dereference it for a stack write */ \ /* Dereference it for a stack write */ \
r0 = 0; \ r0 = 0; \
*(u64*)(r2 + 0) = r0; \ *(u64*)(r2 + 0) = r0; \
/* Now read from the address we just wrote. This shows\ /* Now read from the address we just wrote. */ \
* that, after a variable-offset write, a priviledged\
* program can read the slots that were in the range of\
* that write (even if the verifier doesn't actually know\
* if the slot being read was really written to or not.\
*/ \
r3 = *(u64*)(r2 + 0); \ r3 = *(u64*)(r2 + 0); \
r0 = 0; \ r0 = 0; \
exit; \ exit; \
@ -282,9 +321,10 @@ __naked void access_min_out_of_bound(void)
: __clobber_all); : __clobber_all);
} }
SEC("lwt_in") SEC("cgroup/skb")
__description("indirect variable-offset stack access, min_off < min_initialized") __description("indirect variable-offset stack access, min_off < min_initialized")
__failure __msg("invalid indirect read from stack R2 var_off") __success
__failure_unpriv __msg_unpriv("R2 variable stack access prohibited for !root")
__naked void access_min_off_min_initialized(void) __naked void access_min_off_min_initialized(void)
{ {
asm volatile (" \ asm volatile (" \

View File

@ -83,17 +83,6 @@
.result = REJECT, .result = REJECT,
.errstr = "!read_ok", .errstr = "!read_ok",
}, },
{
"Can't use cmpxchg on uninit memory",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 3),
BPF_MOV64_IMM(BPF_REG_2, 4),
BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_2, -8),
BPF_EXIT_INSN(),
},
.result = REJECT,
.errstr = "invalid read from stack",
},
{ {
"BPF_W cmpxchg should zero top 32 bits", "BPF_W cmpxchg should zero top 32 bits",
.insns = { .insns = {

View File

@ -1505,7 +1505,9 @@
.prog_type = BPF_PROG_TYPE_XDP, .prog_type = BPF_PROG_TYPE_XDP,
.fixup_map_hash_8b = { 23 }, .fixup_map_hash_8b = { 23 },
.result = REJECT, .result = REJECT,
.errstr = "invalid read from stack R7 off=-16 size=8", .errstr = "R0 invalid mem access 'scalar'",
.result_unpriv = REJECT,
.errstr_unpriv = "invalid read from stack R7 off=-16 size=8",
}, },
{ {
"calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1", "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1",