bpf-for-netdev

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQTFp0I1jqZrAX+hPRXbK58LschIgwUCZiwdfQAKCRDbK58LschI
 g1oqAP9mjayeIHCfYMQZa2eevy1PmVlgdNdFdMDWZFS/pHv9cgD/ZdmGzbUDKCAQ
 Y/KiTajitZw3kxtHX45v8/Ugtlsh9Qg=
 =Ewiw
 -----END PGP SIGNATURE-----

Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2024-04-26

We've added 12 non-merge commits during the last 22 day(s) which contain
a total of 14 files changed, 168 insertions(+), 72 deletions(-).

The main changes are:

1) Fix BPF_PROBE_MEM in verifier and JIT to skip loads from vsyscall page,
   from Puranjay Mohan.

2) Fix a crash in XDP with devmap broadcast redirect when the latter map
   is in process of being torn down, from Toke Høiland-Jørgensen.

3) Fix arm64 and riscv64 BPF JITs to properly clear start time for BPF
   program runtime stats, from Xu Kuohai.

4) Fix a sockmap KCSAN-reported data race in sk_psock_skb_ingress_enqueue,
    from Jason Xing.

5) Fix BPF verifier error message in resolve_pseudo_ldimm64,
   from Anton Protopopov.

6) Fix missing DEBUG_INFO_BTF_MODULES Kconfig menu item,
   from Andrii Nakryiko.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  selftests/bpf: Test PROBE_MEM of VSYSCALL_ADDR on x86-64
  bpf, x86: Fix PROBE_MEM runtime load check
  bpf: verifier: prevent userspace memory access
  xdp: use flags field to disambiguate broadcast redirect
  arm32, bpf: Reimplement sign-extension mov instruction
  riscv, bpf: Fix incorrect runtime stats
  bpf, arm64: Fix incorrect runtime stats
  bpf: Fix a verifier verbose message
  bpf, skmsg: Fix NULL pointer dereference in sk_psock_skb_ingress_enqueue
  MAINTAINERS: bpf: Add Lehui and Puranjay as riscv64 reviewers
  MAINTAINERS: Update email address for Puranjay Mohan
  bpf, kconfig: Fix DEBUG_INFO_BTF_MODULES Kconfig definition
====================

Link: https://lore.kernel.org/r/20240426224248.26197-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2024-04-26 17:36:53 -07:00
commit b2ff42c6d3
14 changed files with 168 additions and 72 deletions

View File

@ -512,6 +512,7 @@ Praveen BP <praveenbp@ti.com>
Pradeep Kumar Chitrapu <quic_pradeepc@quicinc.com> <pradeepc@codeaurora.org>
Prasad Sodagudi <quic_psodagud@quicinc.com> <psodagud@codeaurora.org>
Punit Agrawal <punitagrawal@gmail.com> <punit.agrawal@arm.com>
Puranjay Mohan <puranjay@kernel.org> <puranjay12@gmail.com>
Qais Yousef <qyousef@layalina.io> <qais.yousef@imgtec.com>
Qais Yousef <qyousef@layalina.io> <qais.yousef@arm.com>
Quentin Monnet <qmo@kernel.org> <quentin.monnet@netronome.com>

View File

@ -553,7 +553,7 @@ F: Documentation/devicetree/bindings/iio/accel/adi,adxl345.yaml
F: drivers/input/misc/adxl34x.c
ADXL355 THREE-AXIS DIGITAL ACCELEROMETER DRIVER
M: Puranjay Mohan <puranjay12@gmail.com>
M: Puranjay Mohan <puranjay@kernel.org>
L: linux-iio@vger.kernel.org
S: Supported
F: Documentation/devicetree/bindings/iio/accel/adi,adxl355.yaml
@ -3714,7 +3714,7 @@ F: drivers/iio/imu/bmi323/
BPF JIT for ARM
M: Russell King <linux@armlinux.org.uk>
M: Puranjay Mohan <puranjay12@gmail.com>
M: Puranjay Mohan <puranjay@kernel.org>
L: bpf@vger.kernel.org
S: Maintained
F: arch/arm/net/
@ -3764,6 +3764,8 @@ X: arch/riscv/net/bpf_jit_comp64.c
BPF JIT for RISC-V (64-bit)
M: Björn Töpel <bjorn@kernel.org>
R: Pu Lehui <pulehui@huawei.com>
R: Puranjay Mohan <puranjay@kernel.org>
L: bpf@vger.kernel.org
S: Maintained
F: arch/riscv/net/
@ -21925,7 +21927,7 @@ F: include/linux/soc/ti/ti_sci_inta_msi.h
F: include/linux/soc/ti/ti_sci_protocol.h
TEXAS INSTRUMENTS' TMP117 TEMPERATURE SENSOR DRIVER
M: Puranjay Mohan <puranjay12@gmail.com>
M: Puranjay Mohan <puranjay@kernel.org>
L: linux-iio@vger.kernel.org
S: Supported
F: Documentation/devicetree/bindings/iio/temperature/ti,tmp117.yaml

View File

@ -871,16 +871,11 @@ static inline void emit_a32_alu_r64(const bool is64, const s8 dst[],
}
/* dst = src (4 bytes)*/
static inline void emit_a32_mov_r(const s8 dst, const s8 src, const u8 off,
struct jit_ctx *ctx) {
static inline void emit_a32_mov_r(const s8 dst, const s8 src, struct jit_ctx *ctx) {
const s8 *tmp = bpf2a32[TMP_REG_1];
s8 rt;
rt = arm_bpf_get_reg32(src, tmp[0], ctx);
if (off && off != 32) {
emit(ARM_LSL_I(rt, rt, 32 - off), ctx);
emit(ARM_ASR_I(rt, rt, 32 - off), ctx);
}
arm_bpf_put_reg32(dst, rt, ctx);
}
@ -889,15 +884,15 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
const s8 src[],
struct jit_ctx *ctx) {
if (!is64) {
emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
emit_a32_mov_r(dst_lo, src_lo, ctx);
if (!ctx->prog->aux->verifier_zext)
/* Zero out high 4 bytes */
emit_a32_mov_i(dst_hi, 0, ctx);
} else if (__LINUX_ARM_ARCH__ < 6 &&
ctx->cpu_architecture < CPU_ARCH_ARMv5TE) {
/* complete 8 byte move */
emit_a32_mov_r(dst_lo, src_lo, 0, ctx);
emit_a32_mov_r(dst_hi, src_hi, 0, ctx);
emit_a32_mov_r(dst_lo, src_lo, ctx);
emit_a32_mov_r(dst_hi, src_hi, ctx);
} else if (is_stacked(src_lo) && is_stacked(dst_lo)) {
const u8 *tmp = bpf2a32[TMP_REG_1];
@ -917,17 +912,52 @@ static inline void emit_a32_mov_r64(const bool is64, const s8 dst[],
static inline void emit_a32_movsx_r64(const bool is64, const u8 off, const s8 dst[], const s8 src[],
struct jit_ctx *ctx) {
const s8 *tmp = bpf2a32[TMP_REG_1];
const s8 *rt;
s8 rs;
s8 rd;
rt = arm_bpf_get_reg64(dst, tmp, ctx);
if (is_stacked(dst_lo))
rd = tmp[1];
else
rd = dst_lo;
rs = arm_bpf_get_reg32(src_lo, rd, ctx);
/* rs may be one of src[1], dst[1], or tmp[1] */
/* Sign extend rs if needed. If off == 32, lower 32-bits of src are moved to dst and sign
* extension only happens in the upper 64 bits.
*/
if (off != 32) {
/* Sign extend rs into rd */
emit(ARM_LSL_I(rd, rs, 32 - off), ctx);
emit(ARM_ASR_I(rd, rd, 32 - off), ctx);
} else {
rd = rs;
}
/* Write rd to dst_lo
*
* Optimization:
* Assume:
* 1. dst == src and stacked.
* 2. off == 32
*
* In this case src_lo was loaded into rd(tmp[1]) but rd was not sign extended as off==32.
* So, we don't need to write rd back to dst_lo as they have the same value.
* This saves us one str instruction.
*/
if (dst_lo != src_lo || off != 32)
arm_bpf_put_reg32(dst_lo, rd, ctx);
emit_a32_mov_r(dst_lo, src_lo, off, ctx);
if (!is64) {
if (!ctx->prog->aux->verifier_zext)
/* Zero out high 4 bytes */
emit_a32_mov_i(dst_hi, 0, ctx);
} else {
emit(ARM_ASR_I(rt[0], rt[1], 31), ctx);
if (is_stacked(dst_hi)) {
emit(ARM_ASR_I(tmp[0], rd, 31), ctx);
arm_bpf_put_reg32(dst_hi, tmp[0], ctx);
} else {
emit(ARM_ASR_I(dst_hi, rd, 31), ctx);
}
}
}

View File

@ -1844,15 +1844,15 @@ static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
emit_call(enter_prog, ctx);
/* save return value to callee saved register x20 */
emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx);
/* if (__bpf_prog_enter(prog) == 0)
* goto skip_exec_of_prog;
*/
branch = ctx->image + ctx->idx;
emit(A64_NOP, ctx);
/* save return value to callee saved register x20 */
emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx);
emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
if (!p->jited)
emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);

View File

@ -722,6 +722,9 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of
if (ret)
return ret;
/* store prog start time */
emit_mv(RV_REG_S1, RV_REG_A0, ctx);
/* if (__bpf_prog_enter(prog) == 0)
* goto skip_exec_of_prog;
*/
@ -729,9 +732,6 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of
/* nop reserved for conditional jump */
emit(rv_nop(), ctx);
/* store prog start time */
emit_mv(RV_REG_S1, RV_REG_A0, ctx);
/* arg1: &args_off */
emit_addi(RV_REG_A0, RV_REG_FP, -args_off, ctx);
if (!p->jited)

View File

@ -1807,36 +1807,41 @@ populate_extable:
if (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
BPF_MODE(insn->code) == BPF_PROBE_MEMSX) {
/* Conservatively check that src_reg + insn->off is a kernel address:
* src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE
* src_reg is used as scratch for src_reg += insn->off and restored
* after emit_ldx if necessary
* src_reg + insn->off > TASK_SIZE_MAX + PAGE_SIZE
* and
* src_reg + insn->off < VSYSCALL_ADDR
*/
u64 limit = TASK_SIZE_MAX + PAGE_SIZE;
u64 limit = TASK_SIZE_MAX + PAGE_SIZE - VSYSCALL_ADDR;
u8 *end_of_jmp;
/* At end of these emitted checks, insn->off will have been added
* to src_reg, so no need to do relative load with insn->off offset
*/
insn_off = 0;
/* movabsq r10, VSYSCALL_ADDR */
emit_mov_imm64(&prog, BPF_REG_AX, (long)VSYSCALL_ADDR >> 32,
(u32)(long)VSYSCALL_ADDR);
/* movabsq r11, limit */
EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG));
EMIT((u32)limit, 4);
EMIT(limit >> 32, 4);
/* mov src_reg, r11 */
EMIT_mov(AUX_REG, src_reg);
if (insn->off) {
/* add src_reg, insn->off */
maybe_emit_1mod(&prog, src_reg, true);
EMIT2_off32(0x81, add_1reg(0xC0, src_reg), insn->off);
/* add r11, insn->off */
maybe_emit_1mod(&prog, AUX_REG, true);
EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off);
}
/* cmp src_reg, r11 */
maybe_emit_mod(&prog, src_reg, AUX_REG, true);
EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG));
/* sub r11, r10 */
maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
EMIT2(0x29, add_2reg(0xC0, AUX_REG, BPF_REG_AX));
/* if unsigned '>=', goto load */
EMIT2(X86_JAE, 0);
/* movabsq r10, limit */
emit_mov_imm64(&prog, BPF_REG_AX, (long)limit >> 32,
(u32)(long)limit);
/* cmp r10, r11 */
maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
EMIT2(0x39, add_2reg(0xC0, AUX_REG, BPF_REG_AX));
/* if unsigned '>', goto load */
EMIT2(X86_JA, 0);
end_of_jmp = prog;
/* xor dst_reg, dst_reg */
@ -1862,18 +1867,6 @@ populate_extable:
/* populate jmp_offset for JMP above */
start_of_ldx[-1] = prog - start_of_ldx;
if (insn->off && src_reg != dst_reg) {
/* sub src_reg, insn->off
* Restore src_reg after "add src_reg, insn->off" in prev
* if statement. But if src_reg == dst_reg, emit_ldx
* above already clobbered src_reg, so no need to restore.
* If add src_reg, insn->off was unnecessary, no need to
* restore either.
*/
maybe_emit_1mod(&prog, src_reg, true);
EMIT2_off32(0x81, add_1reg(0xE8, src_reg), insn->off);
}
if (!bpf_prog->aux->extable)
break;
@ -3473,3 +3466,9 @@ bool bpf_jit_supports_ptr_xchg(void)
{
return true;
}
/* x86-64 JIT emits its own code to filter user addresses so return 0 here */
u64 bpf_arch_uaddress_limit(void)
{
return 0;
}

View File

@ -963,6 +963,7 @@ bool bpf_jit_supports_far_kfunc_call(void);
bool bpf_jit_supports_exceptions(void);
bool bpf_jit_supports_ptr_xchg(void);
bool bpf_jit_supports_arena(void);
u64 bpf_arch_uaddress_limit(void);
void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
bool bpf_helper_changes_pkt_data(void *func);

View File

@ -461,10 +461,12 @@ static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock)
{
read_lock_bh(&sk->sk_callback_lock);
if (psock->saved_data_ready)
psock->saved_data_ready(sk);
else
sk->sk_data_ready(sk);
read_unlock_bh(&sk->sk_callback_lock);
}
static inline void psock_set_prog(struct bpf_prog **pprog,

View File

@ -2942,6 +2942,15 @@ bool __weak bpf_jit_supports_arena(void)
return false;
}
u64 __weak bpf_arch_uaddress_limit(void)
{
#if defined(CONFIG_64BIT) && defined(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE)
return TASK_SIZE;
#else
return 0;
#endif
}
/* Return TRUE if the JIT backend satisfies the following two conditions:
* 1) JIT backend supports atomic_xchg() on pointer-sized words.
* 2) Under the specific arch, the implementation of xchg() is the same

View File

@ -18289,8 +18289,7 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
f = fdget(fd);
map = __bpf_map_get(f);
if (IS_ERR(map)) {
verbose(env, "fd %d is not pointing to valid bpf_map\n",
insn[0].imm);
verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
return PTR_ERR(map);
}
@ -19676,6 +19675,36 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
goto next_insn;
}
/* Make it impossible to de-reference a userspace address */
if (BPF_CLASS(insn->code) == BPF_LDX &&
(BPF_MODE(insn->code) == BPF_PROBE_MEM ||
BPF_MODE(insn->code) == BPF_PROBE_MEMSX)) {
struct bpf_insn *patch = &insn_buf[0];
u64 uaddress_limit = bpf_arch_uaddress_limit();
if (!uaddress_limit)
goto next_insn;
*patch++ = BPF_MOV64_REG(BPF_REG_AX, insn->src_reg);
if (insn->off)
*patch++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_AX, insn->off);
*patch++ = BPF_ALU64_IMM(BPF_RSH, BPF_REG_AX, 32);
*patch++ = BPF_JMP_IMM(BPF_JLE, BPF_REG_AX, uaddress_limit >> 32, 2);
*patch++ = *insn;
*patch++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
*patch++ = BPF_MOV64_IMM(insn->dst_reg, 0);
cnt = patch - insn_buf;
new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
if (!new_prog)
return -ENOMEM;
delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
goto next_insn;
}
/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
if (BPF_CLASS(insn->code) == BPF_LD &&
(BPF_MODE(insn->code) == BPF_ABS ||

View File

@ -375,7 +375,7 @@ config DEBUG_INFO_SPLIT
Incompatible with older versions of ccache.
config DEBUG_INFO_BTF
bool "Generate BTF typeinfo"
bool "Generate BTF type information"
depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED
depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST
depends on BPF_SYSCALL
@ -408,7 +408,8 @@ config PAHOLE_HAS_LANG_EXCLUDE
using DEBUG_INFO_BTF_MODULES.
config DEBUG_INFO_BTF_MODULES
def_bool y
bool "Generate BTF type information for kernel modules"
default y
depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF
help
Generate compact split BTF type information for kernel modules.

View File

@ -4360,10 +4360,12 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
u32 map_id = ri->map_id;
u32 flags = ri->flags;
struct bpf_map *map;
int err;
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
ri->flags = 0;
ri->map_type = BPF_MAP_TYPE_UNSPEC;
if (unlikely(!xdpf)) {
@ -4375,11 +4377,20 @@ static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
case BPF_MAP_TYPE_DEVMAP:
fallthrough;
case BPF_MAP_TYPE_DEVMAP_HASH:
map = READ_ONCE(ri->map);
if (unlikely(map)) {
if (unlikely(flags & BPF_F_BROADCAST)) {
map = READ_ONCE(ri->map);
/* The map pointer is cleared when the map is being torn
* down by bpf_clear_redirect_map()
*/
if (unlikely(!map)) {
err = -ENOENT;
break;
}
WRITE_ONCE(ri->map, NULL);
err = dev_map_enqueue_multi(xdpf, dev, map,
ri->flags & BPF_F_EXCLUDE_INGRESS);
flags & BPF_F_EXCLUDE_INGRESS);
} else {
err = dev_map_enqueue(fwd, xdpf, dev);
}
@ -4442,9 +4453,9 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect_frame);
static int xdp_do_generic_redirect_map(struct net_device *dev,
struct sk_buff *skb,
struct xdp_buff *xdp,
struct bpf_prog *xdp_prog,
void *fwd,
enum bpf_map_type map_type, u32 map_id)
struct bpf_prog *xdp_prog, void *fwd,
enum bpf_map_type map_type, u32 map_id,
u32 flags)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct bpf_map *map;
@ -4454,11 +4465,20 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
case BPF_MAP_TYPE_DEVMAP:
fallthrough;
case BPF_MAP_TYPE_DEVMAP_HASH:
map = READ_ONCE(ri->map);
if (unlikely(map)) {
if (unlikely(flags & BPF_F_BROADCAST)) {
map = READ_ONCE(ri->map);
/* The map pointer is cleared when the map is being torn
* down by bpf_clear_redirect_map()
*/
if (unlikely(!map)) {
err = -ENOENT;
break;
}
WRITE_ONCE(ri->map, NULL);
err = dev_map_redirect_multi(dev, skb, xdp_prog, map,
ri->flags & BPF_F_EXCLUDE_INGRESS);
flags & BPF_F_EXCLUDE_INGRESS);
} else {
err = dev_map_generic_redirect(fwd, skb, xdp_prog);
}
@ -4495,9 +4515,11 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
u32 map_id = ri->map_id;
u32 flags = ri->flags;
int err;
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
ri->flags = 0;
ri->map_type = BPF_MAP_TYPE_UNSPEC;
if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
@ -4517,7 +4539,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
return 0;
}
return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id);
return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id, flags);
err:
_trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err);
return err;

View File

@ -1226,11 +1226,8 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
rcu_read_lock();
psock = sk_psock(sk);
if (psock) {
read_lock_bh(&sk->sk_callback_lock);
if (psock)
sk_psock_data_ready(sk, psock);
read_unlock_bh(&sk->sk_callback_lock);
}
rcu_read_unlock();
}
}

View File

@ -205,6 +205,9 @@ __weak noinline struct file *bpf_testmod_return_ptr(int arg)
case 5: return (void *)~(1ull << 30); /* trigger extable */
case 6: return &f; /* valid addr */
case 7: return (void *)((long)&f | 1); /* kernel tricks */
#ifdef CONFIG_X86_64
case 8: return (void *)VSYSCALL_ADDR; /* vsyscall page address */
#endif
default: return NULL;
}
}