bpf-next-for-netdev

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQTFp0I1jqZrAX+hPRXbK58LschIgwUCZuH9UQAKCRDbK58LschI
 g0/zAP99WOcCBp1M/jSTUOba230+eiol7l5RirDEA6wu7TqY2QEAuvMG0KfCCpTI
 I0WqStrK1QMbhwKPodJC1k+17jArKgw=
 =jfMU
 -----END PGP SIGNATURE-----

Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2024-09-11

We've added 12 non-merge commits during the last 16 day(s) which contain
a total of 20 files changed, 228 insertions(+), 30 deletions(-).

There's a minor merge conflict in drivers/net/netkit.c:
  00d066a4d4 ("netdev_features: convert NETIF_F_LLTX to dev->lltx")
  d966087948 ("netkit: Disable netpoll support")
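
For reference, a rough sketch of the expected resolution in netkit_setup():
keep both sides, i.e. the new IFF_DISABLE_NETPOLL flag next to the dev->lltx
conversion (this mirrors the netkit.c hunk further below):

  dev->priv_flags |= IFF_NO_QUEUE;
  dev->priv_flags |= IFF_DISABLE_NETPOLL;  /* from d966087948 */
  dev->lltx = true;                        /* from 00d066a4d4 */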

The main changes are:

1) Enable bpf_dynptr_from_skb for tp_btf such that this can be used
   to easily parse skbs in BPF programs attached to tracepoints,
   from Philo Lu. See the usage sketch after this list.

2) Add a cond_resched() point in BPF's sock_hash_free() as there have
   been several syzbot soft lockup reports recently, from Eric Dumazet.

3) Fix xsk_buff_can_alloc() to account for queue_empty_descs which
   got noticed when the zero-copy ice driver started to use it,
   from Maciej Fijalkowski.

4) Move the xdp:xdp_cpumap_kthread tracepoint before cpumap pushes skbs
   up via netif_receive_skb_list() to better measure latencies,
   from Daniel Xu.

5) Follow-up to disable netpoll support from netkit, from Daniel Borkmann.

6) Improve xsk selftests to not assume a fixed MAX_SKB_FRAGS of 17 but
   instead gather the actual value via /proc/sys/net/core/max_skb_frags,
   also from Maciej Fijalkowski.
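
As a usage sketch for 1): a tp_btf program can now wrap the tracepoint's skb
argument in a read-only dynptr and copy data out of it. The snippet below is
illustrative only (the program name is made up and the kfunc declaration
mirrors the selftests' bpf_kfuncs.h); it is modeled on the
test_dynptr_skb_tp_btf selftest added in this series:

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  /* kfunc declaration, as in tools/testing/selftests/bpf/bpf_kfuncs.h */
  extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
                                 struct bpf_dynptr *ptr__uninit) __ksym __weak;

  char _license[] SEC("license") = "GPL";

  SEC("tp_btf/kfree_skb")
  int BPF_PROG(observe_dropped_skb, void *skb, void *location)
  {
          struct bpf_dynptr ptr;
          struct ethhdr eth;

          /* read-only dynptr over the skb handed to the tracepoint */
          if (bpf_dynptr_from_skb(skb, 0, &ptr))
                  return 0;

          /* copy out the Ethernet header, linear or paged */
          if (bpf_dynptr_read(&eth, sizeof(eth), &ptr, 0, 0))
                  return 0;

          bpf_printk("dropped skb, h_proto 0x%x (network byte order)",
                     eth.h_proto);
          return 0;
  }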

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next:
  sock_map: Add a cond_resched() in sock_hash_free()
  selftests/bpf: Expand skb dynptr selftests for tp_btf
  bpf: Allow bpf_dynptr_from_skb() for tp_btf
  tcp: Use skb__nullable in trace_tcp_send_reset
  selftests/bpf: Add test for __nullable suffix in tp_btf
  bpf: Support __nullable argument suffix for tp_btf
  bpf, cpumap: Move xdp:xdp_cpumap_kthread tracepoint before rcv
  selftests/xsk: Read current MAX_SKB_FRAGS from sysctl knob
  xsk: Bump xsk_queue::queue_empty_descs in xp_can_alloc()
  tcp_bpf: Remove an unused parameter for bpf_tcp_ingress()
  bpf, sockmap: Correct spelling skmsg.c
  netkit: Disable netpoll support

====================

Link: https://patch.msgid.link/20240911211525.13834-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
commit 3b7dc7000e (Jakub Kicinski, 2024-09-12 20:21:50 -07:00)
20 changed files with 228 additions and 30 deletions


@@ -255,6 +255,7 @@ static void netkit_setup(struct net_device *dev)
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
dev->priv_flags |= IFF_PHONY_HEADROOM;
dev->priv_flags |= IFF_NO_QUEUE;
+ dev->priv_flags |= IFF_DISABLE_NETPOLL;
dev->lltx = true;
dev->ethtool_ops = &netkit_ethtool_ops;


@@ -91,10 +91,10 @@ DEFINE_RST_REASON(FN, FN)
TRACE_EVENT(tcp_send_reset,
TP_PROTO(const struct sock *sk,
- const struct sk_buff *skb,
+ const struct sk_buff *skb__nullable,
const enum sk_rst_reason reason),
- TP_ARGS(sk, skb, reason),
+ TP_ARGS(sk, skb__nullable, reason),
TP_STRUCT__entry(
__field(const void *, skbaddr)
@@ -106,7 +106,7 @@ TRACE_EVENT(tcp_send_reset,
),
TP_fast_assign(
- __entry->skbaddr = skb;
+ __entry->skbaddr = skb__nullable;
__entry->skaddr = sk;
/* Zero means unknown state. */
__entry->state = sk ? sk->sk_state : 0;
@@ -118,13 +118,13 @@ TRACE_EVENT(tcp_send_reset,
const struct inet_sock *inet = inet_sk(sk);
TP_STORE_ADDR_PORTS(__entry, inet, sk);
- } else if (skb) {
- const struct tcphdr *th = (const struct tcphdr *)skb->data;
+ } else if (skb__nullable) {
+ const struct tcphdr *th = (const struct tcphdr *)skb__nullable->data;
/*
* We should reverse the 4-tuple of skb, so later
* it can print the right flow direction of rst.
*/
- TP_STORE_ADDR_PORTS_SKB(skb, th, entry->daddr, entry->saddr);
+ TP_STORE_ADDR_PORTS_SKB(skb__nullable, th, entry->daddr, entry->saddr);
}
__entry->reason = reason;
),


@@ -6525,6 +6525,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
if (prog_args_trusted(prog))
info->reg_type |= PTR_TRUSTED;
+ if (btf_param_match_suffix(btf, &args[arg], "__nullable"))
+ info->reg_type |= PTR_MAYBE_NULL;
if (tgt_prog) {
enum bpf_prog_type tgt_type;


@@ -354,12 +354,14 @@ static int cpu_map_kthread_run(void *data)
list_add_tail(&skb->list, &list);
}
- netif_receive_skb_list(&list);
- /* Feedback loop via tracepoint */
+ /* Feedback loop via tracepoint.
+ * NB: keep before recv to allow measuring enqueue/dequeue latency.
+ */
trace_xdp_cpumap_kthread(rcpu->map_id, n, kmem_alloc_drops,
sched, &stats);
+ netif_receive_skb_list(&list);
local_bh_enable(); /* resched point, may call do_softirq() */
}
__set_current_state(TASK_RUNNING);


@@ -28,6 +28,8 @@
#include <linux/cpumask.h>
#include <linux/bpf_mem_alloc.h>
#include <net/xdp.h>
+ #include <linux/trace_events.h>
+ #include <linux/kallsyms.h>
#include "disasm.h"
@@ -21154,11 +21156,13 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
{
bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
bool prog_tracing = prog->type == BPF_PROG_TYPE_TRACING;
+ char trace_symbol[KSYM_SYMBOL_LEN];
const char prefix[] = "btf_trace_";
+ struct bpf_raw_event_map *btp;
int ret = 0, subprog = -1, i;
const struct btf_type *t;
bool conservative = true;
- const char *tname;
+ const char *tname, *fname;
struct btf *btf;
long addr = 0;
struct module *mod = NULL;
@@ -21289,10 +21293,34 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
return -EINVAL;
}
tname += sizeof(prefix) - 1;
+ /* The func_proto of "btf_trace_##tname" is generated from typedef without argument
+ * names. Thus using bpf_raw_event_map to get argument names.
+ */
+ btp = bpf_get_raw_tracepoint(tname);
+ if (!btp)
+ return -EINVAL;
+ fname = kallsyms_lookup((unsigned long)btp->bpf_func, NULL, NULL, NULL,
+ trace_symbol);
+ bpf_put_raw_tracepoint(btp);
+ if (fname)
+ ret = btf_find_by_name_kind(btf, fname, BTF_KIND_FUNC);
+ if (!fname || ret < 0) {
+ bpf_log(log, "Cannot find btf of tracepoint template, fall back to %s%s.\n",
+ prefix, tname);
+ t = btf_type_by_id(btf, t->type);
+ if (!btf_type_is_ptr(t))
+ /* should never happen in valid vmlinux build */
+ return -EINVAL;
+ } else {
+ t = btf_type_by_id(btf, ret);
+ if (!btf_type_is_func(t))
+ /* should never happen in valid vmlinux build */
+ return -EINVAL;
+ }
t = btf_type_by_id(btf, t->type);
if (!btf_type_is_func_proto(t))
/* should never happen in valid vmlinux build */


@@ -12063,7 +12063,7 @@ int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags,
}
BTF_KFUNCS_START(bpf_kfunc_check_set_skb)
- BTF_ID_FLAGS(func, bpf_dynptr_from_skb)
+ BTF_ID_FLAGS(func, bpf_dynptr_from_skb, KF_TRUSTED_ARGS)
BTF_KFUNCS_END(bpf_kfunc_check_set_skb)
BTF_KFUNCS_START(bpf_kfunc_check_set_xdp)
@@ -12112,6 +12112,7 @@ static int __init bpf_kfunc_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_XMIT, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_kfunc_set_skb);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
&bpf_kfunc_set_sock_addr);


@@ -293,7 +293,7 @@ out:
/* If we trim data a full sg elem before curr pointer update
* copybreak and current so that any future copy operations
* start at new copy location.
- * However trimed data that has not yet been used in a copy op
+ * However trimmed data that has not yet been used in a copy op
* does not require an update.
*/
if (!msg->sg.size) {


@@ -1183,6 +1183,7 @@ static void sock_hash_free(struct bpf_map *map)
sock_put(elem->sk);
sock_hash_free_elem(htab, elem);
}
+ cond_resched();
}
/* wait for psock readers accessing its map link */


@@ -30,7 +30,7 @@ void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
}
static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
- struct sk_msg *msg, u32 apply_bytes, int flags)
+ struct sk_msg *msg, u32 apply_bytes)
{
bool apply = apply_bytes;
struct scatterlist *sge;
@@ -167,7 +167,7 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
if (unlikely(!psock))
return -EPIPE;
- ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes, flags) :
+ ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes) :
tcp_bpf_push_locked(sk, msg, bytes, flags, false);
sk_psock_put(sk, psock);
return ret;


@@ -661,9 +661,17 @@ EXPORT_SYMBOL(xp_alloc_batch);
bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count)
{
+ u32 req_count, avail_count;
if (pool->free_list_cnt >= count)
return true;
- return xskq_cons_has_entries(pool->fq, count - pool->free_list_cnt);
+ req_count = count - pool->free_list_cnt;
+ avail_count = xskq_cons_nb_entries(pool->fq, req_count);
+ if (!avail_count)
+ pool->fq->queue_empty_descs++;
+ return avail_count >= req_count;
}
EXPORT_SYMBOL(xp_can_alloc);


@@ -306,11 +306,6 @@ static inline u32 xskq_cons_nb_entries(struct xsk_queue *q, u32 max)
return entries >= max ? max : entries;
}
- static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt)
- {
- return xskq_cons_nb_entries(q, cnt) >= cnt;
- }
static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr)
{
if (q->cached_prod == q->cached_cons)


@@ -34,6 +34,12 @@ DECLARE_TRACE(bpf_testmod_test_write_bare,
TP_ARGS(task, ctx)
);
+ /* Used in bpf_testmod_test_read() to test __nullable suffix */
+ DECLARE_TRACE(bpf_testmod_test_nullable_bare,
+ TP_PROTO(struct bpf_testmod_test_read_ctx *ctx__nullable),
+ TP_ARGS(ctx__nullable)
+ );
#undef BPF_TESTMOD_DECLARE_TRACE
#ifdef DECLARE_TRACE_WRITABLE
#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \


@@ -356,6 +356,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
if (bpf_testmod_loop_test(101) > 100)
trace_bpf_testmod_test_read(current, &ctx);
+ trace_bpf_testmod_test_nullable_bare(NULL);
/* Magic number to enable writable tp */
if (len == 64) {
struct bpf_testmod_test_writable_ctx writable = {


@@ -9,6 +9,7 @@
enum test_setup_type {
SETUP_SYSCALL_SLEEP,
SETUP_SKB_PROG,
+ SETUP_SKB_PROG_TP,
};
static struct {
@@ -28,6 +29,7 @@ static struct {
{"test_dynptr_clone", SETUP_SKB_PROG},
{"test_dynptr_skb_no_buff", SETUP_SKB_PROG},
{"test_dynptr_skb_strcmp", SETUP_SKB_PROG},
+ {"test_dynptr_skb_tp_btf", SETUP_SKB_PROG_TP},
};
static void verify_success(const char *prog_name, enum test_setup_type setup_type)
@@ -87,6 +89,37 @@ static void verify_success(const char *prog_name, enum test_setup_typ
break;
}
+ case SETUP_SKB_PROG_TP:
+ {
+ struct __sk_buff skb = {};
+ struct bpf_object *obj;
+ int aux_prog_fd;
+ /* Just use its test_run to trigger kfree_skb tracepoint */
+ err = bpf_prog_test_load("./test_pkt_access.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &aux_prog_fd);
+ if (!ASSERT_OK(err, "prog_load sched cls"))
+ goto cleanup;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ );
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+ goto cleanup;
+ err = bpf_prog_test_run_opts(aux_prog_fd, &topts);
+ bpf_link__destroy(link);
+ if (!ASSERT_OK(err, "test_run"))
+ goto cleanup;
+ break;
+ }
}
ASSERT_EQ(skel->bss->err, 0, "err");


@@ -0,0 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include "test_tp_btf_nullable.skel.h"
void test_tp_btf_nullable(void)
{
if (!env.has_testmod) {
test__skip();
return;
}
RUN_TESTS(test_tp_btf_nullable);
}


@@ -6,6 +6,7 @@
#include <stdbool.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+ #include <bpf/bpf_tracing.h>
#include <linux/if_ether.h>
#include "bpf_misc.h"
#include "bpf_kfuncs.h"
@@ -1254,6 +1255,30 @@ int skb_invalid_ctx(void *ctx)
return 0;
}
+ SEC("fentry/skb_tx_error")
+ __failure __msg("must be referenced or trusted")
+ int BPF_PROG(skb_invalid_ctx_fentry, void *skb)
+ {
+ struct bpf_dynptr ptr;
+ /* this should fail */
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+ return 0;
+ }
+ SEC("fexit/skb_tx_error")
+ __failure __msg("must be referenced or trusted")
+ int BPF_PROG(skb_invalid_ctx_fexit, void *skb)
+ {
+ struct bpf_dynptr ptr;
+ /* this should fail */
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+ return 0;
+ }
/* Reject writes to dynptr slot for uninit arg */
SEC("?raw_tp")
__failure __msg("potential write to dynptr at off=-16")


@@ -5,6 +5,7 @@
#include <stdbool.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+ #include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
#include "bpf_kfuncs.h"
#include "errno.h"
@@ -544,3 +545,25 @@ int test_dynptr_skb_strcmp(struct __sk_buff *skb)
return 1;
}
+ SEC("tp_btf/kfree_skb")
+ int BPF_PROG(test_dynptr_skb_tp_btf, void *skb, void *location)
+ {
+ __u8 write_data[2] = {1, 2};
+ struct bpf_dynptr ptr;
+ int ret;
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+ /* since tp_btf skbs are read only, writes should fail */
+ ret = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+ if (ret != -EINVAL) {
+ err = 2;
+ return 1;
+ }
+ return 1;
+ }


@@ -0,0 +1,24 @@
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "../bpf_testmod/bpf_testmod.h"
#include "bpf_misc.h"
SEC("tp_btf/bpf_testmod_test_nullable_bare")
__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'")
int BPF_PROG(handle_tp_btf_nullable_bare1, struct bpf_testmod_test_read_ctx *nullable_ctx)
{
return nullable_ctx->len;
}
SEC("tp_btf/bpf_testmod_test_nullable_bare")
int BPF_PROG(handle_tp_btf_nullable_bare2, struct bpf_testmod_test_read_ctx *nullable_ctx)
{
if (nullable_ctx)
return nullable_ctx->len;
return 0;
}
char _license[] SEC("license") = "GPL";


@@ -324,6 +324,25 @@ out:
return zc_avail;
}
+ #define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags"
+ static unsigned int get_max_skb_frags(void)
+ {
+ unsigned int max_skb_frags = 0;
+ FILE *file;
+ file = fopen(MAX_SKB_FRAGS_PATH, "r");
+ if (!file) {
+ ksft_print_msg("Error opening %s\n", MAX_SKB_FRAGS_PATH);
+ return 0;
+ }
+ if (fscanf(file, "%u", &max_skb_frags) != 1)
+ ksft_print_msg("Error reading %s\n", MAX_SKB_FRAGS_PATH);
+ fclose(file);
+ return max_skb_frags;
+ }
static struct option long_options[] = {
{"interface", required_argument, 0, 'i'},
{"busy-poll", no_argument, 0, 'b'},
@@ -2244,13 +2263,24 @@ static int testapp_poll_rxq_tmout(struct test_spec *test)
static int testapp_too_many_frags(struct test_spec *test)
{
- struct pkt pkts[2 * XSK_DESC__MAX_SKB_FRAGS + 2] = {};
+ struct pkt *pkts;
u32 max_frags, i;
+ int ret;
- if (test->mode == TEST_MODE_ZC)
+ if (test->mode == TEST_MODE_ZC) {
max_frags = test->ifobj_tx->xdp_zc_max_segs;
- else
- max_frags = XSK_DESC__MAX_SKB_FRAGS;
+ } else {
+ max_frags = get_max_skb_frags();
+ if (!max_frags) {
+ ksft_print_msg("Couldn't retrieve MAX_SKB_FRAGS from system, using default (17) value\n");
+ max_frags = 17;
+ }
+ max_frags += 1;
+ }
+ pkts = calloc(2 * max_frags + 2, sizeof(struct pkt));
+ if (!pkts)
+ return TEST_FAILURE;
test->mtu = MAX_ETH_JUMBO_SIZE;
@@ -2280,7 +2310,10 @@ static int testapp_too_many_frags(struct test_spec *test)
pkts[2 * max_frags + 1].valid = true;
pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2);
- return testapp_validate_traffic(test);
+ ret = testapp_validate_traffic(test);
+ free(pkts);
+ return ret;
}
static int xsk_load_xdp_programs(struct ifobject *ifobj)


@@ -55,7 +55,6 @@
#define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024)
#define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024)
#define XSK_DESC__INVALID_OPTION (0xffff)
- #define XSK_DESC__MAX_SKB_FRAGS 18
#define HUGEPAGE_SIZE (2 * 1024 * 1024)
#define PKT_DUMP_NB_TO_PRINT 16
#define RUN_ALL_TESTS UINT_MAX