mirror of
https://github.com/torvalds/linux.git
synced 2024-11-10 14:11:52 +00:00
Alexei Starovoitov says: ==================== pull-request: bpf-next 2022-01-06 We've added 41 non-merge commits during the last 2 day(s) which contain a total of 36 files changed, 1214 insertions(+), 368 deletions(-). The main changes are: 1) Various fixes in the verifier, from Kris and Daniel. 2) Fixes in sockmap, from John. 3) bpf_getsockopt fix, from Kuniyuki. 4) INET_POST_BIND fix, from Menglong. 5) arm64 JIT fix for bpf pseudo funcs, from Hou. 6) BPF ISA doc improvements, from Christoph. * https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (41 commits) bpf: selftests: Add bind retry for post_bind{4, 6} bpf: selftests: Use C99 initializers in test_sock.c net: bpf: Handle return value of BPF_CGROUP_RUN_PROG_INET{4,6}_POST_BIND() bpf/selftests: Test bpf_d_path on rdonly_mem. libbpf: Add documentation for bpf_map batch operations selftests/bpf: Don't rely on preserving volatile in PT_REGS macros in loop3 xdp: Add xdp_do_redirect_frame() for pre-computed xdp_frames xdp: Move conversion to xdp_frame out of map functions page_pool: Store the XDP mem id page_pool: Add callback to init pages when they are allocated xdp: Allow registering memory model without rxq reference samples/bpf: xdpsock: Add timestamp for Tx-only operation samples/bpf: xdpsock: Add time-out for cleaning Tx samples/bpf: xdpsock: Add sched policy and priority support samples/bpf: xdpsock: Add cyclic TX operation capability samples/bpf: xdpsock: Add clockid selection support samples/bpf: xdpsock: Add Dest and Src MAC setting for Tx-only operation samples/bpf: xdpsock: Add VLAN support for Tx-only operation libbpf 1.0: Deprecate bpf_object__find_map_by_offset() API libbpf 1.0: Deprecate bpf_map__is_offload_neutral() ... ==================== Link: https://lore.kernel.org/r/20220107013626.53943-1-alexei.starovoitov@gmail.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
257367c0c9
@ -19,23 +19,37 @@ The eBPF calling convention is defined as:
|
||||
R0 - R5 are scratch registers and eBPF programs needs to spill/fill them if
|
||||
necessary across calls.
|
||||
|
||||
Instruction encoding
|
||||
====================
|
||||
|
||||
eBPF uses 64-bit instructions with the following encoding:
|
||||
|
||||
============= ======= =============== ==================== ============
|
||||
32 bits (MSB) 16 bits 4 bits 4 bits 8 bits (LSB)
|
||||
============= ======= =============== ==================== ============
|
||||
immediate offset source register destination register opcode
|
||||
============= ======= =============== ==================== ============
|
||||
|
||||
Note that most instructions do not use all of the fields.
|
||||
Unused fields shall be cleared to zero.
|
||||
|
||||
Instruction classes
|
||||
===================
|
||||
-------------------
|
||||
|
||||
The three LSB bits of the 'opcode' field store the instruction class:
|
||||
|
||||
========= =====
|
||||
class value
|
||||
========= =====
|
||||
BPF_LD 0x00
|
||||
BPF_LDX 0x01
|
||||
BPF_ST 0x02
|
||||
BPF_STX 0x03
|
||||
BPF_ALU 0x04
|
||||
BPF_JMP 0x05
|
||||
BPF_JMP32 0x06
|
||||
BPF_ALU64 0x07
|
||||
========= =====
|
||||
========= ===== ===============================
|
||||
class value description
|
||||
========= ===== ===============================
|
||||
BPF_LD 0x00 non-standard load operations
|
||||
BPF_LDX 0x01 load into register operations
|
||||
BPF_ST 0x02 store from immediate operations
|
||||
BPF_STX 0x03 store from register operations
|
||||
BPF_ALU 0x04 32-bit arithmetic operations
|
||||
BPF_JMP 0x05 64-bit jump operations
|
||||
BPF_JMP32 0x06 32-bit jump operations
|
||||
BPF_ALU64 0x07 64-bit arithmetic operations
|
||||
========= ===== ===============================
|
||||
|
||||
Arithmetic and jump instructions
|
||||
================================
|
||||
@ -60,66 +74,78 @@ The 4th bit encodes the source operand:
|
||||
|
||||
The four MSB bits store the operation code.
|
||||
|
||||
For class BPF_ALU or BPF_ALU64:
|
||||
|
||||
======== ===== =========================
|
||||
Arithmetic instructions
|
||||
-----------------------
|
||||
|
||||
BPF_ALU uses 32-bit wide operands while BPF_ALU64 uses 64-bit wide operands for
|
||||
otherwise identical operations.
|
||||
The code field encodes the operation as below:
|
||||
|
||||
======== ===== ==========================
|
||||
code value description
|
||||
======== ===== =========================
|
||||
BPF_ADD 0x00
|
||||
BPF_SUB 0x10
|
||||
BPF_MUL 0x20
|
||||
BPF_DIV 0x30
|
||||
BPF_OR 0x40
|
||||
BPF_AND 0x50
|
||||
BPF_LSH 0x60
|
||||
BPF_RSH 0x70
|
||||
BPF_NEG 0x80
|
||||
BPF_MOD 0x90
|
||||
BPF_XOR 0xa0
|
||||
BPF_MOV 0xb0 mov reg to reg
|
||||
======== ===== ==========================
|
||||
BPF_ADD 0x00 dst += src
|
||||
BPF_SUB 0x10 dst -= src
|
||||
BPF_MUL 0x20 dst \*= src
|
||||
BPF_DIV 0x30 dst /= src
|
||||
BPF_OR 0x40 dst \|= src
|
||||
BPF_AND 0x50 dst &= src
|
||||
BPF_LSH 0x60 dst <<= src
|
||||
BPF_RSH 0x70 dst >>= src
|
||||
BPF_NEG 0x80 dst = ~src
|
||||
BPF_MOD 0x90 dst %= src
|
||||
BPF_XOR 0xa0 dst ^= src
|
||||
BPF_MOV 0xb0 dst = src
|
||||
BPF_ARSH 0xc0 sign extending shift right
|
||||
BPF_END 0xd0 endianness conversion
|
||||
======== ===== =========================
|
||||
======== ===== ==========================
|
||||
|
||||
For class BPF_JMP or BPF_JMP32:
|
||||
|
||||
======== ===== =========================
|
||||
code value description
|
||||
======== ===== =========================
|
||||
BPF_JA 0x00 BPF_JMP only
|
||||
BPF_JEQ 0x10
|
||||
BPF_JGT 0x20
|
||||
BPF_JGE 0x30
|
||||
BPF_JSET 0x40
|
||||
BPF_JNE 0x50 jump '!='
|
||||
BPF_JSGT 0x60 signed '>'
|
||||
BPF_JSGE 0x70 signed '>='
|
||||
BPF_CALL 0x80 function call
|
||||
BPF_EXIT 0x90 function return
|
||||
BPF_JLT 0xa0 unsigned '<'
|
||||
BPF_JLE 0xb0 unsigned '<='
|
||||
BPF_JSLT 0xc0 signed '<'
|
||||
BPF_JSLE 0xd0 signed '<='
|
||||
======== ===== =========================
|
||||
|
||||
So BPF_ADD | BPF_X | BPF_ALU means::
|
||||
BPF_ADD | BPF_X | BPF_ALU means::
|
||||
|
||||
dst_reg = (u32) dst_reg + (u32) src_reg;
|
||||
|
||||
Similarly, BPF_XOR | BPF_K | BPF_ALU means::
|
||||
|
||||
src_reg = (u32) src_reg ^ (u32) imm32
|
||||
|
||||
eBPF is using BPF_MOV | BPF_X | BPF_ALU to represent A = B moves. BPF_ALU64
|
||||
is used to mean exactly the same operations as BPF_ALU, but with 64-bit wide
|
||||
operands instead. So BPF_ADD | BPF_X | BPF_ALU64 means 64-bit addition, i.e.::
|
||||
BPF_ADD | BPF_X | BPF_ALU64 means::
|
||||
|
||||
dst_reg = dst_reg + src_reg
|
||||
|
||||
BPF_JMP | BPF_EXIT means function exit only. The eBPF program needs to store
|
||||
the return value into register R0 before doing a BPF_EXIT. Class 6 is used as
|
||||
BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide
|
||||
operands for the comparisons instead.
|
||||
BPF_XOR | BPF_K | BPF_ALU means::
|
||||
|
||||
src_reg = (u32) src_reg ^ (u32) imm32
|
||||
|
||||
BPF_XOR | BPF_K | BPF_ALU64 means::
|
||||
|
||||
src_reg = src_reg ^ imm32
|
||||
|
||||
|
||||
Jump instructions
|
||||
-----------------
|
||||
|
||||
BPF_JMP32 uses 32-bit wide operands while BPF_JMP uses 64-bit wide operands for
|
||||
otherwise identical operations.
|
||||
The code field encodes the operation as below:
|
||||
|
||||
======== ===== ========================= ============
|
||||
code value description notes
|
||||
======== ===== ========================= ============
|
||||
BPF_JA 0x00 PC += off BPF_JMP only
|
||||
BPF_JEQ 0x10 PC += off if dst == src
|
||||
BPF_JGT 0x20 PC += off if dst > src unsigned
|
||||
BPF_JGE 0x30 PC += off if dst >= src unsigned
|
||||
BPF_JSET 0x40 PC += off if dst & src
|
||||
BPF_JNE 0x50 PC += off if dst != src
|
||||
BPF_JSGT 0x60 PC += off if dst > src signed
|
||||
BPF_JSGE 0x70 PC += off if dst >= src signed
|
||||
BPF_CALL 0x80 function call
|
||||
BPF_EXIT 0x90 function / program return BPF_JMP only
|
||||
BPF_JLT 0xa0 PC += off if dst < src unsigned
|
||||
BPF_JLE 0xb0 PC += off if dst <= src unsigned
|
||||
BPF_JSLT 0xc0 PC += off if dst < src signed
|
||||
BPF_JSLE 0xd0 PC += off if dst <= src signed
|
||||
======== ===== ========================= ============
|
||||
|
||||
The eBPF program needs to store the return value into register R0 before doing a
|
||||
BPF_EXIT.
|
||||
|
||||
|
||||
Load and store instructions
|
||||
@ -147,15 +173,15 @@ The size modifier is one of:
|
||||
|
||||
The mode modifier is one of:
|
||||
|
||||
============= ===== =====================
|
||||
============= ===== ====================================
|
||||
mode modifier value description
|
||||
============= ===== =====================
|
||||
============= ===== ====================================
|
||||
BPF_IMM 0x00 used for 64-bit mov
|
||||
BPF_ABS 0x20
|
||||
BPF_IND 0x40
|
||||
BPF_MEM 0x60
|
||||
BPF_ABS 0x20 legacy BPF packet access
|
||||
BPF_IND 0x40 legacy BPF packet access
|
||||
BPF_MEM 0x60 all normal load and store operations
|
||||
BPF_ATOMIC 0xc0 atomic operations
|
||||
============= ===== =====================
|
||||
============= ===== ====================================
|
||||
|
||||
BPF_MEM | <size> | BPF_STX means::
|
||||
|
||||
|
@ -792,7 +792,10 @@ emit_cond_jmp:
|
||||
u64 imm64;
|
||||
|
||||
imm64 = (u64)insn1.imm << 32 | (u32)imm;
|
||||
emit_a64_mov_i64(dst, imm64, ctx);
|
||||
if (bpf_pseudo_func(insn))
|
||||
emit_addr_mov_i64(dst, imm64, ctx);
|
||||
else
|
||||
emit_a64_mov_i64(dst, imm64, ctx);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@ -1669,17 +1669,17 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
|
||||
struct btf *bpf_get_btf_vmlinux(void);
|
||||
|
||||
/* Map specifics */
|
||||
struct xdp_buff;
|
||||
struct xdp_frame;
|
||||
struct sk_buff;
|
||||
struct bpf_dtab_netdev;
|
||||
struct bpf_cpu_map_entry;
|
||||
|
||||
void __dev_flush(void);
|
||||
int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
|
||||
int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
|
||||
struct net_device *dev_rx);
|
||||
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
|
||||
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
|
||||
struct net_device *dev_rx);
|
||||
int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
|
||||
int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
|
||||
struct bpf_map *map, bool exclude_ingress);
|
||||
int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
|
||||
struct bpf_prog *xdp_prog);
|
||||
@ -1688,7 +1688,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
|
||||
bool exclude_ingress);
|
||||
|
||||
void __cpu_map_flush(void);
|
||||
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
|
||||
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
|
||||
struct net_device *dev_rx);
|
||||
int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
|
||||
struct sk_buff *skb);
|
||||
@ -1866,26 +1866,26 @@ static inline void __dev_flush(void)
|
||||
{
|
||||
}
|
||||
|
||||
struct xdp_buff;
|
||||
struct xdp_frame;
|
||||
struct bpf_dtab_netdev;
|
||||
struct bpf_cpu_map_entry;
|
||||
|
||||
static inline
|
||||
int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
|
||||
int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
|
||||
struct net_device *dev_rx)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline
|
||||
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
|
||||
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
|
||||
struct net_device *dev_rx)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline
|
||||
int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
|
||||
int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
|
||||
struct bpf_map *map, bool exclude_ingress)
|
||||
{
|
||||
return 0;
|
||||
@ -1913,7 +1913,7 @@ static inline void __cpu_map_flush(void)
|
||||
}
|
||||
|
||||
static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
|
||||
struct xdp_buff *xdp,
|
||||
struct xdp_frame *xdpf,
|
||||
struct net_device *dev_rx)
|
||||
{
|
||||
return 0;
|
||||
|
@ -1019,6 +1019,10 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
|
||||
int xdp_do_redirect(struct net_device *dev,
|
||||
struct xdp_buff *xdp,
|
||||
struct bpf_prog *prog);
|
||||
int xdp_do_redirect_frame(struct net_device *dev,
|
||||
struct xdp_buff *xdp,
|
||||
struct xdp_frame *xdpf,
|
||||
struct bpf_prog *prog);
|
||||
void xdp_do_flush(void);
|
||||
|
||||
/* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as
|
||||
|
@ -80,6 +80,8 @@ struct page_pool_params {
|
||||
enum dma_data_direction dma_dir; /* DMA mapping direction */
|
||||
unsigned int max_len; /* max DMA sync memory size */
|
||||
unsigned int offset; /* DMA addr offset */
|
||||
void (*init_callback)(struct page *page, void *arg);
|
||||
void *init_arg;
|
||||
};
|
||||
|
||||
struct page_pool {
|
||||
@ -94,6 +96,7 @@ struct page_pool {
|
||||
unsigned int frag_offset;
|
||||
struct page *frag_page;
|
||||
long frag_users;
|
||||
u32 xdp_mem_id;
|
||||
|
||||
/*
|
||||
* Data structure for allocation side
|
||||
@ -168,9 +171,12 @@ bool page_pool_return_skb_page(struct page *page);
|
||||
|
||||
struct page_pool *page_pool_create(const struct page_pool_params *params);
|
||||
|
||||
struct xdp_mem_info;
|
||||
|
||||
#ifdef CONFIG_PAGE_POOL
|
||||
void page_pool_destroy(struct page_pool *pool);
|
||||
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *));
|
||||
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
|
||||
struct xdp_mem_info *mem);
|
||||
void page_pool_release_page(struct page_pool *pool, struct page *page);
|
||||
void page_pool_put_page_bulk(struct page_pool *pool, void **data,
|
||||
int count);
|
||||
@ -180,7 +186,8 @@ static inline void page_pool_destroy(struct page_pool *pool)
|
||||
}
|
||||
|
||||
static inline void page_pool_use_xdp_mem(struct page_pool *pool,
|
||||
void (*disconnect)(void *))
|
||||
void (*disconnect)(void *),
|
||||
struct xdp_mem_info *mem)
|
||||
{
|
||||
}
|
||||
static inline void page_pool_release_page(struct page_pool *pool,
|
||||
|
@ -1209,6 +1209,7 @@ struct proto {
|
||||
void (*unhash)(struct sock *sk);
|
||||
void (*rehash)(struct sock *sk);
|
||||
int (*get_port)(struct sock *sk, unsigned short snum);
|
||||
void (*put_port)(struct sock *sk);
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
int (*psock_update_sk_prot)(struct sock *sk,
|
||||
struct sk_psock *psock,
|
||||
|
@ -260,6 +260,9 @@ bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
|
||||
int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
|
||||
enum xdp_mem_type type, void *allocator);
|
||||
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq);
|
||||
int xdp_reg_mem_model(struct xdp_mem_info *mem,
|
||||
enum xdp_mem_type type, void *allocator);
|
||||
void xdp_unreg_mem_model(struct xdp_mem_info *mem);
|
||||
|
||||
/* Drivers not supporting XDP metadata can use this helper, which
|
||||
* rejects any room expansion for metadata as a result.
|
||||
|
@ -746,15 +746,9 @@ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
|
||||
list_add(&bq->flush_node, flush_list);
|
||||
}
|
||||
|
||||
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
|
||||
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
|
||||
struct net_device *dev_rx)
|
||||
{
|
||||
struct xdp_frame *xdpf;
|
||||
|
||||
xdpf = xdp_convert_buff_to_frame(xdp);
|
||||
if (unlikely(!xdpf))
|
||||
return -EOVERFLOW;
|
||||
|
||||
/* Info needed when constructing SKB on remote CPU */
|
||||
xdpf->dev_rx = dev_rx;
|
||||
|
||||
|
@ -467,24 +467,19 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
|
||||
bq->q[bq->count++] = xdpf;
|
||||
}
|
||||
|
||||
static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
|
||||
static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
|
||||
struct net_device *dev_rx,
|
||||
struct bpf_prog *xdp_prog)
|
||||
{
|
||||
struct xdp_frame *xdpf;
|
||||
int err;
|
||||
|
||||
if (!dev->netdev_ops->ndo_xdp_xmit)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
|
||||
err = xdp_ok_fwd_dev(dev, xdpf->len);
|
||||
if (unlikely(err))
|
||||
return err;
|
||||
|
||||
xdpf = xdp_convert_buff_to_frame(xdp);
|
||||
if (unlikely(!xdpf))
|
||||
return -EOVERFLOW;
|
||||
|
||||
bq_enqueue(dev, xdpf, dev_rx, xdp_prog);
|
||||
return 0;
|
||||
}
|
||||
@ -520,27 +515,27 @@ static u32 dev_map_bpf_prog_run_skb(struct sk_buff *skb, struct bpf_dtab_netdev
|
||||
return act;
|
||||
}
|
||||
|
||||
int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
|
||||
int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
|
||||
struct net_device *dev_rx)
|
||||
{
|
||||
return __xdp_enqueue(dev, xdp, dev_rx, NULL);
|
||||
return __xdp_enqueue(dev, xdpf, dev_rx, NULL);
|
||||
}
|
||||
|
||||
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
|
||||
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
|
||||
struct net_device *dev_rx)
|
||||
{
|
||||
struct net_device *dev = dst->dev;
|
||||
|
||||
return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog);
|
||||
return __xdp_enqueue(dev, xdpf, dev_rx, dst->xdp_prog);
|
||||
}
|
||||
|
||||
static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp)
|
||||
static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf)
|
||||
{
|
||||
if (!obj ||
|
||||
!obj->dev->netdev_ops->ndo_xdp_xmit)
|
||||
return false;
|
||||
|
||||
if (xdp_ok_fwd_dev(obj->dev, xdp->data_end - xdp->data))
|
||||
if (xdp_ok_fwd_dev(obj->dev, xdpf->len))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
@ -586,14 +581,13 @@ static int get_upper_ifindexes(struct net_device *dev, int *indexes)
|
||||
return n;
|
||||
}
|
||||
|
||||
int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
|
||||
int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
|
||||
struct bpf_map *map, bool exclude_ingress)
|
||||
{
|
||||
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
|
||||
struct bpf_dtab_netdev *dst, *last_dst = NULL;
|
||||
int excluded_devices[1+MAX_NEST_DEV];
|
||||
struct hlist_head *head;
|
||||
struct xdp_frame *xdpf;
|
||||
int num_excluded = 0;
|
||||
unsigned int i;
|
||||
int err;
|
||||
@ -603,15 +597,11 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
|
||||
excluded_devices[num_excluded++] = dev_rx->ifindex;
|
||||
}
|
||||
|
||||
xdpf = xdp_convert_buff_to_frame(xdp);
|
||||
if (unlikely(!xdpf))
|
||||
return -EOVERFLOW;
|
||||
|
||||
if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
|
||||
for (i = 0; i < map->max_entries; i++) {
|
||||
dst = rcu_dereference_check(dtab->netdev_map[i],
|
||||
rcu_read_lock_bh_held());
|
||||
if (!is_valid_dst(dst, xdp))
|
||||
if (!is_valid_dst(dst, xdpf))
|
||||
continue;
|
||||
|
||||
if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
|
||||
@ -634,7 +624,7 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
|
||||
head = dev_map_index_hash(dtab, i);
|
||||
hlist_for_each_entry_rcu(dst, head, index_hlist,
|
||||
lockdep_is_held(&dtab->index_lock)) {
|
||||
if (!is_valid_dst(dst, xdp))
|
||||
if (!is_valid_dst(dst, xdpf))
|
||||
continue;
|
||||
|
||||
if (is_ifindex_excluded(excluded_devices, num_excluded,
|
||||
|
@ -6031,6 +6031,7 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
|
||||
}
|
||||
|
||||
if (insn->code == (BPF_JMP | BPF_CALL) &&
|
||||
insn->src_reg == 0 &&
|
||||
insn->imm == BPF_FUNC_timer_set_callback) {
|
||||
struct bpf_verifier_state *async_cb;
|
||||
|
||||
@ -9079,15 +9080,15 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
|
||||
{
|
||||
if (type_may_be_null(reg->type) && reg->id == id &&
|
||||
!WARN_ON_ONCE(!reg->id)) {
|
||||
/* Old offset (both fixed and variable parts) should
|
||||
* have been known-zero, because we don't allow pointer
|
||||
* arithmetic on pointers that might be NULL.
|
||||
*/
|
||||
if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
|
||||
!tnum_equals_const(reg->var_off, 0) ||
|
||||
reg->off)) {
|
||||
__mark_reg_known_zero(reg);
|
||||
reg->off = 0;
|
||||
/* Old offset (both fixed and variable parts) should
|
||||
* have been known-zero, because we don't allow pointer
|
||||
* arithmetic on pointers that might be NULL. If we
|
||||
* see this happening, don't convert the register.
|
||||
*/
|
||||
return;
|
||||
}
|
||||
if (is_null) {
|
||||
reg->type = SCALAR_VALUE;
|
||||
|
@ -3957,10 +3957,35 @@ u32 xdp_master_redirect(struct xdp_buff *xdp)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdp_master_redirect);
|
||||
|
||||
int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
|
||||
struct bpf_prog *xdp_prog)
|
||||
static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri,
|
||||
struct net_device *dev,
|
||||
struct xdp_buff *xdp,
|
||||
struct bpf_prog *xdp_prog)
|
||||
{
|
||||
enum bpf_map_type map_type = ri->map_type;
|
||||
void *fwd = ri->tgt_value;
|
||||
u32 map_id = ri->map_id;
|
||||
int err;
|
||||
|
||||
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
|
||||
ri->map_type = BPF_MAP_TYPE_UNSPEC;
|
||||
|
||||
err = __xsk_map_redirect(fwd, xdp);
|
||||
if (unlikely(err))
|
||||
goto err;
|
||||
|
||||
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
|
||||
return 0;
|
||||
err:
|
||||
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
|
||||
struct net_device *dev,
|
||||
struct xdp_frame *xdpf,
|
||||
struct bpf_prog *xdp_prog)
|
||||
{
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
enum bpf_map_type map_type = ri->map_type;
|
||||
void *fwd = ri->tgt_value;
|
||||
u32 map_id = ri->map_id;
|
||||
@ -3970,6 +3995,11 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
|
||||
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
|
||||
ri->map_type = BPF_MAP_TYPE_UNSPEC;
|
||||
|
||||
if (unlikely(!xdpf)) {
|
||||
err = -EOVERFLOW;
|
||||
goto err;
|
||||
}
|
||||
|
||||
switch (map_type) {
|
||||
case BPF_MAP_TYPE_DEVMAP:
|
||||
fallthrough;
|
||||
@ -3977,17 +4007,14 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
|
||||
map = READ_ONCE(ri->map);
|
||||
if (unlikely(map)) {
|
||||
WRITE_ONCE(ri->map, NULL);
|
||||
err = dev_map_enqueue_multi(xdp, dev, map,
|
||||
err = dev_map_enqueue_multi(xdpf, dev, map,
|
||||
ri->flags & BPF_F_EXCLUDE_INGRESS);
|
||||
} else {
|
||||
err = dev_map_enqueue(fwd, xdp, dev);
|
||||
err = dev_map_enqueue(fwd, xdpf, dev);
|
||||
}
|
||||
break;
|
||||
case BPF_MAP_TYPE_CPUMAP:
|
||||
err = cpu_map_enqueue(fwd, xdp, dev);
|
||||
break;
|
||||
case BPF_MAP_TYPE_XSKMAP:
|
||||
err = __xsk_map_redirect(fwd, xdp);
|
||||
err = cpu_map_enqueue(fwd, xdpf, dev);
|
||||
break;
|
||||
case BPF_MAP_TYPE_UNSPEC:
|
||||
if (map_id == INT_MAX) {
|
||||
@ -3996,7 +4023,7 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
|
||||
err = -EINVAL;
|
||||
break;
|
||||
}
|
||||
err = dev_xdp_enqueue(fwd, xdp, dev);
|
||||
err = dev_xdp_enqueue(fwd, xdpf, dev);
|
||||
break;
|
||||
}
|
||||
fallthrough;
|
||||
@ -4013,8 +4040,34 @@ err:
|
||||
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
|
||||
struct bpf_prog *xdp_prog)
|
||||
{
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
enum bpf_map_type map_type = ri->map_type;
|
||||
|
||||
if (map_type == BPF_MAP_TYPE_XSKMAP)
|
||||
return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
|
||||
|
||||
return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp),
|
||||
xdp_prog);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdp_do_redirect);
|
||||
|
||||
int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp,
|
||||
struct xdp_frame *xdpf, struct bpf_prog *xdp_prog)
|
||||
{
|
||||
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
|
||||
enum bpf_map_type map_type = ri->map_type;
|
||||
|
||||
if (map_type == BPF_MAP_TYPE_XSKMAP)
|
||||
return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
|
||||
|
||||
return __xdp_do_redirect_frame(ri, dev, xdpf, xdp_prog);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdp_do_redirect_frame);
|
||||
|
||||
static int xdp_do_generic_redirect_map(struct net_device *dev,
|
||||
struct sk_buff *skb,
|
||||
struct xdp_buff *xdp,
|
||||
@ -4741,12 +4794,14 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
|
||||
switch (optname) {
|
||||
case SO_RCVBUF:
|
||||
val = min_t(u32, val, sysctl_rmem_max);
|
||||
val = min_t(int, val, INT_MAX / 2);
|
||||
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
|
||||
WRITE_ONCE(sk->sk_rcvbuf,
|
||||
max_t(int, val * 2, SOCK_MIN_RCVBUF));
|
||||
break;
|
||||
case SO_SNDBUF:
|
||||
val = min_t(u32, val, sysctl_wmem_max);
|
||||
val = min_t(int, val, INT_MAX / 2);
|
||||
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
|
||||
WRITE_ONCE(sk->sk_sndbuf,
|
||||
max_t(int, val * 2, SOCK_MIN_SNDBUF));
|
||||
@ -4967,6 +5022,12 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname,
|
||||
goto err_clear;
|
||||
|
||||
switch (optname) {
|
||||
case SO_RCVBUF:
|
||||
*((int *)optval) = sk->sk_rcvbuf;
|
||||
break;
|
||||
case SO_SNDBUF:
|
||||
*((int *)optval) = sk->sk_sndbuf;
|
||||
break;
|
||||
case SO_MARK:
|
||||
*((int *)optval) = sk->sk_mark;
|
||||
break;
|
||||
|
@ -217,6 +217,8 @@ static void page_pool_set_pp_info(struct page_pool *pool,
|
||||
{
|
||||
page->pp = pool;
|
||||
page->pp_magic |= PP_SIGNATURE;
|
||||
if (pool->p.init_callback)
|
||||
pool->p.init_callback(page, pool->p.init_arg);
|
||||
}
|
||||
|
||||
static void page_pool_clear_pp_info(struct page *page)
|
||||
@ -691,10 +693,12 @@ static void page_pool_release_retry(struct work_struct *wq)
|
||||
schedule_delayed_work(&pool->release_dw, DEFER_TIME);
|
||||
}
|
||||
|
||||
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *))
|
||||
void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
|
||||
struct xdp_mem_info *mem)
|
||||
{
|
||||
refcount_inc(&pool->user_cnt);
|
||||
pool->disconnect = disconnect;
|
||||
pool->xdp_mem_id = mem->id;
|
||||
}
|
||||
|
||||
void page_pool_destroy(struct page_pool *pool)
|
||||
|
@ -292,15 +292,23 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
|
||||
if (skb_verdict)
|
||||
psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
|
||||
|
||||
/* msg_* and stream_* programs references tracked in psock after this
|
||||
* point. Reference dec and cleanup will occur through psock destructor
|
||||
*/
|
||||
ret = sock_map_init_proto(sk, psock);
|
||||
if (ret < 0)
|
||||
goto out_drop;
|
||||
if (ret < 0) {
|
||||
sk_psock_put(sk, psock);
|
||||
goto out;
|
||||
}
|
||||
|
||||
write_lock_bh(&sk->sk_callback_lock);
|
||||
if (stream_parser && stream_verdict && !psock->saved_data_ready) {
|
||||
ret = sk_psock_init_strp(sk, psock);
|
||||
if (ret)
|
||||
goto out_unlock_drop;
|
||||
if (ret) {
|
||||
write_unlock_bh(&sk->sk_callback_lock);
|
||||
sk_psock_put(sk, psock);
|
||||
goto out;
|
||||
}
|
||||
sk_psock_start_strp(sk, psock);
|
||||
} else if (!stream_parser && stream_verdict && !psock->saved_data_ready) {
|
||||
sk_psock_start_verdict(sk,psock);
|
||||
@ -309,10 +317,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
|
||||
}
|
||||
write_unlock_bh(&sk->sk_callback_lock);
|
||||
return 0;
|
||||
out_unlock_drop:
|
||||
write_unlock_bh(&sk->sk_callback_lock);
|
||||
out_drop:
|
||||
sk_psock_put(sk, psock);
|
||||
out_progs:
|
||||
if (skb_verdict)
|
||||
bpf_prog_put(skb_verdict);
|
||||
@ -325,6 +329,7 @@ out_put_stream_parser:
|
||||
out_put_stream_verdict:
|
||||
if (stream_verdict)
|
||||
bpf_prog_put(stream_verdict);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -110,20 +110,15 @@ static void mem_allocator_disconnect(void *allocator)
|
||||
mutex_unlock(&mem_id_lock);
|
||||
}
|
||||
|
||||
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
|
||||
void xdp_unreg_mem_model(struct xdp_mem_info *mem)
|
||||
{
|
||||
struct xdp_mem_allocator *xa;
|
||||
int type = xdp_rxq->mem.type;
|
||||
int id = xdp_rxq->mem.id;
|
||||
int type = mem->type;
|
||||
int id = mem->id;
|
||||
|
||||
/* Reset mem info to defaults */
|
||||
xdp_rxq->mem.id = 0;
|
||||
xdp_rxq->mem.type = 0;
|
||||
|
||||
if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
|
||||
WARN(1, "Missing register, driver bug");
|
||||
return;
|
||||
}
|
||||
mem->id = 0;
|
||||
mem->type = 0;
|
||||
|
||||
if (id == 0)
|
||||
return;
|
||||
@ -135,6 +130,17 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
|
||||
rcu_read_unlock();
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdp_unreg_mem_model);
|
||||
|
||||
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
|
||||
{
|
||||
if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
|
||||
WARN(1, "Missing register, driver bug");
|
||||
return;
|
||||
}
|
||||
|
||||
xdp_unreg_mem_model(&xdp_rxq->mem);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);
|
||||
|
||||
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
|
||||
@ -259,28 +265,24 @@ static bool __is_supported_mem_type(enum xdp_mem_type type)
|
||||
return true;
|
||||
}
|
||||
|
||||
int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
|
||||
enum xdp_mem_type type, void *allocator)
|
||||
static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem,
|
||||
enum xdp_mem_type type,
|
||||
void *allocator)
|
||||
{
|
||||
struct xdp_mem_allocator *xdp_alloc;
|
||||
gfp_t gfp = GFP_KERNEL;
|
||||
int id, errno, ret;
|
||||
void *ptr;
|
||||
|
||||
if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
|
||||
WARN(1, "Missing register, driver bug");
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (!__is_supported_mem_type(type))
|
||||
return -EOPNOTSUPP;
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
|
||||
xdp_rxq->mem.type = type;
|
||||
mem->type = type;
|
||||
|
||||
if (!allocator) {
|
||||
if (type == MEM_TYPE_PAGE_POOL)
|
||||
return -EINVAL; /* Setup time check page_pool req */
|
||||
return 0;
|
||||
return ERR_PTR(-EINVAL); /* Setup time check page_pool req */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Delay init of rhashtable to save memory if feature isn't used */
|
||||
@ -290,13 +292,13 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
|
||||
mutex_unlock(&mem_id_lock);
|
||||
if (ret < 0) {
|
||||
WARN_ON(1);
|
||||
return ret;
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
}
|
||||
|
||||
xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
|
||||
if (!xdp_alloc)
|
||||
return -ENOMEM;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
mutex_lock(&mem_id_lock);
|
||||
id = __mem_id_cyclic_get(gfp);
|
||||
@ -304,31 +306,61 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
|
||||
errno = id;
|
||||
goto err;
|
||||
}
|
||||
xdp_rxq->mem.id = id;
|
||||
xdp_alloc->mem = xdp_rxq->mem;
|
||||
mem->id = id;
|
||||
xdp_alloc->mem = *mem;
|
||||
xdp_alloc->allocator = allocator;
|
||||
|
||||
/* Insert allocator into ID lookup table */
|
||||
ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
|
||||
if (IS_ERR(ptr)) {
|
||||
ida_simple_remove(&mem_id_pool, xdp_rxq->mem.id);
|
||||
xdp_rxq->mem.id = 0;
|
||||
ida_simple_remove(&mem_id_pool, mem->id);
|
||||
mem->id = 0;
|
||||
errno = PTR_ERR(ptr);
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (type == MEM_TYPE_PAGE_POOL)
|
||||
page_pool_use_xdp_mem(allocator, mem_allocator_disconnect);
|
||||
page_pool_use_xdp_mem(allocator, mem_allocator_disconnect, mem);
|
||||
|
||||
mutex_unlock(&mem_id_lock);
|
||||
|
||||
trace_mem_connect(xdp_alloc, xdp_rxq);
|
||||
return 0;
|
||||
return xdp_alloc;
|
||||
err:
|
||||
mutex_unlock(&mem_id_lock);
|
||||
kfree(xdp_alloc);
|
||||
return errno;
|
||||
return ERR_PTR(errno);
|
||||
}
|
||||
|
||||
int xdp_reg_mem_model(struct xdp_mem_info *mem,
|
||||
enum xdp_mem_type type, void *allocator)
|
||||
{
|
||||
struct xdp_mem_allocator *xdp_alloc;
|
||||
|
||||
xdp_alloc = __xdp_reg_mem_model(mem, type, allocator);
|
||||
if (IS_ERR(xdp_alloc))
|
||||
return PTR_ERR(xdp_alloc);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(xdp_reg_mem_model);
|
||||
|
||||
int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
|
||||
enum xdp_mem_type type, void *allocator)
|
||||
{
|
||||
struct xdp_mem_allocator *xdp_alloc;
|
||||
|
||||
if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
|
||||
WARN(1, "Missing register, driver bug");
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
xdp_alloc = __xdp_reg_mem_model(&xdp_rxq->mem, type, allocator);
|
||||
if (IS_ERR(xdp_alloc))
|
||||
return PTR_ERR(xdp_alloc);
|
||||
|
||||
trace_mem_connect(xdp_alloc, xdp_rxq);
|
||||
return 0;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
|
||||
|
||||
/* XDP RX runs under NAPI protection, and in different delivery error
|
||||
|
@ -531,6 +531,8 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
|
||||
err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
|
||||
if (err) {
|
||||
inet->inet_saddr = inet->inet_rcv_saddr = 0;
|
||||
if (sk->sk_prot->put_port)
|
||||
sk->sk_prot->put_port(sk);
|
||||
goto out_release_sock;
|
||||
}
|
||||
}
|
||||
|
@ -994,6 +994,7 @@ struct proto ping_prot = {
|
||||
.hash = ping_hash,
|
||||
.unhash = ping_unhash,
|
||||
.get_port = ping_get_port,
|
||||
.put_port = ping_unhash,
|
||||
.obj_size = sizeof(struct inet_sock),
|
||||
};
|
||||
EXPORT_SYMBOL(ping_prot);
|
||||
|
@ -196,12 +196,39 @@ msg_bytes_ready:
|
||||
long timeo;
|
||||
int data;
|
||||
|
||||
if (sock_flag(sk, SOCK_DONE))
|
||||
goto out;
|
||||
|
||||
if (sk->sk_err) {
|
||||
copied = sock_error(sk);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (sk->sk_shutdown & RCV_SHUTDOWN)
|
||||
goto out;
|
||||
|
||||
if (sk->sk_state == TCP_CLOSE) {
|
||||
copied = -ENOTCONN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
timeo = sock_rcvtimeo(sk, nonblock);
|
||||
if (!timeo) {
|
||||
copied = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (signal_pending(current)) {
|
||||
copied = sock_intr_errno(timeo);
|
||||
goto out;
|
||||
}
|
||||
|
||||
data = tcp_msg_wait_data(sk, psock, timeo);
|
||||
if (data && !sk_psock_queue_empty(psock))
|
||||
goto msg_bytes_ready;
|
||||
copied = -EAGAIN;
|
||||
}
|
||||
out:
|
||||
release_sock(sk);
|
||||
sk_psock_put(sk, psock);
|
||||
return copied;
|
||||
|
@ -3076,6 +3076,7 @@ struct proto tcp_prot = {
|
||||
.hash = inet_hash,
|
||||
.unhash = inet_unhash,
|
||||
.get_port = inet_csk_get_port,
|
||||
.put_port = inet_put_port,
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
.psock_update_sk_prot = tcp_bpf_update_proto,
|
||||
#endif
|
||||
|
@ -2927,6 +2927,7 @@ struct proto udp_prot = {
|
||||
.unhash = udp_lib_unhash,
|
||||
.rehash = udp_v4_rehash,
|
||||
.get_port = udp_v4_get_port,
|
||||
.put_port = udp_lib_unhash,
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
.psock_update_sk_prot = udp_bpf_update_proto,
|
||||
#endif
|
||||
|
@ -413,6 +413,8 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
|
||||
if (err) {
|
||||
sk->sk_ipv6only = saved_ipv6only;
|
||||
inet_reset_saddr(sk);
|
||||
if (sk->sk_prot->put_port)
|
||||
sk->sk_prot->put_port(sk);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
@ -177,6 +177,7 @@ struct proto pingv6_prot = {
|
||||
.hash = ping_hash,
|
||||
.unhash = ping_unhash,
|
||||
.get_port = ping_get_port,
|
||||
.put_port = ping_unhash,
|
||||
.obj_size = sizeof(struct raw6_sock),
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(pingv6_prot);
|
||||
|
@ -2181,6 +2181,7 @@ struct proto tcpv6_prot = {
|
||||
.hash = inet6_hash,
|
||||
.unhash = inet_unhash,
|
||||
.get_port = inet_csk_get_port,
|
||||
.put_port = inet_put_port,
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
.psock_update_sk_prot = tcp_bpf_update_proto,
|
||||
#endif
|
||||
|
@ -1733,6 +1733,7 @@ struct proto udpv6_prot = {
|
||||
.unhash = udp_lib_unhash,
|
||||
.rehash = udp_v6_rehash,
|
||||
.get_port = udp_v6_get_port,
|
||||
.put_port = udp_lib_unhash,
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
.psock_update_sk_prot = udp_bpf_update_proto,
|
||||
#endif
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <arpa/inet.h>
|
||||
#include <locale.h>
|
||||
#include <net/ethernet.h>
|
||||
#include <netinet/ether.h>
|
||||
#include <net/if.h>
|
||||
#include <poll.h>
|
||||
#include <pthread.h>
|
||||
@ -30,6 +31,7 @@
|
||||
#include <sys/un.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <sched.h>
|
||||
|
||||
#include <bpf/libbpf.h>
|
||||
#include <bpf/xsk.h>
|
||||
@ -56,12 +58,27 @@
|
||||
|
||||
#define DEBUG_HEXDUMP 0
|
||||
|
||||
#define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */
|
||||
#define VLAN_PRIO_SHIFT 13
|
||||
#define VLAN_VID_MASK 0x0fff /* VLAN Identifier */
|
||||
#define VLAN_VID__DEFAULT 1
|
||||
#define VLAN_PRI__DEFAULT 0
|
||||
|
||||
#define NSEC_PER_SEC 1000000000UL
|
||||
#define NSEC_PER_USEC 1000
|
||||
|
||||
#define SCHED_PRI__DEFAULT 0
|
||||
|
||||
typedef __u64 u64;
|
||||
typedef __u32 u32;
|
||||
typedef __u16 u16;
|
||||
typedef __u8 u8;
|
||||
|
||||
static unsigned long prev_time;
|
||||
static long tx_cycle_diff_min;
|
||||
static long tx_cycle_diff_max;
|
||||
static double tx_cycle_diff_ave;
|
||||
static long tx_cycle_cnt;
|
||||
|
||||
enum benchmark_type {
|
||||
BENCH_RXDROP = 0,
|
||||
@ -81,14 +98,23 @@ static u32 opt_batch_size = 64;
|
||||
static int opt_pkt_count;
|
||||
static u16 opt_pkt_size = MIN_PKT_SIZE;
|
||||
static u32 opt_pkt_fill_pattern = 0x12345678;
|
||||
static bool opt_vlan_tag;
|
||||
static u16 opt_pkt_vlan_id = VLAN_VID__DEFAULT;
|
||||
static u16 opt_pkt_vlan_pri = VLAN_PRI__DEFAULT;
|
||||
static struct ether_addr opt_txdmac = {{ 0x3c, 0xfd, 0xfe,
|
||||
0x9e, 0x7f, 0x71 }};
|
||||
static struct ether_addr opt_txsmac = {{ 0xec, 0xb1, 0xd7,
|
||||
0x98, 0x3a, 0xc0 }};
|
||||
static bool opt_extra_stats;
|
||||
static bool opt_quiet;
|
||||
static bool opt_app_stats;
|
||||
static const char *opt_irq_str = "";
|
||||
static u32 irq_no;
|
||||
static int irqs_at_init = -1;
|
||||
static u32 sequence;
|
||||
static int opt_poll;
|
||||
static int opt_interval = 1;
|
||||
static int opt_retries = 3;
|
||||
static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
|
||||
static u32 opt_umem_flags;
|
||||
static int opt_unaligned_chunks;
|
||||
@ -100,6 +126,27 @@ static u32 opt_num_xsks = 1;
|
||||
static u32 prog_id;
|
||||
static bool opt_busy_poll;
|
||||
static bool opt_reduced_cap;
|
||||
static clockid_t opt_clock = CLOCK_MONOTONIC;
|
||||
static unsigned long opt_tx_cycle_ns;
|
||||
static int opt_schpolicy = SCHED_OTHER;
|
||||
static int opt_schprio = SCHED_PRI__DEFAULT;
|
||||
static bool opt_tstamp;
|
||||
|
||||
struct vlan_ethhdr {
|
||||
unsigned char h_dest[6];
|
||||
unsigned char h_source[6];
|
||||
__be16 h_vlan_proto;
|
||||
__be16 h_vlan_TCI;
|
||||
__be16 h_vlan_encapsulated_proto;
|
||||
};
|
||||
|
||||
#define PKTGEN_MAGIC 0xbe9be955
|
||||
struct pktgen_hdr {
|
||||
__be32 pgh_magic;
|
||||
__be32 seq_num;
|
||||
__be32 tv_sec;
|
||||
__be32 tv_usec;
|
||||
};
|
||||
|
||||
struct xsk_ring_stats {
|
||||
unsigned long rx_npkts;
|
||||
@ -156,15 +203,63 @@ struct xsk_socket_info {
|
||||
u32 outstanding_tx;
|
||||
};
|
||||
|
||||
static const struct clockid_map {
|
||||
const char *name;
|
||||
clockid_t clockid;
|
||||
} clockids_map[] = {
|
||||
{ "REALTIME", CLOCK_REALTIME },
|
||||
{ "TAI", CLOCK_TAI },
|
||||
{ "BOOTTIME", CLOCK_BOOTTIME },
|
||||
{ "MONOTONIC", CLOCK_MONOTONIC },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static const struct sched_map {
|
||||
const char *name;
|
||||
int policy;
|
||||
} schmap[] = {
|
||||
{ "OTHER", SCHED_OTHER },
|
||||
{ "FIFO", SCHED_FIFO },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static int num_socks;
|
||||
struct xsk_socket_info *xsks[MAX_SOCKS];
|
||||
int sock;
|
||||
|
||||
static int get_clockid(clockid_t *id, const char *name)
|
||||
{
|
||||
const struct clockid_map *clk;
|
||||
|
||||
for (clk = clockids_map; clk->name; clk++) {
|
||||
if (strcasecmp(clk->name, name) == 0) {
|
||||
*id = clk->clockid;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int get_schpolicy(int *policy, const char *name)
|
||||
{
|
||||
const struct sched_map *sch;
|
||||
|
||||
for (sch = schmap; sch->name; sch++) {
|
||||
if (strcasecmp(sch->name, name) == 0) {
|
||||
*policy = sch->policy;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static unsigned long get_nsecs(void)
|
||||
{
|
||||
struct timespec ts;
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
clock_gettime(opt_clock, &ts);
|
||||
return ts.tv_sec * 1000000000UL + ts.tv_nsec;
|
||||
}
|
||||
|
||||
@ -257,6 +352,15 @@ static void dump_app_stats(long dt)
|
||||
xsks[i]->app_stats.prev_tx_wakeup_sendtos = xsks[i]->app_stats.tx_wakeup_sendtos;
|
||||
xsks[i]->app_stats.prev_opt_polls = xsks[i]->app_stats.opt_polls;
|
||||
}
|
||||
|
||||
if (opt_tx_cycle_ns) {
|
||||
printf("\n%-18s %-10s %-10s %-10s %-10s %-10s\n",
|
||||
"", "period", "min", "ave", "max", "cycle");
|
||||
printf("%-18s %-10lu %-10lu %-10lu %-10lu %-10lu\n",
|
||||
"Cyclic TX", opt_tx_cycle_ns, tx_cycle_diff_min,
|
||||
(long)(tx_cycle_diff_ave / tx_cycle_cnt),
|
||||
tx_cycle_diff_max, tx_cycle_cnt);
|
||||
}
|
||||
}
|
||||
|
||||
static bool get_interrupt_number(void)
|
||||
@ -740,29 +844,69 @@ static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len,
|
||||
|
||||
#define ETH_FCS_SIZE 4
|
||||
|
||||
#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
|
||||
sizeof(struct udphdr))
|
||||
#define ETH_HDR_SIZE (opt_vlan_tag ? sizeof(struct vlan_ethhdr) : \
|
||||
sizeof(struct ethhdr))
|
||||
#define PKTGEN_HDR_SIZE (opt_tstamp ? sizeof(struct pktgen_hdr) : 0)
|
||||
#define PKT_HDR_SIZE (ETH_HDR_SIZE + sizeof(struct iphdr) + \
|
||||
sizeof(struct udphdr) + PKTGEN_HDR_SIZE)
|
||||
#define PKTGEN_HDR_OFFSET (ETH_HDR_SIZE + sizeof(struct iphdr) + \
|
||||
sizeof(struct udphdr))
|
||||
#define PKTGEN_SIZE_MIN (PKTGEN_HDR_OFFSET + sizeof(struct pktgen_hdr) + \
|
||||
ETH_FCS_SIZE)
|
||||
|
||||
#define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE)
|
||||
#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr))
|
||||
#define IP_PKT_SIZE (PKT_SIZE - ETH_HDR_SIZE)
|
||||
#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
|
||||
#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
|
||||
#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - \
|
||||
(sizeof(struct udphdr) + PKTGEN_HDR_SIZE))
|
||||
|
||||
static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
|
||||
|
||||
static void gen_eth_hdr_data(void)
|
||||
{
|
||||
struct udphdr *udp_hdr = (struct udphdr *)(pkt_data +
|
||||
sizeof(struct ethhdr) +
|
||||
sizeof(struct iphdr));
|
||||
struct iphdr *ip_hdr = (struct iphdr *)(pkt_data +
|
||||
sizeof(struct ethhdr));
|
||||
struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
|
||||
struct pktgen_hdr *pktgen_hdr;
|
||||
struct udphdr *udp_hdr;
|
||||
struct iphdr *ip_hdr;
|
||||
|
||||
if (opt_vlan_tag) {
|
||||
struct vlan_ethhdr *veth_hdr = (struct vlan_ethhdr *)pkt_data;
|
||||
u16 vlan_tci = 0;
|
||||
|
||||
udp_hdr = (struct udphdr *)(pkt_data +
|
||||
sizeof(struct vlan_ethhdr) +
|
||||
sizeof(struct iphdr));
|
||||
ip_hdr = (struct iphdr *)(pkt_data +
|
||||
sizeof(struct vlan_ethhdr));
|
||||
pktgen_hdr = (struct pktgen_hdr *)(pkt_data +
|
||||
sizeof(struct vlan_ethhdr) +
|
||||
sizeof(struct iphdr) +
|
||||
sizeof(struct udphdr));
|
||||
/* ethernet & VLAN header */
|
||||
memcpy(veth_hdr->h_dest, &opt_txdmac, ETH_ALEN);
|
||||
memcpy(veth_hdr->h_source, &opt_txsmac, ETH_ALEN);
|
||||
veth_hdr->h_vlan_proto = htons(ETH_P_8021Q);
|
||||
vlan_tci = opt_pkt_vlan_id & VLAN_VID_MASK;
|
||||
vlan_tci |= (opt_pkt_vlan_pri << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
|
||||
veth_hdr->h_vlan_TCI = htons(vlan_tci);
|
||||
veth_hdr->h_vlan_encapsulated_proto = htons(ETH_P_IP);
|
||||
} else {
|
||||
struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
|
||||
|
||||
udp_hdr = (struct udphdr *)(pkt_data +
|
||||
sizeof(struct ethhdr) +
|
||||
sizeof(struct iphdr));
|
||||
ip_hdr = (struct iphdr *)(pkt_data +
|
||||
sizeof(struct ethhdr));
|
||||
pktgen_hdr = (struct pktgen_hdr *)(pkt_data +
|
||||
sizeof(struct ethhdr) +
|
||||
sizeof(struct iphdr) +
|
||||
sizeof(struct udphdr));
|
||||
/* ethernet header */
|
||||
memcpy(eth_hdr->h_dest, &opt_txdmac, ETH_ALEN);
|
||||
memcpy(eth_hdr->h_source, &opt_txsmac, ETH_ALEN);
|
||||
eth_hdr->h_proto = htons(ETH_P_IP);
|
||||
}
|
||||
|
||||
/* ethernet header */
|
||||
memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN);
|
||||
memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN);
|
||||
eth_hdr->h_proto = htons(ETH_P_IP);
|
||||
|
||||
/* IP header */
|
||||
ip_hdr->version = IPVERSION;
|
||||
@ -785,6 +929,9 @@ static void gen_eth_hdr_data(void)
|
||||
udp_hdr->dest = htons(0x1000);
|
||||
udp_hdr->len = htons(UDP_PKT_SIZE);
|
||||
|
||||
if (opt_tstamp)
|
||||
pktgen_hdr->pgh_magic = htonl(PKTGEN_MAGIC);
|
||||
|
||||
/* UDP data */
|
||||
memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern,
|
||||
UDP_PKT_DATA_SIZE);
|
||||
@ -908,6 +1055,7 @@ static struct option long_options[] = {
|
||||
{"xdp-skb", no_argument, 0, 'S'},
|
||||
{"xdp-native", no_argument, 0, 'N'},
|
||||
{"interval", required_argument, 0, 'n'},
|
||||
{"retries", required_argument, 0, 'O'},
|
||||
{"zero-copy", no_argument, 0, 'z'},
|
||||
{"copy", no_argument, 0, 'c'},
|
||||
{"frame-size", required_argument, 0, 'f'},
|
||||
@ -916,10 +1064,20 @@ static struct option long_options[] = {
|
||||
{"shared-umem", no_argument, 0, 'M'},
|
||||
{"force", no_argument, 0, 'F'},
|
||||
{"duration", required_argument, 0, 'd'},
|
||||
{"clock", required_argument, 0, 'w'},
|
||||
{"batch-size", required_argument, 0, 'b'},
|
||||
{"tx-pkt-count", required_argument, 0, 'C'},
|
||||
{"tx-pkt-size", required_argument, 0, 's'},
|
||||
{"tx-pkt-pattern", required_argument, 0, 'P'},
|
||||
{"tx-vlan", no_argument, 0, 'V'},
|
||||
{"tx-vlan-id", required_argument, 0, 'J'},
|
||||
{"tx-vlan-pri", required_argument, 0, 'K'},
|
||||
{"tx-dmac", required_argument, 0, 'G'},
|
||||
{"tx-smac", required_argument, 0, 'H'},
|
||||
{"tx-cycle", required_argument, 0, 'T'},
|
||||
{"tstamp", no_argument, 0, 'y'},
|
||||
{"policy", required_argument, 0, 'W'},
|
||||
{"schpri", required_argument, 0, 'U'},
|
||||
{"extra-stats", no_argument, 0, 'x'},
|
||||
{"quiet", no_argument, 0, 'Q'},
|
||||
{"app-stats", no_argument, 0, 'a'},
|
||||
@ -943,6 +1101,7 @@ static void usage(const char *prog)
|
||||
" -S, --xdp-skb=n Use XDP skb-mod\n"
|
||||
" -N, --xdp-native=n Enforce XDP native mode\n"
|
||||
" -n, --interval=n Specify statistics update interval (default 1 sec).\n"
|
||||
" -O, --retries=n Specify time-out retries (1s interval) attempt (default 3).\n"
|
||||
" -z, --zero-copy Force zero-copy mode.\n"
|
||||
" -c, --copy Force copy mode.\n"
|
||||
" -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n"
|
||||
@ -952,6 +1111,7 @@ static void usage(const char *prog)
|
||||
" -F, --force Force loading the XDP prog\n"
|
||||
" -d, --duration=n Duration in secs to run command.\n"
|
||||
" Default: forever.\n"
|
||||
" -w, --clock=CLOCK Clock NAME (default MONOTONIC).\n"
|
||||
" -b, --batch-size=n Batch size for sending or receiving\n"
|
||||
" packets. Default: %d\n"
|
||||
" -C, --tx-pkt-count=n Number of packets to send.\n"
|
||||
@ -960,6 +1120,15 @@ static void usage(const char *prog)
|
||||
" (Default: %d bytes)\n"
|
||||
" Min size: %d, Max size %d.\n"
|
||||
" -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n"
|
||||
" -V, --tx-vlan Send VLAN tagged packets (For -t|--txonly)\n"
|
||||
" -J, --tx-vlan-id=n Tx VLAN ID [1-4095]. Default: %d (For -V|--tx-vlan)\n"
|
||||
" -K, --tx-vlan-pri=n Tx VLAN Priority [0-7]. Default: %d (For -V|--tx-vlan)\n"
|
||||
" -G, --tx-dmac=<MAC> Dest MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n"
|
||||
" -H, --tx-smac=<MAC> Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n"
|
||||
" -T, --tx-cycle=n Tx cycle time in micro-seconds (For -t|--txonly).\n"
|
||||
" -y, --tstamp Add time-stamp to packet (For -t|--txonly).\n"
|
||||
" -W, --policy=POLICY Schedule policy. Default: SCHED_OTHER\n"
|
||||
" -U, --schpri=n Schedule priority. Default: %d\n"
|
||||
" -x, --extra-stats Display extra statistics.\n"
|
||||
" -Q, --quiet Do not display any stats.\n"
|
||||
" -a, --app-stats Display application (syscall) statistics.\n"
|
||||
@ -969,7 +1138,9 @@ static void usage(const char *prog)
|
||||
"\n";
|
||||
fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE,
|
||||
opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE,
|
||||
XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern);
|
||||
XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern,
|
||||
VLAN_VID__DEFAULT, VLAN_PRI__DEFAULT,
|
||||
SCHED_PRI__DEFAULT);
|
||||
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
@ -981,7 +1152,8 @@ static void parse_command_line(int argc, char **argv)
|
||||
opterr = 0;
|
||||
|
||||
for (;;) {
|
||||
c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:BR",
|
||||
c = getopt_long(argc, argv,
|
||||
"Frtli:q:pSNn:w:O:czf:muMd:b:C:s:P:VJ:K:G:H:T:yW:U:xQaI:BR",
|
||||
long_options, &option_index);
|
||||
if (c == -1)
|
||||
break;
|
||||
@ -1015,6 +1187,17 @@ static void parse_command_line(int argc, char **argv)
|
||||
case 'n':
|
||||
opt_interval = atoi(optarg);
|
||||
break;
|
||||
case 'w':
|
||||
if (get_clockid(&opt_clock, optarg)) {
|
||||
fprintf(stderr,
|
||||
"ERROR: Invalid clock %s. Default to CLOCK_MONOTONIC.\n",
|
||||
optarg);
|
||||
opt_clock = CLOCK_MONOTONIC;
|
||||
}
|
||||
break;
|
||||
case 'O':
|
||||
opt_retries = atoi(optarg);
|
||||
break;
|
||||
case 'z':
|
||||
opt_xdp_bind_flags |= XDP_ZEROCOPY;
|
||||
break;
|
||||
@ -1062,6 +1245,49 @@ static void parse_command_line(int argc, char **argv)
|
||||
case 'P':
|
||||
opt_pkt_fill_pattern = strtol(optarg, NULL, 16);
|
||||
break;
|
||||
case 'V':
|
||||
opt_vlan_tag = true;
|
||||
break;
|
||||
case 'J':
|
||||
opt_pkt_vlan_id = atoi(optarg);
|
||||
break;
|
||||
case 'K':
|
||||
opt_pkt_vlan_pri = atoi(optarg);
|
||||
break;
|
||||
case 'G':
|
||||
if (!ether_aton_r(optarg,
|
||||
(struct ether_addr *)&opt_txdmac)) {
|
||||
fprintf(stderr, "Invalid dmac address:%s\n",
|
||||
optarg);
|
||||
usage(basename(argv[0]));
|
||||
}
|
||||
break;
|
||||
case 'H':
|
||||
if (!ether_aton_r(optarg,
|
||||
(struct ether_addr *)&opt_txsmac)) {
|
||||
fprintf(stderr, "Invalid smac address:%s\n",
|
||||
optarg);
|
||||
usage(basename(argv[0]));
|
||||
}
|
||||
break;
|
||||
case 'T':
|
||||
opt_tx_cycle_ns = atoi(optarg);
|
||||
opt_tx_cycle_ns *= NSEC_PER_USEC;
|
||||
break;
|
||||
case 'y':
|
||||
opt_tstamp = 1;
|
||||
break;
|
||||
case 'W':
|
||||
if (get_schpolicy(&opt_schpolicy, optarg)) {
|
||||
fprintf(stderr,
|
||||
"ERROR: Invalid policy %s. Default to SCHED_OTHER.\n",
|
||||
optarg);
|
||||
opt_schpolicy = SCHED_OTHER;
|
||||
}
|
||||
break;
|
||||
case 'U':
|
||||
opt_schprio = atoi(optarg);
|
||||
break;
|
||||
case 'x':
|
||||
opt_extra_stats = 1;
|
||||
break;
|
||||
@ -1267,16 +1493,22 @@ static void rx_drop_all(void)
|
||||
}
|
||||
}
|
||||
|
||||
static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
|
||||
static int tx_only(struct xsk_socket_info *xsk, u32 *frame_nb,
|
||||
int batch_size, unsigned long tx_ns)
|
||||
{
|
||||
u32 idx;
|
||||
u32 idx, tv_sec, tv_usec;
|
||||
unsigned int i;
|
||||
|
||||
while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) <
|
||||
batch_size) {
|
||||
complete_tx_only(xsk, batch_size);
|
||||
if (benchmark_done)
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (opt_tstamp) {
|
||||
tv_sec = (u32)(tx_ns / NSEC_PER_SEC);
|
||||
tv_usec = (u32)((tx_ns % NSEC_PER_SEC) / 1000);
|
||||
}
|
||||
|
||||
for (i = 0; i < batch_size; i++) {
|
||||
@ -1284,6 +1516,21 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
|
||||
idx + i);
|
||||
tx_desc->addr = (*frame_nb + i) * opt_xsk_frame_size;
|
||||
tx_desc->len = PKT_SIZE;
|
||||
|
||||
if (opt_tstamp) {
|
||||
struct pktgen_hdr *pktgen_hdr;
|
||||
u64 addr = tx_desc->addr;
|
||||
char *pkt;
|
||||
|
||||
pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
|
||||
pktgen_hdr = (struct pktgen_hdr *)(pkt + PKTGEN_HDR_OFFSET);
|
||||
|
||||
            pktgen_hdr->seq_num = htonl(sequence++);
            pktgen_hdr->tv_sec = htonl(tv_sec);
            pktgen_hdr->tv_usec = htonl(tv_usec);

            hex_dump(pkt, PKT_SIZE, addr);
        }
    }

    xsk_ring_prod__submit(&xsk->tx, batch_size);
@@ -1292,6 +1539,8 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
    *frame_nb += batch_size;
    *frame_nb %= NUM_FRAMES;
    complete_tx_only(xsk, batch_size);

    return batch_size;
}

static inline int get_batch_size(int pkt_cnt)
@@ -1318,23 +1567,48 @@ static void complete_tx_only_all(void)
            pending = !!xsks[i]->outstanding_tx;
        }
    }
    } while (pending);
        sleep(1);
    } while (pending && opt_retries-- > 0);
}

static void tx_only_all(void)
{
    struct pollfd fds[MAX_SOCKS] = {};
    u32 frame_nb[MAX_SOCKS] = {};
    unsigned long next_tx_ns = 0;
    int pkt_cnt = 0;
    int i, ret;

    if (opt_poll && opt_tx_cycle_ns) {
        fprintf(stderr,
            "Error: --poll and --tx-cycles are both set\n");
        return;
    }

    for (i = 0; i < num_socks; i++) {
        fds[0].fd = xsk_socket__fd(xsks[i]->xsk);
        fds[0].events = POLLOUT;
    }

    if (opt_tx_cycle_ns) {
        /* Align Tx time to micro-second boundary */
        next_tx_ns = (get_nsecs() / NSEC_PER_USEC + 1) *
                 NSEC_PER_USEC;
        next_tx_ns += opt_tx_cycle_ns;

        /* Initialize periodic Tx scheduling variance */
        tx_cycle_diff_min = 1000000000;
        tx_cycle_diff_max = 0;
        tx_cycle_diff_ave = 0.0;
    }

    while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
        int batch_size = get_batch_size(pkt_cnt);
        unsigned long tx_ns = 0;
        struct timespec next;
        int tx_cnt = 0;
        long diff;
        int err;

        if (opt_poll) {
            for (i = 0; i < num_socks; i++)
@@ -1347,13 +1621,43 @@ static void tx_only_all(void)
                continue;
        }

        for (i = 0; i < num_socks; i++)
            tx_only(xsks[i], &frame_nb[i], batch_size);
        if (opt_tx_cycle_ns) {
            next.tv_sec = next_tx_ns / NSEC_PER_SEC;
            next.tv_nsec = next_tx_ns % NSEC_PER_SEC;
            err = clock_nanosleep(opt_clock, TIMER_ABSTIME, &next, NULL);
            if (err) {
                if (err != EINTR)
                    fprintf(stderr,
                        "clock_nanosleep failed. Err:%d errno:%d\n",
                        err, errno);
                break;
            }

        pkt_cnt += batch_size;
            /* Measure periodic Tx scheduling variance */
            tx_ns = get_nsecs();
            diff = tx_ns - next_tx_ns;
            if (diff < tx_cycle_diff_min)
                tx_cycle_diff_min = diff;

            if (diff > tx_cycle_diff_max)
                tx_cycle_diff_max = diff;

            tx_cycle_diff_ave += (double)diff;
            tx_cycle_cnt++;
        } else if (opt_tstamp) {
            tx_ns = get_nsecs();
        }

        for (i = 0; i < num_socks; i++)
            tx_cnt += tx_only(xsks[i], &frame_nb[i], batch_size, tx_ns);

        pkt_cnt += tx_cnt;

        if (benchmark_done)
            break;

        if (opt_tx_cycle_ns)
            next_tx_ns += opt_tx_cycle_ns;
    }

    if (opt_pkt_count)
@@ -1584,6 +1888,7 @@ int main(int argc, char **argv)
    struct __user_cap_data_struct data[2] = { { 0 } };
    struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
    bool rx = false, tx = false;
    struct sched_param schparam;
    struct xsk_umem_info *umem;
    struct bpf_object *obj;
    int xsks_map_fd = 0;
@@ -1646,6 +1951,9 @@ int main(int argc, char **argv)
        apply_setsockopt(xsks[i]);

    if (opt_bench == BENCH_TXONLY) {
        if (opt_tstamp && opt_pkt_size < PKTGEN_SIZE_MIN)
            opt_pkt_size = PKTGEN_SIZE_MIN;

        gen_eth_hdr_data();

        for (i = 0; i < NUM_FRAMES; i++)
@@ -1685,6 +1993,16 @@ int main(int argc, char **argv)
    prev_time = get_nsecs();
    start_time = prev_time;

    /* Configure sched priority for better wake-up accuracy */
    memset(&schparam, 0, sizeof(schparam));
    schparam.sched_priority = opt_schprio;
    ret = sched_setscheduler(0, opt_schpolicy, &schparam);
    if (ret) {
        fprintf(stderr, "Error(%d) in setting priority(%d): %s\n",
            errno, opt_schprio, strerror(errno));
        goto out;
    }

    if (opt_bench == BENCH_RXDROP)
        rx_drop_all();
    else if (opt_bench == BENCH_TXONLY)
@@ -1692,6 +2010,7 @@ int main(int argc, char **argv)
    else
        l2fwd_all();

out:
    benchmark_done = true;

    if (!opt_quiet)
@@ -642,6 +642,30 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
        printf("\n");
}

static void
probe_misc_feature(struct bpf_insn *insns, size_t len,
           const char *define_prefix, __u32 ifindex,
           const char *feat_name, const char *plain_name,
           const char *define_name)
{
    LIBBPF_OPTS(bpf_prog_load_opts, opts,
        .prog_ifindex = ifindex,
    );
    bool res;
    int fd;

    errno = 0;
    fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
               insns, len, &opts);
    res = fd >= 0 || !errno;

    if (fd >= 0)
        close(fd);

    print_bool_feature(feat_name, plain_name, define_name, res,
               define_prefix);
}

/*
 * Probe for availability of kernel commit (5.3):
 *
@@ -649,29 +673,81 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
 */
static void probe_large_insn_limit(const char *define_prefix, __u32 ifindex)
{
    LIBBPF_OPTS(bpf_prog_load_opts, opts,
        .prog_ifindex = ifindex,
    );
    struct bpf_insn insns[BPF_MAXINSNS + 1];
    bool res;
    int i, fd;
    int i;

    for (i = 0; i < BPF_MAXINSNS; i++)
        insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1);
    insns[BPF_MAXINSNS] = BPF_EXIT_INSN();

    errno = 0;
    fd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, NULL, "GPL",
               insns, ARRAY_SIZE(insns), &opts);
    res = fd >= 0 || (errno != E2BIG && errno != EINVAL);

    if (fd >= 0)
        close(fd);

    print_bool_feature("have_large_insn_limit",
    probe_misc_feature(insns, ARRAY_SIZE(insns),
               define_prefix, ifindex,
               "have_large_insn_limit",
               "Large program size limit",
               "LARGE_INSN_LIMIT",
               res, define_prefix);
               "LARGE_INSN_LIMIT");
}

/*
 * Probe for bounded loop support introduced in commit 2589726d12a1
 * ("bpf: introduce bounded loops").
 */
static void
probe_bounded_loops(const char *define_prefix, __u32 ifindex)
{
    struct bpf_insn insns[4] = {
        BPF_MOV64_IMM(BPF_REG_0, 10),
        BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1),
        BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, -2),
        BPF_EXIT_INSN()
    };

    probe_misc_feature(insns, ARRAY_SIZE(insns),
               define_prefix, ifindex,
               "have_bounded_loops",
               "Bounded loop support",
               "BOUNDED_LOOPS");
}

/*
 * Probe for the v2 instruction set extension introduced in commit 92b31a9af73b
 * ("bpf: add BPF_J{LT,LE,SLT,SLE} instructions").
 */
static void
probe_v2_isa_extension(const char *define_prefix, __u32 ifindex)
{
    struct bpf_insn insns[4] = {
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 0, 1),
        BPF_MOV64_IMM(BPF_REG_0, 1),
        BPF_EXIT_INSN()
    };

    probe_misc_feature(insns, ARRAY_SIZE(insns),
               define_prefix, ifindex,
               "have_v2_isa_extension",
               "ISA extension v2",
               "V2_ISA_EXTENSION");
}

/*
 * Probe for the v3 instruction set extension introduced in commit 092ed0968bb6
 * ("bpf: verifier support JMP32").
 */
static void
probe_v3_isa_extension(const char *define_prefix, __u32 ifindex)
{
    struct bpf_insn insns[4] = {
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_JMP32_IMM(BPF_JLT, BPF_REG_0, 0, 1),
        BPF_MOV64_IMM(BPF_REG_0, 1),
        BPF_EXIT_INSN()
    };

    probe_misc_feature(insns, ARRAY_SIZE(insns),
               define_prefix, ifindex,
               "have_v3_isa_extension",
               "ISA extension v3",
               "V3_ISA_EXTENSION");
}

static void
@@ -788,6 +864,9 @@ static void section_misc(const char *define_prefix, __u32 ifindex)
            "/*** eBPF misc features ***/",
            define_prefix);
    probe_large_insn_limit(define_prefix, ifindex);
    probe_bounded_loops(define_prefix, ifindex);
    probe_v2_isa_extension(define_prefix, ifindex);
    probe_v3_isa_extension(define_prefix, ifindex);
    print_end_section();
}
@@ -1655,7 +1655,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
    j = 0;
    idx = 0;
    bpf_object__for_each_map(map, obj) {
        if (!bpf_map__is_offload_neutral(map))
        if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
            bpf_map__set_ifindex(map, ifindex);

        if (j < old_map_fds && idx == map_replace[j].idx) {
@@ -691,11 +691,11 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
    return libbpf_err_errno(ret);
}

int bpf_map_delete_batch(int fd, void *keys, __u32 *count,
int bpf_map_delete_batch(int fd, const void *keys, __u32 *count,
             const struct bpf_map_batch_opts *opts)
{
    return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL,
                    NULL, keys, NULL, count, opts);
                    NULL, (void *)keys, NULL, count, opts);
}

int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys,
@@ -715,11 +715,11 @@ int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch,
                    count, opts);
}

int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count,
int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *count,
             const struct bpf_map_batch_opts *opts)
{
    return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL,
                    keys, values, count, opts);
                    (void *)keys, (void *)values, count, opts);
}

int bpf_obj_pin(int fd, const char *pathname)
@@ -254,17 +254,128 @@ struct bpf_map_batch_opts {
};
#define bpf_map_batch_opts__last_field flags

LIBBPF_API int bpf_map_delete_batch(int fd, void *keys,

/**
 * @brief **bpf_map_delete_batch()** allows for batch deletion of multiple
 * elements in a BPF map.
 *
 * @param fd BPF map file descriptor
 * @param keys pointer to an array of *count* keys
 * @param count input and output parameter; on input **count** represents the
 * number of elements in the map to delete in batch;
 * on output if a non-EFAULT error is returned, **count** represents the number of deleted
 * elements if the output **count** value is not equal to the input **count** value
 * If EFAULT is returned, **count** should not be trusted to be correct.
 * @param opts options for configuring the way the batch deletion works
 * @return 0, on success; negative error code, otherwise (errno is also set to
 * the error code)
 */
LIBBPF_API int bpf_map_delete_batch(int fd, const void *keys,
                    __u32 *count,
                    const struct bpf_map_batch_opts *opts);
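(Illustration only, not part of the patch: a minimal user-space sketch of the contract documented above. The map fd, the __u32 key layout and the key list are assumptions made up for the example.)

    #include <errno.h>
    #include <stdio.h>
    #include <bpf/bpf.h>

    /* Hypothetical helper: batch-delete four __u32 keys from an existing map. */
    static int delete_some_keys(int map_fd)
    {
        __u32 keys[] = { 1, 2, 3, 4 };
        __u32 count = 4;    /* in: number of keys; out: number actually deleted */
        LIBBPF_OPTS(bpf_map_batch_opts, opts);  /* default elem_flags/flags */
        int err;

        err = bpf_map_delete_batch(map_fd, keys, &count, &opts);
        if (err)
            fprintf(stderr, "batch delete: %d, deleted %u of 4\n", err, count);
        return err;
    }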

/**
 * @brief **bpf_map_lookup_batch()** allows for batch lookup of BPF map elements.
 *
 * The parameter *in_batch* is the address of the first element in the batch to read.
 * *out_batch* is an output parameter that should be passed as *in_batch* to subsequent
 * calls to **bpf_map_lookup_batch()**. NULL can be passed for *in_batch* to indicate
 * that the batched lookup starts from the beginning of the map.
 *
 * The *keys* and *values* are output parameters which must point to memory large enough to
 * hold *count* items based on the key and value size of the map *map_fd*. The *keys*
 * buffer must be of *key_size* * *count*. The *values* buffer must be of
 * *value_size* * *count*.
 *
 * @param fd BPF map file descriptor
 * @param in_batch address of the first element in batch to read, can pass NULL to
 * indicate that the batched lookup starts from the beginning of the map.
 * @param out_batch output parameter that should be passed to next call as *in_batch*
 * @param keys pointer to an array large enough for *count* keys
 * @param values pointer to an array large enough for *count* values
 * @param count input and output parameter; on input it's the number of elements
 * in the map to read in batch; on output it's the number of elements that were
 * successfully read.
 * If a non-EFAULT error is returned, count will be set as the number of elements
 * that were read before the error occurred.
 * If EFAULT is returned, **count** should not be trusted to be correct.
 * @param opts options for configuring the way the batch lookup works
 * @return 0, on success; negative error code, otherwise (errno is also set to
 * the error code)
 */
LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch,
                    void *keys, void *values, __u32 *count,
                    const struct bpf_map_batch_opts *opts);
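(Illustration only, not part of the patch: a sketch of the in_batch/out_batch iteration described above. It assumes a map with __u32 keys and __u64 values whose per-call cursor fits in a __u32, as in the kernel's map batch selftests; dump_map and map_fd are hypothetical names.)

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <bpf/bpf.h>

    #define BATCH 64

    static int dump_map(int map_fd)
    {
        __u32 in_batch = 0, out_batch = 0;
        __u32 keys[BATCH];
        __u64 values[BATCH];
        bool first = true;
        __u32 i;
        int err;

        do {
            __u32 count = BATCH;

            /* NULL in_batch on the first call starts at the beginning of the map */
            err = bpf_map_lookup_batch(map_fd, first ? NULL : &in_batch,
                           &out_batch, keys, values, &count, NULL);
            if (err && errno != ENOENT)
                return err; /* real failure */

            for (i = 0; i < count; i++)
                printf("key %u -> %llu\n", keys[i],
                       (unsigned long long)values[i]);

            in_batch = out_batch;   /* resume where the kernel stopped */
            first = false;
        } while (!err);     /* -ENOENT: the whole map has been read */

        return 0;
    }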

/**
 * @brief **bpf_map_lookup_and_delete_batch()** allows for batch lookup and deletion
 * of BPF map elements where each element is deleted after being retrieved.
 *
 * @param fd BPF map file descriptor
 * @param in_batch address of the first element in batch to read, can pass NULL to
 * get address of the first element in *out_batch*
 * @param out_batch output parameter that should be passed to next call as *in_batch*
 * @param keys pointer to an array of *count* keys
 * @param values pointer to an array large enough for *count* values
 * @param count input and output parameter; on input it's the number of elements
 * in the map to read and delete in batch; on output it represents the number of
 * elements that were successfully read and deleted
 * If a non-**EFAULT** error code is returned and if the output **count** value
 * is not equal to the input **count** value, up to **count** elements may
 * have been deleted.
 * if **EFAULT** is returned up to *count* elements may have been deleted without
 * being returned via the *keys* and *values* output parameters.
 * @param opts options for configuring the way the batch lookup and delete works
 * @return 0, on success; negative error code, otherwise (errno is also set to
 * the error code)
 */
LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch,
                           void *out_batch, void *keys,
                           void *values, __u32 *count,
                           const struct bpf_map_batch_opts *opts);
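(Illustration only, not part of the patch: a hypothetical one-shot call that reads and removes up to 64 elements, following the contract documented above, with the same assumed __u32/__u64 key/value layout as in the previous sketch.)

    #include <errno.h>
    #include <bpf/bpf.h>

    static int drain_up_to_64(int map_fd)
    {
        __u32 out_batch, count = 64;
        __u32 keys[64];
        __u64 values[64];
        int err;

        /* NULL in_batch: start from the beginning; every element read is deleted */
        err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &out_batch,
                              keys, values, &count, NULL);
        if (err && errno != ENOENT)
            return err;
        /* 'count' now says how many elements were read and removed */
        return 0;
    }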
LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values,

/**
 * @brief **bpf_map_update_batch()** updates multiple elements in a map
 * by specifying keys and their corresponding values.
 *
 * The *keys* and *values* parameters must point to memory large enough
 * to hold *count* items based on the key and value size of the map.
 *
 * The *opts* parameter can be used to control how *bpf_map_update_batch()*
 * should handle keys that either do or do not already exist in the map.
 * In particular the *flags* parameter of *bpf_map_batch_opts* can be
 * one of the following:
 *
 * Note that *count* is an input and output parameter, where on output it
 * represents how many elements were successfully updated. Also note that if
 * **EFAULT** then *count* should not be trusted to be correct.
 *
 * **BPF_ANY**
 *    Create new elements or update existing.
 *
 * **BPF_NOEXIST**
 *    Create new elements only if they do not exist.
 *
 * **BPF_EXIST**
 *    Update existing elements.
 *
 * **BPF_F_LOCK**
 *    Update spin_lock-ed map elements. This must be
 *    specified if the map value contains a spinlock.
 *
 * @param fd BPF map file descriptor
 * @param keys pointer to an array of *count* keys
 * @param values pointer to an array of *count* values
 * @param count input and output parameter; on input it's the number of elements
 * in the map to update in batch; on output if a non-EFAULT error is returned,
 * **count** represents the number of updated elements if the output **count**
 * value is not equal to the input **count** value.
 * If EFAULT is returned, **count** should not be trusted to be correct.
 * @param opts options for configuring the way the batch update works
 * @return 0, on success; negative error code, otherwise (errno is also set to
 * the error code)
 */
LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values,
                    __u32 *count,
                    const struct bpf_map_batch_opts *opts);
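(Illustration only, not part of the patch: a sketch of a batch update. Key/value types, NR and fill_map are assumptions; with default bpf_map_batch_opts the call creates missing elements and updates existing ones, i.e. BPF_ANY behaviour as documented above.)

    #include <stdio.h>
    #include <bpf/bpf.h>

    #define NR 128

    static int fill_map(int map_fd)
    {
        __u32 keys[NR];
        __u64 values[NR];
        __u32 i, count = NR;
        LIBBPF_OPTS(bpf_map_batch_opts, opts);
        int err;

        for (i = 0; i < NR; i++) {
            keys[i] = i;
            values[i] = (__u64)i * i;
        }

        err = bpf_map_update_batch(map_fd, keys, values, &count, &opts);
        if (err)
            fprintf(stderr, "batch update: %d, updated %u of %u\n",
                err, count, (__u32)NR);
        return err;
    }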

@@ -9916,7 +9916,10 @@ static int append_to_file(const char *file, const char *fmt, ...)
static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
                     const char *kfunc_name, size_t offset)
{
    snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), kfunc_name, offset);
    static int index = 0;

    snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
         __sync_fetch_and_add(&index, 1));
}

static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
@@ -10017,7 +10020,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
        gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
                         func_name, offset);

        legacy_probe = strdup(func_name);
        legacy_probe = strdup(probe_name);
        if (!legacy_probe)
            return libbpf_err_ptr(-ENOMEM);

@@ -10676,10 +10679,10 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
    return link;
}

enum bpf_perf_event_ret
bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
               void **copy_mem, size_t *copy_size,
               bpf_perf_event_print_t fn, void *private_data)
static enum bpf_perf_event_ret
perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
               void **copy_mem, size_t *copy_size,
               bpf_perf_event_print_t fn, void *private_data)
{
    struct perf_event_mmap_page *header = mmap_mem;
    __u64 data_head = ring_buffer_read_head(header);
@@ -10724,6 +10727,12 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
    return libbpf_err(ret);
}

__attribute__((alias("perf_event_read_simple")))
enum bpf_perf_event_ret
bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
               void **copy_mem, size_t *copy_size,
               bpf_perf_event_print_t fn, void *private_data);

struct perf_buffer;

struct perf_buffer_params {
@@ -11132,10 +11141,10 @@ static int perf_buffer__process_records(struct perf_buffer *pb,
{
    enum bpf_perf_event_ret ret;

    ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
                     pb->page_size, &cpu_buf->buf,
                     &cpu_buf->buf_size,
                     perf_buffer__process_record, cpu_buf);
    ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
                     pb->page_size, &cpu_buf->buf,
                     &cpu_buf->buf_size,
                     perf_buffer__process_record, cpu_buf);
    if (ret != LIBBPF_PERF_EVENT_CONT)
        return ret;
    return 0;
@@ -677,7 +677,8 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name);
 * Get bpf_map through the offset of corresponding struct bpf_map_def
 * in the BPF object file.
 */
LIBBPF_API struct bpf_map *
LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__find_map_by_name() instead")
struct bpf_map *
bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);

LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_map() instead")
@@ -744,6 +745,7 @@ LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
                      const void *data, size_t size);
LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);
LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__type() instead")
LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);

/**
@@ -1026,6 +1028,7 @@ LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_i
typedef enum bpf_perf_event_ret
    (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
                  void *private_data);
LIBBPF_DEPRECATED_SINCE(0, 8, "use perf_buffer__poll() or perf_buffer__consume() instead")
LIBBPF_API enum bpf_perf_event_ret
bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
               void **copy_mem, size_t *copy_size,
@@ -9,6 +9,7 @@
#define MAX_FILES 7

#include "test_d_path.skel.h"
#include "test_d_path_check_rdonly_mem.skel.h"

static int duration;

@@ -99,7 +100,7 @@ out_close:
    return ret;
}

void test_d_path(void)
static void test_d_path_basic(void)
{
    struct test_d_path__bss *bss;
    struct test_d_path *skel;
@@ -155,3 +156,22 @@ void test_d_path(void)
cleanup:
    test_d_path__destroy(skel);
}

static void test_d_path_check_rdonly_mem(void)
{
    struct test_d_path_check_rdonly_mem *skel;

    skel = test_d_path_check_rdonly_mem__open_and_load();
    ASSERT_ERR_PTR(skel, "unexpected_load_overwriting_rdonly_mem");

    test_d_path_check_rdonly_mem__destroy(skel);
}

void test_d_path(void)
{
    if (test__start_subtest("basic"))
        test_d_path_basic();

    if (test__start_subtest("check_rdonly_mem"))
        test_d_path_check_rdonly_mem();
}
@@ -105,6 +105,13 @@ static int setns_by_fd(int nsfd)
    if (!ASSERT_OK(err, "unshare"))
        return err;

    /* Make our /sys mount private, so the following umount won't
     * trigger the global umount in case it's shared.
     */
    err = mount("none", "/sys", NULL, MS_PRIVATE, NULL);
    if (!ASSERT_OK(err, "remount private /sys"))
        return err;

    err = umount2("/sys", MNT_DETACH);
    if (!ASSERT_OK(err, "umount2 /sys"))
        return err;
@@ -12,9 +12,9 @@
char _license[] SEC("license") = "GPL";

SEC("raw_tracepoint/consume_skb")
int while_true(volatile struct pt_regs* ctx)
int while_true(struct pt_regs *ctx)
{
    __u64 i = 0, sum = 0;
    volatile __u64 i = 0, sum = 0;
    do {
        i++;
        sum += PT_REGS_RC(ctx);
@@ -0,0 +1,28 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Google */

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

extern const int bpf_prog_active __ksym;

SEC("fentry/security_inode_getattr")
int BPF_PROG(d_path_check_rdonly_mem, struct path *path, struct kstat *stat,
         __u32 request_mask, unsigned int query_flags)
{
    void *active;
    __u32 cpu;

    cpu = bpf_get_smp_processor_id();
    active = (void *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
    if (active) {
        /* FAIL here! 'active' points to readonly memory. bpf helpers
         * that update its arguments can not write into it.
         */
        bpf_d_path(path, active, sizeof(int));
    }
    return 0;
}

char _license[] SEC("license") = "GPL";
@@ -35,18 +35,21 @@ struct sock_test {
    /* Endpoint to bind() to */
    const char *ip;
    unsigned short port;
    unsigned short port_retry;
    /* Expected test result */
    enum {
        LOAD_REJECT,
        ATTACH_REJECT,
        BIND_REJECT,
        SUCCESS,
        RETRY_SUCCESS,
        RETRY_REJECT
    } result;
};

static struct sock_test tests[] = {
    {
        "bind4 load with invalid access: src_ip6",
        .descr = "bind4 load with invalid access: src_ip6",
        .insns = {
            BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
            BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -54,16 +57,12 @@ static struct sock_test tests[] = {
            BPF_MOV64_IMM(BPF_REG_0, 1),
            BPF_EXIT_INSN(),
        },
        BPF_CGROUP_INET4_POST_BIND,
        BPF_CGROUP_INET4_POST_BIND,
        0,
        0,
        NULL,
        0,
        LOAD_REJECT,
        .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
        .attach_type = BPF_CGROUP_INET4_POST_BIND,
        .result = LOAD_REJECT,
    },
    {
        "bind4 load with invalid access: mark",
        .descr = "bind4 load with invalid access: mark",
        .insns = {
            BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
            BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -71,16 +70,12 @@ static struct sock_test tests[] = {
            BPF_MOV64_IMM(BPF_REG_0, 1),
            BPF_EXIT_INSN(),
        },
        BPF_CGROUP_INET4_POST_BIND,
        BPF_CGROUP_INET4_POST_BIND,
        0,
        0,
        NULL,
        0,
        LOAD_REJECT,
        .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
        .attach_type = BPF_CGROUP_INET4_POST_BIND,
        .result = LOAD_REJECT,
    },
    {
        "bind6 load with invalid access: src_ip4",
        .descr = "bind6 load with invalid access: src_ip4",
        .insns = {
            BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
            BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -88,16 +83,12 @@ static struct sock_test tests[] = {
            BPF_MOV64_IMM(BPF_REG_0, 1),
            BPF_EXIT_INSN(),
        },
        BPF_CGROUP_INET6_POST_BIND,
        BPF_CGROUP_INET6_POST_BIND,
        0,
        0,
        NULL,
        0,
        LOAD_REJECT,
        .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
        .attach_type = BPF_CGROUP_INET6_POST_BIND,
        .result = LOAD_REJECT,
    },
    {
        "sock_create load with invalid access: src_port",
        .descr = "sock_create load with invalid access: src_port",
        .insns = {
            BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
            BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -105,128 +96,106 @@ static struct sock_test tests[] = {
            BPF_MOV64_IMM(BPF_REG_0, 1),
            BPF_EXIT_INSN(),
        },
        BPF_CGROUP_INET_SOCK_CREATE,
        BPF_CGROUP_INET_SOCK_CREATE,
        0,
        0,
        NULL,
        0,
        LOAD_REJECT,
        .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
        .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
        .result = LOAD_REJECT,
    },
    {
        "sock_create load w/o expected_attach_type (compat mode)",
        .descr = "sock_create load w/o expected_attach_type (compat mode)",
        .insns = {
            BPF_MOV64_IMM(BPF_REG_0, 1),
            BPF_EXIT_INSN(),
        },
        0,
        BPF_CGROUP_INET_SOCK_CREATE,
        AF_INET,
        SOCK_STREAM,
        "127.0.0.1",
        8097,
        SUCCESS,
        .expected_attach_type = 0,
        .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
        .domain = AF_INET,
        .type = SOCK_STREAM,
        .ip = "127.0.0.1",
        .port = 8097,
        .result = SUCCESS,
    },
    {
        "sock_create load w/ expected_attach_type",
        .descr = "sock_create load w/ expected_attach_type",
        .insns = {
            BPF_MOV64_IMM(BPF_REG_0, 1),
            BPF_EXIT_INSN(),
        },
        BPF_CGROUP_INET_SOCK_CREATE,
        BPF_CGROUP_INET_SOCK_CREATE,
        AF_INET,
        SOCK_STREAM,
        "127.0.0.1",
        8097,
        SUCCESS,
        .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
        .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
        .domain = AF_INET,
        .type = SOCK_STREAM,
        .ip = "127.0.0.1",
        .port = 8097,
        .result = SUCCESS,
    },
    {
        "attach type mismatch bind4 vs bind6",
        .descr = "attach type mismatch bind4 vs bind6",
        .insns = {
            BPF_MOV64_IMM(BPF_REG_0, 1),
            BPF_EXIT_INSN(),
        },
        BPF_CGROUP_INET4_POST_BIND,
        BPF_CGROUP_INET6_POST_BIND,
        0,
        0,
        NULL,
        0,
        ATTACH_REJECT,
        .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
        .attach_type = BPF_CGROUP_INET6_POST_BIND,
        .result = ATTACH_REJECT,
    },
    {
        "attach type mismatch bind6 vs bind4",
        .descr = "attach type mismatch bind6 vs bind4",
        .insns = {
            BPF_MOV64_IMM(BPF_REG_0, 1),
            BPF_EXIT_INSN(),
        },
        BPF_CGROUP_INET6_POST_BIND,
        BPF_CGROUP_INET4_POST_BIND,
        0,
        0,
        NULL,
        0,
        ATTACH_REJECT,
        .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
        .attach_type = BPF_CGROUP_INET4_POST_BIND,
        .result = ATTACH_REJECT,
    },
    {
        "attach type mismatch default vs bind4",
        .descr = "attach type mismatch default vs bind4",
        .insns = {
            BPF_MOV64_IMM(BPF_REG_0, 1),
            BPF_EXIT_INSN(),
        },
        0,
        BPF_CGROUP_INET4_POST_BIND,
        0,
        0,
        NULL,
        0,
        ATTACH_REJECT,
        .expected_attach_type = 0,
        .attach_type = BPF_CGROUP_INET4_POST_BIND,
        .result = ATTACH_REJECT,
    },
    {
        "attach type mismatch bind6 vs sock_create",
        .descr = "attach type mismatch bind6 vs sock_create",
        .insns = {
            BPF_MOV64_IMM(BPF_REG_0, 1),
            BPF_EXIT_INSN(),
        },
        BPF_CGROUP_INET6_POST_BIND,
        BPF_CGROUP_INET_SOCK_CREATE,
        0,
        0,
        NULL,
        0,
        ATTACH_REJECT,
        .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
        .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
        .result = ATTACH_REJECT,
    },
    {
        "bind4 reject all",
        .descr = "bind4 reject all",
        .insns = {
            BPF_MOV64_IMM(BPF_REG_0, 0),
            BPF_EXIT_INSN(),
        },
        BPF_CGROUP_INET4_POST_BIND,
        BPF_CGROUP_INET4_POST_BIND,
        AF_INET,
        SOCK_STREAM,
        "0.0.0.0",
        0,
        BIND_REJECT,
        .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
        .attach_type = BPF_CGROUP_INET4_POST_BIND,
        .domain = AF_INET,
        .type = SOCK_STREAM,
        .ip = "0.0.0.0",
        .result = BIND_REJECT,
    },
    {
        "bind6 reject all",
        .descr = "bind6 reject all",
        .insns = {
            BPF_MOV64_IMM(BPF_REG_0, 0),
            BPF_EXIT_INSN(),
        },
        BPF_CGROUP_INET6_POST_BIND,
        BPF_CGROUP_INET6_POST_BIND,
        AF_INET6,
        SOCK_STREAM,
        "::",
        0,
        BIND_REJECT,
        .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
        .attach_type = BPF_CGROUP_INET6_POST_BIND,
        .domain = AF_INET6,
        .type = SOCK_STREAM,
        .ip = "::",
        .result = BIND_REJECT,
    },
    {
        "bind6 deny specific IP & port",
        .descr = "bind6 deny specific IP & port",
        .insns = {
            BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),

@@ -247,16 +216,16 @@ static struct sock_test tests[] = {
            BPF_MOV64_IMM(BPF_REG_0, 1),
            BPF_EXIT_INSN(),
        },
        BPF_CGROUP_INET6_POST_BIND,
        BPF_CGROUP_INET6_POST_BIND,
        AF_INET6,
        SOCK_STREAM,
        "::1",
        8193,
        BIND_REJECT,
        .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
        .attach_type = BPF_CGROUP_INET6_POST_BIND,
        .domain = AF_INET6,
        .type = SOCK_STREAM,
        .ip = "::1",
        .port = 8193,
        .result = BIND_REJECT,
    },
    {
        "bind4 allow specific IP & port",
        .descr = "bind4 allow specific IP & port",
        .insns = {
            BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),

@@ -277,41 +246,132 @@ static struct sock_test tests[] = {
            BPF_MOV64_IMM(BPF_REG_0, 0),
            BPF_EXIT_INSN(),
        },
        BPF_CGROUP_INET4_POST_BIND,
        BPF_CGROUP_INET4_POST_BIND,
        AF_INET,
        SOCK_STREAM,
        "127.0.0.1",
        4098,
        SUCCESS,
        .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
        .attach_type = BPF_CGROUP_INET4_POST_BIND,
        .domain = AF_INET,
        .type = SOCK_STREAM,
        .ip = "127.0.0.1",
        .port = 4098,
        .result = SUCCESS,
    },
    {
        "bind4 allow all",
        .descr = "bind4 deny specific IP & port of TCP, and retry",
        .insns = {
            BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),

            /* if (ip == expected && port == expected) */
            BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
                    offsetof(struct bpf_sock, src_ip4)),
            BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
                    __bpf_constant_ntohl(0x7F000001), 4),
            BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
                    offsetof(struct bpf_sock, src_port)),
            BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),

            /* return DENY; */
            BPF_MOV64_IMM(BPF_REG_0, 0),
            BPF_JMP_A(1),

            /* else return ALLOW; */
            BPF_MOV64_IMM(BPF_REG_0, 1),
            BPF_EXIT_INSN(),
        },
        .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
        .attach_type = BPF_CGROUP_INET4_POST_BIND,
        .domain = AF_INET,
        .type = SOCK_STREAM,
        .ip = "127.0.0.1",
        .port = 4098,
        .port_retry = 5000,
        .result = RETRY_SUCCESS,
    },
    {
        .descr = "bind4 deny specific IP & port of UDP, and retry",
        .insns = {
            BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),

            /* if (ip == expected && port == expected) */
            BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
                    offsetof(struct bpf_sock, src_ip4)),
            BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
                    __bpf_constant_ntohl(0x7F000001), 4),
            BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
                    offsetof(struct bpf_sock, src_port)),
            BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),

            /* return DENY; */
            BPF_MOV64_IMM(BPF_REG_0, 0),
            BPF_JMP_A(1),

            /* else return ALLOW; */
            BPF_MOV64_IMM(BPF_REG_0, 1),
            BPF_EXIT_INSN(),
        },
        .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
        .attach_type = BPF_CGROUP_INET4_POST_BIND,
        .domain = AF_INET,
        .type = SOCK_DGRAM,
        .ip = "127.0.0.1",
        .port = 4098,
        .port_retry = 5000,
        .result = RETRY_SUCCESS,
    },
    {
        .descr = "bind6 deny specific IP & port, and retry",
        .insns = {
            BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),

            /* if (ip == expected && port == expected) */
            BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
                    offsetof(struct bpf_sock, src_ip6[3])),
            BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
                    __bpf_constant_ntohl(0x00000001), 4),
            BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
                    offsetof(struct bpf_sock, src_port)),
            BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2),

            /* return DENY; */
            BPF_MOV64_IMM(BPF_REG_0, 0),
            BPF_JMP_A(1),

            /* else return ALLOW; */
            BPF_MOV64_IMM(BPF_REG_0, 1),
            BPF_EXIT_INSN(),
        },
        .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
        .attach_type = BPF_CGROUP_INET6_POST_BIND,
        .domain = AF_INET6,
        .type = SOCK_STREAM,
        .ip = "::1",
        .port = 8193,
        .port_retry = 9000,
        .result = RETRY_SUCCESS,
    },
    {
        .descr = "bind4 allow all",
        .insns = {
            BPF_MOV64_IMM(BPF_REG_0, 1),
            BPF_EXIT_INSN(),
        },
        BPF_CGROUP_INET4_POST_BIND,
        BPF_CGROUP_INET4_POST_BIND,
        AF_INET,
        SOCK_STREAM,
        "0.0.0.0",
        0,
        SUCCESS,
        .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
        .attach_type = BPF_CGROUP_INET4_POST_BIND,
        .domain = AF_INET,
        .type = SOCK_STREAM,
        .ip = "0.0.0.0",
        .result = SUCCESS,
    },
    {
        "bind6 allow all",
        .descr = "bind6 allow all",
        .insns = {
            BPF_MOV64_IMM(BPF_REG_0, 1),
            BPF_EXIT_INSN(),
        },
        BPF_CGROUP_INET6_POST_BIND,
        BPF_CGROUP_INET6_POST_BIND,
        AF_INET6,
        SOCK_STREAM,
        "::",
        0,
        SUCCESS,
        .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
        .attach_type = BPF_CGROUP_INET6_POST_BIND,
        .domain = AF_INET6,
        .type = SOCK_STREAM,
        .ip = "::",
        .result = SUCCESS,
    },
};

@@ -351,14 +411,15 @@ static int attach_sock_prog(int cgfd, int progfd,
    return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE);
}

static int bind_sock(int domain, int type, const char *ip, unsigned short port)
static int bind_sock(int domain, int type, const char *ip,
             unsigned short port, unsigned short port_retry)
{
    struct sockaddr_storage addr;
    struct sockaddr_in6 *addr6;
    struct sockaddr_in *addr4;
    int sockfd = -1;
    socklen_t len;
    int err = 0;
    int res = SUCCESS;

    sockfd = socket(domain, type, 0);
    if (sockfd < 0)
@@ -384,21 +445,44 @@ static int bind_sock(int domain, int type, const char *ip, unsigned short port)
        goto err;
    }

    if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1)
        goto err;
    if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) {
        /* sys_bind() may fail for different reasons, errno has to be
         * checked to confirm that BPF program rejected it.
         */
        if (errno != EPERM)
            goto err;
        if (port_retry)
            goto retry;
        res = BIND_REJECT;
        goto out;
    }

    goto out;
retry:
    if (domain == AF_INET)
        addr4->sin_port = htons(port_retry);
    else
        addr6->sin6_port = htons(port_retry);
    if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) {
        if (errno != EPERM)
            goto err;
        res = RETRY_REJECT;
    } else {
        res = RETRY_SUCCESS;
    }
    goto out;
err:
    err = -1;
    res = -1;
out:
    close(sockfd);
    return err;
    return res;
}

static int run_test_case(int cgfd, const struct sock_test *test)
{
    int progfd = -1;
    int err = 0;
    int res;

    printf("Test case: %s .. ", test->descr);
    progfd = load_sock_prog(test->insns, test->expected_attach_type);
@@ -416,21 +500,11 @@ static int run_test_case(int cgfd, const struct sock_test *test)
        goto err;
    }

    if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) {
        /* sys_bind() may fail for different reasons, errno has to be
         * checked to confirm that BPF program rejected it.
         */
        if (test->result == BIND_REJECT && errno == EPERM)
            goto out;
        else
            goto err;
    }
    res = bind_sock(test->domain, test->type, test->ip, test->port,
            test->port_retry);
    if (res > 0 && test->result == res)
        goto out;

    if (test->result != SUCCESS)
        goto err;

    goto out;
err:
    err = -1;
out:
@@ -58,6 +58,34 @@
    .result = ACCEPT,
    .result_unpriv = ACCEPT,
},
{
    "check with invalid reg offset 0",
    .insns = {
    /* reserve 8 byte ringbuf memory */
    BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
    BPF_LD_MAP_FD(BPF_REG_1, 0),
    BPF_MOV64_IMM(BPF_REG_2, 8),
    BPF_MOV64_IMM(BPF_REG_3, 0),
    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
    /* store a pointer to the reserved memory in R6 */
    BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
    /* add invalid offset to memory or NULL */
    BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
    /* check whether the reservation was successful */
    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
    /* should not be able to access *(R7) = 0 */
    BPF_ST_MEM(BPF_W, BPF_REG_6, 0, 0),
    /* submit the reserved ringbuf memory */
    BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
    BPF_MOV64_IMM(BPF_REG_2, 0),
    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
    BPF_MOV64_IMM(BPF_REG_0, 0),
    BPF_EXIT_INSN(),
    },
    .fixup_map_ringbuf = { 1 },
    .result = REJECT,
    .errstr = "R0 pointer arithmetic on mem_or_null prohibited",
},
{
    "check corrupted spill/fill",
    .insns = {