Alexei Starovoitov says:

====================
pull-request: bpf-next 2022-01-06

We've added 41 non-merge commits during the last 2 day(s) which contain
a total of 36 files changed, 1214 insertions(+), 368 deletions(-).

The main changes are:

1) Various fixes in the verifier, from Kris and Daniel.

2) Fixes in sockmap, from John.

3) bpf_getsockopt fix, from Kuniyuki.

4) INET_POST_BIND fix, from Menglong.

5) arm64 JIT fix for bpf pseudo funcs, from Hou.

6) BPF ISA doc improvements, from Christoph.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (41 commits)
  bpf: selftests: Add bind retry for post_bind{4, 6}
  bpf: selftests: Use C99 initializers in test_sock.c
  net: bpf: Handle return value of BPF_CGROUP_RUN_PROG_INET{4,6}_POST_BIND()
  bpf/selftests: Test bpf_d_path on rdonly_mem.
  libbpf: Add documentation for bpf_map batch operations
  selftests/bpf: Don't rely on preserving volatile in PT_REGS macros in loop3
  xdp: Add xdp_do_redirect_frame() for pre-computed xdp_frames
  xdp: Move conversion to xdp_frame out of map functions
  page_pool: Store the XDP mem id
  page_pool: Add callback to init pages when they are allocated
  xdp: Allow registering memory model without rxq reference
  samples/bpf: xdpsock: Add timestamp for Tx-only operation
  samples/bpf: xdpsock: Add time-out for cleaning Tx
  samples/bpf: xdpsock: Add sched policy and priority support
  samples/bpf: xdpsock: Add cyclic TX operation capability
  samples/bpf: xdpsock: Add clockid selection support
  samples/bpf: xdpsock: Add Dest and Src MAC setting for Tx-only operation
  samples/bpf: xdpsock: Add VLAN support for Tx-only operation
  libbpf 1.0: Deprecate bpf_object__find_map_by_offset() API
  libbpf 1.0: Deprecate bpf_map__is_offload_neutral()
  ...
====================

Link: https://lore.kernel.org/r/20220107013626.53943-1-alexei.starovoitov@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
@@ -19,23 +19,37 @@ The eBPF calling convention is defined as:
 R0 - R5 are scratch registers and eBPF programs needs to spill/fill them if
 necessary across calls.
 
+Instruction encoding
+====================
+
+eBPF uses 64-bit instructions with the following encoding:
+
+ =============  =======  ===============  ====================  ============
+ 32 bits (MSB)  16 bits  4 bits           4 bits                8 bits (LSB)
+ =============  =======  ===============  ====================  ============
+ immediate      offset   source register  destination register  opcode
+ =============  =======  ===============  ====================  ============
+
+Note that most instructions do not use all of the fields.
+Unused fields shall be cleared to zero.
+
 Instruction classes
-===================
+-------------------
 
 The three LSB bits of the 'opcode' field store the instruction class:
 
-  =========  =====
-  class      value
-  =========  =====
-  BPF_LD     0x00
-  BPF_LDX    0x01
-  BPF_ST     0x02
-  BPF_STX    0x03
-  BPF_ALU    0x04
-  BPF_JMP    0x05
-  BPF_JMP32  0x06
-  BPF_ALU64  0x07
-  =========  =====
+  =========  =====  ===============================
+  class      value  description
+  =========  =====  ===============================
+  BPF_LD     0x00   non-standard load operations
+  BPF_LDX    0x01   load into register operations
+  BPF_ST     0x02   store from immediate operations
+  BPF_STX    0x03   store from register operations
+  BPF_ALU    0x04   32-bit arithmetic operations
+  BPF_JMP    0x05   64-bit jump operations
+  BPF_JMP32  0x06   32-bit jump operations
+  BPF_ALU64  0x07   64-bit arithmetic operations
+  =========  =====  ===============================
 
 Arithmetic and jump instructions
 ================================
@@ -60,66 +74,78 @@ The 4th bit encodes the source operand:
 
 The four MSB bits store the operation code.
 
-For class BPF_ALU or BPF_ALU64:
+Arithmetic instructions
+-----------------------
 
-  ========  =====  =========================
-  code      value  description
-  ========  =====  =========================
-  BPF_ADD   0x00
-  BPF_SUB   0x10
-  BPF_MUL   0x20
-  BPF_DIV   0x30
-  BPF_OR    0x40
-  BPF_AND   0x50
-  BPF_LSH   0x60
-  BPF_RSH   0x70
-  BPF_NEG   0x80
-  BPF_MOD   0x90
-  BPF_XOR   0xa0
-  BPF_MOV   0xb0   mov reg to reg
+BPF_ALU uses 32-bit wide operands while BPF_ALU64 uses 64-bit wide operands for
+otherwise identical operations.
+The code field encodes the operation as below:
+
+  ========  =====  ==========================
+  code      value  description
+  ========  =====  ==========================
+  BPF_ADD   0x00   dst += src
+  BPF_SUB   0x10   dst -= src
+  BPF_MUL   0x20   dst \*= src
+  BPF_DIV   0x30   dst /= src
+  BPF_OR    0x40   dst \|= src
+  BPF_AND   0x50   dst &= src
+  BPF_LSH   0x60   dst <<= src
+  BPF_RSH   0x70   dst >>= src
+  BPF_NEG   0x80   dst = ~src
+  BPF_MOD   0x90   dst %= src
+  BPF_XOR   0xa0   dst ^= src
+  BPF_MOV   0xb0   dst = src
   BPF_ARSH  0xc0   sign extending shift right
   BPF_END   0xd0   endianness conversion
-  ========  =====  =========================
+  ========  =====  ==========================
 
-For class BPF_JMP or BPF_JMP32:
-
-  ========  =====  =========================
-  code      value  description
-  ========  =====  =========================
-  BPF_JA    0x00   BPF_JMP only
-  BPF_JEQ   0x10
-  BPF_JGT   0x20
-  BPF_JGE   0x30
-  BPF_JSET  0x40
-  BPF_JNE   0x50   jump '!='
-  BPF_JSGT  0x60   signed '>'
-  BPF_JSGE  0x70   signed '>='
-  BPF_CALL  0x80   function call
-  BPF_EXIT  0x90   function return
-  BPF_JLT   0xa0   unsigned '<'
-  BPF_JLE   0xb0   unsigned '<='
-  BPF_JSLT  0xc0   signed '<'
-  BPF_JSLE  0xd0   signed '<='
-  ========  =====  =========================
-
-So BPF_ADD | BPF_X | BPF_ALU means::
+BPF_ADD | BPF_X | BPF_ALU means::
 
   dst_reg = (u32) dst_reg + (u32) src_reg;
 
-Similarly, BPF_XOR | BPF_K | BPF_ALU means::
-
-  src_reg = (u32) src_reg ^ (u32) imm32
-
-eBPF is using BPF_MOV | BPF_X | BPF_ALU to represent A = B moves. BPF_ALU64
-is used to mean exactly the same operations as BPF_ALU, but with 64-bit wide
-operands instead. So BPF_ADD | BPF_X | BPF_ALU64 means 64-bit addition, i.e.::
+BPF_ADD | BPF_X | BPF_ALU64 means::
 
   dst_reg = dst_reg + src_reg
 
-BPF_JMP | BPF_EXIT means function exit only. The eBPF program needs to store
-the return value into register R0 before doing a BPF_EXIT. Class 6 is used as
-BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide
-operands for the comparisons instead.
+BPF_XOR | BPF_K | BPF_ALU means::
+
+  src_reg = (u32) src_reg ^ (u32) imm32
+
+BPF_XOR | BPF_K | BPF_ALU64 means::
+
+  src_reg = src_reg ^ imm32
+
+Jump instructions
+-----------------
+
+BPF_JMP32 uses 32-bit wide operands while BPF_JMP uses 64-bit wide operands for
+otherwise identical operations.
+The code field encodes the operation as below:
+
+  ========  =====  =========================  ============
+  code      value  description                notes
+  ========  =====  =========================  ============
+  BPF_JA    0x00   PC += off                  BPF_JMP only
+  BPF_JEQ   0x10   PC += off if dst == src
+  BPF_JGT   0x20   PC += off if dst > src     unsigned
+  BPF_JGE   0x30   PC += off if dst >= src    unsigned
+  BPF_JSET  0x40   PC += off if dst & src
+  BPF_JNE   0x50   PC += off if dst != src
+  BPF_JSGT  0x60   PC += off if dst > src     signed
+  BPF_JSGE  0x70   PC += off if dst >= src    signed
+  BPF_CALL  0x80   function call
+  BPF_EXIT  0x90   function / program return  BPF_JMP only
+  BPF_JLT   0xa0   PC += off if dst < src     unsigned
+  BPF_JLE   0xb0   PC += off if dst <= src    unsigned
+  BPF_JSLT  0xc0   PC += off if dst < src     signed
+  BPF_JSLE  0xd0   PC += off if dst <= src    signed
+  ========  =====  =========================  ============
+
+The eBPF program needs to store the return value into register R0 before doing a
+BPF_EXIT.
 
 Load and store instructions
@@ -147,15 +173,15 @@ The size modifier is one of:
 
 The mode modifier is one of:
 
-  =============  =====  =====================
+  =============  =====  ====================================
   mode modifier  value  description
-  =============  =====  =====================
+  =============  =====  ====================================
   BPF_IMM        0x00   used for 64-bit mov
-  BPF_ABS        0x20
-  BPF_IND        0x40
-  BPF_MEM        0x60
+  BPF_ABS        0x20   legacy BPF packet access
+  BPF_IND        0x40   legacy BPF packet access
+  BPF_MEM        0x60   all normal load and store operations
   BPF_ATOMIC     0xc0   atomic operations
-  =============  =====  =====================
+  =============  =====  ====================================
 
 BPF_MEM | <size> | BPF_STX means::
 
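These documentation hunks (evidently Documentation/bpf/instruction-set.rst, going by the content) describe the fixed 64-bit instruction layout. As a quick cross-check against the UAPI definition, the fields map onto struct bpf_insn as in this minimal sketch (assuming the standard include/uapi/linux/bpf.h layout):

    #include <linux/bpf.h>  /* struct bpf_insn; BPF_CLASS() via linux/bpf_common.h */
    #include <stdio.h>

    static void dump_insn(const struct bpf_insn *insn)
    {
            /* 'code' is the 8-bit opcode; its three LSB select the class. */
            printf("class=%#x dst=r%u src=r%u off=%d imm=%d\n",
                   BPF_CLASS(insn->code), insn->dst_reg, insn->src_reg,
                   insn->off, insn->imm);
    }

As a worked example, BPF_ADD | BPF_X | BPF_ALU64 is the single opcode byte 0x0f: class 0x07 (BPF_ALU64), source bit 0x08 (BPF_X), operation code 0x00 (BPF_ADD).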
@@ -792,7 +792,10 @@ emit_cond_jmp:
 		u64 imm64;
 
 		imm64 = (u64)insn1.imm << 32 | (u32)imm;
-		emit_a64_mov_i64(dst, imm64, ctx);
+		if (bpf_pseudo_func(insn))
+			emit_addr_mov_i64(dst, imm64, ctx);
+		else
+			emit_a64_mov_i64(dst, imm64, ctx);
 
 		return 1;
 	}
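This hunk is the arm64 JIT fix for bpf pseudo funcs (by the symbols, arch/arm64/net/bpf_jit_comp.c). A BPF_LD | BPF_IMM | BPF_DW instruction marked as a pseudo func loads the address of a BPF subprogram, and that address is patched after the first JIT pass, so its load must not change length between passes. A hedged illustration, with made-up address values:

    /* emit_a64_mov_i64() skips all-zero 16-bit chunks, so mov'ing a
     * placeholder of 0 could take one instruction while the patched
     * final address may need four:
     *
     *	movz x7, #0xdef0
     *	movk x7, #0x9abc, lsl #16
     *	movk x7, #0x5678, lsl #32
     *	movk x7, #0x1234, lsl #48
     *
     * emit_addr_mov_i64() always emits the full fixed-length sequence,
     * keeping the image layout stable when the address is filled in. */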
@@ -1669,17 +1669,17 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
 struct btf *bpf_get_btf_vmlinux(void);
 
 /* Map specifics */
-struct xdp_buff;
+struct xdp_frame;
 struct sk_buff;
 struct bpf_dtab_netdev;
 struct bpf_cpu_map_entry;
 
 void __dev_flush(void);
-int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
+int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx);
-int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx);
-int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
 			  struct bpf_map *map, bool exclude_ingress);
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 			     struct bpf_prog *xdp_prog);
@@ -1688,7 +1688,7 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
 			   bool exclude_ingress);
 
 void __cpu_map_flush(void);
-int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
+int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx);
 int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
 			     struct sk_buff *skb);
@@ -1866,26 +1866,26 @@ static inline void __dev_flush(void)
 {
 }
 
-struct xdp_buff;
+struct xdp_frame;
 struct bpf_dtab_netdev;
 struct bpf_cpu_map_entry;
 
 static inline
-int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
+int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx)
 {
 	return 0;
 }
 
 static inline
-int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx)
 {
 	return 0;
 }
 
 static inline
-int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
 			  struct bpf_map *map, bool exclude_ingress)
 {
 	return 0;
@@ -1913,7 +1913,7 @@ static inline void __cpu_map_flush(void)
 }
 
 static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
-				  struct xdp_buff *xdp,
+				  struct xdp_frame *xdpf,
 				  struct net_device *dev_rx)
 {
 	return 0;
@@ -1019,6 +1019,10 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 int xdp_do_redirect(struct net_device *dev,
 		    struct xdp_buff *xdp,
 		    struct bpf_prog *prog);
+int xdp_do_redirect_frame(struct net_device *dev,
+			  struct xdp_buff *xdp,
+			  struct xdp_frame *xdpf,
+			  struct bpf_prog *prog);
 void xdp_do_flush(void);
 
 /* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as
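The new declaration pairs with xdp_do_redirect(): callers that already hold a pre-computed xdp_frame can hand it in alongside the xdp_buff (the buff is still required for the AF_XDP/XSKMAP case). A hedged driver-side sketch, where all names are illustrative rather than from the patch:

    static int drv_handle_redirect(struct net_device *dev,
                                   struct bpf_prog *prog,
                                   struct xdp_buff *xdp,
                                   struct xdp_frame *xdpf)
    {
            /* xdpf was built earlier on this path, so frame-based targets
             * can enqueue it directly; XSKMAP targets still consume xdp. */
            return xdp_do_redirect_frame(dev, xdp, xdpf, prog);
    }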
@@ -80,6 +80,8 @@ struct page_pool_params {
 	enum dma_data_direction dma_dir; /* DMA mapping direction */
 	unsigned int	max_len; /* max DMA sync memory size */
 	unsigned int	offset;  /* DMA addr offset */
+	void (*init_callback)(struct page *page, void *arg);
+	void *init_arg;
 };
 
 struct page_pool {
@@ -94,6 +96,7 @@ struct page_pool {
 	unsigned int frag_offset;
 	struct page *frag_page;
 	long frag_users;
+	u32 xdp_mem_id;
 
 	/*
 	 * Data structure for allocation side
@@ -168,9 +171,12 @@ bool page_pool_return_skb_page(struct page *page);
 
 struct page_pool *page_pool_create(const struct page_pool_params *params);
 
+struct xdp_mem_info;
+
 #ifdef CONFIG_PAGE_POOL
 void page_pool_destroy(struct page_pool *pool);
-void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *));
+void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
+			   struct xdp_mem_info *mem);
 void page_pool_release_page(struct page_pool *pool, struct page *page);
 void page_pool_put_page_bulk(struct page_pool *pool, void **data,
 			     int count);
@@ -180,7 +186,8 @@ static inline void page_pool_destroy(struct page_pool *pool)
 }
 
 static inline void page_pool_use_xdp_mem(struct page_pool *pool,
-					 void (*disconnect)(void *))
+					 void (*disconnect)(void *),
+					 struct xdp_mem_info *mem)
 {
 }
 static inline void page_pool_release_page(struct page_pool *pool,
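The new init_callback/init_arg pair lets the pool's creator run a hook on every page as it enters the pool, and xdp_mem_id records the XDP memory-model id the pool is registered under. A minimal wiring sketch with hypothetical names (the struct, pattern value, and function names are illustrative):

    #include <net/page_pool.h>

    struct my_init_ctx { int pattern; };                /* illustrative */
    static struct my_init_ctx init_ctx = { .pattern = 0xab };

    static void my_page_init(struct page *page, void *arg)
    {
            struct my_init_ctx *ctx = arg;
            /* stamp headroom, counters, etc. on the fresh page */
    }

    static struct page_pool *my_create_pool(struct device *dev)
    {
            struct page_pool_params pp_params = {
                    .order         = 0,
                    .pool_size     = 256,
                    .dev           = dev,
                    .init_callback = my_page_init,
                    .init_arg      = &init_ctx,
            };

            return page_pool_create(&pp_params);
    }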
@@ -1209,6 +1209,7 @@ struct proto {
 	void			(*unhash)(struct sock *sk);
 	void			(*rehash)(struct sock *sk);
 	int			(*get_port)(struct sock *sk, unsigned short snum);
+	void			(*put_port)(struct sock *sk);
 #ifdef CONFIG_BPF_SYSCALL
 	int			(*psock_update_sk_prot)(struct sock *sk,
 						struct sk_psock *psock,
@@ -260,6 +260,9 @@ bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
 int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 			       enum xdp_mem_type type, void *allocator);
 void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq);
+int xdp_reg_mem_model(struct xdp_mem_info *mem,
+		      enum xdp_mem_type type, void *allocator);
+void xdp_unreg_mem_model(struct xdp_mem_info *mem);
 
 /* Drivers not supporting XDP metadata can use this helper, which
  * rejects any room expansion for metadata as a result.
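Until now, registering an XDP memory model required an xdp_rxq_info. The new pair operates on a bare xdp_mem_info, for callers that own a page_pool but no rxq (the series wants this for running XDP programs via BPF_PROG_RUN). A sketch under those assumptions, with 'pool' being a page_pool the caller created:

    struct xdp_mem_info mem = {};
    int err;

    err = xdp_reg_mem_model(&mem, MEM_TYPE_PAGE_POOL, pool);
    if (err)
            return err;

    /* ... stamp mem.id/mem.type into xdp_frames built from this pool ... */

    xdp_unreg_mem_model(&mem);      /* on teardown */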
@@ -746,15 +746,9 @@ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
 	list_add(&bq->flush_node, flush_list);
 }
 
-int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
+int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx)
 {
-	struct xdp_frame *xdpf;
-
-	xdpf = xdp_convert_buff_to_frame(xdp);
-	if (unlikely(!xdpf))
-		return -EOVERFLOW;
-
 	/* Info needed when constructing SKB on remote CPU */
 	xdpf->dev_rx = dev_rx;
 
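This hunk (kernel/bpf/cpumap.c, by the function names) belongs to "xdp: Move conversion to xdp_frame out of map functions": the buff-to-frame conversion is hoisted out of each enqueue helper and performed once in the core redirect path. In outline:

    /* Before (sketch): each map enqueue converted for itself:
     *
     *	xdpf = xdp_convert_buff_to_frame(xdp);
     *	if (unlikely(!xdpf))
     *		return -EOVERFLOW;
     *
     * After: xdp_do_redirect() converts once and hands the frame to
     * cpu_map_enqueue()/dev_map_enqueue(); the -EOVERFLOW check on a
     * failed conversion moves with it (see the net/core/filter.c hunks
     * further down). */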
@@ -467,24 +467,19 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 	bq->q[bq->count++] = xdpf;
 }
 
-static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
+static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 				struct net_device *dev_rx,
 				struct bpf_prog *xdp_prog)
 {
-	struct xdp_frame *xdpf;
 	int err;
 
 	if (!dev->netdev_ops->ndo_xdp_xmit)
 		return -EOPNOTSUPP;
 
-	err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
+	err = xdp_ok_fwd_dev(dev, xdpf->len);
 	if (unlikely(err))
 		return err;
 
-	xdpf = xdp_convert_buff_to_frame(xdp);
-	if (unlikely(!xdpf))
-		return -EOVERFLOW;
-
 	bq_enqueue(dev, xdpf, dev_rx, xdp_prog);
 	return 0;
 }
@@ -520,27 +515,27 @@ static u32 dev_map_bpf_prog_run_skb(struct sk_buff *skb, struct bpf_dtab_netdev
 	return act;
 }
 
-int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
+int dev_xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx)
 {
-	return __xdp_enqueue(dev, xdp, dev_rx, NULL);
+	return __xdp_enqueue(dev, xdpf, dev_rx, NULL);
 }
 
-int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf,
 		    struct net_device *dev_rx)
 {
 	struct net_device *dev = dst->dev;
 
-	return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog);
+	return __xdp_enqueue(dev, xdpf, dev_rx, dst->xdp_prog);
 }
 
-static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp)
+static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf)
 {
 	if (!obj ||
 	    !obj->dev->netdev_ops->ndo_xdp_xmit)
 		return false;
 
-	if (xdp_ok_fwd_dev(obj->dev, xdp->data_end - xdp->data))
+	if (xdp_ok_fwd_dev(obj->dev, xdpf->len))
 		return false;
 
 	return true;
@@ -586,14 +581,13 @@ static int get_upper_ifindexes(struct net_device *dev, int *indexes)
 	return n;
 }
 
-int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
 			  struct bpf_map *map, bool exclude_ingress)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct bpf_dtab_netdev *dst, *last_dst = NULL;
 	int excluded_devices[1+MAX_NEST_DEV];
 	struct hlist_head *head;
-	struct xdp_frame *xdpf;
 	int num_excluded = 0;
 	unsigned int i;
 	int err;
@@ -603,15 +597,11 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
 		excluded_devices[num_excluded++] = dev_rx->ifindex;
 	}
 
-	xdpf = xdp_convert_buff_to_frame(xdp);
-	if (unlikely(!xdpf))
-		return -EOVERFLOW;
-
 	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
 		for (i = 0; i < map->max_entries; i++) {
 			dst = rcu_dereference_check(dtab->netdev_map[i],
 						    rcu_read_lock_bh_held());
-			if (!is_valid_dst(dst, xdp))
+			if (!is_valid_dst(dst, xdpf))
 				continue;
 
 			if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex))
@@ -634,7 +624,7 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
 		head = dev_map_index_hash(dtab, i);
 		hlist_for_each_entry_rcu(dst, head, index_hlist,
 					 lockdep_is_held(&dtab->index_lock)) {
-			if (!is_valid_dst(dst, xdp))
+			if (!is_valid_dst(dst, xdpf))
 				continue;
 
 			if (is_ifindex_excluded(excluded_devices, num_excluded,
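One subtlety in the devmap conversion: the forward-size check used to measure the live buffer and now reads the frame's stored length. For linear frames the two agree, because conversion records the length:

    /* xdp_buff:  length = xdp->data_end - xdp->data
     * xdp_frame: xdpf->len, set by xdp_convert_buff_to_frame() from the
     *            same pointers, so xdp_ok_fwd_dev(dev, xdpf->len)
     *            enforces the same bound as before. */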
@@ -6031,6 +6031,7 @@ static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 	}
 
 	if (insn->code == (BPF_JMP | BPF_CALL) &&
+	    insn->src_reg == 0 &&
 	    insn->imm == BPF_FUNC_timer_set_callback) {
 		struct bpf_verifier_state *async_cb;
 
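For BPF_JMP | BPF_CALL, insn->src_reg says what kind of call the instruction is, so imm values from different namespaces must not be compared without checking it first. From the UAPI conventions:

    /* src_reg == 0                     -> helper call; imm is an enum bpf_func_id
     * src_reg == BPF_PSEUDO_CALL       -> call to another BPF subprogram
     * src_reg == BPF_PSEUDO_KFUNC_CALL -> kernel-function call; imm is a BTF id
     *
     * Without the added src_reg check, a kfunc call whose imm happened to
     * equal BPF_FUNC_timer_set_callback could be treated as that helper. */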
@@ -9079,15 +9080,15 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 {
 	if (type_may_be_null(reg->type) && reg->id == id &&
 	    !WARN_ON_ONCE(!reg->id)) {
-		/* Old offset (both fixed and variable parts) should
-		 * have been known-zero, because we don't allow pointer
-		 * arithmetic on pointers that might be NULL.
-		 */
 		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
 				 !tnum_equals_const(reg->var_off, 0) ||
 				 reg->off)) {
-			__mark_reg_known_zero(reg);
-			reg->off = 0;
+			/* Old offset (both fixed and variable parts) should
+			 * have been known-zero, because we don't allow pointer
+			 * arithmetic on pointers that might be NULL. If we
+			 * see this happening, don't convert the register.
+			 */
+			return;
 		}
 		if (is_null) {
 			reg->type = SCALAR_VALUE;
@@ -3957,10 +3957,35 @@ u32 xdp_master_redirect(struct xdp_buff *xdp)
 }
 EXPORT_SYMBOL_GPL(xdp_master_redirect);
 
-int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
-		    struct bpf_prog *xdp_prog)
+static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri,
+					struct net_device *dev,
+					struct xdp_buff *xdp,
+					struct bpf_prog *xdp_prog)
+{
+	enum bpf_map_type map_type = ri->map_type;
+	void *fwd = ri->tgt_value;
+	u32 map_id = ri->map_id;
+	int err;
+
+	ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
+	ri->map_type = BPF_MAP_TYPE_UNSPEC;
+
+	err = __xsk_map_redirect(fwd, xdp);
+	if (unlikely(err))
+		goto err;
+
+	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
+	return 0;
+err:
+	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
+	return err;
+}
+
+static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
+						   struct net_device *dev,
+						   struct xdp_frame *xdpf,
+						   struct bpf_prog *xdp_prog)
 {
-	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
 	enum bpf_map_type map_type = ri->map_type;
 	void *fwd = ri->tgt_value;
 	u32 map_id = ri->map_id;
@@ -3970,6 +3995,11 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 	ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
 	ri->map_type = BPF_MAP_TYPE_UNSPEC;
 
+	if (unlikely(!xdpf)) {
+		err = -EOVERFLOW;
+		goto err;
+	}
+
 	switch (map_type) {
 	case BPF_MAP_TYPE_DEVMAP:
 		fallthrough;
@@ -3977,17 +4007,14 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 		map = READ_ONCE(ri->map);
 		if (unlikely(map)) {
 			WRITE_ONCE(ri->map, NULL);
-			err = dev_map_enqueue_multi(xdp, dev, map,
+			err = dev_map_enqueue_multi(xdpf, dev, map,
 						    ri->flags & BPF_F_EXCLUDE_INGRESS);
 		} else {
-			err = dev_map_enqueue(fwd, xdp, dev);
+			err = dev_map_enqueue(fwd, xdpf, dev);
 		}
 		break;
 	case BPF_MAP_TYPE_CPUMAP:
-		err = cpu_map_enqueue(fwd, xdp, dev);
-		break;
-	case BPF_MAP_TYPE_XSKMAP:
-		err = __xsk_map_redirect(fwd, xdp);
+		err = cpu_map_enqueue(fwd, xdpf, dev);
 		break;
 	case BPF_MAP_TYPE_UNSPEC:
 		if (map_id == INT_MAX) {
@@ -3996,7 +4023,7 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 			err = -EINVAL;
 			break;
 		}
-		err = dev_xdp_enqueue(fwd, xdp, dev);
+		err = dev_xdp_enqueue(fwd, xdpf, dev);
 		break;
 	}
 	fallthrough;
@@ -4013,8 +4040,34 @@ err:
 	_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
 	return err;
 }
+
+int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
+		    struct bpf_prog *xdp_prog)
+{
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+	enum bpf_map_type map_type = ri->map_type;
+
+	if (map_type == BPF_MAP_TYPE_XSKMAP)
+		return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
+
+	return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp),
+				       xdp_prog);
+}
 EXPORT_SYMBOL_GPL(xdp_do_redirect);
+
+int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp,
+			  struct xdp_frame *xdpf, struct bpf_prog *xdp_prog)
+{
+	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+	enum bpf_map_type map_type = ri->map_type;
+
+	if (map_type == BPF_MAP_TYPE_XSKMAP)
+		return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
+
+	return __xdp_do_redirect_frame(ri, dev, xdpf, xdp_prog);
+}
+EXPORT_SYMBOL_GPL(xdp_do_redirect_frame);
 
 static int xdp_do_generic_redirect_map(struct net_device *dev,
 				       struct sk_buff *skb,
 				       struct xdp_buff *xdp,
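Here (net/core/filter.c) the redirect core is split in two. The XSKMAP target copies straight out of the xdp_buff into AF_XDP user memory, so it cannot take a frame; every other target now runs on xdp_frames, converted at most once. The resulting dispatch, paraphrased:

    /* xdp_do_redirect(dev, xdp, prog):
     *     XSKMAP -> __xdp_do_redirect_xsk(ri, dev, xdp, prog)
     *     others -> __xdp_do_redirect_frame(ri, dev,
     *                       xdp_convert_buff_to_frame(xdp), prog)
     *
     * xdp_do_redirect_frame(dev, xdp, xdpf, prog):    (new entry point)
     *     XSKMAP -> as above (still needs the buff)
     *     others -> __xdp_do_redirect_frame(ri, dev, xdpf, prog)
     */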
@@ -4741,12 +4794,14 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
 		switch (optname) {
 		case SO_RCVBUF:
 			val = min_t(u32, val, sysctl_rmem_max);
+			val = min_t(int, val, INT_MAX / 2);
 			sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
 			WRITE_ONCE(sk->sk_rcvbuf,
 				   max_t(int, val * 2, SOCK_MIN_RCVBUF));
 			break;
 		case SO_SNDBUF:
 			val = min_t(u32, val, sysctl_wmem_max);
+			val = min_t(int, val, INT_MAX / 2);
 			sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
 			WRITE_ONCE(sk->sk_sndbuf,
 				   max_t(int, val * 2, SOCK_MIN_SNDBUF));
@@ -4967,6 +5022,12 @@ static int _bpf_getsockopt(struct sock *sk, int level, int optname,
 			goto err_clear;
 
 		switch (optname) {
+		case SO_RCVBUF:
+			*((int *)optval) = sk->sk_rcvbuf;
+			break;
+		case SO_SNDBUF:
+			*((int *)optval) = sk->sk_sndbuf;
+			break;
 		case SO_MARK:
 			*((int *)optval) = sk->sk_mark;
 			break;
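Two fixes meet in these hunks: the setsockopt path clamps val to INT_MAX / 2 so that the stored val * 2 cannot overflow an int, and bpf_getsockopt() learns to read SO_RCVBUF/SO_SNDBUF back (previously they were write-only from BPF). A minimal sketch of a program using the latter; section and program names are illustrative:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    #define SOL_SOCKET 1   /* values from <asm/socket.h>, inlined for the sketch */
    #define SO_SNDBUF  7

    SEC("sockops")
    int log_sndbuf(struct bpf_sock_ops *skops)
    {
            int val = 0;

            if (!bpf_getsockopt(skops, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)))
                    bpf_printk("sk_sndbuf=%d", val);
            return 1;
    }

    char _license[] SEC("license") = "GPL";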
@@ -217,6 +217,8 @@ static void page_pool_set_pp_info(struct page_pool *pool,
 {
 	page->pp = pool;
 	page->pp_magic |= PP_SIGNATURE;
+	if (pool->p.init_callback)
+		pool->p.init_callback(page, pool->p.init_arg);
 }
 
 static void page_pool_clear_pp_info(struct page *page)
@@ -691,10 +693,12 @@ static void page_pool_release_retry(struct work_struct *wq)
 	schedule_delayed_work(&pool->release_dw, DEFER_TIME);
 }
 
-void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *))
+void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
+			   struct xdp_mem_info *mem)
 {
 	refcount_inc(&pool->user_cnt);
 	pool->disconnect = disconnect;
+	pool->xdp_mem_id = mem->id;
 }
 
 void page_pool_destroy(struct page_pool *pool)
@@ -292,15 +292,23 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
 	if (skb_verdict)
 		psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
 
+	/* msg_* and stream_* programs references tracked in psock after this
+	 * point. Reference dec and cleanup will occur through psock destructor
+	 */
 	ret = sock_map_init_proto(sk, psock);
-	if (ret < 0)
-		goto out_drop;
+	if (ret < 0) {
+		sk_psock_put(sk, psock);
+		goto out;
+	}
 
 	write_lock_bh(&sk->sk_callback_lock);
 	if (stream_parser && stream_verdict && !psock->saved_data_ready) {
 		ret = sk_psock_init_strp(sk, psock);
-		if (ret)
-			goto out_unlock_drop;
+		if (ret) {
+			write_unlock_bh(&sk->sk_callback_lock);
+			sk_psock_put(sk, psock);
+			goto out;
+		}
 		sk_psock_start_strp(sk, psock);
 	} else if (!stream_parser && stream_verdict && !psock->saved_data_ready) {
 		sk_psock_start_verdict(sk,psock);
@@ -309,10 +317,6 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk)
 	}
 	write_unlock_bh(&sk->sk_callback_lock);
 	return 0;
-out_unlock_drop:
-	write_unlock_bh(&sk->sk_callback_lock);
-out_drop:
-	sk_psock_put(sk, psock);
 out_progs:
 	if (skb_verdict)
 		bpf_prog_put(skb_verdict);
@@ -325,6 +329,7 @@ out_put_stream_parser:
 out_put_stream_verdict:
 	if (stream_verdict)
 		bpf_prog_put(stream_verdict);
+out:
 	return ret;
 }
 
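The sock_map_link() rework (net/core/sock_map.c) fixes a reference-count bug: once sock_map_init_proto() has run, the psock owns the program references, so the old shared out_drop path, which fell through into the prog-put labels, dropped them a second time. Ownership after the change, as a sketch:

    /* failure before sock_map_init_proto():  goto out_progs;
     *     -> bpf_prog_put() each program reference we still own
     * failure after  sock_map_init_proto():  sk_psock_put(sk, psock);
     *                                        goto out;
     *     -> the psock destructor drops the program references */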
@@ -110,20 +110,15 @@ static void mem_allocator_disconnect(void *allocator)
 	mutex_unlock(&mem_id_lock);
 }
 
-void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
+void xdp_unreg_mem_model(struct xdp_mem_info *mem)
 {
 	struct xdp_mem_allocator *xa;
-	int type = xdp_rxq->mem.type;
-	int id = xdp_rxq->mem.id;
+	int type = mem->type;
+	int id = mem->id;
 
 	/* Reset mem info to defaults */
-	xdp_rxq->mem.id = 0;
-	xdp_rxq->mem.type = 0;
-
-	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
-		WARN(1, "Missing register, driver bug");
-		return;
-	}
+	mem->id = 0;
+	mem->type = 0;
 
 	if (id == 0)
 		return;
@@ -135,6 +130,17 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
 		rcu_read_unlock();
 	}
 }
+EXPORT_SYMBOL_GPL(xdp_unreg_mem_model);
+
+void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
+{
+	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
+		WARN(1, "Missing register, driver bug");
+		return;
+	}
+
+	xdp_unreg_mem_model(&xdp_rxq->mem);
+}
 EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);
 
 void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
@@ -259,28 +265,24 @@ static bool __is_supported_mem_type(enum xdp_mem_type type)
 	return true;
 }
 
-int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
-			       enum xdp_mem_type type, void *allocator)
+static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem,
+						     enum xdp_mem_type type,
+						     void *allocator)
 {
 	struct xdp_mem_allocator *xdp_alloc;
 	gfp_t gfp = GFP_KERNEL;
 	int id, errno, ret;
 	void *ptr;
 
-	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
-		WARN(1, "Missing register, driver bug");
-		return -EFAULT;
-	}
-
 	if (!__is_supported_mem_type(type))
-		return -EOPNOTSUPP;
+		return ERR_PTR(-EOPNOTSUPP);
 
-	xdp_rxq->mem.type = type;
+	mem->type = type;
 
 	if (!allocator) {
 		if (type == MEM_TYPE_PAGE_POOL)
-			return -EINVAL; /* Setup time check page_pool req */
-		return 0;
+			return ERR_PTR(-EINVAL); /* Setup time check page_pool req */
+		return NULL;
 	}
 
 	/* Delay init of rhashtable to save memory if feature isn't used */
@@ -290,13 +292,13 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 		mutex_unlock(&mem_id_lock);
 		if (ret < 0) {
 			WARN_ON(1);
-			return ret;
+			return ERR_PTR(ret);
 		}
 	}
 
 	xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
 	if (!xdp_alloc)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	mutex_lock(&mem_id_lock);
 	id = __mem_id_cyclic_get(gfp);
@@ -304,31 +306,61 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 		errno = id;
 		goto err;
 	}
-	xdp_rxq->mem.id = id;
-	xdp_alloc->mem = xdp_rxq->mem;
+	mem->id = id;
+	xdp_alloc->mem = *mem;
 	xdp_alloc->allocator = allocator;
 
 	/* Insert allocator into ID lookup table */
 	ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
 	if (IS_ERR(ptr)) {
-		ida_simple_remove(&mem_id_pool, xdp_rxq->mem.id);
-		xdp_rxq->mem.id = 0;
+		ida_simple_remove(&mem_id_pool, mem->id);
+		mem->id = 0;
 		errno = PTR_ERR(ptr);
 		goto err;
 	}
 
 	if (type == MEM_TYPE_PAGE_POOL)
-		page_pool_use_xdp_mem(allocator, mem_allocator_disconnect);
+		page_pool_use_xdp_mem(allocator, mem_allocator_disconnect, mem);
 
 	mutex_unlock(&mem_id_lock);
 
-	trace_mem_connect(xdp_alloc, xdp_rxq);
-	return 0;
+	return xdp_alloc;
 err:
 	mutex_unlock(&mem_id_lock);
 	kfree(xdp_alloc);
-	return errno;
+	return ERR_PTR(errno);
 }
+
+int xdp_reg_mem_model(struct xdp_mem_info *mem,
+		      enum xdp_mem_type type, void *allocator)
+{
+	struct xdp_mem_allocator *xdp_alloc;
+
+	xdp_alloc = __xdp_reg_mem_model(mem, type, allocator);
+	if (IS_ERR(xdp_alloc))
+		return PTR_ERR(xdp_alloc);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xdp_reg_mem_model);
+
+int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
+			       enum xdp_mem_type type, void *allocator)
+{
+	struct xdp_mem_allocator *xdp_alloc;
+
+	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
+		WARN(1, "Missing register, driver bug");
+		return -EFAULT;
+	}
+
+	xdp_alloc = __xdp_reg_mem_model(&xdp_rxq->mem, type, allocator);
+	if (IS_ERR(xdp_alloc))
+		return PTR_ERR(xdp_alloc);
+
+	trace_mem_connect(xdp_alloc, xdp_rxq);
+	return 0;
+}
+
 EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
 
 /* XDP RX runs under NAPI protection, and in different delivery error
@@ -531,6 +531,8 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 		err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
 		if (err) {
 			inet->inet_saddr = inet->inet_rcv_saddr = 0;
+			if (sk->sk_prot->put_port)
+				sk->sk_prot->put_port(sk);
 			goto out_release_sock;
 		}
 	}
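This hunk and the __inet6_bind() one further down are the INET_POST_BIND fix: when a cgroup post-bind program rejects the bind, the port already taken by get_port() must be handed back through the new sk_prot->put_port(), otherwise the port stays consumed and a retried bind() can fail spuriously. The proto hunks that follow wire put_port to the matching unhash/put helpers for ping, TCP, and UDP. A rejecting program, for illustration:

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    /* Attached to a cgroup, this makes bind() fail with EPERM; with the
     * fix above, the kernel also releases the port on that path. */
    SEC("cgroup/post_bind4")
    int deny_bind(struct bpf_sock *ctx)
    {
            return 0;   /* 0 = reject */
    }

    char _license[] SEC("license") = "GPL";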
@@ -994,6 +994,7 @@ struct proto ping_prot = {
 	.hash		=	ping_hash,
 	.unhash		=	ping_unhash,
 	.get_port	=	ping_get_port,
+	.put_port	=	ping_unhash,
 	.obj_size	=	sizeof(struct inet_sock),
 };
 EXPORT_SYMBOL(ping_prot);
@@ -196,12 +196,39 @@ msg_bytes_ready:
 		long timeo;
 		int data;
 
+		if (sock_flag(sk, SOCK_DONE))
+			goto out;
+
+		if (sk->sk_err) {
+			copied = sock_error(sk);
+			goto out;
+		}
+
+		if (sk->sk_shutdown & RCV_SHUTDOWN)
+			goto out;
+
+		if (sk->sk_state == TCP_CLOSE) {
+			copied = -ENOTCONN;
+			goto out;
+		}
+
 		timeo = sock_rcvtimeo(sk, nonblock);
+		if (!timeo) {
+			copied = -EAGAIN;
+			goto out;
+		}
+
+		if (signal_pending(current)) {
+			copied = sock_intr_errno(timeo);
+			goto out;
+		}
+
 		data = tcp_msg_wait_data(sk, psock, timeo);
 		if (data && !sk_psock_queue_empty(psock))
 			goto msg_bytes_ready;
 		copied = -EAGAIN;
 	}
+out:
 	release_sock(sk);
 	sk_psock_put(sk, psock);
 	return copied;
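The added checks mirror what plain tcp_recvmsg() does before sleeping: without them, a reader on a sockmap-managed socket could block for the full timeout even though the socket was already closed, shut down, or had a signal pending. From userspace the change looks roughly like this (illustrative; 'fd' is a TCP socket under a sockmap verdict program):

    char buf[256];
    ssize_t n = recv(fd, buf, sizeof(buf), 0);

    if (n == 0)
            ;   /* orderly shutdown is now reported promptly */
    else if (n < 0 && errno == ENOTCONN)
            ;   /* already-closed socket instead of a silent timeout */
    else if (n < 0 && errno == EINTR)
            ;   /* a pending signal now interrupts the wait */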
@@ -3076,6 +3076,7 @@ struct proto tcp_prot = {
 	.hash			= inet_hash,
 	.unhash			= inet_unhash,
 	.get_port		= inet_csk_get_port,
+	.put_port		= inet_put_port,
 #ifdef CONFIG_BPF_SYSCALL
 	.psock_update_sk_prot	= tcp_bpf_update_proto,
 #endif
@@ -2927,6 +2927,7 @@ struct proto udp_prot = {
 	.unhash			= udp_lib_unhash,
 	.rehash			= udp_v4_rehash,
 	.get_port		= udp_v4_get_port,
+	.put_port		= udp_lib_unhash,
 #ifdef CONFIG_BPF_SYSCALL
 	.psock_update_sk_prot	= udp_bpf_update_proto,
 #endif
@@ -413,6 +413,8 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 		if (err) {
 			sk->sk_ipv6only = saved_ipv6only;
 			inet_reset_saddr(sk);
+			if (sk->sk_prot->put_port)
+				sk->sk_prot->put_port(sk);
 			goto out;
 		}
 	}
@@ -177,6 +177,7 @@ struct proto pingv6_prot = {
 	.hash		=	ping_hash,
 	.unhash		=	ping_unhash,
 	.get_port	=	ping_get_port,
+	.put_port	=	ping_unhash,
 	.obj_size	=	sizeof(struct raw6_sock),
 };
 EXPORT_SYMBOL_GPL(pingv6_prot);
@@ -2181,6 +2181,7 @@ struct proto tcpv6_prot = {
 	.hash			= inet6_hash,
 	.unhash			= inet_unhash,
 	.get_port		= inet_csk_get_port,
+	.put_port		= inet_put_port,
 #ifdef CONFIG_BPF_SYSCALL
 	.psock_update_sk_prot	= tcp_bpf_update_proto,
 #endif
@@ -1733,6 +1733,7 @@ struct proto udpv6_prot = {
 	.unhash			= udp_lib_unhash,
 	.rehash			= udp_v6_rehash,
 	.get_port		= udp_v6_get_port,
+	.put_port		= udp_lib_unhash,
 #ifdef CONFIG_BPF_SYSCALL
 	.psock_update_sk_prot	= udp_bpf_update_proto,
 #endif
@@ -14,6 +14,7 @@
 #include <arpa/inet.h>
 #include <locale.h>
 #include <net/ethernet.h>
+#include <netinet/ether.h>
 #include <net/if.h>
 #include <poll.h>
 #include <pthread.h>
@@ -30,6 +31,7 @@
 #include <sys/un.h>
 #include <time.h>
 #include <unistd.h>
+#include <sched.h>
 
 #include <bpf/libbpf.h>
 #include <bpf/xsk.h>
@@ -56,12 +58,27 @@
 
 #define DEBUG_HEXDUMP 0
 
+#define VLAN_PRIO_MASK		0xe000 /* Priority Code Point */
+#define VLAN_PRIO_SHIFT		13
+#define VLAN_VID_MASK		0x0fff /* VLAN Identifier */
+#define VLAN_VID__DEFAULT	1
+#define VLAN_PRI__DEFAULT	0
+
+#define NSEC_PER_SEC		1000000000UL
+#define NSEC_PER_USEC		1000
+
+#define SCHED_PRI__DEFAULT	0
+
 typedef __u64 u64;
 typedef __u32 u32;
 typedef __u16 u16;
 typedef __u8  u8;
 
 static unsigned long prev_time;
+static long tx_cycle_diff_min;
+static long tx_cycle_diff_max;
+static double tx_cycle_diff_ave;
+static long tx_cycle_cnt;
 
 enum benchmark_type {
 	BENCH_RXDROP = 0,
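These are the samples/bpf xdpsock additions (VLAN, MAC, clockid, scheduling, cyclic-TX support). The 802.1Q tag built later in gen_eth_hdr_data() packs a 3-bit priority and a 12-bit VLAN ID into the 16-bit TCI using the masks above. Worked example with PRI 5, VID 100:

    u16 vlan_tci = 0;

    vlan_tci  = 100 & VLAN_VID_MASK;                      /* 0x0064 */
    vlan_tci |= (5 << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;  /* 0xa000 */
    /* vlan_tci == 0xa064, written to the wire as htons(vlan_tci) */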
@@ -81,14 +98,23 @@ static u32 opt_batch_size = 64;
 static int opt_pkt_count;
 static u16 opt_pkt_size = MIN_PKT_SIZE;
 static u32 opt_pkt_fill_pattern = 0x12345678;
+static bool opt_vlan_tag;
+static u16 opt_pkt_vlan_id = VLAN_VID__DEFAULT;
+static u16 opt_pkt_vlan_pri = VLAN_PRI__DEFAULT;
+static struct ether_addr opt_txdmac = {{ 0x3c, 0xfd, 0xfe,
+					 0x9e, 0x7f, 0x71 }};
+static struct ether_addr opt_txsmac = {{ 0xec, 0xb1, 0xd7,
+					 0x98, 0x3a, 0xc0 }};
 static bool opt_extra_stats;
 static bool opt_quiet;
 static bool opt_app_stats;
 static const char *opt_irq_str = "";
 static u32 irq_no;
 static int irqs_at_init = -1;
+static u32 sequence;
 static int opt_poll;
 static int opt_interval = 1;
+static int opt_retries = 3;
 static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
 static u32 opt_umem_flags;
 static int opt_unaligned_chunks;
@@ -100,6 +126,27 @@ static u32 opt_num_xsks = 1;
 static u32 prog_id;
 static bool opt_busy_poll;
 static bool opt_reduced_cap;
+static clockid_t opt_clock = CLOCK_MONOTONIC;
+static unsigned long opt_tx_cycle_ns;
+static int opt_schpolicy = SCHED_OTHER;
+static int opt_schprio = SCHED_PRI__DEFAULT;
+static bool opt_tstamp;
+
+struct vlan_ethhdr {
+	unsigned char h_dest[6];
+	unsigned char h_source[6];
+	__be16 h_vlan_proto;
+	__be16 h_vlan_TCI;
+	__be16 h_vlan_encapsulated_proto;
+};
+
+#define PKTGEN_MAGIC 0xbe9be955
+struct pktgen_hdr {
+	__be32 pgh_magic;
+	__be32 seq_num;
+	__be32 tv_sec;
+	__be32 tv_usec;
+};
+
 struct xsk_ring_stats {
 	unsigned long rx_npkts;
@@ -156,15 +203,63 @@ struct xsk_socket_info {
 	u32 outstanding_tx;
 };
 
+static const struct clockid_map {
+	const char *name;
+	clockid_t clockid;
+} clockids_map[] = {
+	{ "REALTIME", CLOCK_REALTIME },
+	{ "TAI", CLOCK_TAI },
+	{ "BOOTTIME", CLOCK_BOOTTIME },
+	{ "MONOTONIC", CLOCK_MONOTONIC },
+	{ NULL }
+};
+
+static const struct sched_map {
+	const char *name;
+	int policy;
+} schmap[] = {
+	{ "OTHER", SCHED_OTHER },
+	{ "FIFO", SCHED_FIFO },
+	{ NULL }
+};
+
 static int num_socks;
 struct xsk_socket_info *xsks[MAX_SOCKS];
 int sock;
 
+static int get_clockid(clockid_t *id, const char *name)
+{
+	const struct clockid_map *clk;
+
+	for (clk = clockids_map; clk->name; clk++) {
+		if (strcasecmp(clk->name, name) == 0) {
+			*id = clk->clockid;
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+static int get_schpolicy(int *policy, const char *name)
+{
+	const struct sched_map *sch;
+
+	for (sch = schmap; sch->name; sch++) {
+		if (strcasecmp(sch->name, name) == 0) {
+			*policy = sch->policy;
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
 static unsigned long get_nsecs(void)
 {
 	struct timespec ts;
 
-	clock_gettime(CLOCK_MONOTONIC, &ts);
+	clock_gettime(opt_clock, &ts);
 	return ts.tv_sec * 1000000000UL + ts.tv_nsec;
 }
 
@@ -257,6 +352,15 @@ static void dump_app_stats(long dt)
 		xsks[i]->app_stats.prev_tx_wakeup_sendtos = xsks[i]->app_stats.tx_wakeup_sendtos;
 		xsks[i]->app_stats.prev_opt_polls = xsks[i]->app_stats.opt_polls;
 	}
+
+	if (opt_tx_cycle_ns) {
+		printf("\n%-18s %-10s %-10s %-10s %-10s %-10s\n",
+		       "", "period", "min", "ave", "max", "cycle");
+		printf("%-18s %-10lu %-10lu %-10lu %-10lu %-10lu\n",
+		       "Cyclic TX", opt_tx_cycle_ns, tx_cycle_diff_min,
+		       (long)(tx_cycle_diff_ave / tx_cycle_cnt),
+		       tx_cycle_diff_max, tx_cycle_cnt);
+	}
 }
 
 static bool get_interrupt_number(void)
@@ -740,29 +844,69 @@ static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len,
 
 #define ETH_FCS_SIZE 4
 
-#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
-		      sizeof(struct udphdr))
+#define ETH_HDR_SIZE (opt_vlan_tag ? sizeof(struct vlan_ethhdr) : \
+		      sizeof(struct ethhdr))
+#define PKTGEN_HDR_SIZE (opt_tstamp ? sizeof(struct pktgen_hdr) : 0)
+#define PKT_HDR_SIZE (ETH_HDR_SIZE + sizeof(struct iphdr) + \
+		      sizeof(struct udphdr) + PKTGEN_HDR_SIZE)
+#define PKTGEN_HDR_OFFSET (ETH_HDR_SIZE + sizeof(struct iphdr) + \
+			   sizeof(struct udphdr))
+#define PKTGEN_SIZE_MIN (PKTGEN_HDR_OFFSET + sizeof(struct pktgen_hdr) + \
+			 ETH_FCS_SIZE)
 
 #define PKT_SIZE		(opt_pkt_size - ETH_FCS_SIZE)
-#define IP_PKT_SIZE		(PKT_SIZE - sizeof(struct ethhdr))
+#define IP_PKT_SIZE		(PKT_SIZE - ETH_HDR_SIZE)
 #define UDP_PKT_SIZE		(IP_PKT_SIZE - sizeof(struct iphdr))
-#define UDP_PKT_DATA_SIZE	(UDP_PKT_SIZE - sizeof(struct udphdr))
+#define UDP_PKT_DATA_SIZE	(UDP_PKT_SIZE - \
+				 (sizeof(struct udphdr) + PKTGEN_HDR_SIZE))
 
 static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
 
 static void gen_eth_hdr_data(void)
 {
-	struct udphdr *udp_hdr = (struct udphdr *)(pkt_data +
-						   sizeof(struct ethhdr) +
-						   sizeof(struct iphdr));
-	struct iphdr *ip_hdr = (struct iphdr *)(pkt_data +
-						sizeof(struct ethhdr));
-	struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
+	struct pktgen_hdr *pktgen_hdr;
+	struct udphdr *udp_hdr;
+	struct iphdr *ip_hdr;
+
+	if (opt_vlan_tag) {
+		struct vlan_ethhdr *veth_hdr = (struct vlan_ethhdr *)pkt_data;
+		u16 vlan_tci = 0;
+
+		udp_hdr = (struct udphdr *)(pkt_data +
+					    sizeof(struct vlan_ethhdr) +
+					    sizeof(struct iphdr));
+		ip_hdr = (struct iphdr *)(pkt_data +
+					  sizeof(struct vlan_ethhdr));
+		pktgen_hdr = (struct pktgen_hdr *)(pkt_data +
+						   sizeof(struct vlan_ethhdr) +
+						   sizeof(struct iphdr) +
+						   sizeof(struct udphdr));
+		/* ethernet & VLAN header */
+		memcpy(veth_hdr->h_dest, &opt_txdmac, ETH_ALEN);
+		memcpy(veth_hdr->h_source, &opt_txsmac, ETH_ALEN);
+		veth_hdr->h_vlan_proto = htons(ETH_P_8021Q);
+		vlan_tci = opt_pkt_vlan_id & VLAN_VID_MASK;
+		vlan_tci |= (opt_pkt_vlan_pri << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
+		veth_hdr->h_vlan_TCI = htons(vlan_tci);
+		veth_hdr->h_vlan_encapsulated_proto = htons(ETH_P_IP);
+	} else {
+		struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
+
+		udp_hdr = (struct udphdr *)(pkt_data +
+					    sizeof(struct ethhdr) +
+					    sizeof(struct iphdr));
+		ip_hdr = (struct iphdr *)(pkt_data +
+					  sizeof(struct ethhdr));
+		pktgen_hdr = (struct pktgen_hdr *)(pkt_data +
+						   sizeof(struct ethhdr) +
+						   sizeof(struct iphdr) +
+						   sizeof(struct udphdr));
+		/* ethernet header */
+		memcpy(eth_hdr->h_dest, &opt_txdmac, ETH_ALEN);
+		memcpy(eth_hdr->h_source, &opt_txsmac, ETH_ALEN);
+		eth_hdr->h_proto = htons(ETH_P_IP);
+	}
 
-	/* ethernet header */
-	memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN);
-	memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN);
-	eth_hdr->h_proto = htons(ETH_P_IP);
 
 	/* IP header */
 	ip_hdr->version = IPVERSION;
|
ip_hdr->version = IPVERSION;
|
||||||
@@ -785,6 +929,9 @@ static void gen_eth_hdr_data(void)
|
|||||||
udp_hdr->dest = htons(0x1000);
|
udp_hdr->dest = htons(0x1000);
|
||||||
udp_hdr->len = htons(UDP_PKT_SIZE);
|
udp_hdr->len = htons(UDP_PKT_SIZE);
|
||||||
|
|
||||||
|
if (opt_tstamp)
|
||||||
|
pktgen_hdr->pgh_magic = htonl(PKTGEN_MAGIC);
|
||||||
|
|
||||||
/* UDP data */
|
/* UDP data */
|
||||||
memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern,
|
memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern,
|
||||||
UDP_PKT_DATA_SIZE);
|
UDP_PKT_DATA_SIZE);
|
||||||
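The vlan_tci packing in gen_eth_hdr_data() above follows 802.1Q: the 12-bit VLAN ID fills the low bits of the tag control information and the 3-bit priority sits at bit 13. A minimal standalone sketch of the same arithmetic, assuming the mask and shift values mirror <linux/if_vlan.h> (illustration only, not code from the patch):

#include <stdint.h>
#include <stdio.h>

#define VLAN_VID_MASK	0x0fff	/* bits 0-11: VLAN identifier */
#define VLAN_PRIO_SHIFT	13	/* bits 13-15: priority code point */
#define VLAN_PRIO_MASK	0xe000

static uint16_t pack_tci(uint16_t vid, uint16_t pri)
{
	uint16_t tci = vid & VLAN_VID_MASK;

	tci |= (pri << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
	return tci;		/* still host order; htons() before the wire */
}

int main(void)
{
	printf("0x%04x\n", pack_tci(100, 5));	/* VID 100, PCP 5 -> 0xa064 */
	return 0;
}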
@@ -908,6 +1055,7 @@ static struct option long_options[] = {
 	{"xdp-skb", no_argument, 0, 'S'},
 	{"xdp-native", no_argument, 0, 'N'},
 	{"interval", required_argument, 0, 'n'},
+	{"retries", required_argument, 0, 'O'},
 	{"zero-copy", no_argument, 0, 'z'},
 	{"copy", no_argument, 0, 'c'},
 	{"frame-size", required_argument, 0, 'f'},
@@ -916,10 +1064,20 @@ static struct option long_options[] = {
 	{"shared-umem", no_argument, 0, 'M'},
 	{"force", no_argument, 0, 'F'},
 	{"duration", required_argument, 0, 'd'},
+	{"clock", required_argument, 0, 'w'},
 	{"batch-size", required_argument, 0, 'b'},
 	{"tx-pkt-count", required_argument, 0, 'C'},
 	{"tx-pkt-size", required_argument, 0, 's'},
 	{"tx-pkt-pattern", required_argument, 0, 'P'},
+	{"tx-vlan", no_argument, 0, 'V'},
+	{"tx-vlan-id", required_argument, 0, 'J'},
+	{"tx-vlan-pri", required_argument, 0, 'K'},
+	{"tx-dmac", required_argument, 0, 'G'},
+	{"tx-smac", required_argument, 0, 'H'},
+	{"tx-cycle", required_argument, 0, 'T'},
+	{"tstamp", no_argument, 0, 'y'},
+	{"policy", required_argument, 0, 'W'},
+	{"schpri", required_argument, 0, 'U'},
 	{"extra-stats", no_argument, 0, 'x'},
 	{"quiet", no_argument, 0, 'Q'},
 	{"app-stats", no_argument, 0, 'a'},
@@ -943,6 +1101,7 @@ static void usage(const char *prog)
 		"  -S, --xdp-skb=n	Use XDP skb-mod\n"
 		"  -N, --xdp-native=n	Enforce XDP native mode\n"
 		"  -n, --interval=n	Specify statistics update interval (default 1 sec).\n"
+		"  -O, --retries=n	Specify time-out retries (1s interval) attempt (default 3).\n"
 		"  -z, --zero-copy	Force zero-copy mode.\n"
 		"  -c, --copy		Force copy mode.\n"
 		"  -m, --no-need-wakeup	Turn off use of driver need wakeup flag.\n"
@@ -952,6 +1111,7 @@ static void usage(const char *prog)
 		"  -F, --force		Force loading the XDP prog\n"
 		"  -d, --duration=n	Duration in secs to run command.\n"
 		"			Default: forever.\n"
+		"  -w, --clock=CLOCK	Clock NAME (default MONOTONIC).\n"
 		"  -b, --batch-size=n	Batch size for sending or receiving\n"
 		"			packets. Default: %d\n"
 		"  -C, --tx-pkt-count=n	Number of packets to send.\n"
@@ -960,6 +1120,15 @@ static void usage(const char *prog)
 		"			(Default: %d bytes)\n"
 		"			Min size: %d, Max size %d.\n"
 		"  -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n"
+		"  -V, --tx-vlan	Send VLAN tagged packets (For -t|--txonly)\n"
+		"  -J, --tx-vlan-id=n	Tx VLAN ID [1-4095]. Default: %d (For -V|--tx-vlan)\n"
+		"  -K, --tx-vlan-pri=n	Tx VLAN Priority [0-7]. Default: %d (For -V|--tx-vlan)\n"
+		"  -G, --tx-dmac=<MAC>	Dest MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n"
+		"  -H, --tx-smac=<MAC>	Src MAC addr of TX frame in aa:bb:cc:dd:ee:ff format (For -V|--tx-vlan)\n"
+		"  -T, --tx-cycle=n	Tx cycle time in micro-seconds (For -t|--txonly).\n"
+		"  -y, --tstamp		Add time-stamp to packet (For -t|--txonly).\n"
+		"  -W, --policy=POLICY	Schedule policy. Default: SCHED_OTHER\n"
+		"  -U, --schpri=n	Schedule priority. Default: %d\n"
 		"  -x, --extra-stats	Display extra statistics.\n"
 		"  -Q, --quiet		Do not display any stats.\n"
 		"  -a, --app-stats	Display application (syscall) statistics.\n"
@@ -969,7 +1138,9 @@ static void usage(const char *prog)
 		"\n";
 	fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE,
 		opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE,
-		XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern);
+		XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern,
+		VLAN_VID__DEFAULT, VLAN_PRI__DEFAULT,
+		SCHED_PRI__DEFAULT);
 
 	exit(EXIT_FAILURE);
 }
@@ -981,7 +1152,8 @@ static void parse_command_line(int argc, char **argv)
 	opterr = 0;
 
 	for (;;) {
-		c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:BR",
+		c = getopt_long(argc, argv,
+				"Frtli:q:pSNn:w:O:czf:muMd:b:C:s:P:VJ:K:G:H:T:yW:U:xQaI:BR",
 				long_options, &option_index);
 		if (c == -1)
 			break;
@@ -1015,6 +1187,17 @@ static void parse_command_line(int argc, char **argv)
 		case 'n':
 			opt_interval = atoi(optarg);
 			break;
+		case 'w':
+			if (get_clockid(&opt_clock, optarg)) {
+				fprintf(stderr,
+					"ERROR: Invalid clock %s. Default to CLOCK_MONOTONIC.\n",
+					optarg);
+				opt_clock = CLOCK_MONOTONIC;
+			}
+			break;
+		case 'O':
+			opt_retries = atoi(optarg);
+			break;
 		case 'z':
 			opt_xdp_bind_flags |= XDP_ZEROCOPY;
 			break;
@@ -1062,6 +1245,49 @@ static void parse_command_line(int argc, char **argv)
 		case 'P':
 			opt_pkt_fill_pattern = strtol(optarg, NULL, 16);
 			break;
+		case 'V':
+			opt_vlan_tag = true;
+			break;
+		case 'J':
+			opt_pkt_vlan_id = atoi(optarg);
+			break;
+		case 'K':
+			opt_pkt_vlan_pri = atoi(optarg);
+			break;
+		case 'G':
+			if (!ether_aton_r(optarg,
+					  (struct ether_addr *)&opt_txdmac)) {
+				fprintf(stderr, "Invalid dmac address:%s\n",
+					optarg);
+				usage(basename(argv[0]));
+			}
+			break;
+		case 'H':
+			if (!ether_aton_r(optarg,
+					  (struct ether_addr *)&opt_txsmac)) {
+				fprintf(stderr, "Invalid smac address:%s\n",
+					optarg);
+				usage(basename(argv[0]));
+			}
+			break;
+		case 'T':
+			opt_tx_cycle_ns = atoi(optarg);
+			opt_tx_cycle_ns *= NSEC_PER_USEC;
+			break;
+		case 'y':
+			opt_tstamp = 1;
+			break;
+		case 'W':
+			if (get_schpolicy(&opt_schpolicy, optarg)) {
+				fprintf(stderr,
+					"ERROR: Invalid policy %s. Default to SCHED_OTHER.\n",
+					optarg);
+				opt_schpolicy = SCHED_OTHER;
+			}
+			break;
+		case 'U':
+			opt_schprio = atoi(optarg);
+			break;
 		case 'x':
 			opt_extra_stats = 1;
 			break;
@@ -1267,16 +1493,22 @@ static void rx_drop_all(void)
 	}
 }
 
-static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
+static int tx_only(struct xsk_socket_info *xsk, u32 *frame_nb,
+		   int batch_size, unsigned long tx_ns)
 {
-	u32 idx;
+	u32 idx, tv_sec, tv_usec;
 	unsigned int i;
 
 	while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) <
 	       batch_size) {
 		complete_tx_only(xsk, batch_size);
 		if (benchmark_done)
-			return;
+			return 0;
+	}
+
+	if (opt_tstamp) {
+		tv_sec = (u32)(tx_ns / NSEC_PER_SEC);
+		tv_usec = (u32)((tx_ns % NSEC_PER_SEC) / 1000);
 	}
 
 	for (i = 0; i < batch_size; i++) {
@@ -1284,6 +1516,21 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
 								  idx + i);
 		tx_desc->addr = (*frame_nb + i) * opt_xsk_frame_size;
 		tx_desc->len = PKT_SIZE;
+
+		if (opt_tstamp) {
+			struct pktgen_hdr *pktgen_hdr;
+			u64 addr = tx_desc->addr;
+			char *pkt;
+
+			pkt = xsk_umem__get_data(xsk->umem->buffer, addr);
+			pktgen_hdr = (struct pktgen_hdr *)(pkt + PKTGEN_HDR_OFFSET);
+
+			pktgen_hdr->seq_num = htonl(sequence++);
+			pktgen_hdr->tv_sec = htonl(tv_sec);
+			pktgen_hdr->tv_usec = htonl(tv_usec);
+
+			hex_dump(pkt, PKT_SIZE, addr);
+		}
 	}
 
 	xsk_ring_prod__submit(&xsk->tx, batch_size);
@@ -1292,6 +1539,8 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size)
 	*frame_nb += batch_size;
 	*frame_nb %= NUM_FRAMES;
 	complete_tx_only(xsk, batch_size);
+
+	return batch_size;
 }
 
 static inline int get_batch_size(int pkt_cnt)
@@ -1318,23 +1567,48 @@ static void complete_tx_only_all(void)
 				pending = !!xsks[i]->outstanding_tx;
 			}
 		}
-	} while (pending);
+		sleep(1);
+	} while (pending && opt_retries-- > 0);
 }
 
 static void tx_only_all(void)
 {
 	struct pollfd fds[MAX_SOCKS] = {};
 	u32 frame_nb[MAX_SOCKS] = {};
+	unsigned long next_tx_ns = 0;
 	int pkt_cnt = 0;
 	int i, ret;
 
+	if (opt_poll && opt_tx_cycle_ns) {
+		fprintf(stderr,
+			"Error: --poll and --tx-cycles are both set\n");
+		return;
+	}
+
 	for (i = 0; i < num_socks; i++) {
 		fds[0].fd = xsk_socket__fd(xsks[i]->xsk);
 		fds[0].events = POLLOUT;
 	}
 
+	if (opt_tx_cycle_ns) {
+		/* Align Tx time to micro-second boundary */
+		next_tx_ns = (get_nsecs() / NSEC_PER_USEC + 1) *
+			     NSEC_PER_USEC;
+		next_tx_ns += opt_tx_cycle_ns;
+
+		/* Initialize periodic Tx scheduling variance */
+		tx_cycle_diff_min = 1000000000;
+		tx_cycle_diff_max = 0;
+		tx_cycle_diff_ave = 0.0;
+	}
+
 	while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
 		int batch_size = get_batch_size(pkt_cnt);
+		unsigned long tx_ns = 0;
+		struct timespec next;
+		int tx_cnt = 0;
+		long diff;
+		int err;
 
 		if (opt_poll) {
 			for (i = 0; i < num_socks; i++)
@@ -1347,13 +1621,43 @@ static void tx_only_all(void)
 				continue;
 		}
 
-		for (i = 0; i < num_socks; i++)
-			tx_only(xsks[i], &frame_nb[i], batch_size);
+		if (opt_tx_cycle_ns) {
+			next.tv_sec = next_tx_ns / NSEC_PER_SEC;
+			next.tv_nsec = next_tx_ns % NSEC_PER_SEC;
+			err = clock_nanosleep(opt_clock, TIMER_ABSTIME, &next, NULL);
+			if (err) {
+				if (err != EINTR)
+					fprintf(stderr,
+						"clock_nanosleep failed. Err:%d errno:%d\n",
+						err, errno);
+				break;
+			}
 
-		pkt_cnt += batch_size;
+			/* Measure periodic Tx scheduling variance */
+			tx_ns = get_nsecs();
+			diff = tx_ns - next_tx_ns;
+			if (diff < tx_cycle_diff_min)
+				tx_cycle_diff_min = diff;
+
+			if (diff > tx_cycle_diff_max)
+				tx_cycle_diff_max = diff;
+
+			tx_cycle_diff_ave += (double)diff;
+			tx_cycle_cnt++;
+		} else if (opt_tstamp) {
+			tx_ns = get_nsecs();
+		}
+
+		for (i = 0; i < num_socks; i++)
+			tx_cnt += tx_only(xsks[i], &frame_nb[i], batch_size, tx_ns);
+
+		pkt_cnt += tx_cnt;
 
 		if (benchmark_done)
 			break;
+
+		if (opt_tx_cycle_ns)
+			next_tx_ns += opt_tx_cycle_ns;
 	}
 
 	if (opt_pkt_count)
@@ -1584,6 +1888,7 @@ int main(int argc, char **argv)
 	struct __user_cap_data_struct data[2] = { { 0 } };
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
 	bool rx = false, tx = false;
+	struct sched_param schparam;
 	struct xsk_umem_info *umem;
 	struct bpf_object *obj;
 	int xsks_map_fd = 0;
@@ -1646,6 +1951,9 @@ int main(int argc, char **argv)
 		apply_setsockopt(xsks[i]);
 
 	if (opt_bench == BENCH_TXONLY) {
+		if (opt_tstamp && opt_pkt_size < PKTGEN_SIZE_MIN)
+			opt_pkt_size = PKTGEN_SIZE_MIN;
+
 		gen_eth_hdr_data();
 
 		for (i = 0; i < NUM_FRAMES; i++)
@@ -1685,6 +1993,16 @@ int main(int argc, char **argv)
 	prev_time = get_nsecs();
 	start_time = prev_time;
 
+	/* Configure sched priority for better wake-up accuracy */
+	memset(&schparam, 0, sizeof(schparam));
+	schparam.sched_priority = opt_schprio;
+	ret = sched_setscheduler(0, opt_schpolicy, &schparam);
+	if (ret) {
+		fprintf(stderr, "Error(%d) in setting priority(%d): %s\n",
+			errno, opt_schprio, strerror(errno));
+		goto out;
+	}
+
 	if (opt_bench == BENCH_RXDROP)
 		rx_drop_all();
 	else if (opt_bench == BENCH_TXONLY)
@@ -1692,6 +2010,7 @@ int main(int argc, char **argv)
 	else
 		l2fwd_all();
 
+out:
 	benchmark_done = true;
 
 	if (!opt_quiet)
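For context on the cyclic-TX hunks above: the sample sleeps to an absolute deadline with clock_nanosleep(TIMER_ABSTIME) and then advances the deadline by one period, so scheduling jitter does not accumulate the way back-to-back relative sleeps would. A minimal sketch of that pattern on a POSIX system (illustration only, not code from the patch):

#include <stdio.h>
#include <time.h>

#define NSEC_PER_SEC 1000000000L

int main(void)
{
	long period_ns = 1000000;	/* 1 ms cycle */
	struct timespec next;
	int i;

	clock_gettime(CLOCK_MONOTONIC, &next);
	for (i = 0; i < 5; i++) {
		next.tv_nsec += period_ns;
		if (next.tv_nsec >= NSEC_PER_SEC) {
			next.tv_nsec -= NSEC_PER_SEC;
			next.tv_sec++;
		}
		/* wake at the deadline itself, not deadline plus drift */
		clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &next, NULL);
		printf("tick %d\n", i);		/* send one batch here */
	}
	return 0;
}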
@@ -642,6 +642,30 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
 		printf("\n");
 }
 
+static void
+probe_misc_feature(struct bpf_insn *insns, size_t len,
+		   const char *define_prefix, __u32 ifindex,
+		   const char *feat_name, const char *plain_name,
+		   const char *define_name)
+{
+	LIBBPF_OPTS(bpf_prog_load_opts, opts,
+		.prog_ifindex = ifindex,
+	);
+	bool res;
+	int fd;
+
+	errno = 0;
+	fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
+			   insns, len, &opts);
+	res = fd >= 0 || !errno;
+
+	if (fd >= 0)
+		close(fd);
+
+	print_bool_feature(feat_name, plain_name, define_name, res,
+			   define_prefix);
+}
+
 /*
  * Probe for availability of kernel commit (5.3):
  *
@@ -649,29 +673,81 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
  */
 static void probe_large_insn_limit(const char *define_prefix, __u32 ifindex)
 {
-	LIBBPF_OPTS(bpf_prog_load_opts, opts,
-		.prog_ifindex = ifindex,
-	);
 	struct bpf_insn insns[BPF_MAXINSNS + 1];
-	bool res;
-	int i, fd;
+	int i;
 
 	for (i = 0; i < BPF_MAXINSNS; i++)
 		insns[i] = BPF_MOV64_IMM(BPF_REG_0, 1);
 	insns[BPF_MAXINSNS] = BPF_EXIT_INSN();
 
-	errno = 0;
-	fd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, NULL, "GPL",
-			   insns, ARRAY_SIZE(insns), &opts);
-	res = fd >= 0 || (errno != E2BIG && errno != EINVAL);
-
-	if (fd >= 0)
-		close(fd);
-
-	print_bool_feature("have_large_insn_limit",
+	probe_misc_feature(insns, ARRAY_SIZE(insns),
+			   define_prefix, ifindex,
+			   "have_large_insn_limit",
 			   "Large program size limit",
-			   "LARGE_INSN_LIMIT",
-			   res, define_prefix);
+			   "LARGE_INSN_LIMIT");
+}
+
+/*
+ * Probe for bounded loop support introduced in commit 2589726d12a1
+ * ("bpf: introduce bounded loops").
+ */
+static void
+probe_bounded_loops(const char *define_prefix, __u32 ifindex)
+{
+	struct bpf_insn insns[4] = {
+		BPF_MOV64_IMM(BPF_REG_0, 10),
+		BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, -2),
+		BPF_EXIT_INSN()
+	};
+
+	probe_misc_feature(insns, ARRAY_SIZE(insns),
+			   define_prefix, ifindex,
+			   "have_bounded_loops",
+			   "Bounded loop support",
+			   "BOUNDED_LOOPS");
+}
+
+/*
+ * Probe for the v2 instruction set extension introduced in commit 92b31a9af73b
+ * ("bpf: add BPF_J{LT,LE,SLT,SLE} instructions").
+ */
+static void
+probe_v2_isa_extension(const char *define_prefix, __u32 ifindex)
+{
+	struct bpf_insn insns[4] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 0, 1),
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN()
+	};
+
+	probe_misc_feature(insns, ARRAY_SIZE(insns),
+			   define_prefix, ifindex,
+			   "have_v2_isa_extension",
+			   "ISA extension v2",
+			   "V2_ISA_EXTENSION");
+}
+
+/*
+ * Probe for the v3 instruction set extension introduced in commit 092ed0968bb6
+ * ("bpf: verifier support JMP32").
+ */
+static void
+probe_v3_isa_extension(const char *define_prefix, __u32 ifindex)
+{
+	struct bpf_insn insns[4] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_JMP32_IMM(BPF_JLT, BPF_REG_0, 0, 1),
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN()
+	};
+
+	probe_misc_feature(insns, ARRAY_SIZE(insns),
+			   define_prefix, ifindex,
+			   "have_v3_isa_extension",
+			   "ISA extension v3",
+			   "V3_ISA_EXTENSION");
+}
 
 static void
@@ -788,6 +864,9 @@ static void section_misc(const char *define_prefix, __u32 ifindex)
 		       "/*** eBPF misc features ***/",
 		       define_prefix);
 	probe_large_insn_limit(define_prefix, ifindex);
+	probe_bounded_loops(define_prefix, ifindex);
+	probe_v2_isa_extension(define_prefix, ifindex);
+	probe_v3_isa_extension(define_prefix, ifindex);
 	print_end_section();
 }
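The probe_*() helpers above all follow one pattern: try to load a tiny program that uses the instruction under test and treat a verifier rejection as "not supported". A hedged standalone sketch of the same idea, assuming libbpf >= 0.7 for bpf_prog_load() and the BPF_* instruction macros from the kernel tree's tools/include/linux/filter.h on the include path (illustration only; needs root or CAP_BPF):

#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <bpf/bpf.h>
/* BPF_MOV64_IMM()/BPF_JMP_IMM()/BPF_EXIT_INSN() are assumed to come
 * from the kernel tree's tools/include/linux/filter.h */
#include <linux/filter.h>

int main(void)
{
	/* rejected by verifiers that lack BPF_JLT (ISA v2) */
	struct bpf_insn insns[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 0, 1),
		BPF_MOV64_IMM(BPF_REG_0, 1),
		BPF_EXIT_INSN(),
	};
	int fd;

	errno = 0;
	fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
			   insns, sizeof(insns) / sizeof(insns[0]), NULL);
	printf("v2 ISA extension: %s\n", fd >= 0 || !errno ? "yes" : "no");
	if (fd >= 0)
		close(fd);
	return 0;
}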
@@ -1655,7 +1655,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
 	j = 0;
 	idx = 0;
 	bpf_object__for_each_map(map, obj) {
-		if (!bpf_map__is_offload_neutral(map))
+		if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
 			bpf_map__set_ifindex(map, ifindex);
 
 		if (j < old_map_fds && idx == map_replace[j].idx) {
@@ -691,11 +691,11 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
 	return libbpf_err_errno(ret);
 }
 
-int bpf_map_delete_batch(int fd, void *keys, __u32 *count,
+int bpf_map_delete_batch(int fd, const void *keys, __u32 *count,
 			 const struct bpf_map_batch_opts *opts)
 {
 	return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL,
-				    NULL, keys, NULL, count, opts);
+				    NULL, (void *)keys, NULL, count, opts);
 }
 
 int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys,
@@ -715,11 +715,11 @@ int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch,
 				    count, opts);
 }
 
-int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count,
+int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *count,
 			 const struct bpf_map_batch_opts *opts)
 {
 	return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL,
-				    keys, values, count, opts);
+				    (void *)keys, (void *)values, count, opts);
 }
 
 int bpf_obj_pin(int fd, const char *pathname)
@@ -254,17 +254,128 @@ struct bpf_map_batch_opts {
 };
 #define bpf_map_batch_opts__last_field flags
 
-LIBBPF_API int bpf_map_delete_batch(int fd, void *keys,
+/**
+ * @brief **bpf_map_delete_batch()** allows for batch deletion of multiple
+ * elements in a BPF map.
+ *
+ * @param fd BPF map file descriptor
+ * @param keys pointer to an array of *count* keys
+ * @param count input and output parameter; on input **count** represents the
+ * number of elements in the map to delete in batch;
+ * on output if a non-EFAULT error is returned, **count** represents the number of deleted
+ * elements if the output **count** value is not equal to the input **count** value
+ * If EFAULT is returned, **count** should not be trusted to be correct.
+ * @param opts options for configuring the way the batch deletion works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_map_delete_batch(int fd, const void *keys,
 				    __u32 *count,
 				    const struct bpf_map_batch_opts *opts);
+
+/**
+ * @brief **bpf_map_lookup_batch()** allows for batch lookup of BPF map elements.
+ *
+ * The parameter *in_batch* is the address of the first element in the batch to read.
+ * *out_batch* is an output parameter that should be passed as *in_batch* to subsequent
+ * calls to **bpf_map_lookup_batch()**. NULL can be passed for *in_batch* to indicate
+ * that the batched lookup starts from the beginning of the map.
+ *
+ * The *keys* and *values* are output parameters which must point to memory large enough to
+ * hold *count* items based on the key and value size of the map *map_fd*. The *keys*
+ * buffer must be of *key_size* * *count*. The *values* buffer must be of
+ * *value_size* * *count*.
+ *
+ * @param fd BPF map file descriptor
+ * @param in_batch address of the first element in batch to read, can pass NULL to
+ * indicate that the batched lookup starts from the beginning of the map.
+ * @param out_batch output parameter that should be passed to next call as *in_batch*
+ * @param keys pointer to an array large enough for *count* keys
+ * @param values pointer to an array large enough for *count* values
+ * @param count input and output parameter; on input it's the number of elements
+ * in the map to read in batch; on output it's the number of elements that were
+ * successfully read.
+ * If a non-EFAULT error is returned, count will be set as the number of elements
+ * that were read before the error occurred.
+ * If EFAULT is returned, **count** should not be trusted to be correct.
+ * @param opts options for configuring the way the batch lookup works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
 LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch,
 				    void *keys, void *values, __u32 *count,
 				    const struct bpf_map_batch_opts *opts);
+
+/**
+ * @brief **bpf_map_lookup_and_delete_batch()** allows for batch lookup and deletion
+ * of BPF map elements where each element is deleted after being retrieved.
+ *
+ * @param fd BPF map file descriptor
+ * @param in_batch address of the first element in batch to read, can pass NULL to
+ * get address of the first element in *out_batch*
+ * @param out_batch output parameter that should be passed to next call as *in_batch*
+ * @param keys pointer to an array of *count* keys
+ * @param values pointer to an array large enough for *count* values
+ * @param count input and output parameter; on input it's the number of elements
+ * in the map to read and delete in batch; on output it represents the number of
+ * elements that were successfully read and deleted
+ * If a non-**EFAULT** error code is returned and if the output **count** value
+ * is not equal to the input **count** value, up to **count** elements may
+ * have been deleted.
+ * if **EFAULT** is returned up to *count* elements may have been deleted without
+ * being returned via the *keys* and *values* output parameters.
+ * @param opts options for configuring the way the batch lookup and delete works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
 LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch,
 					       void *out_batch, void *keys,
 					       void *values, __u32 *count,
 					       const struct bpf_map_batch_opts *opts);
-LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values,
+
+/**
+ * @brief **bpf_map_update_batch()** updates multiple elements in a map
+ * by specifying keys and their corresponding values.
+ *
+ * The *keys* and *values* parameters must point to memory large enough
+ * to hold *count* items based on the key and value size of the map.
+ *
+ * The *opts* parameter can be used to control how *bpf_map_update_batch()*
+ * should handle keys that either do or do not already exist in the map.
+ * In particular the *flags* parameter of *bpf_map_batch_opts* can be
+ * one of the following:
+ *
+ * Note that *count* is an input and output parameter, where on output it
+ * represents how many elements were successfully updated. Also note that if
+ * **EFAULT** then *count* should not be trusted to be correct.
+ *
+ * **BPF_ANY**
+ *    Create new elements or update existing.
+ *
+ * **BPF_NOEXIST**
+ *    Create new elements only if they do not exist.
+ *
+ * **BPF_EXIST**
+ *    Update existing elements.
+ *
+ * **BPF_F_LOCK**
+ *    Update spin_lock-ed map elements. This must be
+ *    specified if the map value contains a spinlock.
+ *
+ * @param fd BPF map file descriptor
+ * @param keys pointer to an array of *count* keys
+ * @param values pointer to an array of *count* values
+ * @param count input and output parameter; on input it's the number of elements
+ * in the map to update in batch; on output if a non-EFAULT error is returned,
+ * **count** represents the number of updated elements if the output **count**
+ * value is not equal to the input **count** value.
+ * If EFAULT is returned, **count** should not be trusted to be correct.
+ * @param opts options for configuring the way the batch update works
+ * @return 0, on success; negative error code, otherwise (errno is also set to
+ * the error code)
+ */
+LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values,
 				    __u32 *count,
 				    const struct bpf_map_batch_opts *opts);
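Putting the documented batch API together, a minimal usage sketch (an illustration under assumptions, not code from the patch: libbpf >= 0.7 for bpf_map_create(), a CAP_BPF-capable environment, and an illustrative map name and sizes):

#include <errno.h>
#include <stdio.h>
#include <bpf/bpf.h>

int main(void)
{
	LIBBPF_OPTS(bpf_map_batch_opts, opts, .flags = BPF_ANY);
	__u32 keys[64], values[64], count = 64, i;
	int fd, err;

	fd = bpf_map_create(BPF_MAP_TYPE_HASH, "batch_demo",
			    sizeof(__u32), sizeof(__u32), 128, NULL);
	if (fd < 0)
		return 1;

	for (i = 0; i < 64; i++) {
		keys[i] = i;
		values[i] = i * 10;
	}

	/* one syscall instead of 64 bpf_map_update_elem() calls */
	err = bpf_map_update_batch(fd, keys, values, &count, &opts);
	printf("updated %u elements, err=%d\n", count, err);
	return err ? 1 : 0;
}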
@@ -9916,7 +9916,10 @@ static int append_to_file(const char *file, const char *fmt, ...)
 static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
 					 const char *kfunc_name, size_t offset)
 {
-	snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), kfunc_name, offset);
+	static int index = 0;
+
+	snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
+		 __sync_fetch_and_add(&index, 1));
 }
 
 static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
@@ -10017,7 +10020,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
 		gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
 					     func_name, offset);
 
-		legacy_probe = strdup(func_name);
+		legacy_probe = strdup(probe_name);
 		if (!legacy_probe)
 			return libbpf_err_ptr(-ENOMEM);
 
@@ -10676,10 +10679,10 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
 	return link;
 }
 
-enum bpf_perf_event_ret
-bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
+static enum bpf_perf_event_ret
+perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
 		       void **copy_mem, size_t *copy_size,
 		       bpf_perf_event_print_t fn, void *private_data)
 {
 	struct perf_event_mmap_page *header = mmap_mem;
 	__u64 data_head = ring_buffer_read_head(header);
@@ -10724,6 +10727,12 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
 	return libbpf_err(ret);
 }
 
+__attribute__((alias("perf_event_read_simple")))
+enum bpf_perf_event_ret
+bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
+			   void **copy_mem, size_t *copy_size,
+			   bpf_perf_event_print_t fn, void *private_data);
+
 struct perf_buffer;
 
 struct perf_buffer_params {
@@ -11132,10 +11141,10 @@ static int perf_buffer__process_records(struct perf_buffer *pb,
 {
 	enum bpf_perf_event_ret ret;
 
-	ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
+	ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
 				     pb->page_size, &cpu_buf->buf,
 				     &cpu_buf->buf_size,
 				     perf_buffer__process_record, cpu_buf);
 	if (ret != LIBBPF_PERF_EVENT_CONT)
 		return ret;
 	return 0;
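The hunk above keeps the deprecated symbol alive via GCC/Clang's alias attribute: the old exported name and the new static function share one definition. A tiny standalone sketch of that mechanism (hypothetical names, not from libbpf):

#include <stdio.h>

static int impl(int x)
{
	return x * 2;
}

/* old_name resolves to impl()'s code; only a declaration is needed */
__attribute__((alias("impl")))
int old_name(int x);

int main(void)
{
	printf("%d\n", old_name(21));	/* prints 42 */
	return 0;
}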
@@ -677,7 +677,8 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name);
 * Get bpf_map through the offset of corresponding struct bpf_map_def
 * in the BPF object file.
 */
-LIBBPF_API struct bpf_map *
+LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__find_map_by_name() instead")
+struct bpf_map *
 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
 
 LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_map() instead")
@@ -744,6 +745,7 @@ LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
 LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
 					  const void *data, size_t size);
 LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__type() instead")
 LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
 
 /**
@@ -1026,6 +1028,7 @@ LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_i
 typedef enum bpf_perf_event_ret
 (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
 			  void *private_data);
+LIBBPF_DEPRECATED_SINCE(0, 8, "use perf_buffer__poll() or perf_buffer__consume() instead")
 LIBBPF_API enum bpf_perf_event_ret
 bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
 			   void **copy_mem, size_t *copy_size,
@@ -9,6 +9,7 @@
 #define MAX_FILES 7
 
 #include "test_d_path.skel.h"
+#include "test_d_path_check_rdonly_mem.skel.h"
 
 static int duration;
 
@@ -99,7 +100,7 @@ out_close:
 	return ret;
 }
 
-void test_d_path(void)
+static void test_d_path_basic(void)
 {
 	struct test_d_path__bss *bss;
 	struct test_d_path *skel;
@@ -155,3 +156,22 @@ void test_d_path(void)
 cleanup:
 	test_d_path__destroy(skel);
 }
+
+static void test_d_path_check_rdonly_mem(void)
+{
+	struct test_d_path_check_rdonly_mem *skel;
+
+	skel = test_d_path_check_rdonly_mem__open_and_load();
+	ASSERT_ERR_PTR(skel, "unexpected_load_overwriting_rdonly_mem");
+
+	test_d_path_check_rdonly_mem__destroy(skel);
+}
+
+void test_d_path(void)
+{
+	if (test__start_subtest("basic"))
+		test_d_path_basic();
+
+	if (test__start_subtest("check_rdonly_mem"))
+		test_d_path_check_rdonly_mem();
+}
@@ -105,6 +105,13 @@ static int setns_by_fd(int nsfd)
 	if (!ASSERT_OK(err, "unshare"))
 		return err;
 
+	/* Make our /sys mount private, so the following umount won't
+	 * trigger the global umount in case it's shared.
+	 */
+	err = mount("none", "/sys", NULL, MS_PRIVATE, NULL);
+	if (!ASSERT_OK(err, "remount private /sys"))
+		return err;
+
 	err = umount2("/sys", MNT_DETACH);
 	if (!ASSERT_OK(err, "umount2 /sys"))
 		return err;
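The comment in the hunk explains the why; as a self-contained sketch, the same three steps look like this (assumes CAP_SYS_ADMIN; error handling trimmed for brevity and the function name is illustrative):

#define _GNU_SOURCE
#include <sched.h>
#include <sys/mount.h>

int detach_sys_privately(void)
{
	if (unshare(CLONE_NEWNS))	/* enter a new mount namespace */
		return -1;
	if (mount("none", "/sys", NULL, MS_PRIVATE, NULL))
		return -1;		/* stop MS_SHARED propagation back */
	if (umount2("/sys", MNT_DETACH))
		return -1;		/* lazy detach, now local only */
	return 0;
}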
@@ -12,9 +12,9 @@
 char _license[] SEC("license") = "GPL";
 
 SEC("raw_tracepoint/consume_skb")
-int while_true(volatile struct pt_regs* ctx)
+int while_true(struct pt_regs *ctx)
 {
-	__u64 i = 0, sum = 0;
+	volatile __u64 i = 0, sum = 0;
 	do {
 		i++;
 		sum += PT_REGS_RC(ctx);
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern const int bpf_prog_active __ksym;
+
+SEC("fentry/security_inode_getattr")
+int BPF_PROG(d_path_check_rdonly_mem, struct path *path, struct kstat *stat,
+	     __u32 request_mask, unsigned int query_flags)
+{
+	void *active;
+	__u32 cpu;
+
+	cpu = bpf_get_smp_processor_id();
+	active = (void *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+	if (active) {
+		/* FAIL here! 'active' points to readonly memory. bpf helpers
+		 * that update its arguments can not write into it.
+		 */
+		bpf_d_path(path, active, sizeof(int));
+	}
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
@@ -35,18 +35,21 @@ struct sock_test {
 	/* Endpoint to bind() to */
 	const char *ip;
 	unsigned short port;
+	unsigned short port_retry;
 	/* Expected test result */
 	enum {
 		LOAD_REJECT,
 		ATTACH_REJECT,
 		BIND_REJECT,
 		SUCCESS,
+		RETRY_SUCCESS,
+		RETRY_REJECT
 	} result;
 };
 
 static struct sock_test tests[] = {
 	{
-		"bind4 load with invalid access: src_ip6",
+		.descr = "bind4 load with invalid access: src_ip6",
 		.insns = {
 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
 			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
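The conversion begun in this hunk (and finished in the hunks below) is why the commit is titled "Use C99 initializers": positional initialization breaks silently when a field such as port_retry is inserted mid-struct, while designators keep every value bound to a field name. A toy illustration (hypothetical struct, not from the test):

#include <stdio.h>

struct endpoint {
	const char *ip;
	unsigned short port;
	unsigned short port_retry;	/* field inserted later */
};

int main(void)
{
	/* positional: the meaning of 5000 depends on field order */
	struct endpoint a = { "127.0.0.1", 4098, 5000 };
	/* designated: immune to field insertion and reordering */
	struct endpoint b = {
		.ip = "127.0.0.1",
		.port = 4098,
		.port_retry = 5000,
	};

	printf("%s %hu %hu\n", b.ip, a.port, b.port_retry);
	return 0;
}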
@@ -54,16 +57,12 @@ static struct sock_test tests[] = {
|
|||||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||||
BPF_EXIT_INSN(),
|
BPF_EXIT_INSN(),
|
||||||
},
|
},
|
||||||
BPF_CGROUP_INET4_POST_BIND,
|
.expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
|
||||||
BPF_CGROUP_INET4_POST_BIND,
|
.attach_type = BPF_CGROUP_INET4_POST_BIND,
|
||||||
0,
|
.result = LOAD_REJECT,
|
||||||
0,
|
|
||||||
NULL,
|
|
||||||
0,
|
|
||||||
LOAD_REJECT,
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"bind4 load with invalid access: mark",
|
.descr = "bind4 load with invalid access: mark",
|
||||||
.insns = {
|
.insns = {
|
||||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
||||||
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
|
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
|
||||||
@@ -71,16 +70,12 @@ static struct sock_test tests[] = {
|
|||||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||||
BPF_EXIT_INSN(),
|
BPF_EXIT_INSN(),
|
||||||
},
|
},
|
||||||
BPF_CGROUP_INET4_POST_BIND,
|
.expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
|
||||||
BPF_CGROUP_INET4_POST_BIND,
|
.attach_type = BPF_CGROUP_INET4_POST_BIND,
|
||||||
0,
|
.result = LOAD_REJECT,
|
||||||
0,
|
|
||||||
NULL,
|
|
||||||
0,
|
|
||||||
LOAD_REJECT,
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"bind6 load with invalid access: src_ip4",
|
.descr = "bind6 load with invalid access: src_ip4",
|
||||||
.insns = {
|
.insns = {
|
||||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
||||||
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
|
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
|
||||||
@@ -88,16 +83,12 @@ static struct sock_test tests[] = {
|
|||||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||||
BPF_EXIT_INSN(),
|
BPF_EXIT_INSN(),
|
||||||
},
|
},
|
||||||
BPF_CGROUP_INET6_POST_BIND,
|
.expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
|
||||||
BPF_CGROUP_INET6_POST_BIND,
|
.attach_type = BPF_CGROUP_INET6_POST_BIND,
|
||||||
0,
|
.result = LOAD_REJECT,
|
||||||
0,
|
|
||||||
NULL,
|
|
||||||
0,
|
|
||||||
LOAD_REJECT,
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"sock_create load with invalid access: src_port",
|
.descr = "sock_create load with invalid access: src_port",
|
||||||
.insns = {
|
.insns = {
|
||||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
||||||
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
|
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
|
||||||
@@ -105,128 +96,106 @@ static struct sock_test tests[] = {
|
|||||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||||
BPF_EXIT_INSN(),
|
BPF_EXIT_INSN(),
|
||||||
},
|
},
|
||||||
BPF_CGROUP_INET_SOCK_CREATE,
|
.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
|
||||||
BPF_CGROUP_INET_SOCK_CREATE,
|
.attach_type = BPF_CGROUP_INET_SOCK_CREATE,
|
||||||
0,
|
.result = LOAD_REJECT,
|
||||||
0,
|
|
||||||
NULL,
|
|
||||||
0,
|
|
||||||
LOAD_REJECT,
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"sock_create load w/o expected_attach_type (compat mode)",
|
.descr = "sock_create load w/o expected_attach_type (compat mode)",
|
||||||
.insns = {
|
.insns = {
|
||||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||||
BPF_EXIT_INSN(),
|
BPF_EXIT_INSN(),
|
||||||
},
|
},
|
||||||
0,
|
.expected_attach_type = 0,
|
||||||
BPF_CGROUP_INET_SOCK_CREATE,
|
.attach_type = BPF_CGROUP_INET_SOCK_CREATE,
|
||||||
AF_INET,
|
.domain = AF_INET,
|
||||||
SOCK_STREAM,
|
.type = SOCK_STREAM,
|
||||||
"127.0.0.1",
|
.ip = "127.0.0.1",
|
||||||
8097,
|
.port = 8097,
|
||||||
SUCCESS,
|
.result = SUCCESS,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"sock_create load w/ expected_attach_type",
|
.descr = "sock_create load w/ expected_attach_type",
|
||||||
.insns = {
|
.insns = {
|
||||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||||
BPF_EXIT_INSN(),
|
BPF_EXIT_INSN(),
|
||||||
},
|
},
|
||||||
BPF_CGROUP_INET_SOCK_CREATE,
|
.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
|
||||||
BPF_CGROUP_INET_SOCK_CREATE,
|
.attach_type = BPF_CGROUP_INET_SOCK_CREATE,
|
||||||
AF_INET,
|
.domain = AF_INET,
|
||||||
SOCK_STREAM,
|
.type = SOCK_STREAM,
|
||||||
"127.0.0.1",
|
.ip = "127.0.0.1",
|
||||||
8097,
|
.port = 8097,
|
||||||
SUCCESS,
|
.result = SUCCESS,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"attach type mismatch bind4 vs bind6",
|
.descr = "attach type mismatch bind4 vs bind6",
|
||||||
.insns = {
|
.insns = {
|
||||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||||
BPF_EXIT_INSN(),
|
BPF_EXIT_INSN(),
|
||||||
},
|
},
|
||||||
BPF_CGROUP_INET4_POST_BIND,
|
.expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
|
||||||
BPF_CGROUP_INET6_POST_BIND,
|
.attach_type = BPF_CGROUP_INET6_POST_BIND,
|
||||||
0,
|
.result = ATTACH_REJECT,
|
||||||
0,
|
|
||||||
NULL,
|
|
||||||
0,
|
|
||||||
ATTACH_REJECT,
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"attach type mismatch bind6 vs bind4",
|
.descr = "attach type mismatch bind6 vs bind4",
|
||||||
.insns = {
|
.insns = {
|
||||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||||
BPF_EXIT_INSN(),
|
BPF_EXIT_INSN(),
|
||||||
},
|
},
|
||||||
BPF_CGROUP_INET6_POST_BIND,
|
.expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
|
||||||
BPF_CGROUP_INET4_POST_BIND,
|
.attach_type = BPF_CGROUP_INET4_POST_BIND,
|
||||||
0,
|
.result = ATTACH_REJECT,
|
||||||
0,
|
|
||||||
NULL,
|
|
||||||
0,
|
|
||||||
ATTACH_REJECT,
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"attach type mismatch default vs bind4",
|
.descr = "attach type mismatch default vs bind4",
|
||||||
.insns = {
|
.insns = {
|
||||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||||
BPF_EXIT_INSN(),
|
BPF_EXIT_INSN(),
|
||||||
},
|
},
|
||||||
0,
|
.expected_attach_type = 0,
|
||||||
BPF_CGROUP_INET4_POST_BIND,
|
.attach_type = BPF_CGROUP_INET4_POST_BIND,
|
||||||
0,
|
.result = ATTACH_REJECT,
|
||||||
0,
|
|
||||||
NULL,
|
|
||||||
0,
|
|
||||||
ATTACH_REJECT,
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"attach type mismatch bind6 vs sock_create",
|
.descr = "attach type mismatch bind6 vs sock_create",
|
||||||
.insns = {
|
.insns = {
|
||||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||||
BPF_EXIT_INSN(),
|
BPF_EXIT_INSN(),
|
||||||
},
|
},
|
||||||
BPF_CGROUP_INET6_POST_BIND,
|
.expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
|
||||||
BPF_CGROUP_INET_SOCK_CREATE,
|
.attach_type = BPF_CGROUP_INET_SOCK_CREATE,
|
||||||
0,
|
.result = ATTACH_REJECT,
|
||||||
0,
|
|
||||||
NULL,
|
|
||||||
0,
|
|
||||||
ATTACH_REJECT,
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"bind4 reject all",
|
.descr = "bind4 reject all",
|
||||||
.insns = {
|
.insns = {
|
||||||
BPF_MOV64_IMM(BPF_REG_0, 0),
|
BPF_MOV64_IMM(BPF_REG_0, 0),
|
||||||
BPF_EXIT_INSN(),
|
BPF_EXIT_INSN(),
|
||||||
},
|
},
|
||||||
BPF_CGROUP_INET4_POST_BIND,
|
.expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
|
||||||
BPF_CGROUP_INET4_POST_BIND,
|
.attach_type = BPF_CGROUP_INET4_POST_BIND,
|
||||||
AF_INET,
|
.domain = AF_INET,
|
||||||
SOCK_STREAM,
|
.type = SOCK_STREAM,
|
||||||
"0.0.0.0",
|
.ip = "0.0.0.0",
|
||||||
0,
|
.result = BIND_REJECT,
|
||||||
BIND_REJECT,
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"bind6 reject all",
|
.descr = "bind6 reject all",
|
||||||
.insns = {
|
.insns = {
|
||||||
BPF_MOV64_IMM(BPF_REG_0, 0),
|
BPF_MOV64_IMM(BPF_REG_0, 0),
|
||||||
BPF_EXIT_INSN(),
|
BPF_EXIT_INSN(),
|
||||||
},
|
},
|
||||||
BPF_CGROUP_INET6_POST_BIND,
|
.expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
|
||||||
BPF_CGROUP_INET6_POST_BIND,
|
.attach_type = BPF_CGROUP_INET6_POST_BIND,
|
||||||
AF_INET6,
|
.domain = AF_INET6,
|
||||||
SOCK_STREAM,
|
.type = SOCK_STREAM,
|
||||||
"::",
|
.ip = "::",
|
||||||
0,
|
.result = BIND_REJECT,
|
||||||
BIND_REJECT,
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"bind6 deny specific IP & port",
|
.descr = "bind6 deny specific IP & port",
|
||||||
.insns = {
|
.insns = {
|
||||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
||||||
|
|
||||||
@@ -247,16 +216,16 @@ static struct sock_test tests[] = {
|
|||||||
BPF_MOV64_IMM(BPF_REG_0, 1),
|
BPF_MOV64_IMM(BPF_REG_0, 1),
|
||||||
BPF_EXIT_INSN(),
|
BPF_EXIT_INSN(),
|
||||||
},
|
},
|
||||||
BPF_CGROUP_INET6_POST_BIND,
|
.expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
|
||||||
BPF_CGROUP_INET6_POST_BIND,
|
.attach_type = BPF_CGROUP_INET6_POST_BIND,
|
||||||
AF_INET6,
|
.domain = AF_INET6,
|
||||||
SOCK_STREAM,
|
.type = SOCK_STREAM,
|
||||||
"::1",
|
.ip = "::1",
|
||||||
8193,
|
.port = 8193,
|
||||||
BIND_REJECT,
|
.result = BIND_REJECT,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"bind4 allow specific IP & port",
|
.descr = "bind4 allow specific IP & port",
|
||||||
.insns = {
|
.insns = {
|
||||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
||||||
|
|
||||||
@@ -277,41 +246,132 @@ static struct sock_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		BPF_CGROUP_INET4_POST_BIND,
-		BPF_CGROUP_INET4_POST_BIND,
-		AF_INET,
-		SOCK_STREAM,
-		"127.0.0.1",
-		4098,
-		SUCCESS,
+		.expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+		.attach_type = BPF_CGROUP_INET4_POST_BIND,
+		.domain = AF_INET,
+		.type = SOCK_STREAM,
+		.ip = "127.0.0.1",
+		.port = 4098,
+		.result = SUCCESS,
 	},
 	{
-		"bind4 allow all",
+		.descr = "bind4 deny specific IP & port of TCP, and retry",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+			/* if (ip == expected && port == expected) */
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_ip4)),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+				    __bpf_constant_ntohl(0x7F000001), 4),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_port)),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
+
+			/* return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_A(1),
+
+			/* else return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+		.attach_type = BPF_CGROUP_INET4_POST_BIND,
+		.domain = AF_INET,
+		.type = SOCK_STREAM,
+		.ip = "127.0.0.1",
+		.port = 4098,
+		.port_retry = 5000,
+		.result = RETRY_SUCCESS,
+	},
+	{
+		.descr = "bind4 deny specific IP & port of UDP, and retry",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+			/* if (ip == expected && port == expected) */
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_ip4)),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+				    __bpf_constant_ntohl(0x7F000001), 4),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_port)),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
+
+			/* return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_A(1),
+
+			/* else return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+		.attach_type = BPF_CGROUP_INET4_POST_BIND,
+		.domain = AF_INET,
+		.type = SOCK_DGRAM,
+		.ip = "127.0.0.1",
+		.port = 4098,
+		.port_retry = 5000,
+		.result = RETRY_SUCCESS,
+	},
+	{
+		.descr = "bind6 deny specific IP & port, and retry",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+			/* if (ip == expected && port == expected) */
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_ip6[3])),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+				    __bpf_constant_ntohl(0x00000001), 4),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_port)),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2),
+
+			/* return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_A(1),
+
+			/* else return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+		.attach_type = BPF_CGROUP_INET6_POST_BIND,
+		.domain = AF_INET6,
+		.type = SOCK_STREAM,
+		.ip = "::1",
+		.port = 8193,
+		.port_retry = 9000,
+		.result = RETRY_SUCCESS,
+	},
+	{
+		.descr = "bind4 allow all",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 1),
 			BPF_EXIT_INSN(),
 		},
-		BPF_CGROUP_INET4_POST_BIND,
-		BPF_CGROUP_INET4_POST_BIND,
-		AF_INET,
-		SOCK_STREAM,
-		"0.0.0.0",
-		0,
-		SUCCESS,
+		.expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+		.attach_type = BPF_CGROUP_INET4_POST_BIND,
+		.domain = AF_INET,
+		.type = SOCK_STREAM,
+		.ip = "0.0.0.0",
+		.result = SUCCESS,
 	},
 	{
-		"bind6 allow all",
+		.descr = "bind6 allow all",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 1),
 			BPF_EXIT_INSN(),
 		},
-		BPF_CGROUP_INET6_POST_BIND,
-		BPF_CGROUP_INET6_POST_BIND,
-		AF_INET6,
-		SOCK_STREAM,
-		"::",
-		0,
-		SUCCESS,
+		.expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+		.attach_type = BPF_CGROUP_INET6_POST_BIND,
+		.domain = AF_INET6,
+		.type = SOCK_STREAM,
+		.ip = "::",
+		.result = SUCCESS,
 	},
 };
 
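For readability, the raw instructions in the new retry tests correspond
roughly to the following BPF C program (a sketch, not the selftest's
source; the section name, function name, and helper usage are
illustrative). Note that src_ip4 is compared in network byte order,
0x7F000001 being 127.0.0.1, while src_port in struct bpf_sock is in host
byte order, so 0x1002 is port 4098 and 0x2001 is port 8193:

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_endian.h>

	SEC("cgroup/post_bind4")
	int deny_4098_on_loopback(struct bpf_sock *ctx)
	{
		/* if (ip == expected && port == expected) return DENY; */
		if (ctx->src_ip4 == bpf_htonl(0x7F000001) && ctx->src_port == 4098)
			return 0;	/* DENY: the caller's bind() fails with EPERM */
		return 1;		/* else return ALLOW */
	}

	char _license[] SEC("license") = "GPL";
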
@@ -351,14 +411,15 @@ static int attach_sock_prog(int cgfd, int progfd,
 	return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE);
 }
 
-static int bind_sock(int domain, int type, const char *ip, unsigned short port)
+static int bind_sock(int domain, int type, const char *ip,
+		     unsigned short port, unsigned short port_retry)
 {
 	struct sockaddr_storage addr;
 	struct sockaddr_in6 *addr6;
 	struct sockaddr_in *addr4;
 	int sockfd = -1;
 	socklen_t len;
-	int err = 0;
+	int res = SUCCESS;
 
 	sockfd = socket(domain, type, 0);
 	if (sockfd < 0)
@@ -384,21 +445,44 @@ static int bind_sock(int domain, int type, const char *ip, unsigned short port)
 		goto err;
 	}
 
-	if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1)
-		goto err;
+	if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) {
+		/* sys_bind() may fail for different reasons; errno has to be
+		 * checked to confirm that the BPF program rejected it.
+		 */
+		if (errno != EPERM)
+			goto err;
+		if (port_retry)
+			goto retry;
+		res = BIND_REJECT;
+		goto out;
+	}
 
 	goto out;
+retry:
+	if (domain == AF_INET)
+		addr4->sin_port = htons(port_retry);
+	else
+		addr6->sin6_port = htons(port_retry);
+	if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) {
+		if (errno != EPERM)
+			goto err;
+		res = RETRY_REJECT;
+	} else {
+		res = RETRY_SUCCESS;
+	}
+	goto out;
 err:
-	err = -1;
+	res = -1;
 out:
 	close(sockfd);
-	return err;
+	return res;
 }
 
 static int run_test_case(int cgfd, const struct sock_test *test)
 {
 	int progfd = -1;
 	int err = 0;
+	int res;
 
 	printf("Test case: %s .. ", test->descr);
 	progfd = load_sock_prog(test->insns, test->expected_attach_type);
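bind_sock() thus changes contract: instead of 0 / -1 it returns one of the
test result codes (SUCCESS, BIND_REJECT, RETRY_SUCCESS, RETRY_REJECT), with
-1 reserved for unexpected failures, and it folds in the errno == EPERM
check that callers previously had to perform themselves. A hedged usage
sketch (the result names come from the diff; their numeric values are the
selftest's and are not restated here):

	int res;

	/* bind to 127.0.0.1:4098, which the program denies, then retry on 5000 */
	res = bind_sock(AF_INET, SOCK_STREAM, "127.0.0.1", 4098, 5000);
	if (res == RETRY_SUCCESS)
		printf("denied on 4098, retry on 5000 succeeded\n");
	else if (res == -1)
		printf("bind failed for a reason other than the BPF program\n");
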
@@ -416,21 +500,11 @@ static int run_test_case(int cgfd, const struct sock_test *test)
 		goto err;
 	}
 
-	if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) {
-		/* sys_bind() may fail for different reasons, errno has to be
-		 * checked to confirm that BPF program rejected it.
-		 */
-		if (test->result == BIND_REJECT && errno == EPERM)
-			goto out;
-		else
-			goto err;
-	}
-
-	if (test->result != SUCCESS)
-		goto err;
-
-	goto out;
+	res = bind_sock(test->domain, test->type, test->ip, test->port,
+			test->port_retry);
+	if (res > 0 && test->result == res)
+		goto out;
+
 err:
 	err = -1;
 out:
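The simplification works because every outcome bind_sock() can now report
is a positive code that can be compared directly against test->result; the
old special-casing of BIND_REJECT and errno now lives inside bind_sock().
This relies on the selftest's result enum giving all bind outcomes nonzero
values, along these lines (a sketch of the assumed ordering, not a verbatim
quote of the file):

	enum {
		LOAD_REJECT,	/* 0: never returned by bind_sock() */
		ATTACH_REJECT,
		BIND_REJECT,
		SUCCESS,
		RETRY_SUCCESS,
		RETRY_REJECT,
	};
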
@@ -58,6 +58,34 @@
 	.result = ACCEPT,
 	.result_unpriv = ACCEPT,
 },
+{
+	"check with invalid reg offset 0",
+	.insns = {
+	/* reserve 8 byte ringbuf memory */
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_MOV64_IMM(BPF_REG_2, 8),
+	BPF_MOV64_IMM(BPF_REG_3, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+	/* store a pointer to the reserved memory in R6 */
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	/* add invalid offset to memory or NULL */
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+	/* check whether the reservation was successful */
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+	/* should not be able to access *(R6) = 0 */
+	BPF_ST_MEM(BPF_W, BPF_REG_6, 0, 0),
+	/* submit the reserved ringbuf memory */
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_ringbuf = { 1 },
+	.result = REJECT,
+	.errstr = "R0 pointer arithmetic on mem_or_null prohibited",
+},
 {
 	"check corrupted spill/fill",
 	.insns = {