Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-05-14

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Merged tag 'perf-for-bpf-2020-05-06' from tip tree that includes CAP_PERFMON.

2) support for narrow loads in bpf_sock_addr progs and additional helpers in cg-skb progs, from Andrey.

3) bpf benchmark runner, from Andrii.

4) arm and riscv JIT optimizations, from Luke.

5) bpf iterator infrastructure, from Yonghong.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
arch/arm/net/bpf_jit_32.c
@@ -795,6 +795,9 @@ static inline void emit_a32_alu_i(const s8 dst, const u32 val,
 	case BPF_RSH:
 		emit(ARM_LSR_I(rd, rd, val), ctx);
 		break;
+	case BPF_ARSH:
+		emit(ARM_ASR_I(rd, rd, val), ctx);
+		break;
 	case BPF_NEG:
 		emit(ARM_RSB_I(rd, rd, val), ctx);
 		break;
@@ -860,8 +863,8 @@ static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[],
 	emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
 	emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx);
 	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx);
-	_emit(ARM_COND_MI, ARM_B(0), ctx);
-	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx);
+	_emit(ARM_COND_PL,
+	      ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx);
 	emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_ASR, rt), ctx);

 	arm_bpf_put_reg32(dst_lo, ARM_LR, ctx);
@@ -1408,7 +1411,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	case BPF_ALU | BPF_MUL | BPF_X:
 	case BPF_ALU | BPF_LSH | BPF_X:
 	case BPF_ALU | BPF_RSH | BPF_X:
-	case BPF_ALU | BPF_ARSH | BPF_K:
 	case BPF_ALU | BPF_ARSH | BPF_X:
 	case BPF_ALU64 | BPF_ADD | BPF_K:
 	case BPF_ALU64 | BPF_ADD | BPF_X:
@@ -1465,10 +1467,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	case BPF_ALU64 | BPF_MOD | BPF_K:
 	case BPF_ALU64 | BPF_MOD | BPF_X:
 		goto notyet;
-	/* dst = dst >> imm */
 	/* dst = dst << imm */
-	case BPF_ALU | BPF_RSH | BPF_K:
+	/* dst = dst >> imm */
+	/* dst = dst >> imm (signed) */
 	case BPF_ALU | BPF_LSH | BPF_K:
+	case BPF_ALU | BPF_RSH | BPF_K:
+	case BPF_ALU | BPF_ARSH | BPF_K:
 		if (unlikely(imm > 31))
 			return -EINVAL;
 		if (imm)
arch/arm/net/bpf_jit_32.h
@@ -94,6 +94,9 @@
 #define ARM_INST_LSR_I		0x01a00020
 #define ARM_INST_LSR_R		0x01a00030

+#define ARM_INST_ASR_I		0x01a00040
+#define ARM_INST_ASR_R		0x01a00050
+
 #define ARM_INST_MOV_R		0x01a00000
 #define ARM_INST_MOVS_R		0x01b00000
 #define ARM_INST_MOV_I		0x03a00000
arch/riscv/net/bpf_jit_comp64.c
@@ -515,7 +515,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
 	case BPF_ALU | BPF_LSH | BPF_X:
 	case BPF_ALU64 | BPF_LSH | BPF_X:
 		emit(is64 ? rv_sll(rd, rd, rs) : rv_sllw(rd, rd, rs), ctx);
-		if (!is64)
+		if (!is64 && !aux->verifier_zext)
 			emit_zext_32(rd, ctx);
 		break;
 	case BPF_ALU | BPF_RSH | BPF_X:
@@ -542,13 +542,21 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,

 	/* dst = BSWAP##imm(dst) */
 	case BPF_ALU | BPF_END | BPF_FROM_LE:
-	{
-		int shift = 64 - imm;
-
-		emit(rv_slli(rd, rd, shift), ctx);
-		emit(rv_srli(rd, rd, shift), ctx);
+		switch (imm) {
+		case 16:
+			emit(rv_slli(rd, rd, 48), ctx);
+			emit(rv_srli(rd, rd, 48), ctx);
+			break;
+		case 32:
+			if (!aux->verifier_zext)
+				emit_zext_32(rd, ctx);
+			break;
+		case 64:
+			/* Do nothing */
+			break;
+		}
 		break;
-	}
+
 	case BPF_ALU | BPF_END | BPF_FROM_BE:
 		emit(rv_addi(RV_REG_T2, RV_REG_ZERO, 0), ctx);
@@ -692,19 +700,19 @@ out_be:
 	case BPF_ALU | BPF_LSH | BPF_K:
 	case BPF_ALU64 | BPF_LSH | BPF_K:
 		emit(is64 ? rv_slli(rd, rd, imm) : rv_slliw(rd, rd, imm), ctx);
-		if (!is64)
+		if (!is64 && !aux->verifier_zext)
 			emit_zext_32(rd, ctx);
 		break;
 	case BPF_ALU | BPF_RSH | BPF_K:
 	case BPF_ALU64 | BPF_RSH | BPF_K:
 		emit(is64 ? rv_srli(rd, rd, imm) : rv_srliw(rd, rd, imm), ctx);
-		if (!is64)
+		if (!is64 && !aux->verifier_zext)
 			emit_zext_32(rd, ctx);
 		break;
 	case BPF_ALU | BPF_ARSH | BPF_K:
 	case BPF_ALU64 | BPF_ARSH | BPF_K:
 		emit(is64 ? rv_srai(rd, rd, imm) : rv_sraiw(rd, rd, imm), ctx);
-		if (!is64)
+		if (!is64 && !aux->verifier_zext)
 			emit_zext_32(rd, ctx);
 		break;

@@ -784,11 +792,15 @@ out_be:
 	case BPF_JMP32 | BPF_JSGE | BPF_K:
 	case BPF_JMP | BPF_JSLE | BPF_K:
 	case BPF_JMP32 | BPF_JSLE | BPF_K:
-	case BPF_JMP | BPF_JSET | BPF_K:
-	case BPF_JMP32 | BPF_JSET | BPF_K:
 		rvoff = rv_offset(i, off, ctx);
 		s = ctx->ninsns;
-		emit_imm(RV_REG_T1, imm, ctx);
+		if (imm) {
+			emit_imm(RV_REG_T1, imm, ctx);
+			rs = RV_REG_T1;
+		} else {
+			/* If imm is 0, simply use zero register. */
+			rs = RV_REG_ZERO;
+		}
 		if (!is64) {
 			if (is_signed_bpf_cond(BPF_OP(code)))
 				emit_sext_32_rd(&rd, ctx);
@@ -799,16 +811,28 @@ out_be:

 		/* Adjust for extra insns */
 		rvoff -= (e - s) << 2;
+		emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
+		break;

-		if (BPF_OP(code) == BPF_JSET) {
-			/* Adjust for and */
-			rvoff -= 4;
-			emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
-			emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
-				    ctx);
+	case BPF_JMP | BPF_JSET | BPF_K:
+	case BPF_JMP32 | BPF_JSET | BPF_K:
+		rvoff = rv_offset(i, off, ctx);
+		s = ctx->ninsns;
+		if (is_12b_int(imm)) {
+			emit(rv_andi(RV_REG_T1, rd, imm), ctx);
 		} else {
-			emit_branch(BPF_OP(code), rd, RV_REG_T1, rvoff, ctx);
+			emit_imm(RV_REG_T1, imm, ctx);
+			emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
 		}
+		/* For jset32, we should clear the upper 32 bits of t1, but
+		 * sign-extension is sufficient here and saves one instruction,
+		 * as t1 is used only in comparison against zero.
+		 */
+		if (!is64 && imm < 0)
+			emit(rv_addiw(RV_REG_T1, RV_REG_T1, 0), ctx);
+		e = ctx->ninsns;
+		rvoff -= (e - s) << 2;
+		emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx);
 		break;

 	/* function call */
arch/x86/net/bpf_jit_comp32.c
@@ -1475,8 +1475,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 	for (i = 0; i < insn_cnt; i++, insn++) {
 		const s32 imm32 = insn->imm;
 		const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
-		const bool dstk = insn->dst_reg == BPF_REG_AX ? false : true;
-		const bool sstk = insn->src_reg == BPF_REG_AX ? false : true;
+		const bool dstk = insn->dst_reg != BPF_REG_AX;
+		const bool sstk = insn->src_reg != BPF_REG_AX;
 		const u8 code = insn->code;
 		const u8 *dst = bpf2ia32[insn->dst_reg];
 		const u8 *src = bpf2ia32[insn->src_reg];
fs/proc/proc_net.c
@@ -98,6 +98,25 @@ static const struct proc_ops proc_net_seq_ops = {
 	.proc_release	= seq_release_net,
 };

+int bpf_iter_init_seq_net(void *priv_data)
+{
+#ifdef CONFIG_NET_NS
+	struct seq_net_private *p = priv_data;
+
+	p->net = get_net(current->nsproxy->net_ns);
+#endif
+	return 0;
+}
+
+void bpf_iter_fini_seq_net(void *priv_data)
+{
+#ifdef CONFIG_NET_NS
+	struct seq_net_private *p = priv_data;
+
+	put_net(p->net);
+#endif
+}
+
 struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
 		struct proc_dir_entry *parent, const struct seq_operations *ops,
 		unsigned int state_size, void *data)
include/linux/bpf.h
@@ -31,6 +31,7 @@ struct seq_file;
 struct btf;
 struct btf_type;
 struct exception_table_entry;
+struct seq_operations;

 extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
@@ -319,6 +320,7 @@ enum bpf_reg_type {
 	PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */
 	PTR_TO_XDP_SOCK,	 /* reg points to struct xdp_sock */
 	PTR_TO_BTF_ID,		 /* reg points to kernel struct */
+	PTR_TO_BTF_ID_OR_NULL,	 /* reg points to kernel struct or NULL */
 };

 /* The information passed from prog-specific *_is_valid_access
@@ -641,6 +643,12 @@ struct bpf_jit_poke_descriptor {
 	u16 reason;
 };

+/* reg_type info for ctx arguments */
+struct bpf_ctx_arg_aux {
+	u32 offset;
+	enum bpf_reg_type reg_type;
+};
+
 struct bpf_prog_aux {
 	atomic64_t refcnt;
 	u32 used_map_cnt;
@@ -652,6 +660,8 @@ struct bpf_prog_aux {
 	u32 func_cnt; /* used by non-func prog as the number of func progs */
 	u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
 	u32 attach_btf_id; /* in-kernel BTF type id to attach to */
+	u32 ctx_arg_info_size;
+	const struct bpf_ctx_arg_aux *ctx_arg_info;
 	struct bpf_prog *linked_prog;
 	bool verifier_zext; /* Zero extensions has been inserted by verifier. */
 	bool offload_requested;
@@ -1021,6 +1031,7 @@ static inline void bpf_enable_instrumentation(void)

 extern const struct file_operations bpf_map_fops;
 extern const struct file_operations bpf_prog_fops;
+extern const struct file_operations bpf_iter_fops;

 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
 	extern const struct bpf_prog_ops _name ## _prog_ops; \
@@ -1080,6 +1091,7 @@ int generic_map_update_batch(struct bpf_map *map,
 int generic_map_delete_batch(struct bpf_map *map,
 			     const union bpf_attr *attr,
 			     union bpf_attr __user *uattr);
+struct bpf_map *bpf_map_get_curr_or_next(u32 *id);

 extern int sysctl_unprivileged_bpf_disabled;

@@ -1126,6 +1138,40 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd);
 int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
 int bpf_obj_get_user(const char __user *pathname, int flags);

+#define BPF_ITER_FUNC_PREFIX "bpf_iter_"
+#define DEFINE_BPF_ITER_FUNC(target, args...)			\
+	extern int bpf_iter_ ## target(args);			\
+	int __init bpf_iter_ ## target(args) { return 0; }
+
+typedef int (*bpf_iter_init_seq_priv_t)(void *private_data);
+typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
+
+#define BPF_ITER_CTX_ARG_MAX 2
+struct bpf_iter_reg {
+	const char *target;
+	const struct seq_operations *seq_ops;
+	bpf_iter_init_seq_priv_t init_seq_private;
+	bpf_iter_fini_seq_priv_t fini_seq_private;
+	u32 seq_priv_size;
+	u32 ctx_arg_info_size;
+	struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
+};
+
+struct bpf_iter_meta {
+	__bpf_md_ptr(struct seq_file *, seq);
+	u64 session_id;
+	u64 seq_num;
+};
+
+int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info);
+void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info);
+bool bpf_iter_prog_supported(struct bpf_prog *prog);
+int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+int bpf_iter_new_fd(struct bpf_link *link);
+bool bpf_link_is_iter(struct bpf_link *link);
+struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop);
+int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx);
+
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
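The declarations above are the kernel half of the iterator interface; the other half is the BPF program that attaches to a target. A minimal sketch of the program side, assuming libbpf's section-name convention for BPF_TRACE_ITER attachment and with hand-rolled declarations standing in for vmlinux.h; the local structs mirror the bpf_iter__bpf_map layout from kernel/bpf/map_iter.c further down, and the helper is declared by its id (126 for bpf_seq_printf, per the FN list added in this merge):

/* Program side for the "bpf_map" target; built with clang -target bpf.
 * Everything local here is an assumption standing in for vmlinux.h /
 * libbpf headers.
 */
#include <linux/types.h>

struct seq_file;
struct bpf_map;

struct bpf_iter_meta {
	struct seq_file *seq;
	__u64 session_id;
	__u64 seq_num;
} __attribute__((preserve_access_index));

struct bpf_iter__bpf_map {
	struct bpf_iter_meta *meta;
	struct bpf_map *map;
} __attribute__((preserve_access_index));

static int (*bpf_seq_printf)(struct seq_file *m, const char *fmt,
			     __u32 fmt_size, const void *data,
			     __u32 data_len) = (void *)126;

__attribute__((section("iter/bpf_map"), used))
int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
{
	struct seq_file *seq = ctx->meta->seq;
	__u64 seq_num = ctx->meta->seq_num;
	char fmt[] = "object #%llu\n";

	/* ctx->map is PTR_TO_BTF_ID_OR_NULL: it is NULL exactly once,
	 * on the final stop() invocation, letting the program emit a
	 * footer. The verifier therefore forces this NULL check.
	 */
	if (!ctx->map)
		return 0;

	bpf_seq_printf(seq, fmt, sizeof(fmt), &seq_num, sizeof(seq_num));
	return 0;
}

char _license[] __attribute__((section("license"), used)) = "GPL";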
include/linux/bpf_types.h
@@ -124,3 +124,4 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
 #ifdef CONFIG_CGROUP_BPF
 BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup)
 #endif
+BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter)
include/linux/capability.h
@@ -251,6 +251,10 @@ extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode);
 extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap);
 extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
 extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns);
+static inline bool perfmon_capable(void)
+{
+	return capable(CAP_PERFMON) || capable(CAP_SYS_ADMIN);
+}

 /* audit system wants to get cap info from files as well */
 extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps);
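perfmon_capable() is intended as the single predicate subsystems use to gate performance and observability features; the CAP_SYS_ADMIN fallback keeps existing admin-capable deployments working. A hedged sketch of a call site (the function and ioctl are illustrative, not from this merge):

/* Hypothetical example: gating a privileged perf-style operation.
 * CAP_PERFMON alone is now sufficient; CAP_SYS_ADMIN still works
 * via the fallback inside perfmon_capable().
 */
static long foo_perf_ioctl(struct file *file, unsigned int cmd)
{
	if (!perfmon_capable())
		return -EACCES;
	/* ... privileged observability work ... */
	return 0;
}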
include/linux/filter.h
@@ -545,10 +545,8 @@ struct bpf_prog {
 	unsigned int		(*bpf_func)(const void *ctx,
 					    const struct bpf_insn *insn);
 	/* Instructions for interpreter */
-	union {
-		struct sock_filter	insns[0];
-		struct bpf_insn		insnsi[0];
-	};
+	struct sock_filter	insns[0];
+	struct bpf_insn		insnsi[];
 };

 struct sk_filter {
include/linux/proc_fs.h
@@ -105,6 +105,9 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mode,
 						    void *data);
 extern struct pid *tgid_pidfd_to_pid(const struct file *file);

+extern int bpf_iter_init_seq_net(void *priv_data);
+extern void bpf_iter_fini_seq_net(void *priv_data);
+
 #ifdef CONFIG_PROC_PID_ARCH_STATUS
 /*
  * The architecture which selects CONFIG_PROC_PID_ARCH_STATUS must
include/net/inet_common.h
@@ -35,8 +35,14 @@ int inet_shutdown(struct socket *sock, int how);
 int inet_listen(struct socket *sock, int backlog);
 void inet_sock_destruct(struct sock *sk);
 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
+/* Don't allocate port at this moment, defer to connect. */
+#define BIND_FORCE_ADDRESS_NO_PORT	(1 << 0)
+/* Grab and release socket lock. */
+#define BIND_WITH_LOCK			(1 << 1)
+/* Called from BPF program. */
+#define BIND_FROM_BPF			(1 << 2)
 int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
-		bool force_bind_address_no_port, bool with_lock);
+		u32 flags);
 int inet_getname(struct socket *sock, struct sockaddr *uaddr,
 		 int peer);
 int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
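Call sites translate the old bool pair into the flags word, and BPF-originated binds can now identify themselves. An illustrative conversion, not a literal hunk from this merge:

/* before: __inet_bind(sk, uaddr, addr_len, true, true); */
err = __inet_bind(sk, uaddr, addr_len,
		  BIND_FORCE_ADDRESS_NO_PORT | BIND_WITH_LOCK);

/* from the bpf_bind() helper path (illustrative): */
err = __inet_bind(sk, uaddr, addr_len, BIND_FROM_BPF | BIND_WITH_LOCK);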
include/net/ip6_fib.h
@@ -544,6 +544,13 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
 	return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric));
 }

+#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_iter__ipv6_route {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct fib6_info *, rt);
+};
+#endif
+
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 static inline bool fib6_has_custom_rules(const struct net *net)
 {
include/net/ipv6_stubs.h
@@ -63,7 +63,7 @@ extern const struct ipv6_stub *ipv6_stub __read_mostly;
 /* A stub used by bpf helpers. Similarly ugly as ipv6_stub */
 struct ipv6_bpf_stub {
 	int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len,
-			  bool force_bind_address_no_port, bool with_lock);
+			  u32 flags);
 	struct sock *(*udp6_lib_lookup)(struct net *net,
 				     const struct in6_addr *saddr, __be16 sport,
 				     const struct in6_addr *daddr, __be16 dport,
include/net/xdp_sock.h
@@ -50,7 +50,6 @@ struct xdp_umem {
 	u32 headroom;
 	u32 chunk_size_nohr;
 	struct user_struct *user;
-	unsigned long address;
 	refcount_t users;
 	struct work_struct work;
 	struct page **pgs;
@@ -62,8 +61,8 @@ struct xdp_umem {
 	struct net_device *dev;
 	struct xdp_umem_fq_reuse *fq_reuse;
 	bool zc;
-	spinlock_t xsk_list_lock;
-	struct list_head xsk_list;
+	spinlock_t xsk_tx_list_lock;
+	struct list_head xsk_tx_list;
 };

 /* Nodes are linked in the struct xdp_sock map_list field, and used to
include/uapi/linux/bpf.h
@@ -116,6 +116,7 @@ enum bpf_cmd {
 	BPF_LINK_GET_FD_BY_ID,
 	BPF_LINK_GET_NEXT_ID,
 	BPF_ENABLE_STATS,
+	BPF_ITER_CREATE,
 };

 enum bpf_map_type {
@@ -218,6 +219,7 @@ enum bpf_attach_type {
 	BPF_TRACE_FEXIT,
 	BPF_MODIFY_RETURN,
 	BPF_LSM_MAC,
+	BPF_TRACE_ITER,
 	__MAX_BPF_ATTACH_TYPE
 };

@@ -228,6 +230,7 @@ enum bpf_link_type {
 	BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
 	BPF_LINK_TYPE_TRACING = 2,
 	BPF_LINK_TYPE_CGROUP = 3,
+	BPF_LINK_TYPE_ITER = 4,

 	MAX_BPF_LINK_TYPE,
 };
@@ -612,6 +615,11 @@ union bpf_attr {
 		__u32		type;
 	} enable_stats;

+	struct { /* struct used by BPF_ITER_CREATE command */
+		__u32		link_fd;
+		__u32		flags;
+	} iter_create;
+
 } __attribute__((aligned(8)));

 /* The description below is an attempt at providing documentation to eBPF
@@ -667,8 +675,8 @@ union bpf_attr {
 *		For tracing programs, safely attempt to read *size* bytes from
 *		kernel space address *unsafe_ptr* and store the data in *dst*.
 *
-*		Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
-*		instead.
+*		Generally, use **bpf_probe_read_user**\ () or
+*		**bpf_probe_read_kernel**\ () instead.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
@@ -676,7 +684,7 @@ union bpf_attr {
 *	Description
 *		Return the time elapsed since system boot, in nanoseconds.
 *		Does not include time the system was suspended.
-*		See: clock_gettime(CLOCK_MONOTONIC)
+*		See: **clock_gettime**\ (**CLOCK_MONOTONIC**)
 *	Return
 *		Current *ktime*.
 *
@@ -1535,11 +1543,11 @@ union bpf_attr {
 * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
 *	Description
 *		Copy a NUL terminated string from an unsafe kernel address
-*		*unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
+*		*unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for
 *		more details.
 *
-*		Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
-*		instead.
+*		Generally, use **bpf_probe_read_user_str**\ () or
+*		**bpf_probe_read_kernel_str**\ () instead.
 *	Return
 *		On success, the strictly positive length of the string,
 *		including the trailing NUL character. On error, a negative
@@ -1567,7 +1575,7 @@ union bpf_attr {
 *
 * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
 *	Description
-*		Equivalent to bpf_get_socket_cookie() helper that accepts
+*		Equivalent to **bpf_get_socket_cookie**\ () helper that accepts
 *		*skb*, but gets socket from **struct bpf_sock_ops** context.
 *	Return
 *		A 8-byte long non-decreasing number.
@@ -1596,6 +1604,7 @@ union bpf_attr {
 *		The option value of length *optlen* is pointed by *optval*.
 *
 *		*bpf_socket* should be one of the following:
+*
 *		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
 *		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
 *		  and **BPF_CGROUP_INET6_CONNECT**.
@@ -1664,12 +1673,12 @@ union bpf_attr {
 *
 *		The lower two bits of *flags* are used as the return code if
 *		the map lookup fails. This is so that the return value can be
-*		one of the XDP program return codes up to XDP_TX, as chosen by
-*		the caller. Any higher bits in the *flags* argument must be
+*		one of the XDP program return codes up to **XDP_TX**, as chosen
+*		by the caller. Any higher bits in the *flags* argument must be
 *		unset.
 *
-*		See also bpf_redirect(), which only supports redirecting to an
-*		ifindex, but doesn't require a map to do so.
+*		See also **bpf_redirect**\ (), which only supports redirecting
+*		to an ifindex, but doesn't require a map to do so.
 *	Return
 *		**XDP_REDIRECT** on success, or the value of the two lower bits
 *		of the *flags* argument on error.
@@ -1777,7 +1786,7 @@ union bpf_attr {
 *		the time running for event since last normalization. The
 *		enabled and running times are accumulated since the perf event
 *		open. To achieve scaling factor between two invocations of an
-*		eBPF program, users can can use CPU id as the key (which is
+*		eBPF program, users can use CPU id as the key (which is
 *		typical for perf array usage model) to remember the previous
 *		value and do the calculation inside the eBPF program.
 *	Return
@@ -1804,6 +1813,7 @@ union bpf_attr {
 *		*opval* and of length *optlen*.
 *
 *		*bpf_socket* should be one of the following:
+*
 *		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
 *		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
 *		  and **BPF_CGROUP_INET6_CONNECT**.
@@ -1825,7 +1835,7 @@ union bpf_attr {
 *		The first argument is the context *regs* on which the kprobe
 *		works.
 *
-*		This helper works by setting setting the PC (program counter)
+*		This helper works by setting the PC (program counter)
 *		to an override function which is run in place of the original
 *		probed function. This means the probed function is not run at
 *		all. The replacement function just returns with the required
@@ -1994,10 +2004,11 @@ union bpf_attr {
 *
 *		This helper works for IPv4 and IPv6, TCP and UDP sockets. The
 *		domain (*addr*\ **->sa_family**) must be **AF_INET** (or
-*		**AF_INET6**). Looking for a free port to bind to can be
-*		expensive, therefore binding to port is not permitted by the
-*		helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
-*		must be set to zero.
+*		**AF_INET6**). It's advised to pass zero port (**sin_port**
+*		or **sin6_port**) which triggers IP_BIND_ADDRESS_NO_PORT-like
+*		behavior and lets the kernel efficiently pick up an unused
+*		port as long as 4-tuple is unique. Passing non-zero port might
+*		lead to degraded performance.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
@@ -2291,7 +2302,7 @@ union bpf_attr {
 *		**bpf_rc_keydown**\ () again with the same values, or calling
 *		**bpf_rc_repeat**\ ().
 *
-*		Some protocols include a toggle bit, in case the button	was
+*		Some protocols include a toggle bit, in case the button was
 *		released and pressed again between consecutive scancodes.
 *
 *		The *ctx* should point to the lirc sample as passed into
@@ -2637,7 +2648,6 @@ union bpf_attr {
 *
 *		*th* points to the start of the TCP header, while *th_len*
 *		contains **sizeof**\ (**struct tcphdr**).
-*
 *	Return
 *		0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
 *		error otherwise.
@@ -2820,7 +2830,6 @@ union bpf_attr {
 *
 *		*th* points to the start of the TCP header, while *th_len*
 *		contains the length of the TCP header.
-*
 *	Return
 *		On success, lower 32 bits hold the generated SYN cookie in
 *		followed by 16 bits which hold the MSS value for that cookie,
@@ -2903,7 +2912,7 @@ union bpf_attr {
 *			// size, after checking its boundaries.
 *		}
 *
-*		In comparison, using **bpf_probe_read_user()** helper here
+*		In comparison, using **bpf_probe_read_user**\ () helper here
 *		instead to read the string would require to estimate the length
 *		at compile time, and would often result in copying more memory
 *		than necessary.
@@ -2921,14 +2930,14 @@ union bpf_attr {
 * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
 *	Description
 *		Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
-*		to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
+*		to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply.
 *	Return
-*		On success, the strictly positive length of the string,	including
+*		On success, the strictly positive length of the string, including
 *		the trailing NUL character. On error, a negative value.
 *
 * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
 *	Description
-*		Send out a tcp-ack. *tp* is the in-kernel struct tcp_sock.
+*		Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**.
 *		*rcv_nxt* is the ack_seq to be sent out.
 *	Return
 *		0 on success, or a negative error in case of failure.
@@ -2956,19 +2965,19 @@ union bpf_attr {
 * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
 *	Description
 *		For an eBPF program attached to a perf event, retrieve the
-*		branch records (struct perf_branch_entry) associated to *ctx*
-*		and store it in the buffer pointed by *buf* up to size
+*		branch records (**struct perf_branch_entry**) associated to *ctx*
+*		and store it in the buffer pointed by *buf* up to size
 *		*size* bytes.
 *	Return
 *		On success, number of bytes written to *buf*. On error, a
 *		negative value.
 *
 *		The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to
-*		instead return the number of bytes required to store all the
+*		instead return the number of bytes required to store all the
 *		branch entries. If this flag is set, *buf* may be NULL.
 *
 *		**-EINVAL** if arguments invalid or **size** not a multiple
-*		of sizeof(struct perf_branch_entry).
+*		of **sizeof**\ (**struct perf_branch_entry**\ ).
 *
 *		**-ENOENT** if architecture does not support branch records.
 *
@@ -2976,8 +2985,8 @@ union bpf_attr {
 *	Description
 *		Returns 0 on success, values for *pid* and *tgid* as seen from the current
 *		*namespace* will be returned in *nsdata*.
-*
-*		On failure, the returned value is one of the following:
 *	Return
+*		0 on success, or one of the following in case of failure:
+*
 *		**-EINVAL** if dev and inum supplied don't match dev_t and inode number
 *		with nsfs of current task, or if dev conversion to dev_t lost high bits.
@@ -3016,8 +3025,8 @@ union bpf_attr {
 *		a global identifier that can be assumed unique. If *ctx* is
 *		NULL, then the helper returns the cookie for the initial
 *		network namespace. The cookie itself is very similar to that
-*		of bpf_get_socket_cookie() helper, but for network namespaces
-*		instead of sockets.
+*		of **bpf_get_socket_cookie**\ () helper, but for network
+*		namespaces instead of sockets.
 *	Return
 *		A 8-byte long opaque number.
 *
@@ -3052,22 +3061,98 @@ union bpf_attr {
 *
 *		The *flags* argument must be zero.
 *	Return
-*		0 on success, or a negative errno in case of failure.
-*
-*		* **-EINVAL**		Unsupported flags specified.
-*		* **-ENOENT**		Socket is unavailable for assignment.
-*		* **-ENETUNREACH**	Socket is unreachable (wrong netns).
-*		* **-EOPNOTSUPP**	Unsupported operation, for example a
-*					call from outside of TC ingress.
-*		* **-ESOCKTNOSUPPORT**	Socket type not supported (reuseport).
+*		0 on success, or a negative error in case of failure:
+*
+*		**-EINVAL** if specified *flags* are not supported.
+*
+*		**-ENOENT** if the socket is unavailable for assignment.
+*
+*		**-ENETUNREACH** if the socket is unreachable (wrong netns).
+*
+*		**-EOPNOTSUPP** if the operation is not supported, for example
+*		a call from outside of TC ingress.
+*
+*		**-ESOCKTNOSUPPORT** if the socket type is not supported
+*		(reuseport).
 *
 * u64 bpf_ktime_get_boot_ns(void)
 *	Description
 *		Return the time elapsed since system boot, in nanoseconds.
 *		Does include the time the system was suspended.
-*		See: clock_gettime(CLOCK_BOOTTIME)
+*		See: **clock_gettime**\ (**CLOCK_BOOTTIME**)
 *	Return
 *		Current *ktime*.
 *
+* int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+*	Description
+*		**bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
+*		out the format string.
+*		The *m* represents the seq_file. The *fmt* and *fmt_size* are for
+*		the format string itself. The *data* and *data_len* are format string
+*		arguments. The *data* are a **u64** array and corresponding format string
+*		values are stored in the array. For strings and pointers where pointees
+*		are accessed, only the pointer values are stored in the *data* array.
+*		The *data_len* is the size of *data* in bytes.
+*
+*		Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory.
+*		Reading kernel memory may fail due to either invalid address or
+*		valid address but requiring a major memory fault. If reading kernel memory
+*		fails, the string for **%s** will be an empty string, and the ip
+*		address for **%p{i,I}{4,6}** will be 0. Not returning error to
+*		bpf program is consistent with what **bpf_trace_printk**\ () does for now.
+*	Return
+*		0 on success, or a negative error in case of failure:
+*
+*		**-EBUSY** if per-CPU memory copy buffer is busy, can try again
+*		by returning 1 from bpf program.
+*
+*		**-EINVAL** if arguments are invalid, or if *fmt* is invalid/unsupported.
+*
+*		**-E2BIG** if *fmt* contains too many format specifiers.
+*
+*		**-EOVERFLOW** if an overflow happened: The same object will be tried again.
+*
+* int bpf_seq_write(struct seq_file *m, const void *data, u32 len)
+*	Description
+*		**bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data.
+*		The *m* represents the seq_file. The *data* and *len* represent the
+*		data to write in bytes.
+*	Return
+*		0 on success, or a negative error in case of failure:
+*
+*		**-EOVERFLOW** if an overflow happened: The same object will be tried again.
+*
+* u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
+*	Description
+*		Return the cgroup v2 id of the socket *sk*.
+*
+*		*sk* must be a non-**NULL** pointer to a full socket, e.g. one
+*		returned from **bpf_sk_lookup_xxx**\ (),
+*		**bpf_sk_fullsock**\ (), etc. The format of returned id is
+*		same as in **bpf_skb_cgroup_id**\ ().
+*
+*		This helper is available only if the kernel was compiled with
+*		the **CONFIG_SOCK_CGROUP_DATA** configuration option.
+*	Return
+*		The id is returned or 0 in case the id could not be retrieved.
+*
+* u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
+*	Description
+*		Return id of cgroup v2 that is ancestor of cgroup associated
+*		with the *sk* at the *ancestor_level*. The root cgroup is at
+*		*ancestor_level* zero and each step down the hierarchy
+*		increments the level. If *ancestor_level* == level of cgroup
+*		associated with *sk*, then return value will be same as that
+*		of **bpf_sk_cgroup_id**\ ().
+*
+*		The helper is useful to implement policies based on cgroups
+*		that are upper in hierarchy than immediate cgroup associated
+*		with *sk*.
+*
+*		The format of returned id and helper limitations are same as in
+*		**bpf_sk_cgroup_id**\ ().
+*	Return
+*		The id is returned or 0 in case the id could not be retrieved.
 */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3195,7 +3280,11 @@ union bpf_attr {
 	FN(get_netns_cookie),		\
 	FN(get_current_ancestor_cgroup_id),	\
 	FN(sk_assign),			\
-	FN(ktime_get_boot_ns),
+	FN(ktime_get_boot_ns),		\
+	FN(seq_printf),			\
+	FN(seq_write),			\
+	FN(sk_cgroup_id),		\
+	FN(sk_ancestor_cgroup_id),

 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
@@ -3673,7 +3762,7 @@ struct bpf_sock_addr {
 	__u32 user_ip6[4];	/* Allows 1,2,4,8-byte read and 4,8-byte write.
 				 * Stored in network byte order.
 				 */
-	__u32 user_port;	/* Allows 4-byte read and write.
+	__u32 user_port;	/* Allows 1,2,4-byte read and 4-byte write.
 				 * Stored in network byte order
 				 */
 	__u32 family;		/* Allows 4-byte read, but no write */
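The user_port comment change is the user-visible half of the narrow-loads work: 1- and 2-byte reads of the field now pass the verifier, where previously only a full 4-byte read was allowed. A sketch of a cgroup connect program using it, assuming libbpf's section naming and a clang -target bpf build:

#include <linux/bpf.h>

__attribute__((section("cgroup/connect4"), used))
int block_redis(struct bpf_sock_addr *ctx)
{
	/* 2-byte narrow load of user_port; narrow loads return the low
	 * bytes of the field, i.e. the port still in network byte order.
	 */
	__u16 port = *(__u16 *)&ctx->user_port;

	/* bswap16 of the constant assumes a little-endian host; real
	 * code would use bpf_htons() from bpf_endian.h.
	 */
	if (port == __builtin_bswap16(6379))
		return 0;	/* reject connect() to port 6379 */
	return 1;		/* allow */
}

char _license[] __attribute__((section("license"), used)) = "GPL";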
include/uapi/linux/capability.h
@@ -367,8 +367,14 @@ struct vfs_ns_cap_data {

 #define CAP_AUDIT_READ		37

+/*
+ * Allow system performance and observability privileged operations
+ * using perf_events, i915_perf and other kernel subsystems
+ */
+
+#define CAP_PERFMON		38

-#define CAP_LAST_CAP         CAP_AUDIT_READ
+#define CAP_LAST_CAP         CAP_PERFMON

 #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
kernel/bpf/Makefile
@@ -2,7 +2,7 @@
 obj-y := core.o
 CFLAGS_core.o += $(call cc-disable-warning, override-init)

-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
kernel/bpf/bpf_iter.c (new file, 539 lines)
@@ -0,0 +1,539 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2020 Facebook */

#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/filter.h>
#include <linux/bpf.h>

struct bpf_iter_target_info {
	struct list_head list;
	const struct bpf_iter_reg *reg_info;
	u32 btf_id;	/* cached value */
};

struct bpf_iter_link {
	struct bpf_link link;
	struct bpf_iter_target_info *tinfo;
};

struct bpf_iter_priv_data {
	struct bpf_iter_target_info *tinfo;
	struct bpf_prog *prog;
	u64 session_id;
	u64 seq_num;
	bool done_stop;
	u8 target_private[] __aligned(8);
};

static struct list_head targets = LIST_HEAD_INIT(targets);
static DEFINE_MUTEX(targets_mutex);

/* protect bpf_iter_link changes */
static DEFINE_MUTEX(link_mutex);

/* incremented on every opened seq_file */
static atomic64_t session_id;

static int prepare_seq_file(struct file *file, struct bpf_iter_link *link);

static void bpf_iter_inc_seq_num(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	iter_priv->seq_num++;
}

static void bpf_iter_dec_seq_num(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	iter_priv->seq_num--;
}

static void bpf_iter_done_stop(struct seq_file *seq)
{
	struct bpf_iter_priv_data *iter_priv;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);
	iter_priv->done_stop = true;
}

/* bpf_seq_read, a customized and simpler version for bpf iterator.
 * no_llseek is assumed for this file.
 * The following are differences from seq_read():
 *  . fixed buffer size (PAGE_SIZE)
 *  . assuming no_llseek
 *  . stop() may call bpf program, handling potential overflow there
 */
static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
			    loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	size_t n, offs, copied = 0;
	int err = 0;
	void *p;

	mutex_lock(&seq->lock);

	if (!seq->buf) {
		seq->size = PAGE_SIZE;
		seq->buf = kmalloc(seq->size, GFP_KERNEL);
		if (!seq->buf) {
			err = -ENOMEM;
			goto done;
		}
	}

	if (seq->count) {
		n = min(seq->count, size);
		err = copy_to_user(buf, seq->buf + seq->from, n);
		if (err) {
			err = -EFAULT;
			goto done;
		}
		seq->count -= n;
		seq->from += n;
		copied = n;
		goto done;
	}

	seq->from = 0;
	p = seq->op->start(seq, &seq->index);
	if (!p)
		goto stop;
	if (IS_ERR(p)) {
		err = PTR_ERR(p);
		seq->op->stop(seq, p);
		seq->count = 0;
		goto done;
	}

	err = seq->op->show(seq, p);
	if (err > 0) {
		/* object is skipped, decrease seq_num, so next
		 * valid object can reuse the same seq_num.
		 */
		bpf_iter_dec_seq_num(seq);
		seq->count = 0;
	} else if (err < 0 || seq_has_overflowed(seq)) {
		if (!err)
			err = -E2BIG;
		seq->op->stop(seq, p);
		seq->count = 0;
		goto done;
	}

	while (1) {
		loff_t pos = seq->index;

		offs = seq->count;
		p = seq->op->next(seq, p, &seq->index);
		if (pos == seq->index) {
			pr_info_ratelimited("buggy seq_file .next function %ps "
				"did not updated position index\n",
				seq->op->next);
			seq->index++;
		}

		if (IS_ERR_OR_NULL(p))
			break;

		/* got a valid next object, increase seq_num */
		bpf_iter_inc_seq_num(seq);

		if (seq->count >= size)
			break;

		err = seq->op->show(seq, p);
		if (err > 0) {
			bpf_iter_dec_seq_num(seq);
			seq->count = offs;
		} else if (err < 0 || seq_has_overflowed(seq)) {
			seq->count = offs;
			if (offs == 0) {
				if (!err)
					err = -E2BIG;
				seq->op->stop(seq, p);
				goto done;
			}
			break;
		}
	}
stop:
	offs = seq->count;
	/* bpf program called if !p */
	seq->op->stop(seq, p);
	if (!p) {
		if (!seq_has_overflowed(seq)) {
			bpf_iter_done_stop(seq);
		} else {
			seq->count = offs;
			if (offs == 0) {
				err = -E2BIG;
				goto done;
			}
		}
	}

	n = min(seq->count, size);
	err = copy_to_user(buf, seq->buf, n);
	if (err) {
		err = -EFAULT;
		goto done;
	}
	copied = n;
	seq->count -= n;
	seq->from = n;
done:
	if (!copied)
		copied = err;
	else
		*ppos += copied;
	mutex_unlock(&seq->lock);
	return copied;
}

static int iter_open(struct inode *inode, struct file *file)
{
	struct bpf_iter_link *link = inode->i_private;

	return prepare_seq_file(file, link);
}

static int iter_release(struct inode *inode, struct file *file)
{
	struct bpf_iter_priv_data *iter_priv;
	struct seq_file *seq;

	seq = file->private_data;
	if (!seq)
		return 0;

	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
				 target_private);

	if (iter_priv->tinfo->reg_info->fini_seq_private)
		iter_priv->tinfo->reg_info->fini_seq_private(seq->private);

	bpf_prog_put(iter_priv->prog);
	seq->private = iter_priv;

	return seq_release_private(inode, file);
}

const struct file_operations bpf_iter_fops = {
	.open		= iter_open,
	.llseek		= no_llseek,
	.read		= bpf_seq_read,
	.release	= iter_release,
};

/* The argument reg_info will be cached in bpf_iter_target_info.
 * The common practice is to declare target reg_info as
 * a const static variable and passed as an argument to
 * bpf_iter_reg_target().
 */
int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info)
{
	struct bpf_iter_target_info *tinfo;

	tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
	if (!tinfo)
		return -ENOMEM;

	tinfo->reg_info = reg_info;
	INIT_LIST_HEAD(&tinfo->list);

	mutex_lock(&targets_mutex);
	list_add(&tinfo->list, &targets);
	mutex_unlock(&targets_mutex);

	return 0;
}

void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info)
{
	struct bpf_iter_target_info *tinfo;
	bool found = false;

	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		if (reg_info == tinfo->reg_info) {
			list_del(&tinfo->list);
			kfree(tinfo);
			found = true;
			break;
		}
	}
	mutex_unlock(&targets_mutex);

	WARN_ON(found == false);
}

static void cache_btf_id(struct bpf_iter_target_info *tinfo,
			 struct bpf_prog *prog)
{
	tinfo->btf_id = prog->aux->attach_btf_id;
}

bool bpf_iter_prog_supported(struct bpf_prog *prog)
{
	const char *attach_fname = prog->aux->attach_func_name;
	u32 prog_btf_id = prog->aux->attach_btf_id;
	const char *prefix = BPF_ITER_FUNC_PREFIX;
	struct bpf_iter_target_info *tinfo;
	int prefix_len = strlen(prefix);
	bool supported = false;

	if (strncmp(attach_fname, prefix, prefix_len))
		return false;

	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		if (tinfo->btf_id && tinfo->btf_id == prog_btf_id) {
			supported = true;
			break;
		}
		if (!strcmp(attach_fname + prefix_len, tinfo->reg_info->target)) {
			cache_btf_id(tinfo, prog);
			supported = true;
			break;
		}
	}
	mutex_unlock(&targets_mutex);

	if (supported) {
		prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size;
		prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info;
	}

	return supported;
}

static void bpf_iter_link_release(struct bpf_link *link)
{
}

static void bpf_iter_link_dealloc(struct bpf_link *link)
{
	struct bpf_iter_link *iter_link =
		container_of(link, struct bpf_iter_link, link);

	kfree(iter_link);
}

static int bpf_iter_link_replace(struct bpf_link *link,
				 struct bpf_prog *new_prog,
				 struct bpf_prog *old_prog)
{
	int ret = 0;

	mutex_lock(&link_mutex);
	if (old_prog && link->prog != old_prog) {
		ret = -EPERM;
		goto out_unlock;
	}

	if (link->prog->type != new_prog->type ||
	    link->prog->expected_attach_type != new_prog->expected_attach_type ||
	    link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) {
		ret = -EINVAL;
		goto out_unlock;
	}

	old_prog = xchg(&link->prog, new_prog);
	bpf_prog_put(old_prog);

out_unlock:
	mutex_unlock(&link_mutex);
	return ret;
}

static const struct bpf_link_ops bpf_iter_link_lops = {
	.release = bpf_iter_link_release,
	.dealloc = bpf_iter_link_dealloc,
	.update_prog = bpf_iter_link_replace,
};

bool bpf_link_is_iter(struct bpf_link *link)
{
	return link->ops == &bpf_iter_link_lops;
}

int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_link_primer link_primer;
	struct bpf_iter_target_info *tinfo;
	struct bpf_iter_link *link;
	bool existed = false;
	u32 prog_btf_id;
	int err;

	if (attr->link_create.target_fd || attr->link_create.flags)
		return -EINVAL;

	prog_btf_id = prog->aux->attach_btf_id;
	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		if (tinfo->btf_id == prog_btf_id) {
			existed = true;
			break;
		}
	}
	mutex_unlock(&targets_mutex);
	if (!existed)
		return -ENOENT;

	link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
	if (!link)
		return -ENOMEM;

	bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog);
	link->tinfo = tinfo;

	err = bpf_link_prime(&link->link, &link_primer);
	if (err) {
		kfree(link);
		return err;
	}

	return bpf_link_settle(&link_primer);
}

static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
			  struct bpf_iter_target_info *tinfo,
			  struct bpf_prog *prog)
{
	priv_data->tinfo = tinfo;
	priv_data->prog = prog;
	priv_data->session_id = atomic64_inc_return(&session_id);
	priv_data->seq_num = 0;
	priv_data->done_stop = false;
}

static int prepare_seq_file(struct file *file, struct bpf_iter_link *link)
{
	struct bpf_iter_priv_data *priv_data;
	struct bpf_iter_target_info *tinfo;
	struct bpf_prog *prog;
	u32 total_priv_dsize;
	struct seq_file *seq;
	int err = 0;

	mutex_lock(&link_mutex);
	prog = link->link.prog;
	bpf_prog_inc(prog);
	mutex_unlock(&link_mutex);

	tinfo = link->tinfo;
	total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) +
			   tinfo->reg_info->seq_priv_size;
	priv_data = __seq_open_private(file, tinfo->reg_info->seq_ops,
				       total_priv_dsize);
	if (!priv_data) {
		err = -ENOMEM;
		goto release_prog;
	}

	if (tinfo->reg_info->init_seq_private) {
		err = tinfo->reg_info->init_seq_private(priv_data->target_private);
		if (err)
			goto release_seq_file;
	}

	init_seq_meta(priv_data, tinfo, prog);
	seq = file->private_data;
	seq->private = priv_data->target_private;

	return 0;

release_seq_file:
	seq_release_private(file->f_inode, file);
	file->private_data = NULL;
release_prog:
	bpf_prog_put(prog);
	return err;
}

int bpf_iter_new_fd(struct bpf_link *link)
{
	struct file *file;
	unsigned int flags;
	int err, fd;

	if (link->ops != &bpf_iter_link_lops)
		return -EINVAL;

	flags = O_RDONLY | O_CLOEXEC;
	fd = get_unused_fd_flags(flags);
	if (fd < 0)
		return fd;

	file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto free_fd;
	}

	err = prepare_seq_file(file,
			       container_of(link, struct bpf_iter_link, link));
	if (err)
		goto free_file;

	fd_install(fd, file);
	return fd;

free_file:
	fput(file);
free_fd:
	put_unused_fd(fd);
	return err;
}

struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop)
{
	struct bpf_iter_priv_data *iter_priv;
	struct seq_file *seq;
	void *seq_priv;

	seq = meta->seq;
	if (seq->file->f_op != &bpf_iter_fops)
		return NULL;

	seq_priv = seq->private;
	iter_priv = container_of(seq_priv, struct bpf_iter_priv_data,
				 target_private);

	if (in_stop && iter_priv->done_stop)
		return NULL;

	meta->session_id = iter_priv->session_id;
	meta->seq_num = iter_priv->seq_num;

	return iter_priv->prog;
}

int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
{
	int ret;

	rcu_read_lock();
	migrate_disable();
	ret = BPF_PROG_RUN(prog, ctx);
	migrate_enable();
	rcu_read_unlock();

	/* bpf program can only return 0 or 1:
	 *  0 : okay
	 *  1 : retry the same object
	 * The bpf_iter_run_prog() return value
	 * will be seq_ops->show() return value.
	 */
	return ret == 0 ? 0 : -EAGAIN;
}
kernel/bpf/btf.c
@@ -3694,7 +3694,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 	struct bpf_verifier_log *log = info->log;
 	const struct btf_param *args;
 	u32 nr_args, arg;
-	int ret;
+	int i, ret;

 	if (off % 8) {
 		bpf_log(log, "func '%s' offset %d is not multiple of 8\n",
@@ -3791,6 +3791,14 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,

 	/* this is a pointer to another type */
 	info->reg_type = PTR_TO_BTF_ID;
+	for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
+		const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
+
+		if (ctx_arg_info->offset == off) {
+			info->reg_type = ctx_arg_info->reg_type;
+			break;
+		}
+	}

 	if (tgt_prog) {
 		ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type, arg);
@@ -3830,6 +3838,7 @@ int btf_struct_access(struct bpf_verifier_log *log,
 	const struct btf_type *mtype, *elem_type = NULL;
 	const struct btf_member *member;
 	const char *tname, *mname;
+	u32 vlen;

again:
 	tname = __btf_name_by_offset(btf_vmlinux, t->name_off);
@@ -3838,7 +3847,43 @@ again:
 		return -EINVAL;
 	}

+	vlen = btf_type_vlen(t);
 	if (off + size > t->size) {
+		/* If the last element is a variable size array, we may
+		 * need to relax the rule.
+		 */
+		struct btf_array *array_elem;
+
+		if (vlen == 0)
+			goto error;
+
+		member = btf_type_member(t) + vlen - 1;
+		mtype = btf_type_skip_modifiers(btf_vmlinux, member->type,
+						NULL);
+		if (!btf_type_is_array(mtype))
+			goto error;
+
+		array_elem = (struct btf_array *)(mtype + 1);
+		if (array_elem->nelems != 0)
+			goto error;
+
+		moff = btf_member_bit_offset(t, member) / 8;
+		if (off < moff)
+			goto error;
+
+		/* Only allow structure for now, can be relaxed for
+		 * other types later.
+		 */
+		elem_type = btf_type_skip_modifiers(btf_vmlinux,
+						    array_elem->type, NULL);
+		if (!btf_type_is_struct(elem_type))
+			goto error;
+
+		off = (off - moff) % elem_type->size;
+		return btf_struct_access(log, elem_type, off, size, atype,
+					 next_btf_id);
+
+error:
 		bpf_log(log, "access beyond struct %s at off %u size %u\n",
 			tname, off, size);
 		return -EACCES;
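Concretely, btf_struct_access() now accepts a PTR_TO_BTF_ID access that lands past sizeof() of a struct when the struct's last member is a zero-sized array of structs: the offset is folded back into the element type modulo its size. An illustrative layout (the types here are hypothetical, not from this merge):

/* Hypothetical types, for illustration only. */
struct elem {
	int a;
	int b;
};

struct outer {
	int count;
	struct elem items[0];	/* variable size trailing array */
};

/* A tracing program reading outer->items[3].b produces
 *   off = offsetof(struct outer, items)
 *       + 3 * sizeof(struct elem) + offsetof(struct elem, b)
 * which exceeds sizeof(struct outer), but after the
 * (off - moff) % sizeof(struct elem) reduction above it resolves
 * to elem.b and verifies.
 */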
kernel/bpf/inode.c
@@ -358,8 +358,11 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)

 static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg)
 {
+	struct bpf_link *link = arg;
+
 	return bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops,
-			     &bpffs_obj_fops);
+			     bpf_link_is_iter(link) ?
+			     &bpf_iter_fops : &bpffs_obj_fops);
 }

 static struct dentry *
kernel/bpf/map_iter.c (new file, 102 lines)
@@ -0,0 +1,102 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2020 Facebook */
#include <linux/bpf.h>
#include <linux/fs.h>
#include <linux/filter.h>
#include <linux/kernel.h>

struct bpf_iter_seq_map_info {
	u32 mid;
};

static void *bpf_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_map_info *info = seq->private;
	struct bpf_map *map;

	map = bpf_map_get_curr_or_next(&info->mid);
	if (!map)
		return NULL;

	++*pos;
	return map;
}

static void *bpf_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_map_info *info = seq->private;
	struct bpf_map *map;

	++*pos;
	++info->mid;
	bpf_map_put((struct bpf_map *)v);
	map = bpf_map_get_curr_or_next(&info->mid);
	if (!map)
		return NULL;

	return map;
}

struct bpf_iter__bpf_map {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct bpf_map *, map);
};

DEFINE_BPF_ITER_FUNC(bpf_map, struct bpf_iter_meta *meta, struct bpf_map *map)

static int __bpf_map_seq_show(struct seq_file *seq, void *v, bool in_stop)
{
	struct bpf_iter__bpf_map ctx;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	int ret = 0;

	ctx.meta = &meta;
	ctx.map = v;
	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, in_stop);
	if (prog)
		ret = bpf_iter_run_prog(prog, &ctx);

	return ret;
}

static int bpf_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_map_seq_show(seq, v, false);
}

static void bpf_map_seq_stop(struct seq_file *seq, void *v)
{
	if (!v)
		(void)__bpf_map_seq_show(seq, v, true);
	else
		bpf_map_put((struct bpf_map *)v);
}

static const struct seq_operations bpf_map_seq_ops = {
	.start	= bpf_map_seq_start,
	.next	= bpf_map_seq_next,
	.stop	= bpf_map_seq_stop,
	.show	= bpf_map_seq_show,
};

static const struct bpf_iter_reg bpf_map_reg_info = {
	.target			= "bpf_map",
	.seq_ops		= &bpf_map_seq_ops,
	.init_seq_private	= NULL,
	.fini_seq_private	= NULL,
	.seq_priv_size		= sizeof(struct bpf_iter_seq_map_info),
	.ctx_arg_info_size	= 1,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__bpf_map, map),
		  PTR_TO_BTF_ID_OR_NULL },
	},
};

static int __init bpf_map_iter_init(void)
{
	return bpf_iter_reg_target(&bpf_map_reg_info);
}

late_initcall(bpf_map_iter_init);
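map_iter.c doubles as the template other targets follow. A hedged sketch of a hypothetical netns-aware target that additionally wires in the proc_net helpers added earlier in this merge; every "foo" name is illustrative:

/* Hypothetical target registration, mirroring bpf_map_reg_info above.
 * The private data must begin with struct seq_net_private so that
 * bpf_iter_init_seq_net()/bpf_iter_fini_seq_net() from
 * fs/proc/proc_net.c can pin and drop the opener's netns.
 */
struct foo;

struct bpf_iter__foo {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct foo *, foo);
};

struct foo_iter_seq_info {
	struct seq_net_private p;	/* must come first */
	u32 foo_id;
};

static const struct seq_operations foo_seq_ops;	/* start/next/stop/show */

static const struct bpf_iter_reg foo_reg_info = {
	.target			= "foo",
	.seq_ops		= &foo_seq_ops,
	.init_seq_private	= bpf_iter_init_seq_net,
	.fini_seq_private	= bpf_iter_fini_seq_net,
	.seq_priv_size		= sizeof(struct foo_iter_seq_info),
	.ctx_arg_info_size	= 1,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__foo, foo),
		  PTR_TO_BTF_ID_OR_NULL },
	},
};

static int __init foo_iter_init(void)
{
	/* A matching DEFINE_BPF_ITER_FUNC(foo, ...) must accompany this
	 * so the bpf_iter_foo attach point exists in BTF.
	 */
	return bpf_iter_reg_target(&foo_reg_info);
}
late_initcall(foo_iter_init);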
kernel/bpf/queue_stack_maps.c
@@ -19,7 +19,7 @@ struct bpf_queue_stack {
 	u32 head, tail;
 	u32 size; /* max_entries + 1 */

-	char elements[0] __aligned(8);
+	char elements[] __aligned(8);
 };

 static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map)
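The [0] to [] change swaps the GNU zero-length-array extension for a C99 flexible array member; allocation sizing is unchanged, since sizeof() excludes the trailing member either way. A minimal userspace illustration of the idiom, with hypothetical values:

#include <stdlib.h>
#include <string.h>

struct buf {
	unsigned int len;
	char data[];	/* C99 flexible array member, replaces data[0] */
};

int main(void)
{
	/* sizeof(struct buf) excludes the flexible member, so the
	 * allocation pattern is identical to the [0] version.
	 */
	struct buf *b = malloc(sizeof(*b) + 16);
	if (!b)
		return 1;
	b->len = 16;
	memset(b->data, 0, b->len);
	free(b);
	return 0;
}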
@ -2729,6 +2729,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
|
||||
case BPF_CGROUP_GETSOCKOPT:
|
||||
case BPF_CGROUP_SETSOCKOPT:
|
||||
return BPF_PROG_TYPE_CGROUP_SOCKOPT;
|
||||
case BPF_TRACE_ITER:
|
||||
return BPF_PROG_TYPE_TRACING;
|
||||
default:
|
||||
return BPF_PROG_TYPE_UNSPEC;
|
||||
}
|
||||
@ -2932,6 +2934,25 @@ static int bpf_obj_get_next_id(const union bpf_attr *attr,
|
||||
return err;
|
||||
}
|
||||
|
||||
struct bpf_map *bpf_map_get_curr_or_next(u32 *id)
|
||||
{
|
||||
struct bpf_map *map;
|
||||
|
||||
spin_lock_bh(&map_idr_lock);
|
||||
again:
|
||||
map = idr_get_next(&map_idr, id);
|
||||
if (map) {
|
||||
map = __bpf_map_inc_not_zero(map, false);
|
||||
if (IS_ERR(map)) {
|
||||
(*id)++;
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
spin_unlock_bh(&map_idr_lock);
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
|
||||
|
||||
struct bpf_prog *bpf_prog_by_id(u32 id)
|
||||
@@ -3729,6 +3750,15 @@ err_put:
 	return err;
 }
 
+static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	if (attr->link_create.attach_type == BPF_TRACE_ITER &&
+	    prog->expected_attach_type == BPF_TRACE_ITER)
+		return bpf_iter_link_attach(attr, prog);
+
+	return -EINVAL;
+}
+
 #define BPF_LINK_CREATE_LAST_FIELD link_create.flags
 static int link_create(union bpf_attr *attr)
 {
@@ -3765,6 +3795,9 @@ static int link_create(union bpf_attr *attr)
 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 		ret = cgroup_bpf_link_attach(attr, prog);
 		break;
+	case BPF_PROG_TYPE_TRACING:
+		ret = tracing_bpf_link_attach(attr, prog);
+		break;
 	default:
 		ret = -EINVAL;
 	}
@@ -3927,6 +3960,29 @@ static int bpf_enable_stats(union bpf_attr *attr)
 	return -EINVAL;
 }
 
+#define BPF_ITER_CREATE_LAST_FIELD iter_create.flags
+
+static int bpf_iter_create(union bpf_attr *attr)
+{
+	struct bpf_link *link;
+	int err;
+
+	if (CHECK_ATTR(BPF_ITER_CREATE))
+		return -EINVAL;
+
+	if (attr->iter_create.flags)
+		return -EINVAL;
+
+	link = bpf_link_get_from_fd(attr->iter_create.link_fd);
+	if (IS_ERR(link))
+		return PTR_ERR(link);
+
+	err = bpf_iter_new_fd(link);
+	bpf_link_put(link);
+
+	return err;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
 	union bpf_attr attr;
@@ -4054,6 +4110,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_ENABLE_STATS:
 		err = bpf_enable_stats(&attr);
 		break;
+	case BPF_ITER_CREATE:
+		err = bpf_iter_create(&attr);
+		break;
 	default:
 		err = -EINVAL;
 		break;
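Taken together, the two syscall additions give this userspace flow: LINK_CREATE attaches the iterator program, then BPF_ITER_CREATE converts the link into a readable seq_file fd. A hedged raw-syscall sketch under those assumptions (program loading elided, error handling minimal):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

/* prog_fd: an already-loaded BPF_PROG_TYPE_TRACING program with
 * expected_attach_type == BPF_TRACE_ITER.
 */
static int read_iterator(int prog_fd)
{
	union bpf_attr attr;
	char buf[4096];
	int link_fd, iter_fd;
	ssize_t n;

	memset(&attr, 0, sizeof(attr));
	attr.link_create.prog_fd = prog_fd;
	attr.link_create.attach_type = BPF_TRACE_ITER;
	link_fd = syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));
	if (link_fd < 0)
		return -1;

	memset(&attr, 0, sizeof(attr));
	attr.iter_create.link_fd = link_fd;
	iter_fd = syscall(__NR_bpf, BPF_ITER_CREATE, &attr, sizeof(attr));
	if (iter_fd < 0)
		return -1;

	/* each read() drives the seq_file ops, which run the BPF prog */
	while ((n = read(iter_fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);

	close(iter_fd);
	close(link_fd);
	return 0;
}

The same series also exposes this through bpftool ("bpftool iter pin"), which pins an iterator to bpffs so that plain reads of the pinned file produce the program's output.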
kernel/bpf/task_iter.c (new file)
@@ -0,0 +1,353 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+
+#include <linux/init.h>
+#include <linux/namei.h>
+#include <linux/pid_namespace.h>
+#include <linux/fs.h>
+#include <linux/fdtable.h>
+#include <linux/filter.h>
+
+struct bpf_iter_seq_task_common {
+	struct pid_namespace *ns;
+};
+
+struct bpf_iter_seq_task_info {
+	/* The first field must be struct bpf_iter_seq_task_common.
+	 * this is assumed by {init, fini}_seq_pidns() callback functions.
+	 */
+	struct bpf_iter_seq_task_common common;
+	u32 tid;
+};
+
+static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
+					     u32 *tid)
+{
+	struct task_struct *task = NULL;
+	struct pid *pid;
+
+	rcu_read_lock();
+retry:
+	pid = idr_get_next(&ns->idr, tid);
+	if (pid) {
+		task = get_pid_task(pid, PIDTYPE_PID);
+		if (!task) {
+			++*tid;
+			goto retry;
+		}
+	}
+	rcu_read_unlock();
+
+	return task;
+}
+
+static void *task_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct bpf_iter_seq_task_info *info = seq->private;
+	struct task_struct *task;
+
+	task = task_seq_get_next(info->common.ns, &info->tid);
+	if (!task)
+		return NULL;
+
+	++*pos;
+	return task;
+}
+
+static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct bpf_iter_seq_task_info *info = seq->private;
+	struct task_struct *task;
+
+	++*pos;
+	++info->tid;
+	put_task_struct((struct task_struct *)v);
+	task = task_seq_get_next(info->common.ns, &info->tid);
+	if (!task)
+		return NULL;
+
+	return task;
+}
+
+struct bpf_iter__task {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct task_struct *, task);
+};
+
+DEFINE_BPF_ITER_FUNC(task, struct bpf_iter_meta *meta, struct task_struct *task)
+
+static int __task_seq_show(struct seq_file *seq, struct task_struct *task,
+			   bool in_stop)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_iter__task ctx;
+	struct bpf_prog *prog;
+
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, in_stop);
+	if (!prog)
+		return 0;
+
+	meta.seq = seq;
+	ctx.meta = &meta;
+	ctx.task = task;
+	return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int task_seq_show(struct seq_file *seq, void *v)
+{
+	return __task_seq_show(seq, v, false);
+}
+
+static void task_seq_stop(struct seq_file *seq, void *v)
+{
+	if (!v)
+		(void)__task_seq_show(seq, v, true);
+	else
+		put_task_struct((struct task_struct *)v);
+}
+
+static const struct seq_operations task_seq_ops = {
+	.start	= task_seq_start,
+	.next	= task_seq_next,
+	.stop	= task_seq_stop,
+	.show	= task_seq_show,
+};
+
+struct bpf_iter_seq_task_file_info {
+	/* The first field must be struct bpf_iter_seq_task_common.
+	 * this is assumed by {init, fini}_seq_pidns() callback functions.
+	 */
+	struct bpf_iter_seq_task_common common;
+	struct task_struct *task;
+	struct files_struct *files;
+	u32 tid;
+	u32 fd;
+};
+
+static struct file *
+task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
+		       struct task_struct **task, struct files_struct **fstruct)
+{
+	struct pid_namespace *ns = info->common.ns;
+	u32 curr_tid = info->tid, max_fds;
+	struct files_struct *curr_files;
+	struct task_struct *curr_task;
+	int curr_fd = info->fd;
+
+	/* If this function returns a non-NULL file object,
+	 * it held a reference to the task/files_struct/file.
+	 * Otherwise, it does not hold any reference.
+	 */
+again:
+	if (*task) {
+		curr_task = *task;
+		curr_files = *fstruct;
+		curr_fd = info->fd;
+	} else {
+		curr_task = task_seq_get_next(ns, &curr_tid);
+		if (!curr_task)
+			return NULL;
+
+		curr_files = get_files_struct(curr_task);
+		if (!curr_files) {
+			put_task_struct(curr_task);
+			curr_tid = ++(info->tid);
+			info->fd = 0;
+			goto again;
+		}
+
+		/* set *fstruct, *task and info->tid */
+		*fstruct = curr_files;
+		*task = curr_task;
+		if (curr_tid == info->tid) {
+			curr_fd = info->fd;
+		} else {
+			info->tid = curr_tid;
+			curr_fd = 0;
+		}
+	}
+
+	rcu_read_lock();
+	max_fds = files_fdtable(curr_files)->max_fds;
+	for (; curr_fd < max_fds; curr_fd++) {
+		struct file *f;
+
+		f = fcheck_files(curr_files, curr_fd);
+		if (!f)
+			continue;
+
+		/* set info->fd */
+		info->fd = curr_fd;
+		get_file(f);
+		rcu_read_unlock();
+		return f;
+	}
+
+	/* the current task is done, go to the next task */
+	rcu_read_unlock();
+	put_files_struct(curr_files);
+	put_task_struct(curr_task);
+	*task = NULL;
+	*fstruct = NULL;
+	info->fd = 0;
+	curr_tid = ++(info->tid);
+	goto again;
+}
+
+static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct bpf_iter_seq_task_file_info *info = seq->private;
+	struct files_struct *files = NULL;
+	struct task_struct *task = NULL;
+	struct file *file;
+
+	file = task_file_seq_get_next(info, &task, &files);
+	if (!file) {
+		info->files = NULL;
+		info->task = NULL;
+		return NULL;
+	}
+
+	++*pos;
+	info->task = task;
+	info->files = files;
+
+	return file;
+}
+
+static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct bpf_iter_seq_task_file_info *info = seq->private;
+	struct files_struct *files = info->files;
+	struct task_struct *task = info->task;
+	struct file *file;
+
+	++*pos;
+	++info->fd;
+	fput((struct file *)v);
+	file = task_file_seq_get_next(info, &task, &files);
+	if (!file) {
+		info->files = NULL;
+		info->task = NULL;
+		return NULL;
+	}
+
+	info->task = task;
+	info->files = files;
+
+	return file;
+}
+
+struct bpf_iter__task_file {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct task_struct *, task);
+	u32 fd __aligned(8);
+	__bpf_md_ptr(struct file *, file);
+};
+
+DEFINE_BPF_ITER_FUNC(task_file, struct bpf_iter_meta *meta,
+		     struct task_struct *task, u32 fd,
+		     struct file *file)
+
+static int __task_file_seq_show(struct seq_file *seq, struct file *file,
+				bool in_stop)
+{
+	struct bpf_iter_seq_task_file_info *info = seq->private;
+	struct bpf_iter__task_file ctx;
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, in_stop);
+	if (!prog)
+		return 0;
+
+	ctx.meta = &meta;
+	ctx.task = info->task;
+	ctx.fd = info->fd;
+	ctx.file = file;
+	return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int task_file_seq_show(struct seq_file *seq, void *v)
+{
+	return __task_file_seq_show(seq, v, false);
+}
+
+static void task_file_seq_stop(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_seq_task_file_info *info = seq->private;
+
+	if (!v) {
+		(void)__task_file_seq_show(seq, v, true);
+	} else {
+		fput((struct file *)v);
+		put_files_struct(info->files);
+		put_task_struct(info->task);
+		info->files = NULL;
+		info->task = NULL;
+	}
+}
+
+static int init_seq_pidns(void *priv_data)
+{
+	struct bpf_iter_seq_task_common *common = priv_data;
+
+	common->ns = get_pid_ns(task_active_pid_ns(current));
+	return 0;
+}
+
+static void fini_seq_pidns(void *priv_data)
+{
+	struct bpf_iter_seq_task_common *common = priv_data;
+
+	put_pid_ns(common->ns);
+}
+
+static const struct seq_operations task_file_seq_ops = {
+	.start	= task_file_seq_start,
+	.next	= task_file_seq_next,
+	.stop	= task_file_seq_stop,
+	.show	= task_file_seq_show,
+};
+
+static const struct bpf_iter_reg task_reg_info = {
+	.target			= "task",
+	.seq_ops		= &task_seq_ops,
+	.init_seq_private	= init_seq_pidns,
+	.fini_seq_private	= fini_seq_pidns,
+	.seq_priv_size		= sizeof(struct bpf_iter_seq_task_info),
+	.ctx_arg_info_size	= 1,
+	.ctx_arg_info		= {
+		{ offsetof(struct bpf_iter__task, task),
+		  PTR_TO_BTF_ID_OR_NULL },
+	},
+};
+
+static const struct bpf_iter_reg task_file_reg_info = {
+	.target			= "task_file",
+	.seq_ops		= &task_file_seq_ops,
+	.init_seq_private	= init_seq_pidns,
+	.fini_seq_private	= fini_seq_pidns,
+	.seq_priv_size		= sizeof(struct bpf_iter_seq_task_file_info),
+	.ctx_arg_info_size	= 2,
+	.ctx_arg_info		= {
+		{ offsetof(struct bpf_iter__task_file, task),
+		  PTR_TO_BTF_ID_OR_NULL },
+		{ offsetof(struct bpf_iter__task_file, file),
+		  PTR_TO_BTF_ID_OR_NULL },
+	},
+};
+
+static int __init task_iter_init(void)
+{
+	int ret;
+
+	ret = bpf_iter_reg_target(&task_reg_info);
+	if (ret)
+		return ret;
+
+	return bpf_iter_reg_target(&task_file_reg_info);
+}
+late_initcall(task_iter_init);
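A sketch of a program consuming the task_file target registered above, in the style of the accompanying selftests (vmlinux.h assumed as before; names illustrative). Note the NULL checks: the ctx pointers are PTR_TO_BTF_ID_OR_NULL and are NULL on the final stop pass.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

SEC("iter/task_file")
int dump_task_file(struct bpf_iter__task_file *ctx)
{
	struct seq_file *seq = ctx->meta->seq;
	struct task_struct *task = ctx->task;
	struct file *file = ctx->file;
	char fmt[] = "%8d %8d %lx\n";
	__u64 args[3];

	if (!task || !file)	/* both NULL on the stop pass */
		return 0;

	args[0] = task->tgid;
	args[1] = ctx->fd;
	args[2] = (__u64)file->f_op;
	bpf_seq_printf(seq, fmt, sizeof(fmt), args, sizeof(args));
	return 0;
}

char LICENSE[] SEC("license") = "GPL";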
@@ -398,7 +398,8 @@ static bool reg_type_may_be_null(enum bpf_reg_type type)
 	return type == PTR_TO_MAP_VALUE_OR_NULL ||
 	       type == PTR_TO_SOCKET_OR_NULL ||
 	       type == PTR_TO_SOCK_COMMON_OR_NULL ||
-	       type == PTR_TO_TCP_SOCK_OR_NULL;
+	       type == PTR_TO_TCP_SOCK_OR_NULL ||
+	       type == PTR_TO_BTF_ID_OR_NULL;
 }
 
 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
@@ -483,6 +484,7 @@ static const char * const reg_type_str[] = {
 	[PTR_TO_TP_BUFFER]	= "tp_buffer",
 	[PTR_TO_XDP_SOCK]	= "xdp_sock",
 	[PTR_TO_BTF_ID]		= "ptr_",
+	[PTR_TO_BTF_ID_OR_NULL]	= "ptr_or_null_",
 };
 
 static char slot_type_char[] = {
@@ -543,7 +545,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
 		/* reg->off should be 0 for SCALAR_VALUE */
 		verbose(env, "%lld", reg->var_off.value + reg->off);
 	} else {
-		if (t == PTR_TO_BTF_ID)
+		if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL)
 			verbose(env, "%s", kernel_type_name(reg->btf_id));
 		verbose(env, "(id=%d", reg->id);
 		if (reg_type_may_be_refcounted_or_null(t))
@@ -2139,6 +2141,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_TCP_SOCK_OR_NULL:
 	case PTR_TO_XDP_SOCK:
 	case PTR_TO_BTF_ID:
+	case PTR_TO_BTF_ID_OR_NULL:
 		return true;
 	default:
 		return false;
@@ -2659,7 +2662,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
 	 */
 	*reg_type = info.reg_type;
 
-	if (*reg_type == PTR_TO_BTF_ID)
+	if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL)
 		*btf_id = info.btf_id;
 	else
 		env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
@@ -3243,7 +3246,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 			 * a sub-register.
 			 */
 			regs[value_regno].subreg_def = DEF_NOT_SUBREG;
-			if (reg_type == PTR_TO_BTF_ID)
+			if (reg_type == PTR_TO_BTF_ID ||
+			    reg_type == PTR_TO_BTF_ID_OR_NULL)
 				regs[value_regno].btf_id = btf_id;
 		}
 		regs[value_regno].type = reg_type;
@@ -3490,6 +3494,11 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 			*stype = STACK_MISC;
 			goto mark;
 		}
+
+		if (state->stack[spi].slot_type[0] == STACK_SPILL &&
+		    state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
+			goto mark;
+
 		if (state->stack[spi].slot_type[0] == STACK_SPILL &&
 		    state->stack[spi].spilled_ptr.type == SCALAR_VALUE) {
 			__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
@@ -6572,6 +6581,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 			reg->type = PTR_TO_SOCK_COMMON;
 		} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
 			reg->type = PTR_TO_TCP_SOCK;
+		} else if (reg->type == PTR_TO_BTF_ID_OR_NULL) {
+			reg->type = PTR_TO_BTF_ID;
 		}
 		if (is_null) {
 			/* We don't need id and ref_obj_id from this point
@@ -7101,6 +7112,10 @@ static int check_return_code(struct bpf_verifier_env *env)
 			return 0;
 		range = tnum_const(0);
 		break;
+	case BPF_PROG_TYPE_TRACING:
+		if (env->prog->expected_attach_type != BPF_TRACE_ITER)
+			return 0;
+		break;
 	default:
 		return 0;
 	}
@@ -8425,6 +8440,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
 	case PTR_TO_TCP_SOCK_OR_NULL:
 	case PTR_TO_XDP_SOCK:
 	case PTR_TO_BTF_ID:
+	case PTR_TO_BTF_ID_OR_NULL:
 		return false;
 	default:
 		return true;
@@ -10481,6 +10497,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 	struct bpf_prog *tgt_prog = prog->aux->linked_prog;
 	u32 btf_id = prog->aux->attach_btf_id;
 	const char prefix[] = "btf_trace_";
+	struct btf_func_model fmodel;
 	int ret = 0, subprog = -1, i;
 	struct bpf_trampoline *tr;
 	const struct btf_type *t;
@@ -10622,6 +10639,22 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 		prog->aux->attach_func_proto = t;
 		prog->aux->attach_btf_trace = true;
 		return 0;
+	case BPF_TRACE_ITER:
+		if (!btf_type_is_func(t)) {
+			verbose(env, "attach_btf_id %u is not a function\n",
+				btf_id);
+			return -EINVAL;
+		}
+		t = btf_type_by_id(btf, t->type);
+		if (!btf_type_is_func_proto(t))
+			return -EINVAL;
+		prog->aux->attach_func_name = tname;
+		prog->aux->attach_func_proto = t;
+		if (!bpf_iter_prog_supported(prog))
+			return -EINVAL;
+		ret = btf_distill_func_proto(&env->log, btf, t,
+					     tname, &fmodel);
+		return ret;
 	default:
 		if (!prog_extension)
 			return -EINVAL;
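The net effect of these verifier changes, from a program author's point of view: a ctx pointer typed PTR_TO_BTF_ID_OR_NULL must be NULL-checked before any dereference, after which mark_ptr_or_null_reg() downgrades it to plain PTR_TO_BTF_ID. A minimal fragment, following the same skeleton (vmlinux.h, SEC, license) as the earlier sketches:

SEC("iter/task")
int null_check_demo(struct bpf_iter__task *ctx)
{
	struct task_struct *task = ctx->task;	/* PTR_TO_BTF_ID_OR_NULL */
	char fmt[] = "%d\n";
	__u64 pid;

	/* Loading task->pid before this check is rejected by the
	 * verifier; after it, the register becomes PTR_TO_BTF_ID.
	 */
	if (!task)
		return 0;

	pid = task->pid;
	bpf_seq_printf(ctx->meta->seq, fmt, sizeof(fmt), &pid, sizeof(pid));
	return 0;
}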
@@ -201,7 +201,7 @@ static int max_extfrag_threshold = 1000;
 
 #endif /* CONFIG_SYSCTL */
 
-#ifdef CONFIG_BPF_SYSCALL
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
 static int bpf_stats_handler(struct ctl_table *table, int write,
 			     void __user *buffer, size_t *lenp,
 			     loff_t *ppos)
@@ -457,6 +457,212 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
 	return &bpf_trace_printk_proto;
 }
 
+#define MAX_SEQ_PRINTF_VARARGS		12
+#define MAX_SEQ_PRINTF_MAX_MEMCPY	6
+#define MAX_SEQ_PRINTF_STR_LEN		128
+
+struct bpf_seq_printf_buf {
+	char buf[MAX_SEQ_PRINTF_MAX_MEMCPY][MAX_SEQ_PRINTF_STR_LEN];
+};
+static DEFINE_PER_CPU(struct bpf_seq_printf_buf, bpf_seq_printf_buf);
+static DEFINE_PER_CPU(int, bpf_seq_printf_buf_used);
+
+BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
+	   const void *, data, u32, data_len)
+{
+	int err = -EINVAL, fmt_cnt = 0, memcpy_cnt = 0;
+	int i, buf_used, copy_size, num_args;
+	u64 params[MAX_SEQ_PRINTF_VARARGS];
+	struct bpf_seq_printf_buf *bufs;
+	const u64 *args = data;
+
+	buf_used = this_cpu_inc_return(bpf_seq_printf_buf_used);
+	if (WARN_ON_ONCE(buf_used > 1)) {
+		err = -EBUSY;
+		goto out;
+	}
+
+	bufs = this_cpu_ptr(&bpf_seq_printf_buf);
+
+	/*
+	 * bpf_check()->check_func_arg()->check_stack_boundary()
+	 * guarantees that fmt points to bpf program stack,
+	 * fmt_size bytes of it were initialized and fmt_size > 0
+	 */
+	if (fmt[--fmt_size] != 0)
+		goto out;
+
+	if (data_len & 7)
+		goto out;
+
+	for (i = 0; i < fmt_size; i++) {
+		if (fmt[i] == '%') {
+			if (fmt[i + 1] == '%')
+				i++;
+			else if (!data || !data_len)
+				goto out;
+		}
+	}
+
+	num_args = data_len / 8;
+
+	/* check format string for allowed specifiers */
+	for (i = 0; i < fmt_size; i++) {
+		/* only printable ascii for now. */
+		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (fmt[i] != '%')
+			continue;
+
+		if (fmt[i + 1] == '%') {
+			i++;
+			continue;
+		}
+
+		if (fmt_cnt >= MAX_SEQ_PRINTF_VARARGS) {
+			err = -E2BIG;
+			goto out;
+		}
+
+		if (fmt_cnt >= num_args) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
+		i++;
+
+		/* skip optional "[0 +-][num]" width formating field */
+		while (fmt[i] == '0' || fmt[i] == '+' || fmt[i] == '-' ||
+		       fmt[i] == ' ')
+			i++;
+		if (fmt[i] >= '1' && fmt[i] <= '9') {
+			i++;
+			while (fmt[i] >= '0' && fmt[i] <= '9')
+				i++;
+		}
+
+		if (fmt[i] == 's') {
+			/* try our best to copy */
+			if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
+				err = -E2BIG;
+				goto out;
+			}
+
+			err = strncpy_from_unsafe(bufs->buf[memcpy_cnt],
+						  (void *) (long) args[fmt_cnt],
+						  MAX_SEQ_PRINTF_STR_LEN);
+			if (err < 0)
+				bufs->buf[memcpy_cnt][0] = '\0';
+			params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
+
+			fmt_cnt++;
+			memcpy_cnt++;
+			continue;
+		}
+
+		if (fmt[i] == 'p') {
+			if (fmt[i + 1] == 0 ||
+			    fmt[i + 1] == 'K' ||
+			    fmt[i + 1] == 'x') {
+				/* just kernel pointers */
+				params[fmt_cnt] = args[fmt_cnt];
+				fmt_cnt++;
+				continue;
+			}
+
+			/* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
+			if (fmt[i + 1] != 'i' && fmt[i + 1] != 'I') {
+				err = -EINVAL;
+				goto out;
+			}
+			if (fmt[i + 2] != '4' && fmt[i + 2] != '6') {
+				err = -EINVAL;
+				goto out;
+			}
+
+			if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
+				err = -E2BIG;
+				goto out;
+			}
+
+			copy_size = (fmt[i + 2] == '4') ? 4 : 16;
+
+			err = probe_kernel_read(bufs->buf[memcpy_cnt],
+						(void *) (long) args[fmt_cnt],
+						copy_size);
+			if (err < 0)
+				memset(bufs->buf[memcpy_cnt], 0, copy_size);
+			params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
+
+			i += 2;
+			fmt_cnt++;
+			memcpy_cnt++;
+			continue;
+		}
+
+		if (fmt[i] == 'l') {
+			i++;
+			if (fmt[i] == 'l')
+				i++;
+		}
+
+		if (fmt[i] != 'i' && fmt[i] != 'd' &&
+		    fmt[i] != 'u' && fmt[i] != 'x') {
+			err = -EINVAL;
+			goto out;
+		}
+
+		params[fmt_cnt] = args[fmt_cnt];
+		fmt_cnt++;
+	}
+
+	/* Maximumly we can have MAX_SEQ_PRINTF_VARARGS parameter, just give
+	 * all of them to seq_printf().
+	 */
+	seq_printf(m, fmt, params[0], params[1], params[2], params[3],
+		   params[4], params[5], params[6], params[7], params[8],
+		   params[9], params[10], params[11]);
+
+	err = seq_has_overflowed(m) ? -EOVERFLOW : 0;
+out:
+	this_cpu_dec(bpf_seq_printf_buf_used);
+	return err;
+}
+
+static int bpf_seq_printf_btf_ids[5];
+static const struct bpf_func_proto bpf_seq_printf_proto = {
+	.func		= bpf_seq_printf,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_PTR_TO_MEM_OR_NULL,
+	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
+	.btf_id		= bpf_seq_printf_btf_ids,
+};
+
+BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len)
+{
+	return seq_write(m, data, len) ? -EOVERFLOW : 0;
+}
+
+static int bpf_seq_write_btf_ids[5];
+static const struct bpf_func_proto bpf_seq_write_proto = {
+	.func		= bpf_seq_write,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.btf_id		= bpf_seq_write_btf_ids,
+};
+
 static __always_inline int
 get_map_perf_counter(struct bpf_map *map, u64 flags,
 		     u64 *value, u64 *enabled, u64 *running)
@@ -1226,6 +1432,14 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_xdp_output:
 		return &bpf_xdp_output_proto;
 #endif
+	case BPF_FUNC_seq_printf:
+		return prog->expected_attach_type == BPF_TRACE_ITER ?
+		       &bpf_seq_printf_proto :
+		       NULL;
+	case BPF_FUNC_seq_write:
+		return prog->expected_attach_type == BPF_TRACE_ITER ?
+		       &bpf_seq_write_proto :
+		       NULL;
 	default:
 		return raw_tp_prog_func_proto(func_id, prog);
 	}
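In practice the format checking above means: at most 12 conversion specifiers per call, of which at most 6 may be %s or %pi4/%pI4/%pi6/%pI6 (those copy through the per-cpu buffers), and all arguments travel as a flat array of u64. A small fragment as it would appear inside an iterator program (types via vmlinux.h, as in the earlier sketches):

/* fragment: inside an iterator program */
static __always_inline void show_comm(struct seq_file *seq,
				      struct task_struct *task)
{
	char fmt[] = "comm %s pid %d\n";
	__u64 args[2];

	args[0] = (__u64)task->comm;	/* %s: copied via the per-cpu buf */
	args[1] = task->pid;		/* %d: passed through as integer */
	bpf_seq_printf(seq, fmt, sizeof(fmt), args, sizeof(args));
}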
@@ -4003,16 +4003,22 @@ static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
 };
 
 #ifdef CONFIG_SOCK_CGROUP_DATA
+static inline u64 __bpf_sk_cgroup_id(struct sock *sk)
+{
+	struct cgroup *cgrp;
+
+	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+	return cgroup_id(cgrp);
+}
+
 BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
 {
 	struct sock *sk = skb_to_full_sk(skb);
-	struct cgroup *cgrp;
 
 	if (!sk || !sk_fullsock(sk))
 		return 0;
 
-	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-	return cgroup_id(cgrp);
+	return __bpf_sk_cgroup_id(sk);
 }
 
 static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
@@ -4022,16 +4028,12 @@ static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
 
-BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
-	   ancestor_level)
+static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk,
+					      int ancestor_level)
 {
-	struct sock *sk = skb_to_full_sk(skb);
 	struct cgroup *ancestor;
 	struct cgroup *cgrp;
 
-	if (!sk || !sk_fullsock(sk))
-		return 0;
-
 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 	ancestor = cgroup_ancestor(cgrp, ancestor_level);
 	if (!ancestor)
@@ -4040,6 +4042,17 @@ BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
 	return cgroup_id(ancestor);
 }
 
+BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
+	   ancestor_level)
+{
+	struct sock *sk = skb_to_full_sk(skb);
+
+	if (!sk || !sk_fullsock(sk))
+		return 0;
+
+	return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
+}
+
 static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
 	.func           = bpf_skb_ancestor_cgroup_id,
 	.gpl_only       = false,
@@ -4047,6 +4060,31 @@ static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
 	.arg1_type      = ARG_PTR_TO_CTX,
 	.arg2_type      = ARG_ANYTHING,
 };
+
+BPF_CALL_1(bpf_sk_cgroup_id, struct sock *, sk)
+{
+	return __bpf_sk_cgroup_id(sk);
+}
+
+static const struct bpf_func_proto bpf_sk_cgroup_id_proto = {
+	.func           = bpf_sk_cgroup_id,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_SOCKET,
+};
+
+BPF_CALL_2(bpf_sk_ancestor_cgroup_id, struct sock *, sk, int, ancestor_level)
+{
+	return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
+}
+
+static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
+	.func           = bpf_sk_ancestor_cgroup_id,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_SOCKET,
+	.arg2_type      = ARG_ANYTHING,
+};
 #endif
 
 static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
@@ -4525,30 +4563,28 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
 {
 #ifdef CONFIG_INET
 	struct sock *sk = ctx->sk;
+	u32 flags = BIND_FROM_BPF;
 	int err;
 
 	/* Binding to port can be expensive so it's prohibited in the helper.
 	 * Only binding to IP is supported.
 	 */
 	err = -EINVAL;
 	if (addr_len < offsetofend(struct sockaddr, sa_family))
 		return err;
 	if (addr->sa_family == AF_INET) {
 		if (addr_len < sizeof(struct sockaddr_in))
 			return err;
-		if (((struct sockaddr_in *)addr)->sin_port != htons(0))
-			return err;
-		return __inet_bind(sk, addr, addr_len, true, false);
+		if (((struct sockaddr_in *)addr)->sin_port == htons(0))
+			flags |= BIND_FORCE_ADDRESS_NO_PORT;
+		return __inet_bind(sk, addr, addr_len, flags);
 #if IS_ENABLED(CONFIG_IPV6)
 	} else if (addr->sa_family == AF_INET6) {
 		if (addr_len < SIN6_LEN_RFC2133)
 			return err;
-		if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
-			return err;
+		if (((struct sockaddr_in6 *)addr)->sin6_port == htons(0))
+			flags |= BIND_FORCE_ADDRESS_NO_PORT;
 		/* ipv6_bpf_stub cannot be NULL, since it's called from
 		 * bpf_cgroup_inet6_connect hook and ipv6 is already loaded
 		 */
-		return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false);
+		return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, flags);
 #endif /* CONFIG_IPV6 */
 	}
 #endif /* CONFIG_INET */
@@ -6159,8 +6195,22 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 #ifdef CONFIG_SOCK_CGROUP_DATA
 	case BPF_FUNC_skb_cgroup_id:
 		return &bpf_skb_cgroup_id_proto;
+	case BPF_FUNC_skb_ancestor_cgroup_id:
+		return &bpf_skb_ancestor_cgroup_id_proto;
+	case BPF_FUNC_sk_cgroup_id:
+		return &bpf_sk_cgroup_id_proto;
+	case BPF_FUNC_sk_ancestor_cgroup_id:
+		return &bpf_sk_ancestor_cgroup_id_proto;
 #endif
+#ifdef CONFIG_INET
+	case BPF_FUNC_sk_lookup_tcp:
+		return &bpf_sk_lookup_tcp_proto;
+	case BPF_FUNC_sk_lookup_udp:
+		return &bpf_sk_lookup_udp_proto;
+	case BPF_FUNC_sk_release:
+		return &bpf_sk_release_proto;
+	case BPF_FUNC_skc_lookup_tcp:
+		return &bpf_skc_lookup_tcp_proto;
 	case BPF_FUNC_tcp_sock:
 		return &bpf_tcp_sock_proto;
 	case BPF_FUNC_get_listener_sock:
@@ -7031,6 +7081,7 @@ static bool sock_addr_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
 	case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
 				msg_src_ip6[3]):
+	case bpf_ctx_range(struct bpf_sock_addr, user_port):
 		if (type == BPF_READ) {
 			bpf_ctx_record_field_size(info, size_default);
 
@@ -7061,10 +7112,6 @@ static bool sock_addr_is_valid_access(int off, int size,
 			return false;
 		}
 		break;
-	case bpf_ctx_range(struct bpf_sock_addr, user_port):
-		if (size != size_default)
-			return false;
-		break;
 	case offsetof(struct bpf_sock_addr, sk):
 		if (type != BPF_READ)
 			return false;
@@ -7960,8 +8007,8 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
 					struct bpf_insn *insn_buf,
 					struct bpf_prog *prog, u32 *target_size)
 {
+	int off, port_size = sizeof_field(struct sockaddr_in6, sin6_port);
 	struct bpf_insn *insn = insn_buf;
-	int off;
 
 	switch (si->off) {
 	case offsetof(struct bpf_sock_addr, user_family):
@@ -7996,9 +8043,11 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
 				     offsetof(struct sockaddr_in6, sin6_port));
 		BUILD_BUG_ON(sizeof_field(struct sockaddr_in, sin_port) !=
 			     sizeof_field(struct sockaddr_in6, sin6_port));
-		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern,
-						     struct sockaddr_in6, uaddr,
-						     sin6_port, tmp_reg);
+		/* Account for sin6_port being smaller than user_port. */
+		port_size = min(port_size, BPF_LDST_BYTES(si));
+		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
+			struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
+			sin6_port, bytes_to_bpf_size(port_size), 0, tmp_reg);
 		break;
 
 	case offsetof(struct bpf_sock_addr, family):
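Moving user_port into the narrow-load group means sock_addr programs may now read just the two port bytes instead of a mandatory full 4-byte load. A hedged sketch (little-endian byte layout assumed for the 2-byte access; the port value is illustrative):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("cgroup/connect4")
int connect4_port_check(struct bpf_sock_addr *ctx)
{
	/* 2-byte narrow load of the port; previously only a 4-byte read
	 * of user_port verified.  user_port holds the port in network
	 * byte order (low two bytes on little-endian).
	 */
	__u16 port = *(__u16 *)&ctx->user_port;

	if (port == bpf_htons(25))
		return 0;	/* 0 = reject: block SMTP connects */
	return 1;		/* 1 = allow */
}

char LICENSE[] SEC("license") = "GPL";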
@@ -450,12 +450,12 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (err)
 		return err;
 
-	return __inet_bind(sk, uaddr, addr_len, false, true);
+	return __inet_bind(sk, uaddr, addr_len, BIND_WITH_LOCK);
 }
 EXPORT_SYMBOL(inet_bind);
 
 int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
-		bool force_bind_address_no_port, bool with_lock)
+		u32 flags)
 {
 	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
 	struct inet_sock *inet = inet_sk(sk);
@@ -506,7 +506,7 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 	 * would be illegal to use them (multicast/broadcast) in
 	 * which case the sending device address is used.
 	 */
-	if (with_lock)
+	if (flags & BIND_WITH_LOCK)
 		lock_sock(sk);
 
 	/* Check these errors (active socket, double bind). */
@@ -520,16 +520,18 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 
 	/* Make sure we are allowed to bind here. */
 	if (snum || !(inet->bind_address_no_port ||
-		      force_bind_address_no_port)) {
+		      (flags & BIND_FORCE_ADDRESS_NO_PORT))) {
 		if (sk->sk_prot->get_port(sk, snum)) {
 			inet->inet_saddr = inet->inet_rcv_saddr = 0;
 			err = -EADDRINUSE;
 			goto out_release_sock;
 		}
-		err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
-		if (err) {
-			inet->inet_saddr = inet->inet_rcv_saddr = 0;
-			goto out_release_sock;
+		if (!(flags & BIND_FROM_BPF)) {
+			err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
+			if (err) {
+				inet->inet_saddr = inet->inet_rcv_saddr = 0;
+				goto out_release_sock;
+			}
 		}
 	}
 
@@ -543,7 +545,7 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 	sk_dst_reset(sk);
 	err = 0;
 out_release_sock:
-	if (with_lock)
+	if (flags & BIND_WITH_LOCK)
 		release_sock(sk);
 out:
 	return err;
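The two bool parameters collapse into one flags word, so new behaviors stop growing the signature; BIND_FROM_BPF additionally lets bpf_bind() skip re-running the POST_BIND cgroup program from inside a BPF hook. The flag definitions live in a header outside this diff (include/net/inet_common.h in the series); as added there, their shape is:

/* bind flags replacing the two bools (quoted from memory of the series;
 * see include/net/inet_common.h for the authoritative definitions).
 */
#define BIND_FORCE_ADDRESS_NO_PORT	(1 << 0)  /* was: bool force_bind_address_no_port */
#define BIND_WITH_LOCK			(1 << 1)  /* was: bool with_lock */
#define BIND_FROM_BPF			(1 << 2)  /* new: do not re-run POST_BIND progs */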
@@ -273,7 +273,7 @@ out_rcu_unlock:
 }
 
 static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
-			bool force_bind_address_no_port, bool with_lock)
+			u32 flags)
 {
 	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
 	struct inet_sock *inet = inet_sk(sk);
@@ -297,7 +297,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 	    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
 		return -EACCES;
 
-	if (with_lock)
+	if (flags & BIND_WITH_LOCK)
 		lock_sock(sk);
 
 	/* Check these errors (active socket, double bind). */
@@ -400,18 +400,20 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 
 	/* Make sure we are allowed to bind here. */
 	if (snum || !(inet->bind_address_no_port ||
-		      force_bind_address_no_port)) {
+		      (flags & BIND_FORCE_ADDRESS_NO_PORT))) {
 		if (sk->sk_prot->get_port(sk, snum)) {
 			sk->sk_ipv6only = saved_ipv6only;
 			inet_reset_saddr(sk);
 			err = -EADDRINUSE;
 			goto out;
 		}
-		err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
-		if (err) {
-			sk->sk_ipv6only = saved_ipv6only;
-			inet_reset_saddr(sk);
-			goto out;
+		if (!(flags & BIND_FROM_BPF)) {
+			err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
+			if (err) {
+				sk->sk_ipv6only = saved_ipv6only;
+				inet_reset_saddr(sk);
+				goto out;
+			}
 		}
 	}
 
@@ -423,7 +425,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 	inet->inet_dport = 0;
 	inet->inet_daddr = 0;
 out:
-	if (with_lock)
+	if (flags & BIND_WITH_LOCK)
 		release_sock(sk);
 	return err;
 out_unlock:
@@ -451,7 +453,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (err)
 		return err;
 
-	return __inet6_bind(sk, uaddr, addr_len, false, true);
+	return __inet6_bind(sk, uaddr, addr_len, BIND_WITH_LOCK);
 }
 EXPORT_SYMBOL(inet6_bind);
@@ -2467,7 +2467,7 @@ void fib6_gc_cleanup(void)
 }
 
 #ifdef CONFIG_PROC_FS
-static int ipv6_route_seq_show(struct seq_file *seq, void *v)
+static int ipv6_route_native_seq_show(struct seq_file *seq, void *v)
 {
 	struct fib6_info *rt = v;
 	struct ipv6_route_iter *iter = seq->private;
@@ -2625,7 +2625,7 @@ static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
 	return w->node && !(w->state == FWS_U && w->node == w->root);
 }
 
-static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
+static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v)
 	__releases(RCU_BH)
 {
 	struct net *net = seq_file_net(seq);
@@ -2637,6 +2637,62 @@ static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
 	rcu_read_unlock_bh();
 }
 
+#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
+static int ipv6_route_prog_seq_show(struct bpf_prog *prog,
+				    struct bpf_iter_meta *meta,
+				    void *v)
+{
+	struct bpf_iter__ipv6_route ctx;
+
+	ctx.meta = meta;
+	ctx.rt = v;
+	return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int ipv6_route_seq_show(struct seq_file *seq, void *v)
+{
+	struct ipv6_route_iter *iter = seq->private;
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+	int ret;
+
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, false);
+	if (!prog)
+		return ipv6_route_native_seq_show(seq, v);
+
+	ret = ipv6_route_prog_seq_show(prog, &meta, v);
+	iter->w.leaf = NULL;
+
+	return ret;
+}
+
+static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+
+	if (!v) {
+		meta.seq = seq;
+		prog = bpf_iter_get_info(&meta, true);
+		if (prog)
+			(void)ipv6_route_prog_seq_show(prog, &meta, v);
+	}
+
+	ipv6_route_native_seq_stop(seq, v);
+}
+#else
+static int ipv6_route_seq_show(struct seq_file *seq, void *v)
+{
+	return ipv6_route_native_seq_show(seq, v);
+}
+
+static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
+{
+	ipv6_route_native_seq_stop(seq, v);
+}
+#endif
+
 const struct seq_operations ipv6_route_seq_ops = {
 	.start	= ipv6_route_seq_start,
 	.next	= ipv6_route_seq_next,
@@ -6421,6 +6421,35 @@ void __init ip6_route_init_special_entries(void)
 #endif
 }
 
+#if IS_BUILTIN(CONFIG_IPV6)
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)
+
+static const struct bpf_iter_reg ipv6_route_reg_info = {
+	.target			= "ipv6_route",
+	.seq_ops		= &ipv6_route_seq_ops,
+	.init_seq_private	= bpf_iter_init_seq_net,
+	.fini_seq_private	= bpf_iter_fini_seq_net,
+	.seq_priv_size		= sizeof(struct ipv6_route_iter),
+	.ctx_arg_info_size	= 1,
+	.ctx_arg_info		= {
+		{ offsetof(struct bpf_iter__ipv6_route, rt),
+		  PTR_TO_BTF_ID_OR_NULL },
+	},
+};
+
+static int __init bpf_iter_register(void)
+{
+	return bpf_iter_reg_target(&ipv6_route_reg_info);
+}
+
+static void bpf_iter_unregister(void)
+{
+	bpf_iter_unreg_target(&ipv6_route_reg_info);
+}
+#endif
+#endif
+
 int __init ip6_route_init(void)
 {
 	int ret;
@@ -6483,6 +6512,14 @@ int __init ip6_route_init(void)
 	if (ret)
 		goto out_register_late_subsys;
 
+#if IS_BUILTIN(CONFIG_IPV6)
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+	ret = bpf_iter_register();
+	if (ret)
+		goto out_register_late_subsys;
+#endif
+#endif
+
 	for_each_possible_cpu(cpu) {
 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
 
@@ -6515,6 +6552,11 @@ out_kmem_cache:
 
 void ip6_route_cleanup(void)
 {
+#if IS_BUILTIN(CONFIG_IPV6)
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+	bpf_iter_unregister();
+#endif
+#endif
 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
 	unregister_pernet_subsys(&ip6_route_net_late_ops);
 	fib6_rules_cleanup();
@@ -2596,7 +2596,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	return __netlink_seq_next(seq);
 }
 
-static void netlink_seq_stop(struct seq_file *seq, void *v)
+static void netlink_native_seq_stop(struct seq_file *seq, void *v)
 {
 	struct nl_seq_iter *iter = seq->private;
 
@@ -2607,7 +2607,7 @@ static void netlink_seq_stop(struct seq_file *seq, void *v)
 }
 
 
-static int netlink_seq_show(struct seq_file *seq, void *v)
+static int netlink_native_seq_show(struct seq_file *seq, void *v)
 {
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq,
@@ -2634,6 +2634,68 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
+#ifdef CONFIG_BPF_SYSCALL
+struct bpf_iter__netlink {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct netlink_sock *, sk);
+};
+
+DEFINE_BPF_ITER_FUNC(netlink, struct bpf_iter_meta *meta, struct netlink_sock *sk)
+
+static int netlink_prog_seq_show(struct bpf_prog *prog,
+				 struct bpf_iter_meta *meta,
+				 void *v)
+{
+	struct bpf_iter__netlink ctx;
+
+	meta->seq_num--;  /* skip SEQ_START_TOKEN */
+	ctx.meta = meta;
+	ctx.sk = nlk_sk((struct sock *)v);
+	return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int netlink_seq_show(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, false);
+	if (!prog)
+		return netlink_native_seq_show(seq, v);
+
+	if (v != SEQ_START_TOKEN)
+		return netlink_prog_seq_show(prog, &meta, v);
+
+	return 0;
+}
+
+static void netlink_seq_stop(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+
+	if (!v) {
+		meta.seq = seq;
+		prog = bpf_iter_get_info(&meta, true);
+		if (prog)
+			(void)netlink_prog_seq_show(prog, &meta, v);
+	}
+
+	netlink_native_seq_stop(seq, v);
+}
+#else
+static int netlink_seq_show(struct seq_file *seq, void *v)
+{
+	return netlink_native_seq_show(seq, v);
+}
+
+static void netlink_seq_stop(struct seq_file *seq, void *v)
+{
+	netlink_native_seq_stop(seq, v);
+}
+#endif
+
 static const struct seq_operations netlink_seq_ops = {
 	.start	= netlink_seq_start,
 	.next	= netlink_seq_next,
@@ -2740,6 +2802,26 @@ static const struct rhashtable_params netlink_rhashtable_params = {
 	.automatic_shrinking = true,
 };
 
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+static const struct bpf_iter_reg netlink_reg_info = {
+	.target			= "netlink",
+	.seq_ops		= &netlink_seq_ops,
+	.init_seq_private	= bpf_iter_init_seq_net,
+	.fini_seq_private	= bpf_iter_fini_seq_net,
+	.seq_priv_size		= sizeof(struct nl_seq_iter),
+	.ctx_arg_info_size	= 1,
+	.ctx_arg_info		= {
+		{ offsetof(struct bpf_iter__netlink, sk),
+		  PTR_TO_BTF_ID_OR_NULL },
+	},
+};
+
+static int __init bpf_iter_register(void)
+{
+	return bpf_iter_reg_target(&netlink_reg_info);
+}
+#endif
+
 static int __init netlink_proto_init(void)
 {
 	int i;
@@ -2748,6 +2830,12 @@ static int __init netlink_proto_init(void)
 	if (err != 0)
 		goto out;
 
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+	err = bpf_iter_register();
+	if (err)
+		goto out;
+#endif
+
 	BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof_field(struct sk_buff, cb));
 
 	nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
@@ -30,9 +30,9 @@ void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
 	if (!xs->tx)
 		return;
 
-	spin_lock_irqsave(&umem->xsk_list_lock, flags);
-	list_add_rcu(&xs->list, &umem->xsk_list);
-	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
+	spin_lock_irqsave(&umem->xsk_tx_list_lock, flags);
+	list_add_rcu(&xs->list, &umem->xsk_tx_list);
+	spin_unlock_irqrestore(&umem->xsk_tx_list_lock, flags);
 }
 
 void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
@@ -42,9 +42,9 @@ void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
 	if (!xs->tx)
 		return;
 
-	spin_lock_irqsave(&umem->xsk_list_lock, flags);
+	spin_lock_irqsave(&umem->xsk_tx_list_lock, flags);
 	list_del_rcu(&xs->list);
-	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
+	spin_unlock_irqrestore(&umem->xsk_tx_list_lock, flags);
 }
 
 /* The umem is stored both in the _rx struct and the _tx struct as we do
@@ -279,7 +279,7 @@ void xdp_put_umem(struct xdp_umem *umem)
 	}
 }
 
-static int xdp_umem_pin_pages(struct xdp_umem *umem)
+static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
 {
 	unsigned int gup_flags = FOLL_WRITE;
 	long npgs;
@@ -291,7 +291,7 @@ static int xdp_umem_pin_pages(struct xdp_umem *umem)
 		return -ENOMEM;
 
 	down_read(&current->mm->mmap_sem);
-	npgs = pin_user_pages(umem->address, umem->npgs,
+	npgs = pin_user_pages(address, umem->npgs,
 			      gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL);
 	up_read(&current->mm->mmap_sem);
 
@@ -385,7 +385,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 	if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
 		return -EINVAL;
 
-	umem->address = (unsigned long)addr;
 	umem->chunk_mask = unaligned_chunks ? XSK_UNALIGNED_BUF_ADDR_MASK
 					    : ~((u64)chunk_size - 1);
 	umem->size = size;
@@ -395,8 +394,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 	umem->pgs = NULL;
 	umem->user = NULL;
 	umem->flags = mr->flags;
-	INIT_LIST_HEAD(&umem->xsk_list);
-	spin_lock_init(&umem->xsk_list_lock);
+	INIT_LIST_HEAD(&umem->xsk_tx_list);
+	spin_lock_init(&umem->xsk_tx_list_lock);
 
 	refcount_set(&umem->users, 1);
 
@@ -404,7 +403,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 	if (err)
 		return err;
 
-	err = xdp_umem_pin_pages(umem);
+	err = xdp_umem_pin_pages(umem, (unsigned long)addr);
 	if (err)
 		goto out_account;
@@ -75,7 +75,7 @@ void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
 		return;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+	list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
 		xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
 	}
 	rcu_read_unlock();
@@ -102,7 +102,7 @@ void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
 		return;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+	list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
 		xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP;
 	}
 	rcu_read_unlock();
@@ -305,7 +305,7 @@ void xsk_umem_consume_tx_done(struct xdp_umem *umem)
 	struct xdp_sock *xs;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+	list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
 		__xskq_cons_release(xs->tx);
 		xs->sk.sk_write_space(&xs->sk);
 	}
@@ -318,7 +318,7 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
 	struct xdp_sock *xs;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+	list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
 		if (!xskq_cons_peek_desc(xs->tx, desc, umem))
 			continue;
@@ -9,12 +9,12 @@
 
 #include "xsk_queue.h"
 
-void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask)
+void xskq_set_umem(struct xsk_queue *q, u64 umem_size, u64 chunk_mask)
 {
 	if (!q)
 		return;
 
-	q->size = size;
+	q->umem_size = umem_size;
 	q->chunk_mask = chunk_mask;
 }
@@ -30,7 +30,7 @@ struct xdp_umem_ring {
 
 struct xsk_queue {
 	u64 chunk_mask;
-	u64 size;
+	u64 umem_size;
 	u32 ring_mask;
 	u32 nentries;
 	u32 cached_prod;
@@ -123,7 +123,7 @@ static inline bool xskq_cons_is_valid_unaligned(struct xsk_queue *q,
 	u64 base_addr = xsk_umem_extract_addr(addr);
 
 	addr = xsk_umem_add_offset_to_addr(addr);
-	if (base_addr >= q->size || addr >= q->size ||
+	if (base_addr >= q->umem_size || addr >= q->umem_size ||
 	    xskq_cons_crosses_non_contig_pg(umem, addr, length)) {
 		q->invalid_descs++;
 		return false;
@@ -134,7 +134,7 @@ static inline bool xskq_cons_is_valid_unaligned(struct xsk_queue *q,
 
 static inline bool xskq_cons_is_valid_addr(struct xsk_queue *q, u64 addr)
 {
-	if (addr >= q->size) {
+	if (addr >= q->umem_size) {
 		q->invalid_descs++;
 		return false;
 	}
@@ -379,7 +379,7 @@ static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
 	return q ? q->invalid_descs : 0;
 }
 
-void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask);
+void xskq_set_umem(struct xsk_queue *q, u64 umem_size, u64 chunk_mask);
 struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
 void xskq_destroy(struct xsk_queue *q_ops);
@@ -5,12 +5,12 @@
  * License as published by the Free Software Foundation.
  */
 #include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <uapi/linux/ptrace.h>
 #include <uapi/linux/perf_event.h>
 #include <linux/version.h>
 #include <linux/sched.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 #define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;})
@@ -1,12 +1,12 @@
 #include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "bpf_legacy.h"
 #include <uapi/linux/in.h>
 #include <uapi/linux/if.h>
 #include <uapi/linux/if_ether.h>
 #include <uapi/linux/ip.h>
 #include <uapi/linux/ipv6.h>
 #include <uapi/linux/if_tunnel.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
 #define IP_MF		0x2000
 #define IP_OFFSET	0x1FFF
@@ -5,8 +5,6 @@
  * License as published by the Free Software Foundation.
  */
 #include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "bpf_legacy.h"
 #include <uapi/linux/in.h>
 #include <uapi/linux/if.h>
 #include <uapi/linux/if_ether.h>
@@ -14,6 +12,8 @@
 #include <uapi/linux/ipv6.h>
 #include <uapi/linux/if_tunnel.h>
 #include <uapi/linux/mpls.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
 #define IP_MF		0x2000
 #define IP_OFFSET	0x1FFF
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include "hash_func01.h"
|
||||
|
||||
#define MAX_CPUS 64 /* WARNING - sync with _user.c */
|
||||
#define MAX_CPUS NR_CPUS
|
||||
|
||||
/* Special map type that can XDP_REDIRECT frames to another CPU */
|
||||
struct {
|
||||
|
@@ -13,6 +13,7 @@ static const char *__doc__ =
 #include <unistd.h>
 #include <locale.h>
 #include <sys/resource.h>
+#include <sys/sysinfo.h>
 #include <getopt.h>
 #include <net/if.h>
 #include <time.h>
@@ -24,8 +25,6 @@ static const char *__doc__ =
 #include <arpa/inet.h>
 #include <linux/if_link.h>
 
-#define MAX_CPUS 64 /* WARNING - sync with _kern.c */
-
 /* How many xdp_progs are defined in _kern.c */
 #define MAX_PROG 6
 
@@ -40,6 +39,7 @@ static char *ifname;
 static __u32 prog_id;
 
 static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int n_cpus;
 static int cpu_map_fd;
 static int rx_cnt_map_fd;
 static int redirect_err_cnt_map_fd;
@@ -170,7 +170,7 @@ struct stats_record {
 	struct record redir_err;
 	struct record kthread;
 	struct record exception;
-	struct record enq[MAX_CPUS];
+	struct record enq[];
 };
 
 static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
@@ -225,10 +225,11 @@ static struct datarec *alloc_record_per_cpu(void)
 static struct stats_record *alloc_stats_record(void)
 {
 	struct stats_record *rec;
-	int i;
+	int i, size;
 
-	rec = malloc(sizeof(*rec));
-	memset(rec, 0, sizeof(*rec));
+	size = sizeof(*rec) + n_cpus * sizeof(struct record);
+	rec = malloc(size);
+	memset(rec, 0, size);
 	if (!rec) {
 		fprintf(stderr, "Mem alloc error\n");
 		exit(EXIT_FAIL_MEM);
@@ -237,7 +238,7 @@ static struct stats_record *alloc_stats_record(void)
 	rec->redir_err.cpu = alloc_record_per_cpu();
 	rec->kthread.cpu = alloc_record_per_cpu();
 	rec->exception.cpu = alloc_record_per_cpu();
-	for (i = 0; i < MAX_CPUS; i++)
+	for (i = 0; i < n_cpus; i++)
 		rec->enq[i].cpu = alloc_record_per_cpu();
 
 	return rec;
@@ -247,7 +248,7 @@ static void free_stats_record(struct stats_record *r)
 {
 	int i;
 
-	for (i = 0; i < MAX_CPUS; i++)
+	for (i = 0; i < n_cpus; i++)
 		free(r->enq[i].cpu);
 	free(r->exception.cpu);
 	free(r->kthread.cpu);
@@ -350,7 +351,7 @@ static void stats_print(struct stats_record *stats_rec,
 	}
 
 	/* cpumap enqueue stats */
-	for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {
+	for (to_cpu = 0; to_cpu < n_cpus; to_cpu++) {
 		char *fmt = "%-15s %3d:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
 		char *fm2 = "%-15s %3s:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
 		char *errstr = "";
@@ -475,7 +476,7 @@ static void stats_collect(struct stats_record *rec)
 	map_collect_percpu(fd, 1, &rec->redir_err);
 
 	fd = cpumap_enqueue_cnt_map_fd;
-	for (i = 0; i < MAX_CPUS; i++)
+	for (i = 0; i < n_cpus; i++)
 		map_collect_percpu(fd, i, &rec->enq[i]);
 
 	fd = cpumap_kthread_cnt_map_fd;
@@ -549,10 +550,10 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size,
  */
static void mark_cpus_unavailable(void)
 {
-	__u32 invalid_cpu = MAX_CPUS;
+	__u32 invalid_cpu = n_cpus;
 	int ret, i;
 
-	for (i = 0; i < MAX_CPUS; i++) {
+	for (i = 0; i < n_cpus; i++) {
 		ret = bpf_map_update_elem(cpus_available_map_fd, &i,
 					  &invalid_cpu, 0);
 		if (ret) {
@@ -688,6 +689,8 @@ int main(int argc, char **argv)
 	int prog_fd;
 	__u32 qsize;
 
+	n_cpus = get_nprocs_conf();
+
 	/* Notice: choosing he queue size is very important with the
 	 * ixgbe driver, because it's driver page recycling trick is
 	 * dependend on pages being returned quickly. The number of
@@ -757,7 +760,7 @@ int main(int argc, char **argv)
 	case 'c':
 		/* Add multiple CPUs */
 		add_cpu = strtoul(optarg, NULL, 0);
-		if (add_cpu >= MAX_CPUS) {
+		if (add_cpu >= n_cpus) {
 			fprintf(stderr,
 				"--cpu nr too large for cpumap err(%d):%s\n",
 				errno, strerror(errno));
@@ -318,6 +318,11 @@ may be interested in:
   of eBPF maps are used with a given helper function.
 * *kernel/bpf/* directory contains other files in which additional helpers are
   defined (for cgroups, sockmaps, etc.).
+* The bpftool utility can be used to probe the availability of helper functions
+  on the system (as well as supported program and map types, and a number of
+  other parameters). To do so, run **bpftool feature probe** (see
+  **bpftool-feature**\ (8) for details). Add the **unprivileged** keyword to
+  list features available to unprivileged users.
 
 Compatibility between helper functions and program types can generally be found
 in the files where helper functions are defined. Look for the **struct
@@ -338,6 +343,7 @@ SEE ALSO
 ========
 
 **bpf**\ (2),
+**bpftool**\ (8),
 **cgroups**\ (7),
 **ip**\ (8),
 **perf_event_open**\ (2),
@@ -414,6 +420,7 @@ class PrinterHelpers(Printer):
             'struct sk_reuseport_md',
             'struct sockaddr',
             'struct tcphdr',
+            'struct seq_file',
 
             'struct __sk_buff',
             'struct sk_msg_md',
@@ -450,6 +457,7 @@ class PrinterHelpers(Printer):
             'struct sk_reuseport_md',
             'struct sockaddr',
             'struct tcphdr',
+            'struct seq_file',
     }
     mapped_types = {
             'u8': '__u8',
@@ -27,9 +27,9 @@
 	  "audit_control", "setfcap"
 
 #define COMMON_CAP2_PERMS  "mac_override", "mac_admin", "syslog", \
-		"wake_alarm", "block_suspend", "audit_read"
+		"wake_alarm", "block_suspend", "audit_read", "perfmon"
 
-#if CAP_LAST_CAP > CAP_AUDIT_READ
+#if CAP_LAST_CAP > CAP_PERFMON
 #error New capability defined, please update COMMON_CAP2_PERMS.
 #endif
@ -230,9 +230,14 @@ SEE ALSO
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
**bpftool-map**\ (8),
**bpftool-prog**\ (8),
**bpftool-btf**\ (8),
**bpftool-cgroup**\ (8),
**bpftool-feature**\ (8),
**bpftool-gen**\ (8),
**bpftool-iter**\ (8),
**bpftool-link**\ (8),
**bpftool-map**\ (8),
**bpftool-net**\ (8),
**bpftool-perf**\ (8)
**bpftool-perf**\ (8),
**bpftool-prog**\ (8),
**bpftool-struct_ops**\ (8)
@ -20,7 +20,7 @@ SYNOPSIS
CGROUP COMMANDS
===============

| **bpftool** **cgroup { show | list }** *CGROUP* [**effective**]
| **bpftool** **cgroup** { **show** | **list** } *CGROUP* [**effective**]
| **bpftool** **cgroup tree** [*CGROUP_ROOT*] [**effective**]
| **bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*]
| **bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
@ -160,9 +160,13 @@ SEE ALSO
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
**bpftool-prog**\ (8),
**bpftool-map**\ (8),
**bpftool-btf**\ (8),
**bpftool-feature**\ (8),
**bpftool-gen**\ (8),
**bpftool-iter**\ (8),
**bpftool-link**\ (8),
**bpftool-map**\ (8),
**bpftool-net**\ (8),
**bpftool-perf**\ (8),
**bpftool-btf**\ (8)
**bpftool-prog**\ (8),
**bpftool-struct_ops**\ (8)
@ -28,7 +28,7 @@ DESCRIPTION
===========
**bpftool feature probe** [**kernel**] [**full**] [**macros** [**prefix** *PREFIX*]]
Probe the running kernel and dump a number of eBPF-related
parameters, such as availability of the **bpf()** system call,
parameters, such as availability of the **bpf**\ () system call,
JIT status, eBPF program types availability, eBPF helper
functions availability, and more.

@ -93,9 +93,13 @@ SEE ALSO
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
**bpftool-prog**\ (8),
**bpftool-map**\ (8),
**bpftool-btf**\ (8),
**bpftool-cgroup**\ (8),
**bpftool-gen**\ (8),
**bpftool-iter**\ (8),
**bpftool-link**\ (8),
**bpftool-map**\ (8),
**bpftool-net**\ (8),
**bpftool-perf**\ (8),
**bpftool-btf**\ (8)
**bpftool-prog**\ (8),
**bpftool-struct_ops**\ (8)
@ -14,7 +14,7 @@ SYNOPSIS

*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }

*COMMAND* := { **skeleton | **help** }
*COMMAND* := { **skeleton** | **help** }

GEN COMMANDS
=============
@ -36,12 +36,12 @@ DESCRIPTION
etc. Skeleton eliminates the need to look up mentioned
components by name. Instead, if skeleton instantiation
succeeds, they are populated in skeleton structure as valid
libbpf types (e.g., struct bpf_map pointer) and can be
libbpf types (e.g., **struct bpf_map** pointer) and can be
passed to existing generic libbpf APIs.

In addition to simple and reliable access to maps and
programs, skeleton provides a storage for BPF links (struct
bpf_link) for each BPF program within BPF object. When
programs, skeleton provides a storage for BPF links (**struct
bpf_link**) for each BPF program within BPF object. When
requested, supported BPF programs will be automatically
attached and resulting BPF links stored for further use by
user in pre-allocated fields in skeleton struct. For BPF
@ -82,14 +82,14 @@ DESCRIPTION

- **example__open** and **example__open_opts**.
These functions are used to instantiate skeleton. It
corresponds to libbpf's **bpf_object__open()** API.
corresponds to libbpf's **bpf_object__open**\ () API.
**_opts** variants accept extra **bpf_object_open_opts**
options.

- **example__load**.
This function creates maps, loads and verifies BPF
programs, initializes global data maps. It corresponds to
libbpf's **bpf_object__load** API.
libbpf's **bpf_object__load**\ () API.

- **example__open_and_load** combines **example__open** and
**example__load** invocations in one commonly used
@ -296,10 +296,13 @@ SEE ALSO
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
**bpftool-map**\ (8),
**bpftool-prog**\ (8),
**bpftool-btf**\ (8),
**bpftool-cgroup**\ (8),
**bpftool-feature**\ (8),
**bpftool-iter**\ (8),
**bpftool-link**\ (8),
**bpftool-map**\ (8),
**bpftool-net**\ (8),
**bpftool-perf**\ (8),
**bpftool-btf**\ (8)
**bpftool-prog**\ (8),
**bpftool-struct_ops**\ (8)
81	tools/bpf/bpftool/Documentation/bpftool-iter.rst	(new file)
@ -0,0 +1,81 @@
============
bpftool-iter
============
-------------------------------------------------------------------------------
tool to create BPF iterators
-------------------------------------------------------------------------------

:Manual section: 8

SYNOPSIS
========

**bpftool** [*OPTIONS*] **iter** *COMMAND*

*COMMANDS* := { **pin** | **help** }

ITER COMMANDS
===================

| **bpftool** **iter pin** *OBJ* *PATH*
| **bpftool** **iter help**
|
| *OBJ* := /a/file/of/bpf_iter_target.o

DESCRIPTION
===========
**bpftool iter pin** *OBJ* *PATH*
A bpf iterator combines kernel iteration over particular
kernel data (e.g., tasks, bpf_maps, etc.)
with a bpf program called for each kernel data object
(e.g., one task, one bpf_map, etc.). User space can
*read* kernel iterator output through the *read()* syscall.

The *pin* command creates a bpf iterator from *OBJ*,
and pins it to *PATH*. The *PATH* should be located
in a *bpffs* mount. It must not contain a dot
character ('.'), which is reserved for future extensions
of *bpffs*.

Users can then *cat PATH* to see the bpf iterator output.

**bpftool iter help**
Print short help message.

OPTIONS
=======
-h, --help
Print short generic help message (similar to **bpftool help**).

-V, --version
Print version number (similar to **bpftool version**).

-d, --debug
Print all logs available, even debug-level information. This
includes logs from libbpf as well as from the verifier, when
attempting to load programs.

EXAMPLES
========
**# bpftool iter pin bpf_iter_netlink.o /sys/fs/bpf/my_netlink**

::

Create a file-based bpf iterator from bpf_iter_netlink.o and pin it
to /sys/fs/bpf/my_netlink

SEE ALSO
========
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
**bpftool-btf**\ (8),
**bpftool-cgroup**\ (8),
**bpftool-feature**\ (8),
**bpftool-gen**\ (8),
**bpftool-link**\ (8),
**bpftool-map**\ (8),
**bpftool-net**\ (8),
**bpftool-perf**\ (8),
**bpftool-prog**\ (8),
**bpftool-struct_ops**\ (8)
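A pinned iterator is consumed with ordinary reads; a minimal user-space sketch, assuming the pin path from the example above::

    /* Each read() runs the iterator's bpf program to produce more
     * seq_file output; EOF means the iteration finished.
     */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        char buf[4096];
        ssize_t n;
        int fd = open("/sys/fs/bpf/my_netlink", O_RDONLY);

        if (fd < 0)
            return 1;
        while ((n = read(fd, buf, sizeof(buf))) > 0)
            fwrite(buf, 1, n, stdout);
        close(fd);
        return 0;
    }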
@ -109,10 +109,13 @@ SEE ALSO
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
**bpftool-prog**\ (8),
**bpftool-map**\ (8),
**bpftool-btf**\ (8),
**bpftool-cgroup**\ (8),
**bpftool-feature**\ (8),
**bpftool-gen**\ (8),
**bpftool-iter**\ (8),
**bpftool-map**\ (8),
**bpftool-net**\ (8),
**bpftool-perf**\ (8),
**bpftool-btf**\ (8)
**bpftool-prog**\ (8),
**bpftool-struct_ops**\ (8)
@ -21,7 +21,7 @@ SYNOPSIS
MAP COMMANDS
=============

| **bpftool** **map { show | list }** [*MAP*]
| **bpftool** **map** { **show** | **list** } [*MAP*]
| **bpftool** **map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \
| **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*]
| **bpftool** **map dump** *MAP*
@ -49,7 +49,7 @@ MAP COMMANDS
| | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps**
| | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
| | **queue** | **stack** }
| | **queue** | **stack** | **sk_storage** | **struct_ops** }

DESCRIPTION
===========
@ -66,6 +66,13 @@ DESCRIPTION
Create a new map with given parameters and pin it to *bpffs*
as *FILE*.

*FLAGS* should be an integer which is the combination of
desired flags, e.g. 1024 for **BPF_F_MMAPABLE** (see bpf.h
UAPI header for existing flags).

Keyword **dev** expects a network interface name, and is used
to request hardware offload for the map.

**bpftool map dump** *MAP*
Dump all entries in a given *MAP*. In case of **name**,
*MAP* may match several maps which will all be dumped.
@ -78,7 +85,7 @@ DESCRIPTION
exists; **noexist** update only if entry doesn't exist.

If the **hex** keyword is provided in front of the bytes
sequence, the bytes are parsed as hexadeximal values, even if
sequence, the bytes are parsed as hexadecimal values, even if
no "0x" prefix is added. If the keyword is not provided, then
the bytes are parsed as decimal values, unless a "0x" prefix
(for hexadecimal) or a "0" prefix (for octal) is provided.
@ -100,10 +107,10 @@ DESCRIPTION
extensions of *bpffs*.

**bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
Read events from a **BPF_MAP_TYPE_PERF_EVENT_ARRAY** map.

Install perf rings into a perf event array map and dump
output of any bpf_perf_event_output() call in the kernel.
output of any **bpf_perf_event_output**\ () call in the kernel.
By default read the number of CPUs on the system and
install perf ring for each CPU in the corresponding index
in the array.
@ -116,24 +123,24 @@ DESCRIPTION
receiving events if it installed its rings earlier.

**bpftool map peek** *MAP*
Peek next **value** in the queue or stack.
Peek next value in the queue or stack.

**bpftool map push** *MAP* **value** *VALUE*
Push **value** onto the stack.
Push *VALUE* onto the stack.

**bpftool map pop** *MAP*
Pop and print **value** from the stack.
Pop and print value from the stack.

**bpftool map enqueue** *MAP* **value** *VALUE*
Enqueue **value** into the queue.
Enqueue *VALUE* into the queue.

**bpftool map dequeue** *MAP*
Dequeue and print **value** from the queue.
Dequeue and print value from the queue.

**bpftool map freeze** *MAP*
Freeze the map as read-only from user space. Entries from a
frozen map can no longer be updated or deleted with the
**bpf\ ()** system call. This operation is not reversible,
**bpf**\ () system call. This operation is not reversible,
and the map remains immutable from user space until its
destruction. However, read and write permissions for BPF
programs to the map remain unchanged.
@ -269,9 +276,13 @@ SEE ALSO
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
**bpftool-prog**\ (8),
**bpftool-btf**\ (8),
**bpftool-cgroup**\ (8),
**bpftool-feature**\ (8),
**bpftool-gen**\ (8),
**bpftool-iter**\ (8),
**bpftool-link**\ (8),
**bpftool-net**\ (8),
**bpftool-perf**\ (8),
**bpftool-btf**\ (8)
**bpftool-prog**\ (8),
**bpftool-struct_ops**\ (8)
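For illustration, a hypothetical invocation exercising the newly documented **flags** keyword (1024 is **BPF_F_MMAPABLE** per the bpf.h UAPI header; the path, sizes and name are made up)::

    # bpftool map create /sys/fs/bpf/datamap type array key 4 value 8 \
          entries 64 name datamap flags 1024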
@ -20,7 +20,7 @@ SYNOPSIS
NET COMMANDS
============

| **bpftool** **net { show | list }** [ **dev** *NAME* ]
| **bpftool** **net** { **show** | **list** } [ **dev** *NAME* ]
| **bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ]
| **bpftool** **net detach** *ATTACH_TYPE* **dev** *NAME*
| **bpftool** **net help**
@ -194,9 +194,13 @@ SEE ALSO
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
**bpftool-prog**\ (8),
**bpftool-map**\ (8),
**bpftool-btf**\ (8),
**bpftool-cgroup**\ (8),
**bpftool-feature**\ (8),
**bpftool-gen**\ (8),
**bpftool-iter**\ (8),
**bpftool-link**\ (8),
**bpftool-map**\ (8),
**bpftool-perf**\ (8),
**bpftool-btf**\ (8)
**bpftool-prog**\ (8),
**bpftool-struct_ops**\ (8)
@ -20,7 +20,7 @@ SYNOPSIS
PERF COMMANDS
=============

| **bpftool** **perf { show | list }**
| **bpftool** **perf** { **show** | **list** }
| **bpftool** **perf help**

DESCRIPTION
@ -85,9 +85,13 @@ SEE ALSO
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
**bpftool-prog**\ (8),
**bpftool-map**\ (8),
**bpftool-btf**\ (8),
**bpftool-cgroup**\ (8),
**bpftool-feature**\ (8),
**bpftool-gen**\ (8),
**bpftool-iter**\ (8),
**bpftool-link**\ (8),
**bpftool-map**\ (8),
**bpftool-net**\ (8),
**bpftool-btf**\ (8)
**bpftool-prog**\ (8),
**bpftool-struct_ops**\ (8)
@ -21,11 +21,11 @@ SYNOPSIS
PROG COMMANDS
=============

| **bpftool** **prog { show | list }** [*PROG*]
| **bpftool** **prog** { **show** | **list** } [*PROG*]
| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual** | **linum**}]
| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes** | **linum**}]
| **bpftool** **prog pin** *PROG* *FILE*
| **bpftool** **prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*]
| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*]
| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog tracelog**
@ -49,7 +49,7 @@ PROG COMMANDS
| *ATTACH_TYPE* := {
| **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
| }
| *METRIC* := {
| *METRICs* := {
| **cycles** | **instructions** | **l1d_loads** | **llc_misses**
| }

@ -155,7 +155,7 @@ DESCRIPTION
**bpftool prog tracelog**
Dump the trace pipe of the system to the console (stdout).
Hit <Ctrl+C> to stop printing. BPF programs can write to this
trace pipe at runtime with the **bpf_trace_printk()** helper.
trace pipe at runtime with the **bpf_trace_printk**\ () helper.
This should be used only for debugging purposes. For
streaming data from BPF programs to user space, one can use
perf events (see also **bpftool-map**\ (8)).
@ -195,9 +195,9 @@ DESCRIPTION

**bpftool prog profile** *PROG* [**duration** *DURATION*] *METRICs*
Profile *METRICs* for bpf program *PROG* for *DURATION*
seconds or until user hits Ctrl-C. *DURATION* is optional.
seconds or until user hits <Ctrl+C>. *DURATION* is optional.
If *DURATION* is not specified, the profiling will run up to
UINT_MAX seconds.
**UINT_MAX** seconds.

**bpftool prog help**
Print short help message.
@ -267,7 +267,7 @@ EXAMPLES

|
| **# bpftool prog dump xlated id 10 file /tmp/t**
| **# ls -l /tmp/t**
| **$ ls -l /tmp/t**

::

@ -325,6 +325,7 @@ EXAMPLES
| **# bpftool prog profile id 337 duration 10 cycles instructions llc_misses**

::

51397 run_cnt
40176203 cycles (83.05%)
42518139 instructions # 1.06 insns per cycle (83.39%)
@ -335,9 +336,13 @@ SEE ALSO
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
**bpftool-map**\ (8),
**bpftool-btf**\ (8),
**bpftool-cgroup**\ (8),
**bpftool-feature**\ (8),
**bpftool-gen**\ (8),
**bpftool-iter**\ (8),
**bpftool-link**\ (8),
**bpftool-map**\ (8),
**bpftool-net**\ (8),
**bpftool-perf**\ (8),
**bpftool-btf**\ (8)
**bpftool-struct_ops**\ (8)
@ -105,12 +105,13 @@ SEE ALSO
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
**bpftool-prog**\ (8),
**bpftool-map**\ (8),
**bpftool-btf**\ (8),
**bpftool-cgroup**\ (8),
**bpftool-feature**\ (8),
**bpftool-gen**\ (8),
**bpftool-iter**\ (8),
**bpftool-link**\ (8),
**bpftool-map**\ (8),
**bpftool-net**\ (8),
**bpftool-perf**\ (8),
**bpftool-btf**\ (8)
**bpftool-gen**\ (8)

**bpftool-prog**\ (8)
@ -75,11 +75,14 @@ SEE ALSO
========
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool-prog**\ (8),
**bpftool-map**\ (8),
**bpftool-btf**\ (8),
**bpftool-cgroup**\ (8),
**bpftool-feature**\ (8),
**bpftool-gen**\ (8),
**bpftool-iter**\ (8),
**bpftool-link**\ (8),
**bpftool-map**\ (8),
**bpftool-net**\ (8),
**bpftool-perf**\ (8),
**bpftool-btf**\ (8),
**bpftool-gen**\ (8),
**bpftool-prog**\ (8),
**bpftool-struct_ops**\ (8)
@ -610,6 +610,19 @@ _bpftool()
;;
esac
;;
iter)
case $command in
pin)
_filedir
return 0
;;
*)
[[ $prev == $object ]] && \
COMPREPLY=( $( compgen -W 'pin help' \
-- "$cur" ) )
;;
esac
;;
map)
local MAP_TYPE='id pinned name'
case $command in
@ -271,8 +271,8 @@ static void btf_int128_print(json_writer_t *jw, const void *data,
}
}

static void btf_int128_shift(__u64 *print_num, u16 left_shift_bits,
u16 right_shift_bits)
static void btf_int128_shift(__u64 *print_num, __u16 left_shift_bits,
__u16 right_shift_bits)
{
__u64 upper_num, lower_num;

@ -157,7 +157,7 @@ static bool cfg_partition_funcs(struct cfg *cfg, struct bpf_insn *cur,
return false;
}

static bool is_jmp_insn(u8 code)
static bool is_jmp_insn(__u8 code)
{
return BPF_CLASS(code) == BPF_JMP || BPF_CLASS(code) == BPF_JMP32;
}
@ -176,7 +176,7 @@ static bool func_partition_bb_head(struct func_node *func)

for (; cur <= end; cur++) {
if (is_jmp_insn(cur->code)) {
u8 opcode = BPF_OP(cur->code);
__u8 opcode = BPF_OP(cur->code);

if (opcode == BPF_EXIT || opcode == BPF_CALL)
continue;
88	tools/bpf/bpftool/iter.c	(new file)
@ -0,0 +1,88 @@
// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (C) 2020 Facebook

#define _GNU_SOURCE
#include <linux/err.h>
#include <bpf/libbpf.h>

#include "main.h"

static int do_pin(int argc, char **argv)
{
const char *objfile, *path;
struct bpf_program *prog;
struct bpf_object *obj;
struct bpf_link *link;
int err;

if (!REQ_ARGS(2))
usage();

objfile = GET_ARG();
path = GET_ARG();

obj = bpf_object__open(objfile);
if (IS_ERR(obj)) {
p_err("can't open objfile %s", objfile);
return -1;
}

err = bpf_object__load(obj);
if (err) {
p_err("can't load objfile %s", objfile);
goto close_obj;
}

prog = bpf_program__next(NULL, obj);
if (!prog) {
p_err("can't find bpf program in objfile %s", objfile);
goto close_obj;
}

link = bpf_program__attach_iter(prog, NULL);
if (IS_ERR(link)) {
err = PTR_ERR(link);
p_err("attach_iter failed for program %s",
bpf_program__name(prog));
goto close_obj;
}

err = mount_bpffs_for_pin(path);
if (err)
goto close_link;

err = bpf_link__pin(link, path);
if (err) {
p_err("pin_iter failed for program %s to path %s",
bpf_program__name(prog), path);
goto close_link;
}

close_link:
bpf_link__destroy(link);
close_obj:
bpf_object__close(obj);
return err;
}

static int do_help(int argc, char **argv)
{
fprintf(stderr,
"Usage: %s %s pin OBJ PATH\n"
"       %s %s help\n"
"\n",
bin_name, argv[-2], bin_name, argv[-2]);

return 0;
}

static const struct cmd cmds[] = {
{ "help",	do_help },
{ "pin",	do_pin },
{ 0 }
};

int do_iter(int argc, char **argv)
{
return cmd_select(cmds, argc, argv, do_help);
}
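For context, the OBJ that do_pin() opens is an ordinary BPF object whose program sits in an "iter/" section. A hedged sketch of such a program, in the style of the selftests added by this series but not itself part of the patch (kernel types assumed available, e.g. from a generated vmlinux.h; BPF_SEQ_PRINTF is the macro added to bpf_tracing.h further down)::

    // SPDX-License-Identifier: GPL-2.0
    /* Illustrative task iterator: called once per task, NULL at the end. */
    SEC("iter/task")
    int dump_task(struct bpf_iter__task *ctx)
    {
        struct seq_file *seq = ctx->meta->seq;
        struct task_struct *task = ctx->task;

        if (!task) /* NULL task signals the end of iteration */
            return 0;
        BPF_SEQ_PRINTF(seq, "%8d %s\n", task->pid, task->comm);
        return 0;
    }

    char LICENSE[] SEC("license") = "GPL";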
@ -16,6 +16,7 @@ static const char * const link_type_name[] = {
[BPF_LINK_TYPE_RAW_TRACEPOINT]	= "raw_tracepoint",
[BPF_LINK_TYPE_TRACING]		= "tracing",
[BPF_LINK_TYPE_CGROUP]		= "cgroup",
[BPF_LINK_TYPE_ITER]		= "iter",
};

static int link_parse_fd(int *argc, char ***argv)
@ -59,7 +59,7 @@ static int do_help(int argc, char **argv)
"       %s batch file FILE\n"
"       %s version\n"
"\n"
"       OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops }\n"
"       OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n"
"       " HELP_SPEC_OPTIONS "\n"
"",
bin_name, bin_name, bin_name);
@ -224,6 +224,7 @@ static const struct cmd cmds[] = {
{ "btf",	do_btf },
{ "gen",	do_gen },
{ "struct_ops",	do_struct_ops },
{ "iter",	do_iter },
{ "version",	do_version },
{ 0 }
};

@ -18,6 +18,9 @@

#include "json_writer.h"

/* Make sure we do not use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64

#define ptr_to_u64(ptr)	((__u64)(unsigned long)(ptr))

#define NEXT_ARG()	({ argc--; argv++; if (argc < 0) usage(); })
@ -199,6 +202,7 @@ int do_feature(int argc, char **argv);
int do_btf(int argc, char **argv);
int do_gen(int argc, char **argv);
int do_struct_ops(int argc, char **argv);
int do_iter(int argc, char **argv);

int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
int prog_parse_fd(int *argc, char ***argv);
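The poison pragma added above is what forces the u8/u16/u32/u64 cleanups seen throughout these bpftool hunks: any later mention of a poisoned identifier is a hard compile error. A tiny illustration, not from the patch::

    #include <linux/types.h>        /* defines the uapi __u32 spelling */

    #pragma GCC poison u32

    /* u32 bad(void) { return 0; }  -- would now fail to compile:
     *                                 "attempt to use poisoned u32"  */
    __u32 ok(void) { return 0; }    /* the __u32 spelling stays legal */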
@ -1589,7 +1589,8 @@ static int do_help(int argc, char **argv)
"                 percpu_array | stack_trace | cgroup_array | lru_hash |\n"
"                 lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n"
"                 devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
"                 cgroup_storage | reuseport_sockarray | percpu_cgroup_storage }\n"
"                 cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
"                 queue | stack | sk_storage | struct_ops }\n"
"       " HELP_SPEC_OPTIONS "\n"
"",
bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
@ -39,7 +39,7 @@ struct event_ring_info {

struct perf_event_sample {
struct perf_event_header header;
u64 time;
__u64 time;
__u32 size;
unsigned char data[];
};

@ -238,7 +238,7 @@ exit_free:
return fd;
}

static void show_prog_maps(int fd, u32 num_maps)
static void show_prog_maps(int fd, __u32 num_maps)
{
struct bpf_prog_info info = {};
__u32 len = sizeof(info);
@ -8,7 +8,8 @@ BPFTOOL ?= $(DEFAULT_BPFTOOL)
LIBBPF_SRC := $(abspath ../../lib/bpf)
BPFOBJ := $(OUTPUT)/libbpf.a
BPF_INCLUDE := $(OUTPUT)
INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../lib)
INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../lib) \
	-I$(abspath ../../include/uapi)
CFLAGS := -g -Wall

# Try to detect best kernel BTF source
@ -116,6 +116,7 @@ enum bpf_cmd {
BPF_LINK_GET_FD_BY_ID,
BPF_LINK_GET_NEXT_ID,
BPF_ENABLE_STATS,
BPF_ITER_CREATE,
};

enum bpf_map_type {
@ -218,6 +219,7 @@ enum bpf_attach_type {
BPF_TRACE_FEXIT,
BPF_MODIFY_RETURN,
BPF_LSM_MAC,
BPF_TRACE_ITER,
__MAX_BPF_ATTACH_TYPE
};

@ -228,6 +230,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
BPF_LINK_TYPE_TRACING = 2,
BPF_LINK_TYPE_CGROUP = 3,
BPF_LINK_TYPE_ITER = 4,

MAX_BPF_LINK_TYPE,
};
@ -612,6 +615,11 @@ union bpf_attr {
__u32		type;
} enable_stats;

struct { /* struct used by BPF_ITER_CREATE command */
__u32	link_fd;
__u32	flags;
} iter_create;

} __attribute__((aligned(8)));

/* The description below is an attempt at providing documentation to eBPF
@ -667,8 +675,8 @@ union bpf_attr {
* For tracing programs, safely attempt to read *size* bytes from
* kernel space address *unsafe_ptr* and store the data in *dst*.
*
* Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
* instead.
* Generally, use **bpf_probe_read_user**\ () or
* **bpf_probe_read_kernel**\ () instead.
* Return
* 0 on success, or a negative error in case of failure.
*
@ -676,7 +684,7 @@ union bpf_attr {
* Description
* Return the time elapsed since system boot, in nanoseconds.
* Does not include time the system was suspended.
* See: clock_gettime(CLOCK_MONOTONIC)
* See: **clock_gettime**\ (**CLOCK_MONOTONIC**)
* Return
* Current *ktime*.
*
@ -1535,11 +1543,11 @@ union bpf_attr {
* int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe kernel address
* *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
* *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for
* more details.
*
* Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
* instead.
* Generally, use **bpf_probe_read_user_str**\ () or
* **bpf_probe_read_kernel_str**\ () instead.
* Return
* On success, the strictly positive length of the string,
* including the trailing NUL character. On error, a negative
@ -1567,7 +1575,7 @@ union bpf_attr {
*
* u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
* Description
* Equivalent to bpf_get_socket_cookie() helper that accepts
* Equivalent to **bpf_get_socket_cookie**\ () helper that accepts
* *skb*, but gets socket from **struct bpf_sock_ops** context.
* Return
* A 8-byte long non-decreasing number.
@ -1596,6 +1604,7 @@ union bpf_attr {
* The option value of length *optlen* is pointed by *optval*.
*
* *bpf_socket* should be one of the following:
*
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
* * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
* and **BPF_CGROUP_INET6_CONNECT**.
@ -1664,12 +1673,12 @@ union bpf_attr {
*
* The lower two bits of *flags* are used as the return code if
* the map lookup fails. This is so that the return value can be
* one of the XDP program return codes up to XDP_TX, as chosen by
* the caller. Any higher bits in the *flags* argument must be
* one of the XDP program return codes up to **XDP_TX**, as chosen
* by the caller. Any higher bits in the *flags* argument must be
* unset.
*
* See also bpf_redirect(), which only supports redirecting to an
* ifindex, but doesn't require a map to do so.
* See also **bpf_redirect**\ (), which only supports redirecting
* to an ifindex, but doesn't require a map to do so.
* Return
* **XDP_REDIRECT** on success, or the value of the two lower bits
* of the *flags* argument on error.
@ -1777,7 +1786,7 @@ union bpf_attr {
* the time running for event since last normalization. The
* enabled and running times are accumulated since the perf event
* open. To achieve scaling factor between two invocations of an
* eBPF program, users can can use CPU id as the key (which is
* eBPF program, users can use CPU id as the key (which is
* typical for perf array usage model) to remember the previous
* value and do the calculation inside the eBPF program.
* Return
@ -1804,6 +1813,7 @@ union bpf_attr {
* *opval* and of length *optlen*.
*
* *bpf_socket* should be one of the following:
*
* * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
* * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
* and **BPF_CGROUP_INET6_CONNECT**.
@ -1825,7 +1835,7 @@ union bpf_attr {
* The first argument is the context *regs* on which the kprobe
* works.
*
* This helper works by setting setting the PC (program counter)
* This helper works by setting the PC (program counter)
* to an override function which is run in place of the original
* probed function. This means the probed function is not run at
* all. The replacement function just returns with the required
@ -1994,10 +2004,11 @@ union bpf_attr {
*
* This helper works for IPv4 and IPv6, TCP and UDP sockets. The
* domain (*addr*\ **->sa_family**) must be **AF_INET** (or
* **AF_INET6**). Looking for a free port to bind to can be
* expensive, therefore binding to port is not permitted by the
* helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
* must be set to zero.
* **AF_INET6**). It's advised to pass zero port (**sin_port**
* or **sin6_port**) which triggers IP_BIND_ADDRESS_NO_PORT-like
* behavior and lets the kernel efficiently pick up an unused
* port as long as 4-tuple is unique. Passing non-zero port might
* lead to degraded performance.
* Return
* 0 on success, or a negative error in case of failure.
*
@ -2291,7 +2302,7 @@ union bpf_attr {
* **bpf_rc_keydown**\ () again with the same values, or calling
* **bpf_rc_repeat**\ ().
*
* Some protocols include a toggle bit, in case the button was
* Some protocols include a toggle bit, in case the button was
* released and pressed again between consecutive scancodes.
*
* The *ctx* should point to the lirc sample as passed into
@ -2637,7 +2648,6 @@ union bpf_attr {
*
* *th* points to the start of the TCP header, while *th_len*
* contains **sizeof**\ (**struct tcphdr**).
*
* Return
* 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
* error otherwise.
@ -2820,7 +2830,6 @@ union bpf_attr {
*
* *th* points to the start of the TCP header, while *th_len*
* contains the length of the TCP header.
*
* Return
* On success, lower 32 bits hold the generated SYN cookie in
* followed by 16 bits which hold the MSS value for that cookie,
@ -2903,7 +2912,7 @@ union bpf_attr {
* // size, after checking its boundaries.
* }
*
* In comparison, using **bpf_probe_read_user()** helper here
* In comparison, using **bpf_probe_read_user**\ () helper here
* instead to read the string would require to estimate the length
* at compile time, and would often result in copying more memory
* than necessary.
@ -2921,14 +2930,14 @@ union bpf_attr {
* int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
* to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
* to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply.
* Return
* On success, the strictly positive length of the string, including
* On success, the strictly positive length of the string, including
* the trailing NUL character. On error, a negative value.
*
* int bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
* Description
* Send out a tcp-ack. *tp* is the in-kernel struct tcp_sock.
* Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**.
* *rcv_nxt* is the ack_seq to be sent out.
* Return
* 0 on success, or a negative error in case of failure.
@ -2956,19 +2965,19 @@ union bpf_attr {
* int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
* Description
* For an eBPF program attached to a perf event, retrieve the
* branch records (struct perf_branch_entry) associated to *ctx*
* and store it in the buffer pointed by *buf* up to size
* branch records (**struct perf_branch_entry**) associated to *ctx*
* and store it in the buffer pointed by *buf* up to size
* *size* bytes.
* Return
* On success, number of bytes written to *buf*. On error, a
* negative value.
*
* The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to
* instead return the number of bytes required to store all the
* instead return the number of bytes required to store all the
* branch entries. If this flag is set, *buf* may be NULL.
*
* **-EINVAL** if arguments invalid or **size** not a multiple
* of sizeof(struct perf_branch_entry).
* of **sizeof**\ (**struct perf_branch_entry**\ ).
*
* **-ENOENT** if architecture does not support branch records.
*
@ -2976,8 +2985,8 @@ union bpf_attr {
* Description
* Returns 0 on success, values for *pid* and *tgid* as seen from the current
* *namespace* will be returned in *nsdata*.
*
* On failure, the returned value is one of the following:
* Return
* 0 on success, or one of the following in case of failure:
*
* **-EINVAL** if dev and inum supplied don't match dev_t and inode number
* with nsfs of current task, or if dev conversion to dev_t lost high bits.
@ -3016,8 +3025,8 @@ union bpf_attr {
* a global identifier that can be assumed unique. If *ctx* is
* NULL, then the helper returns the cookie for the initial
* network namespace. The cookie itself is very similar to that
* of bpf_get_socket_cookie() helper, but for network namespaces
* instead of sockets.
* of **bpf_get_socket_cookie**\ () helper, but for network
* namespaces instead of sockets.
* Return
* A 8-byte long opaque number.
*
@ -3052,22 +3061,98 @@ union bpf_attr {
*
* The *flags* argument must be zero.
* Return
* 0 on success, or a negative errno in case of failure.
* 0 on success, or a negative error in case of failure:
*
* * **-EINVAL** Unsupported flags specified.
* * **-ENOENT** Socket is unavailable for assignment.
* * **-ENETUNREACH** Socket is unreachable (wrong netns).
* * **-EOPNOTSUPP** Unsupported operation, for example a
* call from outside of TC ingress.
* * **-ESOCKTNOSUPPORT** Socket type not supported (reuseport).
* **-EINVAL** if specified *flags* are not supported.
*
* **-ENOENT** if the socket is unavailable for assignment.
*
* **-ENETUNREACH** if the socket is unreachable (wrong netns).
*
* **-EOPNOTSUPP** if the operation is not supported, for example
* a call from outside of TC ingress.
*
* **-ESOCKTNOSUPPORT** if the socket type is not supported
* (reuseport).
*
* u64 bpf_ktime_get_boot_ns(void)
* Description
* Return the time elapsed since system boot, in nanoseconds.
* Does include the time the system was suspended.
* See: clock_gettime(CLOCK_BOOTTIME)
* See: **clock_gettime**\ (**CLOCK_BOOTTIME**)
* Return
* Current *ktime*.
*
* int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
* Description
* **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
* out the format string.
* The *m* represents the seq_file. The *fmt* and *fmt_size* are for
* the format string itself. The *data* and *data_len* are format string
* arguments. The *data* are a **u64** array and corresponding format string
* values are stored in the array. For strings and pointers where pointees
* are accessed, only the pointer values are stored in the *data* array.
* The *data_len* is the size of *data* in bytes.
*
* Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory.
* Reading kernel memory may fail due to either invalid address or
* valid address but requiring a major memory fault. If reading kernel memory
* fails, the string for **%s** will be an empty string, and the ip
* address for **%p{i,I}{4,6}** will be 0. Not returning error to
* bpf program is consistent with what **bpf_trace_printk**\ () does for now.
* Return
* 0 on success, or a negative error in case of failure:
*
* **-EBUSY** if per-CPU memory copy buffer is busy, can try again
* by returning 1 from bpf program.
*
* **-EINVAL** if arguments are invalid, or if *fmt* is invalid/unsupported.
*
* **-E2BIG** if *fmt* contains too many format specifiers.
*
* **-EOVERFLOW** if an overflow happened: The same object will be tried again.
*
* int bpf_seq_write(struct seq_file *m, const void *data, u32 len)
* Description
* **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data.
* The *m* represents the seq_file. The *data* and *len* represent the
* data to write in bytes.
* Return
* 0 on success, or a negative error in case of failure:
*
* **-EOVERFLOW** if an overflow happened: The same object will be tried again.
*
* u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
* Description
* Return the cgroup v2 id of the socket *sk*.
*
* *sk* must be a non-**NULL** pointer to a full socket, e.g. one
* returned from **bpf_sk_lookup_xxx**\ (),
* **bpf_sk_fullsock**\ (), etc. The format of returned id is
* same as in **bpf_skb_cgroup_id**\ ().
*
* This helper is available only if the kernel was compiled with
* the **CONFIG_SOCK_CGROUP_DATA** configuration option.
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
* u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
* Description
* Return id of cgroup v2 that is ancestor of cgroup associated
* with the *sk* at the *ancestor_level*. The root cgroup is at
* *ancestor_level* zero and each step down the hierarchy
* increments the level. If *ancestor_level* == level of cgroup
* associated with *sk*, then return value will be same as that
* of **bpf_sk_cgroup_id**\ ().
*
* The helper is useful to implement policies based on cgroups
* that are upper in hierarchy than immediate cgroup associated
* with *sk*.
*
* The format of returned id and helper limitations are same as in
* **bpf_sk_cgroup_id**\ ().
* Return
* The id is returned or 0 in case the id could not be retrieved.
*/
#define __BPF_FUNC_MAPPER(FN)		\
FN(unspec),			\
@ -3195,7 +3280,11 @@ union bpf_attr {
FN(get_netns_cookie),		\
FN(get_current_ancestor_cgroup_id),	\
FN(sk_assign),			\
FN(ktime_get_boot_ns),
FN(ktime_get_boot_ns),		\
FN(seq_printf),			\
FN(seq_write),			\
FN(sk_cgroup_id),		\
FN(sk_ancestor_cgroup_id),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@ -3673,7 +3762,7 @@ struct bpf_sock_addr {
__u32 user_ip6[4];	/* Allows 1,2,4,8-byte read and 4,8-byte write.
* Stored in network byte order.
*/
__u32 user_port;	/* Allows 4-byte read and write.
__u32 user_port;	/* Allows 1,2,4-byte read and 4-byte write.
* Stored in network byte order
*/
__u32 family;		/* Allows 4-byte read, but no write */
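The user_port comment change reflects the new support for narrow loads in bpf_sock_addr programs (item 2 in the pull request). A hedged sketch of what that permits; the program, section and port are hypothetical, the 2-byte context load is the point::

    SEC("cgroup/connect4")
    int connect4_narrow(struct bpf_sock_addr *ctx)
    {
        /* 2-byte (narrow) context load, previously rejected by the
         * verifier; user_port is stored in network byte order.
         */
        __u16 port = *(__u16 *)&ctx->user_port;

        if (port == bpf_htons(111))   /* made-up policy: block portmapper */
            return 0;
        return 1;
    }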
@ -619,6 +619,16 @@ int bpf_link_update(int link_fd, int new_prog_fd,
return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
}

int bpf_iter_create(int link_fd)
{
union bpf_attr attr;

memset(&attr, 0, sizeof(attr));
attr.iter_create.link_fd = link_fd;

return sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
}

int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
		   __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
{
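Taken together with bpf_program__attach_iter() further down, the new wrapper enables the following user-space flow. A compressed sketch, with object loading and most error handling omitted::

    #include <stdio.h>
    #include <unistd.h>
    #include <bpf/bpf.h>
    #include <bpf/libbpf.h>

    static void dump_iter(struct bpf_program *prog)
    {
        struct bpf_link *link;
        char buf[1024];
        ssize_t n;
        int iter_fd;

        link = bpf_program__attach_iter(prog, NULL);
        if (libbpf_get_error(link))
            return;

        /* BPF_ITER_CREATE: make a readable seq_file fd from the link */
        iter_fd = bpf_iter_create(bpf_link__fd(link));
        if (iter_fd >= 0) {
            while ((n = read(iter_fd, buf, sizeof(buf))) > 0)
                fwrite(buf, 1, n, stdout);
            close(iter_fd);
        }
        bpf_link__destroy(link);
    }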
@ -187,6 +187,8 @@ struct bpf_link_update_opts {
LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd,
			       const struct bpf_link_update_opts *opts);

LIBBPF_API int bpf_iter_create(int link_fd);

struct bpf_prog_test_run_attr {
int prog_fd;
int repeat;
@ -36,6 +36,20 @@
#define __weak __attribute__((weak))
#endif

/*
 * Helper macro to manipulate data structures
 */
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
#endif
#ifndef container_of
#define container_of(ptr, type, member)			\
	({						\
		void *__mptr = (void *)(ptr);		\
		((type *)(__mptr - offsetof(type, member))); \
	})
#endif

/*
 * Helper structure used by eBPF C program
 * to describe BPF map attributes to libbpf loader
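container_of() is the usual kernel idiom, now available to BPF programs via bpf_helpers.h: recover the enclosing structure from a pointer to one of its members. An illustrative use with a made-up struct::

    struct list_node { struct list_node *next; };

    struct conn {
        int id;
        struct list_node node;
    };

    /* given a pointer to 'node', step back to the surrounding conn */
    static inline struct conn *node_to_conn(struct list_node *n)
    {
        return container_of(n, struct conn, node);
    }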
@ -413,4 +413,20 @@ typeof(name(0)) name(struct pt_regs *ctx) \
} \
static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)

/*
 * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
 * in a structure.
 */
#define BPF_SEQ_PRINTF(seq, fmt, args...) \
	({ \
		_Pragma("GCC diagnostic push") \
		_Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
		static const char ___fmt[] = fmt; \
		unsigned long long ___param[] = { args }; \
		_Pragma("GCC diagnostic pop") \
		int ___ret = bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \
					    ___param, sizeof(___param)); \
		___ret; \
	})

#endif
@ -3237,7 +3237,7 @@ int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
}

static int
bpf_object__probe_name(struct bpf_object *obj)
bpf_object__probe_loading(struct bpf_object *obj)
{
struct bpf_load_program_attr attr;
char *cp, errmsg[STRERR_BUFSIZE];
@ -3257,15 +3257,36 @@ bpf_object__probe_name(struct bpf_object *obj)

ret = bpf_load_program_xattr(&attr, NULL, 0);
if (ret < 0) {
cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
pr_warn("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n",
__func__, cp, errno);
return -errno;
ret = errno;
cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
"program. Make sure your kernel supports BPF "
"(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
"set to big enough value.\n", __func__, cp, ret);
return -ret;
}
close(ret);

/* now try the same program, but with the name */
return 0;
}

static int
bpf_object__probe_name(struct bpf_object *obj)
{
struct bpf_load_program_attr attr;
struct bpf_insn insns[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
int ret;

/* make sure loading with name works */

memset(&attr, 0, sizeof(attr));
attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
attr.insns = insns;
attr.insns_cnt = ARRAY_SIZE(insns);
attr.license = "GPL";
attr.name = "test";
ret = bpf_load_program_xattr(&attr, NULL, 0);
if (ret >= 0) {
@ -5636,7 +5657,8 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)

obj->loaded = true;

err = bpf_object__probe_caps(obj);
err = bpf_object__probe_loading(obj);
err = err ? : bpf_object__probe_caps(obj);
err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
err = err ? : bpf_object__sanitize_and_load_btf(obj);
err = err ? : bpf_object__sanitize_maps(obj);
|
||||
struct bpf_program *prog);
|
||||
static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
|
||||
struct bpf_program *prog);
|
||||
static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
|
||||
struct bpf_program *prog);
|
||||
|
||||
static const struct bpf_sec_def section_defs[] = {
|
||||
BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
|
||||
@ -6629,6 +6653,10 @@ static const struct bpf_sec_def section_defs[] = {
|
||||
.is_attach_btf = true,
|
||||
.expected_attach_type = BPF_LSM_MAC,
|
||||
.attach_fn = attach_lsm),
|
||||
SEC_DEF("iter/", TRACING,
|
||||
.expected_attach_type = BPF_TRACE_ITER,
|
||||
.is_attach_btf = true,
|
||||
.attach_fn = attach_iter),
|
||||
BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
|
||||
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
|
||||
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
|
||||
@ -6891,6 +6919,7 @@ invalid_prog:
|
||||
|
||||
#define BTF_TRACE_PREFIX "btf_trace_"
|
||||
#define BTF_LSM_PREFIX "bpf_lsm_"
|
||||
#define BTF_ITER_PREFIX "bpf_iter_"
|
||||
#define BTF_MAX_NAME_SIZE 128
|
||||
|
||||
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
|
||||
@ -6921,6 +6950,9 @@ static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
|
||||
else if (attach_type == BPF_LSM_MAC)
|
||||
err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name,
|
||||
BTF_KIND_FUNC);
|
||||
else if (attach_type == BPF_TRACE_ITER)
|
||||
err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name,
|
||||
BTF_KIND_FUNC);
|
||||
else
|
||||
err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
|
||||
|
||||
@ -7848,6 +7880,12 @@ static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
|
||||
return bpf_program__attach_lsm(prog);
|
||||
}
|
||||
|
||||
static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
|
||||
struct bpf_program *prog)
|
||||
{
|
||||
return bpf_program__attach_iter(prog, NULL);
|
||||
}
|
||||
|
||||
struct bpf_link *
|
||||
bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
|
||||
{
|
||||
@ -7882,6 +7920,42 @@ bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
|
||||
return link;
|
||||
}
|
||||
|
||||
struct bpf_link *
|
||||
bpf_program__attach_iter(struct bpf_program *prog,
|
||||
const struct bpf_iter_attach_opts *opts)
|
||||
{
|
||||
char errmsg[STRERR_BUFSIZE];
|
||||
struct bpf_link *link;
|
||||
int prog_fd, link_fd;
|
||||
|
||||
if (!OPTS_VALID(opts, bpf_iter_attach_opts))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
prog_fd = bpf_program__fd(prog);
|
||||
if (prog_fd < 0) {
|
||||
pr_warn("program '%s': can't attach before loaded\n",
|
||||
bpf_program__title(prog, false));
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
link = calloc(1, sizeof(*link));
|
||||
if (!link)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
link->detach = &bpf_link__detach_fd;
|
||||
|
||||
link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_ITER, NULL);
|
||||
if (link_fd < 0) {
|
||||
link_fd = -errno;
|
||||
free(link);
|
||||
pr_warn("program '%s': failed to attach to iterator: %s\n",
|
||||
bpf_program__title(prog, false),
|
||||
libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
|
||||
return ERR_PTR(link_fd);
|
||||
}
|
||||
link->fd = link_fd;
|
||||
return link;
|
||||
}
|
||||
|
||||
struct bpf_link *bpf_program__attach(struct bpf_program *prog)
|
||||
{
|
||||
const struct bpf_sec_def *sec_def;
|
||||
@ -8300,7 +8374,7 @@ error:
struct perf_sample_raw {
struct perf_event_header header;
uint32_t size;
char data[0];
char data[];
};

struct perf_sample_lost {
@ -258,6 +258,15 @@ struct bpf_map;

LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);

struct bpf_iter_attach_opts {
	size_t sz; /* size of this struct for forward/backward compatibility */
};
#define bpf_iter_attach_opts__last_field sz

LIBBPF_API struct bpf_link *
bpf_program__attach_iter(struct bpf_program *prog,
			 const struct bpf_iter_attach_opts *opts);

struct bpf_insn;

/*
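Callers pass options through libbpf's usual opts pattern; a minimal hedged usage sketch (DECLARE_LIBBPF_OPTS fills the .sz field that OPTS_VALID() checks)::

    DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
    struct bpf_link *link = bpf_program__attach_iter(prog, &opts);

    /* passing NULL opts, as bpftool's do_pin() does above, is also valid */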
@ -258,6 +258,8 @@ LIBBPF_0.0.8 {
LIBBPF_0.0.9 {
	global:
		bpf_enable_stats;
		bpf_iter_create;
		bpf_link_get_fd_by_id;
		bpf_link_get_next_id;
		bpf_program__attach_iter;
} LIBBPF_0.0.8;
@ -153,7 +153,7 @@ struct btf_ext_info_sec {
__u32	sec_name_off;
__u32	num_info;
/* Followed by num_info * record_size number of bytes */
__u8	data[0];
__u8	data[];
};

/* The minimum bpf_func_info checked by the loader */
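Both data[] hunks above replace the old GNU zero-length array with a C99 flexible array member; the allocation pattern is unchanged. A sketch::

    struct blob {
        __u32 len;
        __u8 data[];              /* was: __u8 data[0]; */
    };

    /* the payload is still allocated inline, right after the header */
    struct blob *b = malloc(sizeof(*b) + payload_len);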
@ -686,8 +686,11 @@ try_again_reset:
break;
}
}
if (child_pid != -1)
if (child_pid != -1) {
	if (timeout)
		kill(child_pid, SIGTERM);
	wait4(child_pid, &status, 0, &stat_config.ru_data);
}

if (workload_exec_errno) {
const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
@ -1821,6 +1821,24 @@ static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused,
}
#endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)

static int
symbol__disassemble_bpf_image(struct symbol *sym,
			      struct annotate_args *args)
{
struct annotation *notes = symbol__annotation(sym);
struct disasm_line *dl;

args->offset = -1;
args->line = strdup("to be implemented");
args->line_nr = 0;
dl = disasm_line__new(args);
if (dl)
	annotation_line__add(&dl->al, &notes->src->source);

free(args->line);
return 0;
}

/*
* Possibly create a new version of line with tabs expanded. Returns the
* existing or new line, storage is updated if a new line is allocated. If
@ -1920,6 +1938,8 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)

if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) {
	return symbol__disassemble_bpf(sym, args);
} else if (dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE) {
	return symbol__disassemble_bpf_image(sym, args);
} else if (dso__is_kcore(dso)) {
	kce.kcore_filename = symfs_filename;
	kce.addr = map__rip_2objdump(map, sym->start);
@ -6,6 +6,9 @@
#include <bpf/libbpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/string.h>
#include <internal/lib.h>
#include <symbol/kallsyms.h>
#include "bpf-event.h"
#include "debug.h"
#include "dso.h"
@ -290,11 +293,82 @@ out:
return err ? -1 : 0;
}

struct kallsyms_parse {
	union perf_event	*event;
	perf_event__handler_t	 process;
	struct machine		*machine;
	struct perf_tool	*tool;
};

static int
process_bpf_image(char *name, u64 addr, struct kallsyms_parse *data)
{
struct machine *machine = data->machine;
union perf_event *event = data->event;
struct perf_record_ksymbol *ksymbol;
int len;

ksymbol = &event->ksymbol;

*ksymbol = (struct perf_record_ksymbol) {
	.header = {
		.type = PERF_RECORD_KSYMBOL,
		.size = offsetof(struct perf_record_ksymbol, name),
	},
	.addr      = addr,
	.len       = page_size,
	.ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF,
	.flags     = 0,
};

len = scnprintf(ksymbol->name, KSYM_NAME_LEN, "%s", name);
ksymbol->header.size += PERF_ALIGN(len + 1, sizeof(u64));
memset((void *) event + event->header.size, 0, machine->id_hdr_size);
event->header.size += machine->id_hdr_size;

return perf_tool__process_synth_event(data->tool, event, machine,
				      data->process);
}

static int
kallsyms_process_symbol(void *data, const char *_name,
			char type __maybe_unused, u64 start)
{
char disp[KSYM_NAME_LEN];
char *module, *name;
unsigned long id;
int err = 0;

module = strchr(_name, '\t');
if (!module)
	return 0;

/* We are going after [bpf] module ... */
if (strcmp(module + 1, "[bpf]"))
	return 0;

name = memdup(_name, (module - _name) + 1);
if (!name)
	return -ENOMEM;

name[module - _name] = 0;

/* .. and only for trampolines and dispatchers */
if ((sscanf(name, "bpf_trampoline_%lu", &id) == 1) ||
    (sscanf(name, "bpf_dispatcher_%s", disp) == 1))
	err = process_bpf_image(name, start, data);

free(name);
return err;
}

int perf_event__synthesize_bpf_events(struct perf_session *session,
				      perf_event__handler_t process,
				      struct machine *machine,
				      struct record_opts *opts)
{
const char *kallsyms_filename = "/proc/kallsyms";
struct kallsyms_parse arg;
union perf_event *event;
__u32 id = 0;
int err;
@ -303,6 +377,8 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
event = malloc(sizeof(event->bpf) + KSYM_NAME_LEN + machine->id_hdr_size);
if (!event)
	return -1;

/* Synthesize all the bpf programs in system. */
while (true) {
	err = bpf_prog_get_next_id(id, &id);
	if (err) {
@ -335,6 +411,23 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
		break;
	}
}

/* Synthesize all the bpf images - trampolines/dispatchers. */
if (symbol_conf.kallsyms_name != NULL)
	kallsyms_filename = symbol_conf.kallsyms_name;

arg = (struct kallsyms_parse) {
	.event   = event,
	.process = process,
	.machine = machine,
	.tool    = session->tool,
};

if (kallsyms__parse(kallsyms_filename, &arg, kallsyms_process_symbol)) {
	pr_err("%s: failed to synthesize bpf images: %s\n",
	       __func__, strerror(errno));
}

free(event);
return err;
}
@ -191,6 +191,7 @@ int dso__read_binary_type_filename(const struct dso *dso,
case DSO_BINARY_TYPE__GUEST_KALLSYMS:
case DSO_BINARY_TYPE__JAVA_JIT:
case DSO_BINARY_TYPE__BPF_PROG_INFO:
case DSO_BINARY_TYPE__BPF_IMAGE:
case DSO_BINARY_TYPE__NOT_FOUND:
ret = -1;
break;
@ -40,6 +40,7 @@ enum dso_binary_type {
DSO_BINARY_TYPE__GUEST_KCORE,
DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
DSO_BINARY_TYPE__BPF_PROG_INFO,
DSO_BINARY_TYPE__BPF_IMAGE,
DSO_BINARY_TYPE__NOT_FOUND,
};
@ -736,6 +736,12 @@ int machine__process_switch_event(struct machine *machine __maybe_unused,
return 0;
}

static int is_bpf_image(const char *name)
{
return !strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) ||
       !strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1);
}

static int machine__process_ksymbol_register(struct machine *machine,
union perf_event *event,
struct perf_sample *sample __maybe_unused)
@ -759,6 +765,12 @@ static int machine__process_ksymbol_register(struct machine *machine,
map->start = event->ksymbol.addr;
map->end = map->start + event->ksymbol.len;
maps__insert(&machine->kmaps, map);
dso__set_loaded(dso);

if (is_bpf_image(event->ksymbol.name)) {
dso->binary_type = DSO_BINARY_TYPE__BPF_IMAGE;
dso__set_long_name(dso, "", false);
}
}

sym = symbol__new(map->map_ip(map, map->start),
@ -1544,6 +1544,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
return true;

case DSO_BINARY_TYPE__BPF_PROG_INFO:
case DSO_BINARY_TYPE__BPF_IMAGE:
case DSO_BINARY_TYPE__NOT_FOUND:
default:
return false;
1 tools/testing/selftests/bpf/.gitignore (vendored)
@ -38,3 +38,4 @@ test_cpp
/bpf_gcc
/tools
/runqslower
/bench
@ -77,7 +77,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
# Compile but not part of 'make run_tests'
TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
test_lirc_mode2_user xdping test_cpp runqslower
test_lirc_mode2_user xdping test_cpp runqslower bench

TEST_CUSTOM_PROGS = urandom_read

@ -265,6 +265,7 @@ TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS)
TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
$$(filter-out $(SKEL_BLACKLIST), \
$$(TRUNNER_BPF_SRCS)))
TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)

# Evaluate rules now with extra TRUNNER_XXX variables above already defined
$$(eval $$(call DEFINE_TEST_RUNNER_RULES,$1,$2))
@ -354,6 +355,7 @@ endef
TRUNNER_TESTS_DIR := prog_tests
TRUNNER_BPF_PROGS_DIR := progs
TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
network_helpers.c testing_helpers.c \
flow_dissector_load.h
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
$(wildcard progs/btf_dump_test_case_*.c)
@ -405,6 +407,21 @@ $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
$(call msg,CXX,,$@)
$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@

# Benchmark runner
$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
$(call msg,CC,,$@)
$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
$(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h
$(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h
$(OUTPUT)/bench: LDLIBS += -lm
$(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
$(OUTPUT)/bench_count.o \
$(OUTPUT)/bench_rename.o \
$(OUTPUT)/bench_trigger.o
$(call msg,BINARY,,$@)
$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)

EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
feature \
43 tools/testing/selftests/bpf/README.rst (new file)
@ -0,0 +1,43 @@
==================
BPF Selftest Notes
==================

Additional information about selftest failures is documented here.

bpf_iter test failures with clang/llvm 10.0.0
=============================================

With clang/llvm 10.0.0, the following two bpf_iter tests failed:
  * ``bpf_iter/ipv6_route``
  * ``bpf_iter/netlink``

The symptom for ``bpf_iter/ipv6_route`` looks like

.. code-block:: c

  2: (79) r8 = *(u64 *)(r1 +8)
  ...
  14: (bf) r2 = r8
  15: (0f) r2 += r1
  ; BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
  16: (7b) *(u64 *)(r8 +64) = r2
  only read is supported

The symptom for ``bpf_iter/netlink`` looks like

.. code-block:: c

  ; struct netlink_sock *nlk = ctx->sk;
  2: (79) r7 = *(u64 *)(r1 +8)
  ...
  15: (bf) r2 = r7
  16: (0f) r2 += r1
  ; BPF_SEQ_PRINTF(seq, "%pK %-3d ", s, s->sk_protocol);
  17: (7b) *(u64 *)(r7 +0) = r2
  only read is supported

This is due to an llvm BPF backend bug. The fix
https://reviews.llvm.org/D78466
has been pushed to the llvm 10.x release branch and will be
available in 10.0.1. The fix is also available in the llvm 11.0.0 trunk.
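
For reference, a minimal sketch of the program shape that tripped the bug
(modeled on the ``bpf_iter/ipv6_route`` selftest; the exact selftest source
differs, so treat the code below as illustrative only):

.. code-block:: c

  /* Sketch: assumes the usual bpf_iter selftest headers. */
  SEC("iter/ipv6_route")
  int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx)
  {
      struct seq_file *seq = ctx->meta->seq;
      struct fib6_info *rt = ctx->rt;

      if (!rt)
          return 0;

      /* With the buggy backend, the ctx-derived pointer (r8/r7 in
       * the dumps above) is spilled to the stack; the verifier
       * rejects that store with "only read is supported".
       */
      BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_dst.addr,
                     rt->fib6_dst.plen);
      return 0;
  }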
449 tools/testing/selftests/bpf/bench.c (new file)
@ -0,0 +1,449 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#define _GNU_SOURCE
#include <argp.h>
#include <linux/compiler.h>
#include <sys/time.h>
#include <sched.h>
#include <fcntl.h>
#include <pthread.h>
#include <sys/sysinfo.h>
#include <sys/resource.h>
#include <signal.h>
#include "bench.h"
#include "testing_helpers.h"

struct env env = {
.warmup_sec = 1,
.duration_sec = 5,
.affinity = false,
.consumer_cnt = 1,
.producer_cnt = 1,
};

static int libbpf_print_fn(enum libbpf_print_level level,
const char *format, va_list args)
{
if (level == LIBBPF_DEBUG && !env.verbose)
return 0;
return vfprintf(stderr, format, args);
}

static int bump_memlock_rlimit(void)
{
struct rlimit rlim_new = {
.rlim_cur = RLIM_INFINITY,
.rlim_max = RLIM_INFINITY,
};

return setrlimit(RLIMIT_MEMLOCK, &rlim_new);
}

void setup_libbpf()
{
int err;

libbpf_set_print(libbpf_print_fn);

err = bump_memlock_rlimit();
if (err)
fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err);
}

void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns)
{
double hits_per_sec, drops_per_sec;
double hits_per_prod;

hits_per_sec = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
hits_per_prod = hits_per_sec / env.producer_cnt;
drops_per_sec = res->drops / 1000000.0 / (delta_ns / 1000000000.0);

printf("Iter %3d (%7.3lfus): ",
iter, (delta_ns - 1000000000) / 1000.0);

printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s\n",
hits_per_sec, hits_per_prod, drops_per_sec);
}

void hits_drops_report_final(struct bench_res res[], int res_cnt)
{
int i;
double hits_mean = 0.0, drops_mean = 0.0;
double hits_stddev = 0.0, drops_stddev = 0.0;

for (i = 0; i < res_cnt; i++) {
hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
drops_mean += res[i].drops / 1000000.0 / (0.0 + res_cnt);
}

if (res_cnt > 1) {
for (i = 0; i < res_cnt; i++) {
hits_stddev += (hits_mean - res[i].hits / 1000000.0) *
(hits_mean - res[i].hits / 1000000.0) /
(res_cnt - 1.0);
drops_stddev += (drops_mean - res[i].drops / 1000000.0) *
(drops_mean - res[i].drops / 1000000.0) /
(res_cnt - 1.0);
}
hits_stddev = sqrt(hits_stddev);
drops_stddev = sqrt(drops_stddev);
}
printf("Summary: hits %8.3lf \u00B1 %5.3lfM/s (%7.3lfM/prod), ",
hits_mean, hits_stddev, hits_mean / env.producer_cnt);
printf("drops %8.3lf \u00B1 %5.3lfM/s\n",
drops_mean, drops_stddev);
}

const char *argp_program_version = "benchmark";
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
const char argp_program_doc[] =
"benchmark    Generic benchmarking framework.\n"
"\n"
"This tool runs benchmarks.\n"
"\n"
"USAGE: benchmark <bench-name>\n"
"\n"
"EXAMPLES:\n"
"    # run 'count-local' benchmark with 1 producer and 1 consumer\n"
"    benchmark count-local\n"
"    # run 'count-local' with 16 producer and 8 consumer threads, pinned to CPUs\n"
"    benchmark -p16 -c8 -a count-local\n";

enum {
ARG_PROD_AFFINITY_SET = 1000,
ARG_CONS_AFFINITY_SET = 1001,
};

static const struct argp_option opts[] = {
{ "list", 'l', NULL, 0, "List available benchmarks"},
{ "duration", 'd', "SEC", 0, "Duration of benchmark, seconds"},
{ "warmup", 'w', "SEC", 0, "Warm-up period, seconds"},
{ "producers", 'p', "NUM", 0, "Number of producer threads"},
{ "consumers", 'c', "NUM", 0, "Number of consumer threads"},
{ "verbose", 'v', NULL, 0, "Verbose debug output"},
{ "affinity", 'a', NULL, 0, "Set consumer/producer thread affinity"},
{ "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0,
"Set of CPUs for producer threads; implies --affinity"},
{ "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0,
"Set of CPUs for consumer threads; implies --affinity"},
{},
};

static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
static int pos_args;

switch (key) {
case 'v':
env.verbose = true;
break;
case 'l':
env.list = true;
break;
case 'd':
env.duration_sec = strtol(arg, NULL, 10);
if (env.duration_sec <= 0) {
fprintf(stderr, "Invalid duration: %s\n", arg);
argp_usage(state);
}
break;
case 'w':
env.warmup_sec = strtol(arg, NULL, 10);
if (env.warmup_sec <= 0) {
fprintf(stderr, "Invalid warm-up duration: %s\n", arg);
argp_usage(state);
}
break;
case 'p':
env.producer_cnt = strtol(arg, NULL, 10);
if (env.producer_cnt <= 0) {
fprintf(stderr, "Invalid producer count: %s\n", arg);
argp_usage(state);
}
break;
case 'c':
env.consumer_cnt = strtol(arg, NULL, 10);
if (env.consumer_cnt <= 0) {
fprintf(stderr, "Invalid consumer count: %s\n", arg);
argp_usage(state);
}
break;
case 'a':
env.affinity = true;
break;
case ARG_PROD_AFFINITY_SET:
env.affinity = true;
if (parse_num_list(arg, &env.prod_cpus.cpus,
&env.prod_cpus.cpus_len)) {
fprintf(stderr, "Invalid format of CPU set for producers.");
argp_usage(state);
}
break;
case ARG_CONS_AFFINITY_SET:
env.affinity = true;
if (parse_num_list(arg, &env.cons_cpus.cpus,
&env.cons_cpus.cpus_len)) {
fprintf(stderr, "Invalid format of CPU set for consumers.");
argp_usage(state);
}
break;
case ARGP_KEY_ARG:
if (pos_args++) {
fprintf(stderr,
"Unrecognized positional argument: %s\n", arg);
argp_usage(state);
}
env.bench_name = strdup(arg);
break;
default:
return ARGP_ERR_UNKNOWN;
}
return 0;
}

static void parse_cmdline_args(int argc, char **argv)
{
static const struct argp argp = {
.options = opts,
.parser = parse_arg,
.doc = argp_program_doc,
};
if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
exit(1);
if (!env.list && !env.bench_name) {
argp_help(&argp, stderr, ARGP_HELP_DOC, "bench");
exit(1);
}
}

static void collect_measurements(long delta_ns);

static __u64 last_time_ns;
static void sigalarm_handler(int signo)
{
long new_time_ns = get_time_ns();
long delta_ns = new_time_ns - last_time_ns;

collect_measurements(delta_ns);

last_time_ns = new_time_ns;
}

/* set up periodic 1-second timer */
static void setup_timer()
{
static struct sigaction sigalarm_action = {
.sa_handler = sigalarm_handler,
};
struct itimerval timer_settings = {};
int err;

last_time_ns = get_time_ns();
err = sigaction(SIGALRM, &sigalarm_action, NULL);
if (err < 0) {
fprintf(stderr, "failed to install SIGALRM handler: %d\n", -errno);
exit(1);
}
timer_settings.it_interval.tv_sec = 1;
timer_settings.it_value.tv_sec = 1;
err = setitimer(ITIMER_REAL, &timer_settings, NULL);
if (err < 0) {
fprintf(stderr, "failed to arm interval timer: %d\n", -errno);
exit(1);
}
}

static void set_thread_affinity(pthread_t thread, int cpu)
{
cpu_set_t cpuset;

CPU_ZERO(&cpuset);
CPU_SET(cpu, &cpuset);
if (pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset)) {
fprintf(stderr, "setting affinity to CPU #%d failed: %d\n",
cpu, errno);
exit(1);
}
}

static int next_cpu(struct cpu_set *cpu_set)
{
if (cpu_set->cpus) {
int i;

/* find next available CPU */
for (i = cpu_set->next_cpu; i < cpu_set->cpus_len; i++) {
if (cpu_set->cpus[i]) {
cpu_set->next_cpu = i + 1;
return i;
}
}
fprintf(stderr, "Not enough CPUs specified, need CPU #%d or higher.\n", i);
exit(1);
}

return cpu_set->next_cpu++;
}

static struct bench_state {
int res_cnt;
struct bench_res *results;
pthread_t *consumers;
pthread_t *producers;
} state;

const struct bench *bench = NULL;

extern const struct bench bench_count_global;
extern const struct bench bench_count_local;
extern const struct bench bench_rename_base;
extern const struct bench bench_rename_kprobe;
extern const struct bench bench_rename_kretprobe;
extern const struct bench bench_rename_rawtp;
extern const struct bench bench_rename_fentry;
extern const struct bench bench_rename_fexit;
extern const struct bench bench_rename_fmodret;
extern const struct bench bench_trig_base;
extern const struct bench bench_trig_tp;
extern const struct bench bench_trig_rawtp;
extern const struct bench bench_trig_kprobe;
extern const struct bench bench_trig_fentry;
extern const struct bench bench_trig_fmodret;

static const struct bench *benchs[] = {
&bench_count_global,
&bench_count_local,
&bench_rename_base,
&bench_rename_kprobe,
&bench_rename_kretprobe,
&bench_rename_rawtp,
&bench_rename_fentry,
&bench_rename_fexit,
&bench_rename_fmodret,
&bench_trig_base,
&bench_trig_tp,
&bench_trig_rawtp,
&bench_trig_kprobe,
&bench_trig_fentry,
&bench_trig_fmodret,
};

static void setup_benchmark()
{
int i, err;

if (!env.bench_name) {
fprintf(stderr, "benchmark name is not specified\n");
exit(1);
}

for (i = 0; i < ARRAY_SIZE(benchs); i++) {
if (strcmp(benchs[i]->name, env.bench_name) == 0) {
bench = benchs[i];
break;
}
}
if (!bench) {
fprintf(stderr, "benchmark '%s' not found\n", env.bench_name);
exit(1);
}

printf("Setting up benchmark '%s'...\n", bench->name);

state.producers = calloc(env.producer_cnt, sizeof(*state.producers));
state.consumers = calloc(env.consumer_cnt, sizeof(*state.consumers));
state.results = calloc(env.duration_sec + env.warmup_sec + 2,
sizeof(*state.results));
if (!state.producers || !state.consumers || !state.results)
exit(1);

if (bench->validate)
bench->validate();
if (bench->setup)
bench->setup();

for (i = 0; i < env.consumer_cnt; i++) {
err = pthread_create(&state.consumers[i], NULL,
bench->consumer_thread, (void *)(long)i);
if (err) {
fprintf(stderr, "failed to create consumer thread #%d: %d\n",
i, -errno);
exit(1);
}
if (env.affinity)
set_thread_affinity(state.consumers[i],
next_cpu(&env.cons_cpus));
}

/* unless explicit producer CPU list is specified, continue after
* last consumer CPU
*/
if (!env.prod_cpus.cpus)
env.prod_cpus.next_cpu = env.cons_cpus.next_cpu;

for (i = 0; i < env.producer_cnt; i++) {
err = pthread_create(&state.producers[i], NULL,
bench->producer_thread, (void *)(long)i);
if (err) {
fprintf(stderr, "failed to create producer thread #%d: %d\n",
i, -errno);
exit(1);
}
if (env.affinity)
set_thread_affinity(state.producers[i],
next_cpu(&env.prod_cpus));
}

printf("Benchmark '%s' started.\n", bench->name);
}

static pthread_mutex_t bench_done_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t bench_done = PTHREAD_COND_INITIALIZER;

static void collect_measurements(long delta_ns) {
int iter = state.res_cnt++;
struct bench_res *res = &state.results[iter];

bench->measure(res);

if (bench->report_progress)
bench->report_progress(iter, res, delta_ns);

if (iter == env.duration_sec + env.warmup_sec) {
pthread_mutex_lock(&bench_done_mtx);
pthread_cond_signal(&bench_done);
pthread_mutex_unlock(&bench_done_mtx);
}
}

int main(int argc, char **argv)
{
parse_cmdline_args(argc, argv);

if (env.list) {
int i;

printf("Available benchmarks:\n");
for (i = 0; i < ARRAY_SIZE(benchs); i++) {
printf("- %s\n", benchs[i]->name);
}
return 0;
}

setup_benchmark();

setup_timer();

pthread_mutex_lock(&bench_done_mtx);
pthread_cond_wait(&bench_done, &bench_done_mtx);
pthread_mutex_unlock(&bench_done_mtx);

if (bench->report_final)
/* skip first sample */
bench->report_final(state.results + env.warmup_sec,
state.res_cnt - env.warmup_sec);

return 0;
}
81 tools/testing/selftests/bpf/bench.h (new file)
@ -0,0 +1,81 @@
/* SPDX-License-Identifier: GPL-2.0 */
#pragma once
#include <stdlib.h>
#include <stdbool.h>
#include <linux/err.h>
#include <errno.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <math.h>
#include <time.h>
#include <sys/syscall.h>

struct cpu_set {
bool *cpus;
int cpus_len;
int next_cpu;
};

struct env {
char *bench_name;
int duration_sec;
int warmup_sec;
bool verbose;
bool list;
bool affinity;
int consumer_cnt;
int producer_cnt;
struct cpu_set prod_cpus;
struct cpu_set cons_cpus;
};

struct bench_res {
long hits;
long drops;
};

struct bench {
const char *name;
void (*validate)();
void (*setup)();
void *(*producer_thread)(void *ctx);
void *(*consumer_thread)(void *ctx);
void (*measure)(struct bench_res* res);
void (*report_progress)(int iter, struct bench_res* res, long delta_ns);
void (*report_final)(struct bench_res res[], int res_cnt);
};

struct counter {
long value;
} __attribute__((aligned(128)));

extern struct env env;
extern const struct bench *bench;

void setup_libbpf();
void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns);
void hits_drops_report_final(struct bench_res res[], int res_cnt);

static inline __u64 get_time_ns() {
struct timespec t;

clock_gettime(CLOCK_MONOTONIC, &t);

return (u64)t.tv_sec * 1000000000 + t.tv_nsec;
}

static inline void atomic_inc(long *value)
{
(void)__atomic_add_fetch(value, 1, __ATOMIC_RELAXED);
}

static inline void atomic_add(long *value, long n)
{
(void)__atomic_add_fetch(value, n, __ATOMIC_RELAXED);
}

static inline long atomic_swap(long *value, long n)
{
return __atomic_exchange_n(value, n, __ATOMIC_RELAXED);
}
91 tools/testing/selftests/bpf/benchs/bench_count.c (new file)
@ -0,0 +1,91 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include "bench.h"

/* COUNT-GLOBAL benchmark */

static struct count_global_ctx {
struct counter hits;
} count_global_ctx;

static void *count_global_producer(void *input)
{
struct count_global_ctx *ctx = &count_global_ctx;

while (true) {
atomic_inc(&ctx->hits.value);
}
return NULL;
}

static void *count_global_consumer(void *input)
{
return NULL;
}

static void count_global_measure(struct bench_res *res)
{
struct count_global_ctx *ctx = &count_global_ctx;

res->hits = atomic_swap(&ctx->hits.value, 0);
}

/* COUNT-local benchmark */

static struct count_local_ctx {
struct counter *hits;
} count_local_ctx;

static void count_local_setup()
{
struct count_local_ctx *ctx = &count_local_ctx;

ctx->hits = calloc(env.producer_cnt, sizeof(*ctx->hits));
if (!ctx->hits)
exit(1);
}

static void *count_local_producer(void *input)
{
struct count_local_ctx *ctx = &count_local_ctx;
int idx = (long)input;

while (true) {
atomic_inc(&ctx->hits[idx].value);
}
return NULL;
}

static void *count_local_consumer(void *input)
{
return NULL;
}

static void count_local_measure(struct bench_res *res)
{
struct count_local_ctx *ctx = &count_local_ctx;
int i;

for (i = 0; i < env.producer_cnt; i++) {
res->hits += atomic_swap(&ctx->hits[i].value, 0);
}
}

const struct bench bench_count_global = {
.name = "count-global",
.producer_thread = count_global_producer,
.consumer_thread = count_global_consumer,
.measure = count_global_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};

const struct bench bench_count_local = {
.name = "count-local",
.setup = count_local_setup,
.producer_thread = count_local_producer,
.consumer_thread = count_local_consumer,
.measure = count_local_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};
195 tools/testing/selftests/bpf/benchs/bench_rename.c (new file)
@ -0,0 +1,195 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <fcntl.h>
#include "bench.h"
#include "test_overhead.skel.h"

/* BPF rename benchmarks */
static struct ctx {
struct test_overhead *skel;
struct counter hits;
int fd;
} ctx;

static void validate()
{
if (env.producer_cnt != 1) {
fprintf(stderr, "benchmark doesn't support multi-producer!\n");
exit(1);
}
if (env.consumer_cnt != 1) {
fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
exit(1);
}
}

static void *producer(void *input)
{
char buf[] = "test_overhead";
int err;

while (true) {
err = write(ctx.fd, buf, sizeof(buf));
if (err < 0) {
fprintf(stderr, "write failed\n");
exit(1);
}
atomic_inc(&ctx.hits.value);
}
}

static void measure(struct bench_res *res)
{
res->hits = atomic_swap(&ctx.hits.value, 0);
}

static void setup_ctx()
{
setup_libbpf();

ctx.skel = test_overhead__open_and_load();
if (!ctx.skel) {
fprintf(stderr, "failed to open skeleton\n");
exit(1);
}

ctx.fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
if (ctx.fd < 0) {
fprintf(stderr, "failed to open /proc/self/comm: %d\n", -errno);
exit(1);
}
}

static void attach_bpf(struct bpf_program *prog)
{
struct bpf_link *link;

link = bpf_program__attach(prog);
if (IS_ERR(link)) {
fprintf(stderr, "failed to attach program!\n");
exit(1);
}
}

static void setup_base()
{
setup_ctx();
}

static void setup_kprobe()
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog1);
}

static void setup_kretprobe()
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog2);
}

static void setup_rawtp()
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog3);
}

static void setup_fentry()
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog4);
}

static void setup_fexit()
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog5);
}

static void setup_fmodret()
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog6);
}

static void *consumer(void *input)
{
return NULL;
}

const struct bench bench_rename_base = {
.name = "rename-base",
.validate = validate,
.setup = setup_base,
.producer_thread = producer,
.consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};

const struct bench bench_rename_kprobe = {
.name = "rename-kprobe",
.validate = validate,
.setup = setup_kprobe,
.producer_thread = producer,
.consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};

const struct bench bench_rename_kretprobe = {
.name = "rename-kretprobe",
.validate = validate,
.setup = setup_kretprobe,
.producer_thread = producer,
.consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};

const struct bench bench_rename_rawtp = {
.name = "rename-rawtp",
.validate = validate,
.setup = setup_rawtp,
.producer_thread = producer,
.consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};

const struct bench bench_rename_fentry = {
.name = "rename-fentry",
.validate = validate,
.setup = setup_fentry,
.producer_thread = producer,
.consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};

const struct bench bench_rename_fexit = {
.name = "rename-fexit",
.validate = validate,
.setup = setup_fexit,
.producer_thread = producer,
.consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};

const struct bench bench_rename_fmodret = {
.name = "rename-fmodret",
.validate = validate,
.setup = setup_fmodret,
.producer_thread = producer,
.consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};
167 tools/testing/selftests/bpf/benchs/bench_trigger.c (new file)
@ -0,0 +1,167 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include "bench.h"
#include "trigger_bench.skel.h"

/* BPF triggering benchmarks */
static struct trigger_ctx {
struct trigger_bench *skel;
} ctx;

static struct counter base_hits;

static void trigger_validate()
{
if (env.consumer_cnt != 1) {
fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
exit(1);
}
}

static void *trigger_base_producer(void *input)
{
while (true) {
(void)syscall(__NR_getpgid);
atomic_inc(&base_hits.value);
}
return NULL;
}

static void trigger_base_measure(struct bench_res *res)
{
res->hits = atomic_swap(&base_hits.value, 0);
}

static void *trigger_producer(void *input)
{
while (true)
(void)syscall(__NR_getpgid);
return NULL;
}

static void trigger_measure(struct bench_res *res)
{
res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
}

static void setup_ctx()
{
setup_libbpf();

ctx.skel = trigger_bench__open_and_load();
if (!ctx.skel) {
fprintf(stderr, "failed to open skeleton\n");
exit(1);
}
}

static void attach_bpf(struct bpf_program *prog)
{
struct bpf_link *link;

link = bpf_program__attach(prog);
if (IS_ERR(link)) {
fprintf(stderr, "failed to attach program!\n");
exit(1);
}
}

static void trigger_tp_setup()
{
setup_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_tp);
}

static void trigger_rawtp_setup()
{
setup_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_raw_tp);
}

static void trigger_kprobe_setup()
{
setup_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
}

static void trigger_fentry_setup()
{
setup_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}

static void trigger_fmodret_setup()
{
setup_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
}

static void *trigger_consumer(void *input)
{
return NULL;
}

const struct bench bench_trig_base = {
.name = "trig-base",
.validate = trigger_validate,
.producer_thread = trigger_base_producer,
.consumer_thread = trigger_consumer,
.measure = trigger_base_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};

const struct bench bench_trig_tp = {
.name = "trig-tp",
.validate = trigger_validate,
.setup = trigger_tp_setup,
.producer_thread = trigger_producer,
.consumer_thread = trigger_consumer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};

const struct bench bench_trig_rawtp = {
.name = "trig-rawtp",
.validate = trigger_validate,
.setup = trigger_rawtp_setup,
.producer_thread = trigger_producer,
.consumer_thread = trigger_consumer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};

const struct bench bench_trig_kprobe = {
.name = "trig-kprobe",
.validate = trigger_validate,
.setup = trigger_kprobe_setup,
.producer_thread = trigger_producer,
.consumer_thread = trigger_consumer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};

const struct bench bench_trig_fentry = {
.name = "trig-fentry",
.validate = trigger_validate,
.setup = trigger_fentry_setup,
.producer_thread = trigger_producer,
.consumer_thread = trigger_consumer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};

const struct bench bench_trig_fmodret = {
.name = "trig-fmodret",
.validate = trigger_validate,
.setup = trigger_fmodret_setup,
.producer_thread = trigger_producer,
.consumer_thread = trigger_consumer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};
9 tools/testing/selftests/bpf/benchs/run_bench_rename.sh (new executable file)
@ -0,0 +1,9 @@
#!/bin/bash

set -eufo pipefail

for i in base kprobe kretprobe rawtp fentry fexit fmodret
do
summary=$(sudo ./bench -w2 -d5 -a rename-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
printf "%-10s: %s\n" $i "$summary"
done
9 tools/testing/selftests/bpf/benchs/run_bench_trigger.sh (new executable file)
@ -0,0 +1,9 @@
#!/bin/bash

set -eufo pipefail

for i in base tp rawtp kprobe fentry fmodret
do
summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
printf "%-10s: %s\n" $i "$summary"
done
158 tools/testing/selftests/bpf/network_helpers.c (new file)
@ -0,0 +1,158 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include <sys/epoll.h>

#include <linux/err.h>
#include <linux/in.h>
#include <linux/in6.h>

#include "bpf_util.h"
#include "network_helpers.h"

#define clean_errno() (errno == 0 ? "None" : strerror(errno))
#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)

struct ipv4_packet pkt_v4 = {
.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
.iph.ihl = 5,
.iph.protocol = IPPROTO_TCP,
.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
.tcp.urg_ptr = 123,
.tcp.doff = 5,
};

struct ipv6_packet pkt_v6 = {
.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
.iph.nexthdr = IPPROTO_TCP,
.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
.tcp.urg_ptr = 123,
.tcp.doff = 5,
};

int start_server(int family, int type)
{
struct sockaddr_storage addr = {};
socklen_t len;
int fd;

if (family == AF_INET) {
struct sockaddr_in *sin = (void *)&addr;

sin->sin_family = AF_INET;
len = sizeof(*sin);
} else {
struct sockaddr_in6 *sin6 = (void *)&addr;

sin6->sin6_family = AF_INET6;
len = sizeof(*sin6);
}

fd = socket(family, type | SOCK_NONBLOCK, 0);
if (fd < 0) {
log_err("Failed to create server socket");
return -1;
}

if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
log_err("Failed to bind socket");
close(fd);
return -1;
}

if (type == SOCK_STREAM) {
if (listen(fd, 1) < 0) {
log_err("Failed to listen on socket");
close(fd);
return -1;
}
}

return fd;
}

static const struct timeval timeo_sec = { .tv_sec = 3 };
static const size_t timeo_optlen = sizeof(timeo_sec);

int connect_to_fd(int family, int type, int server_fd)
{
int fd, save_errno;

fd = socket(family, type, 0);
if (fd < 0) {
log_err("Failed to create client socket");
return -1;
}

if (connect_fd_to_fd(fd, server_fd) < 0 && errno != EINPROGRESS) {
save_errno = errno;
close(fd);
errno = save_errno;
return -1;
}

return fd;
}

int connect_fd_to_fd(int client_fd, int server_fd)
{
struct sockaddr_storage addr;
socklen_t len = sizeof(addr);
int save_errno;

if (setsockopt(client_fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
timeo_optlen)) {
log_err("Failed to set SO_RCVTIMEO");
return -1;
}

if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
log_err("Failed to get server addr");
return -1;
}

if (connect(client_fd, (const struct sockaddr *)&addr, len) < 0) {
if (errno != EINPROGRESS) {
save_errno = errno;
log_err("Failed to connect to server");
errno = save_errno;
}
return -1;
}

return 0;
}

int connect_wait(int fd)
{
struct epoll_event ev = {}, events[2];
int timeout_ms = 1000;
int efd, nfd;

efd = epoll_create1(EPOLL_CLOEXEC);
if (efd < 0) {
log_err("Failed to open epoll fd");
return -1;
}

ev.events = EPOLLRDHUP | EPOLLOUT;
ev.data.fd = fd;

if (epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev) < 0) {
log_err("Failed to register fd=%d on epoll fd=%d", fd, efd);
close(efd);
return -1;
}

nfd = epoll_wait(efd, events, ARRAY_SIZE(events), timeout_ms);
if (nfd < 0)
log_err("Failed to wait for I/O event on epoll fd=%d", efd);

close(efd);
return nfd;
}
41 tools/testing/selftests/bpf/network_helpers.h (new file)
@ -0,0 +1,41 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NETWORK_HELPERS_H
#define __NETWORK_HELPERS_H
#include <sys/socket.h>
#include <sys/types.h>
#include <linux/types.h>
typedef __u16 __sum16;
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <netinet/tcp.h>
#include <bpf/bpf_endian.h>

#define MAGIC_VAL 0x1234
#define NUM_ITER 100000
#define VIP_NUM 5
#define MAGIC_BYTES 123

/* ipv4 test vector */
struct ipv4_packet {
struct ethhdr eth;
struct iphdr iph;
struct tcphdr tcp;
} __packed;
extern struct ipv4_packet pkt_v4;

/* ipv6 test vector */
struct ipv6_packet {
struct ethhdr eth;
struct ipv6hdr iph;
struct tcphdr tcp;
} __packed;
extern struct ipv6_packet pkt_v6;

int start_server(int family, int type);
int connect_to_fd(int family, int type, int server_fd);
int connect_fd_to_fd(int client_fd, int server_fd);
int connect_wait(int client_fd);

#endif
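
A minimal usage sketch of these helpers follows (illustrative only: error
handling is elided and the test body is a placeholder):

/* Sketch: TCP server plus connected client over IPv6; the helpers pick
 * a free port internally via bind() on port 0 plus getsockname().
 */
int serv_fd = start_server(AF_INET6, SOCK_STREAM);
int cli_fd = connect_to_fd(AF_INET6, SOCK_STREAM, serv_fd);
int peer_fd = accept(serv_fd, NULL, NULL);

/* ... exercise the program under test ... */

close(peer_fd);
close(cli_fd);
close(serv_fd);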
409 tools/testing/selftests/bpf/prog_tests/bpf_iter.c (new file)
@ -0,0 +1,409 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <test_progs.h>
#include "bpf_iter_ipv6_route.skel.h"
#include "bpf_iter_netlink.skel.h"
#include "bpf_iter_bpf_map.skel.h"
#include "bpf_iter_task.skel.h"
#include "bpf_iter_task_file.skel.h"
#include "bpf_iter_test_kern1.skel.h"
#include "bpf_iter_test_kern2.skel.h"
#include "bpf_iter_test_kern3.skel.h"
#include "bpf_iter_test_kern4.skel.h"

static int duration;

static void test_btf_id_or_null(void)
{
struct bpf_iter_test_kern3 *skel;

skel = bpf_iter_test_kern3__open_and_load();
if (CHECK(skel, "bpf_iter_test_kern3__open_and_load",
"skeleton open_and_load unexpectedly succeeded\n")) {
bpf_iter_test_kern3__destroy(skel);
return;
}
}

static void do_dummy_read(struct bpf_program *prog)
{
struct bpf_link *link;
char buf[16] = {};
int iter_fd, len;

link = bpf_program__attach_iter(prog, NULL);
if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
return;

iter_fd = bpf_iter_create(bpf_link__fd(link));
if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
goto free_link;

/* don't check contents, just ensure read() ends without error */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
CHECK(len < 0, "read", "read failed: %s\n", strerror(errno));

close(iter_fd);

free_link:
bpf_link__destroy(link);
}

static void test_ipv6_route(void)
{
struct bpf_iter_ipv6_route *skel;

skel = bpf_iter_ipv6_route__open_and_load();
if (CHECK(!skel, "bpf_iter_ipv6_route__open_and_load",
"skeleton open_and_load failed\n"))
return;

do_dummy_read(skel->progs.dump_ipv6_route);

bpf_iter_ipv6_route__destroy(skel);
}

static void test_netlink(void)
{
struct bpf_iter_netlink *skel;

skel = bpf_iter_netlink__open_and_load();
if (CHECK(!skel, "bpf_iter_netlink__open_and_load",
"skeleton open_and_load failed\n"))
return;

do_dummy_read(skel->progs.dump_netlink);

bpf_iter_netlink__destroy(skel);
}

static void test_bpf_map(void)
{
struct bpf_iter_bpf_map *skel;

skel = bpf_iter_bpf_map__open_and_load();
if (CHECK(!skel, "bpf_iter_bpf_map__open_and_load",
"skeleton open_and_load failed\n"))
return;

do_dummy_read(skel->progs.dump_bpf_map);

bpf_iter_bpf_map__destroy(skel);
}

static void test_task(void)
{
struct bpf_iter_task *skel;

skel = bpf_iter_task__open_and_load();
if (CHECK(!skel, "bpf_iter_task__open_and_load",
"skeleton open_and_load failed\n"))
return;

do_dummy_read(skel->progs.dump_task);

bpf_iter_task__destroy(skel);
}

static void test_task_file(void)
{
struct bpf_iter_task_file *skel;

skel = bpf_iter_task_file__open_and_load();
if (CHECK(!skel, "bpf_iter_task_file__open_and_load",
"skeleton open_and_load failed\n"))
return;

do_dummy_read(skel->progs.dump_task_file);

bpf_iter_task_file__destroy(skel);
}

/* The expected string is less than 16 bytes */
static int do_read_with_fd(int iter_fd, const char *expected,
bool read_one_char)
{
int err = -1, len, read_buf_len, start;
char buf[16] = {};

read_buf_len = read_one_char ? 1 : 16;
start = 0;
while ((len = read(iter_fd, buf + start, read_buf_len)) > 0) {
start += len;
if (CHECK(start >= 16, "read", "read len %d\n", len))
return -1;
read_buf_len = read_one_char ? 1 : 16 - start;
}
if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
return -1;

err = strcmp(buf, expected);
if (CHECK(err, "read", "incorrect read result: buf %s, expected %s\n",
buf, expected))
return -1;

return 0;
}

static void test_anon_iter(bool read_one_char)
{
struct bpf_iter_test_kern1 *skel;
struct bpf_link *link;
int iter_fd, err;

skel = bpf_iter_test_kern1__open_and_load();
if (CHECK(!skel, "bpf_iter_test_kern1__open_and_load",
"skeleton open_and_load failed\n"))
return;

err = bpf_iter_test_kern1__attach(skel);
if (CHECK(err, "bpf_iter_test_kern1__attach",
"skeleton attach failed\n")) {
goto out;
}

link = skel->links.dump_task;
iter_fd = bpf_iter_create(bpf_link__fd(link));
if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
goto out;

do_read_with_fd(iter_fd, "abcd", read_one_char);
close(iter_fd);

out:
bpf_iter_test_kern1__destroy(skel);
}

static int do_read(const char *path, const char *expected)
{
int err, iter_fd;

iter_fd = open(path, O_RDONLY);
if (CHECK(iter_fd < 0, "open", "open %s failed: %s\n",
path, strerror(errno)))
return -1;

err = do_read_with_fd(iter_fd, expected, false);
close(iter_fd);
return err;
}

static void test_file_iter(void)
{
const char *path = "/sys/fs/bpf/bpf_iter_test1";
struct bpf_iter_test_kern1 *skel1;
struct bpf_iter_test_kern2 *skel2;
struct bpf_link *link;
int err;

skel1 = bpf_iter_test_kern1__open_and_load();
if (CHECK(!skel1, "bpf_iter_test_kern1__open_and_load",
"skeleton open_and_load failed\n"))
return;

link = bpf_program__attach_iter(skel1->progs.dump_task, NULL);
if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
goto out;

/* unlink this path if it exists. */
unlink(path);

err = bpf_link__pin(link, path);
if (CHECK(err, "pin_iter", "pin_iter to %s failed: %d\n", path, err))
goto free_link;

err = do_read(path, "abcd");
if (err)
goto unlink_path;

/* The file-based iterator seems to work fine. Do a link update
* of the underlying link and `cat` the iterator again; its content
* should change.
*/
skel2 = bpf_iter_test_kern2__open_and_load();
if (CHECK(!skel2, "bpf_iter_test_kern2__open_and_load",
"skeleton open_and_load failed\n"))
goto unlink_path;

err = bpf_link__update_program(link, skel2->progs.dump_task);
if (CHECK(err, "update_prog", "update_prog failed\n"))
goto destroy_skel2;

do_read(path, "ABCD");

destroy_skel2:
bpf_iter_test_kern2__destroy(skel2);
unlink_path:
unlink(path);
free_link:
bpf_link__destroy(link);
out:
bpf_iter_test_kern1__destroy(skel1);
}

static void test_overflow(bool test_e2big_overflow, bool ret1)
{
__u32 map_info_len, total_read_len, expected_read_len;
int err, iter_fd, map1_fd, map2_fd, len;
struct bpf_map_info map_info = {};
struct bpf_iter_test_kern4 *skel;
struct bpf_link *link;
__u32 page_size;
char *buf;

skel = bpf_iter_test_kern4__open();
if (CHECK(!skel, "bpf_iter_test_kern4__open",
"skeleton open failed\n"))
return;

/* create two maps: the bpf program will only do bpf_seq_write
* for these two maps. The goal is that one map's output almost
* fills the seq_file buffer and the other then triggers the
* overflow and needs a restart.
*/
map1_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
if (CHECK(map1_fd < 0, "bpf_create_map",
"map_creation failed: %s\n", strerror(errno)))
goto out;
map2_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
if (CHECK(map2_fd < 0, "bpf_create_map",
"map_creation failed: %s\n", strerror(errno)))
goto free_map1;

/* bpf_seq_printf kernel buffer is one page, so one map
* bpf_seq_write will mostly fill it, and the other map
* will partially fill and then trigger overflow and need
* bpf_seq_read restart.
*/
page_size = sysconf(_SC_PAGE_SIZE);

if (test_e2big_overflow) {
skel->rodata->print_len = (page_size + 8) / 8;
expected_read_len = 2 * (page_size + 8);
} else if (!ret1) {
skel->rodata->print_len = (page_size - 8) / 8;
expected_read_len = 2 * (page_size - 8);
} else {
skel->rodata->print_len = 1;
expected_read_len = 2 * 8;
}
skel->rodata->ret1 = ret1;

if (CHECK(bpf_iter_test_kern4__load(skel),
"bpf_iter_test_kern4__load", "skeleton load failed\n"))
goto free_map2;

/* setup filtering map_id in bpf program */
map_info_len = sizeof(map_info);
err = bpf_obj_get_info_by_fd(map1_fd, &map_info, &map_info_len);
if (CHECK(err, "get_map_info", "get map info failed: %s\n",
strerror(errno)))
goto free_map2;
skel->bss->map1_id = map_info.id;

err = bpf_obj_get_info_by_fd(map2_fd, &map_info, &map_info_len);
if (CHECK(err, "get_map_info", "get map info failed: %s\n",
strerror(errno)))
goto free_map2;
skel->bss->map2_id = map_info.id;

link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
goto free_map2;

iter_fd = bpf_iter_create(bpf_link__fd(link));
if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
goto free_link;

buf = malloc(expected_read_len);
if (!buf)
goto close_iter;

/* do read */
total_read_len = 0;
if (test_e2big_overflow) {
while ((len = read(iter_fd, buf, expected_read_len)) > 0)
total_read_len += len;

CHECK(len != -1 || errno != E2BIG, "read",
"expected ret -1, errno E2BIG, but got ret %d, error %s\n",
len, strerror(errno));
goto free_buf;
} else if (!ret1) {
while ((len = read(iter_fd, buf, expected_read_len)) > 0)
total_read_len += len;

if (CHECK(len < 0, "read", "read failed: %s\n",
strerror(errno)))
goto free_buf;
} else {
do {
len = read(iter_fd, buf, expected_read_len);
if (len > 0)
total_read_len += len;
} while (len > 0 || (len == -1 && errno == EAGAIN));

if (CHECK(len < 0, "read", "read failed: %s\n",
strerror(errno)))
goto free_buf;
}

if (CHECK(total_read_len != expected_read_len, "read",
"total len %u, expected len %u\n", total_read_len,
expected_read_len))
goto free_buf;

if (CHECK(skel->bss->map1_accessed != 1, "map1_accessed",
"expected 1 actual %d\n", skel->bss->map1_accessed))
goto free_buf;

if (CHECK(skel->bss->map2_accessed != 2, "map2_accessed",
"expected 2 actual %d\n", skel->bss->map2_accessed))
goto free_buf;

CHECK(skel->bss->map2_seqnum1 != skel->bss->map2_seqnum2,
"map2_seqnum", "two different seqnum %lld %lld\n",
skel->bss->map2_seqnum1, skel->bss->map2_seqnum2);

free_buf:
free(buf);
close_iter:
close(iter_fd);
free_link:
bpf_link__destroy(link);
free_map2:
close(map2_fd);
free_map1:
close(map1_fd);
out:
bpf_iter_test_kern4__destroy(skel);
}

void test_bpf_iter(void)
{
if (test__start_subtest("btf_id_or_null"))
test_btf_id_or_null();
if (test__start_subtest("ipv6_route"))
test_ipv6_route();
if (test__start_subtest("netlink"))
test_netlink();
if (test__start_subtest("bpf_map"))
test_bpf_map();
if (test__start_subtest("task"))
test_task();
if (test__start_subtest("task_file"))
test_task_file();
if (test__start_subtest("anon"))
test_anon_iter(false);
if (test__start_subtest("anon-read-one-char"))
test_anon_iter(true);
if (test__start_subtest("file"))
test_file_iter();
if (test__start_subtest("overflow"))
test_overflow(false, false);
if (test__start_subtest("overflow-e2big"))
test_overflow(true, false);
if (test__start_subtest("prog-ret-1"))
test_overflow(false, true);
}
@ -0,0 +1,95 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
// Copyright (c) 2020 Facebook
|
||||
|
||||
#include <test_progs.h>
|
||||
|
||||
#include "network_helpers.h"
|
||||
#include "cgroup_skb_sk_lookup_kern.skel.h"
|
||||
|
||||
static void run_lookup_test(__u16 *g_serv_port, int out_sk)
|
||||
{
|
||||
int serv_sk = -1, in_sk = -1, serv_in_sk = -1, err;
|
||||
struct sockaddr_in6 addr = {};
|
||||
socklen_t addr_len = sizeof(addr);
|
||||
__u32 duration = 0;
|
||||
|
||||
serv_sk = start_server(AF_INET6, SOCK_STREAM);
|
||||
if (CHECK(serv_sk < 0, "start_server", "failed to start server\n"))
|
||||
return;
|
||||
|
||||
err = getsockname(serv_sk, (struct sockaddr *)&addr, &addr_len);
|
||||
if (CHECK(err, "getsockname", "errno %d\n", errno))
|
||||
goto cleanup;
|
||||
|
||||
*g_serv_port = addr.sin6_port;
|
||||
|
||||
/* Client outside of test cgroup should fail to connect by timeout. */
|
||||
err = connect_fd_to_fd(out_sk, serv_sk);
|
||||
if (CHECK(!err || errno != EINPROGRESS, "connect_fd_to_fd",
|
||||
"unexpected result err %d errno %d\n", err, errno))
|
||||
goto cleanup;
|
||||
|
||||
err = connect_wait(out_sk);
|
||||
if (CHECK(err, "connect_wait", "unexpected result %d\n", err))
|
||||
goto cleanup;
|
||||
|
||||
/* Client inside test cgroup should connect just fine. */
|
||||
in_sk = connect_to_fd(AF_INET6, SOCK_STREAM, serv_sk);
|
||||
if (CHECK(in_sk < 0, "connect_to_fd", "errno %d\n", errno))
|
||||
goto cleanup;
|
||||
|
||||
serv_in_sk = accept(serv_sk, NULL, NULL);
|
||||
if (CHECK(serv_in_sk < 0, "accept", "errno %d\n", errno))
|
||||
goto cleanup;
|
||||
|
||||
cleanup:
|
||||
close(serv_in_sk);
|
||||
close(in_sk);
|
||||
close(serv_sk);
|
||||
}
|

static void run_cgroup_bpf_test(const char *cg_path, int out_sk)
{
	struct cgroup_skb_sk_lookup_kern *skel;
	struct bpf_link *link;
	__u32 duration = 0;
	int cgfd = -1;

	skel = cgroup_skb_sk_lookup_kern__open_and_load();
	if (CHECK(!skel, "skel_open_load", "open_load failed\n"))
		return;

	cgfd = test__join_cgroup(cg_path);
	if (CHECK(cgfd < 0, "cgroup_join", "cgroup setup failed\n"))
		goto cleanup;

	link = bpf_program__attach_cgroup(skel->progs.ingress_lookup, cgfd);
	if (CHECK(IS_ERR(link), "cgroup_attach", "err: %ld\n", PTR_ERR(link)))
		goto cleanup;

	run_lookup_test(&skel->bss->g_serv_port, out_sk);

	bpf_link__destroy(link);

cleanup:
	close(cgfd);
	cgroup_skb_sk_lookup_kern__destroy(skel);
}

void test_cgroup_skb_sk_lookup(void)
{
	const char *cg_path = "/foo";
	int out_sk;

	/* Create the socket before joining the test cgroup so that its
	 * cgroup id differs from the test cgroup's: moving the selftests
	 * process into the test cgroup won't change the cgroup id of an
	 * already created socket.
	 */
	out_sk = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
	if (CHECK_FAIL(out_sk < 0))
		return;

	run_cgroup_bpf_test(cg_path, out_sk);

	close(out_sk);
}
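The BPF side, cgroup_skb_sk_lookup_kern.c, is not shown in this excerpt. Conceptually, ingress_lookup uses the socket-lookup helpers that this series opens up to cgroup-skb programs: for TCP/IPv6 packets aimed at g_serv_port, it looks up the sending (peer) socket over loopback and only lets the packet through when that socket's cgroup id matches the skb's. A simplified sketch under those assumptions (header parsing reduced to the minimum, identifiers illustrative):

// SPDX-License-Identifier: GPL-2.0
/* Sketch only -- not the selftest's actual program. */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

__u16 g_serv_port = 0;	/* written by userspace after getsockname() */

SEC("cgroup_skb/ingress")
int ingress_lookup(struct __sk_buff *skb)
{
	struct bpf_sock_tuple tuple = {};
	struct ipv6hdr ip6;
	struct tcphdr tcp;
	struct bpf_sock *sk;
	int allow = 1;

	if (skb->protocol != bpf_htons(ETH_P_IPV6))
		return 1;
	/* For cgroup_skb programs, skb data starts at the network header. */
	if (bpf_skb_load_bytes(skb, 0, &ip6, sizeof(ip6)))
		return 1;
	if (ip6.nexthdr != IPPROTO_TCP)
		return 1;
	if (bpf_skb_load_bytes(skb, sizeof(ip6), &tcp, sizeof(tcp)))
		return 1;
	if (tcp.dest != g_serv_port)	/* only the test server's traffic */
		return 1;

	/* The packet is headed to the server, so reverse the tuple to
	 * look up the client socket that sent it (loopback traffic, so
	 * the peer lives on this host too).
	 */
	__builtin_memcpy(tuple.ipv6.saddr, &ip6.daddr, sizeof(tuple.ipv6.saddr));
	__builtin_memcpy(tuple.ipv6.daddr, &ip6.saddr, sizeof(tuple.ipv6.daddr));
	tuple.ipv6.sport = tcp.dest;
	tuple.ipv6.dport = tcp.source;

	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv6),
			       BPF_F_CURRENT_NETNS, 0);
	if (!sk)
		return 1;

	/* Drop when the client's cgroup differs from the skb's cgroup. */
	if (bpf_sk_cgroup_id(sk) != bpf_skb_cgroup_id(skb))
		allow = 0;

	bpf_sk_release(sk);
	return allow;
}

char _license[] SEC("license") = "GPL";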
115	tools/testing/selftests/bpf/prog_tests/connect_force_port.c	Normal file
@ -0,0 +1,115 @@
// SPDX-License-Identifier: GPL-2.0

#include <test_progs.h>
#include "cgroup_helpers.h"
#include "network_helpers.h"

static int verify_port(int family, int fd, int expected)
{
	struct sockaddr_storage addr;
	socklen_t len = sizeof(addr);
	__u16 port;

	if (getsockname(fd, (struct sockaddr *)&addr, &len)) {
		log_err("Failed to get server addr");
		return -1;
	}

	if (family == AF_INET)
		port = ((struct sockaddr_in *)&addr)->sin_port;
	else
		port = ((struct sockaddr_in6 *)&addr)->sin6_port;

	if (ntohs(port) != expected) {
		log_err("Unexpected port %d, expected %d", ntohs(port),
			expected);
		return -1;
	}

	return 0;
}

static int run_test(int cgroup_fd, int server_fd, int family, int type)
{
	struct bpf_prog_load_attr attr = {
		.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
	};
	struct bpf_object *obj;
	int expected_port;
	int prog_fd;
	int err;
	int fd;

	if (family == AF_INET) {
		attr.file = "./connect_force_port4.o";
		attr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
		expected_port = 22222;
	} else {
		attr.file = "./connect_force_port6.o";
		attr.expected_attach_type = BPF_CGROUP_INET6_CONNECT;
		expected_port = 22223;
	}

	err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
	if (err) {
		log_err("Failed to load BPF object");
		return -1;
	}

	err = bpf_prog_attach(prog_fd, cgroup_fd, attr.expected_attach_type,
			      0);
	if (err) {
		log_err("Failed to attach BPF program");
		goto close_bpf_object;
	}

	fd = connect_to_fd(family, type, server_fd);
	if (fd < 0) {
		err = -1;
		goto close_bpf_object;
	}

	err = verify_port(family, fd, expected_port);

	close(fd);

close_bpf_object:
	bpf_object__close(obj);
	return err;
}
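The connect_force_port4.o/connect_force_port6.o programs are not part of this excerpt. Since verify_port() checks the local port of the client socket via getsockname(), the connect hooks presumably pin the source port with the bpf_bind() helper, which this series allows to bind to any port. A sketch of the IPv4 side under that assumption (the IPv6 variant would do the same with a sockaddr_in6 and port 22223):

// SPDX-License-Identifier: GPL-2.0
/* Illustrative sketch, not necessarily the object shipped as
 * connect_force_port4.o.
 */
#include <linux/bpf.h>
#include <linux/in.h>
#include <sys/socket.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("connect4")
int connect4(struct bpf_sock_addr *ctx)
{
	struct sockaddr_in sa = {};

	sa.sin_family = AF_INET;
	sa.sin_port = bpf_htons(22222);	/* what verify_port() expects */
	sa.sin_addr.s_addr = bpf_htonl(0x7f000001); /* 127.0.0.1 */

	/* Force the client's source port; getsockname() on the connected
	 * fd will then report 22222 regardless of what connect() asked for.
	 */
	if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
		return 0;

	return 1;
}

char _license[] SEC("license") = "GPL";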

void test_connect_force_port(void)
{
	int server_fd, cgroup_fd;

	cgroup_fd = test__join_cgroup("/connect_force_port");
	if (CHECK_FAIL(cgroup_fd < 0))
		return;

	server_fd = start_server(AF_INET, SOCK_STREAM);
	if (CHECK_FAIL(server_fd < 0))
		goto close_cgroup_fd;
	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_STREAM));
	close(server_fd);

	server_fd = start_server(AF_INET6, SOCK_STREAM);
	if (CHECK_FAIL(server_fd < 0))
		goto close_cgroup_fd;
	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_STREAM));
	close(server_fd);

	server_fd = start_server(AF_INET, SOCK_DGRAM);
	if (CHECK_FAIL(server_fd < 0))
		goto close_cgroup_fd;
	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_DGRAM));
	close(server_fd);

	server_fd = start_server(AF_INET6, SOCK_DGRAM);
	if (CHECK_FAIL(server_fd < 0))
		goto close_cgroup_fd;
	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_DGRAM));
	close(server_fd);

close_cgroup_fd:
	close(cgroup_fd);
}
@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
#include <network_helpers.h>

static void test_fexit_bpf2bpf_common(const char *obj_file,
				      const char *target_obj_file,
@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <network_helpers.h>
#include <error.h>
#include <linux/if.h>
#include <linux/if_tun.h>
Some files were not shown because too many files have changed in this diff.