net: Allow iterating sockmap and sockhash
Add bpf_iter support for sockmap / sockhash, based on the bpf_sk_storage and hashtable implementation. sockmap and sockhash share the same iteration context: a pointer to an arbitrary key and a pointer to a socket. Both pointers may be NULL, and so BPF has to perform a NULL check before accessing them. Technically it's not possible for sockhash iteration to yield a NULL socket, but we ignore this to be able to use a single iteration point. Iteration will visit all keys that remain unmodified during the lifetime of the iterator. It may or may not visit newly added ones. Switch from using rcu_dereference_raw to plain rcu_dereference, so we gain another guard rail if CONFIG_PROVE_RCU is enabled. Signed-off-by: Lorenz Bauer <lmb@cloudflare.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Yonghong Song <yhs@fb.com> Link: https://lore.kernel.org/bpf/20200909162712.221874-3-lmb@cloudflare.com
This commit is contained in:
parent
654785a1af
commit
0365351524
@ -2,6 +2,7 @@
|
|||||||
/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
|
/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
|
||||||
|
|
||||||
#include <linux/bpf.h>
|
#include <linux/bpf.h>
|
||||||
|
#include <linux/btf_ids.h>
|
||||||
#include <linux/filter.h>
|
#include <linux/filter.h>
|
||||||
#include <linux/errno.h>
|
#include <linux/errno.h>
|
||||||
#include <linux/file.h>
|
#include <linux/file.h>
|
||||||
@ -703,6 +704,109 @@ const struct bpf_func_proto bpf_msg_redirect_map_proto = {
|
|||||||
.arg4_type = ARG_ANYTHING,
|
.arg4_type = ARG_ANYTHING,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct sock_map_seq_info {
|
||||||
|
struct bpf_map *map;
|
||||||
|
struct sock *sk;
|
||||||
|
u32 index;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct bpf_iter__sockmap {
|
||||||
|
__bpf_md_ptr(struct bpf_iter_meta *, meta);
|
||||||
|
__bpf_md_ptr(struct bpf_map *, map);
|
||||||
|
__bpf_md_ptr(void *, key);
|
||||||
|
__bpf_md_ptr(struct sock *, sk);
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Iteration entry point for the "sockmap" target; mirrors bpf_iter__sockmap. */
DEFINE_BPF_ITER_FUNC(sockmap,
		     struct bpf_iter_meta *meta,
		     struct bpf_map *map,
		     void *key,
		     struct sock *sk)
|
||||||
|
|
||||||
|
static void *sock_map_seq_lookup_elem(struct sock_map_seq_info *info)
|
||||||
|
{
|
||||||
|
if (unlikely(info->index >= info->map->max_entries))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
info->sk = __sock_map_lookup_elem(info->map, info->index);
|
||||||
|
|
||||||
|
/* can't return sk directly, since that might be NULL */
|
||||||
|
return info;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * seq_file ->start() for sockmap iteration.
 *
 * Bumps *pos from 0 to 1 on the first call, takes the RCU read lock and
 * looks up the element at the iterator's current index. The lock is held
 * on return and is dropped by sock_map_seq_stop(); the __acquires()
 * annotation tells sparse about that imbalance.
 *
 * Returns the iterator state, or NULL when the index is out of range.
 */
static void *sock_map_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(rcu)
{
	struct sock_map_seq_info *info = seq->private;

	if (*pos == 0)
		++*pos;

	/* pairs with sock_map_seq_stop */
	rcu_read_lock();
	return sock_map_seq_lookup_elem(info);
}
|
||||||
|
|
||||||
|
/*
 * seq_file ->next() for sockmap iteration: advance both the seq_file
 * position and the slot index by one, then look up the new slot.
 *
 * Returns the iterator state, or NULL when the end of the map is reached.
 */
static void *sock_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct sock_map_seq_info *iter = seq->private;

	*pos += 1;
	iter->index += 1;

	return sock_map_seq_lookup_elem(iter);
}
|
||||||
|
|
||||||
|
static int sock_map_seq_show(struct seq_file *seq, void *v)
|
||||||
|
{
|
||||||
|
struct sock_map_seq_info *info = seq->private;
|
||||||
|
struct bpf_iter__sockmap ctx = {};
|
||||||
|
struct bpf_iter_meta meta;
|
||||||
|
struct bpf_prog *prog;
|
||||||
|
|
||||||
|
meta.seq = seq;
|
||||||
|
prog = bpf_iter_get_info(&meta, !v);
|
||||||
|
if (!prog)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
ctx.meta = &meta;
|
||||||
|
ctx.map = info->map;
|
||||||
|
if (v) {
|
||||||
|
ctx.key = &info->index;
|
||||||
|
ctx.sk = info->sk;
|
||||||
|
}
|
||||||
|
|
||||||
|
return bpf_iter_run_prog(prog, &ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void sock_map_seq_stop(struct seq_file *seq, void *v)
|
||||||
|
{
|
||||||
|
if (!v)
|
||||||
|
(void)sock_map_seq_show(seq, NULL);
|
||||||
|
|
||||||
|
/* pairs with sock_map_seq_start */
|
||||||
|
rcu_read_unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct seq_operations sock_map_seq_ops = {
|
||||||
|
.start = sock_map_seq_start,
|
||||||
|
.next = sock_map_seq_next,
|
||||||
|
.stop = sock_map_seq_stop,
|
||||||
|
.show = sock_map_seq_show,
|
||||||
|
};
|
||||||
|
|
||||||
|
static int sock_map_init_seq_private(void *priv_data,
|
||||||
|
struct bpf_iter_aux_info *aux)
|
||||||
|
{
|
||||||
|
struct sock_map_seq_info *info = priv_data;
|
||||||
|
|
||||||
|
info->map = aux->map;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct bpf_iter_seq_info sock_map_iter_seq_info = {
|
||||||
|
.seq_ops = &sock_map_seq_ops,
|
||||||
|
.init_seq_private = sock_map_init_seq_private,
|
||||||
|
.seq_priv_size = sizeof(struct sock_map_seq_info),
|
||||||
|
};
|
||||||
|
|
||||||
static int sock_map_btf_id;
|
static int sock_map_btf_id;
|
||||||
const struct bpf_map_ops sock_map_ops = {
|
const struct bpf_map_ops sock_map_ops = {
|
||||||
.map_meta_equal = bpf_map_meta_equal,
|
.map_meta_equal = bpf_map_meta_equal,
|
||||||
@ -717,6 +821,7 @@ const struct bpf_map_ops sock_map_ops = {
|
|||||||
.map_check_btf = map_check_no_btf,
|
.map_check_btf = map_check_no_btf,
|
||||||
.map_btf_name = "bpf_stab",
|
.map_btf_name = "bpf_stab",
|
||||||
.map_btf_id = &sock_map_btf_id,
|
.map_btf_id = &sock_map_btf_id,
|
||||||
|
.iter_seq_info = &sock_map_iter_seq_info,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct bpf_shtab_elem {
|
struct bpf_shtab_elem {
|
||||||
@ -953,7 +1058,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key,
|
|||||||
if (!elem)
|
if (!elem)
|
||||||
goto find_first_elem;
|
goto find_first_elem;
|
||||||
|
|
||||||
elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&elem->node)),
|
elem_next = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&elem->node)),
|
||||||
struct bpf_shtab_elem, node);
|
struct bpf_shtab_elem, node);
|
||||||
if (elem_next) {
|
if (elem_next) {
|
||||||
memcpy(key_next, elem_next->key, key_size);
|
memcpy(key_next, elem_next->key, key_size);
|
||||||
@ -965,7 +1070,7 @@ static int sock_hash_get_next_key(struct bpf_map *map, void *key,
|
|||||||
find_first_elem:
|
find_first_elem:
|
||||||
for (; i < htab->buckets_num; i++) {
|
for (; i < htab->buckets_num; i++) {
|
||||||
head = &sock_hash_select_bucket(htab, i)->head;
|
head = &sock_hash_select_bucket(htab, i)->head;
|
||||||
elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
|
elem_next = hlist_entry_safe(rcu_dereference(hlist_first_rcu(head)),
|
||||||
struct bpf_shtab_elem, node);
|
struct bpf_shtab_elem, node);
|
||||||
if (elem_next) {
|
if (elem_next) {
|
||||||
memcpy(key_next, elem_next->key, key_size);
|
memcpy(key_next, elem_next->key, key_size);
|
||||||
@ -1199,6 +1304,117 @@ const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
|
|||||||
.arg4_type = ARG_ANYTHING,
|
.arg4_type = ARG_ANYTHING,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct sock_hash_seq_info {
|
||||||
|
struct bpf_map *map;
|
||||||
|
struct bpf_shtab *htab;
|
||||||
|
u32 bucket_id;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void *sock_hash_seq_find_next(struct sock_hash_seq_info *info,
|
||||||
|
struct bpf_shtab_elem *prev_elem)
|
||||||
|
{
|
||||||
|
const struct bpf_shtab *htab = info->htab;
|
||||||
|
struct bpf_shtab_bucket *bucket;
|
||||||
|
struct bpf_shtab_elem *elem;
|
||||||
|
struct hlist_node *node;
|
||||||
|
|
||||||
|
/* try to find next elem in the same bucket */
|
||||||
|
if (prev_elem) {
|
||||||
|
node = rcu_dereference(hlist_next_rcu(&prev_elem->node));
|
||||||
|
elem = hlist_entry_safe(node, struct bpf_shtab_elem, node);
|
||||||
|
if (elem)
|
||||||
|
return elem;
|
||||||
|
|
||||||
|
/* no more elements, continue in the next bucket */
|
||||||
|
info->bucket_id++;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (; info->bucket_id < htab->buckets_num; info->bucket_id++) {
|
||||||
|
bucket = &htab->buckets[info->bucket_id];
|
||||||
|
node = rcu_dereference(hlist_first_rcu(&bucket->head));
|
||||||
|
elem = hlist_entry_safe(node, struct bpf_shtab_elem, node);
|
||||||
|
if (elem)
|
||||||
|
return elem;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * seq_file ->start() for sockhash iteration.
 *
 * Bumps *pos from 0 to 1 on the first call, takes the RCU read lock and
 * returns the first element found from bucket info->bucket_id onwards.
 * The lock is held on return and is dropped by sock_hash_seq_stop(); the
 * __acquires() annotation tells sparse about that imbalance.
 *
 * Returns the element, or NULL when there is none left.
 */
static void *sock_hash_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(rcu)
{
	struct sock_hash_seq_info *info = seq->private;

	if (*pos == 0)
		++*pos;

	/* pairs with sock_hash_seq_stop */
	rcu_read_lock();
	return sock_hash_seq_find_next(info, NULL);
}
|
||||||
|
|
||||||
|
/*
 * seq_file ->next() for sockhash iteration: advance the seq_file position
 * and return the element following @v, or NULL at the end of the table.
 */
static void *sock_hash_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct sock_hash_seq_info *iter = seq->private;
	struct bpf_shtab_elem *prev = v;

	*pos += 1;

	return sock_hash_seq_find_next(iter, prev);
}
|
||||||
|
|
||||||
|
static int sock_hash_seq_show(struct seq_file *seq, void *v)
|
||||||
|
{
|
||||||
|
struct sock_hash_seq_info *info = seq->private;
|
||||||
|
struct bpf_iter__sockmap ctx = {};
|
||||||
|
struct bpf_shtab_elem *elem = v;
|
||||||
|
struct bpf_iter_meta meta;
|
||||||
|
struct bpf_prog *prog;
|
||||||
|
|
||||||
|
meta.seq = seq;
|
||||||
|
prog = bpf_iter_get_info(&meta, !elem);
|
||||||
|
if (!prog)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
ctx.meta = &meta;
|
||||||
|
ctx.map = info->map;
|
||||||
|
if (elem) {
|
||||||
|
ctx.key = elem->key;
|
||||||
|
ctx.sk = elem->sk;
|
||||||
|
}
|
||||||
|
|
||||||
|
return bpf_iter_run_prog(prog, &ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void sock_hash_seq_stop(struct seq_file *seq, void *v)
|
||||||
|
{
|
||||||
|
if (!v)
|
||||||
|
(void)sock_hash_seq_show(seq, NULL);
|
||||||
|
|
||||||
|
/* pairs with sock_hash_seq_start */
|
||||||
|
rcu_read_unlock();
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct seq_operations sock_hash_seq_ops = {
|
||||||
|
.start = sock_hash_seq_start,
|
||||||
|
.next = sock_hash_seq_next,
|
||||||
|
.stop = sock_hash_seq_stop,
|
||||||
|
.show = sock_hash_seq_show,
|
||||||
|
};
|
||||||
|
|
||||||
|
static int sock_hash_init_seq_private(void *priv_data,
|
||||||
|
struct bpf_iter_aux_info *aux)
|
||||||
|
{
|
||||||
|
struct sock_hash_seq_info *info = priv_data;
|
||||||
|
|
||||||
|
info->map = aux->map;
|
||||||
|
info->htab = container_of(aux->map, struct bpf_shtab, map);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct bpf_iter_seq_info sock_hash_iter_seq_info = {
|
||||||
|
.seq_ops = &sock_hash_seq_ops,
|
||||||
|
.init_seq_private = sock_hash_init_seq_private,
|
||||||
|
.seq_priv_size = sizeof(struct sock_hash_seq_info),
|
||||||
|
};
|
||||||
|
|
||||||
static int sock_hash_map_btf_id;
|
static int sock_hash_map_btf_id;
|
||||||
const struct bpf_map_ops sock_hash_ops = {
|
const struct bpf_map_ops sock_hash_ops = {
|
||||||
.map_meta_equal = bpf_map_meta_equal,
|
.map_meta_equal = bpf_map_meta_equal,
|
||||||
@ -1213,6 +1429,7 @@ const struct bpf_map_ops sock_hash_ops = {
|
|||||||
.map_check_btf = map_check_no_btf,
|
.map_check_btf = map_check_no_btf,
|
||||||
.map_btf_name = "bpf_shtab",
|
.map_btf_name = "bpf_shtab",
|
||||||
.map_btf_id = &sock_hash_map_btf_id,
|
.map_btf_id = &sock_hash_map_btf_id,
|
||||||
|
.iter_seq_info = &sock_hash_iter_seq_info,
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
|
static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
|
||||||
@ -1323,3 +1540,62 @@ void sock_map_close(struct sock *sk, long timeout)
|
|||||||
release_sock(sk);
|
release_sock(sk);
|
||||||
saved_close(sk, timeout);
|
saved_close(sk, timeout);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int sock_map_iter_attach_target(struct bpf_prog *prog,
|
||||||
|
union bpf_iter_link_info *linfo,
|
||||||
|
struct bpf_iter_aux_info *aux)
|
||||||
|
{
|
||||||
|
struct bpf_map *map;
|
||||||
|
int err = -EINVAL;
|
||||||
|
|
||||||
|
if (!linfo->map.map_fd)
|
||||||
|
return -EBADF;
|
||||||
|
|
||||||
|
map = bpf_map_get_with_uref(linfo->map.map_fd);
|
||||||
|
if (IS_ERR(map))
|
||||||
|
return PTR_ERR(map);
|
||||||
|
|
||||||
|
if (map->map_type != BPF_MAP_TYPE_SOCKMAP &&
|
||||||
|
map->map_type != BPF_MAP_TYPE_SOCKHASH)
|
||||||
|
goto put_map;
|
||||||
|
|
||||||
|
if (prog->aux->max_rdonly_access > map->key_size) {
|
||||||
|
err = -EACCES;
|
||||||
|
goto put_map;
|
||||||
|
}
|
||||||
|
|
||||||
|
aux->map = map;
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
put_map:
|
||||||
|
bpf_map_put_with_uref(map);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void sock_map_iter_detach_target(struct bpf_iter_aux_info *aux)
|
||||||
|
{
|
||||||
|
bpf_map_put_with_uref(aux->map);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct bpf_iter_reg sock_map_iter_reg = {
|
||||||
|
.target = "sockmap",
|
||||||
|
.attach_target = sock_map_iter_attach_target,
|
||||||
|
.detach_target = sock_map_iter_detach_target,
|
||||||
|
.show_fdinfo = bpf_iter_map_show_fdinfo,
|
||||||
|
.fill_link_info = bpf_iter_map_fill_link_info,
|
||||||
|
.ctx_arg_info_size = 2,
|
||||||
|
.ctx_arg_info = {
|
||||||
|
{ offsetof(struct bpf_iter__sockmap, key),
|
||||||
|
PTR_TO_RDONLY_BUF_OR_NULL },
|
||||||
|
{ offsetof(struct bpf_iter__sockmap, sk),
|
||||||
|
PTR_TO_BTF_ID_OR_NULL },
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Late initcall: fill in the BTF id of struct sock for the sk context
 * argument, then register the "sockmap" iterator target. Returns the
 * result of bpf_iter_reg_target().
 */
static int __init bpf_sockmap_iter_init(void)
{
	const u32 sock_btf_id = btf_sock_ids[BTF_SOCK_TYPE_SOCK];

	sock_map_iter_reg.ctx_arg_info[1].btf_id = sock_btf_id;

	return bpf_iter_reg_target(&sock_map_iter_reg);
}
late_initcall(bpf_sockmap_iter_init);
|
||||||
|
Loading…
Reference in New Issue
Block a user