Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2021-03-09

The following pull-request contains BPF updates for your *net-next* tree.

We've added 90 non-merge commits during the last 17 day(s) which contain
a total of 114 files changed, 5158 insertions(+), 1288 deletions(-).

The main changes are:

1) Faster bpf_redirect_map(), from Björn.

2) skmsg cleanup, from Cong.

3) Support for floating point types in BTF, from Ilya.

4) Documentation for sys_bpf commands, from Joe.

5) Support for sk_lookup in bpf_prog_test_run, form Lorenz.

6) Enable task local storage for tracing programs, from Song.

7) bpf_for_each_map_elem() helper, from Yonghong.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller
2021-03-09 18:07:05 -08:00
114 changed files with 5158 additions and 1286 deletions

View File

@@ -9,8 +9,8 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
obj-${CONFIG_BPF_LSM} += bpf_task_storage.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_JIT) += trampoline.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
@@ -18,7 +18,6 @@ obj-$(CONFIG_BPF_JIT) += dispatcher.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o
obj-$(CONFIG_BPF_SYSCALL) += offload.o
obj-$(CONFIG_BPF_SYSCALL) += net_namespace.o
endif

View File

@@ -625,6 +625,42 @@ static const struct bpf_iter_seq_info iter_seq_info = {
.seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
};
static int bpf_for_each_array_elem(struct bpf_map *map, void *callback_fn,
void *callback_ctx, u64 flags)
{
u32 i, key, num_elems = 0;
struct bpf_array *array;
bool is_percpu;
u64 ret = 0;
void *val;
if (flags != 0)
return -EINVAL;
is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
array = container_of(map, struct bpf_array, map);
if (is_percpu)
migrate_disable();
for (i = 0; i < map->max_entries; i++) {
if (is_percpu)
val = this_cpu_ptr(array->pptrs[i]);
else
val = array->value + array->elem_size * i;
num_elems++;
key = i;
ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
(u64)(long)&key, (u64)(long)val,
(u64)(long)callback_ctx, 0);
/* return value: 0 - continue, 1 - stop and return */
if (ret)
break;
}
if (is_percpu)
migrate_enable();
return num_elems;
}
static int array_map_btf_id;
const struct bpf_map_ops array_map_ops = {
.map_meta_equal = array_map_meta_equal,
@@ -643,6 +679,8 @@ const struct bpf_map_ops array_map_ops = {
.map_check_btf = array_map_check_btf,
.map_lookup_batch = generic_map_lookup_batch,
.map_update_batch = generic_map_update_batch,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_array_elem,
.map_btf_name = "bpf_array",
.map_btf_id = &array_map_btf_id,
.iter_seq_info = &iter_seq_info,
@@ -660,6 +698,8 @@ const struct bpf_map_ops percpu_array_map_ops = {
.map_delete_elem = array_map_delete_elem,
.map_seq_show_elem = percpu_array_map_seq_show_elem,
.map_check_btf = array_map_check_btf,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_array_elem,
.map_btf_name = "bpf_array",
.map_btf_id = &percpu_array_map_btf_id,
.iter_seq_info = &iter_seq_info,

View File

@@ -237,7 +237,7 @@ static void inode_storage_map_free(struct bpf_map *map)
smap = (struct bpf_local_storage_map *)map;
bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx);
bpf_local_storage_map_free(smap);
bpf_local_storage_map_free(smap, NULL);
}
static int inode_storage_map_btf_id;

View File

@@ -675,3 +675,19 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
*/
return ret == 0 ? 0 : -EAGAIN;
}
BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn,
void *, callback_ctx, u64, flags)
{
return map->ops->map_for_each_callback(map, callback_fn, callback_ctx, flags);
}
const struct bpf_func_proto bpf_for_each_map_elem_proto = {
.func = bpf_for_each_map_elem,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_FUNC,
.arg3_type = ARG_PTR_TO_STACK_OR_NULL,
.arg4_type = ARG_ANYTHING,
};

View File

@@ -140,17 +140,18 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage *local_storage;
bool free_local_storage = false;
unsigned long flags;
if (unlikely(!selem_linked_to_storage(selem)))
/* selem has already been unlinked from sk */
return;
local_storage = rcu_dereference(selem->local_storage);
raw_spin_lock_bh(&local_storage->lock);
raw_spin_lock_irqsave(&local_storage->lock, flags);
if (likely(selem_linked_to_storage(selem)))
free_local_storage = bpf_selem_unlink_storage_nolock(
local_storage, selem, true);
raw_spin_unlock_bh(&local_storage->lock);
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
if (free_local_storage)
kfree_rcu(local_storage, rcu);
@@ -167,6 +168,7 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage_map *smap;
struct bpf_local_storage_map_bucket *b;
unsigned long flags;
if (unlikely(!selem_linked_to_map(selem)))
/* selem has already be unlinked from smap */
@@ -174,21 +176,22 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
smap = rcu_dereference(SDATA(selem)->smap);
b = select_bucket(smap, selem);
raw_spin_lock_bh(&b->lock);
raw_spin_lock_irqsave(&b->lock, flags);
if (likely(selem_linked_to_map(selem)))
hlist_del_init_rcu(&selem->map_node);
raw_spin_unlock_bh(&b->lock);
raw_spin_unlock_irqrestore(&b->lock, flags);
}
void bpf_selem_link_map(struct bpf_local_storage_map *smap,
struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
unsigned long flags;
raw_spin_lock_bh(&b->lock);
raw_spin_lock_irqsave(&b->lock, flags);
RCU_INIT_POINTER(SDATA(selem)->smap, smap);
hlist_add_head_rcu(&selem->map_node, &b->list);
raw_spin_unlock_bh(&b->lock);
raw_spin_unlock_irqrestore(&b->lock, flags);
}
void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
@@ -224,16 +227,18 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
sdata = SDATA(selem);
if (cacheit_lockit) {
unsigned long flags;
/* spinlock is needed to avoid racing with the
* parallel delete. Otherwise, publishing an already
* deleted sdata to the cache will become a use-after-free
* problem in the next bpf_local_storage_lookup().
*/
raw_spin_lock_bh(&local_storage->lock);
raw_spin_lock_irqsave(&local_storage->lock, flags);
if (selem_linked_to_storage(selem))
rcu_assign_pointer(local_storage->cache[smap->cache_idx],
sdata);
raw_spin_unlock_bh(&local_storage->lock);
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
}
return sdata;
@@ -327,6 +332,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
struct bpf_local_storage_data *old_sdata = NULL;
struct bpf_local_storage_elem *selem;
struct bpf_local_storage *local_storage;
unsigned long flags;
int err;
/* BPF_EXIST and BPF_NOEXIST cannot be both set */
@@ -374,7 +380,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
}
}
raw_spin_lock_bh(&local_storage->lock);
raw_spin_lock_irqsave(&local_storage->lock, flags);
/* Recheck local_storage->list under local_storage->lock */
if (unlikely(hlist_empty(&local_storage->list))) {
@@ -428,11 +434,11 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
}
unlock:
raw_spin_unlock_bh(&local_storage->lock);
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
return SDATA(selem);
unlock_err:
raw_spin_unlock_bh(&local_storage->lock);
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
return ERR_PTR(err);
}
@@ -468,7 +474,8 @@ void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
spin_unlock(&cache->idx_lock);
}
void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
int __percpu *busy_counter)
{
struct bpf_local_storage_elem *selem;
struct bpf_local_storage_map_bucket *b;
@@ -497,7 +504,15 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
while ((selem = hlist_entry_safe(
rcu_dereference_raw(hlist_first_rcu(&b->list)),
struct bpf_local_storage_elem, map_node))) {
if (busy_counter) {
migrate_disable();
__this_cpu_inc(*busy_counter);
}
bpf_selem_unlink(selem);
if (busy_counter) {
__this_cpu_dec(*busy_counter);
migrate_enable();
}
cond_resched_rcu();
}
rcu_read_unlock();

View File

@@ -115,10 +115,6 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_spin_lock_proto;
case BPF_FUNC_spin_unlock:
return &bpf_spin_unlock_proto;
case BPF_FUNC_task_storage_get:
return &bpf_task_storage_get_proto;
case BPF_FUNC_task_storage_delete:
return &bpf_task_storage_delete_proto;
case BPF_FUNC_bprm_opts_set:
return &bpf_bprm_opts_set_proto;
case BPF_FUNC_ima_inode_hash:

View File

@@ -15,21 +15,41 @@
#include <linux/bpf_local_storage.h>
#include <linux/filter.h>
#include <uapi/linux/btf.h>
#include <linux/bpf_lsm.h>
#include <linux/btf_ids.h>
#include <linux/fdtable.h>
DEFINE_BPF_STORAGE_CACHE(task_cache);
DEFINE_PER_CPU(int, bpf_task_storage_busy);
static void bpf_task_storage_lock(void)
{
migrate_disable();
__this_cpu_inc(bpf_task_storage_busy);
}
static void bpf_task_storage_unlock(void)
{
__this_cpu_dec(bpf_task_storage_busy);
migrate_enable();
}
static bool bpf_task_storage_trylock(void)
{
migrate_disable();
if (unlikely(__this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
__this_cpu_dec(bpf_task_storage_busy);
migrate_enable();
return false;
}
return true;
}
static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
{
struct task_struct *task = owner;
struct bpf_storage_blob *bsb;
bsb = bpf_task(task);
if (!bsb)
return NULL;
return &bsb->storage;
return &task->bpf_storage;
}
static struct bpf_local_storage_data *
@@ -38,13 +58,8 @@ task_storage_lookup(struct task_struct *task, struct bpf_map *map,
{
struct bpf_local_storage *task_storage;
struct bpf_local_storage_map *smap;
struct bpf_storage_blob *bsb;
bsb = bpf_task(task);
if (!bsb)
return NULL;
task_storage = rcu_dereference(bsb->storage);
task_storage = rcu_dereference(task->bpf_storage);
if (!task_storage)
return NULL;
@@ -57,16 +72,12 @@ void bpf_task_storage_free(struct task_struct *task)
struct bpf_local_storage_elem *selem;
struct bpf_local_storage *local_storage;
bool free_task_storage = false;
struct bpf_storage_blob *bsb;
struct hlist_node *n;
bsb = bpf_task(task);
if (!bsb)
return;
unsigned long flags;
rcu_read_lock();
local_storage = rcu_dereference(bsb->storage);
local_storage = rcu_dereference(task->bpf_storage);
if (!local_storage) {
rcu_read_unlock();
return;
@@ -81,7 +92,8 @@ void bpf_task_storage_free(struct task_struct *task)
* when unlinking elem from the local_storage->list and
* the map's bucket->list.
*/
raw_spin_lock_bh(&local_storage->lock);
bpf_task_storage_lock();
raw_spin_lock_irqsave(&local_storage->lock, flags);
hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
/* Always unlink from map before unlinking from
* local_storage.
@@ -90,7 +102,8 @@ void bpf_task_storage_free(struct task_struct *task)
free_task_storage = bpf_selem_unlink_storage_nolock(
local_storage, selem, false);
}
raw_spin_unlock_bh(&local_storage->lock);
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
bpf_task_storage_unlock();
rcu_read_unlock();
/* free_task_storage should always be true as long as
@@ -123,7 +136,9 @@ static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
goto out;
}
bpf_task_storage_lock();
sdata = task_storage_lookup(task, map, true);
bpf_task_storage_unlock();
put_pid(pid);
return sdata ? sdata->data : NULL;
out:
@@ -150,13 +165,15 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
*/
WARN_ON_ONCE(!rcu_read_lock_held());
task = pid_task(pid, PIDTYPE_PID);
if (!task || !task_storage_ptr(task)) {
if (!task) {
err = -ENOENT;
goto out;
}
bpf_task_storage_lock();
sdata = bpf_local_storage_update(
task, (struct bpf_local_storage_map *)map, value, map_flags);
bpf_task_storage_unlock();
err = PTR_ERR_OR_ZERO(sdata);
out:
@@ -199,7 +216,9 @@ static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
goto out;
}
bpf_task_storage_lock();
err = task_storage_delete(task, map);
bpf_task_storage_unlock();
out:
put_pid(pid);
return err;
@@ -213,44 +232,47 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
return (unsigned long)NULL;
/* explicitly check that the task_storage_ptr is not
* NULL as task_storage_lookup returns NULL in this case and
* bpf_local_storage_update expects the owner to have a
* valid storage pointer.
*/
if (!task || !task_storage_ptr(task))
if (!task)
return (unsigned long)NULL;
if (!bpf_task_storage_trylock())
return (unsigned long)NULL;
sdata = task_storage_lookup(task, map, true);
if (sdata)
return (unsigned long)sdata->data;
goto unlock;
/* This helper must only be called from places where the lifetime of the task
* is guaranteed. Either by being refcounted or by being protected
* by an RCU read-side critical section.
*/
if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
/* only allocate new storage, when the task is refcounted */
if (refcount_read(&task->usage) &&
(flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
sdata = bpf_local_storage_update(
task, (struct bpf_local_storage_map *)map, value,
BPF_NOEXIST);
return IS_ERR(sdata) ? (unsigned long)NULL :
(unsigned long)sdata->data;
}
return (unsigned long)NULL;
unlock:
bpf_task_storage_unlock();
return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL :
(unsigned long)sdata->data;
}
BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
task)
{
int ret;
if (!task)
return -EINVAL;
if (!bpf_task_storage_trylock())
return -EBUSY;
/* This helper must only be called from places where the lifetime of the task
* is guaranteed. Either by being refcounted or by being protected
* by an RCU read-side critical section.
*/
return task_storage_delete(task, map);
ret = task_storage_delete(task, map);
bpf_task_storage_unlock();
return ret;
}
static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
@@ -276,7 +298,7 @@ static void task_storage_map_free(struct bpf_map *map)
smap = (struct bpf_local_storage_map *)map;
bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
bpf_local_storage_map_free(smap);
bpf_local_storage_map_free(smap, &bpf_task_storage_busy);
}
static int task_storage_map_btf_id;

View File

@@ -173,7 +173,7 @@
#define BITS_ROUNDUP_BYTES(bits) \
(BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
#define BTF_INFO_MASK 0x8f00ffff
#define BTF_INFO_MASK 0x9f00ffff
#define BTF_INT_MASK 0x0fffffff
#define BTF_TYPE_ID_VALID(type_id) ((type_id) <= BTF_MAX_TYPE)
#define BTF_STR_OFFSET_VALID(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET)
@@ -280,6 +280,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_FUNC_PROTO] = "FUNC_PROTO",
[BTF_KIND_VAR] = "VAR",
[BTF_KIND_DATASEC] = "DATASEC",
[BTF_KIND_FLOAT] = "FLOAT",
};
static const char *btf_type_str(const struct btf_type *t)
@@ -574,6 +575,7 @@ static bool btf_type_has_size(const struct btf_type *t)
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
case BTF_KIND_DATASEC:
case BTF_KIND_FLOAT:
return true;
}
@@ -1704,6 +1706,7 @@ __btf_resolve_size(const struct btf *btf, const struct btf_type *type,
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
case BTF_KIND_FLOAT:
size = type->size;
goto resolved;
@@ -1849,7 +1852,7 @@ static int btf_df_check_kflag_member(struct btf_verifier_env *env,
return -EINVAL;
}
/* Used for ptr, array and struct/union type members.
/* Used for ptr, array struct/union and float type members.
* int, enum and modifier types have their specific callback functions.
*/
static int btf_generic_check_kflag_member(struct btf_verifier_env *env,
@@ -3675,6 +3678,81 @@ static const struct btf_kind_operations datasec_ops = {
.show = btf_datasec_show,
};
static s32 btf_float_check_meta(struct btf_verifier_env *env,
const struct btf_type *t,
u32 meta_left)
{
if (btf_type_vlen(t)) {
btf_verifier_log_type(env, t, "vlen != 0");
return -EINVAL;
}
if (btf_type_kflag(t)) {
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
return -EINVAL;
}
if (t->size != 2 && t->size != 4 && t->size != 8 && t->size != 12 &&
t->size != 16) {
btf_verifier_log_type(env, t, "Invalid type_size");
return -EINVAL;
}
btf_verifier_log_type(env, t, NULL);
return 0;
}
static int btf_float_check_member(struct btf_verifier_env *env,
const struct btf_type *struct_type,
const struct btf_member *member,
const struct btf_type *member_type)
{
u64 start_offset_bytes;
u64 end_offset_bytes;
u64 misalign_bits;
u64 align_bytes;
u64 align_bits;
/* Different architectures have different alignment requirements, so
* here we check only for the reasonable minimum. This way we ensure
* that types after CO-RE can pass the kernel BTF verifier.
*/
align_bytes = min_t(u64, sizeof(void *), member_type->size);
align_bits = align_bytes * BITS_PER_BYTE;
div64_u64_rem(member->offset, align_bits, &misalign_bits);
if (misalign_bits) {
btf_verifier_log_member(env, struct_type, member,
"Member is not properly aligned");
return -EINVAL;
}
start_offset_bytes = member->offset / BITS_PER_BYTE;
end_offset_bytes = start_offset_bytes + member_type->size;
if (end_offset_bytes > struct_type->size) {
btf_verifier_log_member(env, struct_type, member,
"Member exceeds struct_size");
return -EINVAL;
}
return 0;
}
static void btf_float_log(struct btf_verifier_env *env,
const struct btf_type *t)
{
btf_verifier_log(env, "size=%u", t->size);
}
static const struct btf_kind_operations float_ops = {
.check_meta = btf_float_check_meta,
.resolve = btf_df_resolve,
.check_member = btf_float_check_member,
.check_kflag_member = btf_generic_check_kflag_member,
.log_details = btf_float_log,
.show = btf_df_show,
};
static int btf_func_proto_check(struct btf_verifier_env *env,
const struct btf_type *t)
{
@@ -3808,6 +3886,7 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
[BTF_KIND_FUNC_PROTO] = &func_proto_ops,
[BTF_KIND_VAR] = &var_ops,
[BTF_KIND_DATASEC] = &datasec_ops,
[BTF_KIND_FLOAT] = &float_ops,
};
static s32 btf_check_meta(struct btf_verifier_env *env,
@@ -4592,8 +4671,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
}
arg = off / 8;
args = (const struct btf_param *)(t + 1);
/* if (t == NULL) Fall back to default BPF prog with 5 u64 arguments */
nr_args = t ? btf_type_vlen(t) : 5;
/* if (t == NULL) Fall back to default BPF prog with
* MAX_BPF_FUNC_REG_ARGS u64 arguments.
*/
nr_args = t ? btf_type_vlen(t) : MAX_BPF_FUNC_REG_ARGS;
if (prog->aux->attach_btf_trace) {
/* skip first 'void *__data' argument in btf_trace_##name typedef */
args++;
@@ -4649,7 +4730,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
}
} else {
if (!t)
/* Default prog with 5 args */
/* Default prog with MAX_BPF_FUNC_REG_ARGS args */
return true;
t = btf_type_by_id(btf, args[arg].type);
}
@@ -5100,12 +5181,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
if (!func) {
/* BTF function prototype doesn't match the verifier types.
* Fall back to 5 u64 args.
* Fall back to MAX_BPF_FUNC_REG_ARGS u64 args.
*/
for (i = 0; i < 5; i++)
for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
m->arg_size[i] = 8;
m->ret_size = 8;
m->nr_args = 5;
m->nr_args = MAX_BPF_FUNC_REG_ARGS;
return 0;
}
args = (const struct btf_param *)(func + 1);
@@ -5328,8 +5409,9 @@ int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
}
args = (const struct btf_param *)(t + 1);
nargs = btf_type_vlen(t);
if (nargs > 5) {
bpf_log(log, "Function %s has %d > 5 args\n", tname, nargs);
if (nargs > MAX_BPF_FUNC_REG_ARGS) {
bpf_log(log, "Function %s has %d > %d args\n", tname, nargs,
MAX_BPF_FUNC_REG_ARGS);
goto out;
}
@@ -5458,9 +5540,9 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
}
args = (const struct btf_param *)(t + 1);
nargs = btf_type_vlen(t);
if (nargs > 5) {
bpf_log(log, "Global function %s() with %d > 5 args. Buggy compiler.\n",
tname, nargs);
if (nargs > MAX_BPF_FUNC_REG_ARGS) {
bpf_log(log, "Global function %s() with %d > %d args. Buggy compiler.\n",
tname, nargs, MAX_BPF_FUNC_REG_ARGS);
return -EINVAL;
}
/* check that function returns int */

View File

@@ -543,7 +543,6 @@ static void cpu_map_free(struct bpf_map *map)
* complete.
*/
bpf_clear_redirect_map(map);
synchronize_rcu();
/* For cpu_map the remote CPUs can still be using the entries
@@ -563,7 +562,7 @@ static void cpu_map_free(struct bpf_map *map)
kfree(cmap);
}
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
static void *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
{
struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
struct bpf_cpu_map_entry *rcpu;
@@ -600,6 +599,11 @@ static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
return 0;
}
static int cpu_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
{
return __bpf_xdp_redirect_map(map, ifindex, flags, __cpu_map_lookup_elem);
}
static int cpu_map_btf_id;
const struct bpf_map_ops cpu_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
@@ -612,6 +616,7 @@ const struct bpf_map_ops cpu_map_ops = {
.map_check_btf = map_check_no_btf,
.map_btf_name = "bpf_cpu_map",
.map_btf_id = &cpu_map_btf_id,
.map_redirect = cpu_map_redirect,
};
static void bq_flush_to_queue(struct xdp_bulk_queue *bq)

View File

@@ -197,7 +197,6 @@ static void dev_map_free(struct bpf_map *map)
list_del_rcu(&dtab->list);
spin_unlock(&dev_map_lock);
bpf_clear_redirect_map(map);
synchronize_rcu();
/* Make sure prior __dev_map_entry_free() have completed. */
@@ -258,7 +257,7 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
return 0;
}
struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
static void *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct hlist_head *head = dev_map_index_hash(dtab, key);
@@ -392,7 +391,7 @@ void __dev_flush(void)
* update happens in parallel here a dev_put wont happen until after reading the
* ifindex.
*/
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct bpf_dtab_netdev *obj;
@@ -735,6 +734,16 @@ static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
map, key, value, map_flags);
}
static int dev_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
{
return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_lookup_elem);
}
static int dev_hash_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
{
return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_hash_lookup_elem);
}
static int dev_map_btf_id;
const struct bpf_map_ops dev_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
@@ -747,6 +756,7 @@ const struct bpf_map_ops dev_map_ops = {
.map_check_btf = map_check_no_btf,
.map_btf_name = "bpf_dtab",
.map_btf_id = &dev_map_btf_id,
.map_redirect = dev_map_redirect,
};
static int dev_map_hash_map_btf_id;
@@ -761,6 +771,7 @@ const struct bpf_map_ops dev_map_hash_ops = {
.map_check_btf = map_check_no_btf,
.map_btf_name = "bpf_dtab",
.map_btf_id = &dev_map_hash_map_btf_id,
.map_redirect = dev_hash_map_redirect,
};
static void dev_map_hash_remove_netdev(struct bpf_dtab *dtab,

View File

@@ -1869,6 +1869,63 @@ static const struct bpf_iter_seq_info iter_seq_info = {
.seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info),
};
static int bpf_for_each_hash_elem(struct bpf_map *map, void *callback_fn,
void *callback_ctx, u64 flags)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct hlist_nulls_head *head;
struct hlist_nulls_node *n;
struct htab_elem *elem;
u32 roundup_key_size;
int i, num_elems = 0;
void __percpu *pptr;
struct bucket *b;
void *key, *val;
bool is_percpu;
u64 ret = 0;
if (flags != 0)
return -EINVAL;
is_percpu = htab_is_percpu(htab);
roundup_key_size = round_up(map->key_size, 8);
/* disable migration so percpu value prepared here will be the
* same as the one seen by the bpf program with bpf_map_lookup_elem().
*/
if (is_percpu)
migrate_disable();
for (i = 0; i < htab->n_buckets; i++) {
b = &htab->buckets[i];
rcu_read_lock();
head = &b->head;
hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
key = elem->key;
if (is_percpu) {
/* current cpu value for percpu map */
pptr = htab_elem_get_ptr(elem, map->key_size);
val = this_cpu_ptr(pptr);
} else {
val = elem->key + roundup_key_size;
}
num_elems++;
ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
(u64)(long)key, (u64)(long)val,
(u64)(long)callback_ctx, 0);
/* return value: 0 - continue, 1 - stop and return */
if (ret) {
rcu_read_unlock();
goto out;
}
}
rcu_read_unlock();
}
out:
if (is_percpu)
migrate_enable();
return num_elems;
}
static int htab_map_btf_id;
const struct bpf_map_ops htab_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
@@ -1881,6 +1938,8 @@ const struct bpf_map_ops htab_map_ops = {
.map_delete_elem = htab_map_delete_elem,
.map_gen_lookup = htab_map_gen_lookup,
.map_seq_show_elem = htab_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_map_btf_id,
@@ -1900,6 +1959,8 @@ const struct bpf_map_ops htab_lru_map_ops = {
.map_delete_elem = htab_lru_map_delete_elem,
.map_gen_lookup = htab_lru_map_gen_lookup,
.map_seq_show_elem = htab_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab_lru),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_lru_map_btf_id,
@@ -2019,6 +2080,8 @@ const struct bpf_map_ops htab_percpu_map_ops = {
.map_update_elem = htab_percpu_map_update_elem,
.map_delete_elem = htab_map_delete_elem,
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab_percpu),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_percpu_map_btf_id,
@@ -2036,6 +2099,8 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
.map_update_elem = htab_lru_percpu_map_update_elem,
.map_delete_elem = htab_lru_map_delete_elem,
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
.map_set_for_each_callback_args = map_set_for_each_callback_args,
.map_for_each_callback = bpf_for_each_hash_elem,
BATCH_OPS(htab_lru_percpu),
.map_btf_name = "bpf_htab",
.map_btf_id = &htab_lru_percpu_map_btf_id,

View File

@@ -708,6 +708,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_ringbuf_discard_proto;
case BPF_FUNC_ringbuf_query:
return &bpf_ringbuf_query_proto;
case BPF_FUNC_for_each_map_elem:
return &bpf_for_each_map_elem_proto;
default:
break;
}

View File

@@ -234,6 +234,12 @@ static bool bpf_pseudo_call(const struct bpf_insn *insn)
insn->src_reg == BPF_PSEUDO_CALL;
}
static bool bpf_pseudo_func(const struct bpf_insn *insn)
{
return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
insn->src_reg == BPF_PSEUDO_FUNC;
}
struct bpf_call_arg_meta {
struct bpf_map *map_ptr;
bool raw_mode;
@@ -248,6 +254,7 @@ struct bpf_call_arg_meta {
u32 btf_id;
struct btf *ret_btf;
u32 ret_btf_id;
u32 subprogno;
};
struct btf *btf_vmlinux;
@@ -390,6 +397,24 @@ __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
env->prev_linfo = linfo;
}
static void verbose_invalid_scalar(struct bpf_verifier_env *env,
struct bpf_reg_state *reg,
struct tnum *range, const char *ctx,
const char *reg_name)
{
char tn_buf[48];
verbose(env, "At %s the register %s ", ctx, reg_name);
if (!tnum_is_unknown(reg->var_off)) {
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
verbose(env, "has value %s", tn_buf);
} else {
verbose(env, "has unknown scalar value");
}
tnum_strn(tn_buf, sizeof(tn_buf), *range);
verbose(env, " should have been in %s\n", tn_buf);
}
static bool type_is_pkt_pointer(enum bpf_reg_type type)
{
return type == PTR_TO_PACKET ||
@@ -409,6 +434,7 @@ static bool reg_type_not_null(enum bpf_reg_type type)
return type == PTR_TO_SOCKET ||
type == PTR_TO_TCP_SOCK ||
type == PTR_TO_MAP_VALUE ||
type == PTR_TO_MAP_KEY ||
type == PTR_TO_SOCK_COMMON;
}
@@ -451,7 +477,8 @@ static bool arg_type_may_be_null(enum bpf_arg_type type)
type == ARG_PTR_TO_MEM_OR_NULL ||
type == ARG_PTR_TO_CTX_OR_NULL ||
type == ARG_PTR_TO_SOCKET_OR_NULL ||
type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
type == ARG_PTR_TO_ALLOC_MEM_OR_NULL ||
type == ARG_PTR_TO_STACK_OR_NULL;
}
/* Determine whether the function releases some resources allocated by another
@@ -541,6 +568,8 @@ static const char * const reg_type_str[] = {
[PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
[PTR_TO_RDWR_BUF] = "rdwr_buf",
[PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
[PTR_TO_FUNC] = "func",
[PTR_TO_MAP_KEY] = "map_key",
};
static char slot_type_char[] = {
@@ -612,6 +641,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
if (type_is_pkt_pointer(t))
verbose(env, ",r=%d", reg->range);
else if (t == CONST_PTR_TO_MAP ||
t == PTR_TO_MAP_KEY ||
t == PTR_TO_MAP_VALUE ||
t == PTR_TO_MAP_VALUE_OR_NULL)
verbose(env, ",ks=%d,vs=%d",
@@ -1519,7 +1549,7 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
}
ret = find_subprog(env, off);
if (ret >= 0)
return 0;
return ret;
if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
verbose(env, "too many subprograms\n");
return -E2BIG;
@@ -1527,7 +1557,7 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
env->subprog_info[env->subprog_cnt++].start = off;
sort(env->subprog_info, env->subprog_cnt,
sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
return 0;
return env->subprog_cnt - 1;
}
static int check_subprogs(struct bpf_verifier_env *env)
@@ -1544,6 +1574,19 @@ static int check_subprogs(struct bpf_verifier_env *env)
/* determine subprog starts. The end is one before the next starts */
for (i = 0; i < insn_cnt; i++) {
if (bpf_pseudo_func(insn + i)) {
if (!env->bpf_capable) {
verbose(env,
"function pointers are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
return -EPERM;
}
ret = add_subprog(env, i + insn[i].imm + 1);
if (ret < 0)
return ret;
/* remember subprog */
insn[i + 1].imm = ret;
continue;
}
if (!bpf_pseudo_call(insn + i))
continue;
if (!env->bpf_capable) {
@@ -2295,6 +2338,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_PERCPU_BTF_ID:
case PTR_TO_MEM:
case PTR_TO_MEM_OR_NULL:
case PTR_TO_FUNC:
case PTR_TO_MAP_KEY:
return true;
default:
return false;
@@ -2899,6 +2944,10 @@ static int __check_mem_access(struct bpf_verifier_env *env, int regno,
reg = &cur_regs(env)[regno];
switch (reg->type) {
case PTR_TO_MAP_KEY:
verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
mem_size, off, size);
break;
case PTR_TO_MAP_VALUE:
verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
mem_size, off, size);
@@ -3304,6 +3353,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
case PTR_TO_FLOW_KEYS:
pointer_desc = "flow keys ";
break;
case PTR_TO_MAP_KEY:
pointer_desc = "key ";
break;
case PTR_TO_MAP_VALUE:
pointer_desc = "value ";
break;
@@ -3405,7 +3457,7 @@ process_func:
continue_func:
subprog_end = subprog[idx + 1].start;
for (; i < subprog_end; i++) {
if (!bpf_pseudo_call(insn + i))
if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
continue;
/* remember insn and function to return to */
ret_insn[frame] = i + 1;
@@ -3842,7 +3894,19 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
/* for access checks, reg->off is just part of off */
off += reg->off;
if (reg->type == PTR_TO_MAP_VALUE) {
if (reg->type == PTR_TO_MAP_KEY) {
if (t == BPF_WRITE) {
verbose(env, "write to change key R%d not allowed\n", regno);
return -EACCES;
}
err = check_mem_region_access(env, regno, off, size,
reg->map_ptr->key_size, false);
if (err)
return err;
if (value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
} else if (reg->type == PTR_TO_MAP_VALUE) {
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
verbose(env, "R%d leaks addr into map\n", value_regno);
@@ -4258,6 +4322,9 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
case PTR_TO_PACKET_META:
return check_packet_access(env, regno, reg->off, access_size,
zero_size_allowed);
case PTR_TO_MAP_KEY:
return check_mem_region_access(env, regno, reg->off, access_size,
reg->map_ptr->key_size, false);
case PTR_TO_MAP_VALUE:
if (check_map_access_type(env, regno, reg->off, access_size,
meta && meta->raw_mode ? BPF_WRITE :
@@ -4474,6 +4541,7 @@ static const struct bpf_reg_types map_key_value_types = {
PTR_TO_STACK,
PTR_TO_PACKET,
PTR_TO_PACKET_META,
PTR_TO_MAP_KEY,
PTR_TO_MAP_VALUE,
},
};
@@ -4505,6 +4573,7 @@ static const struct bpf_reg_types mem_types = {
PTR_TO_STACK,
PTR_TO_PACKET,
PTR_TO_PACKET_META,
PTR_TO_MAP_KEY,
PTR_TO_MAP_VALUE,
PTR_TO_MEM,
PTR_TO_RDONLY_BUF,
@@ -4517,6 +4586,7 @@ static const struct bpf_reg_types int_ptr_types = {
PTR_TO_STACK,
PTR_TO_PACKET,
PTR_TO_PACKET_META,
PTR_TO_MAP_KEY,
PTR_TO_MAP_VALUE,
},
};
@@ -4529,6 +4599,8 @@ static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_T
static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
@@ -4557,6 +4629,8 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
[ARG_PTR_TO_INT] = &int_ptr_types,
[ARG_PTR_TO_LONG] = &int_ptr_types,
[ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
[ARG_PTR_TO_FUNC] = &func_ptr_types,
[ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types,
};
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@ -4738,6 +4812,8 @@ skip_type_check:
verbose(env, "verifier internal error\n");
return -EFAULT;
}
} else if (arg_type == ARG_PTR_TO_FUNC) {
meta->subprogno = reg->subprogno;
} else if (arg_type_is_mem_ptr(arg_type)) {
/* The access to this pointer is only checked when we hit the
* next is_mem_size argument below.
@@ -5258,13 +5334,19 @@ static void clear_caller_saved_regs(struct bpf_verifier_env *env,
}
}
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx)
typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee,
int insn_idx);
static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx, int subprog,
set_callee_state_fn set_callee_state_cb)
{
struct bpf_verifier_state *state = env->cur_state;
struct bpf_func_info_aux *func_info_aux;
struct bpf_func_state *caller, *callee;
int i, err, subprog, target_insn;
int err;
bool is_global = false;
if (state->curframe + 1 >= MAX_CALL_FRAMES) {
@@ -5273,14 +5355,6 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return -E2BIG;
}
target_insn = *insn_idx + insn->imm;
subprog = find_subprog(env, target_insn + 1);
if (subprog < 0) {
verbose(env, "verifier bug. No program starts at insn %d\n",
target_insn + 1);
return -EFAULT;
}
caller = state->frame[state->curframe];
if (state->frame[state->curframe + 1]) {
verbose(env, "verifier bug. Frame %d already allocated\n",
@@ -5335,11 +5409,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (err)
return err;
/* copy r1 - r5 args that callee can access. The copy includes parent
* pointers, which connects us up to the liveness chain
*/
for (i = BPF_REG_1; i <= BPF_REG_5; i++)
callee->regs[i] = caller->regs[i];
err = set_callee_state_cb(env, caller, callee, *insn_idx);
if (err)
return err;
clear_caller_saved_regs(env, caller->regs);
@@ -5347,7 +5419,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
state->curframe++;
/* and go analyze first insn of the callee */
*insn_idx = target_insn;
*insn_idx = env->subprog_info[subprog].start - 1;
if (env->log.level & BPF_LOG_LEVEL) {
verbose(env, "caller:\n");
@@ -5358,6 +5430,92 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return 0;
}
int map_set_for_each_callback_args(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee)
{
/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
* void *callback_ctx, u64 flags);
* callback_fn(struct bpf_map *map, void *key, void *value,
* void *callback_ctx);
*/
callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
/* pointer to stack or null */
callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
/* unused */
__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
return 0;
}
static int set_callee_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee, int insn_idx)
{
int i;
/* copy r1 - r5 args that callee can access. The copy includes parent
* pointers, which connects us up to the liveness chain
*/
for (i = BPF_REG_1; i <= BPF_REG_5; i++)
callee->regs[i] = caller->regs[i];
return 0;
}
static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx)
{
int subprog, target_insn;
target_insn = *insn_idx + insn->imm + 1;
subprog = find_subprog(env, target_insn);
if (subprog < 0) {
verbose(env, "verifier bug. No program starts at insn %d\n",
target_insn);
return -EFAULT;
}
return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
}
static int set_map_elem_callback_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee,
int insn_idx)
{
struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
struct bpf_map *map;
int err;
if (bpf_map_ptr_poisoned(insn_aux)) {
verbose(env, "tail_call abusing map_ptr\n");
return -EINVAL;
}
map = BPF_MAP_PTR(insn_aux->map_ptr_state);
if (!map->ops->map_set_for_each_callback_args ||
!map->ops->map_for_each_callback) {
verbose(env, "callback function not allowed for map\n");
return -ENOTSUPP;
}
err = map->ops->map_set_for_each_callback_args(env, caller, callee);
if (err)
return err;
callee->in_callback_fn = true;
return 0;
}
static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
{
struct bpf_verifier_state *state = env->cur_state;
@@ -5380,8 +5538,22 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
state->curframe--;
caller = state->frame[state->curframe];
/* return to the caller whatever r0 had in the callee */
caller->regs[BPF_REG_0] = *r0;
if (callee->in_callback_fn) {
/* enforce R0 return value range [0, 1]. */
struct tnum range = tnum_range(0, 1);
if (r0->type != SCALAR_VALUE) {
verbose(env, "R0 not a scalar value\n");
return -EACCES;
}
if (!tnum_in(range, r0->var_off)) {
verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
return -EINVAL;
}
} else {
/* return to the caller whatever r0 had in the callee */
caller->regs[BPF_REG_0] = *r0;
}
/* Transfer references to the caller */
err = transfer_reference_state(caller, callee);
@@ -5436,7 +5608,9 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_map_push_elem &&
func_id != BPF_FUNC_map_pop_elem &&
func_id != BPF_FUNC_map_peek_elem)
func_id != BPF_FUNC_map_peek_elem &&
func_id != BPF_FUNC_for_each_map_elem &&
func_id != BPF_FUNC_redirect_map)
return 0;
if (map == NULL) {
@@ -5517,15 +5691,18 @@ static int check_reference_leak(struct bpf_verifier_env *env)
return state->acquired_refs ? -EINVAL : 0;
}
static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
const struct bpf_func_proto *fn = NULL;
struct bpf_reg_state *regs;
struct bpf_call_arg_meta meta;
int insn_idx = *insn_idx_p;
bool changes_data;
int i, err;
int i, err, func_id;
/* find function prototype */
func_id = insn->imm;
if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
func_id);
@@ -5571,7 +5748,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
meta.func_id = func_id;
/* check args */
for (i = 0; i < 5; i++) {
for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
err = check_func_arg(env, i, &meta, fn);
if (err)
return err;
@@ -5621,6 +5798,13 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return -EINVAL;
}
if (func_id == BPF_FUNC_for_each_map_elem) {
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
set_map_elem_callback_state);
if (err < 0)
return -EINVAL;
}
/* reset caller saved regs */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
mark_reg_not_init(env, regs, caller_saved[i]);
@@ -5874,6 +6058,19 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
else
*ptr_limit = -off;
return 0;
case PTR_TO_MAP_KEY:
/* Currently, this code is not exercised as the only use
* is bpf_for_each_map_elem() helper which requires
* bpf_capble. The code has been tested manually for
* future use.
*/
if (mask_to_left) {
*ptr_limit = ptr_reg->umax_value + ptr_reg->off;
} else {
off = ptr_reg->smin_value + ptr_reg->off;
*ptr_limit = ptr_reg->map_ptr->key_size - off;
}
return 0;
case PTR_TO_MAP_VALUE:
if (mask_to_left) {
*ptr_limit = ptr_reg->umax_value + ptr_reg->off;
@@ -5904,7 +6101,7 @@ static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
aux->alu_limit != alu_limit))
return -EACCES;
/* Corresponding fixup done in fixup_bpf_calls(). */
/* Corresponding fixup done in do_misc_fixups(). */
aux->alu_state = alu_state;
aux->alu_limit = alu_limit;
return 0;
@@ -6075,6 +6272,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
verbose(env, "R%d pointer arithmetic on %s prohibited\n",
dst, reg_type_str[ptr_reg->type]);
return -EACCES;
case PTR_TO_MAP_KEY:
case PTR_TO_MAP_VALUE:
if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
@@ -8254,6 +8452,24 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
return 0;
}
if (insn->src_reg == BPF_PSEUDO_FUNC) {
struct bpf_prog_aux *aux = env->prog->aux;
u32 subprogno = insn[1].imm;
if (!aux->func_info) {
verbose(env, "missing btf func_info\n");
return -EINVAL;
}
if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
verbose(env, "callback function not static\n");
return -EINVAL;
}
dst_reg->type = PTR_TO_FUNC;
dst_reg->subprogno = subprogno;
return 0;
}
map = env->used_maps[aux->map_index];
mark_reg_known_zero(env, regs, insn->dst_reg);
dst_reg->map_ptr = map;
@@ -8482,17 +8698,7 @@ static int check_return_code(struct bpf_verifier_env *env)
}
if (!tnum_in(range, reg->var_off)) {
char tn_buf[48];
verbose(env, "At program exit the register R0 ");
if (!tnum_is_unknown(reg->var_off)) {
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
verbose(env, "has value %s", tn_buf);
} else {
verbose(env, "has unknown scalar value");
}
tnum_strn(tn_buf, sizeof(tn_buf), range);
verbose(env, " should have been in %s\n", tn_buf);
verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
return -EINVAL;
}
@@ -8619,6 +8825,27 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
return DONE_EXPLORING;
}
static int visit_func_call_insn(int t, int insn_cnt,
struct bpf_insn *insns,
struct bpf_verifier_env *env,
bool visit_callee)
{
int ret;
ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
if (ret)
return ret;
if (t + 1 < insn_cnt)
init_explored_state(env, t + 1);
if (visit_callee) {
init_explored_state(env, t);
ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
env, false);
}
return ret;
}
/* Visits the instruction at index t and returns one of the following:
* < 0 - an error occurred
* DONE_EXPLORING - the instruction was fully explored
@@ -8629,6 +8856,9 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
struct bpf_insn *insns = env->prog->insnsi;
int ret;
if (bpf_pseudo_func(insns + t))
return visit_func_call_insn(t, insn_cnt, insns, env, true);
/* All non-branch instructions have a single fall-through edge. */
if (BPF_CLASS(insns[t].code) != BPF_JMP &&
BPF_CLASS(insns[t].code) != BPF_JMP32)
@@ -8639,18 +8869,8 @@ static int visit_insn(int t, int insn_cnt, struct bpf_verifier_env *env)
return DONE_EXPLORING;
case BPF_CALL:
ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
if (ret)
return ret;
if (t + 1 < insn_cnt)
init_explored_state(env, t + 1);
if (insns[t].src_reg == BPF_PSEUDO_CALL) {
init_explored_state(env, t);
ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
env, false);
}
return ret;
return visit_func_call_insn(t, insn_cnt, insns, env,
insns[t].src_reg == BPF_PSEUDO_CALL);
case BPF_JA:
if (BPF_SRC(insns[t].code) != BPF_K)
@@ -9259,6 +9479,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
*/
return false;
}
case PTR_TO_MAP_KEY:
case PTR_TO_MAP_VALUE:
/* If the new min/max/var_off satisfy the old ones and
* everything else matches, we are OK.
@@ -10105,10 +10326,9 @@ static int do_check(struct bpf_verifier_env *env)
if (insn->src_reg == BPF_PSEUDO_CALL)
err = check_func_call(env, insn, &env->insn_idx);
else
err = check_helper_call(env, insn->imm, env->insn_idx);
err = check_helper_call(env, insn, &env->insn_idx);
if (err)
return err;
} else if (opcode == BPF_JA) {
if (BPF_SRC(insn->code) != BPF_K ||
insn->imm != 0 ||
@@ -10537,6 +10757,12 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
goto next_insn;
}
if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
aux = &env->insn_aux_data[i];
aux->ptr_type = PTR_TO_FUNC;
goto next_insn;
}
/* In final convert_pseudo_ld_imm64() step, this is
* converted into regular 64-bit imm load insn.
*/
@@ -10669,9 +10895,13 @@ static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
int insn_cnt = env->prog->len;
int i;
for (i = 0; i < insn_cnt; i++, insn++)
if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
insn->src_reg = 0;
for (i = 0; i < insn_cnt; i++, insn++) {
if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
continue;
if (insn->src_reg == BPF_PSEUDO_FUNC)
continue;
insn->src_reg = 0;
}
}
/* single env->prog->insni[off] instruction was replaced with the range
@@ -11310,6 +11540,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
return 0;
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
if (bpf_pseudo_func(insn)) {
env->insn_aux_data[i].call_imm = insn->imm;
/* subprog is encoded in insn[1].imm */
continue;
}
if (!bpf_pseudo_call(insn))
continue;
/* Upon error here we cannot fall back to interpreter but
@@ -11439,6 +11675,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
for (i = 0; i < env->subprog_cnt; i++) {
insn = func[i]->insnsi;
for (j = 0; j < func[i]->len; j++, insn++) {
if (bpf_pseudo_func(insn)) {
subprog = insn[1].imm;
insn[0].imm = (u32)(long)func[subprog]->bpf_func;
insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
continue;
}
if (!bpf_pseudo_call(insn))
continue;
subprog = insn->off;
@@ -11484,6 +11726,11 @@ static int jit_subprogs(struct bpf_verifier_env *env)
* later look the same as if they were interpreted only.
*/
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
if (bpf_pseudo_func(insn)) {
insn[0].imm = env->insn_aux_data[i].call_imm;
insn[1].imm = find_subprog(env, i + insn[0].imm + 1);
continue;
}
if (!bpf_pseudo_call(insn))
continue;
insn->off = env->insn_aux_data[i].call_imm;
@@ -11548,6 +11795,14 @@ static int fixup_call_args(struct bpf_verifier_env *env)
return -EINVAL;
}
for (i = 0; i < prog->len; i++, insn++) {
if (bpf_pseudo_func(insn)) {
/* When JIT fails the progs with callback calls
* have to be rejected, since interpreter doesn't support them yet.
*/
verbose(env, "callbacks are not allowed in non-JITed programs\n");
return -EINVAL;
}
if (!bpf_pseudo_call(insn))
continue;
depth = get_callee_stack_depth(env, insn, i);
@@ -11560,12 +11815,10 @@ static int fixup_call_args(struct bpf_verifier_env *env)
return err;
}
/* fixup insn->imm field of bpf_call instructions
* and inline eligible helpers as explicit sequence of BPF instructions
*
* this function is called after eBPF program passed verification
/* Do various post-verification rewrites in a single program pass.
* These rewrites simplify JIT and interpreter implementations.
*/
static int fixup_bpf_calls(struct bpf_verifier_env *env)
static int do_misc_fixups(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog;
bool expect_blinding = bpf_jit_blinding_enabled(prog);
@@ -11580,6 +11833,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
int i, ret, cnt, delta = 0;
for (i = 0; i < insn_cnt; i++, insn++) {
/* Make divide-by-zero exceptions impossible. */
if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
@@ -11620,6 +11874,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
continue;
}
/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
if (BPF_CLASS(insn->code) == BPF_LD &&
(BPF_MODE(insn->code) == BPF_ABS ||
BPF_MODE(insn->code) == BPF_IND)) {
@@ -11639,6 +11894,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
continue;
}
/* Rewrite pointer arithmetic to mitigate speculation attacks. */
if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
@@ -11787,7 +12043,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
insn->imm == BPF_FUNC_map_delete_elem ||
insn->imm == BPF_FUNC_map_push_elem ||
insn->imm == BPF_FUNC_map_pop_elem ||
insn->imm == BPF_FUNC_map_peek_elem)) {
insn->imm == BPF_FUNC_map_peek_elem ||
insn->imm == BPF_FUNC_redirect_map)) {
aux = &env->insn_aux_data[i + delta];
if (bpf_map_ptr_poisoned(aux))
goto patch_call_imm;
@@ -11829,6 +12086,9 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
(int (*)(struct bpf_map *map, void *value))NULL));
BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
(int (*)(struct bpf_map *map, void *value))NULL));
BUILD_BUG_ON(!__same_type(ops->map_redirect,
(int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL));
patch_map_ops_generic:
switch (insn->imm) {
case BPF_FUNC_map_lookup_elem:
@@ -11855,11 +12115,16 @@ patch_map_ops_generic:
insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
__bpf_call_base;
continue;
case BPF_FUNC_redirect_map:
insn->imm = BPF_CAST_CALL(ops->map_redirect) -
__bpf_call_base;
continue;
}
goto patch_call_imm;
}
/* Implement bpf_jiffies64 inline. */
if (prog->jit_requested && BITS_PER_LONG == 64 &&
insn->imm == BPF_FUNC_jiffies64) {
struct bpf_insn ld_jiffies_addr[2] = {
@@ -12670,7 +12935,7 @@ skip_full_check:
ret = convert_ctx_accesses(env);
if (ret == 0)
ret = fixup_bpf_calls(env);
ret = do_misc_fixups(env);
/* do 32-bit optimization after insn patching has done so those patched
* insns could be handled correctly.

View File

@@ -96,6 +96,7 @@
#include <linux/kasan.h>
#include <linux/scs.h>
#include <linux/io_uring.h>
#include <linux/bpf.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@@ -734,6 +735,7 @@ void __put_task_struct(struct task_struct *tsk)
cgroup_free(tsk);
task_numa_free(tsk, true);
security_task_free(tsk);
bpf_task_storage_free(tsk);
exit_creds(tsk);
delayacct_tsk_free(tsk);
put_signal_struct(tsk->signal);
@@ -2064,6 +2066,9 @@ static __latent_entropy struct task_struct *copy_process(
p->sequential_io = 0;
p->sequential_io_avg = 0;
#endif
#ifdef CONFIG_BPF_SYSCALL
RCU_INIT_POINTER(p->bpf_storage, NULL);
#endif
/* Perform scheduler related setup. Assign this task to a CPU. */
retval = sched_fork(clone_flags, p);

View File

@@ -1367,6 +1367,12 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_per_cpu_ptr_proto;
case BPF_FUNC_this_cpu_ptr:
return &bpf_this_cpu_ptr_proto;
case BPF_FUNC_task_storage_get:
return &bpf_task_storage_get_proto;
case BPF_FUNC_task_storage_delete:
return &bpf_task_storage_delete_proto;
case BPF_FUNC_for_each_map_elem:
return &bpf_for_each_map_elem_proto;
default:
return NULL;
}