Merge branch 'Enable RCU semantics for task kptrs'

David Vernet says:

====================

In commit 22df776a9a ("tasks: Extract rcu_users out of union"), the
'refcount_t rcu_users' field was extracted out of a union with the
'struct rcu_head rcu' field. This allows us to use the field for
refcounting struct task_struct with RCU protection, as scheduling the
RCU callback no longer overwrites rcu_users with a nonzero value once
the refcount has dropped to zero.

This patch set leverages this to do a few things:

1. Marks struct task_struct as RCU safe in the verifier, allowing
   referenced kptr tasks stored in maps to be accessed in an RCU
   read region without acquiring a reference (with just a NULL check).
2. Makes bpf_task_acquire() a KF_ACQUIRE | KF_RCU | KF_RET_NULL kfunc.
3. Removes bpf_task_kptr_get() and bpf_task_acquire_not_zero(), as
   they're now redundant with the above two changes.
4. Updates selftests and documentation accordingly.
---
Changelog:
v1: https://lore.kernel.org/all/20230331005733.406202-1-void@manifault.com/
v1 -> v2:
- Remove testcases validating nested trust inheritance. The first
  version used 'struct task_struct __rcu *parent', but because that
  field has the __rcu tag it functions differently on gcc and llvm and
  causes gcc selftests to fail. Alexei is reworking nested trust
  anyway, so let's leave it off for now (Alexei).
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov 2023-04-01 09:07:21 -07:00
commit a033907e7b
8 changed files with 155 additions and 174 deletions

View File

@ -471,13 +471,50 @@ struct_ops callback arg. For example:
struct task_struct *acquired;
acquired = bpf_task_acquire(task);
if (acquired)
/*
* In a typical program you'd do something like store
* the task in a map, and the map will automatically
* release it later. Here, we release it manually.
*/
bpf_task_release(acquired);
return 0;
}
References acquired on ``struct task_struct *`` objects are RCU protected.
Therefore, when in an RCU read region, you can obtain a pointer to a task
embedded in a map value without having to acquire a reference:
.. code-block:: c
#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
private(TASK) static struct task_struct *global;
/**
* A trivial example showing how to access a task stored
* in a map using RCU.
*/
SEC("tp_btf/task_newtask")
int BPF_PROG(task_rcu_read_example, struct task_struct *task, u64 clone_flags)
{
struct task_struct *local_copy;
bpf_rcu_read_lock();
local_copy = global;
if (local_copy)
/*
* We could also pass local_copy to kfuncs or helper functions here,
* as we're guaranteed that local_copy will be valid until we exit
* the RCU read region below.
*/
bpf_printk("Global task %s is valid", local_copy->comm);
else
bpf_printk("No global task found");
bpf_rcu_read_unlock();
/* At this point we can no longer reference local_copy. */
/*
* In a typical program you'd do something like store
* the task in a map, and the map will automatically
* release it later. Here, we release it manually.
*/
bpf_task_release(acquired);
return 0;
}

View File

@ -18,6 +18,7 @@
#include <linux/pid_namespace.h>
#include <linux/poison.h>
#include <linux/proc_ns.h>
#include <linux/sched/task.h>
#include <linux/security.h>
#include <linux/btf_ids.h>
#include <linux/bpf_mem_alloc.h>
@ -2013,73 +2014,8 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
*/
__bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p)
{
return get_task_struct(p);
}
/**
* bpf_task_acquire_not_zero - Acquire a reference to a rcu task object. A task
* acquired by this kfunc which is not stored in a map as a kptr, must be
* released by calling bpf_task_release().
* @p: The task on which a reference is being acquired.
*/
__bpf_kfunc struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p)
{
/* For the time being this function returns NULL, as it's not currently
* possible to safely acquire a reference to a task with RCU protection
* using get_task_struct() and put_task_struct(). This is due to the
* slightly odd mechanics of p->rcu_users, and how task RCU protection
* works.
*
* A struct task_struct is refcounted by two different refcount_t
* fields:
*
* 1. p->usage: The "true" refcount field which tracks a task's
* lifetime. The task is freed as soon as this
* refcount drops to 0.
*
* 2. p->rcu_users: An "RCU users" refcount field which is statically
* initialized to 2, and is co-located in a union with
* a struct rcu_head field (p->rcu). p->rcu_users
* essentially encapsulates a single p->usage
* refcount, and when p->rcu_users goes to 0, an RCU
* callback is scheduled on the struct rcu_head which
* decrements the p->usage refcount.
*
* There are two important implications to this task refcounting logic
* described above. The first is that
* refcount_inc_not_zero(&p->rcu_users) cannot be used anywhere, as
* after the refcount goes to 0, the RCU callback being scheduled will
* cause the memory backing the refcount to again be nonzero due to the
* fields sharing a union. The other is that we can't rely on RCU to
* guarantee that a task is valid in a BPF program. This is because a
* task could have already transitioned to being in the TASK_DEAD
* state, had its rcu_users refcount go to 0, and its rcu callback
* invoked in which it drops its single p->usage reference. At this
* point the task will be freed as soon as the last p->usage reference
* goes to 0, without waiting for another RCU gp to elapse. The only
* way that a BPF program can guarantee that a task is valid is in this
* scenario is to hold a p->usage refcount itself.
*
* Until we're able to resolve this issue, either by pulling
* p->rcu_users and p->rcu out of the union, or by getting rid of
* p->usage and just using p->rcu_users for refcounting, we'll just
* return NULL here.
*/
return NULL;
}
/**
* bpf_task_kptr_get - Acquire a reference on a struct task_struct kptr. A task
* kptr acquired by this kfunc which is not subsequently stored in a map, must
* be released by calling bpf_task_release().
* @pp: A pointer to a task kptr on which a reference is being acquired.
*/
__bpf_kfunc struct task_struct *bpf_task_kptr_get(struct task_struct **pp)
{
/* We must return NULL here until we have clarity on how to properly
* leverage RCU for ensuring a task's lifetime. See the comment above
* in bpf_task_acquire_not_zero() for more details.
*/
if (refcount_inc_not_zero(&p->rcu_users))
return p;
return NULL;
}
@ -2089,7 +2025,7 @@ __bpf_kfunc struct task_struct *bpf_task_kptr_get(struct task_struct **pp)
*/
__bpf_kfunc void bpf_task_release(struct task_struct *p)
{
put_task_struct(p);
put_task_struct_rcu_user(p);
}
#ifdef CONFIG_CGROUPS
@ -2199,7 +2135,7 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
rcu_read_lock();
p = find_task_by_pid_ns(pid, &init_pid_ns);
if (p)
bpf_task_acquire(p);
p = bpf_task_acquire(p);
rcu_read_unlock();
return p;
@ -2371,9 +2307,7 @@ BTF_ID_FLAGS(func, bpf_list_push_front)
BTF_ID_FLAGS(func, bpf_list_push_back)
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_task_acquire_not_zero, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE)
BTF_ID_FLAGS(func, bpf_rbtree_add)

View File

@ -4600,6 +4600,7 @@ BTF_SET_START(rcu_protected_types)
BTF_ID(struct, prog_test_ref_kfunc)
BTF_ID(struct, cgroup)
BTF_ID(struct, bpf_cpumask)
BTF_ID(struct, task_struct)
BTF_SET_END(rcu_protected_types)
static bool rcu_protected_object(const struct btf *btf, u32 btf_id)

View File

@ -73,11 +73,12 @@ static const char * const success_tests[] = {
"test_task_acquire_release_current",
"test_task_acquire_leave_in_map",
"test_task_xchg_release",
"test_task_get_release",
"test_task_map_acquire_release",
"test_task_current_acquire_release",
"test_task_from_pid_arg",
"test_task_from_pid_current",
"test_task_from_pid_invalid",
"task_kfunc_acquire_trusted_walked",
};
void test_task_kfunc(void)

View File

@ -23,7 +23,7 @@ struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
void bpf_key_put(struct bpf_key *key) __ksym;
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p) __ksym;
struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
void bpf_task_release(struct task_struct *p) __ksym;
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
@ -159,13 +159,8 @@ int task_acquire(void *ctx)
goto out;
/* acquire a reference which can be used outside rcu read lock region */
gparent = bpf_task_acquire_not_zero(gparent);
gparent = bpf_task_acquire(gparent);
if (!gparent)
/* Until we resolve the issues with using task->rcu_users, we
* expect bpf_task_acquire_not_zero() to return a NULL task.
* See the comment at the definition of
* bpf_task_acquire_not_zero() for more details.
*/
goto out;
(void)bpf_task_storage_get(&map_a, gparent, 0, 0);

View File

@ -21,9 +21,10 @@ struct hash_map {
} __tasks_kfunc_map SEC(".maps");
struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
struct task_struct *bpf_task_kptr_get(struct task_struct **pp) __ksym;
void bpf_task_release(struct task_struct *p) __ksym;
struct task_struct *bpf_task_from_pid(s32 pid) __ksym;
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
static inline struct __tasks_kfunc_map_value *tasks_kfunc_map_value_lookup(struct task_struct *p)
{
@ -60,6 +61,9 @@ static inline int tasks_kfunc_map_insert(struct task_struct *p)
}
acquired = bpf_task_acquire(p);
if (!acquired)
return -ENOENT;
old = bpf_kptr_xchg(&v->task, acquired);
if (old) {
bpf_task_release(old);

View File

@ -40,6 +40,9 @@ int BPF_PROG(task_kfunc_acquire_untrusted, struct task_struct *task, u64 clone_f
/* Can't invoke bpf_task_acquire() on an untrusted pointer. */
acquired = bpf_task_acquire(v->task);
if (!acquired)
return 0;
bpf_task_release(acquired);
return 0;
@ -53,38 +56,49 @@ int BPF_PROG(task_kfunc_acquire_fp, struct task_struct *task, u64 clone_flags)
/* Can't invoke bpf_task_acquire() on a random frame pointer. */
acquired = bpf_task_acquire((struct task_struct *)&stack_task);
if (!acquired)
return 0;
bpf_task_release(acquired);
return 0;
}
SEC("kretprobe/free_task")
__failure __msg("reg type unsupported for arg#0 function")
__failure __msg("calling kernel function bpf_task_acquire is not allowed")
int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe, struct task_struct *task, u64 clone_flags)
{
struct task_struct *acquired;
/* Can't call bpf_task_acquire() or bpf_task_release() in an untrusted prog. */
acquired = bpf_task_acquire(task);
/* Can't release a bpf_task_acquire()'d task without a NULL check. */
if (!acquired)
return 0;
bpf_task_release(acquired);
return 0;
}
SEC("tp_btf/task_newtask")
__failure __msg("R1 must be referenced or trusted")
int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 clone_flags)
SEC("kretprobe/free_task")
__failure __msg("calling kernel function bpf_task_acquire is not allowed")
int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe_rcu, struct task_struct *task, u64 clone_flags)
{
struct task_struct *acquired;
/* Can't invoke bpf_task_acquire() on a trusted pointer obtained from walking a struct. */
acquired = bpf_task_acquire(task->group_leader);
bpf_task_release(acquired);
bpf_rcu_read_lock();
if (!task) {
bpf_rcu_read_unlock();
return 0;
}
/* Can't call bpf_task_acquire() or bpf_task_release() in an untrusted prog. */
acquired = bpf_task_acquire(task);
if (acquired)
bpf_task_release(acquired);
bpf_rcu_read_unlock();
return 0;
}
SEC("tp_btf/task_newtask")
__failure __msg("Possibly NULL pointer passed to trusted arg0")
int BPF_PROG(task_kfunc_acquire_null, struct task_struct *task, u64 clone_flags)
@ -114,57 +128,6 @@ int BPF_PROG(task_kfunc_acquire_unreleased, struct task_struct *task, u64 clone_
return 0;
}
SEC("tp_btf/task_newtask")
__failure __msg("arg#0 expected pointer to map value")
int BPF_PROG(task_kfunc_get_non_kptr_param, struct task_struct *task, u64 clone_flags)
{
struct task_struct *kptr;
/* Cannot use bpf_task_kptr_get() on a non-kptr, even on a valid task. */
kptr = bpf_task_kptr_get(&task);
if (!kptr)
return 0;
bpf_task_release(kptr);
return 0;
}
SEC("tp_btf/task_newtask")
__failure __msg("arg#0 expected pointer to map value")
int BPF_PROG(task_kfunc_get_non_kptr_acquired, struct task_struct *task, u64 clone_flags)
{
struct task_struct *kptr, *acquired;
acquired = bpf_task_acquire(task);
/* Cannot use bpf_task_kptr_get() on a non-kptr, even if it was acquired. */
kptr = bpf_task_kptr_get(&acquired);
bpf_task_release(acquired);
if (!kptr)
return 0;
bpf_task_release(kptr);
return 0;
}
SEC("tp_btf/task_newtask")
__failure __msg("arg#0 expected pointer to map value")
int BPF_PROG(task_kfunc_get_null, struct task_struct *task, u64 clone_flags)
{
struct task_struct *kptr;
/* Cannot use bpf_task_kptr_get() on a NULL pointer. */
kptr = bpf_task_kptr_get(NULL);
if (!kptr)
return 0;
bpf_task_release(kptr);
return 0;
}
SEC("tp_btf/task_newtask")
__failure __msg("Unreleased reference")
int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_flags)
@ -186,21 +149,14 @@ int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_fla
}
SEC("tp_btf/task_newtask")
__failure __msg("Unreleased reference")
int BPF_PROG(task_kfunc_get_unreleased, struct task_struct *task, u64 clone_flags)
__failure __msg("Possibly NULL pointer passed to trusted arg0")
int BPF_PROG(task_kfunc_acquire_release_no_null_check, struct task_struct *task, u64 clone_flags)
{
struct task_struct *kptr;
struct __tasks_kfunc_map_value *v;
struct task_struct *acquired;
v = insert_lookup_task(task);
if (!v)
return 0;
kptr = bpf_task_kptr_get(&v->task);
if (!kptr)
return 0;
/* Kptr acquired above is never released. */
acquired = bpf_task_acquire(task);
/* Can't invoke bpf_task_release() on an acquired task without a NULL check. */
bpf_task_release(acquired);
return 0;
}
@ -256,12 +212,13 @@ int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags)
return -ENOENT;
acquired = bpf_task_acquire(task);
if (!acquired)
return -EEXIST;
old = bpf_kptr_xchg(&v->task, acquired);
/* old cannot be passed to bpf_task_release() without a NULL check. */
bpf_task_release(old);
bpf_task_release(old);
return 0;
}
@ -298,6 +255,9 @@ int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task)
/* the argument of lsm task_free hook is untrusted. */
acquired = bpf_task_acquire(task);
if (!acquired)
return 0;
bpf_task_release(acquired);
return 0;
}
@ -337,3 +297,30 @@ int BPF_PROG(task_access_comm4, struct task_struct *task, const char *buf, bool
bpf_strncmp(task->comm, 16, "foo");
return 0;
}
/*
 * Negative test: annotated with __failure, and the expected verifier message
 * is "R1 must be referenced or trusted".
 *
 * The program calls tasks_kfunc_map_insert() on @task, looks the map value
 * back up, reads the task pointer stored in that value inside an RCU read
 * region, and then passes that pointer to bpf_task_release() without ever
 * having acquired a reference on it.
 */
SEC("tp_btf/task_newtask")
__failure __msg("R1 must be referenced or trusted")
int BPF_PROG(task_kfunc_release_in_map, struct task_struct *task, u64 clone_flags)
{
struct task_struct *local;
struct __tasks_kfunc_map_value *v;
/* A non-zero return from the insert helper ends the program early. */
if (tasks_kfunc_map_insert(task))
return 0;
v = tasks_kfunc_map_value_lookup(task);
if (!v)
return 0;
/* Read the stored task pointer under bpf_rcu_read_lock(), with a NULL check. */
bpf_rcu_read_lock();
local = v->task;
if (!local) {
/* Leave the RCU read region before returning on the NULL path. */
bpf_rcu_read_unlock();
return 0;
}
/* Can't release a kptr that's still stored in a map. */
bpf_task_release(local);
bpf_rcu_read_unlock();
return 0;
}

View File

@ -47,7 +47,10 @@ static int test_acquire_release(struct task_struct *task)
}
acquired = bpf_task_acquire(task);
bpf_task_release(acquired);
if (acquired)
bpf_task_release(acquired);
else
err = 6;
return 0;
}
@ -119,7 +122,7 @@ int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags)
}
SEC("tp_btf/task_newtask")
int BPF_PROG(test_task_get_release, struct task_struct *task, u64 clone_flags)
int BPF_PROG(test_task_map_acquire_release, struct task_struct *task, u64 clone_flags)
{
struct task_struct *kptr;
struct __tasks_kfunc_map_value *v;
@ -140,18 +143,18 @@ int BPF_PROG(test_task_get_release, struct task_struct *task, u64 clone_flags)
return 0;
}
kptr = bpf_task_kptr_get(&v->task);
if (kptr) {
/* Until we resolve the issues with using task->rcu_users, we
* expect bpf_task_kptr_get() to return a NULL task. See the
* comment at the definition of bpf_task_acquire_not_zero() for
* more details.
*/
bpf_task_release(kptr);
bpf_rcu_read_lock();
kptr = v->task;
if (!kptr) {
err = 3;
return 0;
} else {
kptr = bpf_task_acquire(kptr);
if (!kptr)
err = 4;
else
bpf_task_release(kptr);
}
bpf_rcu_read_unlock();
return 0;
}
@ -166,7 +169,10 @@ int BPF_PROG(test_task_current_acquire_release, struct task_struct *task, u64 cl
current = bpf_get_current_task_btf();
acquired = bpf_task_acquire(current);
bpf_task_release(acquired);
if (acquired)
bpf_task_release(acquired);
else
err = 1;
return 0;
}
@ -241,3 +247,19 @@ int BPF_PROG(test_task_from_pid_invalid, struct task_struct *task, u64 clone_fla
return 0;
}
/*
 * Acquires a reference on task->group_leader with bpf_task_acquire() and
 * releases it again. If the acquire returns NULL, err is set to 1 instead.
 *
 * NOTE(review): err is declared outside this view; presumably it is the
 * failure indicator inspected by the userspace side of the test - confirm.
 */
SEC("tp_btf/task_newtask")
int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 clone_flags)
{
struct task_struct *acquired;
/* task->group_leader is listed as a trusted, non-NULL field of task struct. */
acquired = bpf_task_acquire(task->group_leader);
/* Only a non-NULL result is handed to bpf_task_release(). */
if (acquired)
bpf_task_release(acquired);
else
err = 1;
return 0;
}