seccomp updates for v6.11-rc1

- interrupt SECCOMP_IOCTL_NOTIF_RECV when all users exit (Andrei Vagin)
 
 - Update selftests to check for expected NOTIF_RECV exits (Andrei Vagin)
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEpcP2jyKd1g9yPm4TiXL039xtwCYFAmaVTOkACgkQiXL039xt
 wCayUA//fJTZUq+idihdKMql9SEwYh0uIJQpGOxxBEesUksZUM4alTCswzmheLFL
 q9BxlWWJJfUsp3djeZK+0vnDv3izaR+LfA1JPEJf64ImbympbEenVXK0ZkmVrTqg
 bNAlD5c/LVpzYtB7cnOaglq18uUja6/E+EQvNYz5NLHrIhYYCieJZIiFATkHQ9Lj
 3Wq3g9FWEa5pZxpKbEI3UA2HllADnmHeb/Z78Zdvyue5lOOvsBIheQfL4m0pW38x
 xgBWNglIg7b+X+YgwYSv8w50Lhn4SJVtIynWnwzBz19qFJRQL7oJRj1zyFHZPCwZ
 ajHVIj5LOuts/BYxSiGzczxVqZaAqeOyCY5e8G+Mjk5ZD5kLYznbbcrIFIUIaHpx
 rpRD/TVVwJ3PHsOIpWHwrXKgoKnbe/0n8lJT+Ehnm/2lLrlGyZj9hLyl6/+JsizE
 dGIWgE2emykYI+52IRRYSZaw4hLb+CU52d1vd5a35wUk1ie5fcVGZAWnaul23x0I
 maQtXcyB6tYuhX3oPnxoxFVqGvCKGi3Tc5N+Vg4JR/RzTy2H7fZ02DRZq8Vs0QST
 EgO3cpCD3035qxkK6ivaV4ebPJjkL158D/+uFyne+PWQlSfrmXJvhfggPFA+Oqtb
 y8PTUV73+HxDiruXbGZ7MD8C7d+ZvGI/D+xheohYrijhivcXxcc=
 =tlFN
 -----END PGP SIGNATURE-----

Merge tag 'seccomp-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux

Pull seccomp updates from Kees Cook:

 - interrupt SECCOMP_IOCTL_NOTIF_RECV when all users exit (Andrei Vagin)

 - Update selftests to check for expected NOTIF_RECV exits (Andrei
   Vagin)

* tag 'seccomp-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
  selftests/seccomp: check that a zombie leader doesn't affect others
  selftests/seccomp: add test for NOTIF_RECV and unused filters
  seccomp: release task filters when the task exits
  seccomp: interrupt SECCOMP_IOCTL_NOTIF_RECV when all users have exited
This commit is contained in:
Linus Torvalds 2024-07-16 13:12:16 -07:00
commit 1ca995edf8
3 changed files with 157 additions and 7 deletions

View File

@ -277,7 +277,6 @@ repeat:
}
write_unlock_irq(&tasklist_lock);
seccomp_filter_release(p);
proc_flush_pid(thread_pid);
put_pid(thread_pid);
release_thread(p);
@ -834,6 +833,8 @@ void __noreturn do_exit(long code)
io_uring_files_cancel();
exit_signals(tsk); /* sets PF_EXITING */
seccomp_filter_release(tsk);
acct_update_integrals(tsk);
group_dead = atomic_dec_and_test(&tsk->signal->live);
if (group_dead) {

View File

@ -502,6 +502,9 @@ static inline pid_t seccomp_can_sync_threads(void)
/* Skip current, since it is initiating the sync. */
if (thread == caller)
continue;
/* Skip exited threads. */
if (thread->flags & PF_EXITING)
continue;
if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||
(thread->seccomp.mode == SECCOMP_MODE_FILTER &&
@ -563,18 +566,21 @@ static void __seccomp_filter_release(struct seccomp_filter *orig)
* @tsk: task the filter should be released from.
*
* This function should only be called when the task is exiting as
* it detaches it from its filter tree. As such, READ_ONCE() and
* barriers are not needed here, as would normally be needed.
* it detaches it from its filter tree. PF_EXITING has to be set
* for the task.
*/
void seccomp_filter_release(struct task_struct *tsk)
{
	struct seccomp_filter *orig;

	/*
	 * Detaching the filter is only valid for a task that is already
	 * exiting; warn and leave the filter in place otherwise.
	 */
	if (WARN_ON((tsk->flags & PF_EXITING) == 0))
		return;

	/*
	 * Read and clear the filter pointer under siglock. The reference
	 * itself is dropped only after the lock has been released.
	 */
	spin_lock_irq(&tsk->sighand->siglock);
	orig = tsk->seccomp.filter;
	/* Detach task from its filter tree. */
	tsk->seccomp.filter = NULL;
	spin_unlock_irq(&tsk->sighand->siglock);

	__seccomp_filter_release(orig);
}
@ -602,6 +608,13 @@ static inline void seccomp_sync_threads(unsigned long flags)
if (thread == caller)
continue;
/*
* Skip exited threads. seccomp_filter_release could have
* been already called for this task.
*/
if (thread->flags & PF_EXITING)
continue;
/* Get a task reference for the new leaf node. */
get_seccomp_filter(caller);
@ -1466,7 +1479,7 @@ static int recv_wake_function(wait_queue_entry_t *wait, unsigned int mode, int s
void *key)
{
/* Avoid a wakeup if event not interesting for us. */
if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR | EPOLLHUP)))
return 0;
return autoremove_wake_function(wait, mode, sync, key);
}
@ -1476,6 +1489,9 @@ static int recv_wait_event(struct seccomp_filter *filter)
DEFINE_WAIT_FUNC(wait, recv_wake_function);
int ret;
if (refcount_read(&filter->users) == 0)
return 0;
if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
return 0;
@ -1484,6 +1500,8 @@ static int recv_wait_event(struct seccomp_filter *filter)
if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
break;
if (refcount_read(&filter->users) == 0)
break;
if (ret)
return ret;

View File

@ -3954,6 +3954,60 @@ TEST(user_notification_filter_empty)
EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
}
/*
 * Check that SECCOMP_IOCTL_NOTIF_RECV fails with ENOENT once the only task
 * using the filter has exited, instead of leaving the caller waiting.
 */
TEST(user_ioctl_notification_filter_empty)
{
	/* Fixed descriptor number on which the child parks the listener. */
	const int listener_fd = 200;
	pid_t pid;
	long ret;
	int status, p[2];
	struct __clone_args args = {
		/* Child and parent use one descriptor table. */
		.flags = CLONE_FILES,
		.exit_signal = SIGCHLD,
	};
	struct seccomp_notif req = {};

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
	}

	if (__NR_clone3 < 0)
		SKIP(return, "Test not built with clone3 support");

	ASSERT_EQ(0, pipe(p));

	pid = sys_clone3(&args, sizeof(args));
	ASSERT_GE(pid, 0);

	if (pid == 0) {
		int listener;

		listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
		if (listener < 0)
			_exit(EXIT_FAILURE);

		if (dup2(listener, listener_fd) != listener_fd)
			_exit(EXIT_FAILURE);
		/*
		 * Closing the write end is the "listener is ready" signal:
		 * the parent's read() below then sees end-of-file.
		 */
		close(p[1]);
		close(listener);
		/*
		 * NOTE(review): presumably gives the parent time to block in
		 * SECCOMP_IOCTL_NOTIF_RECV before this task exits - confirm.
		 */
		sleep(1);

		_exit(EXIT_SUCCESS);
	}

	/*
	 * Only end-of-file (0) means the child installed the listener. Any
	 * other result must fail the test; exiting with success here would
	 * hide the fact that the ioctl below was never exercised.
	 */
	ASSERT_EQ(0, read(p[0], &status, 1));
	close(p[0]);

	/*
	 * The seccomp filter has become unused so we should be notified once
	 * the kernel gets around to cleaning up task struct.
	 */
	EXPECT_EQ(ioctl(listener_fd, SECCOMP_IOCTL_NOTIF_RECV, &req), -1);
	EXPECT_EQ(errno, ENOENT);

	EXPECT_EQ(waitpid(pid, &status, 0), pid);
	EXPECT_EQ(true, WIFEXITED(status));
	EXPECT_EQ(0, WEXITSTATUS(status));
}
static void *do_thread(void *data)
{
return NULL;
@ -4755,6 +4809,83 @@ TEST(user_notification_wait_killable_fatal)
EXPECT_EQ(SIGTERM, WTERMSIG(status));
}
/*
 * Argument handed to tsync_vs_dead_thread_leader_sibling(): identifies the
 * thread the sibling must join before it installs its own filter.
 */
struct tsync_vs_thread_leader_args {
/* pthread handle of the thread that created the sibling. */
pthread_t leader;
};
static void *tsync_vs_dead_thread_leader_sibling(void *_args)
{
struct sock_filter allow_filter[] = {
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog allow_prog = {
.len = (unsigned short)ARRAY_SIZE(allow_filter),
.filter = allow_filter,
};
struct tsync_vs_thread_leader_args *args = _args;
void *retval;
long ret;
ret = pthread_join(args->leader, &retval);
if (ret)
exit(1);
if (retval != _args)
exit(2);
ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &allow_prog);
if (ret)
exit(3);
exit(0);
}
/*
 * Ensure that a dead thread leader doesn't prevent installing new filters with
 * SECCOMP_FILTER_FLAG_TSYNC from other threads.
 *
 * The scenario runs in a forked child: its initial thread starts a sibling,
 * installs a filter on itself only, and then terminates via pthread_exit()
 * while the sibling keeps the process alive and attempts the TSYNC install.
 */
TEST(tsync_vs_dead_thread_leader)
{
int status;
pid_t pid;
long ret;
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
pid = fork();
ASSERT_GE(pid, 0);
if (pid == 0) {
struct sock_filter allow_filter[] = {
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog allow_prog = {
.len = (unsigned short)ARRAY_SIZE(allow_filter),
.filter = allow_filter,
};
struct tsync_vs_thread_leader_args *args;
pthread_t sibling;
/*
 * Heap-allocated and intentionally not freed: the sibling still
 * dereferences it after this thread has exited.
 */
args = malloc(sizeof(*args));
ASSERT_NE(NULL, args);
/* Tell the sibling which thread it has to join. */
args->leader = pthread_self();
ret = pthread_create(&sibling, NULL,
tsync_vs_dead_thread_leader_sibling, args);
ASSERT_EQ(0, ret);
/* Install a new filter just to the leader thread. */
ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
ASSERT_EQ(0, ret);
/*
 * Terminate only this thread; args is the join value the sibling
 * compares against its own argument.
 */
pthread_exit(args);
/* Safety net: only reached if pthread_exit() were to return. */
exit(1);
}
/*
 * The child's exit status is set by the sibling thread's exit() call;
 * a raw wait status of 0 means a normal exit with code 0.
 */
EXPECT_EQ(pid, waitpid(pid, &status, 0));
EXPECT_EQ(0, status);
}
/*
* TODO:
* - expand NNP testing