- Fix task exposure order when forking tasks

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmISLs0ACgkQEsHwGGHe
 VUrq+g/+NxLVl3QTmteNh4jEzA/3FDccRKbALNs09YX116nCOZ1sv1Z5FgtdOLUT
 ukIujAm1SKDXQrXoglmm3eH6ylWZ54DQo6qrP+vzavtIKSgbt+vPS63qDR4ehF8N
 Kh04qssWy+AzOSeZRQ6lXTiGei2nBKFbiwlwPnV5vbQmLBFvzv2dEErVFYkHwWN6
 foygRUOhslv5aizwpDriAvDq9ZTVUPXqzIi5zWnTON8y8Vy32eZjSJKez8SQH57w
 vbEZkJTw33tCzG/5+J5mh3UmP9Mcj34W/GDCKHxjO1Y38SbLTMy2Agl5hbRKjVZ8
 W4wElyXyXtLx1RsrZpOJ90mhqwADcxLgkq4ipl6uzikeQlQVnOOBsHZOi3NkTorg
 1YhpxZhqzWWotdGUQwzfZYSuoqQAyUxeqZKwqUD+dYsEqQWhTWSa41IWcfz+r8UD
 uD+pO03EbAEzBB+BSJL9xh0xnchdOnHbpTSFo1AJeQ+LUKanXxO2tXk9/3SwliMj
 M751vPtQH6DiVukSfywxJym+aaLbjeju6RvA69Atap51roYXPSsaCHGDwLeNUS+w
 5D6KC0IuCxsliLpAD4c4PXkRdTvwnG/0lUt+s83bGnyfh5nQzfXXbBp3I5+o0Fcq
 jIQ37uH2Mj5MKtmC8o4ekCtNTtWM8QNZvKzMzoju616Wqee0Ag4=
 =RagE
 -----END PGP SIGNATURE-----

Merge tag 'sched_urgent_for_v5.17_rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fix from Borislav Petkov:
 "Fix task exposure order when forking tasks"

* tag 'sched_urgent_for_v5.17_rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched: Fix yet more sched_fork() races
This commit is contained in:
Linus Torvalds 2022-02-20 12:40:20 -08:00
commit 0b0894ff78
3 changed files with 35 additions and 16 deletions

View File

@ -54,8 +54,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
extern void init_idle(struct task_struct *idle, int cpu);
extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
extern void sched_post_fork(struct task_struct *p,
struct kernel_clone_args *kargs);
extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs);
extern void sched_post_fork(struct task_struct *p);
extern void sched_dead(struct task_struct *p);
void __noreturn do_task_dead(void);

View File

@ -2266,6 +2266,17 @@ static __latent_entropy struct task_struct *copy_process(
if (retval)
goto bad_fork_put_pidfd;
/*
* Now that the cgroups are pinned, re-clone the parent cgroup and put
* the new task on the correct runqueue. All this *before* the task
* becomes visible.
*
* This isn't part of ->can_fork() because while the re-cloning is
* cgroup specific, it unconditionally needs to place the task on a
* runqueue.
*/
sched_cgroup_fork(p, args);
/*
* From this point on we must avoid any synchronous user-space
* communication until we take the tasklist-lock. In particular, we do
@ -2375,7 +2386,7 @@ static __latent_entropy struct task_struct *copy_process(
fd_install(pidfd, pidfile);
proc_fork_connector(p);
sched_post_fork(p, args);
sched_post_fork(p);
cgroup_post_fork(p, args);
perf_event_fork(p);

View File

@ -1214,9 +1214,8 @@ int tg_nop(struct task_group *tg, void *data)
}
#endif
static void set_load_weight(struct task_struct *p)
static void set_load_weight(struct task_struct *p, bool update_load)
{
bool update_load = !(READ_ONCE(p->__state) & TASK_NEW);
int prio = p->static_prio - MAX_RT_PRIO;
struct load_weight *load = &p->se.load;
@ -4407,7 +4406,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
p->static_prio = NICE_TO_PRIO(0);
p->prio = p->normal_prio = p->static_prio;
set_load_weight(p);
set_load_weight(p, false);
/*
* We don't need the reset flag anymore after the fork. It has
@ -4425,6 +4424,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
init_entity_runnable_average(&p->se);
#ifdef CONFIG_SCHED_INFO
if (likely(sched_info_on()))
memset(&p->sched_info, 0, sizeof(p->sched_info));
@ -4440,18 +4440,23 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
return 0;
}
void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs)
void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
{
unsigned long flags;
#ifdef CONFIG_CGROUP_SCHED
struct task_group *tg;
#endif
/*
* Because we're not yet on the pid-hash, p->pi_lock isn't strictly
* required yet, but lockdep gets upset if rules are violated.
*/
raw_spin_lock_irqsave(&p->pi_lock, flags);
#ifdef CONFIG_CGROUP_SCHED
tg = container_of(kargs->cset->subsys[cpu_cgrp_id],
struct task_group, css);
p->sched_task_group = autogroup_task_group(p, tg);
if (1) {
struct task_group *tg;
tg = container_of(kargs->cset->subsys[cpu_cgrp_id],
struct task_group, css);
tg = autogroup_task_group(p, tg);
p->sched_task_group = tg;
}
#endif
rseq_migrate(p);
/*
@ -4462,7 +4467,10 @@ void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs)
if (p->sched_class->task_fork)
p->sched_class->task_fork(p);
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
}
void sched_post_fork(struct task_struct *p)
{
uclamp_post_fork(p);
}
@ -6922,7 +6930,7 @@ void set_user_nice(struct task_struct *p, long nice)
put_prev_task(rq, p);
p->static_prio = NICE_TO_PRIO(nice);
set_load_weight(p);
set_load_weight(p, true);
old_prio = p->prio;
p->prio = effective_prio(p);
@ -7213,7 +7221,7 @@ static void __setscheduler_params(struct task_struct *p,
*/
p->rt_priority = attr->sched_priority;
p->normal_prio = normal_prio(p);
set_load_weight(p);
set_load_weight(p, true);
}
/*
@ -9446,7 +9454,7 @@ void __init sched_init(void)
#endif
}
set_load_weight(&init_task);
set_load_weight(&init_task, false);
/*
* The boot idle thread does lazy MMU switching as well: