sched/fair: Prepare exit/cleanup paths for delayed_dequeue

When dequeue_task() is delayed it becomes possible to exit a task (or
cgroup) that is still enqueued. Ensure things are dequeued before
freeing.

Thanks to Valentin for asking the obvious questions and making
switched_from_fair() less weird.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Tested-by: Valentin Schneider <vschneid@redhat.com>
Link: https://lkml.kernel.org/r/20240727105029.631948434@infradead.org
This commit is contained in:
Peter Zijlstra 2024-05-23 11:03:42 +02:00
parent e28b5f8bda
commit 2e0199df25

View File

@ -8342,7 +8342,21 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
/*
 * task_dead_fair() - fair-class cleanup for task @p once it is dead.
 *
 * If @p's scheduling entity is still marked sched_delayed, it is dequeued
 * under the task's rq lock before its load average is removed, so that the
 * entity is not left enqueued when the task goes away.
 *
 * NOTE(review): this listing looks like a diff rendered without its +/-
 * markers, so lines from before and after the change appear side by side --
 * confirm against the real tree before treating it as compilable code.
 */
static void task_dead_fair(struct task_struct *p)
{
/* NOTE(review): presumably the pre-change call, superseded by the call at the end of this function -- confirm. */
remove_entity_load_avg(&p->se);
struct sched_entity *se = &p->se;
/* First test is made without the rq lock; the lock is only taken when the flag is set. */
if (se->sched_delayed) {
struct rq_flags rf;
struct rq *rq;
rq = task_rq_lock(p, &rf);
/* Test again now that the lock is held, since the first test was unlocked. */
if (se->sched_delayed) {
update_rq_clock(rq);
dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
}
task_rq_unlock(rq, p, &rf);
}
/* Load-average removal happens only after the delayed entity (if any) was dequeued. */
remove_entity_load_avg(se);
}
/*
@ -12854,10 +12868,22 @@ static void attach_task_cfs_rq(struct task_struct *p)
/*
 * switched_from_fair() - handle task @p having left the fair class on @rq.
 *
 * Detaches @p from its cfs_rq and, if its entity is still sched_delayed,
 * dequeues it and clears the delayed state by hand.
 */
static void switched_from_fair(struct rq *rq, struct task_struct *p)
{
detach_task_cfs_rq(p);
/*
* Since this is called after changing class, this is a little weird
* and we cannot use DEQUEUE_DELAYED.
*/
if (p->se.sched_delayed) {
/* NOTE(review): DEQUEUE_NOCLOCK presumably relies on the caller having updated the rq clock -- confirm. */
dequeue_task(rq, p, DEQUEUE_NOCLOCK | DEQUEUE_SLEEP);
/* DEQUEUE_DELAYED was not passed, so the flag is cleared explicitly here. */
p->se.sched_delayed = 0;
/* With the DELAY_ZERO feature enabled, a positive vlag is reset to zero. */
if (sched_feat(DELAY_ZERO) && p->se.vlag > 0)
p->se.vlag = 0;
}
}
static void switched_to_fair(struct rq *rq, struct task_struct *p)
{
SCHED_WARN_ON(p->se.sched_delayed);
attach_task_cfs_rq(p);
set_task_max_allowed_capacity(p);
@ -13008,28 +13034,35 @@ void online_fair_sched_group(struct task_group *tg)
/*
 * unregister_fair_sched_group() - tear down the fair-class state of @tg.
 *
 * Destroys the group's CFS bandwidth state, then for every possible CPU:
 * dequeues the group's entity if it is still sched_delayed, removes the
 * entity's load average, and takes the group's cfs_rq off the leaf list.
 *
 * NOTE(review): this listing looks like a diff rendered without its +/-
 * markers, so the old and the new loop bodies appear interleaved -- confirm
 * against the real tree which statements are live.
 */
void unregister_fair_sched_group(struct task_group *tg)
{
/* NOTE(review): 'flags' and this function-scope 'rq' are only used by the raw_spin_rq_lock_irqsave() sequence below; presumably pre-change lines -- confirm. */
unsigned long flags;
struct rq *rq;
int cpu;
destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
for_each_possible_cpu(cpu) {
/* NOTE(review): presumably the pre-change form of the 'if (se)' block below -- confirm. */
if (tg->se[cpu])
remove_entity_load_avg(tg->se[cpu]);
struct cfs_rq *cfs_rq = tg->cfs_rq[cpu];
struct sched_entity *se = tg->se[cpu];
struct rq *rq = cpu_rq(cpu);
if (se) {
/* First test is made without the rq lock; the lock is only taken when the flag is set. */
if (se->sched_delayed) {
/* NOTE(review): guard() presumably keeps the rq lock held until the end of the enclosing block -- confirm in linux/cleanup.h. */
guard(rq_lock_irqsave)(rq);
/* Test again after acquiring the lock, since the first test was unlocked. */
if (se->sched_delayed) {
update_rq_clock(rq);
dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
}
list_del_leaf_cfs_rq(cfs_rq);
}
/* Load-average removal happens only after the delayed entity (if any) was dequeued. */
remove_entity_load_avg(se);
}
/*
* Only empty task groups can be destroyed; so we can speculatively
* check on_list without danger of it being re-added.
*/
/* NOTE(review): the next six lines look like the pre-change open-coded lock/unlock variant of the on_list check that follows them -- confirm. */
if (!tg->cfs_rq[cpu]->on_list)
continue;
rq = cpu_rq(cpu);
raw_spin_rq_lock_irqsave(rq, flags);
list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
raw_spin_rq_unlock_irqrestore(rq, flags);
/* on_list is read without the lock (see the comment above); the list removal itself is done under the rq lock. */
if (cfs_rq->on_list) {
guard(rq_lock_irqsave)(rq);
list_del_leaf_cfs_rq(cfs_rq);
}
}
}