nohz/full, sched/rt: Fix missed tick-reenabling bug in dequeue_task_rt()

dequeue_task_rt() only decrements 'rt_rq->rt_nr_running' after having called sched_update_tick_dependency() preventing it from re-enabling the tick on systems that no longer have pending SCHED_RT tasks but have multiple runnable SCHED_OTHER tasks: dequeue_task_rt() dequeue_rt_entity() dequeue_rt_stack() dequeue_top_rt_rq() sub_nr_running() // decrements rq->nr_running sched_update_tick_dependency() sched_can_stop_tick() // checks rq->rt.rt_nr_running, ... __dequeue_rt_entity() dec_rt_tasks() // decrements rq->rt.rt_nr_running ... Every other scheduler class performs the operation in the opposite order, and sched_update_tick_dependency() expects the values to be updated as such. So avoid the misbehaviour by inverting the order in which the above operations are performed in the RT scheduler. Fixes: 76d92ac305 ("sched: Migrate sched to use new tick dependency mask model") Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Valentin Schneider <vschneid@redhat.com> Reviewed-by: Phil Auld <pauld@redhat.com> Link: https://lore.kernel.org/r/20220628092259.330171-1-nsaenzju@redhat.com
2024-12-28 13:51:44 +00:00 · 2022-06-28 11:22:59 +02:00 · 2022-06-28 11:22:59 +02:00 · 5c66d1b9b3
commit 5c66d1b9b3
parent 401e4963bf
1 changed files with 9 additions and 6 deletions
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@ -480,7 +480,7 @@ static inline void rt_queue_push_tasks(struct rq *rq)
 #endif /* CONFIG_SMP */

 static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
-static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
+static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count);

 static inline int on_rt_rq(struct sched_rt_entity *rt_se)
 {
@ -601,7 +601,7 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 	rt_se = rt_rq->tg->rt_se[cpu];

 	if (!rt_se) {
-		dequeue_top_rt_rq(rt_rq);
+		dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
 		/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
 		cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
 	}
@ -687,7 +687,7 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)

 static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 {
-	dequeue_top_rt_rq(rt_rq);
+	dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
 }

 static inline int rt_rq_throttled(struct rt_rq *rt_rq)
@ -1089,7 +1089,7 @@ static void update_curr_rt(struct rq *rq)
 }

 static void
-dequeue_top_rt_rq(struct rt_rq *rt_rq)
+dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count)
 {
 	struct rq *rq = rq_of_rt_rq(rt_rq);

@ -1100,7 +1100,7 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)

 	BUG_ON(!rq->nr_running);

-	sub_nr_running(rq, rt_rq->rt_nr_running);
+	sub_nr_running(rq, count);
 	rt_rq->rt_queued = 0;

 }
@ -1486,18 +1486,21 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
 static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
 {
 	struct sched_rt_entity *back = NULL;
+	unsigned int rt_nr_running;

 	for_each_sched_rt_entity(rt_se) {
 		rt_se->back = back;
 		back = rt_se;
 	}

-	dequeue_top_rt_rq(rt_rq_of_se(back));
+	rt_nr_running = rt_rq_of_se(back)->rt_nr_running;

 	for (rt_se = back; rt_se; rt_se = rt_se->back) {
 		if (on_rt_rq(rt_se))
 			__dequeue_rt_entity(rt_se, flags);
 	}
+
+	dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
 }

 static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)