rcu: Cure load woes

Commit cc3ce5176d (rcu: Start RCU kthreads in TASK_INTERRUPTIBLE
state) fudges a sleeping task' state, resulting in the scheduler seeing
a TASK_UNINTERRUPTIBLE task going to sleep, but a TASK_INTERRUPTIBLE
task waking up. The result is unbalanced load calculation.

The problem that patch tried to address is that the RCU threads could
stay in UNINTERRUPTIBLE state for quite a while and triggering the hung
task detector due to on-demand wake-ups.

Cure the problem differently by always giving the tasks at least one
wake-up once the CPU is fully up and running, this will kick them out of
the initial UNINTERRUPTIBLE state and into the regular INTERRUPTIBLE
wait state.

[ The alternative would be teaching kthread_create() to start threads as
  INTERRUPTIBLE but that needs a tad more thought. ]

Reported-by: Damien Wyart <damien.wyart@free.fr>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul E. McKenney <paul.mckenney@linaro.org>
Link: http://lkml.kernel.org/r/1306755291.1200.2872.camel@twins
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Peter Zijlstra 2011-05-30 13:34:51 +02:00 committed by Ingo Molnar
parent 55922c9d1b
commit d72bce0e67
2 changed files with 56 additions and 9 deletions

View File

@ -1648,7 +1648,6 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
if (IS_ERR(t)) if (IS_ERR(t))
return PTR_ERR(t); return PTR_ERR(t);
kthread_bind(t, cpu); kthread_bind(t, cpu);
set_task_state(t, TASK_INTERRUPTIBLE);
per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu;
WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL);
per_cpu(rcu_cpu_kthread_task, cpu) = t; per_cpu(rcu_cpu_kthread_task, cpu) = t;
@ -1756,7 +1755,6 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
if (IS_ERR(t)) if (IS_ERR(t))
return PTR_ERR(t); return PTR_ERR(t);
raw_spin_lock_irqsave(&rnp->lock, flags); raw_spin_lock_irqsave(&rnp->lock, flags);
set_task_state(t, TASK_INTERRUPTIBLE);
rnp->node_kthread_task = t; rnp->node_kthread_task = t;
raw_spin_unlock_irqrestore(&rnp->lock, flags); raw_spin_unlock_irqrestore(&rnp->lock, flags);
sp.sched_priority = 99; sp.sched_priority = 99;
@ -1765,6 +1763,8 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index);
} }
static void rcu_wake_one_boost_kthread(struct rcu_node *rnp);
/* /*
* Spawn all kthreads -- called as soon as the scheduler is running. * Spawn all kthreads -- called as soon as the scheduler is running.
*/ */
@ -1772,18 +1772,30 @@ static int __init rcu_spawn_kthreads(void)
{ {
int cpu; int cpu;
struct rcu_node *rnp; struct rcu_node *rnp;
struct task_struct *t;
rcu_kthreads_spawnable = 1; rcu_kthreads_spawnable = 1;
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
per_cpu(rcu_cpu_has_work, cpu) = 0; per_cpu(rcu_cpu_has_work, cpu) = 0;
if (cpu_online(cpu)) if (cpu_online(cpu)) {
(void)rcu_spawn_one_cpu_kthread(cpu); (void)rcu_spawn_one_cpu_kthread(cpu);
t = per_cpu(rcu_cpu_kthread_task, cpu);
if (t)
wake_up_process(t);
}
} }
rnp = rcu_get_root(rcu_state); rnp = rcu_get_root(rcu_state);
(void)rcu_spawn_one_node_kthread(rcu_state, rnp); (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
if (rnp->node_kthread_task)
wake_up_process(rnp->node_kthread_task);
if (NUM_RCU_NODES > 1) { if (NUM_RCU_NODES > 1) {
rcu_for_each_leaf_node(rcu_state, rnp) rcu_for_each_leaf_node(rcu_state, rnp) {
(void)rcu_spawn_one_node_kthread(rcu_state, rnp); (void)rcu_spawn_one_node_kthread(rcu_state, rnp);
t = rnp->node_kthread_task;
if (t)
wake_up_process(t);
rcu_wake_one_boost_kthread(rnp);
}
} }
return 0; return 0;
} }
@ -2188,14 +2200,14 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
raw_spin_unlock_irqrestore(&rsp->onofflock, flags); raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
} }
static void __cpuinit rcu_online_cpu(int cpu) static void __cpuinit rcu_prepare_cpu(int cpu)
{ {
rcu_init_percpu_data(cpu, &rcu_sched_state, 0); rcu_init_percpu_data(cpu, &rcu_sched_state, 0);
rcu_init_percpu_data(cpu, &rcu_bh_state, 0); rcu_init_percpu_data(cpu, &rcu_bh_state, 0);
rcu_preempt_init_percpu_data(cpu); rcu_preempt_init_percpu_data(cpu);
} }
static void __cpuinit rcu_online_kthreads(int cpu) static void __cpuinit rcu_prepare_kthreads(int cpu)
{ {
struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
struct rcu_node *rnp = rdp->mynode; struct rcu_node *rnp = rdp->mynode;
@ -2208,6 +2220,31 @@ static void __cpuinit rcu_online_kthreads(int cpu)
} }
} }
/*
* kthread_create() creates threads in TASK_UNINTERRUPTIBLE state,
* but the RCU threads are woken on demand, and if demand is low this
* could be a while triggering the hung task watchdog.
*
* In order to avoid this, poke all tasks once the CPU is fully
* up and running.
*/
static void __cpuinit rcu_online_kthreads(int cpu)
{
struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
struct rcu_node *rnp = rdp->mynode;
struct task_struct *t;
t = per_cpu(rcu_cpu_kthread_task, cpu);
if (t)
wake_up_process(t);
t = rnp->node_kthread_task;
if (t)
wake_up_process(t);
rcu_wake_one_boost_kthread(rnp);
}
/* /*
* Handle CPU online/offline notification events. * Handle CPU online/offline notification events.
*/ */
@ -2221,10 +2258,11 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
switch (action) { switch (action) {
case CPU_UP_PREPARE: case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN: case CPU_UP_PREPARE_FROZEN:
rcu_online_cpu(cpu); rcu_prepare_cpu(cpu);
rcu_online_kthreads(cpu); rcu_prepare_kthreads(cpu);
break; break;
case CPU_ONLINE: case CPU_ONLINE:
rcu_online_kthreads(cpu);
case CPU_DOWN_FAILED: case CPU_DOWN_FAILED:
rcu_node_kthread_setaffinity(rnp, -1); rcu_node_kthread_setaffinity(rnp, -1);
rcu_cpu_kthread_setrt(cpu, 1); rcu_cpu_kthread_setrt(cpu, 1);

View File

@ -1295,7 +1295,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
if (IS_ERR(t)) if (IS_ERR(t))
return PTR_ERR(t); return PTR_ERR(t);
raw_spin_lock_irqsave(&rnp->lock, flags); raw_spin_lock_irqsave(&rnp->lock, flags);
set_task_state(t, TASK_INTERRUPTIBLE);
rnp->boost_kthread_task = t; rnp->boost_kthread_task = t;
raw_spin_unlock_irqrestore(&rnp->lock, flags); raw_spin_unlock_irqrestore(&rnp->lock, flags);
sp.sched_priority = RCU_KTHREAD_PRIO; sp.sched_priority = RCU_KTHREAD_PRIO;
@ -1303,6 +1302,12 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
return 0; return 0;
} }
static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp)
{
if (rnp->boost_kthread_task)
wake_up_process(rnp->boost_kthread_task);
}
#else /* #ifdef CONFIG_RCU_BOOST */ #else /* #ifdef CONFIG_RCU_BOOST */
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
@ -1326,6 +1331,10 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
return 0; return 0;
} }
static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp)
{
}
#endif /* #else #ifdef CONFIG_RCU_BOOST */ #endif /* #else #ifdef CONFIG_RCU_BOOST */
#ifndef CONFIG_SMP #ifndef CONFIG_SMP