sched: fix inconsistency when redistributing per-cpu tg->cfs_rq shares
Impact: make load-balancing more consistent. In the update_shares() path leading to tg_shares_up(), the calculation of per-cpu cfs_rq shares is rather erratic even under a moderate task wake-up rate. The problem is that the per-cpu tg->cfs_rq load weight used in the sd_rq_weight aggregation and the one used in the actual redistribution of the cfs_rq->shares are collected at different times. Under moderate system load, we've seen quite a bit of variation in the cfs_rq->shares, which ultimately and wildly affects the sched_entity's load weight. This patch caches the initial per-cpu load weight when doing the sum calculation, and then passes it down to update_group_shares_cpu() for redistributing the per-cpu cfs_rq shares. This allows consistent total cfs_rq shares across all CPUs. It also simplifies the rounding and the zero-load-weight check. Signed-off-by: Ken Chen <kenchen@google.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
3ac3ba0b39
commit
ec4e0e2fe0
@ -1453,27 +1453,13 @@ static void
|
||||
update_group_shares_cpu(struct task_group *tg, int cpu,
|
||||
unsigned long sd_shares, unsigned long sd_rq_weight)
|
||||
{
|
||||
int boost = 0;
|
||||
unsigned long shares;
|
||||
unsigned long rq_weight;
|
||||
|
||||
if (!tg->se[cpu])
|
||||
return;
|
||||
|
||||
rq_weight = tg->cfs_rq[cpu]->load.weight;
|
||||
|
||||
/*
|
||||
* If there are currently no tasks on the cpu pretend there is one of
|
||||
* average load so that when a new task gets to run here it will not
|
||||
* get delayed by group starvation.
|
||||
*/
|
||||
if (!rq_weight) {
|
||||
boost = 1;
|
||||
rq_weight = NICE_0_LOAD;
|
||||
}
|
||||
|
||||
if (unlikely(rq_weight > sd_rq_weight))
|
||||
rq_weight = sd_rq_weight;
|
||||
rq_weight = tg->cfs_rq[cpu]->rq_weight;
|
||||
|
||||
/*
|
||||
* \Sum shares * rq_weight
|
||||
@ -1481,7 +1467,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
|
||||
* \Sum rq_weight
|
||||
*
|
||||
*/
|
||||
shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
|
||||
shares = (sd_shares * rq_weight) / sd_rq_weight;
|
||||
shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
|
||||
|
||||
if (abs(shares - tg->se[cpu]->load.weight) >
|
||||
@ -1490,11 +1476,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&rq->lock, flags);
|
||||
/*
|
||||
* record the actual number of shares, not the boosted amount.
|
||||
*/
|
||||
tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
|
||||
tg->cfs_rq[cpu]->rq_weight = rq_weight;
|
||||
tg->cfs_rq[cpu]->shares = shares;
|
||||
|
||||
__set_se_shares(tg->se[cpu], shares);
|
||||
spin_unlock_irqrestore(&rq->lock, flags);
|
||||
@ -1508,13 +1490,23 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
|
||||
*/
|
||||
static int tg_shares_up(struct task_group *tg, void *data)
|
||||
{
|
||||
unsigned long rq_weight = 0;
|
||||
unsigned long weight, rq_weight = 0;
|
||||
unsigned long shares = 0;
|
||||
struct sched_domain *sd = data;
|
||||
int i;
|
||||
|
||||
for_each_cpu_mask(i, sd->span) {
|
||||
rq_weight += tg->cfs_rq[i]->load.weight;
|
||||
/*
|
||||
* If there are currently no tasks on the cpu pretend there
|
||||
* is one of average load so that when a new task gets to
|
||||
* run here it will not get delayed by group starvation.
|
||||
*/
|
||||
weight = tg->cfs_rq[i]->load.weight;
|
||||
if (!weight)
|
||||
weight = NICE_0_LOAD;
|
||||
|
||||
tg->cfs_rq[i]->rq_weight = weight;
|
||||
rq_weight += weight;
|
||||
shares += tg->cfs_rq[i]->shares;
|
||||
}
|
||||
|
||||
@ -1524,9 +1516,6 @@ static int tg_shares_up(struct task_group *tg, void *data)
|
||||
if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
|
||||
shares = tg->shares;
|
||||
|
||||
if (!rq_weight)
|
||||
rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
|
||||
|
||||
for_each_cpu_mask(i, sd->span)
|
||||
update_group_shares_cpu(tg, i, shares, rq_weight);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user