sched: reduce balance-tasks overhead
At the moment, balance_tasks() provides low level functionality for both move_tasks() and move_one_task() (indirectly) via the load_balance() function (in the sched_class interface), which also provides dual functionality. This dual functionality complicates the interfaces and internal mechanisms and increases the run time overhead of operations that are called with two run queue locks held. This patch addresses this issue and reduces the overhead of these operations. Signed-off-by: Peter Williams <pwil3058@bigpond.net.au> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
a0f846aa76
commit
e1d1484f72
@ -829,11 +829,14 @@ struct sched_class {
|
||||
void (*put_prev_task) (struct rq *rq, struct task_struct *p);
|
||||
|
||||
unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
|
||||
struct rq *busiest,
|
||||
unsigned long max_nr_move, unsigned long max_load_move,
|
||||
struct rq *busiest, unsigned long max_load_move,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle,
|
||||
int *all_pinned, int *this_best_prio);
|
||||
|
||||
int (*move_one_task) (struct rq *this_rq, int this_cpu,
|
||||
struct rq *busiest, struct sched_domain *sd,
|
||||
enum cpu_idle_type idle);
|
||||
|
||||
void (*set_curr_task) (struct rq *rq);
|
||||
void (*task_tick) (struct rq *rq, struct task_struct *p);
|
||||
void (*task_new) (struct rq *rq, struct task_struct *p);
|
||||
|
@ -838,11 +838,35 @@ struct rq_iterator {
|
||||
struct task_struct *(*next)(void *);
|
||||
};
|
||||
|
||||
static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
unsigned long max_nr_move, unsigned long max_load_move,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle,
|
||||
int *all_pinned, unsigned long *load_moved,
|
||||
int *this_best_prio, struct rq_iterator *iterator);
|
||||
#ifdef CONFIG_SMP
|
||||
static unsigned long
|
||||
balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
unsigned long max_load_move, struct sched_domain *sd,
|
||||
enum cpu_idle_type idle, int *all_pinned,
|
||||
int *this_best_prio, struct rq_iterator *iterator);
|
||||
|
||||
static int
|
||||
iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle,
|
||||
struct rq_iterator *iterator);
|
||||
#else
|
||||
/*
 * balance_tasks() stub for builds without CONFIG_SMP (this definition sits in
 * the #else branch of the CONFIG_SMP guard). It ignores all of its arguments
 * and reports that no load was moved by returning 0.
 */
static inline unsigned long
|
||||
balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
unsigned long max_load_move, struct sched_domain *sd,
|
||||
enum cpu_idle_type idle, int *all_pinned,
|
||||
int *this_best_prio, struct rq_iterator *iterator)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * iter_move_one_task() stub for builds without CONFIG_SMP (this definition
 * sits in the #else branch of the CONFIG_SMP guard). It ignores all of its
 * arguments and always returns 0, i.e. "no task was moved".
 */
static inline int
|
||||
iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle,
|
||||
struct rq_iterator *iterator)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#include "sched_stats.h"
|
||||
#include "sched_idletask.c"
|
||||
@ -2224,17 +2248,17 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
unsigned long max_nr_move, unsigned long max_load_move,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle,
|
||||
int *all_pinned, unsigned long *load_moved,
|
||||
int *this_best_prio, struct rq_iterator *iterator)
|
||||
static unsigned long
|
||||
balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
unsigned long max_load_move, struct sched_domain *sd,
|
||||
enum cpu_idle_type idle, int *all_pinned,
|
||||
int *this_best_prio, struct rq_iterator *iterator)
|
||||
{
|
||||
int pulled = 0, pinned = 0, skip_for_load;
|
||||
struct task_struct *p;
|
||||
long rem_load_move = max_load_move;
|
||||
|
||||
if (max_nr_move == 0 || max_load_move == 0)
|
||||
if (max_load_move == 0)
|
||||
goto out;
|
||||
|
||||
pinned = 1;
|
||||
@ -2267,7 +2291,7 @@ next:
|
||||
* We only want to steal up to the prescribed number of tasks
|
||||
* and the prescribed amount of weighted load.
|
||||
*/
|
||||
if (pulled < max_nr_move && rem_load_move > 0) {
|
||||
if (rem_load_move > 0) {
|
||||
if (p->prio < *this_best_prio)
|
||||
*this_best_prio = p->prio;
|
||||
p = iterator->next(iterator->arg);
|
||||
@ -2275,7 +2299,7 @@ next:
|
||||
}
|
||||
out:
|
||||
/*
|
||||
* Right now, this is the only place pull_task() is called,
|
||||
* Right now, this is one of only two places pull_task() is called,
|
||||
* so we can safely collect pull_task() stats here rather than
|
||||
* inside pull_task().
|
||||
*/
|
||||
@ -2283,8 +2307,8 @@ out:
|
||||
|
||||
if (all_pinned)
|
||||
*all_pinned = pinned;
|
||||
*load_moved = max_load_move - rem_load_move;
|
||||
return pulled;
|
||||
|
||||
return max_load_move - rem_load_move;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2306,7 +2330,7 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
do {
|
||||
total_load_moved +=
|
||||
class->load_balance(this_rq, this_cpu, busiest,
|
||||
ULONG_MAX, max_load_move - total_load_moved,
|
||||
max_load_move - total_load_moved,
|
||||
sd, idle, all_pinned, &this_best_prio);
|
||||
class = class->next;
|
||||
} while (class && max_load_move > total_load_moved);
|
||||
@ -2314,6 +2338,32 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
return total_load_moved > 0;
|
||||
}
|
||||
|
||||
/*
 * iter_move_one_task - pull the first migratable task offered by an iterator.
 *
 * Walks the tasks produced by iterator->start()/iterator->next() (both are
 * called with iterator->arg). The first task for which can_migrate_task()
 * returns non-zero is moved from busiest to this_rq with pull_task(), the
 * lb_gained[idle] schedstat of sd is incremented, and the walk stops.
 *
 * Returns 1 if a task was pulled, 0 if the iterator ran out of tasks without
 * any of them being accepted by can_migrate_task().
 */
static int
|
||||
iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle,
|
||||
struct rq_iterator *iterator)
|
||||
{
|
||||
struct task_struct *p = iterator->start(iterator->arg);
|
||||
/* Only handed to can_migrate_task(); its value is not read back here. */
int pinned = 0;
|
||||
|
||||
while (p) {
|
||||
if (can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) {
|
||||
pull_task(busiest, p, this_rq, this_cpu);
|
||||
/*
|
||||
* Right now, this is only the second place pull_task()
|
||||
* is called, so we can safely collect pull_task()
|
||||
* stats here rather than inside pull_task().
|
||||
*/
|
||||
schedstat_inc(sd, lb_gained[idle]);
|
||||
|
||||
return 1;
|
||||
}
|
||||
p = iterator->next(iterator->arg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* move_one_task tries to move exactly one task from busiest to this_rq, as
|
||||
* part of active balancing operations within "domain".
|
||||
@ -2325,12 +2375,9 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle)
|
||||
{
|
||||
const struct sched_class *class;
|
||||
int this_best_prio = MAX_PRIO;
|
||||
|
||||
for (class = sched_class_highest; class; class = class->next)
|
||||
if (class->load_balance(this_rq, this_cpu, busiest,
|
||||
1, ULONG_MAX, sd, idle, NULL,
|
||||
&this_best_prio))
|
||||
if (class->move_one_task(this_rq, this_cpu, busiest, sd, idle))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
@ -3267,18 +3314,6 @@ static inline void idle_balance(int cpu, struct rq *rq)
|
||||
{
|
||||
}
|
||||
|
||||
/* Avoid "used but not defined" warning on UP */
|
||||
static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
unsigned long max_nr_move, unsigned long max_load_move,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle,
|
||||
int *all_pinned, unsigned long *load_moved,
|
||||
int *this_best_prio, struct rq_iterator *iterator)
|
||||
{
|
||||
*load_moved = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
DEFINE_PER_CPU(struct kernel_stat, kstat);
|
||||
|
@ -936,12 +936,11 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
|
||||
|
||||
static unsigned long
|
||||
load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
unsigned long max_nr_move, unsigned long max_load_move,
|
||||
unsigned long max_load_move,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle,
|
||||
int *all_pinned, int *this_best_prio)
|
||||
{
|
||||
struct cfs_rq *busy_cfs_rq;
|
||||
unsigned long load_moved, total_nr_moved = 0, nr_moved;
|
||||
long rem_load_move = max_load_move;
|
||||
struct rq_iterator cfs_rq_iterator;
|
||||
|
||||
@ -969,25 +968,47 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
#else
|
||||
# define maxload rem_load_move
|
||||
#endif
|
||||
/* pass busy_cfs_rq argument into
|
||||
/*
|
||||
* pass busy_cfs_rq argument into
|
||||
* load_balance_[start|next]_fair iterators
|
||||
*/
|
||||
cfs_rq_iterator.arg = busy_cfs_rq;
|
||||
nr_moved = balance_tasks(this_rq, this_cpu, busiest,
|
||||
max_nr_move, maxload, sd, idle, all_pinned,
|
||||
&load_moved, this_best_prio, &cfs_rq_iterator);
|
||||
rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
|
||||
maxload, sd, idle, all_pinned,
|
||||
this_best_prio,
|
||||
&cfs_rq_iterator);
|
||||
|
||||
total_nr_moved += nr_moved;
|
||||
max_nr_move -= nr_moved;
|
||||
rem_load_move -= load_moved;
|
||||
|
||||
if (max_nr_move <= 0 || rem_load_move <= 0)
|
||||
if (rem_load_move <= 0)
|
||||
break;
|
||||
}
|
||||
|
||||
return max_load_move - rem_load_move;
|
||||
}
|
||||
|
||||
/*
 * move_one_task_fair - move_one_task hook of the fair scheduling class.
 *
 * Builds an rq_iterator around load_balance_start_fair/load_balance_next_fair
 * and, for every leaf cfs_rq of busiest, asks iter_move_one_task() to pull a
 * single task onto this_rq.
 *
 * Returns 1 as soon as one call to iter_move_one_task() succeeds, 0 if it
 * fails for every leaf cfs_rq visited.
 */
static int
|
||||
move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle)
|
||||
{
|
||||
struct cfs_rq *busy_cfs_rq;
|
||||
struct rq_iterator cfs_rq_iterator;
|
||||
|
||||
cfs_rq_iterator.start = load_balance_start_fair;
|
||||
cfs_rq_iterator.next = load_balance_next_fair;
|
||||
|
||||
for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
|
||||
/*
|
||||
* pass busy_cfs_rq argument into
|
||||
* load_balance_[start|next]_fair iterators
|
||||
*/
|
||||
cfs_rq_iterator.arg = busy_cfs_rq;
|
||||
if (iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
|
||||
&cfs_rq_iterator))
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* scheduler tick hitting a task of our scheduling class:
|
||||
*/
|
||||
@ -1064,6 +1085,7 @@ static const struct sched_class fair_sched_class = {
|
||||
.put_prev_task = put_prev_task_fair,
|
||||
|
||||
.load_balance = load_balance_fair,
|
||||
.move_one_task = move_one_task_fair,
|
||||
|
||||
.set_curr_task = set_curr_task_fair,
|
||||
.task_tick = task_tick_fair,
|
||||
|
@ -39,9 +39,16 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
|
||||
|
||||
static unsigned long
|
||||
load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
unsigned long max_nr_move, unsigned long max_load_move,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle,
|
||||
int *all_pinned, int *this_best_prio)
|
||||
unsigned long max_load_move,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle,
|
||||
int *all_pinned, int *this_best_prio)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * move_one_task_idle - move_one_task hook of the idle scheduling class.
 *
 * Ignores its arguments and always returns 0, so no task is ever reported
 * as moved by this class.
 */
static int
|
||||
move_one_task_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@ -70,6 +77,7 @@ const struct sched_class idle_sched_class = {
|
||||
.put_prev_task = put_prev_task_idle,
|
||||
|
||||
.load_balance = load_balance_idle,
|
||||
.move_one_task = move_one_task_idle,
|
||||
|
||||
.set_curr_task = set_curr_task_idle,
|
||||
.task_tick = task_tick_idle,
|
||||
|
@ -172,13 +172,11 @@ static struct task_struct *load_balance_next_rt(void *arg)
|
||||
|
||||
static unsigned long
|
||||
load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
unsigned long max_nr_move, unsigned long max_load_move,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle,
|
||||
int *all_pinned, int *this_best_prio)
|
||||
unsigned long max_load_move,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle,
|
||||
int *all_pinned, int *this_best_prio)
|
||||
{
|
||||
int nr_moved;
|
||||
struct rq_iterator rt_rq_iterator;
|
||||
unsigned long load_moved;
|
||||
|
||||
rt_rq_iterator.start = load_balance_start_rt;
|
||||
rt_rq_iterator.next = load_balance_next_rt;
|
||||
@ -187,11 +185,22 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
*/
|
||||
rt_rq_iterator.arg = busiest;
|
||||
|
||||
nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
|
||||
max_load_move, sd, idle, all_pinned, &load_moved,
|
||||
this_best_prio, &rt_rq_iterator);
|
||||
return balance_tasks(this_rq, this_cpu, busiest, max_load_move, sd,
|
||||
idle, all_pinned, this_best_prio, &rt_rq_iterator);
|
||||
}
|
||||
|
||||
return load_moved;
|
||||
/*
 * move_one_task_rt - move_one_task hook of the real-time scheduling class.
 *
 * Builds an rq_iterator around load_balance_start_rt/load_balance_next_rt
 * with the busiest runqueue itself as the iterator argument, then delegates
 * to iter_move_one_task() and returns its result unchanged.
 */
static int
|
||||
move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
struct sched_domain *sd, enum cpu_idle_type idle)
|
||||
{
|
||||
struct rq_iterator rt_rq_iterator;
|
||||
|
||||
rt_rq_iterator.start = load_balance_start_rt;
|
||||
rt_rq_iterator.next = load_balance_next_rt;
|
||||
rt_rq_iterator.arg = busiest;
|
||||
|
||||
return iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
|
||||
&rt_rq_iterator);
|
||||
}
|
||||
|
||||
static void task_tick_rt(struct rq *rq, struct task_struct *p)
|
||||
@ -237,6 +246,7 @@ const struct sched_class rt_sched_class = {
|
||||
.put_prev_task = put_prev_task_rt,
|
||||
|
||||
.load_balance = load_balance_rt,
|
||||
.move_one_task = move_one_task_rt,
|
||||
|
||||
.set_curr_task = set_curr_task_rt,
|
||||
.task_tick = task_tick_rt,
|
||||
|
Loading…
Reference in New Issue
Block a user