diff --git a/include/linux/sched.h b/include/linux/sched.h
index 52288a647692..639241f4f3d1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -829,11 +829,14 @@ struct sched_class {
 	void (*put_prev_task) (struct rq *rq, struct task_struct *p);
 
 	unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
-			struct rq *busiest,
-			unsigned long max_nr_move, unsigned long max_load_move,
+			struct rq *busiest, unsigned long max_load_move,
 			struct sched_domain *sd, enum cpu_idle_type idle,
 			int *all_pinned, int *this_best_prio);
 
+	int (*move_one_task) (struct rq *this_rq, int this_cpu,
+			      struct rq *busiest, struct sched_domain *sd,
+			      enum cpu_idle_type idle);
+
 	void (*set_curr_task) (struct rq *rq);
 	void (*task_tick) (struct rq *rq, struct task_struct *p);
 	void (*task_new) (struct rq *rq, struct task_struct *p);
diff --git a/kernel/sched.c b/kernel/sched.c
index cc9cd5b710a6..8607795fad69 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -838,11 +838,35 @@ struct rq_iterator {
 	struct task_struct *(*next)(void *);
 };
 
-static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		      unsigned long max_nr_move, unsigned long max_load_move,
-		      struct sched_domain *sd, enum cpu_idle_type idle,
-		      int *all_pinned, unsigned long *load_moved,
-		      int *this_best_prio, struct rq_iterator *iterator);
+#ifdef CONFIG_SMP
+static unsigned long
+balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
+	      unsigned long max_load_move, struct sched_domain *sd,
+	      enum cpu_idle_type idle, int *all_pinned,
+	      int *this_best_prio, struct rq_iterator *iterator);
+
+static int
+iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
+		   struct sched_domain *sd, enum cpu_idle_type idle,
+		   struct rq_iterator *iterator);
+#else
+static inline unsigned long
+balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
+	      unsigned long max_load_move, struct sched_domain *sd,
+	      enum cpu_idle_type idle, int *all_pinned,
+	      int *this_best_prio, struct rq_iterator *iterator)
+{
+	return 0;
+}
+
+static inline int
+iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
+		   struct sched_domain *sd, enum cpu_idle_type idle,
+		   struct rq_iterator *iterator)
+{
+	return 0;
+}
+#endif
 
 #include "sched_stats.h"
 #include "sched_idletask.c"
@@ -2224,17 +2248,17 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
 	return 1;
 }
 
-static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		      unsigned long max_nr_move, unsigned long max_load_move,
-		      struct sched_domain *sd, enum cpu_idle_type idle,
-		      int *all_pinned, unsigned long *load_moved,
-		      int *this_best_prio, struct rq_iterator *iterator)
+static unsigned long
+balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
+	      unsigned long max_load_move, struct sched_domain *sd,
+	      enum cpu_idle_type idle, int *all_pinned,
+	      int *this_best_prio, struct rq_iterator *iterator)
 {
 	int pulled = 0, pinned = 0, skip_for_load;
 	struct task_struct *p;
 	long rem_load_move = max_load_move;
 
-	if (max_nr_move == 0 || max_load_move == 0)
+	if (max_load_move == 0)
 		goto out;
 
 	pinned = 1;
@@ -2267,7 +2291,7 @@ next:
 	 * We only want to steal up to the prescribed number of tasks
 	 * and the prescribed amount of weighted load.
 	 */
-	if (pulled < max_nr_move && rem_load_move > 0) {
+	if (rem_load_move > 0) {
 		if (p->prio < *this_best_prio)
 			*this_best_prio = p->prio;
 		p = iterator->next(iterator->arg);
@@ -2275,7 +2299,7 @@ next:
 	}
 out:
 	/*
-	 * Right now, this is the only place pull_task() is called,
+	 * Right now, this is one of only two places pull_task() is called,
 	 * so we can safely collect pull_task() stats here rather than
 	 * inside pull_task().
 	 */
@@ -2283,8 +2307,8 @@ out:
 
 	if (all_pinned)
 		*all_pinned = pinned;
-	*load_moved = max_load_move - rem_load_move;
-	return pulled;
+
+	return max_load_move - rem_load_move;
 }
 
 /*
@@ -2306,7 +2330,7 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	do {
 		total_load_moved +=
 			class->load_balance(this_rq, this_cpu, busiest,
-				ULONG_MAX, max_load_move - total_load_moved,
+				max_load_move - total_load_moved,
 				sd, idle, all_pinned, &this_best_prio);
 		class = class->next;
 	} while (class && max_load_move > total_load_moved);
@@ -2314,6 +2338,32 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	return total_load_moved > 0;
 }
 
+static int
+iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
+		   struct sched_domain *sd, enum cpu_idle_type idle,
+		   struct rq_iterator *iterator)
+{
+	struct task_struct *p = iterator->start(iterator->arg);
+	int pinned = 0;
+
+	while (p) {
+		if (can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) {
+			pull_task(busiest, p, this_rq, this_cpu);
+			/*
+			 * Right now, this is only the second place pull_task()
+			 * is called, so we can safely collect pull_task()
+			 * stats here rather than inside pull_task().
+			 */
+			schedstat_inc(sd, lb_gained[idle]);
+
+			return 1;
+		}
+		p = iterator->next(iterator->arg);
+	}
+
+	return 0;
+}
+
 /*
  * move_one_task tries to move exactly one task from busiest to this_rq, as
  * part of active balancing operations within "domain".
@@ -2325,12 +2375,9 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
 			 struct sched_domain *sd, enum cpu_idle_type idle)
 {
 	const struct sched_class *class;
-	int this_best_prio = MAX_PRIO;
 
 	for (class = sched_class_highest; class; class = class->next)
-		if (class->load_balance(this_rq, this_cpu, busiest,
-					1, ULONG_MAX, sd, idle, NULL,
-					&this_best_prio))
+		if (class->move_one_task(this_rq, this_cpu, busiest, sd, idle))
 			return 1;
 
 	return 0;
@@ -3267,18 +3314,6 @@ static inline void idle_balance(int cpu, struct rq *rq)
 {
 }
 
-/* Avoid "used but not defined" warning on UP */
-static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		      unsigned long max_nr_move, unsigned long max_load_move,
-		      struct sched_domain *sd, enum cpu_idle_type idle,
-		      int *all_pinned, unsigned long *load_moved,
-		      int *this_best_prio, struct rq_iterator *iterator)
-{
-	*load_moved = 0;
-
-	return 0;
-}
-
 #endif
 
 DEFINE_PER_CPU(struct kernel_stat, kstat);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 166ed6db600b..a90d0457d603 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -936,12 +936,11 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
 
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		  unsigned long max_nr_move, unsigned long max_load_move,
+		  unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
 		  int *all_pinned, int *this_best_prio)
 {
 	struct cfs_rq *busy_cfs_rq;
-	unsigned long load_moved, total_nr_moved = 0, nr_moved;
 	long rem_load_move = max_load_move;
 	struct rq_iterator cfs_rq_iterator;
 
@@ -969,25 +968,47 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 #else
 # define maxload rem_load_move
 #endif
-		/* pass busy_cfs_rq argument into
+		/*
+		 * pass busy_cfs_rq argument into
 		 * load_balance_[start|next]_fair iterators
 		 */
 		cfs_rq_iterator.arg = busy_cfs_rq;
-		nr_moved = balance_tasks(this_rq, this_cpu, busiest,
-				max_nr_move, maxload, sd, idle, all_pinned,
-				&load_moved, this_best_prio, &cfs_rq_iterator);
+		rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
+					       maxload, sd, idle, all_pinned,
+					       this_best_prio,
+					       &cfs_rq_iterator);
 
-		total_nr_moved += nr_moved;
-		max_nr_move -= nr_moved;
-		rem_load_move -= load_moved;
-
-		if (max_nr_move <= 0 || rem_load_move <= 0)
+		if (rem_load_move <= 0)
 			break;
 	}
 
 	return max_load_move - rem_load_move;
 }
 
+static int
+move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
+		   struct sched_domain *sd, enum cpu_idle_type idle)
+{
+	struct cfs_rq *busy_cfs_rq;
+	struct rq_iterator cfs_rq_iterator;
+
+	cfs_rq_iterator.start = load_balance_start_fair;
+	cfs_rq_iterator.next = load_balance_next_fair;
+
+	for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
+		/*
+		 * pass busy_cfs_rq argument into
+		 * load_balance_[start|next]_fair iterators
+		 */
+		cfs_rq_iterator.arg = busy_cfs_rq;
+		if (iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
+				       &cfs_rq_iterator))
+			return 1;
+	}
+
+	return 0;
+}
+
 /*
  * scheduler tick hitting a task of our scheduling class:
  */
@@ -1064,6 +1085,7 @@ static const struct sched_class fair_sched_class = {
 	.put_prev_task		= put_prev_task_fair,
 
 	.load_balance		= load_balance_fair,
+	.move_one_task		= move_one_task_fair,
 
 	.set_curr_task		= set_curr_task_fair,
 	.task_tick		= task_tick_fair,
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 6e2ead41516e..586b06ca30aa 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -39,9 +39,16 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
 
 static unsigned long
 load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
-			unsigned long max_nr_move, unsigned long max_load_move,
-			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *all_pinned, int *this_best_prio)
+		  unsigned long max_load_move,
+		  struct sched_domain *sd, enum cpu_idle_type idle,
+		  int *all_pinned, int *this_best_prio)
+{
+	return 0;
+}
+
+static int
+move_one_task_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
+		   struct sched_domain *sd, enum cpu_idle_type idle)
 {
 	return 0;
 }
@@ -70,6 +77,7 @@ const struct sched_class idle_sched_class = {
 	.put_prev_task = put_prev_task_idle,
 
 	.load_balance = load_balance_idle,
+	.move_one_task = move_one_task_idle,
 
 	.set_curr_task = set_curr_task_idle,
 	.task_tick = task_tick_idle,
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index d0097a0634e5..e9395b7119e6 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -172,13 +172,11 @@ static struct task_struct *load_balance_next_rt(void *arg)
 
 static unsigned long
 load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
-			unsigned long max_nr_move, unsigned long max_load_move,
-			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *all_pinned, int *this_best_prio)
+		unsigned long max_load_move,
+		struct sched_domain *sd, enum cpu_idle_type idle,
+		int *all_pinned, int *this_best_prio)
 {
-	int nr_moved;
 	struct rq_iterator rt_rq_iterator;
-	unsigned long load_moved;
 
 	rt_rq_iterator.start = load_balance_start_rt;
 	rt_rq_iterator.next = load_balance_next_rt;
@@ -187,11 +185,22 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	 */
 	rt_rq_iterator.arg = busiest;
 
-	nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
-			max_load_move, sd, idle, all_pinned, &load_moved,
-			this_best_prio, &rt_rq_iterator);
+	return balance_tasks(this_rq, this_cpu, busiest, max_load_move, sd,
+			     idle, all_pinned, this_best_prio, &rt_rq_iterator);
+}
 
-	return load_moved;
+static int
+move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
+		 struct sched_domain *sd, enum cpu_idle_type idle)
+{
+	struct rq_iterator rt_rq_iterator;
+
+	rt_rq_iterator.start = load_balance_start_rt;
+	rt_rq_iterator.next = load_balance_next_rt;
+	rt_rq_iterator.arg = busiest;
+
+	return iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
+				  &rt_rq_iterator);
 }
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p)
@@ -237,6 +246,7 @@ const struct sched_class rt_sched_class = {
 	.put_prev_task = put_prev_task_rt,
 
 	.load_balance = load_balance_rt,
+	.move_one_task = move_one_task_rt,
 
 	.set_curr_task = set_curr_task_rt,
 	.task_tick = task_tick_rt,
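
For readers skimming the patch: the refactoring retires the old trick of calling each class's load_balance() with max_nr_move == 1 and max_load_move == ULONG_MAX, and instead gives every scheduling class a dedicated move_one_task() hook built on the shared iter_move_one_task() helper, so the common balance_tasks() loop no longer has to account for a task count as well as weighted load. The rq_iterator at the heart of both helpers is just a callback cursor: start()/next() walk one class's queued tasks without the generic code knowing the underlying data structure. Below is a minimal, self-contained user-space sketch of that pattern; every name in it is a hypothetical stand-in for illustration, not kernel code:

/* iter_sketch.c -- illustrative stand-in for the rq_iterator pattern.
 * All types and names here are invented for this sketch. */
#include <stdio.h>

struct task { int prio; int pinned; };

struct iter {
	void *arg;			/* opaque per-class cursor state */
	struct task *(*start)(void *);	/* first candidate task, or NULL */
	struct task *(*next)(void *);	/* following candidates, or NULL */
};

/* One concrete "runqueue" representation: a plain array. */
struct array_rq { struct task *tasks; int len; int pos; };

static struct task *array_start(void *arg)
{
	struct array_rq *rq = arg;
	rq->pos = 0;
	return rq->len > 0 ? &rq->tasks[0] : NULL;
}

static struct task *array_next(void *arg)
{
	struct array_rq *rq = arg;
	return ++rq->pos < rq->len ? &rq->tasks[rq->pos] : NULL;
}

/* Same shape as iter_move_one_task() above: stop at the first
 * migratable task instead of accounting weighted load the way
 * balance_tasks() does. */
static int move_one(struct iter *it)
{
	struct task *p;

	for (p = it->start(it->arg); p; p = it->next(it->arg)) {
		if (!p->pinned) {	/* stand-in for can_migrate_task() */
			printf("pulled task, prio %d\n", p->prio);
			return 1;
		}
	}
	return 0;
}

int main(void)
{
	struct task tasks[] = { { 10, 1 }, { 20, 0 }, { 30, 0 } };
	struct array_rq rq = { tasks, 3, 0 };
	struct iter it = { &rq, array_start, array_next };

	return move_one(&it) ? 0 : 1;	/* pulls the prio-20 task */
}

The same shape appears in move_one_task_rt() in the patch: fill in start/next/arg for the class's queue, then delegate the walk to the generic helper.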