sched/deadline: Reclaim bandwidth not used by dl tasks

This commit introduces a per-runqueue "extra utilization" that can be
reclaimed by deadline tasks. In this way, the maximum fraction of CPU
time that can be reclaimed by deadline tasks is fixed (and configurable)
and does not depend on the total deadline utilization.
The GRUB accounting rule is modified to add this "extra utilization"
to the inactive utilization of the runqueue, and to avoid reclaiming
more than a maximum fraction of the CPU time.

Tested-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Signed-off-by: Luca Abeni <luca.abeni@santannapisa.it>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Claudio Scordino <claudio@evidence.eu.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: Juri Lelli <juri.lelli@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tommaso Cucinotta <tommaso.cucinotta@sssup.it>
Link: http://lkml.kernel.org/r/1495138417-6203-10-git-send-email-luca.abeni@santannapisa.it
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Luca Abeni 2017-05-18 22:13:36 +02:00 committed by Ingo Molnar
parent 9f0d1a5077
commit daec579836
3 changed files with 72 additions and 24 deletions

View File

@ -2444,7 +2444,7 @@ inline struct dl_bw *dl_bw_of(int i)
return &cpu_rq(i)->rd->dl_bw; return &cpu_rq(i)->rd->dl_bw;
} }
static inline int dl_bw_cpus(int i) inline int dl_bw_cpus(int i)
{ {
struct root_domain *rd = cpu_rq(i)->rd; struct root_domain *rd = cpu_rq(i)->rd;
int cpus = 0; int cpus = 0;
@ -2462,7 +2462,7 @@ inline struct dl_bw *dl_bw_of(int i)
return &cpu_rq(i)->dl.dl_bw; return &cpu_rq(i)->dl.dl_bw;
} }
static inline int dl_bw_cpus(int i) inline int dl_bw_cpus(int i)
{ {
return 1; return 1;
} }
@ -2500,8 +2500,8 @@ static int dl_overflow(struct task_struct *p, int policy,
if (dl_policy(policy) && !task_has_dl_policy(p) && if (dl_policy(policy) && !task_has_dl_policy(p) &&
!__dl_overflow(dl_b, cpus, 0, new_bw)) { !__dl_overflow(dl_b, cpus, 0, new_bw)) {
if (hrtimer_active(&p->dl.inactive_timer)) if (hrtimer_active(&p->dl.inactive_timer))
__dl_clear(dl_b, p->dl.dl_bw); __dl_clear(dl_b, p->dl.dl_bw, cpus);
__dl_add(dl_b, new_bw); __dl_add(dl_b, new_bw, cpus);
err = 0; err = 0;
} else if (dl_policy(policy) && task_has_dl_policy(p) && } else if (dl_policy(policy) && task_has_dl_policy(p) &&
!__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) { !__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) {
@ -2512,8 +2512,8 @@ static int dl_overflow(struct task_struct *p, int policy,
* But this would require to set the task's "inactive * But this would require to set the task's "inactive
* timer" when the task is not inactive. * timer" when the task is not inactive.
*/ */
__dl_clear(dl_b, p->dl.dl_bw); __dl_clear(dl_b, p->dl.dl_bw, cpus);
__dl_add(dl_b, new_bw); __dl_add(dl_b, new_bw, cpus);
dl_change_utilization(p, new_bw); dl_change_utilization(p, new_bw);
err = 0; err = 0;
} else if (!dl_policy(policy) && task_has_dl_policy(p)) { } else if (!dl_policy(policy) && task_has_dl_policy(p)) {
@ -5515,7 +5515,7 @@ int task_can_attach(struct task_struct *p,
* We will free resources in the source root_domain * We will free resources in the source root_domain
* later on (see set_cpus_allowed_dl()). * later on (see set_cpus_allowed_dl()).
*/ */
__dl_add(dl_b, p->dl.dl_bw); __dl_add(dl_b, p->dl.dl_bw, cpus);
} }
raw_spin_unlock_irqrestore(&dl_b->lock, flags); raw_spin_unlock_irqrestore(&dl_b->lock, flags);
rcu_read_unlock_sched(); rcu_read_unlock_sched();
@ -6764,9 +6764,12 @@ void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
{ {
if (global_rt_runtime() == RUNTIME_INF) { if (global_rt_runtime() == RUNTIME_INF) {
dl_rq->bw_ratio = 1 << RATIO_SHIFT; dl_rq->bw_ratio = 1 << RATIO_SHIFT;
dl_rq->extra_bw = 1 << BW_SHIFT;
} else { } else {
dl_rq->bw_ratio = to_ratio(global_rt_runtime(), dl_rq->bw_ratio = to_ratio(global_rt_runtime(),
global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT); global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT);
dl_rq->extra_bw = to_ratio(global_rt_period(),
global_rt_runtime());
} }
} }

View File

@ -209,7 +209,7 @@ static void task_non_contending(struct task_struct *p)
if (p->state == TASK_DEAD) if (p->state == TASK_DEAD)
sub_rq_bw(p->dl.dl_bw, &rq->dl); sub_rq_bw(p->dl.dl_bw, &rq->dl);
raw_spin_lock(&dl_b->lock); raw_spin_lock(&dl_b->lock);
__dl_clear(dl_b, p->dl.dl_bw); __dl_clear(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
__dl_clear_params(p); __dl_clear_params(p);
raw_spin_unlock(&dl_b->lock); raw_spin_unlock(&dl_b->lock);
} }
@ -955,28 +955,40 @@ extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
/* /*
* This function implements the GRUB accounting rule: * This function implements the GRUB accounting rule:
* according to the GRUB reclaiming algorithm, the runtime is * according to the GRUB reclaiming algorithm, the runtime is
* not decreased as "dq = -dt", but as "dq = -max{u, (1 - Uinact)} dt", * not decreased as "dq = -dt", but as
* where u is the utilization of the task and Uinact is the * "dq = -max{u / Umax, (1 - Uinact - Uextra)} dt",
* (per-runqueue) inactive utilization, computed as the difference * where u is the utilization of the task, Umax is the maximum reclaimable
* between the "total runqueue utilization" and the runqueue * utilization, Uinact is the (per-runqueue) inactive utilization, computed
* active utilization. * as the difference between the "total runqueue utilization" and the
* runqueue active utilization, and Uextra is the (per runqueue) extra
* reclaimable utilization.
* Since rq->dl.running_bw and rq->dl.this_bw contain utilizations * Since rq->dl.running_bw and rq->dl.this_bw contain utilizations
* multiplied by 2^BW_SHIFT, the result has to be shifted right by BW_SHIFT. * multiplied by 2^BW_SHIFT, the result has to be shifted right by
* BW_SHIFT.
* Since rq->dl.bw_ratio contains 1 / Umax multiplied by 2^RATIO_SHIFT,
* dl_bw is multiplied by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
* Since delta is a 64 bit variable, to have an overflow its value
* should be larger than 2^(64 - 20 - 8), which is more than 64 seconds.
* So, overflow is not an issue here.
*/ */
u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se) u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
{ {
u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */ u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
u64 u_act; u64 u_act;
u64 u_act_min = (dl_se->dl_bw * rq->dl.bw_ratio) >> RATIO_SHIFT;
/* /*
* Instead of computing max{u, (1 - u_inact)}, we compare * Instead of computing max{u * bw_ratio, (1 - u_inact - u_extra)},
* u_inact with 1 - u, because u_inact can be larger than 1 * we compare u_inact + rq->dl.extra_bw with
* (so, 1 - u_inact would be negative leading to wrong results) * 1 - (u * rq->dl.bw_ratio >> RATIO_SHIFT), because
* u_inact + rq->dl.extra_bw can be larger than
* 1 (so, 1 - u_inact - rq->dl.extra_bw would be negative
* leading to wrong results)
*/ */
if (u_inact > BW_UNIT - dl_se->dl_bw) if (u_inact + rq->dl.extra_bw > BW_UNIT - u_act_min)
u_act = dl_se->dl_bw; u_act = u_act_min;
else else
u_act = BW_UNIT - u_inact; u_act = BW_UNIT - u_inact - rq->dl.extra_bw;
return (delta * u_act) >> BW_SHIFT; return (delta * u_act) >> BW_SHIFT;
} }
@ -1085,7 +1097,7 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
} }
raw_spin_lock(&dl_b->lock); raw_spin_lock(&dl_b->lock);
__dl_clear(dl_b, p->dl.dl_bw); __dl_clear(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
raw_spin_unlock(&dl_b->lock); raw_spin_unlock(&dl_b->lock);
__dl_clear_params(p); __dl_clear_params(p);
@ -2054,7 +2066,7 @@ static void set_cpus_allowed_dl(struct task_struct *p,
* until we complete the update. * until we complete the update.
*/ */
raw_spin_lock(&src_dl_b->lock); raw_spin_lock(&src_dl_b->lock);
__dl_clear(src_dl_b, p->dl.dl_bw); __dl_clear(src_dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
raw_spin_unlock(&src_dl_b->lock); raw_spin_unlock(&src_dl_b->lock);
} }

View File

@ -219,22 +219,27 @@ static inline int dl_bandwidth_enabled(void)
} }
extern struct dl_bw *dl_bw_of(int i); extern struct dl_bw *dl_bw_of(int i);
extern int dl_bw_cpus(int i);
struct dl_bw { struct dl_bw {
raw_spinlock_t lock; raw_spinlock_t lock;
u64 bw, total_bw; u64 bw, total_bw;
}; };
static inline void __dl_update(struct dl_bw *dl_b, s64 bw);
static inline static inline
void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw) void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
{ {
dl_b->total_bw -= tsk_bw; dl_b->total_bw -= tsk_bw;
__dl_update(dl_b, (s32)tsk_bw / cpus);
} }
static inline static inline
void __dl_add(struct dl_bw *dl_b, u64 tsk_bw) void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
{ {
dl_b->total_bw += tsk_bw; dl_b->total_bw += tsk_bw;
__dl_update(dl_b, -((s32)tsk_bw / cpus));
} }
static inline static inline
@ -576,6 +581,7 @@ struct dl_rq {
* runqueue (inactive utilization = this_bw - running_bw). * runqueue (inactive utilization = this_bw - running_bw).
*/ */
u64 this_bw; u64 this_bw;
u64 extra_bw;
/* /*
* Inverse of the fraction of CPU utilization that can be reclaimed * Inverse of the fraction of CPU utilization that can be reclaimed
@ -1958,6 +1964,33 @@ extern void nohz_balance_exit_idle(unsigned int cpu);
static inline void nohz_balance_exit_idle(unsigned int cpu) { } static inline void nohz_balance_exit_idle(unsigned int cpu) { }
#endif #endif
#ifdef CONFIG_SMP
/*
 * __dl_update - apply a signed delta to the per-runqueue extra bandwidth.
 * @dl_b: deadline bandwidth descriptor; this variant treats it as the
 *        dl_bw member embedded in a struct root_domain.
 * @bw:   signed amount added to each affected runqueue's dl.extra_bw.
 *        __dl_clear() passes a positive per-CPU share of the task bandwidth,
 *        __dl_add() passes the negated share.
 *
 * The delta is applied to the runqueue of every CPU that is both in the
 * root domain's span and in cpu_active_mask.
 */
static inline
void __dl_update(struct dl_bw *dl_b, s64 bw)
{
/* Recover the enclosing root_domain from its embedded dl_bw member. */
struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
int i;
/* Lockdep-only check: complain if the sched RCU read lock is not held. */
RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
"sched RCU must be held");
/* Visit only CPUs present in both rd->span and cpu_active_mask. */
for_each_cpu_and(i, rd->span, cpu_active_mask) {
struct rq *rq = cpu_rq(i);
rq->dl.extra_bw += bw;
}
}
#else
/*
 * __dl_update - !CONFIG_SMP variant.
 * @dl_b: deadline bandwidth descriptor; this variant treats it as the
 *        dl_bw member embedded in a struct dl_rq.
 * @bw:   signed amount added to that dl_rq's extra_bw.
 */
static inline
void __dl_update(struct dl_bw *dl_b, s64 bw)
{
/* Recover the enclosing dl_rq from its embedded dl_bw member. */
struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
dl->extra_bw += bw;
}
#endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING #ifdef CONFIG_IRQ_TIME_ACCOUNTING
struct irqtime { struct irqtime {
u64 total; u64 total;