Updates to the tick/nohz code in this cycle:

- Micro-optimize tick_nohz_full_cpu()
 
  - Optimize idle exit tick restarts to be less eager
 
  - Optimize tick_nohz_dep_set_task() to only wake up
    a single CPU. This reduces IPIs and interruptions
    on nohz_full CPUs.
 
  - Optimize tick_nohz_dep_set_signal() in a similar
    fashion.
 
  - Skip IPIs in tick_nohz_kick_task() when trying
    to kick a non-running task.
 
  - Micro-optimize tick_nohz_task_switch() IRQ flags
    handling to reduce context switching costs.
 
  - Misc cleanups and fixes
 
 Signed-off-by: Ingo Molnar <mingo@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmDZcycRHG1pbmdvQGtl
 cm5lbC5vcmcACgkQEnMQ0APhK1jItRAAn1/vI0+pWQWjyWQ+CL8AMNNWTbtBpC7W
 ZUR+IEtEoYEufYXH9RgcweIgopBExVlC9CWzUX5o7AuVdN2YyzcBuQbza4vlYeIm
 azcdIlKCwjdgODJBTgHNH7IR0QKF/Gq+fVCGX3Xc37BlyD389CQ33HXC7X2JZLB3
 Mb5wxAJoZ2HQzGGJoz4JyA0rl6lY3jYzLMK7mqxkUqIqT45xLpgw5+imRM2J1ddV
 d/73P4TwFY+E8KXSLctUfgmkmCzJYISGSlH49jX3CkwAktwTY17JjWjxT9Z5b2D8
 6TTpsDoLtI4tXg0U2KsBxBoDHK/a4hAwo+GnE/RMT6ghqaX5IrANrgtTVPBN9dvh
 qUGVAMHVDN3Ed7wwFvCm4tPUz/iXzBsP8xPl28WPHsyV9BE9tcrk2ynzSWy47Twd
 z1GVZDNTwCfdvH62WS/HvbPdGl2hHH5/oe3HaF1ROLPHq8UzaxwKEX+A0rwLJrBp
 ZU8Lnvu3rPVa5cHc4z1AE7sbX7OkTTNjxY/qQzDhNKwVwfkaPcBiok9VgEIEGS7A
 n3U/yuQCn307sr7SlJ6z4yu3YCw3aEJ3pTxUprmNTh3+x4yF5ZaOimqPyvzBaUVM
 Hm3LYrxHIScisFJio4FiC2dghZryM37RFonvqrCAOuA+afMU2GOFnaoDruXU27SE
 tqxR6c/hw+4=
 =18pN
 -----END PGP SIGNATURE-----

Merge tag 'timers-nohz-2021-06-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timers/nohz updates from Ingo Molnar:

 - Micro-optimize tick_nohz_full_cpu()

 - Optimize idle exit tick restarts to be less eager

 - Optimize tick_nohz_dep_set_task() to only wake up a single CPU.
   This reduces IPIs and interruptions on nohz_full CPUs.

 - Optimize tick_nohz_dep_set_signal() in a similar fashion.

 - Skip IPIs in tick_nohz_kick_task() when trying to kick a
   non-running task.

 - Micro-optimize tick_nohz_task_switch() IRQ flags handling to
   reduce context switching costs.

 - Misc cleanups and fixes

* tag 'timers-nohz-2021-06-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  MAINTAINERS: Add myself as context tracking maintainer
  tick/nohz: Call tick_nohz_task_switch() with interrupts disabled
  tick/nohz: Kick only _queued_ task whose tick dependency is updated
  tick/nohz: Change signal tick dependency to wake up CPUs of member tasks
  tick/nohz: Only wake up a single target cpu when kicking a task
  tick/nohz: Update nohz_full Kconfig help
  tick/nohz: Update idle_exittime on actual idle exit
  tick/nohz: Remove superflous check for CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
  tick/nohz: Conditionally restart tick on idle exit
  tick/nohz: Evaluate the CPU expression after the static key
This commit is contained in:
Linus Torvalds 2021-06-28 12:22:06 -07:00
commit 9269d27e51
7 changed files with 126 additions and 61 deletions

View File

@ -4610,6 +4610,12 @@ S: Supported
F: drivers/video/console/
F: include/linux/console*
CONTEXT TRACKING
M: Frederic Weisbecker <frederic@kernel.org>
S: Maintained
F: kernel/context_tracking.c
F: include/linux/context_tracking*
CONTROL GROUP (CGROUP)
M: Tejun Heo <tj@kernel.org>
M: Zefan Li <lizefan.x@bytedance.com>

View File

@ -2028,6 +2028,8 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
#endif /* CONFIG_SMP */
extern bool sched_task_on_rq(struct task_struct *p);
/*
* In order to reduce various lock holder preemption latencies provide an
* interface to see if a vCPU is currently running or not.

View File

@ -186,13 +186,17 @@ static inline bool tick_nohz_full_enabled(void)
return tick_nohz_full_running;
}
static inline bool tick_nohz_full_cpu(int cpu)
{
if (!tick_nohz_full_enabled())
return false;
return cpumask_test_cpu(cpu, tick_nohz_full_mask);
}
/*
* Check if a CPU is part of the nohz_full subset. Arrange for evaluating
* the cpu expression (typically smp_processor_id()) _after_ the static
* key.
*/
#define tick_nohz_full_cpu(_cpu) ({ \
bool __ret = false; \
if (tick_nohz_full_enabled()) \
__ret = cpumask_test_cpu((_cpu), tick_nohz_full_mask); \
__ret; \
})
static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask)
{
@ -208,7 +212,7 @@ extern void tick_nohz_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
extern void tick_nohz_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit);
@ -253,11 +257,11 @@ static inline void tick_dep_clear_task(struct task_struct *tsk,
if (tick_nohz_full_enabled())
tick_nohz_dep_clear_task(tsk, bit);
}
static inline void tick_dep_set_signal(struct signal_struct *signal,
static inline void tick_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_set_signal(signal, bit);
tick_nohz_dep_set_signal(tsk, bit);
}
static inline void tick_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit)
@ -285,7 +289,7 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_set_signal(struct signal_struct *signal,
static inline void tick_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit) { }

View File

@ -1928,6 +1928,11 @@ static inline void uclamp_post_fork(struct task_struct *p) { }
static inline void init_uclamp(void) { }
#endif /* CONFIG_UCLAMP_TASK */
bool sched_task_on_rq(struct task_struct *p)
{
return task_on_rq_queued(p);
}
static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
{
if (!(flags & ENQUEUE_NOCLOCK))
@ -4546,6 +4551,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
vtime_task_switch(prev);
perf_event_task_sched_in(prev, current);
finish_task(prev);
tick_nohz_task_switch();
finish_lock_switch(rq);
finish_arch_post_lock_switch();
kcov_finish_switch(current);
@ -4591,7 +4597,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
put_task_struct_rcu_user(prev);
}
tick_nohz_task_switch();
return rq;
}

View File

@ -117,13 +117,14 @@ config NO_HZ_FULL
the task mostly runs in userspace and has few kernel activity.
You need to fill up the nohz_full boot parameter with the
desired range of dynticks CPUs.
desired range of dynticks CPUs to use it. This is implemented at
the expense of some overhead in user <-> kernel transitions:
syscalls, exceptions and interrupts.
This is implemented at the expense of some overhead in user <-> kernel
transitions: syscalls, exceptions and interrupts. Even when it's
dynamically off.
By default, without passing the nohz_full parameter, this behaves just
like NO_HZ_IDLE.
Say N.
If you're a distro say Y.
endchoice

View File

@ -523,7 +523,7 @@ static void arm_timer(struct k_itimer *timer, struct task_struct *p)
if (CPUCLOCK_PERTHREAD(timer->it_clock))
tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
else
tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
tick_dep_set_signal(p, TICK_DEP_BIT_POSIX_TIMER);
}
/*
@ -1358,7 +1358,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid,
if (*newval < *nextevt)
*nextevt = *newval;
tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
tick_dep_set_signal(tsk, TICK_DEP_BIT_POSIX_TIMER);
}
static int do_cpu_nanosleep(const clockid_t which_clock, int flags,

View File

@ -323,6 +323,46 @@ void tick_nohz_full_kick_cpu(int cpu)
irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
}
static void tick_nohz_kick_task(struct task_struct *tsk)
{
int cpu;
/*
* If the task is not running, run_posix_cpu_timers()
* has nothing to elapse, IPI can then be spared.
*
* activate_task() STORE p->tick_dep_mask
* STORE p->on_rq
* __schedule() (switch to task 'p') smp_mb() (atomic_fetch_or())
* LOCK rq->lock LOAD p->on_rq
* smp_mb__after_spin_lock()
* tick_nohz_task_switch()
* LOAD p->tick_dep_mask
*/
if (!sched_task_on_rq(tsk))
return;
/*
* If the task concurrently migrates to another CPU,
* we guarantee it sees the new tick dependency upon
* schedule.
*
* set_task_cpu(p, cpu);
* STORE p->cpu = @cpu
* __schedule() (switch to task 'p')
* LOCK rq->lock
* smp_mb__after_spin_lock() STORE p->tick_dep_mask
* tick_nohz_task_switch() smp_mb() (atomic_fetch_or())
* LOAD p->tick_dep_mask LOAD p->cpu
*/
cpu = task_cpu(tsk);
preempt_disable();
if (cpu_online(cpu))
tick_nohz_full_kick_cpu(cpu);
preempt_enable();
}
/*
* Kick all full dynticks CPUs in order to force these to re-evaluate
* their dependency on the tick and restart it if necessary.
@ -405,19 +445,8 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_cpu);
*/
void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask)) {
if (tsk == current) {
preempt_disable();
tick_nohz_full_kick();
preempt_enable();
} else {
/*
* Some future tick_nohz_full_kick_task()
* should optimize this.
*/
tick_nohz_full_kick_all();
}
}
if (!atomic_fetch_or(BIT(bit), &tsk->tick_dep_mask))
tick_nohz_kick_task(tsk);
}
EXPORT_SYMBOL_GPL(tick_nohz_dep_set_task);
@ -431,9 +460,20 @@ EXPORT_SYMBOL_GPL(tick_nohz_dep_clear_task);
* Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
* per process timers.
*/
void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
void tick_nohz_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit)
{
tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
int prev;
struct signal_struct *sig = tsk->signal;
prev = atomic_fetch_or(BIT(bit), &sig->tick_dep_mask);
if (!prev) {
struct task_struct *t;
lockdep_assert_held(&tsk->sighand->siglock);
__for_each_thread(sig, t)
tick_nohz_kick_task(t);
}
}
void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
@ -448,13 +488,10 @@ void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bi
*/
void __tick_nohz_task_switch(void)
{
unsigned long flags;
struct tick_sched *ts;
local_irq_save(flags);
if (!tick_nohz_full_cpu(smp_processor_id()))
goto out;
return;
ts = this_cpu_ptr(&tick_cpu_sched);
@ -463,8 +500,6 @@ void __tick_nohz_task_switch(void)
atomic_read(&current->signal->tick_dep_mask))
tick_nohz_full_kick();
}
out:
local_irq_restore(flags);
}
/* Get the boot-time nohz CPU list from the kernel parameters. */
@ -922,27 +957,31 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
* Cancel the scheduled timer and restore the tick
*/
ts->tick_stopped = 0;
ts->idle_exittime = now;
tick_nohz_restart(ts, now);
}
static void tick_nohz_full_update_tick(struct tick_sched *ts)
static void __tick_nohz_full_update_tick(struct tick_sched *ts,
ktime_t now)
{
#ifdef CONFIG_NO_HZ_FULL
int cpu = smp_processor_id();
if (!tick_nohz_full_cpu(cpu))
if (can_stop_full_tick(cpu, ts))
tick_nohz_stop_sched_tick(ts, cpu);
else if (ts->tick_stopped)
tick_nohz_restart_sched_tick(ts, now);
#endif
}
static void tick_nohz_full_update_tick(struct tick_sched *ts)
{
if (!tick_nohz_full_cpu(smp_processor_id()))
return;
if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
return;
if (can_stop_full_tick(cpu, ts))
tick_nohz_stop_sched_tick(ts, cpu);
else if (ts->tick_stopped)
tick_nohz_restart_sched_tick(ts, ktime_get());
#endif
__tick_nohz_full_update_tick(ts, ktime_get());
}
static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
@ -1189,11 +1228,13 @@ unsigned long tick_nohz_get_idle_calls(void)
return ts->idle_calls;
}
static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
static void tick_nohz_account_idle_time(struct tick_sched *ts,
ktime_t now)
{
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
unsigned long ticks;
ts->idle_exittime = now;
if (vtime_accounting_enabled_this_cpu())
return;
/*
@ -1207,21 +1248,27 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
*/
if (ticks && ticks < LONG_MAX)
account_idle_ticks(ticks);
#endif
}
static void __tick_nohz_idle_restart_tick(struct tick_sched *ts, ktime_t now)
{
tick_nohz_restart_sched_tick(ts, now);
tick_nohz_account_idle_ticks(ts);
}
void tick_nohz_idle_restart_tick(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
if (ts->tick_stopped)
__tick_nohz_idle_restart_tick(ts, ktime_get());
if (ts->tick_stopped) {
ktime_t now = ktime_get();
tick_nohz_restart_sched_tick(ts, now);
tick_nohz_account_idle_time(ts, now);
}
}
static void tick_nohz_idle_update_tick(struct tick_sched *ts, ktime_t now)
{
if (tick_nohz_full_cpu(smp_processor_id()))
__tick_nohz_full_update_tick(ts, now);
else
tick_nohz_restart_sched_tick(ts, now);
tick_nohz_account_idle_time(ts, now);
}
/**
@ -1253,7 +1300,7 @@ void tick_nohz_idle_exit(void)
tick_nohz_stop_idle(ts, now);
if (tick_stopped)
__tick_nohz_idle_restart_tick(ts, now);
tick_nohz_idle_update_tick(ts, now);
local_irq_enable();
}