linux/kernel/time/tick-internal.h

167 lines
7.0 KiB
C
Raw Normal View History

/*
* tick internal variable and functions used by low/high res code
*/
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include "timekeeping.h"
#include "tick-sched.h"
#ifdef CONFIG_GENERIC_CLOCKEVENTS
# define TICK_DO_TIMER_NONE -1
# define TICK_DO_TIMER_BOOT -2
DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
extern ktime_t tick_next_period;
extern ktime_t tick_period;
extern int tick_do_timer_cpu __read_mostly;
extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
extern void tick_handle_periodic(struct clock_event_device *dev);
extern void tick_check_new_device(struct clock_event_device *dev);
extern void tick_shutdown(unsigned int cpu);
extern void tick_suspend(void);
extern void tick_resume(void);
extern bool tick_check_replacement(struct clock_event_device *curdev,
struct clock_event_device *newdev);
extern void tick_install_replacement(struct clock_event_device *dev);
extern int tick_is_oneshot_available(void);
extern struct tick_device *tick_get_device(int cpu);
extern int clockevents_tick_resume(struct clock_event_device *dev);
/* Check, if the device is functional or a dummy for broadcast */
static inline int tick_device_is_functional(struct clock_event_device *dev)
{
return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
}
static inline enum clock_event_state clockevent_get_state(struct clock_event_device *dev)
{
return dev->state_use_accessors;
}
static inline void clockevent_set_state(struct clock_event_device *dev,
enum clock_event_state state)
{
dev->state_use_accessors = state;
}
extern void clockevents_shutdown(struct clock_event_device *dev);
extern void clockevents_exchange_device(struct clock_event_device *old,
struct clock_event_device *new);
extern void clockevents_switch_state(struct clock_event_device *dev,
enum clock_event_state state);
extern int clockevents_program_event(struct clock_event_device *dev,
ktime_t expires, bool force);
extern void clockevents_handle_noop(struct clock_event_device *dev);
extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq);
extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt);
/* Broadcasting support */
# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
extern void tick_install_broadcast_device(struct clock_event_device *dev);
extern int tick_is_broadcast_device(struct clock_event_device *dev);
extern void tick_shutdown_broadcast(unsigned int cpu);
extern void tick_suspend_broadcast(void);
extern void tick_resume_broadcast(void);
extern bool tick_resume_check_broadcast(void);
extern void tick_broadcast_init(void);
extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq);
extern struct tick_device *tick_get_broadcast_device(void);
extern struct cpumask *tick_get_broadcast_mask(void);
# else /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST: */
static inline void tick_install_broadcast_device(struct clock_event_device *dev) { }
static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; }
static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; }
static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
static inline void tick_shutdown_broadcast(unsigned int cpu) { }
static inline void tick_suspend_broadcast(void) { }
static inline void tick_resume_broadcast(void) { }
static inline bool tick_resume_check_broadcast(void) { return false; }
static inline void tick_broadcast_init(void) { }
static inline int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq) { return -ENODEV; }
/* Set the periodic handler in non broadcast mode */
static inline void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
dev->event_handler = tick_handle_periodic;
}
# endif /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST */
#else /* !GENERIC_CLOCKEVENTS: */
static inline void tick_suspend(void) { }
static inline void tick_resume(void) { }
#endif /* !GENERIC_CLOCKEVENTS */
/* Oneshot related functions */
#ifdef CONFIG_TICK_ONESHOT
extern void tick_setup_oneshot(struct clock_event_device *newdev,
void (*handler)(struct clock_event_device *),
ktime_t nextevt);
extern int tick_program_event(ktime_t expires, int force);
extern void tick_oneshot_notify(void);
extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));
extern void tick_resume_oneshot(void);
static inline bool tick_oneshot_possible(void) { return true; }
extern int tick_oneshot_mode_active(void);
extern void tick_clock_notify(void);
extern int tick_check_oneshot_change(int allow_nohz);
extern int tick_init_highres(void);
#else /* !CONFIG_TICK_ONESHOT: */
static inline
void tick_setup_oneshot(struct clock_event_device *newdev,
void (*handler)(struct clock_event_device *),
ktime_t nextevt) { BUG(); }
static inline void tick_resume_oneshot(void) { BUG(); }
static inline int tick_program_event(ktime_t expires, int force) { return 0; }
static inline void tick_oneshot_notify(void) { }
static inline bool tick_oneshot_possible(void) { return false; }
static inline int tick_oneshot_mode_active(void) { return 0; }
static inline void tick_clock_notify(void) { }
static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
#endif /* !CONFIG_TICK_ONESHOT */
/* Functions related to oneshot broadcasting */
#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
extern void tick_broadcast_switch_to_oneshot(void);
extern void tick_shutdown_broadcast_oneshot(unsigned int cpu);
extern int tick_broadcast_oneshot_active(void);
extern void tick_check_oneshot_broadcast_this_cpu(void);
bool tick_broadcast_oneshot_available(void);
extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
#else /* !(BROADCAST && ONESHOT): */
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
static inline void tick_broadcast_switch_to_oneshot(void) { }
static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { }
static inline int tick_broadcast_oneshot_active(void) { return 0; }
static inline void tick_check_oneshot_broadcast_this_cpu(void) { }
static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); }
#endif /* !(BROADCAST && ONESHOT) */
/* NO_HZ_FULL internal */
#ifdef CONFIG_NO_HZ_FULL
extern void tick_nohz_init(void);
# else
static inline void tick_nohz_init(void) { }
#endif
timer: Reduce timer migration overhead if disabled Eric reported that the timer_migration sysctl is not really nice performance wise as it needs to check at every timer insertion whether the feature is enabled or not. Further the check does not live in the timer code, so we have an extra function call which checks an extra cache line to figure out that it is disabled. We can do better and store that information in the per cpu (hr)timer bases. I pondered to use a static key, but that's a nightmare to update from the nohz code and the timer base cache line is hot anyway when we select a timer base. The old logic enabled the timer migration unconditionally if CONFIG_NO_HZ was set even if nohz was disabled on the kernel command line. With this modification, we start off with migration disabled. The user visible sysctl is still set to enabled. If the kernel switches to NOHZ migration is enabled, if the user did not disable it via the sysctl prior to the switch. If nohz=off is on the kernel command line, migration stays disabled no matter what. Before: 47.76% hog [.] main 14.84% [kernel] [k] _raw_spin_lock_irqsave 9.55% [kernel] [k] _raw_spin_unlock_irqrestore 6.71% [kernel] [k] mod_timer 6.24% [kernel] [k] lock_timer_base.isra.38 3.76% [kernel] [k] detach_if_pending 3.71% [kernel] [k] del_timer 2.50% [kernel] [k] internal_add_timer 1.51% [kernel] [k] get_nohz_timer_target 1.28% [kernel] [k] __internal_add_timer 0.78% [kernel] [k] timerfn 0.48% [kernel] [k] wake_up_nohz_cpu After: 48.10% hog [.] main 15.25% [kernel] [k] _raw_spin_lock_irqsave 9.76% [kernel] [k] _raw_spin_unlock_irqrestore 6.50% [kernel] [k] mod_timer 6.44% [kernel] [k] lock_timer_base.isra.38 3.87% [kernel] [k] detach_if_pending 3.80% [kernel] [k] del_timer 2.67% [kernel] [k] internal_add_timer 1.33% [kernel] [k] __internal_add_timer 0.73% [kernel] [k] timerfn 0.54% [kernel] [k] wake_up_nohz_cpu Reported-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Paul McKenney <paulmck@linux.vnet.ibm.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Viresh Kumar <viresh.kumar@linaro.org> Cc: John Stultz <john.stultz@linaro.org> Cc: Joonwoo Park <joonwoop@codeaurora.org> Cc: Wenbo Wang <wenbo.wang@memblaze.com> Link: http://lkml.kernel.org/r/20150526224512.127050787@linutronix.de Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2015-05-26 22:50:33 +00:00
#ifdef CONFIG_NO_HZ_COMMON
extern unsigned long tick_nohz_active;
#else
#define tick_nohz_active (0)
#endif
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
timer: Minimize nohz off overhead If nohz is disabled on the kernel command line the [hr]timer code still calls wake_up_nohz_cpu() and tick_nohz_full_cpu(), a pretty pointless exercise. Cache nohz_active in [hr]timer per cpu bases and avoid the overhead. Before: 48.10% hog [.] main 15.25% [kernel] [k] _raw_spin_lock_irqsave 9.76% [kernel] [k] _raw_spin_unlock_irqrestore 6.50% [kernel] [k] mod_timer 6.44% [kernel] [k] lock_timer_base.isra.38 3.87% [kernel] [k] detach_if_pending 3.80% [kernel] [k] del_timer 2.67% [kernel] [k] internal_add_timer 1.33% [kernel] [k] __internal_add_timer 0.73% [kernel] [k] timerfn 0.54% [kernel] [k] wake_up_nohz_cpu After: 48.73% hog [.] main 15.36% [kernel] [k] _raw_spin_lock_irqsave 9.77% [kernel] [k] _raw_spin_unlock_irqrestore 6.61% [kernel] [k] lock_timer_base.isra.38 6.42% [kernel] [k] mod_timer 3.90% [kernel] [k] detach_if_pending 3.76% [kernel] [k] del_timer 2.41% [kernel] [k] internal_add_timer 1.39% [kernel] [k] __internal_add_timer 0.76% [kernel] [k] timerfn We probably should have a cached value for nohz full in the per cpu bases as well to avoid the cpumask check. The base cache line is hot already, the cpumask not necessarily. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Paul McKenney <paulmck@linux.vnet.ibm.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Eric Dumazet <edumazet@google.com> Cc: Viresh Kumar <viresh.kumar@linaro.org> Cc: John Stultz <john.stultz@linaro.org> Cc: Joonwoo Park <joonwoop@codeaurora.org> Cc: Wenbo Wang <wenbo.wang@memblaze.com> Link: http://lkml.kernel.org/r/20150526224512.207378134@linutronix.de Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2015-05-26 22:50:35 +00:00
extern void timers_update_migration(bool update_nohz);
timer: Reduce timer migration overhead if disabled Eric reported that the timer_migration sysctl is not really nice performance wise as it needs to check at every timer insertion whether the feature is enabled or not. Further the check does not live in the timer code, so we have an extra function call which checks an extra cache line to figure out that it is disabled. We can do better and store that information in the per cpu (hr)timer bases. I pondered to use a static key, but that's a nightmare to update from the nohz code and the timer base cache line is hot anyway when we select a timer base. The old logic enabled the timer migration unconditionally if CONFIG_NO_HZ was set even if nohz was disabled on the kernel command line. With this modification, we start off with migration disabled. The user visible sysctl is still set to enabled. If the kernel switches to NOHZ migration is enabled, if the user did not disable it via the sysctl prior to the switch. If nohz=off is on the kernel command line, migration stays disabled no matter what. Before: 47.76% hog [.] main 14.84% [kernel] [k] _raw_spin_lock_irqsave 9.55% [kernel] [k] _raw_spin_unlock_irqrestore 6.71% [kernel] [k] mod_timer 6.24% [kernel] [k] lock_timer_base.isra.38 3.76% [kernel] [k] detach_if_pending 3.71% [kernel] [k] del_timer 2.50% [kernel] [k] internal_add_timer 1.51% [kernel] [k] get_nohz_timer_target 1.28% [kernel] [k] __internal_add_timer 0.78% [kernel] [k] timerfn 0.48% [kernel] [k] wake_up_nohz_cpu After: 48.10% hog [.] main 15.25% [kernel] [k] _raw_spin_lock_irqsave 9.76% [kernel] [k] _raw_spin_unlock_irqrestore 6.50% [kernel] [k] mod_timer 6.44% [kernel] [k] lock_timer_base.isra.38 3.87% [kernel] [k] detach_if_pending 3.80% [kernel] [k] del_timer 2.67% [kernel] [k] internal_add_timer 1.33% [kernel] [k] __internal_add_timer 0.73% [kernel] [k] timerfn 0.54% [kernel] [k] wake_up_nohz_cpu Reported-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Paul McKenney <paulmck@linux.vnet.ibm.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Viresh Kumar <viresh.kumar@linaro.org> Cc: John Stultz <john.stultz@linaro.org> Cc: Joonwoo Park <joonwoop@codeaurora.org> Cc: Wenbo Wang <wenbo.wang@memblaze.com> Link: http://lkml.kernel.org/r/20150526224512.127050787@linutronix.de Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2015-05-26 22:50:33 +00:00
#else
timer: Minimize nohz off overhead If nohz is disabled on the kernel command line the [hr]timer code still calls wake_up_nohz_cpu() and tick_nohz_full_cpu(), a pretty pointless exercise. Cache nohz_active in [hr]timer per cpu bases and avoid the overhead. Before: 48.10% hog [.] main 15.25% [kernel] [k] _raw_spin_lock_irqsave 9.76% [kernel] [k] _raw_spin_unlock_irqrestore 6.50% [kernel] [k] mod_timer 6.44% [kernel] [k] lock_timer_base.isra.38 3.87% [kernel] [k] detach_if_pending 3.80% [kernel] [k] del_timer 2.67% [kernel] [k] internal_add_timer 1.33% [kernel] [k] __internal_add_timer 0.73% [kernel] [k] timerfn 0.54% [kernel] [k] wake_up_nohz_cpu After: 48.73% hog [.] main 15.36% [kernel] [k] _raw_spin_lock_irqsave 9.77% [kernel] [k] _raw_spin_unlock_irqrestore 6.61% [kernel] [k] lock_timer_base.isra.38 6.42% [kernel] [k] mod_timer 3.90% [kernel] [k] detach_if_pending 3.76% [kernel] [k] del_timer 2.41% [kernel] [k] internal_add_timer 1.39% [kernel] [k] __internal_add_timer 0.76% [kernel] [k] timerfn We probably should have a cached value for nohz full in the per cpu bases as well to avoid the cpumask check. The base cache line is hot already, the cpumask not necessarily. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Paul McKenney <paulmck@linux.vnet.ibm.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Eric Dumazet <edumazet@google.com> Cc: Viresh Kumar <viresh.kumar@linaro.org> Cc: John Stultz <john.stultz@linaro.org> Cc: Joonwoo Park <joonwoop@codeaurora.org> Cc: Wenbo Wang <wenbo.wang@memblaze.com> Link: http://lkml.kernel.org/r/20150526224512.207378134@linutronix.de Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2015-05-26 22:50:35 +00:00
static inline void timers_update_migration(bool update_nohz) { }
timer: Reduce timer migration overhead if disabled Eric reported that the timer_migration sysctl is not really nice performance wise as it needs to check at every timer insertion whether the feature is enabled or not. Further the check does not live in the timer code, so we have an extra function call which checks an extra cache line to figure out that it is disabled. We can do better and store that information in the per cpu (hr)timer bases. I pondered to use a static key, but that's a nightmare to update from the nohz code and the timer base cache line is hot anyway when we select a timer base. The old logic enabled the timer migration unconditionally if CONFIG_NO_HZ was set even if nohz was disabled on the kernel command line. With this modification, we start off with migration disabled. The user visible sysctl is still set to enabled. If the kernel switches to NOHZ migration is enabled, if the user did not disable it via the sysctl prior to the switch. If nohz=off is on the kernel command line, migration stays disabled no matter what. Before: 47.76% hog [.] main 14.84% [kernel] [k] _raw_spin_lock_irqsave 9.55% [kernel] [k] _raw_spin_unlock_irqrestore 6.71% [kernel] [k] mod_timer 6.24% [kernel] [k] lock_timer_base.isra.38 3.76% [kernel] [k] detach_if_pending 3.71% [kernel] [k] del_timer 2.50% [kernel] [k] internal_add_timer 1.51% [kernel] [k] get_nohz_timer_target 1.28% [kernel] [k] __internal_add_timer 0.78% [kernel] [k] timerfn 0.48% [kernel] [k] wake_up_nohz_cpu After: 48.10% hog [.] main 15.25% [kernel] [k] _raw_spin_lock_irqsave 9.76% [kernel] [k] _raw_spin_unlock_irqrestore 6.50% [kernel] [k] mod_timer 6.44% [kernel] [k] lock_timer_base.isra.38 3.87% [kernel] [k] detach_if_pending 3.80% [kernel] [k] del_timer 2.67% [kernel] [k] internal_add_timer 1.33% [kernel] [k] __internal_add_timer 0.73% [kernel] [k] timerfn 0.54% [kernel] [k] wake_up_nohz_cpu Reported-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Paul McKenney <paulmck@linux.vnet.ibm.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Viresh Kumar <viresh.kumar@linaro.org> Cc: John Stultz <john.stultz@linaro.org> Cc: Joonwoo Park <joonwoop@codeaurora.org> Cc: Wenbo Wang <wenbo.wang@memblaze.com> Link: http://lkml.kernel.org/r/20150526224512.127050787@linutronix.de Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2015-05-26 22:50:33 +00:00
#endif
DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);