58687acba5
The new nmi_watchdog (which uses the perf event subsystem) is very
similar in structure to the softlockup detector.  Using Ingo's
suggestion, I combined the two functionalities into one file,
kernel/watchdog.c.

Now both the nmi_watchdog (or hardlockup detector) and the softlockup
detector sit on top of the perf event subsystem, which runs every
60 seconds or so to see if there are any lockups.

To detect hardlockups, cpus not responding to interrupts, I implemented
an hrtimer that runs 5 times for every perf event overflow event.  If
that stops counting on a cpu, then the cpu is most likely in trouble.

To detect softlockups, tasks not yielding to the scheduler, I used the
previous kthread idea that now gets kicked every time the hrtimer fires.
If the kthread isn't being scheduled, neither is anyone else, and the
warning is printed to the console.

I tested this on x86_64 and both the softlockup and hardlockup paths
work.

V2:
- cleaned up the Kconfig and softlockup combination
- surrounded hardlockup cases with #ifdef CONFIG_PERF_EVENTS_NMI
- separated out the softlockup case from the perf event subsystem
- re-arranged the enabling/disabling of the nmi watchdog from proc space
- added cpumasks for hardlockup failure cases
- removed fallback to soft events if no PMU exists for hard events

V3:
- comment cleanups
- drop support for older softlockup code
- per_cpu cleanups
- completely remove the software clock based hardlockup detector
- use per_cpu masking on hard/soft lockup detection
- #ifdef cleanups
- rename config option NMI_WATCHDOG to LOCKUP_DETECTOR
- documentation additions

V4:
- documentation fixes
- convert per_cpu to __get_cpu_var
- powerpc compile fixes

V5:
- split apart warn flags for hard and soft lockups

TODO:
- figure out how to make an arch-agnostic clock2cycles call (if
  possible) to feed into perf events as a sample period

[fweisbec: merged conflict patch]

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Eric Paris <eparis@redhat.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
LKML-Reference: <1273266711-18706-2-git-send-email-dzickus@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
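
The scheme described above boils down to two per-cpu heartbeats checked
from different contexts: the hrtimer proves the cpu still takes
interrupts (checked by the perf NMI), and the kthread proves the
scheduler still runs (checked by the hrtimer).  Below is a minimal
sketch of that idea, not the actual kernel/watchdog.c code: the names
(wd_timer_fn, wd_nmi_check, hrtimer_ticks, and so on) are illustrative,
the hrtimer/kthread/perf-event setup is omitted, and the 60 second and
12 second thresholds are assumed values chosen to match the "every 60
seconds or so" and "5 times per overflow" description in the changelog.

#include <linux/hrtimer.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(unsigned long, hrtimer_ticks);   /* bumped by the hrtimer */
static DEFINE_PER_CPU(unsigned long, ticks_saved);     /* last value the NMI saw */
static DEFINE_PER_CPU(unsigned long, kthread_touch);   /* jiffies, updated by the kthread */
static DEFINE_PER_CPU(struct task_struct *, wd_kthread);

/* hrtimer callback: fires ~5 times per perf sample period (~12s here) */
static enum hrtimer_restart wd_timer_fn(struct hrtimer *hrtimer)
{
        /* heartbeat for the hardlockup check: this cpu takes interrupts */
        __get_cpu_var(hrtimer_ticks)++;

        /* kick the per-cpu kthread; it refreshes kthread_touch when it runs */
        if (__get_cpu_var(wd_kthread))
                wake_up_process(__get_cpu_var(wd_kthread));

        /* softlockup: the kthread has not been scheduled for too long */
        if (time_after(jiffies, __get_cpu_var(kthread_touch) + 60 * HZ))
                printk(KERN_WARNING "BUG: soft lockup on CPU#%d\n",
                       smp_processor_id());

        hrtimer_forward_now(hrtimer, ns_to_ktime(12ULL * NSEC_PER_SEC));
        return HRTIMER_RESTART;
}

/* called from the perf event NMI overflow path, once per sample period */
static void wd_nmi_check(void)
{
        /* hardlockup: the hrtimer has not fired since the last overflow */
        if (__get_cpu_var(hrtimer_ticks) == __get_cpu_var(ticks_saved))
                printk(KERN_WARNING "BUG: hard lockup on CPU#%d\n",
                       smp_processor_id());

        __get_cpu_var(ticks_saved) = __get_cpu_var(hrtimer_ticks);
}

The arch-specific x86 file reproduced below supplies the pieces such a
scheme cannot provide generically: a check for whether a cpu is stuck
(hw_nmi_is_cpu_stuck) and the perf sample period in cycles
(hw_nmi_get_sample_period).
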
/*
 *  HW NMI watchdog support
 *
 *  started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
 *
 *  Arch specific calls to support NMI watchdog
 *
 *  Bits copied from original nmi.c file
 *
 */

#include <asm/apic.h>
#include <linux/smp.h>
#include <linux/cpumask.h>
#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/kernel_stat.h>
#include <asm/mce.h>

#include <linux/nmi.h>
#include <linux/module.h>

/* For reliability, we're prepared to waste bits here. */
static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;

static DEFINE_PER_CPU(unsigned, last_irq_sum);

/*
 * Take the local apic timer and PIT/HPET into account.  We don't
 * know which one is active, when we have highres/dyntick on.
 */
static inline unsigned int get_timer_irqs(int cpu)
{
        unsigned int irqs = per_cpu(irq_stat, cpu).irq0_irqs;

#if defined(CONFIG_X86_LOCAL_APIC)
        irqs += per_cpu(irq_stat, cpu).apic_timer_irqs;
#endif

        return irqs;
}

static inline int mce_in_progress(void)
{
#if defined(CONFIG_X86_MCE)
        return atomic_read(&mce_entry) > 0;
#endif
        return 0;
}
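
/*
 * Decide whether this cpu looks stuck: it is considered stuck if its
 * timer interrupt counts have not advanced since the previous check
 * (bits copied from the original nmi.c watchdog).  Also services any
 * pending backtrace request from arch_trigger_all_cpu_backtrace()
 * below.  Expected to be called from NMI context, presumably the perf
 * event overflow path of the new detector described in the changelog.
 */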
int hw_nmi_is_cpu_stuck(struct pt_regs *regs)
{
        unsigned int sum;
        int cpu = smp_processor_id();

        /* FIXME: cheap hack for this check, probably should get its own
         * die_notifier handler
         */
        if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
                static DEFINE_SPINLOCK(lock);   /* Serialise the printks */

                spin_lock(&lock);
                printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
                show_regs(regs);
                dump_stack();
                spin_unlock(&lock);
                cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
        }

        /* if we are doing an mce, just assume the cpu is not stuck */
        /* Could check oops_in_progress here too, but it's safer not to */
        if (mce_in_progress())
                return 0;

        /* We determine if the cpu is stuck by checking whether any
         * interrupts have happened since we last checked.  Of course
         * an nmi storm could create false positives, but the higher
         * level logic should account for that.
         */
        sum = get_timer_irqs(cpu);
        if (__get_cpu_var(last_irq_sum) == sum) {
                return 1;
        } else {
                __get_cpu_var(last_irq_sum) = sum;
                return 0;
        }
}
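
/*
 * The perf event sample period, in cycles: cpu_khz * 1000 is cycles
 * per second, so this is roughly 60 seconds' worth of cycles, matching
 * the "every 60 seconds or so" check interval in the changelog.  The
 * changelog TODO about an arch-agnostic clock2cycles call concerns
 * this kind of time-to-cycles conversion.
 */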
u64 hw_nmi_get_sample_period(void)
{
        return (u64)(cpu_khz) * 1000 * 60;
}
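
/*
 * Ask every online cpu to dump its registers and stack from NMI
 * context: set a bit per cpu in backtrace_mask, send an NMI IPI to all
 * cpus, then wait up to 10 seconds for each cpu to clear its bit,
 * which hw_nmi_is_cpu_stuck() above does once its backtrace is printed.
 */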
#ifdef ARCH_HAS_NMI_WATCHDOG
void arch_trigger_all_cpu_backtrace(void)
{
        int i;

        cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);

        printk(KERN_INFO "sending NMI to all CPUs:\n");
        apic->send_IPI_all(NMI_VECTOR);

        /* Wait for up to 10 seconds for all CPUs to do the backtrace */
        for (i = 0; i < 10 * 1000; i++) {
                if (cpumask_empty(to_cpumask(backtrace_mask)))
                        break;
                mdelay(1);
        }
}
#endif
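
/*
 * The symbols below belong to the old APIC nmi_watchdog interface.
 * Providing empty stubs here presumably keeps code that still
 * references that interface (e.g. oprofile, which looks at nmi_active)
 * building while the new lockup detector takes over.
 */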
/* STUB calls to mimic old nmi_watchdog behaviour */
#if defined(CONFIG_X86_LOCAL_APIC)
unsigned int nmi_watchdog = NMI_NONE;
EXPORT_SYMBOL(nmi_watchdog);
void acpi_nmi_enable(void) { return; }
void acpi_nmi_disable(void) { return; }
#endif
atomic_t nmi_active = ATOMIC_INIT(0);   /* oprofile uses this */
EXPORT_SYMBOL(nmi_active);
int unknown_nmi_panic;
void cpu_nmi_set_wd_enabled(void) { return; }
void stop_apic_nmi_watchdog(void *unused) { return; }
void setup_apic_nmi_watchdog(void *unused) { return; }
int __init check_nmi_watchdog(void) { return 0; }