From 6c80f87ed4483bff0ecbf4455a1b09e7f950e9ab Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Sun, 21 Dec 2014 08:18:25 -0800 Subject: [PATCH 1/2] x86, mce: Improve timeout error messages There are four different possible types of timeouts. Distinguish them in the logs to help debug them. Signed-off-by: Andy Lutomirski Link: http://lkml.kernel.org/r/0fa6d2653a54a01c48b43a3583caf950ea99606e.1419178397.git.luto@amacapital.net Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/mcheck/mce.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index d2c611699cd9..323e02c658af 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -311,7 +311,7 @@ static void wait_for_panic(void) panic("Panicing machine check CPU died"); } -static void mce_panic(char *msg, struct mce *final, char *exp) +static void mce_panic(const char *msg, struct mce *final, char *exp) { int i, apei_err = 0; @@ -735,7 +735,7 @@ static atomic_t mce_callin; /* * Check if a timeout waiting for other CPUs happened. */ -static int mce_timed_out(u64 *t) +static int mce_timed_out(u64 *t, const char *msg) { /* * The others already did panic for some reason. @@ -750,8 +750,7 @@ static int mce_timed_out(u64 *t) goto out; if ((s64)*t < SPINUNIT) { if (mca_cfg.tolerant <= 1) - mce_panic("Timeout synchronizing machine check over CPUs", - NULL, NULL); + mce_panic(msg, NULL, NULL); cpu_missing = 1; return 1; } @@ -867,7 +866,8 @@ static int mce_start(int *no_way_out) * Wait for everyone. */ while (atomic_read(&mce_callin) != cpus) { - if (mce_timed_out(&timeout)) { + if (mce_timed_out(&timeout, + "Timeout: Not all CPUs entered broadcast exception handler")) { atomic_set(&global_nwo, 0); return -1; } @@ -892,7 +892,8 @@ static int mce_start(int *no_way_out) * only seen by one CPU before cleared, avoiding duplicates. */ while (atomic_read(&mce_executing) < order) { - if (mce_timed_out(&timeout)) { + if (mce_timed_out(&timeout, + "Timeout: Subject CPUs unable to finish machine check processing")) { atomic_set(&global_nwo, 0); return -1; } @@ -936,7 +937,8 @@ static int mce_end(int order) * loops. */ while (atomic_read(&mce_executing) <= cpus) { - if (mce_timed_out(&timeout)) + if (mce_timed_out(&timeout, + "Timeout: Monarch CPU unable to finish machine check processing")) goto reset; ndelay(SPINUNIT); } @@ -949,7 +951,8 @@ static int mce_end(int order) * Subject: Wait for Monarch to finish. */ while (atomic_read(&mce_executing) != 0) { - if (mce_timed_out(&timeout)) + if (mce_timed_out(&timeout, + "Timeout: Monarch CPU did not finish machine check processing")) goto reset; ndelay(SPINUNIT); } From 83737691e586cd2767fa4fc87cd41251d1a49e9e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 22 Dec 2014 21:04:31 +0100 Subject: [PATCH 2/2] x86, mce: Fix sparse errors Make stuff used in mce.c only, static. Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/mcheck/mce.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 323e02c658af..4c5cd7575d31 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -115,7 +115,7 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); * CPU/chipset specific EDAC code can register a notifier call here to print * MCE errors in a human-readable form. */ -ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); +static ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); /* Do initial initialization of a struct mce */ void mce_setup(struct mce *m) @@ -529,7 +529,7 @@ static void mce_schedule_work(void) schedule_work(this_cpu_ptr(&mce_work)); } -DEFINE_PER_CPU(struct irq_work, mce_irq_work); +static DEFINE_PER_CPU(struct irq_work, mce_irq_work); static void mce_irq_work_cb(struct irq_work *entry) { @@ -1012,7 +1012,7 @@ static void mce_clear_state(unsigned long *toclear) */ #define MCE_INFO_MAX 16 -struct mce_info { +static struct mce_info { atomic_t inuse; struct task_struct *t; __u64 paddr;