From c7c3f05e341a9a2bd1a92993d4f996cfd6e7348e Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 25 Oct 2018 19:10:36 +0900 Subject: [PATCH 1/4] panic: avoid deadlocks in re-entrant console drivers From printk()/serial console point of view panic() is special, because it may force CPU to re-enter printk() or/and serial console driver. Therefore, some of serial consoles drivers are re-entrant. E.g. 8250: serial8250_console_write() { if (port->sysrq) locked = 0; else if (oops_in_progress) locked = spin_trylock_irqsave(&port->lock, flags); else spin_lock_irqsave(&port->lock, flags); ... } panic() does set oops_in_progress via bust_spinlocks(1), so in theory we should be able to re-enter serial console driver from panic(): CPU0 uart_console_write() serial8250_console_write() // if (oops_in_progress) // spin_trylock_irqsave() call_console_drivers() console_unlock() console_flush_on_panic() bust_spinlocks(1) // oops_in_progress++ panic() spin_lock_irqsave(&port->lock, flags) // spin_lock_irqsave() serial8250_console_write() call_console_drivers() console_unlock() printk() ... However, this does not happen and we deadlock in serial console on port->lock spinlock. And the problem is that console_flush_on_panic() called after bust_spinlocks(0): void panic(const char *fmt, ...) { bust_spinlocks(1); ... bust_spinlocks(0); console_flush_on_panic(); ... } bust_spinlocks(0) decrements oops_in_progress, so oops_in_progress can go back to zero. Thus even re-entrant console drivers will simply spin on port->lock spinlock. Given that port->lock may already be locked either by a stopped CPU, or by the very same CPU we execute panic() on (for instance, NMI panic() on printing CPU) the system deadlocks and does not reboot. Fix this by removing bust_spinlocks(0), so oops_in_progress is always set in panic() now and, thus, re-entrant console drivers will trylock the port->lock instead of spinning on it forever, when we call them from console_flush_on_panic(). Link: http://lkml.kernel.org/r/20181025101036.6823-1-sergey.senozhatsky@gmail.com Cc: Steven Rostedt Cc: Daniel Wang Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Cc: Greg Kroah-Hartman Cc: Alan Cox Cc: Jiri Slaby Cc: Peter Feiner Cc: linux-serial@vger.kernel.org Cc: Sergey Senozhatsky Cc: stable@vger.kernel.org Signed-off-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/panic.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/panic.c b/kernel/panic.c index 8b2e002d52eb..6a6df23acd1a 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -233,7 +234,10 @@ void panic(const char *fmt, ...) if (_crash_kexec_post_notifiers) __crash_kexec(NULL); - bust_spinlocks(0); +#ifdef CONFIG_VT + unblank_screen(); +#endif + console_unblank(); /* * We may have ended up stopping the CPU holding the lock (in From 9adcfaffc34d53e498637237fb3701560359d50b Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 24 Nov 2018 13:10:25 +0900 Subject: [PATCH 2/4] printk: Make printk_emit() local function. printk_emit() is called from only devkmsg_write() in the same file. Save object size by making it a local function. Link: http://lkml.kernel.org/r/5cc99d2c-c408-34f7-d1fc-e1cd2a9e31da@i-love.sakura.ne.jp Cc: Steven Rostedt Signed-off-by: Tetsuo Handa Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- include/linux/printk.h | 5 ----- kernel/printk/printk.c | 30 ++++++++++++++---------------- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/include/linux/printk.h b/include/linux/printk.h index cf3eccfe1543..55aa96975fa2 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -166,11 +166,6 @@ int vprintk_emit(int facility, int level, asmlinkage __printf(1, 0) int vprintk(const char *fmt, va_list args); -asmlinkage __printf(5, 6) __cold -int printk_emit(int facility, int level, - const char *dict, size_t dictlen, - const char *fmt, ...); - asmlinkage __printf(1, 2) __cold int printk(const char *fmt, ...); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b77150ad1965..a1d88212a5d2 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -753,6 +753,19 @@ struct devkmsg_user { char buf[CONSOLE_EXT_LOG_MAX]; }; +static __printf(3, 4) __cold +int devkmsg_emit(int facility, int level, const char *fmt, ...) +{ + va_list args; + int r; + + va_start(args, fmt); + r = vprintk_emit(facility, level, NULL, 0, fmt, args); + va_end(args); + + return r; +} + static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) { char *buf, *line; @@ -811,7 +824,7 @@ static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from) } } - printk_emit(facility, level, NULL, 0, "%s", line); + devkmsg_emit(facility, level, "%s", line); kfree(buf); return ret; } @@ -1936,21 +1949,6 @@ asmlinkage int vprintk(const char *fmt, va_list args) } EXPORT_SYMBOL(vprintk); -asmlinkage int printk_emit(int facility, int level, - const char *dict, size_t dictlen, - const char *fmt, ...) -{ - va_list args; - int r; - - va_start(args, fmt); - r = vprintk_emit(facility, level, dict, dictlen, fmt, args); - va_end(args); - - return r; -} -EXPORT_SYMBOL(printk_emit); - int vprintk_default(const char *fmt, va_list args) { int r; From e80c1a9d5f514ce5134c6c4263a11607341466c9 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 4 Dec 2018 19:00:01 +0900 Subject: [PATCH 3/4] printk: fix printk_time race. Since printk_time can be toggled via /sys/module/printk/parameters/time , it is not safe to assume that output length does not change across multiple msg_print_text() calls. If we hit this race, we can observe failures such as SYSLOG_ACTION_READ_ALL writes more bytes than userspace has supplied, SYSLOG_ACTION_SIZE_UNREAD returns -EFAULT when succeeded, SYSLOG_ACTION_READ reads garbage memory or even triggers an kernel oops at _copy_to_user() due to integer overflow. To close this race, get a snapshot value of printk_time and pass it to SYSLOG_ACTION_READ, SYSLOG_ACTION_READ_ALL, SYSLOG_ACTION_SIZE_UNREAD and kmsg_dump_get_buffer(). Link: http://lkml.kernel.org/r/555af37c-b9e0-f940-cb73-a78eba2d4944@i-love.sakura.ne.jp To: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Signed-off-by: Tetsuo Handa Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 70 +++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 31 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index a1d88212a5d2..c7d217764db3 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -404,6 +404,7 @@ DECLARE_WAIT_QUEUE_HEAD(log_wait); static u64 syslog_seq; static u32 syslog_idx; static size_t syslog_partial; +static bool syslog_time; /* index and sequence number of the first record stored in the buffer */ static u64 log_first_seq; @@ -1229,12 +1230,7 @@ module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); static size_t print_time(u64 ts, char *buf) { - unsigned long rem_nsec; - - if (!printk_time) - return 0; - - rem_nsec = do_div(ts, 1000000000); + unsigned long rem_nsec = do_div(ts, 1000000000); if (!buf) return snprintf(NULL, 0, "[%5lu.000000] ", (unsigned long)ts); @@ -1243,7 +1239,8 @@ static size_t print_time(u64 ts, char *buf) (unsigned long)ts, rem_nsec / 1000); } -static size_t print_prefix(const struct printk_log *msg, bool syslog, char *buf) +static size_t print_prefix(const struct printk_log *msg, bool syslog, + bool time, char *buf) { size_t len = 0; unsigned int prefix = (msg->facility << 3) | msg->level; @@ -1262,11 +1259,13 @@ static size_t print_prefix(const struct printk_log *msg, bool syslog, char *buf) } } - len += print_time(msg->ts_nsec, buf ? buf + len : NULL); + if (time) + len += print_time(msg->ts_nsec, buf ? buf + len : NULL); return len; } -static size_t msg_print_text(const struct printk_log *msg, bool syslog, char *buf, size_t size) +static size_t msg_print_text(const struct printk_log *msg, bool syslog, + bool time, char *buf, size_t size) { const char *text = log_text(msg); size_t text_size = msg->text_len; @@ -1285,17 +1284,17 @@ static size_t msg_print_text(const struct printk_log *msg, bool syslog, char *bu } if (buf) { - if (print_prefix(msg, syslog, NULL) + + if (print_prefix(msg, syslog, time, NULL) + text_len + 1 >= size - len) break; - len += print_prefix(msg, syslog, buf + len); + len += print_prefix(msg, syslog, time, buf + len); memcpy(buf + len, text, text_len); len += text_len; buf[len++] = '\n'; } else { /* SYSLOG_ACTION_* buffer size only calculation */ - len += print_prefix(msg, syslog, NULL); + len += print_prefix(msg, syslog, time, NULL); len += text_len; len++; } @@ -1332,9 +1331,17 @@ static int syslog_print(char __user *buf, int size) break; } + /* + * To keep reading/counting partial line consistent, + * use printk_time value as of the beginning of a line. + */ + if (!syslog_partial) + syslog_time = printk_time; + skip = syslog_partial; msg = log_from_idx(syslog_idx); - n = msg_print_text(msg, true, text, LOG_LINE_MAX + PREFIX_MAX); + n = msg_print_text(msg, true, syslog_time, text, + LOG_LINE_MAX + PREFIX_MAX); if (n - syslog_partial <= size) { /* message fits into buffer, move forward */ syslog_idx = log_next(syslog_idx); @@ -1374,11 +1381,13 @@ static int syslog_print_all(char __user *buf, int size, bool clear) u64 next_seq; u64 seq; u32 idx; + bool time; text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); if (!text) return -ENOMEM; + time = printk_time; logbuf_lock_irq(); /* * Find first record that fits, including all following records, @@ -1389,7 +1398,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) while (seq < log_next_seq) { struct printk_log *msg = log_from_idx(idx); - len += msg_print_text(msg, true, NULL, 0); + len += msg_print_text(msg, true, time, NULL, 0); idx = log_next(idx); seq++; } @@ -1400,7 +1409,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) while (len > size && seq < log_next_seq) { struct printk_log *msg = log_from_idx(idx); - len -= msg_print_text(msg, true, NULL, 0); + len -= msg_print_text(msg, true, time, NULL, 0); idx = log_next(idx); seq++; } @@ -1411,14 +1420,9 @@ static int syslog_print_all(char __user *buf, int size, bool clear) len = 0; while (len >= 0 && seq < next_seq) { struct printk_log *msg = log_from_idx(idx); - int textlen; + int textlen = msg_print_text(msg, true, time, text, + LOG_LINE_MAX + PREFIX_MAX); - textlen = msg_print_text(msg, true, text, - LOG_LINE_MAX + PREFIX_MAX); - if (textlen < 0) { - len = textlen; - break; - } idx = log_next(idx); seq++; @@ -1542,11 +1546,14 @@ int do_syslog(int type, char __user *buf, int len, int source) } else { u64 seq = syslog_seq; u32 idx = syslog_idx; + bool time = syslog_partial ? syslog_time : printk_time; while (seq < log_next_seq) { struct printk_log *msg = log_from_idx(idx); - error += msg_print_text(msg, true, NULL, 0); + error += msg_print_text(msg, true, time, NULL, + 0); + time = printk_time; idx = log_next(idx); seq++; } @@ -2004,6 +2011,7 @@ EXPORT_SYMBOL(printk); #define LOG_LINE_MAX 0 #define PREFIX_MAX 0 +#define printk_time false static u64 syslog_seq; static u32 syslog_idx; @@ -2027,8 +2035,8 @@ static void console_lock_spinning_enable(void) { } static int console_lock_spinning_disable_and_check(void) { return 0; } static void call_console_drivers(const char *ext_text, size_t ext_len, const char *text, size_t len) {} -static size_t msg_print_text(const struct printk_log *msg, - bool syslog, char *buf, size_t size) { return 0; } +static size_t msg_print_text(const struct printk_log *msg, bool syslog, + bool time, char *buf, size_t size) { return 0; } static bool suppress_message_printing(int level) { return false; } #endif /* CONFIG_PRINTK */ @@ -2386,8 +2394,7 @@ skip: len += msg_print_text(msg, console_msg_format & MSG_FORMAT_SYSLOG, - text + len, - sizeof(text) - len); + printk_time, text + len, sizeof(text) - len); if (nr_ext_console_drivers) { ext_len = msg_print_ext_header(ext_text, sizeof(ext_text), @@ -3111,7 +3118,7 @@ bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, goto out; msg = log_from_idx(dumper->cur_idx); - l = msg_print_text(msg, syslog, line, size); + l = msg_print_text(msg, syslog, printk_time, line, size); dumper->cur_idx = log_next(dumper->cur_idx); dumper->cur_seq++; @@ -3182,6 +3189,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, u32 next_idx; size_t l = 0; bool ret = false; + bool time = printk_time; if (!dumper->active) goto out; @@ -3205,7 +3213,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, while (seq < dumper->next_seq) { struct printk_log *msg = log_from_idx(idx); - l += msg_print_text(msg, true, NULL, 0); + l += msg_print_text(msg, true, time, NULL, 0); idx = log_next(idx); seq++; } @@ -3216,7 +3224,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, while (l > size && seq < dumper->next_seq) { struct printk_log *msg = log_from_idx(idx); - l -= msg_print_text(msg, true, NULL, 0); + l -= msg_print_text(msg, true, time, NULL, 0); idx = log_next(idx); seq++; } @@ -3229,7 +3237,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, while (seq < dumper->next_seq) { struct printk_log *msg = log_from_idx(idx); - l += msg_print_text(msg, syslog, buf + l, size - l); + l += msg_print_text(msg, syslog, time, buf + l, size - l); idx = log_next(idx); seq++; } From 07c17732bd687567802aaa5fa5c101c2776565d1 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Tue, 11 Dec 2018 18:49:05 +0900 Subject: [PATCH 4/4] printk: Remove print_prefix() calls with NULL buffer. We can save lines/size by removing print_prefix() with buf == NULL. This patch makes no functional change. Link: http://lkml.kernel.org/r/1544521745-11925-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp To: Steven Rostedt Cc: linux-kernel@vger.kernel.org Signed-off-by: Tetsuo Handa Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- kernel/printk/printk.c | 39 ++++++++++++++------------------------- 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index c7d217764db3..91db332ccf4d 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1228,13 +1228,15 @@ static inline void boot_delay_msec(int level) static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME); module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); +static size_t print_syslog(unsigned int level, char *buf) +{ + return sprintf(buf, "<%u>", level); +} + static size_t print_time(u64 ts, char *buf) { unsigned long rem_nsec = do_div(ts, 1000000000); - if (!buf) - return snprintf(NULL, 0, "[%5lu.000000] ", (unsigned long)ts); - return sprintf(buf, "[%5lu.%06lu] ", (unsigned long)ts, rem_nsec / 1000); } @@ -1243,24 +1245,11 @@ static size_t print_prefix(const struct printk_log *msg, bool syslog, bool time, char *buf) { size_t len = 0; - unsigned int prefix = (msg->facility << 3) | msg->level; - - if (syslog) { - if (buf) { - len += sprintf(buf, "<%u>", prefix); - } else { - len += 3; - if (prefix > 999) - len += 3; - else if (prefix > 99) - len += 2; - else if (prefix > 9) - len++; - } - } + if (syslog) + len = print_syslog((msg->facility << 3) | msg->level, buf); if (time) - len += print_time(msg->ts_nsec, buf ? buf + len : NULL); + len += print_time(msg->ts_nsec, buf + len); return len; } @@ -1270,6 +1259,8 @@ static size_t msg_print_text(const struct printk_log *msg, bool syslog, const char *text = log_text(msg); size_t text_size = msg->text_len; size_t len = 0; + char prefix[PREFIX_MAX]; + const size_t prefix_len = print_prefix(msg, syslog, time, prefix); do { const char *next = memchr(text, '\n', text_size); @@ -1284,19 +1275,17 @@ static size_t msg_print_text(const struct printk_log *msg, bool syslog, } if (buf) { - if (print_prefix(msg, syslog, time, NULL) + - text_len + 1 >= size - len) + if (prefix_len + text_len + 1 >= size - len) break; - len += print_prefix(msg, syslog, time, buf + len); + memcpy(buf + len, prefix, prefix_len); + len += prefix_len; memcpy(buf + len, text, text_len); len += text_len; buf[len++] = '\n'; } else { /* SYSLOG_ACTION_* buffer size only calculation */ - len += print_prefix(msg, syslog, time, NULL); - len += text_len; - len++; + len += prefix_len + text_len + 1; } text = next;