From d180d7249f75d2c751479e15ff719d4ac2d0a15a Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Tue, 24 Oct 2023 11:07:36 +0000 Subject: [PATCH 1/4] printk: Add panic_in_progress helper mainline inclusion from mainline-v5.18-rc1 commit 77498617857f68496b360081dde1a492d40c28b2 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I89RYC CVE: NA ------------------------------------------------- This will be used to help avoid deadlocks during panics. Although it would be better to include this in linux/panic.h, it would require that header to include linux/atomic.h as well. On some architectures, this results in a circular dependency as well. So instead add the helper directly to printk.c. Suggested-by: Petr Mladek Signed-off-by: Stephen Brennan Reviewed-by: Petr Mladek Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220202171821.179394-2-stephen.s.brennan@oracle.com Signed-off-by: Ye Weihua --- kernel/printk/printk.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index d0fbbc23ad6b..f86160077222 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -248,6 +248,11 @@ static void __up_console_sem(unsigned long ip) } #define up_console_sem() __up_console_sem(_RET_IP_) +static bool panic_in_progress(void) +{ + return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID); +} + /* * This is used for debugging the mess that is the VT code by * keeping track if we have the console semaphore held. 
It's -- Gitee From eed9d660d6a7ea91e93cb24656505752d76d5337 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Tue, 24 Oct 2023 11:07:37 +0000 Subject: [PATCH 2/4] printk: disable optimistic spin during panic mainline inclusion from mainline-v5.18-rc1 commit d51507098ff91e863b6e0a8047507741d59b8175 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I89RYC CVE: NA ------------------------------------------------- A CPU executing with console lock spinning enabled might be halted during a panic. Before the panicking CPU calls console_flush_on_panic(), it may call console_trylock(), which attempts to optimistically spin, deadlocking the panic CPU: CPU 0 (panic CPU) CPU 1 ----------------- ------ printk() { vprintk_func() { vprintk_default() { vprintk_emit() { console_unlock() { console_lock_spinning_enable(); ... printing to console ... panic() { crash_smp_send_stop() { NMI -------------------> HALT } atomic_notifier_call_chain() { printk() { ... console_trylock_spinning() { // optimistic spin infinitely This hang during panic can be induced when a kdump kernel is loaded, and crash_kexec_post_notifiers=1 is present on the kernel command line. The following script which concurrently writes to /dev/kmsg, and triggers a panic, can result in this hang: #!/bin/bash date # 991 chars (based on log buffer size): chars="$(printf 'a%.0s' {1..991})" while :; do echo $chars > /dev/kmsg done & echo c > /proc/sysrq-trigger & date exit To avoid this deadlock, ensure that console_trylock_spinning() does not allow spinning once a panic has begun. 
Fixes: dbdda842fe96 ("printk: Add console owner and waiter logic to load balance console writes") Suggested-by: Petr Mladek Signed-off-by: Stephen Brennan Reviewed-by: Petr Mladek Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220202171821.179394-3-stephen.s.brennan@oracle.com Signed-off-by: Ye Weihua --- kernel/printk/printk.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index f86160077222..d4ce108b2f69 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1719,6 +1719,16 @@ static int console_trylock_spinning(void) if (console_trylock()) return 1; + /* + * It's unsafe to spin once a panic has begun. If we are the + * panic CPU, we may have already halted the owner of the + * console_sem. If we are not the panic CPU, then we should + * avoid taking console_sem, so the panic CPU has a better + * chance of cleanly acquiring it later. + */ + if (panic_in_progress()) + return 0; + printk_safe_enter_irqsave(flags); raw_spin_lock(&console_owner_lock); -- Gitee From 68d20c52caaabe0d292ac6dca162e47b3630bcf0 Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Tue, 24 Oct 2023 11:07:38 +0000 Subject: [PATCH 3/4] printk: Avoid livelock with heavy printk during panic mainline inclusion from mainline-v5.18-rc1 commit 13fb0f74d7029df3b8137f11ef955e578a4a4a60 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I89RYC CVE: NA ------------------------------------------------- During panic(), if another CPU is writing heavily to the kernel log (e.g. via /dev/kmsg), then the panic CPU may livelock writing out its messages to the console. Note when too many messages are dropped during panic and suppress further printk, except from the panic CPU. This could result in some important messages being dropped. However, messages are already being dropped, so this approach at least prevents a livelock. 
Reviewed-by: Petr Mladek Signed-off-by: Stephen Brennan Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220202171821.179394-4-stephen.s.brennan@oracle.com Conflict: kernel/printk/printk.c Signed-off-by: Ye Weihua --- kernel/printk/printk.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index d4ce108b2f69..765c4fd9c5d0 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -84,6 +84,12 @@ static DEFINE_SEMAPHORE(console_sem); struct console *console_drivers; EXPORT_SYMBOL_GPL(console_drivers); +/* + * During panic, heavy printk by other CPUs can delay the + * panic and risk deadlock on console resources. + */ +int __read_mostly suppress_panic_printk; + #ifdef CONFIG_LOCKDEP static struct lockdep_map console_lock_dep_map = { .name = "console_lock" @@ -1973,6 +1979,10 @@ asmlinkage int vprintk_emit(int facility, int level, unsigned long flags; u64 curr_log_seq; + if (unlikely(suppress_panic_printk) && + atomic_read(&panic_cpu) != raw_smp_processor_id()) + return 0; + if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; in_sched = true; @@ -2395,6 +2405,7 @@ void console_unlock(void) { static char ext_text[CONSOLE_EXT_LOG_MAX]; static char text[LOG_LINE_MAX + PREFIX_MAX]; + static int panic_console_dropped; unsigned long flags; bool do_cond_resched, retry; @@ -2447,6 +2458,10 @@ void console_unlock(void) /* messages are gone, move to first one */ console_seq = log_first_seq; console_idx = log_first_idx; + if (panic_in_progress() && panic_console_dropped++ > 10) { + suppress_panic_printk = 1; + pr_warn_once("Too many dropped messages. 
Suppress messages on non-panic CPUs to prevent livelock.\n"); + } } else { len = 0; } -- Gitee From 608e95275698e1a34ff0df81872128d8d5565b1f Mon Sep 17 00:00:00 2001 From: Stephen Brennan Date: Tue, 24 Oct 2023 11:07:39 +0000 Subject: [PATCH 4/4] printk: Drop console_sem during panic mainline inclusion from mainline-v5.18-rc1 commit 8ebc476fd51e6c0fd3174ec1959a20ba99d4c5e5 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I89RYC CVE: NA ------------------------------------------------- If another CPU is in panic, we are about to be halted. Try to gracefully abandon the console_sem, leaving it free for the panic CPU to grab. Suggested-by: Petr Mladek Signed-off-by: Stephen Brennan Reviewed-by: Petr Mladek Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Link: https://lore.kernel.org/r/20220202171821.179394-5-stephen.s.brennan@oracle.com Conflict: kernel/printk/printk.c Signed-off-by: Ye Weihua --- kernel/printk/printk.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 765c4fd9c5d0..f52eceb3c48a 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2375,6 +2375,25 @@ static int have_callable_console(void) return 0; } +/* + * Return true when this CPU should unlock console_sem without pushing all + * messages to the console. This reduces the chance that the console is + * locked when the panic CPU tries to use it. + */ +static bool abandon_console_lock_in_panic(void) +{ + if (!panic_in_progress()) + return false; + + /* + * We can use raw_smp_processor_id() here because it is impossible for + * the task to be migrated to the panic_cpu, or away from it. If + * panic_cpu has already been set, and we're not currently executing on + * that CPU, then we never will be. + */ + return atomic_read(&panic_cpu) != raw_smp_processor_id(); +} + /* * Can we actually use the console at this time on this cpu? 
* @@ -2522,6 +2541,10 @@ void console_unlock(void) printk_safe_exit_irqrestore(flags); + /* Allow panic_cpu to take over the consoles safely */ + if (abandon_console_lock_in_panic()) + break; + if (do_cond_resched) cond_resched(); } @@ -2543,7 +2566,7 @@ void console_unlock(void) raw_spin_unlock(&logbuf_lock); printk_safe_exit_irqrestore(flags); - if (retry && console_trylock()) + if (retry && !abandon_console_lock_in_panic() && console_trylock()) goto again; } EXPORT_SYMBOL(console_unlock); -- Gitee