diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 8f1a4db8d49b6d2bfabf92e3846374ff50e5cfec..28d54725af5a7c757878417f77effb01c938297c 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -7747,11 +7747,12 @@ CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_HAVE_HARDLOCKUP_DETECTOR_BUDDY=y CONFIG_HARDLOCKUP_DETECTOR=y # CONFIG_HARDLOCKUP_DETECTOR_PREFER_BUDDY is not set -CONFIG_HARDLOCKUP_DETECTOR_PERF=y +# CONFIG_HARDLOCKUP_DETECTOR_PERF is not set # CONFIG_HARDLOCKUP_DETECTOR_BUDDY is not set # CONFIG_HARDLOCKUP_DETECTOR_ARCH is not set CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER=y CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y +CONFIG_SDEI_WATCHDOG=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 # CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index d95b3d6b471a7d63957c47151fd6cb404ca0f4c7..d48aa807dccea30152559614c239fd243616a3c9 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -68,6 +68,7 @@ arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_CRASH_CORE) += crash_core.o obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o +obj-$(CONFIG_SDEI_WATCHDOG) += watchdog_sdei.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o obj-$(CONFIG_ARM64_MTE) += mte.o obj-y += vdso-wrap.o diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 078910db77a41b6ffa60c665debc927dff0c7ca5..40607a4fe3a5751690d4877d082a54fac1993457 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -262,6 +263,16 @@ void machine_crash_shutdown(struct pt_regs *regs) /* shutdown non-crashing cpus */ crash_smp_send_stop(); + /* + * when we panic in hardlockup detected by sdei_watchdog, the secure + * timer interrupt remains activate here because firmware clear eoi + * after dispatch is completed. This will cause arm_arch_timer + * interrupt failed to trigger in the second kernel. So we clear eoi + * of the secure timer before booting the second kernel. + */ + if (in_nmi()) + sdei_watchdog_clear_eoi(); + /* for crashing cpu */ crash_save_cpu(regs, smp_processor_id()); machine_kexec_mask_interrupts(); diff --git a/arch/arm64/kernel/watchdog_sdei.c b/arch/arm64/kernel/watchdog_sdei.c new file mode 100644 index 0000000000000000000000000000000000000000..6f43496de56efc1faf6fc80b4e9a05c6631ba2fa --- /dev/null +++ b/arch/arm64/kernel/watchdog_sdei.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Detect hard lockups on a system + * + * Note: Most of this code is borrowed heavily from the perf hardlockup + * detector, so thanks to Don for the initial implementation. + */ + +#define pr_fmt(fmt) "SDEI NMI watchdog: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +/* We use the secure physical timer as SDEI NMI watchdog timer */ +#define SDEI_NMI_WATCHDOG_HWIRQ 29 + +static int sdei_watchdog_event_num; +bool disable_sdei_nmi_watchdog; +static bool sdei_watchdog_registered; +static DEFINE_PER_CPU(ktime_t, last_check_time); + +void watchdog_hardlockup_enable(unsigned int cpu) +{ + int ret; + + if (!sdei_watchdog_registered) + return; + + /* Skip the first hardlockup check incase BIOS didn't init the + * secure timer correctly */ + watchdog_hardlockup_touch_cpu(cpu); + sdei_api_set_secure_timer_period(watchdog_thresh); + __this_cpu_write(last_check_time, ktime_get_mono_fast_ns()); + + ret = sdei_api_event_enable(sdei_watchdog_event_num); + if (ret) { + pr_err("Enable NMI Watchdog failed on cpu%d\n", + smp_processor_id()); + } +} + +void watchdog_hardlockup_disable(unsigned int cpu) +{ + int ret; + + if (!sdei_watchdog_registered) + return; + + ret = sdei_api_event_disable(sdei_watchdog_event_num); + if (ret) + pr_err("Disable NMI Watchdog failed on cpu%d\n", + smp_processor_id()); +} + +static int sdei_watchdog_callback(u32 event, + struct pt_regs *regs, void *arg) +{ + ktime_t delta, now = ktime_get_mono_fast_ns(); + + delta = now - __this_cpu_read(last_check_time); + __this_cpu_write(last_check_time, now); + + /* + * Set delta to 4/5 of the actual watchdog threshold period so the + * hrtimer is guaranteed to fire at least once within the real + * watchdog threshold. + */ + if (delta < watchdog_thresh * (u64)NSEC_PER_SEC * 4 / 5) { + pr_err(FW_BUG "SDEI Watchdog event triggered too soon, " + "time to last check:%lld ns\n", delta); + return 0; + } + + watchdog_hardlockup_check(smp_processor_id(), regs); + + return 0; +} +NOKPROBE_SYMBOL(sdei_watchdog_callback); + +static void sdei_nmi_watchdog_bind(void *data) +{ + int ret; + + ret = sdei_api_event_interrupt_bind(SDEI_NMI_WATCHDOG_HWIRQ); + if (ret < 0) + pr_err("SDEI bind failed on cpu%d, return %d\n", + smp_processor_id(), ret); +} + +static int __init disable_sdei_nmi_watchdog_setup(char *str) +{ + disable_sdei_nmi_watchdog = true; + return 1; +} +__setup("disable_sdei_nmi_watchdog", disable_sdei_nmi_watchdog_setup); + +void sdei_watchdog_clear_eoi(void) +{ + if (sdei_watchdog_registered) + sdei_api_clear_eoi(SDEI_NMI_WATCHDOG_HWIRQ); +} + +int __init watchdog_hardlockup_probe(void) +{ + int ret; + + if (disable_sdei_nmi_watchdog) + return -EINVAL; + + if (!is_hyp_mode_available()) { + pr_err("Disable SDEI NMI Watchdog in VM\n"); + return -EINVAL; + } + + sdei_watchdog_event_num = sdei_api_event_interrupt_bind(SDEI_NMI_WATCHDOG_HWIRQ); + if (sdei_watchdog_event_num < 0) { + pr_err("Bind interrupt failed. Firmware may not support SDEI !\n"); + return sdei_watchdog_event_num; + } + + /* + * After we introduced 'sdei_api_set_secure_timer_period', we disselect + * 'CONFIG_HARDLOCKUP_CHECK_TIMESTAMP'. So we need to make sure that + * firmware can set the period of the secure timer and the timer + * interrupt doesn't trigger too soon. + */ + if (sdei_api_set_secure_timer_period(watchdog_thresh)) { + pr_err("Firmware doesn't support setting the secure timer period, please update your BIOS !\n"); + return -EINVAL; + } + + on_each_cpu(sdei_nmi_watchdog_bind, NULL, true); + + ret = sdei_event_register(sdei_watchdog_event_num, + sdei_watchdog_callback, NULL); + if (ret) { + pr_err("SDEI Watchdog register callback failed\n"); + return ret; + } + + sdei_watchdog_registered = true; + pr_info("SDEI Watchdog registered successfully\n"); + + return 0; +} diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 285fe7ad490d1ddd904e0dd73518350bc135ef8c..0f7ef69071c0156326171421b62ca23eaf6e2e0a 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -188,6 +188,28 @@ int sdei_api_event_context(u32 query, u64 *result) } NOKPROBE_SYMBOL(sdei_api_event_context); +int sdei_api_event_interrupt_bind(int hwirq) +{ + u64 event_number; + + invoke_sdei_fn(SDEI_1_0_FN_SDEI_INTERRUPT_BIND, hwirq, 0, 0, 0, 0, + &event_number); + + return (int)event_number; +} + +int sdei_api_clear_eoi(int hwirq) +{ + return invoke_sdei_fn(SDEI_1_0_FN_SDEI_CLEAR_EOI, hwirq, 0, 0, 0, 0, + NULL); +} + +int sdei_api_set_secure_timer_period(int sec) +{ + return invoke_sdei_fn(SDEI_1_0_FN_SET_SECURE_TIMER_PERIOD, sec, 0, 0, 0, + 0, NULL); +} + static int sdei_api_event_get_info(u32 event, u32 info, u64 *result) { return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_GET_INFO, event, info, 0, @@ -379,7 +401,7 @@ static int sdei_platform_reset(void) return err; } -static int sdei_api_event_enable(u32 event_num) +int sdei_api_event_enable(u32 event_num) { return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_ENABLE, event_num, 0, 0, 0, 0, NULL); @@ -426,7 +448,7 @@ int sdei_event_enable(u32 event_num) return err; } -static int sdei_api_event_disable(u32 event_num) +int sdei_api_event_disable(u32 event_num) { return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_DISABLE, event_num, 0, 0, 0, 0, NULL); diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index 255701e1251b4ac242456693f998e3940a36851c..28e247dd57731c118f4bdc51a2f1bc6b87aeca30 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -36,6 +36,11 @@ int sdei_event_unregister(u32 event_num); int sdei_event_enable(u32 event_num); int sdei_event_disable(u32 event_num); +int sdei_api_event_interrupt_bind(int hwirq); +int sdei_api_event_disable(u32 event_num); +int sdei_api_event_enable(u32 event_num); +int sdei_api_clear_eoi(int hwirq); +int sdei_api_set_secure_timer_period(int sec); /* GHES register/unregister helpers */ int sdei_register_ghes(struct ghes *ghes, sdei_event_callback *normal_cb, diff --git a/include/linux/nmi.h b/include/linux/nmi.h index e92e378df000fb1eca1e082ebc889fe7849a19d2..7bd446acad24ab70c7cd66bae1fb8cd069f13105 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -235,4 +235,12 @@ static inline void nmi_backtrace_stall_snap(const struct cpumask *btp) {} static inline void nmi_backtrace_stall_check(const struct cpumask *btp) {} #endif +#ifdef CONFIG_SDEI_WATCHDOG +void sdei_watchdog_clear_eoi(void); +extern bool disable_sdei_nmi_watchdog; +#else +static inline void sdei_watchdog_clear_eoi(void) { } +#define disable_sdei_nmi_watchdog 1 +#endif + #endif diff --git a/include/uapi/linux/arm_sdei.h b/include/uapi/linux/arm_sdei.h index af0630ba5437d4f3f15f4ee0b4ca94843a813a9f..a5375679dd503dbef5acc176442ecd073773aef3 100644 --- a/include/uapi/linux/arm_sdei.h +++ b/include/uapi/linux/arm_sdei.h @@ -24,6 +24,8 @@ #define SDEI_1_0_FN_SDEI_INTERRUPT_RELEASE SDEI_1_0_FN(0x0E) #define SDEI_1_0_FN_SDEI_PRIVATE_RESET SDEI_1_0_FN(0x11) #define SDEI_1_0_FN_SDEI_SHARED_RESET SDEI_1_0_FN(0x12) +#define SDEI_1_0_FN_SDEI_CLEAR_EOI SDEI_1_0_FN(0x18) +#define SDEI_1_0_FN_SET_SECURE_TIMER_PERIOD SDEI_1_0_FN(0x19) #define SDEI_VERSION_MAJOR_SHIFT 48 #define SDEI_VERSION_MAJOR_MASK 0x7fff diff --git a/init/main.c b/init/main.c index e24b0780fdff7a807bd027ab26e61fc303c624ef..7bbce78cdccf370b3f210ac990fc425075231547 100644 --- a/init/main.c +++ b/init/main.c @@ -1539,7 +1539,8 @@ static noinline void __init kernel_init_freeable(void) rcu_init_tasks_generic(); do_pre_smp_initcalls(); - lockup_detector_init(); + if (disable_sdei_nmi_watchdog) + lockup_detector_init(); smp_init(); sched_init_smp(); @@ -1550,6 +1551,10 @@ static noinline void __init kernel_init_freeable(void) do_basic_setup(); + /* sdei_watchdog needs to be initialized after sdei_init */ + if (!disable_sdei_nmi_watchdog) + lockup_detector_init(); + kunit_run_all_tests(); wait_for_initramfs(); diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index cedb17ba158a9bf57f3510fbfc8141adbd12b74b..9466d61d21c981c9fa2558c5db44dc62faa9f63f 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -23,6 +23,10 @@ #include #include +#ifdef CONFIG_ARM64 +#include +#endif + /* * Structure to determine completion condition and record errors. May * be shared by works on different cpus. @@ -234,6 +238,9 @@ static int multi_cpu_stop(void *data) case MULTI_STOP_DISABLE_IRQ: local_irq_disable(); hard_irq_disable(); +#ifdef CONFIG_ARM64 + sdei_mask_local_cpu(); +#endif break; case MULTI_STOP_RUN: if (is_active) @@ -254,6 +261,9 @@ static int multi_cpu_stop(void *data) rcu_momentary_dyntick_idle(); } while (curstate != MULTI_STOP_EXIT); +#ifdef CONFIG_ARM64 + sdei_unmask_local_cpu(); +#endif local_irq_restore(flags); return err; } diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 5cd6d4e269157973acecd00afc91646720fee2be..9e349a943cdd404679ff82263f2bcc789cf74d1b 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -127,6 +128,7 @@ static bool is_hardlockup(unsigned int cpu) return false; } +NOKPROBE_SYMBOL(is_hardlockup); static void watchdog_hardlockup_kick(void) { @@ -184,6 +186,7 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs) per_cpu(watchdog_hardlockup_warned, cpu) = false; } } +NOKPROBE_SYMBOL(watchdog_hardlockup_check); #else /* CONFIG_HARDLOCKUP_DETECTOR_COUNTS_HRTIMER */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index fa307f93fa2e20d043b4a1c9094023aedd3d1d92..cee4d3f7582097b426730170f0caa6d93465e64e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1045,6 +1045,12 @@ config HAVE_HARDLOCKUP_DETECTOR_BUDDY depends on SMP default y +config SDEI_WATCHDOG + bool "SDEI NMI Watchdog support" + depends on ARM_SDE_INTERFACE + depends on HARDLOCKUP_DETECTOR + select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER + # # Global switch whether to build a hardlockup detector at all. It is available # only when the architecture supports at least one implementation. There are @@ -1061,6 +1067,7 @@ config HARDLOCKUP_DETECTOR depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_BUDDY || HAVE_HARDLOCKUP_DETECTOR_ARCH imply HARDLOCKUP_DETECTOR_PERF imply HARDLOCKUP_DETECTOR_BUDDY + imply SDEI_WATCHDOG imply HARDLOCKUP_DETECTOR_ARCH select LOCKUP_DETECTOR @@ -1097,6 +1104,7 @@ config HARDLOCKUP_DETECTOR_PERF depends on HARDLOCKUP_DETECTOR depends on HAVE_HARDLOCKUP_DETECTOR_PERF && !HARDLOCKUP_DETECTOR_PREFER_BUDDY depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH + depends on !SDEI_WATCHDOG select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER config HARDLOCKUP_DETECTOR_BUDDY @@ -1105,6 +1113,7 @@ config HARDLOCKUP_DETECTOR_BUDDY depends on HAVE_HARDLOCKUP_DETECTOR_BUDDY depends on !HAVE_HARDLOCKUP_DETECTOR_PERF || HARDLOCKUP_DETECTOR_PREFER_BUDDY depends on !HAVE_HARDLOCKUP_DETECTOR_ARCH + depends on !SDEI_WATCHDOG select HARDLOCKUP_DETECTOR_COUNTS_HRTIMER config HARDLOCKUP_DETECTOR_ARCH