diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index e1cf25de010287e5f960e5786fa536ef067737de..497b36a8598a6694032ab25eb5e8fe281c8191a1 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -6769,6 +6769,7 @@ CONFIG_HNS3_PMU=m
 # end of Performance monitor support
 
 CONFIG_RAS=y
+CONFIG_RAS_ARM_EVENT_INFO=y
 CONFIG_PAGE_EJECT=m
 CONFIG_USB4=m
 # CONFIG_USB4_DEBUGFS_WRITE is not set
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index bf1b9252a8dab1be7fca445e5b3842806f363879..3fde9388b0f48f048346e9adfdf2731b70fb6664 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -518,9 +518,12 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int s
 	int sec_sev, i;
 	char *p;
 
-	log_arm_hw_error(err);
-
 	sec_sev = ghes_severity(gdata->error_severity);
+#ifdef CONFIG_RAS_ARM_EVENT_INFO
+	log_arm_hw_error(err, sec_sev);
+#else
+	log_arm_hw_error(err);
+#endif
 	if (sev != GHES_SEV_RECOVERABLE || sec_sev != GHES_SEV_RECOVERABLE)
 		return false;
 
diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig
index f8554a94031614ae19ba1191d8b834cf1a811acc..84b6d569cfda5561cfa6b34c92221099e35e2e81 100644
--- a/drivers/ras/Kconfig
+++ b/drivers/ras/Kconfig
@@ -29,6 +29,17 @@ menuconfig RAS
 	  so have ideal availability, but may be unreliable, with frequent
 	  data corruption.
 
+config RAS_ARM_EVENT_INFO
+	bool "RAS feature: report all the arm processor info in arm event"
+	default y
+	depends on ARM64
+	help
+	  This option allows to report several ARM processor error information,
+	  context information, vendor specific error information, error severity
+	  and cpu logical index about the event to userspace via perf i/f. So
+	  that the user can do cpu core isolation according to error severity
+	  and other info.
+
 if RAS
 
 source "arch/x86/ras/Kconfig"
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index 95540ea8dd9db905fa76019e9a0ce618c053a84c..9a12398ee0a865d24a049fad4d65a7901f8f81ce 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -21,9 +21,92 @@ void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id,
 	trace_non_standard_event(sec_type, fru_id, fru_text, sev, err, len);
 }
 
+#ifdef CONFIG_RAS_ARM_EVENT_INFO
+/**
+ * log_arm_hw_error - emit the arm_event tracepoint for an ARM processor error
+ * @err: ARM processor error section header; the error info entries, context
+ *       info entries and vendor specific data are read from the bytes that
+ *       follow it
+ * @sev: error severity stored in the trace event
+ *
+ * Splits the section into its error info, context info and vendor specific
+ * parts and passes each part with its length to the tracepoint, together
+ * with @sev and the logical CPU index derived from @err->mpidr (-1 when
+ * GET_LOGICAL_INDEX() returns a negative value).
+ *
+ * The walk never steps past @err->section_length. If the record claims more
+ * data than the section holds, a firmware bug is logged and only the parts
+ * that were verified to fit are reported.
+ */
+void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev)
+#else
 void log_arm_hw_error(struct cper_sec_proc_arm *err)
+#endif
 {
+#ifdef CONFIG_RAS_ARM_EVENT_INFO
+	struct cper_arm_err_info *err_info;
+	struct cper_arm_ctx_info *ctx_info;
+	u8 *pei_err, *ctx_err, *ven_err_data;
+	u32 pei_len, ctx_len = 0, vsei_len = 0;
+	u64 left, sz;
+	bool fw_bug = false;
+	int n, cpu;
+
+	err_info = (struct cper_arm_err_info *)(err + 1);
+	pei_err = (u8 *)err_info;
+	pei_len = sizeof(struct cper_arm_err_info) * err->err_info_num;
+	left = err->section_length;
+
+	if (left < sizeof(struct cper_sec_proc_arm) + pei_len) {
+		/* Not even the error info entries fit: report no payload. */
+		fw_bug = true;
+		pei_len = 0;
+		ctx_info = (struct cper_arm_ctx_info *)err_info;
+	} else {
+		left -= sizeof(struct cper_sec_proc_arm) + pei_len;
+		ctx_info = (struct cper_arm_ctx_info *)(err_info + err->err_info_num);
+	}
+	ctx_err = (u8 *)ctx_info;
+
+	for (n = 0; !fw_bug && n < err->context_info_num; n++) {
+		/* The entry header must fit before its size field is read. */
+		if (left < sizeof(struct cper_arm_ctx_info)) {
+			fw_bug = true;
+			break;
+		}
+		sz = sizeof(struct cper_arm_ctx_info) + (u64)ctx_info->size;
+		if (sz > left) {
+			fw_bug = true;
+			break;
+		}
+		ctx_info = (struct cper_arm_ctx_info *)((u8 *)ctx_info + sz);
+		ctx_len += sz;
+		left -= sz;
+	}
+
+	if (fw_bug) {
+		pr_warn(FW_BUG
+			"section length: %u\n", err->section_length);
+		pr_warn(FW_BUG
+			"section length is too small\n");
+		pr_warn(FW_BUG
+			"firmware-generated error record is incorrect\n");
+	} else {
+		/* Whatever follows the last context entry is vendor data. */
+		vsei_len = left;
+	}
+	ven_err_data = (u8 *)ctx_info;
+
+	cpu = GET_LOGICAL_INDEX(err->mpidr);
+	/* when return value is invalid, set cpu index to -1 */
+	if (cpu < 0)
+		cpu = -1;
+
+	trace_arm_event(err, pei_err, pei_len, ctx_err, ctx_len,
+			ven_err_data, vsei_len, sev, cpu);
+#else
 	trace_arm_event(err);
+#endif
 }
 
 static int __init ras_init(void)
diff --git a/include/linux/ras.h b/include/linux/ras.h
index 1f4048bf2674deac9f9d6feba567c6c261eab27a..8cdd011a9735c339484f3619a848afd4e1e06e84 100644
--- a/include/linux/ras.h
+++ b/include/linux/ras.h
@@ -24,15 +24,39 @@ int __init parse_cec_param(char *str);
 void log_non_standard_event(const guid_t *sec_type,
 			    const guid_t *fru_id, const char *fru_text,
 			    const u8 sev, const u8 *err, const u32 len);
+
+#ifdef CONFIG_RAS_ARM_EVENT_INFO
+void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev);
+#else
 void log_arm_hw_error(struct cper_sec_proc_arm *err);
+#endif
+
 #else
 static inline void
 log_non_standard_event(const guid_t *sec_type,
 		       const guid_t *fru_id, const char *fru_text,
 		       const u8 sev, const u8 *err, const u32 len)
 { return; }
+
+#ifdef CONFIG_RAS_ARM_EVENT_INFO
 static inline void
+log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev) { return; }
+#else
+static inline void
 log_arm_hw_error(struct cper_sec_proc_arm *err) { return; }
 #endif
+#endif
+
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
+#include <asm/smp_plat.h>
+/*
+ * Include ARM specific SMP header which provides a function mapping mpidr to
+ * cpu logical index.
+ */
+#define GET_LOGICAL_INDEX(mpidr) get_logical_index((mpidr) & MPIDR_HWID_BITMASK)
+#else
+#define GET_LOGICAL_INDEX(mpidr) (-EINVAL)
+#endif /* CONFIG_ARM || CONFIG_ARM64 */
+
 
 #endif /* __RAS_H__ */
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index e7d9470a27cd698693094561f4665a3ad3ba630c..9dbca0e03b3d8dd4893552900948d8396d291cb3 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -168,11 +168,29 @@ TRACE_EVENT(mc_event,
  * This event is generated when hardware detects an ARM processor error
  * has occurred. UEFI 2.6 spec section N.2.4.4.
  */
+#define APEIL "ARM Processor Err Info data len"
+#define APEID "ARM Processor Err Info raw data"
+#define APECIL "ARM Processor Err Context Info data len"
+#define APECID "ARM Processor Err Context Info raw data"
+#define VSEIL "Vendor Specific Err Info data len"
+#define VSEID "Vendor Specific Err Info raw data"
 TRACE_EVENT(arm_event,
-
+#ifdef CONFIG_RAS_ARM_EVENT_INFO
+	TP_PROTO(const struct cper_sec_proc_arm *proc, const u8 *pei_err,
+			const u32 pei_len,
+			const u8 *ctx_err,
+			const u32 ctx_len,
+			const u8 *oem,
+			const u32 oem_len,
+			u8 sev,
+			int cpu),
+
+	TP_ARGS(proc, pei_err, pei_len, ctx_err, ctx_len, oem, oem_len, sev, cpu),
+#else
 	TP_PROTO(const struct cper_sec_proc_arm *proc),
 
 	TP_ARGS(proc),
+#endif
 
 	TP_STRUCT__entry(
 		__field(u64, mpidr)
@@ -180,6 +198,16 @@ TRACE_EVENT(arm_event,
 		__field(u32, running_state)
 		__field(u32, psci_state)
 		__field(u8, affinity)
+#ifdef CONFIG_RAS_ARM_EVENT_INFO
+		__field(u32, pei_len)
+		__dynamic_array(u8, buf, pei_len)
+		__field(u32, ctx_len)
+		__dynamic_array(u8, buf1, ctx_len)
+		__field(u32, oem_len)
+		__dynamic_array(u8, buf2, oem_len)
+		__field(u8, sev)
+		__field(int, cpu)
+#endif
 	),
 
 	TP_fast_assign(
@@ -199,12 +227,38 @@ TRACE_EVENT(arm_event,
 			__entry->running_state = ~0;
 			__entry->psci_state = ~0;
 		}
+#ifdef CONFIG_RAS_ARM_EVENT_INFO
+		__entry->pei_len = pei_len;
+		memcpy(__get_dynamic_array(buf), pei_err, pei_len);
+		__entry->ctx_len = ctx_len;
+		memcpy(__get_dynamic_array(buf1), ctx_err, ctx_len);
+		__entry->oem_len = oem_len;
+		memcpy(__get_dynamic_array(buf2), oem, oem_len);
+		__entry->sev = sev;
+		__entry->cpu = cpu;
+#endif
 	),
 
+#ifdef CONFIG_RAS_ARM_EVENT_INFO
+	TP_printk("cpu: %d; error: %d; affinity level: %d; MPIDR: %016llx; MIDR: %016llx; "
+		  "running state: %d; PSCI state: %d; "
+		  "%s: %d; %s: %s; %s: %d; %s: %s; %s: %d; %s: %s",
+		  __entry->cpu,
+		  __entry->sev,
+		  __entry->affinity, __entry->mpidr, __entry->midr,
+		  __entry->running_state, __entry->psci_state,
+		  APEIL, __entry->pei_len, APEID,
+		  __print_hex(__get_dynamic_array(buf), __entry->pei_len),
+		  APECIL, __entry->ctx_len, APECID,
+		  __print_hex(__get_dynamic_array(buf1), __entry->ctx_len),
+		  VSEIL, __entry->oem_len, VSEID,
+		  __print_hex(__get_dynamic_array(buf2), __entry->oem_len))
+#else
 	TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; "
 		  "running state: %d; PSCI state: %d",
 		  __entry->affinity, __entry->mpidr, __entry->midr,
 		  __entry->running_state, __entry->psci_state)
+#endif
 );
 
 /*