From 2414f0c8bf5f01c6325cc9097a9cb94dfff309ce Mon Sep 17 00:00:00 2001
From: leoliu-oc
Date: Fri, 28 Jun 2024 15:56:57 +0800
Subject: [PATCH] anolis: x86/mce/zhaoxin: Enable mcelog to decode PCIE,
 ZDI/ZPI and DRAM errors

ANBZ: #9446

The mcelog cannot decode PCIE, ZDI/ZPI, and DRAM errors in the FFM
(Firmware First Mode). The purpose of this patch is to enable mcelog to
decode PCIE, ZDI/ZPI, and DRAM errors that occur on Zhaoxin processors, so
that the cause of these errors can be quickly located.

Signed-off-by: leoliu-oc
---
 arch/x86/include/asm/mce.h     |   6 ++
 arch/x86/kernel/acpi/apei.c    |  24 ++++-
 arch/x86/kernel/cpu/mce/apei.c | 181 +++++++++++++++++++++++++++++++++
 drivers/acpi/apei/apei-base.c  |  10 ++
 drivers/acpi/apei/ghes.c       |  28 +++++-
 include/acpi/apei.h            |   2 +
 6 files changed, 249 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 180b1cbfcc4e..eff1cf90895c 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -289,6 +289,12 @@ struct cper_sec_mem_err;
 extern void apei_mce_report_mem_error(int corrected,
 				      struct cper_sec_mem_err *mem_err);
 
+extern void zx_apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err);
+struct cper_sec_pcie;
+extern void zx_apei_mce_report_pcie_error(int corrected, struct cper_sec_pcie *pcie_err);
+struct cper_sec_proc_generic;
+extern void zx_apei_mce_report_zdi_error(int corrected, struct cper_sec_proc_generic *zdi_err);
+
 /*
  * Enumerate new IP types and HWID values in AMD processors which support
  * Scalable MCA.
diff --git a/arch/x86/kernel/acpi/apei.c b/arch/x86/kernel/acpi/apei.c
index 0916f00a992e..26d9963b66bd 100644
--- a/arch/x86/kernel/acpi/apei.c
+++ b/arch/x86/kernel/acpi/apei.c
@@ -40,7 +40,29 @@ int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data)
 void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 {
 #ifdef CONFIG_X86_MCE
-	apei_mce_report_mem_error(sev, mem_err);
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN ||
+	    boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)
+		zx_apei_mce_report_mem_error(sev, mem_err);
+	else
+		apei_mce_report_mem_error(sev, mem_err);
+#endif
+}
+
+void arch_apei_report_pcie_error(int sev, struct cper_sec_pcie *pcie_err)
+{
+#ifdef CONFIG_X86_MCE
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN ||
+	    boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)
+		zx_apei_mce_report_pcie_error(sev, pcie_err);
+#endif
+}
+
+void arch_apei_report_zdi_error(int sev, struct cper_sec_proc_generic *zdi_err)
+{
+#ifdef CONFIG_X86_MCE
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_ZHAOXIN ||
+	    boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)
+		zx_apei_mce_report_zdi_error(sev, zdi_err);
 #endif
 }
 
diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
index 8ed341714686..7c23ae2e3006 100644
--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -63,6 +63,187 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
 }
 EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
 
+/*
+ * Build a synthetic MCE record from a firmware-first CPER memory error
+ * section so that mcelog can decode it. Only family 7, model 91 CPUs are
+ * handled, and sections without a valid physical address are ignored.
+ */
+void zx_apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
+{
+	struct mce m;
+	int apei_error = 0;
+
+	if (boot_cpu_data.x86 != 7 || boot_cpu_data.x86_model != 91)
+		return;
+
+	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
+		return;
+
+	mce_setup(&m);
+	m.misc = mem_err->module;
+	m.addr = mem_err->physical_addr;
+	if (mem_err->card == 0)
+		m.bank = 9;
+	else
+		m.bank = 10;
+
+	switch (mem_err->error_type) {
+	case 2:
+		m.status = 0x9c20004000010080;
+		break;
+	case 3:
+		m.status = 0xbe40000000020090;
+		apei_error = apei_write_mce(&m);
+		break;
+	case 8:
+		if (mem_err->requestor_id == 2)
+			m.status = 0x98200040000400b0;
+		else if (mem_err->requestor_id == 3) {
+			m.status = 0xba400000000600a0;
+			apei_error = apei_write_mce(&m);
+		} else if (mem_err->requestor_id == 4)
+			m.status = 0x98200100000300b0;
+		else if (mem_err->requestor_id == 5) {
+			m.status = 0xba000000000500b0;
+			apei_error = apei_write_mce(&m);
+		} else
+			pr_info("Undefined Parity error\n");
+		break;
+	case 10:
+		if (mem_err->requestor_id == 6) {
+			m.status = 0xba400000000700a0;
+			apei_error = apei_write_mce(&m);
+		} else if (mem_err->requestor_id == 7) {
+			m.status = 0xba000000000800b0;
+			apei_error = apei_write_mce(&m);
+		} else
+			pr_info("Undefined dvad error\n");
+		break;
+	case 13:
+		m.status = 0x9c200040000100c0;
+		break;
+	case 14:
+		m.status = 0xbd000000000200c0;
+		apei_error = apei_write_mce(&m);
+		break;
+	}
+	mce_log(&m);
+}
+EXPORT_SYMBOL_GPL(zx_apei_mce_report_mem_error);
+
+/*
+ * Build a synthetic MCE record from a firmware-first CPER PCIe error
+ * section. The reporting device's segment/bus/device/function are packed
+ * into MISC; each field is widened to u64 before shifting so that a bus
+ * number >= 0x80 is not sign-extended into the segment bits.
+ */
+void zx_apei_mce_report_pcie_error(int severity, struct cper_sec_pcie *pcie_err)
+{
+	struct mce m;
+	int apei_error = 0;
+
+	if (boot_cpu_data.x86 != 7 || boot_cpu_data.x86_model != 91)
+		return;
+
+	mce_setup(&m);
+	m.addr = 0;
+	m.misc = 0;
+	m.misc |= (u64)pcie_err->device_id.segment << 32;
+	m.misc |= (u64)pcie_err->device_id.bus << 24;
+	m.misc |= (u64)pcie_err->device_id.device << 19;
+	m.misc |= (u64)pcie_err->device_id.function << 16;
+	m.bank = 6;
+
+	switch (severity) {
+	case 1:
+		m.status = 0x9820004000020e0b;
+		break;
+	case 2:
+		m.status = 0xba20000000010e0b;
+		break;
+	case 3:
+		m.status = 0xbd20000000000e0b;
+		apei_error = apei_write_mce(&m);
+		break;
+	default:
+		pr_info("Undefine pcie error\n");
+		break;
+	}
+	mce_log(&m);
+}
+EXPORT_SYMBOL_GPL(zx_apei_mce_report_pcie_error);
+
+/*
+ * Build a synthetic MCE record from a firmware-first CPER generic
+ * processor error section (used here for ZDI/ZPI errors). The low byte of
+ * requestor_id is placed at MISC bit 19 and the next byte at bit 24; the
+ * bank status is selected by responder_id.
+ */
+void zx_apei_mce_report_zdi_error(int severity, struct cper_sec_proc_generic *zdi_err)
+{
+	struct mce m;
+	int apei_error = 0;
+
+	if (boot_cpu_data.x86 != 7 || boot_cpu_data.x86_model != 91)
+		return;
+
+	mce_setup(&m);
+	m.misc = 0;
+	m.misc |= (zdi_err->requestor_id & 0xff) << 19;
+	m.misc |= ((zdi_err->requestor_id & 0xff00) >> 8) << 24;
+	m.bank = 5;
+	switch (zdi_err->responder_id) {
+	case 2:
+		m.status = 0xba00000000040e0f;
+		apei_error = apei_write_mce(&m);
+		break;
+	case 3:
+		m.status = 0xba00000000030e0f;
+		apei_error = apei_write_mce(&m);
+		break;
+	case 4:
+		m.status = 0xba00000000020e0f;
+		apei_error = apei_write_mce(&m);
+		break;
+	case 5:
+		m.status = 0xba00000000010e0f;
+		apei_error = apei_write_mce(&m);
+		break;
+	case 6:
+		m.status = 0x9820004000090e0f;
+		break;
+	case 7:
+		m.status = 0x9820004000080e0f;
+		break;
+	case 8:
+		m.status = 0x9820004000070e0f;
+		break;
+	case 9:
+		m.status = 0x9820004000060e0f;
+		break;
+	case 10:
+		m.status = 0x9820004000050e0f;
+		break;
+	case 11:
+	case 12:
+	case 13:
+	case 14:
+	case 15:
+		m.status = 0x98200040000b0e0f;
+		break;
+	case 16:
+	case 17:
+	case 18:
+		m.status = 0x98200040000c0e0f;
+		break;
+	default:
+		pr_info("Undefined ZDI Error\n");
+		break;
+	}
+	mce_log(&m);
+}
+EXPORT_SYMBOL_GPL(zx_apei_mce_report_zdi_error);
+
 int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
 {
 	const u64 *i_mce = ((const u64 *) (ctx_info + 1));
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index c7c26872f4ce..05ee09357bd7 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -773,6 +773,16 @@ void __weak arch_apei_report_mem_error(int sev,
 }
 EXPORT_SYMBOL_GPL(arch_apei_report_mem_error);
 
+void __weak arch_apei_report_pcie_error(int sev, struct cper_sec_pcie *pcie_err)
+{
+}
+EXPORT_SYMBOL_GPL(arch_apei_report_pcie_error);
+
+void __weak arch_apei_report_zdi_error(int sev, struct cper_sec_proc_generic *zdi_err)
+{
+}
+EXPORT_SYMBOL_GPL(arch_apei_report_zdi_error);
+
 int apei_osc_setup(void)
 {
 	static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c";
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 64b6193e2475..9ccf2a51c64a 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -796,14 +796,21 @@ static bool ghes_do_proc(struct ghes *ghes,
 
 			atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err);
 
-			arch_apei_report_mem_error(sev, mem_err);
+			arch_apei_report_mem_error(sec_sev, mem_err);
 			queued = ghes_handle_memory_failure(gdata, sev, sync);
 		}
 		else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
+			struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
+
+			arch_apei_report_pcie_error(sec_sev, pcie_err);
 			ghes_handle_aer(gdata);
 		}
 		else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
 			queued = ghes_handle_arm_hw_error(gdata, sev, sync);
+		} else if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
+			struct cper_sec_proc_generic *zdi_err = acpi_hest_get_payload(gdata);
+
+			arch_apei_report_zdi_error(sec_sev, zdi_err);
 		} else {
 			void *err = acpi_hest_get_payload(gdata);
 
@@ -1188,6 +1195,8 @@ static int ghes_in_nmi_queue_one_entry(struct ghes *ghes,
 	u32 len, node_len;
 	u64 buf_paddr;
 	int sev, rc;
+	struct acpi_hest_generic_data *gdata;
+	guid_t *sec_type;
 
 	if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG))
 		return -EOPNOTSUPP;
@@ -1223,6 +1232,23 @@ static int ghes_in_nmi_queue_one_entry(struct ghes *ghes,
 
 	sev = ghes_severity(estatus->error_severity);
 	if (sev >= GHES_SEV_PANIC) {
+		apei_estatus_for_each_section(estatus, gdata) {
+			sec_type = (guid_t *)gdata->section_type;
+			if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
+				struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
+
+				arch_apei_report_mem_error(sev, mem_err);
+			} else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
+				struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
+
+				arch_apei_report_pcie_error(sev, pcie_err);
+			} else if (guid_equal(sec_type, &CPER_SEC_PROC_GENERIC)) {
+				struct cper_sec_proc_generic *zdi_err =
+					acpi_hest_get_payload(gdata);
+
+				arch_apei_report_zdi_error(sev, zdi_err);
+			}
+		}
 		ghes_print_queued_estatus();
 		__ghes_panic(ghes, estatus, buf_paddr, fixmap_idx);
 	}
diff --git a/include/acpi/apei.h b/include/acpi/apei.h
index dc60f7db5524..fcb5814a3f43 100644
--- a/include/acpi/apei.h
+++ b/include/acpi/apei.h
@@ -52,6 +52,8 @@ int erst_clear(u64 record_id);
 
 int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data);
 void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err);
+void arch_apei_report_pcie_error(int sev, struct cper_sec_pcie *pcie_err);
+void arch_apei_report_zdi_error(int sev, struct cper_sec_proc_generic *zdi_err);
 
 #endif
 #endif
-- 
Gitee