From efe362404a69fdab1c48d64d3c6c93f711504394 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 29 Aug 2024 13:51:01 +0800 Subject: [PATCH 01/12] EDAC/{skx_common,skx,i10nm}: Move the common debug code to skx_common ANBZ: #23456 commit 8b935823530d6fd0bea0e90a747f3ebd8cfefb0d upstream. Commit afdb82fd763c ("EDAC, i10nm: make skx_common.o a separate module") made skx_common.o a separate module. With skx_common.o now a separate module, move the common debug code setup_{skx,i10nm}_debug() and teardown_{skx,i10nm}_debug() in {skx,i10nm}_base.c to skx_common.c to reduce code duplication. Additionally, prefix these function names with 'skx' to maintain consistency with other names in the file. Intel-SIG: commit 8b935823530d EDAC/{skx_common,skx,i10nm}: Move the common debug code to skx_common Add EDAC support and enhancement for GNR/GNR-D/CWF. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/all/20240829055101.56245-1-qiuxu.zhuo@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 52 ++------------------------------------- drivers/edac/skx_base.c | 52 ++------------------------------------- drivers/edac/skx_common.c | 47 +++++++++++++++++++++++++++++++++++ drivers/edac/skx_common.h | 8 ++++++ 4 files changed, 59 insertions(+), 100 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 068597e8fce9..c1f362e80c6d 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -1017,54 +1017,6 @@ static struct notifier_block i10nm_mce_dec = { .priority = MCE_PRIO_EDAC, }; -#ifdef CONFIG_EDAC_DEBUG -/* - * Debug feature. - * Exercise the address decode logic by writing an address to - * /sys/kernel/debug/edac/i10nm_test/addr. - */ -static struct dentry *i10nm_test; - -static int debugfs_u64_set(void *data, u64 val) -{ - struct mce m; - - pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); - - memset(&m, 0, sizeof(m)); - /* ADDRV + MemRd + Unknown channel */ - m.status = MCI_STATUS_ADDRV + 0x90; - /* One corrected error */ - m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); - m.addr = val; - skx_mce_check_error(NULL, 0, &m); - - return 0; -} -DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); - -static void setup_i10nm_debug(void) -{ - i10nm_test = edac_debugfs_create_dir("i10nm_test"); - if (!i10nm_test) - return; - - if (!edac_debugfs_create_file("addr", 0200, i10nm_test, - NULL, &fops_u64_wo)) { - debugfs_remove(i10nm_test); - i10nm_test = NULL; - } -} - -static void teardown_i10nm_debug(void) -{ - debugfs_remove_recursive(i10nm_test); -} -#else -static inline void setup_i10nm_debug(void) {} -static inline void teardown_i10nm_debug(void) {} -#endif /*CONFIG_EDAC_DEBUG*/ - static int __init i10nm_init(void) { u8 mc = 0, src_id = 0, node_id = 0; @@ -1164,7 +1116,7 @@ static int __init i10nm_init(void) opstate_init(); mce_register_decode_chain(&i10nm_mce_dec); - setup_i10nm_debug(); + skx_setup_debug("i10nm_test"); if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) { skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log); @@ -1192,7 +1144,7 @@ static void __exit i10nm_exit(void) enable_retry_rd_err_log(false); } - teardown_i10nm_debug(); + skx_teardown_debug(); mce_unregister_decode_chain(&i10nm_mce_dec); skx_adxl_put(); skx_remove(); diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index 0a862336a7ce..51b2cc9472ef 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -587,54 +587,6 @@ static struct notifier_block skx_mce_dec = { .priority = MCE_PRIO_EDAC, }; -#ifdef CONFIG_EDAC_DEBUG -/* - * Debug feature. - * Exercise the address decode logic by writing an address to - * /sys/kernel/debug/edac/skx_test/addr. - */ -static struct dentry *skx_test; - -static int debugfs_u64_set(void *data, u64 val) -{ - struct mce m; - - pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); - - memset(&m, 0, sizeof(m)); - /* ADDRV + MemRd + Unknown channel */ - m.status = MCI_STATUS_ADDRV + 0x90; - /* One corrected error */ - m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); - m.addr = val; - skx_mce_check_error(NULL, 0, &m); - - return 0; -} -DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); - -static void setup_skx_debug(void) -{ - skx_test = edac_debugfs_create_dir("skx_test"); - if (!skx_test) - return; - - if (!edac_debugfs_create_file("addr", 0200, skx_test, - NULL, &fops_u64_wo)) { - debugfs_remove(skx_test); - skx_test = NULL; - } -} - -static void teardown_skx_debug(void) -{ - debugfs_remove_recursive(skx_test); -} -#else -static inline void setup_skx_debug(void) {} -static inline void teardown_skx_debug(void) {} -#endif /*CONFIG_EDAC_DEBUG*/ - /* * skx_init: * make sure we are running on the correct cpu model @@ -728,7 +680,7 @@ static int __init skx_init(void) /* Ensure that the OPSTATE is set correctly for POLL or NMI */ opstate_init(); - setup_skx_debug(); + skx_setup_debug("skx_test"); mce_register_decode_chain(&skx_mce_dec); @@ -742,7 +694,7 @@ static void __exit skx_exit(void) { edac_dbg(2, "\n"); mce_unregister_decode_chain(&skx_mce_dec); - teardown_skx_debug(); + skx_teardown_debug(); if (nvdimm_count) skx_adxl_put(); skx_remove(); diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 9e43aed72bd9..ece34296ca3f 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -794,6 +794,53 @@ void skx_remove(void) } EXPORT_SYMBOL_GPL(skx_remove); +#ifdef CONFIG_EDAC_DEBUG +/* + * Debug feature. + * Exercise the address decode logic by writing an address to + * /sys/kernel/debug/edac/{skx,i10nm}_test/addr. + */ +static struct dentry *skx_test; + +static int debugfs_u64_set(void *data, u64 val) +{ + struct mce m; + + pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val); + + memset(&m, 0, sizeof(m)); + /* ADDRV + MemRd + Unknown channel */ + m.status = MCI_STATUS_ADDRV + 0x90; + /* One corrected error */ + m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT); + m.addr = val; + skx_mce_check_error(NULL, 0, &m); + + return 0; +} +DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); + +void skx_setup_debug(const char *name) +{ + skx_test = edac_debugfs_create_dir(name); + if (!skx_test) + return; + + if (!edac_debugfs_create_file("addr", 0200, skx_test, + NULL, &fops_u64_wo)) { + debugfs_remove(skx_test); + skx_test = NULL; + } +} +EXPORT_SYMBOL_GPL(skx_setup_debug); + +void skx_teardown_debug(void) +{ + debugfs_remove_recursive(skx_test); +} +EXPORT_SYMBOL_GPL(skx_teardown_debug); +#endif /*CONFIG_EDAC_DEBUG*/ + MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Tony Luck"); MODULE_DESCRIPTION("MC Driver for Intel server processors"); diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 2ea4d1d1fbef..b89fc9bd10ec 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -284,4 +284,12 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, void skx_remove(void); +#ifdef CONFIG_EDAC_DEBUG +void skx_setup_debug(const char *name); +void skx_teardown_debug(void); +#else +static inline void skx_setup_debug(const char *name) {} +static inline void skx_teardown_debug(void) {} +#endif + #endif /* _SKX_COMM_EDAC_H */ -- Gitee From 76499e124eede8de8e69aadeb7bf2958651f9ba6 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 29 Aug 2024 14:13:09 +0800 Subject: [PATCH 02/12] EDAC/{skx_common,i10nm}: Remove the AMAP register for determing DDR5 ANBZ: #23456 commit 7a33c144c28ebb27c59963007992fed15f3953b3 upstream. The configuration flag 'res_config->support_ddr5 = true' sufficiently indicates DDR5 memory support for Sapphire Rapids and Granite Rapids. Additionally, the i10nm_edac driver doesn't need to use the AMAP register for setting the 'fine_grain_bank' of each DIMM. Therefore, remove the AMAP register for determining DDR5. Intel-SIG: commit 7a33c144c28e EDAC/{skx_common,i10nm}: Remove the AMAP register for determing DDR5 Add EDAC support and enhancement for GNR/GNR-D/CWF. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/all/20240829061309.57738-1-qiuxu.zhuo@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 9 ++------- drivers/edac/skx_common.c | 2 +- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index c1f362e80c6d..c4798d152a98 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -47,10 +47,6 @@ readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : \ (res_cfg->type == GNR ? 0xaf8 : 0x20ef8)) + \ (i) * (m)->chan_mmio_sz) -#define I10NM_GET_AMAP(m, i) \ - readl((m)->mbase + ((m)->hbm_mc ? 0x814 : \ - (res_cfg->type == GNR ? 0xc14 : 0x20814)) + \ - (i) * (m)->chan_mmio_sz) #define I10NM_GET_REG32(m, i, offset) \ readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset)) #define I10NM_GET_REG64(m, i, offset) \ @@ -975,7 +971,7 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci, { struct skx_pvt *pvt = mci->pvt_info; struct skx_imc *imc = pvt->imc; - u32 mtr, amap, mcddrtcfg = 0; + u32 mtr, mcddrtcfg = 0; struct dimm_info *dimm; int i, j, ndimms; @@ -984,7 +980,6 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci, continue; ndimms = 0; - amap = I10NM_GET_AMAP(imc, i); if (res_cfg->type != GNR) mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i); @@ -996,7 +991,7 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci, mtr, mcddrtcfg, imc->mc, i, j); if (IS_DIMM_PRESENT(mtr)) - ndimms += skx_get_dimm_info(mtr, 0, amap, dimm, + ndimms += skx_get_dimm_info(mtr, 0, 0, dimm, imc, i, j, cfg); else if (IS_NVDIMM_PRESENT(mcddrtcfg, j)) ndimms += skx_get_nvdimm_info(dimm, imc, i, j, diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index ece34296ca3f..88f5ff249f2e 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -420,7 +420,7 @@ int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, if (imc->hbm_mc) { banks = 32; mtype = MEM_HBM2; - } else if (cfg->support_ddr5 && (amap & 0x8)) { + } else if (cfg->support_ddr5) { banks = 32; mtype = MEM_DDR5; } else { -- Gitee From 0eaec9936e6edce886d3d863438ba5c2e560c478 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Tue, 3 Dec 2024 10:20:38 +0800 Subject: [PATCH 03/12] EDAC/i10nm: Add Intel Clearwater Forest server support ANBZ: #23456 commit 2e55bb9b71e179c37d05deff37daa0dd8d04b59d upstream. Clearwater Forest is the successor to Sierra Forest. Add Clearwater Forest CPU model ID for EDAC support. Intel-SIG: commit 2e55bb9b71e1 EDAC/i10nm: Add Intel Clearwater Forest server support Add EDAC support and enhancement for GNR/GNR-D/CWF. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Yi Lai Link: https://lore.kernel.org/r/20241203022038.72873-1-qiuxu.zhuo@intel.com [ Zhang Rui: resolve conflict (X86_MATCH_VFM) and amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index c4798d152a98..1af1ed4ebacd 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -952,6 +952,7 @@ static const struct x86_cpu_id i10nm_cpuids[] = { X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(EMERALDRAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_DARKMONT_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), {} }; MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids); -- Gitee From a9a2b93176a945a49b60df7e778bd4a7cd1852a0 Mon Sep 17 00:00:00 2001 From: Kyle Meyer Date: Thu, 12 Dec 2024 19:25:49 -0600 Subject: [PATCH 04/12] EDAC/{i10nm,skx,skx_common}: Support UV systems ANBZ: #23456 commit 584e09743d2f44905290b0dbf3215064d2a1888c upstream. The 3-bit source IDs in PCI configuration space registers, used to map devices to sockets, are limited to 8 unique IDs, and each ID is local to a UPI/QPI domain. Source IDs cannot be used to map devices to sockets on UV systems because they can exceed 8 sockets and have multiple UPI/QPI domains with identical, repeating source IDs. Use NUMA information to get package IDs instead of source IDs on UV systems, and use package/source IDs to name IMC information structures. Intel-SIG: commit 584e09743d2f EDAC/{i10nm,skx,skx_common}: Support UV systems Add EDAC support and enhancement for GNR/GNR-D/CWF. Signed-off-by: Kyle Meyer Signed-off-by: Tony Luck Tested-by: Qiuxu Zhuo Reviewed-by: Qiuxu Zhuo Link: https://lore.kernel.org/all/20241213012549.43099-1-kyle.meyer@hpe.com/ [ Zhang Rui: resolve conflict (topo.pkg_id not available) and amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 11 +++------ drivers/edac/skx_base.c | 9 +++----- drivers/edac/skx_common.c | 48 +++++++++++++++++++++++++++------------ drivers/edac/skx_common.h | 3 +-- 4 files changed, 41 insertions(+), 30 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 1af1ed4ebacd..93b4b56a961a 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -1015,7 +1015,7 @@ static struct notifier_block i10nm_mce_dec = { static int __init i10nm_init(void) { - u8 mc = 0, src_id = 0, node_id = 0; + u8 mc = 0, src_id = 0; const struct x86_cpu_id *id; struct res_config *cfg; const char *owner; @@ -1075,19 +1075,14 @@ static int __init i10nm_init(void) if (rc < 0) goto fail; - rc = skx_get_node_id(d, &node_id); - if (rc < 0) - goto fail; - - edac_dbg(2, "src_id = %d node_id = %d\n", src_id, node_id); + edac_dbg(2, "src_id = %d\n", src_id); for (i = 0; i < imc_num; i++) { if (!d->imc[i].mdev) continue; d->imc[i].mc = mc++; d->imc[i].lmc = i; - d->imc[i].src_id = src_id; - d->imc[i].node_id = node_id; + d->imc[i].src_id = src_id; if (d->imc[i].hbm_mc) { d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz; d->imc[i].num_channels = cfg->hbm_chan_num; diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index 51b2cc9472ef..67634d3af461 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -600,7 +600,7 @@ static int __init skx_init(void) const struct munit *m; const char *owner; int rc = 0, i, off[3] = {0xd0, 0xd4, 0xd8}; - u8 mc = 0, src_id, node_id; + u8 mc = 0, src_id; struct skx_dev *d; edac_dbg(2, "\n"); @@ -650,15 +650,12 @@ static int __init skx_init(void) rc = skx_get_src_id(d, 0xf0, &src_id); if (rc < 0) goto fail; - rc = skx_get_node_id(d, &node_id); - if (rc < 0) - goto fail; - edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id); + + edac_dbg(2, "src_id = %d\n", src_id); for (i = 0; i < SKX_NUM_IMC; i++) { d->imc[i].mc = mc++; d->imc[i].lmc = i; d->imc[i].src_id = src_id; - d->imc[i].node_id = node_id; rc = skx_register_mci(&d->imc[i], d->imc[i].chan[0].cdev, "Skylake Socket", EDAC_MOD_STR, skx_get_dimm_config, cfg); diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 88f5ff249f2e..c950f1a188d2 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -14,11 +14,13 @@ * Copyright (c) 2018, Intel Corporation. */ +#include #include #include #include #include #include +#include #include "edac_module.h" #include "skx_common.h" @@ -253,33 +255,51 @@ void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log) } EXPORT_SYMBOL_GPL(skx_set_decode); -int skx_get_src_id(struct skx_dev *d, int off, u8 *id) +static int skx_get_pkg_id(struct skx_dev *d, u8 *id) { - u32 reg; + int node; + int cpu; - if (pci_read_config_dword(d->util_all, off, ®)) { - skx_printk(KERN_ERR, "Failed to read src id\n"); - return -ENODEV; + node = pcibus_to_node(d->util_all->bus); + if (numa_valid_node(node)) { + for_each_cpu(cpu, cpumask_of_pcibus(d->util_all->bus)) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->initialized && cpu_to_node(cpu) == node) { + *id = topology_physical_package_id(cpu); + return 0; + } + } } - *id = GET_BITFIELD(reg, 12, 14); - return 0; + skx_printk(KERN_ERR, "Failed to get package ID from NUMA information\n"); + return -ENODEV; } -EXPORT_SYMBOL_GPL(skx_get_src_id); -int skx_get_node_id(struct skx_dev *d, u8 *id) +int skx_get_src_id(struct skx_dev *d, int off, u8 *id) { u32 reg; - if (pci_read_config_dword(d->util_all, 0xf4, ®)) { - skx_printk(KERN_ERR, "Failed to read node id\n"); + /* + * The 3-bit source IDs in PCI configuration space registers are limited + * to 8 unique IDs, and each ID is local to a UPI/QPI domain. + * + * Source IDs cannot be used to map devices to sockets on UV systems + * because they can exceed 8 sockets and have multiple UPI/QPI domains + * with identical, repeating source IDs. + */ + if (is_uv_system()) + return skx_get_pkg_id(d, id); + + if (pci_read_config_dword(d->util_all, off, ®)) { + skx_printk(KERN_ERR, "Failed to read src id\n"); return -ENODEV; } - *id = GET_BITFIELD(reg, 0, 2); + *id = GET_BITFIELD(reg, 12, 14); return 0; } -EXPORT_SYMBOL_GPL(skx_get_node_id); +EXPORT_SYMBOL_GPL(skx_get_src_id); static int get_width(u32 mtr) { @@ -541,7 +561,7 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, pvt->imc = imc; mci->ctl_name = kasprintf(GFP_KERNEL, "%s#%d IMC#%d", ctl_name, - imc->node_id, imc->lmc); + imc->src_id, imc->lmc); if (!mci->ctl_name) { rc = -ENOMEM; goto fail0; diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index b89fc9bd10ec..bd2d9eaeb448 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -117,7 +117,7 @@ struct skx_dev { bool hbm_mc; u8 mc; /* system wide mc# */ u8 lmc; /* socket relative mc# */ - u8 src_id, node_id; + u8 src_id; struct skx_channel { struct pci_dev *cdev; struct pci_dev *edev; @@ -261,7 +261,6 @@ void skx_set_res_cfg(struct res_config *cfg); void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc); int skx_get_src_id(struct skx_dev *d, int off, u8 *id); -int skx_get_node_id(struct skx_dev *d, u8 *id); int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list); -- Gitee From 99b82eeade77e19805e242bf7692845c28ed874d Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 17 Apr 2025 23:07:20 +0800 Subject: [PATCH 05/12] EDAC/i10nm: Explicitly set the modes of the RRL register sets ANBZ: #23456 commit 4878e1e90056230cefd580136d0e6d5689a7b770 upstream. The i10nm_edac driver uses the default modes (either patrol scrub read or on-demand read) of the RRL register sets configured by the BIOS. Explicitly set the modes during the loading of the i10nm_edac driver with the module parameter retry_rd_err_log=2. Intel-SIG: commit 4878e1e90056 EDAC/i10nm: Explicitly set the modes of the RRL register sets Add EDAC support and enhancement for GNR/GNR-D/CWF. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Feng Xu Link: https://lore.kernel.org/r/20250417150724.1170168-4-qiuxu.zhuo@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 93b4b56a961a..c0f1a45a8b69 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -73,6 +73,7 @@ #define I10NM_SAD_NM_CACHEABLE(reg) GET_BITFIELD(reg, 5, 5) #define RETRY_RD_ERR_LOG_UC BIT(1) +#define RETRY_RD_ERR_LOG_EN_PATSPR BIT(13) #define RETRY_RD_ERR_LOG_NOOVER BIT(14) #define RETRY_RD_ERR_LOG_EN BIT(15) #define RETRY_RD_ERR_LOG_NOOVER_UC (BIT(14) | BIT(1)) @@ -114,12 +115,15 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable rrl_ctl[2] = d2; s &= ~RETRY_RD_ERR_LOG_NOOVER_UC; + s |= RETRY_RD_ERR_LOG_EN_PATSPR; s |= RETRY_RD_ERR_LOG_EN; d &= ~RETRY_RD_ERR_LOG_NOOVER_UC; + d &= ~RETRY_RD_ERR_LOG_EN_PATSPR; d |= RETRY_RD_ERR_LOG_EN; if (offsets_demand2) { d2 &= ~RETRY_RD_ERR_LOG_UC; + d2 &= ~RETRY_RD_ERR_LOG_EN_PATSPR; d2 |= RETRY_RD_ERR_LOG_NOOVER; d2 |= RETRY_RD_ERR_LOG_EN; } @@ -129,18 +133,24 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable s |= RETRY_RD_ERR_LOG_UC; if (rrl_ctl[0] & RETRY_RD_ERR_LOG_NOOVER) s |= RETRY_RD_ERR_LOG_NOOVER; + if (!(rrl_ctl[0] & RETRY_RD_ERR_LOG_EN_PATSPR)) + s &= ~RETRY_RD_ERR_LOG_EN_PATSPR; if (!(rrl_ctl[0] & RETRY_RD_ERR_LOG_EN)) s &= ~RETRY_RD_ERR_LOG_EN; if (rrl_ctl[1] & RETRY_RD_ERR_LOG_UC) d |= RETRY_RD_ERR_LOG_UC; if (rrl_ctl[1] & RETRY_RD_ERR_LOG_NOOVER) d |= RETRY_RD_ERR_LOG_NOOVER; + if (rrl_ctl[1] & RETRY_RD_ERR_LOG_EN_PATSPR) + d |= RETRY_RD_ERR_LOG_EN_PATSPR; if (!(rrl_ctl[1] & RETRY_RD_ERR_LOG_EN)) d &= ~RETRY_RD_ERR_LOG_EN; if (offsets_demand2) { if (rrl_ctl[2] & RETRY_RD_ERR_LOG_UC) d2 |= RETRY_RD_ERR_LOG_UC; + if (rrl_ctl[2] & RETRY_RD_ERR_LOG_EN_PATSPR) + d2 |= RETRY_RD_ERR_LOG_EN_PATSPR; if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_NOOVER)) d2 &= ~RETRY_RD_ERR_LOG_NOOVER; if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_EN)) -- Gitee From 0397e83f9bb9af64cb840d682efd1a94dbf9915d Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 17 Apr 2025 23:07:21 +0800 Subject: [PATCH 06/12] EDAC/{skx_common,i10nm}: Structure the per-channel RRL registers ANBZ: #23456 commit 1a8a6af663a7f16c9b2779cf728187775735047b upstream. As the number of RRL (retry_rd_err_log) registers per memory channel increases, the positions of the RRL control bits and the widths of the RRL registers vary across different CPU generations. Adding RRL support for a new CPU requires handling these differences throughout the RRL-related code. Structure the offsets, widths, control bit positions, set numbers, modes, etc., of the per-channel RRL registers and make them configurable to facilitate easier RRL support for new CPUs. No functional changes are intended. Intel-SIG: commit 1a8a6af663a7 EDAC/{skx_common,i10nm}: Structure the per-channel RRL registers Add EDAC support and enhancement for GNR/GNR-D/CWF. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Feng Xu Link: https://lore.kernel.org/r/20250417150724.1170168-5-qiuxu.zhuo@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 92 ++++++++++++++++++++++++--------------- drivers/edac/skx_common.h | 21 +++++---- 2 files changed, 69 insertions(+), 44 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index c0f1a45a8b69..61d40491ca0e 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -86,15 +86,38 @@ static int retry_rd_err_log; static int decoding_via_mca; static bool mem_cfg_2lm; -static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}; -static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}; -static u32 offsets_scrub_spr_hbm0[] = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8}; -static u32 offsets_scrub_spr_hbm1[] = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8}; -static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}; -static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0}; -static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10}; -static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}; -static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}; +static struct reg_rrl icx_reg_rrl_ddr = { + .set_num = 2, + .offsets = { + {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}, + {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}, + }, +}; + +static struct reg_rrl spr_reg_rrl_ddr = { + .set_num = 3, + .offsets = { + {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}, + {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0}, + {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10}, + }, +}; + +static struct reg_rrl spr_reg_rrl_hbm_pch0 = { + .set_num = 2, + .offsets = { + {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8}, + {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}, + }, +}; + +static struct reg_rrl spr_reg_rrl_hbm_pch1 = { + .set_num = 2, + .offsets = { + {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8}, + {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}, + }, +}; static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, u32 *rrl_ctl, u32 *offsets_scrub, u32 *offsets_demand, @@ -185,9 +208,11 @@ static void enable_retry_rd_err_log(bool enable) chan = d->imc[i].chan; for (j = 0; j < chan_num; j++) __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0], - res_cfg->offsets_scrub, - res_cfg->offsets_demand, - res_cfg->offsets_demand2); + res_cfg->reg_rrl_ddr->offsets[0], + res_cfg->reg_rrl_ddr->offsets[1], + res_cfg->reg_rrl_ddr->set_num > 2 ? + res_cfg->reg_rrl_ddr->offsets[2] : NULL); + } imc_num += res_cfg->hbm_imc_num; @@ -201,12 +226,12 @@ static void enable_retry_rd_err_log(bool enable) chan = d->imc[i].chan; for (j = 0; j < chan_num; j++) { __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0], - res_cfg->offsets_scrub_hbm0, - res_cfg->offsets_demand_hbm0, + res_cfg->reg_rrl_hbm[0]->offsets[0], + res_cfg->reg_rrl_hbm[0]->offsets[1], NULL); __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[1], - res_cfg->offsets_scrub_hbm1, - res_cfg->offsets_demand_hbm1, + res_cfg->reg_rrl_hbm[1]->offsets[0], + res_cfg->reg_rrl_hbm[1]->offsets[1], NULL); } } @@ -233,17 +258,18 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, pch = res->cs & 1; if (pch) - offsets = scrub_err ? res_cfg->offsets_scrub_hbm1 : - res_cfg->offsets_demand_hbm1; + offsets = scrub_err ? res_cfg->reg_rrl_hbm[1]->offsets[0] : + res_cfg->reg_rrl_hbm[1]->offsets[1]; else - offsets = scrub_err ? res_cfg->offsets_scrub_hbm0 : - res_cfg->offsets_demand_hbm0; + offsets = scrub_err ? res_cfg->reg_rrl_hbm[0]->offsets[0] : + res_cfg->reg_rrl_hbm[0]->offsets[1]; } else { if (scrub_err) { - offsets = res_cfg->offsets_scrub; + offsets = res_cfg->reg_rrl_ddr->offsets[0]; } else { - offsets = res_cfg->offsets_demand; - xffsets = res_cfg->offsets_demand2; + offsets = res_cfg->reg_rrl_ddr->offsets[1]; + if (res_cfg->reg_rrl_ddr->set_num > 2) + xffsets = res_cfg->reg_rrl_ddr->offsets[2]; } } @@ -883,8 +909,7 @@ static struct res_config i10nm_cfg0 = { .ddr_mdev_bdf = {0, 12, 0}, .hbm_mdev_bdf = {0, 12, 1}, .sad_all_offset = 0x108, - .offsets_scrub = offsets_scrub_icx, - .offsets_demand = offsets_demand_icx, + .reg_rrl_ddr = &icx_reg_rrl_ddr, }; static struct res_config i10nm_cfg1 = { @@ -902,8 +927,7 @@ static struct res_config i10nm_cfg1 = { .ddr_mdev_bdf = {0, 12, 0}, .hbm_mdev_bdf = {0, 12, 1}, .sad_all_offset = 0x108, - .offsets_scrub = offsets_scrub_icx, - .offsets_demand = offsets_demand_icx, + .reg_rrl_ddr = &icx_reg_rrl_ddr, }; static struct res_config spr_cfg = { @@ -926,13 +950,9 @@ static struct res_config spr_cfg = { .ddr_mdev_bdf = {0, 12, 0}, .hbm_mdev_bdf = {0, 12, 1}, .sad_all_offset = 0x300, - .offsets_scrub = offsets_scrub_spr, - .offsets_scrub_hbm0 = offsets_scrub_spr_hbm0, - .offsets_scrub_hbm1 = offsets_scrub_spr_hbm1, - .offsets_demand = offsets_demand_spr, - .offsets_demand2 = offsets_demand2_spr, - .offsets_demand_hbm0 = offsets_demand_spr_hbm0, - .offsets_demand_hbm1 = offsets_demand_spr_hbm1, + .reg_rrl_ddr = &spr_reg_rrl_ddr, + .reg_rrl_hbm[0] = &spr_reg_rrl_hbm_pch0, + .reg_rrl_hbm[1] = &spr_reg_rrl_hbm_pch1, }; static struct res_config gnr_cfg = { @@ -1119,7 +1139,7 @@ static int __init i10nm_init(void) mce_register_decode_chain(&i10nm_mce_dec); skx_setup_debug("i10nm_test"); - if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) { + if (retry_rd_err_log && res_cfg->reg_rrl_ddr) { skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log); if (retry_rd_err_log == 2) enable_retry_rd_err_log(true); @@ -1139,7 +1159,7 @@ static void __exit i10nm_exit(void) { edac_dbg(2, "\n"); - if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) { + if (retry_rd_err_log && res_cfg->reg_rrl_ddr) { skx_set_decode(NULL, NULL); if (retry_rd_err_log == 2) enable_retry_rd_err_log(false); diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index bd2d9eaeb448..cda7a16b8bee 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -82,6 +82,15 @@ /* Max RRL register sets per {,sub-,pseudo-}channel. */ #define NUM_RRL_SET 3 +/* Max RRL registers per set. */ +#define NUM_RRL_REG 6 + +/* RRL registers per {,sub-,pseudo-}channel. */ +struct reg_rrl { + /* RRL register parts. */ + int set_num; + u32 offsets[NUM_RRL_SET][NUM_RRL_REG]; +}; /* * Each cpu socket contains some pci devices that provide global @@ -238,14 +247,10 @@ struct res_config { /* HBM mdev device BDF */ struct pci_bdf hbm_mdev_bdf; int sad_all_offset; - /* Offsets of retry_rd_err_log registers */ - u32 *offsets_scrub; - u32 *offsets_scrub_hbm0; - u32 *offsets_scrub_hbm1; - u32 *offsets_demand; - u32 *offsets_demand2; - u32 *offsets_demand_hbm0; - u32 *offsets_demand_hbm1; + /* RRL register sets per DDR channel */ + struct reg_rrl *reg_rrl_ddr; + /* RRL register sets per HBM channel */ + struct reg_rrl *reg_rrl_hbm[2]; }; typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, -- Gitee From 24f92fc5f6ab8919d2548c1293e383e67263ef13 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 17 Apr 2025 23:07:22 +0800 Subject: [PATCH 07/12] EDAC/{skx_common,i10nm}: Refactor enable_retry_rd_err_log() ANBZ: #23456 commit ba3985c1faf5eb72084ddc31204b076c2a450263 upstream. Refactor enable_retry_rd_err_log() using helper functions for both DDR and HBM, making the RRL control bits configurable instead of hard-coded. Additionally, explicitly define the four RRL modes for better readability. No functional changes intended. Intel-SIG: commit ba3985c1faf5 EDAC/{skx_common,i10nm}: Refactor enable_retry_rd_err_log() Add EDAC support and enhancement for GNR/GNR-D/CWF. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Feng Xu Link: https://lore.kernel.org/r/20250417150724.1170168-6-qiuxu.zhuo@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 237 ++++++++++++++++++++++---------------- drivers/edac/skx_common.h | 20 ++++ 2 files changed, 156 insertions(+), 101 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 61d40491ca0e..ee1943908da3 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -72,11 +72,6 @@ #define I10NM_SAD_ENABLE(reg) GET_BITFIELD(reg, 0, 0) #define I10NM_SAD_NM_CACHEABLE(reg) GET_BITFIELD(reg, 5, 5) -#define RETRY_RD_ERR_LOG_UC BIT(1) -#define RETRY_RD_ERR_LOG_EN_PATSPR BIT(13) -#define RETRY_RD_ERR_LOG_NOOVER BIT(14) -#define RETRY_RD_ERR_LOG_EN BIT(15) -#define RETRY_RD_ERR_LOG_NOOVER_UC (BIT(14) | BIT(1)) #define RETRY_RD_ERR_LOG_OVER_UC_V (BIT(2) | BIT(1) | BIT(0)) static struct list_head *i10nm_edac_list; @@ -88,153 +83,193 @@ static bool mem_cfg_2lm; static struct reg_rrl icx_reg_rrl_ddr = { .set_num = 2, + .modes = {LRE_SCRUB, LRE_DEMAND}, .offsets = { {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}, {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}, }, + .widths = {4, 4, 4, 4, 4, 8}, + .uc_mask = BIT(1), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), }; static struct reg_rrl spr_reg_rrl_ddr = { .set_num = 3, + .modes = {LRE_SCRUB, LRE_DEMAND, FRE_DEMAND}, .offsets = { {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}, {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0}, {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10}, }, + .widths = {4, 4, 8, 4, 4, 8}, + .uc_mask = BIT(1), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), }; static struct reg_rrl spr_reg_rrl_hbm_pch0 = { .set_num = 2, + .modes = {LRE_SCRUB, LRE_DEMAND}, .offsets = { {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8}, {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}, }, + .widths = {4, 4, 8, 4, 4, 8}, + .uc_mask = BIT(1), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), }; static struct reg_rrl spr_reg_rrl_hbm_pch1 = { .set_num = 2, + .modes = {LRE_SCRUB, LRE_DEMAND}, .offsets = { {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8}, {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}, }, + .widths = {4, 4, 8, 4, 4, 8}, + .uc_mask = BIT(1), + .en_patspr_mask = BIT(13), + .noover_mask = BIT(14), + .en_mask = BIT(15), }; -static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, u32 *rrl_ctl, - u32 *offsets_scrub, u32 *offsets_demand, - u32 *offsets_demand2) +static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width) { - u32 s, d, d2; + switch (width) { + case 4: + return I10NM_GET_REG32(imc, chan, offset); + case 8: + return I10NM_GET_REG64(imc, chan, offset); + default: + i10nm_printk(KERN_ERR, "Invalid readd RRL 0x%x width %d\n", offset, width); + return 0; + } +} - s = I10NM_GET_REG32(imc, chan, offsets_scrub[0]); - d = I10NM_GET_REG32(imc, chan, offsets_demand[0]); - if (offsets_demand2) - d2 = I10NM_GET_REG32(imc, chan, offsets_demand2[0]); +static void write_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width, u64 val) +{ + switch (width) { + case 4: + return I10NM_SET_REG32(imc, chan, offset, (u32)val); + default: + i10nm_printk(KERN_ERR, "Invalid write RRL 0x%x width %d\n", offset, width); + } +} + +static void enable_rrl(struct skx_imc *imc, int chan, struct reg_rrl *rrl, + int rrl_set, bool enable, u32 *rrl_ctl) +{ + enum rrl_mode mode = rrl->modes[rrl_set]; + u32 offset = rrl->offsets[rrl_set][0], v; + u8 width = rrl->widths[0]; + bool first, scrub; + + /* First or last read error. */ + first = (mode == FRE_SCRUB || mode == FRE_DEMAND); + /* Patrol scrub or on-demand read error. */ + scrub = (mode == FRE_SCRUB || mode == LRE_SCRUB); + + v = read_imc_reg(imc, chan, offset, width); if (enable) { - /* Save default configurations */ - rrl_ctl[0] = s; - rrl_ctl[1] = d; - if (offsets_demand2) - rrl_ctl[2] = d2; - - s &= ~RETRY_RD_ERR_LOG_NOOVER_UC; - s |= RETRY_RD_ERR_LOG_EN_PATSPR; - s |= RETRY_RD_ERR_LOG_EN; - d &= ~RETRY_RD_ERR_LOG_NOOVER_UC; - d &= ~RETRY_RD_ERR_LOG_EN_PATSPR; - d |= RETRY_RD_ERR_LOG_EN; - - if (offsets_demand2) { - d2 &= ~RETRY_RD_ERR_LOG_UC; - d2 &= ~RETRY_RD_ERR_LOG_EN_PATSPR; - d2 |= RETRY_RD_ERR_LOG_NOOVER; - d2 |= RETRY_RD_ERR_LOG_EN; - } + /* Save default configurations. */ + *rrl_ctl = v; + v &= ~rrl->uc_mask; + + if (first) + v |= rrl->noover_mask; + else + v &= ~rrl->noover_mask; + + if (scrub) + v |= rrl->en_patspr_mask; + else + v &= ~rrl->en_patspr_mask; + + v |= rrl->en_mask; } else { - /* Restore default configurations */ - if (rrl_ctl[0] & RETRY_RD_ERR_LOG_UC) - s |= RETRY_RD_ERR_LOG_UC; - if (rrl_ctl[0] & RETRY_RD_ERR_LOG_NOOVER) - s |= RETRY_RD_ERR_LOG_NOOVER; - if (!(rrl_ctl[0] & RETRY_RD_ERR_LOG_EN_PATSPR)) - s &= ~RETRY_RD_ERR_LOG_EN_PATSPR; - if (!(rrl_ctl[0] & RETRY_RD_ERR_LOG_EN)) - s &= ~RETRY_RD_ERR_LOG_EN; - if (rrl_ctl[1] & RETRY_RD_ERR_LOG_UC) - d |= RETRY_RD_ERR_LOG_UC; - if (rrl_ctl[1] & RETRY_RD_ERR_LOG_NOOVER) - d |= RETRY_RD_ERR_LOG_NOOVER; - if (rrl_ctl[1] & RETRY_RD_ERR_LOG_EN_PATSPR) - d |= RETRY_RD_ERR_LOG_EN_PATSPR; - if (!(rrl_ctl[1] & RETRY_RD_ERR_LOG_EN)) - d &= ~RETRY_RD_ERR_LOG_EN; - - if (offsets_demand2) { - if (rrl_ctl[2] & RETRY_RD_ERR_LOG_UC) - d2 |= RETRY_RD_ERR_LOG_UC; - if (rrl_ctl[2] & RETRY_RD_ERR_LOG_EN_PATSPR) - d2 |= RETRY_RD_ERR_LOG_EN_PATSPR; - if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_NOOVER)) - d2 &= ~RETRY_RD_ERR_LOG_NOOVER; - if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_EN)) - d2 &= ~RETRY_RD_ERR_LOG_EN; + /* Restore default configurations. */ + if (*rrl_ctl & rrl->uc_mask) + v |= rrl->uc_mask; + + if (first) { + if (!(*rrl_ctl & rrl->noover_mask)) + v &= ~rrl->noover_mask; + } else { + if (*rrl_ctl & rrl->noover_mask) + v |= rrl->noover_mask; } + + if (scrub) { + if (!(*rrl_ctl & rrl->en_patspr_mask)) + v &= ~rrl->en_patspr_mask; + } else { + if (*rrl_ctl & rrl->en_patspr_mask) + v |= rrl->en_patspr_mask; + } + + if (!(*rrl_ctl & rrl->en_mask)) + v &= ~rrl->en_mask; } - I10NM_SET_REG32(imc, chan, offsets_scrub[0], s); - I10NM_SET_REG32(imc, chan, offsets_demand[0], d); - if (offsets_demand2) - I10NM_SET_REG32(imc, chan, offsets_demand2[0], d2); + write_imc_reg(imc, chan, offset, width, v); +} + +static void enable_rrls(struct skx_imc *imc, int chan, struct reg_rrl *rrl, + bool enable, u32 *rrl_ctl) +{ + for (int i = 0; i < rrl->set_num; i++) + enable_rrl(imc, chan, rrl, i, enable, rrl_ctl + i); +} + +static void enable_rrls_ddr(struct skx_imc *imc, bool enable) +{ + struct reg_rrl *rrl_ddr = res_cfg->reg_rrl_ddr; + int i, chan_num = res_cfg->ddr_chan_num; + struct skx_channel *chan = imc->chan; + + if (!imc->mbase) + return; + + for (i = 0; i < chan_num; i++) + enable_rrls(imc, i, rrl_ddr, enable, chan[i].rrl_ctl[0]); +} + +static void enable_rrls_hbm(struct skx_imc *imc, bool enable) +{ + struct reg_rrl **rrl_hbm = res_cfg->reg_rrl_hbm; + int i, chan_num = res_cfg->hbm_chan_num; + struct skx_channel *chan = imc->chan; + + if (!imc->mbase || !imc->hbm_mc || !rrl_hbm[0] || !rrl_hbm[1]) + return; + + for (i = 0; i < chan_num; i++) { + enable_rrls(imc, i, rrl_hbm[0], enable, chan[i].rrl_ctl[0]); + enable_rrls(imc, i, rrl_hbm[1], enable, chan[i].rrl_ctl[1]); + } } static void enable_retry_rd_err_log(bool enable) { - int i, j, imc_num, chan_num; - struct skx_channel *chan; - struct skx_imc *imc; struct skx_dev *d; + int i, imc_num; edac_dbg(2, "\n"); list_for_each_entry(d, i10nm_edac_list, list) { imc_num = res_cfg->ddr_imc_num; - chan_num = res_cfg->ddr_chan_num; - - for (i = 0; i < imc_num; i++) { - imc = &d->imc[i]; - if (!imc->mbase) - continue; - - chan = d->imc[i].chan; - for (j = 0; j < chan_num; j++) - __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0], - res_cfg->reg_rrl_ddr->offsets[0], - res_cfg->reg_rrl_ddr->offsets[1], - res_cfg->reg_rrl_ddr->set_num > 2 ? - res_cfg->reg_rrl_ddr->offsets[2] : NULL); - - } + for (i = 0; i < imc_num; i++) + enable_rrls_ddr(&d->imc[i], enable); imc_num += res_cfg->hbm_imc_num; - chan_num = res_cfg->hbm_chan_num; - - for (; i < imc_num; i++) { - imc = &d->imc[i]; - if (!imc->mbase || !imc->hbm_mc) - continue; - - chan = d->imc[i].chan; - for (j = 0; j < chan_num; j++) { - __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[0], - res_cfg->reg_rrl_hbm[0]->offsets[0], - res_cfg->reg_rrl_hbm[0]->offsets[1], - NULL); - __enable_retry_rd_err_log(imc, j, enable, chan[j].rrl_ctl[1], - res_cfg->reg_rrl_hbm[1]->offsets[0], - res_cfg->reg_rrl_hbm[1]->offsets[1], - NULL); - } - } + for (; i < imc_num; i++) + enable_rrls_hbm(&d->imc[i], enable); } } diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index cda7a16b8bee..7fc0c5af43e7 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -85,11 +85,31 @@ /* Max RRL registers per set. */ #define NUM_RRL_REG 6 +/* Modes of RRL register set. */ +enum rrl_mode { + /* Last read error from patrol scrub. */ + LRE_SCRUB, + /* Last read error from demand. */ + LRE_DEMAND, + /* First read error from patrol scrub. */ + FRE_SCRUB, + /* First read error from demand. */ + FRE_DEMAND, +}; + /* RRL registers per {,sub-,pseudo-}channel. */ struct reg_rrl { /* RRL register parts. */ int set_num; + enum rrl_mode modes[NUM_RRL_SET]; u32 offsets[NUM_RRL_SET][NUM_RRL_REG]; + /* RRL register widths in byte per set. */ + u8 widths[NUM_RRL_REG]; + /* RRL control bits of the first register per set. */ + u32 uc_mask; + u32 en_patspr_mask; + u32 noover_mask; + u32 en_mask; }; /* -- Gitee From 450304cb9437339bc2ea15ca3ec1424035ea3976 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 17 Apr 2025 23:07:23 +0800 Subject: [PATCH 08/12] EDAC/{skx_common,i10nm}: Refactor show_retry_rd_err_log() ANBZ: #23456 commit 126168fa2c3e16113ea75a656fff5156a54a5726 upstream. Make the {valid bit, overwritten status, number} of RRL registers and the {number, offsets, widths} of per-channel CORRERRCNT registers configurable. Refactor show_retry_rd_err_log() to use the configurable fields of struct reg_rrl, making the code more scalable and simpler. No functional changes intended. Intel-SIG: commit 126168fa2c3e EDAC/{skx_common,i10nm}: Refactor show_retry_rd_err_log() Add EDAC support and enhancement for GNR/GNR-D/CWF. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Feng Xu Link: https://lore.kernel.org/r/20250417150724.1170168-7-qiuxu.zhuo@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 162 +++++++++++++++++--------------------- drivers/edac/skx_common.h | 11 ++- 2 files changed, 81 insertions(+), 92 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index ee1943908da3..c9aadbee58da 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -72,8 +72,6 @@ #define I10NM_SAD_ENABLE(reg) GET_BITFIELD(reg, 0, 0) #define I10NM_SAD_NM_CACHEABLE(reg) GET_BITFIELD(reg, 5, 5) -#define RETRY_RD_ERR_LOG_OVER_UC_V (BIT(2) | BIT(1) | BIT(0)) - static struct list_head *i10nm_edac_list; static struct res_config *res_cfg; @@ -83,20 +81,28 @@ static bool mem_cfg_2lm; static struct reg_rrl icx_reg_rrl_ddr = { .set_num = 2, + .reg_num = 6, .modes = {LRE_SCRUB, LRE_DEMAND}, .offsets = { {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8}, {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0}, }, .widths = {4, 4, 4, 4, 4, 8}, + .v_mask = BIT(0), .uc_mask = BIT(1), + .over_mask = BIT(2), .en_patspr_mask = BIT(13), .noover_mask = BIT(14), .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24}, + .cecnt_widths = {4, 4, 4, 4}, }; static struct reg_rrl spr_reg_rrl_ddr = { .set_num = 3, + .reg_num = 6, .modes = {LRE_SCRUB, LRE_DEMAND, FRE_DEMAND}, .offsets = { {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8}, @@ -104,38 +110,58 @@ static struct reg_rrl spr_reg_rrl_ddr = { {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10}, }, .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), .uc_mask = BIT(1), + .over_mask = BIT(2), .en_patspr_mask = BIT(13), .noover_mask = BIT(14), .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24}, + .cecnt_widths = {4, 4, 4, 4}, }; static struct reg_rrl spr_reg_rrl_hbm_pch0 = { .set_num = 2, + .reg_num = 6, .modes = {LRE_SCRUB, LRE_DEMAND}, .offsets = { {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8}, {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0}, }, .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), .uc_mask = BIT(1), + .over_mask = BIT(2), .en_patspr_mask = BIT(13), .noover_mask = BIT(14), .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x2818, 0x281c, 0x2820, 0x2824}, + .cecnt_widths = {4, 4, 4, 4}, }; static struct reg_rrl spr_reg_rrl_hbm_pch1 = { .set_num = 2, + .reg_num = 6, .modes = {LRE_SCRUB, LRE_DEMAND}, .offsets = { {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8}, {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0}, }, .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), .uc_mask = BIT(1), + .over_mask = BIT(2), .en_patspr_mask = BIT(13), .noover_mask = BIT(14), .en_mask = BIT(15), + + .cecnt_num = 4, + .cecnt_offsets = {0x2c18, 0x2c1c, 0x2c20, 0x2c24}, + .cecnt_widths = {4, 4, 4, 4}, }; static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width) @@ -276,110 +302,64 @@ static void enable_retry_rd_err_log(bool enable) static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, int len, bool scrub_err) { + int i, j, n, ch = res->channel, pch = res->cs & 1; struct skx_imc *imc = &res->dev->imc[res->imc]; - u32 log0, log1, log2, log3, log4; - u32 corr0, corr1, corr2, corr3; - u32 lxg0, lxg1, lxg3, lxg4; - u32 *xffsets = NULL; - u64 log2a, log5; - u64 lxg2a, lxg5; - u32 *offsets; - int n, pch; + u32 offset, status_mask; + struct reg_rrl *rrl; + u64 log, corr; + bool scrub; + u8 width; if (!imc->mbase) return; - if (imc->hbm_mc) { - pch = res->cs & 1; + rrl = imc->hbm_mc ? res_cfg->reg_rrl_hbm[pch] : res_cfg->reg_rrl_ddr; - if (pch) - offsets = scrub_err ? res_cfg->reg_rrl_hbm[1]->offsets[0] : - res_cfg->reg_rrl_hbm[1]->offsets[1]; - else - offsets = scrub_err ? res_cfg->reg_rrl_hbm[0]->offsets[0] : - res_cfg->reg_rrl_hbm[0]->offsets[1]; - } else { - if (scrub_err) { - offsets = res_cfg->reg_rrl_ddr->offsets[0]; - } else { - offsets = res_cfg->reg_rrl_ddr->offsets[1]; - if (res_cfg->reg_rrl_ddr->set_num > 2) - xffsets = res_cfg->reg_rrl_ddr->offsets[2]; - } - } + if (!rrl) + return; - log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]); - log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]); - log3 = I10NM_GET_REG32(imc, res->channel, offsets[3]); - log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]); - log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]); - - if (xffsets) { - lxg0 = I10NM_GET_REG32(imc, res->channel, xffsets[0]); - lxg1 = I10NM_GET_REG32(imc, res->channel, xffsets[1]); - lxg3 = I10NM_GET_REG32(imc, res->channel, xffsets[3]); - lxg4 = I10NM_GET_REG32(imc, res->channel, xffsets[4]); - lxg5 = I10NM_GET_REG64(imc, res->channel, xffsets[5]); - } + status_mask = rrl->over_mask | rrl->uc_mask | rrl->v_mask; - if (res_cfg->type == SPR) { - log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]); - n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx", - log0, log1, log2a, log3, log4, log5); + n = snprintf(msg, len, " retry_rd_err_log["); + for (i = 0; i < rrl->set_num; i++) { + scrub = (rrl->modes[i] == FRE_SCRUB || rrl->modes[i] == LRE_SCRUB); + if (scrub_err != scrub) + continue; - if (len - n > 0) { - if (xffsets) { - lxg2a = I10NM_GET_REG64(imc, res->channel, xffsets[2]); - n += snprintf(msg + n, len - n, " %.8x %.8x %.16llx %.8x %.8x %.16llx]", - lxg0, lxg1, lxg2a, lxg3, lxg4, lxg5); - } else { - n += snprintf(msg + n, len - n, "]"); - } - } - } else { - log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]); - n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]", - log0, log1, log2, log3, log4, log5); - } + for (j = 0; j < rrl->reg_num && len - n > 0; j++) { + offset = rrl->offsets[i][j]; + width = rrl->widths[j]; + log = read_imc_reg(imc, ch, offset, width); - if (imc->hbm_mc) { - if (pch) { - corr0 = I10NM_GET_REG32(imc, res->channel, 0x2c18); - corr1 = I10NM_GET_REG32(imc, res->channel, 0x2c1c); - corr2 = I10NM_GET_REG32(imc, res->channel, 0x2c20); - corr3 = I10NM_GET_REG32(imc, res->channel, 0x2c24); - } else { - corr0 = I10NM_GET_REG32(imc, res->channel, 0x2818); - corr1 = I10NM_GET_REG32(imc, res->channel, 0x281c); - corr2 = I10NM_GET_REG32(imc, res->channel, 0x2820); - corr3 = I10NM_GET_REG32(imc, res->channel, 0x2824); + if (width == 4) + n += snprintf(msg + n, len - n, "%.8llx ", log); + else + n += snprintf(msg + n, len - n, "%.16llx ", log); + + /* Clear RRL status if RRL in Linux control mode. */ + if (retry_rd_err_log == 2 && !j && (log & status_mask)) + write_imc_reg(imc, ch, offset, width, log & ~status_mask); } - } else { - corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18); - corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c); - corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20); - corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24); } - if (len - n > 0) - snprintf(msg + n, len - n, - " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]", - corr0 & 0xffff, corr0 >> 16, - corr1 & 0xffff, corr1 >> 16, - corr2 & 0xffff, corr2 >> 16, - corr3 & 0xffff, corr3 >> 16); - - /* Clear status bits */ - if (retry_rd_err_log == 2) { - if (log0 & RETRY_RD_ERR_LOG_OVER_UC_V) { - log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; - I10NM_SET_REG32(imc, res->channel, offsets[0], log0); - } + /* Move back one space. */ + n--; + n += snprintf(msg + n, len - n, "]"); + + if (len - n > 0) { + n += snprintf(msg + n, len - n, " correrrcnt["); + for (i = 0; i < rrl->cecnt_num && len - n > 0; i++) { + offset = rrl->cecnt_offsets[i]; + width = rrl->cecnt_widths[i]; + corr = read_imc_reg(imc, ch, offset, width); - if (xffsets && (lxg0 & RETRY_RD_ERR_LOG_OVER_UC_V)) { - lxg0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V; - I10NM_SET_REG32(imc, res->channel, xffsets[0], lxg0); + n += snprintf(msg + n, len - n, "%.4llx %.4llx ", + corr & 0xffff, corr >> 16); } + + /* Move back one space. */ + n--; + n += snprintf(msg + n, len - n, "]"); } } diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 7fc0c5af43e7..b04f4d835a9c 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -84,6 +84,8 @@ #define NUM_RRL_SET 3 /* Max RRL registers per set. */ #define NUM_RRL_REG 6 +/* Max correctable error count registers. */ +#define NUM_CECNT_REG 4 /* Modes of RRL register set. */ enum rrl_mode { @@ -100,16 +102,23 @@ enum rrl_mode { /* RRL registers per {,sub-,pseudo-}channel. */ struct reg_rrl { /* RRL register parts. */ - int set_num; + int set_num, reg_num; enum rrl_mode modes[NUM_RRL_SET]; u32 offsets[NUM_RRL_SET][NUM_RRL_REG]; /* RRL register widths in byte per set. */ u8 widths[NUM_RRL_REG]; /* RRL control bits of the first register per set. */ + u32 v_mask; u32 uc_mask; + u32 over_mask; u32 en_patspr_mask; u32 noover_mask; u32 en_mask; + + /* CORRERRCNT register parts. */ + int cecnt_num; + u32 cecnt_offsets[NUM_CECNT_REG]; + u8 cecnt_widths[NUM_CECNT_REG]; }; /* -- Gitee From eaf7c8992c47e1205dd845ecc401ff744c8e0077 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 17 Apr 2025 23:07:24 +0800 Subject: [PATCH 09/12] EDAC/{skx_common,i10nm}: Add RRL support for Intel Granite Rapids server ANBZ: #23456 commit 5904dc561ef21e69f0b9dca39d1a66e34b7ea764 upstream. Compared to previous generations, Granite Rapids defines the RRL control bits {en_patspr, noover, en} in different positions, adds an extra RRL set for the new mode of the first patrol-scrub read error, and extends the number of CORRERRCNT registers from 4 to 8, encoding one counter per CORRERRCNT register. Add a Granite Rapids reg_rrl configuration table and adjust the code to accommodate the differences mentioned above for RRL support. Intel-SIG: commit 5904dc561ef2 EDAC/{skx_common,i10nm}: Add RRL support for Intel Granite Rapids server Add EDAC support and enhancement for GNR/GNR-D/CWF. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: Feng Xu Link: https://lore.kernel.org/r/20250417150724.1170168-8-qiuxu.zhuo@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 37 +++++++++++++++++++++++++++++++++++-- drivers/edac/skx_common.h | 4 ++-- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index c9aadbee58da..2f8d03809c05 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -164,6 +164,29 @@ static struct reg_rrl spr_reg_rrl_hbm_pch1 = { .cecnt_widths = {4, 4, 4, 4}, }; +static struct reg_rrl gnr_reg_rrl_ddr = { + .set_num = 4, + .reg_num = 6, + .modes = {FRE_SCRUB, FRE_DEMAND, LRE_SCRUB, LRE_DEMAND}, + .offsets = { + {0x2f10, 0x2f20, 0x2f30, 0x2f50, 0x2f60, 0xba0}, + {0x2f14, 0x2f24, 0x2f38, 0x2f54, 0x2f64, 0xba8}, + {0x2f18, 0x2f28, 0x2f40, 0x2f58, 0x2f68, 0xbb0}, + {0x2f1c, 0x2f2c, 0x2f48, 0x2f5c, 0x2f6c, 0xbb8}, + }, + .widths = {4, 4, 8, 4, 4, 8}, + .v_mask = BIT(0), + .uc_mask = BIT(1), + .over_mask = BIT(2), + .en_patspr_mask = BIT(14), + .noover_mask = BIT(15), + .en_mask = BIT(12), + + .cecnt_num = 8, + .cecnt_offsets = {0x2c10, 0x2c14, 0x2c18, 0x2c1c, 0x2c20, 0x2c24, 0x2c28, 0x2c2c}, + .cecnt_widths = {4, 4, 4, 4, 4, 4, 4, 4}, +}; + static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width) { switch (width) { @@ -353,8 +376,17 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, width = rrl->cecnt_widths[i]; corr = read_imc_reg(imc, ch, offset, width); - n += snprintf(msg + n, len - n, "%.4llx %.4llx ", - corr & 0xffff, corr >> 16); + /* CPUs {ICX,SPR} encode two counters per 4-byte CORRERRCNT register. */ + if (res_cfg->type <= SPR) { + n += snprintf(msg + n, len - n, "%.4llx %.4llx ", + corr & 0xffff, corr >> 16); + } else { + /* CPUs {GNR} encode one counter per CORRERRCNT register. */ + if (width == 4) + n += snprintf(msg + n, len - n, "%.8llx ", corr); + else + n += snprintf(msg + n, len - n, "%.16llx ", corr); + } } /* Move back one space. */ @@ -985,6 +1017,7 @@ static struct res_config gnr_cfg = { .uracu_bdf = {0, 0, 1}, .ddr_mdev_bdf = {0, 5, 1}, .sad_all_offset = 0x300, + .reg_rrl_ddr = &gnr_reg_rrl_ddr, }; static const struct x86_cpu_id i10nm_cpuids[] = { diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index b04f4d835a9c..15cb774d88ec 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -81,11 +81,11 @@ #define MCACOD_EXT_MEM_ERR 0x280 /* Max RRL register sets per {,sub-,pseudo-}channel. */ -#define NUM_RRL_SET 3 +#define NUM_RRL_SET 4 /* Max RRL registers per set. */ #define NUM_RRL_REG 6 /* Max correctable error count registers. */ -#define NUM_CECNT_REG 4 +#define NUM_CECNT_REG 8 /* Modes of RRL register set. */ enum rrl_mode { -- Gitee From fd1e07dad2e4d36cf403aae0771e107b28332d21 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Thu, 24 Apr 2025 16:14:54 +0800 Subject: [PATCH 10/12] EDAC/i10nm: Fix the bitwise operation between variables of different sizes ANBZ: #23456 commit 2b2408aca90b86c1ef51c19d834e5f6db0a1ff30 upstream. The tool of Smatch static checker reported the following warning: drivers/edac/i10nm_base.c:364 show_retry_rd_err_log() warn: should bitwise negate be 'ullong'? This warning was due to the bitwise NOT/AND operations between 'status_mask' (a u32 type) and 'log' (a u64 type), which resulted in the high 32 bits of 'log' were cleared. This was a false positive warning, as only the low 32 bits of 'log' was written to the first RRL memory controller register (a u32 type). To improve code sanity, fix this warning by changing 'status_mask' to a u64 type, ensuring it matches the size of 'log' for bitwise operations. Intel-SIG: commit 2b2408aca90b EDAC/i10nm: Fix the bitwise operation between variables of different sizes Add EDAC support and enhancement for GNR/GNR-D/CWF. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/aAih0KmEVq7ch6v2@stanley.mountain/ Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20250424081454.2952632-1-qiuxu.zhuo@intel.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 2f8d03809c05..2fd024aa6c0e 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -327,10 +327,10 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, { int i, j, n, ch = res->channel, pch = res->cs & 1; struct skx_imc *imc = &res->dev->imc[res->imc]; - u32 offset, status_mask; + u64 log, corr, status_mask; struct reg_rrl *rrl; - u64 log, corr; bool scrub; + u32 offset; u8 width; if (!imc->mbase) -- Gitee From 3f025a9d644c03e5c8214baf859c455e44c8b231 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 4 Jul 2025 23:16:07 +0800 Subject: [PATCH 11/12] EDAC/i10nm: Add Intel Granite Rapids-D support ANBZ: #23456 commit 9ad08c1115646533097c8a799ad046bf5127b04a upstream. The Granite Rapids-D CPU model uses memory controller registers similar to those of the Granite Rapids server CPU but with a different memory controller MMIO base. Add the Granite Rapids-D CPU model ID and use the new memory controller MMIO base for EDAC support. Intel-SIG: commit 9ad08c111564 EDAC/i10nm: Add Intel Granite Rapids-D support Add EDAC support and enhancement for GNR/GNR-D/CWF. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Tested-by: VikasX Chougule Link: https://lore.kernel.org/r/20250704151609.7833-2-qiuxu.zhuo@intel.com [ Zhang Rui: resolve conflict (X86_MATCH_VFM) and amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 2fd024aa6c0e..a23cee3a26e5 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -62,6 +62,7 @@ ((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000) #define I10NM_GNR_IMC_MMIO_OFFSET 0x24c000 +#define I10NM_GNR_D_IMC_MMIO_OFFSET 0x206000 #define I10NM_GNR_IMC_MMIO_SIZE 0x4000 #define I10NM_HBM_IMC_MMIO_SIZE 0x9000 #define I10NM_DDR_IMC_CH_CNT(reg) GET_BITFIELD(reg, 21, 24) @@ -687,6 +688,14 @@ static struct pci_dev *get_gnr_mdev(struct skx_dev *d, int logical_idx, int *phy return NULL; } +static u32 get_gnr_imc_mmio_offset(void) +{ + if (boot_cpu_data.x86_vfm == INTEL_GRANITERAPIDS_D) + return I10NM_GNR_D_IMC_MMIO_OFFSET; + + return I10NM_GNR_IMC_MMIO_OFFSET; +} + /** * get_ddr_munit() - Get the resource of the i-th DDR memory controller. * @@ -715,7 +724,7 @@ static struct pci_dev *get_ddr_munit(struct skx_dev *d, int i, u32 *offset, unsi return NULL; *offset = I10NM_GET_IMC_MMIO_OFFSET(reg) + - I10NM_GNR_IMC_MMIO_OFFSET + + get_gnr_imc_mmio_offset() + physical_idx * I10NM_GNR_IMC_MMIO_SIZE; *size = I10NM_GNR_IMC_MMIO_SIZE; @@ -1029,6 +1038,7 @@ static const struct x86_cpu_id i10nm_cpuids[] = { X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SAPPHIRERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(EMERALDRAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_D, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_DARKMONT_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), {} -- Gitee From 1e177dffa1700a62d707c1eccbaba611cf83974c Mon Sep 17 00:00:00 2001 From: Wang Haoran Date: Tue, 15 Jul 2025 21:17:00 +0800 Subject: [PATCH 12/12] EDAC/{skx_common,i10nm}: Use scnprintf() for safer buffer handling ANBZ: #23456 commit 35928bc38db69a2af26624e35a250c1e0f9a6a3f upstream. snprintf() is fragile when its return value will be used to append additional data to a buffer. Use scnprintf() instead. Intel-SIG: commit 35928bc38db6 EDAC/{skx_common,i10nm}: Use scnprintf() for safer buffer handling Add EDAC support and enhancement for GNR/GNR-D/CWF. Signed-off-by: Wang Haoran Signed-off-by: Tony Luck Tested-by: Qiuxu Zhuo Reviewed-by: Qiuxu Zhuo Link: https://lore.kernel.org/r/20250715131700.1092720-1-haoranwangsec@gmail.com [ Zhang Rui: amend commit log ] Signed-off-by: Zhang Rui --- drivers/edac/i10nm_base.c | 18 +++++++++--------- drivers/edac/skx_common.c | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index a23cee3a26e5..b3684965753c 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -344,7 +344,7 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, status_mask = rrl->over_mask | rrl->uc_mask | rrl->v_mask; - n = snprintf(msg, len, " retry_rd_err_log["); + n = scnprintf(msg, len, " retry_rd_err_log["); for (i = 0; i < rrl->set_num; i++) { scrub = (rrl->modes[i] == FRE_SCRUB || rrl->modes[i] == LRE_SCRUB); if (scrub_err != scrub) @@ -356,9 +356,9 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, log = read_imc_reg(imc, ch, offset, width); if (width == 4) - n += snprintf(msg + n, len - n, "%.8llx ", log); + n += scnprintf(msg + n, len - n, "%.8llx ", log); else - n += snprintf(msg + n, len - n, "%.16llx ", log); + n += scnprintf(msg + n, len - n, "%.16llx ", log); /* Clear RRL status if RRL in Linux control mode. */ if (retry_rd_err_log == 2 && !j && (log & status_mask)) @@ -368,10 +368,10 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, /* Move back one space. */ n--; - n += snprintf(msg + n, len - n, "]"); + n += scnprintf(msg + n, len - n, "]"); if (len - n > 0) { - n += snprintf(msg + n, len - n, " correrrcnt["); + n += scnprintf(msg + n, len - n, " correrrcnt["); for (i = 0; i < rrl->cecnt_num && len - n > 0; i++) { offset = rrl->cecnt_offsets[i]; width = rrl->cecnt_widths[i]; @@ -379,20 +379,20 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg, /* CPUs {ICX,SPR} encode two counters per 4-byte CORRERRCNT register. */ if (res_cfg->type <= SPR) { - n += snprintf(msg + n, len - n, "%.4llx %.4llx ", + n += scnprintf(msg + n, len - n, "%.4llx %.4llx ", corr & 0xffff, corr >> 16); } else { /* CPUs {GNR} encode one counter per CORRERRCNT register. */ if (width == 4) - n += snprintf(msg + n, len - n, "%.8llx ", corr); + n += scnprintf(msg + n, len - n, "%.8llx ", corr); else - n += snprintf(msg + n, len - n, "%.16llx ", corr); + n += scnprintf(msg + n, len - n, "%.16llx ", corr); } } /* Move back one space. */ n--; - n += snprintf(msg + n, len - n, "]"); + n += scnprintf(msg + n, len - n, "]"); } } diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index c950f1a188d2..6e92f2ff5c24 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -671,12 +671,12 @@ static void skx_mce_output_error(struct mem_ctl_info *mci, } if (res->decoded_by_adxl) { - len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s", + len = scnprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s", overflow ? " OVERFLOW" : "", (uncorrected_error && recoverable) ? " recoverable" : "", mscod, errcode, adxl_msg); } else { - len = snprintf(skx_msg, MSG_SIZE, + len = scnprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x ProcessorSocketId:0x%x MemoryControllerId:0x%x PhysicalRankId:0x%x Row:0x%x Column:0x%x Bank:0x%x BankGroup:0x%x", overflow ? " OVERFLOW" : "", (uncorrected_error && recoverable) ? " recoverable" : "", -- Gitee