From fd3cebffa30bd92da526c36176d1f647f64ce372 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 22 Aug 2019 00:00:01 +0000 Subject: [PATCH 01/19] EDAC/amd64: Cache secondary Chip Select registers ANBZ: #5455 commit 7574729e91468d568cc198de438feb35ef04f41a upstream. AMD Family 17h systems have a set of secondary Chip Select Base Addresses and Address Masks. These do not represent unique Chip Selects, rather they are used in conjunction with the primary Chip Select registers in certain cases. Cache these secondary Chip Select registers for future use. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: "linux-edac@vger.kernel.org" Cc: James Morse Cc: Mauro Carvalho Chehab Cc: Tony Luck Link: https://lkml.kernel.org/r/20190821235938.118710-7-Yazen.Ghannam@amd.com Signed-off-by: Pu Wen --- drivers/edac/amd64_edac.c | 23 ++++++++++++++++++++--- drivers/edac/amd64_edac.h | 4 ++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 6da720d06fe5..ef1fb5e5a7cc 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -938,34 +938,51 @@ static void prep_chip_selects(struct amd64_pvt *pvt) static void read_umc_base_mask(struct amd64_pvt *pvt) { - u32 umc_base_reg, umc_mask_reg; - u32 base_reg, mask_reg; - u32 *base, *mask; + u32 umc_base_reg, umc_base_reg_sec; + u32 umc_mask_reg, umc_mask_reg_sec; + u32 base_reg, base_reg_sec; + u32 mask_reg, mask_reg_sec; + u32 *base, *base_sec; + u32 *mask, *mask_sec; int cs, umc; for_each_umc(umc) { umc_base_reg = get_umc_base(umc) + UMCCH_BASE_ADDR; + umc_base_reg_sec = get_umc_base(umc) + UMCCH_BASE_ADDR_SEC; for_each_chip_select(cs, umc, pvt) { base = &pvt->csels[umc].csbases[cs]; + base_sec = &pvt->csels[umc].csbases_sec[cs]; base_reg = umc_base_reg + (cs * 4); + base_reg_sec = umc_base_reg_sec + (cs * 4); if (!amd_smn_read(pvt->mc_node_id, base_reg, base)) edac_dbg(0, " DCSB%d[%d]=0x%08x reg: 0x%x\n", umc, cs, *base, base_reg); + + if (!amd_smn_read(pvt->mc_node_id, base_reg_sec, base_sec)) + edac_dbg(0, " DCSB_SEC%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *base_sec, base_reg_sec); } umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK; + umc_mask_reg_sec = get_umc_base(umc) + UMCCH_ADDR_MASK_SEC; for_each_chip_select_mask(cs, umc, pvt) { mask = &pvt->csels[umc].csmasks[cs]; + mask_sec = &pvt->csels[umc].csmasks_sec[cs]; mask_reg = umc_mask_reg + (cs * 4); + mask_reg_sec = umc_mask_reg_sec + (cs * 4); if (!amd_smn_read(pvt->mc_node_id, mask_reg, mask)) edac_dbg(0, " DCSM%d[%d]=0x%08x reg: 0x%x\n", umc, cs, *mask, mask_reg); + + if (!amd_smn_read(pvt->mc_node_id, mask_reg_sec, mask_sec)) + edac_dbg(0, " DCSM_SEC%d[%d]=0x%08x reg: 0x%x\n", + umc, cs, *mask_sec, mask_reg_sec); } } } diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 0b2903ade1b2..6a02c3eec4a8 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -261,7 +261,9 @@ /* UMC CH register offsets */ #define UMCCH_BASE_ADDR 0x0 +#define UMCCH_BASE_ADDR_SEC 0x10 #define UMCCH_ADDR_MASK 0x20 +#define UMCCH_ADDR_MASK_SEC 0x28 #define UMCCH_ADDR_CFG 0x30 #define UMCCH_DIMM_CFG 0x80 #define UMCCH_UMC_CFG 0x100 @@ -315,9 +317,11 @@ struct dram_range { /* A DCT chip selects collection */ struct chip_select { u32 csbases[NUM_CHIPSELECTS]; + u32 csbases_sec[NUM_CHIPSELECTS]; u8 b_cnt; u32 csmasks[NUM_CHIPSELECTS]; + u32 csmasks_sec[NUM_CHIPSELECTS]; u8 m_cnt; }; -- Gitee From 23e2e3f126d0d1340eb8f7921bf7f62dbe2e53ac Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 22 Aug 2019 00:00:02 +0000 Subject: [PATCH 02/19] EDAC/amd64: Support asymmetric dual-rank DIMMs ANBZ: #5455 commit 81f5090db843be897414418c24fe472fa6e082b6 upstream. Future AMD systems will support asymmetric dual-rank DIMMs. These are DIMMs where the ranks are of different sizes. The even rank will use the Primary Even Chip Select registers and the odd rank will use the Secondary Odd Chip Select registers. Recognize if a Secondary Odd Chip Select is being used. Use the Secondary Odd Address Mask when calculating the chip select size. [ bp: move csrow_sec_enabled() to the header, fix CS_ODD define and tone-down the capitalized words spelling. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: "linux-edac@vger.kernel.org" Cc: James Morse Cc: Mauro Carvalho Chehab Cc: Tony Luck Link: https://lkml.kernel.org/r/20190821235938.118710-8-Yazen.Ghannam@amd.com Signed-off-by: Pu Wen --- drivers/edac/amd64_edac.c | 16 +++++++++++++--- drivers/edac/amd64_edac.h | 3 ++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index ef1fb5e5a7cc..aaf38c01ad20 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -782,9 +782,11 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) #define CS_EVEN_PRIMARY BIT(0) #define CS_ODD_PRIMARY BIT(1) +#define CS_EVEN_SECONDARY BIT(2) +#define CS_ODD_SECONDARY BIT(3) -#define CS_EVEN CS_EVEN_PRIMARY -#define CS_ODD CS_ODD_PRIMARY +#define CS_EVEN (CS_EVEN_PRIMARY | CS_EVEN_SECONDARY) +#define CS_ODD (CS_ODD_PRIMARY | CS_ODD_SECONDARY) static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) { @@ -796,6 +798,10 @@ static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) if (csrow_enabled(2 * dimm + 1, ctrl, pvt)) cs_mode |= CS_ODD_PRIMARY; + /* Asymmetric dual-rank DIMM support. */ + if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt)) + cs_mode |= CS_ODD_SECONDARY; + return cs_mode; } @@ -1592,7 +1598,11 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, */ dimm = csrow_nr >> 1; - addr_mask_orig = pvt->csels[umc].csmasks[dimm]; + /* Asymmetric dual-rank DIMM support. */ + if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY)) + addr_mask_orig = pvt->csels[umc].csmasks_sec[dimm]; + else + addr_mask_orig = pvt->csels[umc].csmasks[dimm]; /* * The number of zero bits in the mask is equal to the number of bits diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 6a02c3eec4a8..cc509698ce2c 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -171,7 +171,8 @@ #define DCSM0 0x60 #define DCSM1 0x160 -#define csrow_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases[(i)] & DCSB_CS_ENABLE) +#define csrow_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases[(i)] & DCSB_CS_ENABLE) +#define csrow_sec_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases_sec[(i)] & DCSB_CS_ENABLE) #define DRAM_CONTROL 0x78 -- Gitee From eec85be25e1b68fbc817a5905198af1f86ebd18d Mon Sep 17 00:00:00 2001 From: Isaac Vaughn Date: Fri, 6 Sep 2019 23:21:38 +0000 Subject: [PATCH 03/19] EDAC/amd64: Add PCI device IDs for family 17h, model 70h ANBZ: #5455 commit 3e443eb353eda6f4b4796e07f2599683fa752f1d upstream. Add the new Family 17h Model 70h PCI IDs (device 18h functions 0 and 6) to the AMD64 EDAC module. [ bp: s/f17_base_addr_to_cs_size/f17_addr_mask_to_cs_size/g ] Signed-off-by: Isaac Vaughn Signed-off-by: Borislav Petkov Cc: James Morse Cc: linux-edac@vger.kernel.org Cc: Mauro Carvalho Chehab Cc: Robert Richter Cc: Tony Luck Link: https://lkml.kernel.org/r/20190906192131.8ced0ca112146f32d82b6cae@knights.ucf.edu [fix conflict during backport] Signed-off-by: Pu Wen --- drivers/edac/amd64_edac.c | 13 +++++++++++++ drivers/edac/amd64_edac.h | 3 +++ 2 files changed, 16 insertions(+) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index aaf38c01ad20..186377506baa 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2310,6 +2310,15 @@ static struct amd64_family_type family_types[] = { .dbam_to_cs = f17_addr_mask_to_cs_size, } }, + [F17_M70H_CPUS] = { + .ctl_name = "F17h_M70h", + .f0_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F6, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_addr_mask_to_cs_size, + } + }, [F19_CPUS] = { .ctl_name = "F19h", .f0_id = PCI_DEVICE_ID_AMD_19H_DF_F0, @@ -3358,6 +3367,10 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) fam_type = &family_types[F17_M30H_CPUS]; pvt->ops = &family_types[F17_M30H_CPUS].ops; break; + } else if (pvt->model >= 0x70 && pvt->model <= 0x7f) { + fam_type = &family_types[F17_M70H_CPUS]; + pvt->ops = &family_types[F17_M70H_CPUS].ops; + break; } /* fall through */ case 0x18: diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index cc509698ce2c..456da5905876 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -120,6 +120,8 @@ #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F6 0x15ee #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F0 0x1490 #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496 +#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F0 0x1440 +#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446 #define PCI_DEVICE_ID_AMD_19H_DF_F0 0x1650 #define PCI_DEVICE_ID_AMD_19H_DF_F6 0x1656 @@ -291,6 +293,7 @@ enum amd_families { F17_CPUS, F17_M10H_CPUS, F17_M30H_CPUS, + F17_M70H_CPUS, F19_CPUS, NUM_FAMILIES, }; -- Gitee From 1736a7b44ab959b46cf42106425a7bb7d44eaef6 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 22 Oct 2019 20:35:09 +0000 Subject: [PATCH 04/19] EDAC/amd64: Make struct amd64_family_type global ANBZ: #5455 commit 38ddd4d1574530e1447b6ad91d27225d0f7662fb upstream. The struct amd64_family_type doesn't change between multiple nodes and instances of the module, so make it global. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: "linux-edac@vger.kernel.org" Cc: James Morse Cc: Mauro Carvalho Chehab Cc: Robert Richter Cc: Tony Luck Link: https://lkml.kernel.org/r/20191106012448.243970-2-Yazen.Ghannam@amd.com Signed-off-by: Pu Wen --- drivers/edac/amd64_edac.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 186377506baa..490a9bb95b22 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -15,6 +15,8 @@ module_param(ecc_enable_override, int, 0644); static struct msr __percpu *msrs; +static struct amd64_family_type *fam_type; + /* Per-node stuff */ static struct ecc_settings **ecc_stngs; @@ -3281,8 +3283,7 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) } } -static void setup_mci_misc_attrs(struct mem_ctl_info *mci, - struct amd64_family_type *fam) +static void setup_mci_misc_attrs(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; @@ -3301,7 +3302,7 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci, mci->edac_cap = determine_edac_cap(pvt); mci->mod_name = EDAC_MOD_STR; - mci->ctl_name = fam->ctl_name; + mci->ctl_name = fam_type->ctl_name; mci->dev_name = pci_name(pvt->F3); mci->ctl_page_to_phys = NULL; @@ -3315,8 +3316,6 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci, */ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) { - struct amd64_family_type *fam_type = NULL; - pvt->ext_model = boot_cpu_data.x86_model >> 4; pvt->stepping = boot_cpu_data.x86_stepping; pvt->model = boot_cpu_data.x86_model; @@ -3429,7 +3428,6 @@ static void compute_num_umcs(void) static int init_one_instance(unsigned int nid) { struct pci_dev *F3 = node_to_amd_nb(nid)->misc; - struct amd64_family_type *fam_type = NULL; struct mem_ctl_info *mci = NULL; struct edac_mc_layer layers[2]; struct amd64_pvt *pvt = NULL; @@ -3506,7 +3504,7 @@ static int init_one_instance(unsigned int nid) mci->pvt_info = pvt; mci->pdev = &pvt->F3->dev; - setup_mci_misc_attrs(mci, fam_type); + setup_mci_misc_attrs(mci); if (init_csrows(mci)) mci->edac_cap = EDAC_FLAG_NONE; -- Gitee From 45dc72ca3227d266855fd75388ec8b65ad1b0d31 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 22 Oct 2019 20:35:10 +0000 Subject: [PATCH 05/19] EDAC/amd64: Gather hardware information early ANBZ: #5455 commit 80355a3b2db9d0b713af5169e2cdd7f8fbfdad82 upstream. Split out gathering hardware information from init_one_instance() into a separate function hw_info_get(). This is necessary so that the information can be cached earlier and used to check if memory is populated and if ECC is enabled on a node. Also, define a function hw_info_put() to back out changes made in hw_info_get(). Check for an allocated PCI device (Function 0 for Family 17h or Function 1 for pre-Family 17h) before freeing, since hw_info_put() may be called before PCI siblings are reserved. Drop the family check when freeing pvt->umc. This will be NULL on pre-Family 17h systems. However, kfree() is safe and will check for a NULL pointer before freeing. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: "linux-edac@vger.kernel.org" Cc: James Morse Cc: Mauro Carvalho Chehab Cc: Robert Richter Cc: Tony Luck Link: https://lkml.kernel.org/r/20191106012448.243970-3-Yazen.Ghannam@amd.com Signed-off-by: Pu Wen --- drivers/edac/amd64_edac.c | 101 +++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 50 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 490a9bb95b22..df094f8d3c4f 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3425,34 +3425,15 @@ static void compute_num_umcs(void) edac_dbg(1, "Number of UMCs: %x", num_umcs); } -static int init_one_instance(unsigned int nid) +static int hw_info_get(struct amd64_pvt *pvt) { - struct pci_dev *F3 = node_to_amd_nb(nid)->misc; - struct mem_ctl_info *mci = NULL; - struct edac_mc_layer layers[2]; - struct amd64_pvt *pvt = NULL; u16 pci_id1, pci_id2; - int err = 0, ret; - - ret = -ENOMEM; - pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); - if (!pvt) - goto err_ret; - - pvt->mc_node_id = nid; - pvt->F3 = F3; - - ret = -EINVAL; - fam_type = per_family_init(pvt); - if (!fam_type) - goto err_free; + int ret = -EINVAL; if (pvt->fam >= 0x17) { pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL); - if (!pvt->umc) { - ret = -ENOMEM; - goto err_free; - } + if (!pvt->umc) + return -ENOMEM; pci_id1 = fam_type->f0_id; pci_id2 = fam_type->f6_id; @@ -3461,21 +3442,37 @@ static int init_one_instance(unsigned int nid) pci_id2 = fam_type->f2_id; } - err = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2); - if (err) - goto err_post_init; + ret = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2); + if (ret) + return ret; read_mc_regs(pvt); + return 0; +} + +static void hw_info_put(struct amd64_pvt *pvt) +{ + if (pvt->F0 || pvt->F1) + free_mc_sibling_devs(pvt); + + kfree(pvt->umc); +} + +static int init_one_instance(struct amd64_pvt *pvt) +{ + struct mem_ctl_info *mci = NULL; + struct edac_mc_layer layers[2]; + int ret = -EINVAL; + /* * We need to determine how many memory channels there are. Then use * that information for calculating the size of the dynamic instance * tables in the 'mci' structure. */ - ret = -EINVAL; pvt->channel_count = pvt->ops->early_channel_count(pvt); if (pvt->channel_count < 0) - goto err_siblings; + return ret; ret = -ENOMEM; layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; @@ -3497,9 +3494,9 @@ static int init_one_instance(unsigned int nid) layers[1].size = 2; layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0); + mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0); if (!mci) - goto err_siblings; + return ret; mci->pvt_info = pvt; mci->pdev = &pvt->F3->dev; @@ -3512,31 +3509,17 @@ static int init_one_instance(unsigned int nid) ret = -ENODEV; if (edac_mc_add_mc_with_groups(mci, amd64_edac_attr_groups)) { edac_dbg(1, "failed edac_mc_add_mc()\n"); - goto err_add_mc; + edac_mc_free(mci); + return ret; } return 0; - -err_add_mc: - edac_mc_free(mci); - -err_siblings: - free_mc_sibling_devs(pvt); - -err_post_init: - if (pvt->fam >= 0x17) - kfree(pvt->umc); - -err_free: - kfree(pvt); - -err_ret: - return ret; } static int probe_one_instance(unsigned int nid) { struct pci_dev *F3 = node_to_amd_nb(nid)->misc; + struct amd64_pvt *pvt = NULL; struct ecc_settings *s; int ret; @@ -3547,6 +3530,21 @@ static int probe_one_instance(unsigned int nid) ecc_stngs[nid] = s; + pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); + if (!pvt) + goto err_settings; + + pvt->mc_node_id = nid; + pvt->F3 = F3; + + fam_type = per_family_init(pvt); + if (!fam_type) + goto err_enable; + + ret = hw_info_get(pvt); + if (ret < 0) + goto err_enable; + if (!ecc_enabled(F3, nid)) { ret = 0; @@ -3563,7 +3561,7 @@ static int probe_one_instance(unsigned int nid) goto err_enable; } - ret = init_one_instance(nid); + ret = init_one_instance(pvt); if (ret < 0) { amd64_err("Error probing instance: %d\n", nid); @@ -3576,6 +3574,10 @@ static int probe_one_instance(unsigned int nid) return ret; err_enable: + hw_info_put(pvt); + kfree(pvt); + +err_settings: kfree(s); ecc_stngs[nid] = NULL; @@ -3602,14 +3604,13 @@ static void remove_one_instance(unsigned int nid) restore_ecc_error_reporting(s, nid, F3); - free_mc_sibling_devs(pvt); - kfree(ecc_stngs[nid]); ecc_stngs[nid] = NULL; /* Free the EDAC CORE resources */ mci->pvt_info = NULL; + hw_info_put(pvt); kfree(pvt); edac_mc_free(mci); } -- Gitee From ff1340dcf16fc0955101cb6582c2dde5a7a1f1b3 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 22 Oct 2019 20:35:11 +0000 Subject: [PATCH 06/19] EDAC/amd64: Save max number of controllers to family type ANBZ: #5455 commit 5e4c55276ae8758f5789722b384bb2ab3de3a24f upstream. The maximum number of memory controllers is fixed within a family/model group. In most cases, this has been fixed at 2, but some systems may have up to 8. The struct amd64_family_type already contains family/model-specific information, and this can be used rather than adding model checks to various functions. Create a new field in struct amd64_family_type for max_mcs. Set this when setting other family type information, and use this when needing the maximum number of memory controllers possible for a system. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: "linux-edac@vger.kernel.org" Cc: James Morse Cc: Mauro Carvalho Chehab Cc: Robert Richter Cc: Tony Luck Link: https://lkml.kernel.org/r/20191106012448.243970-4-Yazen.Ghannam@amd.com Signed-off-by: Pu Wen --- drivers/edac/amd64_edac.c | 44 +++++++++++++-------------------------- drivers/edac/amd64_edac.h | 2 ++ 2 files changed, 16 insertions(+), 30 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index df094f8d3c4f..1843ce766ecc 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -20,9 +20,6 @@ static struct amd64_family_type *fam_type; /* Per-node stuff */ static struct ecc_settings **ecc_stngs; -/* Number of Unified Memory Controllers */ -static u8 num_umcs; - /* * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching- @@ -448,7 +445,7 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct, for (i = 0; i < pvt->csels[dct].m_cnt; i++) #define for_each_umc(i) \ - for (i = 0; i < num_umcs; i++) + for (i = 0; i < fam_type->max_mcs; i++) /* * @input_addr is an InputAddr associated with the node given by mci. Return the @@ -2219,6 +2216,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "K8", .f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, .f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, + .max_mcs = 2, .ops = { .early_channel_count = k8_early_channel_count, .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow, @@ -2229,6 +2227,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F10h", .f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP, .f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM, + .max_mcs = 2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2239,6 +2238,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F15h", .f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1, .f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2, + .max_mcs = 2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2249,6 +2249,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F15h_M30h", .f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1, .f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2, + .max_mcs = 2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2259,6 +2260,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F15h_M60h", .f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1, .f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2, + .max_mcs = 2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2269,6 +2271,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F16h", .f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1, .f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2, + .max_mcs = 2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2279,6 +2282,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F16h_M30h", .f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1, .f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2, + .max_mcs = 2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2289,6 +2293,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F17h", .f0_id = PCI_DEVICE_ID_AMD_17H_DF_F0, .f6_id = PCI_DEVICE_ID_AMD_17H_DF_F6, + .max_mcs = 2, .ops = { .early_channel_count = f17_early_channel_count, .dbam_to_cs = f17_addr_mask_to_cs_size, @@ -2298,6 +2303,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F17h_M10h", .f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0, .f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6, + .max_mcs = 2, .ops = { .early_channel_count = f17_early_channel_count, .dbam_to_cs = f17_addr_mask_to_cs_size, @@ -2307,6 +2313,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F17h_M30h", .f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0, .f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6, + .max_mcs = 8, .ops = { .early_channel_count = f17_early_channel_count, .dbam_to_cs = f17_addr_mask_to_cs_size, @@ -2316,6 +2323,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F17h_M70h", .f0_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F0, .f6_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F6, + .max_mcs = 2, .ops = { .early_channel_count = f17_early_channel_count, .dbam_to_cs = f17_addr_mask_to_cs_size, @@ -3409,29 +3417,13 @@ static const struct attribute_group *amd64_edac_attr_groups[] = { NULL }; -/* Set the number of Unified Memory Controllers in the system. */ -static void compute_num_umcs(void) -{ - u8 model = boot_cpu_data.x86_model; - - if (boot_cpu_data.x86 < 0x17) - return; - - if (model >= 0x30 && model <= 0x3f) - num_umcs = 8; - else - num_umcs = 2; - - edac_dbg(1, "Number of UMCs: %x", num_umcs); -} - static int hw_info_get(struct amd64_pvt *pvt) { u16 pci_id1, pci_id2; int ret = -EINVAL; if (pvt->fam >= 0x17) { - pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL); + pvt->umc = kcalloc(fam_type->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL); if (!pvt->umc) return -ENOMEM; @@ -3484,14 +3476,8 @@ static int init_one_instance(struct amd64_pvt *pvt) * Always allocate two channels since we can have setups with DIMMs on * only one channel. Also, this simplifies handling later for the price * of a couple of KBs tops. - * - * On Fam17h+, the number of controllers may be greater than two. So set - * the size equal to the maximum number of UMCs. */ - if (pvt->fam >= 0x17) - layers[1].size = num_umcs; - else - layers[1].size = 2; + layers[1].size = fam_type->max_mcs; layers[1].is_virt_csrow = false; mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0); @@ -3677,8 +3663,6 @@ static int __init amd64_edac_init(void) if (!msrs) goto err_free; - compute_num_umcs(); - for (i = 0; i < amd_nb_num(); i++) { err = probe_one_instance(i); if (err) { diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 456da5905876..abbf3c274d74 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -482,6 +482,8 @@ struct low_ops { struct amd64_family_type { const char *ctl_name; u16 f0_id, f1_id, f2_id, f6_id; + /* Maximum number of memory controllers per die/node. */ + u8 max_mcs; struct low_ops ops; }; -- Gitee From 881060d39999a389df3ce35a9761e352ae9b792e Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 9 Oct 2020 17:18:03 +0000 Subject: [PATCH 07/19] EDAC/amd64: Set proper family type for Family 19h Models 20h-2Fh ANBZ: #5455 commit b4210eab9164e3ea647b71e1049391177db9b215 upstream. AMD Family 19h Models 20h-2Fh use the same PCI IDs as Family 17h Models 70h-7Fh. The same family ops and number of channels also apply. Use the Family17h Model 70h family_type and ops for Family 19h Models 20h-2Fh. Update the controller name to match the system. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20201009171803.3214354-1-Yazen.Ghannam@amd.com Signed-off-by: Pu Wen --- drivers/edac/amd64_edac.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 1843ce766ecc..fdd5db151c0d 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3389,6 +3389,12 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) break; case 0x19: + if (pvt->model >= 0x20 && pvt->model <= 0x2f) { + fam_type = &family_types[F17_M70H_CPUS]; + pvt->ops = &family_types[F17_M70H_CPUS].ops; + fam_type->ctl_name = "F19h_M20h"; + break; + } fam_type = &family_types[F19_CPUS]; pvt->ops = &family_types[F19_CPUS].ops; family_types[F19_CPUS].ctl_name = "F19h"; -- Gitee From 48d9e792c514823111a4721a4f4d026845fa46f5 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 9 Nov 2020 21:06:58 +0000 Subject: [PATCH 08/19] EDAC/mce_amd: Use struct cpuinfo_x86.cpu_die_id for AMD NodeId ANBZ: #5455 commit 8de0c9917cc1297bc5543b61992d5bdee4ce621a upstream. The edac_mce_amd module calls decode_dram_ecc() on AMD Family17h and later systems. This function is used in amd64_edac_mod to do system-specific decoding for DRAM ECC errors. The function takes a "NodeId" as a parameter. In AMD documentation, NodeId is used to identify a physical die in a system. This can be used to identify a node in the AMD_NB code and also it is used with umc_normaddr_to_sysaddr(). However, the input used for decode_dram_ecc() is currently the NUMA node of a logical CPU. In the default configuration, the NUMA node and physical die will be equivalent, so this doesn't have an impact. But the NUMA node configuration can be adjusted with optional memory interleaving modes. This will cause the NUMA node enumeration to not match the physical die enumeration. The mismatch will cause the address translation function to fail or report incorrect results. Use struct cpuinfo_x86.cpu_die_id for the node_id parameter to ensure the physical ID is used. Fixes: fbe63acf62f5 ("EDAC, mce_amd: Use cpu_to_node() to find the node ID") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20201109210659.754018-4-Yazen.Ghannam@amd.com Signed-off-by: Pu Wen --- drivers/edac/mce_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 9ced0659355f..0ba91f01e13f 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -1003,7 +1003,7 @@ static void decode_smca_error(struct mce *m) } if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc) - decode_dram_ecc(cpu_to_node(m->extcpu), m); + decode_dram_ecc(topology_die_id(m->extcpu), m); } static inline void amd_decode_err_code(u16 ec) -- Gitee From 7f78b361fe9d7658d9fea958b7603b5aa6ac1b34 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Tue, 17 Nov 2020 20:49:52 +0800 Subject: [PATCH 09/19] EDAC: Add DDR5 new memory type ANBZ: #5455 commit bc1c99a5971aa7571e8b9731c28fa32abe12cab8 upstream. Add a new entry to 'enum mem_type' and a new string to 'edac_mem_types[]' for DDR5 new memory type. Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Signed-off-by: Pu Wen --- drivers/edac/edac_mc.c | 1 + include/linux/edac.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index f59511bd9926..7877ce426f12 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -216,6 +216,7 @@ const char * const edac_mem_types[] = { [MEM_DDR4] = "Unbuffered-DDR4", [MEM_RDDR4] = "Registered-DDR4", [MEM_LRDDR4] = "Load-Reduced-DDR4-RAM", + [MEM_DDR5] = "Unbuffered-DDR5", [MEM_NVDIMM] = "Non-volatile-RAM", }; EXPORT_SYMBOL_GPL(edac_mem_types); diff --git a/include/linux/edac.h b/include/linux/edac.h index 958d69332c1d..f2cc1864c114 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -187,6 +187,7 @@ static inline char *mc_event_error_type(const unsigned int err_type) * @MEM_RDDR4: Registered DDR4 RAM * This is a variant of the DDR4 memories. * @MEM_LRDDR4: Load-Reduced DDR4 memory. + * @MEM_DDR5: Unbuffered DDR5 RAM * @MEM_NVDIMM: Non-volatile RAM */ enum mem_type { @@ -211,6 +212,7 @@ enum mem_type { MEM_DDR4, MEM_RDDR4, MEM_LRDDR4, + MEM_DDR5, MEM_NVDIMM, }; @@ -234,6 +236,7 @@ enum mem_type { #define MEM_FLAG_DDR4 BIT(MEM_DDR4) #define MEM_FLAG_RDDR4 BIT(MEM_RDDR4) #define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) +#define MEM_FLAG_DDR5 BIT(MEM_DDR5) #define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) /** -- Gitee From f1d92bfd27f5555ae9c75cd49bdd8ed5b99c399e Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Wed, 8 Dec 2021 17:43:53 +0000 Subject: [PATCH 10/19] EDAC: Add RDDR5 and LRDDR5 memory types ANBZ: #5455 commit f95711242390d759f69fd67ad46b31491fe904d6 upstream. Include Registered-DDR5 and Load-Reduced DDR5 in the list of memory types. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211208174356.1997855-2-yazen.ghannam@amd.com Signed-off-by: Pu Wen --- drivers/edac/edac_mc.c | 2 ++ include/linux/edac.h | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 7877ce426f12..b5515def60cd 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -217,6 +217,8 @@ const char * const edac_mem_types[] = { [MEM_RDDR4] = "Registered-DDR4", [MEM_LRDDR4] = "Load-Reduced-DDR4-RAM", [MEM_DDR5] = "Unbuffered-DDR5", + [MEM_RDDR5] = "Registered-DDR5", + [MEM_LRDDR5] = "Load-Reduced-DDR5-RAM", [MEM_NVDIMM] = "Non-volatile-RAM", }; EXPORT_SYMBOL_GPL(edac_mem_types); diff --git a/include/linux/edac.h b/include/linux/edac.h index f2cc1864c114..f9edb7270157 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -188,6 +188,8 @@ static inline char *mc_event_error_type(const unsigned int err_type) * This is a variant of the DDR4 memories. * @MEM_LRDDR4: Load-Reduced DDR4 memory. * @MEM_DDR5: Unbuffered DDR5 RAM + * @MEM_RDDR5: Registered DDR5 RAM + * @MEM_LRDDR5: Load-Reduced DDR5 memory. * @MEM_NVDIMM: Non-volatile RAM */ enum mem_type { @@ -213,6 +215,8 @@ enum mem_type { MEM_RDDR4, MEM_LRDDR4, MEM_DDR5, + MEM_RDDR5, + MEM_LRDDR5, MEM_NVDIMM, }; @@ -237,6 +241,8 @@ enum mem_type { #define MEM_FLAG_RDDR4 BIT(MEM_RDDR4) #define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) #define MEM_FLAG_DDR5 BIT(MEM_DDR5) +#define MEM_FLAG_RDDR5 BIT(MEM_RDDR5) +#define MEM_FLAG_LRDDR5 BIT(MEM_LRDDR5) #define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM) /** -- Gitee From 791aaa7d3b45de363e749892c04bb3d0952f8711 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Wed, 8 Dec 2021 17:43:54 +0000 Subject: [PATCH 11/19] EDAC/amd64: Add support for AMD Family 19h Models 10h-1Fh and A0h-AFh ANBZ: #5455 commit e2be5955a88664421b25e463c28a910b8dbd534c upstream. Add a new family type for AMD Family 19h Models 10h to 1Fh. Use this new family type for Models A0h to AFh also. Increase the maximum number of controllers from 8 to 12. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211208174356.1997855-3-yazen.ghannam@amd.com [fix conflict during backport] Signed-off-by: Pu Wen --- drivers/edac/amd64_edac.c | 21 ++++++++++++++++++++- drivers/edac/amd64_edac.h | 5 ++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index fdd5db151c0d..717fd909c27f 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2338,6 +2338,16 @@ static struct amd64_family_type family_types[] = { .dbam_to_cs = f17_addr_mask_to_cs_size, } }, + [F19_M10H_CPUS] = { + .ctl_name = "F19h_M10h", + .f0_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F6, + .max_mcs = 12, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_addr_mask_to_cs_size, + } + }, }; /* @@ -3389,11 +3399,20 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) break; case 0x19: - if (pvt->model >= 0x20 && pvt->model <= 0x2f) { + if (pvt->model >= 0x10 && pvt->model <= 0x1f) { + fam_type = &family_types[F19_M10H_CPUS]; + pvt->ops = &family_types[F19_M10H_CPUS].ops; + break; + } else if (pvt->model >= 0x20 && pvt->model <= 0x2f) { fam_type = &family_types[F17_M70H_CPUS]; pvt->ops = &family_types[F17_M70H_CPUS].ops; fam_type->ctl_name = "F19h_M20h"; break; + } else if (pvt->model >= 0xa0 && pvt->model <= 0xaf) { + fam_type = &family_types[F19_M10H_CPUS]; + pvt->ops = &family_types[F19_M10H_CPUS].ops; + fam_type->ctl_name = "F19h_MA0h"; + break; } fam_type = &family_types[F19_CPUS]; pvt->ops = &family_types[F19_CPUS].ops; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index abbf3c274d74..8e6f0bc8cbd4 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -96,7 +96,7 @@ /* Hardware limit on ChipSelect rows per MC and processors per system */ #define NUM_CHIPSELECTS 8 #define DRAM_RANGES 8 -#define NUM_CONTROLLERS 8 +#define NUM_CONTROLLERS 12 #define ON true #define OFF false @@ -124,6 +124,8 @@ #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446 #define PCI_DEVICE_ID_AMD_19H_DF_F0 0x1650 #define PCI_DEVICE_ID_AMD_19H_DF_F6 0x1656 +#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F0 0x14ad +#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F6 0x14b3 /* * Function 1 - Address Map @@ -295,6 +297,7 @@ enum amd_families { F17_M30H_CPUS, F17_M70H_CPUS, F19_CPUS, + F19_M10H_CPUS, NUM_FAMILIES, }; -- Gitee From a519d45ce7b438e13cd19f217fa9559480509042 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Wed, 2 Feb 2022 14:43:06 +0000 Subject: [PATCH 12/19] EDAC/amd64: Set memory type per DIMM ANBZ: #5455 commit 75aeaaf23def967853c8d1cfb513a6842dbc232e upstream. Current AMD systems allow mixing of DIMM types within a system. However, DIMMs within a channel, i.e. managed by a single Unified Memory Controller (UMC), must be of the same type. Handle this possible configuration by checking and setting the memory type for each individual "UMC" structure. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Reviewed-by: William Roche Link: https://lore.kernel.org/r/20220202144307.2678405-2-yazen.ghannam@amd.com [fix conflict during backport] Signed-off-by: Pu Wen --- drivers/edac/amd64_edac.c | 43 ++++++++++++++++++++++++++++----------- drivers/edac/amd64_edac.h | 10 ++++++++- 2 files changed, 40 insertions(+), 13 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 717fd909c27f..a322fb60183a 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -856,7 +856,7 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt) edac_dbg(1, "UMC%d x16 DIMMs present: %s\n", i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no"); - if (pvt->dram_type == MEM_LRDDR4) { + if (umc->dram_type == MEM_LRDDR4) { amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ADDR_CFG, &tmp); edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n", i, 1 << ((tmp >> 4) & 0x3)); @@ -1043,19 +1043,36 @@ static void read_dct_base_mask(struct amd64_pvt *pvt) } } -static void determine_memory_type(struct amd64_pvt *pvt) +static void determine_memory_type_df(struct amd64_pvt *pvt) { - u32 dram_ctrl, dcsm; + struct amd64_umc *umc; + u32 i; - if (pvt->umc) { - if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5)) - pvt->dram_type = MEM_LRDDR4; - else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4)) - pvt->dram_type = MEM_RDDR4; + for_each_umc(i) { + umc = &pvt->umc[i]; + + if (!(umc->sdp_ctrl & UMC_SDP_INIT)) { + umc->dram_type = MEM_EMPTY; + continue; + } + + if (umc->dimm_cfg & BIT(5)) + umc->dram_type = MEM_LRDDR4; + else if (umc->dimm_cfg & BIT(4)) + umc->dram_type = MEM_RDDR4; else - pvt->dram_type = MEM_DDR4; - return; + umc->dram_type = MEM_DDR4; + + edac_dbg(1, " UMC%d DIMM type: %s\n", i, edac_mem_types[umc->dram_type]); } +} + +static void determine_memory_type(struct amd64_pvt *pvt) +{ + u32 dram_ctrl, dcsm; + + if (pvt->umc) + return determine_memory_type_df(pvt); switch (pvt->fam) { case 0xf: @@ -2857,7 +2874,9 @@ static void read_mc_regs(struct amd64_pvt *pvt) read_dct_base_mask(pvt); determine_memory_type(pvt); - edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); + + if (!pvt->umc) + edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); determine_ecc_sym_sz(pvt); @@ -2955,7 +2974,7 @@ static int init_csrows_df(struct mem_ctl_info *mci) pvt->mc_node_id, cs); dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs); - dimm->mtype = pvt->dram_type; + dimm->mtype = pvt->umc[umc].dram_type; dimm->edac_mode = edac_mode; dimm->dtype = dev_type; } diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 8e6f0bc8cbd4..fcfb5455c015 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -338,6 +338,9 @@ struct amd64_umc { u32 sdp_ctrl; /* SDP Control reg */ u32 ecc_ctrl; /* DRAM ECC Control reg */ u32 umc_cap_hi; /* Capabilities High reg */ + + /* cache the dram_type */ + enum mem_type dram_type; }; struct amd64_pvt { @@ -385,7 +388,12 @@ struct amd64_pvt { /* place to store error injection parameters prior to issue */ struct error_injection injection; - /* cache the dram_type */ + /* + * cache the dram_type + * + * NOTE: Don't use this for Family 17h and later. + * Use dram_type in struct amd64_umc instead. + */ enum mem_type dram_type; struct amd64_umc *umc; /* UMC registers */ -- Gitee From a90ad06f8c874c1e467942d776346c5d420282bd Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Wed, 2 Feb 2022 14:43:07 +0000 Subject: [PATCH 13/19] EDAC/amd64: Add new register offset support and related changes ANBZ: #5455 commit 2151c84ece920dc55942495004a823cbecb921e5 upstream. Introduce a "family flags" bitmask that can be used to indicate any special behavior needed on a per-family basis. Add a flag to indicate a system uses the new register offsets introduced with Family 19h Model 10h. Use this flag to account for register offset changes, a new bitfield indicating DDR5 use on a memory controller, and to set the proper number of chip select masks. Rework f17_addr_mask_to_cs_size() to properly handle the change in chip select masks. And update code comments to reflect the updated Chip Select, DIMM, and Mask relationships. [uninitialized variable warning] Reported-by: kernel test robot Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Reviewed-by: William Roche Link: https://lore.kernel.org/r/20220202144307.2678405-3-yazen.ghannam@amd.com Signed-off-by: Pu Wen --- drivers/edac/amd64_edac.c | 80 +++++++++++++++++++++++++++++++-------- drivers/edac/amd64_edac.h | 14 +++++++ 2 files changed, 78 insertions(+), 16 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index a322fb60183a..ad2611ef516e 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -17,6 +17,21 @@ static struct msr __percpu *msrs; static struct amd64_family_type *fam_type; +static inline u32 get_umc_reg(u32 reg) +{ + if (!fam_type->flags.zn_regs_v2) + return reg; + + switch (reg) { + case UMCCH_ADDR_CFG: return UMCCH_ADDR_CFG_DDR5; + case UMCCH_ADDR_MASK_SEC: return UMCCH_ADDR_MASK_SEC_DDR5; + case UMCCH_DIMM_CFG: return UMCCH_DIMM_CFG_DDR5; + } + + WARN_ONCE(1, "%s: unknown register 0x%x", __func__, reg); + return 0; +} + /* Per-node stuff */ static struct ecc_settings **ecc_stngs; @@ -856,8 +871,10 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt) edac_dbg(1, "UMC%d x16 DIMMs present: %s\n", i, (umc->dimm_cfg & BIT(7)) ? "yes" : "no"); - if (umc->dram_type == MEM_LRDDR4) { - amd_smn_read(pvt->mc_node_id, umc_base + UMCCH_ADDR_CFG, &tmp); + if (umc->dram_type == MEM_LRDDR4 || umc->dram_type == MEM_LRDDR5) { + amd_smn_read(pvt->mc_node_id, + umc_base + get_umc_reg(UMCCH_ADDR_CFG), + &tmp); edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n", i, 1 << ((tmp >> 4) & 0x3)); } @@ -932,7 +949,7 @@ static void prep_chip_selects(struct amd64_pvt *pvt) for_each_umc(umc) { pvt->csels[umc].b_cnt = 4; - pvt->csels[umc].m_cnt = 2; + pvt->csels[umc].m_cnt = fam_type->flags.zn_regs_v2 ? 4 : 2; } } else { @@ -972,7 +989,7 @@ static void read_umc_base_mask(struct amd64_pvt *pvt) } umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK; - umc_mask_reg_sec = get_umc_base(umc) + UMCCH_ADDR_MASK_SEC; + umc_mask_reg_sec = get_umc_base(umc) + get_umc_reg(UMCCH_ADDR_MASK_SEC); for_each_chip_select_mask(cs, umc, pvt) { mask = &pvt->csels[umc].csmasks[cs]; @@ -1056,12 +1073,25 @@ static void determine_memory_type_df(struct amd64_pvt *pvt) continue; } - if (umc->dimm_cfg & BIT(5)) - umc->dram_type = MEM_LRDDR4; - else if (umc->dimm_cfg & BIT(4)) - umc->dram_type = MEM_RDDR4; - else - umc->dram_type = MEM_DDR4; + /* + * Check if the system supports the "DDR Type" field in UMC Config + * and has DDR5 DIMMs in use. + */ + if (fam_type->flags.zn_regs_v2 && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) { + if (umc->dimm_cfg & BIT(5)) + umc->dram_type = MEM_LRDDR5; + else if (umc->dimm_cfg & BIT(4)) + umc->dram_type = MEM_RDDR5; + else + umc->dram_type = MEM_DDR5; + } else { + if (umc->dimm_cfg & BIT(5)) + umc->dram_type = MEM_LRDDR4; + else if (umc->dimm_cfg & BIT(4)) + umc->dram_type = MEM_RDDR4; + else + umc->dram_type = MEM_DDR4; + } edac_dbg(1, " UMC%d DIMM type: %s\n", i, edac_mem_types[umc->dram_type]); } @@ -1593,6 +1623,7 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, { u32 addr_mask_orig, addr_mask_deinterleaved; u32 msb, weight, num_zero_bits; + int cs_mask_nr = csrow_nr; int dimm, size = 0; /* No Chip Selects are enabled. */ @@ -1608,17 +1639,33 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, return size; /* - * There is one mask per DIMM, and two Chip Selects per DIMM. - * CS0 and CS1 -> DIMM0 - * CS2 and CS3 -> DIMM1 + * Family 17h introduced systems with one mask per DIMM, + * and two Chip Selects per DIMM. + * + * CS0 and CS1 -> MASK0 / DIMM0 + * CS2 and CS3 -> MASK1 / DIMM1 + * + * Family 19h Model 10h introduced systems with one mask per Chip Select, + * and two Chip Selects per DIMM. + * + * CS0 -> MASK0 -> DIMM0 + * CS1 -> MASK1 -> DIMM0 + * CS2 -> MASK2 -> DIMM1 + * CS3 -> MASK3 -> DIMM1 + * + * Keep the mask number equal to the Chip Select number for newer systems, + * and shift the mask number for older systems. */ dimm = csrow_nr >> 1; + if (!fam_type->flags.zn_regs_v2) + cs_mask_nr >>= 1; + /* Asymmetric dual-rank DIMM support. */ if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY)) - addr_mask_orig = pvt->csels[umc].csmasks_sec[dimm]; + addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr]; else - addr_mask_orig = pvt->csels[umc].csmasks[dimm]; + addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr]; /* * The number of zero bits in the mask is equal to the number of bits @@ -2360,6 +2407,7 @@ static struct amd64_family_type family_types[] = { .f0_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F0, .f6_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F6, .max_mcs = 12, + .flags.zn_regs_v2 = 1, .ops = { .early_channel_count = f17_early_channel_count, .dbam_to_cs = f17_addr_mask_to_cs_size, @@ -2790,7 +2838,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt) umc_base = get_umc_base(i); umc = &pvt->umc[i]; - amd_smn_read(nid, umc_base + UMCCH_DIMM_CFG, &umc->dimm_cfg); + amd_smn_read(nid, umc_base + get_umc_reg(UMCCH_DIMM_CFG), &umc->dimm_cfg); amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg); amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl); amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl); diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index fcfb5455c015..cc92a38b2150 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -269,8 +269,11 @@ #define UMCCH_BASE_ADDR_SEC 0x10 #define UMCCH_ADDR_MASK 0x20 #define UMCCH_ADDR_MASK_SEC 0x28 +#define UMCCH_ADDR_MASK_SEC_DDR5 0x30 #define UMCCH_ADDR_CFG 0x30 +#define UMCCH_ADDR_CFG_DDR5 0x40 #define UMCCH_DIMM_CFG 0x80 +#define UMCCH_DIMM_CFG_DDR5 0x90 #define UMCCH_UMC_CFG 0x100 #define UMCCH_SDP_CTRL 0x104 #define UMCCH_ECC_CTRL 0x14C @@ -490,11 +493,22 @@ struct low_ops { unsigned cs_mode, int cs_mask_nr); }; +struct amd64_family_flags { + /* + * Indicates that the system supports the new register offsets, etc. + * first introduced with Family 19h Model 10h. + */ + __u64 zn_regs_v2 : 1, + + __reserved : 63; +}; + struct amd64_family_type { const char *ctl_name; u16 f0_id, f1_id, f2_id, f6_id; /* Maximum number of memory controllers per die/node. */ u8 max_mcs; + struct amd64_family_flags flags; struct low_ops ops; }; -- Gitee From b08ee2ab8f6e599f2ecc039744629b1a360c71e5 Mon Sep 17 00:00:00 2001 From: Pu Wen Date: Thu, 8 Jun 2023 12:37:49 +0800 Subject: [PATCH 14/19] anolis: EDAC/amd64: Get UMC channel from the 6th nibble for Hygon ANBZ: #5455 On Hygon family 18h platforms, we look at the 6th nibble(bit 20~23) in the instance_id to derive the channel number. Signed-off-by: Pu Wen --- drivers/edac/amd64_edac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index ad2611ef516e..d6821777a068 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2673,6 +2673,9 @@ static inline void decode_bus_error(int node_id, struct mce *m) */ static int find_umc_channel(struct mce *m) { + if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON && + boot_cpu_data.x86 == 0x18) + return (m->ipid & GENMASK(23, 0)) >> 20; return (m->ipid & GENMASK(31, 0)) >> 20; } -- Gitee From de7e3c0c3f5600b0b736375d0d9f467d328b9d15 Mon Sep 17 00:00:00 2001 From: Pu Wen Date: Thu, 8 Jun 2023 12:38:28 +0800 Subject: [PATCH 15/19] anolis: EDAC/amd64: Add support for Hygon family 18h model 4h ANBZ: #5455 Hygon family 18h model 4h processor has the same F0/F6 device IDs as F17h_M30h. Add support to determine which DDR memory types it supports, but determine syndrome sizes from different bits of the DRAM ECC control reg. Signed-off-by: Pu Wen --- drivers/edac/amd64_edac.c | 88 ++++++++++++++++++++++++++++++++------- 1 file changed, 74 insertions(+), 14 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index d6821777a068..4361bb90c95a 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -97,6 +97,17 @@ int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset, return err; } +static u32 get_umc_base_f18h_m4h(u16 node, u8 channel) +{ + struct pci_dev *f3 = node_to_amd_nb(node)->misc; + u8 df_id; + + get_df_id(f3, &df_id); + df_id -= 4; + + return get_umc_base(channel) + (0x80000000 + (0x10000000 * df_id)); +} + /* * Select DCT to which PCI cfg accesses are routed */ @@ -846,7 +857,10 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt) u32 i, tmp, umc_base; for_each_umc(i) { - umc_base = get_umc_base(i); + if (hygon_f18h_m4h()) + umc_base = get_umc_base_f18h_m4h(pvt->mc_node_id, i); + else + umc_base = get_umc_base(i); umc = &pvt->umc[i]; edac_dbg(1, "UMC%d DIMM cfg: 0x%x\n", i, umc->dimm_cfg); @@ -966,11 +980,17 @@ static void read_umc_base_mask(struct amd64_pvt *pvt) u32 mask_reg, mask_reg_sec; u32 *base, *base_sec; u32 *mask, *mask_sec; + u32 umc_base; int cs, umc; for_each_umc(umc) { - umc_base_reg = get_umc_base(umc) + UMCCH_BASE_ADDR; - umc_base_reg_sec = get_umc_base(umc) + UMCCH_BASE_ADDR_SEC; + if (hygon_f18h_m4h()) + umc_base = get_umc_base_f18h_m4h(pvt->mc_node_id, umc); + else + umc_base = get_umc_base(umc); + + umc_base_reg = umc_base + UMCCH_BASE_ADDR; + umc_base_reg_sec = umc_base + UMCCH_BASE_ADDR_SEC; for_each_chip_select(cs, umc, pvt) { base = &pvt->csels[umc].csbases[cs]; @@ -988,8 +1008,8 @@ static void read_umc_base_mask(struct amd64_pvt *pvt) umc, cs, *base_sec, base_reg_sec); } - umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK; - umc_mask_reg_sec = get_umc_base(umc) + get_umc_reg(UMCCH_ADDR_MASK_SEC); + umc_mask_reg = umc_base + UMCCH_ADDR_MASK; + umc_mask_reg_sec = umc_base + get_umc_reg(UMCCH_ADDR_MASK_SEC); for_each_chip_select_mask(cs, umc, pvt) { mask = &pvt->csels[umc].csmasks[cs]; @@ -1077,7 +1097,8 @@ static void determine_memory_type_df(struct amd64_pvt *pvt) * Check if the system supports the "DDR Type" field in UMC Config * and has DDR5 DIMMs in use. */ - if (fam_type->flags.zn_regs_v2 && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) { + if ((fam_type->flags.zn_regs_v2 || hygon_f18h_m4h()) && + ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) { if (umc->dimm_cfg & BIT(5)) umc->dram_type = MEM_LRDDR5; else if (umc->dimm_cfg & BIT(4)) @@ -2793,6 +2814,14 @@ static void free_mc_sibling_devs(struct amd64_pvt *pvt) } } +static void determine_ecc_sym_sz_f18h_m4h(struct amd64_pvt *pvt, int channel) +{ + if (pvt->umc[channel].ecc_ctrl & BIT(8)) + pvt->ecc_sym_sz = 16; + else if (pvt->umc[channel].ecc_ctrl & BIT(7)) + pvt->ecc_sym_sz = 8; +} + static void determine_ecc_sym_sz(struct amd64_pvt *pvt) { pvt->ecc_sym_sz = 4; @@ -2803,6 +2832,14 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt) for_each_umc(i) { /* Check enabled channels only: */ if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) { + if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON && + boot_cpu_data.x86 == 0x18 && + boot_cpu_data.x86_model == 0x4 && + (pvt->umc[i].umc_cfg & GENMASK(2, 0)) == 0x1) { + determine_ecc_sym_sz_f18h_m4h(pvt, i); + return; + } + if (pvt->umc[i].ecc_ctrl & BIT(9)) { pvt->ecc_sym_sz = 16; return; @@ -2837,8 +2874,11 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt) /* Read registers from each UMC */ for_each_umc(i) { + if (hygon_f18h_m4h()) + umc_base = get_umc_base_f18h_m4h(pvt->mc_node_id, i); + else + umc_base = get_umc_base(i); - umc_base = get_umc_base(i); umc = &pvt->umc[i]; amd_smn_read(nid, umc_base + get_umc_reg(UMCCH_DIMM_CFG), &umc->dimm_cfg); @@ -3459,13 +3499,21 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) pvt->ops = &family_types[F17_M70H_CPUS].ops; break; } - /* fall through */ - case 0x18: fam_type = &family_types[F17_CPUS]; pvt->ops = &family_types[F17_CPUS].ops; + break; - if (pvt->fam == 0x18) - family_types[F17_CPUS].ctl_name = "F18h"; + case 0x18: + if (pvt->model == 0x4) { + fam_type = &family_types[F17_M30H_CPUS]; + pvt->ops = &family_types[F17_M30H_CPUS].ops; + family_types[F17_M30H_CPUS].max_mcs = 3; + family_types[F17_M30H_CPUS].ctl_name = "F18h_M04h"; + break; + } + fam_type = &family_types[F17_CPUS]; + pvt->ops = &family_types[F17_CPUS].ops; + family_types[F17_CPUS].ctl_name = "F18h"; break; case 0x19: @@ -3735,6 +3783,7 @@ static int __init amd64_edac_init(void) { const char *owner; int err = -ENODEV; + u16 instance_num; int i; owner = edac_get_owner(); @@ -3749,8 +3798,13 @@ static int __init amd64_edac_init(void) opstate_init(); + if (hygon_f18h_m4h()) + instance_num = hygon_nb_num(); + else + instance_num = amd_nb_num(); + err = -ENOMEM; - ecc_stngs = kcalloc(amd_nb_num(), sizeof(ecc_stngs[0]), GFP_KERNEL); + ecc_stngs = kcalloc(instance_num, sizeof(ecc_stngs[0]), GFP_KERNEL); if (!ecc_stngs) goto err_free; @@ -3758,7 +3812,7 @@ static int __init amd64_edac_init(void) if (!msrs) goto err_free; - for (i = 0; i < amd_nb_num(); i++) { + for (i = 0; i < instance_num; i++) { err = probe_one_instance(i); if (err) { /* unwind properly */ @@ -3806,6 +3860,7 @@ static int __init amd64_edac_init(void) static void __exit amd64_edac_exit(void) { + u16 instance_num; int i; if (pci_ctl) @@ -3819,7 +3874,12 @@ static void __exit amd64_edac_exit(void) else amd_unregister_ecc_decoder(decode_bus_error); - for (i = 0; i < amd_nb_num(); i++) + if (hygon_f18h_m4h()) + instance_num = hygon_nb_num(); + else + instance_num = amd_nb_num(); + + for (i = 0; i < instance_num; i++) remove_one_instance(i); kfree(ecc_stngs); -- Gitee From 544892fb0206c6db9f7346497e446222001fd6d0 Mon Sep 17 00:00:00 2001 From: Pu Wen Date: Thu, 8 Jun 2023 12:39:07 +0800 Subject: [PATCH 16/19] anolis: EDAC/amd64: Adjust address translation for Hygon family 18h model 4h ANBZ: #5455 Add Hygon family 18h model 4h processor support for DramOffset and HiAddrOffset, and get the socket interleaving number from DramBase- Address(D18F0x110). Update intlv_num_chan and num_intlv_bits support for Hygon family 18h model 4h processor. Signed-off-by: Pu Wen --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 48 ++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 3a87222f011e..c95eeee248d7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -649,13 +649,21 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) u8 cs_mask, cs_id = 0; bool hash_enabled = false; - /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */ - if (amd_df_indirect_read(nid, 0, 0x1B4, umc, &tmp)) + /* Read DramOffset, check if base 1 is used. */ + if (hygon_f18h_m4h() && + amd_df_indirect_read(nid, 0, 0x214, umc, &tmp)) + goto out_err; + else if (amd_df_indirect_read(nid, 0, 0x1B4, umc, &tmp)) goto out_err; /* Remove HiAddrOffset from normalized address, if enabled: */ if (tmp & BIT(0)) { - u64 hi_addr_offset = (tmp & GENMASK_ULL(31, 20)) << 8; + u64 hi_addr_offset; + + if (hygon_f18h_m4h()) + hi_addr_offset = (tmp & GENMASK_ULL(31, 18)) << 8; + else + hi_addr_offset = (tmp & GENMASK_ULL(31, 20)) << 8; if (norm_addr >= hi_addr_offset) { ret_addr -= hi_addr_offset; @@ -674,6 +682,9 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) goto out_err; } + intlv_num_sockets = 0; + if (hygon_f18h_m4h()) + intlv_num_sockets = (tmp >> 2) & 0x3; lgcy_mmio_hole_en = tmp & BIT(1); intlv_num_chan = (tmp >> 4) & 0xF; intlv_addr_sel = (tmp >> 8) & 0x7; @@ -690,7 +701,8 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &tmp)) goto out_err; - intlv_num_sockets = (tmp >> 8) & 0x1; + if (!hygon_f18h_m4h()) + intlv_num_sockets = (tmp >> 8) & 0x1; intlv_num_dies = (tmp >> 10) & 0x3; dram_limit_addr = ((tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0); @@ -700,6 +712,10 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) switch (intlv_num_chan) { case 0: intlv_num_chan = 0; break; case 1: intlv_num_chan = 1; break; + case 2: + if (hygon_f18h_m4h()) + intlv_num_chan = 2; + break; case 3: intlv_num_chan = 2; break; case 5: intlv_num_chan = 3; break; case 7: intlv_num_chan = 4; break; @@ -726,8 +742,9 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) /* Add a bit if sockets are interleaved. */ num_intlv_bits += intlv_num_sockets; - /* Assert num_intlv_bits <= 4 */ - if (num_intlv_bits > 4) { + /* Assert num_intlv_bits in the correct range. */ + if ((hygon_f18h_m4h() && num_intlv_bits > 7) || + (!hygon_f18h_m4h() && num_intlv_bits > 4)) { pr_err("%s: Invalid interleave bits %d.\n", __func__, num_intlv_bits); goto out_err; @@ -746,7 +763,10 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) if (amd_df_indirect_read(nid, 0, 0x50, umc, &tmp)) goto out_err; - cs_fabric_id = (tmp >> 8) & 0xFF; + if (hygon_f18h_m4h()) + cs_fabric_id = (tmp >> 8) & 0x7FF; + else + cs_fabric_id = (tmp >> 8) & 0xFF; die_id_bit = 0; /* If interleaved over more than 1 channel: */ @@ -766,8 +786,13 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) /* If interleaved over more than 1 die. */ if (intlv_num_dies) { sock_id_bit = die_id_bit + intlv_num_dies; - die_id_shift = (tmp >> 24) & 0xF; - die_id_mask = (tmp >> 8) & 0xFF; + if (hygon_f18h_m4h()) { + die_id_shift = (tmp >> 12) & 0xF; + die_id_mask = tmp & 0x7FF; + } else { + die_id_shift = (tmp >> 24) & 0xF; + die_id_mask = (tmp >> 8) & 0xFF; + } cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit; } @@ -775,7 +800,10 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) /* If interleaved over more than 1 socket. */ if (intlv_num_sockets) { socket_id_shift = (tmp >> 28) & 0xF; - socket_id_mask = (tmp >> 16) & 0xFF; + if (hygon_f18h_m4h()) + socket_id_mask = (tmp >> 16) & 0x7FF; + else + socket_id_mask = (tmp >> 16) & 0xFF; cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit; } -- Gitee From 98842fcc485300a73ad360028fd573e557ba6ad5 Mon Sep 17 00:00:00 2001 From: Pu Wen Date: Thu, 8 Jun 2023 12:39:46 +0800 Subject: [PATCH 17/19] anolis: EDAC/mce_amd: Use struct cpuinfo_x86.logical_die_id for Hygon NodeId ANBZ: #5455 The cpuinfo_x86.cpu_die_id is get from CPUID or MSR in the commit 028c221ed190 ("x86/CPU/AMD: Save AMD NodeId as cpu_die_id"). But the value may not be continuous for Hygon model 4h~6h processors. Use cpuinfo_x86.logical_die_id will always format continuous die (or node) IDs, because it will convert the physical die ID to logical die ID. So use topology_logical_die_id() instead of topology_die_id() to decode UMC ECC errors for Hygon processors. Signed-off-by: Pu Wen --- drivers/edac/mce_amd.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 0ba91f01e13f..e7822a22afe7 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -1002,8 +1002,13 @@ static void decode_smca_error(struct mce *m) pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]); } - if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc) - decode_dram_ecc(topology_die_id(m->extcpu), m); + if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc) { + if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON && + boot_cpu_data.x86 == 0x18) + decode_dram_ecc(topology_logical_die_id(m->extcpu), m); + else + decode_dram_ecc(topology_die_id(m->extcpu), m); + } } static inline void amd_decode_err_code(u16 ec) -- Gitee From 5290464f2c61e868b2ee164555aae1edbb6d2d30 Mon Sep 17 00:00:00 2001 From: Pu Wen Date: Thu, 8 Jun 2023 12:47:19 +0800 Subject: [PATCH 18/19] anolis: EDAC/amd64: Add support for Hygon family 18h model 5h ANBZ: #5455 Hygon family 18h model 5h processor has the same F0/F6 device IDs as F17h_M30h, and the syndrome size is the same as model 4h processor. Signed-off-by: Pu Wen --- drivers/edac/amd64_edac.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 4361bb90c95a..fa92b0bc9380 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2834,7 +2834,8 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt) if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) { if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON && boot_cpu_data.x86 == 0x18 && - boot_cpu_data.x86_model == 0x4 && + (boot_cpu_data.x86_model == 0x4 || + boot_cpu_data.x86_model == 0x5) && (pvt->umc[i].umc_cfg & GENMASK(2, 0)) == 0x1) { determine_ecc_sym_sz_f18h_m4h(pvt, i); return; @@ -3510,6 +3511,12 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) family_types[F17_M30H_CPUS].max_mcs = 3; family_types[F17_M30H_CPUS].ctl_name = "F18h_M04h"; break; + } else if (pvt->model == 0x5) { + fam_type = &family_types[F17_M30H_CPUS]; + pvt->ops = &family_types[F17_M30H_CPUS].ops; + family_types[F17_M30H_CPUS].max_mcs = 1; + family_types[F17_M30H_CPUS].ctl_name = "F18h_M05h"; + break; } fam_type = &family_types[F17_CPUS]; pvt->ops = &family_types[F17_CPUS].ops; -- Gitee From 1658c9b8e71ff5744af189de4052912666cb1521 Mon Sep 17 00:00:00 2001 From: Pu Wen Date: Thu, 8 Jun 2023 12:50:08 +0800 Subject: [PATCH 19/19] anolis: EDAC/amd64: Add support for Hygon family 18h model 6h ANBZ: #5455 Add F0/F6 device IDs for Hygon family 18h model 6h processor. Signed-off-by: Pu Wen --- drivers/edac/amd64_edac.c | 14 ++++++++++++++ drivers/edac/amd64_edac.h | 4 ++++ 2 files changed, 18 insertions(+) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index fa92b0bc9380..1e69e2da36ea 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2414,6 +2414,16 @@ static struct amd64_family_type family_types[] = { .dbam_to_cs = f17_addr_mask_to_cs_size, } }, + [F18_M06H_CPUS] = { + .ctl_name = "F18h_M06h", + .f0_id = PCI_DEVICE_ID_HYGON_18H_M06H_DF_F0, + .f6_id = PCI_DEVICE_ID_HYGON_18H_M06H_DF_F6, + .max_mcs = 2, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_addr_mask_to_cs_size, + } + }, [F19_CPUS] = { .ctl_name = "F19h", .f0_id = PCI_DEVICE_ID_AMD_19H_DF_F0, @@ -3517,6 +3527,10 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) family_types[F17_M30H_CPUS].max_mcs = 1; family_types[F17_M30H_CPUS].ctl_name = "F18h_M05h"; break; + } else if (pvt->model == 0x6) { + fam_type = &family_types[F18_M06H_CPUS]; + pvt->ops = &family_types[F18_M06H_CPUS].ops; + break; } fam_type = &family_types[F17_CPUS]; pvt->ops = &family_types[F17_CPUS].ops; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index cc92a38b2150..13955ff01698 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -127,6 +127,9 @@ #define PCI_DEVICE_ID_AMD_19H_M10H_DF_F0 0x14ad #define PCI_DEVICE_ID_AMD_19H_M10H_DF_F6 0x14b3 +#define PCI_DEVICE_ID_HYGON_18H_M06H_DF_F0 0x14b0 +#define PCI_DEVICE_ID_HYGON_18H_M06H_DF_F6 0x14b6 + /* * Function 1 - Address Map */ @@ -299,6 +302,7 @@ enum amd_families { F17_M10H_CPUS, F17_M30H_CPUS, F17_M70H_CPUS, + F18_M06H_CPUS, F19_CPUS, F19_M10H_CPUS, NUM_FAMILIES, -- Gitee