diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 3ead5817ea9962007166b3cde60bf89bdcc4f25b..6cb51090a6733ecb709af57b98f92bcd209c086c 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -338,20 +338,39 @@ static bool i10nm_check_2lm(struct res_config *cfg) } /* - * Check whether the error comes from DDRT by ICX/Tremont model specific error code. - * Refer to SDM vol3B 16.11.3 Intel IMC MC error codes for IA32_MCi_STATUS. + * Check whether the error comes from DDRT by ICX/Tremont/SPR model specific error code. + * Refer to SDM vol3B 17.11.3/17.13.2 Intel IMC MC error codes for IA32_MCi_STATUS. */ static bool i10nm_mscod_is_ddrt(u32 mscod) { - switch (mscod) { - case 0x0106: case 0x0107: - case 0x0800: case 0x0804: - case 0x0806 ... 0x0808: - case 0x080a ... 0x080e: - case 0x0810: case 0x0811: - case 0x0816: case 0x081e: - case 0x081f: - return true; + switch (res_cfg->type) { + case I10NM: + switch (mscod) { + case 0x0106: case 0x0107: + case 0x0800: case 0x0804: + case 0x0806 ... 0x0808: + case 0x080a ... 0x080e: + case 0x0810: case 0x0811: + case 0x0816: case 0x081e: + case 0x081f: + return true; + } + + break; + case SPR: + switch (mscod) { + case 0x0800: case 0x0804: + case 0x0806 ... 0x0808: + case 0x080a ... 0x080e: + case 0x0810: case 0x0811: + case 0x0816: case 0x081e: + case 0x081f: + return true; + } + + break; + default: + return false; } return false; @@ -359,6 +378,7 @@ static bool i10nm_mscod_is_ddrt(u32 mscod) static bool i10nm_mc_decode_available(struct mce *mce) { +#define ICX_IMCx_CHy 0x06666000 u8 bank; if (!decoding_via_mca || mem_cfg_2lm) @@ -372,21 +392,26 @@ static bool i10nm_mc_decode_available(struct mce *mce) switch (res_cfg->type) { case I10NM: - if (bank < 13 || bank > 26) - return false; - - /* DDRT errors can't be decoded from MCA bank registers */ - if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT) + /* Check whether the bank is one of {13,14,17,18,21,22,25,26} */ + if (!(ICX_IMCx_CHy & (1 << bank))) return false; - - if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status))) + break; + case SPR: + if (bank < 13 || bank > 20) return false; - - /* Check whether one of {13,14,17,18,21,22,25,26} */ - return ((bank - 13) & BIT(1)) == 0; + break; default: return false; } + + /* DDRT errors can't be decoded from MCA bank registers */ + if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT) + return false; + + if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status))) + return false; + + return true; } static bool i10nm_mc_decode(struct decoded_addr *res) @@ -408,9 +433,29 @@ static bool i10nm_mc_decode(struct decoded_addr *res) switch (res_cfg->type) { case I10NM: - bank = m->bank - 13; - res->imc = bank / 4; - res->channel = bank % 2; + bank = m->bank - 13; + res->imc = bank / 4; + res->channel = bank % 2; + res->column = GET_BITFIELD(m->misc, 9, 18) << 2; + res->row = GET_BITFIELD(m->misc, 19, 39); + res->bank_group = GET_BITFIELD(m->misc, 40, 41); + res->bank_address = GET_BITFIELD(m->misc, 42, 43); + res->bank_group |= GET_BITFIELD(m->misc, 44, 44) << 2; + res->rank = GET_BITFIELD(m->misc, 56, 58); + res->dimm = res->rank >> 2; + res->rank = res->rank % 4; + break; + case SPR: + bank = m->bank - 13; + res->imc = bank / 2; + res->channel = bank % 2; + res->column = GET_BITFIELD(m->misc, 9, 18) << 2; + res->row = GET_BITFIELD(m->misc, 19, 36); + res->bank_group = GET_BITFIELD(m->misc, 37, 38); + res->bank_address = GET_BITFIELD(m->misc, 39, 40); + res->bank_group |= GET_BITFIELD(m->misc, 41, 41) << 2; + res->rank = GET_BITFIELD(m->misc, 57, 57); + res->dimm = GET_BITFIELD(m->misc, 58, 58); break; default: return false; @@ -422,15 +467,6 @@ static bool i10nm_mc_decode(struct decoded_addr *res) return false; } - res->column = GET_BITFIELD(m->misc, 9, 18) << 2; - res->row = GET_BITFIELD(m->misc, 19, 39); - res->bank_group = GET_BITFIELD(m->misc, 40, 41); - res->bank_address = GET_BITFIELD(m->misc, 42, 43); - res->bank_group |= GET_BITFIELD(m->misc, 44, 44) << 2; - res->rank = GET_BITFIELD(m->misc, 56, 58); - res->dimm = res->rank >> 2; - res->rank = res->rank % 4; - return true; } diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index f0f8e98f6efb289f0c415be7885f7473e65b8ca3..5fdcb6d5b96b6e8f056e5a5a9ba0eda92564fdab 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -632,12 +632,18 @@ static bool skx_error_in_1st_level_mem(const struct mce *m) if (!skx_mem_cfg_2lm) return false; - errcode = GET_BITFIELD(m->status, 0, 15); + errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; - if ((errcode & 0xef80) != 0x280) - return false; + return errcode == MCACOD_EXT_MEM_ERR; +} - return true; +static bool skx_error_in_mem(const struct mce *m) +{ + u32 errcode; + + errcode = GET_BITFIELD(m->status, 0, 15) & MCACOD_MEM_ERR_MASK; + + return (errcode == MCACOD_MEM_CTL_ERR || errcode == MCACOD_EXT_MEM_ERR); } int skx_mce_check_error(struct notifier_block *nb, unsigned long val, @@ -651,8 +657,8 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, if (mce->kflags & MCE_HANDLED_CEC) return NOTIFY_DONE; - /* ignore unless this is memory related with an address */ - if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV)) + /* Ignore unless this is memory related with an address */ + if (!skx_error_in_mem(mce) || !(mce->status & MCI_STATUS_ADDRV)) return NOTIFY_DONE; memset(&res, 0, sizeof(res)); diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 0cbadd3d2cd39044f438dfa6bc6431ac72826229..312032657264912352a5e804709f3da49978d73e 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -56,6 +56,30 @@ #define MCI_MISC_ECC_MODE(m) (((m) >> 59) & 15) #define MCI_MISC_ECC_DDRT 8 /* read from DDRT */ +/* + * According to Intel Architecture spec vol 3B, + * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding" + * memory errors should fit one of these masks: + * 000f 0000 1mmm cccc (binary) + * 000f 0010 1mmm cccc (binary) [RAM used as cache] + * where: + * f = Correction Report Filtering Bit. If 1, subsequent errors + * won't be shown + * mmm = error type + * cccc = channel + */ +#define MCACOD_MEM_ERR_MASK 0xef80 +/* + * Errors from either the memory of the 1-level memory system or the + * 2nd level memory (the slow "far" memory) of the 2-level memory system. + */ +#define MCACOD_MEM_CTL_ERR 0x80 +/* + * Errors from the 1st level memory (the fast "near" memory as cache) + * of the 2-level memory system. + */ +#define MCACOD_EXT_MEM_ERR 0x280 + /* * Each cpu socket contains some pci devices that provide global * information, and also some that are local to each of the two