From 6311350d54b5e510b5860866679b6e41b659d381 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 22 Jan 2024 22:13:59 -0600 Subject: [PATCH 01/23] RAS: Introduce AMD Address Translation Library mainline inclusion from mainline-v6.9-rc1 commit 3f3174996be6b4312c38f54d5969f5d5b75fec9e category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/3f3174996be6b4312c38f54d5969f5d5b75fec9e -------------------------------- commit 3f3174996be6b4312c38f54d5969f5d5b75fec9e upstream. AMD Zen-based systems report memory errors through Machine Check banks representing Unified Memory Controllers (UMCs). The address value reported for DRAM ECC errors is a "normalized address" that is relative to the UMC. This normalized address must be converted to a system physical address to be usable by the OS. Support for this address translation was introduced to the MCA subsystem with Zen1 systems. The code was later moved to the AMD64 EDAC module, since this was the only user of the code at the time. However, there are uses for this translation outside of EDAC. The system physical address can be used in MCA for preemptive page offlining as done in some MCA notifier functions. Also, this translation is needed as the basis of similar functionality needed for some CXL configurations on AMD systems. Introduce a common address translation library that can be used for multiple subsystems including MCA, EDAC, and CXL. Include support for UMC normalized to system physical address translation for current CPU systems. The Data Fabric Indirect register access offsets and one of the register fields were changed. Default to the current offsets and register field definition. And fallback to the older values if running on a "legacy" system. Provide built-in code to facilitate the loading and unloading of the library module without affecting other modules or built-in code. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240123041401.79812-2-yazen.ghannam@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- MAINTAINERS | 6 + drivers/ras/Kconfig | 1 + drivers/ras/Makefile | 2 + drivers/ras/amd/atl/Kconfig | 20 + drivers/ras/amd/atl/Makefile | 18 + drivers/ras/amd/atl/access.c | 106 +++++ drivers/ras/amd/atl/core.c | 225 ++++++++++ drivers/ras/amd/atl/dehash.c | 407 ++++++++++++++++++ drivers/ras/amd/atl/denormalize.c | 617 +++++++++++++++++++++++++++ drivers/ras/amd/atl/internal.h | 297 +++++++++++++ drivers/ras/amd/atl/map.c | 665 ++++++++++++++++++++++++++++++ drivers/ras/amd/atl/reg_fields.h | 603 +++++++++++++++++++++++++++ drivers/ras/amd/atl/system.c | 281 +++++++++++++ drivers/ras/amd/atl/umc.c | 41 ++ drivers/ras/ras.c | 31 ++ include/linux/ras.h | 16 + 16 files changed, 3336 insertions(+) create mode 100644 drivers/ras/amd/atl/Kconfig create mode 100644 drivers/ras/amd/atl/Makefile create mode 100644 drivers/ras/amd/atl/access.c create mode 100644 drivers/ras/amd/atl/core.c create mode 100644 drivers/ras/amd/atl/dehash.c create mode 100644 drivers/ras/amd/atl/denormalize.c create mode 100644 drivers/ras/amd/atl/internal.h create mode 100644 drivers/ras/amd/atl/map.c create mode 100644 drivers/ras/amd/atl/reg_fields.h create mode 100644 drivers/ras/amd/atl/system.c create mode 100644 drivers/ras/amd/atl/umc.c diff --git a/MAINTAINERS b/MAINTAINERS index d0ceed7e6673..077765cbf0b7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -891,6 +891,12 @@ Q: https://patchwork.kernel.org/project/linux-rdma/list/ F: drivers/infiniband/hw/efa/ F: include/uapi/rdma/efa-abi.h +AMD ADDRESS TRANSLATION LIBRARY (ATL) +M: Yazen Ghannam +L: linux-edac@vger.kernel.org +S: Supported +F: drivers/ras/amd/atl/* + AMD CDX BUS DRIVER M: Nipun Gupta M: Nikhil Agarwal diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig index 84b6d569cfda..1c3794de1be8 100644 --- a/drivers/ras/Kconfig +++ b/drivers/ras/Kconfig @@ -44,5 +44,6 @@ if RAS source "arch/x86/ras/Kconfig" source "drivers/ras/hisilicon/Kconfig" +source "drivers/ras/amd/atl/Kconfig" endif diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile index 0b841332130c..6437124a20bf 100644 --- a/drivers/ras/Makefile +++ b/drivers/ras/Makefile @@ -4,3 +4,5 @@ obj-$(CONFIG_DEBUG_FS) += debugfs.o obj-$(CONFIG_RAS_CEC) += cec.o obj-$(CONFIG_ARCH_HISI) += hisilicon/ + +obj-y += amd/atl/ diff --git a/drivers/ras/amd/atl/Kconfig b/drivers/ras/amd/atl/Kconfig new file mode 100644 index 000000000000..a43513a700f1 --- /dev/null +++ b/drivers/ras/amd/atl/Kconfig @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# +# AMD Address Translation Library Kconfig +# +# Copyright (c) 2023, Advanced Micro Devices, Inc. +# All Rights Reserved. +# +# Author: Yazen Ghannam + +config AMD_ATL + tristate "AMD Address Translation Library" + depends on AMD_NB && X86_64 && RAS + default N + help + This library includes support for implementation-specific + address translation procedures needed for various error + handling cases. + + Enable this option if using DRAM ECC on Zen-based systems + and OS-based error handling. diff --git a/drivers/ras/amd/atl/Makefile b/drivers/ras/amd/atl/Makefile new file mode 100644 index 000000000000..4acd5f05bd9c --- /dev/null +++ b/drivers/ras/amd/atl/Makefile @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0-or-later +# +# AMD Address Translation Library Makefile +# +# Copyright (c) 2023, Advanced Micro Devices, Inc. +# All Rights Reserved. +# +# Author: Yazen Ghannam + +amd_atl-y := access.o +amd_atl-y += core.o +amd_atl-y += dehash.o +amd_atl-y += denormalize.o +amd_atl-y += map.o +amd_atl-y += system.o +amd_atl-y += umc.o + +obj-$(CONFIG_AMD_ATL) += amd_atl.o diff --git a/drivers/ras/amd/atl/access.c b/drivers/ras/amd/atl/access.c new file mode 100644 index 000000000000..f6dd87bb2c35 --- /dev/null +++ b/drivers/ras/amd/atl/access.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * access.c : DF Indirect Access functions + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#include "internal.h" + +/* Protect the PCI config register pairs used for DF indirect access. */ +static DEFINE_MUTEX(df_indirect_mutex); + +/* + * Data Fabric Indirect Access uses FICAA/FICAD. + * + * Fabric Indirect Configuration Access Address (FICAA): constructed based + * on the device's Instance Id and the PCI function and register offset of + * the desired register. + * + * Fabric Indirect Configuration Access Data (FICAD): there are FICAD + * low and high registers but so far only the low register is needed. + * + * Use Instance Id 0xFF to indicate a broadcast read. + */ +#define DF_BROADCAST 0xFF + +#define DF_FICAA_INST_EN BIT(0) +#define DF_FICAA_REG_NUM GENMASK(10, 1) +#define DF_FICAA_FUNC_NUM GENMASK(13, 11) +#define DF_FICAA_INST_ID GENMASK(23, 16) + +#define DF_FICAA_REG_NUM_LEGACY GENMASK(10, 2) + +static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) +{ + u32 ficaa_addr = 0x8C, ficad_addr = 0xB8; + struct pci_dev *F4; + int err = -ENODEV; + u32 ficaa = 0; + + if (node >= amd_nb_num()) + goto out; + + F4 = node_to_amd_nb(node)->link; + if (!F4) + goto out; + + /* Enable instance-specific access. */ + if (instance_id != DF_BROADCAST) { + ficaa |= FIELD_PREP(DF_FICAA_INST_EN, 1); + ficaa |= FIELD_PREP(DF_FICAA_INST_ID, instance_id); + } + + /* + * The two least-significant bits are masked when inputing the + * register offset to FICAA. + */ + reg >>= 2; + + if (df_cfg.flags.legacy_ficaa) { + ficaa_addr = 0x5C; + ficad_addr = 0x98; + + ficaa |= FIELD_PREP(DF_FICAA_REG_NUM_LEGACY, reg); + } else { + ficaa |= FIELD_PREP(DF_FICAA_REG_NUM, reg); + } + + ficaa |= FIELD_PREP(DF_FICAA_FUNC_NUM, func); + + mutex_lock(&df_indirect_mutex); + + err = pci_write_config_dword(F4, ficaa_addr, ficaa); + if (err) { + pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa); + goto out_unlock; + } + + err = pci_read_config_dword(F4, ficad_addr, lo); + if (err) + pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa); + + pr_debug("node=%u inst=0x%x func=0x%x reg=0x%x val=0x%x", + node, instance_id, func, reg << 2, *lo); + +out_unlock: + mutex_unlock(&df_indirect_mutex); + +out: + return err; +} + +int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) +{ + return __df_indirect_read(node, func, reg, instance_id, lo); +} + +int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo) +{ + return __df_indirect_read(node, func, reg, DF_BROADCAST, lo); +} diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c new file mode 100644 index 000000000000..6dc4e06305f7 --- /dev/null +++ b/drivers/ras/amd/atl/core.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * core.c : Module init and base translation functions + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#include +#include + +#include "internal.h" + +struct df_config df_cfg __read_mostly; + +static int addr_over_limit(struct addr_ctx *ctx) +{ + u64 dram_limit_addr; + + if (df_cfg.rev >= DF4) + dram_limit_addr = FIELD_GET(DF4_DRAM_LIMIT_ADDR, ctx->map.limit); + else + dram_limit_addr = FIELD_GET(DF2_DRAM_LIMIT_ADDR, ctx->map.limit); + + dram_limit_addr <<= DF_DRAM_BASE_LIMIT_LSB; + dram_limit_addr |= GENMASK(DF_DRAM_BASE_LIMIT_LSB - 1, 0); + + /* Is calculated system address above DRAM limit address? */ + if (ctx->ret_addr > dram_limit_addr) { + atl_debug(ctx, "Calculated address (0x%016llx) > DRAM limit (0x%016llx)", + ctx->ret_addr, dram_limit_addr); + return -EINVAL; + } + + return 0; +} + +static bool legacy_hole_en(struct addr_ctx *ctx) +{ + u32 reg = ctx->map.base; + + if (df_cfg.rev >= DF4) + reg = ctx->map.ctl; + + return FIELD_GET(DF_LEGACY_MMIO_HOLE_EN, reg); +} + +static int add_legacy_hole(struct addr_ctx *ctx) +{ + u32 dram_hole_base; + u8 func = 0; + + if (!legacy_hole_en(ctx)) + return 0; + + if (df_cfg.rev >= DF4) + func = 7; + + if (df_indirect_read_broadcast(ctx->node_id, func, 0x104, &dram_hole_base)) + return -EINVAL; + + dram_hole_base &= DF_DRAM_HOLE_BASE_MASK; + + if (ctx->ret_addr >= dram_hole_base) + ctx->ret_addr += (BIT_ULL(32) - dram_hole_base); + + return 0; +} + +static u64 get_base_addr(struct addr_ctx *ctx) +{ + u64 base_addr; + + if (df_cfg.rev >= DF4) + base_addr = FIELD_GET(DF4_BASE_ADDR, ctx->map.base); + else + base_addr = FIELD_GET(DF2_BASE_ADDR, ctx->map.base); + + return base_addr << DF_DRAM_BASE_LIMIT_LSB; +} + +static int add_base_and_hole(struct addr_ctx *ctx) +{ + ctx->ret_addr += get_base_addr(ctx); + + if (add_legacy_hole(ctx)) + return -EINVAL; + + return 0; +} + +static bool late_hole_remove(struct addr_ctx *ctx) +{ + if (df_cfg.rev == DF3p5) + return true; + + if (df_cfg.rev == DF4) + return true; + + if (ctx->map.intlv_mode == DF3_6CHAN) + return true; + + return false; +} + +unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr) +{ + struct addr_ctx ctx; + + if (df_cfg.rev == UNKNOWN) + return -EINVAL; + + memset(&ctx, 0, sizeof(ctx)); + + /* Start from the normalized address */ + ctx.ret_addr = addr; + ctx.inst_id = coh_st_inst_id; + + ctx.inputs.norm_addr = addr; + ctx.inputs.socket_id = socket_id; + ctx.inputs.die_id = die_id; + ctx.inputs.coh_st_inst_id = coh_st_inst_id; + + if (determine_node_id(&ctx, socket_id, die_id)) + return -EINVAL; + + if (get_address_map(&ctx)) + return -EINVAL; + + if (denormalize_address(&ctx)) + return -EINVAL; + + if (!late_hole_remove(&ctx) && add_base_and_hole(&ctx)) + return -EINVAL; + + if (dehash_address(&ctx)) + return -EINVAL; + + if (late_hole_remove(&ctx) && add_base_and_hole(&ctx)) + return -EINVAL; + + if (addr_over_limit(&ctx)) + return -EINVAL; + + return ctx.ret_addr; +} + +static void check_for_legacy_df_access(void) +{ + /* + * All Zen-based systems before Family 19h use the legacy + * DF Indirect Access (FICAA/FICAD) offsets. + */ + if (boot_cpu_data.x86 < 0x19) { + df_cfg.flags.legacy_ficaa = true; + return; + } + + /* All systems after Family 19h use the current offsets. */ + if (boot_cpu_data.x86 > 0x19) + return; + + /* Some Family 19h systems use the legacy offsets. */ + switch (boot_cpu_data.x86_model) { + case 0x00 ... 0x0f: + case 0x20 ... 0x5f: + df_cfg.flags.legacy_ficaa = true; + } +} + +/* + * This library provides functionality for AMD-based systems with a Data Fabric. + * The set of systems with a Data Fabric is equivalent to the set of Zen-based systems + * and the set of systems with the Scalable MCA feature at this time. However, these + * are technically independent things. + * + * It's possible to match on the PCI IDs of the Data Fabric devices, but this will be + * an ever expanding list. Instead, match on the SMCA and Zen features to cover all + * relevant systems. + */ +static const struct x86_cpu_id amd_atl_cpuids[] = { + X86_MATCH_FEATURE(X86_FEATURE_SMCA, NULL), + X86_MATCH_FEATURE(X86_FEATURE_ZEN, NULL), + { } +}; +MODULE_DEVICE_TABLE(x86cpu, amd_atl_cpuids); + +static int __init amd_atl_init(void) +{ + if (!x86_match_cpu(amd_atl_cpuids)) + return -ENODEV; + + if (!amd_nb_num()) + return -ENODEV; + + check_for_legacy_df_access(); + + if (get_df_system_info()) + return -ENODEV; + + /* Increment this module's recount so that it can't be easily unloaded. */ + __module_get(THIS_MODULE); + amd_atl_register_decoder(convert_umc_mca_addr_to_sys_addr); + + pr_info("AMD Address Translation Library initialized"); + return 0; +} + +/* + * Exit function is only needed for testing and debug. Module unload must be + * forced to override refcount check. + */ +static void __exit amd_atl_exit(void) +{ + amd_atl_unregister_decoder(); +} + +module_init(amd_atl_init); +module_exit(amd_atl_exit); + +MODULE_LICENSE("GPL"); diff --git a/drivers/ras/amd/atl/dehash.c b/drivers/ras/amd/atl/dehash.c new file mode 100644 index 000000000000..6f414926e6fe --- /dev/null +++ b/drivers/ras/amd/atl/dehash.c @@ -0,0 +1,407 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * dehash.c : Functions to account for hashing bits + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#include "internal.h" + +/* + * Verify the interleave bits are correct in the different interleaving + * settings. + * + * If @num_intlv_dies and/or @num_intlv_sockets are 1, it means the + * respective interleaving is disabled. + */ +static inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2, + u8 num_intlv_dies, u8 num_intlv_sockets) +{ + if (!(ctx->map.intlv_bit_pos == bit1 || ctx->map.intlv_bit_pos == bit2)) { + pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos); + return false; + } + + if (ctx->map.num_intlv_dies > num_intlv_dies) { + pr_debug("Invalid number of interleave dies: %u", ctx->map.num_intlv_dies); + return false; + } + + if (ctx->map.num_intlv_sockets > num_intlv_sockets) { + pr_debug("Invalid number of interleave sockets: %u", ctx->map.num_intlv_sockets); + return false; + } + + return true; +} + +static int df2_dehash_addr(struct addr_ctx *ctx) +{ + u8 hashed_bit, intlv_bit, intlv_bit_pos; + + if (!map_bits_valid(ctx, 8, 9, 1, 1)) + return -EINVAL; + + intlv_bit_pos = ctx->map.intlv_bit_pos; + intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(12), ctx->ret_addr); + hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr); + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr); + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr); + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(intlv_bit_pos); + + return 0; +} + +static int df3_dehash_addr(struct addr_ctx *ctx) +{ + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G; + u8 hashed_bit, intlv_bit, intlv_bit_pos; + + if (!map_bits_valid(ctx, 8, 9, 1, 1)) + return -EINVAL; + + hash_ctl_64k = FIELD_GET(DF3_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF3_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF3_HASH_CTL_1G, ctx->map.ctl); + + intlv_bit_pos = ctx->map.intlv_bit_pos; + intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr); + hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(intlv_bit_pos); + + /* Calculation complete for 2 channels. Continue for 4 and 8 channels. */ + if (ctx->map.intlv_mode == DF3_COD4_2CHAN_HASH) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(12), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(12); + + /* Calculation complete for 4 channels. Continue for 8 channels. */ + if (ctx->map.intlv_mode == DF3_COD2_4CHAN_HASH) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(13), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(13); + + return 0; +} + +static int df3_6chan_dehash_addr(struct addr_ctx *ctx) +{ + u8 intlv_bit_pos = ctx->map.intlv_bit_pos; + u8 hashed_bit, intlv_bit, num_intlv_bits; + bool hash_ctl_2M, hash_ctl_1G; + + if (ctx->map.intlv_mode != DF3_6CHAN) { + atl_debug_on_bad_intlv_mode(ctx); + return -EINVAL; + } + + num_intlv_bits = ilog2(ctx->map.num_intlv_chan) + 1; + + hash_ctl_2M = FIELD_GET(DF3_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF3_HASH_CTL_1G, ctx->map.ctl); + + intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= !!(BIT_ULL(intlv_bit_pos + num_intlv_bits) & ctx->ret_addr); + hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(intlv_bit_pos); + + intlv_bit_pos++; + intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(intlv_bit_pos); + + intlv_bit_pos++; + intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(intlv_bit_pos); + + return 0; +} + +static int df4_dehash_addr(struct addr_ctx *ctx) +{ + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G; + u8 hashed_bit, intlv_bit; + + if (!map_bits_valid(ctx, 8, 8, 1, 2)) + return -EINVAL; + + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + + intlv_bit = FIELD_GET(BIT_ULL(8), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; + + if (ctx->map.num_intlv_sockets == 1) + hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr); + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(8); + + /* + * Hashing is possible with socket interleaving, so check the total number + * of channels in the system rather than DRAM map interleaving mode. + * + * Calculation complete for 2 channels. Continue for 4, 8, and 16 channels. + */ + if (ctx->map.total_intlv_chan <= 2) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(12), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(12); + + /* Calculation complete for 4 channels. Continue for 8 and 16 channels. */ + if (ctx->map.total_intlv_chan <= 4) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(13), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(13); + + /* Calculation complete for 8 channels. Continue for 16 channels. */ + if (ctx->map.total_intlv_chan <= 8) + return 0; + + intlv_bit = FIELD_GET(BIT_ULL(14), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(19), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(24), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(33), ctx->ret_addr) & hash_ctl_1G; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(14); + + return 0; +} + +static int df4p5_dehash_addr(struct addr_ctx *ctx) +{ + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T; + u8 hashed_bit, intlv_bit; + u64 rehash_vector; + + if (!map_bits_valid(ctx, 8, 8, 1, 2)) + return -EINVAL; + + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + hash_ctl_1T = FIELD_GET(DF4_HASH_CTL_1T, ctx->map.ctl); + + /* + * Generate a unique address to determine which bits + * need to be dehashed. + * + * Start with a contiguous bitmask for the total + * number of channels starting at bit 8. + * + * Then make a gap in the proper place based on + * interleave mode. + */ + rehash_vector = ctx->map.total_intlv_chan - 1; + rehash_vector <<= 8; + + if (ctx->map.intlv_mode == DF4p5_NPS2_4CHAN_1K_HASH || + ctx->map.intlv_mode == DF4p5_NPS1_8CHAN_1K_HASH || + ctx->map.intlv_mode == DF4p5_NPS1_16CHAN_1K_HASH) + rehash_vector = expand_bits(10, 2, rehash_vector); + else + rehash_vector = expand_bits(9, 3, rehash_vector); + + if (rehash_vector & BIT_ULL(8)) { + intlv_bit = FIELD_GET(BIT_ULL(8), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(40), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(8); + } + + if (rehash_vector & BIT_ULL(9)) { + intlv_bit = FIELD_GET(BIT_ULL(9), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(41), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(9); + } + + if (rehash_vector & BIT_ULL(12)) { + intlv_bit = FIELD_GET(BIT_ULL(12), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(42), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(12); + } + + if (rehash_vector & BIT_ULL(13)) { + intlv_bit = FIELD_GET(BIT_ULL(13), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(19), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(24), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(33), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(43), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(13); + } + + if (rehash_vector & BIT_ULL(14)) { + intlv_bit = FIELD_GET(BIT_ULL(14), ctx->ret_addr); + + hashed_bit = intlv_bit; + hashed_bit ^= FIELD_GET(BIT_ULL(20), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(25), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(34), ctx->ret_addr) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(44), ctx->ret_addr) & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(14); + } + + return 0; +} + +int dehash_address(struct addr_ctx *ctx) +{ + switch (ctx->map.intlv_mode) { + /* No hashing cases. */ + case NONE: + case NOHASH_2CHAN: + case NOHASH_4CHAN: + case NOHASH_8CHAN: + case NOHASH_16CHAN: + case NOHASH_32CHAN: + /* Hashing bits handled earlier during CS ID calculation. */ + case DF4_NPS4_3CHAN_HASH: + case DF4_NPS2_5CHAN_HASH: + case DF4_NPS2_6CHAN_HASH: + case DF4_NPS1_10CHAN_HASH: + case DF4_NPS1_12CHAN_HASH: + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + case DF4p5_NPS1_12CHAN_1K_HASH: + case DF4p5_NPS1_12CHAN_2K_HASH: + case DF4p5_NPS0_24CHAN_1K_HASH: + case DF4p5_NPS0_24CHAN_2K_HASH: + /* No hash physical address bits, so nothing to do. */ + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + return 0; + + case DF2_2CHAN_HASH: + return df2_dehash_addr(ctx); + + case DF3_COD4_2CHAN_HASH: + case DF3_COD2_4CHAN_HASH: + case DF3_COD1_8CHAN_HASH: + return df3_dehash_addr(ctx); + + case DF3_6CHAN: + return df3_6chan_dehash_addr(ctx); + + case DF4_NPS4_2CHAN_HASH: + case DF4_NPS2_4CHAN_HASH: + case DF4_NPS1_8CHAN_HASH: + return df4_dehash_addr(ctx); + + case DF4p5_NPS4_2CHAN_1K_HASH: + case DF4p5_NPS4_2CHAN_2K_HASH: + case DF4p5_NPS2_4CHAN_2K_HASH: + case DF4p5_NPS2_4CHAN_1K_HASH: + case DF4p5_NPS1_8CHAN_1K_HASH: + case DF4p5_NPS1_8CHAN_2K_HASH: + case DF4p5_NPS1_16CHAN_1K_HASH: + case DF4p5_NPS1_16CHAN_2K_HASH: + return df4p5_dehash_addr(ctx); + + default: + atl_debug_on_bad_intlv_mode(ctx); + return -EINVAL; + } +} diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c new file mode 100644 index 000000000000..01f1d0fb6799 --- /dev/null +++ b/drivers/ras/amd/atl/denormalize.c @@ -0,0 +1,617 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * denormalize.c : Functions to account for interleaving bits + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#include "internal.h" + +/* + * Returns the Destination Fabric ID. This is the first (lowest) + * COH_ST Fabric ID used within a DRAM Address map. + */ +static u16 get_dst_fabric_id(struct addr_ctx *ctx) +{ + switch (df_cfg.rev) { + case DF2: return FIELD_GET(DF2_DST_FABRIC_ID, ctx->map.limit); + case DF3: return FIELD_GET(DF3_DST_FABRIC_ID, ctx->map.limit); + case DF3p5: return FIELD_GET(DF3p5_DST_FABRIC_ID, ctx->map.limit); + case DF4: return FIELD_GET(DF4_DST_FABRIC_ID, ctx->map.ctl); + case DF4p5: return FIELD_GET(DF4p5_DST_FABRIC_ID, ctx->map.ctl); + default: + atl_debug_on_bad_df_rev(); + return 0; + } +} + +/* + * Make a contiguous gap in address for N bits starting at bit P. + * + * Example: + * address bits: [20:0] + * # of interleave bits (n): 3 + * starting interleave bit (p): 8 + * + * expanded address bits: [20+n : n+p][n+p-1 : p][p-1 : 0] + * [23 : 11][10 : 8][7 : 0] + */ +static u64 make_space_for_coh_st_id_at_intlv_bit(struct addr_ctx *ctx) +{ + return expand_bits(ctx->map.intlv_bit_pos, + ctx->map.total_intlv_bits, + ctx->ret_addr); +} + +/* + * Make two gaps in address for N bits. + * First gap is a single bit at bit P. + * Second gap is the remaining N-1 bits at bit 12. + * + * Example: + * address bits: [20:0] + * # of interleave bits (n): 3 + * starting interleave bit (p): 8 + * + * First gap + * expanded address bits: [20+1 : p+1][p][p-1 : 0] + * [21 : 9][8][7 : 0] + * + * Second gap uses result from first. + * r = n - 1; remaining interleave bits + * expanded address bits: [21+r : 12+r][12+r-1: 12][11 : 0] + * [23 : 14][13 : 12][11 : 0] + */ +static u64 make_space_for_coh_st_id_split_2_1(struct addr_ctx *ctx) +{ + /* Make a single space at the interleave bit. */ + u64 denorm_addr = expand_bits(ctx->map.intlv_bit_pos, 1, ctx->ret_addr); + + /* Done if there's only a single interleave bit. */ + if (ctx->map.total_intlv_bits <= 1) + return denorm_addr; + + /* Make spaces for the remaining interleave bits starting at bit 12. */ + return expand_bits(12, ctx->map.total_intlv_bits - 1, denorm_addr); +} + +/* + * Take the current calculated address and shift enough bits in the middle + * to make a gap where the interleave bits will be inserted. + */ +static u64 make_space_for_coh_st_id(struct addr_ctx *ctx) +{ + switch (ctx->map.intlv_mode) { + case NOHASH_2CHAN: + case NOHASH_4CHAN: + case NOHASH_8CHAN: + case NOHASH_16CHAN: + case NOHASH_32CHAN: + case DF2_2CHAN_HASH: + return make_space_for_coh_st_id_at_intlv_bit(ctx); + + case DF3_COD4_2CHAN_HASH: + case DF3_COD2_4CHAN_HASH: + case DF3_COD1_8CHAN_HASH: + case DF4_NPS4_2CHAN_HASH: + case DF4_NPS2_4CHAN_HASH: + case DF4_NPS1_8CHAN_HASH: + case DF4p5_NPS4_2CHAN_1K_HASH: + case DF4p5_NPS4_2CHAN_2K_HASH: + case DF4p5_NPS2_4CHAN_2K_HASH: + case DF4p5_NPS1_8CHAN_2K_HASH: + case DF4p5_NPS1_16CHAN_2K_HASH: + return make_space_for_coh_st_id_split_2_1(ctx); + default: + atl_debug_on_bad_intlv_mode(ctx); + return ~0ULL; + } +} + +static u16 get_coh_st_id_df2(struct addr_ctx *ctx) +{ + u8 num_socket_intlv_bits = ilog2(ctx->map.num_intlv_sockets); + u8 num_die_intlv_bits = ilog2(ctx->map.num_intlv_dies); + u8 num_intlv_bits; + u16 coh_st_id, mask; + + coh_st_id = ctx->coh_st_fabric_id - get_dst_fabric_id(ctx); + + /* Channel interleave bits */ + num_intlv_bits = order_base_2(ctx->map.num_intlv_chan); + mask = GENMASK(num_intlv_bits - 1, 0); + coh_st_id &= mask; + + /* Die interleave bits */ + if (num_die_intlv_bits) { + u16 die_bits; + + mask = GENMASK(num_die_intlv_bits - 1, 0); + die_bits = ctx->coh_st_fabric_id & df_cfg.die_id_mask; + die_bits >>= df_cfg.die_id_shift; + + coh_st_id |= (die_bits & mask) << num_intlv_bits; + num_intlv_bits += num_die_intlv_bits; + } + + /* Socket interleave bits */ + if (num_socket_intlv_bits) { + u16 socket_bits; + + mask = GENMASK(num_socket_intlv_bits - 1, 0); + socket_bits = ctx->coh_st_fabric_id & df_cfg.socket_id_mask; + socket_bits >>= df_cfg.socket_id_shift; + + coh_st_id |= (socket_bits & mask) << num_intlv_bits; + } + + return coh_st_id; +} + +static u16 get_coh_st_id_df4(struct addr_ctx *ctx) +{ + /* + * Start with the original component mask and the number of interleave + * bits for the channels in this map. + */ + u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan); + u16 mask = df_cfg.component_id_mask; + + u16 socket_bits; + + /* Set the derived Coherent Station ID to the input Coherent Station Fabric ID. */ + u16 coh_st_id = ctx->coh_st_fabric_id & mask; + + /* + * Subtract the "base" Destination Fabric ID. + * This accounts for systems with disabled Coherent Stations. + */ + coh_st_id -= get_dst_fabric_id(ctx) & mask; + + /* + * Generate and use a new mask based on the number of bits + * needed for channel interleaving in this map. + */ + mask = GENMASK(num_intlv_bits - 1, 0); + coh_st_id &= mask; + + /* Done if socket interleaving is not enabled. */ + if (ctx->map.num_intlv_sockets <= 1) + return coh_st_id; + + /* + * Figure out how many bits are needed for the number of + * interleaved sockets. And shift the derived Coherent Station ID to account + * for these. + */ + num_intlv_bits = ilog2(ctx->map.num_intlv_sockets); + coh_st_id <<= num_intlv_bits; + + /* Generate a new mask for the socket interleaving bits. */ + mask = GENMASK(num_intlv_bits - 1, 0); + + /* Get the socket interleave bits from the original Coherent Station Fabric ID. */ + socket_bits = (ctx->coh_st_fabric_id & df_cfg.socket_id_mask) >> df_cfg.socket_id_shift; + + /* Apply the appropriate socket bits to the derived Coherent Station ID. */ + coh_st_id |= socket_bits & mask; + + return coh_st_id; +} + +/* + * Derive the correct Coherent Station ID that represents the interleave bits + * used within the system physical address. This accounts for the + * interleave mode, number of interleaved channels/dies/sockets, and + * other system/mode-specific bit swizzling. + * + * Returns: Coherent Station ID on success. + * All bits set on error. + */ +static u16 calculate_coh_st_id(struct addr_ctx *ctx) +{ + switch (ctx->map.intlv_mode) { + case NOHASH_2CHAN: + case NOHASH_4CHAN: + case NOHASH_8CHAN: + case NOHASH_16CHAN: + case NOHASH_32CHAN: + case DF3_COD4_2CHAN_HASH: + case DF3_COD2_4CHAN_HASH: + case DF3_COD1_8CHAN_HASH: + case DF2_2CHAN_HASH: + return get_coh_st_id_df2(ctx); + + case DF4_NPS4_2CHAN_HASH: + case DF4_NPS2_4CHAN_HASH: + case DF4_NPS1_8CHAN_HASH: + case DF4p5_NPS4_2CHAN_1K_HASH: + case DF4p5_NPS4_2CHAN_2K_HASH: + case DF4p5_NPS2_4CHAN_2K_HASH: + case DF4p5_NPS1_8CHAN_2K_HASH: + case DF4p5_NPS1_16CHAN_2K_HASH: + return get_coh_st_id_df4(ctx); + + /* COH_ST ID is simply the COH_ST Fabric ID adjusted by the Destination Fabric ID. */ + case DF4p5_NPS2_4CHAN_1K_HASH: + case DF4p5_NPS1_8CHAN_1K_HASH: + case DF4p5_NPS1_16CHAN_1K_HASH: + return ctx->coh_st_fabric_id - get_dst_fabric_id(ctx); + + default: + atl_debug_on_bad_intlv_mode(ctx); + return ~0; + } +} + +static u64 insert_coh_st_id_at_intlv_bit(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) +{ + return denorm_addr | (coh_st_id << ctx->map.intlv_bit_pos); +} + +static u64 insert_coh_st_id_split_2_1(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) +{ + /* Insert coh_st_id[0] at the interleave bit. */ + denorm_addr |= (coh_st_id & BIT(0)) << ctx->map.intlv_bit_pos; + + /* Insert coh_st_id[2:1] at bit 12. */ + denorm_addr |= (coh_st_id & GENMASK(2, 1)) << 11; + + return denorm_addr; +} + +static u64 insert_coh_st_id_split_2_2(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) +{ + /* Insert coh_st_id[1:0] at bit 8. */ + denorm_addr |= (coh_st_id & GENMASK(1, 0)) << 8; + + /* + * Insert coh_st_id[n:2] at bit 12. 'n' could be 2 or 3. + * Grab both because bit 3 will be clear if unused. + */ + denorm_addr |= (coh_st_id & GENMASK(3, 2)) << 10; + + return denorm_addr; +} + +static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id) +{ + switch (ctx->map.intlv_mode) { + case NOHASH_2CHAN: + case NOHASH_4CHAN: + case NOHASH_8CHAN: + case NOHASH_16CHAN: + case NOHASH_32CHAN: + case DF2_2CHAN_HASH: + return insert_coh_st_id_at_intlv_bit(ctx, denorm_addr, coh_st_id); + + case DF3_COD4_2CHAN_HASH: + case DF3_COD2_4CHAN_HASH: + case DF3_COD1_8CHAN_HASH: + case DF4_NPS4_2CHAN_HASH: + case DF4_NPS2_4CHAN_HASH: + case DF4_NPS1_8CHAN_HASH: + case DF4p5_NPS4_2CHAN_1K_HASH: + case DF4p5_NPS4_2CHAN_2K_HASH: + case DF4p5_NPS2_4CHAN_2K_HASH: + case DF4p5_NPS1_8CHAN_2K_HASH: + case DF4p5_NPS1_16CHAN_2K_HASH: + return insert_coh_st_id_split_2_1(ctx, denorm_addr, coh_st_id); + + case DF4p5_NPS2_4CHAN_1K_HASH: + case DF4p5_NPS1_8CHAN_1K_HASH: + case DF4p5_NPS1_16CHAN_1K_HASH: + return insert_coh_st_id_split_2_2(ctx, denorm_addr, coh_st_id); + + default: + atl_debug_on_bad_intlv_mode(ctx); + return ~0ULL; + } +} + +static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx) +{ + u16 component_id, log_fabric_id; + + /* Start with the physical COH_ST Fabric ID. */ + u16 phys_fabric_id = ctx->coh_st_fabric_id; + + /* Skip logical ID lookup if remapping is disabled. */ + if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl) && + ctx->map.intlv_mode != DF3_6CHAN) + return phys_fabric_id; + + /* Mask off the Node ID bits to get the "local" Component ID. */ + component_id = phys_fabric_id & df_cfg.component_id_mask; + + /* + * Search the list of logical Component IDs for the one that + * matches this physical Component ID. + */ + for (log_fabric_id = 0; log_fabric_id < MAX_COH_ST_CHANNELS; log_fabric_id++) { + if (ctx->map.remap_array[log_fabric_id] == component_id) + break; + } + + if (log_fabric_id == MAX_COH_ST_CHANNELS) + atl_debug(ctx, "COH_ST remap entry not found for 0x%x", + log_fabric_id); + + /* Get the Node ID bits from the physical and apply to the logical. */ + return (phys_fabric_id & df_cfg.node_id_mask) | log_fabric_id; +} + +static int denorm_addr_common(struct addr_ctx *ctx) +{ + u64 denorm_addr; + u16 coh_st_id; + + /* + * Convert the original physical COH_ST Fabric ID to a logical value. + * This is required for non-power-of-two and other interleaving modes. + */ + ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx); + + denorm_addr = make_space_for_coh_st_id(ctx); + coh_st_id = calculate_coh_st_id(ctx); + ctx->ret_addr = insert_coh_st_id(ctx, denorm_addr, coh_st_id); + return 0; +} + +static int denorm_addr_df3_6chan(struct addr_ctx *ctx) +{ + u16 coh_st_id = ctx->coh_st_fabric_id & df_cfg.component_id_mask; + u8 total_intlv_bits = ctx->map.total_intlv_bits; + u8 low_bit, intlv_bit = ctx->map.intlv_bit_pos; + u64 msb_intlv_bits, temp_addr_a, temp_addr_b; + u8 np2_bits = ctx->map.np2_bits; + + if (ctx->map.intlv_mode != DF3_6CHAN) + return -EINVAL; + + /* + * 'np2_bits' holds the number of bits needed to cover the + * amount of memory (rounded up) in this map using 64K chunks. + * + * Example: + * Total memory in map: 6GB + * Rounded up to next power-of-2: 8GB + * Number of 64K chunks: 0x20000 + * np2_bits = log2(# of chunks): 17 + * + * Get the two most-significant interleave bits from the + * input address based on the following: + * + * [15 + np2_bits - total_intlv_bits : 14 + np2_bits - total_intlv_bits] + */ + low_bit = 14 + np2_bits - total_intlv_bits; + msb_intlv_bits = ctx->ret_addr >> low_bit; + msb_intlv_bits &= 0x3; + + /* + * If MSB are 11b, then logical COH_ST ID is 6 or 7. + * Need to adjust based on the mod3 result. + */ + if (msb_intlv_bits == 3) { + u8 addr_mod, phys_addr_msb, msb_coh_st_id; + + /* Get the remaining interleave bits from the input address. */ + temp_addr_b = GENMASK_ULL(low_bit - 1, intlv_bit) & ctx->ret_addr; + temp_addr_b >>= intlv_bit; + + /* Calculate the logical COH_ST offset based on mod3. */ + addr_mod = temp_addr_b % 3; + + /* Get COH_ST ID bits [2:1]. */ + msb_coh_st_id = (coh_st_id >> 1) & 0x3; + + /* Get the bit that starts the physical address bits. */ + phys_addr_msb = (intlv_bit + np2_bits + 1); + phys_addr_msb &= BIT(0); + phys_addr_msb++; + phys_addr_msb *= 3 - addr_mod + msb_coh_st_id; + phys_addr_msb %= 3; + + /* Move the physical address MSB to the correct place. */ + temp_addr_b |= phys_addr_msb << (low_bit - total_intlv_bits - intlv_bit); + + /* Generate a new COH_ST ID as follows: coh_st_id = [1, 1, coh_st_id[0]] */ + coh_st_id &= BIT(0); + coh_st_id |= GENMASK(2, 1); + } else { + temp_addr_b = GENMASK_ULL(63, intlv_bit) & ctx->ret_addr; + temp_addr_b >>= intlv_bit; + } + + temp_addr_a = GENMASK_ULL(intlv_bit - 1, 0) & ctx->ret_addr; + temp_addr_b <<= intlv_bit + total_intlv_bits; + + ctx->ret_addr = temp_addr_a | temp_addr_b; + ctx->ret_addr |= coh_st_id << intlv_bit; + return 0; +} + +static int denorm_addr_df4_np2(struct addr_ctx *ctx) +{ + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G; + u16 group, group_offset, log_coh_st_offset; + unsigned int mod_value, shift_value; + u16 mask = df_cfg.component_id_mask; + u64 temp_addr_a, temp_addr_b; + u8 hash_pa8, hashed_bit; + + switch (ctx->map.intlv_mode) { + case DF4_NPS4_3CHAN_HASH: + mod_value = 3; + shift_value = 13; + break; + case DF4_NPS2_6CHAN_HASH: + mod_value = 3; + shift_value = 12; + break; + case DF4_NPS1_12CHAN_HASH: + mod_value = 3; + shift_value = 11; + break; + case DF4_NPS2_5CHAN_HASH: + mod_value = 5; + shift_value = 13; + break; + case DF4_NPS1_10CHAN_HASH: + mod_value = 5; + shift_value = 12; + break; + default: + atl_debug_on_bad_intlv_mode(ctx); + return -EINVAL; + }; + + if (ctx->map.num_intlv_sockets == 1) { + hash_pa8 = BIT_ULL(shift_value) & ctx->ret_addr; + temp_addr_a = remove_bits(shift_value, shift_value, ctx->ret_addr); + } else { + hash_pa8 = (ctx->coh_st_fabric_id & df_cfg.socket_id_mask); + hash_pa8 >>= df_cfg.socket_id_shift; + temp_addr_a = ctx->ret_addr; + } + + /* Make a gap for the real bit [8]. */ + temp_addr_a = expand_bits(8, 1, temp_addr_a); + + /* Make an additional gap for bits [13:12], as appropriate.*/ + if (ctx->map.intlv_mode == DF4_NPS2_6CHAN_HASH || + ctx->map.intlv_mode == DF4_NPS1_10CHAN_HASH) { + temp_addr_a = expand_bits(13, 1, temp_addr_a); + } else if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH) { + temp_addr_a = expand_bits(12, 2, temp_addr_a); + } + + /* Keep bits [13:0]. */ + temp_addr_a &= GENMASK_ULL(13, 0); + + /* Get the appropriate high bits. */ + shift_value += 1 - ilog2(ctx->map.num_intlv_sockets); + temp_addr_b = GENMASK_ULL(63, shift_value) & ctx->ret_addr; + temp_addr_b >>= shift_value; + temp_addr_b *= mod_value; + + /* + * Coherent Stations are divided into groups. + * + * Multiples of 3 (mod3) are divided into quadrants. + * e.g. NP4_3CHAN -> [0, 1, 2] [6, 7, 8] + * [3, 4, 5] [9, 10, 11] + * + * Multiples of 5 (mod5) are divided into sides. + * e.g. NP2_5CHAN -> [0, 1, 2, 3, 4] [5, 6, 7, 8, 9] + */ + + /* + * Calculate the logical offset for the COH_ST within its DRAM Address map. + * e.g. if map includes [5, 6, 7, 8, 9] and target instance is '8', then + * log_coh_st_offset = 8 - 5 = 3 + */ + log_coh_st_offset = (ctx->coh_st_fabric_id & mask) - (get_dst_fabric_id(ctx) & mask); + + /* + * Figure out the group number. + * + * Following above example, + * log_coh_st_offset = 3 + * mod_value = 5 + * group = 3 / 5 = 0 + */ + group = log_coh_st_offset / mod_value; + + /* + * Figure out the offset within the group. + * + * Following above example, + * log_coh_st_offset = 3 + * mod_value = 5 + * group_offset = 3 % 5 = 3 + */ + group_offset = log_coh_st_offset % mod_value; + + /* Adjust group_offset if the hashed bit [8] is set. */ + if (hash_pa8) { + if (!group_offset) + group_offset = mod_value - 1; + else + group_offset--; + } + + /* Add in the group offset to the high bits. */ + temp_addr_b += group_offset; + + /* Shift the high bits to the proper starting position. */ + temp_addr_b <<= 14; + + /* Combine the high and low bits together. */ + ctx->ret_addr = temp_addr_a | temp_addr_b; + + /* Account for hashing here instead of in dehash_address(). */ + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + + hashed_bit = !!hash_pa8; + hashed_bit ^= FIELD_GET(BIT_ULL(14), ctx->ret_addr); + hashed_bit ^= FIELD_GET(BIT_ULL(16), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(21), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), ctx->ret_addr) & hash_ctl_1G; + + ctx->ret_addr |= hashed_bit << 8; + + /* Done for 3 and 5 channel. */ + if (ctx->map.intlv_mode == DF4_NPS4_3CHAN_HASH || + ctx->map.intlv_mode == DF4_NPS2_5CHAN_HASH) + return 0; + + /* Select the proper 'group' bit to use for Bit 13. */ + if (ctx->map.intlv_mode == DF4_NPS1_12CHAN_HASH) + hashed_bit = !!(group & BIT(1)); + else + hashed_bit = group & BIT(0); + + hashed_bit ^= FIELD_GET(BIT_ULL(18), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(23), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), ctx->ret_addr) & hash_ctl_1G; + + ctx->ret_addr |= hashed_bit << 13; + + /* Done for 6 and 10 channel. */ + if (ctx->map.intlv_mode != DF4_NPS1_12CHAN_HASH) + return 0; + + hashed_bit = group & BIT(0); + hashed_bit ^= FIELD_GET(BIT_ULL(17), ctx->ret_addr) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(22), ctx->ret_addr) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), ctx->ret_addr) & hash_ctl_1G; + + ctx->ret_addr |= hashed_bit << 12; + return 0; +} + +int denormalize_address(struct addr_ctx *ctx) +{ + switch (ctx->map.intlv_mode) { + case NONE: + return 0; + case DF4_NPS4_3CHAN_HASH: + case DF4_NPS2_6CHAN_HASH: + case DF4_NPS1_12CHAN_HASH: + case DF4_NPS2_5CHAN_HASH: + case DF4_NPS1_10CHAN_HASH: + return denorm_addr_df4_np2(ctx); + case DF3_6CHAN: + return denorm_addr_df3_6chan(ctx); + default: + return denorm_addr_common(ctx); + } +} diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h new file mode 100644 index 000000000000..13f1b6098c96 --- /dev/null +++ b/drivers/ras/amd/atl/internal.h @@ -0,0 +1,297 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD Address Translation Library + * + * internal.h : Helper functions and common defines + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#ifndef __AMD_ATL_INTERNAL_H__ +#define __AMD_ATL_INTERNAL_H__ + +#include +#include +#include + +#include + +#include "reg_fields.h" + +/* Maximum possible number of Coherent Stations within a single Data Fabric. */ +#define MAX_COH_ST_CHANNELS 32 + +/* PCI ID for Zen4 Server DF Function 0. */ +#define DF_FUNC0_ID_ZEN4_SERVER 0x14AD1022 + +/* Shift needed for adjusting register values to true values. */ +#define DF_DRAM_BASE_LIMIT_LSB 28 + +enum df_revisions { + UNKNOWN, + DF2, + DF3, + DF3p5, + DF4, + DF4p5, +}; + +/* These are mapped 1:1 to the hardware values. Special cases are set at > 0x20. */ +enum intlv_modes { + NONE = 0x00, + NOHASH_2CHAN = 0x01, + NOHASH_4CHAN = 0x03, + NOHASH_8CHAN = 0x05, + DF3_6CHAN = 0x06, + NOHASH_16CHAN = 0x07, + NOHASH_32CHAN = 0x08, + DF3_COD4_2CHAN_HASH = 0x0C, + DF3_COD2_4CHAN_HASH = 0x0D, + DF3_COD1_8CHAN_HASH = 0x0E, + DF4_NPS4_2CHAN_HASH = 0x10, + DF4_NPS2_4CHAN_HASH = 0x11, + DF4_NPS1_8CHAN_HASH = 0x12, + DF4_NPS4_3CHAN_HASH = 0x13, + DF4_NPS2_6CHAN_HASH = 0x14, + DF4_NPS1_12CHAN_HASH = 0x15, + DF4_NPS2_5CHAN_HASH = 0x16, + DF4_NPS1_10CHAN_HASH = 0x17, + DF2_2CHAN_HASH = 0x21, + /* DF4.5 modes are all IntLvNumChan + 0x20 */ + DF4p5_NPS1_16CHAN_1K_HASH = 0x2C, + DF4p5_NPS0_24CHAN_1K_HASH = 0x2E, + DF4p5_NPS4_2CHAN_1K_HASH = 0x30, + DF4p5_NPS2_4CHAN_1K_HASH = 0x31, + DF4p5_NPS1_8CHAN_1K_HASH = 0x32, + DF4p5_NPS4_3CHAN_1K_HASH = 0x33, + DF4p5_NPS2_6CHAN_1K_HASH = 0x34, + DF4p5_NPS1_12CHAN_1K_HASH = 0x35, + DF4p5_NPS2_5CHAN_1K_HASH = 0x36, + DF4p5_NPS1_10CHAN_1K_HASH = 0x37, + DF4p5_NPS4_2CHAN_2K_HASH = 0x40, + DF4p5_NPS2_4CHAN_2K_HASH = 0x41, + DF4p5_NPS1_8CHAN_2K_HASH = 0x42, + DF4p5_NPS1_16CHAN_2K_HASH = 0x43, + DF4p5_NPS4_3CHAN_2K_HASH = 0x44, + DF4p5_NPS2_6CHAN_2K_HASH = 0x45, + DF4p5_NPS1_12CHAN_2K_HASH = 0x46, + DF4p5_NPS0_24CHAN_2K_HASH = 0x47, + DF4p5_NPS2_5CHAN_2K_HASH = 0x48, + DF4p5_NPS1_10CHAN_2K_HASH = 0x49, +}; + +struct df_flags { + __u8 legacy_ficaa : 1, + socket_id_shift_quirk : 1, + __reserved_0 : 6; +}; + +struct df_config { + enum df_revisions rev; + + /* + * These masks operate on the 16-bit Coherent Station IDs, + * e.g. Instance, Fabric, Destination, etc. + */ + u16 component_id_mask; + u16 die_id_mask; + u16 node_id_mask; + u16 socket_id_mask; + + /* + * Least-significant bit of Node ID portion of the + * system-wide Coherent Station Fabric ID. + */ + u8 node_id_shift; + + /* + * Least-significant bit of Die portion of the Node ID. + * Adjusted to include the Node ID shift in order to apply + * to the Coherent Station Fabric ID. + */ + u8 die_id_shift; + + /* + * Least-significant bit of Socket portion of the Node ID. + * Adjusted to include the Node ID shift in order to apply + * to the Coherent Station Fabric ID. + */ + u8 socket_id_shift; + + /* Number of DRAM Address maps visible in a Coherent Station. */ + u8 num_coh_st_maps; + + /* Global flags to handle special cases. */ + struct df_flags flags; +}; + +extern struct df_config df_cfg; + +struct dram_addr_map { + /* + * Each DRAM Address Map can operate independently + * in different interleaving modes. + */ + enum intlv_modes intlv_mode; + + /* System-wide number for this address map. */ + u8 num; + + /* Raw register values */ + u32 base; + u32 limit; + u32 ctl; + u32 intlv; + + /* + * Logical to Physical Coherent Station Remapping array + * + * Index: Logical Coherent Station Instance ID + * Value: Physical Coherent Station Instance ID + * + * phys_coh_st_inst_id = remap_array[log_coh_st_inst_id] + */ + u8 remap_array[MAX_COH_ST_CHANNELS]; + + /* + * Number of bits covering DRAM Address map 0 + * when interleaving is non-power-of-2. + * + * Used only for DF3_6CHAN. + */ + u8 np2_bits; + + /* Position of the 'interleave bit'. */ + u8 intlv_bit_pos; + /* Number of channels interleaved in this map. */ + u8 num_intlv_chan; + /* Number of dies interleaved in this map. */ + u8 num_intlv_dies; + /* Number of sockets interleaved in this map. */ + u8 num_intlv_sockets; + /* + * Total number of channels interleaved accounting + * for die and socket interleaving. + */ + u8 total_intlv_chan; + /* Total bits needed to cover 'total_intlv_chan'. */ + u8 total_intlv_bits; +}; + +/* Original input values cached for debug printing. */ +struct addr_ctx_inputs { + u64 norm_addr; + u8 socket_id; + u8 die_id; + u8 coh_st_inst_id; +}; + +struct addr_ctx { + u64 ret_addr; + + struct addr_ctx_inputs inputs; + struct dram_addr_map map; + + /* AMD Node ID calculated from Socket and Die IDs. */ + u8 node_id; + + /* + * Coherent Station Instance ID + * Local ID used within a 'node'. + */ + u16 inst_id; + + /* + * Coherent Station Fabric ID + * System-wide ID that includes 'node' bits. + */ + u16 coh_st_fabric_id; +}; + +int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo); +int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo); + +int get_df_system_info(void); +int determine_node_id(struct addr_ctx *ctx, u8 socket_num, u8 die_num); + +int get_address_map(struct addr_ctx *ctx); + +int denormalize_address(struct addr_ctx *ctx); +int dehash_address(struct addr_ctx *ctx); + +unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr); +unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err); + +/* + * Make a gap in @data that is @num_bits long starting at @bit_num. + * e.g. data = 11111111'b + * bit_num = 3 + * num_bits = 2 + * result = 1111100111'b + */ +static inline u64 expand_bits(u8 bit_num, u8 num_bits, u64 data) +{ + u64 temp1, temp2; + + if (!num_bits) + return data; + + if (!bit_num) { + WARN_ON_ONCE(num_bits >= BITS_PER_LONG); + return data << num_bits; + } + + WARN_ON_ONCE(bit_num >= BITS_PER_LONG); + + temp1 = data & GENMASK_ULL(bit_num - 1, 0); + + temp2 = data & GENMASK_ULL(63, bit_num); + temp2 <<= num_bits; + + return temp1 | temp2; +} + +/* + * Remove bits in @data between @low_bit and @high_bit inclusive. + * e.g. data = XXXYYZZZ'b + * low_bit = 3 + * high_bit = 4 + * result = XXXZZZ'b + */ +static inline u64 remove_bits(u8 low_bit, u8 high_bit, u64 data) +{ + u64 temp1, temp2; + + WARN_ON_ONCE(high_bit >= BITS_PER_LONG); + WARN_ON_ONCE(low_bit >= BITS_PER_LONG); + WARN_ON_ONCE(low_bit > high_bit); + + if (!low_bit) + return data >> (high_bit++); + + temp1 = GENMASK_ULL(low_bit - 1, 0) & data; + temp2 = GENMASK_ULL(63, high_bit + 1) & data; + temp2 >>= high_bit - low_bit + 1; + + return temp1 | temp2; +} + +#define atl_debug(ctx, fmt, arg...) \ + pr_debug("socket_id=%u die_id=%u coh_st_inst_id=%u norm_addr=0x%016llx: " fmt,\ + (ctx)->inputs.socket_id, (ctx)->inputs.die_id,\ + (ctx)->inputs.coh_st_inst_id, (ctx)->inputs.norm_addr, ##arg) + +static inline void atl_debug_on_bad_df_rev(void) +{ + pr_debug("Unrecognized DF rev: %u", df_cfg.rev); +} + +static inline void atl_debug_on_bad_intlv_mode(struct addr_ctx *ctx) +{ + atl_debug(ctx, "Unrecognized interleave mode: %u", ctx->map.intlv_mode); +} + +#endif /* __AMD_ATL_INTERNAL_H__ */ diff --git a/drivers/ras/amd/atl/map.c b/drivers/ras/amd/atl/map.c new file mode 100644 index 000000000000..33f549b6255a --- /dev/null +++ b/drivers/ras/amd/atl/map.c @@ -0,0 +1,665 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * map.c : Functions to read and decode DRAM address maps + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#include "internal.h" + +static int df2_get_intlv_mode(struct addr_ctx *ctx) +{ + ctx->map.intlv_mode = FIELD_GET(DF2_INTLV_NUM_CHAN, ctx->map.base); + + if (ctx->map.intlv_mode == 8) + ctx->map.intlv_mode = DF2_2CHAN_HASH; + + if (ctx->map.intlv_mode != NONE && + ctx->map.intlv_mode != NOHASH_2CHAN && + ctx->map.intlv_mode != DF2_2CHAN_HASH) + return -EINVAL; + + return 0; +} + +static int df3_get_intlv_mode(struct addr_ctx *ctx) +{ + ctx->map.intlv_mode = FIELD_GET(DF3_INTLV_NUM_CHAN, ctx->map.base); + return 0; +} + +static int df3p5_get_intlv_mode(struct addr_ctx *ctx) +{ + ctx->map.intlv_mode = FIELD_GET(DF3p5_INTLV_NUM_CHAN, ctx->map.base); + + if (ctx->map.intlv_mode == DF3_6CHAN) + return -EINVAL; + + return 0; +} + +static int df4_get_intlv_mode(struct addr_ctx *ctx) +{ + ctx->map.intlv_mode = FIELD_GET(DF4_INTLV_NUM_CHAN, ctx->map.intlv); + + if (ctx->map.intlv_mode == DF3_COD4_2CHAN_HASH || + ctx->map.intlv_mode == DF3_COD2_4CHAN_HASH || + ctx->map.intlv_mode == DF3_COD1_8CHAN_HASH || + ctx->map.intlv_mode == DF3_6CHAN) + return -EINVAL; + + return 0; +} + +static int df4p5_get_intlv_mode(struct addr_ctx *ctx) +{ + ctx->map.intlv_mode = FIELD_GET(DF4p5_INTLV_NUM_CHAN, ctx->map.intlv); + + if (ctx->map.intlv_mode <= NOHASH_32CHAN) + return 0; + + /* + * Modes matching the ranges above are returned as-is. + * + * All other modes are "fixed up" by adding 20h to make a unique value. + */ + ctx->map.intlv_mode += 0x20; + + return 0; +} + +static int get_intlv_mode(struct addr_ctx *ctx) +{ + int ret; + + switch (df_cfg.rev) { + case DF2: + ret = df2_get_intlv_mode(ctx); + break; + case DF3: + ret = df3_get_intlv_mode(ctx); + break; + case DF3p5: + ret = df3p5_get_intlv_mode(ctx); + break; + case DF4: + ret = df4_get_intlv_mode(ctx); + break; + case DF4p5: + ret = df4p5_get_intlv_mode(ctx); + break; + default: + ret = -EINVAL; + } + + if (ret) + atl_debug_on_bad_df_rev(); + + return ret; +} + +static u64 get_hi_addr_offset(u32 reg_dram_offset) +{ + u8 shift = DF_DRAM_BASE_LIMIT_LSB; + u64 hi_addr_offset; + + switch (df_cfg.rev) { + case DF2: + hi_addr_offset = FIELD_GET(DF2_HI_ADDR_OFFSET, reg_dram_offset); + break; + case DF3: + case DF3p5: + hi_addr_offset = FIELD_GET(DF3_HI_ADDR_OFFSET, reg_dram_offset); + break; + case DF4: + case DF4p5: + hi_addr_offset = FIELD_GET(DF4_HI_ADDR_OFFSET, reg_dram_offset); + break; + default: + hi_addr_offset = 0; + atl_debug_on_bad_df_rev(); + } + + return hi_addr_offset << shift; +} + +/* + * Returns: 0 if offset is disabled. + * 1 if offset is enabled. + * -EINVAL on error. + */ +static int get_dram_offset(struct addr_ctx *ctx, u64 *norm_offset) +{ + u32 reg_dram_offset; + u8 map_num; + + /* Should not be called for map 0. */ + if (!ctx->map.num) { + atl_debug(ctx, "Trying to find DRAM offset for map 0"); + return -EINVAL; + } + + /* + * DramOffset registers don't exist for map 0, so the base register + * actually refers to map 1. + * Adjust the map_num for the register offsets. + */ + map_num = ctx->map.num - 1; + + if (df_cfg.rev >= DF4) { + /* Read D18F7x140 (DramOffset) */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x140 + (4 * map_num), + ctx->inst_id, ®_dram_offset)) + return -EINVAL; + + } else { + /* Read D18F0x1B4 (DramOffset) */ + if (df_indirect_read_instance(ctx->node_id, 0, 0x1B4 + (4 * map_num), + ctx->inst_id, ®_dram_offset)) + return -EINVAL; + } + + if (!FIELD_GET(DF_HI_ADDR_OFFSET_EN, reg_dram_offset)) + return 0; + + *norm_offset = get_hi_addr_offset(reg_dram_offset); + + return 1; +} + +static int df3_6ch_get_dram_addr_map(struct addr_ctx *ctx) +{ + u16 dst_fabric_id = FIELD_GET(DF3_DST_FABRIC_ID, ctx->map.limit); + u8 i, j, shift = 4, mask = 0xF; + u32 reg, offset = 0x60; + u16 dst_node_id; + + /* Get Socket 1 register. */ + if (dst_fabric_id & df_cfg.socket_id_mask) + offset = 0x68; + + /* Read D18F0x06{0,8} (DF::Skt0CsTargetRemap0)/(DF::Skt0CsTargetRemap1) */ + if (df_indirect_read_broadcast(ctx->node_id, 0, offset, ®)) + return -EINVAL; + + /* Save 8 remap entries. */ + for (i = 0, j = 0; i < 8; i++, j++) + ctx->map.remap_array[i] = (reg >> (j * shift)) & mask; + + dst_node_id = dst_fabric_id & df_cfg.node_id_mask; + dst_node_id >>= df_cfg.node_id_shift; + + /* Read D18F2x090 (DF::Np2ChannelConfig) */ + if (df_indirect_read_broadcast(dst_node_id, 2, 0x90, ®)) + return -EINVAL; + + ctx->map.np2_bits = FIELD_GET(DF_LOG2_ADDR_64K_SPACE0, reg); + return 0; +} + +static int df2_get_dram_addr_map(struct addr_ctx *ctx) +{ + /* Read D18F0x110 (DramBaseAddress). */ + if (df_indirect_read_instance(ctx->node_id, 0, 0x110 + (8 * ctx->map.num), + ctx->inst_id, &ctx->map.base)) + return -EINVAL; + + /* Read D18F0x114 (DramLimitAddress). */ + if (df_indirect_read_instance(ctx->node_id, 0, 0x114 + (8 * ctx->map.num), + ctx->inst_id, &ctx->map.limit)) + return -EINVAL; + + return 0; +} + +static int df3_get_dram_addr_map(struct addr_ctx *ctx) +{ + if (df2_get_dram_addr_map(ctx)) + return -EINVAL; + + /* Read D18F0x3F8 (DfGlobalCtl). */ + if (df_indirect_read_instance(ctx->node_id, 0, 0x3F8, + ctx->inst_id, &ctx->map.ctl)) + return -EINVAL; + + return 0; +} + +static int df4_get_dram_addr_map(struct addr_ctx *ctx) +{ + u8 remap_sel, i, j, shift = 4, mask = 0xF; + u32 remap_reg; + + /* Read D18F7xE00 (DramBaseAddress). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0xE00 + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.base)) + return -EINVAL; + + /* Read D18F7xE04 (DramLimitAddress). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0xE04 + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.limit)) + return -EINVAL; + + /* Read D18F7xE08 (DramAddressCtl). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0xE08 + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.ctl)) + return -EINVAL; + + /* Read D18F7xE0C (DramAddressIntlv). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0xE0C + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.intlv)) + return -EINVAL; + + /* Check if Remap Enable bit is valid. */ + if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl)) + return 0; + + /* Fill with bogus values, because '0' is a valid value. */ + memset(&ctx->map.remap_array, 0xFF, sizeof(ctx->map.remap_array)); + + /* Get Remap registers. */ + remap_sel = FIELD_GET(DF4_REMAP_SEL, ctx->map.ctl); + + /* Read D18F7x180 (CsTargetRemap0A). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x180 + (8 * remap_sel), + ctx->inst_id, &remap_reg)) + return -EINVAL; + + /* Save first 8 remap entries. */ + for (i = 0, j = 0; i < 8; i++, j++) + ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask; + + /* Read D18F7x184 (CsTargetRemap0B). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x184 + (8 * remap_sel), + ctx->inst_id, &remap_reg)) + return -EINVAL; + + /* Save next 8 remap entries. */ + for (i = 8, j = 0; i < 16; i++, j++) + ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask; + + return 0; +} + +static int df4p5_get_dram_addr_map(struct addr_ctx *ctx) +{ + u8 remap_sel, i, j, shift = 5, mask = 0x1F; + u32 remap_reg; + + /* Read D18F7x200 (DramBaseAddress). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x200 + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.base)) + return -EINVAL; + + /* Read D18F7x204 (DramLimitAddress). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x204 + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.limit)) + return -EINVAL; + + /* Read D18F7x208 (DramAddressCtl). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x208 + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.ctl)) + return -EINVAL; + + /* Read D18F7x20C (DramAddressIntlv). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x20C + (16 * ctx->map.num), + ctx->inst_id, &ctx->map.intlv)) + return -EINVAL; + + /* Check if Remap Enable bit is valid. */ + if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl)) + return 0; + + /* Fill with bogus values, because '0' is a valid value. */ + memset(&ctx->map.remap_array, 0xFF, sizeof(ctx->map.remap_array)); + + /* Get Remap registers. */ + remap_sel = FIELD_GET(DF4p5_REMAP_SEL, ctx->map.ctl); + + /* Read D18F7x180 (CsTargetRemap0A). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x180 + (24 * remap_sel), + ctx->inst_id, &remap_reg)) + return -EINVAL; + + /* Save first 6 remap entries. */ + for (i = 0, j = 0; i < 6; i++, j++) + ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask; + + /* Read D18F7x184 (CsTargetRemap0B). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x184 + (24 * remap_sel), + ctx->inst_id, &remap_reg)) + return -EINVAL; + + /* Save next 6 remap entries. */ + for (i = 6, j = 0; i < 12; i++, j++) + ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask; + + /* Read D18F7x188 (CsTargetRemap0C). */ + if (df_indirect_read_instance(ctx->node_id, 7, 0x188 + (24 * remap_sel), + ctx->inst_id, &remap_reg)) + return -EINVAL; + + /* Save next 6 remap entries. */ + for (i = 12, j = 0; i < 18; i++, j++) + ctx->map.remap_array[i] = (remap_reg >> (j * shift)) & mask; + + return 0; +} + +static int get_dram_addr_map(struct addr_ctx *ctx) +{ + switch (df_cfg.rev) { + case DF2: return df2_get_dram_addr_map(ctx); + case DF3: + case DF3p5: return df3_get_dram_addr_map(ctx); + case DF4: return df4_get_dram_addr_map(ctx); + case DF4p5: return df4p5_get_dram_addr_map(ctx); + default: + atl_debug_on_bad_df_rev(); + return -EINVAL; + } +} + +static int get_coh_st_fabric_id(struct addr_ctx *ctx) +{ + u32 reg; + + /* Read D18F0x50 (FabricBlockInstanceInformation3). */ + if (df_indirect_read_instance(ctx->node_id, 0, 0x50, ctx->inst_id, ®)) + return -EINVAL; + + if (df_cfg.rev < DF4p5) + ctx->coh_st_fabric_id = FIELD_GET(DF2_COH_ST_FABRIC_ID, reg); + else + ctx->coh_st_fabric_id = FIELD_GET(DF4p5_COH_ST_FABRIC_ID, reg); + + return 0; +} + +static int find_normalized_offset(struct addr_ctx *ctx, u64 *norm_offset) +{ + u64 last_offset = 0; + int ret; + + for (ctx->map.num = 1; ctx->map.num < df_cfg.num_coh_st_maps; ctx->map.num++) { + ret = get_dram_offset(ctx, norm_offset); + if (ret < 0) + return ret; + + /* Continue search if this map's offset is not enabled. */ + if (!ret) + continue; + + /* Enabled offsets should never be 0. */ + if (*norm_offset == 0) { + atl_debug(ctx, "Enabled map %u offset is 0", ctx->map.num); + return -EINVAL; + } + + /* Offsets should always increase from one map to the next. */ + if (*norm_offset <= last_offset) { + atl_debug(ctx, "Map %u offset (0x%016llx) <= previous (0x%016llx)", + ctx->map.num, *norm_offset, last_offset); + return -EINVAL; + } + + /* Match if this map's offset is less than the current calculated address. */ + if (ctx->ret_addr >= *norm_offset) + break; + + last_offset = *norm_offset; + } + + /* + * Finished search without finding a match. + * Reset to map 0 and no offset. + */ + if (ctx->map.num >= df_cfg.num_coh_st_maps) { + ctx->map.num = 0; + *norm_offset = 0; + } + + return 0; +} + +static bool valid_map(struct addr_ctx *ctx) +{ + if (df_cfg.rev >= DF4) + return FIELD_GET(DF_ADDR_RANGE_VAL, ctx->map.ctl); + else + return FIELD_GET(DF_ADDR_RANGE_VAL, ctx->map.base); +} + +static int get_address_map_common(struct addr_ctx *ctx) +{ + u64 norm_offset = 0; + + if (get_coh_st_fabric_id(ctx)) + return -EINVAL; + + if (find_normalized_offset(ctx, &norm_offset)) + return -EINVAL; + + if (get_dram_addr_map(ctx)) + return -EINVAL; + + if (!valid_map(ctx)) + return -EINVAL; + + ctx->ret_addr -= norm_offset; + + return 0; +} + +static u8 get_num_intlv_chan(struct addr_ctx *ctx) +{ + switch (ctx->map.intlv_mode) { + case NONE: + return 1; + case NOHASH_2CHAN: + case DF2_2CHAN_HASH: + case DF3_COD4_2CHAN_HASH: + case DF4_NPS4_2CHAN_HASH: + case DF4p5_NPS4_2CHAN_1K_HASH: + case DF4p5_NPS4_2CHAN_2K_HASH: + return 2; + case DF4_NPS4_3CHAN_HASH: + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS4_3CHAN_2K_HASH: + return 3; + case NOHASH_4CHAN: + case DF3_COD2_4CHAN_HASH: + case DF4_NPS2_4CHAN_HASH: + case DF4p5_NPS2_4CHAN_1K_HASH: + case DF4p5_NPS2_4CHAN_2K_HASH: + return 4; + case DF4_NPS2_5CHAN_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + return 5; + case DF3_6CHAN: + case DF4_NPS2_6CHAN_HASH: + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + return 6; + case NOHASH_8CHAN: + case DF3_COD1_8CHAN_HASH: + case DF4_NPS1_8CHAN_HASH: + case DF4p5_NPS1_8CHAN_1K_HASH: + case DF4p5_NPS1_8CHAN_2K_HASH: + return 8; + case DF4_NPS1_10CHAN_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + return 10; + case DF4_NPS1_12CHAN_HASH: + case DF4p5_NPS1_12CHAN_1K_HASH: + case DF4p5_NPS1_12CHAN_2K_HASH: + return 12; + case NOHASH_16CHAN: + case DF4p5_NPS1_16CHAN_1K_HASH: + case DF4p5_NPS1_16CHAN_2K_HASH: + return 16; + case DF4p5_NPS0_24CHAN_1K_HASH: + case DF4p5_NPS0_24CHAN_2K_HASH: + return 24; + case NOHASH_32CHAN: + return 32; + default: + atl_debug_on_bad_intlv_mode(ctx); + return 0; + } +} + +static void calculate_intlv_bits(struct addr_ctx *ctx) +{ + ctx->map.num_intlv_chan = get_num_intlv_chan(ctx); + + ctx->map.total_intlv_chan = ctx->map.num_intlv_chan; + ctx->map.total_intlv_chan *= ctx->map.num_intlv_dies; + ctx->map.total_intlv_chan *= ctx->map.num_intlv_sockets; + + /* + * Get the number of bits needed to cover this many channels. + * order_base_2() rounds up automatically. + */ + ctx->map.total_intlv_bits = order_base_2(ctx->map.total_intlv_chan); +} + +static u8 get_intlv_bit_pos(struct addr_ctx *ctx) +{ + u8 addr_sel = 0; + + switch (df_cfg.rev) { + case DF2: + addr_sel = FIELD_GET(DF2_INTLV_ADDR_SEL, ctx->map.base); + break; + case DF3: + case DF3p5: + addr_sel = FIELD_GET(DF3_INTLV_ADDR_SEL, ctx->map.base); + break; + case DF4: + case DF4p5: + addr_sel = FIELD_GET(DF4_INTLV_ADDR_SEL, ctx->map.intlv); + break; + default: + atl_debug_on_bad_df_rev(); + break; + } + + /* Add '8' to get the 'interleave bit position'. */ + return addr_sel + 8; +} + +static u8 get_num_intlv_dies(struct addr_ctx *ctx) +{ + u8 dies = 0; + + switch (df_cfg.rev) { + case DF2: + dies = FIELD_GET(DF2_INTLV_NUM_DIES, ctx->map.limit); + break; + case DF3: + dies = FIELD_GET(DF3_INTLV_NUM_DIES, ctx->map.base); + break; + case DF3p5: + dies = FIELD_GET(DF3p5_INTLV_NUM_DIES, ctx->map.base); + break; + case DF4: + case DF4p5: + dies = FIELD_GET(DF4_INTLV_NUM_DIES, ctx->map.intlv); + break; + default: + atl_debug_on_bad_df_rev(); + break; + } + + /* Register value is log2, e.g. 0 -> 1 die, 1 -> 2 dies, etc. */ + return 1 << dies; +} + +static u8 get_num_intlv_sockets(struct addr_ctx *ctx) +{ + u8 sockets = 0; + + switch (df_cfg.rev) { + case DF2: + sockets = FIELD_GET(DF2_INTLV_NUM_SOCKETS, ctx->map.limit); + break; + case DF3: + case DF3p5: + sockets = FIELD_GET(DF2_INTLV_NUM_SOCKETS, ctx->map.base); + break; + case DF4: + case DF4p5: + sockets = FIELD_GET(DF4_INTLV_NUM_SOCKETS, ctx->map.intlv); + break; + default: + atl_debug_on_bad_df_rev(); + break; + } + + /* Register value is log2, e.g. 0 -> 1 sockets, 1 -> 2 sockets, etc. */ + return 1 << sockets; +} + +static int get_global_map_data(struct addr_ctx *ctx) +{ + if (get_intlv_mode(ctx)) + return -EINVAL; + + if (ctx->map.intlv_mode == DF3_6CHAN && + df3_6ch_get_dram_addr_map(ctx)) + return -EINVAL; + + ctx->map.intlv_bit_pos = get_intlv_bit_pos(ctx); + ctx->map.num_intlv_dies = get_num_intlv_dies(ctx); + ctx->map.num_intlv_sockets = get_num_intlv_sockets(ctx); + calculate_intlv_bits(ctx); + + return 0; +} + +static void dump_address_map(struct dram_addr_map *map) +{ + u8 i; + + pr_debug("intlv_mode=0x%x", map->intlv_mode); + pr_debug("num=0x%x", map->num); + pr_debug("base=0x%x", map->base); + pr_debug("limit=0x%x", map->limit); + pr_debug("ctl=0x%x", map->ctl); + pr_debug("intlv=0x%x", map->intlv); + + for (i = 0; i < MAX_COH_ST_CHANNELS; i++) + pr_debug("remap_array[%u]=0x%x", i, map->remap_array[i]); + + pr_debug("intlv_bit_pos=%u", map->intlv_bit_pos); + pr_debug("num_intlv_chan=%u", map->num_intlv_chan); + pr_debug("num_intlv_dies=%u", map->num_intlv_dies); + pr_debug("num_intlv_sockets=%u", map->num_intlv_sockets); + pr_debug("total_intlv_chan=%u", map->total_intlv_chan); + pr_debug("total_intlv_bits=%u", map->total_intlv_bits); +} + +int get_address_map(struct addr_ctx *ctx) +{ + int ret; + + ret = get_address_map_common(ctx); + if (ret) + return ret; + + ret = get_global_map_data(ctx); + if (ret) + return ret; + + dump_address_map(&ctx->map); + + return ret; +} diff --git a/drivers/ras/amd/atl/reg_fields.h b/drivers/ras/amd/atl/reg_fields.h new file mode 100644 index 000000000000..6aaa5093f42c --- /dev/null +++ b/drivers/ras/amd/atl/reg_fields.h @@ -0,0 +1,603 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD Address Translation Library + * + * reg_fields.h : Register field definitions + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +/* + * Notes on naming: + * 1) Use "DF_" prefix for fields that are the same for all revisions. + * 2) Use "DFx_" prefix for fields that differ between revisions. + * a) "x" is the first major revision where the new field appears. + * b) E.g., if DF2 and DF3 have the same field, then call it DF2. + * c) E.g., if DF3p5 and DF4 have the same field, then call it DF4. + */ + +/* + * Coherent Station Fabric ID + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x50 [Fabric Block Instance Information 3] + * DF2 BlockFabricId [19:8] + * DF3 BlockFabricId [19:8] + * DF3p5 BlockFabricId [19:8] + * DF4 BlockFabricId [19:8] + * DF4p5 BlockFabricId [15:8] + */ +#define DF2_COH_ST_FABRIC_ID GENMASK(19, 8) +#define DF4p5_COH_ST_FABRIC_ID GENMASK(15, 8) + +/* + * Component ID Mask + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * + * D18F1x208 [System Fabric ID Mask 0] + * DF3 ComponentIdMask [9:0] + * + * D18F1x150 [System Fabric ID Mask 0] + * DF3p5 ComponentIdMask [15:0] + * + * D18F4x1B0 [System Fabric ID Mask 0] + * DF4 ComponentIdMask [15:0] + * DF4p5 ComponentIdMask [15:0] + */ +#define DF3_COMPONENT_ID_MASK GENMASK(9, 0) +#define DF4_COMPONENT_ID_MASK GENMASK(15, 0) + +/* + * Destination Fabric ID + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x114 [DRAM Limit Address] + * DF2 DstFabricID [7:0] + * DF3 DstFabricID [9:0] + * DF3 DstFabricID [11:0] + * + * D18F7xE08 [DRAM Address Control] + * DF4 DstFabricID [27:16] + * + * D18F7x208 [DRAM Address Control] + * DF4p5 DstFabricID [23:16] + */ +#define DF2_DST_FABRIC_ID GENMASK(7, 0) +#define DF3_DST_FABRIC_ID GENMASK(9, 0) +#define DF3p5_DST_FABRIC_ID GENMASK(11, 0) +#define DF4_DST_FABRIC_ID GENMASK(27, 16) +#define DF4p5_DST_FABRIC_ID GENMASK(23, 16) + +/* + * Die ID Mask + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * D18F1x208 [System Fabric ID Mask] + * DF2 DieIdMask [15:8] + * + * D18F1x20C [System Fabric ID Mask 1] + * DF3 DieIdMask [18:16] + * + * D18F1x158 [System Fabric ID Mask 2] + * DF3p5 DieIdMask [15:0] + * + * D18F4x1B8 [System Fabric ID Mask 2] + * DF4 DieIdMask [15:0] + * DF4p5 DieIdMask [15:0] + */ +#define DF2_DIE_ID_MASK GENMASK(15, 8) +#define DF3_DIE_ID_MASK GENMASK(18, 16) +#define DF4_DIE_ID_MASK GENMASK(15, 0) + +/* + * Die ID Shift + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * D18F1x208 [System Fabric ID Mask] + * DF2 DieIdShift [27:24] + * + * DF3 N/A + * DF3p5 N/A + * DF4 N/A + * DF4p5 N/A + */ +#define DF2_DIE_ID_SHIFT GENMASK(27, 24) + +/* + * DRAM Address Range Valid + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x110 [DRAM Base Address] + * DF2 AddrRngVal [0] + * DF3 AddrRngVal [0] + * DF3p5 AddrRngVal [0] + * + * D18F7xE08 [DRAM Address Control] + * DF4 AddrRngVal [0] + * + * D18F7x208 [DRAM Address Control] + * DF4p5 AddrRngVal [0] + */ +#define DF_ADDR_RANGE_VAL BIT(0) + +/* + * DRAM Base Address + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x110 [DRAM Base Address] + * DF2 DramBaseAddr [31:12] + * DF3 DramBaseAddr [31:12] + * DF3p5 DramBaseAddr [31:12] + * + * D18F7xE00 [DRAM Base Address] + * DF4 DramBaseAddr [27:0] + * + * D18F7x200 [DRAM Base Address] + * DF4p5 DramBaseAddr [27:0] + */ +#define DF2_BASE_ADDR GENMASK(31, 12) +#define DF4_BASE_ADDR GENMASK(27, 0) + +/* + * DRAM Hole Base + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * D18F0x104 [DRAM Hole Control] + * DF2 DramHoleBase [31:24] + * DF3 DramHoleBase [31:24] + * DF3p5 DramHoleBase [31:24] + * + * D18F7x104 [DRAM Hole Control] + * DF4 DramHoleBase [31:24] + * DF4p5 DramHoleBase [31:24] + */ +#define DF_DRAM_HOLE_BASE_MASK GENMASK(31, 24) + +/* + * DRAM Limit Address + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x114 [DRAM Limit Address] + * DF2 DramLimitAddr [31:12] + * DF3 DramLimitAddr [31:12] + * DF3p5 DramLimitAddr [31:12] + * + * D18F7xE04 [DRAM Limit Address] + * DF4 DramLimitAddr [27:0] + * + * D18F7x204 [DRAM Limit Address] + * DF4p5 DramLimitAddr [27:0] + */ +#define DF2_DRAM_LIMIT_ADDR GENMASK(31, 12) +#define DF4_DRAM_LIMIT_ADDR GENMASK(27, 0) + +/* + * Hash Interleave Controls + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * + * D18F0x3F8 [DF Global Control] + * DF3 GlbHashIntlvCtl64K [20] + * GlbHashIntlvCtl2M [21] + * GlbHashIntlvCtl1G [22] + * + * DF3p5 GlbHashIntlvCtl64K [20] + * GlbHashIntlvCtl2M [21] + * GlbHashIntlvCtl1G [22] + * + * D18F7xE08 [DRAM Address Control] + * DF4 HashIntlvCtl64K [8] + * HashIntlvCtl2M [9] + * HashIntlvCtl1G [10] + * + * D18F7x208 [DRAM Address Control] + * DF4p5 HashIntlvCtl4K [7] + * HashIntlvCtl64K [8] + * HashIntlvCtl2M [9] + * HashIntlvCtl1G [10] + * HashIntlvCtl1T [15] + */ +#define DF3_HASH_CTL_64K BIT(20) +#define DF3_HASH_CTL_2M BIT(21) +#define DF3_HASH_CTL_1G BIT(22) +#define DF4_HASH_CTL_4K BIT(7) +#define DF4_HASH_CTL_64K BIT(8) +#define DF4_HASH_CTL_2M BIT(9) +#define DF4_HASH_CTL_1G BIT(10) +#define DF4_HASH_CTL_1T BIT(15) + +/* + * High Address Offset + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x1B4 [DRAM Offset] + * DF2 HiAddrOffset [31:20] + * DF3 HiAddrOffset [31:12] + * DF3p5 HiAddrOffset [31:12] + * + * D18F7x140 [DRAM Offset] + * DF4 HiAddrOffset [24:1] + * DF4p5 HiAddrOffset [24:1] + */ +#define DF2_HI_ADDR_OFFSET GENMASK(31, 20) +#define DF3_HI_ADDR_OFFSET GENMASK(31, 12) +#define DF4_HI_ADDR_OFFSET GENMASK(24, 1) + +/* + * High Address Offset Enable + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x1B4 [DRAM Offset] + * DF2 HiAddrOffsetEn [0] + * DF3 HiAddrOffsetEn [0] + * DF3p5 HiAddrOffsetEn [0] + * + * D18F7x140 [DRAM Offset] + * DF4 HiAddrOffsetEn [0] + * DF4p5 HiAddrOffsetEn [0] + */ +#define DF_HI_ADDR_OFFSET_EN BIT(0) + +/* + * Interleave Address Select + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x110 [DRAM Base Address] + * DF2 IntLvAddrSel [10:8] + * DF3 IntLvAddrSel [11:9] + * DF3p5 IntLvAddrSel [11:9] + * + * D18F7xE0C [DRAM Address Interleave] + * DF4 IntLvAddrSel [2:0] + * + * D18F7x20C [DRAM Address Interleave] + * DF4p5 IntLvAddrSel [2:0] + */ +#define DF2_INTLV_ADDR_SEL GENMASK(10, 8) +#define DF3_INTLV_ADDR_SEL GENMASK(11, 9) +#define DF4_INTLV_ADDR_SEL GENMASK(2, 0) + +/* + * Interleave Number of Channels + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x110 [DRAM Base Address] + * DF2 IntLvNumChan [7:4] + * DF3 IntLvNumChan [5:2] + * DF3p5 IntLvNumChan [6:2] + * + * D18F7xE0C [DRAM Address Interleave] + * DF4 IntLvNumChan [8:4] + * + * D18F7x20C [DRAM Address Interleave] + * DF4p5 IntLvNumChan [9:4] + */ +#define DF2_INTLV_NUM_CHAN GENMASK(7, 4) +#define DF3_INTLV_NUM_CHAN GENMASK(5, 2) +#define DF3p5_INTLV_NUM_CHAN GENMASK(6, 2) +#define DF4_INTLV_NUM_CHAN GENMASK(8, 4) +#define DF4p5_INTLV_NUM_CHAN GENMASK(9, 4) + +/* + * Interleave Number of Dies + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x114 [DRAM Limit Address] + * DF2 IntLvNumDies [11:10] + * + * D18F0x110 [DRAM Base Address] + * DF3 IntLvNumDies [7:6] + * DF3p5 IntLvNumDies [7] + * + * D18F7xE0C [DRAM Address Interleave] + * DF4 IntLvNumDies [13:12] + * + * D18F7x20C [DRAM Address Interleave] + * DF4p5 IntLvNumDies [13:12] + */ +#define DF2_INTLV_NUM_DIES GENMASK(11, 10) +#define DF3_INTLV_NUM_DIES GENMASK(7, 6) +#define DF3p5_INTLV_NUM_DIES BIT(7) +#define DF4_INTLV_NUM_DIES GENMASK(13, 12) + +/* + * Interleave Number of Sockets + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x114 [DRAM Limit Address] + * DF2 IntLvNumSockets [8] + * + * D18F0x110 [DRAM Base Address] + * DF3 IntLvNumSockets [8] + * DF3p5 IntLvNumSockets [8] + * + * D18F7xE0C [DRAM Address Interleave] + * DF4 IntLvNumSockets [18] + * + * D18F7x20C [DRAM Address Interleave] + * DF4p5 IntLvNumSockets [18] + */ +#define DF2_INTLV_NUM_SOCKETS BIT(8) +#define DF4_INTLV_NUM_SOCKETS BIT(18) + +/* + * Legacy MMIO Hole Enable + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * D18F0x110 [DRAM Base Address] + * DF2 LgcyMmioHoleEn [1] + * DF3 LgcyMmioHoleEn [1] + * DF3p5 LgcyMmioHoleEn [1] + * + * D18F7xE08 [DRAM Address Control] + * DF4 LgcyMmioHoleEn [1] + * + * D18F7x208 [DRAM Address Control] + * DF4p5 LgcyMmioHoleEn [1] + */ +#define DF_LEGACY_MMIO_HOLE_EN BIT(1) + +/* + * Log2 Address 64K Space 0 + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * + * D18F2x90 [Non-power-of-2 channel Configuration Register for COH_ST DRAM Address Maps] + * DF3 Log2Addr64KSpace0 [5:0] + * + * DF3p5 N/A + * DF4 N/A + * DF4p5 N/A + */ +#define DF_LOG2_ADDR_64K_SPACE0 GENMASK(5, 0) + +/* + * Major Revision + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * DF3 N/A + * DF3p5 N/A + * + * D18F0x040 [Fabric Block Instance Count] + * DF4 MajorRevision [27:24] + * DF4p5 MajorRevision [27:24] + */ +#define DF_MAJOR_REVISION GENMASK(27, 24) + +/* + * Minor Revision + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * DF3 N/A + * DF3p5 N/A + * + * D18F0x040 [Fabric Block Instance Count] + * DF4 MinorRevision [23:16] + * DF4p5 MinorRevision [23:16] + */ +#define DF_MINOR_REVISION GENMASK(23, 16) + +/* + * Node ID Mask + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * + * D18F1x208 [System Fabric ID Mask 0] + * DF3 NodeIdMask [25:16] + * + * D18F1x150 [System Fabric ID Mask 0] + * DF3p5 NodeIdMask [31:16] + * + * D18F4x1B0 [System Fabric ID Mask 0] + * DF4 NodeIdMask [31:16] + * DF4p5 NodeIdMask [31:16] + */ +#define DF3_NODE_ID_MASK GENMASK(25, 16) +#define DF4_NODE_ID_MASK GENMASK(31, 16) + +/* + * Node ID Shift + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * + * D18F1x20C [System Fabric ID Mask 1] + * DF3 NodeIdShift [3:0] + * + * D18F1x154 [System Fabric ID Mask 1] + * DF3p5 NodeIdShift [3:0] + * + * D18F4x1B4 [System Fabric ID Mask 1] + * DF4 NodeIdShift [3:0] + * DF4p5 NodeIdShift [3:0] + */ +#define DF3_NODE_ID_SHIFT GENMASK(3, 0) + +/* + * Remap Enable + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * DF3 N/A + * DF3p5 N/A + * + * D18F7xE08 [DRAM Address Control] + * DF4 RemapEn [4] + * + * D18F7x208 [DRAM Address Control] + * DF4p5 RemapEn [4] + */ +#define DF4_REMAP_EN BIT(4) + +/* + * Remap Select + * + * Access type: Instance + * + * Register + * Rev Fieldname Bits + * + * DF2 N/A + * DF3 N/A + * DF3p5 N/A + * + * D18F7xE08 [DRAM Address Control] + * DF4 RemapSel [7:5] + * + * D18F7x208 [DRAM Address Control] + * DF4p5 RemapSel [6:5] + */ +#define DF4_REMAP_SEL GENMASK(7, 5) +#define DF4p5_REMAP_SEL GENMASK(6, 5) + +/* + * Socket ID Mask + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * D18F1x208 [System Fabric ID Mask] + * DF2 SocketIdMask [23:16] + * + * D18F1x20C [System Fabric ID Mask 1] + * DF3 SocketIdMask [26:24] + * + * D18F1x158 [System Fabric ID Mask 2] + * DF3p5 SocketIdMask [31:16] + * + * D18F4x1B8 [System Fabric ID Mask 2] + * DF4 SocketIdMask [31:16] + * DF4p5 SocketIdMask [31:16] + */ +#define DF2_SOCKET_ID_MASK GENMASK(23, 16) +#define DF3_SOCKET_ID_MASK GENMASK(26, 24) +#define DF4_SOCKET_ID_MASK GENMASK(31, 16) + +/* + * Socket ID Shift + * + * Access type: Broadcast + * + * Register + * Rev Fieldname Bits + * + * D18F1x208 [System Fabric ID Mask] + * DF2 SocketIdShift [31:28] + * + * D18F1x20C [System Fabric ID Mask 1] + * DF3 SocketIdShift [9:8] + * + * D18F1x158 [System Fabric ID Mask 2] + * DF3p5 SocketIdShift [11:8] + * + * D18F4x1B4 [System Fabric ID Mask 1] + * DF4 SocketIdShift [11:8] + * DF4p5 SocketIdShift [11:8] + */ +#define DF2_SOCKET_ID_SHIFT GENMASK(31, 28) +#define DF3_SOCKET_ID_SHIFT GENMASK(9, 8) +#define DF4_SOCKET_ID_SHIFT GENMASK(11, 8) diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c new file mode 100644 index 000000000000..af61f2f1d6de --- /dev/null +++ b/drivers/ras/amd/atl/system.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * system.c : Functions to read and save system-wide data + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#include "internal.h" + +int determine_node_id(struct addr_ctx *ctx, u8 socket_id, u8 die_id) +{ + u16 socket_id_bits, die_id_bits; + + if (socket_id > 0 && df_cfg.socket_id_mask == 0) { + atl_debug(ctx, "Invalid socket inputs: socket_id=%u socket_id_mask=0x%x", + socket_id, df_cfg.socket_id_mask); + return -EINVAL; + } + + /* Do each step independently to avoid shift out-of-bounds issues. */ + socket_id_bits = socket_id; + socket_id_bits <<= df_cfg.socket_id_shift; + socket_id_bits &= df_cfg.socket_id_mask; + + if (die_id > 0 && df_cfg.die_id_mask == 0) { + atl_debug(ctx, "Invalid die inputs: die_id=%u die_id_mask=0x%x", + die_id, df_cfg.die_id_mask); + return -EINVAL; + } + + /* Do each step independently to avoid shift out-of-bounds issues. */ + die_id_bits = die_id; + die_id_bits <<= df_cfg.die_id_shift; + die_id_bits &= df_cfg.die_id_mask; + + ctx->node_id = (socket_id_bits | die_id_bits) >> df_cfg.node_id_shift; + return 0; +} + +static void df2_get_masks_shifts(u32 mask0) +{ + df_cfg.socket_id_shift = FIELD_GET(DF2_SOCKET_ID_SHIFT, mask0); + df_cfg.socket_id_mask = FIELD_GET(DF2_SOCKET_ID_MASK, mask0); + df_cfg.die_id_shift = FIELD_GET(DF2_DIE_ID_SHIFT, mask0); + df_cfg.die_id_mask = FIELD_GET(DF2_DIE_ID_MASK, mask0); + df_cfg.node_id_shift = df_cfg.die_id_shift; + df_cfg.node_id_mask = df_cfg.socket_id_mask | df_cfg.die_id_mask; + df_cfg.component_id_mask = ~df_cfg.node_id_mask; +} + +static void df3_get_masks_shifts(u32 mask0, u32 mask1) +{ + df_cfg.component_id_mask = FIELD_GET(DF3_COMPONENT_ID_MASK, mask0); + df_cfg.node_id_mask = FIELD_GET(DF3_NODE_ID_MASK, mask0); + + df_cfg.node_id_shift = FIELD_GET(DF3_NODE_ID_SHIFT, mask1); + df_cfg.socket_id_shift = FIELD_GET(DF3_SOCKET_ID_SHIFT, mask1); + df_cfg.socket_id_mask = FIELD_GET(DF3_SOCKET_ID_MASK, mask1); + df_cfg.die_id_mask = FIELD_GET(DF3_DIE_ID_MASK, mask1); +} + +static void df3p5_get_masks_shifts(u32 mask0, u32 mask1, u32 mask2) +{ + df_cfg.component_id_mask = FIELD_GET(DF4_COMPONENT_ID_MASK, mask0); + df_cfg.node_id_mask = FIELD_GET(DF4_NODE_ID_MASK, mask0); + + df_cfg.node_id_shift = FIELD_GET(DF3_NODE_ID_SHIFT, mask1); + df_cfg.socket_id_shift = FIELD_GET(DF4_SOCKET_ID_SHIFT, mask1); + + df_cfg.socket_id_mask = FIELD_GET(DF4_SOCKET_ID_MASK, mask2); + df_cfg.die_id_mask = FIELD_GET(DF4_DIE_ID_MASK, mask2); +} + +static void df4_get_masks_shifts(u32 mask0, u32 mask1, u32 mask2) +{ + df3p5_get_masks_shifts(mask0, mask1, mask2); + + if (!(df_cfg.flags.socket_id_shift_quirk && df_cfg.socket_id_shift == 1)) + return; + + df_cfg.socket_id_shift = 0; + df_cfg.socket_id_mask = 1; + df_cfg.die_id_shift = 0; + df_cfg.die_id_mask = 0; + df_cfg.node_id_shift = 8; + df_cfg.node_id_mask = 0x100; +} + +static int df4_get_fabric_id_mask_registers(void) +{ + u32 mask0, mask1, mask2; + + /* Read D18F4x1B0 (SystemFabricIdMask0) */ + if (df_indirect_read_broadcast(0, 4, 0x1B0, &mask0)) + return -EINVAL; + + /* Read D18F4x1B4 (SystemFabricIdMask1) */ + if (df_indirect_read_broadcast(0, 4, 0x1B4, &mask1)) + return -EINVAL; + + /* Read D18F4x1B8 (SystemFabricIdMask2) */ + if (df_indirect_read_broadcast(0, 4, 0x1B8, &mask2)) + return -EINVAL; + + df4_get_masks_shifts(mask0, mask1, mask2); + return 0; +} + +static int df4_determine_df_rev(u32 reg) +{ + df_cfg.rev = FIELD_GET(DF_MINOR_REVISION, reg) < 5 ? DF4 : DF4p5; + + /* Check for special cases or quirks based on Device/Vendor IDs.*/ + + /* Read D18F0x000 (DeviceVendorId0) */ + if (df_indirect_read_broadcast(0, 0, 0, ®)) + return -EINVAL; + + if (reg == DF_FUNC0_ID_ZEN4_SERVER) + df_cfg.flags.socket_id_shift_quirk = 1; + + return df4_get_fabric_id_mask_registers(); +} + +static int determine_df_rev_legacy(void) +{ + u32 fabric_id_mask0, fabric_id_mask1, fabric_id_mask2; + + /* + * Check for DF3.5. + * + * Component ID Mask must be non-zero. Register D18F1x150 is + * reserved pre-DF3.5, so value will be Read-as-Zero. + */ + + /* Read D18F1x150 (SystemFabricIdMask0). */ + if (df_indirect_read_broadcast(0, 1, 0x150, &fabric_id_mask0)) + return -EINVAL; + + if (FIELD_GET(DF4_COMPONENT_ID_MASK, fabric_id_mask0)) { + df_cfg.rev = DF3p5; + + /* Read D18F1x154 (SystemFabricIdMask1) */ + if (df_indirect_read_broadcast(0, 1, 0x154, &fabric_id_mask1)) + return -EINVAL; + + /* Read D18F1x158 (SystemFabricIdMask2) */ + if (df_indirect_read_broadcast(0, 1, 0x158, &fabric_id_mask2)) + return -EINVAL; + + df3p5_get_masks_shifts(fabric_id_mask0, fabric_id_mask1, fabric_id_mask2); + return 0; + } + + /* + * Check for DF3. + * + * Component ID Mask must be non-zero. Field is Read-as-Zero on DF2. + */ + + /* Read D18F1x208 (SystemFabricIdMask). */ + if (df_indirect_read_broadcast(0, 1, 0x208, &fabric_id_mask0)) + return -EINVAL; + + if (FIELD_GET(DF3_COMPONENT_ID_MASK, fabric_id_mask0)) { + df_cfg.rev = DF3; + + /* Read D18F1x20C (SystemFabricIdMask1) */ + if (df_indirect_read_broadcast(0, 1, 0x20C, &fabric_id_mask1)) + return -EINVAL; + + df3_get_masks_shifts(fabric_id_mask0, fabric_id_mask1); + return 0; + } + + /* Default to DF2. */ + df_cfg.rev = DF2; + df2_get_masks_shifts(fabric_id_mask0); + return 0; +} + +static int determine_df_rev(void) +{ + u32 reg; + u8 rev; + + if (df_cfg.rev != UNKNOWN) + return 0; + + /* Read D18F0x40 (FabricBlockInstanceCount). */ + if (df_indirect_read_broadcast(0, 0, 0x40, ®)) + return -EINVAL; + + /* + * Revision fields added for DF4 and later. + * + * Major revision of '0' is found pre-DF4. Field is Read-as-Zero. + */ + rev = FIELD_GET(DF_MAJOR_REVISION, reg); + if (!rev) + return determine_df_rev_legacy(); + + /* + * Fail out for major revisions other than '4'. + * + * Explicit support should be added for newer systems to avoid issues. + */ + if (rev == 4) + return df4_determine_df_rev(reg); + + return -EINVAL; +} + +static void get_num_maps(void) +{ + switch (df_cfg.rev) { + case DF2: + case DF3: + case DF3p5: + df_cfg.num_coh_st_maps = 2; + break; + case DF4: + case DF4p5: + df_cfg.num_coh_st_maps = 4; + break; + default: + atl_debug_on_bad_df_rev(); + } +} + +static void apply_node_id_shift(void) +{ + if (df_cfg.rev == DF2) + return; + + df_cfg.die_id_shift = df_cfg.node_id_shift; + df_cfg.die_id_mask <<= df_cfg.node_id_shift; + df_cfg.socket_id_mask <<= df_cfg.node_id_shift; + df_cfg.socket_id_shift += df_cfg.node_id_shift; +} + +static void dump_df_cfg(void) +{ + pr_debug("rev=0x%x", df_cfg.rev); + + pr_debug("component_id_mask=0x%x", df_cfg.component_id_mask); + pr_debug("die_id_mask=0x%x", df_cfg.die_id_mask); + pr_debug("node_id_mask=0x%x", df_cfg.node_id_mask); + pr_debug("socket_id_mask=0x%x", df_cfg.socket_id_mask); + + pr_debug("die_id_shift=0x%x", df_cfg.die_id_shift); + pr_debug("node_id_shift=0x%x", df_cfg.node_id_shift); + pr_debug("socket_id_shift=0x%x", df_cfg.socket_id_shift); + + pr_debug("num_coh_st_maps=%u", df_cfg.num_coh_st_maps); + + pr_debug("flags.legacy_ficaa=%u", df_cfg.flags.legacy_ficaa); + pr_debug("flags.socket_id_shift_quirk=%u", df_cfg.flags.socket_id_shift_quirk); +} + +int get_df_system_info(void) +{ + if (determine_df_rev()) { + pr_warn("amd_atl: Failed to determine DF Revision"); + df_cfg.rev = UNKNOWN; + return -EINVAL; + } + + apply_node_id_shift(); + + get_num_maps(); + + dump_df_cfg(); + + return 0; +} diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c new file mode 100644 index 000000000000..9d51e4954687 --- /dev/null +++ b/drivers/ras/amd/atl/umc.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * AMD Address Translation Library + * + * umc.c : Unified Memory Controller (UMC) topology helpers + * + * Copyright (c) 2023, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Author: Yazen Ghannam + */ + +#include "internal.h" + +static u8 get_die_id(struct atl_err *err) +{ + /* + * For CPUs, this is the AMD Node ID modulo the number + * of AMD Nodes per socket. + */ + return topology_die_id(err->cpu) % amd_get_nodes_per_socket(); +} + +#define UMC_CHANNEL_NUM GENMASK(31, 20) +static u8 get_coh_st_inst_id(struct atl_err *err) +{ + return FIELD_GET(UMC_CHANNEL_NUM, err->ipid); +} + +unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err) +{ + u8 socket_id = topology_physical_package_id(err->cpu); + u8 coh_st_inst_id = get_coh_st_inst_id(err); + unsigned long addr = err->addr; + u8 die_id = get_die_id(err); + + pr_debug("socket_id=0x%x die_id=0x%x coh_st_inst_id=0x%x addr=0x%016lx", + socket_id, die_id, coh_st_inst_id, addr); + + return norm_to_sys_addr(socket_id, die_id, coh_st_inst_id, addr); +} diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index 9a12398ee0a8..547e058e2d56 100644 --- a/drivers/ras/ras.c +++ b/drivers/ras/ras.c @@ -10,6 +10,37 @@ #include #include +#if IS_ENABLED(CONFIG_AMD_ATL) +/* + * Once set, this function pointer should never be unset. + * + * The library module will set this pointer if it successfully loads. The module + * should not be unloaded except for testing and debug purposes. + */ +static unsigned long (*amd_atl_umc_na_to_spa)(struct atl_err *err); + +void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *)) +{ + amd_atl_umc_na_to_spa = f; +} +EXPORT_SYMBOL_GPL(amd_atl_register_decoder); + +void amd_atl_unregister_decoder(void) +{ + amd_atl_umc_na_to_spa = NULL; +} +EXPORT_SYMBOL_GPL(amd_atl_unregister_decoder); + +unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) +{ + if (!amd_atl_umc_na_to_spa) + return -EINVAL; + + return amd_atl_umc_na_to_spa(err); +} +EXPORT_SYMBOL_GPL(amd_convert_umc_mca_addr_to_sys_addr); +#endif /* CONFIG_AMD_ATL */ + #define CREATE_TRACE_POINTS #define TRACE_INCLUDE_PATH ../../include/ras #include diff --git a/include/linux/ras.h b/include/linux/ras.h index ff030993603e..d610156c12f5 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -29,6 +29,7 @@ void log_non_standard_event(const guid_t *sec_type, void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev); #else void log_arm_hw_error(struct cper_sec_proc_arm *err); + #endif #else @@ -58,5 +59,20 @@ log_arm_hw_error(struct cper_sec_proc_arm *err) { return; } #else #define GET_LOGICAL_INDEX(mpidr) -EINVAL #endif /* CONFIG_ARM || CONFIG_ARM64 */ +struct atl_err { + u64 addr; + u64 ipid; + u32 cpu; +}; + +#if IS_ENABLED(CONFIG_AMD_ATL) +void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *)); +void amd_atl_unregister_decoder(void); +unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err); +#else +static inline unsigned long +amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; } +#endif /* CONFIG_AMD_ATL */ + #endif /* __RAS_H__ */ -- Gitee From 4ce174fc9b798f4cbaa0f6c560b5625f61ed33ee Mon Sep 17 00:00:00 2001 From: Muralidhara M K Date: Sun, 28 Jan 2024 09:59:50 -0600 Subject: [PATCH 02/23] RAS/AMD/ATL: Add MI300 support mainline inclusion from mainline-v6.9-rc1 commit 453f0ae797328e675840466c80e5b268d7feb9ba category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/453f0ae797328e675840466c80e5b268d7feb9ba -------------------------------- commit 453f0ae797328e675840466c80e5b268d7feb9ba upstream. AMD MI300 systems include on-die HBM3 memory and a unique topology. And they fall under Data Fabric version 4.5 in overall design. Generally, topology information (IDs, etc.) is gathered from Data Fabric registers. However, the unique topology for MI300 means that some topology information is fixed in hardware and follows arbitrary mappings. Furthermore, not all hardware instances are software-visible, so register accesses must be adjusted. Recognize and add helper functions for the new MI300 interleave modes. Add lookup tables for fixed values where appropriate. Adjust how Die and Node IDs are found and used. Also, fix some register bitmasks that were mislabeled. Signed-off-by: Muralidhara M K Co-developed-by: Yazen Ghannam Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240128155950.1434067-1-yazen.ghannam@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- drivers/ras/amd/atl/access.c | 27 ++++++++ drivers/ras/amd/atl/dehash.c | 95 +++++++++++++++++++++++++++- drivers/ras/amd/atl/denormalize.c | 102 ++++++++++++++++++++++++++++++ drivers/ras/amd/atl/internal.h | 10 ++- drivers/ras/amd/atl/map.c | 17 +++++ drivers/ras/amd/atl/reg_fields.h | 9 ++- drivers/ras/amd/atl/system.c | 3 + drivers/ras/amd/atl/umc.c | 51 +++++++++++++++ 8 files changed, 309 insertions(+), 5 deletions(-) diff --git a/drivers/ras/amd/atl/access.c b/drivers/ras/amd/atl/access.c index f6dd87bb2c35..ee4661ed28ba 100644 --- a/drivers/ras/amd/atl/access.c +++ b/drivers/ras/amd/atl/access.c @@ -36,6 +36,32 @@ static DEFINE_MUTEX(df_indirect_mutex); #define DF_FICAA_REG_NUM_LEGACY GENMASK(10, 2) +static u16 get_accessible_node(u16 node) +{ + /* + * On heterogeneous systems, not all AMD Nodes are accessible + * through software-visible registers. The Node ID needs to be + * adjusted for register accesses. But its value should not be + * changed for the translation methods. + */ + if (df_cfg.flags.heterogeneous) { + /* Only Node 0 is accessible on DF3.5 systems. */ + if (df_cfg.rev == DF3p5) + node = 0; + + /* + * Only the first Node in each Socket is accessible on + * DF4.5 systems, and this is visible to software as one + * Fabric per Socket. The Socket ID can be derived from + * the Node ID and global shift values. + */ + if (df_cfg.rev == DF4p5) + node >>= df_cfg.socket_id_shift - df_cfg.node_id_shift; + } + + return node; +} + static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) { u32 ficaa_addr = 0x8C, ficad_addr = 0xB8; @@ -43,6 +69,7 @@ static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *l int err = -ENODEV; u32 ficaa = 0; + node = get_accessible_node(node); if (node >= amd_nb_num()) goto out; diff --git a/drivers/ras/amd/atl/dehash.c b/drivers/ras/amd/atl/dehash.c index 6f414926e6fe..4ea46262c4f5 100644 --- a/drivers/ras/amd/atl/dehash.c +++ b/drivers/ras/amd/atl/dehash.c @@ -253,7 +253,7 @@ static int df4p5_dehash_addr(struct addr_ctx *ctx) hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); - hash_ctl_1T = FIELD_GET(DF4_HASH_CTL_1T, ctx->map.ctl); + hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl); /* * Generate a unique address to determine which bits @@ -343,6 +343,94 @@ static int df4p5_dehash_addr(struct addr_ctx *ctx) return 0; } +/* + * MI300 hash bits + * 4K 64K 2M 1G 1T 1T + * COH_ST_Select[0] = XOR of addr{8, 12, 15, 22, 29, 36, 43} + * COH_ST_Select[1] = XOR of addr{9, 13, 16, 23, 30, 37, 44} + * COH_ST_Select[2] = XOR of addr{10, 14, 17, 24, 31, 38, 45} + * COH_ST_Select[3] = XOR of addr{11, 18, 25, 32, 39, 46} + * COH_ST_Select[4] = XOR of addr{14, 19, 26, 33, 40, 47} aka Stack + * DieID[0] = XOR of addr{12, 20, 27, 34, 41 } + * DieID[1] = XOR of addr{13, 21, 28, 35, 42 } + */ +static int mi300_dehash_addr(struct addr_ctx *ctx) +{ + bool hash_ctl_4k, hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T; + bool hashed_bit, intlv_bit, test_bit; + u8 num_intlv_bits, base_bit, i; + + if (!map_bits_valid(ctx, 8, 8, 4, 1)) + return -EINVAL; + + hash_ctl_4k = FIELD_GET(DF4p5_HASH_CTL_4K, ctx->map.ctl); + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl); + + /* Channel bits */ + num_intlv_bits = ilog2(ctx->map.num_intlv_chan); + + for (i = 0; i < num_intlv_bits; i++) { + base_bit = 8 + i; + + /* COH_ST_Select[4] jumps to a base bit of 14. */ + if (i == 4) + base_bit = 14; + + intlv_bit = BIT_ULL(base_bit) & ctx->ret_addr; + + hashed_bit = intlv_bit; + + /* 4k hash bit only applies to the first 3 bits. */ + if (i <= 2) { + test_bit = BIT_ULL(12 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_4k; + } + + /* Use temporary 'test_bit' value to avoid Sparse warnings. */ + test_bit = BIT_ULL(15 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_64k; + test_bit = BIT_ULL(22 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_2M; + test_bit = BIT_ULL(29 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1G; + test_bit = BIT_ULL(36 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1T; + test_bit = BIT_ULL(43 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(base_bit); + } + + /* Die bits */ + num_intlv_bits = ilog2(ctx->map.num_intlv_dies); + + for (i = 0; i < num_intlv_bits; i++) { + base_bit = 12 + i; + + intlv_bit = BIT_ULL(base_bit) & ctx->ret_addr; + + hashed_bit = intlv_bit; + + test_bit = BIT_ULL(20 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_64k; + test_bit = BIT_ULL(27 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_2M; + test_bit = BIT_ULL(34 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1G; + test_bit = BIT_ULL(41 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(base_bit); + } + + return 0; +} + int dehash_address(struct addr_ctx *ctx) { switch (ctx->map.intlv_mode) { @@ -400,6 +488,11 @@ int dehash_address(struct addr_ctx *ctx) case DF4p5_NPS1_16CHAN_2K_HASH: return df4p5_dehash_addr(ctx); + case MI3_HASH_8CHAN: + case MI3_HASH_16CHAN: + case MI3_HASH_32CHAN: + return mi300_dehash_addr(ctx); + default: atl_debug_on_bad_intlv_mode(ctx); return -EINVAL; diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c index 01f1d0fb6799..d5d0e1fda159 100644 --- a/drivers/ras/amd/atl/denormalize.c +++ b/drivers/ras/amd/atl/denormalize.c @@ -80,6 +80,40 @@ static u64 make_space_for_coh_st_id_split_2_1(struct addr_ctx *ctx) return expand_bits(12, ctx->map.total_intlv_bits - 1, denorm_addr); } +/* + * Make space for CS ID at bits [14:8] as follows: + * + * 8 channels -> bits [10:8] + * 16 channels -> bits [11:8] + * 32 channels -> bits [14,11:8] + * + * 1 die -> N/A + * 2 dies -> bit [12] + * 4 dies -> bits [13:12] + */ +static u64 make_space_for_coh_st_id_mi300(struct addr_ctx *ctx) +{ + u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan); + u64 denorm_addr; + + if (ctx->map.intlv_bit_pos != 8) { + pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos); + return ~0ULL; + } + + /* Channel bits. Covers up to 4 bits at [11:8]. */ + denorm_addr = expand_bits(8, min(num_intlv_bits, 4), ctx->ret_addr); + + /* Die bits. Always starts at [12]. */ + denorm_addr = expand_bits(12, ilog2(ctx->map.num_intlv_dies), denorm_addr); + + /* Additional channel bit at [14]. */ + if (num_intlv_bits > 4) + denorm_addr = expand_bits(14, 1, denorm_addr); + + return denorm_addr; +} + /* * Take the current calculated address and shift enough bits in the middle * to make a gap where the interleave bits will be inserted. @@ -107,6 +141,12 @@ static u64 make_space_for_coh_st_id(struct addr_ctx *ctx) case DF4p5_NPS1_8CHAN_2K_HASH: case DF4p5_NPS1_16CHAN_2K_HASH: return make_space_for_coh_st_id_split_2_1(ctx); + + case MI3_HASH_8CHAN: + case MI3_HASH_16CHAN: + case MI3_HASH_32CHAN: + return make_space_for_coh_st_id_mi300(ctx); + default: atl_debug_on_bad_intlv_mode(ctx); return ~0ULL; @@ -204,6 +244,32 @@ static u16 get_coh_st_id_df4(struct addr_ctx *ctx) return coh_st_id; } +/* + * MI300 hash has: + * (C)hannel[3:0] = coh_st_id[3:0] + * (S)tack[0] = coh_st_id[4] + * (D)ie[1:0] = coh_st_id[6:5] + * + * Hashed coh_st_id is swizzled so that Stack bit is at the end. + * coh_st_id = SDDCCCC + */ +static u16 get_coh_st_id_mi300(struct addr_ctx *ctx) +{ + u8 channel_bits, die_bits, stack_bit; + u16 die_id; + + /* Subtract the "base" Destination Fabric ID. */ + ctx->coh_st_fabric_id -= get_dst_fabric_id(ctx); + + die_id = (ctx->coh_st_fabric_id & df_cfg.die_id_mask) >> df_cfg.die_id_shift; + + channel_bits = FIELD_GET(GENMASK(3, 0), ctx->coh_st_fabric_id); + stack_bit = FIELD_GET(BIT(4), ctx->coh_st_fabric_id) << 6; + die_bits = die_id << 4; + + return stack_bit | die_bits | channel_bits; +} + /* * Derive the correct Coherent Station ID that represents the interleave bits * used within the system physical address. This accounts for the @@ -237,6 +303,11 @@ static u16 calculate_coh_st_id(struct addr_ctx *ctx) case DF4p5_NPS1_16CHAN_2K_HASH: return get_coh_st_id_df4(ctx); + case MI3_HASH_8CHAN: + case MI3_HASH_16CHAN: + case MI3_HASH_32CHAN: + return get_coh_st_id_mi300(ctx); + /* COH_ST ID is simply the COH_ST Fabric ID adjusted by the Destination Fabric ID. */ case DF4p5_NPS2_4CHAN_1K_HASH: case DF4p5_NPS1_8CHAN_1K_HASH: @@ -287,6 +358,9 @@ static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id case NOHASH_8CHAN: case NOHASH_16CHAN: case NOHASH_32CHAN: + case MI3_HASH_8CHAN: + case MI3_HASH_16CHAN: + case MI3_HASH_32CHAN: case DF2_2CHAN_HASH: return insert_coh_st_id_at_intlv_bit(ctx, denorm_addr, coh_st_id); @@ -314,6 +388,31 @@ static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id } } +/* + * MI300 systems have a fixed, hardware-defined physical-to-logical + * Coherent Station mapping. The Remap registers are not used. + */ +static const u16 phy_to_log_coh_st_map_mi300[] = { + 12, 13, 14, 15, + 8, 9, 10, 11, + 4, 5, 6, 7, + 0, 1, 2, 3, + 28, 29, 30, 31, + 24, 25, 26, 27, + 20, 21, 22, 23, + 16, 17, 18, 19, +}; + +static u16 get_logical_coh_st_fabric_id_mi300(struct addr_ctx *ctx) +{ + if (ctx->inst_id >= sizeof(phy_to_log_coh_st_map_mi300)) { + atl_debug(ctx, "Instance ID out of range"); + return ~0; + } + + return phy_to_log_coh_st_map_mi300[ctx->inst_id] | (ctx->node_id << df_cfg.node_id_shift); +} + static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx) { u16 component_id, log_fabric_id; @@ -321,6 +420,9 @@ static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx) /* Start with the physical COH_ST Fabric ID. */ u16 phys_fabric_id = ctx->coh_st_fabric_id; + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + return get_logical_coh_st_fabric_id_mi300(ctx); + /* Skip logical ID lookup if remapping is disabled. */ if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl) && ctx->map.intlv_mode != DF3_6CHAN) diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index 13f1b6098c96..21d45755e5f2 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -27,8 +27,12 @@ /* PCI ID for Zen4 Server DF Function 0. */ #define DF_FUNC0_ID_ZEN4_SERVER 0x14AD1022 +/* PCI IDs for MI300 DF Function 0. */ +#define DF_FUNC0_ID_MI300 0x15281022 + /* Shift needed for adjusting register values to true values. */ #define DF_DRAM_BASE_LIMIT_LSB 28 +#define MI300_DRAM_LIMIT_LSB 20 enum df_revisions { UNKNOWN, @@ -59,6 +63,9 @@ enum intlv_modes { DF4_NPS1_12CHAN_HASH = 0x15, DF4_NPS2_5CHAN_HASH = 0x16, DF4_NPS1_10CHAN_HASH = 0x17, + MI3_HASH_8CHAN = 0x18, + MI3_HASH_16CHAN = 0x19, + MI3_HASH_32CHAN = 0x1A, DF2_2CHAN_HASH = 0x21, /* DF4.5 modes are all IntLvNumChan + 0x20 */ DF4p5_NPS1_16CHAN_1K_HASH = 0x2C, @@ -86,7 +93,8 @@ enum intlv_modes { struct df_flags { __u8 legacy_ficaa : 1, socket_id_shift_quirk : 1, - __reserved_0 : 6; + heterogeneous : 1, + __reserved_0 : 5; }; struct df_config { diff --git a/drivers/ras/amd/atl/map.c b/drivers/ras/amd/atl/map.c index 33f549b6255a..8b908e8d7495 100644 --- a/drivers/ras/amd/atl/map.c +++ b/drivers/ras/amd/atl/map.c @@ -63,6 +63,10 @@ static int df4p5_get_intlv_mode(struct addr_ctx *ctx) if (ctx->map.intlv_mode <= NOHASH_32CHAN) return 0; + if (ctx->map.intlv_mode >= MI3_HASH_8CHAN && + ctx->map.intlv_mode <= MI3_HASH_32CHAN) + return 0; + /* * Modes matching the ranges above are returned as-is. * @@ -125,6 +129,9 @@ static u64 get_hi_addr_offset(u32 reg_dram_offset) atl_debug_on_bad_df_rev(); } + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + shift = MI300_DRAM_LIMIT_LSB; + return hi_addr_offset << shift; } @@ -369,6 +376,13 @@ static int get_coh_st_fabric_id(struct addr_ctx *ctx) { u32 reg; + /* + * On MI300 systems, the Coherent Station Fabric ID is derived + * later. And it does not depend on the register value. + */ + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + return 0; + /* Read D18F0x50 (FabricBlockInstanceInformation3). */ if (df_indirect_read_instance(ctx->node_id, 0, 0x50, ctx->inst_id, ®)) return -EINVAL; @@ -490,6 +504,7 @@ static u8 get_num_intlv_chan(struct addr_ctx *ctx) case NOHASH_8CHAN: case DF3_COD1_8CHAN_HASH: case DF4_NPS1_8CHAN_HASH: + case MI3_HASH_8CHAN: case DF4p5_NPS1_8CHAN_1K_HASH: case DF4p5_NPS1_8CHAN_2K_HASH: return 8; @@ -502,6 +517,7 @@ static u8 get_num_intlv_chan(struct addr_ctx *ctx) case DF4p5_NPS1_12CHAN_2K_HASH: return 12; case NOHASH_16CHAN: + case MI3_HASH_16CHAN: case DF4p5_NPS1_16CHAN_1K_HASH: case DF4p5_NPS1_16CHAN_2K_HASH: return 16; @@ -509,6 +525,7 @@ static u8 get_num_intlv_chan(struct addr_ctx *ctx) case DF4p5_NPS0_24CHAN_2K_HASH: return 24; case NOHASH_32CHAN: + case MI3_HASH_32CHAN: return 32; default: atl_debug_on_bad_intlv_mode(ctx); diff --git a/drivers/ras/amd/atl/reg_fields.h b/drivers/ras/amd/atl/reg_fields.h index 6aaa5093f42c..9dcdf6e4a856 100644 --- a/drivers/ras/amd/atl/reg_fields.h +++ b/drivers/ras/amd/atl/reg_fields.h @@ -246,11 +246,11 @@ #define DF3_HASH_CTL_64K BIT(20) #define DF3_HASH_CTL_2M BIT(21) #define DF3_HASH_CTL_1G BIT(22) -#define DF4_HASH_CTL_4K BIT(7) #define DF4_HASH_CTL_64K BIT(8) #define DF4_HASH_CTL_2M BIT(9) #define DF4_HASH_CTL_1G BIT(10) -#define DF4_HASH_CTL_1T BIT(15) +#define DF4p5_HASH_CTL_4K BIT(7) +#define DF4p5_HASH_CTL_1T BIT(15) /* * High Address Offset @@ -268,10 +268,13 @@ * D18F7x140 [DRAM Offset] * DF4 HiAddrOffset [24:1] * DF4p5 HiAddrOffset [24:1] + * MI300 HiAddrOffset [31:1] */ #define DF2_HI_ADDR_OFFSET GENMASK(31, 20) #define DF3_HI_ADDR_OFFSET GENMASK(31, 12) -#define DF4_HI_ADDR_OFFSET GENMASK(24, 1) + +/* Follow reference code by including reserved bits for simplicity. */ +#define DF4_HI_ADDR_OFFSET GENMASK(31, 1) /* * High Address Offset Enable diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c index af61f2f1d6de..46493ed405d6 100644 --- a/drivers/ras/amd/atl/system.c +++ b/drivers/ras/amd/atl/system.c @@ -124,6 +124,9 @@ static int df4_determine_df_rev(u32 reg) if (reg == DF_FUNC0_ID_ZEN4_SERVER) df_cfg.flags.socket_id_shift_quirk = 1; + if (reg == DF_FUNC0_ID_MI300) + df_cfg.flags.heterogeneous = 1; + return df4_get_fabric_id_mask_registers(); } diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index 9d51e4954687..7bfa21a582f0 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -12,8 +12,56 @@ #include "internal.h" +/* + * MI300 has a fixed, model-specific mapping between a UMC instance and + * its related Data Fabric Coherent Station instance. + * + * The MCA_IPID_UMC[InstanceId] field holds a unique identifier for the + * UMC instance within a Node. Use this to find the appropriate Coherent + * Station ID. + * + * Redundant bits were removed from the map below. + */ +static const u16 umc_coh_st_map[32] = { + 0x393, 0x293, 0x193, 0x093, + 0x392, 0x292, 0x192, 0x092, + 0x391, 0x291, 0x191, 0x091, + 0x390, 0x290, 0x190, 0x090, + 0x793, 0x693, 0x593, 0x493, + 0x792, 0x692, 0x592, 0x492, + 0x791, 0x691, 0x591, 0x491, + 0x790, 0x690, 0x590, 0x490, +}; + +#define UMC_ID_MI300 GENMASK(23, 12) +static u8 get_coh_st_inst_id_mi300(struct atl_err *err) +{ + u16 umc_id = FIELD_GET(UMC_ID_MI300, err->ipid); + u8 i; + + for (i = 0; i < ARRAY_SIZE(umc_coh_st_map); i++) { + if (umc_id == umc_coh_st_map[i]) + break; + } + + WARN_ON_ONCE(i >= ARRAY_SIZE(umc_coh_st_map)); + + return i; +} + +#define MCA_IPID_INST_ID_HI GENMASK_ULL(47, 44) static u8 get_die_id(struct atl_err *err) { + /* + * AMD Node ID is provided in MCA_IPID[InstanceIdHi], and this + * needs to be divided by 4 to get the internal Die ID. + */ + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) { + u8 node_id = FIELD_GET(MCA_IPID_INST_ID_HI, err->ipid); + + return node_id >> 2; + } + /* * For CPUs, this is the AMD Node ID modulo the number * of AMD Nodes per socket. @@ -24,6 +72,9 @@ static u8 get_die_id(struct atl_err *err) #define UMC_CHANNEL_NUM GENMASK(31, 20) static u8 get_coh_st_inst_id(struct atl_err *err) { + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + return get_coh_st_inst_id_mi300(err); + return FIELD_GET(UMC_CHANNEL_NUM, err->ipid); } -- Gitee From 2581766dd685cbe9914f308c98dcd5e38504277a Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Wed, 31 Jan 2024 10:57:32 -0600 Subject: [PATCH 03/23] RAS/AMD/ATL: Add MI300 DRAM to normalized address translation support mainline inclusion from mainline-v6.9-rc1 commit 87a61237530769d5a7a750fbc747ac0d1b2e18c1 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/87a61237530769d5a7a750fbc747ac0d1b2e18c1 -------------------------------- commit 87a61237530769d5a7a750fbc747ac0d1b2e18c1 upstream. Zen-based AMD systems report DRAM ECC errors through Unified Memory Controller (UMC) MCA banks. The value provided in MCA_ADDR is a "normalized" address which represents the UMC's view of its managed memory. The normalized address must be translated to a system physical address for software to take action. MI300 systems, uniquely, do not provide a normalized address in MCA_ADDR for DRAM ECC errors. Rather, the "DRAM" address is reported. This value includes identifiers for the bank, row, column, pseudochannel and stack of the memory location. The DRAM address must be converted to a normalized address in order to be further translated to a system physical address. Add helper functions to do the DRAM to normalized translation for MI300 systems. The method is based on the fixed hardware layout of the on-chip memory. [ bp: Massage commit message, decapitalize some, rename function. ] Signed-off-by: Yazen Ghannam Co-developed-by: Muralidhara M K Signed-off-by: Muralidhara M K Signed-off-by: Borislav Petkov (AMD) Tested-by: Muralidhara M K Link: https://lore.kernel.org/r/20240131165732.88297-1-yazen.ghannam@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- drivers/ras/amd/atl/internal.h | 1 + drivers/ras/amd/atl/system.c | 6 +- drivers/ras/amd/atl/umc.c | 200 ++++++++++++++++++++++++++++++++- 3 files changed, 205 insertions(+), 2 deletions(-) diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index 21d45755e5f2..5de69e0bb0f9 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -224,6 +224,7 @@ int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo); int get_df_system_info(void); int determine_node_id(struct addr_ctx *ctx, u8 socket_num, u8 die_num); +int get_addr_hash_mi300(void); int get_address_map(struct addr_ctx *ctx); diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c index 46493ed405d6..701349e84942 100644 --- a/drivers/ras/amd/atl/system.c +++ b/drivers/ras/amd/atl/system.c @@ -124,9 +124,13 @@ static int df4_determine_df_rev(u32 reg) if (reg == DF_FUNC0_ID_ZEN4_SERVER) df_cfg.flags.socket_id_shift_quirk = 1; - if (reg == DF_FUNC0_ID_MI300) + if (reg == DF_FUNC0_ID_MI300) { df_cfg.flags.heterogeneous = 1; + if (get_addr_hash_mi300()) + return -EINVAL; + } + return df4_get_fabric_id_mask_registers(); } diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index 7bfa21a582f0..7e310d1dfcfc 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -49,6 +49,204 @@ static u8 get_coh_st_inst_id_mi300(struct atl_err *err) return i; } +/* XOR the bits in @val. */ +static u16 bitwise_xor_bits(u16 val) +{ + u16 tmp = 0; + u8 i; + + for (i = 0; i < 16; i++) + tmp ^= (val >> i) & 0x1; + + return tmp; +} + +struct xor_bits { + bool xor_enable; + u16 col_xor; + u32 row_xor; +}; + +#define NUM_BANK_BITS 4 + +static struct { + /* UMC::CH::AddrHashBank */ + struct xor_bits bank[NUM_BANK_BITS]; + + /* UMC::CH::AddrHashPC */ + struct xor_bits pc; + + /* UMC::CH::AddrHashPC2 */ + u8 bank_xor; +} addr_hash; + +#define MI300_UMC_CH_BASE 0x90000 +#define MI300_ADDR_HASH_BANK0 (MI300_UMC_CH_BASE + 0xC8) +#define MI300_ADDR_HASH_PC (MI300_UMC_CH_BASE + 0xE0) +#define MI300_ADDR_HASH_PC2 (MI300_UMC_CH_BASE + 0xE4) + +#define ADDR_HASH_XOR_EN BIT(0) +#define ADDR_HASH_COL_XOR GENMASK(13, 1) +#define ADDR_HASH_ROW_XOR GENMASK(31, 14) +#define ADDR_HASH_BANK_XOR GENMASK(5, 0) + +/* + * Read UMC::CH::AddrHash{Bank,PC,PC2} registers to get XOR bits used + * for hashing. Do this during module init, since the values will not + * change during run time. + * + * These registers are instantiated for each UMC across each AMD Node. + * However, they should be identically programmed due to the fixed hardware + * design of MI300 systems. So read the values from Node 0 UMC 0 and keep a + * single global structure for simplicity. + */ +int get_addr_hash_mi300(void) +{ + u32 temp; + int ret; + u8 i; + + for (i = 0; i < NUM_BANK_BITS; i++) { + ret = amd_smn_read(0, MI300_ADDR_HASH_BANK0 + (i * 4), &temp); + if (ret) + return ret; + + addr_hash.bank[i].xor_enable = FIELD_GET(ADDR_HASH_XOR_EN, temp); + addr_hash.bank[i].col_xor = FIELD_GET(ADDR_HASH_COL_XOR, temp); + addr_hash.bank[i].row_xor = FIELD_GET(ADDR_HASH_ROW_XOR, temp); + } + + ret = amd_smn_read(0, MI300_ADDR_HASH_PC, &temp); + if (ret) + return ret; + + addr_hash.pc.xor_enable = FIELD_GET(ADDR_HASH_XOR_EN, temp); + addr_hash.pc.col_xor = FIELD_GET(ADDR_HASH_COL_XOR, temp); + addr_hash.pc.row_xor = FIELD_GET(ADDR_HASH_ROW_XOR, temp); + + ret = amd_smn_read(0, MI300_ADDR_HASH_PC2, &temp); + if (ret) + return ret; + + addr_hash.bank_xor = FIELD_GET(ADDR_HASH_BANK_XOR, temp); + + return 0; +} + +/* + * MI300 systems report a DRAM address in MCA_ADDR for DRAM ECC errors. This must + * be converted to the intermediate normalized address (NA) before translating to a + * system physical address. + * + * The DRAM address includes bank, row, and column. Also included are bits for + * pseudochannel (PC) and stack ID (SID). + * + * Abbreviations: (S)tack ID, (P)seudochannel, (R)ow, (B)ank, (C)olumn, (Z)ero + * + * The MCA address format is as follows: + * MCA_ADDR[27:0] = {S[1:0], P[0], R[14:0], B[3:0], C[4:0], Z[0]} + * + * The normalized address format is fixed in hardware and is as follows: + * NA[30:0] = {S[1:0], R[13:0], C4, B[1:0], B[3:2], C[3:2], P, C[1:0], Z[4:0]} + * + * Additionally, the PC and Bank bits may be hashed. This must be accounted for before + * reconstructing the normalized address. + */ +#define MI300_UMC_MCA_COL GENMASK(5, 1) +#define MI300_UMC_MCA_BANK GENMASK(9, 6) +#define MI300_UMC_MCA_ROW GENMASK(24, 10) +#define MI300_UMC_MCA_PC BIT(25) +#define MI300_UMC_MCA_SID GENMASK(27, 26) + +#define MI300_NA_COL_1_0 GENMASK(6, 5) +#define MI300_NA_PC BIT(7) +#define MI300_NA_COL_3_2 GENMASK(9, 8) +#define MI300_NA_BANK_3_2 GENMASK(11, 10) +#define MI300_NA_BANK_1_0 GENMASK(13, 12) +#define MI300_NA_COL_4 BIT(14) +#define MI300_NA_ROW GENMASK(28, 15) +#define MI300_NA_SID GENMASK(30, 29) + +static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr) +{ + u16 i, col, row, bank, pc, sid, temp; + + col = FIELD_GET(MI300_UMC_MCA_COL, addr); + bank = FIELD_GET(MI300_UMC_MCA_BANK, addr); + row = FIELD_GET(MI300_UMC_MCA_ROW, addr); + pc = FIELD_GET(MI300_UMC_MCA_PC, addr); + sid = FIELD_GET(MI300_UMC_MCA_SID, addr); + + /* Calculate hash for each Bank bit. */ + for (i = 0; i < NUM_BANK_BITS; i++) { + if (!addr_hash.bank[i].xor_enable) + continue; + + temp = bitwise_xor_bits(col & addr_hash.bank[i].col_xor); + temp ^= bitwise_xor_bits(row & addr_hash.bank[i].row_xor); + bank ^= temp << i; + } + + /* Calculate hash for PC bit. */ + if (addr_hash.pc.xor_enable) { + /* Bits SID[1:0] act as Bank[6:5] for PC hash, so apply them here. */ + bank |= sid << 5; + + temp = bitwise_xor_bits(col & addr_hash.pc.col_xor); + temp ^= bitwise_xor_bits(row & addr_hash.pc.row_xor); + temp ^= bitwise_xor_bits(bank & addr_hash.bank_xor); + pc ^= temp; + + /* Drop SID bits for the sake of debug printing later. */ + bank &= 0x1F; + } + + /* Reconstruct the normalized address starting with NA[4:0] = 0 */ + addr = 0; + + /* NA[6:5] = Column[1:0] */ + temp = col & 0x3; + addr |= FIELD_PREP(MI300_NA_COL_1_0, temp); + + /* NA[7] = PC */ + addr |= FIELD_PREP(MI300_NA_PC, pc); + + /* NA[9:8] = Column[3:2] */ + temp = (col >> 2) & 0x3; + addr |= FIELD_PREP(MI300_NA_COL_3_2, temp); + + /* NA[11:10] = Bank[3:2] */ + temp = (bank >> 2) & 0x3; + addr |= FIELD_PREP(MI300_NA_BANK_3_2, temp); + + /* NA[13:12] = Bank[1:0] */ + temp = bank & 0x3; + addr |= FIELD_PREP(MI300_NA_BANK_1_0, temp); + + /* NA[14] = Column[4] */ + temp = (col >> 4) & 0x1; + addr |= FIELD_PREP(MI300_NA_COL_4, temp); + + /* NA[28:15] = Row[13:0] */ + addr |= FIELD_PREP(MI300_NA_ROW, row); + + /* NA[30:29] = SID[1:0] */ + addr |= FIELD_PREP(MI300_NA_SID, sid); + + pr_debug("Addr=0x%016lx", addr); + pr_debug("Bank=%u Row=%u Column=%u PC=%u SID=%u", bank, row, col, pc, sid); + + return addr; +} + +static unsigned long get_addr(unsigned long addr) +{ + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + return convert_dram_to_norm_addr_mi300(addr); + + return addr; +} + #define MCA_IPID_INST_ID_HI GENMASK_ULL(47, 44) static u8 get_die_id(struct atl_err *err) { @@ -82,7 +280,7 @@ unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { u8 socket_id = topology_physical_package_id(err->cpu); u8 coh_st_inst_id = get_coh_st_inst_id(err); - unsigned long addr = err->addr; + unsigned long addr = get_addr(err->addr); u8 die_id = get_die_id(err); pr_debug("socket_id=0x%x die_id=0x%x coh_st_inst_id=0x%x addr=0x%016lx", -- Gitee From c6007e259f53053d6d07650e3e6e3900fcf2704a Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 13 Feb 2024 21:35:15 -0600 Subject: [PATCH 04/23] RAS/AMD/ATL: Add MI300 row retirement support mainline inclusion from mainline-v6.9-rc1 commit 3b566b30b41401888ee0e8eb904a1e7a6693794b category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/3b566b30b41401888ee0e8eb904a1e7a6693794b -------------------------------- commit 3b566b30b41401888ee0e8eb904a1e7a6693794b upstream. DRAM row retirement depends on model-specific information that is best done within the AMD Address Translation Library. Export a generic wrapper function for other modules to use. Add any model-specific helpers here. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240214033516.1344948-2-yazen.ghannam@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- drivers/ras/amd/atl/Kconfig | 1 + drivers/ras/amd/atl/umc.c | 51 +++++++++++++++++++++++++++++++++++++ include/linux/ras.h | 2 ++ 3 files changed, 54 insertions(+) diff --git a/drivers/ras/amd/atl/Kconfig b/drivers/ras/amd/atl/Kconfig index a43513a700f1..df49c23e7f62 100644 --- a/drivers/ras/amd/atl/Kconfig +++ b/drivers/ras/amd/atl/Kconfig @@ -10,6 +10,7 @@ config AMD_ATL tristate "AMD Address Translation Library" depends on AMD_NB && X86_64 && RAS + depends on MEMORY_FAILURE default N help This library includes support for implementation-specific diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index 7e310d1dfcfc..08c6dbd44c62 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -239,6 +239,57 @@ static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr) return addr; } +/* + * When a DRAM ECC error occurs on MI300 systems, it is recommended to retire + * all memory within that DRAM row. This applies to the memory with a DRAM + * bank. + * + * To find the memory addresses, loop through permutations of the DRAM column + * bits and find the System Physical address of each. The column bits are used + * to calculate the intermediate Normalized address, so all permutations should + * be checked. + * + * See amd_atl::convert_dram_to_norm_addr_mi300() for MI300 address formats. + */ +#define MI300_NUM_COL BIT(HWEIGHT(MI300_UMC_MCA_COL)) +static void retire_row_mi300(struct atl_err *a_err) +{ + unsigned long addr; + struct page *p; + u8 col; + + for (col = 0; col < MI300_NUM_COL; col++) { + a_err->addr &= ~MI300_UMC_MCA_COL; + a_err->addr |= FIELD_PREP(MI300_UMC_MCA_COL, col); + + addr = amd_convert_umc_mca_addr_to_sys_addr(a_err); + if (IS_ERR_VALUE(addr)) + continue; + + addr = PHYS_PFN(addr); + + /* + * Skip invalid or already poisoned pages to avoid unnecessary + * error messages from memory_failure(). + */ + p = pfn_to_online_page(addr); + if (!p) + continue; + + if (PageHWPoison(p)) + continue; + + memory_failure(addr, 0); + } +} + +void amd_retire_dram_row(struct atl_err *a_err) +{ + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + return retire_row_mi300(a_err); +} +EXPORT_SYMBOL_GPL(amd_retire_dram_row); + static unsigned long get_addr(unsigned long addr) { if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) diff --git a/include/linux/ras.h b/include/linux/ras.h index d610156c12f5..1cbbcb4a4121 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -68,8 +68,10 @@ struct atl_err { #if IS_ENABLED(CONFIG_AMD_ATL) void amd_atl_register_decoder(unsigned long (*f)(struct atl_err *)); void amd_atl_unregister_decoder(void); +void amd_retire_dram_row(struct atl_err *err); unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err); #else +static inline void amd_retire_dram_row(struct atl_err *err) { } static inline unsigned long amd_convert_umc_mca_addr_to_sys_addr(struct atl_err *err) { return -EINVAL; } #endif /* CONFIG_AMD_ATL */ -- Gitee From 39c06704a8ecd0d27ea7b5c337bbc7709802dfe5 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 1 Mar 2024 08:37:46 -0600 Subject: [PATCH 05/23] RAS: Export helper to get ras_debugfs_dir mainline inclusion from mainline-v6.9-rc1 commit 9d2b6fa09d15d021fb83ec6f1336176ebaebbeec category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/9d2b6fa09d15d021fb83ec6f1336176ebaebbeec -------------------------------- commit 9d2b6fa09d15d021fb83ec6f1336176ebaebbeec upstream. Export a getter instead of the debugfs node directly so that, other in-tree-only RAS modules can use it. Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Yazen Ghannam Link: https://lore.kernel.org/r/20240301143748.854090-2-yazen.ghannam@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- drivers/ras/cec.c | 10 ++++++++-- drivers/ras/debugfs.c | 8 +++++++- drivers/ras/debugfs.h | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c index 321af498ee11..e440b15fbabc 100644 --- a/drivers/ras/cec.c +++ b/drivers/ras/cec.c @@ -480,9 +480,15 @@ DEFINE_SHOW_ATTRIBUTE(array); static int __init create_debugfs_nodes(void) { - struct dentry *d, *pfn, *decay, *count, *array; + struct dentry *d, *pfn, *decay, *count, *array, *dfs; - d = debugfs_create_dir("cec", ras_debugfs_dir); + dfs = ras_get_debugfs_root(); + if (!dfs) { + pr_warn("Error getting RAS debugfs root!\n"); + return -1; + } + + d = debugfs_create_dir("cec", dfs); if (!d) { pr_warn("Error creating cec debugfs node!\n"); return -1; diff --git a/drivers/ras/debugfs.c b/drivers/ras/debugfs.c index ffb973c328e3..42afd3de68b2 100644 --- a/drivers/ras/debugfs.c +++ b/drivers/ras/debugfs.c @@ -3,10 +3,16 @@ #include #include "debugfs.h" -struct dentry *ras_debugfs_dir; +static struct dentry *ras_debugfs_dir; static atomic_t trace_count = ATOMIC_INIT(0); +struct dentry *ras_get_debugfs_root(void) +{ + return ras_debugfs_dir; +} +EXPORT_SYMBOL_GPL(ras_get_debugfs_root); + int ras_userspace_consumers(void) { return atomic_read(&trace_count); diff --git a/drivers/ras/debugfs.h b/drivers/ras/debugfs.h index c07443b462ad..4749ccdeeba1 100644 --- a/drivers/ras/debugfs.h +++ b/drivers/ras/debugfs.h @@ -4,6 +4,6 @@ #include -extern struct dentry *ras_debugfs_dir; +struct dentry *ras_get_debugfs_root(void); #endif /* __RAS_DEBUGFS_H__ */ -- Gitee From ef9aad1ad6032770510e0d36f8528a42a6fa44bd Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 22 Jan 2024 22:14:00 -0600 Subject: [PATCH 06/23] EDAC/amd64: Use new AMD Address Translation Library mainline inclusion from mainline-v6.9-rc1 commit 6c9058f49084569d1d816e87185e0a4f9ab1a321 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/6c9058f49084569d1d816e87185e0a4f9ab1a321 -------------------------------- commit 6c9058f49084569d1d816e87185e0a4f9ab1a321 upstream. Remove old address translation code and use the new AMD Address Translation Library. Use "imply" in Kconfig so that the "AMD_ATL" config option takes the value of "EDAC_AMD64" as its default. [Backport changes] 1. Retained all existing 277+ lines of in-file address translation code along with changes added in Euler commit 845ff73148337 including function umc_normaddr_to_sysaddr() in drivers/edac/amd64_edac.c to continue support for address translation in EDAC for DRAM ECC errors in Hygon family 18h and model 4h. 2. Modified conditional block in decode_umc_error() added by Euler commit 7722509e5c1f2 to retain usage of existing old in-file address translation code in hygon family CPU's and enabled use of new AMD Address Translation Library in AMD CPU's as per upstream. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240123041401.79812-3-yazen.ghannam@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- drivers/edac/Kconfig | 1 + drivers/edac/amd64_edac.c | 25 +++++++++++++++++-------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 110e99b86a66..87feeb24b3ff 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -78,6 +78,7 @@ config EDAC_GHES config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64)" depends on AMD_NB && EDAC_DECODE_MCE + imply AMD_ATL help Support for error detection and correction of DRAM ECC errors on the AMD64 families (>= K8) of memory controllers. diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 3053f3383021..d026fa7a5bd2 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include "amd64_edac.h" #include @@ -3115,6 +3116,7 @@ static void decode_umc_error(int node_id, struct mce *m) u8 ecc_type = (m->status >> 45) & 0x3; struct mem_ctl_info *mci; struct amd64_pvt *pvt; + struct atl_err a_err; struct err_info err; u64 sys_addr; u8 umc; @@ -3148,16 +3150,23 @@ static void decode_umc_error(int node_id, struct mce *m) pvt->ops->get_err_info(m, &err); - if (hygon_f18h_m4h() && boot_cpu_data.x86_model == 0x6) + if (hygon_f18h_m4h() && boot_cpu_data.x86_model == 0x6) { umc = err.channel << 1; - else - umc = err.channel; - - if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, umc, &sys_addr)) { - err.err_code = ERR_NORM_ADDR; - goto log_error; + if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, umc, &sys_addr)) { + err.err_code = ERR_NORM_ADDR; + goto log_error; + } + } else { + a_err.addr = m->addr; + a_err.ipid = m->ipid; + a_err.cpu = m->extcpu; + + sys_addr = (u64)amd_convert_umc_mca_addr_to_sys_addr(&a_err); + if (IS_ERR_VALUE(sys_addr)) { + err.err_code = ERR_NORM_ADDR; + goto log_error; + } } - error_address_to_page_and_offset(sys_addr, &err); log_error: -- Gitee From 42360999e2e9361f6ae0b19cf5436b01304d2121 Mon Sep 17 00:00:00 2001 From: John Allen Date: Thu, 6 Jun 2024 20:33:08 +0000 Subject: [PATCH 07/23] RAS/AMD/ATL: Add amd_atl pr_fmt() prefix mainline inclusion from mainline-v6.11-rc1 commit efdbe82a216191d77c8edd5e4dabc7cff7d790d9 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/efdbe82a216191d77c8edd5e4dabc7cff7d790d9 -------------------------------- commit efdbe82a216191d77c8edd5e4dabc7cff7d790d9 upstream Prefix all AMD ATL pr_* statements with "amd_atl:". Signed-off-by: John Allen Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240606203313.51197-2-john.allen@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- drivers/ras/amd/atl/internal.h | 3 +++ drivers/ras/amd/atl/system.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index 5de69e0bb0f9..cb0f96040fbd 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -21,6 +21,9 @@ #include "reg_fields.h" +#undef pr_fmt +#define pr_fmt(fmt) "amd_atl: " fmt + /* Maximum possible number of Coherent Stations within a single Data Fabric. */ #define MAX_COH_ST_CHANNELS 32 diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c index 701349e84942..248ea493b841 100644 --- a/drivers/ras/amd/atl/system.c +++ b/drivers/ras/amd/atl/system.c @@ -273,7 +273,7 @@ static void dump_df_cfg(void) int get_df_system_info(void) { if (determine_df_rev()) { - pr_warn("amd_atl: Failed to determine DF Revision"); + pr_warn("Failed to determine DF Revision"); df_cfg.rev = UNKNOWN; return -EINVAL; } -- Gitee From 2b63f8573dcd697a4187a133588f6a34881f0600 Mon Sep 17 00:00:00 2001 From: John Allen Date: Thu, 6 Jun 2024 20:33:09 +0000 Subject: [PATCH 08/23] RAS/AMD/ATL: Read DRAM hole base early mainline inclusion from mainline-v6.11-rc1 commit 1233aa3fb342ca4e63d398c6a3de8ed32ce796ea category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/1233aa3fb342ca4e63d398c6a3de8ed32ce796ea -------------------------------- commit 1233aa3fb342ca4e63d398c6a3de8ed32ce796ea upstream Read DRAM hole base when constructing the address map as the value will not change during run time. Signed-off-by: John Allen Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Yazen Ghannam Link: https://lore.kernel.org/r/20240606203313.51197-3-john.allen@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- drivers/ras/amd/atl/core.c | 18 +++++------------- drivers/ras/amd/atl/internal.h | 2 ++ drivers/ras/amd/atl/system.c | 19 +++++++++++++++++++ 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c index 6dc4e06305f7..a942a6547958 100644 --- a/drivers/ras/amd/atl/core.c +++ b/drivers/ras/amd/atl/core.c @@ -51,22 +51,11 @@ static bool legacy_hole_en(struct addr_ctx *ctx) static int add_legacy_hole(struct addr_ctx *ctx) { - u32 dram_hole_base; - u8 func = 0; - if (!legacy_hole_en(ctx)) return 0; - if (df_cfg.rev >= DF4) - func = 7; - - if (df_indirect_read_broadcast(ctx->node_id, func, 0x104, &dram_hole_base)) - return -EINVAL; - - dram_hole_base &= DF_DRAM_HOLE_BASE_MASK; - - if (ctx->ret_addr >= dram_hole_base) - ctx->ret_addr += (BIT_ULL(32) - dram_hole_base); + if (ctx->ret_addr >= df_cfg.dram_hole_base) + ctx->ret_addr += (BIT_ULL(32) - df_cfg.dram_hole_base); return 0; } @@ -125,6 +114,9 @@ unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsig ctx.inputs.die_id = die_id; ctx.inputs.coh_st_inst_id = coh_st_inst_id; + if (legacy_hole_en(&ctx) && !df_cfg.dram_hole_base) + return -EINVAL; + if (determine_node_id(&ctx, socket_id, die_id)) return -EINVAL; diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index cb0f96040fbd..3596ad5ca3e0 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -135,6 +135,8 @@ struct df_config { /* Number of DRAM Address maps visible in a Coherent Station. */ u8 num_coh_st_maps; + u32 dram_hole_base; + /* Global flags to handle special cases. */ struct df_flags flags; }; diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c index 248ea493b841..8423c9f3a8d2 100644 --- a/drivers/ras/amd/atl/system.c +++ b/drivers/ras/amd/atl/system.c @@ -223,6 +223,21 @@ static int determine_df_rev(void) return -EINVAL; } +static int get_dram_hole_base(void) +{ + u8 func = 0; + + if (df_cfg.rev >= DF4) + func = 7; + + if (df_indirect_read_broadcast(0, func, 0x104, &df_cfg.dram_hole_base)) + return -EINVAL; + + df_cfg.dram_hole_base &= DF_DRAM_HOLE_BASE_MASK; + + return 0; +} + static void get_num_maps(void) { switch (df_cfg.rev) { @@ -266,6 +281,7 @@ static void dump_df_cfg(void) pr_debug("num_coh_st_maps=%u", df_cfg.num_coh_st_maps); + pr_debug("dram_hole_base=0x%x", df_cfg.dram_hole_base); pr_debug("flags.legacy_ficaa=%u", df_cfg.flags.legacy_ficaa); pr_debug("flags.socket_id_shift_quirk=%u", df_cfg.flags.socket_id_shift_quirk); } @@ -282,6 +298,9 @@ int get_df_system_info(void) get_num_maps(); + if (get_dram_hole_base()) + pr_warn("Failed to read DRAM hole base"); + dump_df_cfg(); return 0; -- Gitee From baf6d421d1daa8eb11e6b43b4edcd22f3308c152 Mon Sep 17 00:00:00 2001 From: John Allen Date: Thu, 6 Jun 2024 20:33:10 +0000 Subject: [PATCH 09/23] RAS/AMD/ATL: Expand helpers for adding and removing base and hole mainline inclusion from mainline-v6.11-rc1 commit 6cce048cb31f272ca2c9b772cf541715b9ff6ca1 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/6cce048cb31f272ca2c9b772cf541715b9ff6ca1 -------------------------------- commit 6cce048cb31f272ca2c9b772cf541715b9ff6ca1 upstream The ret_addr field in struct addr_ctx contains the intermediate value of the returned address as it passes through multiple steps in the translation process. Currently, adding the DRAM base and legacy hole is only done once, so it operates directly on the intermediate value. However, for DF 4.5 non-power-of-2 denormalization, adding and removing the DRAM base and legacy hole needs to be done for multiple temporary address values. During this process, the intermediate value should not be lost so the ret_addr value can't be reused. Update the existing 'add' helper to operate on an arbitrary address and introduce a new 'remove' helper to do the inverse operations. Signed-off-by: John Allen Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Yazen Ghannam Link: https://lore.kernel.org/r/20240606203313.51197-4-john.allen@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- drivers/ras/amd/atl/core.c | 41 +++++++++++++++++++++------------- drivers/ras/amd/atl/internal.h | 3 +++ 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c index a942a6547958..0b69e4dfc6c7 100644 --- a/drivers/ras/amd/atl/core.c +++ b/drivers/ras/amd/atl/core.c @@ -49,15 +49,26 @@ static bool legacy_hole_en(struct addr_ctx *ctx) return FIELD_GET(DF_LEGACY_MMIO_HOLE_EN, reg); } -static int add_legacy_hole(struct addr_ctx *ctx) +static u64 add_legacy_hole(struct addr_ctx *ctx, u64 addr) { if (!legacy_hole_en(ctx)) - return 0; + return addr; - if (ctx->ret_addr >= df_cfg.dram_hole_base) - ctx->ret_addr += (BIT_ULL(32) - df_cfg.dram_hole_base); + if (addr >= df_cfg.dram_hole_base) + addr += (BIT_ULL(32) - df_cfg.dram_hole_base); - return 0; + return addr; +} + +static u64 remove_legacy_hole(struct addr_ctx *ctx, u64 addr) +{ + if (!legacy_hole_en(ctx)) + return addr; + + if (addr >= df_cfg.dram_hole_base) + addr -= (BIT_ULL(32) - df_cfg.dram_hole_base); + + return addr; } static u64 get_base_addr(struct addr_ctx *ctx) @@ -72,14 +83,14 @@ static u64 get_base_addr(struct addr_ctx *ctx) return base_addr << DF_DRAM_BASE_LIMIT_LSB; } -static int add_base_and_hole(struct addr_ctx *ctx) +u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr) { - ctx->ret_addr += get_base_addr(ctx); - - if (add_legacy_hole(ctx)) - return -EINVAL; + return add_legacy_hole(ctx, addr + get_base_addr(ctx)); +} - return 0; +u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr) +{ + return remove_legacy_hole(ctx, addr) - get_base_addr(ctx); } static bool late_hole_remove(struct addr_ctx *ctx) @@ -126,14 +137,14 @@ unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsig if (denormalize_address(&ctx)) return -EINVAL; - if (!late_hole_remove(&ctx) && add_base_and_hole(&ctx)) - return -EINVAL; + if (!late_hole_remove(&ctx)) + ctx.ret_addr = add_base_and_hole(&ctx, ctx.ret_addr); if (dehash_address(&ctx)) return -EINVAL; - if (late_hole_remove(&ctx) && add_base_and_hole(&ctx)) - return -EINVAL; + if (late_hole_remove(&ctx)) + ctx.ret_addr = add_base_and_hole(&ctx, ctx.ret_addr); if (addr_over_limit(&ctx)) return -EINVAL; diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index 3596ad5ca3e0..f623ac23e4b9 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -239,6 +239,9 @@ int dehash_address(struct addr_ctx *ctx); unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr); unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err); +u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr); +u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr); + /* * Make a gap in @data that is @num_bits long starting at @bit_num. * e.g. data = 11111111'b -- Gitee From 21f964c27c7a2821cb0714caf26ac4d557d5f12b Mon Sep 17 00:00:00 2001 From: John Allen Date: Thu, 6 Jun 2024 20:33:11 +0000 Subject: [PATCH 10/23] RAS/AMD/ATL: Validate address map when information is gathered mainline inclusion from mainline-v6.11-rc1 commit d5811a165caf63a69cd8ae11156b8587cc57d1d1 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/d5811a165caf63a69cd8ae11156b8587cc57d1d1 -------------------------------- commit d5811a165caf63a69cd8ae11156b8587cc57d1d1 upstream Validate address maps at the time the information is gathered as the address map will not change during translation. Signed-off-by: John Allen Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Yazen Ghannam Link: https://lore.kernel.org/r/20240606203313.51197-5-john.allen@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- drivers/ras/amd/atl/dehash.c | 43 -------------------- drivers/ras/amd/atl/map.c | 77 ++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 43 deletions(-) diff --git a/drivers/ras/amd/atl/dehash.c b/drivers/ras/amd/atl/dehash.c index 4ea46262c4f5..d4ee7ecabaee 100644 --- a/drivers/ras/amd/atl/dehash.c +++ b/drivers/ras/amd/atl/dehash.c @@ -12,41 +12,10 @@ #include "internal.h" -/* - * Verify the interleave bits are correct in the different interleaving - * settings. - * - * If @num_intlv_dies and/or @num_intlv_sockets are 1, it means the - * respective interleaving is disabled. - */ -static inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2, - u8 num_intlv_dies, u8 num_intlv_sockets) -{ - if (!(ctx->map.intlv_bit_pos == bit1 || ctx->map.intlv_bit_pos == bit2)) { - pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos); - return false; - } - - if (ctx->map.num_intlv_dies > num_intlv_dies) { - pr_debug("Invalid number of interleave dies: %u", ctx->map.num_intlv_dies); - return false; - } - - if (ctx->map.num_intlv_sockets > num_intlv_sockets) { - pr_debug("Invalid number of interleave sockets: %u", ctx->map.num_intlv_sockets); - return false; - } - - return true; -} - static int df2_dehash_addr(struct addr_ctx *ctx) { u8 hashed_bit, intlv_bit, intlv_bit_pos; - if (!map_bits_valid(ctx, 8, 9, 1, 1)) - return -EINVAL; - intlv_bit_pos = ctx->map.intlv_bit_pos; intlv_bit = !!(BIT_ULL(intlv_bit_pos) & ctx->ret_addr); @@ -67,9 +36,6 @@ static int df3_dehash_addr(struct addr_ctx *ctx) bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G; u8 hashed_bit, intlv_bit, intlv_bit_pos; - if (!map_bits_valid(ctx, 8, 9, 1, 1)) - return -EINVAL; - hash_ctl_64k = FIELD_GET(DF3_HASH_CTL_64K, ctx->map.ctl); hash_ctl_2M = FIELD_GET(DF3_HASH_CTL_2M, ctx->map.ctl); hash_ctl_1G = FIELD_GET(DF3_HASH_CTL_1G, ctx->map.ctl); @@ -171,9 +137,6 @@ static int df4_dehash_addr(struct addr_ctx *ctx) bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G; u8 hashed_bit, intlv_bit; - if (!map_bits_valid(ctx, 8, 8, 1, 2)) - return -EINVAL; - hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); @@ -247,9 +210,6 @@ static int df4p5_dehash_addr(struct addr_ctx *ctx) u8 hashed_bit, intlv_bit; u64 rehash_vector; - if (!map_bits_valid(ctx, 8, 8, 1, 2)) - return -EINVAL; - hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); @@ -360,9 +320,6 @@ static int mi300_dehash_addr(struct addr_ctx *ctx) bool hashed_bit, intlv_bit, test_bit; u8 num_intlv_bits, base_bit, i; - if (!map_bits_valid(ctx, 8, 8, 4, 1)) - return -EINVAL; - hash_ctl_4k = FIELD_GET(DF4p5_HASH_CTL_4K, ctx->map.ctl); hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); diff --git a/drivers/ras/amd/atl/map.c b/drivers/ras/amd/atl/map.c index 8b908e8d7495..04419923f088 100644 --- a/drivers/ras/amd/atl/map.c +++ b/drivers/ras/amd/atl/map.c @@ -642,6 +642,79 @@ static int get_global_map_data(struct addr_ctx *ctx) return 0; } +/* + * Verify the interleave bits are correct in the different interleaving + * settings. + * + * If @num_intlv_dies and/or @num_intlv_sockets are 1, it means the + * respective interleaving is disabled. + */ +static inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2, + u8 num_intlv_dies, u8 num_intlv_sockets) +{ + if (!(ctx->map.intlv_bit_pos == bit1 || ctx->map.intlv_bit_pos == bit2)) { + pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos); + return false; + } + + if (ctx->map.num_intlv_dies > num_intlv_dies) { + pr_debug("Invalid number of interleave dies: %u", ctx->map.num_intlv_dies); + return false; + } + + if (ctx->map.num_intlv_sockets > num_intlv_sockets) { + pr_debug("Invalid number of interleave sockets: %u", ctx->map.num_intlv_sockets); + return false; + } + + return true; +} + +static int validate_address_map(struct addr_ctx *ctx) +{ + switch (ctx->map.intlv_mode) { + case DF2_2CHAN_HASH: + case DF3_COD4_2CHAN_HASH: + case DF3_COD2_4CHAN_HASH: + case DF3_COD1_8CHAN_HASH: + if (!map_bits_valid(ctx, 8, 9, 1, 1)) + goto err; + break; + + case DF4_NPS4_2CHAN_HASH: + case DF4_NPS2_4CHAN_HASH: + case DF4_NPS1_8CHAN_HASH: + case DF4p5_NPS4_2CHAN_1K_HASH: + case DF4p5_NPS4_2CHAN_2K_HASH: + case DF4p5_NPS2_4CHAN_1K_HASH: + case DF4p5_NPS2_4CHAN_2K_HASH: + case DF4p5_NPS1_8CHAN_1K_HASH: + case DF4p5_NPS1_8CHAN_2K_HASH: + case DF4p5_NPS1_16CHAN_1K_HASH: + case DF4p5_NPS1_16CHAN_2K_HASH: + if (!map_bits_valid(ctx, 8, 8, 1, 2)) + goto err; + break; + + case MI3_HASH_8CHAN: + case MI3_HASH_16CHAN: + case MI3_HASH_32CHAN: + if (!map_bits_valid(ctx, 8, 8, 4, 1)) + goto err; + break; + + /* Nothing to do for modes that don't need special validation checks. */ + default: + break; + } + + return 0; + +err: + atl_debug(ctx, "Inconsistent address map"); + return -EINVAL; +} + static void dump_address_map(struct dram_addr_map *map) { u8 i; @@ -678,5 +751,9 @@ int get_address_map(struct addr_ctx *ctx) dump_address_map(&ctx->map); + ret = validate_address_map(ctx); + if (ret) + return ret; + return ret; } -- Gitee From 53f1f359045567cf3f8d62cc421f90f04d85d043 Mon Sep 17 00:00:00 2001 From: John Allen Date: Thu, 6 Jun 2024 20:33:12 +0000 Subject: [PATCH 11/23] RAS/AMD/ATL: Implement DF 4.5 NP2 denormalization mainline inclusion from mainline-v6.11-rc1 commit e0372d6969bca2bc57e1a24129473694ff65641c category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/e0372d6969bca2bc57e1a24129473694ff65641c -------------------------------- commit e0372d6969bca2bc57e1a24129473694ff65641c upstream Unlike with previous Data Fabric versions, with Data Fabric 4.5 non-power-of-2 denormalization, there are bits of the system physical address that can't be fully reconstructed from the normalized address. To determine the proper combination of missing system physical address bits, iterate through each possible combination of these bits, normalize the resulting system physical address, and compare to the original address that is being translated. If the addresses match, then the correct permutation of bits has been found. Signed-off-by: John Allen Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Yazen Ghannam Link: https://lore.kernel.org/r/20240606203313.51197-6-john.allen@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- drivers/ras/amd/atl/denormalize.c | 561 ++++++++++++++++++++++++++++++ drivers/ras/amd/atl/internal.h | 40 +++ drivers/ras/amd/atl/map.c | 20 ++ 3 files changed, 621 insertions(+) diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c index d5d0e1fda159..c0f496588c8f 100644 --- a/drivers/ras/amd/atl/denormalize.c +++ b/drivers/ras/amd/atl/denormalize.c @@ -448,6 +448,118 @@ static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx) return (phys_fabric_id & df_cfg.node_id_mask) | log_fabric_id; } +static u16 get_logical_coh_st_fabric_id_for_current_spa(struct addr_ctx *ctx, + struct df4p5_denorm_ctx *denorm_ctx) +{ + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T; + bool hash_pa8, hash_pa9, hash_pa12, hash_pa13; + u64 cs_id = 0; + + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl); + + hash_pa8 = FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa); + hash_pa8 ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa); + hash_pa8 ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k; + hash_pa8 ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M; + hash_pa8 ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G; + hash_pa8 ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T; + + hash_pa9 = FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa); + hash_pa9 ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k; + hash_pa9 ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M; + hash_pa9 ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G; + hash_pa9 ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T; + + hash_pa12 = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa); + hash_pa12 ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k; + hash_pa12 ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M; + hash_pa12 ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G; + hash_pa12 ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T; + + hash_pa13 = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa); + hash_pa13 ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k; + hash_pa13 ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M; + hash_pa13 ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G; + hash_pa13 ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T; + + switch (ctx->map.intlv_mode) { + case DF4p5_NPS0_24CHAN_1K_HASH: + cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3; + cs_id %= denorm_ctx->mod_value; + cs_id <<= 2; + cs_id |= (hash_pa9 | (hash_pa12 << 1)); + cs_id |= hash_pa8 << df_cfg.socket_id_shift; + break; + + case DF4p5_NPS0_24CHAN_2K_HASH: + cs_id = FIELD_GET(GENMASK_ULL(63, 14), denorm_ctx->current_spa) << 4; + cs_id %= denorm_ctx->mod_value; + cs_id <<= 2; + cs_id |= (hash_pa12 | (hash_pa13 << 1)); + cs_id |= hash_pa8 << df_cfg.socket_id_shift; + break; + + case DF4p5_NPS1_12CHAN_1K_HASH: + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; + cs_id %= denorm_ctx->mod_value; + cs_id <<= 2; + cs_id |= (hash_pa8 | (hash_pa9 << 1)); + break; + + case DF4p5_NPS1_12CHAN_2K_HASH: + cs_id = FIELD_GET(GENMASK_ULL(63, 13), denorm_ctx->current_spa) << 3; + cs_id %= denorm_ctx->mod_value; + cs_id <<= 2; + cs_id |= (hash_pa8 | (hash_pa12 << 1)); + break; + + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; + cs_id |= (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) << 1); + cs_id %= denorm_ctx->mod_value; + cs_id <<= 1; + cs_id |= hash_pa8; + break; + + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; + cs_id %= denorm_ctx->mod_value; + cs_id <<= 1; + cs_id |= hash_pa8; + break; + + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; + cs_id |= FIELD_GET(GENMASK_ULL(9, 8), denorm_ctx->current_spa); + cs_id %= denorm_ctx->mod_value; + break; + + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + cs_id = FIELD_GET(GENMASK_ULL(63, 12), denorm_ctx->current_spa) << 2; + cs_id |= FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) << 1; + cs_id %= denorm_ctx->mod_value; + break; + + default: + atl_debug_on_bad_intlv_mode(ctx); + return 0; + } + + if (cs_id > 0xffff) { + atl_debug(ctx, "Translation error: Resulting cs_id larger than u16\n"); + return 0; + } + + return cs_id; +} + static int denorm_addr_common(struct addr_ctx *ctx) { u64 denorm_addr; @@ -700,6 +812,442 @@ static int denorm_addr_df4_np2(struct addr_ctx *ctx) return 0; } +static u64 normalize_addr_df4p5_np2(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx, + u64 addr) +{ + u64 temp_addr_a = 0, temp_addr_b = 0; + + switch (ctx->map.intlv_mode) { + case DF4p5_NPS0_24CHAN_1K_HASH: + case DF4p5_NPS1_12CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + temp_addr_a = FIELD_GET(GENMASK_ULL(11, 10), addr) << 8; + break; + + case DF4p5_NPS0_24CHAN_2K_HASH: + case DF4p5_NPS1_12CHAN_2K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + temp_addr_a = FIELD_GET(GENMASK_ULL(11, 9), addr) << 8; + break; + + default: + atl_debug_on_bad_intlv_mode(ctx); + return 0; + } + + switch (ctx->map.intlv_mode) { + case DF4p5_NPS0_24CHAN_1K_HASH: + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value; + temp_addr_b <<= 10; + break; + + case DF4p5_NPS0_24CHAN_2K_HASH: + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 14), addr) / denorm_ctx->mod_value; + temp_addr_b <<= 11; + break; + + case DF4p5_NPS1_12CHAN_1K_HASH: + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value; + temp_addr_b <<= 10; + break; + + case DF4p5_NPS1_12CHAN_2K_HASH: + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 13), addr) / denorm_ctx->mod_value; + temp_addr_b <<= 11; + break; + + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1; + temp_addr_b |= FIELD_GET(BIT_ULL(9), addr); + temp_addr_b /= denorm_ctx->mod_value; + temp_addr_b <<= 10; + break; + + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) / denorm_ctx->mod_value; + temp_addr_b <<= 11; + break; + + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 2; + temp_addr_b |= FIELD_GET(GENMASK_ULL(9, 8), addr); + temp_addr_b /= denorm_ctx->mod_value; + temp_addr_b <<= 10; + break; + + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + temp_addr_b = FIELD_GET(GENMASK_ULL(63, 12), addr) << 1; + temp_addr_b |= FIELD_GET(BIT_ULL(8), addr); + temp_addr_b /= denorm_ctx->mod_value; + temp_addr_b <<= 11; + break; + + default: + atl_debug_on_bad_intlv_mode(ctx); + return 0; + } + + return denorm_ctx->base_denorm_addr | temp_addr_a | temp_addr_b; +} + +static void recalculate_hashed_bits_df4p5_np2(struct addr_ctx *ctx, + struct df4p5_denorm_ctx *denorm_ctx) +{ + bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T, hashed_bit; + + if (!denorm_ctx->rehash_vector) + return; + + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl); + + if (denorm_ctx->rehash_vector & BIT_ULL(8)) { + hashed_bit = FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa); + hashed_bit ^= FIELD_GET(BIT_ULL(14), denorm_ctx->current_spa); + hashed_bit ^= FIELD_GET(BIT_ULL(16), denorm_ctx->current_spa) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(21), denorm_ctx->current_spa) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(30), denorm_ctx->current_spa) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(40), denorm_ctx->current_spa) & hash_ctl_1T; + + if (FIELD_GET(BIT_ULL(8), denorm_ctx->current_spa) != hashed_bit) + denorm_ctx->current_spa ^= BIT_ULL(8); + } + + if (denorm_ctx->rehash_vector & BIT_ULL(9)) { + hashed_bit = FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa); + hashed_bit ^= FIELD_GET(BIT_ULL(17), denorm_ctx->current_spa) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(22), denorm_ctx->current_spa) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(31), denorm_ctx->current_spa) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(41), denorm_ctx->current_spa) & hash_ctl_1T; + + if (FIELD_GET(BIT_ULL(9), denorm_ctx->current_spa) != hashed_bit) + denorm_ctx->current_spa ^= BIT_ULL(9); + } + + if (denorm_ctx->rehash_vector & BIT_ULL(12)) { + hashed_bit = FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa); + hashed_bit ^= FIELD_GET(BIT_ULL(18), denorm_ctx->current_spa) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(23), denorm_ctx->current_spa) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(32), denorm_ctx->current_spa) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(42), denorm_ctx->current_spa) & hash_ctl_1T; + + if (FIELD_GET(BIT_ULL(12), denorm_ctx->current_spa) != hashed_bit) + denorm_ctx->current_spa ^= BIT_ULL(12); + } + + if (denorm_ctx->rehash_vector & BIT_ULL(13)) { + hashed_bit = FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa); + hashed_bit ^= FIELD_GET(BIT_ULL(19), denorm_ctx->current_spa) & hash_ctl_64k; + hashed_bit ^= FIELD_GET(BIT_ULL(24), denorm_ctx->current_spa) & hash_ctl_2M; + hashed_bit ^= FIELD_GET(BIT_ULL(33), denorm_ctx->current_spa) & hash_ctl_1G; + hashed_bit ^= FIELD_GET(BIT_ULL(43), denorm_ctx->current_spa) & hash_ctl_1T; + + if (FIELD_GET(BIT_ULL(13), denorm_ctx->current_spa) != hashed_bit) + denorm_ctx->current_spa ^= BIT_ULL(13); + } +} + +static bool match_logical_coh_st_fabric_id(struct addr_ctx *ctx, + struct df4p5_denorm_ctx *denorm_ctx) +{ + /* + * The logical CS fabric ID of the permutation must be calculated from the + * current SPA with the base and with the MMIO hole. + */ + u16 id = get_logical_coh_st_fabric_id_for_current_spa(ctx, denorm_ctx); + + atl_debug(ctx, "Checking calculated logical coherent station fabric id:\n"); + atl_debug(ctx, " calculated fabric id = 0x%x\n", id); + atl_debug(ctx, " expected fabric id = 0x%x\n", denorm_ctx->coh_st_fabric_id); + + return denorm_ctx->coh_st_fabric_id == id; +} + +static bool match_norm_addr(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx) +{ + u64 addr = remove_base_and_hole(ctx, denorm_ctx->current_spa); + + /* + * The normalized address must be calculated with the current SPA without + * the base and without the MMIO hole. + */ + addr = normalize_addr_df4p5_np2(ctx, denorm_ctx, addr); + + atl_debug(ctx, "Checking calculated normalized address:\n"); + atl_debug(ctx, " calculated normalized addr = 0x%016llx\n", addr); + atl_debug(ctx, " expected normalized addr = 0x%016llx\n", ctx->ret_addr); + + return addr == ctx->ret_addr; +} + +static int check_permutations(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx) +{ + u64 test_perm, temp_addr, denorm_addr, num_perms; + unsigned int dropped_remainder; + + denorm_ctx->div_addr *= denorm_ctx->mod_value; + + /* + * The high order bits of num_permutations represent the permutations + * of the dropped remainder. This will be either 0-3 or 0-5 depending + * on the interleave mode. The low order bits represent the + * permutations of other "lost" bits which will be any combination of + * 1, 2, or 3 bits depending on the interleave mode. + */ + num_perms = denorm_ctx->mod_value << denorm_ctx->perm_shift; + + for (test_perm = 0; test_perm < num_perms; test_perm++) { + denorm_addr = denorm_ctx->base_denorm_addr; + dropped_remainder = test_perm >> denorm_ctx->perm_shift; + temp_addr = denorm_ctx->div_addr + dropped_remainder; + + switch (ctx->map.intlv_mode) { + case DF4p5_NPS0_24CHAN_2K_HASH: + denorm_addr |= temp_addr << 14; + break; + + case DF4p5_NPS0_24CHAN_1K_HASH: + case DF4p5_NPS1_12CHAN_2K_HASH: + denorm_addr |= temp_addr << 13; + break; + + case DF4p5_NPS1_12CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + denorm_addr |= temp_addr << 12; + break; + + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 9; + denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12; + break; + + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + denorm_addr |= FIELD_GET(GENMASK_ULL(1, 0), temp_addr) << 8; + denorm_addr |= FIELD_GET(GENMASK_ULL(63, 2), (temp_addr)) << 12; + break; + + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + denorm_addr |= FIELD_GET(BIT_ULL(0), temp_addr) << 8; + denorm_addr |= FIELD_GET(GENMASK_ULL(63, 1), temp_addr) << 12; + break; + + default: + atl_debug_on_bad_intlv_mode(ctx); + return -EINVAL; + } + + switch (ctx->map.intlv_mode) { + case DF4p5_NPS0_24CHAN_1K_HASH: + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; + denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9; + denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 12; + break; + + case DF4p5_NPS0_24CHAN_2K_HASH: + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; + denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12; + denorm_addr |= FIELD_GET(BIT_ULL(2), test_perm) << 13; + break; + + case DF4p5_NPS1_12CHAN_2K_HASH: + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; + denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 12; + break; + + case DF4p5_NPS1_12CHAN_1K_HASH: + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; + denorm_addr |= FIELD_GET(BIT_ULL(1), test_perm) << 9; + break; + + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + denorm_addr |= FIELD_GET(BIT_ULL(0), test_perm) << 8; + break; + + default: + atl_debug_on_bad_intlv_mode(ctx); + return -EINVAL; + } + + denorm_ctx->current_spa = add_base_and_hole(ctx, denorm_addr); + recalculate_hashed_bits_df4p5_np2(ctx, denorm_ctx); + + atl_debug(ctx, "Checking potential system physical address 0x%016llx\n", + denorm_ctx->current_spa); + + if (!match_logical_coh_st_fabric_id(ctx, denorm_ctx)) + continue; + + if (!match_norm_addr(ctx, denorm_ctx)) + continue; + + if (denorm_ctx->resolved_spa == INVALID_SPA || + denorm_ctx->current_spa > denorm_ctx->resolved_spa) + denorm_ctx->resolved_spa = denorm_ctx->current_spa; + } + + if (denorm_ctx->resolved_spa == INVALID_SPA) { + atl_debug(ctx, "Failed to find valid SPA for normalized address 0x%016llx\n", + ctx->ret_addr); + return -EINVAL; + } + + /* Return the resolved SPA without the base, without the MMIO hole */ + ctx->ret_addr = remove_base_and_hole(ctx, denorm_ctx->resolved_spa); + + return 0; +} + +static int init_df4p5_denorm_ctx(struct addr_ctx *ctx, struct df4p5_denorm_ctx *denorm_ctx) +{ + denorm_ctx->current_spa = INVALID_SPA; + denorm_ctx->resolved_spa = INVALID_SPA; + + switch (ctx->map.intlv_mode) { + case DF4p5_NPS0_24CHAN_1K_HASH: + denorm_ctx->perm_shift = 3; + denorm_ctx->rehash_vector = BIT(8) | BIT(9) | BIT(12); + break; + + case DF4p5_NPS0_24CHAN_2K_HASH: + denorm_ctx->perm_shift = 3; + denorm_ctx->rehash_vector = BIT(8) | BIT(12) | BIT(13); + break; + + case DF4p5_NPS1_12CHAN_1K_HASH: + denorm_ctx->perm_shift = 2; + denorm_ctx->rehash_vector = BIT(8); + break; + + case DF4p5_NPS1_12CHAN_2K_HASH: + denorm_ctx->perm_shift = 2; + denorm_ctx->rehash_vector = BIT(8) | BIT(12); + break; + + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + denorm_ctx->perm_shift = 1; + denorm_ctx->rehash_vector = BIT(8); + break; + + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + denorm_ctx->perm_shift = 2; + denorm_ctx->rehash_vector = 0; + break; + + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + denorm_ctx->perm_shift = 1; + denorm_ctx->rehash_vector = 0; + break; + + default: + atl_debug_on_bad_intlv_mode(ctx); + return -EINVAL; + } + + denorm_ctx->base_denorm_addr = FIELD_GET(GENMASK_ULL(7, 0), ctx->ret_addr); + + switch (ctx->map.intlv_mode) { + case DF4p5_NPS0_24CHAN_1K_HASH: + case DF4p5_NPS1_12CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(9, 8), ctx->ret_addr) << 10; + denorm_ctx->div_addr = FIELD_GET(GENMASK_ULL(63, 10), ctx->ret_addr); + break; + + case DF4p5_NPS0_24CHAN_2K_HASH: + case DF4p5_NPS1_12CHAN_2K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + denorm_ctx->base_denorm_addr |= FIELD_GET(GENMASK_ULL(10, 8), ctx->ret_addr) << 9; + denorm_ctx->div_addr = FIELD_GET(GENMASK_ULL(63, 11), ctx->ret_addr); + break; + + default: + atl_debug_on_bad_intlv_mode(ctx); + return -EINVAL; + } + + if (ctx->map.num_intlv_chan % 3 == 0) + denorm_ctx->mod_value = 3; + else + denorm_ctx->mod_value = 5; + + denorm_ctx->coh_st_fabric_id = get_logical_coh_st_fabric_id(ctx) - get_dst_fabric_id(ctx); + + atl_debug(ctx, "Initialized df4p5_denorm_ctx:"); + atl_debug(ctx, " mod_value = %d", denorm_ctx->mod_value); + atl_debug(ctx, " perm_shift = %d", denorm_ctx->perm_shift); + atl_debug(ctx, " rehash_vector = 0x%x", denorm_ctx->rehash_vector); + atl_debug(ctx, " base_denorm_addr = 0x%016llx", denorm_ctx->base_denorm_addr); + atl_debug(ctx, " div_addr = 0x%016llx", denorm_ctx->div_addr); + atl_debug(ctx, " coh_st_fabric_id = 0x%x", denorm_ctx->coh_st_fabric_id); + + return 0; +} + +/* + * For DF 4.5, parts of the physical address can be directly pulled from the + * normalized address. The exact bits will differ between interleave modes, but + * using NPS0_24CHAN_1K_HASH as an example, the normalized address consists of + * bits [63:13] (divided by 3), bits [11:10], and bits [7:0] of the system + * physical address. + * + * In this case, there is no way to reconstruct the missing bits (bits 8, 9, + * and 12) from the normalized address. Additionally, when bits [63:13] are + * divided by 3, the remainder is dropped. Determine the proper combination of + * "lost" bits and dropped remainder by iterating through each possible + * permutation of these bits and then normalizing the generated system physical + * addresses. If the normalized address matches the address we are trying to + * translate, then we have found the correct permutation of bits. + */ +static int denorm_addr_df4p5_np2(struct addr_ctx *ctx) +{ + struct df4p5_denorm_ctx denorm_ctx; + int ret = 0; + + memset(&denorm_ctx, 0, sizeof(denorm_ctx)); + + atl_debug(ctx, "Denormalizing DF 4.5 normalized address 0x%016llx", ctx->ret_addr); + + ret = init_df4p5_denorm_ctx(ctx, &denorm_ctx); + if (ret) + return ret; + + return check_permutations(ctx, &denorm_ctx); +} + int denormalize_address(struct addr_ctx *ctx) { switch (ctx->map.intlv_mode) { @@ -711,6 +1259,19 @@ int denormalize_address(struct addr_ctx *ctx) case DF4_NPS2_5CHAN_HASH: case DF4_NPS1_10CHAN_HASH: return denorm_addr_df4_np2(ctx); + case DF4p5_NPS0_24CHAN_1K_HASH: + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS1_12CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS1_12CHAN_2K_HASH: + case DF4p5_NPS0_24CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + return denorm_addr_df4p5_np2(ctx); case DF3_6CHAN: return denorm_addr_df3_6chan(ctx); default: diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index f623ac23e4b9..c67ba4bfe9cf 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -37,6 +37,8 @@ #define DF_DRAM_BASE_LIMIT_LSB 28 #define MI300_DRAM_LIMIT_LSB 20 +#define INVALID_SPA ~0ULL + enum df_revisions { UNKNOWN, DF2, @@ -93,6 +95,44 @@ enum intlv_modes { DF4p5_NPS1_10CHAN_2K_HASH = 0x49, }; +struct df4p5_denorm_ctx { + /* Indicates the number of "lost" bits. This will be 1, 2, or 3. */ + u8 perm_shift; + + /* A mask indicating the bits that need to be rehashed. */ + u16 rehash_vector; + + /* + * Represents the value that the high bits of the normalized address + * are divided by during normalization. This value will be 3 for + * interleave modes with a number of channels divisible by 3 or the + * value will be 5 for interleave modes with a number of channels + * divisible by 5. Power-of-two interleave modes are handled + * separately. + */ + u8 mod_value; + + /* + * Represents the bits that can be directly pulled from the normalized + * address. In each case, pass through bits [7:0] of the normalized + * address. The other bits depend on the interleave bit position which + * will be bit 10 for 1K interleave stripe cases and bit 11 for 2K + * interleave stripe cases. + */ + u64 base_denorm_addr; + + /* + * Represents the high bits of the physical address that have been + * divided by the mod_value. + */ + u64 div_addr; + + u64 current_spa; + u64 resolved_spa; + + u16 coh_st_fabric_id; +}; + struct df_flags { __u8 legacy_ficaa : 1, socket_id_shift_quirk : 1, diff --git a/drivers/ras/amd/atl/map.c b/drivers/ras/amd/atl/map.c index 04419923f088..24a05af747d5 100644 --- a/drivers/ras/amd/atl/map.c +++ b/drivers/ras/amd/atl/map.c @@ -696,6 +696,26 @@ static int validate_address_map(struct addr_ctx *ctx) goto err; break; + case DF4p5_NPS4_3CHAN_1K_HASH: + case DF4p5_NPS4_3CHAN_2K_HASH: + case DF4p5_NPS2_5CHAN_1K_HASH: + case DF4p5_NPS2_5CHAN_2K_HASH: + case DF4p5_NPS2_6CHAN_1K_HASH: + case DF4p5_NPS2_6CHAN_2K_HASH: + case DF4p5_NPS1_10CHAN_1K_HASH: + case DF4p5_NPS1_10CHAN_2K_HASH: + case DF4p5_NPS1_12CHAN_1K_HASH: + case DF4p5_NPS1_12CHAN_2K_HASH: + if (ctx->map.num_intlv_sockets != 1 || !map_bits_valid(ctx, 8, 0, 1, 1)) + goto err; + break; + + case DF4p5_NPS0_24CHAN_1K_HASH: + case DF4p5_NPS0_24CHAN_2K_HASH: + if (ctx->map.num_intlv_sockets < 2 || !map_bits_valid(ctx, 8, 0, 1, 2)) + goto err; + break; + case MI3_HASH_8CHAN: case MI3_HASH_16CHAN: case MI3_HASH_32CHAN: -- Gitee From a49ef40b4436b5c884981b554eda8f6a70e82662 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 13 Feb 2024 21:35:16 -0600 Subject: [PATCH 12/23] RAS: Introduce a FRU memory poison manager mainline inclusion from mainline-v6.11-rc1 commit 6f15e617cc99323339dc241d19956f0d640c4354 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/6f15e617cc99323339dc241d19956f0d640c4354 -------------------------------- commit 6f15e617cc99323339dc241d19956f0d640c4354 upstream Memory errors are an expected occurrence on systems with high memory density. Generally, errors within a small number of unique physical locations are acceptable, based on manufacturer and/or admin policy. During run time, memory with errors may be retired so it is no longer used by the system. This is done in mm through page poisoning, and the effect will remain until the system is restarted. If a memory location is consistently faulty, then the same run time error handling may occur in the next reboot cycle, leading to terminating jobs due to that already known bad memory. This could be prevented if information from the previous boot was not lost. Some add-in cards with driver-managed memory have on-board persistent storage. Their driver saves memory error information to the persistent storage during run time. The information is then restored after reset, and known bad memory will be retired before the hardware is used. A running log of bad memory locations is kept across multiple resets. A similar solution is desirable for CPUs. However, this solution should leverage industry-standard components as much as possible, rather than a bespoke platform driver. Two components are needed: a record format and a persistent storage interface. Implement a new module to manage the record formats on persistent storage. Use the requirements for an AMD MI300-based system to start. Vendor- and platform-specific details can be abstracted later as needed. [ bp: Massage commit message and code, squash 30-ish more fixes from Yazen and me. ] Signed-off-by: Yazen Ghannam Co-developed-by: Signed-off-by: Co-developed-by: Signed-off-by: Tested-by: Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240214033516.1344948-3-yazen.ghannam@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- MAINTAINERS | 6 + drivers/ras/Kconfig | 12 + drivers/ras/Makefile | 1 + drivers/ras/amd/fmpm.c | 812 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 831 insertions(+) create mode 100644 drivers/ras/amd/fmpm.c diff --git a/MAINTAINERS b/MAINTAINERS index 077765cbf0b7..12bb85473550 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18056,6 +18056,12 @@ L: linux-wireless@vger.kernel.org S: Orphan F: drivers/net/wireless/legacy/ray* +RAS FRU MEMORY POISON MANAGER (FMPM) +M: Yazen Ghannam +L: linux-edac@vger.kernel.org +S: Maintained +F: drivers/ras/amd/fmpm.c + RC-CORE / LIRC FRAMEWORK M: Sean Young L: linux-media@vger.kernel.org diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig index 1c3794de1be8..5cc4f70b71be 100644 --- a/drivers/ras/Kconfig +++ b/drivers/ras/Kconfig @@ -46,4 +46,16 @@ source "arch/x86/ras/Kconfig" source "drivers/ras/hisilicon/Kconfig" source "drivers/ras/amd/atl/Kconfig" +config RAS_FMPM + tristate "FRU Memory Poison Manager" + default m + depends on AMD_ATL && ACPI_APEI + help + Support saving and restoring memory error information across reboot + using ACPI ERST as persistent storage. Error information is saved with + the UEFI CPER "FRU Memory Poison" section format. + + Memory will be retired during boot time and run time depending on + platform-specific policies. + endif diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile index 6437124a20bf..562a79c84a7f 100644 --- a/drivers/ras/Makefile +++ b/drivers/ras/Makefile @@ -5,4 +5,5 @@ obj-$(CONFIG_RAS_CEC) += cec.o obj-$(CONFIG_ARCH_HISI) += hisilicon/ +obj-$(CONFIG_RAS_FMPM) += amd/fmpm.o obj-y += amd/atl/ diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c new file mode 100644 index 000000000000..80dd112b720a --- /dev/null +++ b/drivers/ras/amd/fmpm.c @@ -0,0 +1,812 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * FRU (Field-Replaceable Unit) Memory Poison Manager + * + * Copyright (c) 2024, Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Authors: + * Naveen Krishna Chatradhi + * Muralidhara M K + * Yazen Ghannam + * + * Implementation notes, assumptions, and limitations: + * + * - FRU memory poison section and memory poison descriptor definitions are not yet + * included in the UEFI specification. So they are defined here. Afterwards, they + * may be moved to linux/cper.h, if appropriate. + * + * - Platforms based on AMD MI300 systems will be the first to use these structures. + * There are a number of assumptions made here that will need to be generalized + * to support other platforms. + * + * AMD MI300-based platform(s) assumptions: + * - Memory errors are reported through x86 MCA. + * - The entire DRAM row containing a memory error should be retired. + * - There will be (1) FRU memory poison section per CPER. + * - The FRU will be the CPU package (processor socket). + * - The default number of memory poison descriptor entries should be (8). + * - The platform will use ACPI ERST for persistent storage. + * - All FRU records should be saved to persistent storage. Module init will + * fail if any FRU record is not successfully written. + * + * - Boot time memory retirement may occur later than ideal due to dependencies + * on other libraries and drivers. This leaves a gap where bad memory may be + * accessed during early boot stages. + * + * - Enough memory should be pre-allocated for each FRU record to be able to hold + * the expected number of descriptor entries. This, mostly empty, record is + * written to storage during init time. Subsequent writes to the same record + * should allow the Platform to update the stored record in-place. Otherwise, + * if the record is extended, then the Platform may need to perform costly memory + * management operations on the storage. For example, the Platform may spend time + * in Firmware copying and invalidating memory on a relatively slow SPI ROM. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +#include + +#include +#include + +#define INVALID_CPU UINT_MAX + +/* Validation Bits */ +#define FMP_VALID_ARCH_TYPE BIT_ULL(0) +#define FMP_VALID_ARCH BIT_ULL(1) +#define FMP_VALID_ID_TYPE BIT_ULL(2) +#define FMP_VALID_ID BIT_ULL(3) +#define FMP_VALID_LIST_ENTRIES BIT_ULL(4) +#define FMP_VALID_LIST BIT_ULL(5) + +/* FRU Architecture Types */ +#define FMP_ARCH_TYPE_X86_CPUID_1_EAX 0 + +/* FRU ID Types */ +#define FMP_ID_TYPE_X86_PPIN 0 + +/* FRU Memory Poison Section */ +struct cper_sec_fru_mem_poison { + u32 checksum; + u64 validation_bits; + u32 fru_arch_type; + u64 fru_arch; + u32 fru_id_type; + u64 fru_id; + u32 nr_entries; +} __packed; + +/* FRU Descriptor ID Types */ +#define FPD_HW_ID_TYPE_MCA_IPID 0 + +/* FRU Descriptor Address Types */ +#define FPD_ADDR_TYPE_MCA_ADDR 0 + +/* Memory Poison Descriptor */ +struct cper_fru_poison_desc { + u64 timestamp; + u32 hw_id_type; + u64 hw_id; + u32 addr_type; + u64 addr; +} __packed; + +/* Collection of headers and sections for easy pointer use. */ +struct fru_rec { + struct cper_record_header hdr; + struct cper_section_descriptor sec_desc; + struct cper_sec_fru_mem_poison fmp; + struct cper_fru_poison_desc entries[]; +} __packed; + +/* + * Pointers to the complete CPER record of each FRU. + * + * Memory allocation will include padded space for descriptor entries. + */ +static struct fru_rec **fru_records; + +#define CPER_CREATOR_FMP \ + GUID_INIT(0xcd5c2993, 0xf4b2, 0x41b2, 0xb5, 0xd4, 0xf9, 0xc3, \ + 0xa0, 0x33, 0x08, 0x75) + +#define CPER_SECTION_TYPE_FMP \ + GUID_INIT(0x5e4706c1, 0x5356, 0x48c6, 0x93, 0x0b, 0x52, 0xf2, \ + 0x12, 0x0a, 0x44, 0x58) + +/** + * DOC: fru_poison_entries (byte) + * Maximum number of descriptor entries possible for each FRU. + * + * Values between '1' and '255' are valid. + * No input or '0' will default to FMPM_DEFAULT_MAX_NR_ENTRIES. + */ +static u8 max_nr_entries; +module_param(max_nr_entries, byte, 0644); +MODULE_PARM_DESC(max_nr_entries, + "Maximum number of memory poison descriptor entries per FRU"); + +#define FMPM_DEFAULT_MAX_NR_ENTRIES 8 + +/* Maximum number of FRUs in the system. */ +#define FMPM_MAX_NR_FRU 256 +static unsigned int max_nr_fru; + +/* Total length of record including headers and list of descriptor entries. */ +static size_t max_rec_len; + +/* + * Protect the local records cache in fru_records and prevent concurrent + * writes to storage. This is only needed after init once notifier block + * registration is done. + */ +static DEFINE_MUTEX(fmpm_update_mutex); + +#define for_each_fru(i, rec) \ + for (i = 0; rec = fru_records[i], i < max_nr_fru; i++) + +static inline u32 get_fmp_len(struct fru_rec *rec) +{ + return rec->sec_desc.section_length - sizeof(struct cper_section_descriptor); +} + +static struct fru_rec *get_fru_record(u64 fru_id) +{ + struct fru_rec *rec; + unsigned int i; + + for_each_fru(i, rec) { + if (rec->fmp.fru_id == fru_id) + return rec; + } + + pr_debug("Record not found for FRU 0x%016llx\n", fru_id); + + return NULL; +} + +/* + * Sum up all bytes within the FRU Memory Poison Section including the Memory + * Poison Descriptor entries. + * + * Don't include the old checksum here. It's a u32 value, so summing each of its + * bytes will give the wrong total. + */ +static u32 do_fmp_checksum(struct cper_sec_fru_mem_poison *fmp, u32 len) +{ + u32 checksum = 0; + u8 *buf, *end; + + /* Skip old checksum. */ + buf = (u8 *)fmp + sizeof(u32); + end = buf + len; + + while (buf < end) + checksum += (u8)(*(buf++)); + + return checksum; +} + +static int update_record_on_storage(struct fru_rec *rec) +{ + u32 len, checksum; + int ret; + + /* Calculate a new checksum. */ + len = get_fmp_len(rec); + + /* Get the current total. */ + checksum = do_fmp_checksum(&rec->fmp, len); + + /* Use the complement value. */ + rec->fmp.checksum = -checksum; + + pr_debug("Writing to storage\n"); + + ret = erst_write(&rec->hdr); + if (ret) { + pr_warn("Storage update failed for FRU 0x%016llx\n", rec->fmp.fru_id); + + if (ret == -ENOSPC) + pr_warn("Not enough space on storage\n"); + } + + return ret; +} + +static bool rec_has_valid_entries(struct fru_rec *rec) +{ + if (!(rec->fmp.validation_bits & FMP_VALID_LIST_ENTRIES)) + return false; + + if (!(rec->fmp.validation_bits & FMP_VALID_LIST)) + return false; + + return true; +} + +static bool fpds_equal(struct cper_fru_poison_desc *old, struct cper_fru_poison_desc *new) +{ + /* + * Ignore timestamp field. + * The same physical error may be reported multiple times due to stuck bits, etc. + * + * Also, order the checks from most->least likely to fail to shortcut the code. + */ + if (old->addr != new->addr) + return false; + + if (old->hw_id != new->hw_id) + return false; + + if (old->addr_type != new->addr_type) + return false; + + if (old->hw_id_type != new->hw_id_type) + return false; + + return true; +} + +static bool rec_has_fpd(struct fru_rec *rec, struct cper_fru_poison_desc *fpd) +{ + unsigned int i; + + for (i = 0; i < rec->fmp.nr_entries; i++) { + struct cper_fru_poison_desc *fpd_i = &rec->entries[i]; + + if (fpds_equal(fpd_i, fpd)) { + pr_debug("Found duplicate record\n"); + return true; + } + } + + return false; +} + +static void update_fru_record(struct fru_rec *rec, struct mce *m) +{ + struct cper_sec_fru_mem_poison *fmp = &rec->fmp; + struct cper_fru_poison_desc fpd, *fpd_dest; + u32 entry = 0; + + mutex_lock(&fmpm_update_mutex); + + memset(&fpd, 0, sizeof(struct cper_fru_poison_desc)); + + fpd.timestamp = m->time; + fpd.hw_id_type = FPD_HW_ID_TYPE_MCA_IPID; + fpd.hw_id = m->ipid; + fpd.addr_type = FPD_ADDR_TYPE_MCA_ADDR; + fpd.addr = m->addr; + + /* This is the first entry, so just save it. */ + if (!rec_has_valid_entries(rec)) + goto save_fpd; + + /* Ignore already recorded errors. */ + if (rec_has_fpd(rec, &fpd)) + goto out_unlock; + + if (rec->fmp.nr_entries >= max_nr_entries) { + pr_warn("Exceeded number of entries for FRU 0x%016llx\n", rec->fmp.fru_id); + goto out_unlock; + } + + entry = fmp->nr_entries; + +save_fpd: + fpd_dest = &rec->entries[entry]; + memcpy(fpd_dest, &fpd, sizeof(struct cper_fru_poison_desc)); + + fmp->nr_entries = entry + 1; + fmp->validation_bits |= FMP_VALID_LIST_ENTRIES; + fmp->validation_bits |= FMP_VALID_LIST; + + pr_debug("Updated FRU 0x%016llx entry #%u\n", fmp->fru_id, entry); + + update_record_on_storage(rec); + +out_unlock: + mutex_unlock(&fmpm_update_mutex); +} + +static void retire_dram_row(u64 addr, u64 id, u32 cpu) +{ + struct atl_err a_err; + + memset(&a_err, 0, sizeof(struct atl_err)); + + a_err.addr = addr; + a_err.ipid = id; + a_err.cpu = cpu; + + amd_retire_dram_row(&a_err); +} + +static int fru_handle_mem_poison(struct notifier_block *nb, unsigned long val, void *data) +{ + struct mce *m = (struct mce *)data; + struct fru_rec *rec; + + if (!mce_is_memory_error(m)) + return NOTIFY_DONE; + + retire_dram_row(m->addr, m->ipid, m->extcpu); + + /* + * An invalid FRU ID should not happen on real errors. But it + * could happen from software error injection, etc. + */ + rec = get_fru_record(m->ppin); + if (!rec) + return NOTIFY_DONE; + + update_fru_record(rec, m); + + return NOTIFY_OK; +} + +static struct notifier_block fru_mem_poison_nb = { + .notifier_call = fru_handle_mem_poison, + .priority = MCE_PRIO_LOWEST, +}; + +static void retire_mem_fmp(struct fru_rec *rec) +{ + struct cper_sec_fru_mem_poison *fmp = &rec->fmp; + unsigned int i, cpu; + + for (i = 0; i < fmp->nr_entries; i++) { + struct cper_fru_poison_desc *fpd = &rec->entries[i]; + unsigned int err_cpu = INVALID_CPU; + + if (fpd->hw_id_type != FPD_HW_ID_TYPE_MCA_IPID) + continue; + + if (fpd->addr_type != FPD_ADDR_TYPE_MCA_ADDR) + continue; + + cpus_read_lock(); + for_each_online_cpu(cpu) { + if (topology_ppin(cpu) == fmp->fru_id) { + err_cpu = cpu; + break; + } + } + cpus_read_unlock(); + + if (err_cpu == INVALID_CPU) + continue; + + retire_dram_row(fpd->addr, fpd->hw_id, err_cpu); + } +} + +static void retire_mem_records(void) +{ + struct fru_rec *rec; + unsigned int i; + + for_each_fru(i, rec) { + if (!rec_has_valid_entries(rec)) + continue; + + retire_mem_fmp(rec); + } +} + +/* Set the CPER Record Header and CPER Section Descriptor fields. */ +static void set_rec_fields(struct fru_rec *rec) +{ + struct cper_section_descriptor *sec_desc = &rec->sec_desc; + struct cper_record_header *hdr = &rec->hdr; + + memcpy(hdr->signature, CPER_SIG_RECORD, CPER_SIG_SIZE); + hdr->revision = CPER_RECORD_REV; + hdr->signature_end = CPER_SIG_END; + + /* + * Currently, it is assumed that there is one FRU Memory Poison + * section per CPER. But this may change for other implementations. + */ + hdr->section_count = 1; + + /* The logged errors are recoverable. Otherwise, they'd never make it here. */ + hdr->error_severity = CPER_SEV_RECOVERABLE; + + hdr->validation_bits = 0; + hdr->record_length = max_rec_len; + hdr->creator_id = CPER_CREATOR_FMP; + hdr->notification_type = CPER_NOTIFY_MCE; + hdr->record_id = cper_next_record_id(); + hdr->flags = CPER_HW_ERROR_FLAGS_PREVERR; + + sec_desc->section_offset = sizeof(struct cper_record_header); + sec_desc->section_length = max_rec_len - sizeof(struct cper_record_header); + sec_desc->revision = CPER_SEC_REV; + sec_desc->validation_bits = 0; + sec_desc->flags = CPER_SEC_PRIMARY; + sec_desc->section_type = CPER_SECTION_TYPE_FMP; + sec_desc->section_severity = CPER_SEV_RECOVERABLE; +} + +static int save_new_records(void) +{ + DECLARE_BITMAP(new_records, FMPM_MAX_NR_FRU); + struct fru_rec *rec; + unsigned int i; + int ret = 0; + + for_each_fru(i, rec) { + if (rec->hdr.record_length) + continue; + + set_rec_fields(rec); + + ret = update_record_on_storage(rec); + if (ret) + goto out_clear; + + set_bit(i, new_records); + } + + return ret; + +out_clear: + for_each_fru(i, rec) { + if (!test_bit(i, new_records)) + continue; + + erst_clear(rec->hdr.record_id); + } + + return ret; +} + +/* Check that the record matches expected types for the current system.*/ +static bool fmp_is_usable(struct fru_rec *rec) +{ + struct cper_sec_fru_mem_poison *fmp = &rec->fmp; + u64 cpuid; + + pr_debug("Validation bits: 0x%016llx\n", fmp->validation_bits); + + if (!(fmp->validation_bits & FMP_VALID_ARCH_TYPE)) { + pr_debug("Arch type unknown\n"); + return false; + } + + if (fmp->fru_arch_type != FMP_ARCH_TYPE_X86_CPUID_1_EAX) { + pr_debug("Arch type not 'x86 Family/Model/Stepping'\n"); + return false; + } + + if (!(fmp->validation_bits & FMP_VALID_ARCH)) { + pr_debug("Arch value unknown\n"); + return false; + } + + cpuid = cpuid_eax(1); + if (fmp->fru_arch != cpuid) { + pr_debug("Arch value mismatch: record = 0x%016llx, system = 0x%016llx\n", + fmp->fru_arch, cpuid); + return false; + } + + if (!(fmp->validation_bits & FMP_VALID_ID_TYPE)) { + pr_debug("FRU ID type unknown\n"); + return false; + } + + if (fmp->fru_id_type != FMP_ID_TYPE_X86_PPIN) { + pr_debug("FRU ID type is not 'x86 PPIN'\n"); + return false; + } + + if (!(fmp->validation_bits & FMP_VALID_ID)) { + pr_debug("FRU ID value unknown\n"); + return false; + } + + return true; +} + +static bool fmp_is_valid(struct fru_rec *rec) +{ + struct cper_sec_fru_mem_poison *fmp = &rec->fmp; + u32 checksum, len; + + len = get_fmp_len(rec); + if (len < sizeof(struct cper_sec_fru_mem_poison)) { + pr_debug("fmp length is too small\n"); + return false; + } + + /* Checksum must sum to zero for the entire section. */ + checksum = do_fmp_checksum(fmp, len) + fmp->checksum; + if (checksum) { + pr_debug("fmp checksum failed: sum = 0x%x\n", checksum); + print_hex_dump_debug("fmp record: ", DUMP_PREFIX_NONE, 16, 1, fmp, len, false); + return false; + } + + if (!fmp_is_usable(rec)) + return false; + + return true; +} + +static struct fru_rec *get_valid_record(struct fru_rec *old) +{ + struct fru_rec *new; + + if (!fmp_is_valid(old)) { + pr_debug("Ignoring invalid record\n"); + return NULL; + } + + new = get_fru_record(old->fmp.fru_id); + if (!new) + pr_debug("Ignoring record for absent FRU\n"); + + return new; +} + +/* + * Fetch saved records from persistent storage. + * + * For each found record: + * - If it was not created by this module, then ignore it. + * - If it is valid, then copy its data to the local cache. + * - If it is not valid, then erase it. + */ +static int get_saved_records(void) +{ + struct fru_rec *old, *new; + u64 record_id; + int ret, pos; + ssize_t len; + + /* + * Assume saved records match current max size. + * + * However, this may not be true depending on module parameters. + */ + old = kmalloc(max_rec_len, GFP_KERNEL); + if (!old) { + ret = -ENOMEM; + goto out; + } + + ret = erst_get_record_id_begin(&pos); + if (ret < 0) + goto out_end; + + while (!erst_get_record_id_next(&pos, &record_id)) { + if (record_id == APEI_ERST_INVALID_RECORD_ID) + goto out_end; + /* + * Make sure to clear temporary buffer between reads to avoid + * leftover data from records of various sizes. + */ + memset(old, 0, max_rec_len); + + len = erst_read_record(record_id, &old->hdr, max_rec_len, + sizeof(struct fru_rec), &CPER_CREATOR_FMP); + if (len < 0) + continue; + + if (len > max_rec_len) { + pr_debug("Found record larger than max_rec_len\n"); + continue; + } + + new = get_valid_record(old); + if (!new) + erst_clear(record_id); + + /* Restore the record */ + memcpy(new, old, len); + } + +out_end: + erst_get_record_id_end(); + kfree(old); +out: + return ret; +} + +static void set_fmp_fields(struct fru_rec *rec, unsigned int cpu) +{ + struct cper_sec_fru_mem_poison *fmp = &rec->fmp; + + fmp->fru_arch_type = FMP_ARCH_TYPE_X86_CPUID_1_EAX; + fmp->validation_bits |= FMP_VALID_ARCH_TYPE; + + /* Assume all CPUs in the system have the same value for now. */ + fmp->fru_arch = cpuid_eax(1); + fmp->validation_bits |= FMP_VALID_ARCH; + + fmp->fru_id_type = FMP_ID_TYPE_X86_PPIN; + fmp->validation_bits |= FMP_VALID_ID_TYPE; + + fmp->fru_id = topology_ppin(cpu); + fmp->validation_bits |= FMP_VALID_ID; +} + +static int init_fmps(void) +{ + struct fru_rec *rec; + unsigned int i, cpu; + int ret = 0; + + for_each_fru(i, rec) { + unsigned int fru_cpu = INVALID_CPU; + + cpus_read_lock(); + for_each_online_cpu(cpu) { + if (topology_physical_package_id(cpu) == i) { + fru_cpu = cpu; + break; + } + } + cpus_read_unlock(); + + if (fru_cpu == INVALID_CPU) { + pr_debug("Failed to find matching CPU for FRU #%u\n", i); + ret = -ENODEV; + break; + } + + set_fmp_fields(rec, fru_cpu); + } + + return ret; +} + +static int get_system_info(void) +{ + /* Only load on MI300A systems for now. */ + if (!(boot_cpu_data.x86_model >= 0x90 && + boot_cpu_data.x86_model <= 0x9f)) + return -ENODEV; + + if (!cpu_feature_enabled(X86_FEATURE_AMD_PPIN)) { + pr_debug("PPIN feature not available\n"); + return -ENODEV; + } + + /* Use CPU socket as FRU for MI300 systems. */ + max_nr_fru = topology_max_packages(); + if (!max_nr_fru) + return -ENODEV; + + if (max_nr_fru > FMPM_MAX_NR_FRU) { + pr_warn("Too many FRUs to manage: found: %u, max: %u\n", + max_nr_fru, FMPM_MAX_NR_FRU); + return -ENODEV; + } + + if (!max_nr_entries) + max_nr_entries = FMPM_DEFAULT_MAX_NR_ENTRIES; + + max_rec_len = sizeof(struct fru_rec); + max_rec_len += sizeof(struct cper_fru_poison_desc) * max_nr_entries; + + pr_info("max FRUs: %u, max entries: %u, max record length: %lu\n", + max_nr_fru, max_nr_entries, max_rec_len); + + return 0; +} + +static void free_records(void) +{ + struct fru_rec *rec; + int i; + + for_each_fru(i, rec) + kfree(rec); + + kfree(fru_records); +} + +static int allocate_records(void) +{ + int i, ret = 0; + + fru_records = kcalloc(max_nr_fru, sizeof(struct fru_rec *), GFP_KERNEL); + if (!fru_records) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < max_nr_fru; i++) { + fru_records[i] = kzalloc(max_rec_len, GFP_KERNEL); + if (!fru_records[i]) { + ret = -ENOMEM; + goto out_free; + } + } + + return ret; + +out_free: + for (; i >= 0; i--) + kfree(fru_records[i]); + + kfree(fru_records); +out: + return ret; +} + +static const struct x86_cpu_id fmpm_cpuids[] = { + X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL), + { } +}; +MODULE_DEVICE_TABLE(x86cpu, fmpm_cpuids); + +static int __init fru_mem_poison_init(void) +{ + int ret; + + if (!x86_match_cpu(fmpm_cpuids)) { + ret = -ENODEV; + goto out; + } + + if (erst_disable) { + pr_debug("ERST not available\n"); + ret = -ENODEV; + goto out; + } + + ret = get_system_info(); + if (ret) + goto out; + + ret = allocate_records(); + if (ret) + goto out; + + ret = init_fmps(); + if (ret) + goto out_free; + + ret = get_saved_records(); + if (ret) + goto out_free; + + ret = save_new_records(); + if (ret) + goto out_free; + + retire_mem_records(); + + mce_register_decode_chain(&fru_mem_poison_nb); + + pr_info("FRU Memory Poison Manager initialized\n"); + return 0; + +out_free: + free_records(); +out: + return ret; +} + +static void __exit fru_mem_poison_exit(void) +{ + mce_unregister_decode_chain(&fru_mem_poison_nb); + free_records(); +} + +module_init(fru_mem_poison_init); +module_exit(fru_mem_poison_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("FRU Memory Poison Manager"); -- Gitee From 83cf7e66623296341ff0b04199851ba40bd56cfd Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 1 Mar 2024 08:37:47 -0600 Subject: [PATCH 13/23] RAS/AMD/FMPM: Save SPA values mainline inclusion from mainline-v6.11-rc1 commit 838850c50884cdd1c96fce1063ef918c394d4bdc category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/838850c50884cdd1c96fce1063ef918c394d4bdc -------------------------------- commit 838850c50884cdd1c96fce1063ef918c394d4bdc upstream The system physical address (SPA) of an error is not a stable value. It will change depending on the location of the memory: parts can be swapped. And it will change depending on memory topology: NUMA nodes and/or interleaving can be adjusted. Therefore, the SPA value is not part of the "FRU Memory Poison" record format. And it will not be saved to persistent storage. However, the SPA values can be helpful during debug and for system admins during run time. Save the SPA values in a separate structure. This is updated when records are restored and when new errors are saved. [ bp: Make error messages more user friendly and add and correct comments. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240301143748.854090-3-yazen.ghannam@amd.com Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- drivers/ras/amd/fmpm.c | 72 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c index 80dd112b720a..8c3188488673 100644 --- a/drivers/ras/amd/fmpm.c +++ b/drivers/ras/amd/fmpm.c @@ -111,6 +111,11 @@ struct fru_rec { */ static struct fru_rec **fru_records; +/* system physical addresses array */ +static u64 *spa_entries; + +#define INVALID_SPA ~0ULL + #define CPER_CREATOR_FMP \ GUID_INIT(0xcd5c2993, 0xf4b2, 0x41b2, 0xb5, 0xd4, 0xf9, 0xc3, \ 0xa0, 0x33, 0x08, 0x75) @@ -120,7 +125,7 @@ static struct fru_rec **fru_records; 0x12, 0x0a, 0x44, 0x58) /** - * DOC: fru_poison_entries (byte) + * DOC: max_nr_entries (byte) * Maximum number of descriptor entries possible for each FRU. * * Values between '1' and '255' are valid. @@ -140,6 +145,9 @@ static unsigned int max_nr_fru; /* Total length of record including headers and list of descriptor entries. */ static size_t max_rec_len; +/* Total number of SPA entries across all FRUs. */ +static unsigned int spa_nr_entries; + /* * Protect the local records cache in fru_records and prevent concurrent * writes to storage. This is only needed after init once notifier block @@ -269,6 +277,54 @@ static bool rec_has_fpd(struct fru_rec *rec, struct cper_fru_poison_desc *fpd) return false; } +static void save_spa(struct fru_rec *rec, unsigned int entry, + u64 addr, u64 id, unsigned int cpu) +{ + unsigned int i, fru_idx, spa_entry; + struct atl_err a_err; + unsigned long spa; + + if (entry >= max_nr_entries) { + pr_warn_once("FRU descriptor entry %d out-of-bounds (max: %d)\n", + entry, max_nr_entries); + return; + } + + /* spa_nr_entries is always multiple of max_nr_entries */ + for (i = 0; i < spa_nr_entries; i += max_nr_entries) { + fru_idx = i / max_nr_entries; + if (fru_records[fru_idx] == rec) + break; + } + + if (i >= spa_nr_entries) { + pr_warn_once("FRU record %d not found\n", i); + return; + } + + spa_entry = i + entry; + if (spa_entry >= spa_nr_entries) { + pr_warn_once("spa_entries[] index out-of-bounds\n"); + return; + } + + memset(&a_err, 0, sizeof(struct atl_err)); + + a_err.addr = addr; + a_err.ipid = id; + a_err.cpu = cpu; + + spa = amd_convert_umc_mca_addr_to_sys_addr(&a_err); + if (IS_ERR_VALUE(spa)) { + pr_debug("Failed to get system address\n"); + return; + } + + spa_entries[spa_entry] = spa; + pr_debug("fru_idx: %u, entry: %u, spa_entry: %u, spa: 0x%016llx\n", + fru_idx, entry, spa_entry, spa_entries[spa_entry]); +} + static void update_fru_record(struct fru_rec *rec, struct mce *m) { struct cper_sec_fru_mem_poison *fmp = &rec->fmp; @@ -301,6 +357,7 @@ static void update_fru_record(struct fru_rec *rec, struct mce *m) entry = fmp->nr_entries; save_fpd: + save_spa(rec, entry, m->addr, m->ipid, m->extcpu); fpd_dest = &rec->entries[entry]; memcpy(fpd_dest, &fpd, sizeof(struct cper_fru_poison_desc)); @@ -385,6 +442,7 @@ static void retire_mem_fmp(struct fru_rec *rec) continue; retire_dram_row(fpd->addr, fpd->hw_id, err_cpu); + save_spa(rec, i, fpd->addr, fpd->hw_id, err_cpu); } } @@ -696,6 +754,8 @@ static int get_system_info(void) if (!max_nr_entries) max_nr_entries = FMPM_DEFAULT_MAX_NR_ENTRIES; + spa_nr_entries = max_nr_fru * max_nr_entries; + max_rec_len = sizeof(struct fru_rec); max_rec_len += sizeof(struct cper_fru_poison_desc) * max_nr_entries; @@ -714,6 +774,7 @@ static void free_records(void) kfree(rec); kfree(fru_records); + kfree(spa_entries); } static int allocate_records(void) @@ -734,6 +795,15 @@ static int allocate_records(void) } } + spa_entries = kcalloc(spa_nr_entries, sizeof(u64), GFP_KERNEL); + if (!spa_entries) { + ret = -ENOMEM; + goto out_free; + } + + for (i = 0; i < spa_nr_entries; i++) + spa_entries[i] = INVALID_SPA; + return ret; out_free: -- Gitee From d649c000c176d1c66294f86e0d26ee721421cb5d Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 1 Mar 2024 08:37:48 -0600 Subject: [PATCH 14/23] RAS/AMD/FMPM: Add debugfs interface to print record entries mainline inclusion from mainline-v6.11-rc1 commit 7d19eea51757ad72faf4b0493e5bde85ca62012e category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/7d19eea51757ad72faf4b0493e5bde85ca62012e -------------------------------- commit 7d19eea51757ad72faf4b0493e5bde85ca62012e upstream It is helpful to see the saved record entries during run time in human-readable format. This is useful for testing during module development. It can also be used by system admins to quickly and easily see the state of the system. Provide a sequential file in debugfs to print fields of interest from the FRU records and their entries. Don't fail to load the module if the debugfs interface is not available. This is a convenience feature which does not affect other module functionality. The new interface reads the record entries and should hold the mutex. Expand the mutex code comment to clarify when it should be held. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240301143748.854090-4-yazen.ghannam@amd.com Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- drivers/ras/amd/fmpm.c | 131 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c index 8c3188488673..0963c9e7b853 100644 --- a/drivers/ras/amd/fmpm.c +++ b/drivers/ras/amd/fmpm.c @@ -54,6 +54,8 @@ #include #include +#include "../debugfs.h" + #define INVALID_CPU UINT_MAX /* Validation Bits */ @@ -116,6 +118,9 @@ static u64 *spa_entries; #define INVALID_SPA ~0ULL +static struct dentry *fmpm_dfs_dir; +static struct dentry *fmpm_dfs_entries; + #define CPER_CREATOR_FMP \ GUID_INIT(0xcd5c2993, 0xf4b2, 0x41b2, 0xb5, 0xd4, 0xf9, 0xc3, \ 0xa0, 0x33, 0x08, 0x75) @@ -152,6 +157,11 @@ static unsigned int spa_nr_entries; * Protect the local records cache in fru_records and prevent concurrent * writes to storage. This is only needed after init once notifier block * registration is done. + * + * The majority of a record is fixed at module init and will not change + * during run time. The entries within a record will be updated as new + * errors are reported. The mutex should be held whenever the entries are + * accessed during run time. */ static DEFINE_MUTEX(fmpm_update_mutex); @@ -815,6 +825,124 @@ static int allocate_records(void) return ret; } +static void *fmpm_start(struct seq_file *f, loff_t *pos) +{ + if (*pos >= (spa_nr_entries + 1)) + return NULL; + return pos; +} + +static void *fmpm_next(struct seq_file *f, void *data, loff_t *pos) +{ + if (++(*pos) >= (spa_nr_entries + 1)) + return NULL; + return pos; +} + +static void fmpm_stop(struct seq_file *f, void *data) +{ +} + +#define SHORT_WIDTH 8 +#define U64_WIDTH 18 +#define TIMESTAMP_WIDTH 19 +#define LONG_WIDTH 24 +#define U64_PAD (LONG_WIDTH - U64_WIDTH) +#define TS_PAD (LONG_WIDTH - TIMESTAMP_WIDTH) +static int fmpm_show(struct seq_file *f, void *data) +{ + unsigned int fru_idx, entry, spa_entry, line; + struct cper_fru_poison_desc *fpd; + struct fru_rec *rec; + + line = *(loff_t *)data; + if (line == 0) { + seq_printf(f, "%-*s", SHORT_WIDTH, "fru_idx"); + seq_printf(f, "%-*s", LONG_WIDTH, "fru_id"); + seq_printf(f, "%-*s", SHORT_WIDTH, "entry"); + seq_printf(f, "%-*s", LONG_WIDTH, "timestamp"); + seq_printf(f, "%-*s", LONG_WIDTH, "hw_id"); + seq_printf(f, "%-*s", LONG_WIDTH, "addr"); + seq_printf(f, "%-*s", LONG_WIDTH, "spa"); + goto out_newline; + } + + spa_entry = line - 1; + fru_idx = spa_entry / max_nr_entries; + entry = spa_entry % max_nr_entries; + + rec = fru_records[fru_idx]; + if (!rec) + goto out; + + seq_printf(f, "%-*u", SHORT_WIDTH, fru_idx); + seq_printf(f, "0x%016llx%-*s", rec->fmp.fru_id, U64_PAD, ""); + seq_printf(f, "%-*u", SHORT_WIDTH, entry); + + mutex_lock(&fmpm_update_mutex); + + if (entry >= rec->fmp.nr_entries) { + seq_printf(f, "%-*s", LONG_WIDTH, "*"); + seq_printf(f, "%-*s", LONG_WIDTH, "*"); + seq_printf(f, "%-*s", LONG_WIDTH, "*"); + seq_printf(f, "%-*s", LONG_WIDTH, "*"); + goto out_unlock; + } + + fpd = &rec->entries[entry]; + + seq_printf(f, "%ptT%-*s", &fpd->timestamp, TS_PAD, ""); + seq_printf(f, "0x%016llx%-*s", fpd->hw_id, U64_PAD, ""); + seq_printf(f, "0x%016llx%-*s", fpd->addr, U64_PAD, ""); + + if (spa_entries[spa_entry] == INVALID_SPA) + seq_printf(f, "%-*s", LONG_WIDTH, "*"); + else + seq_printf(f, "0x%016llx%-*s", spa_entries[spa_entry], U64_PAD, ""); + +out_unlock: + mutex_unlock(&fmpm_update_mutex); +out_newline: + seq_putc(f, '\n'); +out: + return 0; +} + +static const struct seq_operations fmpm_seq_ops = { + .start = fmpm_start, + .next = fmpm_next, + .stop = fmpm_stop, + .show = fmpm_show, +}; + +static int fmpm_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &fmpm_seq_ops); +} + +static const struct file_operations fmpm_fops = { + .open = fmpm_open, + .release = seq_release, + .read = seq_read, + .llseek = seq_lseek, +}; + +static void setup_debugfs(void) +{ + struct dentry *dfs = ras_get_debugfs_root(); + + if (!dfs) + return; + + fmpm_dfs_dir = debugfs_create_dir("fmpm", dfs); + if (!fmpm_dfs_dir) + return; + + fmpm_dfs_entries = debugfs_create_file("entries", 0400, fmpm_dfs_dir, NULL, &fmpm_fops); + if (!fmpm_dfs_entries) + debugfs_remove(fmpm_dfs_dir); +} + static const struct x86_cpu_id fmpm_cpuids[] = { X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL), { } @@ -856,6 +984,8 @@ static int __init fru_mem_poison_init(void) if (ret) goto out_free; + setup_debugfs(); + retire_mem_records(); mce_register_decode_chain(&fru_mem_poison_nb); @@ -872,6 +1002,7 @@ static int __init fru_mem_poison_init(void) static void __exit fru_mem_poison_exit(void) { mce_unregister_decode_chain(&fru_mem_poison_nb); + debugfs_remove(fmpm_dfs_dir); free_records(); } -- Gitee From d5054ed80bc90920aae3d6ff9b3e8c1807347af2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 6 Mar 2024 08:30:46 +0300 Subject: [PATCH 15/23] RAS/AMD/FMPM: Fix off by one when unwinding on error mainline inclusion from mainline-v6.11-rc1 commit bd17b7c34fadef645becde1245b9394f69f31702 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/bd17b7c34fadef645becde1245b9394f69f31702 -------------------------------- commit bd17b7c34fadef645becde1245b9394f69f31702 upstream Decrement the index variable i before the first iteration when freeing the remaining elements on error. Depending on where this fails it could free something from one element beyond the end of the fru_records[] array. [ bp: Massage commit message. ] Fixes: 6f15e617cc99 ("RAS: Introduce a FRU memory poison manager") Signed-off-by: Dan Carpenter Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/6fdec71a-846b-4cd0-af69-e5f6cd12f4f6@moroto.mountain Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- drivers/ras/amd/fmpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c index 0963c9e7b853..2f4ac9591c8f 100644 --- a/drivers/ras/amd/fmpm.c +++ b/drivers/ras/amd/fmpm.c @@ -817,7 +817,7 @@ static int allocate_records(void) return ret; out_free: - for (; i >= 0; i--) + while (--i >= 0) kfree(fru_records[i]); kfree(fru_records); -- Gitee From 92499add77932c253c79d6392d98379bb18b8891 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 19 Mar 2024 06:33:21 -0500 Subject: [PATCH 16/23] RAS/AMD/FMPM: Avoid NULL ptr deref in get_saved_records() mainline inclusion from mainline-v6.11-rc1 commit 4b0e527c9970a15ac9ec8fc44af957725b854c29 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/4b0e527c9970a15ac9ec8fc44af957725b854c29 -------------------------------- commit 4b0e527c9970a15ac9ec8fc44af957725b854c29 upstream An old, invalid record should be cleared and skipped. Currently, the record is cleared in ERST, but it is not skipped. This leads to a NULL pointer dereference when attempting to copy the old record to the new record. Continue the loop after clearing an old, invalid record to skip it. Fixes: 6f15e617cc99 ("RAS: Introduce a FRU memory poison manager") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Tested-by: Muralidhara M K Link: https://lore.kernel.org/r/20240319113322.280096-2-yazen.ghannam@amd.com Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- drivers/ras/amd/fmpm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c index 2f4ac9591c8f..9d25195b4538 100644 --- a/drivers/ras/amd/fmpm.c +++ b/drivers/ras/amd/fmpm.c @@ -676,8 +676,10 @@ static int get_saved_records(void) } new = get_valid_record(old); - if (!new) + if (!new) { erst_clear(record_id); + continue; + } /* Restore the record */ memcpy(new, old, len); -- Gitee From 1b6ad6dd80b7095ec6a14dc0675f674248e53ddc Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 19 Mar 2024 06:33:22 -0500 Subject: [PATCH 17/23] RAS/AMD/FMPM: Safely handle saved records of various sizes mainline inclusion from mainline-v6.11-rc1 commit 9b195439e0c54b9b9b55b2d68706b48739911519 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/9b195439e0c54b9b9b55b2d68706b48739911519 -------------------------------- commit 9b195439e0c54b9b9b55b2d68706b48739911519 upstream Currently, the size of the locally cached FRU record structures is based on the module parameter "max_nr_entries". This creates issues when restoring records if a user changes the parameter. If the number of entries is reduced, then old, larger records will not be restored. The opportunity to take action on the saved data is missed. Also, new records will be created and written to storage, even as the old records remain in storage, resulting in wasted space. If the number of entries is increased, then the length of the old, smaller records will not be adjusted. This causes a checksum failure which leads to the old record being cleared from storage. Again this results in another missed opportunity for action on the saved data. Allocate the temporary record with the maximum possible size based on the current maximum number of supported entries (255). This allows the ERST read operation to succeed if max_nr_entries has been increased. Warn the user if a saved record exceeds the expected size and fail to load the module. This allows the user to adjust the module parameter without losing data or the opportunity to restore larger records. Increase the size of a saved record up to the current max_rec_len. The checksum will be recalculated, and the updated record will be written to storage. Fixes: 6f15e617cc99 ("RAS: Introduce a FRU memory poison manager") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Tested-by: Muralidhara M K Link: https://lore.kernel.org/r/20240319113322.280096-3-yazen.ghannam@amd.com Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- drivers/ras/amd/fmpm.c | 55 ++++++++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c index 9d25195b4538..271dfad05d68 100644 --- a/drivers/ras/amd/fmpm.c +++ b/drivers/ras/amd/fmpm.c @@ -150,6 +150,8 @@ static unsigned int max_nr_fru; /* Total length of record including headers and list of descriptor entries. */ static size_t max_rec_len; +#define FMPM_MAX_REC_LEN (sizeof(struct fru_rec) + (sizeof(struct cper_fru_poison_desc) * 255)) + /* Total number of SPA entries across all FRUs. */ static unsigned int spa_nr_entries; @@ -475,6 +477,16 @@ static void set_rec_fields(struct fru_rec *rec) struct cper_section_descriptor *sec_desc = &rec->sec_desc; struct cper_record_header *hdr = &rec->hdr; + /* + * This is a saved record created with fewer max_nr_entries. + * Update the record lengths and keep everything else as-is. + */ + if (hdr->record_length && hdr->record_length < max_rec_len) { + pr_debug("Growing record 0x%016llx from %u to %zu bytes\n", + hdr->record_id, hdr->record_length, max_rec_len); + goto update_lengths; + } + memcpy(hdr->signature, CPER_SIG_RECORD, CPER_SIG_SIZE); hdr->revision = CPER_RECORD_REV; hdr->signature_end = CPER_SIG_END; @@ -489,19 +501,21 @@ static void set_rec_fields(struct fru_rec *rec) hdr->error_severity = CPER_SEV_RECOVERABLE; hdr->validation_bits = 0; - hdr->record_length = max_rec_len; hdr->creator_id = CPER_CREATOR_FMP; hdr->notification_type = CPER_NOTIFY_MCE; hdr->record_id = cper_next_record_id(); hdr->flags = CPER_HW_ERROR_FLAGS_PREVERR; sec_desc->section_offset = sizeof(struct cper_record_header); - sec_desc->section_length = max_rec_len - sizeof(struct cper_record_header); sec_desc->revision = CPER_SEC_REV; sec_desc->validation_bits = 0; sec_desc->flags = CPER_SEC_PRIMARY; sec_desc->section_type = CPER_SECTION_TYPE_FMP; sec_desc->section_severity = CPER_SEV_RECOVERABLE; + +update_lengths: + hdr->record_length = max_rec_len; + sec_desc->section_length = max_rec_len - sizeof(struct cper_record_header); } static int save_new_records(void) @@ -512,16 +526,18 @@ static int save_new_records(void) int ret = 0; for_each_fru(i, rec) { - if (rec->hdr.record_length) + /* No need to update saved records that match the current record size. */ + if (rec->hdr.record_length == max_rec_len) continue; + if (!rec->hdr.record_length) + set_bit(i, new_records); + set_rec_fields(rec); ret = update_record_on_storage(rec); if (ret) goto out_clear; - - set_bit(i, new_records); } return ret; @@ -641,12 +657,7 @@ static int get_saved_records(void) int ret, pos; ssize_t len; - /* - * Assume saved records match current max size. - * - * However, this may not be true depending on module parameters. - */ - old = kmalloc(max_rec_len, GFP_KERNEL); + old = kmalloc(FMPM_MAX_REC_LEN, GFP_KERNEL); if (!old) { ret = -ENOMEM; goto out; @@ -663,24 +674,32 @@ static int get_saved_records(void) * Make sure to clear temporary buffer between reads to avoid * leftover data from records of various sizes. */ - memset(old, 0, max_rec_len); + memset(old, 0, FMPM_MAX_REC_LEN); - len = erst_read_record(record_id, &old->hdr, max_rec_len, + len = erst_read_record(record_id, &old->hdr, FMPM_MAX_REC_LEN, sizeof(struct fru_rec), &CPER_CREATOR_FMP); if (len < 0) continue; - if (len > max_rec_len) { - pr_debug("Found record larger than max_rec_len\n"); - continue; - } - new = get_valid_record(old); if (!new) { erst_clear(record_id); continue; } + if (len > max_rec_len) { + unsigned int saved_nr_entries; + + saved_nr_entries = len - sizeof(struct fru_rec); + saved_nr_entries /= sizeof(struct cper_fru_poison_desc); + + pr_warn("Saved record found with %u entries.\n", saved_nr_entries); + pr_warn("Please increase max_nr_entries to %u.\n", saved_nr_entries); + + ret = -EINVAL; + goto out_end; + } + /* Restore the record */ memcpy(new, old, len); } -- Gitee From b73a74a23bcaddeb07984825a886aa0031a69fb4 Mon Sep 17 00:00:00 2001 From: John Allen Date: Thu, 6 Jun 2024 20:33:13 +0000 Subject: [PATCH 18/23] RAS/AMD/FMPM: Use atl internal.h for INVALID_SPA mainline inclusion from mainline-v6.11-rc1 commit f4c0cd1870afd57181e8087c6cf8da3d7fa2cebe category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/f4c0cd1870afd57181e8087c6cf8da3d7fa2cebe -------------------------------- commit f4c0cd1870afd57181e8087c6cf8da3d7fa2cebe upstream Both the AMD ATL and the FMPM driver define INVALID_SPA. Include the definition from the ATL internal.h header in the FMPM driver. Signed-off-by: John Allen Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240606203313.51197-7-john.allen@amd.com Signed-off-by: suryasaimadhu Signed-off-by: PrithivishS --- arch/x86/configs/openeuler_defconfig | 34 ++++++++++++++++++---------- drivers/ras/amd/fmpm.c | 4 ++-- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 5c54e0fed823..d95d966f7fb2 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -918,6 +918,12 @@ CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG=y CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y # end of GCOV-based kernel profiling +# +# Profile Guided Optimization (PGO) +# +CONFIG_ARCH_SUPPORTS_PGO_CLANG=y +# end of Profile Guided Optimization (PGO) + CONFIG_HAVE_GCC_PLUGINS=y CONFIG_FUNCTION_ALIGNMENT_4B=y CONFIG_FUNCTION_ALIGNMENT_16B=y @@ -1143,6 +1149,7 @@ CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y # CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set CONFIG_THP_SWAP=y CONFIG_READ_ONLY_THP_FOR_FS=y +CONFIG_PGTABLE_HAS_HUGE_LEAVES=y CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y CONFIG_USE_PERCPU_NUMA_NODE_ID=y @@ -3082,6 +3089,8 @@ CONFIG_NET_VENDOR_BZWX=y CONFIG_NCE=m CONFIG_NE6X=m CONFIG_NE6XVF=m +CONFIG_NET_VENDOR_NEBULA_MATRIX=y +CONFIG_NBL_CORE=m # CONFIG_FDDI is not set # CONFIG_HIPPI is not set # CONFIG_NET_SB1000 is not set @@ -3091,8 +3100,6 @@ CONFIG_SWPHY=y CONFIG_LED_TRIGGER_PHY=y CONFIG_FIXED_PHY=y CONFIG_SFP=m -CONFIG_NET_VENDOR_NEBULA_MATRIX=y -CONFIG_NBL_CORE=m # # MII PHY device drivers @@ -3898,6 +3905,8 @@ CONFIG_TCG_INFINEON=m # CONFIG_TCG_XEN is not set CONFIG_TCG_CRB=y # CONFIG_TCG_VTPM_PROXY is not set +CONFIG_TCG_HYGON=m +CONFIG_TCM_HYGON=m CONFIG_TCG_TIS_ST33ZP24=m CONFIG_TCG_TIS_ST33ZP24_I2C=m CONFIG_TCG_TIS_ST33ZP24_SPI=m @@ -5526,6 +5535,7 @@ CONFIG_DVB_CXD2099=m # Graphics support # CONFIG_APERTURE_HELPERS=y +CONFIG_SCREEN_INFO=y CONFIG_VIDEO_CMDLINE=y CONFIG_VIDEO_NOMODESET=y # CONFIG_AUXDISPLAY is not set @@ -5736,6 +5746,7 @@ CONFIG_FB_SYS_IMAGEBLIT=y # CONFIG_FB_FOREIGN_ENDIAN is not set CONFIG_FB_SYS_FOPS=y CONFIG_FB_DEFERRED_IO=y +CONFIG_FB_IOMEM_FOPS=y CONFIG_FB_IOMEM_HELPERS=y CONFIG_FB_SYSMEM_HELPERS=y CONFIG_FB_SYSMEM_HELPERS_DEFERRED=y @@ -6856,8 +6867,8 @@ CONFIG_INFINIBAND_USER_MEM=y CONFIG_INFINIBAND_ON_DEMAND_PAGING=y CONFIG_INFINIBAND_ADDR_TRANS=y CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS=y -CONFIG_INFINIBAND_VIRT_DMA=y CONFIG_INFINIBAND_PEER_MEMORY=y +CONFIG_INFINIBAND_VIRT_DMA=y CONFIG_INFINIBAND_BNXT_RE=m CONFIG_INFINIBAND_CXGB4=m # CONFIG_INFINIBAND_EFA is not set @@ -7508,8 +7519,8 @@ CONFIG_HID_SENSOR_ACCEL_3D=m # CONFIG_AD7923 is not set # CONFIG_AD7949 is not set # CONFIG_AD799X is not set -# CONFIG_ADI_AXI_ADC is not set # CONFIG_AD9467 is not set +# CONFIG_ADI_AXI_ADC is not set # CONFIG_ENVELOPE_DETECTOR is not set # CONFIG_HI8435 is not set # CONFIG_HX711 is not set @@ -8026,6 +8037,8 @@ CONFIG_IDLE_INJECT=y CONFIG_RAS=y # CONFIG_RAS_CEC is not set +CONFIG_AMD_ATL=m +CONFIG_RAS_FMPM=m CONFIG_USB4=m # CONFIG_USB4_DEBUGFS_WRITE is not set # CONFIG_USB4_DMA_TEST is not set @@ -8568,8 +8581,6 @@ CONFIG_LIST_HARDENED=y CONFIG_RANDSTRUCT_NONE=y # end of Kernel hardening options - -# CONFIG_SECURITY_BOOT_INIT is not set # end of Security options CONFIG_XOR_BLOCKS=m @@ -8590,6 +8601,7 @@ CONFIG_CRYPTO_ALGAPI=y CONFIG_CRYPTO_ALGAPI2=y CONFIG_CRYPTO_AEAD=y CONFIG_CRYPTO_AEAD2=y +CONFIG_CRYPTO_SIG=y CONFIG_CRYPTO_SIG2=y CONFIG_CRYPTO_SKCIPHER=y CONFIG_CRYPTO_SKCIPHER2=y @@ -8813,19 +8825,17 @@ CONFIG_CRYPTO_DEV_ZHAOXIN_AES=m CONFIG_CRYPTO_DEV_ZHAOXIN_SHA=m # CONFIG_CRYPTO_DEV_ATMEL_ECC is not set # CONFIG_CRYPTO_DEV_ATMEL_SHA204A is not set -CONFIG_HYGON_GM=y CONFIG_CRYPTO_DEV_CCP=y CONFIG_CRYPTO_DEV_CCP_DD=m CONFIG_CRYPTO_DEV_SP_CCP=y CONFIG_CRYPTO_DEV_CCP_CRYPTO=m CONFIG_CRYPTO_DEV_SP_PSP=y +CONFIG_HYGON_GM=y +# CONFIG_CRYPTO_DEV_CCP_DEBUGFS is not set CONFIG_HYGON_PSP2CPU_CMD=y -CONFIG_TCG_HYGON=m -CONFIG_TCM_HYGON=m CONFIG_TDM_DEV_HYGON=y CONFIG_TDM_KERNEL_GUARD=m CONFIG_CRYPTO_DEV_HCT=m -# CONFIG_CRYPTO_DEV_CCP_DEBUGFS is not set CONFIG_CRYPTO_DEV_NITROX=m CONFIG_CRYPTO_DEV_NITROX_CNN55XX=m CONFIG_CRYPTO_DEV_QAT=m @@ -8838,13 +8848,13 @@ CONFIG_CRYPTO_DEV_QAT_DH895xCCVF=m CONFIG_CRYPTO_DEV_QAT_C3XXXVF=m CONFIG_CRYPTO_DEV_QAT_C62XVF=m # CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION is not set +CONFIG_CRYPTO_DEV_IAA_CRYPTO=m +CONFIG_CRYPTO_DEV_IAA_CRYPTO_STATS=y CONFIG_CRYPTO_DEV_CHELSIO=m # CONFIG_CRYPTO_DEV_VIRTIO is not set # CONFIG_CRYPTO_DEV_SAFEXCEL is not set # CONFIG_CRYPTO_DEV_AMLOGIC_GXL is not set CONFIG_CRYPTO_DEV_TSSE=m -CONFIG_CRYPTO_DEV_IAA_CRYPTO=m -CONFIG_CRYPTO_DEV_IAA_CRYPTO_STATS=y CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y CONFIG_X509_CERTIFICATE_PARSER=y diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c index 271dfad05d68..90de737fbc90 100644 --- a/drivers/ras/amd/fmpm.c +++ b/drivers/ras/amd/fmpm.c @@ -56,6 +56,8 @@ #include "../debugfs.h" +#include "atl/internal.h" + #define INVALID_CPU UINT_MAX /* Validation Bits */ @@ -116,8 +118,6 @@ static struct fru_rec **fru_records; /* system physical addresses array */ static u64 *spa_entries; -#define INVALID_SPA ~0ULL - static struct dentry *fmpm_dfs_dir; static struct dentry *fmpm_dfs_entries; -- Gitee From 8b2bebdc7311ff7b892c217314ba3d3ec237575e Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 26 Mar 2024 20:32:52 +0000 Subject: [PATCH 19/23] RAS: Avoid build errors when CONFIG_DEBUG_FS=n mainline inclusion from mainline-v6.9-rc2 commit a6b227d70d2ad9eb08adc3fe532ebb7ec88ac897 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/a6b227d70d2ad9eb08adc3fe532ebb7ec88ac897 -------------------------------- commit a6b227d70d2ad9eb08adc3fe532ebb7ec88ac897 upstream. A new helper was introduced for RAS modules to be able to get the RAS subsystem debugfs root directory. The helper is defined in debugfs.c which is only built when CONFIG_DEBUG_FS=y. However, it's possible that the modules would include debugfs support for optional functionality. One current example is the fmpm module. In this case, a build error will occur when CONFIG_RAS_FMPM is selected and CONFIG_DEBUG_FS=n. Add an inline helper function stub for the CONFIG_DEBUG_FS=n case as the fmpm module can function without the debugfs functionality too. Fixes: 9d2b6fa09d15 ("RAS: Export helper to get ras_debugfs_dir") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218640 Reported-by: anthony s. knowles Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Tested-by: anthony s. knowles Link: https://lore.kernel.org/r/20240325183755.776-1-bp@alien8.de Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- drivers/ras/debugfs.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/ras/debugfs.h b/drivers/ras/debugfs.h index 4749ccdeeba1..5a2f48439258 100644 --- a/drivers/ras/debugfs.h +++ b/drivers/ras/debugfs.h @@ -4,6 +4,10 @@ #include +#if IS_ENABLED(CONFIG_DEBUG_FS) struct dentry *ras_get_debugfs_root(void); +#else +static inline struct dentry *ras_get_debugfs_root(void) { return NULL; } +#endif /* DEBUG_FS */ #endif /* __RAS_DEBUGFS_H__ */ -- Gitee From 685c23e72c158999bd5d61559dd237289596509a Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 22 Feb 2024 10:54:49 -0600 Subject: [PATCH 20/23] RAS/AMD/ATL: Fix bit overflow in denorm_addr_df4_np2() mainline inclusion from mainline-v6.9-rc1 commit dd61b55d733eee9bbe51abe7ab0e6f2ce1fae332 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/dd61b55d733eee9bbe51abe7ab0e6f2ce1fae332 -------------------------------- commit dd61b55d733eee9bbe51abe7ab0e6f2ce1fae332 upstream. The hash_pa8 and hashed_bit values in denorm_addr_df4_np2() are currently defined as u8 types. These variables represent single bits. 'hash_pa8' is set based on logical AND operations using masks with more than 8 bits. So the calculated value will not fit in this variable. It will always be '0'. The 'hash_pa8' check later in the function will fail which produces incorrect results for some cases. Change these variables to bool type. This clarifies that they are single bit values. Also, this allows the compiler to ensure they hold the proper results. Remove an unnecessary shift operation. [ bp: Remove the unnecessary brackets in the else-branch of the hash_pa8 assignment. ] Fixes: 3f3174996be6 ("RAS: Introduce AMD Address Translation Library") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240222165449.23582-1-yazen.ghannam@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- drivers/ras/amd/atl/denormalize.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c index c0f496588c8f..375ce525c337 100644 --- a/drivers/ras/amd/atl/denormalize.c +++ b/drivers/ras/amd/atl/denormalize.c @@ -657,7 +657,7 @@ static int denorm_addr_df4_np2(struct addr_ctx *ctx) unsigned int mod_value, shift_value; u16 mask = df_cfg.component_id_mask; u64 temp_addr_a, temp_addr_b; - u8 hash_pa8, hashed_bit; + bool hash_pa8, hashed_bit; switch (ctx->map.intlv_mode) { case DF4_NPS4_3CHAN_HASH: @@ -689,8 +689,7 @@ static int denorm_addr_df4_np2(struct addr_ctx *ctx) hash_pa8 = BIT_ULL(shift_value) & ctx->ret_addr; temp_addr_a = remove_bits(shift_value, shift_value, ctx->ret_addr); } else { - hash_pa8 = (ctx->coh_st_fabric_id & df_cfg.socket_id_mask); - hash_pa8 >>= df_cfg.socket_id_shift; + hash_pa8 = ctx->coh_st_fabric_id & df_cfg.socket_id_mask; temp_addr_a = ctx->ret_addr; } -- Gitee From 784a951142f3215e4e57ecad5848997a1a149607 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 31 Jan 2024 11:24:25 +0300 Subject: [PATCH 21/23] RAS/AMD/ATL: Fix array overflow in get_logical_coh_st_fabric_id_mi300() mainline inclusion from mainline-v6.9-rc1 commit a7b57372e1c5c848cbe9169574f07a9ee2177a1b category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/a7b57372e1c5c848cbe9169574f07a9ee2177a1b -------------------------------- commit a7b57372e1c5c848cbe9169574f07a9ee2177a1b upstream. Check against ARRAY_SIZE() which is the number of elements instead of sizeof() which is the number of bytes. Fixes: 453f0ae79732 ("RAS/AMD/ATL: Add MI300 support") Signed-off-by: Dan Carpenter Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/279c8b5e-6c00-467a-9071-9c67926abea4@moroto.mountain Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- drivers/ras/amd/atl/denormalize.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c index 375ce525c337..1a525cfa983c 100644 --- a/drivers/ras/amd/atl/denormalize.c +++ b/drivers/ras/amd/atl/denormalize.c @@ -405,7 +405,7 @@ static const u16 phy_to_log_coh_st_map_mi300[] = { static u16 get_logical_coh_st_fabric_id_mi300(struct addr_ctx *ctx) { - if (ctx->inst_id >= sizeof(phy_to_log_coh_st_map_mi300)) { + if (ctx->inst_id >= ARRAY_SIZE(phy_to_log_coh_st_map_mi300)) { atl_debug(ctx, "Instance ID out of range"); return ~0; } -- Gitee From 00001044baf69da9650262d9776604b94bf801ef Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 7 Jun 2024 16:33:00 -0500 Subject: [PATCH 22/23] RAS/AMD/ATL: Use system settings for MI300 DRAM to normalized address translation mainline inclusion from mainline-v6.10-rc4 commit ba437905b4fbf0ee1686c175069239a1cc292558 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/ba437905b4fbf0ee1686c175069239a1cc292558 -------------------------------- commit ba437905b4fbf0ee1686c175069239a1cc292558 upstream. The currently used normalized address format is not applicable to all MI300 systems. This leads to incorrect results during address translation. Drop the fixed layout and construct the normalized address from system settings. Fixes: 87a612375307 ("RAS/AMD/ATL: Add MI300 DRAM to normalized address translation support") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/20240607-mi300-dram-xl-fix-v1-2-2f11547a178c@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- drivers/ras/amd/atl/internal.h | 2 +- drivers/ras/amd/atl/system.c | 2 +- drivers/ras/amd/atl/umc.c | 151 ++++++++++++++++++++++++--------- 3 files changed, 114 insertions(+), 41 deletions(-) diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index c67ba4bfe9cf..9de5d53d0568 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -269,7 +269,7 @@ int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo); int get_df_system_info(void); int determine_node_id(struct addr_ctx *ctx, u8 socket_num, u8 die_num); -int get_addr_hash_mi300(void); +int get_umc_info_mi300(void); int get_address_map(struct addr_ctx *ctx); diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c index 8423c9f3a8d2..e18d916d5e8b 100644 --- a/drivers/ras/amd/atl/system.c +++ b/drivers/ras/amd/atl/system.c @@ -127,7 +127,7 @@ static int df4_determine_df_rev(u32 reg) if (reg == DF_FUNC0_ID_MI300) { df_cfg.flags.heterogeneous = 1; - if (get_addr_hash_mi300()) + if (get_umc_info_mi300()) return -EINVAL; } diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index 08c6dbd44c62..cca31e5324cb 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -68,6 +68,8 @@ struct xor_bits { }; #define NUM_BANK_BITS 4 +#define NUM_COL_BITS 5 +#define NUM_SID_BITS 2 static struct { /* UMC::CH::AddrHashBank */ @@ -80,7 +82,22 @@ static struct { u8 bank_xor; } addr_hash; +static struct { + u8 bank[NUM_BANK_BITS]; + u8 col[NUM_COL_BITS]; + u8 sid[NUM_SID_BITS]; + u8 num_row_lo; + u8 num_row_hi; + u8 row_lo; + u8 row_hi; + u8 pc; +} bit_shifts; + #define MI300_UMC_CH_BASE 0x90000 +#define MI300_ADDR_CFG (MI300_UMC_CH_BASE + 0x30) +#define MI300_ADDR_SEL (MI300_UMC_CH_BASE + 0x40) +#define MI300_COL_SEL_LO (MI300_UMC_CH_BASE + 0x50) +#define MI300_ADDR_SEL_2 (MI300_UMC_CH_BASE + 0xA4) #define MI300_ADDR_HASH_BANK0 (MI300_UMC_CH_BASE + 0xC8) #define MI300_ADDR_HASH_PC (MI300_UMC_CH_BASE + 0xE0) #define MI300_ADDR_HASH_PC2 (MI300_UMC_CH_BASE + 0xE4) @@ -90,17 +107,42 @@ static struct { #define ADDR_HASH_ROW_XOR GENMASK(31, 14) #define ADDR_HASH_BANK_XOR GENMASK(5, 0) +#define ADDR_CFG_NUM_ROW_LO GENMASK(11, 8) +#define ADDR_CFG_NUM_ROW_HI GENMASK(15, 12) + +#define ADDR_SEL_BANK0 GENMASK(3, 0) +#define ADDR_SEL_BANK1 GENMASK(7, 4) +#define ADDR_SEL_BANK2 GENMASK(11, 8) +#define ADDR_SEL_BANK3 GENMASK(15, 12) +#define ADDR_SEL_BANK4 GENMASK(20, 16) +#define ADDR_SEL_ROW_LO GENMASK(27, 24) +#define ADDR_SEL_ROW_HI GENMASK(31, 28) + +#define COL_SEL_LO_COL0 GENMASK(3, 0) +#define COL_SEL_LO_COL1 GENMASK(7, 4) +#define COL_SEL_LO_COL2 GENMASK(11, 8) +#define COL_SEL_LO_COL3 GENMASK(15, 12) +#define COL_SEL_LO_COL4 GENMASK(19, 16) + +#define ADDR_SEL_2_BANK5 GENMASK(4, 0) +#define ADDR_SEL_2_CHAN GENMASK(15, 12) + /* * Read UMC::CH::AddrHash{Bank,PC,PC2} registers to get XOR bits used - * for hashing. Do this during module init, since the values will not - * change during run time. + * for hashing. + * + * Also, read UMC::CH::Addr{Cfg,Sel,Sel2} and UMC::CH:ColSelLo registers to + * get the values needed to reconstruct the normalized address. Apply additional + * offsets to the raw register values, as needed. + * + * Do this during module init, since the values will not change during run time. * * These registers are instantiated for each UMC across each AMD Node. * However, they should be identically programmed due to the fixed hardware * design of MI300 systems. So read the values from Node 0 UMC 0 and keep a * single global structure for simplicity. */ -int get_addr_hash_mi300(void) +int get_umc_info_mi300(void) { u32 temp; int ret; @@ -130,6 +172,44 @@ int get_addr_hash_mi300(void) addr_hash.bank_xor = FIELD_GET(ADDR_HASH_BANK_XOR, temp); + ret = amd_smn_read(0, MI300_ADDR_CFG, &temp); + if (ret) + return ret; + + bit_shifts.num_row_hi = FIELD_GET(ADDR_CFG_NUM_ROW_HI, temp); + bit_shifts.num_row_lo = 10 + FIELD_GET(ADDR_CFG_NUM_ROW_LO, temp); + + ret = amd_smn_read(0, MI300_ADDR_SEL, &temp); + if (ret) + return ret; + + bit_shifts.bank[0] = 5 + FIELD_GET(ADDR_SEL_BANK0, temp); + bit_shifts.bank[1] = 5 + FIELD_GET(ADDR_SEL_BANK1, temp); + bit_shifts.bank[2] = 5 + FIELD_GET(ADDR_SEL_BANK2, temp); + bit_shifts.bank[3] = 5 + FIELD_GET(ADDR_SEL_BANK3, temp); + /* Use BankBit4 for the SID0 position. */ + bit_shifts.sid[0] = 5 + FIELD_GET(ADDR_SEL_BANK4, temp); + bit_shifts.row_lo = 12 + FIELD_GET(ADDR_SEL_ROW_LO, temp); + bit_shifts.row_hi = 24 + FIELD_GET(ADDR_SEL_ROW_HI, temp); + + ret = amd_smn_read(0, MI300_COL_SEL_LO, &temp); + if (ret) + return ret; + + bit_shifts.col[0] = 2 + FIELD_GET(COL_SEL_LO_COL0, temp); + bit_shifts.col[1] = 2 + FIELD_GET(COL_SEL_LO_COL1, temp); + bit_shifts.col[2] = 2 + FIELD_GET(COL_SEL_LO_COL2, temp); + bit_shifts.col[3] = 2 + FIELD_GET(COL_SEL_LO_COL3, temp); + bit_shifts.col[4] = 2 + FIELD_GET(COL_SEL_LO_COL4, temp); + + ret = amd_smn_read(0, MI300_ADDR_SEL_2, &temp); + if (ret) + return ret; + + /* Use BankBit5 for the SID1 position. */ + bit_shifts.sid[1] = 5 + FIELD_GET(ADDR_SEL_2_BANK5, temp); + bit_shifts.pc = 5 + FIELD_GET(ADDR_SEL_2_CHAN, temp); + return 0; } @@ -146,9 +226,6 @@ int get_addr_hash_mi300(void) * The MCA address format is as follows: * MCA_ADDR[27:0] = {S[1:0], P[0], R[14:0], B[3:0], C[4:0], Z[0]} * - * The normalized address format is fixed in hardware and is as follows: - * NA[30:0] = {S[1:0], R[13:0], C4, B[1:0], B[3:2], C[3:2], P, C[1:0], Z[4:0]} - * * Additionally, the PC and Bank bits may be hashed. This must be accounted for before * reconstructing the normalized address. */ @@ -158,18 +235,10 @@ int get_addr_hash_mi300(void) #define MI300_UMC_MCA_PC BIT(25) #define MI300_UMC_MCA_SID GENMASK(27, 26) -#define MI300_NA_COL_1_0 GENMASK(6, 5) -#define MI300_NA_PC BIT(7) -#define MI300_NA_COL_3_2 GENMASK(9, 8) -#define MI300_NA_BANK_3_2 GENMASK(11, 10) -#define MI300_NA_BANK_1_0 GENMASK(13, 12) -#define MI300_NA_COL_4 BIT(14) -#define MI300_NA_ROW GENMASK(28, 15) -#define MI300_NA_SID GENMASK(30, 29) - static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr) { - u16 i, col, row, bank, pc, sid, temp; + u16 i, col, row, bank, pc, sid; + u32 temp; col = FIELD_GET(MI300_UMC_MCA_COL, addr); bank = FIELD_GET(MI300_UMC_MCA_BANK, addr); @@ -204,34 +273,38 @@ static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr) /* Reconstruct the normalized address starting with NA[4:0] = 0 */ addr = 0; - /* NA[6:5] = Column[1:0] */ - temp = col & 0x3; - addr |= FIELD_PREP(MI300_NA_COL_1_0, temp); - - /* NA[7] = PC */ - addr |= FIELD_PREP(MI300_NA_PC, pc); - - /* NA[9:8] = Column[3:2] */ - temp = (col >> 2) & 0x3; - addr |= FIELD_PREP(MI300_NA_COL_3_2, temp); + /* Column bits */ + for (i = 0; i < NUM_COL_BITS; i++) { + temp = (col >> i) & 0x1; + addr |= temp << bit_shifts.col[i]; + } - /* NA[11:10] = Bank[3:2] */ - temp = (bank >> 2) & 0x3; - addr |= FIELD_PREP(MI300_NA_BANK_3_2, temp); + /* Bank bits */ + for (i = 0; i < NUM_BANK_BITS; i++) { + temp = (bank >> i) & 0x1; + addr |= temp << bit_shifts.bank[i]; + } - /* NA[13:12] = Bank[1:0] */ - temp = bank & 0x3; - addr |= FIELD_PREP(MI300_NA_BANK_1_0, temp); + /* Row lo bits */ + for (i = 0; i < bit_shifts.num_row_lo; i++) { + temp = (row >> i) & 0x1; + addr |= temp << (i + bit_shifts.row_lo); + } - /* NA[14] = Column[4] */ - temp = (col >> 4) & 0x1; - addr |= FIELD_PREP(MI300_NA_COL_4, temp); + /* Row hi bits */ + for (i = 0; i < bit_shifts.num_row_hi; i++) { + temp = (row >> (i + bit_shifts.num_row_lo)) & 0x1; + addr |= temp << (i + bit_shifts.row_hi); + } - /* NA[28:15] = Row[13:0] */ - addr |= FIELD_PREP(MI300_NA_ROW, row); + /* PC bit */ + addr |= pc << bit_shifts.pc; - /* NA[30:29] = SID[1:0] */ - addr |= FIELD_PREP(MI300_NA_SID, sid); + /* SID bits */ + for (i = 0; i < NUM_SID_BITS; i++) { + temp = (sid >> i) & 0x1; + addr |= temp << bit_shifts.sid[i]; + } pr_debug("Addr=0x%016lx", addr); pr_debug("Bank=%u Row=%u Column=%u PC=%u SID=%u", bank, row, col, pc, sid); -- Gitee From 0966ae2b7380bf339175da942c6533ab6b48743e Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 7 Jun 2024 16:32:59 -0500 Subject: [PATCH 23/23] RAS/AMD/ATL: Fix MI300 bank hash mainline inclusion from mainline-v6.10-rc4 commit fe8a08973a0dea9757394c5adbdc3c0a03b0b432 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAYOV8 CVE: NA Reference: https://github.com/torvalds/linux/commit/fe8a08973a0dea9757394c5adbdc3c0a03b0b432 -------------------------------- commit fe8a08973a0dea9757394c5adbdc3c0a03b0b432 upstream. Apply the SID bits to the correct offset in the Bank value. Do this in the temporary value so they don't need to be masked off later. Fixes: 87a612375307 ("RAS/AMD/ATL: Add MI300 DRAM to normalized address translation support") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/20240607-mi300-dram-xl-fix-v1-1-2f11547a178c@amd.com Signed-off-by: Jeevan deep J Signed-off-by: PrithivishS --- drivers/ras/amd/atl/umc.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index cca31e5324cb..94942e08821a 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -258,16 +258,11 @@ static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr) /* Calculate hash for PC bit. */ if (addr_hash.pc.xor_enable) { - /* Bits SID[1:0] act as Bank[6:5] for PC hash, so apply them here. */ - bank |= sid << 5; - temp = bitwise_xor_bits(col & addr_hash.pc.col_xor); temp ^= bitwise_xor_bits(row & addr_hash.pc.row_xor); - temp ^= bitwise_xor_bits(bank & addr_hash.bank_xor); + /* Bits SID[1:0] act as Bank[5:4] for PC hash, so apply them here. */ + temp ^= bitwise_xor_bits((bank | sid << NUM_BANK_BITS) & addr_hash.bank_xor); pc ^= temp; - - /* Drop SID bits for the sake of debug printing later. */ - bank &= 0x1F; } /* Reconstruct the normalized address starting with NA[4:0] = 0 */ -- Gitee