From ed7f393e175cd8d0f44c7d262b9725803a0a4c70 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Feb 2024 21:29:39 +0100 Subject: [PATCH 01/88] x86/cpu: Provide cpuid_read() et al. ANBZ: #26668 commit 43d86e3cd9a77912772cf7ad37ad94211bf7351d upstream. Provide a few helper functions to read CPUID leafs or individual registers into a data structure without requiring unions. Intel-SIG: commit 43d86e3cd9a7 x86/cpu: Provide cpuid_read() et al.. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/878r3mg570.ffs@tglx [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/cpuid.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h index 9bee3e7bf973..6b122a31da06 100644 --- a/arch/x86/include/asm/cpuid.h +++ b/arch/x86/include/asm/cpuid.h @@ -127,6 +127,42 @@ static inline unsigned int cpuid_edx(unsigned int op) return edx; } +static inline void __cpuid_read(unsigned int leaf, unsigned int subleaf, u32 *regs) +{ + regs[CPUID_EAX] = leaf; + regs[CPUID_ECX] = subleaf; + __cpuid(regs + CPUID_EAX, regs + CPUID_EBX, regs + CPUID_ECX, regs + CPUID_EDX); +} + +#define cpuid_subleaf(leaf, subleaf, regs) { \ + static_assert(sizeof(*(regs)) == 16); \ + __cpuid_read(leaf, subleaf, (u32 *)(regs)); \ +} + +#define cpuid_leaf(leaf, regs) { \ + static_assert(sizeof(*(regs)) == 16); \ + __cpuid_read(leaf, 0, (u32 *)(regs)); \ +} + +static inline void __cpuid_read_reg(unsigned int leaf, unsigned int subleaf, + enum cpuid_regs_idx regidx, u32 *reg) +{ + u32 regs[4]; + + __cpuid_read(leaf, subleaf, regs); + *reg = regs[regidx]; +} + +#define cpuid_subleaf_reg(leaf, subleaf, regidx, reg) { \ + static_assert(sizeof(*(reg)) == 4); \ + __cpuid_read_reg(leaf, subleaf, regidx, (u32 *)(reg)); \ +} + +#define cpuid_leaf_reg(leaf, regidx, reg) { \ + static_assert(sizeof(*(reg)) == 4); \ + __cpuid_read_reg(leaf, 0, regidx, (u32 *)(reg)); \ +} + static __always_inline bool cpuid_function_is_indexed(u32 function) { switch (function) { -- Gitee From fd231e5008d75b0e75221f4d406380a709d5d37e Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Tue, 4 Mar 2025 09:51:15 +0100 Subject: [PATCH 02/88] x86/cpuid: Include in ANBZ: #26668 commit 97c7d5723537de08e076892e07d6089ae9777965 upstream. uses static_assert() at multiple locations but it does not include the CPP macro's definition at linux/build_bug.h. Include the needed header to make self-sufficient. This gets triggered when cpuid.h is included in new C files, which is to be done in further commits. Fixes: 43d86e3cd9a7 ("x86/cpu: Provide cpuid_read() et al.") Intel-SIG: commit 97c7d5723537 x86/cpuid: Include in . x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250304085152.51092-5-darwi@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/cpuid.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h index 6b122a31da06..d6b466e91e6d 100644 --- a/arch/x86/include/asm/cpuid.h +++ b/arch/x86/include/asm/cpuid.h @@ -6,6 +6,7 @@ #ifndef _ASM_X86_CPUID_H #define _ASM_X86_CPUID_H +#include #include struct cpuid_regs { -- Gitee From 4e3d36fce9edbb542f9f38173ae7a53b60c35791 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:01 +0100 Subject: [PATCH 03/88] x86/cpu: Provide cpu_init/parse_topology() ANBZ: #26668 commit ebdb20361059b3c4fd7b23cfa10c28e798b7a3d2 upstream. Topology evaluation is a complete disaster and impenetrable mess. It's scattered all over the place with some vendor implementations doing early evaluation and some not. The most horrific part is the permanent overwriting of smt_max_siblings and __max_die_per_package, instead of establishing them once on the boot CPU and validating the result on the APs. The goals are: - One topology evaluation entry point - Proper sharing of pointlessly duplicated code - Proper structuring of the evaluation logic and preferences. - Evaluating important system wide information only once on the boot CPU - Making the 0xb/0x1f leaf parsing less convoluted and actually fixing the short comings of leaf 0x1f evaluation. Start to consolidate the topology evaluation code by providing the entry points for the early boot CPU evaluation and for the final parsing on the boot CPU and the APs. Move the trivial pieces into that new code: - The initialization of cpuinfo_x86::topo - The evaluation of CPUID leaf 1, which presets topo::initial_apicid - topo_apicid is set to topo::initial_apicid when invoked from early boot. When invoked for the final evaluation on the boot CPU it reads the actual APIC ID, which makes apic_get_initial_apicid() obsolete once everything is converted over. Provide a temporary helper function topo_converted() which shields off the not yet converted CPU vendors from invoking code which would break them. This shielding covers all vendor CPUs which support SMP, but not the historical pure UP ones as they only need the topology info init and eventually the initial APIC initialization. Provide two new members in cpuinfo_x86::topo to store the maximum number of SMT siblings and the number of dies per package and add them to the debugfs readout. These two members will be used to populate this information on the boot CPU and to validate the APs against it. Intel-SIG: commit ebdb20361059 x86/cpu: Provide cpu_init/parse_topology(). x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Peter Zijlstra (Intel) Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20240212153624.581436579@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/topology.h | 19 +++ arch/x86/kernel/cpu/Makefile | 3 +- arch/x86/kernel/cpu/common.c | 22 +-- arch/x86/kernel/cpu/cpu.h | 6 + arch/x86/kernel/cpu/debugfs.c | 38 ++++++ arch/x86/kernel/cpu/topology.h | 36 +++++ arch/x86/kernel/cpu/topology_common.c | 188 ++++++++++++++++++++++++++ 7 files changed, 296 insertions(+), 16 deletions(-) create mode 100644 arch/x86/kernel/cpu/topology.h create mode 100644 arch/x86/kernel/cpu/topology_common.c diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 489364815874..60aceacac190 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -102,6 +102,25 @@ static inline void setup_node_to_cpumask_map(void) { } #include +/* Topology information */ +enum x86_topology_domains { + TOPO_SMT_DOMAIN, + TOPO_CORE_DOMAIN, + TOPO_MODULE_DOMAIN, + TOPO_TILE_DOMAIN, + TOPO_DIE_DOMAIN, + TOPO_DIEGRP_DOMAIN, + TOPO_PKG_DOMAIN, + TOPO_MAX_DOMAIN, +}; + +struct x86_topology_system { + unsigned int dom_shifts[TOPO_MAX_DOMAIN]; + unsigned int dom_size[TOPO_MAX_DOMAIN]; +}; + +extern struct x86_topology_system x86_topo_system; + extern const struct cpumask *cpu_coregroup_mask(int cpu); extern const struct cpumask *cpu_clustergroup_mask(int cpu); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index cd8fbb2d3ef0..509f3d36253c 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -17,7 +17,8 @@ KMSAN_SANITIZE_common.o := n # As above, instrumenting secondary CPU boot code causes boot hangs. KCSAN_SANITIZE_common.o := n -obj-y := cacheinfo.o scattered.o topology.o +obj-y := cacheinfo.o scattered.o +obj-y += topology_common.o topology.o obj-y += common.o obj-y += rdrand.o obj-y += match.o diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index fe28d3f92ff4..ec6980dc8a69 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1694,6 +1694,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_CPUID); cpu_parse_early_param(); + cpu_init_topology(c); + if (this_cpu->c_early_init) this_cpu->c_early_init(c); @@ -1704,6 +1706,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_bsp_init(c); } else { setup_clear_cpu_cap(X86_FEATURE_CPUID); + cpu_init_topology(c); } setup_force_cpu_cap(X86_FEATURE_ALWAYS); @@ -1855,18 +1858,6 @@ static void generic_identify(struct cpuinfo_x86 *c) get_cpu_address_sizes(c); - if (c->cpuid_level >= 0x00000001) { - c->topo.initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; -#ifdef CONFIG_X86_32 -# ifdef CONFIG_SMP - c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0); -# else - c->topo.apicid = c->topo.initial_apicid; -# endif -#endif - c->topo.pkg_id = c->topo.initial_apicid; - } - get_model_name(c); /* Default name */ /* @@ -1925,7 +1916,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; c->x86_coreid_bits = 0; - c->topo.cu_id = 0xff; c->topo.llc_id = BAD_APICID; c->topo.l2c_id = BAD_APICID; #ifdef CONFIG_X86_64 @@ -1946,6 +1936,8 @@ static void identify_cpu(struct cpuinfo_x86 *c) generic_identify(c); + cpu_parse_topology(c); + if (this_cpu->c_identify) this_cpu->c_identify(c); @@ -1953,10 +1945,10 @@ static void identify_cpu(struct cpuinfo_x86 *c) apply_forced_caps(c); #ifdef CONFIG_X86_64 - c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0); + if (!topo_is_converted(c)) + c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0); #endif - /* * Set default APIC and TSC_DEADLINE MSR fencing flag. AMD and * Hygon will clear it in ->c_init() below. diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 885281ae79a5..2a446ecb2cc0 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -2,6 +2,11 @@ #ifndef ARCH_X86_CPU_H #define ARCH_X86_CPU_H +#include +#include + +#include "topology.h" + /* attempt to consolidate cpu attributes */ struct cpu_dev { const char *c_vendor; @@ -96,4 +101,5 @@ static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) mode == SPECTRE_V2_EIBRS_RETPOLINE || mode == SPECTRE_V2_EIBRS_LFENCE; } + #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c index 0c179d684b3b..be8ca21ecd6a 100644 --- a/arch/x86/kernel/cpu/debugfs.c +++ b/arch/x86/kernel/cpu/debugfs.c @@ -5,6 +5,8 @@ #include #include +#include "cpu.h" + static int cpu_debug_show(struct seq_file *m, void *p) { unsigned long cpu = (unsigned long)m->private; @@ -42,12 +44,48 @@ static const struct file_operations dfs_cpu_ops = { .release = single_release, }; +static int dom_debug_show(struct seq_file *m, void *p) +{ + static const char *domain_names[TOPO_MAX_DOMAIN] = { + [TOPO_SMT_DOMAIN] = "Thread", + [TOPO_CORE_DOMAIN] = "Core", + [TOPO_MODULE_DOMAIN] = "Module", + [TOPO_TILE_DOMAIN] = "Tile", + [TOPO_DIE_DOMAIN] = "Die", + [TOPO_DIEGRP_DOMAIN] = "DieGrp", + [TOPO_PKG_DOMAIN] = "Package", + }; + unsigned int dom, nthreads = 1; + + for (dom = 0; dom < TOPO_MAX_DOMAIN; dom++) { + nthreads *= x86_topo_system.dom_size[dom]; + seq_printf(m, "domain: %-10s shift: %u dom_size: %5u max_threads: %5u\n", + domain_names[dom], x86_topo_system.dom_shifts[dom], + x86_topo_system.dom_size[dom], nthreads); + } + return 0; +} + +static int dom_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, dom_debug_show, inode->i_private); +} + +static const struct file_operations dfs_dom_ops = { + .open = dom_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static __init int cpu_init_debugfs(void) { struct dentry *dir, *base = debugfs_create_dir("topo", arch_debugfs_dir); unsigned long id; char name[24]; + debugfs_create_file("domains", 0444, base, NULL, &dfs_dom_ops); + dir = debugfs_create_dir("cpus", base); for_each_possible_cpu(id) { sprintf(name, "%lu", id); diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h new file mode 100644 index 000000000000..99c94c519b5d --- /dev/null +++ b/arch/x86/kernel/cpu/topology.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_X86_TOPOLOGY_H +#define ARCH_X86_TOPOLOGY_H + +struct topo_scan { + struct cpuinfo_x86 *c; + unsigned int dom_shifts[TOPO_MAX_DOMAIN]; + unsigned int dom_ncpus[TOPO_MAX_DOMAIN]; +}; + +bool topo_is_converted(struct cpuinfo_x86 *c); +void cpu_init_topology(struct cpuinfo_x86 *c); +void cpu_parse_topology(struct cpuinfo_x86 *c); +void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, + unsigned int shift, unsigned int ncpus); + +static inline u32 topo_shift_apicid(u32 apicid, enum x86_topology_domains dom) +{ + if (dom == TOPO_SMT_DOMAIN) + return apicid; + return apicid >> x86_topo_system.dom_shifts[dom - 1]; +} + +static inline u32 topo_relative_domain_id(u32 apicid, enum x86_topology_domains dom) +{ + if (dom != TOPO_SMT_DOMAIN) + apicid >>= x86_topo_system.dom_shifts[dom - 1]; + return apicid & (x86_topo_system.dom_size[dom] - 1); +} + +static inline u32 topo_domain_mask(enum x86_topology_domains dom) +{ + return (1U << x86_topo_system.dom_shifts[dom]) - 1; +} + +#endif /* ARCH_X86_TOPOLOGY_H */ diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c new file mode 100644 index 000000000000..873d7c3fe7ac --- /dev/null +++ b/arch/x86/kernel/cpu/topology_common.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +#include + +#include +#include +#include + +#include "cpu.h" + +struct x86_topology_system x86_topo_system __ro_after_init; + +void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, + unsigned int shift, unsigned int ncpus) +{ + tscan->dom_shifts[dom] = shift; + tscan->dom_ncpus[dom] = ncpus; + + /* Propagate to the upper levels */ + for (dom++; dom < TOPO_MAX_DOMAIN; dom++) { + tscan->dom_shifts[dom] = tscan->dom_shifts[dom - 1]; + tscan->dom_ncpus[dom] = tscan->dom_ncpus[dom - 1]; + } +} + +bool topo_is_converted(struct cpuinfo_x86 *c) +{ + /* Temporary until everything is converted over. */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + case X86_VENDOR_CENTAUR: + case X86_VENDOR_INTEL: + case X86_VENDOR_HYGON: + case X86_VENDOR_ZHAOXIN: + return false; + default: + /* Let all UP systems use the below */ + return true; + } +} + +static bool fake_topology(struct topo_scan *tscan) +{ + /* + * Preset the CORE level shift for CPUID less systems and XEN_PV, + * which has useless CPUID information. + */ + topology_set_dom(tscan, TOPO_SMT_DOMAIN, 0, 1); + topology_set_dom(tscan, TOPO_CORE_DOMAIN, 1, 1); + + return tscan->c->cpuid_level < 1 || xen_pv_domain(); +} + +static void parse_topology(struct topo_scan *tscan, bool early) +{ + const struct cpuinfo_topology topo_defaults = { + .cu_id = 0xff, + .llc_id = BAD_APICID, + .l2c_id = BAD_APICID, + }; + struct cpuinfo_x86 *c = tscan->c; + struct { + u32 unused0 : 16, + nproc : 8, + apicid : 8; + } ebx; + + c->topo = topo_defaults; + + if (fake_topology(tscan)) + return; + + /* Preset Initial APIC ID from CPUID leaf 1 */ + cpuid_leaf_reg(1, CPUID_EBX, &ebx); + c->topo.initial_apicid = ebx.apicid; + + /* + * The initial invocation from early_identify_cpu() happens before + * the APIC is mapped or X2APIC enabled. For establishing the + * topology, that's not required. Use the initial APIC ID. + */ + if (early) + c->topo.apicid = c->topo.initial_apicid; + else + c->topo.apicid = read_apic_id(); + + /* The above is sufficient for UP */ + if (!IS_ENABLED(CONFIG_SMP)) + return; +} + +static void topo_set_ids(struct topo_scan *tscan) +{ + struct cpuinfo_x86 *c = tscan->c; + u32 apicid = c->topo.apicid; + + c->topo.pkg_id = topo_shift_apicid(apicid, TOPO_PKG_DOMAIN); + c->topo.die_id = topo_shift_apicid(apicid, TOPO_DIE_DOMAIN); + + /* Package relative core ID */ + c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> + x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; +} + +static void topo_set_max_cores(struct topo_scan *tscan) +{ + /* + * Bug compatible for now. This is broken on hybrid systems: + * 8 cores SMT + 8 cores w/o SMT + * tscan.dom_ncpus[TOPO_DIEGRP_DOMAIN] = 24; 24 / 2 = 12 !! + * + * Cannot be fixed without further topology enumeration changes. + */ + tscan->c->x86_max_cores = tscan->dom_ncpus[TOPO_DIEGRP_DOMAIN] >> + x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; +} + +void cpu_parse_topology(struct cpuinfo_x86 *c) +{ + unsigned int dom, cpu = smp_processor_id(); + struct topo_scan tscan = { .c = c, }; + + parse_topology(&tscan, false); + + if (!topo_is_converted(c)) + return; + + for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) { + if (tscan.dom_shifts[dom] == x86_topo_system.dom_shifts[dom]) + continue; + pr_err(FW_BUG "CPU%d: Topology domain %u shift %u != %u\n", cpu, dom, + tscan.dom_shifts[dom], x86_topo_system.dom_shifts[dom]); + } + + /* Bug compatible with the existing parsers */ + if (tscan.dom_ncpus[TOPO_SMT_DOMAIN] > smp_num_siblings) { + if (system_state == SYSTEM_BOOTING) { + pr_warn_once("CPU%d: SMT detected and enabled late\n", cpu); + smp_num_siblings = tscan.dom_ncpus[TOPO_SMT_DOMAIN]; + } else { + pr_warn_once("CPU%d: SMT detected after init. Too late!\n", cpu); + } + } + + topo_set_ids(&tscan); + topo_set_max_cores(&tscan); +} + +void __init cpu_init_topology(struct cpuinfo_x86 *c) +{ + struct topo_scan tscan = { .c = c, }; + unsigned int dom, sft; + + parse_topology(&tscan, true); + + if (!topo_is_converted(c)) + return; + + /* Copy the shift values and calculate the unit sizes. */ + memcpy(x86_topo_system.dom_shifts, tscan.dom_shifts, sizeof(x86_topo_system.dom_shifts)); + + dom = TOPO_SMT_DOMAIN; + x86_topo_system.dom_size[dom] = 1U << x86_topo_system.dom_shifts[dom]; + + for (dom++; dom < TOPO_MAX_DOMAIN; dom++) { + sft = x86_topo_system.dom_shifts[dom] - x86_topo_system.dom_shifts[dom - 1]; + x86_topo_system.dom_size[dom] = 1U << sft; + } + + topo_set_ids(&tscan); + topo_set_max_cores(&tscan); + + /* + * Bug compatible with the existing code. If the boot CPU does not + * have SMT this ends up with one sibling. This needs way deeper + * changes further down the road to get it right during early boot. + */ + smp_num_siblings = tscan.dom_ncpus[TOPO_SMT_DOMAIN]; + + /* + * Neither it's clear whether there are as many dies as the APIC + * space indicating die level is. But assume that the actual number + * of CPUs gives a proper indication for now to stay bug compatible. + */ + __max_die_per_package = tscan.dom_ncpus[TOPO_DIE_DOMAIN] / + tscan.dom_ncpus[TOPO_DIE_DOMAIN - 1]; +} -- Gitee From 9013d5015d8c81c336c83e3a794d04a45ca2c7d5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:03 +0100 Subject: [PATCH 04/88] x86/cpu: Add legacy topology parser ANBZ: #26668 commit bda74aae20086c044b31ca0dcdab7deaaf23d0e8 upstream. The legacy topology detection via CPUID leaf 4, which provides the number of cores in the package and CPUID leaf 1 which provides the number of logical CPUs in case that FEATURE_HT is enabled and the CMP_LEGACY feature is not set, is shared for Intel, Centaur and Zhaoxin CPUs. Lift the code from common.c without the early detection hack and provide it as common fallback mechanism. Will be utilized in later changes. Intel-SIG: commit bda74aae2008 x86/cpu: Add legacy topology parser. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153624.644448852@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/common.c | 3 ++ arch/x86/kernel/cpu/topology.h | 3 ++ arch/x86/kernel/cpu/topology_common.c | 46 ++++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ec6980dc8a69..acc6d368f490 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -901,6 +901,9 @@ void detect_ht(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP int index_msb, core_bits; + if (topo_is_converted(c)) + return; + if (detect_ht_early(c) < 0) return; diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h index 99c94c519b5d..934a100b9fe6 100644 --- a/arch/x86/kernel/cpu/topology.h +++ b/arch/x86/kernel/cpu/topology.h @@ -6,6 +6,9 @@ struct topo_scan { struct cpuinfo_x86 *c; unsigned int dom_shifts[TOPO_MAX_DOMAIN]; unsigned int dom_ncpus[TOPO_MAX_DOMAIN]; + + /* Legacy CPUID[1]:EBX[23:16] number of logical processors */ + unsigned int ebx1_nproc_shift; }; bool topo_is_converted(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index 873d7c3fe7ac..b0ff1fc84a13 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -24,6 +24,48 @@ void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, } } +static unsigned int __maybe_unused parse_num_cores_legacy(struct cpuinfo_x86 *c) +{ + struct { + u32 cache_type : 5, + unused : 21, + ncores : 6; + } eax; + + if (c->cpuid_level < 4) + return 1; + + cpuid_subleaf_reg(4, 0, CPUID_EAX, &eax); + if (!eax.cache_type) + return 1; + + return eax.ncores + 1; +} + +static void __maybe_unused parse_legacy(struct topo_scan *tscan) +{ + unsigned int cores, core_shift, smt_shift = 0; + struct cpuinfo_x86 *c = tscan->c; + + cores = parse_num_cores_legacy(c); + core_shift = get_count_order(cores); + + if (cpu_has(c, X86_FEATURE_HT)) { + if (!WARN_ON_ONCE(tscan->ebx1_nproc_shift < core_shift)) + smt_shift = tscan->ebx1_nproc_shift - core_shift; + /* + * The parser expects leaf 0xb/0x1f format, which means + * the number of logical processors at core level is + * counting threads. + */ + core_shift += smt_shift; + cores <<= smt_shift; + } + + topology_set_dom(tscan, TOPO_SMT_DOMAIN, smt_shift, 1U << smt_shift); + topology_set_dom(tscan, TOPO_CORE_DOMAIN, core_shift, cores); +} + bool topo_is_converted(struct cpuinfo_x86 *c) { /* Temporary until everything is converted over. */ @@ -47,7 +89,7 @@ static bool fake_topology(struct topo_scan *tscan) * which has useless CPUID information. */ topology_set_dom(tscan, TOPO_SMT_DOMAIN, 0, 1); - topology_set_dom(tscan, TOPO_CORE_DOMAIN, 1, 1); + topology_set_dom(tscan, TOPO_CORE_DOMAIN, 0, 1); return tscan->c->cpuid_level < 1 || xen_pv_domain(); } @@ -88,6 +130,8 @@ static void parse_topology(struct topo_scan *tscan, bool early) /* The above is sufficient for UP */ if (!IS_ENABLED(CONFIG_SMP)) return; + + tscan->ebx1_nproc_shift = get_count_order(ebx.nproc); } static void topo_set_ids(struct topo_scan *tscan) -- Gitee From 316b142590c50a173937f2cf81bcc4761eb6bf4b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:04 +0100 Subject: [PATCH 05/88] x86/cpu: Use common topology code for Centaur and Zhaoxin ANBZ: #26668 commit 598e719c40d67b1473d78423e941bed4ea6c726d upstream. Centaur and Zhaoxin CPUs use only the legacy SMP detection. Remove the invocations from their 32bit path and exclude them from the 64-bit call path. No functional change intended. Intel-SIG: commit 598e719c40d6 x86/cpu: Use common topology code for Centaur and Zhaoxin. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153624.706794189@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/centaur.c | 10 ---------- arch/x86/kernel/cpu/topology_common.c | 11 ++++++++--- arch/x86/kernel/cpu/zhaoxin.c | 10 ---------- 3 files changed, 8 insertions(+), 23 deletions(-) diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 6c191b3a2392..be09838720d0 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -119,9 +119,6 @@ static void early_init_centaur(struct cpuinfo_x86 *c) if (cpuid_eax(0xC0000000) >= 0xC0000006) c->x86_capability[CPUID_C000_0006_EAX] = cpuid_eax(0xC0000006); - - if (detect_extended_topology_early(c) < 0) - detect_ht_early(c); } static void init_centaur(struct cpuinfo_x86 *c) @@ -140,14 +137,7 @@ static void init_centaur(struct cpuinfo_x86 *c) clear_cpu_cap(c, 0*32+31); #endif early_init_centaur(c); - detect_extended_topology(c); init_intel_cacheinfo(c); - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { - detect_num_cpu_cores(c); -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - } if (c->cpuid_level > 9) { unsigned int eax = cpuid_eax(10); diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index b0ff1fc84a13..bcaaeecaf1a6 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -42,7 +42,7 @@ static unsigned int __maybe_unused parse_num_cores_legacy(struct cpuinfo_x86 *c) return eax.ncores + 1; } -static void __maybe_unused parse_legacy(struct topo_scan *tscan) +static void parse_legacy(struct topo_scan *tscan) { unsigned int cores, core_shift, smt_shift = 0; struct cpuinfo_x86 *c = tscan->c; @@ -71,10 +71,8 @@ bool topo_is_converted(struct cpuinfo_x86 *c) /* Temporary until everything is converted over. */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: - case X86_VENDOR_CENTAUR: case X86_VENDOR_INTEL: case X86_VENDOR_HYGON: - case X86_VENDOR_ZHAOXIN: return false; default: /* Let all UP systems use the below */ @@ -132,6 +130,13 @@ static void parse_topology(struct topo_scan *tscan, bool early) return; tscan->ebx1_nproc_shift = get_count_order(ebx.nproc); + + switch (c->x86_vendor) { + case X86_VENDOR_CENTAUR: + case X86_VENDOR_ZHAOXIN: + parse_legacy(tscan); + break; + } } static void topo_set_ids(struct topo_scan *tscan) diff --git a/arch/x86/kernel/cpu/zhaoxin.c b/arch/x86/kernel/cpu/zhaoxin.c index 0c42baff9b1c..cb01eccc0975 100644 --- a/arch/x86/kernel/cpu/zhaoxin.c +++ b/arch/x86/kernel/cpu/zhaoxin.c @@ -75,22 +75,12 @@ static void early_init_zhaoxin(struct cpuinfo_x86 *c) if (cpuid_eax(0xC0000000) >= 0xC0000006) c->x86_capability[CPUID_C000_0006_EAX] = cpuid_eax(0xC0000006); - - if (detect_extended_topology_early(c) < 0) - detect_ht_early(c); } static void init_zhaoxin(struct cpuinfo_x86 *c) { early_init_zhaoxin(c); - detect_extended_topology(c); init_intel_cacheinfo(c); - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { - detect_num_cpu_cores(c); -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - } if (c->cpuid_level > 9) { unsigned int eax = cpuid_eax(10); -- Gitee From 02e58cfd992624fdf1e8ec150119ce0c0443eac5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:05 +0100 Subject: [PATCH 06/88] Subject: [PATCH 06/27] x86/cpu: Move __max_die_per_package to common.c ANBZ: #26668 commit 92853a7774f942e3692dbd83bace82333a2b47bd upstream. In preparation of a complete replacement for the topology leaf 0xb/0x1f evaluation, move __max_die_per_package into the common code. Will be removed once everything is converted over. Intel-SIG: commit 92853a7774f9 x86/cpu: Move __max_die_per_package to common.c. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153624.768188958@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/common.c | 3 +++ arch/x86/kernel/cpu/topology.c | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index acc6d368f490..1c1b86b5f22d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -77,6 +77,9 @@ EXPORT_SYMBOL(smp_num_siblings); DEFINE_STATIC_KEY_FALSE(hygon_lmc_key); EXPORT_SYMBOL_GPL(hygon_lmc_key); +unsigned int __max_die_per_package __read_mostly = 1; +EXPORT_SYMBOL(__max_die_per_package); + static struct ppin_info { int feature; int msr_ppin_ctl; diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index a518300ea01a..c298b1af12e3 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -25,9 +25,6 @@ #define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) #define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) -unsigned int __max_die_per_package __read_mostly = 1; -EXPORT_SYMBOL(__max_die_per_package); - /* CPUs which are the primary SMT threads */ struct cpumask __cpu_primary_thread_mask __read_mostly; -- Gitee From 9711397f0eb9510e7b328151582ae43788ce74dc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:07 +0100 Subject: [PATCH 07/88] x86/cpu: Provide a sane leaf 0xb/0x1f parser ANBZ: #26668 commit 3d41009425225ca5e09016c634ecee513b4713bb upstream. detect_extended_topology() along with it's early() variant is a classic example for duct tape engineering: - It evaluates an array of subleafs with a boatload of local variables for the relevant topology levels instead of using an array to save the enumerated information and propagate it to the right level - It has no boundary checks for subleafs - It prevents updating the die_id with a crude workaround instead of checking for leaf 0xb which does not provide die information. - It's broken vs. the number of dies evaluation as it uses: num_processors[DIE_LEVEL] / num_processors[CORE_LEVEL] which "works" only correctly if there is none of the intermediate topology levels (MODULE/TILE) enumerated. There is zero value in trying to "fix" that code as the only proper fix is to rewrite it from scratch. Implement a sane parser with proper code documentation, which will be used for the consolidated topology evaluation in the next step. Intel-SIG: commit 3d4100942522 x86/cpu: Provide a sane leaf 0xb/0x1f parser. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153624.830571770@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/topology.h | 12 +++ arch/x86/kernel/cpu/topology_ext.c | 130 +++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/cpu/topology_ext.c diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 509f3d36253c..e749f11ad240 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -18,7 +18,7 @@ KMSAN_SANITIZE_common.o := n KCSAN_SANITIZE_common.o := n obj-y := cacheinfo.o scattered.o -obj-y += topology_common.o topology.o +obj-y += topology_common.o topology_ext.o topology.o obj-y += common.o obj-y += rdrand.o obj-y += match.o diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h index 934a100b9fe6..2b100cd11f17 100644 --- a/arch/x86/kernel/cpu/topology.h +++ b/arch/x86/kernel/cpu/topology.h @@ -16,6 +16,7 @@ void cpu_init_topology(struct cpuinfo_x86 *c); void cpu_parse_topology(struct cpuinfo_x86 *c); void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, unsigned int shift, unsigned int ncpus); +bool cpu_parse_topology_ext(struct topo_scan *tscan); static inline u32 topo_shift_apicid(u32 apicid, enum x86_topology_domains dom) { @@ -36,4 +37,15 @@ static inline u32 topo_domain_mask(enum x86_topology_domains dom) return (1U << x86_topo_system.dom_shifts[dom]) - 1; } +/* + * Update a domain level after the fact without propagating. Used to fixup + * broken CPUID enumerations. + */ +static inline void topology_update_dom(struct topo_scan *tscan, enum x86_topology_domains dom, + unsigned int shift, unsigned int ncpus) +{ + tscan->dom_shifts[dom] = shift; + tscan->dom_ncpus[dom] = ncpus; +} + #endif /* ARCH_X86_TOPOLOGY_H */ diff --git a/arch/x86/kernel/cpu/topology_ext.c b/arch/x86/kernel/cpu/topology_ext.c new file mode 100644 index 000000000000..e477228cd5b2 --- /dev/null +++ b/arch/x86/kernel/cpu/topology_ext.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +#include +#include +#include + +#include "cpu.h" + +enum topo_types { + INVALID_TYPE = 0, + SMT_TYPE = 1, + CORE_TYPE = 2, + MAX_TYPE_0B = 3, + MODULE_TYPE = 3, + TILE_TYPE = 4, + DIE_TYPE = 5, + DIEGRP_TYPE = 6, + MAX_TYPE_1F = 7, +}; + +/* + * Use a lookup table for the case that there are future types > 6 which + * describe an intermediate domain level which does not exist today. + */ +static const unsigned int topo_domain_map_0b_1f[MAX_TYPE_1F] = { + [SMT_TYPE] = TOPO_SMT_DOMAIN, + [CORE_TYPE] = TOPO_CORE_DOMAIN, + [MODULE_TYPE] = TOPO_MODULE_DOMAIN, + [TILE_TYPE] = TOPO_TILE_DOMAIN, + [DIE_TYPE] = TOPO_DIE_DOMAIN, + [DIEGRP_TYPE] = TOPO_DIEGRP_DOMAIN, +}; + +static inline bool topo_subleaf(struct topo_scan *tscan, u32 leaf, u32 subleaf, + unsigned int *last_dom) +{ + unsigned int dom, maxtype; + const unsigned int *map; + struct { + // eax + u32 x2apic_shift : 5, // Number of bits to shift APIC ID right + // for the topology ID at the next level + : 27; // Reserved + // ebx + u32 num_processors : 16, // Number of processors at current level + : 16; // Reserved + // ecx + u32 level : 8, // Current topology level. Same as sub leaf number + type : 8, // Level type. If 0, invalid + : 16; // Reserved + // edx + u32 x2apic_id : 32; // X2APIC ID of the current logical processor + } sl; + + switch (leaf) { + case 0x0b: maxtype = MAX_TYPE_0B; map = topo_domain_map_0b_1f; break; + case 0x1f: maxtype = MAX_TYPE_1F; map = topo_domain_map_0b_1f; break; + default: return false; + } + + cpuid_subleaf(leaf, subleaf, &sl); + + if (!sl.num_processors || sl.type == INVALID_TYPE) + return false; + + if (sl.type >= maxtype) { + pr_err_once("Topology: leaf 0x%x:%d Unknown domain type %u\n", + leaf, subleaf, sl.type); + /* + * It really would have been too obvious to make the domain + * type space sparse and leave a few reserved types between + * the points which might change instead of following the + * usual "this can be fixed in software" principle. + */ + dom = *last_dom + 1; + } else { + dom = map[sl.type]; + *last_dom = dom; + } + + if (!dom) { + tscan->c->topo.initial_apicid = sl.x2apic_id; + } else if (tscan->c->topo.initial_apicid != sl.x2apic_id) { + pr_warn_once(FW_BUG "CPUID leaf 0x%x subleaf %d APIC ID mismatch %x != %x\n", + leaf, subleaf, tscan->c->topo.initial_apicid, sl.x2apic_id); + } + + topology_set_dom(tscan, dom, sl.x2apic_shift, sl.num_processors); + return true; +} + +static bool parse_topology_leaf(struct topo_scan *tscan, u32 leaf) +{ + unsigned int last_dom; + u32 subleaf; + + /* Read all available subleafs and populate the levels */ + for (subleaf = 0, last_dom = 0; topo_subleaf(tscan, leaf, subleaf, &last_dom); subleaf++); + + /* If subleaf 0 failed to parse, give up */ + if (!subleaf) + return false; + + /* + * There are machines in the wild which have shift 0 in the subleaf + * 0, but advertise 2 logical processors at that level. They are + * truly SMT. + */ + if (!tscan->dom_shifts[TOPO_SMT_DOMAIN] && tscan->dom_ncpus[TOPO_SMT_DOMAIN] > 1) { + unsigned int sft = get_count_order(tscan->dom_ncpus[TOPO_SMT_DOMAIN]); + + pr_warn_once(FW_BUG "CPUID leaf 0x%x subleaf 0 has shift level 0 but %u CPUs. Fixing it up.\n", + leaf, tscan->dom_ncpus[TOPO_SMT_DOMAIN]); + topology_update_dom(tscan, TOPO_SMT_DOMAIN, sft, tscan->dom_ncpus[TOPO_SMT_DOMAIN]); + } + + set_cpu_cap(tscan->c, X86_FEATURE_XTOPOLOGY); + return true; +} + +bool cpu_parse_topology_ext(struct topo_scan *tscan) +{ + /* Intel: Try leaf 0x1F first. */ + if (tscan->c->cpuid_level >= 0x1f && parse_topology_leaf(tscan, 0x1f)) + return true; + + /* Intel/AMD: Fall back to leaf 0xB if available */ + return tscan->c->cpuid_level >= 0x0b && parse_topology_leaf(tscan, 0x0b); +} -- Gitee From 601c9f8cd138cd05718de592529fa5e05d22dfef Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:08 +0100 Subject: [PATCH 08/88] x86/cpu: Use common topology code for Intel ANBZ: #26668 commit 22d63660c35eb751c63a709bf901a64c1726592a upstream. Intel CPUs use either topology leaf 0xb/0x1f evaluation or the legacy SMP/HT evaluation based on CPUID leaf 0x1/0x4. Move it over to the consolidated topology code and remove the random topology hacks which are sprinkled into the Intel and the common code. No functional change intended. Intel-SIG: commit 22d63660c35e x86/cpu: Use common topology code for Intel. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153624.893644349@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/common.c | 65 --------------------------- arch/x86/kernel/cpu/cpu.h | 4 -- arch/x86/kernel/cpu/intel.c | 25 ----------- arch/x86/kernel/cpu/topology.c | 22 --------- arch/x86/kernel/cpu/topology_common.c | 5 ++- 5 files changed, 4 insertions(+), 117 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1c1b86b5f22d..80741dd756eb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -802,19 +802,6 @@ static void get_model_name(struct cpuinfo_x86 *c) *(s + 1) = '\0'; } -void detect_num_cpu_cores(struct cpuinfo_x86 *c) -{ - unsigned int eax, ebx, ecx, edx; - - c->x86_max_cores = 1; - if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4) - return; - - cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); - if (eax & 0x1f) - c->x86_max_cores = (eax >> 26) + 1; -} - void cpu_detect_cache_sizes(struct cpuinfo_x86 *c) { unsigned int n, dummy, ebx, ecx, edx, l2size; @@ -876,54 +863,6 @@ static void cpu_detect_tlb(struct cpuinfo_x86 *c) tlb_lld_4m[ENTRIES], tlb_lld_1g[ENTRIES]); } -int detect_ht_early(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - u32 eax, ebx, ecx, edx; - - if (!cpu_has(c, X86_FEATURE_HT)) - return -1; - - if (cpu_has(c, X86_FEATURE_CMP_LEGACY)) - return -1; - - if (cpu_has(c, X86_FEATURE_XTOPOLOGY)) - return -1; - - cpuid(1, &eax, &ebx, &ecx, &edx); - - smp_num_siblings = (ebx & 0xff0000) >> 16; - if (smp_num_siblings == 1) - pr_info_once("CPU0: Hyper-Threading is disabled\n"); -#endif - return 0; -} - -void detect_ht(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - int index_msb, core_bits; - - if (topo_is_converted(c)) - return; - - if (detect_ht_early(c) < 0) - return; - - index_msb = get_count_order(smp_num_siblings); - c->topo.pkg_id = apic->phys_pkg_id(c->topo.initial_apicid, index_msb); - - smp_num_siblings = smp_num_siblings / c->x86_max_cores; - - index_msb = get_count_order(smp_num_siblings); - - core_bits = get_count_order(c->x86_max_cores); - - c->topo.core_id = apic->phys_pkg_id(c->topo.initial_apicid, index_msb) & - ((1 << core_bits) - 1); -#endif -} - void get_cpu_vendor(struct cpuinfo_x86 *c) { char *v = c->x86_vendor_id; @@ -2010,10 +1949,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86, c->x86_model); } -#ifdef CONFIG_X86_64 - detect_ht(c); -#endif - x86_init_rdrand(c); setup_pku(c); setup_cet(c); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 2a446ecb2cc0..5a790f12dc28 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -76,11 +76,7 @@ extern void init_intel_cacheinfo(struct cpuinfo_x86 *c); extern void init_amd_cacheinfo(struct cpuinfo_x86 *c); extern void init_hygon_cacheinfo(struct cpuinfo_x86 *c); -extern void detect_num_cpu_cores(struct cpuinfo_x86 *c); -extern int detect_extended_topology_early(struct cpuinfo_x86 *c); extern int detect_extended_topology(struct cpuinfo_x86 *c); -extern int detect_ht_early(struct cpuinfo_x86 *c); -extern void detect_ht(struct cpuinfo_x86 *c); extern void check_null_seg_clears_base(struct cpuinfo_x86 *c); void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 81e475f3e2ec..89eed78348d7 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -365,13 +365,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) check_memory_type_self_snoop_errata(c); - /* - * Get the number of SMT siblings early from the extended topology - * leaf, if available. Otherwise try the legacy SMT detection. - */ - if (detect_extended_topology_early(c) < 0) - detect_ht_early(c); - /* * Adjust the number of physical bits early because it affects the * valid bits of the MTRR mask registers. @@ -572,24 +565,6 @@ static void init_intel(struct cpuinfo_x86 *c) intel_workarounds(c); - /* - * Detect the extended topology information if available. This - * will reinitialise the initial_apicid which will be used - * in init_intel_cacheinfo() - */ - detect_extended_topology(c); - - if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { - /* - * let's use the legacy cpuid vector 0x1 and 0x4 for topology - * detection. - */ - detect_num_cpu_cores(c); -#ifdef CONFIG_X86_32 - detect_ht(c); -#endif - } - init_intel_cacheinfo(c); if (c->cpuid_level > 9) { diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index c298b1af12e3..29376e5af634 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -62,28 +62,6 @@ static int detect_extended_topology_leaf(struct cpuinfo_x86 *c) } #endif -int detect_extended_topology_early(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned int eax, ebx, ecx, edx; - int leaf; - - leaf = detect_extended_topology_leaf(c); - if (leaf < 0) - return -1; - - set_cpu_cap(c, X86_FEATURE_XTOPOLOGY); - - cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); - /* - * initial apic id, which also represents 32-bit extended x2apic id. - */ - c->topo.initial_apicid = edx; - smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx)); -#endif - return 0; -} - /* * Check for extended topology enumeration cpuid leaf, and if it * exists, use it for populating initial_apicid and cpu topology diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index bcaaeecaf1a6..ef99499c9bbe 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -71,7 +71,6 @@ bool topo_is_converted(struct cpuinfo_x86 *c) /* Temporary until everything is converted over. */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: - case X86_VENDOR_INTEL: case X86_VENDOR_HYGON: return false; default: @@ -136,6 +135,10 @@ static void parse_topology(struct topo_scan *tscan, bool early) case X86_VENDOR_ZHAOXIN: parse_legacy(tscan); break; + case X86_VENDOR_INTEL: + if (!IS_ENABLED(CONFIG_CPU_SUP_INTEL) || !cpu_parse_topology_ext(tscan)) + parse_legacy(tscan); + break; } } -- Gitee From 6bc3022026f5641b860a6c314fabd5dbf716e193 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 30 May 2024 17:29:18 +0200 Subject: [PATCH 09/88] x86/topology/intel: Unlock CPUID before evaluating anything ANBZ: #26668 commit 0c2f6d04619ec2b53ad4b0b591eafc9389786e86 upstream. Intel CPUs have a MSR bit to limit CPUID enumeration to leaf two. If this bit is set by the BIOS then CPUID evaluation including topology enumeration does not work correctly as the evaluation code does not try to analyze any leaf greater than two. This went unnoticed before because the original topology code just repeated evaluation several times and managed to overwrite the initial limited information with the correct one later. The new evaluation code does it once and therefore ends up with the limited and wrong information. Cure this by unlocking CPUID right before evaluating anything which depends on the maximum CPUID leaf being greater than two instead of rereading stuff after unlock. Fixes: 22d63660c35e ("x86/cpu: Use common topology code for Intel") Reported-by: Peter Schneider Intel-SIG: commit 0c2f6d04619e x86/topology/intel: Unlock CPUID before evaluating anything. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Tested-by: Peter Schneider Cc: Link: https://lore.kernel.org/r/fd3f73dc-a86f-4bcf-9c60-43556a21eb42@googlemail.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/common.c | 3 ++- arch/x86/kernel/cpu/cpu.h | 2 ++ arch/x86/kernel/cpu/intel.c | 25 ++++++++++++++++--------- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80741dd756eb..a9f84aa5498a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1634,6 +1634,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) if (have_cpuid_p()) { cpu_detect(c); get_cpu_vendor(c); + intel_unlock_cpuid_leafs(c); get_cpu_cap(c); get_cpu_address_sizes(c); setup_force_cpu_cap(X86_FEATURE_CPUID); @@ -1798,7 +1799,7 @@ static void generic_identify(struct cpuinfo_x86 *c) cpu_detect(c); get_cpu_vendor(c); - + intel_unlock_cpuid_leafs(c); get_cpu_cap(c); get_cpu_address_sizes(c); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 5a790f12dc28..08beca97ad17 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -61,9 +61,11 @@ extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state; extern void __init tsx_init(void); void tsx_ap_init(void); +void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c); #else static inline void tsx_init(void) { } static inline void tsx_ap_init(void) { } +static inline void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c) { } #endif /* CONFIG_CPU_SUP_INTEL */ extern void init_spectral_chicken(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 89eed78348d7..fa43abfe7fda 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -237,19 +237,26 @@ static void detect_tme_early(struct cpuinfo_x86 *c) c->x86_phys_bits -= keyid_bits; } +void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return; + + if (c->x86 < 6 || (c->x86 == 6 && c->x86_model < 0xd)) + return; + + /* + * The BIOS can have limited CPUID to leaf 2, which breaks feature + * enumeration. Unlock it and update the maximum leaf info. + */ + if (msr_clear_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) + c->cpuid_level = cpuid_eax(0); +} + static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; - /* Unmask CPUID levels if masked: */ - if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { - if (msr_clear_bit(MSR_IA32_MISC_ENABLE, - MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) { - c->cpuid_level = cpuid_eax(0); - get_cpu_cap(c); - } - } - if ((c->x86 == 0xf && c->x86_model >= 0x03) || (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); -- Gitee From 7cc7a9274c25aa472246681313c6ae203a412cba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:10 +0100 Subject: [PATCH 10/88] x86/cpu/amd: Provide a separate accessor for Node ID ANBZ: #26668 commit 7e3ec6286753b404666af9a58d283690302c9321 upstream. AMD (ab)uses topology_die_id() to store the Node ID information and topology_max_dies_per_pkg to store the number of nodes per package. This collides with the proper processor die level enumeration which is coming on AMD with CPUID 8000_0026, unless there is a correlation between the two. There is zero documentation about that. So provide new storage and new accessors which for now still access die_id and topology_max_die_per_pkg(). Will be mopped up after AMD and HYGON are converted over. Intel-SIG: commit 7e3ec6286753 x86/cpu/amd: Provide a separate accessor for Node ID. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153624.956116738@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/events/amd/core.c | 2 +- arch/x86/include/asm/processor.h | 3 +++ arch/x86/include/asm/topology.h | 8 ++++++++ arch/x86/kernel/amd_nb.c | 4 ++-- arch/x86/kernel/cpu/cacheinfo.c | 2 +- arch/x86/kernel/cpu/mce/amd.c | 4 ++-- arch/x86/kernel/cpu/mce/inject.c | 4 ++-- drivers/edac/amd64_edac.c | 4 ++-- drivers/edac/mce_amd.c | 4 ++-- drivers/ras/amd/atl/umc.c | 2 +- 10 files changed, 24 insertions(+), 13 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index e7edce877878..b19a035d2702 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -611,7 +611,7 @@ static void amd_pmu_cpu_starting(int cpu) if (!x86_pmu.amd_nb_constraints) return; - nb_id = topology_die_id(cpu); + nb_id = topology_amd_node_id(cpu); WARN_ON_ONCE(nb_id == BAD_APICID); for_each_online_cpu(i) { diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 10a921821781..114b7775bb19 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -100,6 +100,9 @@ struct cpuinfo_topology { u32 logical_pkg_id; u32 logical_die_id; + // AMD Node ID and Nodes per Package info + u32 amd_node_id; + // Cache level topology IDs u32 llc_id; u32 l2c_id; diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 60aceacac190..0f843ce4fccc 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -131,6 +131,8 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); #define topology_core_id(cpu) (cpu_data(cpu).topo.core_id) #define topology_ppin(cpu) (cpu_data(cpu).ppin) +#define topology_amd_node_id(cpu) (cpu_data(cpu).topo.die_id) + extern unsigned int __max_die_per_package; extern struct cpumask __cpu_primary_thread_mask; #define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) @@ -163,6 +165,11 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu); int topology_update_die_map(unsigned int dieid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); +static inline unsigned int topology_amd_nodes_per_pkg(void) +{ + return __max_die_per_package; +} + /** * topology_is_primary_thread - Check whether CPU is the primary SMT thread * @cpu: CPU to check @@ -181,6 +188,7 @@ static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } static inline int topology_max_die_per_package(void) { return 1; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } +static inline unsigned int topology_amd_nodes_per_pkg(void) { return 0; }; #endif /* !CONFIG_SMP */ static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 488a184034d4..dd8c9860d3af 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -648,7 +648,7 @@ struct resource *amd_get_mmconfig_range(struct resource *res) int amd_get_subcaches(int cpu) { - struct pci_dev *link = node_to_amd_nb(topology_die_id(cpu))->link; + struct pci_dev *link = node_to_amd_nb(topology_amd_node_id(cpu))->link; unsigned int mask; if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) @@ -662,7 +662,7 @@ int amd_get_subcaches(int cpu) int amd_set_subcaches(int cpu, unsigned long mask) { static unsigned int reset, ban; - struct amd_northbridge *nb = node_to_amd_nb(topology_die_id(cpu)); + struct amd_northbridge *nb = node_to_amd_nb(topology_amd_node_id(cpu)); unsigned int reg; int cuid; diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 56897392dcf2..e21639e0e35b 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -595,7 +595,7 @@ static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index) if (index < 3) return; - node = topology_die_id(smp_processor_id()); + node = topology_amd_node_id(smp_processor_id()); this_leaf->nb = node_to_amd_nb(node); if (this_leaf->nb && !this_leaf->nb->l3_cache.indices) amd_calc_l3_indices(this_leaf->nb); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index b89c5a385403..b88f4fb7b7d9 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -1187,7 +1187,7 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu, return -ENODEV; if (is_shared_bank(bank)) { - nb = node_to_amd_nb(topology_die_id(cpu)); + nb = node_to_amd_nb(topology_amd_node_id(cpu)); /* threshold descriptor already initialized on this node? */ if (nb && nb->bank4) { @@ -1291,7 +1291,7 @@ static void threshold_remove_bank(struct threshold_bank *bank) * The last CPU on this node using the shared bank is going * away, remove that bank now. */ - nb = node_to_amd_nb(topology_die_id(smp_processor_id())); + nb = node_to_amd_nb(topology_amd_node_id(smp_processor_id())); nb->bank4 = NULL; } diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 72f0695c3dc1..308c5b5e0bbe 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -543,8 +543,8 @@ static void do_inject(void) if (boot_cpu_has(X86_FEATURE_AMD_DCM) && b == 4 && boot_cpu_data.x86 < 0x17) { - toggle_nb_mca_mst_cpu(topology_die_id(cpu)); - cpu = get_nbc_for_node(topology_die_id(cpu)); + toggle_nb_mca_mst_cpu(topology_amd_node_id(cpu)); + cpu = get_nbc_for_node(topology_amd_node_id(cpu)); } cpus_read_lock(); diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index c1cc244f42f6..89a96330dafc 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2008,7 +2008,7 @@ static void dct_determine_memory_type(struct amd64_pvt *pvt) /* On F10h and later ErrAddr is MC4_ADDR[47:1] */ static u64 get_error_address(struct amd64_pvt *pvt, struct mce *m) { - u16 mce_nid = topology_die_id(m->extcpu); + u16 mce_nid = topology_amd_node_id(m->extcpu); struct mem_ctl_info *mci; u8 start_bit = 1; u8 end_bit = 47; @@ -3568,7 +3568,7 @@ static void get_cpus_on_this_dct_cpumask(struct cpumask *mask, u16 nid) int cpu; for_each_online_cpu(cpu) - if (topology_die_id(cpu) == nid) + if (topology_amd_node_id(cpu) == nid) cpumask_set_cpu(cpu, mask); } diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index cf0298e44849..e1b289e88cbf 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -1112,7 +1112,7 @@ static void decode_mc3_mce(struct mce *m) static void decode_mc4_mce(struct mce *m) { unsigned int fam = x86_family(m->cpuid); - int node_id = topology_die_id(m->extcpu); + int node_id = topology_amd_node_id(m->extcpu); u16 ec = EC(m->status); u8 xec = XEC(m->status, 0x1f); u8 offset = 0; @@ -1244,7 +1244,7 @@ static void decode_smca_error(struct mce *m) boot_cpu_data.x86 == 0x18) decode_dram_ecc(topology_logical_die_id(m->extcpu), m); else - decode_dram_ecc(topology_die_id(m->extcpu), m); + decode_dram_ecc(topology_amd_node_id(m->extcpu), m); } } diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index 6be1b06999b8..dc8aa12f63c8 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -383,7 +383,7 @@ static u8 get_die_id(struct atl_err *err) * For CPUs, this is the AMD Node ID modulo the number * of AMD Nodes per socket. */ - return topology_die_id(err->cpu) % amd_get_nodes_per_socket(); + return topology_amd_node_id(err->cpu) % topology_amd_nodes_per_pkg(); } #define UMC_CHANNEL_NUM GENMASK(31, 20) -- Gitee From 96510e9e82839138becd63628b4cafaaa413d9c6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:11 +0100 Subject: [PATCH 11/88] x86/cpu: Provide an AMD/HYGON specific topology parser ANBZ: #26668 commit f7fb3b2dd92c633871b7037773b89531c488a371 upstream. AMD/HYGON uses various methods for topology evaluation: - Leaf 0x80000008 and 0x8000001e based with an optional leaf 0xb, which is the preferred variant for modern CPUs. Leaf 0xb will be superseded by leaf 0x80000026 soon, which is just another variant of the Intel 0x1f leaf for whatever reasons. - Subleaf 0x80000008 and NODEID_MSR base - Legacy fallback That code is following the principle of random bits and pieces all over the place which results in multiple evaluations and impenetrable code flows in the same way as the Intel parsing did. Provide a sane implementation by clearly separating the three variants and bringing them in the proper preference order in one place. This provides the parsing for both AMD and HYGON because there is no point in having a separate HYGON parser which only differs by 3 lines of code. Any further divergence between AMD and HYGON can be handled in different functions, while still sharing the existing parsers. Intel-SIG: commit f7fb3b2dd92c x86/cpu: Provide an AMD/HYGON specific topology parser. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.020038641@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/topology.h | 2 + arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/amd.c | 2 +- arch/x86/kernel/cpu/cacheinfo.c | 4 +- arch/x86/kernel/cpu/cpu.h | 2 +- arch/x86/kernel/cpu/debugfs.c | 2 + arch/x86/kernel/cpu/topology.h | 6 + arch/x86/kernel/cpu/topology_amd.c | 183 ++++++++++++++++++++++++++ arch/x86/kernel/cpu/topology_common.c | 19 ++- 9 files changed, 215 insertions(+), 7 deletions(-) create mode 100644 arch/x86/kernel/cpu/topology_amd.c diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 0f843ce4fccc..03083a3b1ae4 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -165,6 +165,8 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu); int topology_update_die_map(unsigned int dieid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); +extern unsigned int __amd_nodes_per_pkg; + static inline unsigned int topology_amd_nodes_per_pkg(void) { return __max_die_per_package; diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index e749f11ad240..255ebd57f243 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -18,7 +18,7 @@ KMSAN_SANITIZE_common.o := n KCSAN_SANITIZE_common.o := n obj-y := cacheinfo.o scattered.o -obj-y += topology_common.o topology_ext.o topology.o +obj-y += topology_common.o topology_ext.o topology_amd.o topology.o obj-y += common.o obj-y += rdrand.o obj-y += match.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index f4e8ae350f6e..2f226b712d62 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -353,7 +353,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) if (!err) c->x86_coreid_bits = get_count_order(c->x86_max_cores); - cacheinfo_amd_init_llc_id(c); + cacheinfo_amd_init_llc_id(c, c->topo.die_id); } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index e21639e0e35b..2eeb3448a99e 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -661,7 +661,7 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c) return i; } -void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c) +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id) { /* * We may have multiple LLCs if L3 caches exist, so check if we @@ -672,7 +672,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c) if (c->x86 < 0x17) { /* LLC is at the node level. */ - c->topo.llc_id = c->topo.die_id; + c->topo.llc_id = die_id; } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) { /* * LLC is at the core complex level. diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 08beca97ad17..f3f0ccc52f03 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -81,7 +81,7 @@ extern void init_hygon_cacheinfo(struct cpuinfo_x86 *c); extern int detect_extended_topology(struct cpuinfo_x86 *c); extern void check_null_seg_clears_base(struct cpuinfo_x86 *c); -void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c); +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id); void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c); unsigned int aperfmperf_get_khz(int cpu); diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c index be8ca21ecd6a..86de544cdb14 100644 --- a/arch/x86/kernel/cpu/debugfs.c +++ b/arch/x86/kernel/cpu/debugfs.c @@ -26,6 +26,8 @@ static int cpu_debug_show(struct seq_file *m, void *p) seq_printf(m, "logical_die_id: %u\n", c->topo.logical_die_id); seq_printf(m, "llc_id: %u\n", c->topo.llc_id); seq_printf(m, "l2c_id: %u\n", c->topo.l2c_id); + seq_printf(m, "amd_node_id: %u\n", c->topo.amd_node_id); + seq_printf(m, "amd_nodes_per_pkg: %u\n", topology_amd_nodes_per_pkg()); seq_printf(m, "max_cores: %u\n", c->x86_max_cores); seq_printf(m, "max_die_per_pkg: %u\n", __max_die_per_package); seq_printf(m, "smp_num_siblings: %u\n", smp_num_siblings); diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h index 2b100cd11f17..7eead546c20e 100644 --- a/arch/x86/kernel/cpu/topology.h +++ b/arch/x86/kernel/cpu/topology.h @@ -9,6 +9,10 @@ struct topo_scan { /* Legacy CPUID[1]:EBX[23:16] number of logical processors */ unsigned int ebx1_nproc_shift; + + /* AMD specific node ID which cannot be mapped into APIC space. */ + u16 amd_nodes_per_pkg; + u16 amd_node_id; }; bool topo_is_converted(struct cpuinfo_x86 *c); @@ -17,6 +21,8 @@ void cpu_parse_topology(struct cpuinfo_x86 *c); void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, unsigned int shift, unsigned int ncpus); bool cpu_parse_topology_ext(struct topo_scan *tscan); +void cpu_parse_topology_amd(struct topo_scan *tscan); +void cpu_topology_fixup_amd(struct topo_scan *tscan); static inline u32 topo_shift_apicid(u32 apicid, enum x86_topology_domains dom) { diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c new file mode 100644 index 000000000000..1a8b3ad493af --- /dev/null +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +#include +#include +#include + +#include "cpu.h" + +static bool parse_8000_0008(struct topo_scan *tscan) +{ + struct { + // ecx + u32 cpu_nthreads : 8, // Number of physical threads - 1 + : 4, // Reserved + apicid_coreid_len : 4, // Number of thread core ID bits (shift) in APIC ID + perf_tsc_len : 2, // Performance time-stamp counter size + : 14; // Reserved + } ecx; + unsigned int sft; + + if (tscan->c->extended_cpuid_level < 0x80000008) + return false; + + cpuid_leaf_reg(0x80000008, CPUID_ECX, &ecx); + + /* If the thread bits are 0, then get the shift value from ecx.cpu_nthreads */ + sft = ecx.apicid_coreid_len; + if (!sft) + sft = get_count_order(ecx.cpu_nthreads + 1); + + topology_set_dom(tscan, TOPO_SMT_DOMAIN, sft, ecx.cpu_nthreads + 1); + return true; +} + +static void store_node(struct topo_scan *tscan, unsigned int nr_nodes, u16 node_id) +{ + /* + * Starting with Fam 17h the DIE domain could probably be used to + * retrieve the node info on AMD/HYGON. Analysis of CPUID dumps + * suggests it's the topmost bit(s) of the CPU cores area, but + * that's guess work and neither enumerated nor documented. + * + * Up to Fam 16h this does not work at all and the legacy node ID + * has to be used. + */ + tscan->amd_nodes_per_pkg = nr_nodes; + tscan->amd_node_id = node_id; +} + +static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) +{ + struct { + // eax + u32 ext_apic_id : 32; // Extended APIC ID + // ebx + u32 core_id : 8, // Unique per-socket logical core unit ID + core_nthreads : 8, // #Threads per core (zero-based) + : 16; // Reserved + // ecx + u32 node_id : 8, // Node (die) ID of invoking logical CPU + nnodes_per_socket : 3, // #nodes in invoking logical CPU's package/socket + : 21; // Reserved + // edx + u32 : 32; // Reserved + } leaf; + + if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) + return false; + + cpuid_leaf(0x8000001e, &leaf); + + tscan->c->topo.initial_apicid = leaf.ext_apic_id; + + /* + * If leaf 0xb is available, then SMT shift is set already. If not + * take it from ecx.threads_per_core and use topo_update_dom() - + * topology_set_dom() would propagate and overwrite the already + * propagated CORE level. + */ + if (!has_0xb) { + unsigned int nthreads = leaf.core_nthreads + 1; + + topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads); + } + + store_node(tscan, leaf.nnodes_per_socket + 1, leaf.node_id); + + if (tscan->c->x86_vendor == X86_VENDOR_AMD) { + if (tscan->c->x86 == 0x15) + tscan->c->topo.cu_id = leaf.core_id; + + cacheinfo_amd_init_llc_id(tscan->c, leaf.node_id); + } else { + /* + * Package ID is ApicId[6..] on certain Hygon CPUs. See + * commit e0ceeae708ce for explanation. The topology info + * is screwed up: The package shift is always 6 and the + * node ID is bit [4:5]. + */ + if (!boot_cpu_has(X86_FEATURE_HYPERVISOR) && tscan->c->x86_model <= 0x3) { + topology_set_dom(tscan, TOPO_CORE_DOMAIN, 6, + tscan->dom_ncpus[TOPO_CORE_DOMAIN]); + } + cacheinfo_hygon_init_llc_id(tscan->c); + } + return true; +} + +static bool parse_fam10h_node_id(struct topo_scan *tscan) +{ + struct { + union { + u64 node_id : 3, + nodes_per_pkg : 3, + unused : 58; + u64 msr; + }; + } nid; + + if (!boot_cpu_has(X86_FEATURE_NODEID_MSR)) + return false; + + rdmsrl(MSR_FAM10H_NODE_ID, nid.msr); + store_node(tscan, nid.nodes_per_pkg + 1, nid.node_id); + tscan->c->topo.llc_id = nid.node_id; + return true; +} + +static void legacy_set_llc(struct topo_scan *tscan) +{ + unsigned int apicid = tscan->c->topo.initial_apicid; + + /* parse_8000_0008() set everything up except llc_id */ + tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN]; +} + +static void parse_topology_amd(struct topo_scan *tscan) +{ + bool has_0xb = false; + + /* + * If the extended topology leaf 0x8000_001e is available + * try to get SMT and CORE shift from leaf 0xb first, then + * try to get the CORE shift from leaf 0x8000_0008. + */ + if (cpu_feature_enabled(X86_FEATURE_TOPOEXT)) + has_0xb = cpu_parse_topology_ext(tscan); + + if (!has_0xb && !parse_8000_0008(tscan)) + return; + + /* Prefer leaf 0x8000001e if available */ + if (parse_8000_001e(tscan, has_0xb)) + return; + + /* Try the NODEID MSR */ + if (parse_fam10h_node_id(tscan)) + return; + + legacy_set_llc(tscan); +} + +void cpu_parse_topology_amd(struct topo_scan *tscan) +{ + tscan->amd_nodes_per_pkg = 1; + parse_topology_amd(tscan); + + if (tscan->amd_nodes_per_pkg > 1) + set_cpu_cap(tscan->c, X86_FEATURE_AMD_DCM); +} + +void cpu_topology_fixup_amd(struct topo_scan *tscan) +{ + struct cpuinfo_x86 *c = tscan->c; + + /* + * Adjust the core_id relative to the node when there is more than + * one node. + */ + if (tscan->c->x86 < 0x17 && tscan->amd_nodes_per_pkg > 1) + c->topo.core_id %= tscan->dom_ncpus[TOPO_CORE_DOMAIN] / tscan->amd_nodes_per_pkg; +} diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index ef99499c9bbe..e1d1a7b7c8a9 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -11,11 +11,13 @@ struct x86_topology_system x86_topo_system __ro_after_init; +unsigned int __amd_nodes_per_pkg __ro_after_init; +EXPORT_SYMBOL_GPL(__amd_nodes_per_pkg); + void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, unsigned int shift, unsigned int ncpus) { - tscan->dom_shifts[dom] = shift; - tscan->dom_ncpus[dom] = ncpus; + topology_update_dom(tscan, dom, shift, ncpus); /* Propagate to the upper levels */ for (dom++; dom < TOPO_MAX_DOMAIN; dom++) { @@ -153,6 +155,13 @@ static void topo_set_ids(struct topo_scan *tscan) /* Package relative core ID */ c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; + + /* Temporary workaround */ + if (tscan->amd_nodes_per_pkg) + c->topo.amd_node_id = c->topo.die_id = tscan->amd_node_id; + + if (c->x86_vendor == X86_VENDOR_AMD) + cpu_topology_fixup_amd(tscan); } static void topo_set_max_cores(struct topo_scan *tscan) @@ -237,4 +246,10 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c) */ __max_die_per_package = tscan.dom_ncpus[TOPO_DIE_DOMAIN] / tscan.dom_ncpus[TOPO_DIE_DOMAIN - 1]; + /* + * AMD systems have Nodes per package which cannot be mapped to + * APIC ID. + */ + if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) + __amd_nodes_per_pkg = __max_die_per_package = tscan.amd_nodes_per_pkg; } -- Gitee From 2dc64f929290ebeaba76bed7dfba0f21a123987c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2024 21:45:27 +0200 Subject: [PATCH 12/88] x86/cpu/amd: Make the CPUID 0x80000008 parser correct ANBZ: #26668 commit 1b3108f6898ef2e03973d65255182792e94e2240 upstream. CPUID 0x80000008 ECX.cpu_nthreads describes the number of threads in the package. The parser uses this value to initialize the SMT domain level. That's wrong because cpu_nthreads does not describe the number of threads per physical core. So this needs to set the CORE domain level and let the later parsers set the SMT shift if available. Preset the SMT domain level with the assumption of one thread per core, which is correct ifrt here are no other CPUID leafs to parse, and propagate cpu_nthreads and the core level APIC bitwidth into the CORE domain. Fixes: f7fb3b2dd92c ("x86/cpu: Provide an AMD/HYGON specific topology parser") Reported-by: "kernelci.org bot" Reported-by: Laura Nao Intel-SIG: commit 1b3108f6898e x86/cpu/amd: Make the CPUID 0x80000008 parser correct. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Laura Nao Link: https://lore.kernel.org/r/20240410194311.535206450@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology_amd.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index 1a8b3ad493af..79a85a4814ca 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -29,11 +29,21 @@ static bool parse_8000_0008(struct topo_scan *tscan) if (!sft) sft = get_count_order(ecx.cpu_nthreads + 1); - topology_set_dom(tscan, TOPO_SMT_DOMAIN, sft, ecx.cpu_nthreads + 1); + /* + * cpu_nthreads describes the number of threads in the package + * sft is the number of APIC ID bits per package + * + * As the number of actual threads per core is not described in + * this leaf, just set the CORE domain shift and let the later + * parsers set SMT shift. Assume one thread per core by default + * which is correct if there are no other CPUID leafs to parse. + */ + topology_update_dom(tscan, TOPO_SMT_DOMAIN, 0, 1); + topology_set_dom(tscan, TOPO_CORE_DOMAIN, sft, ecx.cpu_nthreads + 1); return true; } -static void store_node(struct topo_scan *tscan, unsigned int nr_nodes, u16 node_id) +static void store_node(struct topo_scan *tscan, u16 nr_nodes, u16 node_id) { /* * Starting with Fam 17h the DIE domain could probably be used to @@ -73,12 +83,14 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) tscan->c->topo.initial_apicid = leaf.ext_apic_id; /* - * If leaf 0xb is available, then SMT shift is set already. If not - * take it from ecx.threads_per_core and use topo_update_dom() - - * topology_set_dom() would propagate and overwrite the already - * propagated CORE level. + * If leaf 0xb is available, then the domain shifts are set + * already and nothing to do here. */ if (!has_0xb) { + /* + * Leaf 0x80000008 set the CORE domain shift already. + * Update the SMT domain, but do not propagate it. + */ unsigned int nthreads = leaf.core_nthreads + 1; topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads); -- Gitee From 98885875536071fb938f590c9713c8313dc53473 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Apr 2024 21:45:28 +0200 Subject: [PATCH 13/88] x86/cpu/amd: Make the NODEID_MSR union actually work ANBZ: #26668 commit c064b536a8f9ab7c8e204da8f5a22f7420d0b56c upstream. A system with NODEID_MSR was reported to crash during early boot without any output. The reason is that the union which is used for accessing the bitfields in the MSR is written wrongly and the resulting executable code accesses the wrong part of the MSR data. As a consequence a later division by that value results in 0 and that result is used for another division as divisor, which obviously does not work well. The magic world of C, unions and bitfields: union { u64 bita : 3, bitb : 3; u64 all; } x; x.all = foo(); a = x.bita; b = x.bitb; results in the effective executable code of: a = b = x.bita; because bita and bitb are treated as union members and therefore both end up at bit offset 0. Wrapping the bitfield into an anonymous struct: union { struct { u64 bita : 3, bitb : 3; }; u64 all; } x; works like expected. Rework the NODEID_MSR union in exactly that way to cure the problem. Fixes: f7fb3b2dd92c ("x86/cpu: Provide an AMD/HYGON specific topology parser") Reported-by: "kernelci.org bot" Reported-by: Laura Nao Intel-SIG: commit c064b536a8f9 x86/cpu/amd: Make the NODEID_MSR union actually work. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Laura Nao Link: https://lore.kernel.org/r/20240410194311.596282919@linutronix.de Closes: https://lore.kernel.org/all/20240322175210.124416-1-laura.nao@collabora.com/ [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology_amd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index 79a85a4814ca..7f999aef1965 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -121,13 +121,13 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) static bool parse_fam10h_node_id(struct topo_scan *tscan) { - struct { - union { + union { + struct { u64 node_id : 3, nodes_per_pkg : 3, unused : 58; - u64 msr; }; + u64 msr; } nid; if (!boot_cpu_has(X86_FEATURE_NODEID_MSR)) -- Gitee From a9e748c434ff34ffa29a06559f93c3c62fcda073 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 11 Apr 2024 18:55:38 +0200 Subject: [PATCH 14/88] x86/cpu/amd: Move TOPOEXT enablement into the topology parser ANBZ: #26668 commit 7211274fe0ee352332255e41ab5e628b86e83994 upstream. The topology rework missed that early_init_amd() tries to re-enable the Topology Extensions when the BIOS disabled them. The new parser is invoked before early_init_amd() so the re-enable attempt happens too late. Move it into the AMD specific topology parser code where it belongs. Fixes: f7fb3b2dd92c ("x86/cpu: Provide an AMD/HYGON specific topology parser") Intel-SIG: commit 7211274fe0ee x86/cpu/amd: Move TOPOEXT enablement into the topology parser. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/878r1j260l.ffs@tglx [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/amd.c | 15 --------------- arch/x86/kernel/cpu/topology_amd.c | 21 +++++++++++++++++++++ 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2f226b712d62..1d03f3ac44bf 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -716,7 +716,6 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) static void early_init_amd(struct cpuinfo_x86 *c) { - u64 value; u32 dummy; early_init_amd_mc(c); @@ -786,20 +785,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) early_detect_mem_encrypt(c); - /* Re-enable TopologyExtensions if switched off by BIOS */ - if (c->x86 == 0x15 && - (c->x86_model >= 0x10 && c->x86_model <= 0x6f) && - !cpu_has(c, X86_FEATURE_TOPOEXT)) { - - if (msr_set_bit(0xc0011005, 54) > 0) { - rdmsrl(0xc0011005, value); - if (value & BIT_64(54)) { - set_cpu_cap(c, X86_FEATURE_TOPOEXT); - pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); - } - } - } - if (cpu_has(c, X86_FEATURE_TOPOEXT)) smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1; diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index 7f999aef1965..a7aa6eff4ae5 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -147,6 +147,26 @@ static void legacy_set_llc(struct topo_scan *tscan) tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN]; } +static void topoext_fixup(struct topo_scan *tscan) +{ + struct cpuinfo_x86 *c = tscan->c; + u64 msrval; + + /* Try to re-enable TopologyExtensions if switched off by BIOS */ + if (cpu_has(c, X86_FEATURE_TOPOEXT) || c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 0x15 || c->x86_model < 0x10 || c->x86_model > 0x6f) + return; + + if (msr_set_bit(0xc0011005, 54) <= 0) + return; + + rdmsrl(0xc0011005, msrval); + if (msrval & BIT_64(54)) { + set_cpu_cap(c, X86_FEATURE_TOPOEXT); + pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); + } +} + static void parse_topology_amd(struct topo_scan *tscan) { bool has_0xb = false; @@ -176,6 +196,7 @@ static void parse_topology_amd(struct topo_scan *tscan) void cpu_parse_topology_amd(struct topo_scan *tscan) { tscan->amd_nodes_per_pkg = 1; + topoext_fixup(tscan); parse_topology_amd(tscan); if (tscan->amd_nodes_per_pkg > 1) -- Gitee From b41ca8f854cdb10edd1002ba5573dab1d04af3b3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 8 May 2024 21:53:47 +0200 Subject: [PATCH 15/88] x86/topology/amd: Ensure that LLC ID is initialized ANBZ: #26668 commit 5754ace3c3199c162dcee1f3f87a538c46d1c832 upstream. The original topology evaluation code initialized cpu_data::topo::llc_id with the die ID initialy and then eventually overwrite it with information gathered from a CPUID leaf. The conversion analysis failed to spot that particular detail and omitted this initial assignment under the assumption that each topology evaluation path will set it up. That assumption is mostly correct, but turns out to be wrong in case that the CPUID leaf 0x80000006 does not provide a LLC ID. In that case, LLC ID is invalid and as a consequence the setup of the scheduling domain CPU masks is incorrect which subsequently causes the scheduler core to complain about it during CPU hotplug: BUG: arch topology borken the CLS domain not a subset of the MC domain Cure it by reusing legacy_set_llc() and assigning the die ID if the LLC ID is invalid after all possible parsers have been tried. Fixes: f7fb3b2dd92c ("x86/cpu: Provide an AMD/HYGON specific topology parser") Reported-by: Yuezhang Mo Intel-SIG: commit 5754ace3c319 x86/topology/amd: Ensure that LLC ID is initialized. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Tested-by: Yuezhang Mo Link: https://lore.kernel.org/r/PUZPR04MB63168AC442C12627E827368581292@PUZPR04MB6316.apcprd04.prod.outlook.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology_amd.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index a7aa6eff4ae5..ce2d507c3b07 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -119,7 +119,7 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) return true; } -static bool parse_fam10h_node_id(struct topo_scan *tscan) +static void parse_fam10h_node_id(struct topo_scan *tscan) { union { struct { @@ -131,20 +131,20 @@ static bool parse_fam10h_node_id(struct topo_scan *tscan) } nid; if (!boot_cpu_has(X86_FEATURE_NODEID_MSR)) - return false; + return; rdmsrl(MSR_FAM10H_NODE_ID, nid.msr); store_node(tscan, nid.nodes_per_pkg + 1, nid.node_id); tscan->c->topo.llc_id = nid.node_id; - return true; } static void legacy_set_llc(struct topo_scan *tscan) { unsigned int apicid = tscan->c->topo.initial_apicid; - /* parse_8000_0008() set everything up except llc_id */ - tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN]; + /* If none of the parsers set LLC ID then use the die ID for it. */ + if (tscan->c->topo.llc_id == BAD_APICID) + tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN]; } static void topoext_fixup(struct topo_scan *tscan) @@ -187,10 +187,7 @@ static void parse_topology_amd(struct topo_scan *tscan) return; /* Try the NODEID MSR */ - if (parse_fam10h_node_id(tscan)) - return; - - legacy_set_llc(tscan); + parse_fam10h_node_id(tscan); } void cpu_parse_topology_amd(struct topo_scan *tscan) @@ -198,6 +195,7 @@ void cpu_parse_topology_amd(struct topo_scan *tscan) tscan->amd_nodes_per_pkg = 1; topoext_fixup(tscan); parse_topology_amd(tscan); + legacy_set_llc(tscan); if (tscan->amd_nodes_per_pkg > 1) set_cpu_cap(tscan->c, X86_FEATURE_AMD_DCM); -- Gitee From d21c36548f386977eb8ddaaf3ea436c66d6f56db Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 28 May 2024 22:21:31 +0200 Subject: [PATCH 16/88] x86/topology/amd: Evaluate SMT in CPUID leaf 0x8000001e only on family 0x17 and greater ANBZ: #26668 commit 34bf6bae3286a58762711cfbce2cf74ecd42e1b5 upstream. The new AMD/HYGON topology parser evaluates the SMT information in CPUID leaf 0x8000001e unconditionally while the original code restricted it to CPUs with family 0x17 and greater. This breaks family 0x15 CPUs which advertise that leaf and have a non-zero value in the SMT section. The machine boots, but the scheduler complains loudly about the mismatch of the core IDs: WARNING: CPU: 1 PID: 0 at kernel/sched/core.c:6482 sched_cpu_starting+0x183/0x250 WARNING: CPU: 0 PID: 1 at kernel/sched/topology.c:2408 build_sched_domains+0x76b/0x12b0 Add the condition back to cure it. [ bp: Make it actually build because grandpa is not concerned with trivial stuff. :-P ] Fixes: f7fb3b2dd92c ("x86/cpu: Provide an AMD/HYGON specific topology parser") Closes: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/56 Reported-by: Tim Teichmann Reported-by: Christian Heusel Intel-SIG: commit 34bf6bae3286 x86/topology/amd: Evaluate SMT in CPUID leaf 0x8000001e only on family 0x17 and greater. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Tested-by: Tim Teichmann Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/7skhx6mwe4hxiul64v6azhlxnokheorksqsdbp7qw6g2jduf6c@7b5pvomauugk [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology_amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index ce2d507c3b07..5ee6373d4d92 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -84,9 +84,9 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) /* * If leaf 0xb is available, then the domain shifts are set - * already and nothing to do here. + * already and nothing to do here. Only valid for family >= 0x17. */ - if (!has_0xb) { + if (!has_0xb && tscan->c->x86 >= 0x17) { /* * Leaf 0x80000008 set the CORE domain shift already. * Update the SMT domain, but do not propagate it. -- Gitee From a18df6955f8488c5b2802e2fdb5254357bc03de2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:13 +0100 Subject: [PATCH 17/88] x86/smpboot: Teach it about topo.amd_node_id ANBZ: #26668 commit ace278e7eca6be5d36eb6f1efb660c13b66c4d64 upstream. When switching AMD over to the new topology parser then the match functions need to look for AMD systems with the extended topology feature at the new topo.amd_node_id member which is then holding the node id information. Intel-SIG: commit ace278e7eca6 x86/smpboot: Teach it about topo.amd_node_id. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.082979150@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/smpboot.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f6630fa0acb3..30132691f3d6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -485,6 +485,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) if (c->topo.pkg_id == o->topo.pkg_id && c->topo.die_id == o->topo.die_id && + c->topo.amd_node_id == o->topo.amd_node_id && per_cpu_llc_id(cpu1) == per_cpu_llc_id(cpu2)) { if (c->topo.core_id == o->topo.core_id) return topology_sane(c, o, "smt"); @@ -506,10 +507,13 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) { - if (c->topo.pkg_id == o->topo.pkg_id && - c->topo.die_id == o->topo.die_id) - return true; - return false; + if (c->topo.pkg_id != o->topo.pkg_id || c->topo.die_id != o->topo.die_id) + return false; + + if (cpu_feature_enabled(X86_FEATURE_TOPOEXT) && topology_amd_nodes_per_pkg() > 1) + return c->topo.amd_node_id == o->topo.amd_node_id; + + return true; } static bool match_l2c(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) -- Gitee From 5dee99d38581d770e358bc330a80496f1bae06c0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:14 +0100 Subject: [PATCH 18/88] x86/cpu: Use common topology code for AMD ANBZ: #26668 commit c749ce393b8fe9db5ed894411f06eafa88f0e13a upstream. Switch it over to the new topology evaluation mechanism and remove the random bits and pieces which are sprinkled all over the place. No functional change intended. Intel-SIG: commit c749ce393b8f x86/cpu: Use common topology code for AMD. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.145745053@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/processor.h | 2 - arch/x86/kernel/cpu/amd.c | 145 -------------------------- arch/x86/kernel/cpu/mce/inject.c | 3 +- arch/x86/kernel/cpu/topology_common.c | 5 +- 4 files changed, 5 insertions(+), 150 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 114b7775bb19..c5462619841f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -722,7 +722,6 @@ static inline u32 per_cpu_l2c_id(unsigned int cpu) } #ifdef CONFIG_CPU_SUP_AMD -extern u32 amd_get_nodes_per_socket(void); /* * Issue a DIV 0/1 insn to clear any division data from previous DIV * operations. @@ -735,7 +734,6 @@ static __always_inline void amd_clear_divider(void) extern void amd_check_microcode(void); #else -static inline u32 amd_get_nodes_per_socket(void) { return 0; } static inline void amd_clear_divider(void) { } static inline void amd_check_microcode(void) { } #endif diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 1d03f3ac44bf..78797bff07c7 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -27,12 +27,6 @@ #include "cpu.h" -/* - * nodes_per_socket: Stores the number of nodes per socket. - * Refer to Fam15h Models 00-0fh BKDG - CPUID Fn8000_001E_ECX - * Node Identifiers[10:8] - */ -static u32 nodes_per_socket = 1; u16 invlpgb_count_max __ro_after_init = 1; @@ -302,97 +296,6 @@ static int nearby_node(int apicid) } #endif -/* - * Fix up topo::core_id for pre-F17h systems to be in the - * [0 .. cores_per_node - 1] range. Not really needed but - * kept so as not to break existing setups. - */ -static void legacy_fixup_core_id(struct cpuinfo_x86 *c) -{ - u32 cus_per_node; - - if (c->x86 >= 0x17) - return; - - cus_per_node = c->x86_max_cores / nodes_per_socket; - c->topo.core_id %= cus_per_node; -} - -/* - * Fixup core topology information for - * (1) AMD multi-node processors - * Assumption: Number of cores in each internal node is the same. - * (2) AMD processors supporting compute units - */ -static void amd_get_topology(struct cpuinfo_x86 *c) -{ - /* get information required for multi-node processors */ - if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - int err; - u32 eax, ebx, ecx, edx; - - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - - c->topo.die_id = ecx & 0xff; - - if (c->x86 == 0x15) - c->topo.cu_id = ebx & 0xff; - - if (c->x86 >= 0x17) { - c->topo.core_id = ebx & 0xff; - - if (smp_num_siblings > 1) - c->x86_max_cores /= smp_num_siblings; - } - - /* - * In case leaf B is available, use it to derive - * topology information. - */ - err = detect_extended_topology(c); - if (!err) - c->x86_coreid_bits = get_count_order(c->x86_max_cores); - - cacheinfo_amd_init_llc_id(c, c->topo.die_id); - - } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { - u64 value; - - rdmsrl(MSR_FAM10H_NODE_ID, value); - c->topo.die_id = value & 7; - c->topo.llc_id = c->topo.die_id; - } else - return; - - if (nodes_per_socket > 1) { - set_cpu_cap(c, X86_FEATURE_AMD_DCM); - legacy_fixup_core_id(c); - } -} - -/* - * On a AMD dual core setup the lower bits of the APIC id distinguish the cores. - * Assumes number of cores is a power of two. - */ -static void amd_detect_cmp(struct cpuinfo_x86 *c) -{ - unsigned bits; - - bits = c->x86_coreid_bits; - /* Low order bits define the core id (index of core in socket) */ - c->topo.core_id = c->topo.initial_apicid & ((1 << bits)-1); - /* Convert the initial APIC ID into the socket ID */ - c->topo.pkg_id = c->topo.initial_apicid >> bits; - /* use socket ID also for last level cache */ - c->topo.llc_id = c->topo.die_id = c->topo.pkg_id; -} - -u32 amd_get_nodes_per_socket(void) -{ - return nodes_per_socket; -} -EXPORT_SYMBOL_GPL(amd_get_nodes_per_socket); - static void srat_detect_node(struct cpuinfo_x86 *c) { #ifdef CONFIG_NUMA @@ -444,32 +347,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } -static void early_init_amd_mc(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned bits, ecx; - - /* Multi core CPU? */ - if (c->extended_cpuid_level < 0x80000008) - return; - - ecx = cpuid_ecx(0x80000008); - - c->x86_max_cores = (ecx & 0xff) + 1; - - /* CPU telling us the core id bits shift? */ - bits = (ecx >> 12) & 0xF; - - /* Otherwise recompute */ - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - - c->x86_coreid_bits = bits; -#endif -} - static bool amd_check_tsa_microcode(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -562,18 +439,6 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_MWAITX)) use_mwaitx_delay(); - if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - u32 ecx; - - ecx = cpuid_ecx(0x8000001e); - __max_die_per_package = nodes_per_socket = ((ecx >> 8) & 7) + 1; - } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) { - u64 value; - - rdmsrl(MSR_FAM10H_NODE_ID, value); - __max_die_per_package = nodes_per_socket = ((value >> 3) & 7) + 1; - } - if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && !boot_cpu_has(X86_FEATURE_VIRT_SSBD) && c->x86 >= 0x15 && c->x86 <= 0x17) { @@ -718,8 +583,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) { u32 dummy; - early_init_amd_mc(c); - if (c->x86 >= 0xf) set_cpu_cap(c, X86_FEATURE_K8); @@ -785,9 +648,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) early_detect_mem_encrypt(c); - if (cpu_has(c, X86_FEATURE_TOPOEXT)) - smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1; - if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) { if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB)) setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); @@ -1144,9 +1004,6 @@ static void init_amd(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_FSRM)) set_cpu_cap(c, X86_FEATURE_FSRS); - /* get apicid instead of initial apic id from cpuid */ - c->topo.apicid = read_apic_id(); - /* K6s reports MCEs but don't actually have all the MSRs */ if (c->x86 < 6) clear_cpu_cap(c, X86_FEATURE_MCE); @@ -1189,8 +1046,6 @@ static void init_amd(struct cpuinfo_x86 *c) cpu_detect_cache_sizes(c); - amd_detect_cmp(c); - amd_get_topology(c); srat_detect_node(c); init_amd_cacheinfo(c); diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 308c5b5e0bbe..2b290452e194 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -433,8 +433,7 @@ static u32 get_nbc_for_node(int node_id) struct cpuinfo_x86 *c = &boot_cpu_data; u32 cores_per_node; - cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket(); - + cores_per_node = (c->x86_max_cores * smp_num_siblings) / topology_amd_nodes_per_pkg(); return cores_per_node * node_id; } diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index e1d1a7b7c8a9..3c69229ba154 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -72,7 +72,6 @@ bool topo_is_converted(struct cpuinfo_x86 *c) { /* Temporary until everything is converted over. */ switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: case X86_VENDOR_HYGON: return false; default: @@ -133,6 +132,10 @@ static void parse_topology(struct topo_scan *tscan, bool early) tscan->ebx1_nproc_shift = get_count_order(ebx.nproc); switch (c->x86_vendor) { + case X86_VENDOR_AMD: + if (IS_ENABLED(CONFIG_CPU_SUP_AMD)) + cpu_parse_topology_amd(tscan); + break; case X86_VENDOR_CENTAUR: case X86_VENDOR_ZHAOXIN: parse_legacy(tscan); -- Gitee From 13e9aea990b24d35c64572b2bd40634a3481499e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:15 +0100 Subject: [PATCH 19/88] x86/cpu: Use common topology code for HYGON ANBZ: #26668 commit 3279081dd0cb6bc13ffd5ee0e5cb11cfeae2c625 upstream. Switch it over to use the consolidated topology evaluation and remove the temporary safe guards which are not longer needed. No functional change intended. Intel-SIG: commit 3279081dd0cb x86/cpu: Use common topology code for HYGON. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.207750409@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/common.c | 5 - arch/x86/kernel/cpu/cpu.h | 1 - arch/x86/kernel/cpu/hygon.c | 131 -------------------------- arch/x86/kernel/cpu/topology.h | 1 - arch/x86/kernel/cpu/topology_common.c | 22 +---- 5 files changed, 4 insertions(+), 156 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a9f84aa5498a..dabaea807ce7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1890,11 +1890,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) /* Clear/Set all flags overridden by options, after probe */ apply_forced_caps(c); -#ifdef CONFIG_X86_64 - if (!topo_is_converted(c)) - c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0); -#endif - /* * Set default APIC and TSC_DEADLINE MSR fencing flag. AMD and * Hygon will clear it in ->c_init() below. diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index f3f0ccc52f03..1beccefbaff9 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -78,7 +78,6 @@ extern void init_intel_cacheinfo(struct cpuinfo_x86 *c); extern void init_amd_cacheinfo(struct cpuinfo_x86 *c); extern void init_hygon_cacheinfo(struct cpuinfo_x86 *c); -extern int detect_extended_topology(struct cpuinfo_x86 *c); extern void check_null_seg_clears_base(struct cpuinfo_x86 *c); void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 4bf352971c52..d303db8dcca0 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -22,14 +22,6 @@ #include "cpu.h" -#define APICID_SOCKET_ID_BIT 6 - -/* - * nodes_per_socket: Stores the number of nodes per socket. - * Refer to CPUID Fn8000_001E_ECX Node Identifiers[10:8] - */ -static u32 nodes_per_socket = 1; - #ifdef CONFIG_NUMA /* * To workaround broken NUMA config. Read the comment in @@ -53,82 +45,6 @@ static int nearby_node(int apicid) } #endif -static void hygon_get_topology_early(struct cpuinfo_x86 *c) -{ - if (cpu_has(c, X86_FEATURE_TOPOEXT)) - smp_num_siblings = ((cpuid_ebx(0x8000001e) >> 8) & 0xff) + 1; -} - -/* - * Fixup core topology information for - * (1) Hygon multi-node processors - * Assumption: Number of cores in each internal node is the same. - * (2) Hygon processors supporting compute units - */ -static void hygon_get_topology(struct cpuinfo_x86 *c) -{ - /* get information required for multi-node processors */ - if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - int err; - u32 eax, ebx, ecx, edx; - - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - - c->topo.die_id = ecx & 0xff; - - c->topo.core_id = ebx & 0xff; - - if (smp_num_siblings > 1) - c->x86_max_cores /= smp_num_siblings; - - /* - * From model 0x4, leaf B is available, so use it to derive - * topology information. - */ - err = detect_extended_topology(c); - if (!err) { - c->x86_coreid_bits = get_count_order(c->x86_max_cores); - __max_die_per_package = nodes_per_socket; - } - - /* - * Socket ID is ApicId[6] for the processors with model <= 0x3 - * when running on host. - */ - if (!boot_cpu_has(X86_FEATURE_HYPERVISOR) && c->x86_model <= 0x3) - c->topo.pkg_id = c->topo.apicid >> APICID_SOCKET_ID_BIT; - - cacheinfo_hygon_init_llc_id(c); - } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { - u64 value; - - rdmsrl(MSR_FAM10H_NODE_ID, value); - c->topo.die_id = value & 7; - c->topo.llc_id = c->topo.die_id; - } else - return; - - if (nodes_per_socket > 1) - set_cpu_cap(c, X86_FEATURE_AMD_DCM); -} - -/* - * On Hygon setup the lower bits of the APIC id distinguish the cores. - * Assumes number of cores is a power of two. - */ -static void hygon_detect_cmp(struct cpuinfo_x86 *c) -{ - unsigned int bits; - - bits = c->x86_coreid_bits; - /* Low order bits define the core id (index of core in socket) */ - c->topo.core_id = c->topo.initial_apicid & ((1 << bits)-1); - /* Convert the initial APIC ID into the socket ID */ - c->topo.pkg_id = c->topo.initial_apicid >> bits; - /* Use package ID also for last level cache */ - c->topo.llc_id = c->topo.die_id = c->topo.pkg_id; -} - static void srat_detect_node(struct cpuinfo_x86 *c) { #ifdef CONFIG_NUMA @@ -179,32 +95,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } -static void early_init_hygon_mc(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned int bits, ecx; - - /* Multi core CPU? */ - if (c->extended_cpuid_level < 0x80000008) - return; - - ecx = cpuid_ecx(0x80000008); - - c->x86_max_cores = (ecx & 0xff) + 1; - - /* CPU telling us the core id bits shift? */ - bits = (ecx >> 12) & 0xF; - - /* Otherwise recompute */ - if (bits == 0) { - while ((1 << bits) < c->x86_max_cores) - bits++; - } - - c->x86_coreid_bits = bits; -#endif -} - static void bsp_init_hygon(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -218,18 +108,6 @@ static void bsp_init_hygon(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_MWAITX)) use_mwaitx_delay(); - if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { - u32 ecx; - - ecx = cpuid_ecx(0x8000001e); - __max_die_per_package = nodes_per_socket = ((ecx >> 8) & 7) + 1; - } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) { - u64 value; - - rdmsrl(MSR_FAM10H_NODE_ID, value); - __max_die_per_package = nodes_per_socket = ((value >> 3) & 7) + 1; - } - if (!boot_cpu_has(X86_FEATURE_AMD_SSBD) && !boot_cpu_has(X86_FEATURE_VIRT_SSBD)) { /* @@ -310,8 +188,6 @@ static void early_init_hygon(struct cpuinfo_x86 *c) { u32 dummy; - early_init_hygon_mc(c); - set_cpu_cap(c, X86_FEATURE_K8); rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); @@ -353,8 +229,6 @@ static void early_init_hygon(struct cpuinfo_x86 *c) */ set_cpu_cap(c, X86_FEATURE_VMMCALL); - hygon_get_topology_early(c); - early_detect_mem_encrypt(c); } @@ -370,9 +244,6 @@ static void init_hygon(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_REP_GOOD); - /* get apicid instead of initial apic id from cpuid */ - c->topo.apicid = read_apic_id(); - /* * XXX someone from Hygon needs to confirm this DTRT * @@ -384,8 +255,6 @@ static void init_hygon(struct cpuinfo_x86 *c) cpu_detect_cache_sizes(c); - hygon_detect_cmp(c); - hygon_get_topology(c); srat_detect_node(c); init_hygon_cacheinfo(c); diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h index 7eead546c20e..2a3c838b6044 100644 --- a/arch/x86/kernel/cpu/topology.h +++ b/arch/x86/kernel/cpu/topology.h @@ -15,7 +15,6 @@ struct topo_scan { u16 amd_node_id; }; -bool topo_is_converted(struct cpuinfo_x86 *c); void cpu_init_topology(struct cpuinfo_x86 *c); void cpu_parse_topology(struct cpuinfo_x86 *c); void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index 3c69229ba154..ab944d6f973f 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -68,18 +68,6 @@ static void parse_legacy(struct topo_scan *tscan) topology_set_dom(tscan, TOPO_CORE_DOMAIN, core_shift, cores); } -bool topo_is_converted(struct cpuinfo_x86 *c) -{ - /* Temporary until everything is converted over. */ - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_HYGON: - return false; - default: - /* Let all UP systems use the below */ - return true; - } -} - static bool fake_topology(struct topo_scan *tscan) { /* @@ -144,6 +132,10 @@ static void parse_topology(struct topo_scan *tscan, bool early) if (!IS_ENABLED(CONFIG_CPU_SUP_INTEL) || !cpu_parse_topology_ext(tscan)) parse_legacy(tscan); break; + case X86_VENDOR_HYGON: + if (IS_ENABLED(CONFIG_CPU_SUP_HYGON)) + cpu_parse_topology_amd(tscan); + break; } } @@ -187,9 +179,6 @@ void cpu_parse_topology(struct cpuinfo_x86 *c) parse_topology(&tscan, false); - if (!topo_is_converted(c)) - return; - for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) { if (tscan.dom_shifts[dom] == x86_topo_system.dom_shifts[dom]) continue; @@ -218,9 +207,6 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c) parse_topology(&tscan, true); - if (!topo_is_converted(c)) - return; - /* Copy the shift values and calculate the unit sizes. */ memcpy(x86_topo_system.dom_shifts, tscan.dom_shifts, sizeof(x86_topo_system.dom_shifts)); -- Gitee From 8225d1c967eca3e2180cd6df4c7d474189390c1b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:17 +0100 Subject: [PATCH 20/88] x86/mm/numa: Use core domain size on AMD ANBZ: #26668 commit d805a6916037a716e858a0a91d844bad1ca8f48b upstream. cpuinfo::topo::x86_coreid_bits is about to be phased out. Use the core domain size from the topology information. Add a comment why the early MPTABLE parsing is required and decrapify the loop which sets the APIC ID to node map. Intel-SIG: commit d805a6916037 x86/mm/numa: Use core domain size on AMD. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.270320718@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/topology.h | 5 +++++ arch/x86/mm/amdtopology.c | 35 +++++++++++++++------------------ 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 03083a3b1ae4..e98bc2ee5362 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -121,6 +121,11 @@ struct x86_topology_system { extern struct x86_topology_system x86_topo_system; +static inline unsigned int topology_get_domain_size(enum x86_topology_domains dom) +{ + return x86_topo_system.dom_size[dom]; +} + extern const struct cpumask *cpu_coregroup_mask(int cpu); extern const struct cpumask *cpu_clustergroup_mask(int cpu); diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c index b3ca7d23e4b0..5681b997b357 100644 --- a/arch/x86/mm/amdtopology.c +++ b/arch/x86/mm/amdtopology.c @@ -54,13 +54,11 @@ static __init int find_northbridge(void) int __init amd_numa_init(void) { - u64 start = PFN_PHYS(0); + unsigned int numnodes, cores, apicid; + u64 prevbase, start = PFN_PHYS(0); u64 end = PFN_PHYS(max_pfn); - unsigned numnodes; - u64 prevbase; - int i, j, nb; u32 nodeid, reg; - unsigned int bits, cores, apicid_base; + int i, j, nb; if (!early_pci_allowed()) return -EINVAL; @@ -158,26 +156,25 @@ int __init amd_numa_init(void) return -ENOENT; /* - * We seem to have valid NUMA configuration. Map apicids to nodes - * using the coreid bits from early_identify_cpu. + * We seem to have valid NUMA configuration. Map apicids to nodes + * using the size of the core domain in the APIC space. */ - bits = boot_cpu_data.x86_coreid_bits; - cores = 1 << bits; - apicid_base = 0; + cores = topology_get_domain_size(TOPO_CORE_DOMAIN); /* - * get boot-time SMP configuration: + * Scan MPTABLE to map the local APIC and ensure that the boot CPU + * APIC ID is valid. This is required because on pre ACPI/SRAT + * systems IO-APICs are mapped before the boot CPU. */ early_get_smp_config(); - if (boot_cpu_physical_apicid > 0) { - pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid); - apicid_base = boot_cpu_physical_apicid; - } - - for_each_node_mask(i, numa_nodes_parsed) - for (j = apicid_base; j < cores + apicid_base; j++) - set_apicid_to_node((i << bits) + j, i); + apicid = boot_cpu_physical_apicid; + if (apicid > 0) + pr_info("BSP APIC ID: %02x\n", apicid); + for_each_node_mask(i, numa_nodes_parsed) { + for (j = 0; j < cores; j++, apicid++) + set_apicid_to_node(apicid, i); + } return 0; } -- Gitee From b227958f8e83890080c851537af6444fc2f7c331 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:18 +0100 Subject: [PATCH 21/88] x86/cpu: Make topology_amd_node_id() use the actual ANBZ: #26668 commit 03fa6bea5a3e13ccbc211af1fa7e75d34239a408 upstream. Now that everything is converted switch it over and remove the intermediate operation. Intel-SIG: commit 03fa6bea5a3e x86/cpu: Make topology_amd_node_id() use the actual node info. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.334185785@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/topology.h | 4 ++-- arch/x86/kernel/cpu/topology_common.c | 7 ++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index e98bc2ee5362..450a031a39c2 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -136,7 +136,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); #define topology_core_id(cpu) (cpu_data(cpu).topo.core_id) #define topology_ppin(cpu) (cpu_data(cpu).ppin) -#define topology_amd_node_id(cpu) (cpu_data(cpu).topo.die_id) +#define topology_amd_node_id(cpu) (cpu_data(cpu).topo.amd_node_id) extern unsigned int __max_die_per_package; extern struct cpumask __cpu_primary_thread_mask; @@ -174,7 +174,7 @@ extern unsigned int __amd_nodes_per_pkg; static inline unsigned int topology_amd_nodes_per_pkg(void) { - return __max_die_per_package; + return __amd_nodes_per_pkg; } /** diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index ab944d6f973f..afea34d59598 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -151,9 +151,7 @@ static void topo_set_ids(struct topo_scan *tscan) c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; - /* Temporary workaround */ - if (tscan->amd_nodes_per_pkg) - c->topo.amd_node_id = c->topo.die_id = tscan->amd_node_id; + c->topo.amd_node_id = tscan->amd_node_id; if (c->x86_vendor == X86_VENDOR_AMD) cpu_topology_fixup_amd(tscan); @@ -239,6 +237,5 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c) * AMD systems have Nodes per package which cannot be mapped to * APIC ID. */ - if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) - __amd_nodes_per_pkg = __max_die_per_package = tscan.amd_nodes_per_pkg; + __amd_nodes_per_pkg = tscan.amd_nodes_per_pkg; } -- Gitee From 209b18954973853e071f1fb61d5eab15a65fd76b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:20 +0100 Subject: [PATCH 22/88] x86/cpu: Remove topology.c ANBZ: #26668 commit 6cf70394e7205a0d65780473aa2081839eb93471 upstream. No more users. Stick it into the ugly code museum. Intel-SIG: commit 6cf70394e720 x86/cpu: Remove topology.c. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.395230346@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/topology.c | 145 -------------------------- arch/x86/kernel/cpu/topology_common.c | 3 + 3 files changed, 4 insertions(+), 146 deletions(-) delete mode 100644 arch/x86/kernel/cpu/topology.c diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 255ebd57f243..b5423bfbaad3 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -18,7 +18,7 @@ KMSAN_SANITIZE_common.o := n KCSAN_SANITIZE_common.o := n obj-y := cacheinfo.o scattered.o -obj-y += topology_common.o topology_ext.o topology_amd.o topology.o +obj-y += topology_common.o topology_ext.o topology_amd.o obj-y += common.o obj-y += rdrand.o obj-y += match.o diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c deleted file mode 100644 index 29376e5af634..000000000000 --- a/arch/x86/kernel/cpu/topology.c +++ /dev/null @@ -1,145 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Check for extended topology enumeration cpuid leaf 0xb and if it - * exists, use it for populating initial_apicid and cpu topology - * detection. - */ - -#include -#include -#include -#include - -#include "cpu.h" - -/* leaf 0xb SMT level */ -#define SMT_LEVEL 0 - -/* extended topology sub-leaf types */ -#define INVALID_TYPE 0 -#define SMT_TYPE 1 -#define CORE_TYPE 2 -#define DIE_TYPE 5 - -#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff) -#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) -#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff) - -/* CPUs which are the primary SMT threads */ -struct cpumask __cpu_primary_thread_mask __read_mostly; - -#ifdef CONFIG_SMP -/* - * Check if given CPUID extended topology "leaf" is implemented - */ -static int check_extended_topology_leaf(int leaf) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); - - if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) - return -1; - - return 0; -} -/* - * Return best CPUID Extended Topology Leaf supported - */ -static int detect_extended_topology_leaf(struct cpuinfo_x86 *c) -{ - if (c->cpuid_level >= 0x1f) { - if (check_extended_topology_leaf(0x1f) == 0) - return 0x1f; - } - - if (c->cpuid_level >= 0xb) { - if (check_extended_topology_leaf(0xb) == 0) - return 0xb; - } - - return -1; -} -#endif - -/* - * Check for extended topology enumeration cpuid leaf, and if it - * exists, use it for populating initial_apicid and cpu topology - * detection. - */ -int detect_extended_topology(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned int eax, ebx, ecx, edx, sub_index; - unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width; - unsigned int core_select_mask, core_level_siblings; - unsigned int die_select_mask, die_level_siblings; - unsigned int pkg_mask_width; - bool die_level_present = false; - int leaf; - - leaf = detect_extended_topology_leaf(c); - if (leaf < 0) - return -1; - - /* - * Populate HT related information from sub-leaf level 0. - */ - cpuid_count(leaf, SMT_LEVEL, &eax, &ebx, &ecx, &edx); - c->topo.initial_apicid = edx; - core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); - smp_num_siblings = max_t(int, smp_num_siblings, LEVEL_MAX_SIBLINGS(ebx)); - core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); - die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); - pkg_mask_width = die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); - - sub_index = 1; - while (true) { - cpuid_count(leaf, sub_index, &eax, &ebx, &ecx, &edx); - - /* - * Check for the Core type in the implemented sub leaves. - */ - if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) { - core_level_siblings = LEVEL_MAX_SIBLINGS(ebx); - core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); - die_level_siblings = core_level_siblings; - die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); - } - if (LEAFB_SUBTYPE(ecx) == DIE_TYPE) { - die_level_present = true; - die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); - die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); - } - - if (LEAFB_SUBTYPE(ecx) != INVALID_TYPE) - pkg_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); - else - break; - - sub_index++; - } - - core_select_mask = (~(-1 << pkg_mask_width)) >> ht_mask_width; - die_select_mask = (~(-1 << die_plus_mask_width)) >> - core_plus_mask_width; - - c->topo.core_id = apic->phys_pkg_id(c->topo.initial_apicid, - ht_mask_width) & core_select_mask; - - if (die_level_present) { - c->topo.die_id = apic->phys_pkg_id(c->topo.initial_apicid, - core_plus_mask_width) & die_select_mask; - } - - c->topo.pkg_id = apic->phys_pkg_id(c->topo.initial_apicid, pkg_mask_width); - /* - * Reinit the apicid, now that we have extended initial_apicid. - */ - c->topo.apicid = apic->phys_pkg_id(c->topo.initial_apicid, 0); - - c->x86_max_cores = (core_level_siblings / smp_num_siblings); - __max_die_per_package = (die_level_siblings / core_level_siblings); -#endif - return 0; -} diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index afea34d59598..2774ffa55fb9 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -14,6 +14,9 @@ struct x86_topology_system x86_topo_system __ro_after_init; unsigned int __amd_nodes_per_pkg __ro_after_init; EXPORT_SYMBOL_GPL(__amd_nodes_per_pkg); +/* CPUs which are the primary SMT threads */ +struct cpumask __cpu_primary_thread_mask __read_mostly; + void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom, unsigned int shift, unsigned int ncpus) { -- Gitee From 9b5dca2ed081efe6cfcca012c4ca5f2c1c341ea2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:21 +0100 Subject: [PATCH 23/88] x86/cpu: Remove x86_coreid_bits ANBZ: #26668 commit fab75e790f00dca592d9a934d9f1237b81093b99 upstream. No more users. Intel-SIG: commit fab75e790f00 x86/cpu: Remove x86_coreid_bits. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.455839743@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/processor.h | 2 -- arch/x86/kernel/cpu/common.c | 1 - 2 files changed, 3 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c5462619841f..bfdce9ff6657 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -136,8 +136,6 @@ struct cpuinfo_x86 { #endif __u8 x86_virt_bits; __u8 x86_phys_bits; - /* CPUID returned core id bits: */ - __u8 x86_coreid_bits; /* Max extended CPUID function supported: */ __u32 extended_cpuid_level; /* Maximum supported CPUID level, -1=no CPUID: */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index dabaea807ce7..96fd482cfc13 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1861,7 +1861,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; - c->x86_coreid_bits = 0; c->topo.llc_id = BAD_APICID; c->topo.l2c_id = BAD_APICID; #ifdef CONFIG_X86_64 -- Gitee From 8673b1e3cee51a739bf6799c69e68bda1cb71098 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:22 +0100 Subject: [PATCH 24/88] x86/apic: Remove unused phys_pkg_id() callback ANBZ: #26668 commit 035fc90a9d8fcff39b9bb43b9ff132756d947ea5 upstream. Now that the core code does not use this monstrosity anymore, it's time to put it to rest. The only real purpose was to read the APIC ID on UV and VSMP systems for the actual evaluation. That's what the core code does now. For doing the actual shift operation there is truly no APIC callback required. Intel-SIG: commit 035fc90a9d8f x86/apic: Remove unused phys_pkg_id() callback. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.516536121@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 1 - arch/x86/kernel/apic/apic_flat_64.c | 7 ------- arch/x86/kernel/apic/apic_noop.c | 3 --- arch/x86/kernel/apic/apic_numachip.c | 7 ------- arch/x86/kernel/apic/bigsmp_32.c | 6 ------ arch/x86/kernel/apic/local.h | 1 - arch/x86/kernel/apic/probe_32.c | 6 ------ arch/x86/kernel/apic/x2apic_cluster.c | 1 - arch/x86/kernel/apic/x2apic_phys.c | 6 ------ arch/x86/kernel/apic/x2apic_uv_x.c | 11 ----------- arch/x86/kernel/vsmp_64.c | 13 ------------- arch/x86/xen/apic.c | 6 ------ 12 files changed, 68 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a8f231513848..37b18bb1a64f 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -303,7 +303,6 @@ struct apic { void (*init_apic_ldr)(void); void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); u32 (*cpu_present_to_apicid)(int mps_cpu); - u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); u32 (*get_apic_id)(u32 id); u32 (*set_apic_id)(u32 apicid); diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 7139867d69cd..5c2d476be004 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -66,11 +66,6 @@ static u32 set_apic_id(u32 id) return (id & 0xFF) << 24; } -static u32 flat_phys_pkg_id(u32 initial_apic_id, int index_msb) -{ - return initial_apic_id >> index_msb; -} - static int flat_probe(void) { return 1; @@ -89,7 +84,6 @@ static struct apic apic_flat __ro_after_init = { .init_apic_ldr = default_init_apic_ldr, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = flat_phys_pkg_id, .max_apic_id = 0xFE, .get_apic_id = flat_get_apic_id, @@ -160,7 +154,6 @@ static struct apic apic_physflat __ro_after_init = { .disable_esr = 0, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = flat_phys_pkg_id, .max_apic_id = 0xFE, .get_apic_id = flat_get_apic_id, diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index b00d52ae84fa..73247856e23f 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -29,7 +29,6 @@ static void noop_send_IPI_self(int vector) { } static void noop_apic_icr_write(u32 low, u32 id) { } static int noop_wakeup_secondary_cpu(u32 apicid, unsigned long start_eip) { return -1; } static u64 noop_apic_icr_read(void) { return 0; } -static u32 noop_phys_pkg_id(u32 cpuid_apic, int index_msb) { return 0; } static u32 noop_get_apic_id(u32 apicid) { return 0; } static void noop_apic_eoi(void) { } @@ -56,8 +55,6 @@ struct apic apic_noop __ro_after_init = { .ioapic_phys_id_map = default_ioapic_phys_id_map, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = noop_phys_pkg_id, - .max_apic_id = 0xFE, .get_apic_id = noop_get_apic_id, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 456a14c44f67..70bedbc1a5bd 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -56,11 +56,6 @@ static u32 numachip2_set_apic_id(u32 id) return id << 24; } -static u32 numachip_phys_pkg_id(u32 initial_apic_id, int index_msb) -{ - return initial_apic_id >> index_msb; -} - static void numachip1_apic_icr_write(int apicid, unsigned int val) { write_lcsr(CSR_G3_EXT_IRQ_GEN, (apicid << 16) | val); @@ -228,7 +223,6 @@ static const struct apic apic_numachip1 __refconst = { .disable_esr = 0, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = numachip_phys_pkg_id, .max_apic_id = UINT_MAX, .get_apic_id = numachip1_get_apic_id, @@ -265,7 +259,6 @@ static const struct apic apic_numachip2 __refconst = { .disable_esr = 0, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = numachip_phys_pkg_id, .max_apic_id = UINT_MAX, .get_apic_id = numachip2_get_apic_id, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 7ee3c486cb33..f3a51d6dc167 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -29,11 +29,6 @@ static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *re physids_promote(0xFFL, retmap); } -static u32 bigsmp_phys_pkg_id(u32 cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - static void bigsmp_send_IPI_allbutself(int vector) { default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); @@ -88,7 +83,6 @@ static struct apic apic_bigsmp __ro_after_init = { .check_apicid_used = bigsmp_check_apicid_used, .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = bigsmp_phys_pkg_id, .max_apic_id = 0xFE, .get_apic_id = bigsmp_get_apic_id, diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h index 9ea6186ea88c..8fd37c9d1b34 100644 --- a/arch/x86/kernel/apic/local.h +++ b/arch/x86/kernel/apic/local.h @@ -17,7 +17,6 @@ void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest); u32 x2apic_get_apic_id(u32 id); u32 x2apic_set_apic_id(u32 id); -u32 x2apic_phys_pkg_id(u32 initial_apicid, int index_msb); void x2apic_send_IPI_all(int vector); void x2apic_send_IPI_allbutself(int vector); diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 5eb3fbe472da..1c508681d9d4 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -18,11 +18,6 @@ #include "local.h" -static u32 default_phys_pkg_id(u32 cpuid_apic, int index_msb) -{ - return cpuid_apic >> index_msb; -} - static u32 default_get_apic_id(u32 x) { unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); @@ -54,7 +49,6 @@ static struct apic apic_default __ro_after_init = { .init_apic_ldr = default_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = default_phys_pkg_id, .max_apic_id = 0xFE, .get_apic_id = default_get_apic_id, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index a8306089c91b..ec204edb31e9 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -236,7 +236,6 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = x2apic_phys_pkg_id, .max_apic_id = UINT_MAX, .x2apic_set_max_apicid = true, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 558a4a8824f4..3396540fb46b 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -134,11 +134,6 @@ u32 x2apic_set_apic_id(u32 id) return id; } -u32 x2apic_phys_pkg_id(u32 initial_apicid, int index_msb) -{ - return initial_apicid >> index_msb; -} - static struct apic apic_x2apic_phys __ro_after_init = { .name = "physical x2apic", @@ -151,7 +146,6 @@ static struct apic apic_x2apic_phys __ro_after_init = { .disable_esr = 0, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = x2apic_phys_pkg_id, .max_apic_id = UINT_MAX, .x2apic_set_max_apicid = true, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 3ca49dfb653c..f6b24e72af71 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -784,16 +784,6 @@ static u32 set_apic_id(u32 id) return id; } -static unsigned int uv_read_apic_id(void) -{ - return x2apic_get_apic_id(apic_read(APIC_ID)); -} - -static u32 uv_phys_pkg_id(u32 initial_apicid, int index_msb) -{ - return uv_read_apic_id() >> index_msb; -} - static int uv_probe(void) { return apic == &apic_x2apic_uv_x; @@ -811,7 +801,6 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .disable_esr = 0, .cpu_present_to_apicid = default_cpu_present_to_apicid, - .phys_pkg_id = uv_phys_pkg_id, .max_apic_id = UINT_MAX, .get_apic_id = x2apic_get_apic_id, diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index d3fc01770558..73511332bb67 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -127,25 +127,12 @@ static void __init vsmp_cap_cpus(void) #endif } -static u32 apicid_phys_pkg_id(u32 initial_apic_id, int index_msb) -{ - return read_apic_id() >> index_msb; -} - -static void vsmp_apic_post_init(void) -{ - /* need to update phys_pkg_id */ - apic->phys_pkg_id = apicid_phys_pkg_id; -} - void __init vsmp_init(void) { detect_vsmp_box(); if (!is_vsmp_box()) return; - x86_platform.apic_post_init = vsmp_apic_post_init; - vsmp_cap_cpus(); set_vsmp_ctl(); diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 9dd5490b3318..d4752382b600 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -110,11 +110,6 @@ static int xen_madt_oem_check(char *oem_id, char *oem_table_id) return xen_pv_domain(); } -static u32 xen_phys_pkg_id(u32 initial_apic_id, int index_msb) -{ - return initial_apic_id >> index_msb; -} - static u32 xen_cpu_present_to_apicid(int cpu) { if (cpu_present(cpu)) @@ -133,7 +128,6 @@ static struct apic xen_pv_apic __ro_after_init = { .disable_esr = 0, .cpu_present_to_apicid = xen_cpu_present_to_apicid, - .phys_pkg_id = xen_phys_pkg_id, /* detect_ht */ .max_apic_id = UINT_MAX, .get_apic_id = xen_get_apic_id, -- Gitee From 9befd0186aae21bdaa47152f371e7ca7ae95c1b2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:24 +0100 Subject: [PATCH 25/88] x86/xen/smp_pv: Remove cpudata fiddling ANBZ: #26668 commit d5474e4d2c91b3f27864e9898f7f6e49daf26d93 upstream. The new topology CPUID parser installs already fake topology for XEN/PV, which ends up with cpuinfo::max_cores = 1. Intel-SIG: commit d5474e4d2c91 x86/xen/smp_pv: Remove cpudata fiddling. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.576579177@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/xen/smp_pv.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index a0f07bbfcd6e..3ae29c25ff85 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -73,7 +73,6 @@ static void cpu_bringup(void) } cpu = smp_processor_id(); smp_store_cpu_info(cpu); - cpu_data(cpu).x86_max_cores = 1; set_cpu_sibling_map(cpu); speculative_store_bypass_ht_init(); @@ -224,8 +223,6 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus) smp_prepare_cpus_common(); - cpu_data(0).x86_max_cores = 1; - speculative_store_bypass_ht_init(); xen_pmu_init(0); -- Gitee From e7fd461ef4798d5dd5e742de35be32442a0d8c5c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:25 +0100 Subject: [PATCH 26/88] x86/apic/uv: Remove the private leaf 0xb parser ANBZ: #26668 commit bcccdf8b30736250d5057e0940468a41d633e672 upstream. The package shift has been already evaluated by the early CPU init. Put the mindless copy right next to the original leaf 0xb parser. Intel-SIG: commit bcccdf8b3073 x86/apic/uv: Remove the private leaf 0xb parser. x86/cpu: Rework the topology evaluation - part2 Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Sohil Mehta Tested-by: Michael Kelley Tested-by: Zhang Rui Tested-by: Wang Wendy Tested-by: K Prateek Nayak Link: https://lore.kernel.org/r/20240212153625.637385562@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/topology.h | 5 +++ arch/x86/kernel/apic/x2apic_uv_x.c | 52 ++++++------------------------ 2 files changed, 14 insertions(+), 43 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 450a031a39c2..2d3b65803033 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -126,6 +126,11 @@ static inline unsigned int topology_get_domain_size(enum x86_topology_domains do return x86_topo_system.dom_size[dom]; } +static inline unsigned int topology_get_domain_shift(enum x86_topology_domains dom) +{ + return dom == TOPO_SMT_DOMAIN ? 0 : x86_topo_system.dom_shifts[dom - 1]; +} + extern const struct cpumask *cpu_coregroup_mask(int cpu); extern const struct cpumask *cpu_clustergroup_mask(int cpu); diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index f6b24e72af71..f296e85c75bb 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -241,54 +241,20 @@ static void __init uv_tsc_check_sync(void) is_uv(UV3) ? sname.s3.field : \ undef) -/* [Copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */ - -#define SMT_LEVEL 0 /* Leaf 0xb SMT level */ -#define INVALID_TYPE 0 /* Leaf 0xb sub-leaf types */ -#define SMT_TYPE 1 -#define CORE_TYPE 2 -#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff) -#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) - -static void set_x2apic_bits(void) -{ - unsigned int eax, ebx, ecx, edx, sub_index; - unsigned int sid_shift; - - cpuid(0, &eax, &ebx, &ecx, &edx); - if (eax < 0xb) { - pr_info("UV: CPU does not have CPUID.11\n"); - return; - } - - cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); - if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) { - pr_info("UV: CPUID.11 not implemented\n"); - return; - } - - sid_shift = BITS_SHIFT_NEXT_LEVEL(eax); - sub_index = 1; - do { - cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx); - if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) { - sid_shift = BITS_SHIFT_NEXT_LEVEL(eax); - break; - } - sub_index++; - } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); - - uv_cpuid.apicid_shift = 0; - uv_cpuid.apicid_mask = (~(-1 << sid_shift)); - uv_cpuid.socketid_shift = sid_shift; -} - static void __init early_get_apic_socketid_shift(void) { + unsigned int sid_shift = topology_get_domain_shift(TOPO_PKG_DOMAIN); + if (is_uv2_hub() || is_uv3_hub()) uvh_apicid.v = uv_early_read_mmr(UVH_APICID); - set_x2apic_bits(); + if (sid_shift) { + uv_cpuid.apicid_shift = 0; + uv_cpuid.apicid_mask = (~(-1 << sid_shift)); + uv_cpuid.socketid_shift = sid_shift; + } else { + pr_info("UV: CPU does not have valid CPUID.11\n"); + } pr_info("UV: apicid_shift:%d apicid_mask:0x%x\n", uv_cpuid.apicid_shift, uv_cpuid.apicid_mask); pr_info("UV: socketid_shift:%d pnode_mask:0x%x\n", uv_cpuid.socketid_shift, uv_cpuid.pnode_mask); -- Gitee From 34f43b1740df7d36e10daac1db3f796298d20dc8 Mon Sep 17 00:00:00 2001 From: Quanxian Wang Date: Tue, 2 Dec 2025 23:18:10 -0500 Subject: [PATCH 27/88] anolis: x86/cpu: Add extenended topology function for Centaur and Zhaoxin ANBZ: #26668 Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology_common.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index 2774ffa55fb9..e2640f853f24 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -128,8 +128,12 @@ static void parse_topology(struct topo_scan *tscan, bool early) cpu_parse_topology_amd(tscan); break; case X86_VENDOR_CENTAUR: + if (!IS_ENABLED(CONFIG_CPU_SUP_CENTAUR) || !cpu_parse_topology_ext(tscan)) + parse_legacy(tscan); + break; case X86_VENDOR_ZHAOXIN: - parse_legacy(tscan); + if (!IS_ENABLED(CONFIG_CPU_SUP_ZHAOXIN) || !cpu_parse_topology_ext(tscan)) + parse_legacy(tscan); break; case X86_VENDOR_INTEL: if (!IS_ENABLED(CONFIG_CPU_SUP_INTEL) || !cpu_parse_topology_ext(tscan)) -- Gitee From 00664a441fcb4653981b59f60c81077da0511aea Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:52 +0100 Subject: [PATCH 28/88] x86/cpu/topology: Make the APIC mismatch warnings complete ANBZ: #26668 commit 52128a7a21f79d5d0be62f10cb0b73d115ab492e upstream. Detect all possible combinations of mismatch right in the CPUID evaluation code. Intel-SIG: commit 52128a7a21f7 x86/cpu/topology: Make the APIC mismatch warnings complete. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154638.867699078@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 5 ++--- arch/x86/kernel/cpu/common.c | 15 ++------------- arch/x86/kernel/cpu/topology_common.c | 12 ++++++++++++ 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 37b18bb1a64f..6a9ef0f554e1 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -52,16 +52,15 @@ extern void x86_32_probe_apic(void); static inline void x86_32_probe_apic(void) { } #endif -#ifdef CONFIG_X86_LOCAL_APIC +extern u32 cpuid_to_apicid[]; +#ifdef CONFIG_X86_LOCAL_APIC extern int apic_verbosity; extern int local_apic_timer_c2_ok; extern bool apic_is_disabled; extern unsigned int lapic_timer_period; -extern u32 cpuid_to_apicid[]; - extern enum apic_intr_mode_id apic_intr_mode; enum apic_intr_mode_id { APIC_PIC, diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 96fd482cfc13..0645b62de21d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1824,22 +1824,11 @@ static void generic_identify(struct cpuinfo_x86 *c) #endif } -/* - * Validate that ACPI/mptables have the same information about the - * effective APIC id and update the package map. - */ -static void validate_apic_and_package_id(struct cpuinfo_x86 *c) +static void update_package_map(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP unsigned int cpu = smp_processor_id(); - u32 apicid; - apicid = apic->cpu_present_to_apicid(cpu); - - if (apicid != c->topo.apicid) { - pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x APIC: %x\n", - cpu, apicid, c->topo.initial_apicid); - } BUG_ON(topology_update_package_map(c->topo.pkg_id, cpu)); BUG_ON(topology_update_die_map(c->topo.die_id, cpu)); #else @@ -2034,7 +2023,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_32 enable_sep_cpu(); #endif - validate_apic_and_package_id(c); + update_package_map(c); x86_spec_ctrl_setup_ap(); update_srbds_msr(); if (boot_cpu_has_bug(X86_BUG_GDS)) diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index e2640f853f24..263f198f24bc 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -184,6 +184,18 @@ void cpu_parse_topology(struct cpuinfo_x86 *c) parse_topology(&tscan, false); + if (IS_ENABLED(CONFIG_X86_LOCAL_APIC)) { + if (c->topo.initial_apicid != c->topo.apicid) { + pr_err(FW_BUG "CPU%4u: APIC ID mismatch. CPUID: 0x%04x APIC: 0x%04x\n", + cpu, c->topo.initial_apicid, c->topo.apicid); + } + + if (c->topo.apicid != cpuid_to_apicid[cpu]) { + pr_err(FW_BUG "CPU%4u: APIC ID mismatch. Firmware: 0x%04x APIC: 0x%04x\n", + cpu, cpuid_to_apicid[cpu], c->topo.apicid); + } + } + for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) { if (tscan.dom_shifts[dom] == x86_topo_system.dom_shifts[dom]) continue; -- Gitee From e0bf8e9d7622f1fc4ddcc46f8d1962dc0f8dd4aa Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 5 Apr 2024 14:15:47 +0200 Subject: [PATCH 29/88] x86/xen: return a sane initial apic id when running as PV guest ANBZ: #26668 commit 802600ebdf23371b893a51a4ad046213f112ea3b upstream. With recent sanity checks for topology information added, there are now warnings issued for APs when running as a Xen PV guest: [Firmware Bug]: CPU 1: APIC ID mismatch. CPUID: 0x0000 APIC: 0x0001 This is due to the initial APIC ID obtained via CPUID for PV guests is always 0. Avoid the warnings by synthesizing the CPUID data to contain the same initial APIC ID as xen_pv_smp_config() is using for registering the APIC IDs of all CPUs. Fixes: 52128a7a21f7 ("86/cpu/topology: Make the APIC mismatch warnings complete") Intel-SIG: commit 802600ebdf23 x86/xen: return a sane initial apic id when running as PV guest. x86/topology: More cleanups and preparatory work Signed-off-by: Juergen Gross [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/xen/enlighten_pv.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 3df7c96e7388..c6ab925c2c7b 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -216,13 +216,21 @@ static __read_mostly unsigned int cpuid_leaf5_edx_val; static void xen_cpuid(unsigned int *ax, unsigned int *bx, unsigned int *cx, unsigned int *dx) { - unsigned maskebx = ~0; + unsigned int maskebx = ~0; + unsigned int or_ebx = 0; /* * Mask out inconvenient features, to try and disable as many * unsupported kernel subsystems as possible. */ switch (*ax) { + case 0x1: + /* Replace initial APIC ID in bits 24-31 of EBX. */ + /* See xen_pv_smp_config() for related topology preparations. */ + maskebx = 0x00ffffff; + or_ebx = smp_processor_id() << 24; + break; + case CPUID_MWAIT_LEAF: /* Synthesize the values.. */ *ax = 0; @@ -245,6 +253,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, : "0" (*ax), "2" (*cx)); *bx &= maskebx; + *bx |= or_ebx; } static bool __init xen_check_mwait(void) -- Gitee From 58c4acae71a1ff4992a8450e87ff55e56d1545b4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:53 +0100 Subject: [PATCH 30/88] x86/platform/ce4100: Dont override x86_init.mpparse.setup_ioapic_ids ANBZ: #26668 commit 490cc3c5e724502667a104a4e818dc071faf5e77 upstream. There is no point to do that. The ATOMs have an XAPIC for which this function is a pointless exercise. Intel-SIG: commit 490cc3c5e724 x86/platform/ce4100: Dont override x86_init.mpparse.setup_ioapic_ids. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154638.931617775@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/io_apic.h | 1 - arch/x86/kernel/apic/io_apic.c | 2 +- arch/x86/platform/ce4100/ce4100.c | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 51c782600e02..0d806513c4b3 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -140,7 +140,6 @@ extern void mask_ioapic_entries(void); extern int restore_ioapic_entries(void); extern void setup_ioapic_ids_from_mpc(void); -extern void setup_ioapic_ids_from_mpc_nocheck(void); extern int mp_find_ioapic(u32 gsi); extern int mp_find_ioapic_pin(int ioapic, u32 gsi); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 3a93bbc6bb65..71106fef257c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1449,7 +1449,7 @@ void restore_boot_irq_mode(void) * * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 */ -void __init setup_ioapic_ids_from_mpc_nocheck(void) +static void __init setup_ioapic_ids_from_mpc_nocheck(void) { union IO_APIC_reg_00 reg_00; physid_mask_t phys_id_present_map; diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index 40745664d92f..bbe7e9124549 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c @@ -139,7 +139,6 @@ void __init x86_ce4100_early_setup(void) x86_init.resources.probe_roms = x86_init_noop; x86_init.mpparse.get_smp_config = x86_init_uint_noop; x86_init.mpparse.find_smp_config = x86_init_noop; - x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck; x86_init.pci.init = ce4100_pci_init; x86_init.pci.init_irq = sdv_pci_init; -- Gitee From 18e303aaef3074303a256386e0d6f5220935ecb9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:55 +0100 Subject: [PATCH 31/88] x86/ioapic: Replace some more set bit nonsense ANBZ: #26668 commit 2ac9e529d76a8534fa357e723942dd3f076c37da upstream. Yet another set_bit() operation wrapped in oring a mask. Intel-SIG: commit 2ac9e529d76a x86/ioapic: Replace some more set bit nonsense. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154638.995080989@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/mpspec.h | 3 --- arch/x86/kernel/apic/io_apic.c | 6 ++---- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 4b0f98a8d338..9bf1e75c1def 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -86,9 +86,6 @@ typedef struct physid_mask physid_mask_t; #define physid_set(physid, map) set_bit(physid, (map).mask) #define physid_isset(physid, map) test_bit(physid, (map).mask) -#define physids_or(dst, src1, src2) \ - bitmap_or((dst).mask, (src1).mask, (src2).mask, MAX_LOCAL_APIC) - #define physids_clear(map) \ bitmap_zero((map).mask, MAX_LOCAL_APIC) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 71106fef257c..52d5f5f6729c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2485,9 +2485,8 @@ unsigned int arch_dynirq_lower_bound(unsigned int from) #ifdef CONFIG_X86_32 static int io_apic_get_unique_id(int ioapic, int apic_id) { - union IO_APIC_reg_00 reg_00; static physid_mask_t apic_id_map = PHYSID_MASK_NONE; - physid_mask_t tmp; + union IO_APIC_reg_00 reg_00; unsigned long flags; int i = 0; @@ -2533,8 +2532,7 @@ static int io_apic_get_unique_id(int ioapic, int apic_id) apic_id = i; } - physid_set_mask_of_physid(apic_id, &tmp); - physids_or(apic_id_map, apic_id_map, tmp); + physid_set(apic_id, apic_id_map); if (reg_00.bits.ID != apic_id) { reg_00.bits.ID = apic_id; -- Gitee From 8cb82f9b43532ad122b570da51382f1a38be6272 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:56 +0100 Subject: [PATCH 32/88] x86/apic: Get rid of get_physical_broadcast() ANBZ: #26668 commit 517234446c1ad1d6bb0d9f5b94a71b24f80edaae upstream. There is no point for this function. The only case where this is used is when there is no XAPIC available, which means the broadcast address is 0xF. Intel-SIG: commit 517234446c1a x86/apic: Get rid of get_physical_broadcast(). x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.057209154@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/apic/apic.c | 10 ------- arch/x86/kernel/apic/io_apic.c | 49 +++++++++++++++------------------- 2 files changed, 22 insertions(+), 37 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 883caaf50413..87c5f1931f4a 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -261,16 +261,6 @@ u64 native_apic_icr_read(void) return icr1 | ((u64)icr2 << 32); } -#ifdef CONFIG_X86_32 -/** - * get_physical_broadcast - Get number of physical broadcast IDs - */ -int get_physical_broadcast(void) -{ - return modern_apic() ? 0xff : 0xf; -} -#endif - /** * lapic_get_maxlvt - get the maximum number of local vector table entries */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 52d5f5f6729c..b9c030c56bfc 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1451,12 +1451,12 @@ void restore_boot_irq_mode(void) */ static void __init setup_ioapic_ids_from_mpc_nocheck(void) { - union IO_APIC_reg_00 reg_00; physid_mask_t phys_id_present_map; - int ioapic_idx; - int i; + const u32 broadcast_id = 0xF; + union IO_APIC_reg_00 reg_00; unsigned char old_id; unsigned long flags; + int ioapic_idx, i; /* * This is broken; anything with a real cpu count has to @@ -1475,11 +1475,10 @@ static void __init setup_ioapic_ids_from_mpc_nocheck(void) old_id = mpc_ioapic_id(ioapic_idx); - if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - ioapic_idx, mpc_ioapic_id(ioapic_idx)); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - reg_00.bits.ID); + if (mpc_ioapic_id(ioapic_idx) >= broadcast_id) { + pr_err(FW_BUG "IO-APIC#%d ID is %d in the MPC table!...\n", + ioapic_idx, mpc_ioapic_id(ioapic_idx)); + pr_err("... fixing up to %d. (tell your hw vendor)\n", reg_00.bits.ID); ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID; } @@ -1490,15 +1489,14 @@ static void __init setup_ioapic_ids_from_mpc_nocheck(void) */ if (apic->check_apicid_used(&phys_id_present_map, mpc_ioapic_id(ioapic_idx))) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - ioapic_idx, mpc_ioapic_id(ioapic_idx)); - for (i = 0; i < get_physical_broadcast(); i++) + pr_err(FW_BUG "IO-APIC#%d ID %d is already used!...\n", + ioapic_idx, mpc_ioapic_id(ioapic_idx)); + for (i = 0; i < broadcast_id; i++) if (!physid_isset(i, phys_id_present_map)) break; - if (i >= get_physical_broadcast()) + if (i >= broadcast_id) panic("Max APIC ID exceeded!\n"); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - i); + pr_err("... fixing up to %d. (tell your hw vendor)\n", i); physid_set(i, phys_id_present_map); ioapics[ioapic_idx].mp_config.apicid = i; } else { @@ -2200,7 +2198,7 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { - panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC"); + panic_if_irq_remap(FW_BUG "Timer not connected to IO-APIC"); pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -2486,6 +2484,7 @@ unsigned int arch_dynirq_lower_bound(unsigned int from) static int io_apic_get_unique_id(int ioapic, int apic_id) { static physid_mask_t apic_id_map = PHYSID_MASK_NONE; + const u32 broadcast_id = 0xF; union IO_APIC_reg_00 reg_00; unsigned long flags; int i = 0; @@ -2506,9 +2505,9 @@ static int io_apic_get_unique_id(int ioapic, int apic_id) reg_00.raw = io_apic_read(ioapic, 0); raw_spin_unlock_irqrestore(&ioapic_lock, flags); - if (apic_id >= get_physical_broadcast()) { - printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " - "%d\n", ioapic, apic_id, reg_00.bits.ID); + if (apic_id >= broadcast_id) { + pr_warn("IOAPIC[%d]: Invalid apic_id %d, trying %d\n", + ioapic, apic_id, reg_00.bits.ID); apic_id = reg_00.bits.ID; } @@ -2518,17 +2517,15 @@ static int io_apic_get_unique_id(int ioapic, int apic_id) */ if (apic->check_apicid_used(&apic_id_map, apic_id)) { - for (i = 0; i < get_physical_broadcast(); i++) { + for (i = 0; i < broadcast_id; i++) { if (!apic->check_apicid_used(&apic_id_map, i)) break; } - if (i == get_physical_broadcast()) + if (i == broadcast_id) panic("Max apic_id exceeded!\n"); - printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " - "trying %d\n", ioapic, apic_id, i); - + pr_warn("IOAPIC[%d]: apic_id %d already used, trying %d\n", ioapic, apic_id, i); apic_id = i; } @@ -2558,11 +2555,9 @@ static int io_apic_get_unique_id(int ioapic, int apic_id) static u8 io_apic_unique_id(int idx, u8 id) { - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - !APIC_XAPIC(boot_cpu_apic_version)) + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && !APIC_XAPIC(boot_cpu_apic_version)) return io_apic_get_unique_id(idx, id); - else - return id; + return id; } #else static u8 io_apic_unique_id(int idx, u8 id) -- Gitee From 63fa75186903fb81384b46dc25a5866674bbdc6e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:57 +0100 Subject: [PATCH 33/88] x86/ioapic: Make io_apic_get_unique_id() simpler ANBZ: #26668 commit 533535afc079b745ae8a5fd06afd2ba51b3495fe upstream. No need to go through APIC callbacks. It's already established that this is an ancient APIC. So just copy the present mask and use the direct physid* functions all over the place. Intel-SIG: commit 533535afc079 x86/ioapic: Make io_apic_get_unique_id() simpler. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.119261725@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/apic/io_apic.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index b9c030c56bfc..da6e8bfbf30b 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2489,17 +2489,9 @@ static int io_apic_get_unique_id(int ioapic, int apic_id) unsigned long flags; int i = 0; - /* - * The P4 platform supports up to 256 APIC IDs on two separate APIC - * buses (one for LAPICs, one for IOAPICs), where predecessors only - * supports up to 16 on one shared APIC bus. - * - * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full - * advantage of new APIC bus architecture. - */ - + /* Initialize the ID map */ if (physids_empty(apic_id_map)) - apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); + apic_id_map = phys_cpu_present_map; raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic, 0); @@ -2511,14 +2503,10 @@ static int io_apic_get_unique_id(int ioapic, int apic_id) apic_id = reg_00.bits.ID; } - /* - * Every APIC in a system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. - */ - if (apic->check_apicid_used(&apic_id_map, apic_id)) { - + /* Every APIC in a system must have a unique ID */ + if (physid_isset(apic_id, apic_id_map)) { for (i = 0; i < broadcast_id; i++) { - if (!apic->check_apicid_used(&apic_id_map, i)) + if (!physid_isset(i, apic_id_map)) break; } -- Gitee From ed6b8c35e3b818e0e172d9804b3787bbf5471ff8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:04:59 +0100 Subject: [PATCH 34/88] x86/ioapic: Simplify setup_ioapic_ids_from_mpc_nocheck() ANBZ: #26668 commit 4b99e735a5c6cb3c8b23fba522cb1d24a1679f94 upstream. No need to go through APIC callbacks. It's already established that this is an ancient APIC. So just copy the present mask and use the direct physid* functions all over the place. Intel-SIG: commit 4b99e735a5c6 x86/ioapic: Simplify setup_ioapic_ids_from_mpc_nocheck(). x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.181901887@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/apic/io_apic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index da6e8bfbf30b..b900b11ae0a5 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1462,7 +1462,7 @@ static void __init setup_ioapic_ids_from_mpc_nocheck(void) * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. */ - apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map); + phys_id_present_map = phys_cpu_present_map; /* * Set the IOAPIC ID to the value stored in the MPC table. @@ -1487,8 +1487,7 @@ static void __init setup_ioapic_ids_from_mpc_nocheck(void) * system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (apic->check_apicid_used(&phys_id_present_map, - mpc_ioapic_id(ioapic_idx))) { + if (physid_isset(mpc_ioapic_id(ioapic_idx), phys_id_present_map)) { pr_err(FW_BUG "IO-APIC#%d ID %d is already used!...\n", ioapic_idx, mpc_ioapic_id(ioapic_idx)); for (i = 0; i < broadcast_id; i++) -- Gitee From c43c676564643f6a23dfdf936bf2c4ad4090d112 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:00 +0100 Subject: [PATCH 35/88] x86/apic: Remove check_apicid_used() and ioapic_phys_id_map() ANBZ: #26668 commit 3e48d804c8ea99170638b4e14931686bfc093f02 upstream. No more users. Intel-SIG: commit 3e48d804c8ea x86/apic: Remove check_apicid_used() and ioapic_phys_id_map(). x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.243307499@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 3 --- arch/x86/include/asm/mpspec.h | 6 ------ arch/x86/kernel/apic/apic_noop.c | 2 -- arch/x86/kernel/apic/bigsmp_32.c | 13 ------------- arch/x86/kernel/apic/probe_32.c | 2 -- arch/x86/kernel/apic/x2apic_cluster.c | 2 -- 6 files changed, 28 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 6a9ef0f554e1..7a261d29915c 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -298,9 +298,7 @@ struct apic { int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); bool (*apic_id_registered)(void); - bool (*check_apicid_used)(physid_mask_t *map, u32 apicid); void (*init_apic_ldr)(void); - void (*ioapic_phys_id_map)(physid_mask_t *phys_map, physid_mask_t *retmap); u32 (*cpu_present_to_apicid)(int mps_cpu); u32 (*get_apic_id)(u32 id); @@ -533,7 +531,6 @@ extern int default_apic_id_valid(u32 apicid); extern u32 apic_default_calc_apicid(unsigned int cpu); extern u32 apic_flat_calc_apicid(unsigned int cpu); -extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap); extern u32 default_cpu_present_to_apicid(int mps_cpu); void apic_send_nmi_to_offline_cpu(unsigned int cpu); diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 9bf1e75c1def..666dcdbe66b0 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -92,12 +92,6 @@ typedef struct physid_mask physid_mask_t; #define physids_empty(map) \ bitmap_empty((map).mask, MAX_LOCAL_APIC) -static inline void physids_promote(unsigned long physids, physid_mask_t *map) -{ - physids_clear(*map); - map->mask[0] = physids; -} - static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) { physids_clear(*map); diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 73247856e23f..6558c59dc451 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -51,8 +51,6 @@ struct apic apic_noop __ro_after_init = { .disable_esr = 0, - .check_apicid_used = default_check_apicid_used, - .ioapic_phys_id_map = default_ioapic_phys_id_map, .cpu_present_to_apicid = default_cpu_present_to_apicid, .max_apic_id = 0xFE, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index f3a51d6dc167..4371abd4b0df 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -18,17 +18,6 @@ static u32 bigsmp_get_apic_id(u32 x) return (x >> 24) & 0xFF; } -static bool bigsmp_check_apicid_used(physid_mask_t *map, u32 apicid) -{ - return false; -} - -static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) -{ - /* For clustered we don't have a good way to do this yet - hack */ - physids_promote(0xFFL, retmap); -} - static void bigsmp_send_IPI_allbutself(int vector) { default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); @@ -80,8 +69,6 @@ static struct apic apic_bigsmp __ro_after_init = { .disable_esr = 1, - .check_apicid_used = bigsmp_check_apicid_used, - .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, .cpu_present_to_apicid = default_cpu_present_to_apicid, .max_apic_id = 0xFE, diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 1c508681d9d4..25df3528c56b 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -45,9 +45,7 @@ static struct apic apic_default __ro_after_init = { .disable_esr = 0, - .check_apicid_used = default_check_apicid_used, .init_apic_ldr = default_init_apic_ldr, - .ioapic_phys_id_map = default_ioapic_phys_id_map, .cpu_present_to_apicid = default_cpu_present_to_apicid, .max_apic_id = 0xFE, diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index ec204edb31e9..506d0d32de69 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -232,9 +232,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .disable_esr = 0, - .check_apicid_used = NULL, .init_apic_ldr = init_x2apic_ldr, - .ioapic_phys_id_map = NULL, .cpu_present_to_apicid = default_cpu_present_to_apicid, .max_apic_id = UINT_MAX, -- Gitee From 601b54242ce07371238b04b33d8dad614a8f9145 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:02 +0100 Subject: [PATCH 36/88] x86/mpparse: Rename default_find_smp_config() ANBZ: #26668 commit e061c7ae0830ff320d77566849a5cc30decfa602 upstream. MPTABLE is no longer the default SMP configuration mechanism. Rename it to mpparse_find_mptable() because that's what it does. Intel-SIG: commit e061c7ae0830 x86/mpparse: Rename default_find_smp_config(). x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.306287711@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/hyperv/hv_vtl.c | 2 +- arch/x86/include/asm/mpspec.h | 13 ++++--------- arch/x86/include/asm/x86_init.h | 4 ++-- arch/x86/kernel/mpparse.c | 2 +- arch/x86/kernel/setup.c | 6 ++---- arch/x86/kernel/x86_init.c | 2 +- arch/x86/mm/mem_encrypt_amd.c | 1 - arch/x86/platform/ce4100/ce4100.c | 2 +- arch/x86/platform/intel-mid/intel-mid.c | 2 +- arch/x86/xen/smp_pv.c | 2 +- 10 files changed, 14 insertions(+), 22 deletions(-) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 48058bafecc3..586eec0f938d 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -33,7 +33,7 @@ void __init hv_vtl_init_platform(void) x86_init.resources.probe_roms = x86_init_noop; /* Avoid searching for BIOS MP tables */ - x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.get_smp_config = x86_init_uint_noop; x86_platform.get_wallclock = get_rtc_noop; diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 666dcdbe66b0..c154dd7261e7 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -56,21 +56,16 @@ static inline void early_get_smp_config(void) x86_init.mpparse.get_smp_config(1); } -static inline void find_smp_config(void) -{ - x86_init.mpparse.find_smp_config(); -} - #ifdef CONFIG_X86_MPPARSE extern void e820__memblock_alloc_reserved_mpc_new(void); extern int enable_update_mptable; -extern void default_find_smp_config(void); +extern void mpparse_find_mptable(void); extern void default_get_smp_config(unsigned int early); #else static inline void e820__memblock_alloc_reserved_mpc_new(void) { } -#define enable_update_mptable 0 -#define default_find_smp_config x86_init_noop -#define default_get_smp_config x86_init_uint_noop +#define enable_update_mptable 0 +#define mpparse_find_mptable x86_init_noop +#define default_get_smp_config x86_init_uint_noop #endif int generic_processor_info(int apicid); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 550dcbbbb175..bba4fbdc98b1 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -15,12 +15,12 @@ struct irq_domain; /** * struct x86_init_mpparse - platform specific mpparse ops * @setup_ioapic_ids: platform specific ioapic id override - * @find_smp_config: find the smp configuration + * @find_mptable: Find MPTABLE early to reserve the memory region * @get_smp_config: get the smp configuration */ struct x86_init_mpparse { void (*setup_ioapic_ids)(void); - void (*find_smp_config)(void); + void (*find_mptable)(void); void (*get_smp_config)(unsigned int early); }; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index b223922248e9..86acccdbd631 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -587,7 +587,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) return ret; } -void __init default_find_smp_config(void) +void __init mpparse_find_mptable(void) { unsigned int address; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 9a1bc98686c4..a40b72809870 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1150,10 +1150,8 @@ void __init setup_arch(char **cmdline_p) high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; #endif - /* - * Find and reserve possible boot-time SMP configuration: - */ - find_smp_config(); + /* Find and reserve MPTABLE area */ + x86_init.mpparse.find_mptable(); early_alloc_pgt_buf(); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 268627a17cf0..e20078b877bd 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -73,7 +73,7 @@ struct x86_init_ops x86_init __initdata = { .mpparse = { .setup_ioapic_ids = x86_init_noop, - .find_smp_config = default_find_smp_config, + .find_mptable = mpparse_find_mptable, .get_smp_config = default_get_smp_config, }, diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index f7d88ad030b9..41b7fe4de72b 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -556,7 +556,6 @@ void __init sme_early_init(void) * as this memory is not pre-validated and would thus cause a crash. */ if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) { - x86_init.mpparse.find_smp_config = x86_init_noop; x86_init.pci.init_irq = x86_init_noop; x86_init.resources.probe_roms = x86_init_noop; diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index bbe7e9124549..de7f1e9ea01d 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c @@ -138,7 +138,7 @@ void __init x86_ce4100_early_setup(void) x86_init.oem.arch_setup = sdv_arch_setup; x86_init.resources.probe_roms = x86_init_noop; x86_init.mpparse.get_smp_config = x86_init_uint_noop; - x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.find_mptable = x86_init_noop; x86_init.pci.init = ce4100_pci_init; x86_init.pci.init_irq = sdv_pci_init; diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index f4592dc7a1c1..595dd4cfc6be 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -118,7 +118,7 @@ void __init x86_intel_mid_early_setup(void) machine_ops.emergency_restart = intel_mid_reboot; /* Avoid searching for BIOS MP tables */ - x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.get_smp_config = x86_init_uint_noop; set_bit(MP_BUS_ISA, mp_bus_not_pci); } diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 3ae29c25ff85..44c35b12430f 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -455,6 +455,6 @@ void __init xen_smp_init(void) smp_ops = xen_smp_ops; /* Avoid searching for BIOS MP tables */ - x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.get_smp_config = _get_smp_config; } -- Gitee From c2d923ce96e63dcdf4c5ba5bdd3f79c7aca635a7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:03 +0100 Subject: [PATCH 37/88] x86/mpparse: Provide separate early/late callbacks ANBZ: #26668 commit fc60fd009c830a21c7699c6e36ab9ec51b9dd939 upstream. The early argument of x86_init::mpparse::get_smp_config() is more than confusing. Provide two callbacks, one for each purpose. Intel-SIG: commit fc60fd009c83 x86/mpparse: Provide separate early/late callbacks. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.370491894@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/x86_init.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index bba4fbdc98b1..2e6de3d3022a 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -17,11 +17,15 @@ struct irq_domain; * @setup_ioapic_ids: platform specific ioapic id override * @find_mptable: Find MPTABLE early to reserve the memory region * @get_smp_config: get the smp configuration + * @early_parse_smp_cfg: Parse the SMP configuration data early before initmem_init() + * @parse_smp_cfg: Parse the SMP configuration data */ struct x86_init_mpparse { void (*setup_ioapic_ids)(void); void (*find_mptable)(void); void (*get_smp_config)(unsigned int early); + void (*early_parse_smp_cfg)(void); + void (*parse_smp_cfg)(void); }; /** -- Gitee From 7ac0066d47960214f27a02017a0807814bd63fd2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:04 +0100 Subject: [PATCH 38/88] x86/mpparse: Prepare for callback separation ANBZ: #26668 commit d0a85126b137598eab969e5ba283e5e70ca9c686 upstream. In preparation of splitting the get_smp_config() callback, rename default_get_smp_config() to mpparse_get_smp_config() and provide an early and late wrapper. Intel-SIG: commit d0a85126b137 x86/mpparse: Prepare for callback separation. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.433811243@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/mpspec.h | 12 ++++++++---- arch/x86/kernel/mpparse.c | 12 +++++++++++- arch/x86/kernel/x86_init.c | 2 +- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index c154dd7261e7..72700ae4a770 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -60,12 +60,16 @@ static inline void early_get_smp_config(void) extern void e820__memblock_alloc_reserved_mpc_new(void); extern int enable_update_mptable; extern void mpparse_find_mptable(void); -extern void default_get_smp_config(unsigned int early); +extern void mpparse_parse_early_smp_config(void); +extern void mpparse_parse_smp_config(void); +extern void mpparse_get_smp_config(unsigned int early); #else static inline void e820__memblock_alloc_reserved_mpc_new(void) { } -#define enable_update_mptable 0 -#define mpparse_find_mptable x86_init_noop -#define default_get_smp_config x86_init_uint_noop +#define enable_update_mptable 0 +#define mpparse_find_mptable x86_init_noop +#define mpparse_parse_early_smp_config x86_init_noop +#define mpparse_parse_smp_config x86_init_noop +#define mpparse_get_smp_config x86_init_uint_noop #endif int generic_processor_info(int apicid); diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 86acccdbd631..b22093d2265b 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -473,7 +473,7 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) /* * Scan the memory blocks for an SMP configuration block. */ -void __init default_get_smp_config(unsigned int early) +void __init mpparse_get_smp_config(unsigned int early) { struct mpf_intel *mpf; @@ -538,6 +538,16 @@ void __init default_get_smp_config(unsigned int early) early_memunmap(mpf, sizeof(*mpf)); } +void __init mpparse_parse_early_smp_config(void) +{ + mpparse_get_smp_config(true); +} + +void __init mpparse_parse_smp_config(void) +{ + mpparse_get_smp_config(false); +} + static void __init smp_reserve_memory(struct mpf_intel *mpf) { memblock_reserve(mpf->physptr, get_mpc_size(mpf->physptr)); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index e20078b877bd..e1da0bfa311e 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -74,7 +74,7 @@ struct x86_init_ops x86_init __initdata = { .mpparse = { .setup_ioapic_ids = x86_init_noop, .find_mptable = mpparse_find_mptable, - .get_smp_config = default_get_smp_config, + .get_smp_config = mpparse_get_smp_config, }, .irqs = { -- Gitee From 5cba5abae0c8625870ddb559cfb957ef884c6cfe Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:06 +0100 Subject: [PATCH 39/88] x86/dtb: Rename x86_dtb_init() ANBZ: #26668 commit 5faf8ec77111a699b6a566c4155511fc020f8644 upstream. x86_dtb_init() is a misnomer and it really should be used as a SMP configuration parser which is selected by the platform via x86_init::mpparse:parse_smp_config(). Rename it to x86_dtb_parse_smp_config() in preparation for that. Intel-SIG: commit 5faf8ec77111 x86/dtb: Rename x86_dtb_init(). x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.495992801@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/prom.h | 4 ++-- arch/x86/kernel/devicetree.c | 2 +- arch/x86/kernel/setup.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h index b716d291d0d4..b86b91cd3561 100644 --- a/arch/x86/include/asm/prom.h +++ b/arch/x86/include/asm/prom.h @@ -23,11 +23,11 @@ extern int of_ioapic; extern u64 initial_dtb; extern void add_dtb(u64 data); void x86_of_pci_init(void); -void x86_dtb_init(void); +void x86_dtb_parse_smp_config(void); #else static inline void add_dtb(u64 data) { } static inline void x86_of_pci_init(void) { } -static inline void x86_dtb_init(void) { } +static inline void x86_dtb_parse_smp_config(void) { } #define of_ioapic 0 #endif diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 37ca25d82bbc..64126b20a0b5 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -306,7 +306,7 @@ static void __init x86_flattree_get_config(void) static inline void x86_flattree_get_config(void) { } #endif -void __init x86_dtb_init(void) +void __init x86_dtb_parse_smp_config(void) { x86_flattree_get_config(); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a40b72809870..2a4428bf529c 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1314,7 +1314,7 @@ void __init setup_arch(char **cmdline_p) * Read APIC and some other early information from ACPI tables. */ acpi_boot_init(); - x86_dtb_init(); + x86_dtb_parse_smp_config(); /* * get boot-time SMP configuration: -- Gitee From 8fa07404bf6ef0c6b3df868d1f80f65606db688c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:07 +0100 Subject: [PATCH 40/88] x86/platform/ce4100: Prepare for separate mpparse callbacks ANBZ: #26668 commit fe280ffd7eab3dd63fd349d12b449666845e905c upstream. Select x86_dtb_parse_smp_config() as SMP configuration parser in preparation of splitting up the get_smp_config() callback. Intel-SIG: commit fe280ffd7eab x86/platform/ce4100: Prepare for separate mpparse callbacks. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.558085053@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/platform/ce4100/ce4100.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index de7f1e9ea01d..6378082d8855 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c @@ -135,12 +135,14 @@ static void sdv_pci_init(void) */ void __init x86_ce4100_early_setup(void) { - x86_init.oem.arch_setup = sdv_arch_setup; - x86_init.resources.probe_roms = x86_init_noop; - x86_init.mpparse.get_smp_config = x86_init_uint_noop; - x86_init.mpparse.find_mptable = x86_init_noop; - x86_init.pci.init = ce4100_pci_init; - x86_init.pci.init_irq = sdv_pci_init; + x86_init.oem.arch_setup = sdv_arch_setup; + x86_init.resources.probe_roms = x86_init_noop; + x86_init.mpparse.find_mptable = x86_init_noop; + x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; + x86_init.mpparse.parse_smp_cfg = x86_dtb_parse_smp_config; + x86_init.mpparse.get_smp_config = x86_init_uint_noop; + x86_init.pci.init = ce4100_pci_init; + x86_init.pci.init_irq = sdv_pci_init; /* * By default, the reboot method is ACPI which is supported by the -- Gitee From c8de73c81362d6e12ac9ce1fa46d4d9d17ebb4fe Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:09 +0100 Subject: [PATCH 41/88] x86/platform/intel-mid: Prepare for separate mpparse callbacks ANBZ: #26668 commit a626ded4e3088319e3d108bb328d48768110ae0b upstream. Initialize the split SMP configuration callbacks with NOOPs as MID is strictly ACPI only. Intel-SIG: commit a626ded4e308 x86/platform/intel-mid: Prepare for separate mpparse callbacks. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Acked-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240212154639.620189339@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/platform/intel-mid/intel-mid.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 595dd4cfc6be..14ca3675999a 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -118,7 +118,9 @@ void __init x86_intel_mid_early_setup(void) machine_ops.emergency_restart = intel_mid_reboot; /* Avoid searching for BIOS MP tables */ - x86_init.mpparse.find_mptable = x86_init_noop; - x86_init.mpparse.get_smp_config = x86_init_uint_noop; + x86_init.mpparse.find_mptable = x86_init_noop; + x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; + x86_init.mpparse.parse_smp_cfg = x86_init_noop; + x86_init.mpparse.get_smp_config = x86_init_uint_noop; set_bit(MP_BUS_ISA, mp_bus_not_pci); } -- Gitee From 638118fb441b0c341f350db565e0fe5dc2834f6a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:10 +0100 Subject: [PATCH 42/88] x86/jailhouse: Prepare for separate mpparse callbacks ANBZ: #26668 commit 30c928691ce1c861d22ef236ed28bbf0b7a763bc upstream. Provide a wrapper around the existing function and fill the new callbacks in. No functional change as the new callbacks are not yet operational. Intel-SIG: commit 30c928691ce1 x86/jailhouse: Prepare for separate mpparse callbacks. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.683073662@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/jailhouse.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 5481c7c5db30..25e091eb5d04 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -119,6 +119,11 @@ static void __init jailhouse_get_smp_config(unsigned int early) } } +static void __init jailhouse_parse_smp_config(void) +{ + jailhouse_get_smp_config(false); +} + static void jailhouse_no_restart(void) { pr_notice("Jailhouse: Restart not supported, halting\n"); @@ -202,21 +207,24 @@ static void __init jailhouse_init_platform(void) struct setup_data header; void *mapping; - x86_init.irqs.pre_vector_init = x86_init_noop; - x86_init.timers.timer_init = jailhouse_timer_init; - x86_init.mpparse.get_smp_config = jailhouse_get_smp_config; - x86_init.pci.arch_init = jailhouse_pci_arch_init; + x86_init.irqs.pre_vector_init = x86_init_noop; + x86_init.timers.timer_init = jailhouse_timer_init; + x86_init.mpparse.find_mptable = x86_init_noop; + x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; + x86_init.mpparse.parse_smp_cfg = jailhouse_parse_smp_config; + x86_init.mpparse.get_smp_config = jailhouse_get_smp_config; + x86_init.pci.arch_init = jailhouse_pci_arch_init; - x86_platform.calibrate_cpu = jailhouse_get_tsc; - x86_platform.calibrate_tsc = jailhouse_get_tsc; - x86_platform.get_wallclock = jailhouse_get_wallclock; - x86_platform.legacy.rtc = 0; - x86_platform.legacy.warm_reset = 0; - x86_platform.legacy.i8042 = X86_LEGACY_I8042_PLATFORM_ABSENT; + x86_platform.calibrate_cpu = jailhouse_get_tsc; + x86_platform.calibrate_tsc = jailhouse_get_tsc; + x86_platform.get_wallclock = jailhouse_get_wallclock; + x86_platform.legacy.rtc = 0; + x86_platform.legacy.warm_reset = 0; + x86_platform.legacy.i8042 = X86_LEGACY_I8042_PLATFORM_ABSENT; - legacy_pic = &null_legacy_pic; + legacy_pic = &null_legacy_pic; - machine_ops.emergency_restart = jailhouse_no_restart; + machine_ops.emergency_restart = jailhouse_no_restart; while (pa_data) { mapping = early_memremap(pa_data, sizeof(header)); -- Gitee From 94d06f1cc9bc63d396e1e96003eb5a07c17e00c8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:12 +0100 Subject: [PATCH 43/88] x86/xen/smp_pv: Prepare for separate mpparse callbacks ANBZ: #26668 commit 0baf4d485cbe5c1b94433f3f5aed2e6e6cd91b02 upstream. Provide a wrapper around the existing function and fill the new callbacks in. No functional change as the new callbacks are not yet operational. Intel-SIG: commit 0baf4d485cbe x86/xen/smp_pv: Prepare for separate mpparse callbacks. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.745028043@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/xen/smp_pv.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 44c35b12430f..bd939b8b1a06 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -185,6 +185,11 @@ static void __init _get_smp_config(unsigned int early) smp_found_config = 1; } +static void __init xen_pv_smp_config(void) +{ + _get_smp_config(false); +} + static void __init xen_pv_smp_prepare_boot_cpu(void) { BUG_ON(smp_processor_id() != 0); @@ -455,6 +460,8 @@ void __init xen_smp_init(void) smp_ops = xen_smp_ops; /* Avoid searching for BIOS MP tables */ - x86_init.mpparse.find_mptable = x86_init_noop; - x86_init.mpparse.get_smp_config = _get_smp_config; + x86_init.mpparse.find_mptable = x86_init_noop; + x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; + x86_init.mpparse.parse_smp_cfg = xen_pv_smp_config; + x86_init.mpparse.get_smp_config = _get_smp_config; } -- Gitee From 51cfcc65aa7a523a6d8da9d17d5ad59b8bc96a61 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:13 +0100 Subject: [PATCH 44/88] x86/hyperv/vtl: Prepare for separate mpparse callbacks ANBZ: #26668 commit c22e19cd2c8a8f8ef8cfc0a0aaaa95d8cc064309 upstream. Initialize the new callbacks in preparation for switching the core code. Intel-SIG: commit c22e19cd2c8a x86/hyperv/vtl: Prepare for separate mpparse callbacks. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.808238769@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/hyperv/hv_vtl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 586eec0f938d..b98abd08b2c1 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -34,6 +34,8 @@ void __init hv_vtl_init_platform(void) /* Avoid searching for BIOS MP tables */ x86_init.mpparse.find_mptable = x86_init_noop; + x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; + x86_init.mpparse.parse_smp_cfg = x86_init_noop; x86_init.mpparse.get_smp_config = x86_init_uint_noop; x86_platform.get_wallclock = get_rtc_noop; -- Gitee From 5056023718c93409531cd350c3c22f80fa6dbe7b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:14 +0100 Subject: [PATCH 45/88] x86/mpparse: Switch to new init callbacks ANBZ: #26668 commit dcb7600849ce9b3d9b3d2965f452287f06fc9093 upstream. Now that all platforms have the new split SMP configuration callbacks set up, flip the switch and remove the old callback pointer and mop up the platform code. Intel-SIG: commit dcb7600849ce x86/mpparse: Switch to new init callbacks. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.870883080@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/hyperv/hv_vtl.c | 1 - arch/x86/include/asm/mpspec.h | 9 +-------- arch/x86/include/asm/x86_init.h | 2 -- arch/x86/kernel/jailhouse.c | 8 +------- arch/x86/kernel/mpparse.c | 2 +- arch/x86/kernel/setup.c | 10 +++------- arch/x86/kernel/x86_init.c | 3 ++- arch/x86/platform/ce4100/ce4100.c | 1 - arch/x86/platform/intel-mid/intel-mid.c | 1 - arch/x86/xen/smp_pv.c | 11 +---------- 10 files changed, 9 insertions(+), 39 deletions(-) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index b98abd08b2c1..64ca4543ceca 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -36,7 +36,6 @@ void __init hv_vtl_init_platform(void) x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; x86_init.mpparse.parse_smp_cfg = x86_init_noop; - x86_init.mpparse.get_smp_config = x86_init_uint_noop; x86_platform.get_wallclock = get_rtc_noop; x86_platform.set_wallclock = set_rtc_noop; diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 72700ae4a770..82480b7d01f0 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -46,14 +46,9 @@ extern int smp_found_config; # define smp_found_config 0 #endif -static inline void get_smp_config(void) -{ - x86_init.mpparse.get_smp_config(0); -} - static inline void early_get_smp_config(void) { - x86_init.mpparse.get_smp_config(1); + x86_init.mpparse.early_parse_smp_cfg(); } #ifdef CONFIG_X86_MPPARSE @@ -62,14 +57,12 @@ extern int enable_update_mptable; extern void mpparse_find_mptable(void); extern void mpparse_parse_early_smp_config(void); extern void mpparse_parse_smp_config(void); -extern void mpparse_get_smp_config(unsigned int early); #else static inline void e820__memblock_alloc_reserved_mpc_new(void) { } #define enable_update_mptable 0 #define mpparse_find_mptable x86_init_noop #define mpparse_parse_early_smp_config x86_init_noop #define mpparse_parse_smp_config x86_init_noop -#define mpparse_get_smp_config x86_init_uint_noop #endif int generic_processor_info(int apicid); diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 2e6de3d3022a..0b29ac4adb43 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -16,14 +16,12 @@ struct irq_domain; * struct x86_init_mpparse - platform specific mpparse ops * @setup_ioapic_ids: platform specific ioapic id override * @find_mptable: Find MPTABLE early to reserve the memory region - * @get_smp_config: get the smp configuration * @early_parse_smp_cfg: Parse the SMP configuration data early before initmem_init() * @parse_smp_cfg: Parse the SMP configuration data */ struct x86_init_mpparse { void (*setup_ioapic_ids)(void); void (*find_mptable)(void); - void (*get_smp_config)(unsigned int early); void (*early_parse_smp_cfg)(void); void (*parse_smp_cfg)(void); }; diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 25e091eb5d04..bf47224c88b1 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -90,7 +90,7 @@ static void __init jailhouse_x2apic_init(void) #endif } -static void __init jailhouse_get_smp_config(unsigned int early) +static void __init jailhouse_parse_smp_config(void) { struct ioapic_domain_cfg ioapic_cfg = { .type = IOAPIC_DOMAIN_STRICT, @@ -119,11 +119,6 @@ static void __init jailhouse_get_smp_config(unsigned int early) } } -static void __init jailhouse_parse_smp_config(void) -{ - jailhouse_get_smp_config(false); -} - static void jailhouse_no_restart(void) { pr_notice("Jailhouse: Restart not supported, halting\n"); @@ -212,7 +207,6 @@ static void __init jailhouse_init_platform(void) x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; x86_init.mpparse.parse_smp_cfg = jailhouse_parse_smp_config; - x86_init.mpparse.get_smp_config = jailhouse_get_smp_config; x86_init.pci.arch_init = jailhouse_pci_arch_init; x86_platform.calibrate_cpu = jailhouse_get_tsc; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index b22093d2265b..9c000c409eb1 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -473,7 +473,7 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) /* * Scan the memory blocks for an SMP configuration block. */ -void __init mpparse_get_smp_config(unsigned int early) +static __init void mpparse_get_smp_config(unsigned int early) { struct mpf_intel *mpf; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2a4428bf529c..ec2d2505f8c8 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1311,15 +1311,11 @@ void __init setup_arch(char **cmdline_p) early_quirks(); /* - * Read APIC and some other early information from ACPI tables. + * Parse SMP configuration. Try ACPI first and then the platform + * specific parser. */ acpi_boot_init(); - x86_dtb_parse_smp_config(); - - /* - * get boot-time SMP configuration: - */ - get_smp_config(); + x86_init.mpparse.parse_smp_cfg(); /* * Systems w/o ACPI and mptables might not have it mapped the local diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index e1da0bfa311e..5474a6fdd689 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -74,7 +74,8 @@ struct x86_init_ops x86_init __initdata = { .mpparse = { .setup_ioapic_ids = x86_init_noop, .find_mptable = mpparse_find_mptable, - .get_smp_config = mpparse_get_smp_config, + .early_parse_smp_cfg = mpparse_parse_early_smp_config, + .parse_smp_cfg = mpparse_parse_smp_config, }, .irqs = { diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index 6378082d8855..f32451bdcfdd 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c @@ -140,7 +140,6 @@ void __init x86_ce4100_early_setup(void) x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; x86_init.mpparse.parse_smp_cfg = x86_dtb_parse_smp_config; - x86_init.mpparse.get_smp_config = x86_init_uint_noop; x86_init.pci.init = ce4100_pci_init; x86_init.pci.init_irq = sdv_pci_init; diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index 14ca3675999a..7be71c2cdc83 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c @@ -121,6 +121,5 @@ void __init x86_intel_mid_early_setup(void) x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; x86_init.mpparse.parse_smp_cfg = x86_init_noop; - x86_init.mpparse.get_smp_config = x86_init_uint_noop; set_bit(MP_BUS_ISA, mp_bus_not_pci); } diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index bd939b8b1a06..7f6f34056e13 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -148,14 +148,11 @@ int xen_smp_intr_init_pv(unsigned int cpu) return rc; } -static void __init _get_smp_config(unsigned int early) +static void __init xen_pv_smp_config(void) { int i, rc; unsigned int subtract = 0; - if (early) - return; - num_processors = 0; disabled_cpus = 0; for (i = 0; i < nr_cpu_ids; i++) { @@ -185,11 +182,6 @@ static void __init _get_smp_config(unsigned int early) smp_found_config = 1; } -static void __init xen_pv_smp_config(void) -{ - _get_smp_config(false); -} - static void __init xen_pv_smp_prepare_boot_cpu(void) { BUG_ON(smp_processor_id() != 0); @@ -463,5 +455,4 @@ void __init xen_smp_init(void) x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; x86_init.mpparse.parse_smp_cfg = xen_pv_smp_config; - x86_init.mpparse.get_smp_config = _get_smp_config; } -- Gitee From 0f27791d6da2ff5d2d9c6b024947f79614dcc572 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:16 +0100 Subject: [PATCH 46/88] x86/mm/numa: Move early mptable evaluation into common code ANBZ: #26668 commit de6aec241750a4f9d33d0e055d97fb3e0170c31a upstream. There is no reason to have the early mptable evaluation conditionally invoked only from the AMD numa topology code. Make it explicit and invoke it from setup_arch() right after the corresponding ACPI init call. Remove the pointless wrapper and invoke x86_init::mpparse::early_parse_smp_config() directly. Intel-SIG: commit de6aec241750 x86/mm/numa: Move early mptable evaluation into common code. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.931761608@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/mpspec.h | 5 ----- arch/x86/kernel/setup.c | 2 ++ arch/x86/mm/amdtopology.c | 7 ------- 3 files changed, 2 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 82480b7d01f0..b423d11e002d 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -46,11 +46,6 @@ extern int smp_found_config; # define smp_found_config 0 #endif -static inline void early_get_smp_config(void) -{ - x86_init.mpparse.early_parse_smp_cfg(); -} - #ifdef CONFIG_X86_MPPARSE extern void e820__memblock_alloc_reserved_mpc_new(void); extern int enable_update_mptable; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ec2d2505f8c8..c5d56efcd281 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1267,7 +1267,9 @@ void __init setup_arch(char **cmdline_p) early_platform_quirks(); + /* Some platforms need the APIC registered for NUMA configuration */ early_acpi_boot_init(); + x86_init.mpparse.early_parse_smp_cfg(); initmem_init(); diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c index 5681b997b357..9332b36a1091 100644 --- a/arch/x86/mm/amdtopology.c +++ b/arch/x86/mm/amdtopology.c @@ -161,13 +161,6 @@ int __init amd_numa_init(void) */ cores = topology_get_domain_size(TOPO_CORE_DOMAIN); - /* - * Scan MPTABLE to map the local APIC and ensure that the boot CPU - * APIC ID is valid. This is required because on pre ACPI/SRAT - * systems IO-APICs are mapped before the boot CPU. - */ - early_get_smp_config(); - apicid = boot_cpu_physical_apicid; if (apicid > 0) pr_info("BSP APIC ID: %02x\n", apicid); -- Gitee From 01fd8c1fb79bdaf001167669620d39c7a5812acf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:17 +0100 Subject: [PATCH 47/88] x86/mpparse: Remove the physid_t bitmap wrapper ANBZ: #26668 commit 350b5e2730d1e15337a10bd913694ee4527c02f0 upstream. physid_t is a wrapper around bitmap. Just remove the onion layer and use bitmap functionality directly. Intel-SIG: commit 350b5e2730d1 x86/mpparse: Remove the physid_t bitmap wrapper. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154639.994904510@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/mpspec.h | 32 +++++++++--------------------- arch/x86/kernel/apic/apic.c | 11 +++++----- arch/x86/kernel/apic/apic_common.c | 12 +---------- arch/x86/kernel/apic/io_apic.c | 24 +++++++++++----------- arch/x86/kernel/apic/local.h | 1 - arch/x86/kernel/smpboot.c | 8 +++----- 6 files changed, 30 insertions(+), 58 deletions(-) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index b423d11e002d..1b79d0ee95df 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_MPSPEC_H #define _ASM_X86_MPSPEC_H +#include #include #include @@ -62,32 +63,17 @@ static inline void e820__memblock_alloc_reserved_mpc_new(void) { } int generic_processor_info(int apicid); -#define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_LOCAL_APIC) +extern DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC); -struct physid_mask { - unsigned long mask[PHYSID_ARRAY_SIZE]; -}; - -typedef struct physid_mask physid_mask_t; - -#define physid_set(physid, map) set_bit(physid, (map).mask) -#define physid_isset(physid, map) test_bit(physid, (map).mask) - -#define physids_clear(map) \ - bitmap_zero((map).mask, MAX_LOCAL_APIC) - -#define physids_empty(map) \ - bitmap_empty((map).mask, MAX_LOCAL_APIC) - -static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) +static inline void reset_phys_cpu_present_map(u32 apicid) { - physids_clear(*map); - physid_set(physid, *map); + bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC); + set_bit(apicid, phys_cpu_present_map); } -#define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} } -#define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} } - -extern physid_mask_t phys_cpu_present_map; +static inline void copy_phys_cpu_present_map(unsigned long *dst) +{ + bitmap_copy(dst, phys_cpu_present_map, MAX_LOCAL_APIC); +} #endif /* _ASM_X86_MPSPEC_H */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 87c5f1931f4a..0700db748fbc 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -77,10 +78,8 @@ EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid); u8 boot_cpu_apic_version __ro_after_init; -/* - * Bitmask of physically existing CPUs: - */ -physid_mask_t phys_cpu_present_map; +/* Bitmap of physically present CPUs. */ +DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC); /* * Processor to be disabled specified by kernel parameter @@ -2406,7 +2405,7 @@ static void cpu_update_apic(int cpu, u32 apicid) early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; #endif set_cpu_possible(cpu, true); - physid_set(apicid, phys_cpu_present_map); + set_bit(apicid, phys_cpu_present_map); set_cpu_present(cpu, true); num_processors++; @@ -2508,7 +2507,7 @@ static void __init apic_bsp_up_setup(void) #ifdef CONFIG_X86_64 apic_write(APIC_ID, apic->set_apic_id(boot_cpu_physical_apicid)); #endif - physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); + reset_phys_cpu_present_map(boot_cpu_physical_apicid); } /** diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c index 8a00141073ea..d4dfa437081a 100644 --- a/arch/x86/kernel/apic/apic_common.c +++ b/arch/x86/kernel/apic/apic_common.c @@ -18,16 +18,6 @@ u32 apic_flat_calc_apicid(unsigned int cpu) return 1U << cpu; } -bool default_check_apicid_used(physid_mask_t *map, u32 apicid) -{ - return physid_isset(apicid, *map); -} - -void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) -{ - *retmap = *phys_map; -} - u32 default_cpu_present_to_apicid(int mps_cpu) { if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) @@ -39,7 +29,7 @@ EXPORT_SYMBOL_GPL(default_cpu_present_to_apicid); bool default_apic_id_registered(void) { - return physid_isset(read_apic_id(), phys_cpu_present_map); + return test_bit(read_apic_id(), phys_cpu_present_map); } /* diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index b900b11ae0a5..b283bf719f33 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1451,7 +1451,7 @@ void restore_boot_irq_mode(void) */ static void __init setup_ioapic_ids_from_mpc_nocheck(void) { - physid_mask_t phys_id_present_map; + DECLARE_BITMAP(phys_id_present_map, MAX_LOCAL_APIC); const u32 broadcast_id = 0xF; union IO_APIC_reg_00 reg_00; unsigned char old_id; @@ -1462,7 +1462,7 @@ static void __init setup_ioapic_ids_from_mpc_nocheck(void) * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. */ - phys_id_present_map = phys_cpu_present_map; + copy_phys_cpu_present_map(phys_id_present_map); /* * Set the IOAPIC ID to the value stored in the MPC table. @@ -1487,21 +1487,21 @@ static void __init setup_ioapic_ids_from_mpc_nocheck(void) * system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (physid_isset(mpc_ioapic_id(ioapic_idx), phys_id_present_map)) { + if (test_bit(mpc_ioapic_id(ioapic_idx), phys_id_present_map)) { pr_err(FW_BUG "IO-APIC#%d ID %d is already used!...\n", ioapic_idx, mpc_ioapic_id(ioapic_idx)); for (i = 0; i < broadcast_id; i++) - if (!physid_isset(i, phys_id_present_map)) + if (!test_bit(i, phys_id_present_map)) break; if (i >= broadcast_id) panic("Max APIC ID exceeded!\n"); pr_err("... fixing up to %d. (tell your hw vendor)\n", i); - physid_set(i, phys_id_present_map); + set_bit(i, phys_id_present_map); ioapics[ioapic_idx].mp_config.apicid = i; } else { apic_printk(APIC_VERBOSE, "Setting %d in the phys_id_present_map\n", mpc_ioapic_id(ioapic_idx)); - physid_set(mpc_ioapic_id(ioapic_idx), phys_id_present_map); + set_bit(mpc_ioapic_id(ioapic_idx), phys_id_present_map); } /* @@ -2482,15 +2482,15 @@ unsigned int arch_dynirq_lower_bound(unsigned int from) #ifdef CONFIG_X86_32 static int io_apic_get_unique_id(int ioapic, int apic_id) { - static physid_mask_t apic_id_map = PHYSID_MASK_NONE; + static DECLARE_BITMAP(apic_id_map, MAX_LOCAL_APIC); const u32 broadcast_id = 0xF; union IO_APIC_reg_00 reg_00; unsigned long flags; int i = 0; /* Initialize the ID map */ - if (physids_empty(apic_id_map)) - apic_id_map = phys_cpu_present_map; + if (bitmap_empty(apic_id_map, MAX_LOCAL_APIC)) + copy_phys_cpu_present_map(apic_id_map); raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic, 0); @@ -2503,9 +2503,9 @@ static int io_apic_get_unique_id(int ioapic, int apic_id) } /* Every APIC in a system must have a unique ID */ - if (physid_isset(apic_id, apic_id_map)) { + if (test_bit(apic_id, apic_id_map)) { for (i = 0; i < broadcast_id; i++) { - if (!physid_isset(i, apic_id_map)) + if (!test_bit(i, apic_id_map)) break; } @@ -2516,7 +2516,7 @@ static int io_apic_get_unique_id(int ioapic, int apic_id) apic_id = i; } - physid_set(apic_id, apic_id_map); + set_bit(apic_id, apic_id_map); if (reg_00.bits.ID != apic_id) { reg_00.bits.ID = apic_id; diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h index 8fd37c9d1b34..a77c23e62459 100644 --- a/arch/x86/kernel/apic/local.h +++ b/arch/x86/kernel/apic/local.h @@ -63,7 +63,6 @@ void default_send_IPI_all(int vector); void default_send_IPI_self(int vector); bool default_apic_id_registered(void); -bool default_check_apicid_used(physid_mask_t *map, u32 apicid); #ifdef CONFIG_X86_32 void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 30132691f3d6..d8808dc02db6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1061,7 +1061,7 @@ int native_kick_ap(unsigned int cpu, struct task_struct *tidle) pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); - if (apicid == BAD_APICID || !physid_isset(apicid, phys_cpu_present_map) || + if (apicid == BAD_APICID || !test_bit(apicid, phys_cpu_present_map) || !apic_id_valid(apicid)) { pr_err("%s: bad cpu %d\n", __func__, cpu); return -EINVAL; @@ -1136,10 +1136,8 @@ static __init void disable_smp(void) init_cpu_present(cpumask_of(0)); init_cpu_possible(cpumask_of(0)); - if (smp_found_config) - physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); - else - physid_set_mask_of_physid(0, &phys_cpu_present_map); + reset_phys_cpu_present_map(smp_found_config ? boot_cpu_physical_apicid : 0); + cpumask_set_cpu(0, topology_sibling_cpumask(0)); cpumask_set_cpu(0, topology_core_cpumask(0)); cpumask_set_cpu(0, topology_die_cpumask(0)); -- Gitee From 50733ea393014c708c48fe7bc86fd56ffe58d157 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:19 +0100 Subject: [PATCH 48/88] x86/apic: Remove the pointless writeback of boot_cpu_physical_apicid ANBZ: #26668 commit 58d16928358f91d48421838a7484321b3149130d upstream. There is absolutely no point to write the APIC ID which was read from the local APIC earlier, back into the local APIC for the 64-bit UP case. Remove that along with the apic callback which is solely there for this pointless exercise. Intel-SIG: commit 58d16928358f x86/apic: Remove the pointless writeback of boot_cpu_physical_apicid. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154640.055288922@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 1 - arch/x86/kernel/apic/apic.c | 3 --- arch/x86/kernel/apic/apic_flat_64.c | 7 ------- arch/x86/kernel/apic/apic_numachip.c | 12 ------------ arch/x86/kernel/apic/bigsmp_32.c | 1 - arch/x86/kernel/apic/local.h | 1 - arch/x86/kernel/apic/x2apic_cluster.c | 1 - arch/x86/kernel/apic/x2apic_phys.c | 6 ------ arch/x86/kernel/apic/x2apic_uv_x.c | 6 ------ arch/x86/xen/apic.c | 7 ------- 10 files changed, 45 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 7a261d29915c..01af344afa20 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -302,7 +302,6 @@ struct apic { u32 (*cpu_present_to_apicid)(int mps_cpu); u32 (*get_apic_id)(u32 id); - u32 (*set_apic_id)(u32 apicid); /* wakeup_secondary_cpu */ int (*wakeup_secondary_cpu)(u32 apicid, unsigned long start_eip); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 0700db748fbc..04fad508abfe 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2504,9 +2504,6 @@ EXPORT_SYMBOL_GPL(x86_msi_msg_get_destid); static void __init apic_bsp_up_setup(void) { -#ifdef CONFIG_X86_64 - apic_write(APIC_ID, apic->set_apic_id(boot_cpu_physical_apicid)); -#endif reset_phys_cpu_present_map(boot_cpu_physical_apicid); } diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 5c2d476be004..0efd66194f44 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -61,11 +61,6 @@ static u32 flat_get_apic_id(u32 x) return (x >> 24) & 0xFF; } -static u32 set_apic_id(u32 id) -{ - return (id & 0xFF) << 24; -} - static int flat_probe(void) { return 1; @@ -87,7 +82,6 @@ static struct apic apic_flat __ro_after_init = { .max_apic_id = 0xFE, .get_apic_id = flat_get_apic_id, - .set_apic_id = set_apic_id, .calc_dest_apicid = apic_flat_calc_apicid, @@ -157,7 +151,6 @@ static struct apic apic_physflat __ro_after_init = { .max_apic_id = 0xFE, .get_apic_id = flat_get_apic_id, - .set_apic_id = set_apic_id, .calc_dest_apicid = apic_default_calc_apicid, diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 70bedbc1a5bd..56dde960f330 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -38,11 +38,6 @@ static u32 numachip1_get_apic_id(u32 x) return id; } -static u32 numachip1_set_apic_id(u32 id) -{ - return (id & 0xff) << 24; -} - static u32 numachip2_get_apic_id(u32 x) { u64 mcfg; @@ -51,11 +46,6 @@ static u32 numachip2_get_apic_id(u32 x) return ((mcfg >> (28 - 8)) & 0xfff00) | (x >> 24); } -static u32 numachip2_set_apic_id(u32 id) -{ - return id << 24; -} - static void numachip1_apic_icr_write(int apicid, unsigned int val) { write_lcsr(CSR_G3_EXT_IRQ_GEN, (apicid << 16) | val); @@ -226,7 +216,6 @@ static const struct apic apic_numachip1 __refconst = { .max_apic_id = UINT_MAX, .get_apic_id = numachip1_get_apic_id, - .set_apic_id = numachip1_set_apic_id, .calc_dest_apicid = apic_default_calc_apicid, @@ -262,7 +251,6 @@ static const struct apic apic_numachip2 __refconst = { .max_apic_id = UINT_MAX, .get_apic_id = numachip2_get_apic_id, - .set_apic_id = numachip2_set_apic_id, .calc_dest_apicid = apic_default_calc_apicid, diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 4371abd4b0df..565a70513f1c 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -73,7 +73,6 @@ static struct apic apic_bigsmp __ro_after_init = { .max_apic_id = 0xFE, .get_apic_id = bigsmp_get_apic_id, - .set_apic_id = NULL, .calc_dest_apicid = apic_default_calc_apicid, diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h index a77c23e62459..f8a87f9e72e6 100644 --- a/arch/x86/kernel/apic/local.h +++ b/arch/x86/kernel/apic/local.h @@ -16,7 +16,6 @@ /* X2APIC */ void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest); u32 x2apic_get_apic_id(u32 id); -u32 x2apic_set_apic_id(u32 id); void x2apic_send_IPI_all(int vector); void x2apic_send_IPI_allbutself(int vector); diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 506d0d32de69..025c9ff66729 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -238,7 +238,6 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .max_apic_id = UINT_MAX, .x2apic_set_max_apicid = true, .get_apic_id = x2apic_get_apic_id, - .set_apic_id = x2apic_set_apic_id, .calc_dest_apicid = x2apic_calc_apicid, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 3396540fb46b..24ac3b05d7c6 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -129,11 +129,6 @@ u32 x2apic_get_apic_id(u32 id) return id; } -u32 x2apic_set_apic_id(u32 id) -{ - return id; -} - static struct apic apic_x2apic_phys __ro_after_init = { .name = "physical x2apic", @@ -150,7 +145,6 @@ static struct apic apic_x2apic_phys __ro_after_init = { .max_apic_id = UINT_MAX, .x2apic_set_max_apicid = true, .get_apic_id = x2apic_get_apic_id, - .set_apic_id = x2apic_set_apic_id, .calc_dest_apicid = apic_default_calc_apicid, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index f296e85c75bb..f5d008be80be 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -745,11 +745,6 @@ static void uv_send_IPI_all(int vector) uv_send_IPI_mask(cpu_online_mask, vector); } -static u32 set_apic_id(u32 id) -{ - return id; -} - static int uv_probe(void) { return apic == &apic_x2apic_uv_x; @@ -770,7 +765,6 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .max_apic_id = UINT_MAX, .get_apic_id = x2apic_get_apic_id, - .set_apic_id = set_apic_id, .calc_dest_apicid = apic_default_calc_apicid, diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index d4752382b600..8835d1cc961d 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -33,12 +33,6 @@ static unsigned int xen_io_apic_read(unsigned apic, unsigned reg) return 0xfd; } -static u32 xen_set_apic_id(u32 x) -{ - WARN_ON(1); - return x; -} - static u32 xen_get_apic_id(u32 x) { return ((x)>>24) & 0xFFu; @@ -131,7 +125,6 @@ static struct apic xen_pv_apic __ro_after_init = { .max_apic_id = UINT_MAX, .get_apic_id = xen_get_apic_id, - .set_apic_id = xen_set_apic_id, .calc_dest_apicid = apic_flat_calc_apicid, -- Gitee From bbfa872ee454f13f699627ca587d7908e7be4be0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:20 +0100 Subject: [PATCH 49/88] x86/apic: Remove yet another dubious callback ANBZ: #26668 commit 4a5f72a4a39f5d5dcf9b9dc1acc57ecbbb8d4caa upstream. Paranoia is not wrong, but having an APIC callback which is in most implementations a complete NOOP and in one actually looking whether the APICID of an upcoming CPU has been registered. The same APICID which was used to bring the CPU out of wait for startup. That's paranoia for the paranoia sake. Remove the voodoo. Intel-SIG: commit 4a5f72a4a39f x86/apic: Remove yet another dubious callback. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154640.116510935@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 1 - arch/x86/kernel/apic/apic.c | 3 --- arch/x86/kernel/apic/apic_common.c | 5 ----- arch/x86/kernel/apic/apic_flat_64.c | 2 -- arch/x86/kernel/apic/local.h | 2 -- arch/x86/kernel/apic/probe_32.c | 1 - 6 files changed, 14 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 01af344afa20..1e78d44c5c4e 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -296,7 +296,6 @@ struct apic { /* Probe, setup and smpboot functions */ int (*probe)(void); int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); - bool (*apic_id_registered)(void); void (*init_apic_ldr)(void); u32 (*cpu_present_to_apicid)(int mps_cpu); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 04fad508abfe..b09a7c7ebcd9 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1550,9 +1550,6 @@ static void setup_local_APIC(void) apic_write(APIC_ESR, 0); } #endif - /* Validate that the APIC is registered if required */ - BUG_ON(apic->apic_id_registered && !apic->apic_id_registered()); - /* * Intel recommends to set DFR, LDR and TPR before enabling * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c index d4dfa437081a..9ef3be866832 100644 --- a/arch/x86/kernel/apic/apic_common.c +++ b/arch/x86/kernel/apic/apic_common.c @@ -27,11 +27,6 @@ u32 default_cpu_present_to_apicid(int mps_cpu) } EXPORT_SYMBOL_GPL(default_cpu_present_to_apicid); -bool default_apic_id_registered(void) -{ - return test_bit(read_apic_id(), phys_cpu_present_map); -} - /* * Set up the logical destination ID when the APIC operates in logical * destination mode. diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 0efd66194f44..47396dbc65d1 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c @@ -70,7 +70,6 @@ static struct apic apic_flat __ro_after_init = { .name = "flat", .probe = flat_probe, .acpi_madt_oem_check = flat_acpi_madt_oem_check, - .apic_id_registered = default_apic_id_registered, .delivery_mode = APIC_DELIVERY_MODE_FIXED, .dest_mode_logical = true, @@ -140,7 +139,6 @@ static struct apic apic_physflat __ro_after_init = { .name = "physical flat", .probe = physflat_probe, .acpi_madt_oem_check = physflat_acpi_madt_oem_check, - .apic_id_registered = default_apic_id_registered, .delivery_mode = APIC_DELIVERY_MODE_FIXED, .dest_mode_logical = false, diff --git a/arch/x86/kernel/apic/local.h b/arch/x86/kernel/apic/local.h index f8a87f9e72e6..842fe28496be 100644 --- a/arch/x86/kernel/apic/local.h +++ b/arch/x86/kernel/apic/local.h @@ -61,8 +61,6 @@ void default_send_IPI_allbutself(int vector); void default_send_IPI_all(int vector); void default_send_IPI_self(int vector); -bool default_apic_id_registered(void); - #ifdef CONFIG_X86_32 void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, int vector); void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, int vector); diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 25df3528c56b..80fba7521f0b 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -38,7 +38,6 @@ static struct apic apic_default __ro_after_init = { .name = "default", .probe = probe_default, - .apic_id_registered = default_apic_id_registered, .delivery_mode = APIC_DELIVERY_MODE_FIXED, .dest_mode_logical = true, -- Gitee From 3004ae4ef3060d899044caa4305cc85f6707ec21 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:21 +0100 Subject: [PATCH 50/88] x86/apic: Use a proper define for invalid ACPI CPU ID ANBZ: #26668 commit 1a5d0f62d10d5da44c2b6a97b6600dea8a7519fb upstream. The ACPI ID for CPUs is preset with U32_MAX which is completely non obvious. Use a proper define for it. Intel-SIG: commit 1a5d0f62d10d x86/apic: Use a proper define for invalid ACPI CPU ID. x86/topology: More cleanups and preparatory work Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240212154640.177504138@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 3 +++ arch/x86/kernel/apic/apic.c | 2 +- arch/x86/xen/enlighten_hvm.c | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 1e78d44c5c4e..083841f1f3db 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -54,7 +54,10 @@ static inline void x86_32_probe_apic(void) { } extern u32 cpuid_to_apicid[]; +#define CPU_ACPIID_INVALID U32_MAX + #ifdef CONFIG_X86_LOCAL_APIC + extern int apic_verbosity; extern int local_apic_timer_c2_ok; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index b09a7c7ebcd9..66cab7808638 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -111,7 +111,7 @@ static inline bool apic_accessible(void) * Map cpu index to physical APIC ID */ DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid, BAD_APICID); -DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX); +DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, CPU_ACPIID_INVALID); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 70be57e8f51c..7410416cd8b6 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -161,7 +161,7 @@ static int xen_cpu_up_prepare_hvm(unsigned int cpu) */ xen_uninit_lock_cpu(cpu); - if (cpu_acpi_id(cpu) != U32_MAX) + if (cpu_acpi_id(cpu) != CPU_ACPIID_INVALID) per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu); else per_cpu(xen_vcpu_id, cpu) = cpu; -- Gitee From 64a73e2263dd3bc05d1bf326f914ab57d8a1381e Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Tue, 2 Apr 2024 07:40:27 -0700 Subject: [PATCH 51/88] x86/hyperv/vtl: Correct x86_init.mpparse.parse_smp_cfg assignment ANBZ: #26668 commit fe5e6b599fbc417662c549c04d278a13098eb52a upstream. this commit include updates of 222408cde4d0ab ("x86/of: Set the parse_smp_cfg for all the DeviceTree platforms by default") VTL platform uses DeviceTree for fetching SMP configuration, assign the correct parsing function x86_dtb_parse_smp_config() for it to parse_smp_cfg. Fixes: c22e19cd2c8a ("x86/hyperv/vtl: Prepare for separate mpparse callbacks") Intel-SIG: commit fe5e6b599fbc x86/hyperv/vtl: Correct x86_init.mpparse.parse_smp_cfg assignment. x86/topology: More cleanups and preparatory work Signed-off-by: Saurabh Sengar Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/1712068830-4513-2-git-send-email-ssengar@linux.microsoft.com [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/hyperv/hv_vtl.c | 1 - arch/x86/kernel/devicetree.c | 8 ++++---- arch/x86/platform/ce4100/ce4100.c | 1 - 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 64ca4543ceca..d04ccd4b3b4a 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -35,7 +35,6 @@ void __init hv_vtl_init_platform(void) /* Avoid searching for BIOS MP tables */ x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; - x86_init.mpparse.parse_smp_cfg = x86_init_noop; x86_platform.get_wallclock = get_rtc_noop; x86_platform.set_wallclock = set_rtc_noop; diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 64126b20a0b5..a7b3fd558044 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -277,9 +277,9 @@ static void __init dtb_apic_setup(void) dtb_ioapic_setup(); } -#ifdef CONFIG_OF_EARLY_FLATTREE static void __init x86_flattree_get_config(void) { +#ifdef CONFIG_OF_EARLY_FLATTREE u32 size, map_len; void *dt; @@ -301,10 +301,10 @@ static void __init x86_flattree_get_config(void) if (initial_dtb) early_memunmap(dt, map_len); -} -#else -static inline void x86_flattree_get_config(void) { } #endif + if (of_have_populated_dt()) + x86_init.mpparse.parse_smp_cfg = x86_dtb_parse_smp_config; +} void __init x86_dtb_parse_smp_config(void) { diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index f32451bdcfdd..f8126821a94d 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c @@ -139,7 +139,6 @@ void __init x86_ce4100_early_setup(void) x86_init.resources.probe_roms = x86_init_noop; x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; - x86_init.mpparse.parse_smp_cfg = x86_dtb_parse_smp_config; x86_init.pci.init = ce4100_pci_init; x86_init.pci.init_irq = sdv_pci_init; -- Gitee From 15e222230c81ed8a0ac5fd86e8a2ceac8bc3157f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:35 +0100 Subject: [PATCH 52/88] x86/cpu/topology: Move registration out of APIC code ANBZ: #26668 commit c0a66c2847908e41c771ca2355fba935a82a9f62 upstream. The APIC/CPU registration sits in the middle of the APIC code. In fact this is a topology evaluation function and has nothing to do with the inner workings of the local APIC. Move it out into a file which reflects what this is about. Intel-SIG: commit c0a66c284790 x86/cpu/topology: Move registration out of APIC code. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210251.543948812@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 2 + arch/x86/kernel/apic/apic.c | 185 +-------------------------------- arch/x86/kernel/cpu/Makefile | 12 ++- arch/x86/kernel/cpu/topology.c | 184 ++++++++++++++++++++++++++++++++ 4 files changed, 195 insertions(+), 188 deletions(-) create mode 100644 arch/x86/kernel/cpu/topology.c diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 083841f1f3db..0a90b2e3d2c3 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -177,6 +177,8 @@ extern bool apic_needs_pit(void); extern void apic_send_IPI_allbutself(unsigned int vector); +extern void topology_register_boot_apic(u32 apic_id); + #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } #define local_apic_timer_c2_ok 1 diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 66cab7808638..1389fb20b4f1 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -68,26 +68,12 @@ #include "local.h" -unsigned int num_processors; - -unsigned disabled_cpus; - /* Processor that is doing the boot up */ u32 boot_cpu_physical_apicid __ro_after_init = BAD_APICID; EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid); u8 boot_cpu_apic_version __ro_after_init; -/* Bitmap of physically present CPUs. */ -DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC); - -/* - * Processor to be disabled specified by kernel parameter - * disable_cpu_apicid=, mostly used for the kdump 2nd kernel to - * avoid undefined behaviour caused by sending INIT from AP to BSP. - */ -static u32 disabled_cpu_apicid __ro_after_init = BAD_APICID; - /* * This variable controls which CPUs receive external NMIs. By default, * external NMIs are delivered only to the BSP. @@ -107,14 +93,6 @@ static inline bool apic_accessible(void) return x2apic_mode || apic_mmio_base; } -/* - * Map cpu index to physical APIC ID - */ -DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid, BAD_APICID); -DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, CPU_ACPIID_INVALID); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); -EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); - #ifdef CONFIG_X86_32 /* Local APIC was disabled by the BIOS and enabled by the kernel */ static int enabled_via_apicbase __ro_after_init; @@ -1688,8 +1666,6 @@ void apic_ap_setup(void) end_local_APIC_setup(); } -static __init void cpu_set_boot_apic(void); - static __init void apic_read_boot_cpu_id(bool x2apic) { /* @@ -1704,7 +1680,8 @@ static __init void apic_read_boot_cpu_id(bool x2apic) boot_cpu_physical_apicid = read_apic_id(); boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR)); } - cpu_set_boot_apic(); + topology_register_boot_apic(boot_cpu_physical_apicid); + x86_32_probe_bigsmp_early(); } #ifdef CONFIG_X86_X2APIC @@ -2310,155 +2287,6 @@ void disconnect_bsp_APIC(int virt_wire_setup) apic_write(APIC_LVT1, value); } -/* - * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated - * contiguously, it equals to current allocated max logical CPU ID plus 1. - * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range, - * so the maximum of nr_logical_cpuids is nr_cpu_ids. - * - * NOTE: Reserve 0 for BSP. - */ -static int nr_logical_cpuids = 1; - -/* - * Used to store mapping between logical CPU IDs and APIC IDs. - */ -u32 cpuid_to_apicid[] = { [0 ... NR_CPUS - 1] = BAD_APICID, }; - -bool arch_match_cpu_phys_id(int cpu, u64 phys_id) -{ - return phys_id == (u64)cpuid_to_apicid[cpu]; -} - -#ifdef CONFIG_SMP -static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) -{ - /* Isolate the SMT bit(s) in the APICID and check for 0 */ - u32 mask = (1U << (fls(smp_num_siblings) - 1)) - 1; - - if (smp_num_siblings == 1 || !(apicid & mask)) - cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); -} - -/* - * Due to the utter mess of CPUID evaluation smp_num_siblings is not valid - * during early boot. Initialize the primary thread mask before SMP - * bringup. - */ -static int __init smp_init_primary_thread_mask(void) -{ - unsigned int cpu; - - /* - * XEN/PV provides either none or useless topology information. - * Pretend that all vCPUs are primary threads. - */ - if (xen_pv_domain()) { - cpumask_copy(&__cpu_primary_thread_mask, cpu_possible_mask); - return 0; - } - - for (cpu = 0; cpu < nr_logical_cpuids; cpu++) - cpu_mark_primary_thread(cpu, cpuid_to_apicid[cpu]); - return 0; -} -early_initcall(smp_init_primary_thread_mask); -#else -static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } -#endif - -/* - * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids - * and cpuid_to_apicid[] synchronized. - */ -static int allocate_logical_cpuid(int apicid) -{ - int i; - - /* - * cpuid <-> apicid mapping is persistent, so when a cpu is up, - * check if the kernel has allocated a cpuid for it. - */ - for (i = 0; i < nr_logical_cpuids; i++) { - if (cpuid_to_apicid[i] == apicid) - return i; - } - - /* Allocate a new cpuid. */ - if (nr_logical_cpuids >= nr_cpu_ids) { - WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. " - "Processor %d/0x%x and the rest are ignored.\n", - nr_cpu_ids, nr_logical_cpuids, apicid); - return -EINVAL; - } - - cpuid_to_apicid[nr_logical_cpuids] = apicid; - return nr_logical_cpuids++; -} - -static void cpu_update_apic(int cpu, u32 apicid) -{ -#if defined(CONFIG_SMP) || defined(CONFIG_X86_64) - early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; -#endif - set_cpu_possible(cpu, true); - set_bit(apicid, phys_cpu_present_map); - set_cpu_present(cpu, true); - num_processors++; - - if (system_state != SYSTEM_BOOTING) - cpu_mark_primary_thread(cpu, apicid); -} - -static __init void cpu_set_boot_apic(void) -{ - cpuid_to_apicid[0] = boot_cpu_physical_apicid; - cpu_update_apic(0, boot_cpu_physical_apicid); - x86_32_probe_bigsmp_early(); -} - -int generic_processor_info(int apicid) -{ - int cpu, max = nr_cpu_ids; - - /* The boot CPU must be set before MADT/MPTABLE parsing happens */ - if (cpuid_to_apicid[0] == BAD_APICID) - panic("Boot CPU APIC not registered yet\n"); - - if (apicid == boot_cpu_physical_apicid) - return 0; - - if (disabled_cpu_apicid == apicid) { - int thiscpu = num_processors + disabled_cpus; - - pr_warn("APIC: Disabling requested cpu. Processor %d/0x%x ignored.\n", - thiscpu, apicid); - - disabled_cpus++; - return -ENODEV; - } - - if (num_processors >= nr_cpu_ids) { - int thiscpu = max + disabled_cpus; - - pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. " - "Processor %d/0x%x ignored.\n", max, thiscpu, apicid); - - disabled_cpus++; - return -EINVAL; - } - - cpu = allocate_logical_cpuid(apicid); - if (cpu < 0) { - disabled_cpus++; - return -EINVAL; - } - - cpu_update_apic(cpu, apicid); - return cpu; -} - - void __irq_msi_compose_msg(struct irq_cfg *cfg, struct msi_msg *msg, bool dmar) { @@ -2847,15 +2675,6 @@ static int __init lapic_insert_resource(void) */ late_initcall(lapic_insert_resource); -static int __init apic_set_disabled_cpu_apicid(char *arg) -{ - if (!arg || !get_option(&arg, &disabled_cpu_apicid)) - return -EINVAL; - - return 0; -} -early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid); - static int __init apic_set_extnmi(char *arg) { if (!arg) diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index b5423bfbaad3..434f638827fe 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -26,15 +26,17 @@ obj-y += bugs.o obj-y += aperfmperf.o obj-y += cpuid-deps.o obj-y += umwait.o +obj-y += capflags.o powerflags.o obj-$(CONFIG_CPU_SUP_ZHAOXIN) += zxpause.o -obj-$(CONFIG_PROC_FS) += proc.o -obj-y += capflags.o powerflags.o +obj-$(CONFIG_X86_LOCAL_APIC) += topology.o -obj-$(CONFIG_IA32_FEAT_CTL) += feat_ctl.o +obj-$(CONFIG_PROC_FS) += proc.o + +obj-$(CONFIG_IA32_FEAT_CTL) += feat_ctl.o ifdef CONFIG_CPU_SUP_INTEL -obj-y += intel.o intel_pconfig.o tsx.o -obj-$(CONFIG_PM) += intel_epb.o +obj-y += intel.o intel_pconfig.o tsx.o +obj-$(CONFIG_PM) += intel_epb.o endif obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_HYGON) += hygon.o diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c new file mode 100644 index 000000000000..b99cd19b7d12 --- /dev/null +++ b/arch/x86/kernel/cpu/topology.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include + +#include + +#include +#include +#include + +/* + * Map cpu index to physical APIC ID + */ +DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid, BAD_APICID); +DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, CPU_ACPIID_INVALID); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); +EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); + +/* Bitmap of physically present CPUs. */ +DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly; + +/* Used for CPU number allocation and parallel CPU bringup */ +u32 cpuid_to_apicid[] __read_mostly = { [0 ... NR_CPUS - 1] = BAD_APICID, }; + +/* + * Processor to be disabled specified by kernel parameter + * disable_cpu_apicid=, mostly used for the kdump 2nd kernel to + * avoid undefined behaviour caused by sending INIT from AP to BSP. + */ +static u32 disabled_cpu_apicid __ro_after_init = BAD_APICID; + +unsigned int num_processors; +unsigned disabled_cpus; + +/* + * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated + * contiguously, it equals to current allocated max logical CPU ID plus 1. + * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range, + * so the maximum of nr_logical_cpuids is nr_cpu_ids. + * + * NOTE: Reserve 0 for BSP. + */ +static int nr_logical_cpuids = 1; + +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return phys_id == (u64)cpuid_to_apicid[cpu]; +} + +#ifdef CONFIG_SMP +static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) +{ + /* Isolate the SMT bit(s) in the APICID and check for 0 */ + u32 mask = (1U << (fls(smp_num_siblings) - 1)) - 1; + + if (smp_num_siblings == 1 || !(apicid & mask)) + cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); +} + +/* + * Due to the utter mess of CPUID evaluation smp_num_siblings is not valid + * during early boot. Initialize the primary thread mask before SMP + * bringup. + */ +static int __init smp_init_primary_thread_mask(void) +{ + unsigned int cpu; + + /* + * XEN/PV provides either none or useless topology information. + * Pretend that all vCPUs are primary threads. + */ + if (xen_pv_domain()) { + cpumask_copy(&__cpu_primary_thread_mask, cpu_possible_mask); + return 0; + } + + for (cpu = 0; cpu < nr_logical_cpuids; cpu++) + cpu_mark_primary_thread(cpu, cpuid_to_apicid[cpu]); + return 0; +} +early_initcall(smp_init_primary_thread_mask); +#else +static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } +#endif + +/* + * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids + * and cpuid_to_apicid[] synchronized. + */ +static int allocate_logical_cpuid(int apicid) +{ + int i; + + /* + * cpuid <-> apicid mapping is persistent, so when a cpu is up, + * check if the kernel has allocated a cpuid for it. + */ + for (i = 0; i < nr_logical_cpuids; i++) { + if (cpuid_to_apicid[i] == apicid) + return i; + } + + /* Allocate a new cpuid. */ + if (nr_logical_cpuids >= nr_cpu_ids) { + WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. " + "Processor %d/0x%x and the rest are ignored.\n", + nr_cpu_ids, nr_logical_cpuids, apicid); + return -EINVAL; + } + + cpuid_to_apicid[nr_logical_cpuids] = apicid; + return nr_logical_cpuids++; +} + +static void cpu_update_apic(int cpu, u32 apicid) +{ +#if defined(CONFIG_SMP) || defined(CONFIG_X86_64) + early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; +#endif + set_cpu_possible(cpu, true); + set_bit(apicid, phys_cpu_present_map); + set_cpu_present(cpu, true); + num_processors++; + + if (system_state != SYSTEM_BOOTING) + cpu_mark_primary_thread(cpu, apicid); +} + +void __init topology_register_boot_apic(u32 apic_id) +{ + cpuid_to_apicid[0] = apic_id; + cpu_update_apic(0, apic_id); +} + +int generic_processor_info(int apicid) +{ + int cpu, max = nr_cpu_ids; + + /* The boot CPU must be set before MADT/MPTABLE parsing happens */ + if (cpuid_to_apicid[0] == BAD_APICID) + panic("Boot CPU APIC not registered yet\n"); + + if (apicid == boot_cpu_physical_apicid) + return 0; + + if (disabled_cpu_apicid == apicid) { + int thiscpu = num_processors + disabled_cpus; + + pr_warn("APIC: Disabling requested cpu. Processor %d/0x%x ignored.\n", + thiscpu, apicid); + + disabled_cpus++; + return -ENODEV; + } + + if (num_processors >= nr_cpu_ids) { + int thiscpu = max + disabled_cpus; + + pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. " + "Processor %d/0x%x ignored.\n", max, thiscpu, apicid); + + disabled_cpus++; + return -EINVAL; + } + + cpu = allocate_logical_cpuid(apicid); + if (cpu < 0) { + disabled_cpus++; + return -EINVAL; + } + + cpu_update_apic(cpu, apicid); + return cpu; +} + +static int __init apic_set_disabled_cpu_apicid(char *arg) +{ + if (!arg || !get_option(&arg, &disabled_cpu_apicid)) + return -EINVAL; + + return 0; +} +early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid); -- Gitee From aa449d139135bb38829f64a872538f17790464b1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:36 +0100 Subject: [PATCH 53/88] x86/cpu/topology: Provide separate APIC registration functions ANBZ: #26668 commit 4176b541c2c68bf79d0a05f316713ed8f0c9cdb4 upstream. generic_processor_info() aside of being a complete misnomer is used for both early boot registration and ACPI CPU hotplug. While it's arguable that this can share some code, it results in code which is hard to understand and kept around post init for no real reason. Also the call sites do lots of manual fiddling in topology related variables instead of having proper interfaces for the purpose which handle the topology internals correctly. Provide topology_register_apic(), topology_hotplug_apic() and topology_hotunplug_apic() which have the extra magic of the call sites incorporated and for now are wrappers around generic_processor_info(). Intel-SIG: commit 4176b541c2c6 x86/cpu/topology: Provide separate APIC registration functions. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210251.605007456@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 3 + arch/x86/kernel/cpu/topology.c | 113 +++++++++++++++++++++++++++------ 2 files changed, 98 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 0a90b2e3d2c3..a4fdd2f53ed7 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -177,7 +177,10 @@ extern bool apic_needs_pit(void); extern void apic_send_IPI_allbutself(unsigned int vector); +extern void topology_register_apic(u32 apic_id, u32 acpi_id, bool present); extern void topology_register_boot_apic(u32 apic_id); +extern int topology_hotplug_apic(u32 apic_id, u32 acpi_id); +extern void topology_hotunplug_apic(unsigned int cpu); #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index b99cd19b7d12..3dd7e6c08cb5 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -84,32 +84,38 @@ early_initcall(smp_init_primary_thread_mask); static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } #endif -/* - * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids - * and cpuid_to_apicid[] synchronized. - */ -static int allocate_logical_cpuid(int apicid) +static int topo_lookup_cpuid(u32 apic_id) { int i; - /* - * cpuid <-> apicid mapping is persistent, so when a cpu is up, - * check if the kernel has allocated a cpuid for it. - */ + /* CPU# to APICID mapping is persistent once it is established */ for (i = 0; i < nr_logical_cpuids; i++) { - if (cpuid_to_apicid[i] == apicid) + if (cpuid_to_apicid[i] == apic_id) return i; } + return -ENODEV; +} + +/* + * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids + * and cpuid_to_apicid[] synchronized. + */ +static int allocate_logical_cpuid(u32 apic_id) +{ + int cpu = topo_lookup_cpuid(apic_id); + + if (cpu >= 0) + return cpu; /* Allocate a new cpuid. */ if (nr_logical_cpuids >= nr_cpu_ids) { WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. " "Processor %d/0x%x and the rest are ignored.\n", - nr_cpu_ids, nr_logical_cpuids, apicid); + nr_cpu_ids, nr_logical_cpuids, apic_id); return -EINVAL; } - cpuid_to_apicid[nr_logical_cpuids] = apicid; + cpuid_to_apicid[nr_logical_cpuids] = apic_id; return nr_logical_cpuids++; } @@ -127,12 +133,6 @@ static void cpu_update_apic(int cpu, u32 apicid) cpu_mark_primary_thread(cpu, apicid); } -void __init topology_register_boot_apic(u32 apic_id) -{ - cpuid_to_apicid[0] = apic_id; - cpu_update_apic(0, apic_id); -} - int generic_processor_info(int apicid) { int cpu, max = nr_cpu_ids; @@ -174,6 +174,83 @@ int generic_processor_info(int apicid) return cpu; } +/** + * topology_register_apic - Register an APIC in early topology maps + * @apic_id: The APIC ID to set up + * @acpi_id: The ACPI ID associated to the APIC + * @present: True if the corresponding CPU is present + */ +void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present) +{ + int cpu; + + if (apic_id >= MAX_LOCAL_APIC) { + pr_err_once("APIC ID %x exceeds kernel limit of: %x\n", apic_id, MAX_LOCAL_APIC - 1); + return; + } + + if (!present) { + disabled_cpus++; + return; + } + + cpu = generic_processor_info(apic_id); + if (cpu >= 0) + early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; +} + +/** + * topology_register_boot_apic - Register the boot CPU APIC + * @apic_id: The APIC ID to set up + * + * Separate so CPU #0 can be assigned + */ +void __init topology_register_boot_apic(u32 apic_id) +{ + cpuid_to_apicid[0] = apic_id; + cpu_update_apic(0, apic_id); +} + +#ifdef CONFIG_ACPI_HOTPLUG_CPU +/** + * topology_hotplug_apic - Handle a physical hotplugged APIC after boot + * @apic_id: The APIC ID to set up + * @acpi_id: The ACPI ID associated to the APIC + */ +int topology_hotplug_apic(u32 apic_id, u32 acpi_id) +{ + int cpu; + + if (apic_id >= MAX_LOCAL_APIC) + return -EINVAL; + + cpu = topo_lookup_cpuid(apic_id); + if (cpu < 0) { + cpu = generic_processor_info(apic_id); + if (cpu >= 0) + per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; + } + return cpu; +} + +/** + * topology_hotunplug_apic - Remove a physical hotplugged APIC after boot + * @cpu: The CPU number for which the APIC ID is removed + */ +void topology_hotunplug_apic(unsigned int cpu) +{ + u32 apic_id = cpuid_to_apicid[cpu]; + + if (apic_id == BAD_APICID) + return; + + per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; + clear_bit(apic_id, phys_cpu_present_map); + set_cpu_present(cpu, false); + num_processors--; +} +#endif + static int __init apic_set_disabled_cpu_apicid(char *arg) { if (!arg || !get_option(&arg, &disabled_cpu_apicid)) -- Gitee From ee0242c2b1211f00cf2d64626e6dc81641a71169 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:37 +0100 Subject: [PATCH 54/88] x86/acpi: Use new APIC registration functions ANBZ: #26668 commit ff37b09c8495ed897ea470014d1461660db6a942 upstream. Use the new topology registration functions and make the early boot code path __init. No functional change intended. Intel-SIG: commit ff37b09c8495 x86/acpi: Use new APIC registration functions. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210251.664738831@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/acpi/boot.c | 44 ++++++------------------------------- 1 file changed, 7 insertions(+), 37 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 13501fb665cf..2827a6b0783e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -163,33 +163,9 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) return 0; } -/** - * acpi_register_lapic - register a local apic and generates a logic cpu number - * @id: local apic id to register - * @acpiid: ACPI id to register - * @enabled: this cpu is enabled or not - * - * Returns the logic cpu number which maps to the local apic - */ -static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) +static __init void acpi_register_lapic(u32 apic_id, u32 acpi_id, bool present) { - int cpu; - - if (id >= MAX_LOCAL_APIC) { - pr_info("skipped apicid that is too big\n"); - return -EINVAL; - } - - if (!enabled) { - ++disabled_cpus; - return -EINVAL; - } - - cpu = generic_processor_info(id); - if (cpu >= 0) - early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid; - - return cpu; + topology_register_apic(apic_id, acpi_id, present); } static bool __init acpi_is_processor_usable(u32 lapic_flags) @@ -834,12 +810,10 @@ static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) return 0; } -int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, - int *pcpu) +int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu) { - int cpu; + int cpu = topology_hotplug_apic(physid, acpi_id); - cpu = acpi_register_lapic(physid, acpi_id, ACPI_MADT_ENABLED); if (cpu < 0) { pr_info("Unable to map lapic to logical cpu number\n"); return cpu; @@ -858,15 +832,11 @@ int acpi_unmap_cpu(int cpu) #ifdef CONFIG_ACPI_NUMA set_apicid_to_node(per_cpu(x86_cpu_to_apicid, cpu), NUMA_NO_NODE); #endif - - per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; - set_cpu_present(cpu, false); - num_processors--; - - return (0); + topology_hotunplug_apic(cpu); + return 0; } EXPORT_SYMBOL(acpi_unmap_cpu); -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) { -- Gitee From 1e0560be925176678c1ca9c493739a1f288a02fe Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:39 +0100 Subject: [PATCH 55/88] x86/jailhouse: Use new APIC registration function ANBZ: #26668 commit 8cd01c8a68b083e2a0af601c5464e2dfa64f1421 upstream. No functional change intended. Intel-SIG: commit 8cd01c8a68b0 x86/jailhouse: Use new APIC registration function. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210251.720970412@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/jailhouse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index bf47224c88b1..cd8ed1edbf9e 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -103,7 +103,7 @@ static void __init jailhouse_parse_smp_config(void) register_lapic_address(0xfee00000); for (cpu = 0; cpu < setup_data.v1.num_cpus; cpu++) - generic_processor_info(setup_data.v1.cpu_ids[cpu]); + topology_register_apic(setup_data.v1.cpu_ids[cpu], CPU_ACPIID_INVALID, true); smp_found_config = 1; -- Gitee From caaaf88f9974fb2e2cc4df4a498a71c60165162f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:40 +0100 Subject: [PATCH 56/88] x86/of: Use new APIC registration functions ANBZ: #26668 commit 7d319c0fcae68e489fcf806cdea46a795062eaf7 upstream. No functional change intended. Intel-SIG: commit 7d319c0fcae6 x86/of: Use new APIC registration functions. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210251.776009244@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/devicetree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index a7b3fd558044..516ae4040cab 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -136,7 +136,7 @@ static void __init dtb_cpu_setup(void) pr_warn("%pOF: missing local APIC ID\n", dn); continue; } - generic_processor_info(apic_id); + topology_register_apic(apic_id, CPU_ACPIID_INVALID, true); } } -- Gitee From d850a8603980ee2a251e7a7a06f1747496ddcff3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:42 +0100 Subject: [PATCH 57/88] x86/mpparse: Use new APIC registration function ANBZ: #26668 commit 8098428c541212e9835c1771ee90caa968ffef4f upstream. Aside of switching over to the new interface, record the number of registered CPUs locally, which allows to make num_processors and disabled_cpus confined to the topology code. No functional change intended. Intel-SIG: commit 8098428c5412 x86/mpparse: Use new APIC registration function. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210251.830955273@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/mpspec.h | 2 -- arch/x86/kernel/cpu/topology.c | 2 +- arch/x86/kernel/mpparse.c | 17 +++++++++-------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 1b79d0ee95df..c72c7ff78fcd 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -61,8 +61,6 @@ static inline void e820__memblock_alloc_reserved_mpc_new(void) { } #define mpparse_parse_smp_config x86_init_noop #endif -int generic_processor_info(int apicid); - extern DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC); static inline void reset_phys_cpu_present_map(u32 apicid) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 3dd7e6c08cb5..669e258b9e0b 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -133,7 +133,7 @@ static void cpu_update_apic(int cpu, u32 apicid) cpu_mark_primary_thread(cpu, apicid); } -int generic_processor_info(int apicid) +static int generic_processor_info(int apicid) { int cpu, max = nr_cpu_ids; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 9c000c409eb1..1ccd30c8246f 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -36,6 +36,8 @@ * Checksum an MP configuration block. */ +static unsigned int num_procs __initdata; + static int __init mpf_checksum(unsigned char *mp, int len) { int sum = 0; @@ -50,16 +52,15 @@ static void __init MP_processor_info(struct mpc_cpu *m) { char *bootup_cpu = ""; - if (!(m->cpuflag & CPU_ENABLED)) { - disabled_cpus++; + topology_register_apic(m->apicid, CPU_ACPIID_INVALID, m->cpuflag & CPU_ENABLED); + if (!(m->cpuflag & CPU_ENABLED)) return; - } if (m->cpuflag & CPU_BOOTPROCESSOR) bootup_cpu = " (Bootup-CPU)"; pr_info("Processor #%d%s\n", m->apicid, bootup_cpu); - generic_processor_info(m->apicid); + num_procs++; } #ifdef CONFIG_X86_IO_APIC @@ -236,9 +237,9 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) } } - if (!num_processors) + if (!num_procs && !acpi_lapic) pr_err("MPTABLE: no processors registered!\n"); - return num_processors; + return num_procs || acpi_lapic; } #ifdef CONFIG_X86_IO_APIC @@ -529,8 +530,8 @@ static __init void mpparse_get_smp_config(unsigned int early) } else BUG(); - if (!early) - pr_info("Processors: %d\n", num_processors); + if (!early && !acpi_lapic) + pr_info("Processors: %d\n", num_procs); /* * Only use the first configuration found. */ -- Gitee From 2dcdd9b5c44a33e2751ac15bb87102e56a003279 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:43 +0100 Subject: [PATCH 58/88] x86/acpi: Dont invoke topology_register_apic() for XEN PV ANBZ: #26668 commit cab8e164a49c0ee5c9acb7edec33d76422d831bf upstream. The MADT table for XEN/PV dom0 is not really useful and registering the APICs is momentarily a pointless exercise because XENPV does not use an APIC at all. It overrides the x86_init.mpparse.parse_smp_config() callback, resets num_processors and counts how many of them are provided by the hypervisor. This is in the way of cleaning up the APIC registration. Prevent MADT registration for XEN/PV temporarily until the rework is completed and XEN/PV can use the MADT again. Intel-SIG: commit cab8e164a49c x86/acpi: Dont invoke topology_register_apic() for XEN PV. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210251.885489468@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/acpi/boot.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 2827a6b0783e..0e28dd6a9e5c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -23,6 +23,8 @@ #include #include +#include + #include #include #include @@ -165,7 +167,8 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) static __init void acpi_register_lapic(u32 apic_id, u32 acpi_id, bool present) { - topology_register_apic(apic_id, acpi_id, present); + if (!xen_pv_domain()) + topology_register_apic(apic_id, acpi_id, present); } static bool __init acpi_is_processor_usable(u32 lapic_flags) @@ -1077,7 +1080,8 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void) return count; } - register_lapic_address(acpi_lapic_addr); + if (!xen_pv_domain()) + register_lapic_address(acpi_lapic_addr); return count; } -- Gitee From e1786d1a7c990751a6d21393f7c788c11e289f71 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:44 +0100 Subject: [PATCH 59/88] x86/xen/smp_pv: Register fake APICs ANBZ: #26668 commit e7530702346637af46bca1d114e6d63312eb3461 upstream. XENPV does not use the APIC. It's just piggy packing on the infrastructure and fiddles with global variables as it sees fit. These global variables are going away, so let XENPV register pseudo APIC IDs to keep the accounting correct and keep up the illusion that XEN/PV is something sane. Intel-SIG: commit e75307023466 x86/xen/smp_pv: Register fake APICs. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210251.940043512@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/xen/smp_pv.c | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 7f6f34056e13..98849b3f5eac 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -150,34 +151,16 @@ int xen_smp_intr_init_pv(unsigned int cpu) static void __init xen_pv_smp_config(void) { - int i, rc; - unsigned int subtract = 0; - - num_processors = 0; - disabled_cpus = 0; - for (i = 0; i < nr_cpu_ids; i++) { - rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); - if (rc >= 0) { - num_processors++; - set_cpu_possible(i, true); - } else { - set_cpu_possible(i, false); - set_cpu_present(i, false); - subtract++; - } + u32 apicid = 0; + int i; + + topology_register_boot_apic(apicid++); + + for (i = 1; i < nr_cpu_ids; i++) { + if (HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL) < 0) + break; + topology_register_apic(apicid++, CPU_ACPIID_INVALID, true); } -#ifdef CONFIG_HOTPLUG_CPU - /* This is akin to using 'nr_cpus' on the Linux command line. - * Which is OK as when we use 'dom0_max_vcpus=X' we can only - * have up to X, while nr_cpu_ids is greater than X. This - * normally is not a problem, except when CPU hotplugging - * is involved and then there might be more than X CPUs - * in the guest - which will not work as there is no - * hypercall to expand the max number of VCPUs an already - * running guest has. So cap it up to X. */ - if (subtract) - set_nr_cpu_ids(nr_cpu_ids - subtract); -#endif /* Pretend to be a proper enumerated system */ smp_found_config = 1; } -- Gitee From ebc57c37aa7346592d8d5c55fb8a16ae6cec9f82 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:46 +0100 Subject: [PATCH 60/88] x86/cpu/topology: Confine topology information ANBZ: #26668 commit 58aa34abe9954cd5dfbf322fc612146c5f45e52b upstream. Now that all external fiddling with num_processors and disabled_cpus is gone, move the last user prefill_possible_map() into the topology code too and remove the global visibility of these variables. Intel-SIG: commit 58aa34abe995 x86/cpu/topology: Confine topology information. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210251.994756960@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/smp.h | 3 -- arch/x86/kernel/apic/apic.c | 1 - arch/x86/kernel/cpu/topology.c | 76 +++++++++++++++++++++++++++++++++- arch/x86/kernel/smpboot.c | 72 -------------------------------- 4 files changed, 74 insertions(+), 78 deletions(-) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 4fab2ed454f3..f1510d6f4e84 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -9,7 +9,6 @@ #include extern int smp_num_siblings; -extern unsigned int num_processors; DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); @@ -174,8 +173,6 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu) } #endif /* CONFIG_SMP */ -extern unsigned disabled_cpus; - #ifdef CONFIG_DEBUG_NMI_SELFTEST extern void nmi_selftest(void); #else diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 1389fb20b4f1..ccb858cc18bb 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2072,7 +2072,6 @@ void __init init_apic_mappings(void) pr_info("APIC: disable apic facility\n"); apic_disable(); } - num_processors = 1; } } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 669e258b9e0b..a6c931434951 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -29,8 +29,8 @@ u32 cpuid_to_apicid[] __read_mostly = { [0 ... NR_CPUS - 1] = BAD_APICID, }; */ static u32 disabled_cpu_apicid __ro_after_init = BAD_APICID; -unsigned int num_processors; -unsigned disabled_cpus; +static unsigned int num_processors; +static unsigned int disabled_cpus; /* * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated @@ -174,6 +174,71 @@ static int generic_processor_info(int apicid) return cpu; } +static int __initdata setup_possible_cpus = -1; + +/* + * cpu_possible_mask should be static, it cannot change as cpu's + * are onlined, or offlined. The reason is per-cpu data-structures + * are allocated by some modules at init time, and don't expect to + * do this dynamically on cpu arrival/departure. + * cpu_present_mask on the other hand can change dynamically. + * In case when cpu_hotplug is not compiled, then we resort to current + * behaviour, which is cpu_possible == cpu_present. + * - Ashok Raj + * + * Three ways to find out the number of additional hotplug CPUs: + * - If the BIOS specified disabled CPUs in ACPI/mptables use that. + * - The user can overwrite it with possible_cpus=NUM + * - Otherwise don't reserve additional CPUs. + * We do this because additional CPUs waste a lot of memory. + * -AK + */ +__init void prefill_possible_map(void) +{ + int i, possible; + + i = setup_max_cpus ?: 1; + if (setup_possible_cpus == -1) { + possible = num_processors; +#ifdef CONFIG_HOTPLUG_CPU + if (setup_max_cpus) + possible += disabled_cpus; +#else + if (possible > i) + possible = i; +#endif + } else + possible = setup_possible_cpus; + + total_cpus = max_t(int, possible, num_processors + disabled_cpus); + + /* nr_cpu_ids could be reduced via nr_cpus= */ + if (possible > nr_cpu_ids) { + pr_warn("%d Processors exceeds NR_CPUS limit of %u\n", + possible, nr_cpu_ids); + possible = nr_cpu_ids; + } + +#ifdef CONFIG_HOTPLUG_CPU + if (!setup_max_cpus) +#endif + if (possible > i) { + pr_warn("%d Processors exceeds max_cpus limit of %u\n", + possible, setup_max_cpus); + possible = i; + } + + set_nr_cpu_ids(possible); + + pr_info("Allowing %d CPUs, %d hotplug CPUs\n", + possible, max_t(int, possible - num_processors, 0)); + + reset_cpu_possible_mask(); + + for (i = 0; i < possible; i++) + set_cpu_possible(i, true); +} + /** * topology_register_apic - Register an APIC in early topology maps * @apic_id: The APIC ID to set up @@ -251,6 +316,13 @@ void topology_hotunplug_apic(unsigned int cpu) } #endif +static int __init _setup_possible_cpus(char *str) +{ + get_option(&str, &setup_possible_cpus); + return 0; +} +early_param("possible_cpus", _setup_possible_cpus); + static int __init apic_set_disabled_cpu_apicid(char *arg) { if (!arg || !get_option(&arg, &disabled_cpu_apicid)) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index d8808dc02db6..4d993c2b88f1 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1280,78 +1280,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) cache_aps_init(); } -static int __initdata setup_possible_cpus = -1; -static int __init _setup_possible_cpus(char *str) -{ - get_option(&str, &setup_possible_cpus); - return 0; -} -early_param("possible_cpus", _setup_possible_cpus); - - -/* - * cpu_possible_mask should be static, it cannot change as cpu's - * are onlined, or offlined. The reason is per-cpu data-structures - * are allocated by some modules at init time, and don't expect to - * do this dynamically on cpu arrival/departure. - * cpu_present_mask on the other hand can change dynamically. - * In case when cpu_hotplug is not compiled, then we resort to current - * behaviour, which is cpu_possible == cpu_present. - * - Ashok Raj - * - * Three ways to find out the number of additional hotplug CPUs: - * - If the BIOS specified disabled CPUs in ACPI/mptables use that. - * - The user can overwrite it with possible_cpus=NUM - * - Otherwise don't reserve additional CPUs. - * We do this because additional CPUs waste a lot of memory. - * -AK - */ -__init void prefill_possible_map(void) -{ - int i, possible; - - i = setup_max_cpus ?: 1; - if (setup_possible_cpus == -1) { - possible = num_processors; -#ifdef CONFIG_HOTPLUG_CPU - if (setup_max_cpus) - possible += disabled_cpus; -#else - if (possible > i) - possible = i; -#endif - } else - possible = setup_possible_cpus; - - total_cpus = max_t(int, possible, num_processors + disabled_cpus); - - /* nr_cpu_ids could be reduced via nr_cpus= */ - if (possible > nr_cpu_ids) { - pr_warn("%d Processors exceeds NR_CPUS limit of %u\n", - possible, nr_cpu_ids); - possible = nr_cpu_ids; - } - -#ifdef CONFIG_HOTPLUG_CPU - if (!setup_max_cpus) -#endif - if (possible > i) { - pr_warn("%d Processors exceeds max_cpus limit of %u\n", - possible, setup_max_cpus); - possible = i; - } - - set_nr_cpu_ids(possible); - - pr_info("Allowing %d CPUs, %d hotplug CPUs\n", - possible, max_t(int, possible - num_processors, 0)); - - reset_cpu_possible_mask(); - - for (i = 0; i < possible; i++) - set_cpu_possible(i, true); -} - /* correctly size the local cpu masks */ void __init setup_cpu_local_masks(void) { -- Gitee From 5ffa343283cb9c446ca539c289b265f640b79b61 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:47 +0100 Subject: [PATCH 61/88] x86/cpu/topology: Simplify APIC registration ANBZ: #26668 commit 4c4c6f38704ab0e3f85f660b7479de7aa559d79a upstream. Having the same check whether the number of assigned CPUs has reached the nr_cpu_ids limit twice in the same code path is pointless. Repeating the information that CPUs are ignored over and over is also pointless noise. Remove the redundant check and reduce the noise by using a pr_warn_once(). Intel-SIG: commit 4c4c6f38704a x86/cpu/topology: Simplify APIC registration. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.050264369@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index a6c931434951..8b42918b7b58 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -107,14 +107,6 @@ static int allocate_logical_cpuid(u32 apic_id) if (cpu >= 0) return cpu; - /* Allocate a new cpuid. */ - if (nr_logical_cpuids >= nr_cpu_ids) { - WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %u reached. " - "Processor %d/0x%x and the rest are ignored.\n", - nr_cpu_ids, nr_logical_cpuids, apic_id); - return -EINVAL; - } - cpuid_to_apicid[nr_logical_cpuids] = apic_id; return nr_logical_cpuids++; } @@ -135,7 +127,7 @@ static void cpu_update_apic(int cpu, u32 apicid) static int generic_processor_info(int apicid) { - int cpu, max = nr_cpu_ids; + int cpu; /* The boot CPU must be set before MADT/MPTABLE parsing happens */ if (cpuid_to_apicid[0] == BAD_APICID) @@ -155,21 +147,12 @@ static int generic_processor_info(int apicid) } if (num_processors >= nr_cpu_ids) { - int thiscpu = max + disabled_cpus; - - pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. " - "Processor %d/0x%x ignored.\n", max, thiscpu, apicid); - + pr_warn_once("APIC: CPU limit of %d reached. Ignoring further CPUs\n", nr_cpu_ids); disabled_cpus++; - return -EINVAL; + return -ENOSPC; } cpu = allocate_logical_cpuid(apicid); - if (cpu < 0) { - disabled_cpus++; - return -EINVAL; - } - cpu_update_apic(cpu, apicid); return cpu; } -- Gitee From b1892b98c7a9e5ccdb858a05ae097282c225400f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:49 +0100 Subject: [PATCH 62/88] x86/cpu/topology: Use a data structure for topology info ANBZ: #26668 commit 72530464ed609bcdd49a760e9d0bc3b16717ff2b upstream. Put the processor accounting into a data structure, which will gain more topology related information in the next steps, and sanitize the accounting. Intel-SIG: commit 72530464ed60 x86/cpu/topology: Use a data structure for topology info. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.111451909@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 59 +++++++++++++++++----------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 8b42918b7b58..815e3eeed415 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -22,6 +22,18 @@ DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly; /* Used for CPU number allocation and parallel CPU bringup */ u32 cpuid_to_apicid[] __read_mostly = { [0 ... NR_CPUS - 1] = BAD_APICID, }; +/* + * Keep track of assigned, disabled and rejected CPUs. Present assigned + * with 1 as CPU #0 is reserved for the boot CPU. + */ +static struct { + unsigned int nr_assigned_cpus; + unsigned int nr_disabled_cpus; + unsigned int nr_rejected_cpus; +} topo_info __read_mostly = { + .nr_assigned_cpus = 1, +}; + /* * Processor to be disabled specified by kernel parameter * disable_cpu_apicid=, mostly used for the kdump 2nd kernel to @@ -29,19 +41,6 @@ u32 cpuid_to_apicid[] __read_mostly = { [0 ... NR_CPUS - 1] = BAD_APICID, }; */ static u32 disabled_cpu_apicid __ro_after_init = BAD_APICID; -static unsigned int num_processors; -static unsigned int disabled_cpus; - -/* - * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated - * contiguously, it equals to current allocated max logical CPU ID plus 1. - * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range, - * so the maximum of nr_logical_cpuids is nr_cpu_ids. - * - * NOTE: Reserve 0 for BSP. - */ -static int nr_logical_cpuids = 1; - bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { return phys_id == (u64)cpuid_to_apicid[cpu]; @@ -75,7 +74,7 @@ static int __init smp_init_primary_thread_mask(void) return 0; } - for (cpu = 0; cpu < nr_logical_cpuids; cpu++) + for (cpu = 0; cpu < topo_info.nr_assigned_cpus; cpu++) cpu_mark_primary_thread(cpu, cpuid_to_apicid[cpu]); return 0; } @@ -89,7 +88,7 @@ static int topo_lookup_cpuid(u32 apic_id) int i; /* CPU# to APICID mapping is persistent once it is established */ - for (i = 0; i < nr_logical_cpuids; i++) { + for (i = 0; i < topo_info.nr_assigned_cpus; i++) { if (cpuid_to_apicid[i] == apic_id) return i; } @@ -107,22 +106,21 @@ static int allocate_logical_cpuid(u32 apic_id) if (cpu >= 0) return cpu; - cpuid_to_apicid[nr_logical_cpuids] = apic_id; - return nr_logical_cpuids++; + return topo_info.nr_assigned_cpus++; } -static void cpu_update_apic(int cpu, u32 apicid) +static void cpu_update_apic(unsigned int cpu, u32 apic_id) { #if defined(CONFIG_SMP) || defined(CONFIG_X86_64) - early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; + early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id; #endif + cpuid_to_apicid[cpu] = apic_id; set_cpu_possible(cpu, true); - set_bit(apicid, phys_cpu_present_map); + set_bit(apic_id, phys_cpu_present_map); set_cpu_present(cpu, true); - num_processors++; if (system_state != SYSTEM_BOOTING) - cpu_mark_primary_thread(cpu, apicid); + cpu_mark_primary_thread(cpu, apic_id); } static int generic_processor_info(int apicid) @@ -137,18 +135,18 @@ static int generic_processor_info(int apicid) return 0; if (disabled_cpu_apicid == apicid) { - int thiscpu = num_processors + disabled_cpus; + int thiscpu = topo_info.nr_assigned_cpus + topo_info.nr_disabled_cpus; pr_warn("APIC: Disabling requested cpu. Processor %d/0x%x ignored.\n", thiscpu, apicid); - disabled_cpus++; + topo_info.nr_rejected_cpus++; return -ENODEV; } - if (num_processors >= nr_cpu_ids) { + if (topo_info.nr_assigned_cpus >= nr_cpu_ids) { pr_warn_once("APIC: CPU limit of %d reached. Ignoring further CPUs\n", nr_cpu_ids); - disabled_cpus++; + topo_info.nr_rejected_cpus++; return -ENOSPC; } @@ -178,14 +176,16 @@ static int __initdata setup_possible_cpus = -1; */ __init void prefill_possible_map(void) { + unsigned int num_processors = topo_info.nr_assigned_cpus; + unsigned int disabled_cpus = topo_info.nr_disabled_cpus; int i, possible; i = setup_max_cpus ?: 1; if (setup_possible_cpus == -1) { - possible = num_processors; + possible = topo_info.nr_assigned_cpus; #ifdef CONFIG_HOTPLUG_CPU if (setup_max_cpus) - possible += disabled_cpus; + possible += num_processors; #else if (possible > i) possible = i; @@ -238,7 +238,7 @@ void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present) } if (!present) { - disabled_cpus++; + topo_info.nr_disabled_cpus++; return; } @@ -295,7 +295,6 @@ void topology_hotunplug_apic(unsigned int cpu) per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; clear_bit(apic_id, phys_cpu_present_map); set_cpu_present(cpu, false); - num_processors--; } #endif -- Gitee From ca1f4b5ec3b07f05f5baef4683386bca7e7410e9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:50 +0100 Subject: [PATCH 63/88] x86/smpboot: Make error message actually useful ANBZ: #26668 commit 6055f6cf0d462fa0d9212a8279b6b0d1130552e1 upstream. "smpboot: native_kick_ap: bad cpu 33" is absolutely useless information. Replace it with something meaningful which allows to decode the failure condition. Intel-SIG: commit 6055f6cf0d46 x86/smpboot: Make error message actually useful. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.170806023@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/smpboot.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 4d993c2b88f1..5f3155e09113 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1061,9 +1061,13 @@ int native_kick_ap(unsigned int cpu, struct task_struct *tidle) pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); - if (apicid == BAD_APICID || !test_bit(apicid, phys_cpu_present_map) || - !apic_id_valid(apicid)) { - pr_err("%s: bad cpu %d\n", __func__, cpu); + if (apicid == BAD_APICID || !apic_id_valid(apicid)) { + pr_err("CPU %u has invalid APIC ID %x. Aborting bringup\n", cpu, apicid); + return -EINVAL; + } + + if (!test_bit(apicid, phys_cpu_present_map)) { + pr_err("CPU %u APIC ID %x is not present. Aborting bringup\n", cpu, apicid); return -EINVAL; } -- Gitee From 55acb16d5c5c9ba6af309f99149948663c9b332c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:52 +0100 Subject: [PATCH 64/88] x86/cpu/topology: Sanitize the APIC admission logic ANBZ: #26668 commit 0e53e7b656cf5aa67c08eca381cec858478195a7 upstream. Move the actually required content of generic_processor_id() into the call sites and use common helper functions for them. This separates the early boot registration and the ACPI hotplug mechanism completely which allows further cleanups and improvements. Intel-SIG: commit 0e53e7b656cf x86/cpu/topology: Sanitize the APIC admission logic. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.230433953@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 157 ++++++++++++++++----------------- 1 file changed, 75 insertions(+), 82 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 815e3eeed415..71d5500e6d19 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -30,8 +30,10 @@ static struct { unsigned int nr_assigned_cpus; unsigned int nr_disabled_cpus; unsigned int nr_rejected_cpus; + u32 boot_cpu_apic_id; } topo_info __read_mostly = { .nr_assigned_cpus = 1, + .boot_cpu_apic_id = BAD_APICID, }; /* @@ -83,78 +85,6 @@ early_initcall(smp_init_primary_thread_mask); static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } #endif -static int topo_lookup_cpuid(u32 apic_id) -{ - int i; - - /* CPU# to APICID mapping is persistent once it is established */ - for (i = 0; i < topo_info.nr_assigned_cpus; i++) { - if (cpuid_to_apicid[i] == apic_id) - return i; - } - return -ENODEV; -} - -/* - * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids - * and cpuid_to_apicid[] synchronized. - */ -static int allocate_logical_cpuid(u32 apic_id) -{ - int cpu = topo_lookup_cpuid(apic_id); - - if (cpu >= 0) - return cpu; - - return topo_info.nr_assigned_cpus++; -} - -static void cpu_update_apic(unsigned int cpu, u32 apic_id) -{ -#if defined(CONFIG_SMP) || defined(CONFIG_X86_64) - early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id; -#endif - cpuid_to_apicid[cpu] = apic_id; - set_cpu_possible(cpu, true); - set_bit(apic_id, phys_cpu_present_map); - set_cpu_present(cpu, true); - - if (system_state != SYSTEM_BOOTING) - cpu_mark_primary_thread(cpu, apic_id); -} - -static int generic_processor_info(int apicid) -{ - int cpu; - - /* The boot CPU must be set before MADT/MPTABLE parsing happens */ - if (cpuid_to_apicid[0] == BAD_APICID) - panic("Boot CPU APIC not registered yet\n"); - - if (apicid == boot_cpu_physical_apicid) - return 0; - - if (disabled_cpu_apicid == apicid) { - int thiscpu = topo_info.nr_assigned_cpus + topo_info.nr_disabled_cpus; - - pr_warn("APIC: Disabling requested cpu. Processor %d/0x%x ignored.\n", - thiscpu, apicid); - - topo_info.nr_rejected_cpus++; - return -ENODEV; - } - - if (topo_info.nr_assigned_cpus >= nr_cpu_ids) { - pr_warn_once("APIC: CPU limit of %d reached. Ignoring further CPUs\n", nr_cpu_ids); - topo_info.nr_rejected_cpus++; - return -ENOSPC; - } - - cpu = allocate_logical_cpuid(apicid); - cpu_update_apic(cpu, apicid); - return cpu; -} - static int __initdata setup_possible_cpus = -1; /* @@ -222,6 +152,43 @@ __init void prefill_possible_map(void) set_cpu_possible(i, true); } +static int topo_lookup_cpuid(u32 apic_id) +{ + int i; + + /* CPU# to APICID mapping is persistent once it is established */ + for (i = 0; i < topo_info.nr_assigned_cpus; i++) { + if (cpuid_to_apicid[i] == apic_id) + return i; + } + return -ENODEV; +} + +static int topo_get_cpunr(u32 apic_id) +{ + int cpu = topo_lookup_cpuid(apic_id); + + if (cpu >= 0) + return cpu; + + return topo_info.nr_assigned_cpus++; +} + +static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) +{ +#if defined(CONFIG_SMP) || defined(CONFIG_X86_64) + early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id; + early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; +#endif + cpuid_to_apicid[cpu] = apic_id; + + set_cpu_possible(cpu, true); + set_cpu_present(cpu, true); + + if (system_state != SYSTEM_BOOTING) + cpu_mark_primary_thread(cpu, apic_id); +} + /** * topology_register_apic - Register an APIC in early topology maps * @apic_id: The APIC ID to set up @@ -234,17 +201,40 @@ void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present) if (apic_id >= MAX_LOCAL_APIC) { pr_err_once("APIC ID %x exceeds kernel limit of: %x\n", apic_id, MAX_LOCAL_APIC - 1); + topo_info.nr_rejected_cpus++; return; } - if (!present) { - topo_info.nr_disabled_cpus++; + if (disabled_cpu_apicid == apic_id) { + pr_info("Disabling CPU as requested via 'disable_cpu_apicid=0x%x'.\n", apic_id); + topo_info.nr_rejected_cpus++; return; } - cpu = generic_processor_info(apic_id); - if (cpu >= 0) - early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; + /* CPU numbers exhausted? */ + if (apic_id != topo_info.boot_cpu_apic_id && topo_info.nr_assigned_cpus >= nr_cpu_ids) { + pr_warn_once("CPU limit of %d reached. Ignoring further CPUs\n", nr_cpu_ids); + topo_info.nr_rejected_cpus++; + return; + } + + if (present) { + set_bit(apic_id, phys_cpu_present_map); + + /* + * Double registration is valid in case of the boot CPU + * APIC because that is registered before the enumeration + * of the APICs via firmware parsers or VM guest + * mechanisms. + */ + if (apic_id == topo_info.boot_cpu_apic_id) + cpu = 0; + else + cpu = topo_get_cpunr(apic_id); + topo_set_cpuids(cpu, apic_id, acpi_id); + } else { + topo_info.nr_disabled_cpus++; + } } /** @@ -255,8 +245,10 @@ void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present) */ void __init topology_register_boot_apic(u32 apic_id) { - cpuid_to_apicid[0] = apic_id; - cpu_update_apic(0, apic_id); + WARN_ON_ONCE(topo_info.boot_cpu_apic_id != BAD_APICID); + + topo_info.boot_cpu_apic_id = apic_id; + topology_register_apic(apic_id, CPU_ACPIID_INVALID, true); } #ifdef CONFIG_ACPI_HOTPLUG_CPU @@ -274,10 +266,11 @@ int topology_hotplug_apic(u32 apic_id, u32 acpi_id) cpu = topo_lookup_cpuid(apic_id); if (cpu < 0) { - cpu = generic_processor_info(apic_id); - if (cpu >= 0) - per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; + if (topo_info.nr_assigned_cpus >= nr_cpu_ids) + return -ENOSPC; } + set_bit(apic_id, phys_cpu_present_map); + topo_set_cpuids(cpu, apic_id, acpi_id); return cpu; } -- Gitee From 069c53cbadacb728800943972a81028894b3512a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:53 +0100 Subject: [PATCH 65/88] x86/cpu/topology: Rework possible CPU management ANBZ: #26668 commit 7c0edad3643f4493c4dafa6f5dfcfb1a86432156 upstream. Managing possible CPUs is an unreadable and uncomprehensible maze. Aside of that it's backwards because it applies command line limits after registering all APICs. Rewrite it so that it: - Applies the command line limits upfront so that only the allowed amount of APIC IDs can be registered. - Applies eventual late restrictions in an understandable way - Uses simple min_t() calculations which are trivial to follow. - Provides a separate function for resetting to UP mode late in the bringup process. Intel-SIG: commit 7c0edad3643f x86/cpu/topology: Rework possible CPU management. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.290098853@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/apic.h | 5 + arch/x86/include/asm/cpu.h | 10 +- arch/x86/include/asm/topology.h | 1 + arch/x86/kernel/cpu/topology.c | 176 +++++++++++++++++++------------- arch/x86/kernel/setup.c | 9 +- arch/x86/kernel/smpboot.c | 6 +- 6 files changed, 118 insertions(+), 89 deletions(-) diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a4fdd2f53ed7..ddc1e2dc211a 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -181,6 +181,9 @@ extern void topology_register_apic(u32 apic_id, u32 acpi_id, bool present); extern void topology_register_boot_apic(u32 apic_id); extern int topology_hotplug_apic(u32 apic_id, u32 acpi_id); extern void topology_hotunplug_apic(unsigned int cpu); +extern void topology_apply_cmdline_limits_early(void); +extern void topology_init_possible_cpus(void); +extern void topology_reset_possible_cpus_up(void); #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } @@ -196,6 +199,8 @@ static inline void apic_intr_mode_init(void) { } static inline void lapic_assign_system_vectors(void) { } static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } static inline bool apic_needs_pit(void) { return true; } +static inline void topology_apply_cmdline_limits_early(void) { } +static inline void topology_init_possible_cpus(void) { } #endif /* !CONFIG_X86_LOCAL_APIC */ #ifdef CONFIG_X86_X2APIC diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 982b5aae556c..f4ca0c77b9a8 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -9,18 +9,10 @@ #include #include -#ifdef CONFIG_SMP - -extern void prefill_possible_map(void); - -#else /* CONFIG_SMP */ - -static inline void prefill_possible_map(void) {} - +#ifndef CONFIG_SMP #define cpu_physical_id(cpu) boot_cpu_physical_apicid #define cpu_acpi_id(cpu) 0 #define safe_smp_processor_id() 0 - #endif /* CONFIG_SMP */ struct x86_cpu { diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 2d3b65803033..f3ef2b6d6234 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -190,6 +190,7 @@ static inline bool topology_is_primary_thread(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_primary_thread_mask); } + #else /* CONFIG_SMP */ #define topology_max_packages() (1) static inline int diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 71d5500e6d19..223960d9d2a8 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -85,73 +86,6 @@ early_initcall(smp_init_primary_thread_mask); static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } #endif -static int __initdata setup_possible_cpus = -1; - -/* - * cpu_possible_mask should be static, it cannot change as cpu's - * are onlined, or offlined. The reason is per-cpu data-structures - * are allocated by some modules at init time, and don't expect to - * do this dynamically on cpu arrival/departure. - * cpu_present_mask on the other hand can change dynamically. - * In case when cpu_hotplug is not compiled, then we resort to current - * behaviour, which is cpu_possible == cpu_present. - * - Ashok Raj - * - * Three ways to find out the number of additional hotplug CPUs: - * - If the BIOS specified disabled CPUs in ACPI/mptables use that. - * - The user can overwrite it with possible_cpus=NUM - * - Otherwise don't reserve additional CPUs. - * We do this because additional CPUs waste a lot of memory. - * -AK - */ -__init void prefill_possible_map(void) -{ - unsigned int num_processors = topo_info.nr_assigned_cpus; - unsigned int disabled_cpus = topo_info.nr_disabled_cpus; - int i, possible; - - i = setup_max_cpus ?: 1; - if (setup_possible_cpus == -1) { - possible = topo_info.nr_assigned_cpus; -#ifdef CONFIG_HOTPLUG_CPU - if (setup_max_cpus) - possible += num_processors; -#else - if (possible > i) - possible = i; -#endif - } else - possible = setup_possible_cpus; - - total_cpus = max_t(int, possible, num_processors + disabled_cpus); - - /* nr_cpu_ids could be reduced via nr_cpus= */ - if (possible > nr_cpu_ids) { - pr_warn("%d Processors exceeds NR_CPUS limit of %u\n", - possible, nr_cpu_ids); - possible = nr_cpu_ids; - } - -#ifdef CONFIG_HOTPLUG_CPU - if (!setup_max_cpus) -#endif - if (possible > i) { - pr_warn("%d Processors exceeds max_cpus limit of %u\n", - possible, setup_max_cpus); - possible = i; - } - - set_nr_cpu_ids(possible); - - pr_info("Allowing %d CPUs, %d hotplug CPUs\n", - possible, max_t(int, possible - num_processors, 0)); - - reset_cpu_possible_mask(); - - for (i = 0; i < possible; i++) - set_cpu_possible(i, true); -} - static int topo_lookup_cpuid(u32 apic_id) { int i; @@ -291,12 +225,114 @@ void topology_hotunplug_apic(unsigned int cpu) } #endif -static int __init _setup_possible_cpus(char *str) +#ifdef CONFIG_SMP +static unsigned int max_possible_cpus __initdata = NR_CPUS; + +/** + * topology_apply_cmdline_limits_early - Apply topology command line limits early + * + * Ensure that command line limits are in effect before firmware parsing + * takes place. + */ +void __init topology_apply_cmdline_limits_early(void) { - get_option(&str, &setup_possible_cpus); + unsigned int possible = nr_cpu_ids; + + /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */ + if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled) + possible = 1; + + /* 'possible_cpus=N' */ + possible = min_t(unsigned int, max_possible_cpus, possible); + + if (possible < nr_cpu_ids) { + pr_info("Limiting to %u possible CPUs\n", possible); + set_nr_cpu_ids(possible); + } +} + +static __init bool restrict_to_up(void) +{ + if (!smp_found_config || ioapic_is_disabled) + return true; + /* + * XEN PV is special as it does not advertise the local APIC + * properly, but provides a fake topology for it so that the + * infrastructure works. So don't apply the restrictions vs. APIC + * here. + */ + if (xen_pv_domain()) + return false; + + return apic_is_disabled; +} + +void __init topology_init_possible_cpus(void) +{ + unsigned int assigned = topo_info.nr_assigned_cpus; + unsigned int disabled = topo_info.nr_disabled_cpus; + unsigned int total = assigned + disabled; + unsigned int cpu, allowed = 1; + + if (!restrict_to_up()) { + if (WARN_ON_ONCE(assigned > nr_cpu_ids)) { + disabled += assigned - nr_cpu_ids; + assigned = nr_cpu_ids; + } + allowed = min_t(unsigned int, total, nr_cpu_ids); + } + + if (total > allowed) + pr_warn("%u possible CPUs exceed the limit of %u\n", total, allowed); + + assigned = min_t(unsigned int, allowed, assigned); + disabled = allowed - assigned; + + topo_info.nr_assigned_cpus = assigned; + topo_info.nr_disabled_cpus = disabled; + + total_cpus = allowed; + set_nr_cpu_ids(allowed); + + pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled); + if (topo_info.nr_rejected_cpus) + pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus); + + init_cpu_present(cpumask_of(0)); + init_cpu_possible(cpumask_of(0)); + + for (cpu = 0; cpu < allowed; cpu++) { + u32 apicid = cpuid_to_apicid[cpu]; + + set_cpu_possible(cpu, true); + + if (apicid == BAD_APICID) + continue; + + set_cpu_present(cpu, test_bit(apicid, phys_cpu_present_map)); + } +} + +/* + * Late SMP disable after sizing CPU masks when APIC/IOAPIC setup failed. + */ +void __init topology_reset_possible_cpus_up(void) +{ + init_cpu_present(cpumask_of(0)); + init_cpu_possible(cpumask_of(0)); + + bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC); + if (topo_info.boot_cpu_apic_id != BAD_APICID) + set_bit(topo_info.boot_cpu_apic_id, phys_cpu_present_map); +} + +static int __init setup_possible_cpus(char *str) +{ + get_option(&str, &max_possible_cpus); return 0; } -early_param("possible_cpus", _setup_possible_cpus); +early_param("possible_cpus", setup_possible_cpus); +#endif static int __init apic_set_disabled_cpu_apicid(char *arg) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c5d56efcd281..546633cefa22 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1312,6 +1312,8 @@ void __init setup_arch(char **cmdline_p) early_quirks(); + topology_apply_cmdline_limits_early(); + /* * Parse SMP configuration. Try ACPI first and then the platform * specific parser. @@ -1319,13 +1321,10 @@ void __init setup_arch(char **cmdline_p) acpi_boot_init(); x86_init.mpparse.parse_smp_cfg(); - /* - * Systems w/o ACPI and mptables might not have it mapped the local - * APIC yet, but prefill_possible_map() might need to access it. - */ + /* Last opportunity to detect and map the local APIC */ init_apic_mappings(); - prefill_possible_map(); + topology_init_possible_cpus(); init_cpu_to_node(); init_gi_nodes(); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5f3155e09113..aa72b884cae2 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1136,11 +1136,7 @@ static __init void disable_smp(void) pr_info("SMP disabled\n"); disable_ioapic_support(); - - init_cpu_present(cpumask_of(0)); - init_cpu_possible(cpumask_of(0)); - - reset_phys_cpu_present_map(smp_found_config ? boot_cpu_physical_apicid : 0); + topology_reset_possible_cpus_up(); cpumask_set_cpu(0, topology_sibling_cpumask(0)); cpumask_set_cpu(0, topology_core_cpumask(0)); -- Gitee From f47812df9a15fe25bb7f4d80b77a4580f5836111 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:54 +0100 Subject: [PATCH 66/88] x86/cpu: Detect real BSP on crash kernels ANBZ: #26668 commit 5c5682b9f87a3b7bd4833884f300ec673685f6a6 upstream. When a kdump kernel is started from a crashing CPU then there is no guarantee that this CPU is the real boot CPU (BSP). If the kdump kernel tries to online the BSP then the INIT sequence will reset the machine. There is a command line option to prevent this, but in case of nested kdump kernels this is wrong. But that command line option is not required at all because the real BSP is enumerated as the first CPU by firmware. Support for the only known system which was different (Voyager) got removed long ago. Detect whether the boot CPU APIC ID is the first APIC ID enumerated by the firmware. If the first APIC ID enumerated is not matching the boot CPU APIC ID then skip registering it. Intel-SIG: commit 5c5682b9f87a x86/cpu: Detect real BSP on crash kernels. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.348542071@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- Documentation/admin-guide/kdump/kdump.rst | 7 +- .../admin-guide/kernel-parameters.txt | 9 -- arch/x86/kernel/cpu/topology.c | 97 +++++++++++-------- 3 files changed, 61 insertions(+), 52 deletions(-) diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index a748e7eb4429..7ab6b46f1244 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -191,9 +191,7 @@ Dump-capture kernel config options (Arch Dependent, i386 and x86_64) CPU is enough for kdump kernel to dump vmcore on most of systems. However, you can also specify nr_cpus=X to enable multiple processors - in kdump kernel. In this case, "disable_cpu_apicid=" is needed to - tell kdump kernel which cpu is 1st kernel's BSP. Please refer to - admin-guide/kernel-parameters.txt for more details. + in kdump kernel. With CONFIG_SMP=n, the above things are not related. @@ -485,8 +483,7 @@ Notes on loading the dump-capture kernel: to use multi-thread programs with it, such as parallel dump feature of makedumpfile. Otherwise, the multi-thread program may have a great performance degradation. To enable multi-cpu support, you should bring up an - SMP dump-capture kernel and specify maxcpus/nr_cpus, disable_cpu_apicid=[X] - options while loading it. + SMP dump-capture kernel and specify maxcpus/nr_cpus options while loading it. * For s390x there are two kdump modes: If a ELF header is specified with the elfcorehdr= kernel parameter, it is used by the kdump kernel as it diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 7b151e24a562..ae26c0b0f4fc 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1085,15 +1085,6 @@ Disable TLBIE instruction. Currently does not work with KVM, with HASH MMU, or with coherent accelerators. - disable_cpu_apicid= [X86,APIC,SMP] - Format: - The number of initial APIC ID for the - corresponding CPU to be disabled at boot, - mostly used for the kdump 2nd kernel to - disable BSP to wake up multiple CPUs without - causing system reset or hang due to sending - INIT from AP to BSP. - disable_ddw [PPC/PSERIES] Disable Dynamic DMA Window support. Use this to workaround buggy firmware. diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 223960d9d2a8..831f0a241da5 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -32,18 +32,13 @@ static struct { unsigned int nr_disabled_cpus; unsigned int nr_rejected_cpus; u32 boot_cpu_apic_id; + u32 real_bsp_apic_id; } topo_info __read_mostly = { .nr_assigned_cpus = 1, .boot_cpu_apic_id = BAD_APICID, + .real_bsp_apic_id = BAD_APICID, }; -/* - * Processor to be disabled specified by kernel parameter - * disable_cpu_apicid=, mostly used for the kdump 2nd kernel to - * avoid undefined behaviour caused by sending INIT from AP to BSP. - */ -static u32 disabled_cpu_apicid __ro_after_init = BAD_APICID; - bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { return phys_id == (u64)cpuid_to_apicid[cpu]; @@ -123,6 +118,60 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) cpu_mark_primary_thread(cpu, apic_id); } +static __init bool check_for_real_bsp(u32 apic_id) +{ + /* + * There is no real good way to detect whether this a kdump() + * kernel, but except on the Voyager SMP monstrosity which is not + * longer supported, the real BSP APIC ID is the first one which is + * enumerated by firmware. That allows to detect whether the boot + * CPU is the real BSP. If it is not, then do not register the APIC + * because sending INIT to the real BSP would reset the whole + * system. + * + * The first APIC ID which is enumerated by firmware is detectable + * because the boot CPU APIC ID is registered before that without + * invoking this code. + */ + if (topo_info.real_bsp_apic_id != BAD_APICID) + return false; + + if (apic_id == topo_info.boot_cpu_apic_id) { + topo_info.real_bsp_apic_id = apic_id; + return false; + } + + pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x > %x\n", + topo_info.boot_cpu_apic_id, apic_id); + pr_warn("Crash kernel detected. Disabling real BSP to prevent machine INIT\n"); + + topo_info.real_bsp_apic_id = apic_id; + return true; +} + +static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) +{ + int cpu; + + if (present) { + set_bit(apic_id, phys_cpu_present_map); + + /* + * Double registration is valid in case of the boot CPU + * APIC because that is registered before the enumeration + * of the APICs via firmware parsers or VM guest + * mechanisms. + */ + if (apic_id == topo_info.boot_cpu_apic_id) + cpu = 0; + else + cpu = topo_get_cpunr(apic_id); + topo_set_cpuids(cpu, apic_id, acpi_id); + } else { + topo_info.nr_disabled_cpus++; + } +} + /** * topology_register_apic - Register an APIC in early topology maps * @apic_id: The APIC ID to set up @@ -131,16 +180,13 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) */ void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present) { - int cpu; - if (apic_id >= MAX_LOCAL_APIC) { pr_err_once("APIC ID %x exceeds kernel limit of: %x\n", apic_id, MAX_LOCAL_APIC - 1); topo_info.nr_rejected_cpus++; return; } - if (disabled_cpu_apicid == apic_id) { - pr_info("Disabling CPU as requested via 'disable_cpu_apicid=0x%x'.\n", apic_id); + if (check_for_real_bsp(apic_id)) { topo_info.nr_rejected_cpus++; return; } @@ -152,23 +198,7 @@ void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present) return; } - if (present) { - set_bit(apic_id, phys_cpu_present_map); - - /* - * Double registration is valid in case of the boot CPU - * APIC because that is registered before the enumeration - * of the APICs via firmware parsers or VM guest - * mechanisms. - */ - if (apic_id == topo_info.boot_cpu_apic_id) - cpu = 0; - else - cpu = topo_get_cpunr(apic_id); - topo_set_cpuids(cpu, apic_id, acpi_id); - } else { - topo_info.nr_disabled_cpus++; - } + topo_register_apic(apic_id, acpi_id, present); } /** @@ -182,7 +212,7 @@ void __init topology_register_boot_apic(u32 apic_id) WARN_ON_ONCE(topo_info.boot_cpu_apic_id != BAD_APICID); topo_info.boot_cpu_apic_id = apic_id; - topology_register_apic(apic_id, CPU_ACPIID_INVALID, true); + topo_register_apic(apic_id, CPU_ACPIID_INVALID, true); } #ifdef CONFIG_ACPI_HOTPLUG_CPU @@ -333,12 +363,3 @@ static int __init setup_possible_cpus(char *str) } early_param("possible_cpus", setup_possible_cpus); #endif - -static int __init apic_set_disabled_cpu_apicid(char *arg) -{ - if (!arg || !get_option(&arg, &disabled_cpu_apicid)) - return -EINVAL; - - return 0; -} -early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid); -- Gitee From 461f048b6d59d0b9ee4464bea68f3e6dc64cd6ef Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:56 +0100 Subject: [PATCH 67/88] x86/topology: Add a mechanism to track topology via APIC IDs ANBZ: #26668 commit f1f758a80516775b5d12d7c93cbedb2a08cd4c98 upstream. Topology on X86 is determined by the registered APIC IDs and the segmentation information retrieved from CPUID. Depending on the granularity of the provided CPUID information the most fine grained scheme looks like this according to Intel terminology: [PKG][DIEGRP][DIE][TILE][MODULE][CORE][THREAD] Not enumerated domain levels consume 0 bits in the APIC ID. This allows to provide a consistent view at the topology and determine other information precisely like the number of cores in a package on hybrid systems, where the existing assumption that number or cores == number of threads / threads per core does not hold. Provide per domain level bitmaps which record the APIC ID split into the domain levels to make later evaluation of domain level specific information simple. This allows to calculate e.g. the logical IDs without any further extra logic. Contrary to the existing registration mechanism this records disabled CPUs, which are subject to later hotplug as well. That's useful for boot time sizing of package or die dependent allocations without using heuristics. Intel-SIG: commit f1f758a80516 x86/topology: Add a mechanism to track topology via APIC IDs. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.406985021@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 48 ++++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 831f0a241da5..b056c0930570 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -1,5 +1,27 @@ // SPDX-License-Identifier: GPL-2.0-only - +/* + * CPU/APIC topology + * + * The APIC IDs describe the system topology in multiple domain levels. + * The CPUID topology parser provides the information which part of the + * APIC ID is associated to the individual levels: + * + * [PACKAGE][DIEGRP][DIE][TILE][MODULE][CORE][THREAD] + * + * The root space contains the package (socket) IDs. + * + * Not enumerated levels consume 0 bits space, but conceptually they are + * always represented. If e.g. only CORE and THREAD levels are enumerated + * then the DIE, MODULE and TILE have the same physical ID as the PACKAGE. + * + * If SMT is not supported, then the THREAD domain is still used. It then + * has the same physical ID as the CORE domain and is the only child of + * the core domain. + * + * This allows a unified view on the system independent of the enumerated + * domain levels without requiring any conditionals in the code. + */ +#define pr_fmt(fmt) "CPU topo: " fmt #include #include @@ -9,6 +31,8 @@ #include #include +#include "cpu.h" + /* * Map cpu index to physical APIC ID */ @@ -23,6 +47,9 @@ DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly; /* Used for CPU number allocation and parallel CPU bringup */ u32 cpuid_to_apicid[] __read_mostly = { [0 ... NR_CPUS - 1] = BAD_APICID, }; +/* Bitmaps to mark registered APICs at each topology domain */ +static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init; + /* * Keep track of assigned, disabled and rejected CPUs. Present assigned * with 1 as CPU #0 is reserved for the boot CPU. @@ -39,6 +66,8 @@ static struct { .real_bsp_apic_id = BAD_APICID, }; +#define domain_weight(_dom) bitmap_weight(apic_maps[_dom].map, MAX_LOCAL_APIC) + bool arch_match_cpu_phys_id(int cpu, u64 phys_id) { return phys_id == (u64)cpuid_to_apicid[cpu]; @@ -81,6 +110,17 @@ early_initcall(smp_init_primary_thread_mask); static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } #endif +/* + * Convert the APIC ID to a domain level ID by masking out the low bits + * below the domain level @dom. + */ +static inline u32 topo_apicid(u32 apicid, enum x86_topology_domains dom) +{ + if (dom == TOPO_SMT_DOMAIN) + return apicid; + return apicid & (UINT_MAX << x86_topo_system.dom_shifts[dom - 1]); +} + static int topo_lookup_cpuid(u32 apic_id) { int i; @@ -151,7 +191,7 @@ static __init bool check_for_real_bsp(u32 apic_id) static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) { - int cpu; + int cpu, dom; if (present) { set_bit(apic_id, phys_cpu_present_map); @@ -170,6 +210,10 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) } else { topo_info.nr_disabled_cpus++; } + + /* Register present and possible CPUs in the domain maps */ + for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) + set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map); } /** -- Gitee From d73c9a61e4b9cd2869ebfb051b9f7c97e31e952f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:57 +0100 Subject: [PATCH 68/88] x86/cpu/topology: Reject unknown APIC IDs on ACPI hotplug ANBZ: #26668 commit 7cdcdab1a660bbe9f98bf1591c048ce7ccee59e0 upstream. The topology bitmaps track all possible APIC IDs which have been registered during enumeration. As sizing and further topology information is going to be derived from these bitmaps, reject attempts to hotplug an APIC ID which was not registered during enumeration. Intel-SIG: commit 7cdcdab1a660 x86/cpu/topology: Reject unknown APIC IDs on ACPI hotplug. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.462231229@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index b056c0930570..2f977aa30623 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -272,6 +272,10 @@ int topology_hotplug_apic(u32 apic_id, u32 acpi_id) if (apic_id >= MAX_LOCAL_APIC) return -EINVAL; + /* Reject if the APIC ID was not registered during enumeration. */ + if (!test_bit(apic_id, apic_maps[TOPO_SMT_DOMAIN].map)) + return -ENODEV; + cpu = topo_lookup_cpuid(apic_id); if (cpu < 0) { if (topo_info.nr_assigned_cpus >= nr_cpu_ids) -- Gitee From e70bab19efaf4d235106182e1f27cf5ee285bc75 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:05:59 +0100 Subject: [PATCH 69/88] x86/cpu/topology: Assign hotpluggable CPUIDs during init ANBZ: #26668 commit ea2dd8a5d4361ef0b000196043fa407f05b16f1d upstream. There is no point in assigning the CPU numbers during ACPI physical hotplug. The number of possible hotplug CPUs is known when the possible map is initialized, so the CPU numbers can be associated to the registered non-present APIC IDs right there. This allows to put more code into the __init section and makes the related data __ro_after_init. Intel-SIG: commit ea2dd8a5d436 x86/cpu/topology: Assign hotpluggable CPUIDs during init. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.517339971@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 2f977aa30623..b4f6791cba49 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -45,7 +45,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid); DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly; /* Used for CPU number allocation and parallel CPU bringup */ -u32 cpuid_to_apicid[] __read_mostly = { [0 ... NR_CPUS - 1] = BAD_APICID, }; +u32 cpuid_to_apicid[] __ro_after_init = { [0 ... NR_CPUS - 1] = BAD_APICID, }; /* Bitmaps to mark registered APICs at each topology domain */ static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init; @@ -60,7 +60,7 @@ static struct { unsigned int nr_rejected_cpus; u32 boot_cpu_apic_id; u32 real_bsp_apic_id; -} topo_info __read_mostly = { +} topo_info __ro_after_init = { .nr_assigned_cpus = 1, .boot_cpu_apic_id = BAD_APICID, .real_bsp_apic_id = BAD_APICID, @@ -133,7 +133,7 @@ static int topo_lookup_cpuid(u32 apic_id) return -ENODEV; } -static int topo_get_cpunr(u32 apic_id) +static __init int topo_get_cpunr(u32 apic_id) { int cpu = topo_lookup_cpuid(apic_id); @@ -149,8 +149,6 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id; early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; #endif - cpuid_to_apicid[cpu] = apic_id; - set_cpu_possible(cpu, true); set_cpu_present(cpu, true); @@ -206,6 +204,8 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) cpu = 0; else cpu = topo_get_cpunr(apic_id); + + cpuid_to_apicid[cpu] = apic_id; topo_set_cpuids(cpu, apic_id, acpi_id); } else { topo_info.nr_disabled_cpus++; @@ -277,10 +277,8 @@ int topology_hotplug_apic(u32 apic_id, u32 acpi_id) return -ENODEV; cpu = topo_lookup_cpuid(apic_id); - if (cpu < 0) { - if (topo_info.nr_assigned_cpus >= nr_cpu_ids) - return -ENOSPC; - } + if (cpu < 0) + return -ENOSPC; set_bit(apic_id, phys_cpu_present_map); topo_set_cpuids(cpu, apic_id, acpi_id); return cpu; @@ -351,6 +349,7 @@ void __init topology_init_possible_cpus(void) unsigned int disabled = topo_info.nr_disabled_cpus; unsigned int total = assigned + disabled; unsigned int cpu, allowed = 1; + u32 apicid; if (!restrict_to_up()) { if (WARN_ON_ONCE(assigned > nr_cpu_ids)) { @@ -379,8 +378,17 @@ void __init topology_init_possible_cpus(void) init_cpu_present(cpumask_of(0)); init_cpu_possible(cpumask_of(0)); + /* Assign CPU numbers to non-present CPUs */ + for (apicid = 0; disabled; disabled--, apicid++) { + apicid = find_next_andnot_bit(apic_maps[TOPO_SMT_DOMAIN].map, phys_cpu_present_map, + MAX_LOCAL_APIC, apicid); + if (apicid >= MAX_LOCAL_APIC) + break; + cpuid_to_apicid[topo_info.nr_assigned_cpus++] = apicid; + } + for (cpu = 0; cpu < allowed; cpu++) { - u32 apicid = cpuid_to_apicid[cpu]; + apicid = cpuid_to_apicid[cpu]; set_cpu_possible(cpu, true); -- Gitee From 292313c24f14922569dc08045677ce24a711395d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:00 +0100 Subject: [PATCH 70/88] x86/xen/smp_pv: Count number of vCPUs early ANBZ: #26668 commit c8f808231f1fb63553f90d4b3796cb6804d1e693 upstream. XEN/PV has a completely broken vCPU enumeration scheme, which just works by chance and provides zero topology information. Each vCPU ends up being a single core package. Dom0 provides MADT which can be used for topology information, but that table is the unmodified host table, which means that there can be more CPUs registered than the number of vCPUs XEN provides for the dom0 guest. DomU does not have ACPI and both rely on counting the possible vCPUs via an hypercall. To prepare for using CPUID topology information either via MADT or via fake APIC IDs count the number of possible CPUs during early boot and adjust nr_cpu_ids() accordingly. Intel-SIG: commit c8f808231f1f x86/xen/smp_pv: Count number of vCPUs early. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.571795063@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/xen/enlighten_pv.c | 3 +++ arch/x86/xen/smp.h | 2 ++ arch/x86/xen/smp_pv.c | 14 ++++++++++++++ 3 files changed, 19 insertions(+) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index c6ab925c2c7b..e56ac4647c1d 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -200,6 +200,9 @@ static void __init xen_pv_init_platform(void) xen_set_mtrr_data(); else mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK); + + /* Adjust nr_cpu_ids before "enumeration" happens */ + xen_smp_count_cpus(); } static void __init xen_pv_guest_late_init(void) diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h index c20cbb14c82b..b8efdbc693f7 100644 --- a/arch/x86/xen/smp.h +++ b/arch/x86/xen/smp.h @@ -19,6 +19,7 @@ extern void xen_smp_intr_free(unsigned int cpu); int xen_smp_intr_init_pv(unsigned int cpu); void xen_smp_intr_free_pv(unsigned int cpu); +void xen_smp_count_cpus(void); void xen_smp_cpus_done(unsigned int max_cpus); void xen_smp_send_reschedule(int cpu); @@ -44,6 +45,7 @@ static inline int xen_smp_intr_init_pv(unsigned int cpu) return 0; } static inline void xen_smp_intr_free_pv(unsigned int cpu) {} +static inline void xen_smp_count_cpus(void) { } #endif /* CONFIG_SMP */ #endif diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 98849b3f5eac..44706f01bb9b 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -411,6 +411,20 @@ static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +void __init xen_smp_count_cpus(void) +{ + unsigned int cpus; + + for (cpus = 0; cpus < nr_cpu_ids; cpus++) { + if (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpus, NULL) < 0) + break; + } + + pr_info("Xen PV: Detected %u vCPUS\n", cpus); + if (cpus < nr_cpu_ids) + set_nr_cpu_ids(cpus); +} + static const struct smp_ops xen_smp_ops __initconst = { .smp_prepare_boot_cpu = xen_pv_smp_prepare_boot_cpu, .smp_prepare_cpus = xen_pv_smp_prepare_cpus, -- Gitee From a6d37fcb3f705d4b065ba79bc7de421915329f97 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:02 +0100 Subject: [PATCH 71/88] x86/cpu/topology: Let XEN/PV use topology from CPUID/MADT ANBZ: #26668 commit 354da4cf57af5d8b5302251204d6077600b6d3d6 upstream. It turns out that XEN/PV Dom0 has halfways usable CPUID/MADT enumeration except that it cannot deal with CPUs which are enumerated as disabled in MADT. DomU has no MADT and provides at least rudimentary topology information in CPUID leaves 1 and 4. For both it's important that there are not more possible Linux CPUs than vCPUs provided by the hypervisor. As this is ensured by counting the vCPUs before enumeration happens: - lift the restrictions in the CPUID evaluation and the MADT parser - Utilize MADT registration for Dom0 - Keep the fake APIC ID registration for DomU - Fix the XEN APIC fake so the readout of the local APIC ID works for Dom0 via the hypercall and for DomU by returning the registered fake APIC IDs. With that the XEN/PV fake approximates usefulness. Intel-SIG: commit 354da4cf57af x86/cpu/topology: Let XEN/PV use topology from CPUID/MADT. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.626195405@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/acpi/boot.c | 25 ++++++++----------------- arch/x86/kernel/cpu/topology_common.c | 2 +- arch/x86/xen/apic.c | 14 +++++++------- arch/x86/xen/smp_pv.c | 13 ++++++++----- 4 files changed, 24 insertions(+), 30 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 0e28dd6a9e5c..25cd809d38cb 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -23,8 +23,6 @@ #include #include -#include - #include #include #include @@ -165,12 +163,6 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) return 0; } -static __init void acpi_register_lapic(u32 apic_id, u32 acpi_id, bool present) -{ - if (!xen_pv_domain()) - topology_register_apic(apic_id, acpi_id, present); -} - static bool __init acpi_is_processor_usable(u32 lapic_flags) { if (lapic_flags & ACPI_MADT_ENABLED) @@ -224,7 +216,7 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end) return 0; } - acpi_register_lapic(apic_id, processor->uid, enabled); + topology_register_apic(apic_id, processor->uid, enabled); #else pr_warn("x2apic entry ignored\n"); #endif @@ -259,9 +251,9 @@ acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end) * to not preallocating memory for all NR_CPUS * when we use CPU hotplug. */ - acpi_register_lapic(processor->id, /* APIC ID */ - processor->processor_id, /* ACPI ID */ - processor->lapic_flags & ACPI_MADT_ENABLED); + topology_register_apic(processor->id, /* APIC ID */ + processor->processor_id, /* ACPI ID */ + processor->lapic_flags & ACPI_MADT_ENABLED); return 0; } @@ -278,9 +270,9 @@ acpi_parse_sapic(union acpi_subtable_headers *header, const unsigned long end) acpi_table_print_madt_entry(&header->common); - acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */ - processor->processor_id, /* ACPI ID */ - processor->lapic_flags & ACPI_MADT_ENABLED); + topology_register_apic((processor->id << 8) | processor->eid,/* APIC ID */ + processor->processor_id, /* ACPI ID */ + processor->lapic_flags & ACPI_MADT_ENABLED); return 0; } @@ -1080,8 +1072,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void) return count; } - if (!xen_pv_domain()) - register_lapic_address(acpi_lapic_addr); + register_lapic_address(acpi_lapic_addr); return count; } diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index 263f198f24bc..2491d358e478 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -80,7 +80,7 @@ static bool fake_topology(struct topo_scan *tscan) topology_set_dom(tscan, TOPO_SMT_DOMAIN, 0, 1); topology_set_dom(tscan, TOPO_CORE_DOMAIN, 0, 1); - return tscan->c->cpuid_level < 1 || xen_pv_domain(); + return tscan->c->cpuid_level < 1; } static void parse_topology(struct topo_scan *tscan, bool early) diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 8835d1cc961d..8b045dd25196 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -43,20 +43,20 @@ static u32 xen_apic_read(u32 reg) struct xen_platform_op op = { .cmd = XENPF_get_cpuinfo, .interface_version = XENPF_INTERFACE_VERSION, - .u.pcpu_info.xen_cpuid = 0, }; - int ret; - - /* Shouldn't need this as APIC is turned off for PV, and we only - * get called on the bootup processor. But just in case. */ - if (!xen_initial_domain() || smp_processor_id()) - return 0; + int ret, cpu; if (reg == APIC_LVR) return 0x14; if (reg != APIC_ID) return 0; + cpu = smp_processor_id(); + if (!xen_initial_domain()) + return cpu ? cpuid_to_apicid[cpu] << 24 : 0; + + op.u.pcpu_info.xen_cpuid = cpu; + ret = HYPERVISOR_platform_op(&op); if (ret) op.u.pcpu_info.apic_id = BAD_APICID; diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 44706f01bb9b..27d1a5b7f571 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -156,11 +156,9 @@ static void __init xen_pv_smp_config(void) topology_register_boot_apic(apicid++); - for (i = 1; i < nr_cpu_ids; i++) { - if (HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL) < 0) - break; + for (i = 1; i < nr_cpu_ids; i++) topology_register_apic(apicid++, CPU_ACPIID_INVALID, true); - } + /* Pretend to be a proper enumerated system */ smp_found_config = 1; } @@ -451,5 +449,10 @@ void __init xen_smp_init(void) /* Avoid searching for BIOS MP tables */ x86_init.mpparse.find_mptable = x86_init_noop; x86_init.mpparse.early_parse_smp_cfg = x86_init_noop; - x86_init.mpparse.parse_smp_cfg = xen_pv_smp_config; + + /* XEN/PV Dom0 has halfways sane topology information via CPUID/MADT */ + if (xen_initial_domain()) + x86_init.mpparse.parse_smp_cfg = x86_init_noop; + else + x86_init.mpparse.parse_smp_cfg = xen_pv_smp_config; } -- Gitee From 7e9d851a8d66548d8000a798922360dc7025eaa2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:03 +0100 Subject: [PATCH 72/88] x86/cpu/topology: Use topology bitmaps for sizing ANBZ: #26668 commit 090610ba704a66d7a58919be3bad195f24499ecb upstream. Now that all possible APIC IDs are tracked in the topology bitmaps, its trivial to retrieve the real information from there. This gets rid of the guesstimates for the maximal packages and dies per package as the actual numbers can be determined before a single AP has been brought up. The number of SMT threads can now be determined correctly from the bitmaps in all situations. Up to now a system which has SMT disabled in the BIOS will still claim that it is SMT capable, because the lowest APIC ID bit is reserved for that and CPUID leaf 0xb/0x1f still enumerates the SMT domain accordingly. By calculating the bitmap weights of the SMT and the CORE domain and setting them into relation the SMT disabled in BIOS situation reports correctly that the system is not SMT capable. It also handles the situation correctly when a hybrid systems boot CPU does not have SMT as it takes the SMT capability of the APs fully into account. Intel-SIG: commit 090610ba704a x86/cpu/topology: Use topology bitmaps for sizing. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.681709880@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/smp.h | 3 +-- arch/x86/include/asm/topology.h | 24 +++++++++++++----------- arch/x86/kernel/cpu/common.c | 9 ++++++--- arch/x86/kernel/cpu/debugfs.c | 2 +- arch/x86/kernel/cpu/topology.c | 20 +++++++++++++++++++- arch/x86/kernel/cpu/topology_common.c | 24 ------------------------ arch/x86/kernel/smpboot.c | 16 ---------------- arch/x86/xen/smp.c | 2 -- 8 files changed, 40 insertions(+), 60 deletions(-) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index f1510d6f4e84..5318470cea02 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -8,7 +8,7 @@ #include #include -extern int smp_num_siblings; +extern unsigned int smp_num_siblings; DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); @@ -109,7 +109,6 @@ void cpu_disable_common(void); void native_smp_prepare_boot_cpu(void); void smp_prepare_cpus_common(void); void native_smp_prepare_cpus(unsigned int max_cpus); -void calculate_max_logical_packages(void); void native_smp_cpus_done(unsigned int max_cpus); int common_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_kick_ap(unsigned int cpu, struct task_struct *tidle); diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index f3ef2b6d6234..bb32f3299d01 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -143,7 +143,19 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); #define topology_amd_node_id(cpu) (cpu_data(cpu).topo.amd_node_id) -extern unsigned int __max_die_per_package; +extern unsigned int __max_dies_per_package; +extern unsigned int __max_logical_packages; + +static inline unsigned int topology_max_packages(void) +{ + return __max_logical_packages; +} + +static inline unsigned int topology_max_die_per_package(void) +{ + return __max_dies_per_package; +} + extern struct cpumask __cpu_primary_thread_mask; #define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) @@ -154,14 +166,6 @@ extern struct cpumask __cpu_primary_thread_mask; #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) -extern unsigned int __max_logical_packages; -#define topology_max_packages() (__max_logical_packages) - -static inline int topology_max_die_per_package(void) -{ - return __max_die_per_package; -} - extern int __max_smt_threads; static inline int topology_max_smt_threads(void) @@ -192,13 +196,11 @@ static inline bool topology_is_primary_thread(unsigned int cpu) } #else /* CONFIG_SMP */ -#define topology_max_packages() (1) static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } -static inline int topology_max_die_per_package(void) { return 1; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } static inline unsigned int topology_amd_nodes_per_pkg(void) { return 0; }; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0645b62de21d..a32d93442463 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -71,14 +71,17 @@ u32 elf_hwcap2 __read_mostly; /* Number of siblings per CPU package */ -int smp_num_siblings = 1; +unsigned int smp_num_siblings __ro_after_init = 1; EXPORT_SYMBOL(smp_num_siblings); DEFINE_STATIC_KEY_FALSE(hygon_lmc_key); EXPORT_SYMBOL_GPL(hygon_lmc_key); -unsigned int __max_die_per_package __read_mostly = 1; -EXPORT_SYMBOL(__max_die_per_package); +unsigned int __max_dies_per_package __ro_after_init = 1; +EXPORT_SYMBOL(__max_dies_per_package); + +unsigned int __max_logical_packages __ro_after_init = 1; +EXPORT_SYMBOL(__max_logical_packages); static struct ppin_info { int feature; diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c index 86de544cdb14..543efc487206 100644 --- a/arch/x86/kernel/cpu/debugfs.c +++ b/arch/x86/kernel/cpu/debugfs.c @@ -29,7 +29,7 @@ static int cpu_debug_show(struct seq_file *m, void *p) seq_printf(m, "amd_node_id: %u\n", c->topo.amd_node_id); seq_printf(m, "amd_nodes_per_pkg: %u\n", topology_amd_nodes_per_pkg()); seq_printf(m, "max_cores: %u\n", c->x86_max_cores); - seq_printf(m, "max_die_per_pkg: %u\n", __max_die_per_package); + seq_printf(m, "max_dies_per_pkg: %u\n", __max_dies_per_package); seq_printf(m, "smp_num_siblings: %u\n", smp_num_siblings); return 0; } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index b4f6791cba49..f2d264393a5e 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -347,8 +347,8 @@ void __init topology_init_possible_cpus(void) { unsigned int assigned = topo_info.nr_assigned_cpus; unsigned int disabled = topo_info.nr_disabled_cpus; + unsigned int cnta, cntb, cpu, allowed = 1; unsigned int total = assigned + disabled; - unsigned int cpu, allowed = 1; u32 apicid; if (!restrict_to_up()) { @@ -371,6 +371,24 @@ void __init topology_init_possible_cpus(void) total_cpus = allowed; set_nr_cpu_ids(allowed); + cnta = domain_weight(TOPO_PKG_DOMAIN); + cntb = domain_weight(TOPO_DIE_DOMAIN); + __max_logical_packages = cnta; + __max_dies_per_package = 1U << (get_count_order(cntb) - get_count_order(cnta)); + + pr_info("Max. logical packages: %3u\n", cnta); + pr_info("Max. logical dies: %3u\n", cntb); + pr_info("Max. dies per package: %3u\n", __max_dies_per_package); + + cnta = domain_weight(TOPO_CORE_DOMAIN); + cntb = domain_weight(TOPO_SMT_DOMAIN); + /* + * Can't use order delta here as order(cnta) can be equal + * order(cntb) even if cnta != cntb. + */ + smp_num_siblings = DIV_ROUND_UP(cntb, cnta); + pr_info("Max. threads per core: %3u\n", smp_num_siblings); + pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled); if (topo_info.nr_rejected_cpus) pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus); diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index 2491d358e478..35e7bc8d3e8d 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -203,16 +203,6 @@ void cpu_parse_topology(struct cpuinfo_x86 *c) tscan.dom_shifts[dom], x86_topo_system.dom_shifts[dom]); } - /* Bug compatible with the existing parsers */ - if (tscan.dom_ncpus[TOPO_SMT_DOMAIN] > smp_num_siblings) { - if (system_state == SYSTEM_BOOTING) { - pr_warn_once("CPU%d: SMT detected and enabled late\n", cpu); - smp_num_siblings = tscan.dom_ncpus[TOPO_SMT_DOMAIN]; - } else { - pr_warn_once("CPU%d: SMT detected after init. Too late!\n", cpu); - } - } - topo_set_ids(&tscan); topo_set_max_cores(&tscan); } @@ -238,20 +228,6 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c) topo_set_ids(&tscan); topo_set_max_cores(&tscan); - /* - * Bug compatible with the existing code. If the boot CPU does not - * have SMT this ends up with one sibling. This needs way deeper - * changes further down the road to get it right during early boot. - */ - smp_num_siblings = tscan.dom_ncpus[TOPO_SMT_DOMAIN]; - - /* - * Neither it's clear whether there are as many dies as the APIC - * space indicating die level is. But assume that the actual number - * of CPUs gives a proper indication for now to stay bug compatible. - */ - __max_die_per_package = tscan.dom_ncpus[TOPO_DIE_DOMAIN] / - tscan.dom_ncpus[TOPO_DIE_DOMAIN - 1]; /* * AMD systems have Nodes per package which cannot be mapped to * APIC ID. diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index aa72b884cae2..7421121106d4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -136,8 +136,6 @@ static DEFINE_PER_CPU_READ_MOSTLY(struct logical_maps, logical_maps) = { .phys_die_id = U32_MAX, }; -unsigned int __max_logical_packages __read_mostly; -EXPORT_SYMBOL(__max_logical_packages); static unsigned int logical_packages __read_mostly; static unsigned int logical_die __read_mostly; @@ -1256,24 +1254,10 @@ void __init native_smp_prepare_boot_cpu(void) native_pv_lock_init(); } -void __init calculate_max_logical_packages(void) -{ - int ncpus; - - /* - * Today neither Intel nor AMD support heterogeneous systems so - * extrapolate the boot cpu's data to all packages. - */ - ncpus = cpu_data(0).booted_cores * topology_max_smt_threads(); - __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); - pr_info("Max logical packages: %u\n", __max_logical_packages); -} - void __init native_smp_cpus_done(unsigned int max_cpus) { pr_debug("Boot done\n"); - calculate_max_logical_packages(); build_sched_topology(); nmi_selftest(); impress_friends(); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 1fb9a1644d94..935771726f9c 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -135,8 +135,6 @@ void __init xen_smp_cpus_done(unsigned int max_cpus) { if (xen_hvm_domain()) native_smp_cpus_done(max_cpus); - else - calculate_max_logical_packages(); } void xen_smp_send_reschedule(int cpu) -- Gitee From 9017da06de55048c9778172c3385b3a6347bd98e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:05 +0100 Subject: [PATCH 73/88] x86/cpu/topology: Mop up primary thread mask handling ANBZ: #26668 commit 882e0cff9ef340e7a47659a9aab9da64f4b9b847 upstream. The early initcall to initialize the primary thread mask is not longer required because topology_init_possible_cpus() can mark primary threads correctly when initializing the possible and present map as the number of SMT threads is already determined correctly. The XENPV workaround is not longer required because XENPV now registers fake APIC IDs which will just work like any other enumeration. Intel-SIG: commit 882e0cff9ef3 x86/cpu/topology: Mop up primary thread mask handling. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.736104257@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index f2d264393a5e..c88aca28022f 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -82,30 +82,6 @@ static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) if (smp_num_siblings == 1 || !(apicid & mask)) cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); } - -/* - * Due to the utter mess of CPUID evaluation smp_num_siblings is not valid - * during early boot. Initialize the primary thread mask before SMP - * bringup. - */ -static int __init smp_init_primary_thread_mask(void) -{ - unsigned int cpu; - - /* - * XEN/PV provides either none or useless topology information. - * Pretend that all vCPUs are primary threads. - */ - if (xen_pv_domain()) { - cpumask_copy(&__cpu_primary_thread_mask, cpu_possible_mask); - return 0; - } - - for (cpu = 0; cpu < topo_info.nr_assigned_cpus; cpu++) - cpu_mark_primary_thread(cpu, cpuid_to_apicid[cpu]); - return 0; -} -early_initcall(smp_init_primary_thread_mask); #else static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { } #endif @@ -151,9 +127,6 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) #endif set_cpu_possible(cpu, true); set_cpu_present(cpu, true); - - if (system_state != SYSTEM_BOOTING) - cpu_mark_primary_thread(cpu, apic_id); } static __init bool check_for_real_bsp(u32 apic_id) @@ -281,6 +254,7 @@ int topology_hotplug_apic(u32 apic_id, u32 acpi_id) return -ENOSPC; set_bit(apic_id, phys_cpu_present_map); topo_set_cpuids(cpu, apic_id, acpi_id); + cpu_mark_primary_thread(cpu, apic_id); return cpu; } @@ -413,6 +387,7 @@ void __init topology_init_possible_cpus(void) if (apicid == BAD_APICID) continue; + cpu_mark_primary_thread(cpu, apicid); set_cpu_present(cpu, test_bit(apicid, phys_cpu_present_map)); } } -- Gitee From e5fb25f8c8ae1e1890b17d0756a09d1a40db119c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:06 +0100 Subject: [PATCH 74/88] x86/cpu/topology: Simplify cpu_mark_primary_thread() ANBZ: #26668 commit 5e40fb2d4a4c7503cab4f923b7d985dbcf583581 upstream. No point in creating a mask via fls(). smp_num_siblings is guaranteed to be a power of 2. So just using (smp_num_siblings - 1) has the same effect. Intel-SIG: commit 5e40fb2d4a4c x86/cpu/topology: Simplify cpu_mark_primary_thread(). x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.791176581@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index c88aca28022f..f84996ee8c85 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -76,10 +76,7 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id) #ifdef CONFIG_SMP static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { - /* Isolate the SMT bit(s) in the APICID and check for 0 */ - u32 mask = (1U << (fls(smp_num_siblings) - 1)) - 1; - - if (smp_num_siblings == 1 || !(apicid & mask)) + if (!(apicid & (smp_num_siblings - 1))) cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); } #else -- Gitee From 1d269fbac475b49a9760b92d28082f4b82a4c1fe Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:07 +0100 Subject: [PATCH 75/88] x86/cpu/topology: Provide logical pkg/die mapping ANBZ: #26668 commit b7065f4f844c7876ed071b67e2ba57838152bd63 upstream. With the topology bitmaps in place the logical package and die IDs can trivially be retrieved by determining the bitmap weight of the relevant topology domain level up to and including the physical ID in question. Provide a function to that effect. Intel-SIG: commit b7065f4f844c x86/cpu/topology: Provide logical pkg/die mapping. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.846136196@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/topology.h | 9 +++++++++ arch/x86/kernel/cpu/topology.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index bb32f3299d01..f0a9c664d41a 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -159,6 +159,15 @@ static inline unsigned int topology_max_die_per_package(void) extern struct cpumask __cpu_primary_thread_mask; #define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) +#ifdef CONFIG_X86_LOCAL_APIC +int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level); +#else +static inline int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level) +{ + return 0; +} +#endif + #ifdef CONFIG_SMP #define topology_cluster_id(cpu) (cpu_data(cpu).topo.l2c_id) #define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu)) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index f84996ee8c85..4de05aafbc7f 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -229,6 +229,34 @@ void __init topology_register_boot_apic(u32 apic_id) topo_register_apic(apic_id, CPU_ACPIID_INVALID, true); } +/** + * topology_get_logical_id - Retrieve the logical ID at a given topology domain level + * @apicid: The APIC ID for which to lookup the logical ID + * @at_level: The topology domain level to use + * + * @apicid must be a full APIC ID, not the normalized variant. It's valid to have + * all bits below the domain level specified by @at_level to be clear. So both + * real APIC IDs and backshifted normalized APIC IDs work correctly. + * + * Returns: + * - >= 0: The requested logical ID + * - -ERANGE: @apicid is out of range + * - -ENODEV: @apicid is not registered + */ +int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level) +{ + /* Remove the bits below @at_level to get the proper level ID of @apicid */ + unsigned int lvlid = topo_apicid(apicid, at_level); + + if (lvlid >= MAX_LOCAL_APIC) + return -ERANGE; + if (!test_bit(lvlid, apic_maps[at_level].map)) + return -ENODEV; + /* Get the number of set bits before @lvlid. */ + return bitmap_weight(apic_maps[at_level].map, lvlid); +} +EXPORT_SYMBOL_GPL(topology_get_logical_id); + #ifdef CONFIG_ACPI_HOTPLUG_CPU /** * topology_hotplug_apic - Handle a physical hotplugged APIC after boot -- Gitee From f7fd5d5454f7d7cdcced49260556d48caa441d36 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:09 +0100 Subject: [PATCH 76/88] x86/cpu/topology: Use topology logical mapping mechanism ANBZ: #26668 commit 380414be78bf8dbe1c3ed98feb75e2579c4a1bae upstream. Replace the logical package and die management functionality and retrieve the logical IDs from the topology bitmaps. Intel-SIG: commit 380414be78bf x86/cpu/topology: Use topology logical mapping mechanism. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.901865302@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/topology.h | 15 ++-- arch/x86/kernel/cpu/common.c | 13 --- arch/x86/kernel/cpu/topology_common.c | 4 + arch/x86/kernel/smpboot.c | 111 +------------------------- 4 files changed, 12 insertions(+), 131 deletions(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index f0a9c664d41a..d09ff57c39ca 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -175,6 +175,13 @@ static inline int topology_get_logical_id(u32 apicid, enum x86_topology_domains #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) + +static inline int topology_phys_to_logical_pkg(unsigned int pkg) +{ + return topology_get_logical_id(pkg << x86_topo_system.dom_shifts[TOPO_PKG_DOMAIN], + TOPO_PKG_DOMAIN); +} + extern int __max_smt_threads; static inline int topology_max_smt_threads(void) @@ -184,10 +191,6 @@ static inline int topology_max_smt_threads(void) #include -int topology_update_package_map(unsigned int apicid, unsigned int cpu); -int topology_update_die_map(unsigned int dieid, unsigned int cpu); -int topology_phys_to_logical_pkg(unsigned int pkg); - extern unsigned int __amd_nodes_per_pkg; static inline unsigned int topology_amd_nodes_per_pkg(void) @@ -205,10 +208,6 @@ static inline bool topology_is_primary_thread(unsigned int cpu) } #else /* CONFIG_SMP */ -static inline int -topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } -static inline int -topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a32d93442463..0ff79bf64525 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1827,18 +1827,6 @@ static void generic_identify(struct cpuinfo_x86 *c) #endif } -static void update_package_map(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - unsigned int cpu = smp_processor_id(); - - BUG_ON(topology_update_package_map(c->topo.pkg_id, cpu)); - BUG_ON(topology_update_die_map(c->topo.die_id, cpu)); -#else - c->topo.logical_pkg_id = 0; -#endif -} - /* * This does the hard work of actually picking apart the CPU stuff... */ @@ -2026,7 +2014,6 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_32 enable_sep_cpu(); #endif - update_package_map(c); x86_spec_ctrl_setup_ap(); update_srbds_msr(); if (boot_cpu_has_bug(X86_BUG_GDS)) diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index 35e7bc8d3e8d..0f5b0e59e68b 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -10,6 +10,7 @@ #include "cpu.h" struct x86_topology_system x86_topo_system __ro_after_init; +EXPORT_SYMBOL_GPL(x86_topo_system); unsigned int __amd_nodes_per_pkg __ro_after_init; EXPORT_SYMBOL_GPL(__amd_nodes_per_pkg); @@ -154,6 +155,9 @@ static void topo_set_ids(struct topo_scan *tscan) c->topo.pkg_id = topo_shift_apicid(apicid, TOPO_PKG_DOMAIN); c->topo.die_id = topo_shift_apicid(apicid, TOPO_DIE_DOMAIN); + c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN); + c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN); + /* Package relative core ID */ c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7421121106d4..09fce487c810 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -122,23 +122,6 @@ struct mwait_cpu_dead { */ static DEFINE_PER_CPU_ALIGNED(struct mwait_cpu_dead, mwait_cpu_dead); -/* Logical package management. */ -struct logical_maps { - u32 phys_pkg_id; - u32 phys_die_id; - u32 logical_pkg_id; - u32 logical_die_id; -}; - -/* Temporary workaround until the full topology mechanics is in place */ -static DEFINE_PER_CPU_READ_MOSTLY(struct logical_maps, logical_maps) = { - .phys_pkg_id = U32_MAX, - .phys_die_id = U32_MAX, -}; - -static unsigned int logical_packages __read_mostly; -static unsigned int logical_die __read_mostly; - /* Maximum number of SMT threads on any online core */ int __read_mostly __max_smt_threads = 1; @@ -331,103 +314,11 @@ static void notrace start_secondary(void *unused) cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } -/** - * topology_phys_to_logical_pkg - Map a physical package id to a logical - * @phys_pkg: The physical package id to map - * - * Returns logical package id or -1 if not found - */ -int topology_phys_to_logical_pkg(unsigned int phys_pkg) -{ - int cpu; - - for_each_possible_cpu(cpu) { - if (per_cpu(logical_maps.phys_pkg_id, cpu) == phys_pkg) - return per_cpu(logical_maps.logical_pkg_id, cpu); - } - return -1; -} -EXPORT_SYMBOL(topology_phys_to_logical_pkg); - -/** - * topology_phys_to_logical_die - Map a physical die id to logical - * @die_id: The physical die id to map - * @cur_cpu: The CPU for which the mapping is done - * - * Returns logical die id or -1 if not found - */ -static int topology_phys_to_logical_die(unsigned int die_id, unsigned int cur_cpu) -{ - int cpu, proc_id = cpu_data(cur_cpu).topo.pkg_id; - - for_each_possible_cpu(cpu) { - if (per_cpu(logical_maps.phys_pkg_id, cpu) == proc_id && - per_cpu(logical_maps.phys_die_id, cpu) == die_id) - return per_cpu(logical_maps.logical_die_id, cpu); - } - return -1; -} - -/** - * topology_update_package_map - Update the physical to logical package map - * @pkg: The physical package id as retrieved via CPUID - * @cpu: The cpu for which this is updated - */ -int topology_update_package_map(unsigned int pkg, unsigned int cpu) -{ - int new; - - /* Already available somewhere? */ - new = topology_phys_to_logical_pkg(pkg); - if (new >= 0) - goto found; - - new = logical_packages++; - if (new != pkg) { - pr_info("CPU %u Converting physical %u to logical package %u\n", - cpu, pkg, new); - } -found: - per_cpu(logical_maps.phys_pkg_id, cpu) = pkg; - per_cpu(logical_maps.logical_pkg_id, cpu) = new; - cpu_data(cpu).topo.logical_pkg_id = new; - return 0; -} -/** - * topology_update_die_map - Update the physical to logical die map - * @die: The die id as retrieved via CPUID - * @cpu: The cpu for which this is updated - */ -int topology_update_die_map(unsigned int die, unsigned int cpu) -{ - int new; - - /* Already available somewhere? */ - new = topology_phys_to_logical_die(die, cpu); - if (new >= 0) - goto found; - - new = logical_die++; - if (new != die) { - pr_info("CPU %u Converting physical %u to logical die %u\n", - cpu, die, new); - } -found: - per_cpu(logical_maps.phys_die_id, cpu) = die; - per_cpu(logical_maps.logical_die_id, cpu) = new; - cpu_data(cpu).topo.logical_die_id = new; - return 0; -} - static void __init smp_store_boot_cpu_info(void) { - int id = 0; /* CPU 0 */ - struct cpuinfo_x86 *c = &cpu_data(id); + struct cpuinfo_x86 *c = &cpu_data(0); *c = boot_cpu_data; - c->cpu_index = id; - topology_update_package_map(c->topo.pkg_id, id); - topology_update_die_map(c->topo.die_id, id); c->initialized = true; } -- Gitee From 46bc5129fe09d8f7a3d14f9930f76ff7b9f9f933 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:10 +0100 Subject: [PATCH 77/88] x86/cpu/topology: Retrieve cores per package from topology bitmaps ANBZ: #26668 commit 3205c9833d69b97e8694efe3e193312dea4c571f upstream. Similar to other sizing information the number of cores per package can be established from the topology bitmap. Provide a function for retrieving that information and replace the buggy hack in the CPUID evaluation with it. Intel-SIG: commit 3205c9833d69 x86/cpu/topology: Retrieve cores per package from topology bitmaps. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210252.956858282@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 43 +++++++++++++++++++++++++++ arch/x86/kernel/cpu/topology.h | 11 +++++++ arch/x86/kernel/cpu/topology_common.c | 18 ++--------- 3 files changed, 57 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 4de05aafbc7f..337430819eee 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -257,6 +257,49 @@ int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level) } EXPORT_SYMBOL_GPL(topology_get_logical_id); +/** + * topology_unit_count - Retrieve the count of specified units at a given topology domain level + * @apicid: The APIC ID which specifies the search range + * @which_units: The domain level specifying the units to count + * @at_level: The domain level at which @which_units have to be counted + * + * This returns the number of possible units according to the enumerated + * information. + * + * E.g. topology_count_units(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN) + * counts the number of possible cores in the package to which @apicid + * belongs. + * + * @at_level must obviously be greater than @which_level to produce useful + * results. If @at_level is equal to @which_units the result is + * unsurprisingly 1. If @at_level is less than @which_units the results + * is by definition undefined and the function returns 0. + */ +unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_units, + enum x86_topology_domains at_level) +{ + /* Remove the bits below @at_level to get the proper level ID of @apicid */ + unsigned int lvlid = topo_apicid(apicid, at_level); + unsigned int id, end, cnt = 0; + + if (lvlid >= MAX_LOCAL_APIC) + return 0; + if (!test_bit(lvlid, apic_maps[at_level].map)) + return 0; + if (which_units > at_level) + return 0; + if (which_units == at_level) + return 1; + + /* Calculate the exclusive end */ + end = lvlid + (1U << x86_topo_system.dom_shifts[at_level]); + /* Unfortunately there is no bitmap_weight_range() */ + for (id = find_next_bit(apic_maps[which_units].map, end, lvlid); + id < end; id = find_next_bit(apic_maps[which_units].map, end, ++id)) + cnt++; + return cnt; +} + #ifdef CONFIG_ACPI_HOTPLUG_CPU /** * topology_hotplug_apic - Handle a physical hotplugged APIC after boot diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h index 2a3c838b6044..37326297f80c 100644 --- a/arch/x86/kernel/cpu/topology.h +++ b/arch/x86/kernel/cpu/topology.h @@ -53,4 +53,15 @@ static inline void topology_update_dom(struct topo_scan *tscan, enum x86_topolog tscan->dom_ncpus[dom] = ncpus; } +#ifdef CONFIG_X86_LOCAL_APIC +unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_units, + enum x86_topology_domains at_level); +#else +static inline unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_units, + enum x86_topology_domains at_level) +{ + return 1; +} +#endif + #endif /* ARCH_X86_TOPOLOGY_H */ diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index 0f5b0e59e68b..5a865de64846 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -162,25 +162,15 @@ static void topo_set_ids(struct topo_scan *tscan) c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; + /* Maximum number of cores on this package */ + c->x86_max_cores = topology_unit_count(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN); + c->topo.amd_node_id = tscan->amd_node_id; if (c->x86_vendor == X86_VENDOR_AMD) cpu_topology_fixup_amd(tscan); } -static void topo_set_max_cores(struct topo_scan *tscan) -{ - /* - * Bug compatible for now. This is broken on hybrid systems: - * 8 cores SMT + 8 cores w/o SMT - * tscan.dom_ncpus[TOPO_DIEGRP_DOMAIN] = 24; 24 / 2 = 12 !! - * - * Cannot be fixed without further topology enumeration changes. - */ - tscan->c->x86_max_cores = tscan->dom_ncpus[TOPO_DIEGRP_DOMAIN] >> - x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; -} - void cpu_parse_topology(struct cpuinfo_x86 *c) { unsigned int dom, cpu = smp_processor_id(); @@ -208,7 +198,6 @@ void cpu_parse_topology(struct cpuinfo_x86 *c) } topo_set_ids(&tscan); - topo_set_max_cores(&tscan); } void __init cpu_init_topology(struct cpuinfo_x86 *c) @@ -230,7 +219,6 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c) } topo_set_ids(&tscan); - topo_set_max_cores(&tscan); /* * AMD systems have Nodes per package which cannot be mapped to -- Gitee From a4a2b2a9464ca742b2c5a72a4210c73614c7c591 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:12 +0100 Subject: [PATCH 78/88] x86/cpu/topology: Rename smp_num_siblings ANBZ: #26668 commit 8078f4d6102f9370b3b7436d25717735d21f5c09 upstream. It's really a non-intuitive name. Rename it to __max_threads_per_core which is obvious. Intel-SIG: commit 8078f4d6102f x86/cpu/topology: Rename smp_num_siblings. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210253.011307973@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/perf_event_p4.h | 4 ++-- arch/x86/include/asm/smp.h | 2 -- arch/x86/include/asm/topology.h | 1 + arch/x86/kernel/cpu/common.c | 6 +++--- arch/x86/kernel/cpu/debugfs.c | 2 +- arch/x86/kernel/cpu/mce/inject.c | 2 +- arch/x86/kernel/cpu/topology.c | 6 +++--- arch/x86/kernel/process.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- 9 files changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 94de1a05aeba..d65e338b6a5f 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -181,7 +181,7 @@ static inline u64 p4_clear_ht_bit(u64 config) static inline int p4_ht_active(void) { #ifdef CONFIG_SMP - return smp_num_siblings > 1; + return __max_threads_per_core > 1; #endif return 0; } @@ -189,7 +189,7 @@ static inline int p4_ht_active(void) static inline int p4_ht_thread(int cpu) { #ifdef CONFIG_SMP - if (smp_num_siblings == 2) + if (__max_threads_per_core == 2) return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); #endif return 0; diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 5318470cea02..54d6d71e0eca 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -8,8 +8,6 @@ #include #include -extern unsigned int smp_num_siblings; - DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index d09ff57c39ca..253498c2de84 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -145,6 +145,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); extern unsigned int __max_dies_per_package; extern unsigned int __max_logical_packages; +extern unsigned int __max_threads_per_core; static inline unsigned int topology_max_packages(void) { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0ff79bf64525..231dc97d2611 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -71,8 +71,8 @@ u32 elf_hwcap2 __read_mostly; /* Number of siblings per CPU package */ -unsigned int smp_num_siblings __ro_after_init = 1; -EXPORT_SYMBOL(smp_num_siblings); +unsigned int __max_threads_per_core __ro_after_init = 1; +EXPORT_SYMBOL(__max_threads_per_core); DEFINE_STATIC_KEY_FALSE(hygon_lmc_key); EXPORT_SYMBOL_GPL(hygon_lmc_key); @@ -2371,7 +2371,7 @@ void __init arch_cpu_finalize_init(void) * identify_boot_cpu() initialized SMT support information, let the * core code know. */ - cpu_smt_set_num_threads(smp_num_siblings, smp_num_siblings); + cpu_smt_set_num_threads(__max_threads_per_core, __max_threads_per_core); if (!IS_ENABLED(CONFIG_SMP)) { pr_info("CPU: "); diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c index 543efc487206..f40f3eecebc6 100644 --- a/arch/x86/kernel/cpu/debugfs.c +++ b/arch/x86/kernel/cpu/debugfs.c @@ -30,7 +30,7 @@ static int cpu_debug_show(struct seq_file *m, void *p) seq_printf(m, "amd_nodes_per_pkg: %u\n", topology_amd_nodes_per_pkg()); seq_printf(m, "max_cores: %u\n", c->x86_max_cores); seq_printf(m, "max_dies_per_pkg: %u\n", __max_dies_per_package); - seq_printf(m, "smp_num_siblings: %u\n", smp_num_siblings); + seq_printf(m, "max_threads_per_core:%u\n", __max_threads_per_core); return 0; } diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 2b290452e194..1e327881073f 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -433,7 +433,7 @@ static u32 get_nbc_for_node(int node_id) struct cpuinfo_x86 *c = &boot_cpu_data; u32 cores_per_node; - cores_per_node = (c->x86_max_cores * smp_num_siblings) / topology_amd_nodes_per_pkg(); + cores_per_node = (c->x86_max_cores * __max_threads_per_core) / topology_amd_nodes_per_pkg(); return cores_per_node * node_id; } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 337430819eee..75dae6c556d0 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -76,7 +76,7 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id) #ifdef CONFIG_SMP static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { - if (!(apicid & (smp_num_siblings - 1))) + if (!(apicid & (__max_threads_per_core - 1))) cpumask_set_cpu(cpu, &__cpu_primary_thread_mask); } #else @@ -428,8 +428,8 @@ void __init topology_init_possible_cpus(void) * Can't use order delta here as order(cnta) can be equal * order(cntb) even if cnta != cntb. */ - smp_num_siblings = DIV_ROUND_UP(cntb, cnta); - pr_info("Max. threads per core: %3u\n", smp_num_siblings); + __max_threads_per_core = DIV_ROUND_UP(cntb, cnta); + pr_info("Max. threads per core: %3u\n", __max_threads_per_core); pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled); if (topo_info.nr_rejected_cpus) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e3c26cc45f70..05aa2744d53e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -952,7 +952,7 @@ static __cpuidle void mwait_idle(void) void select_idle_routine(const struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP - if (boot_option_idle_override == IDLE_POLL && smp_num_siblings > 1) + if (boot_option_idle_override == IDLE_POLL && __max_threads_per_core > 1) pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); #endif if (x86_idle_set() || boot_option_idle_override == IDLE_POLL) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 09fce487c810..2abb3b898db7 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -552,7 +552,7 @@ static void __init build_sched_topology(void) void set_cpu_sibling_map(int cpu) { - bool has_smt = smp_num_siblings > 1; + bool has_smt = __max_threads_per_core > 1; bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; -- Gitee From a4f3ba6735a19ec4ea7e2da1e162ae348d613f0f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:13 +0100 Subject: [PATCH 79/88] x86/cpu/topology: Rename topology_max_die_per_package() ANBZ: #26668 commit bd745d1c41e7fa56242889eb5dc6df2d7dd5df32 upstream. The plural of die is dies. Intel-SIG: commit bd745d1c41e7 x86/cpu/topology: Rename topology_max_die_per_package(). x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210253.065874205@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/events/intel/cstate.c | 2 +- arch/x86/events/intel/uncore.c | 2 +- arch/x86/events/intel/uncore_snbep.c | 2 +- arch/x86/events/rapl.c | 2 +- arch/x86/include/asm/topology.h | 2 +- drivers/hwmon/coretemp.c | 2 +- .../platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c | 2 +- drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c | 2 +- drivers/powercap/intel_rapl_common.c | 2 +- drivers/thermal/intel/intel_hfi.c | 2 +- drivers/thermal/intel/intel_powerclamp.c | 2 +- drivers/thermal/intel/x86_pkg_temp_thermal.c | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index d7ce7eec0cd2..b61d367b89e2 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -828,7 +828,7 @@ static int __init cstate_init(void) } if (has_cstate_pkg) { - if (topology_max_die_per_package() > 1) { + if (topology_max_dies_per_package() > 1) { err = perf_pmu_register(&cstate_pkg_pmu, "cstate_die", -1); } else { diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 2b3133215266..780f244ca8d4 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1909,7 +1909,7 @@ static int __init intel_uncore_init(void) return -ENODEV; __uncore_max_dies = - topology_max_packages() * topology_max_die_per_package(); + topology_max_packages() * topology_max_dies_per_package(); id = x86_match_cpu(intel_uncore_match); if (!id) { diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 45991b602066..02ef6d065eb1 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1443,7 +1443,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool */ for (i = 0; i < 8; i++) { if (nodeid == GIDNIDMAP(config, i)) { - if (topology_max_die_per_package() > 1) + if (topology_max_dies_per_package() > 1) die_id = i; else die_id = topology_phys_to_logical_pkg(i); diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 1579429846cc..ce645e0b60d2 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -678,7 +678,7 @@ static const struct attribute_group *rapl_attr_update[] = { static int __init init_rapl_pmus(void) { - int maxdie = topology_max_packages() * topology_max_die_per_package(); + int maxdie = topology_max_packages() * topology_max_dies_per_package(); size_t size; size = sizeof(*rapl_pmus) + maxdie * sizeof(struct rapl_pmu *); diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 253498c2de84..a1cf47c53084 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -152,7 +152,7 @@ static inline unsigned int topology_max_packages(void) return __max_logical_packages; } -static inline unsigned int topology_max_die_per_package(void) +static inline unsigned int topology_max_dies_per_package(void) { return __max_dies_per_package; } diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index b8fc8d1ef20d..b0991dde2e59 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -782,7 +782,7 @@ static int __init coretemp_init(void) if (!x86_match_cpu(coretemp_ids)) return -ENODEV; - max_zones = topology_max_packages() * topology_max_die_per_package(); + max_zones = topology_max_packages() * topology_max_dies_per_package(); zone_devices = kcalloc(max_zones, sizeof(struct platform_device *), GFP_KERNEL); if (!zone_devices) diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c index 23c44b82642a..5ab45b751666 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c @@ -584,7 +584,7 @@ static int uncore_probe(struct auxiliary_device *auxdev, const struct auxiliary_ auxiliary_set_drvdata(auxdev, tpmi_uncore); - if (topology_max_die_per_package() > 1) + if (topology_max_dies_per_package() > 1) return 0; tpmi_uncore->root_cluster.root_domain = true; diff --git a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c index a82e58750110..8a70650cb8ed 100644 --- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c +++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency.c @@ -259,7 +259,7 @@ static int __init intel_uncore_init(void) return -ENODEV; uncore_max_entries = topology_max_packages() * - topology_max_die_per_package(); + topology_max_dies_per_package(); uncore_instances = kcalloc(uncore_max_entries, sizeof(*uncore_instances), GFP_KERNEL); if (!uncore_instances) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index e25aff2f7a9c..c10503037210 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -2184,7 +2184,7 @@ struct rapl_package *rapl_add_package_cpuslocked(int id, struct rapl_if_priv *pr if (id_is_cpu) { rp->id = topology_logical_die_id(id); rp->lead_cpu = id; - if (topology_max_die_per_package() > 1) + if (topology_max_dies_per_package() > 1) snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d-die-%d", topology_physical_package_id(id), topology_die_id(id)); else diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c index 1c5a429b2e3e..3d8d97dc28d9 100644 --- a/drivers/thermal/intel/intel_hfi.c +++ b/drivers/thermal/intel/intel_hfi.c @@ -604,7 +604,7 @@ void __init intel_hfi_init(void) /* There is one HFI instance per die/package. */ max_hfi_instances = topology_max_packages() * - topology_max_die_per_package(); + topology_max_dies_per_package(); /* * This allocation may fail. CPU hotplug callbacks must check diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 5ac5cb60bae6..e84678bfa84e 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -616,7 +616,7 @@ static int powerclamp_idle_injection_register(void) poll_pkg_cstate_enable = false; if (cpumask_equal(cpu_present_mask, idle_injection_cpu_mask)) { ii_dev = idle_inject_register_full(idle_injection_cpu_mask, idle_inject_update); - if (topology_max_packages() == 1 && topology_max_die_per_package() == 1) + if (topology_max_packages() == 1 && topology_max_dies_per_package() == 1) poll_pkg_cstate_enable = true; } else { ii_dev = idle_inject_register(idle_injection_cpu_mask); diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c index 2e06b26be4ef..68a1002abfe1 100644 --- a/drivers/thermal/intel/x86_pkg_temp_thermal.c +++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c @@ -495,7 +495,7 @@ static int __init pkg_temp_thermal_init(void) if (!x86_match_cpu(pkg_temp_thermal_ids)) return -ENODEV; - max_id = topology_max_packages() * topology_max_die_per_package(); + max_id = topology_max_packages() * topology_max_dies_per_package(); zones = kcalloc(max_id, sizeof(struct zone_device *), GFP_KERNEL); if (!zones) -- Gitee From 173fce0b03722aeffdb8e3223b52a503b22c6325 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:14 +0100 Subject: [PATCH 80/88] x86/cpu/topology: Provide __num_[cores|threads] _per_package ANBZ: #26668 commit fd43b8ae76e903c76f14d06eb939449bcc3f614f upstream. Expose properly accounted information and accessors so the fiddling with other topology variables can be replaced. Intel-SIG: commit fd43b8ae76e9 _per_package. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210253.120958987@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/include/asm/topology.h | 12 ++++++++++++ arch/x86/kernel/cpu/common.c | 6 ++++++ arch/x86/kernel/cpu/topology.c | 8 +++++++- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index a1cf47c53084..d7e39f3af1d3 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -146,6 +146,8 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); extern unsigned int __max_dies_per_package; extern unsigned int __max_logical_packages; extern unsigned int __max_threads_per_core; +extern unsigned int __num_threads_per_package; +extern unsigned int __num_cores_per_package; static inline unsigned int topology_max_packages(void) { @@ -160,6 +162,16 @@ static inline unsigned int topology_max_dies_per_package(void) extern struct cpumask __cpu_primary_thread_mask; #define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) +static inline unsigned int topology_num_cores_per_package(void) +{ + return __num_cores_per_package; +} + +static inline unsigned int topology_num_threads_per_package(void) +{ + return __num_threads_per_package; +} + #ifdef CONFIG_X86_LOCAL_APIC int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level); #else diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 231dc97d2611..19cc878ae2cd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -83,6 +83,12 @@ EXPORT_SYMBOL(__max_dies_per_package); unsigned int __max_logical_packages __ro_after_init = 1; EXPORT_SYMBOL(__max_logical_packages); +unsigned int __num_cores_per_package __ro_after_init = 1; +EXPORT_SYMBOL(__num_cores_per_package); + +unsigned int __num_threads_per_package __ro_after_init = 1; +EXPORT_SYMBOL(__num_threads_per_package); + static struct ppin_info { int feature; int msr_ppin_ctl; diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 75dae6c556d0..a35121e8a175 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -391,7 +391,7 @@ void __init topology_init_possible_cpus(void) unsigned int disabled = topo_info.nr_disabled_cpus; unsigned int cnta, cntb, cpu, allowed = 1; unsigned int total = assigned + disabled; - u32 apicid; + u32 apicid, firstid; if (!restrict_to_up()) { if (WARN_ON_ONCE(assigned > nr_cpu_ids)) { @@ -431,6 +431,12 @@ void __init topology_init_possible_cpus(void) __max_threads_per_core = DIV_ROUND_UP(cntb, cnta); pr_info("Max. threads per core: %3u\n", __max_threads_per_core); + firstid = find_first_bit(apic_maps[TOPO_SMT_DOMAIN].map, MAX_LOCAL_APIC); + __num_cores_per_package = topology_unit_count(firstid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN); + pr_info("Num. cores per package: %3u\n", __num_cores_per_package); + __num_threads_per_package = topology_unit_count(firstid, TOPO_SMT_DOMAIN, TOPO_PKG_DOMAIN); + pr_info("Num. threads per package: %3u\n", __num_threads_per_package); + pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled); if (topo_info.nr_rejected_cpus) pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus); -- Gitee From fb05d10a4aefe023289a97700d67fa16849baf4e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 Feb 2024 22:06:16 +0100 Subject: [PATCH 81/88] x86/cpu/topology: Get rid of cpuinfo::x86_max_cores ANBZ: #26668 commit 89b0f15f408f7c4ee98c1ec4c3224852fcbc3274 upstream. Now that __num_cores_per_package and __num_threads_per_package are available, cpuinfo::x86_max_cores and the related math all over the place can be replaced with the ready to consume data. Intel-SIG: commit 89b0f15f408f x86/cpu/topology: Get rid of cpuinfo::x86_max_cores. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Michael Kelley Tested-by: Sohil Mehta Link: https://lore.kernel.org/r/20240213210253.176147806@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- Documentation/arch/x86/topology.rst | 24 +++++++------------ arch/x86/events/intel/uncore_nhmex.c | 4 ++-- arch/x86/events/intel/uncore_snb.c | 8 +++---- arch/x86/events/intel/uncore_snbep.c | 16 ++++++------- arch/x86/include/asm/processor.h | 2 -- arch/x86/kernel/cpu/cacheinfo.c | 2 +- arch/x86/kernel/cpu/common.c | 1 - arch/x86/kernel/cpu/debugfs.c | 3 ++- arch/x86/kernel/cpu/mce/inject.c | 3 +-- arch/x86/kernel/cpu/microcode/intel.c | 2 +- arch/x86/kernel/cpu/topology_common.c | 3 --- arch/x86/kernel/smpboot.c | 2 +- .../gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 2 +- drivers/hwmon/fam15h_power.c | 2 +- 14 files changed, 31 insertions(+), 43 deletions(-) diff --git a/Documentation/arch/x86/topology.rst b/Documentation/arch/x86/topology.rst index 08ebf9edbfc1..7352ab89a55a 100644 --- a/Documentation/arch/x86/topology.rst +++ b/Documentation/arch/x86/topology.rst @@ -47,17 +47,21 @@ AMD nomenclature for package is 'Node'. Package-related topology information in the kernel: - - cpuinfo_x86.x86_max_cores: + - topology_num_threads_per_package() - The number of cores in a package. This information is retrieved via CPUID. + The number of threads in a package. - - cpuinfo_x86.x86_max_dies: + - topology_num_cores_per_package() - The number of dies in a package. This information is retrieved via CPUID. + The number of cores in a package. + + - topology_max_dies_per_package() + + The maximum number of dies in a package. - cpuinfo_x86.topo.die_id: - The physical ID of the die. This information is retrieved via CPUID. + The physical ID of the die. - cpuinfo_x86.topo.pkg_id: @@ -96,16 +100,6 @@ are SMT- or CMT-type threads. AMDs nomenclature for a CMT core is "Compute Unit". The kernel always uses "core". -Core-related topology information in the kernel: - - - smp_num_siblings: - - The number of threads in a core. The number of threads in a package can be - calculated by:: - - threads_per_package = cpuinfo_x86.x86_max_cores * smp_num_siblings - - Threads ======= A thread is a single scheduling unit. It's the equivalent to a logical Linux diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c index 56eea2c66cfb..92da8aaa5966 100644 --- a/arch/x86/events/intel/uncore_nhmex.c +++ b/arch/x86/events/intel/uncore_nhmex.c @@ -1221,8 +1221,8 @@ void nhmex_uncore_cpu_init(void) uncore_nhmex = true; else nhmex_uncore_mbox.event_descs = wsmex_uncore_mbox_events; - if (nhmex_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - nhmex_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (nhmex_uncore_cbox.num_boxes > topology_num_cores_per_package()) + nhmex_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = nhmex_msr_uncores; } /* end of Nehalem-EX uncore support */ diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 3725bb6571c4..ade3cb2dd735 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -364,8 +364,8 @@ static struct intel_uncore_type *snb_msr_uncores[] = { void snb_uncore_cpu_init(void) { uncore_msr_uncores = snb_msr_uncores; - if (snb_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - snb_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (snb_uncore_cbox.num_boxes > topology_num_cores_per_package()) + snb_uncore_cbox.num_boxes = topology_num_cores_per_package(); } static void skl_uncore_msr_init_box(struct intel_uncore_box *box) @@ -428,8 +428,8 @@ static struct intel_uncore_type *skl_msr_uncores[] = { void skl_uncore_cpu_init(void) { uncore_msr_uncores = skl_msr_uncores; - if (skl_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - skl_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (skl_uncore_cbox.num_boxes > topology_num_cores_per_package()) + skl_uncore_cbox.num_boxes = topology_num_cores_per_package(); snb_uncore_arb.ops = &skl_uncore_msr_ops; } diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 02ef6d065eb1..08faac369397 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1174,8 +1174,8 @@ static struct intel_uncore_type *snbep_msr_uncores[] = { void snbep_uncore_cpu_init(void) { - if (snbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - snbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (snbep_uncore_cbox.num_boxes > topology_num_cores_per_package()) + snbep_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = snbep_msr_uncores; } @@ -1839,8 +1839,8 @@ static struct intel_uncore_type *ivbep_msr_uncores[] = { void ivbep_uncore_cpu_init(void) { - if (ivbep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - ivbep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (ivbep_uncore_cbox.num_boxes > topology_num_cores_per_package()) + ivbep_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = ivbep_msr_uncores; } @@ -2911,8 +2911,8 @@ static bool hswep_has_limit_sbox(unsigned int device) void hswep_uncore_cpu_init(void) { - if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (hswep_uncore_cbox.num_boxes > topology_num_cores_per_package()) + hswep_uncore_cbox.num_boxes = topology_num_cores_per_package(); /* Detect 6-8 core systems with only two SBOXes */ if (hswep_has_limit_sbox(HSWEP_PCU_DID)) @@ -3274,8 +3274,8 @@ static struct event_constraint bdx_uncore_pcu_constraints[] = { void bdx_uncore_cpu_init(void) { - if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) - bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; + if (bdx_uncore_cbox.num_boxes > topology_num_cores_per_package()) + bdx_uncore_cbox.num_boxes = topology_num_cores_per_package(); uncore_msr_uncores = bdx_msr_uncores; /* Detect systems with no SBOXes */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index bfdce9ff6657..5ae83a398318 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -163,8 +163,6 @@ struct cpuinfo_x86 { unsigned long loops_per_jiffy; /* protected processor identification number */ u64 ppin; - /* cpuid returned max cores value: */ - u16 x86_max_cores; u16 x86_clflush_size; /* number of cores as seen by the OS: */ u16 booted_cores; diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 2eeb3448a99e..af37688aaffe 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -301,7 +301,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, eax->split.type = types[leaf]; eax->split.level = levels[leaf]; eax->split.num_threads_sharing = 0; - eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1; + eax->split.num_cores_on_die = topology_num_cores_per_package(); if (assoc == 0xffff) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 19cc878ae2cd..0a56741e5caf 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1846,7 +1846,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_model = c->x86_stepping = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_max_cores = 1; c->topo.llc_id = BAD_APICID; c->topo.l2c_id = BAD_APICID; #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/cpu/debugfs.c b/arch/x86/kernel/cpu/debugfs.c index f40f3eecebc6..3baf3e435834 100644 --- a/arch/x86/kernel/cpu/debugfs.c +++ b/arch/x86/kernel/cpu/debugfs.c @@ -28,7 +28,8 @@ static int cpu_debug_show(struct seq_file *m, void *p) seq_printf(m, "l2c_id: %u\n", c->topo.l2c_id); seq_printf(m, "amd_node_id: %u\n", c->topo.amd_node_id); seq_printf(m, "amd_nodes_per_pkg: %u\n", topology_amd_nodes_per_pkg()); - seq_printf(m, "max_cores: %u\n", c->x86_max_cores); + seq_printf(m, "num_threads: %u\n", __num_threads_per_package); + seq_printf(m, "num_cores: %u\n", __num_cores_per_package); seq_printf(m, "max_dies_per_pkg: %u\n", __max_dies_per_package); seq_printf(m, "max_threads_per_core:%u\n", __max_threads_per_core); return 0; diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 1e327881073f..94953d749475 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -430,10 +430,9 @@ static void trigger_thr_int(void *info) static u32 get_nbc_for_node(int node_id) { - struct cpuinfo_x86 *c = &boot_cpu_data; u32 cores_per_node; - cores_per_node = (c->x86_max_cores * __max_threads_per_core) / topology_amd_nodes_per_pkg(); + cores_per_node = topology_num_threads_per_package() / topology_amd_nodes_per_pkg(); return cores_per_node * node_id; } diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index f27312b4bbb1..1dfa55b959fa 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -986,7 +986,7 @@ static __init void calc_llc_size_per_core(struct cpuinfo_x86 *c) { u64 llc_size = c->x86_cache_size * 1024ULL; - do_div(llc_size, c->x86_max_cores); + do_div(llc_size, topology_num_cores_per_package()); llc_size_per_core = (unsigned int)llc_size; } diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index 5a865de64846..7899610d7fb9 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -162,9 +162,6 @@ static void topo_set_ids(struct topo_scan *tscan) c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> x86_topo_system.dom_shifts[TOPO_SMT_DOMAIN]; - /* Maximum number of cores on this package */ - c->x86_max_cores = topology_unit_count(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN); - c->topo.amd_node_id = tscan->amd_node_id; if (c->x86_vendor == X86_VENDOR_AMD) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2abb3b898db7..6cfe26179515 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -553,7 +553,7 @@ static void __init build_sched_topology(void) void set_cpu_sibling_map(int cpu) { bool has_smt = __max_threads_per_core > 1; - bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; + bool has_mp = has_smt || topology_num_cores_per_package() > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; int i, threads; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 454216bd6f1d..5d60fd6672c6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -475,7 +475,7 @@ static int vangogh_init_smc_tables(struct smu_context *smu) #ifdef CONFIG_X86 /* AMD x86 APU only */ - smu->cpu_core_num = boot_cpu_data.x86_max_cores; + smu->cpu_core_num = topology_num_cores_per_package(); #else smu->cpu_core_num = 4; #endif diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c index 6307112c2c0c..9ed2c4b6734e 100644 --- a/drivers/hwmon/fam15h_power.c +++ b/drivers/hwmon/fam15h_power.c @@ -209,7 +209,7 @@ static ssize_t power1_average_show(struct device *dev, * With the new x86 topology modelling, x86_max_cores is the * compute unit number. */ - cu_num = boot_cpu_data.x86_max_cores; + cu_num = topology_num_cores_per_package(); ret = read_registers(data); if (ret) -- Gitee From 5d40df4cd6a6b7d428f7d0743fc03ffd60c3800a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 6 Mar 2024 12:17:02 +0100 Subject: [PATCH 82/88] x86/topology: Ignore non-present APIC IDs in a present package ANBZ: #26668 commit f0551af021308a2a1163dc63d1f1bba3594208bd upstream. Borislav reported that one of his systems has a broken MADT table which advertises eight present APICs and 24 non-present APICs in the same package. The non-present ones are considered hot-pluggable by the topology evaluation code, which is obviously bogus as there is no way to hot-plug within the same package. As the topology evaluation code accounts for hot-pluggable CPUs in a package, the maximum number of cores per package is computed wrong, which in turn causes the uncore performance counter driver to access non-existing MSRs. It will probably confuse other entities which rely on the maximum number of cores and threads per package too. Cure this by ignoring hot-pluggable APIC IDs within a present package. In theory it would be reasonable to just do this unconditionally, but then there is this thing called reality^Wvirtualization which ruins everything. Virtualization is the only existing user of "physical" hotplug and the virtualization tools allow the above scenario. Whether that is actually in use or not is unknown. As it can be argued that the virtualization case is not affected by the issues which exposed the reported problem, allow the bogosity if the kernel determined that it is running in a VM for now. Fixes: 89b0f15f408f ("x86/cpu/topology: Get rid of cpuinfo::x86_max_cores") Reported-by: Borislav Petkov (AMD) Intel-SIG: commit f0551af02130 x86/topology: Ignore non-present APIC IDs in a present package. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/87a5nbvccx.ffs@tglx [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 39 ++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index a35121e8a175..b68cc2563a35 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -157,6 +158,20 @@ static __init bool check_for_real_bsp(u32 apic_id) return true; } +static unsigned int topo_unit_count(u32 lvlid, enum x86_topology_domains at_level, + unsigned long *map) +{ + unsigned int id, end, cnt = 0; + + /* Calculate the exclusive end */ + end = lvlid + (1U << x86_topo_system.dom_shifts[at_level]); + + /* Unfortunately there is no bitmap_weight_range() */ + for (id = find_next_bit(map, end, lvlid); id < end; id = find_next_bit(map, end, ++id)) + cnt++; + return cnt; +} + static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) { int cpu, dom; @@ -178,6 +193,20 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) cpuid_to_apicid[cpu] = apic_id; topo_set_cpuids(cpu, apic_id, acpi_id); } else { + u32 pkgid = topo_apicid(apic_id, TOPO_PKG_DOMAIN); + + /* + * Check for present APICs in the same package when running + * on bare metal. Allow the bogosity in a guest. + */ + if (hypervisor_is_type(X86_HYPER_NATIVE) && + topo_unit_count(pkgid, TOPO_PKG_DOMAIN, phys_cpu_present_map)) { + pr_info_once("Ignoring hot-pluggable APIC ID %x in present package.\n", + apic_id); + topo_info.nr_rejected_cpus++; + return; + } + topo_info.nr_disabled_cpus++; } @@ -280,7 +309,6 @@ unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_uni { /* Remove the bits below @at_level to get the proper level ID of @apicid */ unsigned int lvlid = topo_apicid(apicid, at_level); - unsigned int id, end, cnt = 0; if (lvlid >= MAX_LOCAL_APIC) return 0; @@ -290,14 +318,7 @@ unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_uni return 0; if (which_units == at_level) return 1; - - /* Calculate the exclusive end */ - end = lvlid + (1U << x86_topo_system.dom_shifts[at_level]); - /* Unfortunately there is no bitmap_weight_range() */ - for (id = find_next_bit(apic_maps[which_units].map, end, lvlid); - id < end; id = find_next_bit(apic_maps[which_units].map, end, ++id)) - cnt++; - return cnt; + return topo_unit_count(lvlid, at_level, apic_maps[which_units].map); } #ifdef CONFIG_ACPI_HOTPLUG_CPU -- Gitee From ccc255ddcf371b712ad088c09817985e6f3b2055 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 22 Mar 2024 19:56:36 +0100 Subject: [PATCH 83/88] x86/topology: Don't evaluate logical IDs during early boot ANBZ: #26668 commit 7af541cee1e0eb48c6eb439bc6309175339fa96f upstream. The local APICs have not yet been enumerated so the logical ID evaluation from the topology bitmaps does not work and would return an error code. Skip the evaluation during the early boot CPUID evaluation and only apply it on the final run. Fixes: 380414be78bf ("x86/cpu/topology: Use topology logical mapping mechanism") Intel-SIG: commit 7af541cee1e0 x86/topology: Don't evaluate logical IDs during early boot. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov (AMD) Tested-by: Guenter Roeck Link: https://lore.kernel.org/r/20240322185305.186943142@linutronix.de [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology_common.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index 7899610d7fb9..02ed49f83902 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -147,7 +147,7 @@ static void parse_topology(struct topo_scan *tscan, bool early) } } -static void topo_set_ids(struct topo_scan *tscan) +static void topo_set_ids(struct topo_scan *tscan, bool early) { struct cpuinfo_x86 *c = tscan->c; u32 apicid = c->topo.apicid; @@ -155,8 +155,10 @@ static void topo_set_ids(struct topo_scan *tscan) c->topo.pkg_id = topo_shift_apicid(apicid, TOPO_PKG_DOMAIN); c->topo.die_id = topo_shift_apicid(apicid, TOPO_DIE_DOMAIN); - c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN); - c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN); + if (!early) { + c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN); + c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN); + } /* Package relative core ID */ c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> @@ -194,7 +196,7 @@ void cpu_parse_topology(struct cpuinfo_x86 *c) tscan.dom_shifts[dom], x86_topo_system.dom_shifts[dom]); } - topo_set_ids(&tscan); + topo_set_ids(&tscan, false); } void __init cpu_init_topology(struct cpuinfo_x86 *c) @@ -215,7 +217,7 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c) x86_topo_system.dom_size[dom] = 1U << sft; } - topo_set_ids(&tscan); + topo_set_ids(&tscan, true); /* * AMD systems have Nodes per package which cannot be mapped to -- Gitee From 709165c1784641bbb66245a727b3628f5bb1fec5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 8 Apr 2024 15:22:01 +0200 Subject: [PATCH 84/88] x86/topology: Don't update cpu_possible_map in topo_set_cpuids() ANBZ: #26668 commit a9025cd1c673a8d6eefc79d911075b8b452eba8f upstream. topo_set_cpuids() updates cpu_present_map and cpu_possible map. It is invoked during enumeration and "physical hotplug" operations. In the latter case this results in a kernel crash because cpu_possible_map is marked read only after init completes. There is no reason to update cpu_possible_map in that function. During enumeration cpu_possible_map is not relevant and gets fully initialized after enumeration completed. On "physical hotplug" the bit is already set because the kernel allows only CPUs to be plugged which have been enumerated and associated to a CPU number during early boot. Remove the bogus update of cpu_possible_map. Fixes: 0e53e7b656cf ("x86/cpu/topology: Sanitize the APIC admission logic") Reported-by: Jonathan Cameron Intel-SIG: commit a9025cd1c673 x86/topology: Don't update cpu_possible_map in topo_set_cpuids(). x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/87ttkc6kwx.ffs@tglx [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index b68cc2563a35..9f2d1907ffa9 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -123,7 +123,6 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id; early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; #endif - set_cpu_possible(cpu, true); set_cpu_present(cpu, true); } @@ -210,7 +209,11 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) topo_info.nr_disabled_cpus++; } - /* Register present and possible CPUs in the domain maps */ + /* + * Register present and possible CPUs in the domain + * maps. cpu_possible_map will be updated in + * topology_init_possible_cpus() after enumeration is done. + */ for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map); } -- Gitee From ce13a4da3350a3f8ffee0a0264cae2679dbee0de Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 2 May 2024 16:39:47 +0200 Subject: [PATCH 85/88] x86/xen/smp_pv: Register the boot CPU APIC properly ANBZ: #26668 commit 8a95db3bf8a32186fe448b1a934afbd5f1da0263 upstream. The topology core expects the boot APIC to be registered from earhy APIC detection first and then again when the firmware tables are evaluated. This is used for detecting the real BSP CPU on a kexec kernel. The recent conversion of XEN/PV to register fake APIC IDs failed to register the boot CPU APIC correctly as it only registers it once. This causes the BSP detection mechanism to trigger wrongly: CPU topo: Boot CPU APIC ID not the first enumerated APIC ID: 0 > 1 Additionally this results in one CPU being ignored. Register the boot CPU APIC twice so that the XEN/PV fake enumeration behaves like real firmware. Reported-by: Juergen Gross Fixes: e75307023466 ("x86/xen/smp_pv: Register fake APICs") Intel-SIG: commit 8a95db3bf8a3 x86/xen/smp_pv: Register the boot CPU APIC properly. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/87a5l8s2fg.ffs@tglx Signed-off-by: Juergen Gross [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/xen/smp_pv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 27d1a5b7f571..ac41d83b38d3 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -154,9 +154,9 @@ static void __init xen_pv_smp_config(void) u32 apicid = 0; int i; - topology_register_boot_apic(apicid++); + topology_register_boot_apic(apicid); - for (i = 1; i < nr_cpu_ids; i++) + for (i = 0; i < nr_cpu_ids; i++) topology_register_apic(apicid++, CPU_ACPIID_INVALID, true); /* Pretend to be a proper enumerated system */ -- Gitee From b027b7efdeca56c4c2b63b35f6aed745a8be62a1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 17 May 2024 16:40:36 +0200 Subject: [PATCH 86/88] x86/topology: Handle bogus ACPI tables correctly ANBZ: #26668 commit 9d22c96316ac59ed38e80920c698fed38717b91b upstream. The ACPI specification clearly states how the processors should be enumerated in the MADT: "To ensure that the boot processor is supported post initialization, two guidelines should be followed. The first is that OSPM should initialize processors in the order that they appear in the MADT. The second is that platform firmware should list the boot processor as the first processor entry in the MADT. ... Failure of OSPM implementations and platform firmware to abide by these guidelines can result in both unpredictable and non optimal platform operation." The kernel relies on that ordering to detect the real BSP on crash kernels which is important to avoid sending a INIT IPI to it as that would cause a full machine reset. On a Dell XPS 16 9640 the BIOS ignores this rule and enumerates the CPUs in the wrong order. As a consequence the kernel falsely detects a crash kernel and disables the corresponding CPU. Prevent this by checking the IA32_APICBASE MSR for the BSP bit on the boot CPU. If that bit is set, then the MADT based BSP detection can be safely ignored. If the kernel detects a mismatch between the BSP bit and the first enumerated MADT entry then emit a firmware bug message. This obviously also has to be taken into account when the boot APIC ID and the first enumerated APIC ID match. If the boot CPU does not have the BSP bit set in the APICBASE MSR then there is no way for the boot CPU to determine which of the CPUs is the real BSP. Sending an INIT to the real BSP would reset the machine so the only sane way to deal with that is to limit the number of CPUs to one and emit a corresponding warning message. Fixes: 5c5682b9f87a ("x86/cpu: Detect real BSP on crash kernels") Reported-by: Carsten Tolkmit Intel-SIG: commit 9d22c96316ac x86/topology: Handle bogus ACPI tables correctly. x86/apic: Rework APIC registration Signed-off-by: Thomas Gleixner Tested-by: Carsten Tolkmit Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/87le48jycb.ffs@tglx Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218837 [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 53 ++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 9f2d1907ffa9..149be811e4e8 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -128,6 +128,9 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) static __init bool check_for_real_bsp(u32 apic_id) { + bool is_bsp = false, has_apic_base = boot_cpu_data.x86 >= 6; + u64 msr; + /* * There is no real good way to detect whether this a kdump() * kernel, but except on the Voyager SMP monstrosity which is not @@ -144,17 +147,61 @@ static __init bool check_for_real_bsp(u32 apic_id) if (topo_info.real_bsp_apic_id != BAD_APICID) return false; + /* + * Check whether the enumeration order is broken by evaluating the + * BSP bit in the APICBASE MSR. If the CPU does not have the + * APICBASE MSR then the BSP detection is not possible and the + * kernel must rely on the firmware enumeration order. + */ + if (has_apic_base) { + rdmsrl(MSR_IA32_APICBASE, msr); + is_bsp = !!(msr & MSR_IA32_APICBASE_BSP); + } + if (apic_id == topo_info.boot_cpu_apic_id) { - topo_info.real_bsp_apic_id = apic_id; - return false; + /* + * If the boot CPU has the APIC BSP bit set then the + * firmware enumeration is agreeing. If the CPU does not + * have the APICBASE MSR then the only choice is to trust + * the enumeration order. + */ + if (is_bsp || !has_apic_base) { + topo_info.real_bsp_apic_id = apic_id; + return false; + } + /* + * If the boot APIC is enumerated first, but the APICBASE + * MSR does not have the BSP bit set, then there is no way + * to discover the real BSP here. Assume a crash kernel and + * limit the number of CPUs to 1 as an INIT to the real BSP + * would reset the machine. + */ + pr_warn("Enumerated BSP APIC %x is not marked in APICBASE MSR\n", apic_id); + pr_warn("Assuming crash kernel. Limiting to one CPU to prevent machine INIT\n"); + set_nr_cpu_ids(1); + goto fwbug; } - pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x > %x\n", + pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x != %x\n", topo_info.boot_cpu_apic_id, apic_id); + + if (is_bsp) { + /* + * The boot CPU has the APIC BSP bit set. Use it and complain + * about the broken firmware enumeration. + */ + topo_info.real_bsp_apic_id = topo_info.boot_cpu_apic_id; + goto fwbug; + } + pr_warn("Crash kernel detected. Disabling real BSP to prevent machine INIT\n"); topo_info.real_bsp_apic_id = apic_id; return true; + +fwbug: + pr_warn(FW_BUG "APIC enumeration order not specification compliant\n"); + return false; } static unsigned int topo_unit_count(u32 lvlid, enum x86_topology_domains at_level, -- Gitee From f15bd298d4f8c6667cf02b7d207c152998b325a6 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Mon, 2 Dec 2024 14:58:45 +0000 Subject: [PATCH 87/88] x86/cpu/topology: Remove limit of CPUs due to disabled IO/APIC ANBZ: #26668 commit 73da582a476ea6e3512f89f8ed57dfed945829a2 upstream. The rework of possible CPUs management erroneously disabled SMP when the IO/APIC is disabled either by the 'noapic' command line parameter or during IO/APIC setup. SMP is possible without IO/APIC. Remove the ioapic_is_disabled conditions from the relevant possible CPU management code paths to restore the orgininal behaviour. Fixes: 7c0edad3643f ("x86/cpu/topology: Rework possible CPU management") Intel-SIG: commit 73da582a476e x86/cpu/topology: Remove limit of CPUs due to disabled IO/APIC. x86/apic: Rework APIC registration Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20241202145905.1482-1-ffmancera@riseup.net [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/cpu/topology.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 149be811e4e8..5a29c1d00529 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -427,8 +427,8 @@ void __init topology_apply_cmdline_limits_early(void) { unsigned int possible = nr_cpu_ids; - /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */ - if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled) + /* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' */ + if (!setup_max_cpus || apic_is_disabled) possible = 1; /* 'possible_cpus=N' */ @@ -442,7 +442,7 @@ void __init topology_apply_cmdline_limits_early(void) static __init bool restrict_to_up(void) { - if (!smp_found_config || ioapic_is_disabled) + if (!smp_found_config) return true; /* * XEN PV is special as it does not advertise the local APIC -- Gitee From 0913987f804b461cefdee947242783168e768623 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20Van=C4=9Bk?= Date: Mon, 7 Apr 2025 15:24:27 +0200 Subject: [PATCH 88/88] x86/acpi: Don't limit CPUs to 1 for Xen PV guests due to disabled ACPI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ANBZ: #26668 commit 8b37357a78d7fa13d88ea822b35b40137da1c85e upstream. Xen disables ACPI for PV guests in DomU, which causes acpi_mps_check() to return 1 when CONFIG_X86_MPPARSE is not set. As a result, the local APIC is disabled and the guest is later limited to a single vCPU, despite being configured with more. This regression was introduced in version 6.9 in commit 7c0edad3643f ("x86/cpu/topology: Rework possible CPU management"), which added an early check that limits CPUs to 1 if apic_is_disabled. Update the acpi_mps_check() logic to return 0 early when running as a Xen PV guest in DomU, preventing APIC from being disabled in this specific case and restoring correct multi-vCPU behaviour. Fixes: 7c0edad3643f ("x86/cpu/topology: Rework possible CPU management") Intel-SIG: commit 8b37357a78d7 x86/acpi: Don't limit CPUs to 1 for Xen PV guests due to disabled ACPI. x86/apic: Rework APIC registration Signed-off-by: Petr Vaněk Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250407132445.6732-2-arkamar@atlas.cz [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- arch/x86/kernel/acpi/boot.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 25cd809d38cb..3baf2613fd55 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -23,6 +23,8 @@ #include #include +#include + #include #include #include @@ -1778,6 +1780,15 @@ int __init acpi_mps_check(void) { #if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_X86_MPPARSE) /* mptable code is not built-in*/ + + /* + * Xen disables ACPI in PV DomU guests but it still emulates APIC and + * supports SMP. Returning early here ensures that APIC is not disabled + * unnecessarily and the guest is not limited to a single vCPU. + */ + if (xen_pv_domain() && !xen_initial_domain()) + return 0; + if (acpi_disabled || acpi_noirq) { pr_warn("MPS support code is not built-in, using acpi=off or acpi=noirq or pci=noacpi may have problem\n"); return 1; -- Gitee