From 3ece721331f107e5ff3b01f3248532745aacea2f Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Tue, 5 Dec 2023 13:52:33 +0800 Subject: [PATCH 1/8] x86/cpufeatures: Enumerate Intel Hybrid Technology feature bit mainline inclusion from mainline-v5.13-rc1 commit a161545ab53b174c016b0eb63c2895266665d2f6 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I8LVOX CVE: N/A -------------------------------- Intel-SIG: commit a161545ab53b x86/cpufeatures: Enumerate Intel Hybrid Technology feature bit. Cluster scheduler feature backport. -------------------------------- Add feature enumeration to identify a processor with Intel Hybrid Technology: one in which CPUs of more than one type are the same package. On a hybrid processor, all CPUs support the same homogeneous (i.e., symmetric) instruction set. All CPUs enumerate the same features in CPUID. Thus, software (user space and kernel) can run and migrate to any CPU in the system as well as utilize any of the enumerated features without any change or special provisions. The main difference among CPUs in a hybrid processor are power and performance properties. Signed-off-by: Ricardo Neri Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Tony Luck Reviewed-by: Len Brown Acked-by: Borislav Petkov Link: https://lkml.kernel.org/r/1618237865-33448-2-git-send-email-kan.liang@linux.intel.com Signed-off-by: Aubrey Li --- arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index ed1c49c6839c..b62fe2a12963 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -433,6 +433,7 @@ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ +#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ -- Gitee From f4128ae8e6ff03b4c805707fe75d0345797f7f53 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Fri, 19 Jan 2024 15:50:54 +0800 Subject: [PATCH 2/8] sched: Add cluster scheduler level for x86 mainline inclusion from mainline-v5.16-rc1 commit 66558b730f2533cc2bf2b74d51f5f80b81e2bad0 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I8LVOX CVE: N/A -------------------------------- Intel-SIG: commit 66558b730f25 sched: Add cluster scheduler level for x86. Cluster scheduler feature backport. -------------------------------- There are x86 CPU architectures (e.g. Jacobsville) where L2 cahce is shared among a cluster of cores instead of being exclusive to one single core. To prevent oversubscription of L2 cache, load should be balanced between such L2 clusters, especially for tasks with no shared data. On benchmark such as SPECrate mcf test, this change provides a boost to performance especially on medium load system on Jacobsville. on a Jacobsville that has 24 Atom cores, arranged into 6 clusters of 4 cores each, the benchmark number is as follow: Improvement over baseline kernel for mcf_r copies run time base rate 1 -0.1% -0.2% 6 25.1% 25.1% 12 18.8% 19.0% 24 0.3% 0.3% So this looks pretty good. In terms of the system's task distribution, some pretty bad clumping can be seen for the vanilla kernel without the L2 cluster domain for the 6 and 12 copies case. With the extra domain for cluster, the load does get evened out between the clusters. Note this patch isn't an universal win as spreading isn't necessarily a win, particually for those workload who can benefit from packing. Signed-off-by: Tim Chen Signed-off-by: Barry Song Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210924085104.44806-4-21cnbao@gmail.com Signed-off-by: Aubrey Li --- arch/x86/Kconfig | 11 +++++++++ arch/x86/include/asm/smp.h | 7 ++++++ arch/x86/include/asm/topology.h | 3 +++ arch/x86/kernel/cpu/cacheinfo.c | 1 + arch/x86/kernel/cpu/common.c | 5 ++++ arch/x86/kernel/smpboot.c | 44 ++++++++++++++++++++++++++++++++- 6 files changed, 70 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 04046abd1ccf..5d1efac9061a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1010,6 +1010,17 @@ config NR_CPUS This is purely to save memory: each supported CPU adds about 8KB to the kernel image. +config SCHED_CLUSTER + bool "Cluster scheduler support" + depends on SMP + default y + help + Cluster scheduler support improves the CPU scheduler's decision + making when dealing with machines that have clusters of CPUs. + Cluster usually means a couple of CPUs which are placed closely + by sharing mid-level caches, last-level cache tags or internal + busses. + config SCHED_SMT def_bool y if SMP diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 630ff08532be..08b0e90623ad 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -16,7 +16,9 @@ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_die_map); /* cpus sharing the last level cache: */ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); +DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map); DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); +DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id); DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); static inline struct cpumask *cpu_llc_shared_mask(int cpu) @@ -24,6 +26,11 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu) return per_cpu(cpu_llc_shared_map, cpu); } +static inline struct cpumask *cpu_l2c_shared_mask(int cpu) +{ + return per_cpu(cpu_l2c_shared_map, cpu); +} + DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid); diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 1f6caceccbb0..833575f92eac 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -103,6 +103,7 @@ static inline void setup_node_to_cpumask_map(void) { } #include extern const struct cpumask *cpu_coregroup_mask(int cpu); +extern const struct cpumask *cpu_clustergroup_mask(int cpu); #define topology_logical_package_id(cpu) (cpu_data(cpu).logical_proc_id) #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) @@ -113,7 +114,9 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); extern unsigned int __max_die_per_package; #ifdef CONFIG_SMP +#define topology_cluster_id(cpu) (per_cpu(cpu_l2c_id, cpu)) #define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu)) +#define topology_cluster_cpumask(cpu) (cpu_clustergroup_mask(cpu)) #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) #define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index f33d55338da4..e66b0d1cc2d6 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -865,6 +865,7 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) l2 = new_l2; #ifdef CONFIG_SMP per_cpu(cpu_llc_id, cpu) = l2_id; + per_cpu(cpu_l2c_id, cpu) = l2_id; #endif } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5707b0d59982..48bd58e3f518 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -85,6 +85,7 @@ EXPORT_SYMBOL(smp_num_siblings); /* Last level cache ID of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; +EXPORT_SYMBOL(cpu_llc_id); u16 get_llc_id(unsigned int cpu) { @@ -92,6 +93,10 @@ u16 get_llc_id(unsigned int cpu) } EXPORT_SYMBOL_GPL(get_llc_id); +/* L2 cache ID of each logical CPU */ +DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id) = BAD_APICID; +EXPORT_SYMBOL(cpu_l2c_id); + /* correctly size the local cpu masks */ void __init setup_cpu_local_masks(void) { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2b37b16685ed..0f281bec38dc 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -97,6 +97,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_die_map); DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); +DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map); + /* Per CPU bogomips and other parameters */ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); @@ -480,6 +482,21 @@ static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) return false; } +static bool match_l2c(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) +{ + int cpu1 = c->cpu_index, cpu2 = o->cpu_index; + + /* Do not match if we do not have a valid APICID for cpu: */ + if (per_cpu(cpu_l2c_id, cpu1) == BAD_APICID) + return false; + + /* Do not match if L2 cache id does not match: */ + if (per_cpu(cpu_l2c_id, cpu1) != per_cpu(cpu_l2c_id, cpu2)) + return false; + + return topology_sane(c, o, "l2c"); +} + /* * Unlike the other levels, we do not enforce keeping a * multicore group inside a NUMA node. If this happens, we will @@ -539,7 +556,7 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) } -#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC) +#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_CLUSTER) || defined(CONFIG_SCHED_MC) static inline int x86_sched_itmt_flags(void) { return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0; @@ -557,12 +574,21 @@ static int x86_smt_flags(void) return cpu_smt_flags() | x86_sched_itmt_flags(); } #endif +#ifdef CONFIG_SCHED_CLUSTER +static int x86_cluster_flags(void) +{ + return cpu_cluster_flags() | x86_sched_itmt_flags(); +} +#endif #endif static struct sched_domain_topology_level x86_numa_in_package_topology[] = { #ifdef CONFIG_SCHED_SMT { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) }, #endif +#ifdef CONFIG_SCHED_CLUSTER + { cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) }, +#endif #ifdef CONFIG_SCHED_MC { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) }, #endif @@ -573,6 +599,9 @@ static struct sched_domain_topology_level x86_topology[] = { #ifdef CONFIG_SCHED_SMT { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) }, #endif +#ifdef CONFIG_SCHED_CLUSTER + { cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) }, +#endif #ifdef CONFIG_SCHED_MC { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) }, #endif @@ -600,6 +629,7 @@ void set_cpu_sibling_map(int cpu) if (!has_mp) { cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu)); cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); + cpumask_set_cpu(cpu, cpu_l2c_shared_mask(cpu)); cpumask_set_cpu(cpu, topology_core_cpumask(cpu)); cpumask_set_cpu(cpu, topology_die_cpumask(cpu)); c->booted_cores = 1; @@ -618,6 +648,9 @@ void set_cpu_sibling_map(int cpu) if ((i == cpu) || (has_mp && match_llc(c, o))) link_mask(cpu_llc_shared_mask, cpu, i); + if ((i == cpu) || (has_mp && match_l2c(c, o))) + link_mask(cpu_l2c_shared_mask, cpu, i); + if ((i == cpu) || (has_mp && match_die(c, o))) link_mask(topology_die_cpumask, cpu, i); } @@ -665,6 +698,11 @@ const struct cpumask *cpu_coregroup_mask(int cpu) return cpu_llc_shared_mask(cpu); } +const struct cpumask *cpu_clustergroup_mask(int cpu) +{ + return cpu_l2c_shared_mask(cpu); +} + static void impress_friends(void) { int cpu; @@ -1350,6 +1388,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); + zalloc_cpumask_var(&per_cpu(cpu_l2c_shared_map, i), GFP_KERNEL); } /* @@ -1574,7 +1613,10 @@ static void remove_siblinginfo(int cpu) cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling)); for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling)); + for_each_cpu(sibling, cpu_l2c_shared_mask(cpu)) + cpumask_clear_cpu(cpu, cpu_l2c_shared_mask(sibling)); cpumask_clear(cpu_llc_shared_mask(cpu)); + cpumask_clear(cpu_l2c_shared_mask(cpu)); cpumask_clear(topology_sibling_cpumask(cpu)); cpumask_clear(topology_core_cpumask(cpu)); cpumask_clear(topology_die_cpumask(cpu)); -- Gitee From 79f23173ba49f4ca001f57c906460bbe669f8327 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Dec 2023 14:18:39 +0800 Subject: [PATCH 3/8] sched,x86: Fix L2 cache mask mainline inclusion from mainline-v5.16-rc1 commit 55409ac5c371c6403012d5f4df5e7c6cf0e7dce6 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I8LVOX CVE: N/A -------------------------------- Intel-SIG: commit 55409ac5c371 sched,x86: Fix L2 cache mask. Cluster scheduler feature backport. -------------------------------- Currently AMD/Hygon do not populate l2c_id, this means that for SMT enabled systems they report an L2 per thread. This is ofcourse not true but was harmless so far. However, since commit: 66558b730f25 ("sched: Add cluster scheduler level for x86") the scheduler topology setup requires: SMT <= L2 <= LLC Which leads to noisy warnings and possibly weird behaviour on affected chips. Therefore change the topology generation such that if l2c_id is not populated it follows the SMT topology, thereby satisfying the constraint. Fixes: 66558b730f25 ("sched: Add cluster scheduler level for x86") Reported-by: Tom Lendacky Signed-off-by: Peter Zijlstra (Intel) Tested-by: Tom Lendacky Signed-off-by: Aubrey Li --- arch/x86/kernel/smpboot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0f281bec38dc..867b39bd4477 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -486,9 +486,9 @@ static bool match_l2c(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) { int cpu1 = c->cpu_index, cpu2 = o->cpu_index; - /* Do not match if we do not have a valid APICID for cpu: */ + /* If the arch didn't set up l2c_id, fall back to SMT */ if (per_cpu(cpu_l2c_id, cpu1) == BAD_APICID) - return false; + return match_smt(c, o); /* Do not match if L2 cache id does not match: */ if (per_cpu(cpu_l2c_id, cpu1) != per_cpu(cpu_l2c_id, cpu2)) -- Gitee From 55516c5b7ad88494e0e2eee4b3955a023e95382e Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 5 Dec 2023 14:24:14 +0800 Subject: [PATCH 4/8] x86/smp: Factor out parts of native_smp_prepare_cpus() mainline inclusion from mainline-v5.16-rc1 commit ce2612b6706b4d0a70732795253722e3bd4ed953 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I8LVOX CVE: N/A -------------------------------- Intel-SIG: commit ce2612b6706b x86/smp: Factor out parts of native_smp_prepare_cpus(). Cluster scheduler feature backport. -------------------------------- Commit 66558b730f25 ("sched: Add cluster scheduler level for x86") introduced cpu_l2c_shared_map mask which is expected to be initialized by smp_op.smp_prepare_cpus(). That commit only updated native_smp_prepare_cpus() version but not xen_pv_smp_prepare_cpus(). As result Xen PV guests crash in set_cpu_sibling_map(). While the new mask can be allocated in xen_pv_smp_prepare_cpus() one can see that both versions of smp_prepare_cpus ops share a number of common operations that can be factored out. So do that instead. Fixes: 66558b730f25 ("sched: Add cluster scheduler level for x86") Signed-off-by: Boris Ostrovsky Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Juergen Gross Link: https://lkml.kernel.org/r/1635896196-18961-1-git-send-email-boris.ostrovsky@oracle.com Signed-off-by: Aubrey Li --- arch/x86/include/asm/smp.h | 1 + arch/x86/kernel/smpboot.c | 18 ++++++++++++------ arch/x86/xen/smp_pv.c | 12 ++---------- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 08b0e90623ad..81a0211a372d 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -126,6 +126,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) void cpu_disable_common(void); void native_smp_prepare_boot_cpu(void); +void smp_prepare_cpus_common(void); void native_smp_prepare_cpus(unsigned int max_cpus); void calculate_max_logical_packages(void); void native_smp_cpus_done(unsigned int max_cpus); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 867b39bd4477..de554762151d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1365,12 +1365,7 @@ static void __init smp_get_logical_apicid(void) cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); } -/* - * Prepare for SMP bootup. - * @max_cpus: configured maximum number of CPUs, It is a legacy parameter - * for common interface support. - */ -void __init native_smp_prepare_cpus(unsigned int max_cpus) +void __init smp_prepare_cpus_common(void) { unsigned int i; @@ -1401,6 +1396,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) set_sched_topology(x86_topology); set_cpu_sibling_map(0); +} + +/* + * Prepare for SMP bootup. + * @max_cpus: configured maximum number of CPUs, It is a legacy parameter + * for common interface support. + */ +void __init native_smp_prepare_cpus(unsigned int max_cpus) +{ + smp_prepare_cpus_common(); + init_freq_invariance(false); smp_sanity_check(); diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 26babb8a1897..e8bcbd732fe7 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -211,7 +211,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void) static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus) { unsigned cpu; - unsigned int i; if (skip_ioapic_setup) { char *m = (max_cpus == 0) ? @@ -224,16 +223,9 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus) } xen_init_lock_cpu(0); - smp_store_boot_cpu_info(); - cpu_data(0).x86_max_cores = 1; + smp_prepare_cpus_common(); - for_each_possible_cpu(i) { - zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); - zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); - zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL); - zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); - } - set_cpu_sibling_map(0); + cpu_data(0).x86_max_cores = 1; speculative_store_bypass_ht_init(); -- Gitee From d476b6df304debf798767e263b966e3fab753c8b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Dec 2023 14:27:22 +0800 Subject: [PATCH 5/8] sched,x86: Don't use cluster topology for x86 hybrid CPUs mainline inclusion from mainline-v5.16-rc5 commit cabdc3a8475b918e55744f43719b26a82dc8fa6b category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I8LVOX CVE: N/A -------------------------------- Intel-SIG: commit cabdc3a8475b sched,x86: Don't use cluster topology for x86 hybrid CPUs. Cluster scheduler feature backport. -------------------------------- For x86 hybrid CPUs like Alder Lake, the order of CPU selection should be based strictly on CPU priority. Don't include cluster topology for hybrid CPUs to avoid interference with such CPU selection order. On Alder Lake, the Atom CPU cluster has more capacity (4 Atom CPUs) vs Big core cluster (2 hyperthread CPUs). This could potentially bias CPU selection towards Atom over Big Core, when Big core CPU has higher priority. Fixes: 66558b730f25 ("sched: Add cluster scheduler level for x86") Suggested-by: Tim Chen Signed-off-by: Peter Zijlstra (Intel) Acked-by: Tim Chen Tested-by: Ricardo Neri Link: https://lkml.kernel.org/r/20211204091402.GM16608@worktop.programming.kicks-ass.net Signed-off-by: Aubrey Li --- arch/x86/kernel/smpboot.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index de554762151d..99f1a0b08bc2 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -595,6 +595,17 @@ static struct sched_domain_topology_level x86_numa_in_package_topology[] = { { NULL, }, }; +static struct sched_domain_topology_level x86_hybrid_topology[] = { +#ifdef CONFIG_SCHED_SMT + { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) }, +#endif +#ifdef CONFIG_SCHED_MC + { cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) }, +#endif + { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + static struct sched_domain_topology_level x86_topology[] = { #ifdef CONFIG_SCHED_SMT { cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) }, @@ -1484,8 +1495,11 @@ void __init native_smp_cpus_done(unsigned int max_cpus) calculate_max_logical_packages(); + /* XXX for now assume numa-in-package and hybrid don't overlap */ if (x86_has_numa_in_package) set_sched_topology(x86_numa_in_package_topology); + if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) + set_sched_topology(x86_hybrid_topology); nmi_selftest(); impress_friends(); -- Gitee From 210dce8164d9a281677b6f7ef373e8dc0449085c Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Tue, 5 Dec 2023 14:30:43 +0800 Subject: [PATCH 6/8] topology: Remove unused cpu_cluster_mask() mainline inclusion from mainline-v5.19-rc1 commit 991d8d8142cad94f9c5c05db25e67fa83d6f772a category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I8LVOX CVE: N/A -------------------------------- Intel-SIG: commit 991d8d8142ca topology: Remove unused cpu_cluster_mask(). Cluster scheduler feature backport. -------------------------------- default_topology[] uses cpu_clustergroup_mask() for the CLS level (guarded by CONFIG_SCHED_CLUSTER) which is currently provided by x86 (arch/x86/kernel/smpboot.c) and arm64 (drivers/base/arch_topology.c). Fixes: 778c558f49a2c ("sched: Add cluster scheduler level in core and related Kconfig for ARM64") Signed-off-by: Dietmar Eggemann Signed-off-by: Peter Zijlstra (Intel) Acked-by: Barry Song Link: https://lore.kernel.org/r/20220513093433.425163-1-dietmar.eggemann@arm.com Signed-off-by: Aubrey Li --- include/linux/topology.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/include/linux/topology.h b/include/linux/topology.h index 42bcfd5d9fdb..4a6b52fa471c 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -213,13 +213,6 @@ static inline const struct cpumask *cpu_smt_mask(int cpu) } #endif -#if defined(CONFIG_SCHED_CLUSTER) && !defined(cpu_cluster_mask) -static inline const struct cpumask *cpu_cluster_mask(int cpu) -{ - return topology_cluster_cpumask(cpu); -} -#endif - static inline const struct cpumask *cpu_cpu_mask(int cpu) { return cpumask_of_node(cpu_to_node(cpu)); -- Gitee From 4f1d508c391a6f73653996a4c8395d75e956a09c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 19 Jan 2024 07:52:46 +0800 Subject: [PATCH 7/8] tools headers cpufeatures: Sync with the kernel sources mainline inclusion from mainline-v5.13-rc2 commit 6faf64f5248166ecaf50107e883c383e0b66bb70 category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I8LVOX CVE: N/A -------------------------------- Intel-SIG: commit 6faf64f52481 tools headers cpufeatures: Sync with the kernel sources. Cluster scheduler feature backport. -------------------------------- To pick the changes from: 4e6292114c741221 ("x86/paravirt: Add new features for paravirt patching") a161545ab53b174c ("x86/cpufeatures: Enumerate Intel Hybrid Technology feature bit") a89dfde3dc3c2dbf ("x86: Remove dynamic NOP selection") b8921dccf3b25798 ("x86/cpufeatures: Add SGX1 and SGX2 sub-features") f21d4d3b97a86035 ("x86/cpufeatures: Enumerate #DB for bus lock detection") f333374e108e7e4c ("x86/cpufeatures: Add the Virtual SPEC_CTRL feature") This only causes these perf files to be rebuilt: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Cc: Babu Moger Cc: Borislav Petkov Cc: Fenghua Yu Cc: Juergen Gross Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ricardo Neri Cc: Sean Christopherson Cc: Thomas Gleixner Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Aubrey Li --- tools/arch/x86/include/asm/cpufeatures.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 238287abee0c..d93b336baeec 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -84,7 +84,7 @@ /* CPU types for specific tunings: */ #define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ +/* FREE, was #define X86_FEATURE_K7 ( 3*32+ 5) "" Athlon */ #define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ #define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ @@ -236,6 +236,8 @@ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */ +#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -289,6 +291,8 @@ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ #define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ +#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ +#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ #define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ #define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ @@ -349,6 +353,7 @@ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ #define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ +#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */ #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ @@ -367,6 +372,7 @@ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ +#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */ #define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ #define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ #define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ @@ -393,6 +399,7 @@ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ +#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ -- Gitee From 5ce7f4502c4274d365fca336746fe9e5061ee3e0 Mon Sep 17 00:00:00 2001 From: Aubrey Li Date: Fri, 19 Jan 2024 22:17:09 +0800 Subject: [PATCH 8/8] sched,x86: enable CONFIG_SCHED_CLUSTER in openeuler_defconfig hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I8LVOX ------------------------------- Intel-SIG: enable CONFIG_SCHED_CLUSTER in openeuler_defconfig Cluster scheduler feature backport. Signed-off-by: Aubrey Li --- arch/x86/configs/openeuler_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 4f912e543270..c3bf825b93dc 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -385,6 +385,7 @@ CONFIG_NR_CPUS_RANGE_BEGIN=8192 CONFIG_NR_CPUS_RANGE_END=8192 CONFIG_NR_CPUS_DEFAULT=8192 CONFIG_NR_CPUS=8192 +CONFIG_SCHED_CLUSTER=y CONFIG_SCHED_SMT=y CONFIG_SCHED_MC=y CONFIG_SCHED_MC_PRIO=y -- Gitee