diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 553dfbac6e4dc1b0ef30d39ea3d50541a351aaa7..da5ba498c9d675de38af59170a92b479cef9ff7b 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -189,6 +189,7 @@ CONFIG_USER_NS=y
 CONFIG_PID_NS=y
 CONFIG_NET_NS=y
 CONFIG_SCHED_STEAL=y
+CONFIG_SCHED_KEEP_ON_CORE=y
 CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_SCHED_AUTOGROUP=y
 # CONFIG_SYSFS_DEPRECATED is not set
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index 9f07c24ea89d57dc98fe4a9e02dd336f3b76b272..b9ec78308cea317f44babc9e1351f3824a6f1437 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -194,6 +194,7 @@ CONFIG_USER_NS=y
 CONFIG_PID_NS=y
 CONFIG_NET_NS=y
 CONFIG_SCHED_STEAL=y
+CONFIG_SCHED_KEEP_ON_CORE=y
 CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_SCHED_AUTOGROUP=y
 # CONFIG_SYSFS_DEPRECATED is not set
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 5cd5b3c579d3735bfb8109f57bfb590dc59b3359..fb1436286994823eae153fab5a95fb227ed816fb 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -35,6 +35,10 @@ extern unsigned int sysctl_sched_child_runs_first;
 extern int sysctl_sched_util_low_pct;
 #endif
 
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+extern int sysctl_sched_util_keep_on_core;
+#endif
+
 #ifdef CONFIG_QOS_SCHED_SMART_GRID
 extern unsigned int sysctl_smart_grid_strategy_ctrl;
 extern int sysctl_affinity_adjust_delay_ms;
diff --git a/init/Kconfig b/init/Kconfig
index e552194efbeacebaec67a3fb6644ac7f7152dc97..16a1d7ac726b075bfa3fcae8890ac50d971e8985 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1370,6 +1370,17 @@
 
	  If unsure, say N here.
 
+config SCHED_KEEP_ON_CORE
+	bool "Prefer physical cores when migrating tasks"
+	depends on SMP
+	default n
+	help
+	  When CPU hyperthreading (SMT) is enabled, one physical core exposes
+	  multiple logical CPUs. Assume that physical core0 exposes two
+	  logical CPUs, cpu0 and cpu1. A task is migrated to cpu1 only when
+	  the utilization of cpu0 exceeds the configured threshold; otherwise
+	  the task stays on cpu0.
+
 config CHECKPOINT_RESTORE
	bool "Checkpoint/restore support"
	select PROC_CHILDREN
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 95d1841f8a208579cfc80042dded8739a5d3e9e3..4f3d81537bab1385f2e4f2a714e7f09482eeca6b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4497,6 +4497,12 @@ static inline void overload_clear(struct rq *rq) {}
 static inline void overload_set(struct rq *rq) {}
 #endif
 
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+static int core_has_spare(int cpu);
+#else
+static inline int core_has_spare(int cpu) { return 0; }
+#endif
+
 #else /* CONFIG_SMP */
 
 #define UPDATE_TG	0x0
@@ -4523,6 +4529,7 @@ static inline int newidle_balance(struct rq *rq, struct rq_flags *rf)
 static inline void rq_idle_stamp_update(struct rq *rq) {}
 static inline void rq_idle_stamp_clear(struct rq *rq) {}
 static inline int try_steal(struct rq *this_rq, struct rq_flags *rf) { return 0; }
+static inline int core_has_spare(int cpu) { return 0; }
 static inline void overload_clear(struct rq *rq) {}
 static inline void overload_set(struct rq *rq) {}
 
@@ -8210,6 +8217,13 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
	}
 #endif
 
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+	if (static_branch_likely(&sched_smt_present) &&
+	    sched_feat(KEEP_ON_CORE))
+		if (core_has_spare(new_cpu))
+			new_cpu = cpumask_first(cpu_smt_mask((new_cpu)));
+#endif
+
	rcu_read_unlock();
 
 #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
@@ -9701,6 +9715,14 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
	}
 #endif
 
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+	if (static_branch_likely(&sched_smt_present) &&
+	    sched_feat(KEEP_ON_CORE))
+		if (core_has_spare(env->dst_cpu) &&
+		    cpumask_first(cpu_smt_mask((env->dst_cpu))) != env->dst_cpu)
+			return 0;
+#endif
+
	/*
	 * We do not migrate tasks that are:
	 * 1) throttled_lb_pair, or
@@ -13189,6 +13211,20 @@ static int try_steal(struct rq *dst_rq, struct rq_flags *dst_rf)
 }
 #endif
 
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+int sysctl_sched_util_keep_on_core = 100;
+
+static int core_has_spare(int cpu)
+{
+	int core_id = cpumask_first(cpu_smt_mask(cpu));
+	struct rq *rq = cpu_rq(core_id);
+	unsigned long util = rq->cfs.avg.util_avg;
+	unsigned long capacity = rq->cpu_capacity;
+
+	return util * 100 < capacity * sysctl_sched_util_keep_on_core;
+}
+#endif
+
 static void rq_online_fair(struct rq *rq)
 {
	update_sysctl();
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 76fade025c4bdfb1b9b816b3e74fa0bbe23fb244..fb885b20ba34ec753ac76b44a9774eaa89e267bf 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -70,6 +70,10 @@ SCHED_FEAT(SIS_UTIL, false)
 SCHED_FEAT(STEAL, false)
 #endif
 
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+SCHED_FEAT(KEEP_ON_CORE, false)
+#endif
+
 /*
  * Issue a WARN when we do multiple update_rq_clock() calls
  * in a single rq->lock section. Default disabled because the
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3941856c19d1d7ff3366bbdad43c058d90f0f11b..9abc019826453faadca0f9b5d0af60f8d6ac4cca 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2817,6 +2817,16 @@ static struct ctl_table kern_table[] = {
		.extra2		= &one_hundred,
	},
 #endif
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+	{
+		.procname	= "sched_util_keep_on_core",
+		.data		= &sysctl_sched_util_keep_on_core,
+		.maxlen		= sizeof(sysctl_sched_util_keep_on_core),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+	},
+#endif
 #ifdef CONFIG_QOS_SCHED_SMART_GRID
	{
		.procname	= "smart_grid_strategy_ctrl",