From ad23908838bfbaac3eec9899c3b1a80114dc77d0 Mon Sep 17 00:00:00 2001 From: Cheng Yu Date: Mon, 12 Aug 2024 14:28:18 +0800 Subject: [PATCH] sched/fair: Prefer physical cores when migrating tasks Only when the load of the physical core exceeds the set threshold, the task will be migrated, otherwise the task will keep on the physical core. External impacts: 1) default config in arm64,x86: CONFIG_SCHED_KEEP_ON_CORE=y 2) sysctl: /proc/sys/kernel/sched_util_keep_on_core 3) sched features: KEEP_ON_CORE (default NO_KEEP_ON_CORE) Signed-off-by: Cheng Yu --- arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + include/linux/sched/sysctl.h | 4 +++ init/Kconfig | 9 +++++++ kernel/sched/fair.c | 36 ++++++++++++++++++++++++++ kernel/sched/features.h | 4 +++ kernel/sysctl.c | 10 +++++++ 7 files changed, 65 insertions(+) diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 553dfbac6e4d..da5ba498c9d6 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -189,6 +189,7 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_NET_NS=y CONFIG_SCHED_STEAL=y +CONFIG_SCHED_KEEP_ON_CORE=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y # CONFIG_SYSFS_DEPRECATED is not set diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 9f07c24ea89d..b9ec78308cea 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -194,6 +194,7 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_NET_NS=y CONFIG_SCHED_STEAL=y +CONFIG_SCHED_KEEP_ON_CORE=y CONFIG_CHECKPOINT_RESTORE=y CONFIG_SCHED_AUTOGROUP=y # CONFIG_SYSFS_DEPRECATED is not set diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 5cd5b3c579d3..fb1436286994 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -35,6 +35,10 @@ extern unsigned int sysctl_sched_child_runs_first; extern int sysctl_sched_util_low_pct; #endif +#ifdef CONFIG_SCHED_KEEP_ON_CORE +extern int sysctl_sched_util_keep_on_core; +#endif + #ifdef CONFIG_QOS_SCHED_SMART_GRID extern unsigned int sysctl_smart_grid_strategy_ctrl; extern int sysctl_affinity_adjust_delay_ms; diff --git a/init/Kconfig b/init/Kconfig index e552194efbea..ee94515b2f04 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1370,6 +1370,15 @@ config SCHED_STEAL If unsure, say N here. +config SCHED_KEEP_ON_CORE + bool "Prefer physical cores when migrating tasks" + depends on SMP + default n + help + Only when the load of the physical core exceeds the set threshold, + the task will be migrated, otherwise the task will keep on the + physical core. + config CHECKPOINT_RESTORE bool "Checkpoint/restore support" select PROC_CHILDREN diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 95d1841f8a20..4f3d81537bab 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4497,6 +4497,12 @@ static inline void overload_clear(struct rq *rq) {} static inline void overload_set(struct rq *rq) {} #endif +#ifdef CONFIG_SCHED_KEEP_ON_CORE +static int core_has_spare(int cpu); +#else +static inline int core_has_spare(int cpu) { return 0; } +#endif + #else /* CONFIG_SMP */ #define UPDATE_TG 0x0 @@ -4523,6 +4529,7 @@ static inline int newidle_balance(struct rq *rq, struct rq_flags *rf) static inline void rq_idle_stamp_update(struct rq *rq) {} static inline void rq_idle_stamp_clear(struct rq *rq) {} static inline int try_steal(struct rq *this_rq, struct rq_flags *rf) { return 0; } +static inline int core_has_spare(int cpu) { return 0; } static inline void overload_clear(struct rq *rq) {} static inline void overload_set(struct rq *rq) {} @@ -8210,6 +8217,13 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f } #endif +#ifdef CONFIG_SCHED_KEEP_ON_CORE + if (static_branch_likely(&sched_smt_present) && + sched_feat(KEEP_ON_CORE)) + if (core_has_spare(new_cpu)) + new_cpu = cpumask_first(cpu_smt_mask((new_cpu))); +#endif + rcu_read_unlock(); #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY @@ -9701,6 +9715,14 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) } #endif +#ifdef CONFIG_SCHED_KEEP_ON_CORE + if (static_branch_likely(&sched_smt_present) && + sched_feat(KEEP_ON_CORE)) + if (core_has_spare(env->dst_cpu) && + cpumask_first(cpu_smt_mask((env->dst_cpu))) != env->dst_cpu) + return 0; +#endif + /* * We do not migrate tasks that are: * 1) throttled_lb_pair, or @@ -13189,6 +13211,20 @@ static int try_steal(struct rq *dst_rq, struct rq_flags *dst_rf) } #endif +#ifdef CONFIG_SCHED_KEEP_ON_CORE +int sysctl_sched_util_keep_on_core = 100; + +static int core_has_spare(int cpu) +{ + int core_id = cpumask_first(cpu_smt_mask(cpu)); + struct rq *rq = cpu_rq(core_id); + unsigned long util = rq->cfs.avg.util_avg; + unsigned long capacity = rq->cpu_capacity; + + return util * 100 < capacity * sysctl_sched_util_keep_on_core; +} +#endif + static void rq_online_fair(struct rq *rq) { update_sysctl(); diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 76fade025c4b..fb885b20ba34 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -70,6 +70,10 @@ SCHED_FEAT(SIS_UTIL, false) SCHED_FEAT(STEAL, false) #endif +#ifdef CONFIG_SCHED_KEEP_ON_CORE +SCHED_FEAT(KEEP_ON_CORE, false) +#endif + /* * Issue a WARN when we do multiple update_rq_clock() calls * in a single rq->lock section. Default disabled because the diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3941856c19d1..9abc01982645 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2817,6 +2817,16 @@ static struct ctl_table kern_table[] = { .extra2 = &one_hundred, }, #endif +#ifdef CONFIG_SCHED_KEEP_ON_CORE + { + .procname = "sched_util_keep_on_core", + .data = &sysctl_sched_util_keep_on_core, + .maxlen = sizeof(sysctl_sched_util_keep_on_core), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, +#endif #ifdef CONFIG_QOS_SCHED_SMART_GRID { .procname = "smart_grid_strategy_ctrl", -- Gitee