diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b1f550c8c82adfee461fb09c7531a4e8c44a46d7..3c8daac96773ef268bbf1be97440ba11d83906d5 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -106,6 +106,7 @@ config ARM64
 	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
 	select ARCH_SUPPORTS_NUMA_BALANCING
 	select ARCH_SUPPORTS_SCHED_PARAL
+	select ARCH_SUPPORTS_SCHED_SOFT_QUOTA
 	select ARCH_SUPPORTS_PAGE_TABLE_CHECK
 	select ARCH_SUPPORTS_PER_VMA_LOCK
 	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 37c970407d37106e832732d0b6bbdb6d876dd201..4b8b1c4fa9f361ac565f771c91905ac147205f28 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -210,6 +210,7 @@ CONFIG_PID_NS=y
 CONFIG_NET_NS=y
 CONFIG_SCHED_STEAL=y
 CONFIG_SCHED_PARAL=y
+CONFIG_SCHED_SOFT_QUOTA=y
 CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_RELAY=y
diff --git a/arch/arm64/kernel/idle.c b/arch/arm64/kernel/idle.c
index 46a40b693da86c3348f960c1a0fa9e250c0e54af..31d9bfbe10b88fc928759d5a341a834d1fd375f2 100644
--- a/arch/arm64/kernel/idle.c
+++ b/arch/arm64/kernel/idle.c
@@ -45,6 +45,26 @@ void noinstr arch_cpu_idle(void)
 }
 EXPORT_SYMBOL_GPL(arch_cpu_idle);
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+static DEFINE_PER_CPU(int, sibling_idle) = 1;
+
+int is_sibling_idle(void)
+{
+	return this_cpu_read(sibling_idle);
+}
+
+static void smt_measurement_begin(void)
+{
+}
+
+static void smt_measurement_done(void)
+{
+}
+#else
+static inline void smt_measurement_begin(void) { }
+static inline void smt_measurement_done(void) { }
+#endif
+
 #ifdef CONFIG_ACTLR_XCALL_XINT
 struct arm_cpuidle_xcall_xint_context {
 	unsigned long actlr_el1;
@@ -57,6 +77,8 @@ void arch_cpu_idle_enter(void)
 {
 	struct arm_cpuidle_xcall_xint_context *context;
 
+	smt_measurement_begin();
+
 	if (!system_uses_xcall_xint())
 		return;
 
@@ -71,6 +93,8 @@ void arch_cpu_idle_exit(void)
 {
 	struct arm_cpuidle_xcall_xint_context *context;
 
+	smt_measurement_done();
+
 	if (!system_uses_xcall_xint())
 		return;
 
@@ -81,6 +105,13 @@ void arch_cpu_idle_exit(void)
 	put_cpu_var(contexts);
 }
 #else
-void arch_cpu_idle_enter(void) {}
-void arch_cpu_idle_exit(void) {}
+void arch_cpu_idle_enter(void)
+{
+	smt_measurement_begin();
+}
+
+void arch_cpu_idle_exit(void)
+{
+	smt_measurement_done();
+}
 #endif
diff --git a/init/Kconfig b/init/Kconfig
index 925e8517a7e801e8cf3b0efd7e99a340d251b50d..2720083aaa17d5d47cacdd37c51371e11e3aea97 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1174,6 +1174,24 @@ config SCHED_SOFT_DOMAIN
 
 	  If in doubt, say N.
 
+#
+# For architectures that want to enable support for SCHED_SOFT_QUOTA
+#
+config ARCH_SUPPORTS_SCHED_SOFT_QUOTA
+	bool
+
+config SCHED_SOFT_QUOTA
+	bool "More flexible use of CPU quota"
+	depends on ARCH_SUPPORTS_SCHED_SOFT_QUOTA
+	depends on CFS_BANDWIDTH
+	default n
+	help
+	  This option allows a task group to keep running on otherwise idle
+	  CPUs after its CPU quota is exhausted. Users should understand
+	  CFS_BANDWIDTH before enabling it. Do not enable it where strict
+	  CPU quota accounting is required, for example commercial scenarios
+	  that charge based on CPU quota consumption.
+
 config SCHED_MM_CID
 	def_bool n
 	depends on SMP && RSEQ
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fab904f44c879c4666889cbd432c31de79d5bcbf..ba3cd68cbd03d92c75f5f16931545d2390653965 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11682,6 +11682,30 @@ static inline s64 cpu_qos_read(struct cgroup_subsys_state *css,
 }
 #endif
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+static int cpu_soft_quota_write(struct cgroup_subsys_state *css,
+				struct cftype *cftype, s64 soft_quota)
+{
+	struct task_group *tg = css_tg(css);
+
+	if (soft_quota != 1 && soft_quota != 0)
+		return -EINVAL;
+
+	if (tg->soft_quota == soft_quota)
+		return 0;
+
+	tg->soft_quota = soft_quota;
+
+	return 0;
+}
+
+static inline s64 cpu_soft_quota_read(struct cgroup_subsys_state *css,
+				      struct cftype *cft)
+{
+	return css_tg(css)->soft_quota;
+}
+#endif
+
 #ifdef CONFIG_BPF_SCHED
 void sched_settag(struct task_struct *tsk, s64 tag)
 {
@@ -11928,6 +11952,14 @@ static struct cftype cpu_legacy_files[] = {
 		.write_s64 = cpu_qos_write,
 	},
 #endif
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	{
+		.name = "soft_quota",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_s64 = cpu_soft_quota_read,
+		.write_s64 = cpu_soft_quota_write,
+	},
+#endif
 #ifdef CONFIG_BPF_SCHED
 	{
 		.name = "tag",
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 99175318885c7d1584806afc973317878ee3d9de..89ce1269b474540d6166e245dc917b9e5545572f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -190,6 +190,10 @@ unsigned int sysctl_qos_level_weights[5] = {
 static long qos_reweight(long shares, struct task_group *tg);
 #endif
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct list_head, soft_quota_throttled_cfs_rq);
+#endif
+
 #ifdef CONFIG_CFS_BANDWIDTH
 /*
  * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool
@@ -223,6 +227,10 @@ extern unsigned int sysctl_smart_grid_strategy_ctrl;
 static int sysctl_affinity_adjust_delay_ms = 5000;
 #endif
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+unsigned int sysctl_soft_runtime_ratio = 20;
+#endif
+
 #ifdef CONFIG_SYSCTL
 static struct ctl_table sched_fair_sysctls[] = {
 	{
@@ -322,6 +330,17 @@ static struct ctl_table sched_fair_sysctls[] = {
 		.extra1 = SYSCTL_ZERO,
 		.extra2 = &hundred_thousand,
 	},
+#endif
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	{
+		.procname = "sched_soft_runtime_ratio",
+		.data = &sysctl_soft_runtime_ratio,
+		.maxlen = sizeof(sysctl_soft_runtime_ratio),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = SYSCTL_ONE,
+		.extra2 = SYSCTL_ONE_HUNDRED,
+	},
 #endif
 	{}
 };
@@ -592,10 +611,11 @@ static inline struct sched_entity *parent_entity(const struct sched_entity *se)
 	return se->parent;
 }
 
-static void
+static bool
 find_matching_se(struct sched_entity **se, struct sched_entity **pse)
 {
 	int se_depth, pse_depth;
+	bool ret = false;
 
 	/*
 	 * preemption test can be made between sibling entities who are in the
@@ -609,6 +629,10 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
 	pse_depth = (*pse)->depth;
 
 	while (se_depth > pse_depth) {
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+		if (!ret && cfs_rq_of(*se)->soft_quota_enable == 1)
+			ret = true;
+#endif
 		se_depth--;
 		*se = parent_entity(*se);
 	}
@@ -619,9 +643,15 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
 	}
 
 	while (!is_same_group(*se, *pse)) {
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+		if (!ret && cfs_rq_of(*se)->soft_quota_enable == 1)
+			ret = true;
+#endif
 		*se = parent_entity(*se);
 		*pse = parent_entity(*pse);
 	}
+
+	return ret;
 }
 
 static int tg_is_idle(struct task_group *tg)
@@ -667,9 +697,10 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
 	return NULL;
 }
 
-static inline void
+static inline bool
 find_matching_se(struct sched_entity **se, struct sched_entity **pse)
 {
+	return false;
 }
 
 static inline int tg_is_idle(struct task_group *tg)
@@ -6030,6 +6061,14 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	SCHED_WARN_ON(cfs_rq->throttled_clock);
 	if (cfs_rq->nr_running)
 		cfs_rq->throttled_clock = rq_clock(rq);
+
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	if (cfs_rq->tg->soft_quota == 1) {
+		list_add(&cfs_rq->soft_quota_throttled_list,
+			 &per_cpu(soft_quota_throttled_cfs_rq, cpu_of(rq)));
+	}
+#endif
+
 	return true;
 }
 
@@ -6046,6 +6085,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
 	se = cfs_rq->tg->se[cpu_of(rq)];
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	list_del_init(&cfs_rq->soft_quota_throttled_list);
+#endif
+
 #ifdef CONFIG_QOS_SCHED
 	/*
 	 * if this cfs_rq throttled by qos, not need unthrottle it.
@@ -6244,6 +6287,16 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
 		}
 
 		rq_lock_irqsave(rq, &rf);
+
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+		if (cfs_rq->soft_quota_enable == 1) {
+			if (cfs_rq->runtime_remaining > 0)
+				cfs_rq->runtime_remaining = 0;
+
+			cfs_rq->soft_quota_enable = 0;
+		}
+#endif
+
 		if (!cfs_rq_throttled(cfs_rq))
 			goto next;
 
@@ -6306,6 +6359,17 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
 	return throttled;
 }
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+static inline void init_tg_sum_soft_runtime(struct cfs_bandwidth *cfs_b)
+{
+	unsigned int cpu;
+	struct task_group *tg = container_of(cfs_b, struct task_group, cfs_bandwidth);
+
+	for_each_possible_cpu(cpu)
+		tg->cfs_rq[cpu]->sum_soft_runtime = 0;
+}
+#endif
+
 /*
  * Responsible for refilling a task_group's bandwidth and unthrottling its
  * cfs_rqs as appropriate. If there has been no activity within the last
@@ -6323,6 +6387,10 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, u
 	throttled = !list_empty(&cfs_b->throttled_cfs_rq);
 	cfs_b->nr_periods += overrun;
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	init_tg_sum_soft_runtime(cfs_b);
+#endif
+
 	/* Refill extra burst quota even if cfs_b->idle */
 	__refill_cfs_bandwidth_runtime(cfs_b);
 
@@ -6637,6 +6705,9 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 #ifdef CONFIG_QOS_SCHED
 	INIT_LIST_HEAD(&cfs_rq->qos_throttled_list);
 #endif
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	INIT_LIST_HEAD(&cfs_rq->soft_quota_throttled_list);
+#endif
 }
 
 void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
@@ -9457,6 +9528,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	int next_buddy_marked = 0;
 	int cse_is_idle, pse_is_idle;
+	bool ret = false;
 
 	if (unlikely(se == pse))
 		return;
@@ -9491,7 +9563,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	if (!sched_feat(WAKEUP_PREEMPTION))
 		return;
 
-	find_matching_se(&se, &pse);
+	ret = find_matching_se(&se, &pse);
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	if (ret)
+		goto preempt;
+#endif
+
 	WARN_ON_ONCE(!pse);
 
 	cse_is_idle = se_is_idle(se);
@@ -14982,6 +15059,9 @@ void unregister_fair_sched_group(struct task_group *tg)
 	unsigned long flags;
 	struct rq *rq;
 	int cpu;
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	struct cfs_rq *cfs_rq;
+#endif
 
 	destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
 	destroy_auto_affinity(tg);
@@ -14991,10 +15071,16 @@ void unregister_fair_sched_group(struct task_group *tg)
 		if (tg->se[cpu])
 			remove_entity_load_avg(tg->se[cpu]);
 
-	#ifdef CONFIG_QOS_SCHED
-		if (tg->cfs_rq && tg->cfs_rq[cpu])
-			unthrottle_qos_sched_group(tg->cfs_rq[cpu]);
-	#endif
+#ifdef CONFIG_QOS_SCHED
+		if (tg->cfs_rq && tg->cfs_rq[cpu])
+			unthrottle_qos_sched_group(tg->cfs_rq[cpu]);
+#endif
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+		if (tg->cfs_rq && tg->cfs_rq[cpu]) {
+			cfs_rq = tg->cfs_rq[cpu];
+			list_del_init(&cfs_rq->soft_quota_throttled_list);
+		}
+#endif
 
 		/*
 		 * Only empty task groups can be destroyed; so we can speculatively
@@ -15309,6 +15395,11 @@ __init void init_sched_fair_class(void)
 		INIT_LIST_HEAD(&per_cpu(qos_throttled_cfs_rq, i));
 #endif
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	for_each_possible_cpu(i)
+		INIT_LIST_HEAD(&per_cpu(soft_quota_throttled_cfs_rq, i));
+#endif
+
 	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
 
 #ifdef CONFIG_NO_HZ_COMMON
@@ -15319,3 +15410,66 @@ __init void init_sched_fair_class(void)
 #endif /* SMP */
 
 }
+
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+static bool check_soft_runtime(struct task_group *tg, int slice)
+{
+	int cpu;
+	u64 sum_soft_runtime = slice;
+	struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
+
+	if (cfs_b->quota == RUNTIME_INF)
+		return true;
+
+	for_each_possible_cpu(cpu)
+		sum_soft_runtime += tg->cfs_rq[cpu]->sum_soft_runtime;
+
+	return sum_soft_runtime < sysctl_soft_runtime_ratio * cfs_b->quota / 100;
+}
+
+int __weak is_sibling_idle(void)
+{
+	return 0;
+}
+
+bool unthrottle_cfs_rq_soft_quota(struct rq *rq)
+{
+	int max_cnt = 0;
+	bool ret = false;
+	struct cfs_rq *cfs_rq, *tmp_rq;
+	struct cfs_bandwidth *cfs_b;
+	int slice = sched_cfs_bandwidth_slice();
+
+	if (!is_sibling_idle())
+		return ret;
+
+	list_for_each_entry_safe(cfs_rq, tmp_rq, &per_cpu(soft_quota_throttled_cfs_rq, cpu_of(rq)),
+				 soft_quota_throttled_list) {
+		if (max_cnt++ > 20)
+			break;
+
+		if (cfs_rq->throttled) {
+			cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+			raw_spin_lock(&cfs_b->lock);
+
+			if (!check_soft_runtime(cfs_rq->tg, slice)) {
+				raw_spin_unlock(&cfs_b->lock);
+				continue;
+			}
+
+			raw_spin_unlock(&cfs_b->lock);
+
+			if (cfs_rq->runtime_remaining + slice > 0) {
+				cfs_rq->runtime_remaining += slice;
+				cfs_rq->sum_soft_runtime += slice;
+				cfs_rq->soft_quota_enable = 1;
+				unthrottle_cfs_rq(cfs_rq);
+				ret = true;
+				break;
+			}
+		}
+	}
+
+	return ret;
+}
+#endif
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 67939d04542fc14a2eba7e7a422060e2a9ed2903..b95797360dd60b90dc546052132ef33fb45bcbe6 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -114,3 +114,7 @@ SCHED_FEAT(DA_UTIL_TASKGROUP, true)
 #ifdef CONFIG_SCHED_SOFT_DOMAIN
 SCHED_FEAT(SOFT_DOMAIN, false)
 #endif
+
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+SCHED_FEAT(SOFT_QUOTA, false)
+#endif
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 5007b25c5bc653a9ca0696af1f7136150cd6131e..3518a1a28e8c5964cc5a6e7ae14f7ed65f64ba20 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -427,6 +427,13 @@ struct task_struct *pick_next_task_idle(struct rq *rq)
 {
 	struct task_struct *next = rq->idle;
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	if (sched_feat(SOFT_QUOTA)) {
+		if (unthrottle_cfs_rq_soft_quota(rq) && rq->cfs.nr_running)
+			return pick_next_task_fair(rq, NULL, NULL);
+	}
+#endif
+
 	set_next_task_idle(rq, next, true);
 
 	return next;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f6a3f93d1f7552b61f782ee5edd5ab249d7cd3fa..0e21ad151ec952c84c393a6bd98e48b1ffc18878 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -484,7 +484,11 @@ struct task_group {
 #else
 	KABI_RESERVE(1)
 #endif
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	KABI_USE(2, u64 soft_quota)
+#else
 	KABI_RESERVE(2)
+#endif
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
 	KABI_RESERVE(5)
@@ -578,6 +582,10 @@ static inline void tg_update_affinity_domains(int cpu, int online) {}
 static inline void offline_auto_affinity(struct task_group *tg) { }
 #endif
 
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+extern bool unthrottle_cfs_rq_soft_quota(struct rq *rq);
+#endif
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
 
@@ -769,10 +777,17 @@ struct cfs_rq {
 		unsigned long	qos_idle_h_nr_running_padding;
 	};
 #endif
+#ifdef CONFIG_SCHED_SOFT_QUOTA
+	KABI_USE(1, u64 soft_quota_enable)
+	KABI_USE(2, u64 sum_soft_runtime)
+	KABI_REPLACE(_KABI_RESERVE(3); _KABI_RESERVE(4),
+		     struct list_head soft_quota_throttled_list)
+#else
 	KABI_RESERVE(1)
 	KABI_RESERVE(2)
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
+#endif
 	KABI_RESERVE(5)
 	KABI_RESERVE(6)
 	KABI_RESERVE(7)
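
For reference, a minimal stand-alone sketch of the budget rule that check_soft_runtime() applies above, assuming the default 5 ms bandwidth slice and the default sched_soft_runtime_ratio of 20. This is an editorial userspace illustration, not code from the patch:

/*
 * Re-statement of the check in check_soft_runtime(): a throttled group may
 * borrow one more bandwidth slice only while the soft runtime already
 * consumed in the current period, plus that slice, stays below
 * sched_soft_runtime_ratio percent of the group's hard quota.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool check_soft_runtime(uint64_t quota_ns, uint64_t consumed_ns,
			       uint64_t slice_ns, unsigned int ratio)
{
	/* Mirrors: sum_soft_runtime + slice < ratio * quota / 100 */
	return consumed_ns + slice_ns < ratio * quota_ns / 100;
}

int main(void)
{
	const uint64_t quota = 100000000ULL;	/* 100 ms hard quota per period */
	const uint64_t slice = 5000000ULL;	/* 5 ms bandwidth slice */
	const unsigned int ratio = 20;		/* default sched_soft_runtime_ratio */

	/* Soft budget is 20 ms here: three extra 5 ms slices fit, the fourth is refused. */
	for (uint64_t used = 0; used <= 20000000ULL; used += slice)
		printf("soft runtime used %2llu ms -> grant another slice: %s\n",
		       (unsigned long long)(used / 1000000),
		       check_soft_runtime(quota, used, slice, ratio) ? "yes" : "no");

	return 0;
}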
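
A possible way to exercise the new knobs from userspace is sketched below. Only the interface names come from this patch (the SOFT_QUOTA scheduler feature, the sched_soft_runtime_ratio sysctl, and the cpu.soft_quota cgroup file); the debugfs, procfs, and cgroup-v1 paths, as well as the "mygroup" cgroup, are assumptions about a typical system layout:

/* Editorial usage sketch; error handling is limited to perror(). */
#include <stdio.h>

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	/* Turn on the scheduler feature; features.h defaults it to off. */
	write_str("/sys/kernel/debug/sched/features", "SOFT_QUOTA");

	/* Allow up to 30% of the hard quota as soft runtime per period. */
	write_str("/proc/sys/kernel/sched_soft_runtime_ratio", "30");

	/* Give the group a hard quota so it can actually be throttled ... */
	write_str("/sys/fs/cgroup/cpu/mygroup/cpu.cfs_quota_us", "100000");

	/* ... and opt it in to borrowing soft quota while siblings are idle. */
	write_str("/sys/fs/cgroup/cpu/mygroup/cpu.soft_quota", "1");

	return 0;
}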