diff --git a/include/linux/sched.h b/include/linux/sched.h index a1d6559bdb1d595f4f69062b2a5cb528004734bc..48fdda47b88d50fd1cdd207598e9ad472714626c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -66,6 +66,7 @@ struct signal_struct; struct task_delay_info; struct task_group; struct io_uring_task; +struct cgroup; /* * Task state bitmask. NOTE! These bits are also @@ -2424,4 +2425,24 @@ static inline bool jbd2_proxy_exec_disabled(void) { return !static_branch_unlikely(&__jbd2_proxy_exec_enabled); } +#ifdef CONFIG_GROUP_BALANCER +extern bool group_balancer_enabled(void); +extern int get_tg_specs(struct task_group *tg); +extern void tg_specs_change(struct task_group *tg, u64 specs_before); +extern bool tg_group_balancer_enabled(struct task_group *tg); +extern struct task_group *cgroup_tg(struct cgroup *cgrp); +extern struct cgroup *tg_cgroup(struct task_group *tg); +extern void lock_cfs_constraints_mutex(void); +extern void unlock_cfs_constraints_mutex(void); +#ifdef CONFIG_CPUSETS +extern struct cpumask *task_group_cpus_allowed(struct task_group *tg); +#else +static inline struct cpumask *task_group_cpus_allowed(struct task_group *tg) +{ + return NULL; +} +#endif +#else +static inline void tg_specs_change(struct task_group *tg, u64 specs_before) { } +#endif #endif diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 536765522aecdaf48c385d5800e27e0c4d473612..c4991a5778070d9e10f0598cc1b25f8e1cc9ebca 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -131,6 +131,7 @@ extern int sched_acpu_enable_handler(struct ctl_table *table, int write, #endif #ifdef CONFIG_GROUP_BALANCER extern unsigned int sysctl_sched_group_balancer_enabled; +extern unsigned long sysctl_sched_gb_expiration_ms; extern int sched_group_balancer_enable_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 164f5bee99da05cd22e99b9c7ba870ba217ebaa8..d48296132e71cf93f27d6a4cdd26baa9d17fcbf9 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -205,6 +205,46 @@ static inline struct cpuset *css_cs(struct cgroup_subsys_state *css) return css ?
container_of(css, struct cpuset, css) : NULL; } +#ifdef CONFIG_GROUP_BALANCER +static inline struct cpuset *cgroup_cs(struct cgroup *cgrp) +{ + return container_of(global_cgroup_css(cgrp, cpuset_cgrp_id), + struct cpuset, css); +} + +struct cpumask *task_group_cpus_allowed(struct task_group *tg) +{ + struct cgroup *cg = tg_cgroup(tg); + struct cpuset *cs = cgroup_cs(cg); + + if (cs) + return (struct cpumask *)cs->cpus_allowed; + + return NULL; +} + +static void update_cpumask_for_group_balancer(struct cpuset *cs) +{ + struct cgroup *cg = cs->css.cgroup; + struct task_group *tg; + + if (!group_balancer_enabled()) + return; + + tg = cgroup_tg(cg); + if (!tg) + return; + if (!tg_group_balancer_enabled(tg)) + return; + + lock_cfs_constraints_mutex(); + tg_specs_change(tg, get_tg_specs(tg)); + unlock_cfs_constraints_mutex(); +} +#else +static inline void update_cpumask_for_group_balancer(struct cpuset *cs) { } +#endif + /* Retrieve the cpuset for a task */ static inline struct cpuset *task_cs(struct task_struct *task) { @@ -1498,6 +1538,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp) /* deleted = old - new = old & (~new) */ cpumask_andnot(&deleted, &old_cpus, tmp->new_cpus); cpuacct_cpuset_changed(cs->css.cgroup, &deleted, NULL); + update_cpumask_for_group_balancer(cs); /* * On legacy hierarchy, if the effective cpumask of any non- diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fefc9e372b4d0b88b6a72e460c4333b5c08886e2..68c95c75c8c52c085950b1cf1894df75a3830860 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9013,6 +9013,8 @@ struct task_group *sched_create_group(struct task_group *parent) tg->group_balancer = 0; tg->soft_cpus_version = 0; tg->gb_sd = NULL; + tg->preferred_gb_sd = NULL; + tg->expiration_start = 0; raw_spin_lock_init(&tg->gb_lock); #endif return tg; @@ -9592,6 +9594,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota, if (runtime_enabled && !runtime_was_enabled) cfs_bandwidth_usage_inc(); raw_spin_lock_irq(&cfs_b->lock); + tg_burst_change(tg, burst); cfs_b->period = ns_to_ktime(period); cfs_b->quota = quota; cfs_b->burst = burst; @@ -10106,36 +10109,14 @@ static u64 cpu_group_balancer_read_u64(struct cgroup_subsys_state *css, return tg->group_balancer; } -static int tg_validate_group_balancer_down(struct task_group *tg, void *data) +void lock_cfs_constraints_mutex(void) { - if (tg->group_balancer) - return -EINVAL; - return 0; + mutex_lock(&cfs_constraints_mutex); } -/* - * There is only one task group allowed to enable group balancer in the path from - * root_task_group to a certion leaf task group. 
- */ -static int validate_group_balancer(struct task_group *tg) +void unlock_cfs_constraints_mutex(void) { - int retval = 0; - - rcu_read_lock(); - retval = walk_tg_tree_from(tg, tg_validate_group_balancer_down, - tg_nop, NULL); - if (retval) - goto out; - - for (; tg != &root_task_group; tg = tg->parent) { - if (tg->group_balancer) { - retval = -EINVAL; - break; - } - } -out: - rcu_read_unlock(); - return retval; + mutex_unlock(&cfs_constraints_mutex); } static int cpu_group_balancer_write_u64(struct cgroup_subsys_state *css, @@ -10151,7 +10132,7 @@ static int cpu_group_balancer_write_u64(struct cgroup_subsys_state *css, if (tg == &root_task_group || task_group_is_autogroup(tg)) return -EACCES; - if (new > 1) + if (new > 2) return -EINVAL; write_lock(&group_balancer_lock); @@ -10161,16 +10142,17 @@ static int cpu_group_balancer_write_u64(struct cgroup_subsys_state *css, if (old == new) goto out; - if (new) { - retval = validate_group_balancer(tg); - if (retval) - goto out; - retval = attach_tg_to_group_balancer_sched_domain(tg, NULL, true); - if (retval) - goto out; - } else { - detach_tg_from_group_balancer_sched_domain(tg, true); + if (!!old == !!new) { + mutex_lock(&cfs_constraints_mutex); + tg_specs_change(tg, tg->specs_ratio); + mutex_unlock(&cfs_constraints_mutex); + tg->group_balancer = new; + goto out; } + + retval = update_group_balancer(tg, new); + if (retval) + goto out; tg->group_balancer = new; out: raw_spin_unlock(&tg->gb_lock); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ca96537f9e460ff44864a11c371808718a13c2b8..a3fdcca1bd57abac703cb251abfd1d54ea730e15 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8571,6 +8571,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) /* At this point se is NULL and we are at root level*/ add_nr_running(rq, 1); id_update_nr_running(task_group(p), p, rq, 1); + gb_update_nr_running(task_group(p), rq, 1); /* * Since new tasks are assigned an initial util_avg equal to @@ -8697,6 +8698,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) /* At this point se is NULL and we are at root level*/ sub_nr_running(rq, 1); id_update_nr_running(task_group(p), p, rq, -1); + gb_update_nr_running(task_group(p), rq, -1); /* balance early to pull high priority tasks */ if (unlikely(!was_sched_idle && sched_idle_rq(rq))) @@ -9240,12 +9242,27 @@ select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool has_idle_co struct sched_domain *this_sd; u64 time; bool is_seeker; +#ifdef CONFIG_GROUP_BALANCER + struct task_group *tg = task_group(p); + bool gb_tried = false; + struct group_balancer_sched_domain *preferred = tg->preferred_gb_sd; +#endif this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc)); if (!this_sd) return -1; +#ifdef CONFIG_GROUP_BALANCER +retry: + if (group_balancer_enabled() && !gb_tried && tg_group_balancer_enabled(tg) && preferred) { + cpumask_and(cpus, get_gb_sd_span(preferred), task_allowed_cpu(p)); + } else { + gb_tried = true; + cpumask_and(cpus, sched_domain_span(sd), task_allowed_cpu(p)); + } +#else cpumask_and(cpus, sched_domain_span(sd), task_allowed_cpu(p)); +#endif if (sched_feat(SIS_PROP) && !has_idle_core) { u64 avg_cost, avg_idle, span_avg; @@ -9282,7 +9299,7 @@ select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool has_idle_co return i; } else { if (--nr <= 0) - return -1; + goto out; idle_cpu = __select_idle_cpu(cpu, p, &id_backup); if ((unsigned int)idle_cpu < nr_cpumask_bits) return idle_cpu; @@ -9299,13 +9316,20 @@ 
select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool has_idle_co return i; } else { if (--nr <= 0) - return -1; + goto out; idle_cpu = __select_idle_cpu(cpu, p, &id_backup); if ((unsigned int)idle_cpu < nr_cpumask_bits) break; } } +#ifdef CONFIG_GROUP_BALANCER + if (!gb_tried) { + gb_tried = true; + goto retry; + } +#endif + if (has_idle_core) set_idle_cores(target, false); @@ -9317,6 +9341,14 @@ select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool has_idle_co if (!group_identity_disabled()) return (unsigned int)idle_cpu < nr_cpumask_bits ? idle_cpu : id_backup; return idle_cpu; +out: +#ifdef CONFIG_GROUP_BALANCER + if (!gb_tried) { + gb_tried = true; + goto retry; + } +#endif + return -1; } /* @@ -14732,19 +14764,18 @@ void free_fair_sched_group(struct task_group *tg) void tg_set_specs_ratio(struct task_group *tg) { u64 quota = tg_cfs_bandwidth(tg)->hierarchical_quota; - u64 specs_ratio; + u64 specs_ratio, specs_before; + specs_before = tg->specs_ratio; if (quota == RUNTIME_INF) { tg->specs_ratio = -1; - return; + } else { + specs_ratio = quota / ((1 << BW_SHIFT) / 100); + /* If specs_ratio is bigger than INT_MAX, set specs_ratio to -1. */ + tg->specs_ratio = specs_ratio > INT_MAX ? -1 : specs_ratio; } - - specs_ratio = quota / ((1 << BW_SHIFT) / 100); - - /* If specs_ratio is bigger than INT_MAX, set specs_ratio -1. */ - tg->specs_ratio = specs_ratio > INT_MAX ? -1 : specs_ratio; if (tg->group_balancer) - tg_specs_change(tg); + tg_specs_change(tg, specs_before); } #endif @@ -15302,3 +15333,86 @@ int sched_trace_rq_nr_running(struct rq *rq) return rq ? rq->nr_running : -1; } EXPORT_SYMBOL_GPL(sched_trace_rq_nr_running); + +#ifdef CONFIG_GROUP_BALANCER +static int tg_validate_group_balancer_down(struct task_group *tg, void *data) +{ + if (tg->group_balancer) + return -EINVAL; + return 0; +} + +/* + * Only one task group is allowed to enable the group balancer on the path from + * root_task_group to a given leaf task group.
+ */ +static int validate_group_balancer(struct task_group *tg) +{ + int retval = 0; + + rcu_read_lock(); + retval = walk_tg_tree_from(tg, tg_validate_group_balancer_down, + tg_nop, NULL); + if (retval) + goto out; + + for (; tg != &root_task_group; tg = tg->parent) { + if (tg->group_balancer) { + retval = -EINVAL; + break; + } + } +out: + rcu_read_unlock(); + return retval; +} + +int update_group_balancer(struct task_group *tg, u64 new) +{ + int cpu, retval; + struct rq_flags rf; + unsigned int delta; + + if (new) { + retval = validate_group_balancer(tg); + if (retval) + return retval; + retval = attach_tg_to_group_balancer_sched_domain(tg, NULL, true); + if (retval) + return retval; + } else { + detach_tg_from_group_balancer_sched_domain(tg, true); + } + + cpus_read_lock(); + for_each_online_cpu(cpu) { + bool on_rq, throttled; + struct rq *rq = cpu_rq(cpu); + struct cfs_rq *cfs_rq; + struct sched_entity *se; + + rq_lock_irq(rq, &rf); + se = tg->se[cpu]; + cfs_rq = cfs_rq_of(se); + throttled = throttled_hierarchy(cfs_rq); + delta = se->my_q->h_nr_running; + on_rq = se->on_rq; + + if (on_rq && !throttled) { + if (new) + rq->nr_gb_running += delta; + else + rq->nr_gb_running -= delta; + } + rq_unlock_irq(rq, &rf); + } + cpus_read_unlock(); + + return 0; +} + +int get_tg_specs(struct task_group *tg) +{ + return tg->specs_ratio; +} +#endif diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 8f30a023365f1cd29ae28be04be3ecd628bfd846..d45ae1e86d1694dcc982dfa081c490915ce402ea 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -119,6 +119,10 @@ SCHED_FEAT(SCHED_CORE_HT_AWARE_QUOTA, false) SCHED_FEAT(SCHED_CORE_VRUNTIME, false) #endif +#ifdef CONFIG_GROUP_BALANCER +SCHED_FEAT(GB_SPECS_BALANCE, false) +#endif + SCHED_FEAT(SCHED_FEAT_RESERVE1, false) SCHED_FEAT(SCHED_FEAT_RESERVE2, false) SCHED_FEAT(SCHED_FEAT_RESERVE3, false) diff --git a/kernel/sched/group_balancer.c b/kernel/sched/group_balancer.c index cbbe57b8aefe67b58c273fe941d8711e86922f4d..3ef7220983554c52ad69ad9b4731753891044c5d 100644 --- a/kernel/sched/group_balancer.c +++ b/kernel/sched/group_balancer.c @@ -8,6 +8,7 @@ #include "sched.h" #include #include +#include struct gb_lb_env { int src_cpu; @@ -18,6 +19,9 @@ struct gb_lb_env { unsigned long nr_balance_failed; enum migration_type migration_type; struct rb_root task_groups; +#ifdef CONFIG_CFS_BANDWIDTH + bool burst; +#endif CK_KABI_RESERVE(1) CK_KABI_RESERVE(2) @@ -38,10 +42,14 @@ struct group_balancer_sched_domain { unsigned int span_weight; unsigned int nr_children; /* If free_tg_specs is less than zero, the gb_sd is overloaded. */ - int free_tg_specs; + atomic_t free_tg_specs; unsigned int depth; raw_spinlock_t lock; struct rb_root task_groups; +#ifdef CONFIG_CFS_BANDWIDTH + struct rb_root burstable_task_groups; + atomic_t h_nr_burst_tg; +#endif struct kernfs_node *kn; unsigned long last_balance_timestamp; unsigned long lower_interval; @@ -145,6 +153,7 @@ struct group_balancer_size_level { LIST_HEAD(group_balancer_sched_domains); DEFINE_RWLOCK(group_balancer_sched_domain_lock); +DEFINE_MUTEX(group_balancer_select_lock); struct cpumask root_cpumask; @@ -268,11 +277,23 @@ struct group_balancer_sched_domain *group_balancer_root_domain; #define GB_OVERLOAD 0x1 #define GB_OVERUTILIZED 0x2 +/* + * The time threshold that the preferred gb_sd expires. 
+ * Unit: ms + * Default: 60000 + */ +unsigned long sysctl_sched_gb_expiration_ms = 60000; + static inline struct cpumask *gb_sd_span(struct group_balancer_sched_domain *gb_sd) { return to_cpumask(gb_sd->span); } +struct cpumask *get_gb_sd_span(struct group_balancer_sched_domain *gb_sd) +{ + return gb_sd_span(gb_sd); +} + static unsigned int get_size_level(struct group_balancer_sched_domain *gb_sd) { int size_level = ilog2(gb_sd->span_weight); @@ -302,6 +323,125 @@ static void add_to_size_level(struct group_balancer_sched_domain *gb_sd) __add_to_size_level(gb_sd, size_level); } +bool tg_group_balancer_enabled(struct task_group *tg) +{ + return !!tg->group_balancer; +} + +struct cgroup *tg_cgroup(struct task_group *tg) +{ + return tg->css.cgroup; +} + +#ifdef CONFIG_CPUSETS +static inline bool +gb_sd_satisfies_task_group(struct task_group *tg, struct group_balancer_sched_domain *gb_sd) +{ + struct cpumask *cpus_allowed = task_group_cpus_allowed(tg); + struct cpumask soft_cpus_allowed; + unsigned int soft_cpus_weight; + + if (!cpus_allowed) { + soft_cpus_weight = gb_sd->span_weight; + } else { + cpumask_and(&soft_cpus_allowed, cpus_allowed, gb_sd_span(gb_sd)); + soft_cpus_weight = cpumask_weight(&soft_cpus_allowed); + } + /* tg->group_balancer = 2 means that tg acquires double logical cpus. */ + return tg->group_balancer * tg->specs_ratio <= 100 * soft_cpus_weight; +} +#else +static inline bool +gb_sd_satisfies_task_group(struct task_group *tg, struct group_balancer_sched_domain *gb_sd) +{ + return true; +} +#endif + +#ifdef CONFIG_CFS_BANDWIDTH +static inline bool is_burstable_task_group(struct task_group *tg) +{ + return !!tg->cfs_bandwidth.burst; +} + +static inline struct rb_root +*gb_rb_root(struct task_group *tg, struct group_balancer_sched_domain *gb_sd) +{ + if (unlikely(is_burstable_task_group(tg))) + return &gb_sd->burstable_task_groups; + return &gb_sd->task_groups; +} +static inline void update_h_nr_burst_tg(struct task_group *tg, bool add) +{ + struct group_balancer_sched_domain *gb_sd = tg->gb_sd; + + if (!is_burstable_task_group(tg)) + return; + + for (; gb_sd; gb_sd = gb_sd->parent) { + if (add) + atomic_inc(&gb_sd->h_nr_burst_tg); + else + atomic_dec(&gb_sd->h_nr_burst_tg); + } +} + +static inline bool tg_specs_less(struct rb_node *a, const struct rb_node *b); +void tg_burst_change(struct task_group *tg, u64 burst) +{ + bool burst_before, burst_now; + struct group_balancer_sched_domain *gb_sd; + + if (!group_balancer_enabled()) + return; + if (!tg_group_balancer_enabled(tg)) + return; + + gb_sd = tg->gb_sd; + burst_before = !!tg->cfs_bandwidth.burst; + burst_now = !!burst; + if (burst_before == burst_now) + return; + + read_lock(&group_balancer_sched_domain_lock); + raw_spin_lock(&gb_sd->lock); + if (!burst_before) { + rb_erase(&tg->gb_node, &gb_sd->task_groups); + rb_add(&tg->gb_node, &gb_sd->burstable_task_groups, tg_specs_less); + update_h_nr_burst_tg(tg, true); + } else { + rb_erase(&tg->gb_node, &gb_sd->burstable_task_groups); + rb_add(&tg->gb_node, &gb_sd->task_groups, tg_specs_less); + update_h_nr_burst_tg(tg, false); + } + raw_spin_unlock(&gb_sd->lock); + read_unlock(&group_balancer_sched_domain_lock); +} +#else +static inline bool is_burstable_task_group(struct task_group *tg) +{ + return false; +} + +static inline struct rb_root *gb_rb_root(struct task_group *tg, struct group_balancer_sched_domain *gb_sd) +{ + return &gb_sd->task_groups; +} + +static inline void update_h_nr_burst_tg(struct task_group *tg, bool add) { } +#endif + +static inline bool
+is_preferred_gb_sd(struct task_group *tg, struct group_balancer_sched_domain *gb_sd) +{ + struct group_balancer_sched_domain *p_gb_sd = tg->preferred_gb_sd; + + if (!p_gb_sd) + return true; + + return cpumask_subset(gb_sd_span(p_gb_sd), gb_sd_span(gb_sd)); +} + static int group_balancer_seqfile_show(struct seq_file *m, void *arg) { struct kernfs_open_file *of = m->private; @@ -597,6 +737,7 @@ static inline struct group_balancer_sched_domain raw_spin_lock_init(&new->lock); new->task_groups = RB_ROOT; + new->burstable_task_groups = RB_ROOT; new->imbalance_pct = 117; return new; @@ -633,7 +774,7 @@ static void add_to_tree(struct group_balancer_sched_domain *gb_sd, } gb_sd->span_weight = cpumask_weight(gb_sd_span(gb_sd)); gb_sd->lower_interval = ilog2(gb_sd->span_weight) * gb_sd->span_weight; - gb_sd->free_tg_specs = 100 * gb_sd->span_weight; + atomic_set(&gb_sd->free_tg_specs, 100 * gb_sd->span_weight); add_to_size_level(gb_sd); if (!gb_sd->nr_children) { @@ -644,12 +785,10 @@ static void add_to_tree(struct group_balancer_sched_domain *gb_sd, } } -#define __node_2_task_group(n) rb_entry((n), struct task_group, gb_node) - static inline bool tg_specs_less(struct rb_node *a, const struct rb_node *b) { - struct task_group *tg_a = __node_2_task_group(a); - struct task_group *tg_b = __node_2_task_group(b); + struct task_group *tg_a = __gb_node_2_tg(a); + struct task_group *tg_b = __gb_node_2_tg(b); int specs_a = tg_a->specs_ratio; int specs_b = tg_b->specs_ratio; @@ -683,17 +822,31 @@ static void free_group_balancer_sched_domain(struct group_balancer_sched_domain struct task_group *tg; struct group_balancer_sched_domain *parent = gb_sd->parent; struct rb_node *node; - struct rb_root *root = &gb_sd->task_groups; + struct rb_root *roots[2] = { +#ifdef CONFIG_CFS_BANDWIDTH + &gb_sd->burstable_task_groups, +#else + NULL, +#endif + &gb_sd->task_groups, + }; + struct rb_root *root; + int i; if (parent) { parent->nr_children--; /* Move the task_groups to parent. 
*/ - while (!RB_EMPTY_ROOT(root)) { - node = root->rb_node; - tg = __node_2_task_group(node); - rb_erase(node, root); - rb_add(node, &parent->task_groups, tg_specs_less); - walk_tg_tree_from(tg, tg_set_gb_tg_down, tg_nop, tg); + for (i = 0; i < 2; i++) { + root = roots[i]; + if (!root) + continue; + while (!RB_EMPTY_ROOT(root)) { + node = root->rb_node; + tg = __gb_node_2_tg(node); + rb_erase(node, root); + rb_add(node, &parent->task_groups, tg_specs_less); + walk_tg_tree_from(tg, tg_set_gb_tg_down, tg_nop, tg); + } } } @@ -1030,8 +1183,8 @@ static int build_group_balancer_sched_domains(void) group_balancer_root_domain->lower_interval = ilog2(group_balancer_root_domain->span_weight) * group_balancer_root_domain->span_weight; - group_balancer_root_domain->free_tg_specs = - 100 * group_balancer_root_domain->span_weight; + atomic_set(&group_balancer_root_domain->free_tg_specs, + 100 * group_balancer_root_domain->span_weight); } if (!zalloc_cpumask_var(&trial_cpumask, GFP_KERNEL)) { @@ -1402,8 +1555,35 @@ static unsigned long gb_sd_capacity(struct group_balancer_sched_domain *gb_sd) return cap; } -static struct group_balancer_sched_domain *select_idle_gb_sd(int specs) +static unsigned int gb_sd_nr_running(struct group_balancer_sched_domain *gb_sd) +{ + int cpu; + int nr_running = 0; + + for_each_cpu(cpu, gb_sd_span(gb_sd)) + nr_running += cpu_rq(cpu)->nr_gb_running; + + return nr_running; +} + +static unsigned int +tg_gb_sd_nr_running(struct task_group *tg, struct group_balancer_sched_domain *gb_sd) { + int cpu; + int nr_running = 0; + struct cfs_rq *cfs_rq; + + for_each_cpu(cpu, gb_sd_span(gb_sd)) { + cfs_rq = tg->cfs_rq[cpu]; + nr_running += cfs_rq->h_nr_running; + } + + return nr_running; +} + +static struct group_balancer_sched_domain *select_idle_gb_sd(struct task_group *tg) +{ + int specs = tg->specs_ratio; struct group_balancer_sched_domain *gb_sd, *child; if (specs == -1 || specs > group_balancer_root_domain->span_weight * 100) @@ -1418,14 +1598,16 @@ static struct group_balancer_sched_domain *select_idle_gb_sd(int specs) int max_unsatisfied_free_specs = INT_MIN; for_each_gb_sd_child(child, gb_sd) { - if (child->span_weight * 100 >= specs && - child->free_tg_specs > max_free_specs) { + int free_tg_specs = atomic_read(&child->free_tg_specs); + + if (gb_sd_satisfies_task_group(tg, child) && + free_tg_specs > max_free_specs) { max_free_child = child; - max_free_specs = child->free_tg_specs; + max_free_specs = free_tg_specs; } else if (child->span_weight * 100 < specs && - child->free_tg_specs > max_unsatisfied_free_specs) { + free_tg_specs > max_unsatisfied_free_specs) { max_unsatisfied_free_child = child; - max_unsatisfied_free_specs = child->free_tg_specs; + max_unsatisfied_free_specs = free_tg_specs; } } if (!max_free_child) @@ -1443,9 +1625,9 @@ static struct group_balancer_sched_domain *select_idle_gb_sd(int specs) * specs cannot fully represent the degree of idleness if the span weight is * different. 
*/ - if (max_free_specs < specs && + if (max_free_specs < specs && (!max_unsatisfied_free_child || max_free_specs / max_free_child->span_weight < - max_unsatisfied_free_specs / max_unsatisfied_free_child->span_weight) + max_unsatisfied_free_specs / max_unsatisfied_free_child->span_weight)) break; gb_sd = max_free_child; } @@ -1460,13 +1642,14 @@ check_task_group_leap_level(struct task_group *tg, struct group_balancer_sched_d int specs = tg->specs_ratio; for_each_gb_sd_child(child, gb_sd) { - if (specs <= 100 * child->span_weight) { + if (gb_sd_satisfies_task_group(tg, child)) { tg->leap_level = true; tg->leap_level_timestamp = jiffies; return; } } + tg->preferred_gb_sd = gb_sd; tg->leap_level = false; } @@ -1474,13 +1657,8 @@ void update_free_tg_specs(struct group_balancer_sched_domain *gb_sd, int specs) { struct group_balancer_sched_domain *parent; - if (specs != -1) { - for (parent = gb_sd; parent; parent = parent->parent) { - raw_spin_lock(&parent->lock); - parent->free_tg_specs += specs; - raw_spin_unlock(&parent->lock); - } - } + for (parent = gb_sd; parent; parent = parent->parent) + atomic_add(specs, &parent->free_tg_specs); } /* @@ -1497,9 +1675,12 @@ void add_tg_to_group_balancer_sched_domain_locked(struct task_group *tg, struct group_balancer_sched_domain *gb_sd, bool enable) { - tg->gb_sd = gb_sd; - rb_add(&tg->gb_node, &gb_sd->task_groups, tg_specs_less); + struct rb_root *root; + tg->gb_sd = gb_sd; + root = gb_rb_root(tg, gb_sd); + rb_add(&tg->gb_node, root, tg_specs_less); + update_h_nr_burst_tg(tg, true); tg->soft_cpus_allowed_ptr = gb_sd_span(gb_sd); tg_inc_soft_cpus_version(tg); if (enable) @@ -1507,6 +1688,8 @@ void add_tg_to_group_balancer_sched_domain_locked(struct task_group *tg, check_task_group_leap_level(tg, gb_sd); tg->adjust_level_timestamp = jiffies; + if (tg->specs_ratio != -1) + update_free_tg_specs(gb_sd, -tg->specs_ratio); } void add_tg_to_group_balancer_sched_domain(struct task_group *tg, @@ -1516,7 +1699,6 @@ void add_tg_to_group_balancer_sched_domain(struct task_group *tg, raw_spin_lock(&gb_sd->lock); add_tg_to_group_balancer_sched_domain_locked(tg, gb_sd, enable); raw_spin_unlock(&gb_sd->lock); - update_free_tg_specs(gb_sd, -tg->specs_ratio); } static void @@ -1524,11 +1706,16 @@ remove_tg_from_group_balancer_sched_domain_locked(struct task_group *tg, struct group_balancer_sched_domain *gb_sd, bool disable) { - tg->gb_sd = NULL; - rb_erase(&tg->gb_node, &gb_sd->task_groups); + struct rb_root *root = gb_rb_root(tg, gb_sd); + + rb_erase(&tg->gb_node, root); RB_CLEAR_NODE(&tg->gb_node); + tg->gb_sd = NULL; + update_h_nr_burst_tg(tg, false); if (disable) walk_tg_tree_from(tg, tg_unset_gb_tg_down, tg_nop, NULL); + if (tg->specs_ratio != -1) + update_free_tg_specs(gb_sd, tg->specs_ratio); } static void @@ -1540,7 +1727,6 @@ remove_tg_from_group_balancer_sched_domain(struct task_group *tg, raw_spin_lock(&gb_sd->lock); remove_tg_from_group_balancer_sched_domain_locked(tg, gb_sd, disable); raw_spin_unlock(&gb_sd->lock); - update_free_tg_specs(gb_sd, tg->specs_ratio); read_unlock(&group_balancer_sched_domain_lock); } @@ -1552,16 +1738,20 @@ int attach_tg_to_group_balancer_sched_domain(struct task_group *tg, int ret = 0; read_lock(&group_balancer_sched_domain_lock); - if (enable) - gb_sd = select_idle_gb_sd(tg->specs_ratio); - else + if (enable) { + mutex_lock(&group_balancer_select_lock); + gb_sd = select_idle_gb_sd(tg); + } else { gb_sd = target; + } if (!gb_sd) { ret = -ESRCH; goto out; } add_tg_to_group_balancer_sched_domain(tg, gb_sd, enable); out: + if 
(enable) + mutex_unlock(&group_balancer_select_lock); read_unlock(&group_balancer_sched_domain_lock); return ret; } @@ -1586,11 +1776,16 @@ static void tg_upper_level(struct task_group *tg, struct group_balancer_sched_do static bool tg_lower_level(struct task_group *tg) { struct group_balancer_sched_domain *gb_sd = tg->gb_sd; - struct group_balancer_sched_domain *child, *dst; + struct group_balancer_sched_domain *child, *dst = NULL; unsigned long tg_child_load, tg_load = 0, tg_dst_load = 0; unsigned long child_load, src_load, dst_load, total_load = 0, migrate_load; unsigned long child_cap, total_cap = 0, src_cap, dst_cap = 0; + unsigned int child_nr_running, dst_nr_running = 0, tg_child_nr_running; + unsigned int tg_nr_running = 0, tg_dst_nr_running = 0, migrate_nr_running; unsigned long src_imb, dst_imb; + int total_free_specs = 0, child_free_specs = 0, dst_free_specs = 0, src_free_specs = 0; + int tg_specs; + unsigned int src_span_weight, dst_span_weight; if (!gb_sd) goto fail; @@ -1612,36 +1807,65 @@ static bool tg_lower_level(struct task_group *tg) for_each_gb_sd_child(child, gb_sd) { child_load = gb_sd_load(child); total_load += child_load; + child_nr_running = gb_sd_nr_running(child); child_cap = gb_sd_capacity(child); total_cap += child_cap; tg_child_load = tg_gb_sd_load(tg, child); + tg_load += tg_child_load; + tg_child_nr_running = tg_gb_sd_nr_running(tg, child); + tg_nr_running += tg_child_nr_running; + + child_free_specs = atomic_read(&child->free_tg_specs); + total_free_specs += child_free_specs; + if (!gb_sd_satisfies_task_group(tg, child)) + continue; if (!dst || tg_child_load > tg_dst_load) { dst = child; tg_dst_load = tg_child_load; dst_load = child_load; dst_cap = child_cap; + tg_dst_nr_running = tg_child_nr_running; + dst_nr_running = child_nr_running; + dst_free_specs = child_free_specs; } else if (tg_child_load == tg_dst_load) { if (dst_load * child_cap > child_load * dst_cap) { dst = child; tg_dst_load = tg_child_load; dst_load = child_load; dst_cap = child_cap; + tg_dst_nr_running = tg_child_nr_running; + dst_nr_running = child_nr_running; + dst_free_specs = child_free_specs; } } - tg_load += tg_child_load; } if (tg_load == 0) goto fail; - if (tg->specs_ratio > 100 * dst->span_weight) + if (!dst) goto fail; -#ifdef CONFIG_NUMA + if (!is_preferred_gb_sd(tg, gb_sd)) { + /* + * If the task group stays in the upper level for too long, + * let the preferred gb_sd expire. + */ + if (!time_after(jiffies, + tg->expiration_start + msecs_to_jiffies(sysctl_sched_gb_expiration_ms))) + goto fail; + tg->preferred_gb_sd = NULL; + } + /* We won't allow a task group span more than two numa nodes too long. */ - if (dst->gb_flags & GROUP_BALANCER_NUMA_FLAG) + if (dst->gb_flags & GROUP_BALANCER_LLC_FLAG) goto lower; -#endif + + /* If the migration won't cause overload, do it. */ + migrate_nr_running = tg_nr_running - tg_dst_nr_running; + if (dst_nr_running + migrate_nr_running <= dst->span_weight) + goto lower; + /* If we lower the level, we have to make sure that we will not cause imbalance. * * src_load dst_load @@ -1662,13 +1886,38 @@ static bool tg_lower_level(struct task_group *tg) if (dst_imb > src_imb) goto fail; + + if (!sched_feat(GB_SPECS_BALANCE)) + goto lower; + /* + * If we lower the level, we'd better guarantee that free specs won't become more imbalanced.
+ * + * src_free_specs dst_free_specs + * --------------- vs -------------- + * src_span_weight dst_span_weight + * + */ + tg_specs = tg->specs_ratio; + src_free_specs = total_free_specs - dst_free_specs; + dst_span_weight = dst->span_weight; + src_span_weight = gb_sd->span_weight - dst_span_weight; + src_imb = abs(src_free_specs * dst_span_weight - dst_free_specs * src_span_weight); + dst_imb = abs(src_free_specs * dst_span_weight - + (dst_free_specs - tg_specs) * src_span_weight); + + if (dst_free_specs * src_span_weight > src_free_specs * dst_span_weight) + goto fail; + + if (dst_imb > src_imb) + goto fail; + #ifdef CONFIG_NUMA lower: #endif detach_tg_from_group_balancer_sched_domain(tg, false); attach_tg_to_group_balancer_sched_domain(tg, dst, false); /* The task group maybe still leap level, check it. */ - check_task_group_leap_level(tg, gb_sd); + check_task_group_leap_level(tg, dst); return true; fail: @@ -1721,7 +1970,7 @@ void task_tick_gb(struct task_struct *p) raw_spin_unlock(&tg->gb_lock); } -void tg_specs_change(struct task_group *tg) +void tg_specs_change(struct task_group *tg, u64 specs_before) { struct group_balancer_sched_domain *gb_sd; int specs = tg->specs_ratio; @@ -1731,24 +1980,34 @@ void tg_specs_change(struct task_group *tg) /* tg->group_balancer is always true here, so find a gb_sd to attach. */ goto upper; + if (specs_before != specs) { + if (specs_before != -1) + update_free_tg_specs(gb_sd, specs_before); + if (specs != -1) + update_free_tg_specs(gb_sd, -specs); + } + /* If the task group leaps level after specs change, we will lower it later. */ check_task_group_leap_level(tg, gb_sd); - if (tg->leap_level) + if (tg->leap_level) { + tg->preferred_gb_sd = NULL; return; + } /* This gb_sd still satisfy, don't do anything. */ - if (specs <= gb_sd->span_weight * 100 || gb_sd == group_balancer_root_domain) + if (gb_sd_satisfies_task_group(tg, gb_sd) || gb_sd == group_balancer_root_domain) return; /* The specs doesn't satisfy anymore, upper to find a satisfied gb_sd. */ /* Fast path, if the specs is -1 or too large, move it to root domain. */ - if (specs == -1 || specs > group_balancer_root_domain->span_weight * 100) { + if (specs == -1 || + tg->group_balancer * specs > group_balancer_root_domain->span_weight * 100) { gb_sd = group_balancer_root_domain; goto upper; } for (; gb_sd; gb_sd = gb_sd->parent) { - if (specs <= gb_sd->span_weight * 100) + if (gb_sd_satisfies_task_group(tg, gb_sd)) break; } @@ -1808,49 +2067,76 @@ gb_detach_task_groups_from_gb_sd(struct gb_lb_env *gb_env, struct task_group *tg, *n; unsigned long load, util; int detached = 0; + struct rb_root *roots[2] = { +#ifdef CONFIG_CFS_BANDWIDTH + &gb_sd->burstable_task_groups, +#else + NULL, +#endif + &gb_sd->task_groups, + }; + int i, max_idx = 1; + struct rb_root *root; raw_spin_lock(&gb_sd->lock); - /* Try the task cgroups with little specs first. */ - gb_for_each_tg_safe(tg, n, &gb_sd->task_groups) { - if (!time_after(jiffies, tg->adjust_level_timestamp + 2 * gb_sd->lower_interval)) - continue; - switch (gb_env->migration_type) { -#ifdef CONFIG_GROUP_IDENTITY - case migrate_identity: - fallthrough; +#ifdef CONFIG_CFS_BANDWIDTH + /* + * When burst is true, the load balance interval is too short, + * so we migrate burstable task groups only.
+ */ + if (gb_env->burst) + max_idx = 0; #endif - case migrate_load: - load = tg_gb_sd_load(tg, gb_sd); - if (load == 0) - continue; - if (shr_bound(load, gb_env->nr_balance_failed) > gb_env->imbalance) - continue; - gb_env->imbalance -= load; - break; - case migrate_util: - util = tg_gb_sd_util(tg, gb_sd); - if (util == 0) - continue; - if (shr_bound(util, gb_env->nr_balance_failed) > gb_env->imbalance) + for (i = 0; i <= max_idx; i++) { + root = roots[i]; + if (!root) + continue; + if (gb_env->burst && i == 1) + continue; + /* Try the task cgroups with little specs first. */ + gb_for_each_tg_safe(tg, n, root) { + if (i > 0 && !time_after(jiffies, + tg->adjust_level_timestamp + 2 * gb_sd->lower_interval)) continue; - gb_env->imbalance -= util; - break; - case migrate_task: - gb_env->imbalance = 0; - break; - /*TODO: Perfect strategy of migrate_misfit*/ - case migrate_misfit: - gb_env->imbalance = 0; - break; - default: - break; - } - remove_tg_from_group_balancer_sched_domain_locked(tg, gb_sd, false); - rb_add(&tg->gb_node, &gb_env->task_groups, tg_specs_less); - detached++; - if (gb_env->imbalance <= 0) { - raw_spin_unlock(&gb_sd->lock); - return detached; + switch (gb_env->migration_type) { + #ifdef CONFIG_GROUP_IDENTITY + case migrate_identity: + fallthrough; + #endif + case migrate_load: + load = tg_gb_sd_load(tg, gb_sd); + if (load == 0) + continue; + if (shr_bound(load, gb_env->nr_balance_failed) > gb_env->imbalance) + continue; + gb_env->imbalance -= load; + break; + case migrate_util: + util = tg_gb_sd_util(tg, gb_sd); + if (util == 0) + continue; + if (shr_bound(util, gb_env->nr_balance_failed) > gb_env->imbalance) + continue; + gb_env->imbalance -= util; + break; + case migrate_task: + gb_env->imbalance = 0; + break; + /*TODO: Perfect strategy of migrate_misfit*/ + case migrate_misfit: + gb_env->imbalance = 0; + break; + default: + break; + } + remove_tg_from_group_balancer_sched_domain_locked(tg, gb_sd, false); + tg->expiration_start = jiffies; + rb_add(&tg->gb_node, &gb_env->task_groups, tg_specs_less); + detached++; + if (gb_env->imbalance <= 0) { + raw_spin_unlock(&gb_sd->lock); + return detached; + } } } raw_spin_unlock(&gb_sd->lock); @@ -1906,18 +2192,18 @@ static void gb_attach_task_groups(struct gb_lb_env *gb_env) static void __update_gb_sd_status(struct group_balancer_sched_domain *gb_sd, int *gb_sd_status) { - int i, nr_running; + int i, nr_gb_running = 0; for_each_cpu(i, gb_sd_span(gb_sd)) { struct rq *rq = cpu_rq(i); - nr_running = rq->nr_running; - if (nr_running > 1) - *gb_sd_status |= GB_OVERLOAD; - - if (gb_cpu_overutilized(i)) - *gb_sd_status |= GB_OVERUTILIZED; + nr_gb_running += rq->nr_gb_running; + /* TODO: Improve the utilization of GB_OVERUTILIZED.*/ +// if (gb_cpu_overutilized(i)) +// *gb_sd_status |= GB_OVERUTILIZED; } + if (nr_gb_running > gb_sd->span_weight) + *gb_sd_status |= GB_OVERLOAD; } static void update_gb_sd_status(struct gb_lb_env *gb_env, int *gb_sd_status) @@ -1938,6 +2224,10 @@ void gb_load_balance(struct lb_env *env) int gb_sd_status = 0; struct cpumask *gb_mask = this_cpu_cpumask_var_ptr(group_balancer_mask); unsigned long src_load, src_cap, dst_load, dst_cap; +#ifdef CONFIG_CFS_BANDWIDTH + bool burst = false; +#endif + int src_status = 0; if (!group_balancer_enabled()) return; @@ -1961,15 +2251,28 @@ void gb_load_balance(struct lb_env *env) if (!gb_sd) goto unlock; - if (!time_after(jiffies, gb_sd->last_balance_timestamp + 2 * gb_sd->lower_interval)) - goto unlock; + if (!time_after(jiffies, gb_sd->last_balance_timestamp + 2 * 
gb_sd->lower_interval)) { +#ifdef CONFIG_CFS_BANDWIDTH + if (atomic_read(&dst->h_nr_burst_tg)) + burst = true; + else +#endif + goto unlock; + } + gb_sd->last_balance_timestamp = jiffies; src_load = gb_sd_load(src); src_cap = gb_sd_capacity(src); dst_load = gb_sd_load(dst); dst_cap = gb_sd_capacity(dst); + __update_gb_sd_status(src, &src_status); - if (dst_load * src_cap * gb_sd->imbalance_pct >= src_load * dst_cap * 100) + /* + * If the imbalance isn't larger than imbalance_pct, and it isn't the case that + * dst is idle and src is overloaded, don't balance. + */ + if (dst_load * src_cap * gb_sd->imbalance_pct >= src_load * dst_cap * 100 && + !(available_idle_cpu(env->dst_cpu) && src_status)) goto unlock; gb_env = (struct gb_lb_env){ @@ -1981,6 +2284,9 @@ void gb_load_balance(struct lb_env *env) .imbalance = env->imbalance, .nr_balance_failed = env->sd->nr_balance_failed, .task_groups = RB_ROOT, +#ifdef CONFIG_CFS_BANDWIDTH + .burst = burst, +#endif }; /* @@ -1988,10 +2294,26 @@ void gb_load_balance(struct lb_env *env) * and we don't migrate tg in this case. */ for (parent = gb_sd; parent; parent = parent->parent) { - for (node = rb_first(&parent->task_groups); node; node = rb_next(node)) { - tg = __node_2_task_group(node); - if (tg->cfs_rq[env->src_cpu]->h_nr_running) - goto unlock; + struct rb_root *roots[2] = { +#ifdef CONFIG_CFS_BANDWIDTH + &parent->burstable_task_groups, +#else + NULL, +#endif + &parent->task_groups, + }; + struct rb_root *root; + int i; + + for (i = 0; i < 2; i++) { + root = roots[i]; + if (!root) + continue; + for (node = rb_first(root); node; node = rb_next(node)) { + tg = __gb_node_2_tg(node); + if (tg->cfs_rq[env->src_cpu]->h_nr_running) + goto unlock; + } } } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 89cb253c8c9c411ac000ec39cb24e58215f21810..8b9eb580b9dc4b3512da6ead605d7c378f65abab 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -618,11 +618,13 @@ struct task_group { int specs_ratio; struct rb_node gb_node; struct group_balancer_sched_domain *gb_sd; + struct group_balancer_sched_domain *preferred_gb_sd; struct task_group *gb_tg; - bool group_balancer; + unsigned int group_balancer; bool leap_level; unsigned long leap_level_timestamp; unsigned long adjust_level_timestamp; + unsigned long expiration_start; raw_spinlock_t gb_lock; #endif long priority; @@ -1547,6 +1549,8 @@ struct rq { #ifdef CONFIG_GROUP_BALANCER struct group_balancer_sched_domain *gb_sd; + unsigned int nr_gb_running; + long nr_gb_make_up; bool group_balancer_enabled; #endif @@ -2965,6 +2969,20 @@ static inline void sub_nr_running(struct rq *rq, unsigned count) sched_update_tick_dependency(rq); } +#ifdef CONFIG_GROUP_BALANCER +static inline void gb_update_nr_running(struct task_group *tg, struct rq *rq, int delta) +{ + if (!group_balancer_enabled()) + return; + if (!tg || !tg_group_balancer_enabled(tg)) + return; + rq->nr_gb_running += delta; +} +extern int update_group_balancer(struct task_group *tg, u64 new); +#else +static inline void gb_update_nr_running(struct task_group *tg, struct rq *rq, int delta) { } +#endif + extern void activate_task(struct rq *rq, struct task_struct *p, int flags); extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags); @@ -3713,7 +3731,6 @@ extern void sched_dynamic_update(int mode); #endif #ifdef CONFIG_GROUP_BALANCER -extern bool group_balancer_enabled(void); extern bool group_balancer_rq_enabled(struct rq *rq); static inline const struct cpumask *task_allowed_cpu(struct task_struct *p) { @@ -3738,11
+3755,6 @@ static inline void tg_inc_soft_cpus_version(struct task_group *tg) tg->soft_cpus_version = 0; } -static inline bool tg_group_balancer_enabled(struct task_group *tg) -{ - return tg->group_balancer; -} - extern void sched_init_group_balancer_sched_domains(void); extern void sched_clear_group_balancer_sched_domains(void); extern void tg_set_specs_ratio(struct task_group *tg); @@ -3751,13 +3763,16 @@ extern int attach_tg_to_group_balancer_sched_domain(struct task_group *tg, bool enable); extern void detach_tg_from_group_balancer_sched_domain(struct task_group *tg, bool disable); extern void update_group_balancer_root_cpumask(void); -extern void tg_specs_change(struct task_group *tg); extern unsigned long cfs_h_load(struct cfs_rq *cfs_rq); extern bool gb_cpu_overutilized(int cpu); extern void gb_load_balance(struct lb_env *env); extern void task_tick_gb(struct task_struct *p); extern void util_est_reenqueue_all(void); extern void util_est_clear_all(void); +extern struct cpumask *get_gb_sd_span(struct group_balancer_sched_domain *gb_sd); +#ifdef CONFIG_CFS_BANDWIDTH +extern void tg_burst_change(struct task_group *tg, u64 burst); +#endif #else static inline bool group_balancer_rq_enabled(struct rq *rq) { return false; } static inline const struct cpumask *task_allowed_cpu(struct task_struct *p) @@ -3766,7 +3781,6 @@ static inline const struct cpumask *task_allowed_cpu(struct task_struct *p) } static inline void tg_set_specs_ratio(struct task_group *tg) { } static inline void update_group_balancer_root_cpumask(void) { } -static inline void tg_specs_change(struct task_group *tg) { } #ifdef CONFIG_SMP static inline void gb_load_balance(struct lb_env *env) { } #endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ff72c63f6129e2015cec43659468868781f949d9..fc0da990ae3786f9dd3c882b4eea2a6bf1f1975d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2116,6 +2116,15 @@ static struct ctl_table kern_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, + { + .procname = "sched_gb_expiration_ms", + .data = &sysctl_sched_gb_expiration_ms, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &zero_ul, + .extra2 = &long_max, + }, #endif #ifdef CONFIG_PROVE_LOCKING {
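Illustrative sketch (not part of the patch): the free_tg_specs bookkeeping above treats every gb_sd as holding 100 "specs" per CPU in its span (a task group's specs_ratio is its hierarchical quota expressed as a percentage of one CPU), and attaching, detaching or re-sizing a group adds or subtracts its specs_ratio all the way up the parent chain, now with atomics instead of taking each parent's lock. A minimal userspace model of that accounting; gb_sd_model and model_update_free_specs are made-up names:

#include <stdatomic.h>
#include <stdio.h>

/* Stand-in for struct group_balancer_sched_domain's accounting fields. */
struct gb_sd_model {
	struct gb_sd_model *parent;
	unsigned int span_weight;
	atomic_int free_tg_specs;	/* below zero means overloaded */
};

static void model_init(struct gb_sd_model *d, struct gb_sd_model *parent,
		       unsigned int span_weight)
{
	d->parent = parent;
	d->span_weight = span_weight;
	atomic_init(&d->free_tg_specs, 100 * span_weight);
}

/* Mirrors update_free_tg_specs(): propagate the delta up to the root. */
static void model_update_free_specs(struct gb_sd_model *d, int specs)
{
	for (; d; d = d->parent)
		atomic_fetch_add(&d->free_tg_specs, specs);
}

int main(void)
{
	struct gb_sd_model root, child;

	model_init(&root, NULL, 8);	/* 8 CPUs -> 800 free specs */
	model_init(&child, &root, 4);	/* 4-CPU child -> 400 specs */

	model_update_free_specs(&child, -250);	/* attach a 2.5-CPU group */
	model_update_free_specs(&child, 250);	/* detach it again */

	printf("root=%d child=%d\n",
	       atomic_load(&root.free_tg_specs),
	       atomic_load(&child.free_tg_specs));	/* prints 800 400 */
	return 0;
}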
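A second sketch, for the per-rq nr_gb_running counter: enqueue_task_fair()/dequeue_task_fair() bump it through gb_update_nr_running() only for groups with the balancer enabled, and __update_gb_sd_status() now flags GB_OVERLOAD once the counters summed over a domain's span exceed its CPU count. Stand-alone model with illustrative names, not kernel code:

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS		4
#define GB_OVERLOAD	0x1

static unsigned int nr_gb_running[NR_CPUS];

/* Mirrors gb_update_nr_running(): only GB-enabled groups are counted. */
static void model_update_nr_running(bool tg_gb_enabled, int cpu, int delta)
{
	if (!tg_gb_enabled)
		return;
	nr_gb_running[cpu] += delta;
}

/* Mirrors __update_gb_sd_status() for a domain spanning cpus [0, span_weight). */
static int model_gb_sd_status(unsigned int span_weight)
{
	unsigned int cpu, sum = 0;
	int status = 0;

	for (cpu = 0; cpu < span_weight; cpu++)
		sum += nr_gb_running[cpu];
	if (sum > span_weight)
		status |= GB_OVERLOAD;
	return status;
}

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		model_update_nr_running(true, cpu, 1);	/* one GB task per CPU */
	model_update_nr_running(true, 0, 1);		/* plus one extra on cpu0 */

	printf("status=%#x\n", model_gb_sd_status(NR_CPUS));	/* prints 0x1 */
	return 0;
}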
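The select_idle_cpu() hunks give a group-balancer task two scan passes: the first is confined to the span of its preferred_gb_sd intersected with the task's allowed CPUs, and only when that pass finds nothing, or the scan budget runs out, does the retry/out path widen the search to the whole LLC. A simplified model of that control flow, assuming cpumasks can be reduced to 64-bit masks and with find_idle_in() standing in for the real per-CPU probing loop:

#include <stdint.h>
#include <stdio.h>

/* Return the lowest CPU that is both a candidate and idle, or -1. */
static int find_idle_in(uint64_t candidates, uint64_t idle_cpus)
{
	uint64_t hit = candidates & idle_cpus;

	return hit ? __builtin_ctzll(hit) : -1;
}

static int pick_idle_cpu(uint64_t llc_span, uint64_t preferred_span,
			 uint64_t allowed, uint64_t idle_cpus)
{
	int cpu = -1;

	/* First pass: stay inside the preferred gb_sd if the group has one. */
	if (preferred_span)
		cpu = find_idle_in(preferred_span & allowed, idle_cpus);
	/* Second pass: the "goto retry" fallback over the whole LLC span. */
	if (cpu < 0)
		cpu = find_idle_in(llc_span & allowed, idle_cpus);
	return cpu;
}

int main(void)
{
	/* LLC is CPUs 0-7, preferred gb_sd is CPUs 0-3, only CPU 6 is idle. */
	printf("picked cpu %d\n",
	       pick_idle_cpu(0xffULL, 0x0fULL, 0xffULL, 1ULL << 6));
	return 0;
}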
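Finally, the tunable: sysctl_sched_gb_expiration_ms is registered in kern_table, so on a CONFIG_GROUP_BALANCER kernel it should appear as /proc/sys/kernel/sched_gb_expiration_ms (default 60000). tg_lower_level() only forgets a group's preferred_gb_sd after the group has been kept away from it, measured from expiration_start, for longer than this many milliseconds. A minimal userspace snippet for raising it, assuming that procfs path:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/sched_gb_expiration_ms", "w");

	if (!f) {
		perror("sched_gb_expiration_ms");
		return 1;
	}
	/* Keep preferred domains for two minutes before letting them expire. */
	fprintf(f, "%lu\n", 120000UL);
	return fclose(f) ? 1 : 0;
}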