diff --git a/include/linux/sched.h b/include/linux/sched.h index a1d6559bdb1d595f4f69062b2a5cb528004734bc..48fdda47b88d50fd1cdd207598e9ad472714626c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -66,6 +66,7 @@ struct signal_struct; struct task_delay_info; struct task_group; struct io_uring_task; +struct cgroup; /* * Task state bitmask. NOTE! These bits are also @@ -2424,4 +2425,24 @@ static inline bool jbd2_proxy_exec_disabled(void) { return !static_branch_unlikely(&__jbd2_proxy_exec_enabled); } +#ifdef CONFIG_GROUP_BALANCER +extern bool group_balancer_enabled(void); +extern int get_tg_specs(struct task_group *tg); +extern void tg_specs_change(struct task_group *tg, u64 specs_before); +extern bool tg_group_balancer_enabled(struct task_group *tg); +extern struct task_group *cgroup_tg(struct cgroup *cgrp); +extern struct cgroup *tg_cgroup(struct task_group *tg); +extern void lock_cfs_constraints_mutex(void); +extern void unlock_cfs_constraints_mutex(void); +#ifdef CONFIG_CPUSETS +extern struct cpumask *task_group_cpus_allowed(struct task_group *tg); +#else +static inline struct cpumask *task_group_cpus_allowed(struct task_group *tg) +{ + return NULL; +} +#endif +#else +static inline void tg_specs_change(struct task_group *tg, u64 specs_before) { } +#endif #endif diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 536765522aecdaf48c385d5800e27e0c4d473612..c4991a5778070d9e10f0598cc1b25f8e1cc9ebca 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -131,6 +131,7 @@ extern int sched_acpu_enable_handler(struct ctl_table *table, int write, #endif #ifdef CONFIG_GROUP_BALANCER extern unsigned int sysctl_sched_group_balancer_enabled; +extern unsigned long sysctl_sched_gb_expiration_ms; extern int sched_group_balancer_enable_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 164f5bee99da05cd22e99b9c7ba870ba217ebaa8..d48296132e71cf93f27d6a4cdd26baa9d17fcbf9 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -205,6 +205,46 @@ static inline struct cpuset *css_cs(struct cgroup_subsys_state *css) return css ?
container_of(css, struct cpuset, css) : NULL; } +#ifdef CONFIG_GROUP_BALANCER +static inline struct cpuset *cgroup_cs(struct cgroup *cgrp) +{ + return container_of(global_cgroup_css(cgrp, cpuset_cgrp_id), + struct cpuset, css); +} + +struct cpumask *task_group_cpus_allowed(struct task_group *tg) +{ + struct cgroup *cg = tg_cgroup(tg); + struct cpuset *cs = cgroup_cs(cg); + + if (cs) + return (struct cpumask *)cs->cpus_allowed; + + return NULL; +} + +static void update_cpumask_for_group_balancer(struct cpuset *cs) +{ + struct cgroup *cg = cs->css.cgroup; + struct task_group *tg; + + if (!group_balancer_enabled()) + return; + + tg = cgroup_tg(cg); + if (!tg) + return; + if (!tg_group_balancer_enabled(tg)) + return; + + lock_cfs_constraints_mutex(); + tg_specs_change(tg, get_tg_specs(tg)); + unlock_cfs_constraints_mutex(); +} +#else +static inline void update_cpumask_for_group_balancer(struct cpuset *cs) { } +#endif + /* Retrieve the cpuset for a task */ static inline struct cpuset *task_cs(struct task_struct *task) { @@ -1498,6 +1538,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp) /* deleted = old - new = old & (~new) */ cpumask_andnot(&deleted, &old_cpus, tmp->new_cpus); cpuacct_cpuset_changed(cs->css.cgroup, &deleted, NULL); + update_cpumask_for_group_balancer(cs); /* * On legacy hierarchy, if the effective cpumask of any non- diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fefc9e372b4d0b88b6a72e460c4333b5c08886e2..68c95c75c8c52c085950b1cf1894df75a3830860 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9013,6 +9013,8 @@ struct task_group *sched_create_group(struct task_group *parent) tg->group_balancer = 0; tg->soft_cpus_version = 0; tg->gb_sd = NULL; + tg->preferred_gb_sd = NULL; + tg->expiration_start = 0; raw_spin_lock_init(&tg->gb_lock); #endif return tg; @@ -9592,6 +9594,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota, if (runtime_enabled && !runtime_was_enabled) cfs_bandwidth_usage_inc(); raw_spin_lock_irq(&cfs_b->lock); + tg_burst_change(tg, burst); cfs_b->period = ns_to_ktime(period); cfs_b->quota = quota; cfs_b->burst = burst; @@ -10106,36 +10109,14 @@ static u64 cpu_group_balancer_read_u64(struct cgroup_subsys_state *css, return tg->group_balancer; } -static int tg_validate_group_balancer_down(struct task_group *tg, void *data) +void lock_cfs_constraints_mutex(void) { - if (tg->group_balancer) - return -EINVAL; - return 0; + mutex_lock(&cfs_constraints_mutex); } -/* - * There is only one task group allowed to enable group balancer in the path from - * root_task_group to a certion leaf task group. 
- */ -static int validate_group_balancer(struct task_group *tg) +void unlock_cfs_constraints_mutex(void) { - int retval = 0; - - rcu_read_lock(); - retval = walk_tg_tree_from(tg, tg_validate_group_balancer_down, - tg_nop, NULL); - if (retval) - goto out; - - for (; tg != &root_task_group; tg = tg->parent) { - if (tg->group_balancer) { - retval = -EINVAL; - break; - } - } -out: - rcu_read_unlock(); - return retval; + mutex_unlock(&cfs_constraints_mutex); } static int cpu_group_balancer_write_u64(struct cgroup_subsys_state *css, @@ -10151,7 +10132,7 @@ static int cpu_group_balancer_write_u64(struct cgroup_subsys_state *css, if (tg == &root_task_group || task_group_is_autogroup(tg)) return -EACCES; - if (new > 1) + if (new > 2) return -EINVAL; write_lock(&group_balancer_lock); @@ -10161,16 +10142,17 @@ static int cpu_group_balancer_write_u64(struct cgroup_subsys_state *css, if (old == new) goto out; - if (new) { - retval = validate_group_balancer(tg); - if (retval) - goto out; - retval = attach_tg_to_group_balancer_sched_domain(tg, NULL, true); - if (retval) - goto out; - } else { - detach_tg_from_group_balancer_sched_domain(tg, true); + if (!!old == !!new) { + mutex_lock(&cfs_constraints_mutex); + tg_specs_change(tg, tg->specs_ratio); + mutex_unlock(&cfs_constraints_mutex); + tg->group_balancer = new; + goto out; } + + retval = update_group_balancer(tg, new); + if (retval) + goto out; tg->group_balancer = new; out: raw_spin_unlock(&tg->gb_lock); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ca96537f9e460ff44864a11c371808718a13c2b8..a3fdcca1bd57abac703cb251abfd1d54ea730e15 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8571,6 +8571,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) /* At this point se is NULL and we are at root level*/ add_nr_running(rq, 1); id_update_nr_running(task_group(p), p, rq, 1); + gb_update_nr_running(task_group(p), rq, 1); /* * Since new tasks are assigned an initial util_avg equal to @@ -8697,6 +8698,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) /* At this point se is NULL and we are at root level*/ sub_nr_running(rq, 1); id_update_nr_running(task_group(p), p, rq, -1); + gb_update_nr_running(task_group(p), rq, -1); /* balance early to pull high priority tasks */ if (unlikely(!was_sched_idle && sched_idle_rq(rq))) @@ -9240,12 +9242,27 @@ select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool has_idle_co struct sched_domain *this_sd; u64 time; bool is_seeker; +#ifdef CONFIG_GROUP_BALANCER + struct task_group *tg = task_group(p); + bool gb_tried = false; + struct group_balancer_sched_domain *preferred = tg->preferred_gb_sd; +#endif this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc)); if (!this_sd) return -1; +#ifdef CONFIG_GROUP_BALANCER +retry: + if (group_balancer_enabled() && !gb_tried && tg_group_balancer_enabled(tg) && preferred) { + cpumask_and(cpus, get_gb_sd_span(preferred), task_allowed_cpu(p)); + } else { + gb_tried = true; + cpumask_and(cpus, sched_domain_span(sd), task_allowed_cpu(p)); + } +#else cpumask_and(cpus, sched_domain_span(sd), task_allowed_cpu(p)); +#endif if (sched_feat(SIS_PROP) && !has_idle_core) { u64 avg_cost, avg_idle, span_avg; @@ -9282,7 +9299,7 @@ select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool has_idle_co return i; } else { if (--nr <= 0) - return -1; + goto out; idle_cpu = __select_idle_cpu(cpu, p, &id_backup); if ((unsigned int)idle_cpu < nr_cpumask_bits) return idle_cpu; @@ -9299,13 +9316,20 @@ 
select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool has_idle_co return i; } else { if (--nr <= 0) - return -1; + goto out; idle_cpu = __select_idle_cpu(cpu, p, &id_backup); if ((unsigned int)idle_cpu < nr_cpumask_bits) break; } } +#ifdef CONFIG_GROUP_BALANCER + if (!gb_tried) { + gb_tried = true; + goto retry; + } +#endif + if (has_idle_core) set_idle_cores(target, false); @@ -9317,6 +9341,14 @@ select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool has_idle_co if (!group_identity_disabled()) return (unsigned int)idle_cpu < nr_cpumask_bits ? idle_cpu : id_backup; return idle_cpu; +out: +#ifdef CONFIG_GROUP_BALANCER + if (!gb_tried) { + gb_tried = true; + goto retry; + } +#endif + return -1; } /* @@ -14732,19 +14764,18 @@ void free_fair_sched_group(struct task_group *tg) void tg_set_specs_ratio(struct task_group *tg) { u64 quota = tg_cfs_bandwidth(tg)->hierarchical_quota; - u64 specs_ratio; + u64 specs_ratio, specs_before; + specs_before = tg->specs_ratio; if (quota == RUNTIME_INF) { tg->specs_ratio = -1; - return; + } else { + specs_ratio = quota / ((1 << BW_SHIFT) / 100); + /* If specs_ratio is bigger than INT_MAX, set specs_ratio to -1. */ + tg->specs_ratio = specs_ratio > INT_MAX ? -1 : specs_ratio; } - - specs_ratio = quota / ((1 << BW_SHIFT) / 100); - - /* If specs_ratio is bigger than INT_MAX, set specs_ratio -1. */ - tg->specs_ratio = specs_ratio > INT_MAX ? -1 : specs_ratio; if (tg->group_balancer) - tg_specs_change(tg); + tg_specs_change(tg, specs_before); } #endif @@ -15302,3 +15333,86 @@ int sched_trace_rq_nr_running(struct rq *rq) return rq ? rq->nr_running : -1; } EXPORT_SYMBOL_GPL(sched_trace_rq_nr_running); + +#ifdef CONFIG_GROUP_BALANCER +static int tg_validate_group_balancer_down(struct task_group *tg, void *data) +{ + if (tg->group_balancer) + return -EINVAL; + return 0; +} + +/* + * Only one task group is allowed to enable the group balancer on the path from + * root_task_group to a given leaf task group.
+ */ +static int validate_group_balancer(struct task_group *tg) +{ + int retval = 0; + + rcu_read_lock(); + retval = walk_tg_tree_from(tg, tg_validate_group_balancer_down, + tg_nop, NULL); + if (retval) + goto out; + + for (; tg != &root_task_group; tg = tg->parent) { + if (tg->group_balancer) { + retval = -EINVAL; + break; + } + } +out: + rcu_read_unlock(); + return retval; +} + +int update_group_balancer(struct task_group *tg, u64 new) +{ + int cpu, retval; + struct rq_flags rf; + unsigned int delta; + + if (new) { + retval = validate_group_balancer(tg); + if (retval) + return retval; + retval = attach_tg_to_group_balancer_sched_domain(tg, NULL, true); + if (retval) + return retval; + } else { + detach_tg_from_group_balancer_sched_domain(tg, true); + } + + cpus_read_lock(); + for_each_online_cpu(cpu) { + bool on_rq, throttled; + struct rq *rq = cpu_rq(cpu); + struct cfs_rq *cfs_rq; + struct sched_entity *se; + + rq_lock_irq(rq, &rf); + se = tg->se[cpu]; + cfs_rq = cfs_rq_of(se); + throttled = throttled_hierarchy(cfs_rq); + delta = se->my_q->h_nr_running; + on_rq = se->on_rq; + + if (on_rq && !throttled) { + if (new) + rq->nr_gb_running += delta; + else + rq->nr_gb_running -= delta; + } + rq_unlock_irq(rq, &rf); + } + cpus_read_unlock(); + + return 0; +} + +int get_tg_specs(struct task_group *tg) +{ + return tg->specs_ratio; +} +#endif diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 8f30a023365f1cd29ae28be04be3ecd628bfd846..d45ae1e86d1694dcc982dfa081c490915ce402ea 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -119,6 +119,10 @@ SCHED_FEAT(SCHED_CORE_HT_AWARE_QUOTA, false) SCHED_FEAT(SCHED_CORE_VRUNTIME, false) #endif +#ifdef CONFIG_GROUP_BALANCER +SCHED_FEAT(GB_SPECS_BALANCE, false) +#endif + SCHED_FEAT(SCHED_FEAT_RESERVE1, false) SCHED_FEAT(SCHED_FEAT_RESERVE2, false) SCHED_FEAT(SCHED_FEAT_RESERVE3, false) diff --git a/kernel/sched/group_balancer.c b/kernel/sched/group_balancer.c index cbbe57b8aefe67b58c273fe941d8711e86922f4d..3ef7220983554c52ad69ad9b4731753891044c5d 100644 --- a/kernel/sched/group_balancer.c +++ b/kernel/sched/group_balancer.c @@ -8,6 +8,7 @@ #include "sched.h" #include #include +#include struct gb_lb_env { int src_cpu; @@ -18,6 +19,9 @@ struct gb_lb_env { unsigned long nr_balance_failed; enum migration_type migration_type; struct rb_root task_groups; +#ifdef CONFIG_CFS_BANDWIDTH + bool burst; +#endif CK_KABI_RESERVE(1) CK_KABI_RESERVE(2) @@ -38,10 +42,14 @@ struct group_balancer_sched_domain { unsigned int span_weight; unsigned int nr_children; /* If free_tg_specs is less than zero, the gb_sd is overloaded. */ - int free_tg_specs; + atomic_t free_tg_specs; unsigned int depth; raw_spinlock_t lock; struct rb_root task_groups; +#ifdef CONFIG_CFS_BANDWIDTH + struct rb_root burstable_task_groups; + atomic_t h_nr_burst_tg; +#endif struct kernfs_node *kn; unsigned long last_balance_timestamp; unsigned long lower_interval; @@ -145,6 +153,7 @@ struct group_balancer_size_level { LIST_HEAD(group_balancer_sched_domains); DEFINE_RWLOCK(group_balancer_sched_domain_lock); +DEFINE_MUTEX(group_balancer_select_lock); struct cpumask root_cpumask; @@ -268,11 +277,23 @@ struct group_balancer_sched_domain *group_balancer_root_domain; #define GB_OVERLOAD 0x1 #define GB_OVERUTILIZED 0x2 +/* + * The time threshold that the preferred gb_sd expires. 
+ * Unit: ms + * Default: 60000 + */ +unsigned long sysctl_sched_gb_expiration_ms = 60000; + static inline struct cpumask *gb_sd_span(struct group_balancer_sched_domain *gb_sd) { return to_cpumask(gb_sd->span); } +struct cpumask *get_gb_sd_span(struct group_balancer_sched_domain *gb_sd) +{ + return gb_sd_span(gb_sd); +} + static unsigned int get_size_level(struct group_balancer_sched_domain *gb_sd) { int size_level = ilog2(gb_sd->span_weight); @@ -302,6 +323,125 @@ static void add_to_size_level(struct group_balancer_sched_domain *gb_sd) __add_to_size_level(gb_sd, size_level); } +bool tg_group_balancer_enabled(struct task_group *tg) +{ + return !!tg->group_balancer; +} + +struct cgroup *tg_cgroup(struct task_group *tg) +{ + return tg->css.cgroup; +} + +#ifdef CONFIG_CPUSETS +static inline bool +gb_sd_satisfies_task_group(struct task_group *tg, struct group_balancer_sched_domain *gb_sd) +{ + struct cpumask *cpus_allowed = task_group_cpus_allowed(tg); + struct cpumask soft_cpus_allowed; + unsigned int soft_cpus_weight; + + if (!cpus_allowed) { + soft_cpus_weight = gb_sd->span_weight; + } else { + cpumask_and(&soft_cpus_allowed, cpus_allowed, gb_sd_span(gb_sd)); + soft_cpus_weight = cpumask_weight(&soft_cpus_allowed); + } + /* tg->group_balancer = 2 means that tg acquires double logical cpus. */ + return tg->group_balancer * tg->specs_ratio <= 100 * soft_cpus_weight; +} +#else +static inline bool +gb_sd_satisfies_task_group(struct task_group *tg, struct group_balancer_sched_domain *gb_sd) +{ + return true; +} +#endif + +#ifdef CONFIG_CFS_BANDWIDTH +static inline bool is_burstable_task_group(struct task_group *tg) +{ + return !!tg->cfs_bandwidth.burst; +} + +static inline struct rb_root +*gb_rb_root(struct task_group *tg, struct group_balancer_sched_domain *gb_sd) +{ + if (unlikely(is_burstable_task_group(tg))) + return &gb_sd->burstable_task_groups; + return &gb_sd->task_groups; +} +static inline void update_h_nr_burst_tg(struct task_group *tg, bool add) +{ + struct group_balancer_sched_domain *gb_sd = tg->gb_sd; + + if (!is_burstable_task_group(tg)) + return; + + for (; gb_sd; gb_sd = gb_sd->parent) { + if (add) + atomic_inc(&gb_sd->h_nr_burst_tg); + else + atomic_dec(&gb_sd->h_nr_burst_tg); + } +} + +static inline bool tg_specs_less(struct rb_node *a, const struct rb_node *b); +void tg_burst_change(struct task_group *tg, u64 burst) +{ + bool burst_before, burst_now; + struct group_balancer_sched_domain *gb_sd; + + if (!group_balancer_enabled()) + return; + if (!tg_group_balancer_enabled(tg)) + return; + + gb_sd = tg->gb_sd; + burst_before = !!tg->cfs_bandwidth.burst; + burst_now = !!burst; + if (burst_before == burst_now) + return; + + read_lock(&group_balancer_sched_domain_lock); + raw_spin_lock(&gb_sd->lock); + if (!burst_before) { + rb_erase(&tg->gb_node, &gb_sd->task_groups); + rb_add(&tg->gb_node, &gb_sd->burstable_task_groups, tg_specs_less); + update_h_nr_burst_tg(tg, true); + } else { + rb_erase(&tg->gb_node, &gb_sd->burstable_task_groups); + rb_add(&tg->gb_node, &gb_sd->task_groups, tg_specs_less); + update_h_nr_burst_tg(tg, false); + } + raw_spin_unlock(&gb_sd->lock); + read_unlock(&group_balancer_sched_domain_lock); +} +#else +static inline bool is_burstable_task_group(struct task_group *tg) +{ + return false; +} + +static inline struct rb_root *gb_rb_root(struct task_group *tg, struct group_balancer_sched_domain *gb_sd) +{ + return &gb_sd->task_groups; +} + +static inline void update_h_nr_burst_tg(struct task_group *tg, bool add) { } +#endif + +static inline bool
+is_preferred_gb_sd(struct task_group *tg, struct group_balancer_sched_domain *gb_sd) +{ + struct group_balancer_sched_domain *p_gb_sd = tg->preferred_gb_sd; + + if (!p_gb_sd) + return true; + + return cpumask_subset(gb_sd_span(p_gb_sd), gb_sd_span(gb_sd)); +} + static int group_balancer_seqfile_show(struct seq_file *m, void *arg) { struct kernfs_open_file *of = m->private; @@ -597,6 +737,7 @@ static inline struct group_balancer_sched_domain raw_spin_lock_init(&new->lock); new->task_groups = RB_ROOT; + new->burstable_task_groups = RB_ROOT; new->imbalance_pct = 117; return new; @@ -633,7 +774,7 @@ static void add_to_tree(struct group_balancer_sched_domain *gb_sd, } gb_sd->span_weight = cpumask_weight(gb_sd_span(gb_sd)); gb_sd->lower_interval = ilog2(gb_sd->span_weight) * gb_sd->span_weight; - gb_sd->free_tg_specs = 100 * gb_sd->span_weight; + atomic_set(&gb_sd->free_tg_specs, 100 * gb_sd->span_weight); add_to_size_level(gb_sd); if (!gb_sd->nr_children) { @@ -644,12 +785,10 @@ static void add_to_tree(struct group_balancer_sched_domain *gb_sd, } } -#define __node_2_task_group(n) rb_entry((n), struct task_group, gb_node) - static inline bool tg_specs_less(struct rb_node *a, const struct rb_node *b) { - struct task_group *tg_a = __node_2_task_group(a); - struct task_group *tg_b = __node_2_task_group(b); + struct task_group *tg_a = __gb_node_2_tg(a); + struct task_group *tg_b = __gb_node_2_tg(b); int specs_a = tg_a->specs_ratio; int specs_b = tg_b->specs_ratio; @@ -683,17 +822,31 @@ static void free_group_balancer_sched_domain(struct group_balancer_sched_domain struct task_group *tg; struct group_balancer_sched_domain *parent = gb_sd->parent; struct rb_node *node; - struct rb_root *root = &gb_sd->task_groups; + struct rb_root *roots[2] = { +#ifdef CONFIG_CFS_BANDWIDTH + &gb_sd->burstable_task_groups, +#else + NULL, +#endif + &gb_sd->task_groups, + }; + struct rb_root *root; + int i; if (parent) { parent->nr_children--; /* Move the task_groups to parent. 
*/ - while (!RB_EMPTY_ROOT(root)) { - node = root->rb_node; - tg = __node_2_task_group(node); - rb_erase(node, root); - rb_add(node, &parent->task_groups, tg_specs_less); - walk_tg_tree_from(tg, tg_set_gb_tg_down, tg_nop, tg); + for (i = 0; i < 2; i++) { + root = roots[i]; + if (!root) + continue; + while (!RB_EMPTY_ROOT(root)) { + node = root->rb_node; + tg = __gb_node_2_tg(node); + rb_erase(node, root); + rb_add(node, &parent->task_groups, tg_specs_less); + walk_tg_tree_from(tg, tg_set_gb_tg_down, tg_nop, tg); + } } } @@ -1030,8 +1183,8 @@ static int build_group_balancer_sched_domains(void) group_balancer_root_domain->lower_interval = ilog2(group_balancer_root_domain->span_weight) * group_balancer_root_domain->span_weight; - group_balancer_root_domain->free_tg_specs = - 100 * group_balancer_root_domain->span_weight; + atomic_set(&group_balancer_root_domain->free_tg_specs, + 100 * group_balancer_root_domain->span_weight); } if (!zalloc_cpumask_var(&trial_cpumask, GFP_KERNEL)) { @@ -1402,8 +1555,35 @@ static unsigned long gb_sd_capacity(struct group_balancer_sched_domain *gb_sd) return cap; } -static struct group_balancer_sched_domain *select_idle_gb_sd(int specs) +static unsigned int gb_sd_nr_running(struct group_balancer_sched_domain *gb_sd) +{ + int cpu; + int nr_running = 0; + + for_each_cpu(cpu, gb_sd_span(gb_sd)) + nr_running += cpu_rq(cpu)->nr_gb_running; + + return nr_running; +} + +static unsigned int +tg_gb_sd_nr_running(struct task_group *tg, struct group_balancer_sched_domain *gb_sd) { + int cpu; + int nr_running = 0; + struct cfs_rq *cfs_rq; + + for_each_cpu(cpu, gb_sd_span(gb_sd)) { + cfs_rq = tg->cfs_rq[cpu]; + nr_running += cfs_rq->h_nr_running; + } + + return nr_running; +} + +static struct group_balancer_sched_domain *select_idle_gb_sd(struct task_group *tg) +{ + int specs = tg->specs_ratio; struct group_balancer_sched_domain *gb_sd, *child; if (specs == -1 || specs > group_balancer_root_domain->span_weight * 100) @@ -1418,14 +1598,16 @@ static struct group_balancer_sched_domain *select_idle_gb_sd(int specs) int max_unsatisfied_free_specs = INT_MIN; for_each_gb_sd_child(child, gb_sd) { - if (child->span_weight * 100 >= specs && - child->free_tg_specs > max_free_specs) { + int free_tg_specs = atomic_read(&child->free_tg_specs); + + if (gb_sd_satisfies_task_group(tg, child) && + free_tg_specs > max_free_specs) { max_free_child = child; - max_free_specs = child->free_tg_specs; + max_free_specs = free_tg_specs; } else if (child->span_weight * 100 < specs && - child->free_tg_specs > max_unsatisfied_free_specs) { + free_tg_specs > max_unsatisfied_free_specs) { max_unsatisfied_free_child = child; - max_unsatisfied_free_specs = child->free_tg_specs; + max_unsatisfied_free_specs = free_tg_specs; } } if (!max_free_child) @@ -1443,9 +1625,9 @@ static struct group_balancer_sched_domain *select_idle_gb_sd(int specs) * specs cannot fully represent the degree of idleness if the span weight is * different. 
*/ - if (max_free_specs < specs && + if (max_free_specs < specs && (!max_unsatisfied_free_child || max_free_specs / max_free_child->span_weight < - max_unsatisfied_free_specs / max_unsatisfied_free_child->span_weight) + max_unsatisfied_free_specs / max_unsatisfied_free_child->span_weight)) break; gb_sd = max_free_child; } @@ -1460,13 +1642,14 @@ check_task_group_leap_level(struct task_group *tg, struct group_balancer_sched_d int specs = tg->specs_ratio; for_each_gb_sd_child(child, gb_sd) { - if (specs <= 100 * child->span_weight) { + if (gb_sd_satisfies_task_group(tg, child)) { tg->leap_level = true; tg->leap_level_timestamp = jiffies; return; } } + tg->preferred_gb_sd = gb_sd; tg->leap_level = false; } @@ -1474,13 +1657,8 @@ void update_free_tg_specs(struct group_balancer_sched_domain *gb_sd, int specs) { struct group_balancer_sched_domain *parent; - if (specs != -1) { - for (parent = gb_sd; parent; parent = parent->parent) { - raw_spin_lock(&parent->lock); - parent->free_tg_specs += specs; - raw_spin_unlock(&parent->lock); - } - } + for (parent = gb_sd; parent; parent = parent->parent) + atomic_add(specs, &parent->free_tg_specs); } /* @@ -1497,9 +1675,12 @@ void add_tg_to_group_balancer_sched_domain_locked(struct task_group *tg, struct group_balancer_sched_domain *gb_sd, bool enable) { - tg->gb_sd = gb_sd; - rb_add(&tg->gb_node, &gb_sd->task_groups, tg_specs_less); + struct rb_root *root; + tg->gb_sd = gb_sd; + root = gb_rb_root(tg, gb_sd); + rb_add(&tg->gb_node, root, tg_specs_less); + update_h_nr_burst_tg(tg, true); tg->soft_cpus_allowed_ptr = gb_sd_span(gb_sd); tg_inc_soft_cpus_version(tg); if (enable) @@ -1507,6 +1688,8 @@ void add_tg_to_group_balancer_sched_domain_locked(struct task_group *tg, check_task_group_leap_level(tg, gb_sd); tg->adjust_level_timestamp = jiffies; + if (tg->specs_ratio != -1) + update_free_tg_specs(gb_sd, -tg->specs_ratio); } void add_tg_to_group_balancer_sched_domain(struct task_group *tg, @@ -1516,7 +1699,6 @@ void add_tg_to_group_balancer_sched_domain(struct task_group *tg, raw_spin_lock(&gb_sd->lock); add_tg_to_group_balancer_sched_domain_locked(tg, gb_sd, enable); raw_spin_unlock(&gb_sd->lock); - update_free_tg_specs(gb_sd, -tg->specs_ratio); } static void @@ -1524,11 +1706,16 @@ remove_tg_from_group_balancer_sched_domain_locked(struct task_group *tg, struct group_balancer_sched_domain *gb_sd, bool disable) { - tg->gb_sd = NULL; - rb_erase(&tg->gb_node, &gb_sd->task_groups); + struct rb_root *root = gb_rb_root(tg, gb_sd); + + rb_erase(&tg->gb_node, root); RB_CLEAR_NODE(&tg->gb_node); + tg->gb_sd = NULL; + update_h_nr_burst_tg(tg, false); if (disable) walk_tg_tree_from(tg, tg_unset_gb_tg_down, tg_nop, NULL); + if (tg->specs_ratio != -1) + update_free_tg_specs(gb_sd, tg->specs_ratio); } static void @@ -1540,7 +1727,6 @@ remove_tg_from_group_balancer_sched_domain(struct task_group *tg, raw_spin_lock(&gb_sd->lock); remove_tg_from_group_balancer_sched_domain_locked(tg, gb_sd, disable); raw_spin_unlock(&gb_sd->lock); - update_free_tg_specs(gb_sd, tg->specs_ratio); read_unlock(&group_balancer_sched_domain_lock); } @@ -1552,16 +1738,20 @@ int attach_tg_to_group_balancer_sched_domain(struct task_group *tg, int ret = 0; read_lock(&group_balancer_sched_domain_lock); - if (enable) - gb_sd = select_idle_gb_sd(tg->specs_ratio); - else + if (enable) { + mutex_lock(&group_balancer_select_lock); + gb_sd = select_idle_gb_sd(tg); + } else { gb_sd = target; + } if (!gb_sd) { ret = -ESRCH; goto out; } add_tg_to_group_balancer_sched_domain(tg, gb_sd, enable); out: + if 
(enable) + mutex_unlock(&group_balancer_select_lock); read_unlock(&group_balancer_sched_domain_lock); return ret; } @@ -1586,11 +1776,16 @@ static void tg_upper_level(struct task_group *tg, struct group_balancer_sched_do static bool tg_lower_level(struct task_group *tg) { struct group_balancer_sched_domain *gb_sd = tg->gb_sd; - struct group_balancer_sched_domain *child, *dst; + struct group_balancer_sched_domain *child, *dst = NULL; unsigned long tg_child_load, tg_load = 0, tg_dst_load = 0; unsigned long child_load, src_load, dst_load, total_load = 0, migrate_load; unsigned long child_cap, total_cap = 0, src_cap, dst_cap = 0; + unsigned int child_nr_running, dst_nr_running = 0, tg_child_nr_running; + unsigned int tg_nr_running = 0, tg_dst_nr_running = 0, migrate_nr_running; unsigned long src_imb, dst_imb; + int total_free_specs = 0, child_free_specs = 0, dst_free_specs = 0, src_free_specs = 0; + int tg_specs; + unsigned int src_span_weight, dst_span_weight; if (!gb_sd) goto fail; @@ -1612,36 +1807,65 @@ static bool tg_lower_level(struct task_group *tg) for_each_gb_sd_child(child, gb_sd) { child_load = gb_sd_load(child); total_load += child_load; + child_nr_running = gb_sd_nr_running(child); child_cap = gb_sd_capacity(child); total_cap += child_cap; tg_child_load = tg_gb_sd_load(tg, child); + tg_load += tg_child_load; + tg_child_nr_running = tg_gb_sd_nr_running(tg, child); + tg_nr_running += tg_child_nr_running; + + child_free_specs = atomic_read(&child->free_tg_specs); + total_free_specs += child_free_specs; + if (!gb_sd_satisfies_task_group(tg, child)) + continue; if (!dst || tg_child_load > tg_dst_load) { dst = child; tg_dst_load = tg_child_load; dst_load = child_load; dst_cap = child_cap; + tg_dst_nr_running = tg_child_nr_running; + dst_nr_running = child_nr_running; + dst_free_specs = child_free_specs; } else if (tg_child_load == tg_dst_load) { if (dst_load * child_cap > child_load * dst_cap) { dst = child; tg_dst_load = tg_child_load; dst_load = child_load; dst_cap = child_cap; + tg_dst_nr_running = tg_child_nr_running; + dst_nr_running = child_nr_running; + dst_free_specs = child_free_specs; } } - tg_load += tg_child_load; } if (tg_load == 0) goto fail; - if (tg->specs_ratio > 100 * dst->span_weight) + if (!dst) goto fail; -#ifdef CONFIG_NUMA + if (!is_preferred_gb_sd(tg, gb_sd)) { + /* + * If the task group stays in the upper level for too long, + * let the preferred gb_sd expire. + */ + if (!time_after(jiffies, + tg->expiration_start + msecs_to_jiffies(sysctl_sched_gb_expiration_ms))) + goto fail; + tg->preferred_gb_sd = NULL; + } + /* We won't allow a task group span more than two numa nodes too long. */ - if (dst->gb_flags & GROUP_BALANCER_NUMA_FLAG) + if (dst->gb_flags & GROUP_BALANCER_LLC_FLAG) goto lower; -#endif + + /* If the migration won't cause overload, do it. */ + migrate_nr_running = tg_nr_running - tg_dst_nr_running; + if (dst_nr_running + migrate_nr_running <= dst->span_weight) + goto lower; + /* If we lower the level, we have to make sure that we will not cause imbalance. * * src_load dst_load @@ -1662,13 +1886,38 @@ static bool tg_lower_level(struct task_group *tg) if (dst_imb > src_imb) goto fail; + + if (!sched_feat(GB_SPECS_BALANCE)) + goto lower; + /* + * If we lower the level, we'd better guarantee that free specs won't become more imbalanced.
+ * + * src_free_specs dst_free_specs + * --------------- vs -------------- + * src_span_weight dst_span_weight + * + */ + tg_specs = tg->specs_ratio; + src_free_specs = total_free_specs - dst_free_specs; + dst_span_weight = dst->span_weight; + src_span_weight = gb_sd->span_weight - dst_span_weight; + src_imb = abs(src_free_specs * dst_span_weight - dst_free_specs * src_span_weight); + dst_imb = abs(src_free_specs * dst_span_weight - + (dst_free_specs - tg_specs) * src_span_weight); + + if (dst_free_specs * src_span_weight > src_free_specs * dst_span_weight) + goto fail; + + if (dst_imb > src_imb) + goto fail; + #ifdef CONFIG_NUMA lower: #endif detach_tg_from_group_balancer_sched_domain(tg, false); attach_tg_to_group_balancer_sched_domain(tg, dst, false); /* The task group maybe still leap level, check it. */ - check_task_group_leap_level(tg, gb_sd); + check_task_group_leap_level(tg, dst); return true; fail: @@ -1721,7 +1970,7 @@ void task_tick_gb(struct task_struct *p) raw_spin_unlock(&tg->gb_lock); } -void tg_specs_change(struct task_group *tg) +void tg_specs_change(struct task_group *tg, u64 specs_before) { struct group_balancer_sched_domain *gb_sd; int specs = tg->specs_ratio; @@ -1731,24 +1980,34 @@ void tg_specs_change(struct task_group *tg) /* tg->group_balancer is always true here, so find a gb_sd to attach. */ goto upper; + if (specs_before != specs) { + if (specs_before != -1) + update_free_tg_specs(gb_sd, specs_before); + if (specs != -1) + update_free_tg_specs(gb_sd, -specs); + } + /* If the task group leaps level after specs change, we will lower it later. */ check_task_group_leap_level(tg, gb_sd); - if (tg->leap_level) + if (tg->leap_level) { + tg->preferred_gb_sd = NULL; return; + } /* This gb_sd still satisfy, don't do anything. */ - if (specs <= gb_sd->span_weight * 100 || gb_sd == group_balancer_root_domain) + if (gb_sd_satisfies_task_group(tg, gb_sd) || gb_sd == group_balancer_root_domain) return; /* The specs doesn't satisfy anymore, upper to find a satisfied gb_sd. */ /* Fast path, if the specs is -1 or too large, move it to root domain. */ - if (specs == -1 || specs > group_balancer_root_domain->span_weight * 100) { + if (specs == -1 || + tg->group_balancer * specs > group_balancer_root_domain->span_weight * 100) { gb_sd = group_balancer_root_domain; goto upper; } for (; gb_sd; gb_sd = gb_sd->parent) { - if (specs <= gb_sd->span_weight * 100) + if (gb_sd_satisfies_task_group(tg, gb_sd)) break; } @@ -1808,49 +2067,76 @@ gb_detach_task_groups_from_gb_sd(struct gb_lb_env *gb_env, struct task_group *tg, *n; unsigned long load, util; int detached = 0; + struct rb_root *roots[2] = { +#ifdef CONFIG_CFS_BANDWIDTH + &gb_sd->burstable_task_groups, +#else + NULL, +#endif + &gb_sd->task_groups, + }; + int i, max_idx = 1; + struct rb_root *root; raw_spin_lock(&gb_sd->lock); - /* Try the task cgroups with little specs first. */ - gb_for_each_tg_safe(tg, n, &gb_sd->task_groups) { - if (!time_after(jiffies, tg->adjust_level_timestamp + 2 * gb_sd->lower_interval)) - continue; - switch (gb_env->migration_type) { -#ifdef CONFIG_GROUP_IDENTITY - case migrate_identity: - fallthrough; +#ifdef CONFIG_CFS_BANDWIDTH + /* + * When burst is true, the load balance interval is too short, + * so we migrate burstable task groups only.
+ */ + if (gb_env->burst) + max_idx = 0; #endif - case migrate_load: - load = tg_gb_sd_load(tg, gb_sd); - if (load == 0) - continue; - if (shr_bound(load, gb_env->nr_balance_failed) > gb_env->imbalance) - continue; - gb_env->imbalance -= load; - break; - case migrate_util: - util = tg_gb_sd_util(tg, gb_sd); - if (util == 0) - continue; - if (shr_bound(util, gb_env->nr_balance_failed) > gb_env->imbalance) + for (i = 0; i <= max_idx; i++) { + root = roots[i]; + if (!root) + continue; + if (gb_env->burst && i == 1) + continue; + /* Try the task cgroups with little specs first. */ + gb_for_each_tg_safe(tg, n, root) { + if (i > 0 && !time_after(jiffies, + tg->adjust_level_timestamp + 2 * gb_sd->lower_interval)) continue; - gb_env->imbalance -= util; - break; - case migrate_task: - gb_env->imbalance = 0; - break; - /*TODO: Perfect strategy of migrate_misfit*/ - case migrate_misfit: - gb_env->imbalance = 0; - break; - default: - break; - } - remove_tg_from_group_balancer_sched_domain_locked(tg, gb_sd, false); - rb_add(&tg->gb_node, &gb_env->task_groups, tg_specs_less); - detached++; - if (gb_env->imbalance <= 0) { - raw_spin_unlock(&gb_sd->lock); - return detached; + switch (gb_env->migration_type) { + #ifdef CONFIG_GROUP_IDENTITY + case migrate_identity: + fallthrough; + #endif + case migrate_load: + load = tg_gb_sd_load(tg, gb_sd); + if (load == 0) + continue; + if (shr_bound(load, gb_env->nr_balance_failed) > gb_env->imbalance) + continue; + gb_env->imbalance -= load; + break; + case migrate_util: + util = tg_gb_sd_util(tg, gb_sd); + if (util == 0) + continue; + if (shr_bound(util, gb_env->nr_balance_failed) > gb_env->imbalance) + continue; + gb_env->imbalance -= util; + break; + case migrate_task: + gb_env->imbalance = 0; + break; + /*TODO: Perfect strategy of migrate_misfit*/ + case migrate_misfit: + gb_env->imbalance = 0; + break; + default: + break; + } + remove_tg_from_group_balancer_sched_domain_locked(tg, gb_sd, false); + tg->expiration_start = jiffies; + rb_add(&tg->gb_node, &gb_env->task_groups, tg_specs_less); + detached++; + if (gb_env->imbalance <= 0) { + raw_spin_unlock(&gb_sd->lock); + return detached; + } } } raw_spin_unlock(&gb_sd->lock); @@ -1906,18 +2192,18 @@ static void gb_attach_task_groups(struct gb_lb_env *gb_env) static void __update_gb_sd_status(struct group_balancer_sched_domain *gb_sd, int *gb_sd_status) { - int i, nr_running; + int i, nr_gb_running = 0; for_each_cpu(i, gb_sd_span(gb_sd)) { struct rq *rq = cpu_rq(i); - nr_running = rq->nr_running; - if (nr_running > 1) - *gb_sd_status |= GB_OVERLOAD; - - if (gb_cpu_overutilized(i)) - *gb_sd_status |= GB_OVERUTILIZED; + nr_gb_running += rq->nr_gb_running; + /* TODO: Improve the utilization of GB_OVERUTILIZED.*/ +// if (gb_cpu_overutilized(i)) +// *gb_sd_status |= GB_OVERUTILIZED; } + if (nr_gb_running > gb_sd->span_weight) + *gb_sd_status |= GB_OVERLOAD; } static void update_gb_sd_status(struct gb_lb_env *gb_env, int *gb_sd_status) @@ -1938,6 +2224,10 @@ void gb_load_balance(struct lb_env *env) int gb_sd_status = 0; struct cpumask *gb_mask = this_cpu_cpumask_var_ptr(group_balancer_mask); unsigned long src_load, src_cap, dst_load, dst_cap; +#ifdef CONFIG_CFS_BANDWIDTH + bool burst = false; +#endif + int src_status = 0; if (!group_balancer_enabled()) return; @@ -1961,15 +2251,28 @@ void gb_load_balance(struct lb_env *env) if (!gb_sd) goto unlock; - if (!time_after(jiffies, gb_sd->last_balance_timestamp + 2 * gb_sd->lower_interval)) - goto unlock; + if (!time_after(jiffies, gb_sd->last_balance_timestamp + 2 * 
gb_sd->lower_interval)) { +#ifdef CONFIG_CFS_BANDWIDTH + if (atomic_read(&dst->h_nr_burst_tg)) + burst = true; + else +#endif + goto unlock; + } + gb_sd->last_balance_timestamp = jiffies; src_load = gb_sd_load(src); src_cap = gb_sd_capacity(src); dst_load = gb_sd_load(dst); dst_cap = gb_sd_capacity(dst); + __update_gb_sd_status(src, &src_status); - if (dst_load * src_cap * gb_sd->imbalance_pct >= src_load * dst_cap * 100) + /* + * If the imbalance isn't larger than imbalance_pct, and it isn't the case that + * dst is idle and src is overloaded, don't balance. + */ + if (dst_load * src_cap * gb_sd->imbalance_pct >= src_load * dst_cap * 100 && + !(available_idle_cpu(env->dst_cpu) && src_status)) goto unlock; gb_env = (struct gb_lb_env){ @@ -1981,6 +2284,9 @@ void gb_load_balance(struct lb_env *env) .imbalance = env->imbalance, .nr_balance_failed = env->sd->nr_balance_failed, .task_groups = RB_ROOT, +#ifdef CONFIG_CFS_BANDWIDTH + .burst = burst, +#endif }; /* @@ -1988,10 +2294,26 @@ void gb_load_balance(struct lb_env *env) * and we don't migrate tg in this case. */ for (parent = gb_sd; parent; parent = parent->parent) { - for (node = rb_first(&parent->task_groups); node; node = rb_next(node)) { - tg = __node_2_task_group(node); - if (tg->cfs_rq[env->src_cpu]->h_nr_running) - goto unlock; + struct rb_root *roots[2] = { +#ifdef CONFIG_CFS_BANDWIDTH + &parent->burstable_task_groups, +#else + NULL, +#endif + &parent->task_groups, + }; + struct rb_root *root; + int i; + + for (i = 0; i < 2; i++) { + root = roots[i]; + if (!root) + continue; + for (node = rb_first(root); node; node = rb_next(node)) { + tg = __gb_node_2_tg(node); + if (tg->cfs_rq[env->src_cpu]->h_nr_running) + goto unlock; + } } } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 89cb253c8c9c411ac000ec39cb24e58215f21810..8b9eb580b9dc4b3512da6ead605d7c378f65abab 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -618,11 +618,13 @@ struct task_group { int specs_ratio; struct rb_node gb_node; struct group_balancer_sched_domain *gb_sd; + struct group_balancer_sched_domain *preferred_gb_sd; struct task_group *gb_tg; - bool group_balancer; + unsigned int group_balancer; bool leap_level; unsigned long leap_level_timestamp; unsigned long adjust_level_timestamp; + unsigned long expiration_start; raw_spinlock_t gb_lock; #endif long priority; @@ -1547,6 +1549,8 @@ struct rq { #ifdef CONFIG_GROUP_BALANCER struct group_balancer_sched_domain *gb_sd; + unsigned int nr_gb_running; + long nr_gb_make_up; bool group_balancer_enabled; #endif @@ -2965,6 +2969,20 @@ static inline void sub_nr_running(struct rq *rq, unsigned count) sched_update_tick_dependency(rq); } +#ifdef CONFIG_GROUP_BALANCER +static inline void gb_update_nr_running(struct task_group *tg, struct rq *rq, int delta) +{ + if (!group_balancer_enabled()) + return; + if (!tg || !tg_group_balancer_enabled(tg)) + return; + rq->nr_gb_running += delta; +} +extern int update_group_balancer(struct task_group *tg, u64 new); +#else +static inline void gb_update_nr_running(struct task_group *tg, struct rq *rq, int delta) { } +#endif + extern void activate_task(struct rq *rq, struct task_struct *p, int flags); extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags); @@ -3713,7 +3731,6 @@ extern void sched_dynamic_update(int mode); #endif #ifdef CONFIG_GROUP_BALANCER -extern bool group_balancer_enabled(void); extern bool group_balancer_rq_enabled(struct rq *rq); static inline const struct cpumask *task_allowed_cpu(struct task_struct *p) { @@ -3738,11
+3755,6 @@ static inline void tg_inc_soft_cpus_version(struct task_group *tg) tg->soft_cpus_version = 0; } -static inline bool tg_group_balancer_enabled(struct task_group *tg) -{ - return tg->group_balancer; -} - extern void sched_init_group_balancer_sched_domains(void); extern void sched_clear_group_balancer_sched_domains(void); extern void tg_set_specs_ratio(struct task_group *tg); @@ -3751,13 +3763,16 @@ extern int attach_tg_to_group_balancer_sched_domain(struct task_group *tg, bool enable); extern void detach_tg_from_group_balancer_sched_domain(struct task_group *tg, bool disable); extern void update_group_balancer_root_cpumask(void); -extern void tg_specs_change(struct task_group *tg); extern unsigned long cfs_h_load(struct cfs_rq *cfs_rq); extern bool gb_cpu_overutilized(int cpu); extern void gb_load_balance(struct lb_env *env); extern void task_tick_gb(struct task_struct *p); extern void util_est_reenqueue_all(void); extern void util_est_clear_all(void); +extern struct cpumask *get_gb_sd_span(struct group_balancer_sched_domain *gb_sd); +#ifdef CONFIG_CFS_BANDWIDTH +extern void tg_burst_change(struct task_group *tg, u64 burst); +#endif #else static inline bool group_balancer_rq_enabled(struct rq *rq) { return false; } static inline const struct cpumask *task_allowed_cpu(struct task_struct *p) @@ -3766,7 +3781,6 @@ static inline const struct cpumask *task_allowed_cpu(struct task_struct *p) } static inline void tg_set_specs_ratio(struct task_group *tg) { } static inline void update_group_balancer_root_cpumask(void) { } -static inline void tg_specs_change(struct task_group *tg) { } #ifdef CONFIG_SMP static inline void gb_load_balance(struct lb_env *env) { } #endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ff72c63f6129e2015cec43659468868781f949d9..fc0da990ae3786f9dd3c882b4eea2a6bf1f1975d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2116,6 +2116,15 @@ static struct ctl_table kern_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, + { + .procname = "sched_gb_expiration_ms", + .data = &sysctl_sched_gb_expiration_ms, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &zero_ul, + .extra2 = &long_max, + }, #endif #ifdef CONFIG_PROVE_LOCKING {
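Illustrative sketch (not part of the patch): the free_tg_specs bookkeeping above treats every gb_sd as holding 100 "specs" per CPU in its span (a task group's specs_ratio is its hierarchical quota expressed as a percentage of one CPU), and attaching, detaching or re-sizing a group adds or subtracts its specs_ratio all the way up the parent chain, now with atomics instead of taking each parent's lock. A minimal userspace model of that accounting; gb_sd_model and model_update_free_specs are made-up names:

#include <stdatomic.h>
#include <stdio.h>

/* Stand-in for struct group_balancer_sched_domain's accounting fields. */
struct gb_sd_model {
	struct gb_sd_model *parent;
	unsigned int span_weight;
	atomic_int free_tg_specs;	/* below zero means overloaded */
};

static void model_init(struct gb_sd_model *d, struct gb_sd_model *parent,
		       unsigned int span_weight)
{
	d->parent = parent;
	d->span_weight = span_weight;
	atomic_init(&d->free_tg_specs, 100 * span_weight);
}

/* Mirrors update_free_tg_specs(): propagate the delta up to the root. */
static void model_update_free_specs(struct gb_sd_model *d, int specs)
{
	for (; d; d = d->parent)
		atomic_fetch_add(&d->free_tg_specs, specs);
}

int main(void)
{
	struct gb_sd_model root, child;

	model_init(&root, NULL, 8);	/* 8 CPUs -> 800 free specs */
	model_init(&child, &root, 4);	/* 4-CPU child -> 400 specs */

	model_update_free_specs(&child, -250);	/* attach a 2.5-CPU group */
	model_update_free_specs(&child, 250);	/* detach it again */

	printf("root=%d child=%d\n",
	       atomic_load(&root.free_tg_specs),
	       atomic_load(&child.free_tg_specs));	/* prints 800 400 */
	return 0;
}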
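A second sketch, for the per-rq nr_gb_running counter: enqueue_task_fair()/dequeue_task_fair() bump it through gb_update_nr_running() only for groups with the balancer enabled, and __update_gb_sd_status() now flags GB_OVERLOAD once the counters summed over a domain's span exceed its CPU count. Stand-alone model with illustrative names, not kernel code:

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS		4
#define GB_OVERLOAD	0x1

static unsigned int nr_gb_running[NR_CPUS];

/* Mirrors gb_update_nr_running(): only GB-enabled groups are counted. */
static void model_update_nr_running(bool tg_gb_enabled, int cpu, int delta)
{
	if (!tg_gb_enabled)
		return;
	nr_gb_running[cpu] += delta;
}

/* Mirrors __update_gb_sd_status() for a domain spanning cpus [0, span_weight). */
static int model_gb_sd_status(unsigned int span_weight)
{
	unsigned int cpu, sum = 0;
	int status = 0;

	for (cpu = 0; cpu < span_weight; cpu++)
		sum += nr_gb_running[cpu];
	if (sum > span_weight)
		status |= GB_OVERLOAD;
	return status;
}

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		model_update_nr_running(true, cpu, 1);	/* one GB task per CPU */
	model_update_nr_running(true, 0, 1);		/* plus one extra on cpu0 */

	printf("status=%#x\n", model_gb_sd_status(NR_CPUS));	/* prints 0x1 */
	return 0;
}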
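The select_idle_cpu() hunks give a group-balancer task two scan passes: the first is confined to the span of its preferred_gb_sd intersected with the task's allowed CPUs, and only when that pass finds nothing, or the scan budget runs out, does the retry/out path widen the search to the whole LLC. A simplified model of that control flow, assuming cpumasks can be reduced to 64-bit masks and with find_idle_in() standing in for the real per-CPU probing loop:

#include <stdint.h>
#include <stdio.h>

/* Return the lowest CPU that is both a candidate and idle, or -1. */
static int find_idle_in(uint64_t candidates, uint64_t idle_cpus)
{
	uint64_t hit = candidates & idle_cpus;

	return hit ? __builtin_ctzll(hit) : -1;
}

static int pick_idle_cpu(uint64_t llc_span, uint64_t preferred_span,
			 uint64_t allowed, uint64_t idle_cpus)
{
	int cpu = -1;

	/* First pass: stay inside the preferred gb_sd if the group has one. */
	if (preferred_span)
		cpu = find_idle_in(preferred_span & allowed, idle_cpus);
	/* Second pass: the "goto retry" fallback over the whole LLC span. */
	if (cpu < 0)
		cpu = find_idle_in(llc_span & allowed, idle_cpus);
	return cpu;
}

int main(void)
{
	/* LLC is CPUs 0-7, preferred gb_sd is CPUs 0-3, only CPU 6 is idle. */
	printf("picked cpu %d\n",
	       pick_idle_cpu(0xffULL, 0x0fULL, 0xffULL, 1ULL << 6));
	return 0;
}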
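Finally, the tunable: sysctl_sched_gb_expiration_ms is registered in kern_table, so on a CONFIG_GROUP_BALANCER kernel it should appear as /proc/sys/kernel/sched_gb_expiration_ms (default 60000). tg_lower_level() only forgets a group's preferred_gb_sd after the group has been kept away from it, measured from expiration_start, for longer than this many milliseconds. A minimal userspace snippet for raising it, assuming that procfs path:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/sched_gb_expiration_ms", "w");

	if (!f) {
		perror("sched_gb_expiration_ms");
		return 1;
	}
	/* Keep preferred domains for two minutes before letting them expire. */
	fprintf(f, "%lu\n", 120000UL);
	return fclose(f) ? 1 : 0;
}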