diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 3781d138c3e3ec96df2befaf5758c4f6dc6c286b..bb08df0cbe06885d1dfbb838da94591d34a2d401 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -161,6 +161,7 @@ CONFIG_MEMCG_KMEM=y
 CONFIG_BLK_CGROUP=y
 CONFIG_CGROUP_WRITEBACK=y
 CONFIG_CGROUP_SCHED=y
+CONFIG_QOS_SCHED=y
 CONFIG_FAIR_GROUP_SCHED=y
 CONFIG_CFS_BANDWIDTH=y
 CONFIG_RT_GROUP_SCHED=y
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index e19bf53c0bd9856032515f84609faad002c6878f..bf732080d4ff2e7cd512459aef8045128621a6d0 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -183,6 +183,7 @@ CONFIG_MEMCG_KMEM=y
 CONFIG_BLK_CGROUP=y
 CONFIG_CGROUP_WRITEBACK=y
 CONFIG_CGROUP_SCHED=y
+CONFIG_QOS_SCHED=y
 CONFIG_FAIR_GROUP_SCHED=y
 CONFIG_CFS_BANDWIDTH=y
 CONFIG_RT_GROUP_SCHED=y
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b307013b9c6c9a2c892644f7fba94698bb82133b..7fa51b600ee81dc1f363676b7542097b625d66cc 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -855,4 +855,8 @@ static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
 
 #endif /* CONFIG_CGROUP_BPF */
 
+#ifdef CONFIG_QOS_SCHED
+void cgroup_move_task_to_root(struct task_struct *tsk);
+#endif
+
 #endif /* _LINUX_CGROUP_H */
diff --git a/include/linux/resume_user_mode.h b/include/linux/resume_user_mode.h
index f8f3e958e9cf2fbf0777fdbf5e3fd993c889cee3..255372856812332854aa635956ad7a8ea9446254 100644
--- a/include/linux/resume_user_mode.h
+++ b/include/linux/resume_user_mode.h
@@ -59,6 +59,11 @@ static inline void resume_user_mode_work(struct pt_regs *regs)
 	blkcg_maybe_throttle_current();
 
 	rseq_handle_notify_resume(NULL, regs);
+
+#ifdef CONFIG_QOS_SCHED
+	sched_qos_offline_wait();
+#endif
+
 }
 
 #endif /* LINUX_RESUME_USER_MODE_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4412f8818386f1164a5051c7c20fd15a20028e08..3520e3fbaa916670190eea018a4a6a01f78d5010 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2458,4 +2458,15 @@ static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); }
 
 extern void sched_set_stop_task(int cpu, struct task_struct *stop);
 
+#ifdef CONFIG_QOS_SCHED
+void sched_move_offline_task(struct task_struct *p);
+void sched_qos_offline_wait(void);
+int sched_qos_cpu_overload(void);
+#else
+static inline int sched_qos_cpu_overload(void)
+{
+	return 0;
+}
+#endif
+
 #endif
diff --git a/init/Kconfig b/init/Kconfig
index c94c82cc539faae9d9f8eda5ae229c8133772a24..b6952df34ec305e83c48240ed6c98bf667ad63c3 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -985,6 +985,18 @@ menuconfig CGROUP_SCHED
 	  tasks.
 
 if CGROUP_SCHED
+config QOS_SCHED
+	bool "QoS task scheduling"
+	depends on CGROUP_SCHED
+	depends on CFS_BANDWIDTH
+	default n
+	help
+	  This option enables the QoS scheduler, which supports co-locating
+	  online (latency-sensitive) services with offline tasks. Co-location
+	  can effectively improve resource utilization.
+
+	  If in doubt, say N.
+
 config FAIR_GROUP_SCHED
 	bool "Group scheduling for SCHED_OTHER"
 	depends on CGROUP_SCHED
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 518725b57200c2fe6d1d01a2280bd5156073ae0c..534522bd5b73aff412403948972b6364a7939d8d 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2936,6 +2936,26 @@ void cgroup_procs_write_finish(struct task_struct *task, bool threadgroup_locked
 			ss->post_attach();
 }
 
+#ifdef CONFIG_QOS_SCHED
+void cgroup_move_task_to_root(struct task_struct *tsk)
+{
+	struct cgroup *cpu_cgrp;
+	struct cgroup *cpu_root_cgrp;
+
+	mutex_lock(&cgroup_mutex);
+	percpu_down_write(&cgroup_threadgroup_rwsem);
+
+	spin_lock_irq(&css_set_lock);
+	cpu_cgrp = task_cgroup(tsk, cpu_cgrp_id);
+	cpu_root_cgrp = &cpu_cgrp->root->cgrp;
+	spin_unlock_irq(&css_set_lock);
+
+	(void)cgroup_attach_task(cpu_root_cgrp, tsk, false);
+	percpu_up_write(&cgroup_threadgroup_rwsem);
+	mutex_unlock(&cgroup_mutex);
+}
+#endif
+
 static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask)
 {
 	struct cgroup_subsys *ss;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a854b71836dd5b8a811d098a0cc24a2381e1e727..a1c73dea1f778c4038fef05cab1875335bc3dd17 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7727,6 +7727,18 @@ static int __sched_setscheduler(struct task_struct *p,
 	}
 
 change:
+#ifdef CONFIG_QOS_SCHED
+	/*
+	 * If the scheduling policy of an offline task is set to a policy
+	 * other than SCHED_IDLE, online task preemption and CPU resource
+	 * isolation become ineffective, so return -EINVAL in this case.
+	 */
+	if (unlikely(task_group(p)->qos_level == -1 && !idle_policy(policy))) {
+		retval = -EINVAL;
+		goto unlock;
+	}
+#endif
+
 	if (user) {
 #ifdef CONFIG_RT_GROUP_SCHED
 		/*
@@ -10006,6 +10018,9 @@ void __init sched_init(void)
 		 * We achieve this by letting root_task_group's tasks sit
 		 * directly in rq->cfs (i.e root_task_group->se[] = NULL).
 		 */
+#ifdef CONFIG_QOS_SCHED
+		init_qos_hrtimer(i);
+#endif
 		init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
@@ -10345,6 +10360,67 @@ void ia64_set_curr_task(int cpu, struct task_struct *p)
 
 /* task_group_lock serializes the addition/removal of task groups */
 static DEFINE_SPINLOCK(task_group_lock);
+#ifdef CONFIG_QOS_SCHED
+static inline int alloc_qos_sched_group(struct task_group *tg,
+					struct task_group *parent)
+{
+	tg->qos_level = parent->qos_level;
+
+	return 1;
+}
+
+static void sched_change_qos_group(struct task_struct *tsk, struct task_group *tg)
+{
+	struct sched_attr attr = {0};
+
+	/*
+	 * No need to reset the scheduler policy when a task is exiting or
+	 * the task is in an autogroup.
+	 */
+	if (!(tsk->flags & PF_EXITING) &&
+	    !task_group_is_autogroup(tg) &&
+	    (tg->qos_level == -1)) {
+		attr.sched_priority = 0;
+		attr.sched_policy = SCHED_IDLE;
+		attr.sched_nice = PRIO_TO_NICE(tsk->static_prio);
+		__setscheduler_params(tsk, &attr);
+		__setscheduler_prio(tsk, normal_prio(tsk));
+	}
+}
+
+struct offline_args {
+	struct work_struct work;
+	struct task_struct *p;
+};
+
+static void sched_move_work(struct work_struct *work)
+{
+	struct sched_param param = { .sched_priority = 0 };
+	struct offline_args *args = container_of(work, struct offline_args, work);
+
+	cgroup_move_task_to_root(args->p);
+	sched_setscheduler(args->p, SCHED_NORMAL, &param);
+	put_task_struct(args->p);
+	kfree(args);
+}
+
+void sched_move_offline_task(struct task_struct *p)
+{
+	struct offline_args *args;
+
+	if (unlikely(task_group(p)->qos_level != -1))
+		return;
+
+	args = kmalloc(sizeof(struct offline_args), GFP_ATOMIC);
+	if (args) {
+		get_task_struct(p);
+		args->p = p;
+		INIT_WORK(&args->work, sched_move_work);
+		queue_work(system_highpri_wq, &args->work);
+	}
+}
+#endif
+
 static inline void alloc_uclamp_sched_group(struct task_group *tg,
 					    struct task_group *parent)
 {
@@ -10395,6 +10471,11 @@ struct task_group *sched_create_group(struct task_group *parent)
 	if (!alloc_fair_sched_group(tg, parent))
 		goto err;
 
+#ifdef CONFIG_QOS_SCHED
+	if (!alloc_qos_sched_group(tg, parent))
+		goto err;
+#endif
+
 	if (!alloc_rt_sched_group(tg, parent))
 		goto err;
 
@@ -10481,6 +10562,10 @@ static void sched_change_group(struct task_struct *tsk, struct task_group *group
 {
 	tsk->sched_task_group = group;
 
+#ifdef CONFIG_QOS_SCHED
+	sched_change_qos_group(tsk, group);
+#endif
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	if (tsk->sched_class->task_change_group)
 		tsk->sched_class->task_change_group(tsk);
@@ -11209,6 +11294,69 @@ static int cpu_idle_write_s64(struct cgroup_subsys_state *css,
 }
 #endif
 
+#ifdef CONFIG_QOS_SCHED
+static int tg_change_scheduler(struct task_group *tg, void *data)
+{
+	int policy;
+	struct css_task_iter it;
+	struct sched_param param;
+	struct task_struct *tsk;
+	s64 qos_level = *(s64 *)data;
+	struct cgroup_subsys_state *css = &tg->css;
+
+	tg->qos_level = qos_level;
+	if (qos_level == -1)
+		policy = SCHED_IDLE;
+	else
+		policy = SCHED_NORMAL;
+
+	param.sched_priority = 0;
+	css_task_iter_start(css, 0, &it);
+	while ((tsk = css_task_iter_next(&it)))
+		sched_setscheduler(tsk, policy, &param);
+	css_task_iter_end(&it);
+
+	return 0;
+}
+
+static int cpu_qos_write(struct cgroup_subsys_state *css,
+			 struct cftype *cftype, s64 qos_level)
+{
+	struct task_group *tg = css_tg(css);
+
+	if (!tg->se[0])
+		return -EINVAL;
+
+	if (qos_level != -1 && qos_level != 0)
+		return -EINVAL;
+
+	if (tg->qos_level == qos_level)
+		goto done;
+
+	if (tg->qos_level == -1 && qos_level == 0)
+		return -EINVAL;
+
+	cpus_read_lock();
+	if (qos_level == -1)
+		cfs_bandwidth_usage_inc();
+	else
+		cfs_bandwidth_usage_dec();
+	cpus_read_unlock();
+
+	rcu_read_lock();
+	walk_tg_tree_from(tg, tg_change_scheduler, tg_nop, (void *)(&qos_level));
+	rcu_read_unlock();
+done:
+	return 0;
+}
+
+static inline s64 cpu_qos_read(struct cgroup_subsys_state *css,
+			       struct cftype *cft)
+{
+	return css_tg(css)->qos_level;
+}
+#endif
+
 static struct cftype cpu_legacy_files[] = {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	{
@@ -11272,6 +11420,13 @@ static struct cftype cpu_legacy_files[] = {
 		.seq_show = cpu_uclamp_max_show,
 		.write = cpu_uclamp_max_write,
 	},
+#endif
+#ifdef CONFIG_QOS_SCHED
+	{
+		.name = "qos_level",
+		.read_s64 = cpu_qos_read,
+		.write_s64 = cpu_qos_write,
+	},
 #endif
 	{ }	/* Terminate */
 };
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2430c88e04284f130a4c2b04c544baa026d4e489..8ae0d65713aad4348d4631a290a538d299813fd3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -57,6 +57,11 @@
 #include "stats.h"
 #include "autogroup.h"
 
+#ifdef CONFIG_QOS_SCHED
+#include
+#include
+#endif
+
 /*
  * The initial- and re-scaling of tunables is configurable
  *
@@ -124,6 +129,24 @@ int __weak arch_asym_cpu_priority(int cpu)
 #define capacity_greater(cap1, cap2) ((cap1) * 1024 > (cap2) * 1078)
 #endif
 
+#ifdef CONFIG_QOS_SCHED
+
+/*
+ * To distinguish QoS throttling from CFS bandwidth throttling, mark
+ * cfs_rq->throttled with QOS_THROTTLED (CFS bandwidth throttling uses 1).
+ */
+#define QOS_THROTTLED	2
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct list_head, qos_throttled_cfs_rq);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct hrtimer, qos_overload_timer);
+static DEFINE_PER_CPU(int, qos_cpu_overload);
+unsigned int sysctl_overload_detect_period = 5000;	/* in ms */
+unsigned int sysctl_offline_wait_interval = 100;	/* in ms */
+static int one_thousand = 1000;
+static int hundred_thousand = 100000;
+static int unthrottle_qos_cfs_rqs(int cpu);
+#endif
+
 #ifdef CONFIG_CFS_BANDWIDTH
 /*
  * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool
@@ -172,6 +195,26 @@ static struct ctl_table sched_fair_sysctls[] = {
 		.extra1		= SYSCTL_ZERO,
 	},
 #endif /* CONFIG_NUMA_BALANCING */
+#ifdef CONFIG_QOS_SCHED
+	{
+		.procname	= "qos_overload_detect_period_ms",
+		.data		= &sysctl_overload_detect_period,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ONE_HUNDRED,
+		.extra2		= &hundred_thousand,
+	},
+	{
+		.procname	= "qos_offline_wait_interval_ms",
+		.data		= &sysctl_offline_wait_interval,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ONE_HUNDRED,
+		.extra2		= &one_thousand,
+	},
+#endif
 	{}
 };
 
@@ -5639,6 +5682,14 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
 	se = cfs_rq->tg->se[cpu_of(rq)];
 
+#ifdef CONFIG_QOS_SCHED
+	/*
+	 * If this cfs_rq was throttled by QoS, there is no need to unthrottle it here.
+	 */
+	if (cfs_rq->throttled == QOS_THROTTLED)
+		return;
+#endif
+
 	cfs_rq->throttled = 0;
 
 	update_rq_clock(rq);
@@ -5823,7 +5874,20 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
 			goto next;
 #endif
 
-		/* By the above checks, this should never be true */
+		/*
+		 * CPU hotplug callbacks can race against distribute_cfs_runtime()
+		 * when the QOS_SCHED feature is enabled, so there may be
+		 * situations where runtime_remaining > 0.
+		 * QoS scheduling does not care whether the cfs_rq has any time left,
+		 * so do not allocate any more time to the cfs_rq in this scenario.
+		 */
+#ifdef CONFIG_QOS_SCHED
+		if (cfs_rq->throttled == QOS_THROTTLED &&
+		    cfs_rq->runtime_remaining > 0)
+			goto next;
+#endif
+
+		/* By the above check, this should never be true */
 		SCHED_WARN_ON(cfs_rq->runtime_remaining > 0);
 
 		raw_spin_lock(&cfs_b->lock);
@@ -6191,6 +6255,9 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 #ifdef CONFIG_SMP
 	INIT_LIST_HEAD(&cfs_rq->throttled_csd_list);
 #endif
+#ifdef CONFIG_QOS_SCHED
+	INIT_LIST_HEAD(&cfs_rq->qos_throttled_list);
+#endif
 }
 
 void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
@@ -6280,6 +6347,9 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
 	 * the rq clock again in unthrottle_cfs_rq().
 	 */
 	rq_clock_start_loop_update(rq);
+#ifdef CONFIG_QOS_SCHED
+	unthrottle_qos_cfs_rqs(cpu_of(rq));
+#endif
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(tg, &task_groups, list) {
@@ -6305,6 +6375,9 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
 	rcu_read_unlock();
 
 	rq_clock_stop_loop_update(rq);
+#ifdef CONFIG_QOS_SCHED
+	unthrottle_qos_cfs_rqs(cpu_of(rq));
+#endif
 }
 
 bool cfs_task_bw_constrained(struct task_struct *p)
@@ -8115,6 +8188,278 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 		resched_curr(rq);
 }
 
+#ifdef CONFIG_QOS_SCHED
+static inline bool is_offline_task(struct task_struct *p)
+{
+	return task_group(p)->qos_level == -1;
+}
+
+static void start_qos_hrtimer(int cpu);
+
+static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
+{
+	struct rq *rq = rq_of(cfs_rq);
+	struct sched_entity *se;
+	long task_delta, idle_task_delta;
+
+	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
+
+	/* freeze hierarchy runnable averages while throttled */
+	rcu_read_lock();
+	walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
+	rcu_read_unlock();
+
+	task_delta = cfs_rq->h_nr_running;
+	idle_task_delta = cfs_rq->idle_h_nr_running;
+	for_each_sched_entity(se) {
+		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
+		/* throttled entity or throttle-on-deactivate */
+		if (!se->on_rq)
+			goto done;
+
+		dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP);
+
+		qcfs_rq->h_nr_running -= task_delta;
+		qcfs_rq->idle_h_nr_running -= idle_task_delta;
+
+		if (qcfs_rq->load.weight) {
+			/* Avoid re-evaluating load for this entity: */
+			se = parent_entity(se);
+			break;
+		}
+	}
+
+	for_each_sched_entity(se) {
+		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
+		/* throttled entity or throttle-on-deactivate */
+		if (!se->on_rq)
+			goto done;
+
+		update_load_avg(qcfs_rq, se, 0);
+		se_update_runnable(se);
+
+		if (cfs_rq_is_idle(group_cfs_rq(se)))
+			idle_task_delta = cfs_rq->h_nr_running;
+
+		qcfs_rq->h_nr_running -= task_delta;
+		qcfs_rq->idle_h_nr_running -= idle_task_delta;
+	}
+
+	/* At this point se is NULL and we are at root level */
+	sub_nr_running(rq, task_delta);
+
+done:
+	if (list_empty(&per_cpu(qos_throttled_cfs_rq, cpu_of(rq))))
+		start_qos_hrtimer(cpu_of(rq));
+
+	cfs_rq->throttled = QOS_THROTTLED;
+
+	list_add(&cfs_rq->qos_throttled_list,
+		 &per_cpu(qos_throttled_cfs_rq, cpu_of(rq)));
+}
+
+static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
+{
+	struct rq *rq = rq_of(cfs_rq);
+	struct sched_entity *se;
+	long task_delta, idle_task_delta;
+
+	se = cfs_rq->tg->se[cpu_of(rq)];
+
+	if (cfs_rq->throttled != QOS_THROTTLED)
+		return;
+
+	cfs_rq->throttled = 0;
+
+	update_rq_clock(rq);
+	list_del_init(&cfs_rq->qos_throttled_list);
+
+	/* update hierarchical throttle state */
+	rcu_read_lock();
+	walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
+	rcu_read_unlock();
+
+	if (!cfs_rq->load.weight) {
+		if (!cfs_rq->on_list)
+			return;
+		/*
+		 * Nothing to run but something to decay (on_list)?
+		 * Complete the branch.
+		 */
+		for_each_sched_entity(se) {
+			if (list_add_leaf_cfs_rq(cfs_rq_of(se)))
+				break;
+		}
+		goto unthrottle_throttle;
+	}
+
+	task_delta = cfs_rq->h_nr_running;
+	idle_task_delta = cfs_rq->idle_h_nr_running;
+	for_each_sched_entity(se) {
+		if (se->on_rq)
+			break;
+
+		cfs_rq = cfs_rq_of(se);
+		enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
+
+		cfs_rq->h_nr_running += task_delta;
+		cfs_rq->idle_h_nr_running += idle_task_delta;
+
+		if (cfs_rq_throttled(cfs_rq))
+			goto unthrottle_throttle;
+	}
+
+	for_each_sched_entity(se) {
+		cfs_rq = cfs_rq_of(se);
+
+		update_load_avg(cfs_rq, se, UPDATE_TG);
+		se_update_runnable(se);
+
+		cfs_rq->h_nr_running += task_delta;
+		cfs_rq->idle_h_nr_running += idle_task_delta;
+
+		/* end evaluation on encountering a throttled cfs_rq */
+		if (cfs_rq_throttled(cfs_rq))
+			goto unthrottle_throttle;
+	}
+
+	add_nr_running(rq, task_delta);
+
+unthrottle_throttle:
+
+	assert_list_leaf_cfs_rq(rq);
+
+	/* Determine whether we need to wake up potentially idle CPU: */
+	if (rq->curr == rq->idle && rq->cfs.nr_running)
+		resched_curr(rq);
+}
+
+static int __unthrottle_qos_cfs_rqs(int cpu)
+{
+	struct cfs_rq *cfs_rq, *tmp_rq;
+	int res = 0;
+
+	list_for_each_entry_safe(cfs_rq, tmp_rq, &per_cpu(qos_throttled_cfs_rq, cpu),
+				 qos_throttled_list) {
+		if (cfs_rq_throttled(cfs_rq)) {
+			unthrottle_qos_cfs_rq(cfs_rq);
+			res++;
+		}
+	}
+
+	return res;
+}
+
+static int unthrottle_qos_cfs_rqs(int cpu)
+{
+	int res;
+
+	res = __unthrottle_qos_cfs_rqs(cpu);
+	if (res)
+		hrtimer_cancel(&(per_cpu(qos_overload_timer, cpu)));
+
+	return res;
+}
+
+static bool check_qos_cfs_rq(struct cfs_rq *cfs_rq)
+{
+	if (unlikely(__this_cpu_read(qos_cpu_overload)))
+		return false;
+
+	if (unlikely(cfs_rq && cfs_rq->tg->qos_level < 0 &&
+		     !sched_idle_cpu(smp_processor_id()) &&
+		     cfs_rq->h_nr_running == cfs_rq->idle_h_nr_running)) {
+
+		if (!rq_of(cfs_rq)->online)
+			return false;
+
+		throttle_qos_cfs_rq(cfs_rq);
+		return true;
+	}
+
+	return false;
+}
+
+static inline void unthrottle_qos_sched_group(struct cfs_rq *cfs_rq)
+{
+	struct rq *rq = rq_of(cfs_rq);
+	struct rq_flags rf;
+
+	rq_lock_irqsave(rq, &rf);
+	if (cfs_rq->tg->qos_level == -1 && cfs_rq_throttled(cfs_rq))
+		unthrottle_qos_cfs_rq(cfs_rq);
+	rq_unlock_irqrestore(rq, &rf);
+}
+
+void sched_qos_offline_wait(void)
+{
+	long qos_level;
+
+	while (unlikely(this_cpu_read(qos_cpu_overload))) {
+		rcu_read_lock();
+		qos_level = task_group(current)->qos_level;
+		rcu_read_unlock();
+		if (qos_level != -1 || fatal_signal_pending(current))
+			break;
+
+		schedule_timeout_killable(msecs_to_jiffies(sysctl_offline_wait_interval));
+	}
+}
+
+int sched_qos_cpu_overload(void)
+{
+	return __this_cpu_read(qos_cpu_overload);
+}
+
+static enum hrtimer_restart qos_overload_timer_handler(struct hrtimer *timer)
+{
+	struct rq_flags rf;
+	struct rq *rq = this_rq();
+
+	rq_lock_irqsave(rq, &rf);
+	if (__unthrottle_qos_cfs_rqs(smp_processor_id()))
+		__this_cpu_write(qos_cpu_overload, 1);
+	rq_unlock_irqrestore(rq, &rf);
+
+	return HRTIMER_NORESTART;
+}
+
+static void start_qos_hrtimer(int cpu)
+{
+	ktime_t time;
+	struct hrtimer *hrtimer = &(per_cpu(qos_overload_timer, cpu));
+
+	time = ktime_add_ms(hrtimer->base->get_time(), (u64)sysctl_overload_detect_period);
+	hrtimer_set_expires(hrtimer, time);
+	hrtimer_start_expires(hrtimer, HRTIMER_MODE_ABS_PINNED);
+}
+
+void init_qos_hrtimer(int cpu)
+{
+	struct hrtimer *hrtimer = &(per_cpu(qos_overload_timer, cpu));
+
+	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+	hrtimer->function = qos_overload_timer_handler;
+}
+
+/*
+ * To avoid priority inversion, when this CPU is in the qos_cpu_overload
+ * state, we should let offline tasks run so that they can leave kernel
+ * critical sections, and throttle them again before returning to user mode.
+ */
+static void qos_schedule_throttle(struct task_struct *p)
+{
+	if (unlikely(current->flags & PF_KTHREAD))
+		return;
+
+	if (unlikely(this_cpu_read(qos_cpu_overload))) {
+		if (is_offline_task(p))
+			set_notify_resume(p);
+	}
+}
+
+#endif
+
 #ifdef CONFIG_SMP
 static struct task_struct *pick_task_fair(struct rq *rq)
 {
@@ -8205,6 +8550,16 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
 
 		se = pick_next_entity(cfs_rq, curr);
 		cfs_rq = group_cfs_rq(se);
+#ifdef CONFIG_QOS_SCHED
+		if (check_qos_cfs_rq(cfs_rq)) {
+			cfs_rq = &rq->cfs;
+			WARN(cfs_rq->nr_running == 0,
+			     "rq->nr_running=%u, cfs_rq->idle_h_nr_running=%u\n",
+			     rq->nr_running, cfs_rq->idle_h_nr_running);
+			if (unlikely(!cfs_rq->nr_running))
+				return NULL;
+		}
+#endif
 	} while (cfs_rq);
 
 	p = task_of(se);
@@ -8265,6 +8620,10 @@ done: __maybe_unused;
 	update_misfit_status(p, rq);
 	sched_fair_update_stop_tick(rq, p);
 
+#ifdef CONFIG_QOS_SCHED
+	qos_schedule_throttle(p);
+#endif
+
 	return p;
 
 idle:
@@ -8284,6 +8643,14 @@ done: __maybe_unused;
 	if (new_tasks > 0)
 		goto again;
 
+#ifdef CONFIG_QOS_SCHED
+	if (unthrottle_qos_cfs_rqs(cpu_of(rq))) {
+		rq->idle_stamp = 0;
+		goto again;
+	}
+
+	__this_cpu_write(qos_cpu_overload, 0);
+#endif
 	/*
 	 * rq is about to be idle, check if we need to update the
 	 * lost_idle_time of clock_pelt
@@ -12600,6 +12967,10 @@ void free_fair_sched_group(struct task_group *tg)
 	int i;
 
 	for_each_possible_cpu(i) {
+#ifdef CONFIG_QOS_SCHED
+		if (tg->cfs_rq && tg->cfs_rq[i])
+			unthrottle_qos_sched_group(tg->cfs_rq[i]);
+#endif
 		if (tg->cfs_rq)
 			kfree(tg->cfs_rq[i]);
 		if (tg->se)
@@ -12989,6 +13360,11 @@ __init void init_sched_fair_class(void)
 #endif
 	}
 
+#ifdef CONFIG_QOS_SCHED
+	for_each_possible_cpu(i)
+		INIT_LIST_HEAD(&per_cpu(qos_throttled_cfs_rq, i));
+#endif
+
 	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
 
 #ifdef CONFIG_NO_HZ_COMMON
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 04846272409cc00f20f24d8cc6456554d67aba0a..3de84e95baf1cbee2a0a79d9bdcdb1f4b960b103 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -403,6 +403,10 @@ struct task_group {
 
 	struct cfs_bandwidth	cfs_bandwidth;
 
+#ifdef CONFIG_QOS_SCHED
+	long			qos_level;
+#endif
+
 #ifdef CONFIG_UCLAMP_TASK_GROUP
 	/* The two decimal precision [%] value requested from user-space */
 	unsigned int		uclamp_pct[UCLAMP_CNT];
@@ -649,6 +653,10 @@ struct cfs_rq {
 #endif
 #endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
+
+#if defined(CONFIG_QOS_SCHED)
+	struct list_head	qos_throttled_list;
+#endif
 };
 
 static inline int rt_bandwidth_enabled(void)
@@ -1403,6 +1411,10 @@ do {						\
 	flags = _raw_spin_rq_lock_irqsave(rq);	\
 } while (0)
 
+#ifdef CONFIG_QOS_SCHED
+void init_qos_hrtimer(int cpu);
+#endif
+
 #ifdef CONFIG_SCHED_SMT
 extern void __update_idle_core(struct rq *rq);
 
diff --git a/kernel/signal.c b/kernel/signal.c
index 09019017d6690a50e0a6931a20fcfdb6fb15c04b..28cddef3977869bea3b97fb756aaafa594a7f4d1 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1060,6 +1060,9 @@ static void complete_signal(int sig, struct task_struct *p, enum pid_type type)
 			signal->group_stop_count = 0;
 			t = p;
 			do {
+#ifdef CONFIG_QOS_SCHED
+				sched_move_offline_task(t);
+#endif
 				task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
 				sigaddset(&t->pending.signal, SIGKILL);
 				signal_wake_up(t, 1);
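
For reviewers who want to exercise the new interface, the sketch below shows one way a co-location agent might mark a cgroup as offline through the new cpu.qos_level file and tune the two sysctls added by this patch. It is only an illustrative sketch, not part of the patch: it assumes the legacy cpu controller is mounted at /sys/fs/cgroup/cpu, that an "offline" group has already been created there, and that the sysctls appear under /proc/sys/kernel/ like the other sched_fair sysctls; the group name and the write_str() helper are hypothetical.

```c
/* Hypothetical user-space sketch: mark a cgroup offline via cpu.qos_level. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Write a short string to a file and report any error. */
static int write_str(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror(path);
		return -1;
	}
	if (write(fd, val, strlen(val)) < 0) {
		perror(path);
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	char pid[16];

	/* Mark the pre-created "offline" group as offline (qos_level = -1). */
	write_str("/sys/fs/cgroup/cpu/offline/cpu.qos_level", "-1");

	/* Move the current process into the offline group. */
	snprintf(pid, sizeof(pid), "%d", getpid());
	write_str("/sys/fs/cgroup/cpu/offline/cgroup.procs", pid);

	/* Tune the overload-detection and offline-wait knobs (assumed paths). */
	write_str("/proc/sys/kernel/qos_overload_detect_period_ms", "5000");
	write_str("/proc/sys/kernel/qos_offline_wait_interval_ms", "100");
	return 0;
}
```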
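One behavioural detail of cpu_qos_write() worth calling out is that the transition is one-way: a group can go from online (0) to offline (-1), but writing 0 back to an offline group returns -EINVAL, as does writing qos_level on a group without scheduling entities (the root). A minimal test sketch of that rule, under the same mount-point and group-name assumptions as above:

```c
/* Hypothetical test: the online -> offline transition is one-way. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Write val to path; return 0 on success or -errno on failure. */
static int write_level(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	int ret;

	if (fd < 0)
		return -errno;
	ret = write(fd, val, strlen(val)) < 0 ? -errno : 0;
	close(fd);
	return ret;
}

int main(void)
{
	const char *lvl = "/sys/fs/cgroup/cpu/offline/cpu.qos_level";

	if (write_level(lvl, "-1"))		/* 0 -> -1 should succeed */
		perror("mark offline");
	if (write_level(lvl, "0") == -EINVAL)	/* -1 -> 0 is rejected */
		printf("offline group cannot be switched back online\n");
	return 0;
}
```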