diff --git a/fs/proc/base.c b/fs/proc/base.c index 2b318f2161f04dc9776ed0e8b848c965dd281c2e..835d83c1af86910e493910abd2d30066f1bfa935 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -87,6 +87,7 @@ #include #include #include +#include #include #include #include @@ -1559,6 +1560,70 @@ static const struct file_operations proc_pid_sched_autogroup_operations = { #endif /* CONFIG_SCHED_AUTOGROUP */ +#ifdef CONFIG_SCHED_WALT +static int sched_init_task_load_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *p; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + seq_printf(m, "%d\n", sched_get_init_task_load(p)); + + put_task_struct(p); + + return 0; +} + +static ssize_t +sched_init_task_load_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *p; + char buffer[PROC_NUMBUF]; + int init_task_load, err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + } + + err = kstrtoint(strstrip(buffer), 0, &init_task_load); + if (err) + goto out; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + err = sched_set_init_task_load(p, init_task_load); + + put_task_struct(p); + +out: + return err < 0 ? err : count; +} + +static int sched_init_task_load_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_init_task_load_show, inode); +} + +static const struct file_operations proc_pid_sched_init_task_load_operations = { + .open = sched_init_task_load_open, + .read = seq_read, + .write = sched_init_task_load_write, + .llseek = seq_lseek, + .release = single_release, +}; +#endif /* CONFIG_SCHED_WALT */ + #ifdef CONFIG_TIME_NS static int timens_offsets_show(struct seq_file *m, void *v) { @@ -3261,6 +3326,9 @@ static const struct pid_entry tgid_base_stuff[] = { ONE("status", S_IRUGO, proc_pid_status), ONE("personality", S_IRUSR, proc_pid_personality), ONE("limits", S_IRUGO, proc_pid_limits), +#ifdef CONFIG_SCHED_WALT + REG("sched_init_task_load", 00644, proc_pid_sched_init_task_load_operations), +#endif #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index d3198acc29af318b39ef85a111161ab0c99238b0..89869b515c64c0fb797a3ca8c55ee8e28afa1cd0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -291,6 +291,14 @@ struct user_event_mm; enum { TASK_COMM_LEN = 16, }; +enum task_event { + PUT_PREV_TASK = 0, + PICK_NEXT_TASK = 1, + TASK_WAKE = 2, + TASK_MIGRATE = 3, + TASK_UPDATE = 4, + IRQ_UPDATE = 5, +}; extern void scheduler_tick(void); @@ -590,6 +598,53 @@ struct sched_entity { #endif }; +#ifdef CONFIG_SCHED_WALT +extern void sched_exit(struct task_struct *p); +extern int sched_set_init_task_load(struct task_struct *p, int init_load_pct); +extern u32 sched_get_init_task_load(struct task_struct *p); +extern void free_task_load_ptrs(struct task_struct *p); +#define RAVG_HIST_SIZE_MAX 5 +struct ravg { + /* + * 'mark_start' marks the beginning of an event (task waking up, task + * starting to execute, task being preempted) within a window + * + * 'sum' represents how runnable a task has been within current + * window. It incorporates both running time and wait time and is + * frequency scaled. + * + * 'sum_history' keeps track of history of 'sum' seen over previous + * RAVG_HIST_SIZE windows. 
Windows where task was entirely sleeping are + * ignored. + * + * 'demand' represents maximum sum seen over previous + * sysctl_sched_ravg_hist_size windows. 'demand' could drive frequency + * demand for tasks. + * + * 'curr_window_cpu' represents task's contribution to cpu busy time on + * various CPUs in the current window + * + * 'prev_window_cpu' represents task's contribution to cpu busy time on + * various CPUs in the previous window + * + * 'curr_window' represents the sum of all entries in curr_window_cpu + * + * 'prev_window' represents the sum of all entries in prev_window_cpu + * + */ + u64 mark_start; + u32 sum, demand; + u32 sum_history[RAVG_HIST_SIZE_MAX]; + u32 *curr_window_cpu, *prev_window_cpu; + u32 curr_window, prev_window; + u16 active_windows; + u16 demand_scaled; +}; +#else +static inline void sched_exit(struct task_struct *p) { } +static inline void free_task_load_ptrs(struct task_struct *p) { } +#endif /* CONFIG_SCHED_WALT */ + struct sched_rt_entity { struct list_head run_list; unsigned long timeout; @@ -800,6 +855,15 @@ struct task_struct { struct sched_entity se; struct sched_rt_entity rt; +#ifdef CONFIG_SCHED_WALT + struct ravg ravg; + /* + * 'init_load_pct' represents the initial task load assigned to children + * of this task + */ + u32 init_load_pct; + u64 last_sleep_ts; +#endif struct sched_dl_entity dl; const struct sched_class *sched_class; diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h index bdd31ab93bc51dec77f73a5496160cac0083cfac..ee97c8e891c44f68f80aa379957ec358314d06dd 100644 --- a/include/linux/sched/cpufreq.h +++ b/include/linux/sched/cpufreq.h @@ -9,6 +9,8 @@ */ #define SCHED_CPUFREQ_IOWAIT (1U << 0) +#define SCHED_CPUFREQ_WALT (1U << 1) +#define SCHED_CPUFREQ_CONTINUE (1U << 2) #ifdef CONFIG_CPU_FREQ struct cpufreq_policy; diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h index 0108a38bb64d75389ff7f0a7f68843487bc03646..0b2189197aca1bb7b146e017db70d5cc11d135a0 100644 --- a/include/linux/sched/stat.h +++ b/include/linux/sched/stat.h @@ -22,6 +22,14 @@ extern bool single_task_running(void); extern unsigned int nr_iowait(void); extern unsigned int nr_iowait_cpu(int cpu); +#ifdef CONFIG_SCHED_WALT +extern unsigned int sched_get_cpu_util(int cpu); +#else +static inline unsigned int sched_get_cpu_util(int cpu) +{ + return 0; +} +#endif static inline int sched_info_on(void) { return IS_ENABLED(CONFIG_SCHED_INFO); diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 980adc56cfe7553c314881cdc7a69dabb413ac69..0e098c5a0ff323eabdd19f47e062d1b0b6fb8621 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -14,7 +14,16 @@ enum { sysctl_hung_task_timeout_secs = 0 }; extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_wakeup_granularity; +#ifdef CONFIG_SCHED_WALT +extern unsigned int sysctl_sched_use_walt_cpu_util; +extern unsigned int sysctl_sched_use_walt_task_util; +extern unsigned int sysctl_sched_walt_init_task_load_pct; +extern unsigned int sysctl_sched_cpu_high_irqload; +extern int +sysctl_sched_walt_init_task_load_pct_sysctl_handler(struct ctl_table *table, + int write, void __user *buffer, size_t *length, loff_t *ppos); +#endif enum sched_tunable_scaling { SCHED_TUNABLESCALING_NONE, SCHED_TUNABLESCALING_LOG, diff --git a/include/trace/events/walt.h b/include/trace/events/walt.h new file mode 100755 index 0000000000000000000000000000000000000000..9af92c8689b986bf51405ba3fa0d0000204a24a4 --- /dev/null +++ b/include/trace/events/walt.h @@ 
-0,0 +1,256 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM walt + +#if !defined(_TRACE_WALT_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_WALT_H + +#include +#include + +struct rq; +extern const char *task_event_names[]; + +#if defined(CREATE_TRACE_POINTS) && defined(CONFIG_SCHED_WALT) +static inline void __window_data(u32 *dst, u32 *src) +{ + if (src) + memcpy(dst, src, nr_cpu_ids * sizeof(u32)); + else + memset(dst, 0, nr_cpu_ids * sizeof(u32)); +} + +struct trace_seq; +const char *__window_print(struct trace_seq *p, const u32 *buf, int buf_len) +{ + int i; + const char *ret = p->buffer + seq_buf_used(&p->seq); + + for (i = 0; i < buf_len; i++) + trace_seq_printf(p, "%u ", buf[i]); + + trace_seq_putc(p, 0); + + return ret; +} + +static inline s64 __rq_update_sum(struct rq *rq, bool curr, bool new) +{ + if (curr) + if (new) + return rq->nt_curr_runnable_sum; + else + return rq->curr_runnable_sum; + else + if (new) + return rq->nt_prev_runnable_sum; + else + return rq->prev_runnable_sum; +} + +#ifdef CONFIG_SCHED_RTG +static inline s64 __grp_update_sum(struct rq *rq, bool curr, bool new) +{ + if (curr) + if (new) + return rq->grp_time.nt_curr_runnable_sum; + else + return rq->grp_time.curr_runnable_sum; + else + if (new) + return rq->grp_time.nt_prev_runnable_sum; + else + return rq->grp_time.prev_runnable_sum; +} + +static inline s64 +__get_update_sum(struct rq *rq, enum migrate_types migrate_type, + bool src, bool new, bool curr) +{ + switch (migrate_type) { + case RQ_TO_GROUP: + if (src) + return __rq_update_sum(rq, curr, new); + else + return __grp_update_sum(rq, curr, new); + case GROUP_TO_RQ: + if (src) + return __grp_update_sum(rq, curr, new); + else + return __rq_update_sum(rq, curr, new); + default: + WARN_ON_ONCE(1); + return -1; + } +} +#endif +#endif + +TRACE_EVENT(sched_update_history, + + TP_PROTO(struct rq *rq, struct task_struct *p, u32 runtime, int samples, + enum task_event evt), + + TP_ARGS(rq, p, runtime, samples, evt), + + TP_STRUCT__entry( + __array(char, comm, TASK_COMM_LEN) + __field(pid_t, pid) + __field(unsigned int, runtime) + __field(int, samples) + __field(enum task_event, evt) + __field(unsigned int, demand) + __array(u32, hist, RAVG_HIST_SIZE_MAX) + __field(int, cpu) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->runtime = runtime; + __entry->samples = samples; + __entry->evt = evt; + __entry->demand = p->ravg.demand; + memcpy(__entry->hist, p->ravg.sum_history, + RAVG_HIST_SIZE_MAX * sizeof(u32)); + __entry->cpu = rq->cpu; + ), + + TP_printk("%d (%s): runtime %u samples %d event %s demand %u (hist: %u %u %u %u %u) cpu %d", + __entry->pid, __entry->comm, + __entry->runtime, __entry->samples, + task_event_names[__entry->evt], __entry->demand, + __entry->hist[0], __entry->hist[1], + __entry->hist[2], __entry->hist[3], + __entry->hist[4], __entry->cpu) +); + +TRACE_EVENT(sched_update_task_ravg, + + TP_PROTO(struct task_struct *p, struct rq *rq, enum task_event evt, + u64 wallclock, u64 irqtime), + + TP_ARGS(p, rq, evt, wallclock, irqtime), + + TP_STRUCT__entry( + __array(char, comm, TASK_COMM_LEN) + __field(pid_t, pid) + __field(pid_t, cur_pid) + __field(unsigned int, cur_freq) + __field(u64, wallclock) + __field(u64, mark_start) + __field(u64, delta_m) + __field(u64, win_start) + __field(u64, delta) + __field(u64, irqtime) + __field(enum task_event, evt) + __field(unsigned int, demand) + __field(unsigned int, sum) + __field(int, cpu) + __field(u64, 
rq_cs) + __field(u64, rq_ps) + __field(u32, curr_window) + __field(u32, prev_window) + __dynamic_array(u32, curr_sum, nr_cpu_ids) + __dynamic_array(u32, prev_sum, nr_cpu_ids) + __field(u64, nt_cs) + __field(u64, nt_ps) + __field(u32, active_windows) + ), + + TP_fast_assign( + __entry->wallclock = wallclock; + __entry->win_start = rq->window_start; + __entry->delta = (wallclock - rq->window_start); + __entry->evt = evt; + __entry->cpu = rq->cpu; + __entry->cur_pid = rq->curr->pid; + __entry->cur_freq = rq->cluster->cur_freq; + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->mark_start = p->ravg.mark_start; + __entry->delta_m = (wallclock - p->ravg.mark_start); + __entry->demand = p->ravg.demand; + __entry->sum = p->ravg.sum; + __entry->irqtime = irqtime; + __entry->rq_cs = rq->curr_runnable_sum; + __entry->rq_ps = rq->prev_runnable_sum; + __entry->curr_window = p->ravg.curr_window; + __entry->prev_window = p->ravg.prev_window; + __window_data(__get_dynamic_array(curr_sum), p->ravg.curr_window_cpu); + __window_data(__get_dynamic_array(prev_sum), p->ravg.prev_window_cpu); + __entry->nt_cs = rq->nt_curr_runnable_sum; + __entry->nt_ps = rq->nt_prev_runnable_sum; + __entry->active_windows = p->ravg.active_windows; + ), + + TP_printk("wc %llu ws %llu delta %llu event %s cpu %d cur_freq %u cur_pid %d task %d (%s) ms %llu delta %llu demand %u sum %u irqtime %llu rq_cs %llu rq_ps %llu cur_window %u (%s) prev_window %u (%s) nt_cs %llu nt_ps %llu active_wins %u", + __entry->wallclock, __entry->win_start, __entry->delta, + task_event_names[__entry->evt], __entry->cpu, + __entry->cur_freq, __entry->cur_pid, + __entry->pid, __entry->comm, __entry->mark_start, + __entry->delta_m, __entry->demand, + __entry->sum, __entry->irqtime, + __entry->rq_cs, __entry->rq_ps, __entry->curr_window, + __window_print(p, __get_dynamic_array(curr_sum), nr_cpu_ids), + __entry->prev_window, + __window_print(p, __get_dynamic_array(prev_sum), nr_cpu_ids), + __entry->nt_cs, __entry->nt_ps, + __entry->active_windows) +); + +extern const char *migrate_type_names[]; + +#ifdef CONFIG_SCHED_RTG +TRACE_EVENT(sched_migration_update_sum, + + TP_PROTO(struct task_struct *p, enum migrate_types migrate_type, struct rq *rq), + + TP_ARGS(p, migrate_type, rq), + + TP_STRUCT__entry( + __field(int, tcpu) + __field(int, pid) + __field(enum migrate_types, migrate_type) + __field(s64, src_cs) + __field(s64, src_ps) + __field(s64, dst_cs) + __field(s64, dst_ps) + __field(s64, src_nt_cs) + __field(s64, src_nt_ps) + __field(s64, dst_nt_cs) + __field(s64, dst_nt_ps) + ), + + TP_fast_assign( + __entry->tcpu = task_cpu(p); + __entry->pid = p->pid; + __entry->migrate_type = migrate_type; + __entry->src_cs = __get_update_sum(rq, migrate_type, + true, false, true); + __entry->src_ps = __get_update_sum(rq, migrate_type, + true, false, false); + __entry->dst_cs = __get_update_sum(rq, migrate_type, + false, false, true); + __entry->dst_ps = __get_update_sum(rq, migrate_type, + false, false, false); + __entry->src_nt_cs = __get_update_sum(rq, migrate_type, + true, true, true); + __entry->src_nt_ps = __get_update_sum(rq, migrate_type, + true, true, false); + __entry->dst_nt_cs = __get_update_sum(rq, migrate_type, + false, true, true); + __entry->dst_nt_ps = __get_update_sum(rq, migrate_type, + false, true, false); + ), + + TP_printk("pid %d task_cpu %d migrate_type %s src_cs %llu src_ps %llu dst_cs %lld dst_ps %lld src_nt_cs %llu src_nt_ps %llu dst_nt_cs %lld dst_nt_ps %lld", + __entry->pid, __entry->tcpu, 
migrate_type_names[__entry->migrate_type], + __entry->src_cs, __entry->src_ps, __entry->dst_cs, __entry->dst_ps, + __entry->src_nt_cs, __entry->src_nt_ps, __entry->dst_nt_cs, __entry->dst_nt_ps) +); +#endif +#endif /* _TRACE_WALT_H */ + +/* This part must be outside protection */ +#include diff --git a/init/Kconfig b/init/Kconfig index 879f6dc1cb274a9f92a03601106cb1d1e1d35765..b12f17a062e8b5a6041b060ad1ce5f22f12d8092 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -565,6 +565,14 @@ config SCHED_THERMAL_PRESSURE This requires the architecture to implement arch_update_thermal_pressure() and arch_scale_thermal_pressure(). +config SCHED_WALT + bool "Support window based load tracking" + depends on SMP + help + This feature will allow the scheduler to maintain a tunable window + based set of metrics for tasks and runqueues. These metrics can be + used to guide task placement as well as task frequency requirements + for cpufreq governors. config BSD_PROCESS_ACCT bool "BSD Process Accounting" diff --git a/kernel/exit.c b/kernel/exit.c index 21a59a6e1f2e8941a314116e06b5337e1ff986dc..20dbf5cc67883140b11e3f719f9074409ad8cb02 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -826,6 +826,7 @@ void __noreturn do_exit(long code) io_uring_files_cancel(); exit_signals(tsk); /* sets PF_EXITING */ + sched_exit(tsk); /* sync mm's RSS info before statistics gathering */ if (tsk->mm) diff --git a/kernel/fork.c b/kernel/fork.c index 486248a10b8b75497afdfe9cb3caca30450d3d7a..ead78df4cf987718cdb4f32cd57f5ade10e1de68 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2783,6 +2783,7 @@ __latent_entropy struct task_struct *copy_process( perf_event_free_task(p); bad_fork_cleanup_policy: lockdep_free_task(p); + free_task_load_ptrs(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); #endif diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 976092b7bd4520ebc1a607734520ac342585a120..c46379af99d98f11f5fbba1e0686d3a6613fb5f1 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -32,3 +32,5 @@ obj-y += core.o obj-y += fair.o obj-y += build_policy.o obj-y += build_utility.o +obj-$(CONFIG_SCHED_WALT) += walt.o +obj-$(CONFIG_SCHED_RUNNING_AVG) += sched_avg.o diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 461c49c8db244abb42f7f7b7bd11b423a8cd0087..ecb69c975d4ba3a5c0c7402121e97a423aa58d3f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -91,6 +91,7 @@ #include "pelt.h" #include "smp.h" #include "stats.h" +#include "walt.h" #include "../workqueue_internal.h" #include "../../io_uring/io-wq.h" @@ -2562,8 +2563,17 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf, lockdep_assert_rq_held(rq); deactivate_task(rq, p, DEQUEUE_NOCLOCK); +#ifdef CONFIG_SCHED_WALT + double_lock_balance(rq, cpu_rq(new_cpu)); + if (!(rq->clock_update_flags & RQCF_UPDATED)) + update_rq_clock(rq); +#endif set_task_cpu(p, new_cpu); +#ifdef CONFIG_SCHED_WALT + double_rq_unlock(cpu_rq(new_cpu), rq); +#else rq_unlock(rq, rf); +#endif rq = cpu_rq(new_cpu); @@ -3435,6 +3445,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) rseq_migrate(p); sched_mm_cid_migrate_from(p); perf_event_task_migrate(p); + fixup_busy_time(p, new_cpu); } __set_task_cpu(p, new_cpu); @@ -4206,6 +4217,26 @@ bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success) * accesses to the task state; see try_to_wake_up() and set_current_state(). 
*/ +#ifdef CONFIG_SMP +#ifdef CONFIG_SCHED_WALT +/* utility function to update walt signals at wakeup */ +static inline void walt_try_to_wake_up(struct task_struct *p) +{ + struct rq *rq = cpu_rq(task_cpu(p)); + struct rq_flags rf; + u64 wallclock; + + rq_lock_irqsave(rq, &rf); + wallclock = sched_ktime_clock(); + update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0); + update_task_ravg(p, rq, TASK_WAKE, wallclock, 0); + rq_unlock_irqrestore(rq, &rf); +} +#else +#define walt_try_to_wake_up(a) {} +#endif +#endif + /** * try_to_wake_up - wake up a thread * @p: the thread to be awakened @@ -4333,6 +4364,7 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) */ smp_acquire__after_ctrl_dep(); + walt_try_to_wake_up(p); /* * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq * == 0), which means we need to do an enqueue, change p->state to @@ -4774,6 +4806,7 @@ late_initcall(sched_core_sysctl_init); */ int sched_fork(unsigned long clone_flags, struct task_struct *p) { + init_new_task_load(p); __sched_fork(clone_flags, p); /* * We mark the process as NEW here. This guarantees that @@ -4926,6 +4959,8 @@ void wake_up_new_task(struct task_struct *p) update_rq_clock(rq); post_init_entity_util_avg(p); + mark_task_starting(p); + activate_task(rq, p, ENQUEUE_NOCLOCK); trace_sched_wakeup_new(p); check_preempt_curr(rq, p, WF_FORK); @@ -5696,6 +5731,7 @@ void scheduler_tick(void) struct rq *rq = cpu_rq(cpu); struct task_struct *curr = rq->curr; struct rq_flags rf; + u64 wallclock; unsigned long thermal_pressure; u64 resched_latency; @@ -5706,6 +5742,9 @@ void scheduler_tick(void) rq_lock(rq, &rf); + set_window_start(rq); + wallclock = sched_ktime_clock(); + update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0); update_rq_clock(rq); thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq)); update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure); @@ -6636,6 +6675,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) struct rq_flags rf; struct rq *rq; int cpu; + u64 wallclock; cpu = smp_processor_id(); rq = cpu_rq(cpu); @@ -6715,11 +6755,18 @@ static void __sched notrace __schedule(unsigned int sched_mode) next = pick_next_task(rq, prev, &rf); clear_tsk_need_resched(prev); clear_preempt_need_resched(); + wallclock = sched_ktime_clock(); #ifdef CONFIG_SCHED_DEBUG rq->last_seen_need_resched_ns = 0; #endif if (likely(prev != next)) { +#ifdef CONFIG_SCHED_WALT + if (!prev->on_rq) + prev->last_sleep_ts = wallclock; +#endif + update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0); + update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0); rq->nr_switches++; /* * RCU users of rcu_dereference(rq->curr) may not see @@ -6750,6 +6797,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) /* Also unlocks the rq: */ rq = context_switch(rq, prev, next, &rf); } else { + update_task_ravg(prev, rq, TASK_UPDATE, wallclock, 0); rq_unpin_lock(rq, &rf); __balance_callbacks(rq); raw_spin_rq_unlock_irq(rq); @@ -9827,6 +9875,11 @@ int sched_cpu_deactivate(unsigned int cpu) static void sched_rq_cpu_starting(unsigned int cpu) { struct rq *rq = cpu_rq(cpu); + unsigned long flags; + + raw_spin_lock_irqsave(&rq->__lock, flags); + set_window_start(rq); + raw_spin_unlock_irqrestore(&rq->__lock, flags); rq->calc_load_update = calc_load_update; update_max_interval(); @@ -9931,6 +9984,8 @@ void __init sched_init_smp(void) sched_init_domains(cpu_active_mask); mutex_unlock(&sched_domains_mutex); + update_cluster_topology(); + /* Move init over to a 
non-isolated CPU */ if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_DOMAIN)) < 0) BUG(); @@ -9991,6 +10046,8 @@ void __init sched_init(void) wait_bit_init(); + init_clusters(); + #ifdef CONFIG_FAIR_GROUP_SCHED ptr += 2 * nr_cpu_ids * sizeof(void **); #endif @@ -10095,6 +10152,7 @@ void __init sched_init(void) rq->wake_stamp = jiffies; rq->wake_avg_idle = rq->avg_idle; rq->max_idle_balance_cost = sysctl_sched_migration_cost; + walt_sched_init_rq(rq); INIT_LIST_HEAD(&rq->cfs_tasks); @@ -10149,6 +10207,7 @@ void __init sched_init(void) * when this runqueue becomes "idle". */ init_idle(current, smp_processor_id()); + init_new_task_load(current); calc_load_update = jiffies + LOAD_FREQ; @@ -12187,3 +12246,48 @@ void sched_mm_cid_fork(struct task_struct *t) t->mm_cid_active = 1; } #endif + +#ifdef CONFIG_SCHED_WALT +/* + * sched_exit() - Set EXITING_TASK_MARKER in task's ravg.demand field + * + * Stop accounting (exiting) task's future cpu usage + * + * We need this so that reset_all_windows_stats() can function correctly. + * reset_all_window_stats() depends on do_each_thread/for_each_thread task + * iterators to reset *all* task's statistics. Exiting tasks however become + * invisible to those iterators. sched_exit() is called on a exiting task prior + * to being removed from task_list, which will let reset_all_window_stats() + * function correctly. + */ +void sched_exit(struct task_struct *p) +{ + struct rq_flags rf; + struct rq *rq; + u64 wallclock; + +#ifdef CONFIG_SCHED_RTG + sched_set_group_id(p, 0); +#endif + + rq = task_rq_lock(p, &rf); + + /* rq->curr == p */ + wallclock = sched_ktime_clock(); + update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0); + dequeue_task(rq, p, 0); + /* + * task's contribution is already removed from the + * cumulative window demand in dequeue. As the + * task's stats are reset, the next enqueue does + * not change the cumulative window demand. + */ + reset_task_stats(p); + p->ravg.mark_start = wallclock; + p->ravg.sum_history[0] = EXITING_TASK_MARKER; + + enqueue_task(rq, p, 0); + task_rq_unlock(rq, p, &rf); + free_task_load_ptrs(p); +} +#endif /* CONFIG_SCHED_WALT */ diff --git a/kernel/sched/core_ctl.c b/kernel/sched/core_ctl.c new file mode 100755 index 0000000000000000000000000000000000000000..eef1d69211782dd890a4aab03788421adf64e80f --- /dev/null +++ b/kernel/sched/core_ctl.c @@ -0,0 +1,1061 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2014-2021, The Linux Foundation. All rights reserved. 
+ */ + +#define pr_fmt(fmt) "core_ctl: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "sched.h" +#include "walt.h" + +#define MAX_CPUS_PER_CLUSTER 6 +#define MAX_CLUSTERS 3 + +struct cluster_data { + bool inited; + unsigned int min_cpus; + unsigned int max_cpus; + unsigned int offline_delay_ms; + unsigned int busy_up_thres[MAX_CPUS_PER_CLUSTER]; + unsigned int busy_down_thres[MAX_CPUS_PER_CLUSTER]; + unsigned int active_cpus; + unsigned int num_cpus; + unsigned int nr_isolated_cpus; + unsigned int nr_not_preferred_cpus; + cpumask_t cpu_mask; + unsigned int need_cpus; + unsigned int task_thres; + unsigned int max_nr; + unsigned int nr_prev_assist; + unsigned int nr_prev_assist_thresh; + s64 need_ts; + struct list_head lru; + bool pending; + spinlock_t pending_lock; + bool enable; + int nrrun; + struct task_struct *core_ctl_thread; + unsigned int first_cpu; + unsigned int boost; + struct kobject kobj; +}; + +struct cpu_data { + bool is_busy; + unsigned int busy; + unsigned int cpu; + bool not_preferred; + struct cluster_data *cluster; + struct list_head sib; + bool isolated_by_us; +}; + +static DEFINE_PER_CPU(struct cpu_data, cpu_state); +static struct cluster_data cluster_state[MAX_CLUSTERS]; +static unsigned int num_clusters; + +#define for_each_cluster(cluster, idx) \ + for (; (idx) < num_clusters && ((cluster) = &cluster_state[idx]);\ + (idx)++) + +static DEFINE_SPINLOCK(state_lock); +static void apply_need(struct cluster_data *state); +static void wake_up_core_ctl_thread(struct cluster_data *state); +static bool initialized; + +ATOMIC_NOTIFIER_HEAD(core_ctl_notifier); +static unsigned int last_nr_big; + +static unsigned int get_active_cpu_count(const struct cluster_data *cluster); + +/* ========================= sysfs interface =========================== */ + +static ssize_t store_min_cpus(struct cluster_data *state, + const char *buf, size_t count) +{ + unsigned int val; + + if (sscanf(buf, "%u\n", &val) != 1) + return -EINVAL; + + state->min_cpus = min(val, state->max_cpus); + wake_up_core_ctl_thread(state); + + return count; +} + +static ssize_t show_min_cpus(const struct cluster_data *state, char *buf) +{ + return sysfs_emit(buf, "%u\n", state->min_cpus); +} + +static ssize_t store_max_cpus(struct cluster_data *state, + const char *buf, size_t count) +{ + unsigned int val; + + if (sscanf(buf, "%u\n", &val) != 1) + return -EINVAL; + + val = min(val, state->num_cpus); + state->max_cpus = val; + state->min_cpus = min(state->min_cpus, state->max_cpus); + wake_up_core_ctl_thread(state); + + return count; +} + +static ssize_t show_max_cpus(const struct cluster_data *state, char *buf) +{ + return sysfs_emit(buf, "%u\n", state->max_cpus); +} + +static ssize_t store_enable(struct cluster_data *state, + const char *buf, size_t count) +{ + unsigned int val; + bool bval; + + if (sscanf(buf, "%u\n", &val) != 1) + return -EINVAL; + + bval = !!val; + if (bval != state->enable) { + state->enable = bval; + apply_need(state); + } + + return count; +} + +static ssize_t show_enable(const struct cluster_data *state, char *buf) +{ + return sysfs_emit(buf, "%u\n", state->enable); +} + +static ssize_t show_need_cpus(const struct cluster_data *state, char *buf) +{ + return sysfs_emit(buf, "%u\n", state->need_cpus); +} + +static ssize_t show_active_cpus(const struct cluster_data *state, char *buf) +{ + return sysfs_emit(buf, "%u\n", state->active_cpus); +} + +static ssize_t show_global_state(const struct cluster_data *state, 
char *buf) +{ + struct cpu_data *c; + struct cluster_data *cluster; + ssize_t count = 0; + unsigned int cpu; + + spin_lock_irq(&state_lock); + for_each_possible_cpu(cpu) { + c = &per_cpu(cpu_state, cpu); + cluster = c->cluster; + if (!cluster || !cluster->inited) + continue; + + count += sysfs_emit_at(buf, count, + "CPU%u\n", cpu); + count += sysfs_emit_at(buf, count, + "\tCPU: %u\n", c->cpu); + count += sysfs_emit_at(buf, count, + "\tOnline: %u\n", + cpu_online(c->cpu)); + count += sysfs_emit_at(buf, count, + "\tIsolated: %u\n", + cpu_isolated(c->cpu)); + count += sysfs_emit_at(buf, count, + "\tFirst CPU: %u\n", + cluster->first_cpu); + count += sysfs_emit_at(buf, count, + "\tBusy%%: %u\n", c->busy); + count += sysfs_emit_at(buf, count, + "\tIs busy: %u\n", c->is_busy); + count += sysfs_emit_at(buf, count, + "\tNot preferred: %u\n", + c->not_preferred); + count += sysfs_emit_at(buf, count, + "\tNr running: %u\n", cluster->nrrun); + count += sysfs_emit_at(buf, count, + "\tActive CPUs: %u\n", get_active_cpu_count(cluster)); + count += sysfs_emit_at(buf, count, + "\tNeed CPUs: %u\n", cluster->need_cpus); + count += sysfs_emit_at(buf, count, + "\tNr isolated CPUs: %u\n", + cluster->nr_isolated_cpus); + count += sysfs_emit_at(buf, count, + "\tBoost: %u\n", (unsigned int) cluster->boost); + } + spin_unlock_irq(&state_lock); + + return count; +} + +struct core_ctl_attr { + struct attribute attr; + ssize_t (*show)(const struct cluster_data *, char *); + ssize_t (*store)(struct cluster_data *, const char *, size_t count); +}; + +#define core_ctl_attr_ro(_name) \ +static struct core_ctl_attr _name = \ +__ATTR(_name, 0444, show_##_name, NULL) + +#define core_ctl_attr_rw(_name) \ +static struct core_ctl_attr _name = \ +__ATTR(_name, 0644, show_##_name, store_##_name) + +core_ctl_attr_rw(min_cpus); +core_ctl_attr_rw(max_cpus); +core_ctl_attr_ro(need_cpus); +core_ctl_attr_ro(active_cpus); +core_ctl_attr_ro(global_state); +core_ctl_attr_rw(enable); + +static struct attribute *default_attrs[] = { + &min_cpus.attr, + &max_cpus.attr, + &enable.attr, + &need_cpus.attr, + &active_cpus.attr, + &global_state.attr, + NULL +}; + +#define to_cluster_data(k) container_of(k, struct cluster_data, kobj) +#define to_attr(a) container_of(a, struct core_ctl_attr, attr) +static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct cluster_data *data = to_cluster_data(kobj); + struct core_ctl_attr *cattr = to_attr(attr); + ssize_t ret = -EIO; + + if (cattr->show) + ret = cattr->show(data, buf); + + return ret; +} + +static ssize_t store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct cluster_data *data = to_cluster_data(kobj); + struct core_ctl_attr *cattr = to_attr(attr); + ssize_t ret = -EIO; + + if (cattr->store) + ret = cattr->store(data, buf, count); + + return ret; +} + +static const struct sysfs_ops sysfs_ops = { + .show = show, + .store = store, +}; + +static struct kobj_type ktype_core_ctl = { + .sysfs_ops = &sysfs_ops, + .default_attrs = default_attrs, +}; + +/* ==================== runqueue based core count =================== */ + +static struct sched_avg_stats nr_stats[NR_CPUS]; + +/* + * nr_need: + * Number of tasks running on this cluster plus + * tasks running on higher capacity clusters. + * To find out CPUs needed from this cluster. 
+ * + * For example: + * On dual cluster system with 4 min capacity + * CPUs and 4 max capacity CPUs, if there are + * 4 small tasks running on min capacity CPUs + * and 2 big tasks running on 2 max capacity + * CPUs, nr_need has to be 6 for min capacity + * cluster and 2 for max capacity cluster. + * This is because, min capacity cluster has to + * account for tasks running on max capacity + * cluster, so that, the min capacity cluster + * can be ready to accommodate tasks running on max + * capacity CPUs if the demand of tasks goes down. + */ +static int compute_cluster_nr_need(int index) +{ + int cpu; + struct cluster_data *cluster; + int nr_need = 0; + + for_each_cluster(cluster, index) { + for_each_cpu(cpu, &cluster->cpu_mask) + nr_need += nr_stats[cpu].nr; + } + + return nr_need; +} + +/* + * prev_misfit_need: + * Tasks running on smaller capacity cluster which + * needs to be migrated to higher capacity cluster. + * To find out how many tasks need higher capacity CPUs. + * + * For example: + * On dual cluster system with 4 min capacity + * CPUs and 4 max capacity CPUs, if there are + * 2 small tasks and 2 big tasks running on + * min capacity CPUs and no tasks running on + * max cpacity, prev_misfit_need of min capacity + * cluster will be 0 and prev_misfit_need of + * max capacity cluster will be 2. + */ +static int compute_prev_cluster_misfit_need(int index) +{ + int cpu; + struct cluster_data *prev_cluster; + int prev_misfit_need = 0; + + /* + * Lowest capacity cluster does not have to + * accommodate any misfit tasks. + */ + if (index == 0) + return 0; + + prev_cluster = &cluster_state[index - 1]; + + for_each_cpu(cpu, &prev_cluster->cpu_mask) + prev_misfit_need += nr_stats[cpu].nr_misfit; + + return prev_misfit_need; +} + +static int compute_cluster_max_nr(int index) +{ + int cpu; + struct cluster_data *cluster = &cluster_state[index]; + int max_nr = 0; + + for_each_cpu(cpu, &cluster->cpu_mask) + max_nr = max(max_nr, nr_stats[cpu].nr_max); + + return max_nr; +} + +static int cluster_real_big_tasks(int index) +{ + int nr_big = 0; + int cpu; + struct cluster_data *cluster = &cluster_state[index]; + + if (index == 0) { + for_each_cpu(cpu, &cluster->cpu_mask) + nr_big += nr_stats[cpu].nr_misfit; + } else { + for_each_cpu(cpu, &cluster->cpu_mask) + nr_big += nr_stats[cpu].nr; + } + + return nr_big; +} + +/* + * prev_nr_need_assist: + * Tasks that are eligible to run on the previous + * cluster but cannot run because of insufficient + * CPUs there. prev_nr_need_assist is indicative + * of number of CPUs in this cluster that should + * assist its previous cluster to makeup for + * insufficient CPUs there. + * + * For example: + * On tri-cluster system with 4 min capacity + * CPUs, 3 intermediate capacity CPUs and 1 + * max capacity CPU, if there are 4 small + * tasks running on min capacity CPUs, 4 big + * tasks running on intermediate capacity CPUs + * and no tasks running on max capacity CPU, + * prev_nr_need_assist for min & max capacity + * clusters will be 0, but, for intermediate + * capacity cluster prev_nr_need_assist will + * be 1 as it has 3 CPUs, but, there are 4 big + * tasks to be served. + */ +static int prev_cluster_nr_need_assist(int index) +{ + int need = 0; + int cpu; + struct cluster_data *prev_cluster; + + if (index == 0) + return 0; + + index--; + prev_cluster = &cluster_state[index]; + + /* + * Next cluster should not assist, while there are isolated cpus + * in this cluster. 
+ */ + if (prev_cluster->nr_isolated_cpus) + return 0; + + for_each_cpu(cpu, &prev_cluster->cpu_mask) + need += nr_stats[cpu].nr; + + need += compute_prev_cluster_misfit_need(index); + + if (need > prev_cluster->active_cpus) + need = need - prev_cluster->active_cpus; + else + need = 0; + + return need; +} + +static void update_running_avg(void) +{ + struct cluster_data *cluster; + unsigned int index = 0; + unsigned long flags; + int big_avg = 0; + + sched_get_nr_running_avg(nr_stats); + + spin_lock_irqsave(&state_lock, flags); + for_each_cluster(cluster, index) { + int nr_need, prev_misfit_need; + + if (!cluster->inited) + continue; + + nr_need = compute_cluster_nr_need(index); + prev_misfit_need = compute_prev_cluster_misfit_need(index); + + + cluster->nrrun = nr_need + prev_misfit_need; + cluster->max_nr = compute_cluster_max_nr(index); + cluster->nr_prev_assist = prev_cluster_nr_need_assist(index); + trace_core_ctl_update_nr_need(cluster->first_cpu, nr_need, + prev_misfit_need, + cluster->nrrun, cluster->max_nr, + cluster->nr_prev_assist); + big_avg += cluster_real_big_tasks(index); + } + spin_unlock_irqrestore(&state_lock, flags); + + last_nr_big = big_avg; +} + +#define MAX_NR_THRESHOLD 4 +/* adjust needed CPUs based on current runqueue information */ +static unsigned int apply_task_need(const struct cluster_data *cluster, + unsigned int new_need) +{ + /* unisolate all cores if there are enough tasks */ + if (cluster->nrrun >= cluster->task_thres) + return cluster->num_cpus; + + /* + * unisolate as many cores as the previous cluster + * needs assistance with. + */ + if (cluster->nr_prev_assist >= cluster->nr_prev_assist_thresh) + new_need = new_need + cluster->nr_prev_assist; + + /* only unisolate more cores if there are tasks to run */ + if (cluster->nrrun > new_need) + new_need = new_need + 1; + + /* + * We don't want tasks to be overcrowded in a cluster. + * If any CPU has more than MAX_NR_THRESHOLD in the last + * window, bring another CPU to help out. + */ + if (cluster->max_nr > MAX_NR_THRESHOLD) + new_need = new_need + 1; + + return new_need; +} + +/* ======================= load based core count ====================== */ + +static unsigned int apply_limits(const struct cluster_data *cluster, + unsigned int need_cpus) +{ + return min(max(cluster->min_cpus, need_cpus), cluster->max_cpus); +} + +static unsigned int get_active_cpu_count(const struct cluster_data *cluster) +{ + return cluster->num_cpus - + sched_isolate_count(&cluster->cpu_mask, true); +} + +static bool is_active(const struct cpu_data *state) +{ + return cpu_online(state->cpu) && !cpu_isolated(state->cpu); +} + +static bool adjustment_possible(const struct cluster_data *cluster, + unsigned int need) +{ + return (need < cluster->active_cpus || (need > cluster->active_cpus && + cluster->nr_isolated_cpus)); +} + +static bool eval_need(struct cluster_data *cluster) +{ + unsigned long flags; + struct cpu_data *c; + unsigned int need_cpus = 0, last_need, thres_idx; + int ret = 0; + bool need_flag = false; + unsigned int new_need; + s64 now, elapsed; + + if (unlikely(!cluster->inited)) + return 0; + + spin_lock_irqsave(&state_lock, flags); + + if (cluster->boost || !cluster->enable) { + need_cpus = cluster->max_cpus; + } else { + cluster->active_cpus = get_active_cpu_count(cluster); + thres_idx = cluster->active_cpus ? 
cluster->active_cpus - 1 : 0; + list_for_each_entry(c, &cluster->lru, sib) { + bool old_is_busy = c->is_busy; + int high_irqload = sched_cpu_high_irqload(c->cpu); + + if (c->busy >= cluster->busy_up_thres[thres_idx] || + high_irqload) + c->is_busy = true; + else if (c->busy < cluster->busy_down_thres[thres_idx]) + c->is_busy = false; + trace_core_ctl_set_busy(c->cpu, c->busy, old_is_busy, + c->is_busy, high_irqload); + need_cpus += c->is_busy; + } + need_cpus = apply_task_need(cluster, need_cpus); + } + new_need = apply_limits(cluster, need_cpus); + need_flag = adjustment_possible(cluster, new_need); + + last_need = cluster->need_cpus; + now = ktime_to_ms(ktime_get()); + + if (new_need > cluster->active_cpus) { + ret = 1; + } else { + /* + * When there is no change in need and there are no more + * active CPUs than currently needed, just update the + * need time stamp and return. + */ + if (new_need == last_need && new_need == cluster->active_cpus) { + cluster->need_ts = now; + spin_unlock_irqrestore(&state_lock, flags); + return 0; + } + + elapsed = now - cluster->need_ts; + ret = elapsed >= cluster->offline_delay_ms; + } + + if (ret) { + cluster->need_ts = now; + cluster->need_cpus = new_need; + } + trace_core_ctl_eval_need(cluster->first_cpu, last_need, new_need, + ret && need_flag); + spin_unlock_irqrestore(&state_lock, flags); + + return ret && need_flag; +} + +static void apply_need(struct cluster_data *cluster) +{ + if (eval_need(cluster)) + wake_up_core_ctl_thread(cluster); +} + +/* ========================= core count enforcement ==================== */ + +static void wake_up_core_ctl_thread(struct cluster_data *cluster) +{ + unsigned long flags; + + spin_lock_irqsave(&cluster->pending_lock, flags); + cluster->pending = true; + spin_unlock_irqrestore(&cluster->pending_lock, flags); + + wake_up_process(cluster->core_ctl_thread); +} + +static u64 core_ctl_check_timestamp; + +int core_ctl_set_boost(bool boost) +{ + unsigned int index = 0; + struct cluster_data *cluster = NULL; + unsigned long flags; + int ret = 0; + bool boost_state_changed = false; + + if (unlikely(!initialized)) + return 0; + + spin_lock_irqsave(&state_lock, flags); + for_each_cluster(cluster, index) { + if (boost) { + boost_state_changed = !cluster->boost; + ++cluster->boost; + } else { + if (!cluster->boost) { + ret = -EINVAL; + break; + } else { + --cluster->boost; + boost_state_changed = !cluster->boost; + } + } + } + spin_unlock_irqrestore(&state_lock, flags); + + if (boost_state_changed) { + index = 0; + for_each_cluster(cluster, index) + apply_need(cluster); + } + + if (cluster) + trace_core_ctl_set_boost(cluster->boost, ret); + + return ret; +} +EXPORT_SYMBOL(core_ctl_set_boost); + +void core_ctl_check(u64 window_start) +{ + int cpu; + struct cpu_data *c; + struct cluster_data *cluster; + unsigned int index = 0; + unsigned long flags; + + if (unlikely(!initialized)) + return; + + if (window_start == core_ctl_check_timestamp) + return; + + core_ctl_check_timestamp = window_start; + + spin_lock_irqsave(&state_lock, flags); + for_each_possible_cpu(cpu) { + + c = &per_cpu(cpu_state, cpu); + cluster = c->cluster; + + if (!cluster || !cluster->inited) + continue; + + c->busy = sched_get_cpu_util(cpu); + } + spin_unlock_irqrestore(&state_lock, flags); + + update_running_avg(); + + for_each_cluster(cluster, index) { + if (eval_need(cluster)) + wake_up_core_ctl_thread(cluster); + } +} + +static void move_cpu_lru(struct cpu_data *cpu_data) +{ + unsigned long flags; + + spin_lock_irqsave(&state_lock, flags); + 
list_del(&cpu_data->sib); + list_add_tail(&cpu_data->sib, &cpu_data->cluster->lru); + spin_unlock_irqrestore(&state_lock, flags); +} + +static void try_to_isolate(struct cluster_data *cluster, unsigned int need) +{ + struct cpu_data *c, *tmp; + unsigned long flags; + unsigned int num_cpus = cluster->num_cpus; + unsigned int nr_isolated = 0; + bool first_pass = cluster->nr_not_preferred_cpus; + + /* + * Protect against entry being removed (and added at tail) by other + * thread (hotplug). + */ + spin_lock_irqsave(&state_lock, flags); + list_for_each_entry_safe(c, tmp, &cluster->lru, sib) { + if (!num_cpus--) + break; + + if (!is_active(c)) + continue; + if (cluster->active_cpus == need) + break; + /* Don't isolate busy CPUs. */ + if (c->is_busy) + continue; + + /* + * We isolate only the not_preferred CPUs. If none + * of the CPUs are selected as not_preferred, then + * all CPUs are eligible for isolation. + */ + if (cluster->nr_not_preferred_cpus && !c->not_preferred) + continue; + + spin_unlock_irqrestore(&state_lock, flags); + + pr_debug("Trying to isolate CPU%u\n", c->cpu); + if (!sched_isolate_cpu(c->cpu)) { + c->isolated_by_us = true; + move_cpu_lru(c); + nr_isolated++; + } else { + pr_debug("Unable to isolate CPU%u\n", c->cpu); + } + cluster->active_cpus = get_active_cpu_count(cluster); + spin_lock_irqsave(&state_lock, flags); + } + cluster->nr_isolated_cpus += nr_isolated; + spin_unlock_irqrestore(&state_lock, flags); + +again: + /* + * If the number of active CPUs is within the limits, then + * don't force isolation of any busy CPUs. + */ + if (cluster->active_cpus <= cluster->max_cpus) + return; + + nr_isolated = 0; + num_cpus = cluster->num_cpus; + spin_lock_irqsave(&state_lock, flags); + list_for_each_entry_safe(c, tmp, &cluster->lru, sib) { + if (!num_cpus--) + break; + + if (!is_active(c)) + continue; + if (cluster->active_cpus <= cluster->max_cpus) + break; + + if (first_pass && !c->not_preferred) + continue; + + spin_unlock_irqrestore(&state_lock, flags); + + pr_debug("Trying to isolate CPU%u\n", c->cpu); + if (!sched_isolate_cpu(c->cpu)) { + c->isolated_by_us = true; + move_cpu_lru(c); + nr_isolated++; + } else { + pr_debug("Unable to isolate CPU%u\n", c->cpu); + } + cluster->active_cpus = get_active_cpu_count(cluster); + spin_lock_irqsave(&state_lock, flags); + } + cluster->nr_isolated_cpus += nr_isolated; + spin_unlock_irqrestore(&state_lock, flags); + + if (first_pass && cluster->active_cpus > cluster->max_cpus) { + first_pass = false; + goto again; + } +} + +static void __try_to_unisolate(struct cluster_data *cluster, + unsigned int need, bool force) +{ + struct cpu_data *c, *tmp; + unsigned long flags; + unsigned int num_cpus = cluster->num_cpus; + unsigned int nr_unisolated = 0; + + /* + * Protect against entry being removed (and added at tail) by other + * thread (hotplug). 
+ */ + spin_lock_irqsave(&state_lock, flags); + list_for_each_entry_safe(c, tmp, &cluster->lru, sib) { + if (!num_cpus--) + break; + + if (!c->isolated_by_us) + continue; + if ((cpu_online(c->cpu) && !cpu_isolated(c->cpu)) || + (!force && c->not_preferred)) + continue; + if (cluster->active_cpus == need) + break; + + spin_unlock_irqrestore(&state_lock, flags); + + pr_debug("Trying to unisolate CPU%u\n", c->cpu); + if (!sched_unisolate_cpu(c->cpu)) { + c->isolated_by_us = false; + move_cpu_lru(c); + nr_unisolated++; + } else { + pr_debug("Unable to unisolate CPU%u\n", c->cpu); + } + cluster->active_cpus = get_active_cpu_count(cluster); + spin_lock_irqsave(&state_lock, flags); + } + cluster->nr_isolated_cpus -= nr_unisolated; + spin_unlock_irqrestore(&state_lock, flags); +} + +static void try_to_unisolate(struct cluster_data *cluster, unsigned int need) +{ + bool force_use_non_preferred = false; + + __try_to_unisolate(cluster, need, force_use_non_preferred); + + if (cluster->active_cpus == need) + return; + + force_use_non_preferred = true; + __try_to_unisolate(cluster, need, force_use_non_preferred); +} + +static void __ref do_core_ctl(struct cluster_data *cluster) +{ + unsigned int need; + + need = apply_limits(cluster, cluster->need_cpus); + + if (adjustment_possible(cluster, need)) { + pr_debug("Trying to adjust group %u from %u to %u\n", + cluster->first_cpu, cluster->active_cpus, need); + + if (cluster->active_cpus > need) + try_to_isolate(cluster, need); + else if (cluster->active_cpus < need) + try_to_unisolate(cluster, need); + } +} + +static int __ref try_core_ctl(void *data) +{ + struct cluster_data *cluster = data; + unsigned long flags; + + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + spin_lock_irqsave(&cluster->pending_lock, flags); + if (!cluster->pending) { + spin_unlock_irqrestore(&cluster->pending_lock, flags); + schedule(); + if (kthread_should_stop()) + break; + spin_lock_irqsave(&cluster->pending_lock, flags); + } + set_current_state(TASK_RUNNING); + cluster->pending = false; + spin_unlock_irqrestore(&cluster->pending_lock, flags); + + do_core_ctl(cluster); + } + + return 0; +} + +static int isolation_cpuhp_state(unsigned int cpu, bool online) +{ + struct cpu_data *state = &per_cpu(cpu_state, cpu); + struct cluster_data *cluster = state->cluster; + unsigned int need; + bool do_wakeup = false, unisolated = false; + unsigned long flags; + + if (unlikely(!cluster || !cluster->inited)) + return 0; + + if (online) { + cluster->active_cpus = get_active_cpu_count(cluster); + + /* + * Moving to the end of the list should only happen in + * CPU_ONLINE and not on CPU_UP_PREPARE to prevent an + * infinite list traversal when thermal (or other entities) + * reject trying to online CPUs. + */ + move_cpu_lru(state); + } else { + /* + * We don't want to have a CPU both offline and isolated. + * So unisolate a CPU that went down if it was isolated by us. + */ + if (state->isolated_by_us) { + sched_unisolate_cpu_unlocked(cpu); + state->isolated_by_us = false; + unisolated = true; + } + + /* Move a CPU to the end of the LRU when it goes offline. 
*/ + move_cpu_lru(state); + + state->busy = 0; + cluster->active_cpus = get_active_cpu_count(cluster); + } + + need = apply_limits(cluster, cluster->need_cpus); + spin_lock_irqsave(&state_lock, flags); + if (unisolated) + cluster->nr_isolated_cpus--; + do_wakeup = adjustment_possible(cluster, need); + spin_unlock_irqrestore(&state_lock, flags); + if (do_wakeup) + wake_up_core_ctl_thread(cluster); + + return 0; +} + +static int core_ctl_isolation_online_cpu(unsigned int cpu) +{ + return isolation_cpuhp_state(cpu, true); +} + +static int core_ctl_isolation_dead_cpu(unsigned int cpu) +{ + return isolation_cpuhp_state(cpu, false); +} + +/* ============================ init code ============================== */ + +static struct cluster_data *find_cluster_by_first_cpu(unsigned int first_cpu) +{ + unsigned int i; + + for (i = 0; i < num_clusters; ++i) { + if (cluster_state[i].first_cpu == first_cpu) + return &cluster_state[i]; + } + + return NULL; +} + +static int cluster_init(const struct cpumask *mask) +{ + struct device *dev; + unsigned int first_cpu = cpumask_first(mask); + struct cluster_data *cluster; + struct cpu_data *state; + unsigned int cpu; + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + + if (find_cluster_by_first_cpu(first_cpu)) + return 0; + + dev = get_cpu_device(first_cpu); + if (!dev) + return -ENODEV; + + pr_info("Creating CPU group %d\n", first_cpu); + + if (num_clusters == MAX_CLUSTERS) { + pr_err("Unsupported number of clusters. Only %u supported\n", + MAX_CLUSTERS); + return -EINVAL; + } + cluster = &cluster_state[num_clusters]; + ++num_clusters; + + cpumask_copy(&cluster->cpu_mask, mask); + cluster->num_cpus = cpumask_weight(mask); + if (cluster->num_cpus > MAX_CPUS_PER_CLUSTER) { + pr_err("HW configuration not supported\n"); + return -EINVAL; + } + cluster->first_cpu = first_cpu; + cluster->min_cpus = 1; + cluster->max_cpus = cluster->num_cpus; + cluster->need_cpus = cluster->num_cpus; + cluster->offline_delay_ms = 100; + cluster->task_thres = UINT_MAX; + cluster->nr_prev_assist_thresh = UINT_MAX; + cluster->nrrun = cluster->num_cpus; + cluster->enable = true; + cluster->nr_not_preferred_cpus = 0; + INIT_LIST_HEAD(&cluster->lru); + spin_lock_init(&cluster->pending_lock); + + for_each_cpu(cpu, mask) { + pr_info("Init CPU%u state\n", cpu); + + state = &per_cpu(cpu_state, cpu); + state->cluster = cluster; + state->cpu = cpu; + list_add_tail(&state->sib, &cluster->lru); + } + cluster->active_cpus = get_active_cpu_count(cluster); + + cluster->core_ctl_thread = kthread_run(try_core_ctl, (void *) cluster, + "core_ctl/%d", first_cpu); + if (IS_ERR(cluster->core_ctl_thread)) + return PTR_ERR(cluster->core_ctl_thread); + + sched_setscheduler_nocheck(cluster->core_ctl_thread, SCHED_FIFO, + ¶m); + + cluster->inited = true; + + kobject_init(&cluster->kobj, &ktype_core_ctl); + return kobject_add(&cluster->kobj, &dev->kobj, "core_ctl"); +} + +static int __init core_ctl_init(void) +{ + struct sched_cluster *cluster; + int ret; + + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, + "core_ctl/isolation:online", + core_ctl_isolation_online_cpu, NULL); + + cpuhp_setup_state_nocalls(CPUHP_CORE_CTL_ISOLATION_DEAD, + "core_ctl/isolation:dead", + NULL, core_ctl_isolation_dead_cpu); + + for_each_sched_cluster(cluster) { + ret = cluster_init(&cluster->cpus); + if (ret) + pr_warn("unable to create core ctl group: %d\n", ret); + } + + initialized = true; + return 0; +} + +late_initcall(core_ctl_init); diff --git a/kernel/sched/core_ctl.h b/kernel/sched/core_ctl.h new file mode 
100755 index 0000000000000000000000000000000000000000..0be55ac6a526ba8c9ad246e1b674e4f26c17728a --- /dev/null +++ b/kernel/sched/core_ctl.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2016, 2019-2020, The Linux Foundation. All rights reserved. + */ + +#ifndef __CORE_CTL_H +#define __CORE_CTL_H + +#ifdef CONFIG_SCHED_CORE_CTRL +void core_ctl_check(u64 wallclock); +int core_ctl_set_boost(bool boost); +#else +static inline void core_ctl_check(u64 wallclock) {} +static inline int core_ctl_set_boost(bool boost) +{ + return 0; +} +#endif +#endif diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 458d359f5991ca7977fb655da4cbb1f71b53bebc..035ed07966e0c09c1d694599181b8becdb1a1602 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -159,8 +159,12 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) struct rq *rq = cpu_rq(sg_cpu->cpu); sg_cpu->bw_dl = cpu_bw_dl(rq); +#ifdef CONFIG_SCHED_WALT + cpu_util_freq_walt(sg_cpu->cpu); +#else sg_cpu->util = effective_cpu_util(sg_cpu->cpu, util, FREQUENCY_UTIL, NULL); +#endif } /** @@ -448,7 +452,12 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags) ignore_dl_rate_limit(sg_cpu); +#ifdef CONFIG_SCHED_WALT + if ((sugov_should_update_freq(sg_policy, time)) + && !(flags & SCHED_CPUFREQ_CONTINUE)) { +#else if (sugov_should_update_freq(sg_policy, time)) { +#endif next_f = sugov_next_freq_shared(sg_cpu, time); if (!sugov_update_next_freq(sg_policy, time, next_f)) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index af7952f12e6cf19ac9b953fbbf54df823372f497..46b5a71f49b271a92d92674d33e769b1a061e2c4 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -6,6 +6,7 @@ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #include #endif +#include "walt.h" #ifdef CONFIG_IRQ_TIME_ACCOUNTING @@ -56,11 +57,18 @@ void irqtime_account_irq(struct task_struct *curr, unsigned int offset) unsigned int pc; s64 delta; int cpu; +#ifdef CONFIG_SCHED_WALT + u64 wallclock; + bool account = true; +#endif if (!sched_clock_irqtime) return; cpu = smp_processor_id(); +#ifdef CONFIG_SCHED_WALT + wallclock = sched_clock_cpu(cpu); +#endif delta = sched_clock_cpu(cpu) - irqtime->irq_start_time; irqtime->irq_start_time += delta; pc = irq_count() - offset; @@ -75,6 +83,13 @@ void irqtime_account_irq(struct task_struct *curr, unsigned int offset) irqtime_account_delta(irqtime, delta, CPUTIME_IRQ); else if ((pc & SOFTIRQ_OFFSET) && curr != this_cpu_ksoftirqd()) irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ); +#ifdef CONFIG_SCHED_WALT + else + account = false; + + if (account) + sched_account_irqtime(cpu, curr, delta, wallclock); +#endif } static u64 irqtime_tick_accounted(u64 maxtime) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index d78f2e8769fb4ca428ddc6b407fcdef92dd13eb8..f1a58834527c44af0b81d89d2d792f2b7b5014ca 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -17,6 +17,7 @@ */ #include +#include "walt.h" /* * Default limits for DL period; on the top end we guard against small util @@ -1502,6 +1503,7 @@ void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) WARN_ON(!dl_prio(prio)); dl_rq->dl_nr_running++; add_nr_running(rq_of_dl_rq(dl_rq), 1); + walt_inc_cumulative_runnable_avg(rq_of_dl_rq(dl_rq), dl_task_of(dl_se)); inc_dl_deadline(dl_rq, deadline); inc_dl_migration(dl_se, dl_rq); @@ -1516,6 +1518,7 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) 
WARN_ON(!dl_rq->dl_nr_running); dl_rq->dl_nr_running--; sub_nr_running(rq_of_dl_rq(dl_rq), 1); + walt_dec_cumulative_runnable_avg(rq_of_dl_rq(dl_rq), dl_task_of(dl_se)); dec_dl_deadline(dl_rq, dl_se->deadline); dec_dl_migration(dl_se, dl_rq); @@ -2752,6 +2755,9 @@ DEFINE_SCHED_CLASS(dl) = { #ifdef CONFIG_SCHED_CORE .task_is_throttled = task_is_throttled_dl, #endif +#ifdef CONFIG_SCHED_WALT + .fixup_walt_sched_stats = fixup_walt_sched_stats_common, +#endif }; /* Used for dl_bw check and update, used under sched_rt_handler()::mutex */ diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 83932b92dbb392d4e211b6af24292e84f3e1aa89..4a60da9af1ac59648a4e58ac6cd30c22c0df3fca 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -793,6 +793,17 @@ do { \ SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr))); PN(clock); PN(clock_task); +#ifdef CONFIG_SCHED_WALT + P(cluster->load_scale_factor); + P(cluster->capacity); + P(cluster->max_possible_capacity); + P(cluster->efficiency); + P(cluster->cur_freq); + P(cluster->max_freq); + P(cluster->exec_scale_factor); + SEQ_printf(m, " .%-30s: %llu\n", "walt_stats.cumulative_runnable_avg", + rq->walt_stats.cumulative_runnable_avg_scaled); +#endif #undef P #undef PN @@ -867,6 +878,12 @@ static void sched_debug_header(struct seq_file *m) PN(sysctl_sched_base_slice); P(sysctl_sched_child_runs_first); P(sysctl_sched_features); +#ifdef CONFIG_SCHED_WALT + P(sched_init_task_load_windows); + P(min_capacity); + P(max_capacity); + P(sched_ravg_window); +#endif #undef PN #undef P @@ -1040,6 +1057,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, P_SCHEDSTAT(nr_wakeups_affine_attempts); P_SCHEDSTAT(nr_wakeups_passive); P_SCHEDSTAT(nr_wakeups_idle); +#ifdef CONFIG_SCHED_WALT + P(ravg.demand); +#endif avg_atom = p->se.sum_exec_runtime; if (nr_switches) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 566a4708cfd2751d3740e4710040b4fb19adc178..f722e118ea2bfc39878cd0aab0ece77b00d21f94 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -56,6 +56,34 @@ #include "sched.h" #include "stats.h" #include "autogroup.h" +#include "walt.h" + +#ifdef CONFIG_SCHED_WALT +static void walt_fixup_sched_stats_fair(struct rq *rq, struct task_struct *p, + u16 updated_demand_scaled); +#endif + +#if defined(CONFIG_SCHED_WALT) && defined(CONFIG_CFS_BANDWIDTH) +static void walt_init_cfs_rq_stats(struct cfs_rq *cfs_rq); +static void walt_inc_cfs_rq_stats(struct cfs_rq *cfs_rq, + struct task_struct *p); +static void walt_dec_cfs_rq_stats(struct cfs_rq *cfs_rq, + struct task_struct *p); +static void walt_inc_throttled_cfs_rq_stats(struct walt_sched_stats *stats, + struct cfs_rq *cfs_rq); +static void walt_dec_throttled_cfs_rq_stats(struct walt_sched_stats *stats, + struct cfs_rq *cfs_rq); +#else +static inline void walt_init_cfs_rq_stats(struct cfs_rq *cfs_rq) {} +static inline void +walt_inc_cfs_rq_stats(struct cfs_rq *cfs_rq, struct task_struct *p) {} +static inline void +walt_dec_cfs_rq_stats(struct cfs_rq *cfs_rq, struct task_struct *p) {} + +#define walt_inc_throttled_cfs_rq_stats(...) +#define walt_dec_throttled_cfs_rq_stats(...) 
+ +#endif /* * Targeted preemption latency for CPU-bound tasks: @@ -4753,6 +4781,10 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf); static inline unsigned long task_util(struct task_struct *p) { +#ifdef CONFIG_SCHED_WALT + if (likely(!walt_disabled && sysctl_sched_use_walt_task_util)) + return p->ravg.demand_scaled; +#endif return READ_ONCE(p->se.avg.util_avg); } @@ -4765,6 +4797,10 @@ static inline unsigned long _task_util_est(struct task_struct *p) static inline unsigned long task_util_est(struct task_struct *p) { +#ifdef CONFIG_SCHED_WALT + if (likely(!walt_disabled && sysctl_sched_use_walt_task_util)) + return p->ravg.demand_scaled; +#endif return max(task_util(p), _task_util_est(p)); } @@ -5758,10 +5794,12 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq) qcfs_rq->h_nr_running -= task_delta; qcfs_rq->idle_h_nr_running -= idle_task_delta; + walt_dec_throttled_cfs_rq_stats(&qcfs_rq->walt_stats, cfs_rq); } /* At this point se is NULL and we are at root level*/ sub_nr_running(rq, task_delta); + walt_dec_throttled_cfs_rq_stats(&rq->walt_stats, cfs_rq); done: /* @@ -5781,6 +5819,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); struct sched_entity *se; long task_delta, idle_task_delta; + struct cfs_rq *tcfs_rq __maybe_unused = cfs_rq; se = cfs_rq->tg->se[cpu_of(rq)]; @@ -5827,6 +5866,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) qcfs_rq->h_nr_running += task_delta; qcfs_rq->idle_h_nr_running += idle_task_delta; + walt_inc_throttled_cfs_rq_stats(&cfs_rq->walt_stats, tcfs_rq); /* end evaluation on encountering a throttled cfs_rq */ if (cfs_rq_throttled(qcfs_rq)) @@ -5844,6 +5884,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) qcfs_rq->h_nr_running += task_delta; qcfs_rq->idle_h_nr_running += idle_task_delta; + walt_inc_throttled_cfs_rq_stats(&cfs_rq->walt_stats, tcfs_rq); /* end evaluation on encountering a throttled cfs_rq */ if (cfs_rq_throttled(qcfs_rq)) @@ -5852,6 +5893,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) /* At this point se is NULL and we are at root level*/ add_nr_running(rq, task_delta); + walt_inc_throttled_cfs_rq_stats(&rq->walt_stats, tcfs_rq); unthrottle_throttle: assert_list_leaf_cfs_rq(rq); @@ -6336,6 +6378,7 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) #ifdef CONFIG_SMP INIT_LIST_HEAD(&cfs_rq->throttled_csd_list); #endif + walt_init_cfs_rq_stats(cfs_rq); } void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) @@ -6698,7 +6741,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) cfs_rq->h_nr_running++; cfs_rq->idle_h_nr_running += idle_h_nr_running; - + walt_inc_cfs_rq_stats(cfs_rq, p); if (cfs_rq_is_idle(cfs_rq)) idle_h_nr_running = 1; @@ -6718,7 +6761,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) cfs_rq->h_nr_running++; cfs_rq->idle_h_nr_running += idle_h_nr_running; - + walt_inc_cfs_rq_stats(cfs_rq, p); if (cfs_rq_is_idle(cfs_rq)) idle_h_nr_running = 1; @@ -6729,7 +6772,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) /* At this point se is NULL and we are at root level*/ add_nr_running(rq, 1); - + inc_rq_walt_stats(rq, p); /* * Since new tasks are assigned an initial util_avg equal to * half of the spare capacity of their CPU, tiny tasks have the @@ -6781,7 +6824,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) cfs_rq->h_nr_running--; cfs_rq->idle_h_nr_running -= idle_h_nr_running; - + walt_dec_cfs_rq_stats(cfs_rq, p); if (cfs_rq_is_idle(cfs_rq)) 
idle_h_nr_running = 1; @@ -6813,7 +6856,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) cfs_rq->h_nr_running--; cfs_rq->idle_h_nr_running -= idle_h_nr_running; - + walt_dec_cfs_rq_stats(cfs_rq, p); if (cfs_rq_is_idle(cfs_rq)) idle_h_nr_running = 1; @@ -6825,6 +6868,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) /* At this point se is NULL and we are at root level*/ sub_nr_running(rq, 1); + dec_rq_walt_stats(rq, p); /* balance early to pull high priority tasks */ if (unlikely(!was_sched_idle && sched_idle_rq(rq))) @@ -7641,6 +7685,16 @@ cpu_util(int cpu, struct task_struct *p, int dst_cpu, int boost) unsigned long util = READ_ONCE(cfs_rq->avg.util_avg); unsigned long runnable; +#ifdef CONFIG_SCHED_WALT + if (likely(!walt_disabled && sysctl_sched_use_walt_cpu_util)) { + u64 walt_cpu_util = + cpu_rq(cpu)->walt_stats.cumulative_runnable_avg_scaled; + + return min_t(unsigned long, walt_cpu_util, + capacity_orig_of(cpu)); + } +#endif + if (boost) { runnable = READ_ONCE(cfs_rq->avg.runnable_avg); util = max(util, runnable); @@ -7724,11 +7778,29 @@ unsigned long cpu_util_cfs_boost(int cpu) */ static unsigned long cpu_util_without(int cpu, struct task_struct *p) { + unsigned int util; +#ifdef CONFIG_SCHED_WALT + /* + * WALT does not decay idle tasks in the same manner + * as PELT, so it makes little sense to subtract task + * utilization from cpu utilization. Instead just use + * cpu_util for this case. + */ + if (likely(!walt_disabled && sysctl_sched_use_walt_cpu_util) && + p->__state == TASK_WAKING) + return cpu_util_cfs(cpu); +#endif /* Task has no contribution or is new */ if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time)) p = NULL; return cpu_util(cpu, p, -1, 0); +#ifdef CONFIG_SCHED_WALT + if (likely(!walt_disabled && sysctl_sched_use_walt_cpu_util)) { + util = max_t(long, cpu_util_cfs(cpu) - task_util(p), 0); + return min_t(unsigned long, util, capacity_orig_of(cpu)); + } +#endif } /* @@ -7804,6 +7876,18 @@ static inline void eenv_pd_busy_time(struct energy_env *eenv, eenv->pd_busy_time = min(eenv->pd_cap, busy_time); } +/* + * Returns the current capacity of cpu after applying both + * cpu and freq scaling. + */ +unsigned long capacity_curr_of(int cpu) +{ + unsigned long max_cap = cpu_rq(cpu)->cpu_capacity_orig; + unsigned long scale_freq = arch_scale_freq_capacity(cpu); + + return cap_scale(max_cap, scale_freq); +} + /* * Compute the maximum utilization for compute_energy() when the task @p * is placed on the cpu @dst_cpu. 
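The capacity_curr_of() helper added above simply rescales a CPU's original capacity by its current frequency scale. A minimal standalone sketch of that arithmetic, assuming the kernel's usual cap_scale() definition of (cap * scale) >> SCHED_CAPACITY_SHIFT and purely illustrative capacity/frequency values:

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define cap_scale(cap, scale)	(((cap) * (scale)) >> SCHED_CAPACITY_SHIFT)

int main(void)
{
	unsigned long max_cap = 1024;	/* illustrative cpu_capacity_orig of a big core */
	unsigned long scale_freq = 512;	/* illustrative arch_scale_freq_capacity(): ~50% of fmax */

	/* 1024 * 512 >> 10 = 512: current capacity after frequency scaling */
	printf("capacity_curr = %lu\n", cap_scale(max_cap, scale_freq));
	return 0;
}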
@@ -9044,7 +9128,15 @@ static void detach_task(struct task_struct *p, struct lb_env *env) lockdep_assert_rq_held(env->src_rq); deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK); +#ifdef CONFIG_SCHED_WALT + double_lock_balance(env->src_rq, env->dst_rq); + if (!(env->src_rq->clock_update_flags & RQCF_UPDATED)) + update_rq_clock(env->src_rq); +#endif set_task_cpu(p, env->dst_cpu); +#ifdef CONFIG_SCHED_WALT + double_unlock_balance(env->src_rq, env->dst_rq); +#endif } /* @@ -13212,6 +13304,10 @@ DEFINE_SCHED_CLASS(fair) = { #ifdef CONFIG_UCLAMP_TASK .uclamp_enabled = 1, #endif +#ifdef CONFIG_SCHED_WALT + .fixup_walt_sched_stats = walt_fixup_sched_stats_fair, +#endif + }; #ifdef CONFIG_SCHED_DEBUG @@ -13277,3 +13373,91 @@ __init void init_sched_fair_class(void) #endif /* SMP */ } + +/* WALT sched implementation begins here */ +#ifdef CONFIG_SCHED_WALT + +#ifdef CONFIG_CFS_BANDWIDTH + +static void walt_init_cfs_rq_stats(struct cfs_rq *cfs_rq) +{ + cfs_rq->walt_stats.cumulative_runnable_avg_scaled = 0; +} + +static void walt_inc_cfs_rq_stats(struct cfs_rq *cfs_rq, struct task_struct *p) +{ + fixup_cumulative_runnable_avg(&cfs_rq->walt_stats, + p->ravg.demand_scaled); +} + +static void walt_dec_cfs_rq_stats(struct cfs_rq *cfs_rq, struct task_struct *p) +{ + fixup_cumulative_runnable_avg(&cfs_rq->walt_stats, + -(s64)p->ravg.demand_scaled); +} + +static void walt_inc_throttled_cfs_rq_stats(struct walt_sched_stats *stats, + struct cfs_rq *tcfs_rq) +{ + struct rq *rq = rq_of(tcfs_rq); + + fixup_cumulative_runnable_avg(stats, + tcfs_rq->walt_stats.cumulative_runnable_avg_scaled); + + if (stats == &rq->walt_stats) + walt_fixup_cum_window_demand(rq, + tcfs_rq->walt_stats.cumulative_runnable_avg_scaled); + +} + +static void walt_dec_throttled_cfs_rq_stats(struct walt_sched_stats *stats, + struct cfs_rq *tcfs_rq) +{ + struct rq *rq = rq_of(tcfs_rq); + + fixup_cumulative_runnable_avg(stats, + -tcfs_rq->walt_stats.cumulative_runnable_avg_scaled); + + /* + * We remove the throttled cfs_rq's tasks's contribution from the + * cumulative window demand so that the same can be added + * unconditionally when the cfs_rq is unthrottled. 
+ */ + if (stats == &rq->walt_stats) + walt_fixup_cum_window_demand(rq, + -tcfs_rq->walt_stats.cumulative_runnable_avg_scaled); +} + +static void walt_fixup_sched_stats_fair(struct rq *rq, struct task_struct *p, + u16 updated_demand_scaled) +{ + struct cfs_rq *cfs_rq; + struct sched_entity *se = &p->se; + s64 task_load_delta = (s64)updated_demand_scaled - + p->ravg.demand_scaled; + + for_each_sched_entity(se) { + cfs_rq = cfs_rq_of(se); + + fixup_cumulative_runnable_avg(&cfs_rq->walt_stats, + task_load_delta); + if (cfs_rq_throttled(cfs_rq)) + break; + } + + /* Fix up rq->walt_stats only if we didn't find any throttled cfs_rq */ + if (!se) { + fixup_cumulative_runnable_avg(&rq->walt_stats, + task_load_delta); + walt_fixup_cum_window_demand(rq, task_load_delta); + } +} + +#else /* CONFIG_CFS_BANDWIDTH */ +static void walt_fixup_sched_stats_fair(struct rq *rq, struct task_struct *p, + u16 updated_demand_scaled) +{ + fixup_walt_sched_stats_common(rq, p, updated_demand_scaled); +} +#endif /* CONFIG_CFS_BANDWIDTH */ +#endif /* CONFIG_SCHED_WALT */ diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 4ac36eb4cdee582410b267ffd9617f51d22ec5fc..758bb5d522d11aa721cba9d7149df2ff3bfab039 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -3,7 +3,9 @@ * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR * policies) */ +#include "sched.h" +#include "walt.h" int sched_rr_timeslice = RR_TIMESLICE; /* More than 4 hours if BW_SHIFT equals 20. */ static const u64 max_rt_runtime = MAX_BW; @@ -1547,6 +1549,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) update_stats_wait_start_rt(rt_rq_of_se(rt_se), rt_se); enqueue_rt_entity(rt_se, flags); + walt_inc_cumulative_runnable_avg(rq, p); if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); @@ -1558,6 +1561,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) update_curr_rt(rq); dequeue_rt_entity(rt_se, flags); + walt_dec_cumulative_runnable_avg(rq, p); dequeue_pushable_task(rq, p); } @@ -2744,6 +2748,9 @@ DEFINE_SCHED_CLASS(rt) = { #ifdef CONFIG_UCLAMP_TASK .uclamp_enabled = 1, #endif +#ifdef CONFIG_SCHED_WALT + .fixup_walt_sched_stats = fixup_walt_sched_stats_common, +#endif }; #ifdef CONFIG_RT_GROUP_SCHED diff --git a/kernel/sched/rtg/Kconfig b/kernel/sched/rtg/Kconfig new file mode 100755 index 0000000000000000000000000000000000000000..1cb0c4298b097e03c6860e37eea3bde664fea260 --- /dev/null +++ b/kernel/sched/rtg/Kconfig @@ -0,0 +1,40 @@ +menu "Related Thread Group" + +config SCHED_RTG + bool "Related Thread Group" + depends on SCHED_WALT + default n + help + Set related threads into a group. + +config SCHED_RTG_DEBUG + bool "Related Thread Group DebugFS" + depends on SCHED_RTG + default n + help + If set, debug node will show rtg threads + +config SCHED_RTG_CGROUP + bool "enable DEFAULT_CGROUP_COLOC RTG" + depends on SCHED_RTG + default n + help + If set, support for adding the tasks which belong to + co-located cgroup to DEFAULT_CGROUP_COLOC RTG. + +config SCHED_RTG_FRAME + bool "Frame-based Related Thread Group" + depends on SCHED_RTG + default n + help + Support frame-based related thread group scheduling. + If set, you can set the task to RTG and kernel will + statistic the load per frame. + +config SCHED_RTG_RT_THREAD_LIMIT + bool "Limit the number of RT threads in groups" + depends on SCHED_RTG_FRAME + default n + help + If set, limit the number of RT threads in frame RTG. 
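+
+# The options above form a dependency chain:
+# SCHED_WALT -> SCHED_RTG -> SCHED_RTG_FRAME -> SCHED_RTG_RT_THREAD_LIMIT,
+# with SCHED_RTG_DEBUG and SCHED_RTG_CGROUP hanging off SCHED_RTG.
+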
+endmenu diff --git a/kernel/sched/rtg/Makefile b/kernel/sched/rtg/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..4d55523d1f32b8acb0404b943de3cb407d7b3832 --- /dev/null +++ b/kernel/sched/rtg/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_SCHED_RTG) += rtg.o +obj-$(CONFIG_SCHED_RTG_FRAME) += frame_rtg.o rtg_ctrl.o diff --git a/kernel/sched/rtg/frame_rtg.c b/kernel/sched/rtg/frame_rtg.c new file mode 100755 index 0000000000000000000000000000000000000000..79db645228c42352e8e49847f09d79b67fdd6bfd --- /dev/null +++ b/kernel/sched/rtg/frame_rtg.c @@ -0,0 +1,1229 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Frame-based load tracking for rt_frame and RTG + * + * Copyright (c) 2022-2023 Huawei Technologies Co., Ltd. + */ + +#include "frame_rtg.h" +#include "rtg.h" + +#include +#include +#include <../kernel/sched/sched.h> +#include + +static struct multi_frame_id_manager g_id_manager = { + .id_map = {0}, + .offset = 0, + .lock = __RW_LOCK_UNLOCKED(g_id_manager.lock) +}; + +static struct frame_info g_multi_frame_info[MULTI_FRAME_NUM]; + +static bool is_rtg_rt_task(struct task_struct *task) +{ + bool ret = false; + + if (!task) + return ret; + + ret = ((task->prio < MAX_RT_PRIO) && + (task->rtg_depth == STATIC_RTG_DEPTH)); + + return ret; +} + +#ifdef CONFIG_SCHED_RTG_RT_THREAD_LIMIT +static atomic_t g_rtg_rt_thread_num = ATOMIC_INIT(0); + +static unsigned int _get_rtg_rt_thread_num(struct related_thread_group *grp) +{ + unsigned int rtg_rt_thread_num = 0; + struct task_struct *p = NULL; + + if (list_empty(&grp->tasks)) + goto out; + + list_for_each_entry(p, &grp->tasks, grp_list) { + if (is_rtg_rt_task(p)) + ++rtg_rt_thread_num; + } + +out: + return rtg_rt_thread_num; +} + +static unsigned int get_rtg_rt_thread_num(void) +{ + struct related_thread_group *grp = NULL; + unsigned int total_rtg_rt_thread_num = 0; + unsigned long flag; + unsigned int i; + + for (i = MULTI_FRAME_ID; i < MULTI_FRAME_ID + MULTI_FRAME_NUM; i++) { + grp = lookup_related_thread_group(i); + if (grp == NULL) + continue; + raw_spin_lock_irqsave(&grp->lock, flag); + total_rtg_rt_thread_num += _get_rtg_rt_thread_num(grp); + raw_spin_unlock_irqrestore(&grp->lock, flag); + } + + return total_rtg_rt_thread_num; +} + +static void inc_rtg_rt_thread_num(void) +{ + atomic_inc(&g_rtg_rt_thread_num); +} + +static void dec_rtg_rt_thread_num(void) +{ + atomic_dec_if_positive(&g_rtg_rt_thread_num); +} + +static int test_and_read_rtg_rt_thread_num(void) +{ + if (atomic_read(&g_rtg_rt_thread_num) >= RTG_MAX_RT_THREAD_NUM) + atomic_set(&g_rtg_rt_thread_num, get_rtg_rt_thread_num()); + + return atomic_read(&g_rtg_rt_thread_num); +} + +int read_rtg_rt_thread_num(void) +{ + return atomic_read(&g_rtg_rt_thread_num); +} +#else +static inline void inc_rtg_rt_thread_num(void) { } +static inline void dec_rtg_rt_thread_num(void) { } +static inline int test_and_read_rtg_rt_thread_num(void) +{ + return 0; +} +#endif + +bool is_frame_rtg(int id) +{ + return (id >= MULTI_FRAME_ID) && + (id < (MULTI_FRAME_ID + MULTI_FRAME_NUM)); +} + +static struct related_thread_group *frame_rtg(int id) +{ + if (!is_frame_rtg(id)) + return NULL; + + return lookup_related_thread_group(id); +} + +struct frame_info *rtg_frame_info(int id) +{ + if (!is_frame_rtg(id)) + return NULL; + + return rtg_active_multi_frame_info(id); +} + +static int alloc_rtg_id(void) +{ + unsigned int id_offset; + int id; + + write_lock(&g_id_manager.lock); + id_offset = find_next_zero_bit(g_id_manager.id_map, MULTI_FRAME_NUM, + 
g_id_manager.offset); + if (id_offset >= MULTI_FRAME_NUM) { + id_offset = find_first_zero_bit(g_id_manager.id_map, + MULTI_FRAME_NUM); + if (id_offset >= MULTI_FRAME_NUM) { + write_unlock(&g_id_manager.lock); + return -EINVAL; + } + } + + set_bit(id_offset, g_id_manager.id_map); + g_id_manager.offset = id_offset; + id = id_offset + MULTI_FRAME_ID; + write_unlock(&g_id_manager.lock); + pr_debug("[FRAME_RTG] %s id_offset=%u, id=%d\n", __func__, id_offset, id); + + return id; +} + +static void free_rtg_id(int id) +{ + unsigned int id_offset = id - MULTI_FRAME_ID; + + if (id_offset >= MULTI_FRAME_NUM) { + pr_err("[FRAME_RTG] %s id_offset is invalid, id=%d, id_offset=%u.\n", + __func__, id, id_offset); + return; + } + + pr_debug("[FRAME_RTG] %s id=%d id_offset=%u\n", __func__, id, id_offset); + write_lock(&g_id_manager.lock); + clear_bit(id_offset, g_id_manager.id_map); + write_unlock(&g_id_manager.lock); +} + +int set_frame_rate(struct frame_info *frame_info, int rate) +{ + int id; + + if ((rate < MIN_FRAME_RATE) || (rate > MAX_FRAME_RATE)) { + pr_err("[FRAME_RTG]: %s invalid QOS(rate) value\n", + __func__); + return -EINVAL; + } + + if (!frame_info || !frame_info->rtg) + return -EINVAL; + + frame_info->frame_rate = (unsigned int)rate; + frame_info->frame_time = div_u64(NSEC_PER_SEC, rate); + frame_info->max_vload_time = + div_u64(frame_info->frame_time, NSEC_PER_MSEC) + + frame_info->vload_margin; + id = frame_info->rtg->id; + trace_rtg_frame_sched(id, "FRAME_QOS", rate); + trace_rtg_frame_sched(id, "FRAME_MAX_TIME", frame_info->max_vload_time); + + return 0; +} + +int alloc_multi_frame_info(void) +{ + struct frame_info *frame_info = NULL; + int id; + int i; + + id = alloc_rtg_id(); + if (id < 0) + return id; + + frame_info = rtg_frame_info(id); + if (!frame_info) { + free_rtg_id(id); + return -EINVAL; + } + + set_frame_rate(frame_info, DEFAULT_FRAME_RATE); + atomic_set(&frame_info->curr_rt_thread_num, 0); + atomic_set(&frame_info->max_rt_thread_num, DEFAULT_MAX_RT_THREAD); + for (i = 0; i < MAX_TID_NUM; i++) + atomic_set(&frame_info->thread_prio[i], 0); + + return id; +} + +void release_multi_frame_info(int id) +{ + if ((id < MULTI_FRAME_ID) || (id >= MULTI_FRAME_ID + MULTI_FRAME_NUM)) { + pr_err("[FRAME_RTG] %s frame(id=%d) not found.\n", __func__, id); + return; + } + + read_lock(&g_id_manager.lock); + if (!test_bit(id - MULTI_FRAME_ID, g_id_manager.id_map)) { + read_unlock(&g_id_manager.lock); + return; + } + read_unlock(&g_id_manager.lock); + + pr_debug("[FRAME_RTG] %s release frame(id=%d).\n", __func__, id); + free_rtg_id(id); +} + +void clear_multi_frame_info(void) +{ + write_lock(&g_id_manager.lock); + bitmap_zero(g_id_manager.id_map, MULTI_FRAME_NUM); + g_id_manager.offset = 0; + write_unlock(&g_id_manager.lock); +} + +struct frame_info *rtg_active_multi_frame_info(int id) +{ + struct frame_info *frame_info = NULL; + + if ((id < MULTI_FRAME_ID) || (id >= MULTI_FRAME_ID + MULTI_FRAME_NUM)) + return NULL; + + read_lock(&g_id_manager.lock); + if (test_bit(id - MULTI_FRAME_ID, g_id_manager.id_map)) + frame_info = &g_multi_frame_info[id - MULTI_FRAME_ID]; + read_unlock(&g_id_manager.lock); + if (!frame_info) + pr_debug("[FRAME_RTG] %s frame %d has been released\n", + __func__, id); + + return frame_info; +} + +struct frame_info *rtg_multi_frame_info(int id) +{ + if ((id < MULTI_FRAME_ID) || (id >= MULTI_FRAME_ID + MULTI_FRAME_NUM)) + return NULL; + + return &g_multi_frame_info[id - MULTI_FRAME_ID]; +} + +static void do_update_frame_task_prio(struct frame_info *frame_info, + struct 
task_struct *task, int prio) +{ + int policy = SCHED_NORMAL; + struct sched_param sp = {0}; + bool is_rt_task = (prio != NOT_RT_PRIO); + bool need_dec_flag = false; + bool need_inc_flag = false; + int err; + + trace_rtg_frame_sched(frame_info->rtg->id, "rtg_rt_thread_num", + read_rtg_rt_thread_num()); + /* change policy to RT */ + if (is_rt_task && (atomic_read(&frame_info->curr_rt_thread_num) < + atomic_read(&frame_info->max_rt_thread_num))) { + /* change policy from CFS to RT */ + if (!is_rtg_rt_task(task)) { + if (test_and_read_rtg_rt_thread_num() >= RTG_MAX_RT_THREAD_NUM) + goto out; + need_inc_flag = true; + } + /* change RT priority */ + policy = SCHED_FIFO | SCHED_RESET_ON_FORK; + sp.sched_priority = MAX_USER_RT_PRIO - 1 - prio; + atomic_inc(&frame_info->curr_rt_thread_num); + } else { + /* change policy from RT to CFS */ + if (!is_rt_task && is_rtg_rt_task(task)) + need_dec_flag = true; + } +out: + trace_rtg_frame_sched(frame_info->rtg->id, "rtg_rt_thread_num", + read_rtg_rt_thread_num()); + trace_rtg_frame_sched(frame_info->rtg->id, "curr_rt_thread_num", + atomic_read(&frame_info->curr_rt_thread_num)); + err = sched_setscheduler_nocheck(task, policy, &sp); + if (err == 0) { + if (need_dec_flag) + dec_rtg_rt_thread_num(); + else if (need_inc_flag) + inc_rtg_rt_thread_num(); + } +} + +int list_rtg_group(struct rtg_info *rs_data) +{ + int i; + int num = 0; + + read_lock(&g_id_manager.lock); + for (i = MULTI_FRAME_ID; i < MULTI_FRAME_ID + MULTI_FRAME_NUM; i++) { + if (test_bit(i - MULTI_FRAME_ID, g_id_manager.id_map)) { + rs_data->rtgs[num] = i; + num++; + } + } + read_unlock(&g_id_manager.lock); + rs_data->rtg_num = num; + + return num; +} + +int search_rtg(int pid) +{ + struct rtg_info grp_info; + struct frame_info *frame_info = NULL; + int i = 0; + int j = 0; + + grp_info.rtg_num = 0; + read_lock(&g_id_manager.lock); + for (i = MULTI_FRAME_ID; i < MULTI_FRAME_ID + MULTI_FRAME_NUM; i++) { + if (test_bit(i - MULTI_FRAME_ID, g_id_manager.id_map)) { + grp_info.rtgs[grp_info.rtg_num] = i; + grp_info.rtg_num++; + } + } + read_unlock(&g_id_manager.lock); + for (i = 0; i < grp_info.rtg_num; i++) { + frame_info = lookup_frame_info_by_grp_id(grp_info.rtgs[i]); + if (!frame_info) { + pr_err("[FRAME_RTG] unexpected grp %d find error.", i); + return -EINVAL; + } + + for (j = 0; j < frame_info->thread_num; j++) { + if (frame_info->thread[j] && frame_info->thread[j]->pid == pid) + return grp_info.rtgs[i]; + } + } + + return 0; +} + +static void update_frame_task_prio(struct frame_info *frame_info, int prio) +{ + int i; + struct task_struct *thread = NULL; + + /* reset curr_rt_thread_num */ + atomic_set(&frame_info->curr_rt_thread_num, 0); + + for (i = 0; i < MAX_TID_NUM; i++) { + thread = frame_info->thread[i]; + if (thread) + do_update_frame_task_prio(frame_info, thread, prio); + } +} + +void set_frame_prio(struct frame_info *frame_info, int prio) +{ + if (!frame_info) + return; + + mutex_lock(&frame_info->lock); + if (frame_info->prio == prio) + goto out; + + update_frame_task_prio(frame_info, prio); + frame_info->prio = prio; +out: + mutex_unlock(&frame_info->lock); +} + +static int do_set_rtg_sched(struct task_struct *task, bool is_rtg, + int grp_id, int prio) +{ + int err; + int policy = SCHED_NORMAL; + int grpid = DEFAULT_RTG_GRP_ID; + bool is_rt_task = (prio != NOT_RT_PRIO); + struct sched_param sp = {0}; + + if (is_rtg) { + if (is_rt_task) { + if (test_and_read_rtg_rt_thread_num() >= RTG_MAX_RT_THREAD_NUM) + // rtg_rt_thread_num is inavailable, set policy to CFS + goto skip_setpolicy; + 
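+			/*
+			 * Frame priorities map onto SCHED_FIFO in reverse:
+			 * prio 0 becomes the highest user RT priority
+			 * (MAX_USER_RT_PRIO - 1), and larger prio values map to
+			 * progressively lower RT priorities.
+			 */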
policy = SCHED_FIFO | SCHED_RESET_ON_FORK; + sp.sched_priority = MAX_USER_RT_PRIO - 1 - prio; + } +skip_setpolicy: + grpid = grp_id; + } + err = sched_setscheduler_nocheck(task, policy, &sp); + if (err < 0) { + pr_err("[FRAME_RTG]: %s task:%d setscheduler err:%d\n", + __func__, task->pid, err); + return err; + } + err = sched_set_group_id(task, grpid); + if (err < 0) { + pr_err("[FRAME_RTG]: %s task:%d set_group_id err:%d\n", + __func__, task->pid, err); + if (is_rtg) { + policy = SCHED_NORMAL; + sp.sched_priority = 0; + sched_setscheduler_nocheck(task, policy, &sp); + } + } + if (err == 0) { + if (is_rtg) { + if (policy != SCHED_NORMAL) + inc_rtg_rt_thread_num(); + } else { + dec_rtg_rt_thread_num(); + } + } + + return err; +} + +static int set_rtg_sched(struct task_struct *task, bool is_rtg, + int grp_id, int prio) +{ + int err = -1; + bool is_rt_task = (prio != NOT_RT_PRIO); + + if (!task) + return err; + + if (is_rt_task && is_rtg && ((prio < 0) || + (prio > MAX_USER_RT_PRIO - 1))) + return err; + /* + * original logic deny the non-cfs task st rt. + * add !fair_policy(task->policy) if needed + * + * if CONFIG_HW_FUTEX_PI is set, task->prio and task->sched_class + * may be modified by rtmutex. So we use task->policy instead. + */ + if (is_rtg && task->flags & PF_EXITING) + return err; + + if (in_interrupt()) { + pr_err("[FRAME_RTG]: %s is in interrupt\n", __func__); + return err; + } + + return do_set_rtg_sched(task, is_rtg, grp_id, prio); +} + +static bool set_frame_rtg_thread(int grp_id, struct task_struct *task, + bool is_rtg, int prio) +{ + int depth; + + if (!task) + return false; + depth = task->rtg_depth; + if (is_rtg) + task->rtg_depth = STATIC_RTG_DEPTH; + else + task->rtg_depth = 0; + + if (set_rtg_sched(task, is_rtg, grp_id, prio) < 0) { + task->rtg_depth = depth; + return false; + } + + return true; +} + +struct task_struct *update_frame_thread(struct frame_info *frame_info, + int old_prio, int prio, int pid, + struct task_struct *old_task) +{ + struct task_struct *task = NULL; + bool is_rt_task = (prio != NOT_RT_PRIO); + int new_prio = prio; + bool update_ret = false; + + if (pid > 0) { + if (old_task && (pid == old_task->pid) && (old_prio == new_prio)) { + if (is_rt_task && atomic_read(&frame_info->curr_rt_thread_num) < + atomic_read(&frame_info->max_rt_thread_num) && + (atomic_read(&frame_info->frame_sched_state) == 1)) + atomic_inc(&frame_info->curr_rt_thread_num); + trace_rtg_frame_sched(frame_info->rtg->id, "curr_rt_thread_num", + atomic_read(&frame_info->curr_rt_thread_num)); + return old_task; + } + rcu_read_lock(); + task = find_task_by_vpid(pid); + if (task) + get_task_struct(task); + rcu_read_unlock(); + } + trace_rtg_frame_sched(frame_info->rtg->id, "FRAME_SCHED_ENABLE", + atomic_read(&frame_info->frame_sched_state)); + if (atomic_read(&frame_info->frame_sched_state) == 1) { + if (task && is_rt_task) { + if (atomic_read(&frame_info->curr_rt_thread_num) < + atomic_read(&frame_info->max_rt_thread_num)) + atomic_inc(&frame_info->curr_rt_thread_num); + else + new_prio = NOT_RT_PRIO; + } + trace_rtg_frame_sched(frame_info->rtg->id, "curr_rt_thread_num", + atomic_read(&frame_info->curr_rt_thread_num)); + trace_rtg_frame_sched(frame_info->rtg->id, "rtg_rt_thread_num", + read_rtg_rt_thread_num()); + + set_frame_rtg_thread(frame_info->rtg->id, old_task, false, NOT_RT_PRIO); + update_ret = set_frame_rtg_thread(frame_info->rtg->id, task, true, new_prio); + } + if (old_task) + put_task_struct(old_task); + if (!update_ret) + return NULL; + + return task; +} + +void 
update_frame_thread_info(struct frame_info *frame_info, + struct frame_thread_info *frame_thread_info) +{ + int i; + int old_prio; + int prio; + int thread_num; + int real_thread; + + if (!frame_info || !frame_thread_info || + frame_thread_info->thread_num < 0) + return; + + prio = frame_thread_info->prio; + thread_num = frame_thread_info->thread_num; + if (thread_num > MAX_TID_NUM) + thread_num = MAX_TID_NUM; + + // reset curr_rt_thread_num + atomic_set(&frame_info->curr_rt_thread_num, 0); + mutex_lock(&frame_info->lock); + old_prio = frame_info->prio; + real_thread = 0; + for (i = 0; i < thread_num; i++) { + atomic_set(&frame_info->thread_prio[i], 0); + frame_info->thread[i] = update_frame_thread(frame_info, old_prio, prio, + frame_thread_info->thread[i], + frame_info->thread[i]); + if (frame_info->thread[i] && (frame_thread_info->thread[i] > 0)) + real_thread++; + } + frame_info->prio = prio; + frame_info->thread_num = real_thread; + mutex_unlock(&frame_info->lock); +} + +static void do_set_frame_sched_state(struct frame_info *frame_info, + struct task_struct *task, + bool enable, int prio) +{ + int new_prio = prio; + bool is_rt_task = (prio != NOT_RT_PRIO); + + if (enable && is_rt_task) { + if (atomic_read(&frame_info->curr_rt_thread_num) < + atomic_read(&frame_info->max_rt_thread_num)) + atomic_inc(&frame_info->curr_rt_thread_num); + else + new_prio = NOT_RT_PRIO; + } + trace_rtg_frame_sched(frame_info->rtg->id, "curr_rt_thread_num", + atomic_read(&frame_info->curr_rt_thread_num)); + trace_rtg_frame_sched(frame_info->rtg->id, "rtg_rt_thread_num", + read_rtg_rt_thread_num()); + set_frame_rtg_thread(frame_info->rtg->id, task, enable, new_prio); +} + +void set_frame_sched_state(struct frame_info *frame_info, bool enable) +{ + atomic_t *frame_sched_state = NULL; + int prio; + int i; + + if (!frame_info || !frame_info->rtg) + return; + + frame_sched_state = &(frame_info->frame_sched_state); + if (enable) { + if (atomic_read(frame_sched_state) == 1) + return; + atomic_set(frame_sched_state, 1); + trace_rtg_frame_sched(frame_info->rtg->id, "FRAME_SCHED_ENABLE", 1); + + frame_info->prev_fake_load_util = 0; + frame_info->prev_frame_load_util = 0; + frame_info->frame_vload = 0; + frame_info_rtg_load(frame_info)->curr_window_load = 0; + } else { + if (atomic_read(frame_sched_state) == 0) + return; + atomic_set(frame_sched_state, 0); + trace_rtg_frame_sched(frame_info->rtg->id, "FRAME_SCHED_ENABLE", 0); + + (void)sched_set_group_normalized_util(frame_info->rtg->id, + 0, RTG_FREQ_NORMAL_UPDATE); + trace_rtg_frame_sched(frame_info->rtg->id, "preferred_cluster", + INVALID_PREFERRED_CLUSTER); + frame_info->status = FRAME_END; + } + + /* reset curr_rt_thread_num */ + atomic_set(&frame_info->curr_rt_thread_num, 0); + mutex_lock(&frame_info->lock); + for (i = 0; i < MAX_TID_NUM; i++) { + if (frame_info->thread[i]) { + prio = atomic_read(&frame_info->thread_prio[i]); + do_set_frame_sched_state(frame_info, frame_info->thread[i], + enable, prio); + } + } + mutex_unlock(&frame_info->lock); + + trace_rtg_frame_sched(frame_info->rtg->id, "FRAME_STATUS", + frame_info->status); + trace_rtg_frame_sched(frame_info->rtg->id, "frame_status", + frame_info->status); +} + +static inline bool check_frame_util_invalid(const struct frame_info *frame_info, + u64 timeline) +{ + return ((frame_info_rtg(frame_info)->util_invalid_interval <= timeline) && + (frame_info_rtg_load(frame_info)->curr_window_exec * FRAME_UTIL_INVALID_FACTOR + <= timeline)); +} + +static u64 calc_prev_fake_load_util(const struct frame_info 
*frame_info) +{ + u64 prev_frame_load = frame_info->prev_frame_load; + u64 prev_frame_time = max_t(unsigned long, frame_info->prev_frame_time, + frame_info->frame_time); + u64 frame_util = 0; + + if (prev_frame_time > 0) + frame_util = div_u64((prev_frame_load << SCHED_CAPACITY_SHIFT), + prev_frame_time); + frame_util = clamp_t(unsigned long, frame_util, + frame_info->prev_min_util, + frame_info->prev_max_util); + + return frame_util; +} + +static u64 calc_prev_frame_load_util(const struct frame_info *frame_info) +{ + u64 prev_frame_load = frame_info->prev_frame_load; + u64 frame_time = frame_info->frame_time; + u64 frame_util = 0; + + if (prev_frame_load >= frame_time) + frame_util = FRAME_MAX_LOAD; + else + frame_util = div_u64((prev_frame_load << SCHED_CAPACITY_SHIFT), + frame_info->frame_time); + frame_util = clamp_t(unsigned long, frame_util, + frame_info->prev_min_util, + frame_info->prev_max_util); + + return frame_util; +} + +/* last frame load tracking */ +static void update_frame_prev_load(struct frame_info *frame_info, bool fake) +{ + /* last frame load tracking */ + frame_info->prev_frame_exec = + frame_info_rtg_load(frame_info)->prev_window_exec; + frame_info->prev_frame_time = + frame_info_rtg(frame_info)->prev_window_time; + frame_info->prev_frame_load = + frame_info_rtg_load(frame_info)->prev_window_load; + + if (fake) + frame_info->prev_fake_load_util = + calc_prev_fake_load_util(frame_info); + else + frame_info->prev_frame_load_util = + calc_prev_frame_load_util(frame_info); +} + +static void do_frame_end(struct frame_info *frame_info, bool fake) +{ + unsigned long prev_util; + int id = frame_info->rtg->id; + + frame_info->status = FRAME_END; + trace_rtg_frame_sched(id, "frame_status", frame_info->status); + + /* last frame load tracking */ + update_frame_prev_load(frame_info, fake); + + /* reset frame_info */ + frame_info->frame_vload = 0; + + /* reset frame_min_util */ + frame_info->frame_min_util = 0; + + if (fake) + prev_util = frame_info->prev_fake_load_util; + else + prev_util = frame_info->prev_frame_load_util; + + frame_info->frame_util = clamp_t(unsigned long, prev_util, + frame_info->frame_min_util, + frame_info->frame_max_util); + + trace_rtg_frame_sched(id, "frame_last_task_time", + frame_info->prev_frame_exec); + trace_rtg_frame_sched(id, "frame_last_time", frame_info->prev_frame_time); + trace_rtg_frame_sched(id, "frame_last_load", frame_info->prev_frame_load); + trace_rtg_frame_sched(id, "frame_last_load_util", + frame_info->prev_frame_load_util); + trace_rtg_frame_sched(id, "frame_util", frame_info->frame_util); + trace_rtg_frame_sched(id, "frame_vload", frame_info->frame_vload); +} + +/* + * frame_load : calculate frame load using exec util + */ +static inline u64 calc_frame_exec(const struct frame_info *frame_info) +{ + if (frame_info->frame_time > 0) + return div_u64((frame_info_rtg_load(frame_info)->curr_window_exec << + SCHED_CAPACITY_SHIFT), frame_info->frame_time); + else + return 0; +} + +/* + * real_util: + * max(last_util, virtual_util, boost_util, phase_util, frame_min_util) + */ +static u64 calc_frame_util(const struct frame_info *frame_info, bool fake) +{ + unsigned long load_util; + + if (fake) + load_util = frame_info->prev_fake_load_util; + else + load_util = frame_info->prev_frame_load_util; + + load_util = max_t(unsigned long, load_util, frame_info->frame_vload); + load_util = clamp_t(unsigned long, load_util, + frame_info->frame_min_util, + frame_info->frame_max_util); + + return load_util; +} + +/* + * frame_vload [0~1024] + * vtime: 
now - timestamp + * max_time: frame_info->frame_time + vload_margin + * load = F(vtime) + * = vtime ^ 2 - vtime * max_time + FRAME_MAX_VLOAD * vtime / max_time; + * = vtime * (vtime + FRAME_MAX_VLOAD / max_time - max_time); + * [0, 0] -=> [max_time, FRAME_MAX_VLOAD] + * + */ +static u64 calc_frame_vload(const struct frame_info *frame_info, u64 timeline) +{ + u64 vload; + int vtime = div_u64(timeline, NSEC_PER_MSEC); + int max_time = frame_info->max_vload_time; + int factor; + + if ((max_time <= 0) || (vtime > max_time)) + return FRAME_MAX_VLOAD; + + factor = vtime + FRAME_MAX_VLOAD / max_time; + /* margin maybe negative */ + if ((vtime <= 0) || (factor <= max_time)) + return 0; + + vload = (u64)vtime * (u64)(factor - max_time); + + return vload; +} + +static int update_frame_info_tick_inner(int id, struct frame_info *frame_info, + u64 timeline) +{ + switch (frame_info->status) { + case FRAME_INVALID: + case FRAME_END: + if (timeline >= frame_info->frame_time) { + /* + * fake FRAME_END here to rollover frame_window. + */ + sched_set_group_window_rollover(id); + do_frame_end(frame_info, true); + } else { + frame_info->frame_vload = calc_frame_exec(frame_info); + frame_info->frame_util = + calc_frame_util(frame_info, true); + } + + /* when not in boost, start tick timer */ + break; + case FRAME_START: + /* check frame_util invalid */ + if (!check_frame_util_invalid(frame_info, timeline)) { + /* frame_vload statistic */ + frame_info->frame_vload = calc_frame_vload(frame_info, timeline); + /* frame_util statistic */ + frame_info->frame_util = + calc_frame_util(frame_info, false); + } else { + frame_info->status = FRAME_INVALID; + trace_rtg_frame_sched(id, "FRAME_STATUS", + frame_info->status); + trace_rtg_frame_sched(id, "frame_status", + frame_info->status); + + /* + * trigger FRAME_END to rollover frame_window, + * we treat FRAME_INVALID as FRAME_END. 
+ */ + sched_set_group_window_rollover(id); + do_frame_end(frame_info, false); + } + break; + default: + return -EINVAL; + } + + return 0; +} + +static inline struct frame_info *rtg_frame_info_inner( + const struct related_thread_group *grp) +{ + return (struct frame_info *)grp->private_data; +} + +static inline void frame_boost(struct frame_info *frame_info) +{ + if (frame_info->frame_util < frame_info->frame_boost_min_util) + frame_info->frame_util = frame_info->frame_boost_min_util; +} + +/* + * update CPUFREQ and PLACEMENT when frame task running (in tick) and migration + */ +static void update_frame_info_tick(struct related_thread_group *grp) +{ + u64 window_start; + u64 wallclock; + u64 timeline; + struct frame_info *frame_info = NULL; + int id = grp->id; + + rcu_read_lock(); + frame_info = rtg_frame_info_inner(grp); + window_start = grp->window_start; + rcu_read_unlock(); + if (unlikely(!frame_info)) + return; + + if (atomic_read(&frame_info->frame_sched_state) == 0) + return; + trace_rtg_frame_sched(id, "frame_status", frame_info->status); + + wallclock = ktime_get_ns(); + timeline = wallclock - window_start; + + trace_rtg_frame_sched(id, "update_curr_pid", current->pid); + trace_rtg_frame_sched(id, "frame_timeline", div_u64(timeline, NSEC_PER_MSEC)); + + if (update_frame_info_tick_inner(grp->id, frame_info, timeline) == -EINVAL) + return; + + frame_boost(frame_info); + trace_rtg_frame_sched(id, "frame_vload", frame_info->frame_vload); + trace_rtg_frame_sched(id, "frame_util", frame_info->frame_util); + + sched_set_group_normalized_util(grp->id, + frame_info->frame_util, RTG_FREQ_NORMAL_UPDATE); + + if (grp->preferred_cluster) + trace_rtg_frame_sched(id, "preferred_cluster", + grp->preferred_cluster->id); +} + +const struct rtg_class frame_rtg_class = { + .sched_update_rtg_tick = update_frame_info_tick, +}; + +int set_frame_margin(struct frame_info *frame_info, int margin) +{ + int id; + + if ((margin < MIN_VLOAD_MARGIN) || (margin > MAX_VLOAD_MARGIN)) { + pr_err("[FRAME_RTG]: %s invalid MARGIN value\n", + __func__); + return -EINVAL; + } + + if (!frame_info || !frame_info->rtg) + return -EINVAL; + + frame_info->vload_margin = margin; + frame_info->max_vload_time = + div_u64(frame_info->frame_time, NSEC_PER_MSEC) + + frame_info->vload_margin; + id = frame_info->rtg->id; + trace_rtg_frame_sched(id, "FRAME_MARGIN", -margin); + trace_rtg_frame_sched(id, "FRAME_MAX_TIME", frame_info->max_vload_time); + + return 0; +} + +static void set_frame_start(struct frame_info *frame_info) +{ + int id = frame_info->rtg->id; + + if (likely(frame_info->status == FRAME_START)) { + /* + * START -=> START -=> ...... 
+ * FRMAE_START is + * the end of last frame + * the start of the current frame + */ + update_frame_prev_load(frame_info, false); + } else if ((frame_info->status == FRAME_END) || + (frame_info->status == FRAME_INVALID)) { + /* START -=> END -=> [START] + * FRAME_START is + * only the start of current frame + * we shoudn't tracking the last rtg-window + * [FRAME_END, FRAME_START] + * it's not an available frame window + */ + update_frame_prev_load(frame_info, true); + frame_info->status = FRAME_START; + } + trace_rtg_frame_sched(id, "FRAME_STATUS", frame_info->status); + trace_rtg_frame_sched(id, "frame_last_task_time", + frame_info->prev_frame_exec); + trace_rtg_frame_sched(id, "frame_last_time", frame_info->prev_frame_time); + trace_rtg_frame_sched(id, "frame_last_load", frame_info->prev_frame_load); + trace_rtg_frame_sched(id, "frame_last_load_util", + frame_info->prev_frame_load_util); + + /* new_frame_start */ + if (!frame_info->margin_imme) { + frame_info->frame_vload = 0; + frame_info->frame_util = clamp_t(unsigned long, + frame_info->prev_frame_load_util, + frame_info->frame_min_util, + frame_info->frame_max_util); + } else { + frame_info->frame_vload = calc_frame_vload(frame_info, 0); + frame_info->frame_util = calc_frame_util(frame_info, false); + } + + trace_rtg_frame_sched(id, "frame_vload", frame_info->frame_vload); +} + +static void set_frame_end(struct frame_info *frame_info) +{ + trace_rtg_frame_sched(frame_info->rtg->id, "FRAME_STATUS", FRAME_END); + do_frame_end(frame_info, false); +} + +static int update_frame_timestamp(unsigned long status, + struct frame_info *frame_info, struct related_thread_group *grp) +{ + int id = frame_info->rtg->id; + + /* SCHED_FRAME timestamp */ + switch (status) { + case FRAME_START: + /* collect frame_info when frame_end timestamp coming */ + set_frame_start(frame_info); + break; + case FRAME_END: + /* FRAME_END should only set and update freq once */ + if (unlikely(frame_info->status == FRAME_END)) + return 0; + set_frame_end(frame_info); + break; + default: + pr_err("[FRAME_RTG]: %s invalid timestamp(status)\n", + __func__); + return -EINVAL; + } + + frame_boost(frame_info); + trace_rtg_frame_sched(id, "frame_util", frame_info->frame_util); + + /* update cpufreq force when frame_stop */ + sched_set_group_normalized_util(grp->id, + frame_info->frame_util, RTG_FREQ_FORCE_UPDATE); + if (grp->preferred_cluster) + trace_rtg_frame_sched(id, "preferred_cluster", + grp->preferred_cluster->id); + + return 0; +} + +static int set_frame_status(struct frame_info *frame_info, unsigned long status) +{ + struct related_thread_group *grp = NULL; + int id; + + if (!frame_info) + return -EINVAL; + + grp = frame_info->rtg; + if (unlikely(!grp)) + return -EINVAL; + + if (atomic_read(&frame_info->frame_sched_state) == 0) + return -EINVAL; + + if (!(status & FRAME_SETTIME) || + (status == (unsigned long)FRAME_SETTIME_PARAM)) { + pr_err("[FRAME_RTG]: %s invalid timetsamp(status)\n", + __func__); + return -EINVAL; + } + + if (status & FRAME_TIMESTAMP_SKIP_START) { + frame_info->timestamp_skipped = true; + status &= ~FRAME_TIMESTAMP_SKIP_START; + } else if (status & FRAME_TIMESTAMP_SKIP_END) { + frame_info->timestamp_skipped = false; + status &= ~FRAME_TIMESTAMP_SKIP_END; + } else if (frame_info->timestamp_skipped) { + /* + * skip the following timestamp until + * FRAME_TIMESTAMP_SKIPPED reset + */ + return 0; + } + id = grp->id; + trace_rtg_frame_sched(id, "FRAME_TIMESTAMP_SKIPPED", + frame_info->timestamp_skipped); + trace_rtg_frame_sched(id, "FRAME_MAX_UTIL", 
frame_info->frame_max_util); + + if (status & FRAME_USE_MARGIN_IMME) { + frame_info->margin_imme = true; + status &= ~FRAME_USE_MARGIN_IMME; + } else { + frame_info->margin_imme = false; + } + trace_rtg_frame_sched(id, "FRAME_MARGIN_IMME", frame_info->margin_imme); + trace_rtg_frame_sched(id, "FRAME_TIMESTAMP", status); + + return update_frame_timestamp(status, frame_info, grp); +} + +int set_frame_timestamp(struct frame_info *frame_info, unsigned long timestamp) +{ + int ret; + + if (!frame_info || !frame_info->rtg) + return -EINVAL; + + if (atomic_read(&frame_info->frame_sched_state) == 0) + return -EINVAL; + + ret = sched_set_group_window_rollover(frame_info->rtg->id); + if (!ret) + ret = set_frame_status(frame_info, timestamp); + + return ret; +} + +int set_frame_min_util(struct frame_info *frame_info, int min_util, bool is_boost) +{ + int id; + + if (unlikely((min_util < 0) || (min_util > SCHED_CAPACITY_SCALE))) { + pr_err("[FRAME_RTG]: %s invalid min_util value\n", + __func__); + return -EINVAL; + } + + if (!frame_info || !frame_info->rtg) + return -EINVAL; + + id = frame_info->rtg->id; + if (is_boost) { + frame_info->frame_boost_min_util = min_util; + trace_rtg_frame_sched(id, "FRAME_BOOST_MIN_UTIL", min_util); + } else { + frame_info->frame_min_util = min_util; + + frame_info->frame_util = calc_frame_util(frame_info, false); + trace_rtg_frame_sched(id, "frame_util", frame_info->frame_util); + sched_set_group_normalized_util(id, + frame_info->frame_util, RTG_FREQ_FORCE_UPDATE); + } + + return 0; +} + +int set_frame_max_util(struct frame_info *frame_info, int max_util) +{ + int id; + + if ((max_util < 0) || (max_util > SCHED_CAPACITY_SCALE)) { + pr_err("[FRAME_RTG]: %s invalid max_util value\n", + __func__); + return -EINVAL; + } + + if (!frame_info || !frame_info->rtg) + return -EINVAL; + + frame_info->frame_max_util = max_util; + id = frame_info->rtg->id; + trace_rtg_frame_sched(id, "FRAME_MAX_UTIL", frame_info->frame_max_util); + + return 0; +} + +struct frame_info *lookup_frame_info_by_grp_id(int grp_id) +{ + if (grp_id >= (MULTI_FRAME_ID + MULTI_FRAME_NUM) || (grp_id <= 0)) + return NULL; + if (grp_id >= MULTI_FRAME_ID) { + read_lock(&g_id_manager.lock); + if (!test_bit(grp_id - MULTI_FRAME_ID, g_id_manager.id_map)) { + read_unlock(&g_id_manager.lock); + return NULL; + } + read_unlock(&g_id_manager.lock); + return rtg_frame_info(grp_id); + } else + return rtg_frame_info(grp_id); +} + +static int _init_frame_info(struct frame_info *frame_info, int id) +{ + struct related_thread_group *grp = NULL; + unsigned long flags; + + memset(frame_info, 0, sizeof(struct frame_info)); + mutex_init(&frame_info->lock); + + mutex_lock(&frame_info->lock); + frame_info->frame_rate = DEFAULT_FRAME_RATE; + frame_info->frame_time = div_u64(NSEC_PER_SEC, frame_info->frame_rate); + frame_info->thread_num = 0; + frame_info->prio = NOT_RT_PRIO; + atomic_set(&(frame_info->curr_rt_thread_num), 0); + atomic_set(&(frame_info->frame_sched_state), 0); + frame_info->vload_margin = DEFAULT_VLOAD_MARGIN; + frame_info->max_vload_time = + div_u64(frame_info->frame_time, NSEC_PER_MSEC) + + frame_info->vload_margin; + frame_info->frame_min_util = FRAME_DEFAULT_MIN_UTIL; + frame_info->frame_max_util = FRAME_DEFAULT_MAX_UTIL; + frame_info->prev_min_util = FRAME_DEFAULT_MIN_PREV_UTIL; + frame_info->prev_max_util = FRAME_DEFAULT_MAX_PREV_UTIL; + frame_info->margin_imme = false; + frame_info->timestamp_skipped = false; + frame_info->status = FRAME_END; + + grp = frame_rtg(id); + if (unlikely(!grp)) { + 
mutex_unlock(&frame_info->lock); + return -EINVAL; + } + + raw_spin_lock_irqsave(&grp->lock, flags); + grp->private_data = frame_info; + grp->rtg_class = &frame_rtg_class; + raw_spin_unlock_irqrestore(&grp->lock, flags); + + frame_info->rtg = grp; + mutex_unlock(&frame_info->lock); + + return 0; +} + +static int __init init_frame_info(void) +{ + int ret = 0; + int id; + + for (id = MULTI_FRAME_ID; id < (MULTI_FRAME_ID + MULTI_FRAME_NUM); id++) { + if (ret != 0) + break; + ret = _init_frame_info(rtg_multi_frame_info(id), id); + } + + return ret; +} +late_initcall(init_frame_info); diff --git a/kernel/sched/rtg/frame_rtg.h b/kernel/sched/rtg/frame_rtg.h new file mode 100755 index 0000000000000000000000000000000000000000..01f23d27413a9fe856c9508df7f63ba946190c8f --- /dev/null +++ b/kernel/sched/rtg/frame_rtg.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Frame declaration + * + * Copyright (c) 2022-2023 Huawei Technologies Co., Ltd. + */ + +#ifndef __FRAME_RTG_H +#define __FRAME_RTG_H + +#include +#include +#include +#include + +#define MULTI_FRAME_ID (DEFAULT_CGROUP_COLOC_ID + 1) +#define MULTI_FRAME_NUM (MAX_NUM_CGROUP_COLOC_ID - DEFAULT_CGROUP_COLOC_ID - 1) + +#define NOT_RT_PRIO (-1) +#define STATIC_RTG_DEPTH (-1) + +#define FRAME_START (1 << 0) +#define FRAME_END (1 << 1) +#define FRAME_INVALID (1 << 2) +#define FRAME_USE_MARGIN_IMME (1 << 4) +#define FRAME_TIMESTAMP_SKIP_START (1 << 5) +#define FRAME_TIMESTAMP_SKIP_END (1 << 6) +#define FRAME_SETTIME (FRAME_START | FRAME_END | \ + FRAME_USE_MARGIN_IMME) +#define FRAME_SETTIME_PARAM (-1) + +#define DEFAULT_FRAME_RATE 60 +#define MIN_FRAME_RATE 1 +#define MAX_FRAME_RATE 120 + +/* MARGIN value : [-100, 100] */ +#define DEFAULT_VLOAD_MARGIN 16 +#define MIN_VLOAD_MARGIN (-100) +#define MAX_VLOAD_MARGIN 0xffff + +#define FRAME_MAX_VLOAD SCHED_CAPACITY_SCALE +#define FRAME_MAX_LOAD SCHED_CAPACITY_SCALE +#define FRAME_UTIL_INVALID_FACTOR 4 +#define FRAME_DEFAULT_MIN_UTIL 0 +#define FRAME_DEFAULT_MAX_UTIL SCHED_CAPACITY_SCALE +#define FRAME_DEFAULT_MIN_PREV_UTIL 0 +#define FRAME_DEFAULT_MAX_PREV_UTIL SCHED_CAPACITY_SCALE + +#define DEFAULT_MAX_RT_THREAD 5 +/* + * RTG_MAX_RT_THREAD_NUM should be CONFIG_NR_CPUS in previous version + * fit for FFRT here + */ +#define RTG_MAX_RT_THREAD_NUM 20 +#define INVALID_PREFERRED_CLUSTER 10 + +enum rtg_type { + VIP = 0, + TOP_TASK_KEY, + NORMAL_TASK, + RTG_TYPE_MAX, +}; + +struct frame_thread_info { + int prio; + int thread[MAX_TID_NUM]; + int thread_num; +}; + +struct multi_frame_id_manager { + DECLARE_BITMAP(id_map, MULTI_FRAME_NUM); + unsigned int offset; + rwlock_t lock; +}; + +struct rtg_info { + int rtg_num; + int rtgs[MULTI_FRAME_NUM]; +}; + +bool is_frame_rtg(int id); +int set_frame_rate(struct frame_info *frame_info, int rate); +int alloc_multi_frame_info(void); +struct frame_info *rtg_active_multi_frame_info(int id); +struct frame_info *rtg_multi_frame_info(int id); +void release_multi_frame_info(int id); +void clear_multi_frame_info(void); +void set_frame_prio(struct frame_info *frame_info, int prio); +struct task_struct *update_frame_thread(struct frame_info *frame_info, + int old_prio, int prio, int pid, + struct task_struct *old_task); +void update_frame_thread_info(struct frame_info *frame_info, + struct frame_thread_info *frame_thread_info); +#ifdef CONFIG_SCHED_RTG_RT_THREAD_LIMIT +int read_rtg_rt_thread_num(void); +#else +static inline int read_rtg_rt_thread_num(void) +{ + return 0; +} +#endif +static inline +struct group_ravg *frame_info_rtg_load(const struct frame_info 
*frame_info) +{ + return &frame_info_rtg(frame_info)->ravg; +} +void set_frame_sched_state(struct frame_info *frame_info, bool enable); +int set_frame_margin(struct frame_info *frame_info, int margin); +int set_frame_timestamp(struct frame_info *frame_info, unsigned long timestamp); +int set_frame_max_util(struct frame_info *frame_info, int max_util); +int set_frame_min_util(struct frame_info *frame_info, int min_util, bool is_boost); +struct frame_info *lookup_frame_info_by_grp_id(int grp_id); +int list_rtg_group(struct rtg_info *rs_data); +int search_rtg(int pid); +#endif diff --git a/kernel/sched/rtg/rtg.c b/kernel/sched/rtg/rtg.c new file mode 100755 index 0000000000000000000000000000000000000000..168c6c3378b34e58689f9962009225fc8aa7d2d8 --- /dev/null +++ b/kernel/sched/rtg/rtg.c @@ -0,0 +1,1253 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * related thread group sched + * + */ +#include +#include +#include +#define CREATE_TRACE_POINTS +#include +#undef CREATE_TRACE_POINTS + +#include "../sched.h" +#include "rtg.h" +#include "../walt.h" + +#ifdef CONFIG_SCHED_RTG_FRAME +#include "frame_rtg.h" +#endif + +#define ADD_TASK 0 +#define REM_TASK 1 + +#define DEFAULT_GROUP_RATE 60 /* 60FPS */ +#define DEFAULT_UTIL_INVALID_INTERVAL (~0U) /* ns */ +#define DEFAULT_UTIL_UPDATE_TIMEOUT 20000000 /* ns */ +#define DEFAULT_FREQ_UPDATE_INTERVAL 8000000 /* ns */ + +struct related_thread_group *related_thread_groups[MAX_NUM_CGROUP_COLOC_ID]; +static DEFINE_RWLOCK(related_thread_group_lock); +static LIST_HEAD(active_related_thread_groups); + +#define for_each_related_thread_group(grp) \ + list_for_each_entry(grp, &active_related_thread_groups, list) + +void init_task_rtg(struct task_struct *p) +{ + rcu_assign_pointer(p->grp, NULL); + INIT_LIST_HEAD(&p->grp_list); +} + +struct related_thread_group *task_related_thread_group(struct task_struct *p) +{ + return rcu_dereference(p->grp); +} + +struct related_thread_group * +lookup_related_thread_group(unsigned int group_id) +{ + return related_thread_groups[group_id]; +} + +int alloc_related_thread_groups(void) +{ + int i, ret; + struct related_thread_group *grp = NULL; + + /* groupd_id = 0 is invalid as it's special id to remove group. 
*/ + for (i = 1; i < MAX_NUM_CGROUP_COLOC_ID; i++) { + grp = kzalloc(sizeof(*grp), GFP_NOWAIT); + if (!grp) { + ret = -ENOMEM; + goto err; + } + + grp->id = i; + INIT_LIST_HEAD(&grp->tasks); + INIT_LIST_HEAD(&grp->list); + grp->window_size = NSEC_PER_SEC / DEFAULT_GROUP_RATE; + grp->util_invalid_interval = DEFAULT_UTIL_INVALID_INTERVAL; + grp->util_update_timeout = DEFAULT_UTIL_UPDATE_TIMEOUT; + grp->max_boost = 0; + grp->freq_update_interval = DEFAULT_FREQ_UPDATE_INTERVAL; + raw_spin_lock_init(&grp->lock); + + related_thread_groups[i] = grp; + } + + return 0; + +err: + for (i = 1; i < MAX_NUM_CGROUP_COLOC_ID; i++) { + grp = lookup_related_thread_group(i); + if (grp) { + kfree(grp); + related_thread_groups[i] = NULL; + } else { + break; + } + } + + return ret; +} + +/* + * Task's cpu usage is accounted in: + * rq->curr/prev_runnable_sum, when its ->grp is NULL + * grp->cpu_time[cpu]->curr/prev_runnable_sum, when its ->grp is !NULL + * + * Transfer task's cpu usage between those counters when transitioning between + * groups + */ +static void transfer_busy_time(struct rq *rq, struct related_thread_group *grp, + struct task_struct *p, int event) +{ + u64 wallclock; + struct group_cpu_time *cpu_time; + u64 *src_curr_runnable_sum, *dst_curr_runnable_sum; + u64 *src_prev_runnable_sum, *dst_prev_runnable_sum; + u64 *src_nt_curr_runnable_sum, *dst_nt_curr_runnable_sum; + u64 *src_nt_prev_runnable_sum, *dst_nt_prev_runnable_sum; + int migrate_type; + int cpu = cpu_of(rq); + bool new_task; + int i; + + wallclock = sched_ktime_clock(); + + update_task_ravg(rq->curr, rq, TASK_UPDATE, wallclock, 0); + update_task_ravg(p, rq, TASK_UPDATE, wallclock, 0); + new_task = is_new_task(p); + + cpu_time = &rq->grp_time; + if (event == ADD_TASK) { + migrate_type = RQ_TO_GROUP; + + src_curr_runnable_sum = &rq->curr_runnable_sum; + dst_curr_runnable_sum = &cpu_time->curr_runnable_sum; + src_prev_runnable_sum = &rq->prev_runnable_sum; + dst_prev_runnable_sum = &cpu_time->prev_runnable_sum; + + src_nt_curr_runnable_sum = &rq->nt_curr_runnable_sum; + dst_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum; + src_nt_prev_runnable_sum = &rq->nt_prev_runnable_sum; + dst_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum; + + *src_curr_runnable_sum -= p->ravg.curr_window_cpu[cpu]; + *src_prev_runnable_sum -= p->ravg.prev_window_cpu[cpu]; + if (new_task) { + *src_nt_curr_runnable_sum -= + p->ravg.curr_window_cpu[cpu]; + *src_nt_prev_runnable_sum -= + p->ravg.prev_window_cpu[cpu]; + } + + update_cluster_load_subtractions(p, cpu, + rq->window_start, new_task); + + } else { + migrate_type = GROUP_TO_RQ; + + src_curr_runnable_sum = &cpu_time->curr_runnable_sum; + dst_curr_runnable_sum = &rq->curr_runnable_sum; + src_prev_runnable_sum = &cpu_time->prev_runnable_sum; + dst_prev_runnable_sum = &rq->prev_runnable_sum; + + src_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum; + dst_nt_curr_runnable_sum = &rq->nt_curr_runnable_sum; + src_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum; + dst_nt_prev_runnable_sum = &rq->nt_prev_runnable_sum; + + *src_curr_runnable_sum -= p->ravg.curr_window; + *src_prev_runnable_sum -= p->ravg.prev_window; + if (new_task) { + *src_nt_curr_runnable_sum -= p->ravg.curr_window; + *src_nt_prev_runnable_sum -= p->ravg.prev_window; + } + + /* + * Need to reset curr/prev windows for all CPUs, not just the + * ones in the same cluster. Since inter cluster migrations + * did not result in the appropriate book keeping, the values + * per CPU would be inaccurate. 
+ */ + for_each_possible_cpu(i) { + p->ravg.curr_window_cpu[i] = 0; + p->ravg.prev_window_cpu[i] = 0; + } + } + + *dst_curr_runnable_sum += p->ravg.curr_window; + *dst_prev_runnable_sum += p->ravg.prev_window; + if (new_task) { + *dst_nt_curr_runnable_sum += p->ravg.curr_window; + *dst_nt_prev_runnable_sum += p->ravg.prev_window; + } + + /* + * When a task enter or exits a group, it's curr and prev windows are + * moved to a single CPU. This behavior might be sub-optimal in the + * exit case, however, it saves us the overhead of handling inter + * cluster migration fixups while the task is part of a related group. + */ + p->ravg.curr_window_cpu[cpu] = p->ravg.curr_window; + p->ravg.prev_window_cpu[cpu] = p->ravg.prev_window; + + trace_sched_migration_update_sum(p, migrate_type, rq); +} + +static void _set_preferred_cluster(struct related_thread_group *grp, + int sched_cluster_id); +static void remove_task_from_group(struct task_struct *p) +{ + struct related_thread_group *grp = p->grp; + struct rq *rq = NULL; + bool empty_group = true; + struct rq_flags flag; + unsigned long irqflag; + + rq = __task_rq_lock(p, &flag); + transfer_busy_time(rq, p->grp, p, REM_TASK); + + raw_spin_lock_irqsave(&grp->lock, irqflag); + list_del_init(&p->grp_list); + rcu_assign_pointer(p->grp, NULL); + + if (p->on_cpu) + grp->nr_running--; + + if ((int)grp->nr_running < 0) { + WARN_ON(1); + grp->nr_running = 0; + } + + if (!list_empty(&grp->tasks)) { + empty_group = false; + } else { +#ifdef CONFIG_UCLAMP_TASK + grp->max_boost = 0; +#endif + _set_preferred_cluster(grp, -1); + grp->ravg.normalized_util = 0; + } + + raw_spin_unlock_irqrestore(&grp->lock, irqflag); + __task_rq_unlock(rq, &flag); + + /* Reserved groups cannot be destroyed */ + if (empty_group && grp->id != DEFAULT_CGROUP_COLOC_ID) { + /* + * We test whether grp->list is attached with list_empty() + * hence re-init the list after deletion. + */ + write_lock(&related_thread_group_lock); + list_del_init(&grp->list); + write_unlock(&related_thread_group_lock); + } +} + +static int +add_task_to_group(struct task_struct *p, struct related_thread_group *grp) +{ + struct rq *rq = NULL; + struct rq_flags flag; + unsigned long irqflag; +#ifdef CONFIG_UCLAMP_TASK + int boost; +#endif + + /* + * Change p->grp under rq->lock. Will prevent races with read-side + * reference of p->grp in various hot-paths + */ + rq = __task_rq_lock(p, &flag); + transfer_busy_time(rq, grp, p, ADD_TASK); + + raw_spin_lock_irqsave(&grp->lock, irqflag); + list_add(&p->grp_list, &grp->tasks); + rcu_assign_pointer(p->grp, grp); + if (p->on_cpu) { + grp->nr_running++; + if (grp->nr_running == 1) + grp->mark_start = max(grp->mark_start, + sched_ktime_clock()); + } + +#ifdef CONFIG_UCLAMP_TASK + boost = (int)uclamp_eff_value(p, UCLAMP_MIN); + if (boost > grp->max_boost) + grp->max_boost = boost; +#endif + raw_spin_unlock_irqrestore(&grp->lock, irqflag); + __task_rq_unlock(rq, &flag); + + return 0; +} + +static int __sched_set_group_id(struct task_struct *p, unsigned int group_id) +{ + int rc = 0; + unsigned long flags; + struct related_thread_group *grp = NULL; + struct related_thread_group *old_grp = NULL; + + if (group_id >= MAX_NUM_CGROUP_COLOC_ID) + return -EINVAL; + + raw_spin_lock_irqsave(&p->pi_lock, flags); + old_grp = p->grp; + if ((current != p && (p->flags & PF_EXITING)) || + (!old_grp && !group_id)) + goto done; + + /* + * If the system has CONFIG_SCHED_RTG_CGROUP, only tasks in DEFAULT group + * can be directly switched to other groups. 
+ * + * In other cases, Switching from one group to another directly is not permitted. + */ + if (old_grp && group_id) { +#ifdef CONFIG_SCHED_RTG_CGROUP + if (old_grp->id == DEFAULT_CGROUP_COLOC_ID) { + remove_task_from_group(p); + } else { +#endif + rc = -EINVAL; + goto done; +#ifdef CONFIG_SCHED_RTG_CGROUP + } +#endif + } + + if (!group_id) { + remove_task_from_group(p); + goto done; + } + + grp = lookup_related_thread_group(group_id); + write_lock(&related_thread_group_lock); + if (list_empty(&grp->list)) + list_add(&grp->list, &active_related_thread_groups); + write_unlock(&related_thread_group_lock); + + rc = add_task_to_group(p, grp); +done: + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + + return rc; +} + +/* group_id == 0: remove task from rtg */ +int sched_set_group_id(struct task_struct *p, unsigned int group_id) +{ + /* DEFAULT_CGROUP_COLOC_ID is a reserved id */ + if (group_id == DEFAULT_CGROUP_COLOC_ID) + return -EINVAL; + + return __sched_set_group_id(p, group_id); +} + +unsigned int sched_get_group_id(struct task_struct *p) +{ + unsigned int group_id; + struct related_thread_group *grp = NULL; + + rcu_read_lock(); + grp = task_related_thread_group(p); + group_id = grp ? grp->id : 0; + rcu_read_unlock(); + + return group_id; +} + +void update_group_nr_running(struct task_struct *p, int event, u64 wallclock) +{ + struct related_thread_group *grp; + bool need_update = false; + + rcu_read_lock(); + grp = task_related_thread_group(p); + if (!grp) { + rcu_read_unlock(); + return; + } + + raw_spin_lock(&grp->lock); + + if (event == PICK_NEXT_TASK) + grp->nr_running++; + else if (event == PUT_PREV_TASK) + grp->nr_running--; + + if ((int)grp->nr_running < 0) { + WARN_ON(1); + grp->nr_running = 0; + } + + /* update preferred cluster if no update long */ + if (wallclock - grp->last_util_update_time > grp->util_update_timeout) + need_update = true; + + raw_spin_unlock(&grp->lock); + + rcu_read_unlock(); + + if (need_update && grp->rtg_class && grp->rtg_class->sched_update_rtg_tick && + grp->id != DEFAULT_CGROUP_COLOC_ID) + grp->rtg_class->sched_update_rtg_tick(grp); +} + +int sched_set_group_window_size(unsigned int grp_id, unsigned int window_size) +{ + struct related_thread_group *grp = NULL; + unsigned long flag; + + if (!window_size) + return -EINVAL; + + grp = lookup_related_thread_group(grp_id); + if (!grp) { + pr_err("set window size for group %d fail\n", grp_id); + return -ENODEV; + } + + raw_spin_lock_irqsave(&grp->lock, flag); + grp->window_size = window_size; + raw_spin_unlock_irqrestore(&grp->lock, flag); + + return 0; +} + +void group_time_rollover(struct group_ravg *ravg) +{ + ravg->prev_window_load = ravg->curr_window_load; + ravg->curr_window_load = 0; + ravg->prev_window_exec = ravg->curr_window_exec; + ravg->curr_window_exec = 0; +} + +int sched_set_group_window_rollover(unsigned int grp_id) +{ + struct related_thread_group *grp = NULL; + u64 wallclock; + unsigned long flag; +#ifdef CONFIG_UCLAMP_TASK + struct task_struct *p = NULL; + int boost; +#endif + + grp = lookup_related_thread_group(grp_id); + if (!grp) { + pr_err("set window start for group %d fail\n", grp_id); + return -ENODEV; + } + + raw_spin_lock_irqsave(&grp->lock, flag); + + wallclock = sched_ktime_clock(); + grp->prev_window_time = wallclock - grp->window_start; + grp->window_start = wallclock; + grp->max_boost = 0; + +#ifdef CONFIG_UCLAMP_TASK + list_for_each_entry(p, &grp->tasks, grp_list) { + boost = (int)uclamp_eff_value(p, UCLAMP_MIN); + if (boost > 0) + grp->max_boost = boost; + } +#endif + + 
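+	/* age the group window: curr load/exec roll over into prev and restart */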
group_time_rollover(&grp->ravg); + raw_spin_unlock_irqrestore(&grp->lock, flag); + + return 0; +} + +static void add_to_group_time(struct related_thread_group *grp, struct rq *rq, u64 wallclock) +{ + u64 delta_exec, delta_load; + u64 mark_start = grp->mark_start; + u64 window_start = grp->window_start; + + if (unlikely(wallclock <= mark_start)) + return; + + /* per group load tracking in RTG */ + if (likely(mark_start >= window_start)) { + /* + * ws ms wc + * | | | + * V V V + * |---------------| + */ + delta_exec = wallclock - mark_start; + grp->ravg.curr_window_exec += delta_exec; + + delta_load = scale_exec_time(delta_exec, rq); + grp->ravg.curr_window_load += delta_load; + } else { + /* + * ms ws wc + * | | | + * V V V + * -----|---------- + */ + /* prev window statistic */ + delta_exec = window_start - mark_start; + grp->ravg.prev_window_exec += delta_exec; + + delta_load = scale_exec_time(delta_exec, rq); + grp->ravg.prev_window_load += delta_load; + + /* curr window statistic */ + delta_exec = wallclock - window_start; + grp->ravg.curr_window_exec += delta_exec; + + delta_load = scale_exec_time(delta_exec, rq); + grp->ravg.curr_window_load += delta_load; + } +} + +static inline void add_to_group_demand(struct related_thread_group *grp, + struct rq *rq, u64 wallclock) +{ + if (unlikely(wallclock <= grp->window_start)) + return; + + add_to_group_time(grp, rq, wallclock); +} + +static int account_busy_for_group_demand(struct task_struct *p, int event) +{ + /* + *No need to bother updating task demand for exiting tasks + * or the idle task. + */ + if (exiting_task(p) || is_idle_task(p)) + return 0; + + if (event == TASK_WAKE || event == TASK_MIGRATE) + return 0; + + return 1; +} + +void update_group_demand(struct task_struct *p, struct rq *rq, + int event, u64 wallclock) +{ + struct related_thread_group *grp; + + if (!account_busy_for_group_demand(p, event)) + return; + + rcu_read_lock(); + grp = task_related_thread_group(p); + if (!grp) { + rcu_read_unlock(); + return; + } + + raw_spin_lock(&grp->lock); + + if (grp->nr_running == 1) + grp->mark_start = max(grp->mark_start, p->ravg.mark_start); + + add_to_group_demand(grp, rq, wallclock); + + grp->mark_start = wallclock; + + raw_spin_unlock(&grp->lock); + + rcu_read_unlock(); +} + +void sched_update_rtg_tick(struct task_struct *p) +{ + struct related_thread_group *grp = NULL; + + rcu_read_lock(); + grp = task_related_thread_group(p); + if (!grp || list_empty(&grp->tasks)) { + rcu_read_unlock(); + return; + } + + if (grp->rtg_class && grp->rtg_class->sched_update_rtg_tick) + grp->rtg_class->sched_update_rtg_tick(grp); + + rcu_read_unlock(); +} + +int preferred_cluster(struct sched_cluster *cluster, struct task_struct *p) +{ + struct related_thread_group *grp = NULL; + int rc = 1; + + rcu_read_lock(); + + grp = task_related_thread_group(p); + if (grp != NULL) + rc = (grp->preferred_cluster == cluster); + + rcu_read_unlock(); + return rc; +} + +unsigned int get_cluster_grp_running(int cluster_id) +{ + struct related_thread_group *grp = NULL; + unsigned int total_grp_running = 0; + unsigned long flag, rtg_flag; + unsigned int i; + + read_lock_irqsave(&related_thread_group_lock, rtg_flag); + + /* grp_id 0 is used for exited tasks */ + for (i = 1; i < MAX_NUM_CGROUP_COLOC_ID; i++) { + grp = lookup_related_thread_group(i); + if (!grp) + continue; + + raw_spin_lock_irqsave(&grp->lock, flag); + if (grp->preferred_cluster != NULL && + grp->preferred_cluster->id == cluster_id) + total_grp_running += grp->nr_running; + 
raw_spin_unlock_irqrestore(&grp->lock, flag); + } + read_unlock_irqrestore(&related_thread_group_lock, rtg_flag); + + return total_grp_running; +} + +static void _set_preferred_cluster(struct related_thread_group *grp, + int sched_cluster_id) +{ + struct sched_cluster *cluster = NULL; + struct sched_cluster *cluster_found = NULL; + + if (sched_cluster_id == -1) { + grp->preferred_cluster = NULL; + return; + } + + for_each_sched_cluster_reverse(cluster) { + if (cluster->id == sched_cluster_id) { + cluster_found = cluster; + break; + } + } + + if (cluster_found != NULL) + grp->preferred_cluster = cluster_found; + else + pr_err("cannot found sched_cluster_id=%d\n", sched_cluster_id); +} + +/* + * sched_cluster_id == -1: grp will set to NULL + */ +static void set_preferred_cluster(struct related_thread_group *grp, + int sched_cluster_id) +{ + unsigned long flag; + + raw_spin_lock_irqsave(&grp->lock, flag); + _set_preferred_cluster(grp, sched_cluster_id); + raw_spin_unlock_irqrestore(&grp->lock, flag); +} + +int sched_set_group_preferred_cluster(unsigned int grp_id, int sched_cluster_id) +{ + struct related_thread_group *grp = NULL; + + /* DEFAULT_CGROUP_COLOC_ID is a reserved id */ + if (grp_id == DEFAULT_CGROUP_COLOC_ID || + grp_id >= MAX_NUM_CGROUP_COLOC_ID) + return -EINVAL; + + grp = lookup_related_thread_group(grp_id); + if (!grp) { + pr_err("set preferred cluster for group %d fail\n", grp_id); + return -ENODEV; + } + set_preferred_cluster(grp, sched_cluster_id); + + return 0; +} + +struct cpumask *find_rtg_target(struct task_struct *p) +{ + struct related_thread_group *grp = NULL; + struct sched_cluster *preferred_cluster = NULL; + struct cpumask *rtg_target = NULL; + + rcu_read_lock(); + grp = task_related_thread_group(p); + rcu_read_unlock(); + + if (!grp) + return NULL; + + preferred_cluster = grp->preferred_cluster; + if (!preferred_cluster) + return NULL; + + rtg_target = &preferred_cluster->cpus; + if (!task_fits_max(p, cpumask_first(rtg_target))) + return NULL; + + return rtg_target; +} + +int find_rtg_cpu(struct task_struct *p) +{ + int i; + cpumask_t search_cpus = CPU_MASK_NONE; + int max_spare_cap_cpu = -1; + unsigned long max_spare_cap = 0; + int idle_backup_cpu = -1; + struct cpumask *preferred_cpus = find_rtg_target(p); + + if (!preferred_cpus) + return -1; + + cpumask_and(&search_cpus, p->cpus_ptr, cpu_online_mask); +#ifdef CONFIG_CPU_ISOLATION_OPT + cpumask_andnot(&search_cpus, &search_cpus, cpu_isolated_mask); +#endif + + /* search the perferred idle cpu */ + for_each_cpu_and(i, &search_cpus, preferred_cpus) { + if (is_reserved(i)) + continue; + + if (idle_cpu(i) || (i == task_cpu(p) && p->state == TASK_RUNNING)) { + trace_find_rtg_cpu(p, preferred_cpus, "prefer_idle", i); + return i; + } + } + + for_each_cpu(i, &search_cpus) { + unsigned long spare_cap; + + if (sched_cpu_high_irqload(i)) + continue; + + if (is_reserved(i)) + continue; + + /* take the Active LB CPU as idle_backup_cpu */ + if (idle_cpu(i) || (i == task_cpu(p) && p->state == TASK_RUNNING)) { + /* find the idle_backup_cpu with max capacity */ + if (idle_backup_cpu == -1 || + capacity_orig_of(i) > capacity_orig_of(idle_backup_cpu)) + idle_backup_cpu = i; + + continue; + } + + spare_cap = capacity_spare_without(i, p); + if (spare_cap > max_spare_cap) { + max_spare_cap = spare_cap; + max_spare_cap_cpu = i; + } + } + + if (idle_backup_cpu != -1) { + trace_find_rtg_cpu(p, preferred_cpus, "idle_backup", idle_backup_cpu); + return idle_backup_cpu; + } + + trace_find_rtg_cpu(p, preferred_cpus, "max_spare", 
max_spare_cap_cpu); + + return max_spare_cap_cpu; +} + +int sched_set_group_util_invalid_interval(unsigned int grp_id, + unsigned int interval) +{ + struct related_thread_group *grp = NULL; + unsigned long flag; + + if (interval == 0) + return -EINVAL; + + /* DEFAULT_CGROUP_COLOC_ID is a reserved id */ + if (grp_id == DEFAULT_CGROUP_COLOC_ID || + grp_id >= MAX_NUM_CGROUP_COLOC_ID) + return -EINVAL; + + grp = lookup_related_thread_group(grp_id); + if (!grp) { + pr_err("set invalid interval for group %d fail\n", grp_id); + return -ENODEV; + } + + raw_spin_lock_irqsave(&grp->lock, flag); + if ((signed int)interval < 0) + grp->util_invalid_interval = DEFAULT_UTIL_INVALID_INTERVAL; + else + grp->util_invalid_interval = interval * NSEC_PER_MSEC; + + raw_spin_unlock_irqrestore(&grp->lock, flag); + + return 0; +} + +static inline bool +group_should_invalid_util(struct related_thread_group *grp, u64 now) +{ + if (grp->util_invalid_interval == DEFAULT_UTIL_INVALID_INTERVAL) + return false; + + return (now - grp->last_freq_update_time >= grp->util_invalid_interval); +} + +static inline bool valid_normalized_util(struct related_thread_group *grp) +{ + struct task_struct *p = NULL; + cpumask_t rtg_cpus = CPU_MASK_NONE; + bool valid = false; + + if (grp->nr_running != 0) { + list_for_each_entry(p, &grp->tasks, grp_list) { + get_task_struct(p); + if (p->state == TASK_RUNNING) + cpumask_set_cpu(task_cpu(p), &rtg_cpus); + trace_sched_rtg_task_each(grp->id, grp->nr_running, p); + put_task_struct(p); + } + + valid = cpumask_intersects(&rtg_cpus, + &grp->preferred_cluster->cpus); + } + trace_sched_rtg_valid_normalized_util(grp->id, grp->nr_running, &rtg_cpus, valid); + + return valid; +} + +void sched_get_max_group_util(const struct cpumask *query_cpus, + unsigned long *util, unsigned int *freq) +{ + struct related_thread_group *grp = NULL; + unsigned long max_grp_util = 0; + unsigned int max_grp_freq = 0; + u64 now = ktime_get_ns(); + unsigned long rtg_flag; + unsigned long flag; + + /* + * sum the prev_runnable_sum for each rtg, + * return the max rtg->load + */ + read_lock_irqsave(&related_thread_group_lock, rtg_flag); + if (list_empty(&active_related_thread_groups)) + goto unlock; + + for_each_related_thread_group(grp) { + raw_spin_lock_irqsave(&grp->lock, flag); + if (!list_empty(&grp->tasks) && + grp->preferred_cluster != NULL && + cpumask_intersects(query_cpus, + &grp->preferred_cluster->cpus) && + !group_should_invalid_util(grp, now)) { + + if (grp->ravg.normalized_util > max_grp_util) + max_grp_util = grp->ravg.normalized_util; + } + raw_spin_unlock_irqrestore(&grp->lock, flag); + } + +unlock: + read_unlock_irqrestore(&related_thread_group_lock, rtg_flag); + + *freq = max_grp_freq; + *util = max_grp_util; +} + +static struct sched_cluster *best_cluster(struct related_thread_group *grp) +{ + struct sched_cluster *cluster = NULL; + struct sched_cluster *max_cluster = NULL; + int cpu; + unsigned long util = grp->ravg.normalized_util; + unsigned long boosted_grp_util = util + grp->max_boost; + unsigned long max_cap = 0; + unsigned long cap = 0; + + /* find new cluster */ + for_each_sched_cluster(cluster) { + cpu = cpumask_first(&cluster->cpus); + cap = capacity_orig_of(cpu); + if (cap > max_cap) { + max_cap = cap; + max_cluster = cluster; + } + + if (boosted_grp_util <= cap) + return cluster; + } + + return max_cluster; +} + +static bool group_should_update_freq(struct related_thread_group *grp, + int cpu, unsigned int flags, u64 now) +{ + if (!grp) + return true; + + if (flags & RTG_FREQ_FORCE_UPDATE) { 
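+		/* forced updates bypass the freq_update_interval rate limit below */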
+ return true; + } else if (flags & RTG_FREQ_NORMAL_UPDATE) { + if (now - grp->last_freq_update_time >= + grp->freq_update_interval) + return true; + } + + return false; +} + +int sched_set_group_normalized_util(unsigned int grp_id, unsigned long util, + unsigned int flag) +{ + struct related_thread_group *grp = NULL; + bool need_update_prev_freq = false; + bool need_update_next_freq = false; + u64 now; + unsigned long flags; + struct sched_cluster *preferred_cluster = NULL; + int prev_cpu; + int next_cpu; + + grp = lookup_related_thread_group(grp_id); + if (!grp) { + pr_err("set normalized util for group %d fail\n", grp_id); + return -ENODEV; + } + + raw_spin_lock_irqsave(&grp->lock, flags); + + if (list_empty(&grp->tasks)) { + raw_spin_unlock_irqrestore(&grp->lock, flags); + return 0; + } + + grp->ravg.normalized_util = util; + + preferred_cluster = best_cluster(grp); + + /* update prev_cluster force when preferred_cluster changed */ + if (!grp->preferred_cluster) { + grp->preferred_cluster = preferred_cluster; + } else if (grp->preferred_cluster != preferred_cluster) { + prev_cpu = cpumask_first(&grp->preferred_cluster->cpus); + grp->preferred_cluster = preferred_cluster; + + need_update_prev_freq = true; + } + + if (grp->preferred_cluster != NULL) + next_cpu = cpumask_first(&grp->preferred_cluster->cpus); + else + next_cpu = 0; + + now = ktime_get_ns(); + grp->last_util_update_time = now; + need_update_next_freq = + group_should_update_freq(grp, next_cpu, flag, now); + if (need_update_next_freq) + grp->last_freq_update_time = now; + + raw_spin_unlock_irqrestore(&grp->lock, flags); + + if (need_update_prev_freq) + cpufreq_update_util(cpu_rq(prev_cpu), + SCHED_CPUFREQ_FORCE_UPDATE | SCHED_CPUFREQ_WALT); + + if (need_update_next_freq) + cpufreq_update_util(cpu_rq(next_cpu), + SCHED_CPUFREQ_FORCE_UPDATE | SCHED_CPUFREQ_WALT); + + return 0; +} + +int sched_set_group_freq_update_interval(unsigned int grp_id, unsigned int interval) +{ + struct related_thread_group *grp = NULL; + unsigned long flag; + + if ((signed int)interval <= 0) + return -EINVAL; + + /* DEFAULT_CGROUP_COLOC_ID is a reserved id */ + if (grp_id == DEFAULT_CGROUP_COLOC_ID || + grp_id >= MAX_NUM_CGROUP_COLOC_ID) + return -EINVAL; + + grp = lookup_related_thread_group(grp_id); + if (!grp) { + pr_err("set update interval for group %d fail\n", grp_id); + return -ENODEV; + } + + raw_spin_lock_irqsave(&grp->lock, flag); + grp->freq_update_interval = interval * NSEC_PER_MSEC; + raw_spin_unlock_irqrestore(&grp->lock, flag); + + return 0; +} + +#ifdef CONFIG_SCHED_RTG_CGROUP +#ifdef CONFIG_UCLAMP_TASK_GROUP +static inline bool uclamp_task_colocated(struct task_struct *p) +{ + struct cgroup_subsys_state *css; + struct task_group *tg; + bool colocate; + + rcu_read_lock(); + css = task_css(p, cpu_cgrp_id); + if (!css) { + rcu_read_unlock(); + return false; + } + tg = container_of(css, struct task_group, css); + colocate = tg->colocate; + rcu_read_unlock(); + + return colocate; +} +#else +static inline bool uclamp_task_colocated(struct task_struct *p) +{ + return false; +} +#endif /* CONFIG_UCLAMP_TASK_GROUP */ + +void add_new_task_to_grp(struct task_struct *new) +{ + struct related_thread_group *grp = NULL; + unsigned long flag; + + /* + * If the task does not belong to colocated schedtune + * cgroup, nothing to do. We are checking this without + * lock. Even if there is a race, it will be added + * to the co-located cgroup via cgroup attach. 
+	 */
+	if (!uclamp_task_colocated(new))
+		return;
+
+	grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
+	write_lock_irqsave(&related_thread_group_lock, flag);
+
+	/*
+	 * It's possible that someone already added the new task to the
+	 * group, or it might have been taken out of the colocated schedtune
+	 * cgroup. Check these conditions under the lock.
+	 */
+	if (!uclamp_task_colocated(new) || new->grp) {
+		write_unlock_irqrestore(&related_thread_group_lock, flag);
+		return;
+	}
+
+	raw_spin_lock(&grp->lock);
+
+	rcu_assign_pointer(new->grp, grp);
+	list_add(&new->grp_list, &grp->tasks);
+
+	raw_spin_unlock(&grp->lock);
+	write_unlock_irqrestore(&related_thread_group_lock, flag);
+}
+
+
+/*
+ * We create a default colocation group at boot. There is no need to
+ * synchronize tasks between cgroups at creation time because the
+ * correct cgroup hierarchy is not available at boot. Therefore cgroup
+ * colocation is turned off by default even though the colocation group
+ * itself has been allocated. Furthermore, this colocation group cannot
+ * be destroyed once it has been created. All of this is done as part
+ * of runtime optimizations.
+ *
+ * The job of synchronizing tasks to the colocation group is done when
+ * the colocation flag in the cgroup is turned on.
+ */
+static int __init create_default_coloc_group(void)
+{
+	struct related_thread_group *grp = NULL;
+	unsigned long flags;
+
+	grp = lookup_related_thread_group(DEFAULT_CGROUP_COLOC_ID);
+	write_lock_irqsave(&related_thread_group_lock, flags);
+	list_add(&grp->list, &active_related_thread_groups);
+	write_unlock_irqrestore(&related_thread_group_lock, flags);
+
+	return 0;
+}
+late_initcall(create_default_coloc_group);
+
+int sync_cgroup_colocation(struct task_struct *p, bool insert)
+{
+	unsigned int grp_id = insert ? DEFAULT_CGROUP_COLOC_ID : 0;
+	unsigned int old_grp_id;
+
+	if (p) {
+		old_grp_id = sched_get_group_id(p);
+		/*
+		 * If the task is already in a group which is not
+		 * DEFAULT_CGROUP_COLOC_ID, we should not change the group id
+		 * during the switch to background.
+		 */
+		if ((old_grp_id != DEFAULT_CGROUP_COLOC_ID) && (grp_id == 0))
+			return 0;
+	}
+
+	return __sched_set_group_id(p, grp_id);
+}
+#endif /* CONFIG_SCHED_RTG_CGROUP */
+
+#ifdef CONFIG_SCHED_RTG_DEBUG
+#define seq_printf_rtg(m, x...) \
+do { \
+	if (m) \
+		seq_printf(m, x); \
+	else \
+		printk(x); \
+} while (0)
+
+static void print_rtg_info(struct seq_file *file,
+	const struct related_thread_group *grp)
+{
+	seq_printf_rtg(file, "RTG_ID : %d\n", grp->id);
+	seq_printf_rtg(file, "RTG_INTERVAL : UPDATE:%lums#INVALID:%lums\n",
+		grp->freq_update_interval / NSEC_PER_MSEC,
+		grp->util_invalid_interval / NSEC_PER_MSEC);
+	seq_printf_rtg(file, "RTG_CLUSTER : %d\n",
+		grp->preferred_cluster ?
grp->preferred_cluster->id : -1); +#ifdef CONFIG_SCHED_RTG_RT_THREAD_LIMIT + seq_printf_rtg(file, "RTG_RT_THREAD_NUM : %d/%d\n", + read_rtg_rt_thread_num(), RTG_MAX_RT_THREAD_NUM); +#endif +} + +static char rtg_task_state_to_char(const struct task_struct *tsk) +{ + static const char state_char[] = "RSDTtXZPI"; + unsigned int tsk_state = READ_ONCE(tsk->state); + unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; + + BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); + BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1); + + if (tsk_state == TASK_IDLE) + state = TASK_REPORT_IDLE; + return state_char[fls(state)]; +} + +static inline void print_rtg_task_header(struct seq_file *file, + const char *header, int run, int nr) +{ + seq_printf_rtg(file, + "%s : %d/%d\n" + "STATE COMM PID PRIO CPU\n" + "---------------------------------------------------------\n", + header, run, nr); +} + +static inline void print_rtg_task(struct seq_file *file, + const struct task_struct *tsk) +{ + seq_printf_rtg(file, "%5c %15s %5d %5d %5d(%*pbl)\n", + rtg_task_state_to_char(tsk), tsk->comm, tsk->pid, + tsk->prio, task_cpu(tsk), cpumask_pr_args(tsk->cpus_ptr)); +} + +static void print_rtg_threads(struct seq_file *file, + const struct related_thread_group *grp) +{ + struct task_struct *tsk = NULL; + int nr_thread = 0; + + list_for_each_entry(tsk, &grp->tasks, grp_list) + nr_thread++; + + if (!nr_thread) + return; + + print_rtg_task_header(file, "RTG_THREADS", + grp->nr_running, nr_thread); + list_for_each_entry(tsk, &grp->tasks, grp_list) { + if (unlikely(!tsk)) + continue; + get_task_struct(tsk); + print_rtg_task(file, tsk); + put_task_struct(tsk); + } + seq_printf_rtg(file, "---------------------------------------------------------\n"); +} + +static int sched_rtg_debug_show(struct seq_file *file, void *param) +{ + struct related_thread_group *grp = NULL; + unsigned long flags; + bool have_task = false; + + for_each_related_thread_group(grp) { + if (unlikely(!grp)) { + seq_printf_rtg(file, "RTG none\n"); + return 0; + } + + raw_spin_lock_irqsave(&grp->lock, flags); + if (list_empty(&grp->tasks)) { + raw_spin_unlock_irqrestore(&grp->lock, flags); + continue; + } + + if (!have_task) + have_task = true; + + seq_printf_rtg(file, "\n\n"); + print_rtg_info(file, grp); + print_rtg_threads(file, grp); + raw_spin_unlock_irqrestore(&grp->lock, flags); + } + + if (!have_task) + seq_printf_rtg(file, "RTG tasklist empty\n"); + + return 0; +} + +static int sched_rtg_debug_release(struct inode *inode, struct file *file) +{ + seq_release(inode, file); + return 0; +} + +static int sched_rtg_debug_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_rtg_debug_show, NULL); +} + +static const struct proc_ops sched_rtg_debug_fops = { + .proc_open = sched_rtg_debug_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = sched_rtg_debug_release, +}; + +static int __init init_sched_rtg_debug_procfs(void) +{ + struct proc_dir_entry *pe = NULL; + + pe = proc_create("sched_rtg_debug", + 0400, NULL, &sched_rtg_debug_fops); + if (unlikely(!pe)) + return -ENOMEM; + return 0; +} +late_initcall(init_sched_rtg_debug_procfs); +#endif diff --git a/kernel/sched/rtg/rtg.h b/kernel/sched/rtg/rtg.h new file mode 100755 index 0000000000000000000000000000000000000000..4f0cedc332f094391a7d3690acde8fb6e268bbbd --- /dev/null +++ b/kernel/sched/rtg/rtg.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * related thread group sched header + */ +#ifndef __RTG_H +#define 
__RTG_H + +#include +#include + +#define for_each_sched_cluster_reverse(cluster) \ + list_for_each_entry_reverse(cluster, &cluster_head, list) + +#ifdef CONFIG_SCHED_RTG +void init_task_rtg(struct task_struct *p); +int alloc_related_thread_groups(void); +struct related_thread_group *lookup_related_thread_group(unsigned int group_id); +struct related_thread_group *task_related_thread_group(struct task_struct *p); +void update_group_nr_running(struct task_struct *p, int event, u64 wallclock); +struct rq; +void update_group_demand(struct task_struct *p, struct rq *rq, + int event, u64 wallclock); +int sched_set_group_window_size(unsigned int grp_id, unsigned int window_size); +int sched_set_group_window_rollover(unsigned int grp_id); +struct group_cpu_time *group_update_cpu_time(struct rq *rq, + struct related_thread_group *grp); +void sched_update_rtg_tick(struct task_struct *p); +int preferred_cluster(struct sched_cluster *cluster, struct task_struct *p); +int sched_set_group_preferred_cluster(unsigned int grp_id, int sched_cluster_id); +struct cpumask *find_rtg_target(struct task_struct *p); +int find_rtg_cpu(struct task_struct *p); +int sched_set_group_util_invalid_interval(unsigned int grp_id, + unsigned int interval); +int sched_set_group_normalized_util(unsigned int grp_id, unsigned long util, + unsigned int flag); +void sched_get_max_group_util(const struct cpumask *query_cpus, + unsigned long *util, unsigned int *freq); +int sched_set_group_freq_update_interval(unsigned int grp_id, + unsigned int interval); +#ifdef CONFIG_SCHED_RTG_CGROUP +int sync_cgroup_colocation(struct task_struct *p, bool insert); +void add_new_task_to_grp(struct task_struct *new); +#else +static inline void add_new_task_to_grp(struct task_struct *new) {} +#endif /* CONFIG_SCHED_RTG_CGROUP */ +#else +static inline int alloc_related_thread_groups(void) { return 0; } +static inline int sched_set_group_preferred_cluster(unsigned int grp_id, + int sched_cluster_id) +{ + return 0; +} +static inline int sched_set_group_normalized_util(unsigned int grp_id, unsigned long util, + unsigned int flag) +{ + return 0; +} +static inline void sched_get_max_group_util(const struct cpumask *query_cpus, + unsigned long *util, unsigned int *freq) +{ +} +static inline void add_new_task_to_grp(struct task_struct *new) {} +#endif /* CONFIG_SCHED_RTG */ +#endif diff --git a/kernel/sched/rtg/rtg_ctrl.c b/kernel/sched/rtg/rtg_ctrl.c new file mode 100755 index 0000000000000000000000000000000000000000..164f1b2373b9a2afbf190fe445bf500ab0e0a35b --- /dev/null +++ b/kernel/sched/rtg/rtg_ctrl.c @@ -0,0 +1,934 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * rtg control entry + * + * Copyright (c) 2022-2023 Huawei Technologies Co., Ltd. 
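+ *
+ * Rough userspace usage, assuming the matching uapi ioctl definitions
+ * (macro and variable names below are illustrative only):
+ *
+ *   int fd = open("/dev/sched_rtg_ctrl", O_RDWR);
+ *   char cfg[] = "sched_cycle:4;frame_max_util:750;invalid_interval:50";
+ *   struct rtg_enable_data en = { .enable = 1, .len = strlen(cfg), .data = cfg };
+ *   ioctl(fd, CMD_ID_SET_ENABLE, &en);       // parsed by set_enable_config()
+ *   ioctl(fd, CMD_ID_SET_RTG, &grp_data);    // create/populate a frame RTG
+ *   ioctl(fd, CMD_ID_FRAME_FREQ, &state);    // per-frame begin/end hints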
+ */ + +#include "rtg.h" +#include "rtg_ctrl.h" + +#include +#include + +#ifdef CONFIG_AUTHORITY_CTRL +#include +#endif + +#include +#include +#include + +atomic_t g_rtg_enable = ATOMIC_INIT(0); +static atomic_t g_rt_frame_num = ATOMIC_INIT(0); +static int g_frame_max_util = DEFAULT_MAX_UTIL; +static int g_max_rt_frames = DEFAULT_MAX_RT_FRAME; +typedef long (*rtg_ctrl_func)(int abi, void __user *arg); + +static long ctrl_set_enable(int abi, void __user *uarg); +static long ctrl_set_rtg(int abi, void __user *uarg); +static long ctrl_set_rtg_attr(int abi, void __user *uarg); +static long ctrl_begin_frame(int abi, void __user *uarg); +static long ctrl_end_frame(int abi, void __user *uarg); +static long ctrl_end_scene(int abi, void __user *uarg); +static long ctrl_set_min_util(int abi, void __user *uarg); +static long ctrl_set_margin(int abi, void __user *uarg); +static long ctrl_search_rtg(int abi, void __user *uarg); +static long ctrl_get_enable(int abi, void __user *uarg); + +static rtg_ctrl_func g_func_array[RTG_CTRL_MAX_NR] = { + NULL, /* reserved */ + ctrl_set_enable, // 1 + ctrl_set_rtg, + NULL, + ctrl_set_rtg_attr, + ctrl_begin_frame, // 5 + ctrl_end_frame, + ctrl_end_scene, + ctrl_set_min_util, + ctrl_set_margin, + NULL, + NULL, + ctrl_search_rtg, + ctrl_get_enable +}; + +static int init_proc_state(const int *config, int len); +static void deinit_proc_state(void); + +static int set_enable_config(char *config_str) +{ + char *p = NULL; + char *tmp = NULL; + int value; + int config[RTG_CONFIG_NUM]; + int i; + int ret = 0; + + for (i = 0; i < RTG_CONFIG_NUM; i++) + config[i] = INVALID_VALUE; + /* eg: key1:value1;key2:value2;key3:value3 */ + for (p = strsep(&config_str, ";"); p != NULL; + p = strsep(&config_str, ";")) { + tmp = strsep(&p, ":"); + if ((tmp == NULL) || (p == NULL)) + continue; + if (kstrtoint((const char *)p, DECIMAL, &value)) + return -INVALID_ARG; + + if (!strcmp(tmp, "sched_cycle")) + config[RTG_FREQ_CYCLE] = value; + else if (!strcmp(tmp, "frame_max_util")) + config[RTG_FRAME_MAX_UTIL] = value; + else if (!strcmp(tmp, "invalid_interval")) + config[RTG_INVALID_INTERVAL] = value; + else + continue; + } + + for (i = 0; i < RTG_CONFIG_NUM; i++) + pr_info("[SCHED_RTG] config[%d] = %d\n", i, config[i]); + + ret = init_proc_state(config, RTG_CONFIG_NUM); + + return ret; +} + +static void rtg_enable(int abi, const struct rtg_enable_data *data) +{ + char temp[MAX_DATA_LEN]; + int ret = -1; + + if (atomic_read(&g_rtg_enable) == 1) { + pr_info("[SCHED_RTG] already enabled!\n"); + return; + } + + if ((data->len <= 0) || (data->len >= MAX_DATA_LEN)) { + pr_err("[SCHED_RTG] %s data len invalid\n", __func__); + return; + } + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpointer-to-int-cast" + switch (abi) { + case IOCTL_ABI_ARM32: + ret = copy_from_user(&temp, + (void __user *)compat_ptr((compat_uptr_t)data->data), data->len); + break; + case IOCTL_ABI_AARCH64: + ret = copy_from_user(&temp, (void __user *)data->data, data->len); + break; + default: + pr_err("[SCHED_RTG] abi format error\n"); + break; + } + if (ret) { + pr_err("[SCHED_RTG] %s copy user data failed\n", __func__); + return; + } +#pragma GCC diagnostic pop + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wincompatible-pointer-types" + temp[data->len] = '\0'; + + if (set_enable_config(&temp) != SUCC) { + pr_err("[SCHED_RTG] %s failed!\n", __func__); + return; + } +#pragma GCC diagnostic pop + + atomic_set(&g_rtg_enable, 1); + pr_info("[SCHED_RTG] enabled!\n"); +} + +static void 
rtg_disable(void) +{ + if (atomic_read(&g_rtg_enable) == 0) { + pr_info("[SCHED_RTG] already disabled!\n"); + return; + } + pr_info("[SCHED_RTG] disabled!\n"); + atomic_set(&g_rtg_enable, 0); + deinit_proc_state(); +} + +static inline bool is_rt_type(int type) +{ + return (type >= VIP && type < NORMAL_TASK); +} + +static int do_update_rt_frame_num(struct frame_info *frame_info, int new_type) +{ + int old_type; + int ret = SUCC; + + mutex_lock(&frame_info->lock); + old_type = frame_info->prio - DEFAULT_RT_PRIO; + if (is_rt_type(new_type) == is_rt_type(old_type)) + goto out; + + if (is_rt_type(old_type)) { + if (atomic_read(&g_rt_frame_num) > 0) + atomic_dec(&g_rt_frame_num); + } else if (is_rt_type(new_type)) { + if (atomic_read(&g_rt_frame_num) < g_max_rt_frames) { + atomic_inc(&g_rt_frame_num); + } else { + pr_err("[SCHED_RTG]: %s g_max_rt_frames is %d\n", + __func__, g_max_rt_frames); + ret = -INVALID_ARG; + } + } +out: + mutex_unlock(&frame_info->lock); + + return ret; +} + +static int update_rt_frame_num(struct frame_info *frame_info, int new_type, int cmd) +{ + int ret = SUCC; + + switch (cmd) { + case UPDATE_RTG_FRAME: + ret = do_update_rt_frame_num(frame_info, new_type); + break; + case ADD_RTG_FRAME: + if (is_rt_type(new_type)) { + if (atomic_read(&g_rt_frame_num) >= g_max_rt_frames) { + pr_err("[SCHED_RTG] g_max_rt_frames is %d!\n", g_max_rt_frames); + ret = -INVALID_ARG; + } else { + atomic_inc(&g_rt_frame_num); + } + } + break; + case CLEAR_RTG_FRAME: + if ((atomic_read(&g_rt_frame_num) > 0) && is_rt_type(new_type)) + atomic_dec(&g_rt_frame_num); + break; + default: + return -INVALID_ARG; + } + trace_rtg_frame_sched(frame_info->rtg->id, "g_rt_frame_num", atomic_read(&g_rt_frame_num)); + trace_rtg_frame_sched(frame_info->rtg->id, "g_max_rt_frames", g_max_rt_frames); + + return ret; +} + +static long ctrl_set_enable(int abi, void __user *uarg) +{ + struct rtg_enable_data rs_enable; + + if (copy_from_user(&rs_enable, uarg, sizeof(rs_enable))) { + pr_err("[SCHED_RTG] CMD_ID_SET_ENABLE copy data failed\n"); + return -INVALID_ARG; + } + if (rs_enable.enable == 1) + rtg_enable(abi, &rs_enable); + else + rtg_disable(); + + return SUCC; +} + +static long ctrl_get_enable(int abi, void __user *uarg) +{ + return atomic_read(&g_rtg_enable); +} + +static inline bool is_valid_type(int type) +{ + return (type >= VIP && type < RTG_TYPE_MAX); +} + +static int parse_rtg_attr(const struct rtg_str_data *rs_data) +{ + char *p = NULL; + char *tmp = NULL; + char *data = NULL; + int value; + struct frame_info *frame_info = NULL; + int rate = -1; + int type = -1; + int ret; + + if (rs_data == NULL) { + pr_err("[SCHED_RTG] rtg attr: rs_data is null!\n"); + return -INVALID_ARG; + } + + data = rs_data->data; + if ((data == NULL) || (rs_data->len <= 0) || + (rs_data->len > MAX_DATA_LEN)) { + pr_err("[SCHED_RTG] rtg attr: rs_data len err!\n"); + return -INVALID_ARG; + } + + // eg: rtgId:xx;rate:xx;type:xx; + for (p = strsep(&data, ";"); p != NULL; p = strsep(&data, ";")) { + tmp = strsep(&p, ":"); + if ((tmp == NULL) || (p == NULL)) + continue; + if (kstrtoint((const char *)p, DECIMAL, &value)) { + pr_err("[SCHED_RTG] rtg attr: rs_data format err!\n"); + return -INVALID_ARG; + } + if (!strcmp(tmp, "rtgId")) { + frame_info = rtg_frame_info(value); + } else if (!strcmp(tmp, "rate")) { + rate = value; + } else if (!strcmp(tmp, "type")) { + if (is_valid_type(value)) { + type = value; + } else { + pr_err("[SCHED_RTG] invalid type : %d\n", value); + return -INVALID_ARG; + } + } else { + pr_err("[SCHED_RTG] parse 
rtg attr failed!\n"); + return -INVALID_ARG; + } + } + + if (!frame_info) { + pr_err("[SCHED_RTG] rtg attr: invalid args!\n"); + return -INVALID_ARG; + } + + ret = set_frame_rate(frame_info, rate); + if (ret) + return ret; + + if (is_valid_type(type)) { + if (update_rt_frame_num(frame_info, type, UPDATE_RTG_FRAME)) { + pr_err("[SCHED_RTG] set rtg attr failed!\n"); + return -INVALID_ARG; + } + + set_frame_prio(frame_info, (type == NORMAL_TASK ? + NOT_RT_PRIO : (type + DEFAULT_RT_PRIO))); + } + + return SUCC; +} + +static long ctrl_set_rtg_attr(int abi, void __user *uarg) +{ + struct rtg_str_data rs; + char temp[MAX_DATA_LEN]; + int ret; + + if (uarg == NULL) + return -INVALID_ARG; + + if (copy_from_user(&rs, uarg, sizeof(rs))) { + pr_err("[SCHED_RTG] CMD_ID_SET_RTG_ATTR copy data failed\n"); + return -INVALID_ARG; + } + if ((rs.len <= 0) || (rs.len >= MAX_DATA_LEN)) { + pr_err("[SCHED_RTG] CMD_ID_SET_RTG_ATTR data len invalid\n"); + return -INVALID_ARG; + } + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpointer-to-int-cast" + switch (abi) { + case IOCTL_ABI_ARM32: + ret = copy_from_user(&temp, + (void __user *)compat_ptr((compat_uptr_t)rs.data), rs.len); + break; + case IOCTL_ABI_AARCH64: + ret = copy_from_user(&temp, (void __user *)rs.data, rs.len); + break; + default: + pr_err("[SCHED_RTG] abi format error\n"); + return -INVALID_ARG; + } +#pragma GCC diagnostic pop + + if (ret) { + pr_err("[SCHED_RTG] CMD_ID_SET_RTG_ATTR copy rs.data failed with ret %d\n", ret); + return -INVALID_ARG; + } + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wincompatible-pointer-types" + temp[rs.len] = '\0'; + rs.data = &temp; +#pragma GCC diagnostic pop + + return parse_rtg_attr(&rs); +} + +static void start_frame_freq(struct frame_info *frame_info) +{ + if (!frame_info) + return; + + if (atomic_read(&frame_info->start_frame_freq) == 0) { + atomic_set(&frame_info->start_frame_freq, 1); + set_frame_sched_state(frame_info, true); + } +} + +static int set_frame(struct frame_info *frame_info, int margin) +{ + int ret; + if (!frame_info) + return -INVALID_RTG_ID; + + atomic_set(&frame_info->frame_state, FRAME_DRAWING); + ret = set_frame_margin(frame_info, margin); + if (ret) + goto out; + + ret = set_frame_timestamp(frame_info, FRAME_START); + if (ret) + goto out; + +out: + return ret; +} + +static int reset_frame(struct frame_info *frame_info) +{ + if (!frame_info) + return -INVALID_RTG_ID; + + if (atomic_read(&frame_info->frame_state) == FRAME_END_STATE) { + pr_debug("[SCHED_RTG]: Frame state is already reset\n"); + return -INVALID_PROC_STATE; + } + + atomic_set(&frame_info->frame_state, FRAME_END_STATE); + return set_frame_timestamp(frame_info, FRAME_END); +} + +int update_frame_state(int grp_id, int margin, bool in_frame) +{ + int ret; + struct frame_info *frame_info = NULL; + + frame_info = lookup_frame_info_by_grp_id(grp_id); + if (!frame_info || !frame_info->rtg) + return -INVALID_RTG_ID; + + if (in_frame) { + start_frame_freq(frame_info); + ret = set_frame(frame_info, margin); + trace_rtg_frame_sched(grp_id, "margin", margin); + } else { + ret = reset_frame(frame_info); + } + + return ret; +} + +static inline int curr_grp_id() +{ + return sched_get_group_id(current); +} + +static long ctrl_frame_state(void __user *uarg, bool is_enter) +{ + struct proc_state_data state_data; + + if (uarg == NULL) + return -INVALID_ARG; + + if (copy_from_user(&state_data, uarg, sizeof(state_data))) { + pr_err("[SCHED_RTG] CMD_ID_FRAME_FREQ copy data failed\n"); + return -INVALID_ARG; + } + 
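+	/* apply to the RTG frame of the calling thread's current group */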
+ return update_frame_state(curr_grp_id(), state_data.state_param, is_enter); +} + +static long ctrl_begin_frame(int abi, void __user *uarg) +{ + return ctrl_frame_state(uarg, true); +} + +static long ctrl_end_frame(int abi, void __user *uarg) +{ + return ctrl_frame_state(uarg, false); +} + +static int stop_frame_freq(int gid) +{ + struct frame_info *frame_info = NULL; + + frame_info = lookup_frame_info_by_grp_id(gid); + if (!frame_info) + return -INVALID_RTG_ID; + + atomic_set(&frame_info->start_frame_freq, 0); + set_frame_sched_state(frame_info, false); + + return 0; +} + +static long ctrl_end_scene(int abi, void __user *uarg) +{ + int rtg_id; + + if (uarg == NULL) + return -INVALID_ARG; + + if (copy_from_user(&rtg_id, uarg, sizeof(int))) { + pr_err("[SCHED_RTG] CMD_ID_END_SCENE copy data failed\n"); + return -INVALID_ARG; + } + + return stop_frame_freq(rtg_id); +} + +static int set_min_util(int gid, int min_util) +{ + struct frame_info *frame_info = NULL; + + frame_info = lookup_frame_info_by_grp_id(gid); + if (!frame_info) + return -FRAME_ERR_PID; + + return set_frame_min_util(frame_info, min_util, false); +} + +static long ctrl_set_min_util(int abi, void __user *uarg) +{ + struct proc_state_data state_data; + + if (uarg == NULL) + return -INVALID_ARG; + + if (copy_from_user(&state_data, uarg, sizeof(state_data))) { + pr_err("[SCHED_RTG] CMD_ID_SET_MIN_UTIL copy data failed\n"); + return -INVALID_ARG; + } + + return set_min_util(curr_grp_id(), state_data.state_param); +} + +static int set_margin(int grp_id, int margin) +{ + struct frame_info *frame_info = NULL; + + frame_info = lookup_frame_info_by_grp_id(grp_id); + if (!frame_info) + return -FRAME_ERR_PID; + + set_frame_margin(frame_info, margin); + + return SUCC; +} + +static long ctrl_set_margin(int abi, void __user *uarg) +{ + struct proc_state_data state_data; + + if (uarg == NULL) + return -INVALID_ARG; + + if (copy_from_user(&state_data, uarg, sizeof(state_data))) { + pr_err("[SCHED_RTG] CMD_ID_SET_MARGIN copy data failed\n"); + return -INVALID_ARG; + } + + return set_margin(curr_grp_id(), state_data.state_param); +} + +static void clear_rtg_frame_thread(struct frame_info *frame_info, bool reset) +{ + struct frame_thread_info frame_thread_info; + int i; + + if (!reset && frame_info) + frame_thread_info.prio = frame_info->prio; + else + frame_thread_info.prio = NOT_RT_PRIO; + for (i = 0; i < MAX_TID_NUM; i++) + frame_thread_info.thread[i] = -1; + frame_thread_info.thread_num = MAX_TID_NUM; + update_frame_thread_info(frame_info, &frame_thread_info); + if (reset) { + atomic_set(&frame_info->max_rt_thread_num, DEFAULT_MAX_RT_THREAD); + atomic_set(&frame_info->frame_sched_state, 0); + trace_rtg_frame_sched(frame_info->rtg->id, "FRAME_SCHED_ENABLE", 0); + } +} + +static void copy_proc_from_rsdata(struct rtg_proc_data *proc_info, + const struct rtg_grp_data *rs_data) +{ + memset(proc_info, 0, sizeof(struct rtg_proc_data)); + proc_info->type = VIP; + proc_info->rtcnt = DEFAULT_MAX_RT_THREAD; + if ((rs_data->grp_type > 0) && (rs_data->grp_type < RTG_TYPE_MAX)) + proc_info->type = rs_data->grp_type; + if ((rs_data->rt_cnt > 0) && (rs_data->rt_cnt < DEFAULT_MAX_RT_THREAD)) + proc_info->rtcnt = rs_data->rt_cnt; +} + +static void init_frame_thread_info(struct frame_thread_info *frame_thread_info, + const struct rtg_proc_data *proc_info) +{ + int i; + int type = proc_info->type; + + frame_thread_info->prio = (type == NORMAL_TASK ? 
NOT_RT_PRIO : (type + DEFAULT_RT_PRIO)); + for (i = 0; i < MAX_TID_NUM; i++) + frame_thread_info->thread[i] = proc_info->thread[i]; + frame_thread_info->thread_num = MAX_TID_NUM; +} + +static int parse_create_rtg_grp(const struct rtg_grp_data *rs_data) +{ + struct rtg_proc_data proc_info; + struct frame_info *frame_info; + struct frame_thread_info frame_thread_info; + + copy_proc_from_rsdata(&proc_info, rs_data); + proc_info.rtgid = alloc_multi_frame_info(); + frame_info = rtg_frame_info(proc_info.rtgid); + if (!frame_info) { + pr_err("[SCHED_RTG] no free multi frame.\n"); + return -NO_FREE_MULTI_FRAME; + } + atomic_set(&frame_info->max_rt_thread_num, proc_info.rtcnt); + if (update_rt_frame_num(frame_info, rs_data->grp_type, ADD_RTG_FRAME)) { + release_multi_frame_info(proc_info.rtgid); + return -NO_RT_FRAME; + } + init_frame_thread_info(&frame_thread_info, &proc_info); + update_frame_thread_info(frame_info, &frame_thread_info); + atomic_set(&frame_info->frame_sched_state, 1); + pr_info("[SCHED_RTG] %s rtgid=%d, type=%d, prio=%d, threadnum=%d, rtnum=%d\n", + __func__, proc_info.rtgid, rs_data->grp_type, + frame_thread_info.prio, frame_thread_info.thread_num, proc_info.rtcnt); + + return proc_info.rtgid; +} + +static int parse_add_rtg_thread(const struct rtg_grp_data *rs_data) +{ + struct rtg_proc_data proc_info; + struct frame_info *frame_info; + int add_index; + int add_num; + int prio; + int fail_num = 0; + int i; + + if ((rs_data->grp_id <= 0) || (rs_data->grp_id >= MAX_NUM_CGROUP_COLOC_ID)) + return -INVALID_ARG; + copy_proc_from_rsdata(&proc_info, rs_data); + frame_info = lookup_frame_info_by_grp_id(rs_data->grp_id); + if (!frame_info) { + pr_err("[SCHED_RTG] grp not created yet.\n"); + return -INVALID_ARG; + } + mutex_lock(&frame_info->lock); + add_num = rs_data->tid_num; + if ((frame_info->thread_num < 0) || (add_num < 0)) { + mutex_unlock(&frame_info->lock); + pr_err("[SCHED_RTG] Unexception err: frame_info num < 0.\n"); + return -INVALID_RTG_ID; + } + if (frame_info->thread_num + add_num > MAX_TID_NUM) { + mutex_unlock(&frame_info->lock); + return -INVALID_RTG_ID; + } + add_index = frame_info->thread_num; + prio = (proc_info.type == NORMAL_TASK) ? 
NOT_RT_PRIO : frame_info->prio; + for (i = 0; i < add_num; i++) { + frame_info->thread[add_index] = update_frame_thread(frame_info, prio, prio, + rs_data->tids[i], + frame_info->thread[add_index]); + if (frame_info->thread[add_index]) { + atomic_set(&frame_info->thread_prio[add_index], prio); + frame_info->thread_num++; + add_index = frame_info->thread_num; + } else { + fail_num++; + } + } + mutex_unlock(&frame_info->lock); + + return fail_num; +} + +static int parse_remove_thread(const struct rtg_grp_data *rs_data) +{ + pr_err("[SCHED_RTG] frame rtg not support remove single yet.\n"); + + return -INVALID_ARG; +} + +static int do_clear_or_destroy_grp(const struct rtg_grp_data *rs_data, bool destroy) +{ + struct frame_info *frame_info; + int type; + int id = rs_data->grp_id; + + if (!is_frame_rtg(id)) { + pr_err("[SCHED_RTG] Failed to destroy rtg group %d!\n", id); + return -INVALID_ARG; + } + + frame_info = rtg_frame_info(id); + if (!frame_info) { + pr_err("[SCHED_RTG] Failed to destroy rtg group %d: grp not exist.\n", id); + return -INVALID_ARG; + } + + type = frame_info->prio - DEFAULT_RT_PRIO; + if (destroy) { + clear_rtg_frame_thread(frame_info, true); + release_multi_frame_info(id); + update_rt_frame_num(frame_info, type, CLEAR_RTG_FRAME); + } else { + clear_rtg_frame_thread(frame_info, false); + } + pr_info("[SCHED_RTG] %s clear frame(id=%d)\n", __func__, id); + + return SUCC; +} + +static int parse_destroy_grp(const struct rtg_grp_data *rs_data) +{ + return do_clear_or_destroy_grp(rs_data, true); +} + +long ctrl_set_rtg(int abi, void __user *uarg) +{ + struct rtg_grp_data rs_data; + long ret; + + if (copy_from_user(&rs_data, uarg, sizeof(rs_data))) { + pr_err("[SCHED_RTG] CMD_ID_SET_RTG copy data failed\n"); + return -INVALID_ARG; + } + + switch (rs_data.rtg_cmd) { + case CMD_CREATE_RTG_GRP: + ret = parse_create_rtg_grp(&rs_data); + break; + case CMD_ADD_RTG_THREAD: + ret = parse_add_rtg_thread(&rs_data); + break; + case CMD_REMOVE_RTG_THREAD: + ret = parse_remove_thread(&rs_data); + break; + case CMD_CLEAR_RTG_GRP: + ret = -INVALID_ARG; + break; + case CMD_DESTROY_RTG_GRP: + ret = parse_destroy_grp(&rs_data); + break; + default: + return -INVALID_ARG; + } + + return ret; +} + +static long ctrl_search_rtg(int abi, void __user *uarg) +{ + struct proc_state_data search_data; + + if (copy_from_user(&search_data, uarg, sizeof(search_data))) { + pr_err("[SCHED_RTG] CMD_ID_SEARCH_RTG copy data failed\n"); + return -INVALID_ARG; + } + + return search_rtg(search_data.state_param); +} + +static long do_proc_rtg_ioctl(int abi, struct file *file, unsigned int cmd, unsigned long arg) +{ + void __user *uarg = (void __user *)(uintptr_t)arg; + unsigned int func_id = _IOC_NR(cmd); +#ifdef CONFIG_RTG_AUTHORITY + bool authorized = true; +#endif + + if (uarg == NULL) { + pr_err("[SCHED_RTG] %s: invalid user uarg\n", __func__); + return -EINVAL; + } + + if (_IOC_TYPE(cmd) != RTG_SCHED_IPC_MAGIC) { + pr_err("[SCHED_RTG] %s: RTG_SCHED_IPC_MAGIC fail, TYPE=%d\n", + __func__, _IOC_TYPE(cmd)); + return -INVALID_MAGIC; + } + + if (!atomic_read(&g_rtg_enable) && (func_id != SET_ENABLE) && (func_id != GET_ENABLE)) { + pr_err("[SCHED_RTG] CMD_ID %x error: Rtg not enabled yet.\n", cmd); + return -RTG_DISABLED; + } + + if (func_id >= RTG_CTRL_MAX_NR) { + pr_err("[SCHED_RTG] %s: RTG_MAX_NR fail, _IOC_NR(cmd)=%d, MAX_NR=%d\n", + __func__, _IOC_NR(cmd), RTG_CTRL_MAX_NR); + return -INVALID_CMD; + } + +#ifdef CONFIG_RTG_AUTHORITY + authorized = check_authorized(func_id, RTG_AUTH_FLAG); + if (!authorized) { + 
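+		/* reject callers whose uid lacks RTG authority for this command */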
pr_err("[SCHED_RTG] %s: uid not authorized.\n", __func__); + return -INVALID_CMD; + } +#endif + if (g_func_array[func_id] != NULL) + return (*g_func_array[func_id])(abi, uarg); + + return -EINVAL; +} + +static void reset_frame_info(struct frame_info *frame_info) +{ + int i; + clear_rtg_frame_thread(frame_info, true); + atomic_set(&frame_info->frame_state, -1); + atomic_set(&frame_info->curr_rt_thread_num, 0); + atomic_set(&frame_info->max_rt_thread_num, DEFAULT_MAX_RT_THREAD); + for (i = 0; i < MAX_TID_NUM; i++) + atomic_set(&frame_info->thread_prio[i], 0); +} + +static int do_init_proc_state(int rtgid, const int *config, int len) +{ + struct related_thread_group *grp = NULL; + struct frame_info *frame_info = NULL; + + grp = lookup_related_thread_group(rtgid); + if (unlikely(!grp)) + return -EINVAL; + + frame_info = (struct frame_info *)grp->private_data; + if (!frame_info) + return -EINVAL; + + reset_frame_info(frame_info); + + if ((config[RTG_FREQ_CYCLE] >= MIN_FREQ_CYCLE) && + (config[RTG_FREQ_CYCLE] <= MAX_FREQ_CYCLE)) + sched_set_group_freq_update_interval(rtgid, + (unsigned int)config[RTG_FREQ_CYCLE]); + else + sched_set_group_freq_update_interval(rtgid, + DEFAULT_FREQ_CYCLE); + + if (config[RTG_INVALID_INTERVAL] != INVALID_VALUE) + sched_set_group_util_invalid_interval(rtgid, + config[RTG_INVALID_INTERVAL]); + else + sched_set_group_util_invalid_interval(rtgid, + DEFAULT_INVALID_INTERVAL); + + set_frame_max_util(frame_info, g_frame_max_util); + + return SUCC; +} + +static int init_proc_state(const int *config, int len) +{ + int ret; + int id; + + if ((config == NULL) || (len != RTG_CONFIG_NUM)) + return -INVALID_ARG; + + if ((config[RTG_FRAME_MAX_UTIL] > 0) && + (config[RTG_FRAME_MAX_UTIL] < DEFAULT_MAX_UTIL)) + g_frame_max_util = config[RTG_FRAME_MAX_UTIL]; + + for (id = MULTI_FRAME_ID; id < (MULTI_FRAME_ID + MULTI_FRAME_NUM); id++) { + ret = do_init_proc_state(id, config, len); + if (ret) { + pr_err("[SCHED_RTG] init proc state for FRAME_ID=%d failed, ret=%d\n", + id, ret); + return ret; + } + } + atomic_set(&g_rt_frame_num, 0); + + return SUCC; +} + +static void deinit_proc_state(void) +{ + int id; + struct frame_info *frame_info = NULL; + struct related_thread_group *grp = NULL; + + for (id = MULTI_FRAME_ID; id < (MULTI_FRAME_ID + MULTI_FRAME_NUM); id++) { + grp = lookup_related_thread_group(id); + if (unlikely(!grp)) + return; + + frame_info = (struct frame_info *)grp->private_data; + if (frame_info) + reset_frame_info(frame_info); + } + clear_multi_frame_info(); + atomic_set(&g_rt_frame_num, 0); +} + +int proc_rtg_open(struct inode *inode, struct file *filp) +{ + return SUCC; +} + +static int proc_rtg_release(struct inode *inode, struct file *filp) +{ + return SUCC; +} + +long proc_rtg_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return do_proc_rtg_ioctl(IOCTL_ABI_AARCH64, file, cmd, arg); +} + +#ifdef CONFIG_COMPAT +long proc_rtg_compat_ioctl(struct file *file, + unsigned int cmd, unsigned long arg) +{ + return do_proc_rtg_ioctl(IOCTL_ABI_ARM32, file, cmd, + (unsigned long)(compat_ptr((compat_uptr_t)arg))); +} +#endif + +static const struct file_operations rtg_ctrl_fops = { + .open = proc_rtg_open, + .release = proc_rtg_release, + .unlocked_ioctl = proc_rtg_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = proc_rtg_compat_ioctl, +#endif +}; + +static struct miscdevice rtg_ctrl_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = "sched_rtg_ctrl", + .fops = &rtg_ctrl_fops, + .mode = 0666, +}; + +static int __init rtg_ctrl_dev_init(void) +{ + return 
misc_register(&rtg_ctrl_device); +} + +static void __exit rtg_ctrl_dev_exit(void) +{ + misc_deregister(&rtg_ctrl_device); +} + +module_init(rtg_ctrl_dev_init); +module_exit(rtg_ctrl_dev_exit); diff --git a/kernel/sched/rtg/rtg_ctrl.h b/kernel/sched/rtg/rtg_ctrl.h new file mode 100755 index 0000000000000000000000000000000000000000..6fe3d9d399d9ec344c18f07c165613b231152b05 --- /dev/null +++ b/kernel/sched/rtg/rtg_ctrl.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * rtg control interface + * + * Copyright (c) 2022-2023 Huawei Technologies Co., Ltd. + */ + +#ifndef __RTG_CTL_H +#define __RTG_CTL_H + +#include +#include +#include + +#include "frame_rtg.h" + +/* set rtg */ +#define INVALID_VALUE 0xffff +#define DEFAULT_RT_PRIO 97 + +#define MAX_DATA_LEN 256 +#define DECIMAL 10 +#define DEFAULT_MAX_UTIL 1024 +#define MAX_SUBPROCESS_NUM 8 + +#define RTG_ID_INVALID (-1) +/* fit for FFRT, original DEFAULT_MAX_RT_FRAME is 3 */ +#define DEFAULT_MAX_RT_FRAME 10 +#define MAX_RT_THREAD (MAX_TID_NUM + 2) +#define INIT_VALUE (-1) +#define UPDATE_RTG_FRAME (1 << 0) +#define ADD_RTG_FRAME (1 << 1) +#define CLEAR_RTG_FRAME (1 << 2) + +#define DEFAULT_FREQ_CYCLE 4 +#define MIN_FREQ_CYCLE 1 +#define MAX_FREQ_CYCLE 16 +#define DEFAULT_INVALID_INTERVAL 50 + +/* proc_state */ +enum proc_state { + STATE_MIN = 0, + FRAME_DRAWING, + FRAME_RME_MAX = 19, + /* rme end */ + FRAME_END_STATE = FRAME_RME_MAX + 1, + + FRAME_CLICK = 100, + STATE_MAX, +}; + +enum rtg_config { + RTG_FREQ_CYCLE, + RTG_FRAME_MAX_UTIL, + RTG_INVALID_INTERVAL, + RTG_CONFIG_NUM, +}; + +enum rtg_err_no { + SUCC = 0, + RTG_DISABLED = 1, + INVALID_ARG, + INVALID_MAGIC, + INVALID_CMD, + FRAME_ERR_PID = 100, + NO_FREE_MULTI_FRAME, + NOT_MULTI_FRAME, + INVALID_RTG_ID, + NO_RT_FRAME, + INVALID_PROC_STATE, +}; + +struct rtg_grp_data { + int rtg_cmd; + int grp_id; + int grp_type; + int rt_cnt; + int tid_num; + int tids[MAX_TID_NUM]; +}; + +struct rtg_proc_data { + int rtgid; + int type; + int thread[MAX_TID_NUM]; + int rtcnt; +}; + +#endif diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index d6afa45f008a3ba4460c2dfbe2a9c9cc829ba5fa..afef39e60e9e0ea3abe0bb916e12dc825a073ccf 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -101,6 +101,45 @@ struct rq; struct cpuidle_state; +#ifdef CONFIG_SCHED_WALT +extern unsigned int sched_ravg_window; +extern unsigned int walt_cpu_util_freq_divisor; + +struct walt_sched_stats { + u64 cumulative_runnable_avg_scaled; +}; + +struct load_subtractions { + u64 window_start; + u64 subs; + u64 new_subs; +}; + +#define NUM_TRACKED_WINDOWS 2 + +struct sched_cluster { + raw_spinlock_t load_lock; + struct list_head list; + struct cpumask cpus; + int id; + int max_power_cost; + int min_power_cost; + int max_possible_capacity; + int capacity; + int efficiency; /* Differentiate cpus with different IPC capability */ + int load_scale_factor; + unsigned int exec_scale_factor; + /* + * max_freq = user maximum + * max_possible_freq = maximum supported by hardware + */ + unsigned int cur_freq, max_freq, min_freq; + unsigned int max_possible_freq; + bool freq_init_done; +}; + +extern unsigned int sched_disable_window_stats; +#endif /* CONFIG_SCHED_WALT */ /* task_struct::on_rq states: */ #define TASK_ON_RQ_QUEUED 1 #define TASK_ON_RQ_MIGRATING 2 @@ -657,6 +696,9 @@ struct cfs_rq { struct list_head leaf_cfs_rq_list; struct task_group *tg; /* group that "owns" this runqueue */ +#ifdef CONFIG_SCHED_WALT + struct walt_sched_stats walt_stats; +#endif /* Locally cached copy of our task_group's idle value */ 
int idle; @@ -679,6 +721,9 @@ struct cfs_rq { #ifdef CONFIG_SMP struct list_head throttled_csd_list; #endif +#ifdef CONFIG_SCHED_WALT + u64 cumulative_runnable_avg; +#endif #endif /* CONFIG_CFS_BANDWIDTH */ #endif /* CONFIG_FAIR_GROUP_SCHED */ }; @@ -1116,6 +1161,27 @@ struct rq { /* This is used to determine avg_idle's max value */ u64 max_idle_balance_cost; +#ifdef CONFIG_SCHED_WALT + struct sched_cluster *cluster; + struct cpumask freq_domain_cpumask; + struct walt_sched_stats walt_stats; + + u64 window_start; + unsigned long walt_flags; + + u64 cur_irqload; + u64 avg_irqload; + u64 irqload_ts; + u64 curr_runnable_sum; + u64 prev_runnable_sum; + u64 nt_curr_runnable_sum; + u64 nt_prev_runnable_sum; + u64 cum_window_demand_scaled; + struct load_subtractions load_subs[NUM_TRACKED_WINDOWS]; +#ifdef CONFIG_SCHED_RTG + struct group_cpu_time grp_time; +#endif +#endif /* CONFIG_SCHED_WALT */ #ifdef CONFIG_HOTPLUG_CPU struct rcuwait hotplug_wait; @@ -2321,6 +2387,10 @@ struct sched_class { #ifdef CONFIG_FAIR_GROUP_SCHED void (*task_change_group)(struct task_struct *p); #endif +#ifdef CONFIG_SCHED_WALT + void (*fixup_walt_sched_stats)(struct rq *rq, struct task_struct *p, + u16 updated_demand_scaled); +#endif #ifdef CONFIG_SCHED_CORE int (*task_is_throttled)(struct task_struct *p, int cpu); @@ -2623,6 +2693,15 @@ static inline int hrtick_enabled(struct rq *rq) #endif /* CONFIG_SCHED_HRTICK */ +#ifdef CONFIG_SCHED_WALT +u64 sched_ktime_clock(void); +#else +static inline u64 sched_ktime_clock(void) +{ + return sched_clock(); +} +#endif + #ifndef arch_scale_freq_tick static __always_inline void arch_scale_freq_tick(void) @@ -2703,6 +2782,11 @@ static inline bool rq_order_less(struct rq *rq1, struct rq *rq2) extern void double_rq_lock(struct rq *rq1, struct rq *rq2); +#ifdef CONFIG_SCHED_WALT +extern unsigned int sysctl_sched_use_walt_cpu_util; +extern unsigned int walt_disabled; +#endif + #ifdef CONFIG_PREEMPTION /* @@ -2993,11 +3077,20 @@ DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data); static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) { struct update_util_data *data; + u64 clock; + +#ifdef CONFIG_SCHED_WALT + if (!(flags & SCHED_CPUFREQ_WALT)) + return; + clock = sched_ktime_clock(); +#else + clock = rq_clock(rq); +#endif data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, cpu_of(rq))); if (data) - data->func(data, rq_clock(rq), flags); + data->func(data, clock, flags); } #else static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} @@ -3563,4 +3656,249 @@ static inline void init_sched_mm_cid(struct task_struct *t) { } extern u64 avg_vruntime(struct cfs_rq *cfs_rq); extern int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se); +#ifdef CONFIG_SCHED_WALT +static inline int cluster_first_cpu(struct sched_cluster *cluster) +{ + return cpumask_first(&cluster->cpus); +} + +extern struct list_head cluster_head; +extern struct sched_cluster *sched_cluster[NR_CPUS]; +unsigned long capacity_curr_of(int cpu); +unsigned long cpu_util_cfs(int cpu); + +#define for_each_sched_cluster(cluster) \ + list_for_each_entry_rcu(cluster, &cluster_head, list) + +extern struct mutex policy_mutex; +extern unsigned int sched_disable_window_stats; +extern unsigned int max_possible_freq; +extern unsigned int min_max_freq; +extern unsigned int max_possible_efficiency; +extern unsigned int min_possible_efficiency; +extern unsigned int max_capacity; +extern unsigned int min_capacity; +extern unsigned int max_load_scale_factor; 
+extern unsigned int max_possible_capacity; +extern unsigned int min_max_possible_capacity; +extern unsigned int max_power_cost; +extern unsigned int __read_mostly sched_init_task_load_windows; +extern unsigned int sysctl_sched_restrict_cluster_spill; +extern unsigned int sched_pred_alert_load; +extern struct sched_cluster init_cluster; + +static inline void walt_fixup_cum_window_demand(struct rq *rq, s64 scaled_delta) +{ + rq->cum_window_demand_scaled += scaled_delta; + if (unlikely((s64)rq->cum_window_demand_scaled < 0)) + rq->cum_window_demand_scaled = 0; +} + +/* Is frequency of two cpus synchronized with each other? */ +static inline int same_freq_domain(int src_cpu, int dst_cpu) +{ + struct rq *rq = cpu_rq(src_cpu); + + if (src_cpu == dst_cpu) + return 1; + + return cpumask_test_cpu(dst_cpu, &rq->freq_domain_cpumask); +} + +extern void reset_task_stats(struct task_struct *p); + +#define CPU_RESERVED 1 +static inline int is_reserved(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + + return test_bit(CPU_RESERVED, &rq->walt_flags); +} + +static inline int mark_reserved(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + + return test_and_set_bit(CPU_RESERVED, &rq->walt_flags); +} + +static inline void clear_reserved(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + + clear_bit(CPU_RESERVED, &rq->walt_flags); +} + +static inline int cpu_capacity(int cpu) +{ + return cpu_rq(cpu)->cluster->capacity; +} + +static inline int cpu_max_possible_capacity(int cpu) +{ + return cpu_rq(cpu)->cluster->max_possible_capacity; +} + +static inline int cpu_load_scale_factor(int cpu) +{ + return cpu_rq(cpu)->cluster->load_scale_factor; +} + +static inline unsigned int cluster_max_freq(struct sched_cluster *cluster) +{ + /* + * Governor and thermal driver don't know the other party's mitigation + * voting. So struct cluster saves both and return min() for current + * cluster fmax. + */ + return cluster->max_freq; +} + +/* Keep track of max/min capacity possible across CPUs "currently" */ +static inline void __update_min_max_capacity(void) +{ + int i; + int max_cap = 0, min_cap = INT_MAX; + + for_each_possible_cpu(i) { + if (!cpu_active(i)) + continue; + + max_cap = max(max_cap, cpu_capacity(i)); + min_cap = min(min_cap, cpu_capacity(i)); + } + + max_capacity = max_cap; + min_capacity = min_cap; +} + +/* + * Return load_scale_factor of a cpu in reference to "most" efficient cpu, so + * that "most" efficient cpu gets a load_scale_factor of 1 + */ +static inline unsigned long +load_scale_cpu_efficiency(struct sched_cluster *cluster) +{ + return DIV_ROUND_UP(1024 * max_possible_efficiency, + cluster->efficiency); +} + +/* + * Return load_scale_factor of a cpu in reference to cpu with best max_freq + * (max_possible_freq), so that one with best max_freq gets a load_scale_factor + * of 1. + */ +static inline unsigned long load_scale_cpu_freq(struct sched_cluster *cluster) +{ + return DIV_ROUND_UP(1024 * max_possible_freq, + cluster_max_freq(cluster)); +} + +static inline int compute_load_scale_factor(struct sched_cluster *cluster) +{ + int load_scale = 1024; + + /* + * load_scale_factor accounts for the fact that task load + * is in reference to "best" performing cpu. Task's load will need to be + * scaled (up) by a factor to determine suitability to be placed on a + * (little) cpu. 
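+	 * For example, a cluster with half the peak efficiency and half the
+	 * peak max_freq ends up with a load_scale_factor of 4096 (i.e. 4x).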
+ */ + load_scale *= load_scale_cpu_efficiency(cluster); + load_scale >>= 10; + + load_scale *= load_scale_cpu_freq(cluster); + load_scale >>= 10; + + return load_scale; +} + +static inline bool is_max_capacity_cpu(int cpu) +{ + return cpu_max_possible_capacity(cpu) == max_possible_capacity; +} + +static inline bool is_min_capacity_cpu(int cpu) +{ + return cpu_max_possible_capacity(cpu) == min_max_possible_capacity; +} + +/* + * Return 'capacity' of a cpu in reference to "least" efficient cpu, such that + * least efficient cpu gets capacity of 1024 + */ +static unsigned long +capacity_scale_cpu_efficiency(struct sched_cluster *cluster) +{ + return (1024 * cluster->efficiency) / min_possible_efficiency; +} + +/* + * Return 'capacity' of a cpu in reference to cpu with lowest max_freq + * (min_max_freq), such that one with lowest max_freq gets capacity of 1024. + */ +static unsigned long capacity_scale_cpu_freq(struct sched_cluster *cluster) +{ + return (1024 * cluster_max_freq(cluster)) / min_max_freq; +} + +static inline int compute_capacity(struct sched_cluster *cluster) +{ + int capacity = 1024; + + capacity *= capacity_scale_cpu_efficiency(cluster); + capacity >>= 10; + + capacity *= capacity_scale_cpu_freq(cluster); + capacity >>= 10; + + return capacity; +} + +static inline unsigned int power_cost(int cpu, u64 demand) +{ + return cpu_max_possible_capacity(cpu); +} + +static inline unsigned long cpu_util_freq_walt(int cpu) +{ + u64 util; + struct rq *rq = cpu_rq(cpu); + unsigned long capacity = capacity_orig_of(cpu); + + if (unlikely(walt_disabled || !sysctl_sched_use_walt_cpu_util)) + return cpu_util_cfs(cpu); + + util = rq->prev_runnable_sum << SCHED_CAPACITY_SHIFT; + util = div_u64(util, sched_ravg_window); + + return (util >= capacity) ? capacity : util; +} + +static inline bool hmp_capable(void) +{ + return max_possible_capacity != min_max_possible_capacity; +} +#else /* CONFIG_SCHED_WALT */ +static inline void walt_fixup_cum_window_demand(struct rq *rq, + s64 scaled_delta) { } + +static inline int same_freq_domain(int src_cpu, int dst_cpu) +{ + return 1; +} + +static inline int is_reserved(int cpu) +{ + return 0; +} + +static inline void clear_reserved(int cpu) { } + +static inline bool hmp_capable(void) +{ + return false; +} +#endif /* CONFIG_SCHED_WALT */ + #endif /* _KERNEL_SCHED_SCHED_H */ diff --git a/kernel/sched/sched_avg.c b/kernel/sched/sched_avg.c new file mode 100755 index 0000000000000000000000000000000000000000..d74579a1553db6f6545b0e02bcc7b21d9e5da312 --- /dev/null +++ b/kernel/sched/sched_avg.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2012, 2015-2021, The Linux Foundation. All rights reserved. + */ +/* + * Scheduler hook for average runqueue determination + */ +#include +#include +#include +#include +#include + +#include "sched.h" +#include "walt.h" +#include + +static DEFINE_PER_CPU(u64, nr_prod_sum); +static DEFINE_PER_CPU(u64, last_time); +static DEFINE_PER_CPU(u64, nr_big_prod_sum); +static DEFINE_PER_CPU(u64, nr); +static DEFINE_PER_CPU(u64, nr_max); + +static DEFINE_PER_CPU(unsigned long, iowait_prod_sum); +static DEFINE_PER_CPU(spinlock_t, nr_lock) = __SPIN_LOCK_UNLOCKED(nr_lock); +static s64 last_get_time; + +static DEFINE_PER_CPU(atomic64_t, last_busy_time) = ATOMIC64_INIT(0); + +#define NR_THRESHOLD_PCT 15 + +/** + * sched_get_nr_running_avg + * @return: Average nr_running, iowait and nr_big_tasks value since last poll. + * Returns the avg * 100 to return up to two decimal points + * of accuracy. 
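+ * (In this version the 100x scaling is internal only; the values stored
+ * in @stats are rounded back to whole task counts.)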
+ * + * Obtains the average nr_running value since the last poll. + * This function may not be called concurrently with itself + */ +void sched_get_nr_running_avg(struct sched_avg_stats *stats) +{ + int cpu; + u64 curr_time = sched_clock(); + u64 period = curr_time - last_get_time; + u64 tmp_nr, tmp_misfit; + + if (!period) + return; + + /* read and reset nr_running counts */ + for_each_possible_cpu(cpu) { + unsigned long flags; + u64 diff; + + spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags); + curr_time = sched_clock(); + diff = curr_time - per_cpu(last_time, cpu); + BUG_ON((s64)diff < 0); + + tmp_nr = per_cpu(nr_prod_sum, cpu); + tmp_nr += per_cpu(nr, cpu) * diff; + tmp_nr = div64_u64((tmp_nr * 100), period); + + tmp_misfit = per_cpu(nr_big_prod_sum, cpu); + tmp_misfit = div64_u64((tmp_misfit * 100), period); + + /* + * NR_THRESHOLD_PCT is to make sure that the task ran + * at least 85% in the last window to compensate any + * over estimating being done. + */ + stats[cpu].nr = (int)div64_u64((tmp_nr + NR_THRESHOLD_PCT), + 100); + stats[cpu].nr_misfit = (int)div64_u64((tmp_misfit + + NR_THRESHOLD_PCT), 100); + stats[cpu].nr_max = per_cpu(nr_max, cpu); + + trace_sched_get_nr_running_avg(cpu, stats[cpu].nr, + stats[cpu].nr_misfit, stats[cpu].nr_max); + + per_cpu(last_time, cpu) = curr_time; + per_cpu(nr_prod_sum, cpu) = 0; + per_cpu(nr_big_prod_sum, cpu) = 0; + per_cpu(iowait_prod_sum, cpu) = 0; + per_cpu(nr_max, cpu) = per_cpu(nr, cpu); + + spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags); + } + + last_get_time = curr_time; + +} +EXPORT_SYMBOL(sched_get_nr_running_avg); + +#define BUSY_NR_RUN 3 +#define BUSY_LOAD_FACTOR 10 +static inline void update_last_busy_time(int cpu, bool dequeue, + unsigned long prev_nr_run, u64 curr_time) +{ + bool nr_run_trigger = false, load_trigger = false; + + if (!hmp_capable() || is_min_capacity_cpu(cpu)) + return; + + if (prev_nr_run >= BUSY_NR_RUN && per_cpu(nr, cpu) < BUSY_NR_RUN) + nr_run_trigger = true; + + if (dequeue && (cpu_util(cpu) * BUSY_LOAD_FACTOR) > + capacity_orig_of(cpu)) + load_trigger = true; + + if (nr_run_trigger || load_trigger) + atomic64_set(&per_cpu(last_busy_time, cpu), curr_time); +} + +/** + * sched_update_nr_prod + * @cpu: The core id of the nr running driver. + * @delta: Adjust nr by 'delta' amount + * @inc: Whether we are increasing or decreasing the count + * @return: N/A + * + * Update average with latest nr_running value for CPU + */ +void sched_update_nr_prod(int cpu, long delta, bool inc) +{ + u64 diff; + u64 curr_time; + unsigned long flags, nr_running; + + spin_lock_irqsave(&per_cpu(nr_lock, cpu), flags); + nr_running = per_cpu(nr, cpu); + curr_time = sched_clock(); + diff = curr_time - per_cpu(last_time, cpu); + BUG_ON((s64)diff < 0); + per_cpu(last_time, cpu) = curr_time; + per_cpu(nr, cpu) = nr_running + (inc ? delta : -delta); + + BUG_ON((s64)per_cpu(nr, cpu) < 0); + + if (per_cpu(nr, cpu) > per_cpu(nr_max, cpu)) + per_cpu(nr_max, cpu) = per_cpu(nr, cpu); + + update_last_busy_time(cpu, !inc, nr_running, curr_time); + + per_cpu(nr_prod_sum, cpu) += nr_running * diff; + per_cpu(iowait_prod_sum, cpu) += nr_iowait_cpu(cpu) * diff; + spin_unlock_irqrestore(&per_cpu(nr_lock, cpu), flags); +} +EXPORT_SYMBOL(sched_update_nr_prod); + +/* + * Returns the CPU utilization % in the last window. 
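+ * Busy time comes from prev_runnable_sum when WALT is in use and from the
+ * PELT cfs util_avg otherwise, clamped to capacity_orig_of(cpu).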
+ * + */ +unsigned int sched_get_cpu_util(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + u64 util; + unsigned long capacity, flags; + unsigned int busy; + + raw_spin_lock_irqsave(&rq->lock, flags); + + util = rq->cfs.avg.util_avg; + capacity = capacity_orig_of(cpu); + +#ifdef CONFIG_SCHED_WALT + if (!walt_disabled && sysctl_sched_use_walt_cpu_util) { + util = rq->prev_runnable_sum; + util = div64_u64(util, + sched_ravg_window >> SCHED_CAPACITY_SHIFT); + } +#endif + raw_spin_unlock_irqrestore(&rq->lock, flags); + + util = (util >= capacity) ? capacity : util; + busy = div64_ul((util * 100), capacity); + return busy; +} + +u64 sched_get_cpu_last_busy_time(int cpu) +{ + return atomic64_read(&per_cpu(last_busy_time, cpu)); +} diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index 85590599b4d60545b7774222971f8f7baa678dac..84f00a3de3d0232360c3e95f25328cc053b0e289 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -7,6 +7,7 @@ * * See kernel/stop_machine.c */ +#include "walt.h" #ifdef CONFIG_SMP static int @@ -55,12 +56,14 @@ static void enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags) { add_nr_running(rq, 1); + walt_inc_cumulative_runnable_avg(rq, p); } static void dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags) { sub_nr_running(rq, 1); + walt_dec_cumulative_runnable_avg(rq, p); } static void yield_task_stop(struct rq *rq) @@ -138,4 +141,7 @@ DEFINE_SCHED_CLASS(stop) = { .prio_changed = prio_changed_stop, .switched_to = switched_to_stop, .update_curr = update_curr_stop, +#ifdef CONFIG_SCHED_WALT + .fixup_walt_sched_stats = fixup_walt_sched_stats_common, +#endif }; diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c new file mode 100755 index 0000000000000000000000000000000000000000..4391bf669ad49de314d601479a315991adbc8add --- /dev/null +++ b/kernel/sched/walt.c @@ -0,0 +1,1862 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * walt.c + * + * Window Assistant Load Tracking + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include "sched.h" +#include "walt.h" +#include "core_ctl.h" +#include "rtg/rtg.h" +#define CREATE_TRACE_POINTS +#include +#undef CREATE_TRACE_POINTS + +const char *task_event_names[] = {"PUT_PREV_TASK", "PICK_NEXT_TASK", + "TASK_WAKE", "TASK_MIGRATE", "TASK_UPDATE", + "IRQ_UPDATE"}; +const char *migrate_type_names[] = {"GROUP_TO_RQ", "RQ_TO_GROUP", + "RQ_TO_RQ", "GROUP_TO_GROUP"}; + +#define SCHED_FREQ_ACCOUNT_WAIT_TIME 0 +#define SCHED_ACCOUNT_WAIT_TIME 1 + +static ktime_t ktime_last; +static bool sched_ktime_suspended; +DEFINE_MUTEX(cluster_lock); +static atomic64_t walt_irq_work_lastq_ws; +u64 walt_load_reported_window; + +static struct irq_work walt_cpufreq_irq_work; +static struct irq_work walt_migration_irq_work; + +u64 sched_ktime_clock(void) +{ + if (unlikely(sched_ktime_suspended)) + return ktime_to_ns(ktime_last); + return ktime_get_ns(); +} + +static void sched_resume(void) +{ + sched_ktime_suspended = false; +} + +static int sched_suspend(void) +{ + ktime_last = ktime_get(); + sched_ktime_suspended = true; + return 0; +} + +static struct syscore_ops sched_syscore_ops = { + .resume = sched_resume, + .suspend = sched_suspend +}; + +static int __init sched_init_ops(void) +{ + register_syscore_ops(&sched_syscore_ops); + return 0; +} +late_initcall(sched_init_ops); + +static void acquire_rq_locks_irqsave(const cpumask_t *cpus, + unsigned long *flags) +{ + int cpu; + int level = 0; + + local_irq_save(*flags); + for_each_cpu(cpu, cpus) { + if (level == 0) + raw_spin_lock(&cpu_rq(cpu)->__lock); + else + raw_spin_lock_nested(&cpu_rq(cpu)->__lock, level); + level++; + } +} + +static void release_rq_locks_irqrestore(const cpumask_t *cpus, + unsigned long *flags) +{ + int cpu; + + for_each_cpu(cpu, cpus) + raw_spin_unlock(&cpu_rq(cpu)->__lock); + local_irq_restore(*flags); +} + +#ifdef CONFIG_HZ_300 +/* + * Tick interval becomes to 3333333 due to + * rounding error when HZ=300. + */ +#define MIN_SCHED_RAVG_WINDOW (3333333 * 6) +#else +/* Min window size (in ns) = 20ms */ +#define MIN_SCHED_RAVG_WINDOW 20000000 +#endif + +/* Max window size (in ns) = 1s */ +#define MAX_SCHED_RAVG_WINDOW 1000000000 + +/* 1 -> use PELT based load stats, 0 -> use window-based load stats */ +unsigned int __read_mostly walt_disabled; + +__read_mostly unsigned int sysctl_sched_cpu_high_irqload = (10 * NSEC_PER_MSEC); + +/* + * sched_window_stats_policy and sched_ravg_hist_size have a 'sysctl' copy + * associated with them. This is required for atomic update of those variables + * when being modifed via sysctl interface. + * + * IMPORTANT: Initialize both copies to same value!! + */ + +__read_mostly unsigned int sched_ravg_hist_size = 5; +__read_mostly unsigned int sysctl_sched_ravg_hist_size = 5; + +__read_mostly unsigned int sched_window_stats_policy = WINDOW_STATS_MAX_RECENT_AVG; +__read_mostly unsigned int sysctl_sched_window_stats_policy = WINDOW_STATS_MAX_RECENT_AVG; + +static __read_mostly unsigned int sched_io_is_busy = 1; + +unsigned int sysctl_sched_use_walt_cpu_util = 1; +unsigned int sysctl_sched_use_walt_task_util = 1; +unsigned int sysctl_sched_walt_init_task_load_pct = 15; +__read_mostly unsigned int sysctl_sched_walt_cpu_high_irqload = (10 * NSEC_PER_MSEC); + +/* Window size (in ns) */ +__read_mostly unsigned int sched_ravg_window = MIN_SCHED_RAVG_WINDOW; + +/* + * A after-boot constant divisor for cpu_util_freq_walt() to apply the load + * boost. 
+ */ +__read_mostly unsigned int walt_cpu_util_freq_divisor; + +/* Initial task load. Newly created tasks are assigned this load. */ +unsigned int __read_mostly sched_init_task_load_windows; +unsigned int __read_mostly sched_init_task_load_windows_scaled; +unsigned int __read_mostly sysctl_sched_init_task_load_pct = 15; + +/* + * Maximum possible frequency across all cpus. Task demand and cpu + * capacity (cpu_power) metrics are scaled in reference to it. + */ +unsigned int max_possible_freq = 1; + +/* + * Minimum possible max_freq across all cpus. This will be same as + * max_possible_freq on homogeneous systems and could be different from + * max_possible_freq on heterogenous systems. min_max_freq is used to derive + */ +unsigned int min_max_freq = 1; + +unsigned int max_capacity = 1024; /* max(rq->capacity) */ +unsigned int min_capacity = 1024; /* min(rq->capacity) */ +unsigned int max_possible_capacity = 1024; /* max(rq->max_possible_capacity) */ +unsigned int +min_max_possible_capacity = 1024; /* min(rq->max_possible_capacity) */ + +/* Temporarily disable window-stats activity on all cpus */ +unsigned int __read_mostly sched_disable_window_stats; + +/* + * This governs what load needs to be used when reporting CPU busy time + * to the cpufreq governor. + */ +__read_mostly unsigned int sysctl_sched_freq_reporting_policy; + +static int __init set_sched_ravg_window(char *str) +{ + unsigned int window_size; + + get_option(&str, &window_size); + + if (window_size < MIN_SCHED_RAVG_WINDOW || + window_size > MAX_SCHED_RAVG_WINDOW) { + WARN_ON(1); + return -EINVAL; + } + + sched_ravg_window = window_size; + return 0; +} +early_param("sched_ravg_window", set_sched_ravg_window); + +__read_mostly unsigned int walt_scale_demand_divisor; +#define scale_demand(d) ((d)/walt_scale_demand_divisor) + +void inc_rq_walt_stats(struct rq *rq, struct task_struct *p) +{ + walt_inc_cumulative_runnable_avg(rq, p); +} + +void dec_rq_walt_stats(struct rq *rq, struct task_struct *p) +{ + walt_dec_cumulative_runnable_avg(rq, p); +} + +void fixup_walt_sched_stats_common(struct rq *rq, struct task_struct *p, + u16 updated_demand_scaled) +{ + s64 task_load_delta = (s64)updated_demand_scaled - + p->ravg.demand_scaled; + + fixup_cumulative_runnable_avg(&rq->walt_stats, task_load_delta); + + walt_fixup_cum_window_demand(rq, task_load_delta); +} + +static u64 +update_window_start(struct rq *rq, u64 wallclock, int event) +{ + s64 delta; + int nr_windows; + u64 old_window_start = rq->window_start; + + delta = wallclock - rq->window_start; + BUG_ON(delta < 0); + if (delta < sched_ravg_window) + return old_window_start; + + nr_windows = div64_u64(delta, sched_ravg_window); + rq->window_start += (u64)nr_windows * (u64)sched_ravg_window; + + rq->cum_window_demand_scaled = + rq->walt_stats.cumulative_runnable_avg_scaled; + + return old_window_start; +} + +void sched_account_irqtime(int cpu, struct task_struct *curr, + u64 delta, u64 wallclock) +{ + struct rq *rq = cpu_rq(cpu); + unsigned long flags, nr_windows; + u64 cur_jiffies_ts; + + raw_spin_lock_irqsave(&rq->__lock, flags); + + /* + * cputime (wallclock) uses sched_clock so use the same here for + * consistency. + */ + delta += sched_clock() - wallclock; + cur_jiffies_ts = get_jiffies_64(); + + if (is_idle_task(curr)) + update_task_ravg(curr, rq, IRQ_UPDATE, sched_ktime_clock(), + delta); + + nr_windows = cur_jiffies_ts - rq->irqload_ts; + + if (nr_windows) { + if (nr_windows < 10) { + /* Decay CPU's irqload by 3/4 for each window. 
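+			 * (the arithmetic below applies a single 3/4 scaling;
+			 * the else branch clears the history once ten or more
+			 * have elapsed)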
*/ + rq->avg_irqload *= (3 * nr_windows); + rq->avg_irqload = div64_u64(rq->avg_irqload, + 4 * nr_windows); + } else { + rq->avg_irqload = 0; + } + rq->avg_irqload += rq->cur_irqload; + rq->cur_irqload = 0; + } + + rq->cur_irqload += delta; + rq->irqload_ts = cur_jiffies_ts; + raw_spin_unlock_irqrestore(&rq->__lock, flags); +} + +static int +account_busy_for_task_demand(struct rq *rq, struct task_struct *p, int event) +{ + /* + * No need to bother updating task demand for exiting tasks + * or the idle task. + */ + if (exiting_task(p) || is_idle_task(p)) + return 0; + + /* + * When a task is waking up it is completing a segment of non-busy + * time. Likewise, if wait time is not treated as busy time, then + * when a task begins to run or is migrated, it is not running and + * is completing a segment of non-busy time. + */ + if (event == TASK_WAKE || (!SCHED_ACCOUNT_WAIT_TIME && + (event == PICK_NEXT_TASK || event == TASK_MIGRATE))) + return 0; + + /* + * The idle exit time is not accounted for the first task _picked_ up to + * run on the idle CPU. + */ + if (event == PICK_NEXT_TASK && rq->curr == rq->idle) + return 0; + + /* + * TASK_UPDATE can be called on sleeping task, when its moved between + * related groups + */ + if (event == TASK_UPDATE) { + if (rq->curr == p) + return 1; + + return p->on_rq ? SCHED_ACCOUNT_WAIT_TIME : 0; + } + + return 1; +} + +/* + * In this function we match the accumulated subtractions with the current + * and previous windows we are operating with. Ignore any entries where + * the window start in the load_subtraction struct does not match either + * the curent or the previous window. This could happen whenever CPUs + * become idle or busy with interrupts disabled for an extended period. + */ +static inline void account_load_subtractions(struct rq *rq) +{ + u64 ws = rq->window_start; + u64 prev_ws = ws - sched_ravg_window; + struct load_subtractions *ls = rq->load_subs; + int i; + + for (i = 0; i < NUM_TRACKED_WINDOWS; i++) { + if (ls[i].window_start == ws) { + rq->curr_runnable_sum -= ls[i].subs; + rq->nt_curr_runnable_sum -= ls[i].new_subs; + } else if (ls[i].window_start == prev_ws) { + rq->prev_runnable_sum -= ls[i].subs; + rq->nt_prev_runnable_sum -= ls[i].new_subs; + } + + ls[i].subs = 0; + ls[i].new_subs = 0; + } + + BUG_ON((s64)rq->prev_runnable_sum < 0); + BUG_ON((s64)rq->curr_runnable_sum < 0); + BUG_ON((s64)rq->nt_prev_runnable_sum < 0); + BUG_ON((s64)rq->nt_curr_runnable_sum < 0); +} + +static inline void create_subtraction_entry(struct rq *rq, u64 ws, int index) +{ + rq->load_subs[index].window_start = ws; + rq->load_subs[index].subs = 0; + rq->load_subs[index].new_subs = 0; +} + +static bool get_subtraction_index(struct rq *rq, u64 ws) +{ + int i; + u64 oldest = ULLONG_MAX; + int oldest_index = 0; + + for (i = 0; i < NUM_TRACKED_WINDOWS; i++) { + u64 entry_ws = rq->load_subs[i].window_start; + + if (ws == entry_ws) + return i; + + if (entry_ws < oldest) { + oldest = entry_ws; + oldest_index = i; + } + } + + create_subtraction_entry(rq, ws, oldest_index); + return oldest_index; +} + +static void update_rq_load_subtractions(int index, struct rq *rq, + u32 sub_load, bool new_task) +{ + rq->load_subs[index].subs += sub_load; + if (new_task) + rq->load_subs[index].new_subs += sub_load; +} + +void update_cluster_load_subtractions(struct task_struct *p, + int cpu, u64 ws, bool new_task) +{ + struct sched_cluster *cluster = cpu_cluster(cpu); + struct cpumask cluster_cpus = cluster->cpus; + u64 prev_ws = ws - sched_ravg_window; + int i; + + 
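+	/*
+	 * @cpu is excluded: its counters are adjusted directly by the caller.
+	 * Queue subtractions only for the sibling CPUs of the cluster.
+	 */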
cpumask_clear_cpu(cpu, &cluster_cpus); + raw_spin_lock(&cluster->load_lock); + + for_each_cpu(i, &cluster_cpus) { + struct rq *rq = cpu_rq(i); + int index; + + if (p->ravg.curr_window_cpu[i]) { + index = get_subtraction_index(rq, ws); + update_rq_load_subtractions(index, rq, + p->ravg.curr_window_cpu[i], new_task); + p->ravg.curr_window_cpu[i] = 0; + } + + if (p->ravg.prev_window_cpu[i]) { + index = get_subtraction_index(rq, prev_ws); + update_rq_load_subtractions(index, rq, + p->ravg.prev_window_cpu[i], new_task); + p->ravg.prev_window_cpu[i] = 0; + } + } + + raw_spin_unlock(&cluster->load_lock); +} + +static inline void inter_cluster_migration_fixup + (struct task_struct *p, int new_cpu, int task_cpu, bool new_task) +{ + struct rq *dest_rq = cpu_rq(new_cpu); + struct rq *src_rq = cpu_rq(task_cpu); + + if (same_freq_domain(new_cpu, task_cpu)) + return; + + p->ravg.curr_window_cpu[new_cpu] = p->ravg.curr_window; + p->ravg.prev_window_cpu[new_cpu] = p->ravg.prev_window; + + dest_rq->curr_runnable_sum += p->ravg.curr_window; + dest_rq->prev_runnable_sum += p->ravg.prev_window; + + src_rq->curr_runnable_sum -= p->ravg.curr_window_cpu[task_cpu]; + src_rq->prev_runnable_sum -= p->ravg.prev_window_cpu[task_cpu]; + + if (new_task) { + dest_rq->nt_curr_runnable_sum += p->ravg.curr_window; + dest_rq->nt_prev_runnable_sum += p->ravg.prev_window; + + src_rq->nt_curr_runnable_sum -= + p->ravg.curr_window_cpu[task_cpu]; + src_rq->nt_prev_runnable_sum -= + p->ravg.prev_window_cpu[task_cpu]; + } + + p->ravg.curr_window_cpu[task_cpu] = 0; + p->ravg.prev_window_cpu[task_cpu] = 0; + + update_cluster_load_subtractions(p, task_cpu, + src_rq->window_start, new_task); + + BUG_ON((s64)src_rq->prev_runnable_sum < 0); + BUG_ON((s64)src_rq->curr_runnable_sum < 0); + BUG_ON((s64)src_rq->nt_prev_runnable_sum < 0); + BUG_ON((s64)src_rq->nt_curr_runnable_sum < 0); +} + +void fixup_busy_time(struct task_struct *p, int new_cpu) +{ + struct rq *src_rq = task_rq(p); + struct rq *dest_rq = cpu_rq(new_cpu); + u64 wallclock; + bool new_task; +#ifdef CONFIG_SCHED_RTG + u64 *src_curr_runnable_sum, *dst_curr_runnable_sum; + u64 *src_prev_runnable_sum, *dst_prev_runnable_sum; + u64 *src_nt_curr_runnable_sum, *dst_nt_curr_runnable_sum; + u64 *src_nt_prev_runnable_sum, *dst_nt_prev_runnable_sum; + struct related_thread_group *grp; +#endif + + if (!p->on_rq && p->__state != TASK_WAKING) + return; + + if (exiting_task(p)) + return; + + if (p->__state == TASK_WAKING) + double_rq_lock(src_rq, dest_rq); + + if (sched_disable_window_stats) + goto done; + + wallclock = sched_ktime_clock(); + + update_task_ravg(task_rq(p)->curr, task_rq(p), + TASK_UPDATE, + wallclock, 0); + update_task_ravg(dest_rq->curr, dest_rq, + TASK_UPDATE, wallclock, 0); + + update_task_ravg(p, task_rq(p), TASK_MIGRATE, + wallclock, 0); + + /* + * When a task is migrating during the wakeup, adjust + * the task's contribution towards cumulative window + * demand. + */ + if (p->__state == TASK_WAKING && p->last_sleep_ts >= + src_rq->window_start) { + walt_fixup_cum_window_demand(src_rq, + -(s64)p->ravg.demand_scaled); + walt_fixup_cum_window_demand(dest_rq, p->ravg.demand_scaled); + } + + new_task = is_new_task(p); +#ifdef CONFIG_SCHED_RTG + /* Protected by rq_lock */ + grp = task_related_thread_group(p); + + /* + * For frequency aggregation, we continue to do migration fixups + * even for intra cluster migrations. This is because, the aggregated + * load has to reported on a single CPU regardless. 
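+	 * (the task's current and previous windows are simply moved between
+	 * the source and destination rq's grp_time below)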
+ */ + if (grp) { + struct group_cpu_time *cpu_time; + + cpu_time = &src_rq->grp_time; + src_curr_runnable_sum = &cpu_time->curr_runnable_sum; + src_prev_runnable_sum = &cpu_time->prev_runnable_sum; + src_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum; + src_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum; + + cpu_time = &dest_rq->grp_time; + dst_curr_runnable_sum = &cpu_time->curr_runnable_sum; + dst_prev_runnable_sum = &cpu_time->prev_runnable_sum; + dst_nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum; + dst_nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum; + + if (p->ravg.curr_window) { + *src_curr_runnable_sum -= p->ravg.curr_window; + *dst_curr_runnable_sum += p->ravg.curr_window; + if (new_task) { + *src_nt_curr_runnable_sum -= + p->ravg.curr_window; + *dst_nt_curr_runnable_sum += + p->ravg.curr_window; + } + } + + if (p->ravg.prev_window) { + *src_prev_runnable_sum -= p->ravg.prev_window; + *dst_prev_runnable_sum += p->ravg.prev_window; + if (new_task) { + *src_nt_prev_runnable_sum -= + p->ravg.prev_window; + *dst_nt_prev_runnable_sum += + p->ravg.prev_window; + } + } + } else { +#endif + inter_cluster_migration_fixup(p, new_cpu, + task_cpu(p), new_task); +#ifdef CONFIG_SCHED_RTG + } +#endif + + if (!same_freq_domain(new_cpu, task_cpu(p))) + irq_work_queue(&walt_migration_irq_work); + +done: + if (p->__state == TASK_WAKING) + double_rq_unlock(src_rq, dest_rq); +} + +void set_window_start(struct rq *rq) +{ + static int sync_cpu_available; + + if (likely(rq->window_start)) + return; + + if (!sync_cpu_available) { + rq->window_start = 1; + sync_cpu_available = 1; + atomic64_set(&walt_irq_work_lastq_ws, rq->window_start); + walt_load_reported_window = + atomic64_read(&walt_irq_work_lastq_ws); + + } else { + struct rq *sync_rq = cpu_rq(cpumask_any(cpu_online_mask)); + + raw_spin_unlock(&rq->__lock); + double_rq_lock(rq, sync_rq); + rq->window_start = sync_rq->window_start; + rq->curr_runnable_sum = rq->prev_runnable_sum = 0; + rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0; + raw_spin_unlock(&sync_rq->__lock); + } + + rq->curr->ravg.mark_start = rq->window_start; +} + +/* + * Called when new window is starting for a task, to record cpu usage over + * recently concluded window(s). Normally 'samples' should be 1. It can be > 1 + * when, say, a real-time task runs without preemption for several windows at a + * stretch. 
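+ * In that case the same full-window runtime is recorded 'samples' times
+ * (capped at the history size).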
+ */ +static void update_history(struct rq *rq, struct task_struct *p, + u32 runtime, int samples, int event) +{ + u32 *hist = &p->ravg.sum_history[0]; + int ridx, widx; + u32 max = 0, avg, demand; + u64 sum = 0; + u16 demand_scaled; + + /* Ignore windows where task had no activity */ + if (!runtime || is_idle_task(p) || exiting_task(p) || !samples) + goto done; + + /* Push new 'runtime' value onto stack */ + widx = sched_ravg_hist_size - 1; + ridx = widx - samples; + for (; ridx >= 0; --widx, --ridx) { + hist[widx] = hist[ridx]; + sum += hist[widx]; + if (hist[widx] > max) + max = hist[widx]; + } + + for (widx = 0; widx < samples && widx < sched_ravg_hist_size; widx++) { + hist[widx] = runtime; + sum += hist[widx]; + if (hist[widx] > max) + max = hist[widx]; + } + + p->ravg.sum = 0; + + if (sched_window_stats_policy == WINDOW_STATS_RECENT) { + demand = runtime; + } else if (sched_window_stats_policy == WINDOW_STATS_MAX) { + demand = max; + } else { + avg = div64_u64(sum, sched_ravg_hist_size); + if (sched_window_stats_policy == WINDOW_STATS_AVG) + demand = avg; + else + demand = max(avg, runtime); + } + demand_scaled = scale_demand(demand); + + /* + * A throttled deadline sched class task gets dequeued without + * changing p->on_rq. Since the dequeue decrements walt stats + * avoid decrementing it here again. + * + * When window is rolled over, the cumulative window demand + * is reset to the cumulative runnable average (contribution from + * the tasks on the runqueue). If the current task is dequeued + * already, it's demand is not included in the cumulative runnable + * average. So add the task demand separately to cumulative window + * demand. + */ + if (!task_has_dl_policy(p) || !p->dl.dl_throttled) { + if (task_on_rq_queued(p) + && p->sched_class->fixup_walt_sched_stats) + p->sched_class->fixup_walt_sched_stats(rq, p, + demand_scaled); + else if (rq->curr == p) + walt_fixup_cum_window_demand(rq, demand_scaled); + } + + p->ravg.demand = demand; + p->ravg.demand_scaled = demand_scaled; + +done: + trace_sched_update_history(rq, p, runtime, samples, event); +} + +#define DIV64_U64_ROUNDUP(X, Y) div64_u64((X) + (Y - 1), Y) + +static u64 add_to_task_demand(struct rq *rq, struct task_struct *p, u64 delta) +{ + delta = scale_exec_time(delta, rq); + p->ravg.sum += delta; + if (unlikely(p->ravg.sum > sched_ravg_window)) + p->ravg.sum = sched_ravg_window; + + return delta; +} + +/* + * Account cpu demand of task and/or update task's cpu demand history + * + * ms = p->ravg.mark_start; + * wc = wallclock + * ws = rq->window_start + * + * Three possibilities: + * + * a) Task event is contained within one window. + * window_start < mark_start < wallclock + * + * ws ms wc + * | | | + * V V V + * |---------------| + * + * In this case, p->ravg.sum is updated *iff* event is appropriate + * (ex: event == PUT_PREV_TASK) + * + * b) Task event spans two windows. + * mark_start < window_start < wallclock + * + * ms ws wc + * | | | + * V V V + * -----|------------------- + * + * In this case, p->ravg.sum is updated with (ws - ms) *iff* event + * is appropriate, then a new window sample is recorded followed + * by p->ravg.sum being set to (wc - ws) *iff* event is appropriate. + * + * c) Task event spans more than two windows. 
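+ *    (at least one full window lies between mark_start and window_start)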
+ * + * ms ws_tmp ws wc + * | | | | + * V V V V + * ---|-------|-------|-------|-------|------ + * | | + * |<------ nr_full_windows ------>| + * + * In this case, p->ravg.sum is updated with (ws_tmp - ms) first *iff* + * event is appropriate, window sample of p->ravg.sum is recorded, + * 'nr_full_window' samples of window_size is also recorded *iff* + * event is appropriate and finally p->ravg.sum is set to (wc - ws) + * *iff* event is appropriate. + * + * IMPORTANT : Leave p->ravg.mark_start unchanged, as update_cpu_busy_time() + * depends on it! + */ +static u64 update_task_demand(struct task_struct *p, struct rq *rq, + int event, u64 wallclock) +{ + u64 mark_start = p->ravg.mark_start; + u64 delta, window_start = rq->window_start; + int new_window, nr_full_windows; + u32 window_size = sched_ravg_window; + u64 runtime; + +#ifdef CONFIG_SCHED_RTG + update_group_demand(p, rq, event, wallclock); +#endif + + new_window = mark_start < window_start; + if (!account_busy_for_task_demand(rq, p, event)) { + if (new_window) + /* + * If the time accounted isn't being accounted as + * busy time, and a new window started, only the + * previous window need be closed out with the + * pre-existing demand. Multiple windows may have + * elapsed, but since empty windows are dropped, + * it is not necessary to account those. + */ + update_history(rq, p, p->ravg.sum, 1, event); + return 0; + } + + if (!new_window) { + /* + * The simple case - busy time contained within the existing + * window. + */ + return add_to_task_demand(rq, p, wallclock - mark_start); + } + + /* + * Busy time spans at least two windows. Temporarily rewind + * window_start to first window boundary after mark_start. + */ + delta = window_start - mark_start; + nr_full_windows = div64_u64(delta, window_size); + window_start -= (u64)nr_full_windows * (u64)window_size; + + /* Process (window_start - mark_start) first */ + runtime = add_to_task_demand(rq, p, window_start - mark_start); + + /* Push new sample(s) into task's demand history */ + update_history(rq, p, p->ravg.sum, 1, event); + if (nr_full_windows) { + u64 scaled_window = scale_exec_time(window_size, rq); + + update_history(rq, p, scaled_window, nr_full_windows, event); + runtime += nr_full_windows * scaled_window; + } + + /* + * Roll window_start back to current to process any remainder + * in current window. 
+ */ + window_start += (u64)nr_full_windows * (u64)window_size; + + /* Process (wallclock - window_start) next */ + mark_start = window_start; + runtime += add_to_task_demand(rq, p, wallclock - mark_start); + + return runtime; +} + +static u32 empty_windows[NR_CPUS]; + +static void rollover_task_window(struct task_struct *p, bool full_window) +{ + u32 *curr_cpu_windows = empty_windows; + u32 curr_window; + int i; + + /* Rollover the sum */ + curr_window = 0; + + if (!full_window) { + curr_window = p->ravg.curr_window; + curr_cpu_windows = p->ravg.curr_window_cpu; + } + + p->ravg.prev_window = curr_window; + p->ravg.curr_window = 0; + + /* Roll over individual CPU contributions */ + for (i = 0; i < nr_cpu_ids; i++) { + p->ravg.prev_window_cpu[i] = curr_cpu_windows[i]; + p->ravg.curr_window_cpu[i] = 0; + } +} + +static void rollover_cpu_window(struct rq *rq, bool full_window) +{ + u64 curr_sum = rq->curr_runnable_sum; + u64 nt_curr_sum = rq->nt_curr_runnable_sum; + + if (unlikely(full_window)) { + curr_sum = 0; + nt_curr_sum = 0; + } + + rq->prev_runnable_sum = curr_sum; + rq->nt_prev_runnable_sum = nt_curr_sum; + + rq->curr_runnable_sum = 0; + rq->nt_curr_runnable_sum = 0; +} + +static inline int cpu_is_waiting_on_io(struct rq *rq) +{ + if (!sched_io_is_busy) + return 0; + + return atomic_read(&rq->nr_iowait); +} + +static int account_busy_for_cpu_time(struct rq *rq, struct task_struct *p, + u64 irqtime, int event) +{ + if (is_idle_task(p)) { + /* TASK_WAKE && TASK_MIGRATE is not possible on idle task! */ + if (event == PICK_NEXT_TASK) + return 0; + + /* PUT_PREV_TASK, TASK_UPDATE && IRQ_UPDATE are left */ + return irqtime || cpu_is_waiting_on_io(rq); + } + + if (event == TASK_WAKE) + return 0; + + if (event == PUT_PREV_TASK || event == IRQ_UPDATE) + return 1; + + /* + * TASK_UPDATE can be called on sleeping task, when its moved between + * related groups + */ + if (event == TASK_UPDATE) { + if (rq->curr == p) + return 1; + + return p->on_rq ? SCHED_FREQ_ACCOUNT_WAIT_TIME : 0; + } + + /* TASK_MIGRATE, PICK_NEXT_TASK left */ + return SCHED_FREQ_ACCOUNT_WAIT_TIME; +} + +/* + * Account cpu activity in its busy time counters (rq->curr/prev_runnable_sum) + */ +static void update_cpu_busy_time(struct task_struct *p, struct rq *rq, + int event, u64 wallclock, u64 irqtime) +{ + int new_window, full_window = 0; + int p_is_curr_task = (p == rq->curr); + u64 mark_start = p->ravg.mark_start; + u64 window_start = rq->window_start; + u32 window_size = sched_ravg_window; + u64 delta; + u64 *curr_runnable_sum = &rq->curr_runnable_sum; + u64 *prev_runnable_sum = &rq->prev_runnable_sum; + u64 *nt_curr_runnable_sum = &rq->nt_curr_runnable_sum; + u64 *nt_prev_runnable_sum = &rq->nt_prev_runnable_sum; + bool new_task; + int cpu = rq->cpu; +#ifdef CONFIG_SCHED_RTG + struct group_cpu_time *cpu_time; + struct related_thread_group *grp; +#endif + + new_window = mark_start < window_start; + if (new_window) { + full_window = (window_start - mark_start) >= window_size; + if (p->ravg.active_windows < USHRT_MAX) + p->ravg.active_windows++; + } + + new_task = is_new_task(p); + + /* + * Handle per-task window rollover. We don't care about the idle + * task or exiting tasks. 
+ */ + if (!is_idle_task(p) && !exiting_task(p)) { + if (new_window) + rollover_task_window(p, full_window); + } + + if (p_is_curr_task && new_window) + rollover_cpu_window(rq, full_window); + + if (!account_busy_for_cpu_time(rq, p, irqtime, event)) + goto done; + +#ifdef CONFIG_SCHED_RTG + grp = task_related_thread_group(p); + if (grp) { + cpu_time = &rq->grp_time; + + curr_runnable_sum = &cpu_time->curr_runnable_sum; + prev_runnable_sum = &cpu_time->prev_runnable_sum; + + nt_curr_runnable_sum = &cpu_time->nt_curr_runnable_sum; + nt_prev_runnable_sum = &cpu_time->nt_prev_runnable_sum; + } +#endif + + if (!new_window) { + /* + * account_busy_for_cpu_time() = 1 so busy time needs + * to be accounted to the current window. No rollover + * since we didn't start a new window. An example of this is + * when a task starts execution and then sleeps within the + * same window. + */ + + if (!irqtime || !is_idle_task(p) || cpu_is_waiting_on_io(rq)) + delta = wallclock - mark_start; + else + delta = irqtime; + delta = scale_exec_time(delta, rq); + *curr_runnable_sum += delta; + if (new_task) + *nt_curr_runnable_sum += delta; + + if (!is_idle_task(p) && !exiting_task(p)) { + p->ravg.curr_window += delta; + p->ravg.curr_window_cpu[cpu] += delta; + } + + goto done; + } + + if (!p_is_curr_task) { + /* + * account_busy_for_cpu_time() = 1 so busy time needs + * to be accounted to the current window. A new window + * has also started, but p is not the current task, so the + * window is not rolled over - just split up and account + * as necessary into curr and prev. The window is only + * rolled over when a new window is processed for the current + * task. + * + * Irqtime can't be accounted by a task that isn't the + * currently running task. + */ + + if (!full_window) { + /* + * A full window hasn't elapsed, account partial + * contribution to previous completed window. + */ + delta = scale_exec_time(window_start - mark_start, rq); + if (!exiting_task(p)) { + p->ravg.prev_window += delta; + p->ravg.prev_window_cpu[cpu] += delta; + } + } else { + /* + * Since at least one full window has elapsed, + * the contribution to the previous window is the + * full window (window_size). + */ + delta = scale_exec_time(window_size, rq); + if (!exiting_task(p)) { + p->ravg.prev_window = delta; + p->ravg.prev_window_cpu[cpu] = delta; + } + } + + *prev_runnable_sum += delta; + if (new_task) + *nt_prev_runnable_sum += delta; + + /* Account piece of busy time in the current window. */ + delta = scale_exec_time(wallclock - window_start, rq); + *curr_runnable_sum += delta; + if (new_task) + *nt_curr_runnable_sum += delta; + + if (!exiting_task(p)) { + p->ravg.curr_window = delta; + p->ravg.curr_window_cpu[cpu] = delta; + } + + goto done; + } + + if (!irqtime || !is_idle_task(p) || cpu_is_waiting_on_io(rq)) { + /* + * account_busy_for_cpu_time() = 1 so busy time needs + * to be accounted to the current window. A new window + * has started and p is the current task so rollover is + * needed. If any of these three above conditions are true + * then this busy time can't be accounted as irqtime. + * + * Busy time for the idle task or exiting tasks need not + * be accounted. + * + * An example of this would be a task that starts execution + * and then sleeps once a new window has begun. + */ + + if (!full_window) { + /* + * A full window hasn't elapsed, account partial + * contribution to previous completed window. 
+ */ + delta = scale_exec_time(window_start - mark_start, rq); + if (!is_idle_task(p) && !exiting_task(p)) { + p->ravg.prev_window += delta; + p->ravg.prev_window_cpu[cpu] += delta; + } + } else { + /* + * Since at least one full window has elapsed, + * the contribution to the previous window is the + * full window (window_size). + */ + delta = scale_exec_time(window_size, rq); + if (!is_idle_task(p) && !exiting_task(p)) { + p->ravg.prev_window = delta; + p->ravg.prev_window_cpu[cpu] = delta; + } + } + + /* + * Rollover is done here by overwriting the values in + * prev_runnable_sum and curr_runnable_sum. + */ + *prev_runnable_sum += delta; + if (new_task) + *nt_prev_runnable_sum += delta; + + /* Account piece of busy time in the current window. */ + delta = scale_exec_time(wallclock - window_start, rq); + *curr_runnable_sum += delta; + if (new_task) + *nt_curr_runnable_sum += delta; + + if (!is_idle_task(p) && !exiting_task(p)) { + p->ravg.curr_window = delta; + p->ravg.curr_window_cpu[cpu] = delta; + } + + goto done; + } + + if (irqtime) { + /* + * account_busy_for_cpu_time() = 1 so busy time needs + * to be accounted to the current window. A new window + * has started and p is the current task so rollover is + * needed. The current task must be the idle task because + * irqtime is not accounted for any other task. + * + * Irqtime will be accounted each time we process IRQ activity + * after a period of idleness, so we know the IRQ busy time + * started at wallclock - irqtime. + */ + + BUG_ON(!is_idle_task(p)); + mark_start = wallclock - irqtime; + + /* + * Roll window over. If IRQ busy time was just in the current + * window then that is all that need be accounted. + */ + if (mark_start > window_start) { + *curr_runnable_sum = scale_exec_time(irqtime, rq); + return; + } + + /* + * The IRQ busy time spanned multiple windows. Process the + * window then that is all that need be accounted. + */ + delta = window_start - mark_start; + if (delta > window_size) + delta = window_size; + delta = scale_exec_time(delta, rq); + *prev_runnable_sum += delta; + + /* Process the remaining IRQ busy time in the current window. 
*/ + delta = wallclock - window_start; + rq->curr_runnable_sum = scale_exec_time(delta, rq); + + return; + } + +done: + return; +} + +static inline void run_walt_irq_work(u64 old_window_start, struct rq *rq) +{ + u64 result; + + if (old_window_start == rq->window_start) + return; + + result = atomic64_cmpxchg(&walt_irq_work_lastq_ws, old_window_start, + rq->window_start); + if (result == old_window_start) + irq_work_queue(&walt_cpufreq_irq_work); +} + +/* Reflect task activity on its demand and cpu's busy time statistics */ +void update_task_ravg(struct task_struct *p, struct rq *rq, int event, + u64 wallclock, u64 irqtime) +{ + u64 old_window_start; + + if (!rq->window_start || sched_disable_window_stats || + p->ravg.mark_start == wallclock) + return; + + lockdep_assert_held(&rq->__lock); + + old_window_start = update_window_start(rq, wallclock, event); + +#ifdef CONFIG_SCHED_RTG + update_group_nr_running(p, event, wallclock); +#endif + if (!p->ravg.mark_start) + goto done; + + update_task_demand(p, rq, event, wallclock); + update_cpu_busy_time(p, rq, event, wallclock, irqtime); + + if (exiting_task(p)) + goto done; + + trace_sched_update_task_ravg(p, rq, event, wallclock, irqtime); +done: + p->ravg.mark_start = wallclock; + + run_walt_irq_work(old_window_start, rq); +} + +int sysctl_sched_walt_init_task_load_pct_sysctl_handler(struct ctl_table *table, + int write, void __user *buffer, size_t *length, loff_t *ppos) +{ + int rc; + + rc = proc_dointvec(table, write, buffer, length, ppos); + if (rc) + return rc; + + sysctl_sched_init_task_load_pct = sysctl_sched_walt_init_task_load_pct; + + return 0; +} + +u32 sched_get_init_task_load(struct task_struct *p) +{ + return p->init_load_pct; +} + +int sched_set_init_task_load(struct task_struct *p, int init_load_pct) +{ + if (init_load_pct < 0 || init_load_pct > 100) + return -EINVAL; + + p->init_load_pct = init_load_pct; + + return 0; +} + +void init_new_task_load(struct task_struct *p) +{ + int i; + u32 init_load_windows = sched_init_task_load_windows; + u32 init_load_windows_scaled = sched_init_task_load_windows_scaled; + u32 init_load_pct = current->init_load_pct; + +#ifdef CONFIG_SCHED_RTG + init_task_rtg(p); +#endif + + p->last_sleep_ts = 0; + p->init_load_pct = 0; + memset(&p->ravg, 0, sizeof(struct ravg)); + + p->ravg.curr_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), + GFP_KERNEL | __GFP_NOFAIL); + p->ravg.prev_window_cpu = kcalloc(nr_cpu_ids, sizeof(u32), + GFP_KERNEL | __GFP_NOFAIL); + + if (init_load_pct) { + init_load_windows = div64_u64((u64)init_load_pct * + (u64)sched_ravg_window, 100); + init_load_windows_scaled = scale_demand(init_load_windows); + } + + p->ravg.demand = init_load_windows; + p->ravg.demand_scaled = init_load_windows_scaled; + for (i = 0; i < RAVG_HIST_SIZE_MAX; ++i) + p->ravg.sum_history[i] = init_load_windows; +} + +void free_task_load_ptrs(struct task_struct *p) +{ + kfree(p->ravg.curr_window_cpu); + kfree(p->ravg.prev_window_cpu); + + /* + * update_task_ravg() can be called for exiting tasks. While the + * function itself ensures correct behavior, the corresponding + * trace event requires that these pointers be NULL. 
+ */ + p->ravg.curr_window_cpu = NULL; + p->ravg.prev_window_cpu = NULL; +} + +void reset_task_stats(struct task_struct *p) +{ + u32 sum = 0; + u32 *curr_window_ptr = NULL; + u32 *prev_window_ptr = NULL; + + if (exiting_task(p)) { + sum = EXITING_TASK_MARKER; + } else { + curr_window_ptr = p->ravg.curr_window_cpu; + prev_window_ptr = p->ravg.prev_window_cpu; + memset(curr_window_ptr, 0, sizeof(u32) * nr_cpu_ids); + memset(prev_window_ptr, 0, sizeof(u32) * nr_cpu_ids); + } + + memset(&p->ravg, 0, sizeof(struct ravg)); + + p->ravg.curr_window_cpu = curr_window_ptr; + p->ravg.prev_window_cpu = prev_window_ptr; + + /* Retain EXITING_TASK marker */ + p->ravg.sum_history[0] = sum; +} + +void mark_task_starting(struct task_struct *p) +{ + u64 wallclock; + struct rq *rq = task_rq(p); + + if (!rq->window_start || sched_disable_window_stats) { + reset_task_stats(p); + return; + } + + wallclock = sched_ktime_clock(); + p->ravg.mark_start = wallclock; +} + +unsigned int max_possible_efficiency = 1; +unsigned int min_possible_efficiency = UINT_MAX; +unsigned int max_power_cost = 1; + +static cpumask_t all_cluster_cpus = CPU_MASK_NONE; +DECLARE_BITMAP(all_cluster_ids, NR_CPUS); +struct sched_cluster *sched_cluster[NR_CPUS]; +int num_clusters; + +struct list_head cluster_head; + +static void +insert_cluster(struct sched_cluster *cluster, struct list_head *head) +{ + struct sched_cluster *tmp; + struct list_head *iter = head; + + list_for_each_entry(tmp, head, list) { + if (cluster->max_power_cost < tmp->max_power_cost) + break; + iter = &tmp->list; + } + + list_add(&cluster->list, iter); +} + +static struct sched_cluster *alloc_new_cluster(const struct cpumask *cpus) +{ + struct sched_cluster *cluster = NULL; + + cluster = kzalloc(sizeof(struct sched_cluster), GFP_ATOMIC); + if (!cluster) { + pr_warn("Cluster allocation failed. 
Possible bad scheduling\n"); + return NULL; + } + + INIT_LIST_HEAD(&cluster->list); + cluster->max_power_cost = 1; + cluster->min_power_cost = 1; + cluster->capacity = 1024; + cluster->max_possible_capacity = 1024; + cluster->efficiency = 1; + cluster->load_scale_factor = 1024; + cluster->cur_freq = 1; + cluster->max_freq = 1; + cluster->min_freq = 1; + cluster->max_possible_freq = 1; + cluster->freq_init_done = false; + + raw_spin_lock_init(&cluster->load_lock); + cluster->cpus = *cpus; + cluster->efficiency = topology_get_cpu_scale(cpumask_first(cpus)); + + if (cluster->efficiency > max_possible_efficiency) + max_possible_efficiency = cluster->efficiency; + if (cluster->efficiency < min_possible_efficiency) + min_possible_efficiency = cluster->efficiency; + + return cluster; +} + +static void add_cluster(const struct cpumask *cpus, struct list_head *head) +{ + struct sched_cluster *cluster = alloc_new_cluster(cpus); + int i; + + if (!cluster) + return; + + for_each_cpu(i, cpus) + cpu_rq(i)->cluster = cluster; + + insert_cluster(cluster, head); + set_bit(num_clusters, all_cluster_ids); + num_clusters++; +} + +static int compute_max_possible_capacity(struct sched_cluster *cluster) +{ + int capacity = 1024; + + capacity *= capacity_scale_cpu_efficiency(cluster); + capacity >>= 10; + + capacity *= (1024 * cluster->max_possible_freq) / min_max_freq; + capacity >>= 10; + + return capacity; +} + +void walt_update_min_max_capacity(void) +{ + unsigned long flags; + + acquire_rq_locks_irqsave(cpu_possible_mask, &flags); + __update_min_max_capacity(); + release_rq_locks_irqrestore(cpu_possible_mask, &flags); +} + +static int +compare_clusters(void *priv, const struct list_head *a, const struct list_head *b) +{ + struct sched_cluster *cluster1, *cluster2; + int ret; + + cluster1 = container_of(a, struct sched_cluster, list); + cluster2 = container_of(b, struct sched_cluster, list); + + /* + * Don't assume higher capacity means higher power. If the + * power cost is same, sort the higher capacity cluster before + * the lower capacity cluster to start placing the tasks + * on the higher capacity cluster. + */ + ret = cluster1->max_power_cost > cluster2->max_power_cost || + (cluster1->max_power_cost == cluster2->max_power_cost && + cluster1->max_possible_capacity < + cluster2->max_possible_capacity); + + return ret; +} + +void sort_clusters(void) +{ + struct sched_cluster *cluster; + struct list_head new_head; + unsigned int tmp_max = 1; + + INIT_LIST_HEAD(&new_head); + + for_each_sched_cluster(cluster) { + cluster->max_power_cost = power_cost(cluster_first_cpu(cluster), + max_task_load()); + cluster->min_power_cost = power_cost(cluster_first_cpu(cluster), + 0); + + if (cluster->max_power_cost > tmp_max) + tmp_max = cluster->max_power_cost; + } + max_power_cost = tmp_max; + + move_list(&new_head, &cluster_head, true); + + list_sort(NULL, &new_head, compare_clusters); + assign_cluster_ids(&new_head); + + /* + * Ensure cluster ids are visible to all CPUs before making + * cluster_head visible. 
+ */ + move_list(&cluster_head, &new_head, false); +} + +static void update_all_clusters_stats(void) +{ + struct sched_cluster *cluster; + u64 highest_mpc = 0, lowest_mpc = U64_MAX; + unsigned long flags; + + acquire_rq_locks_irqsave(cpu_possible_mask, &flags); + + for_each_sched_cluster(cluster) { + u64 mpc; + + cluster->capacity = compute_capacity(cluster); + mpc = cluster->max_possible_capacity = + compute_max_possible_capacity(cluster); + cluster->load_scale_factor = compute_load_scale_factor(cluster); + + cluster->exec_scale_factor = + DIV_ROUND_UP(cluster->efficiency * 1024, + max_possible_efficiency); + + if (mpc > highest_mpc) + highest_mpc = mpc; + + if (mpc < lowest_mpc) + lowest_mpc = mpc; + } + + max_possible_capacity = highest_mpc; + min_max_possible_capacity = lowest_mpc; + + __update_min_max_capacity(); + release_rq_locks_irqrestore(cpu_possible_mask, &flags); +} + +void update_cluster_topology(void) +{ + struct cpumask cpus = *cpu_possible_mask; + const struct cpumask *cluster_cpus; + struct list_head new_head; + int i; + + INIT_LIST_HEAD(&new_head); + + for_each_cpu(i, &cpus) { + cluster_cpus = cpu_coregroup_mask(i); + cpumask_or(&all_cluster_cpus, &all_cluster_cpus, cluster_cpus); + cpumask_andnot(&cpus, &cpus, cluster_cpus); + add_cluster(cluster_cpus, &new_head); + } + + assign_cluster_ids(&new_head); + + /* + * Ensure cluster ids are visible to all CPUs before making + * cluster_head visible. + */ + move_list(&cluster_head, &new_head, false); + update_all_clusters_stats(); +} + +struct sched_cluster init_cluster = { + .list = LIST_HEAD_INIT(init_cluster.list), + .id = 0, + .max_power_cost = 1, + .min_power_cost = 1, + .capacity = 1024, + .max_possible_capacity = 1024, + .efficiency = 1, + .load_scale_factor = 1024, + .cur_freq = 1, + .max_freq = 1, + .min_freq = 1, + .max_possible_freq = 1, + .exec_scale_factor = 1024, +}; + +void init_clusters(void) +{ + bitmap_clear(all_cluster_ids, 0, NR_CPUS); + init_cluster.cpus = *cpu_possible_mask; + raw_spin_lock_init(&init_cluster.load_lock); + INIT_LIST_HEAD(&cluster_head); +} + +static unsigned long cpu_max_table_freq[NR_CPUS]; + +void update_cpu_cluster_capacity(const cpumask_t *cpus) +{ + int i; + struct sched_cluster *cluster; + struct cpumask cpumask; + unsigned long flags; + + cpumask_copy(&cpumask, cpus); + acquire_rq_locks_irqsave(cpu_possible_mask, &flags); + + for_each_cpu(i, &cpumask) { + cluster = cpu_rq(i)->cluster; + cpumask_andnot(&cpumask, &cpumask, &cluster->cpus); + + cluster->capacity = compute_capacity(cluster); + cluster->load_scale_factor = compute_load_scale_factor(cluster); + } + + __update_min_max_capacity(); + + release_rq_locks_irqrestore(cpu_possible_mask, &flags); +} + +static int cpufreq_notifier_policy(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_policy *policy = (struct cpufreq_policy *)data; + struct sched_cluster *cluster = NULL; + struct cpumask policy_cluster = *policy->related_cpus; + unsigned int orig_max_freq = 0; + int i, j, update_capacity = 0; + + if (val != CPUFREQ_CREATE_POLICY) + return 0; + + walt_update_min_max_capacity(); + + max_possible_freq = max(max_possible_freq, policy->cpuinfo.max_freq); + if (min_max_freq == 1) + min_max_freq = UINT_MAX; + min_max_freq = min(min_max_freq, policy->cpuinfo.max_freq); + BUG_ON(!min_max_freq); + BUG_ON(!policy->max); + + for_each_cpu(i, &policy_cluster) + cpu_max_table_freq[i] = policy->cpuinfo.max_freq; + + for_each_cpu(i, &policy_cluster) { + cluster = cpu_rq(i)->cluster; + cpumask_andnot(&policy_cluster, 
&policy_cluster, + &cluster->cpus); + + orig_max_freq = cluster->max_freq; + cluster->min_freq = policy->min; + cluster->max_freq = policy->max; + cluster->cur_freq = policy->cur; + + if (!cluster->freq_init_done) { + mutex_lock(&cluster_lock); + for_each_cpu(j, &cluster->cpus) + cpumask_copy(&cpu_rq(j)->freq_domain_cpumask, + policy->related_cpus); + cluster->max_possible_freq = policy->cpuinfo.max_freq; + cluster->max_possible_capacity = + compute_max_possible_capacity(cluster); + cluster->freq_init_done = true; + + sort_clusters(); + update_all_clusters_stats(); + mutex_unlock(&cluster_lock); + continue; + } + + update_capacity += (orig_max_freq != cluster->max_freq); + } + + if (update_capacity) + update_cpu_cluster_capacity(policy->related_cpus); + + return 0; +} + +static struct notifier_block notifier_policy_block = { + .notifier_call = cpufreq_notifier_policy +}; + +static int cpufreq_notifier_trans(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = (struct cpufreq_freqs *)data; + unsigned int cpu = freq->policy->cpu, new_freq = freq->new; + unsigned long flags; + struct sched_cluster *cluster; + struct cpumask policy_cpus = cpu_rq(cpu)->freq_domain_cpumask; + int i, j; + + if (val != CPUFREQ_POSTCHANGE) + return NOTIFY_DONE; + + if (cpu_cur_freq(cpu) == new_freq) + return NOTIFY_OK; + + for_each_cpu(i, &policy_cpus) { + cluster = cpu_rq(i)->cluster; + + for_each_cpu(j, &cluster->cpus) { + struct rq *rq = cpu_rq(j); + + raw_spin_lock_irqsave(&rq->__lock, flags); + update_task_ravg(rq->curr, rq, TASK_UPDATE, + sched_ktime_clock(), 0); + raw_spin_unlock_irqrestore(&rq->__lock, flags); + } + + cluster->cur_freq = new_freq; + cpumask_andnot(&policy_cpus, &policy_cpus, &cluster->cpus); + } + + return NOTIFY_OK; +} + +static struct notifier_block notifier_trans_block = { + .notifier_call = cpufreq_notifier_trans +}; + +static int register_walt_callback(void) +{ + int ret; + + ret = cpufreq_register_notifier(¬ifier_policy_block, + CPUFREQ_POLICY_NOTIFIER); + if (!ret) + ret = cpufreq_register_notifier(¬ifier_trans_block, + CPUFREQ_TRANSITION_NOTIFIER); + + return ret; +} +/* + * cpufreq callbacks can be registered at core_initcall or later time. + * Any registration done prior to that is "forgotten" by cpufreq. See + * initialization of variable init_cpufreq_transition_notifier_list_called + * for further information. + */ +core_initcall(register_walt_callback); + +/* + * Runs in hard-irq context. This should ideally run just after the latest + * window roll-over. + */ +void walt_irq_work(struct irq_work *irq_work) +{ + struct sched_cluster *cluster; + struct rq *rq; + int cpu; + u64 wc; + bool is_migration = false; + int level = 0; + + /* Am I the window rollover work or the migration work? 
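+	 * Both refresh the per-rq window stats and notify cpufreq; only the
+	 * rollover work goes on to run core_ctl_check().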
*/ + if (irq_work == &walt_migration_irq_work) + is_migration = true; + + for_each_cpu(cpu, cpu_possible_mask) { + if (level == 0) + raw_spin_lock(&cpu_rq(cpu)->__lock); + else + raw_spin_lock_nested(&cpu_rq(cpu)->__lock, level); + level++; + } + + wc = sched_ktime_clock(); + walt_load_reported_window = atomic64_read(&walt_irq_work_lastq_ws); + for_each_sched_cluster(cluster) { + raw_spin_lock(&cluster->load_lock); + + for_each_cpu(cpu, &cluster->cpus) { + rq = cpu_rq(cpu); + if (rq->curr) { + update_task_ravg(rq->curr, rq, + TASK_UPDATE, wc, 0); + account_load_subtractions(rq); + } + } + + raw_spin_unlock(&cluster->load_lock); + } + + for_each_sched_cluster(cluster) { + cpumask_t cluster_online_cpus; + unsigned int num_cpus, i = 1; + + cpumask_and(&cluster_online_cpus, &cluster->cpus, + cpu_online_mask); + num_cpus = cpumask_weight(&cluster_online_cpus); + for_each_cpu(cpu, &cluster_online_cpus) { + int flag = SCHED_CPUFREQ_WALT; + + rq = cpu_rq(cpu); + + if (i == num_cpus) + cpufreq_update_util(cpu_rq(cpu), flag); + else + cpufreq_update_util(cpu_rq(cpu), flag | + SCHED_CPUFREQ_CONTINUE); + i++; + } + } + + for_each_cpu(cpu, cpu_possible_mask) + raw_spin_unlock(&cpu_rq(cpu)->__lock); + + if (!is_migration) + core_ctl_check(this_rq()->window_start); +} + +static void walt_init_once(void) +{ + init_irq_work(&walt_migration_irq_work, walt_irq_work); + init_irq_work(&walt_cpufreq_irq_work, walt_irq_work); + + walt_cpu_util_freq_divisor = + (sched_ravg_window >> SCHED_CAPACITY_SHIFT) * 100; + walt_scale_demand_divisor = sched_ravg_window >> SCHED_CAPACITY_SHIFT; + + sched_init_task_load_windows = + div64_u64((u64)sysctl_sched_init_task_load_pct * + (u64)sched_ravg_window, 100); + sched_init_task_load_windows_scaled = + scale_demand(sched_init_task_load_windows); +} + +void walt_sched_init_rq(struct rq *rq) +{ + static bool init; + int j; + + if (!init) { + walt_init_once(); + init = true; + } + + cpumask_set_cpu(cpu_of(rq), &rq->freq_domain_cpumask); + + rq->walt_stats.cumulative_runnable_avg_scaled = 0; + rq->window_start = 0; + rq->walt_flags = 0; + rq->cur_irqload = 0; + rq->avg_irqload = 0; + rq->irqload_ts = 0; + + /* + * All cpus part of same cluster by default. 
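
walt_init_once() above precomputes the divisors that convert window sums, which are tracked in nanoseconds, into utilization on the 0..1024 capacity scale, and it seeds the initial demand given to new tasks. The standalone fragment below works through that arithmetic for an assumed 20 ms window and an assumed sysctl_sched_init_task_load_pct of 15; the numbers, the program itself, and the assumption that scaled demand is obtained by dividing by walt_scale_demand_divisor are illustrative rather than taken verbatim from the patch.

#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT 10	/* full capacity == 1024 */

int main(void)
{
	uint64_t sched_ravg_window = 20000000ULL; /* 20 ms in ns (assumed) */
	uint64_t init_task_load_pct = 15;         /* assumed sysctl value */

	/* mirrors walt_init_once(): ns of demand per unit of 0..1024 util */
	uint64_t scale_demand_divisor = sched_ravg_window >> SCHED_CAPACITY_SHIFT;
	uint64_t util_freq_divisor = scale_demand_divisor * 100;

	/* initial demand seeded for a new task, raw and scaled */
	uint64_t init_windows = init_task_load_pct * sched_ravg_window / 100;
	uint64_t init_windows_scaled = init_windows / scale_demand_divisor;

	printf("scale_demand_divisor = %llu ns per util unit\n",
	       (unsigned long long)scale_demand_divisor);
	printf("util_freq_divisor    = %llu\n",
	       (unsigned long long)util_freq_divisor);
	printf("initial demand       = %llu ns (~%llu scaled)\n",
	       (unsigned long long)init_windows,
	       (unsigned long long)init_windows_scaled);
	return 0;
}

With those assumed values a brand-new task starts out looking roughly 15 percent busy (about 153 on the 1024 scale) until it accumulates real window history.
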
This avoids the
+	 * need to check for rq->cluster being non-NULL in hot-paths
+	 * like select_best_cpu()
+	 */
+	rq->cluster = &init_cluster;
+	rq->curr_runnable_sum = rq->prev_runnable_sum = 0;
+	rq->nt_curr_runnable_sum = rq->nt_prev_runnable_sum = 0;
+	rq->cum_window_demand_scaled = 0;
+
+	for (j = 0; j < NUM_TRACKED_WINDOWS; j++)
+		memset(&rq->load_subs[j], 0, sizeof(struct load_subtractions));
+}
+
+#define min_cap_cluster() \
+	list_first_entry(&cluster_head, struct sched_cluster, list)
+#define max_cap_cluster() \
+	list_last_entry(&cluster_head, struct sched_cluster, list)
+static int sched_cluster_debug_show(struct seq_file *file, void *param)
+{
+	struct sched_cluster *cluster = NULL;
+
+	seq_printf(file, "min_id:%d, max_id:%d\n",
+		min_cap_cluster()->id,
+		max_cap_cluster()->id);
+
+	for_each_sched_cluster(cluster) {
+		seq_printf(file, "id:%d, cpumask:%d(%*pbl)\n",
+			cluster->id,
+			cpumask_first(&cluster->cpus),
+			cpumask_pr_args(&cluster->cpus));
+	}
+
+	return 0;
+}
+
+static int sched_cluster_debug_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, sched_cluster_debug_show, NULL);
+}
+
+static const struct proc_ops sched_cluster_fops = {
+	.proc_open = sched_cluster_debug_open,
+	.proc_read = seq_read,
+	.proc_lseek = seq_lseek,
+	.proc_release = seq_release,
+};
+
+static int __init init_sched_cluster_debug_procfs(void)
+{
+	struct proc_dir_entry *pe = NULL;
+
+	pe = proc_create("sched_cluster",
+		0444, NULL, &sched_cluster_fops);
+	if (!pe)
+		return -ENOMEM;
+	return 0;
+}
+late_initcall(init_sched_cluster_debug_procfs);
diff --git a/kernel/sched/walt.h b/kernel/sched/walt.h
new file mode 100755
index 0000000000000000000000000000000000000000..c5d6e241034da47010aa756adc7ec67e7c2cd0f4
--- /dev/null
+++ b/kernel/sched/walt.h
@@ -0,0 +1,255 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * walt.h
+ *
+ * Header file for Window Assisted Load Tracking (WALT)
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
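
For reference, the read-only /proc/sched_cluster file created by init_sched_cluster_debug_procfs() above can be dumped from userspace as sketched below; the expected layout follows the two seq_printf() formats in sched_cluster_debug_show(), and the sample output in the comment assumes a two-cluster 4+4 system purely for illustration.

#include <stdio.h>
#include <stdlib.h>

/*
 * Example output on an assumed 4+4 big.LITTLE system:
 *   min_id:0, max_id:1
 *   id:0, cpumask:0(0-3)
 *   id:1, cpumask:4(4-7)
 */
int main(void)
{
	char line[256];
	FILE *fp = fopen("/proc/sched_cluster", "r");

	if (!fp) {
		perror("/proc/sched_cluster (needs CONFIG_SCHED_WALT)");
		return EXIT_FAILURE;
	}

	while (fgets(line, sizeof(line), fp))
		fputs(line, stdout);

	fclose(fp);
	return EXIT_SUCCESS;
}
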
+ * + */ + +#ifndef __WALT_H +#define __WALT_H + +#ifdef CONFIG_SCHED_WALT + +#include + +#define WINDOW_STATS_RECENT 0 +#define WINDOW_STATS_MAX 1 +#define WINDOW_STATS_MAX_RECENT_AVG 2 +#define WINDOW_STATS_AVG 3 +#define WINDOW_STATS_INVALID_POLICY 4 + +#define EXITING_TASK_MARKER 0xdeaddead + +#define SCHED_NEW_TASK_WINDOWS 5 + +extern unsigned int sched_ravg_window; +extern unsigned int sysctl_sched_walt_init_task_load_pct; +unsigned long capacity_curr_of(int cpu); + +static inline int exiting_task(struct task_struct *p) +{ + return (p->ravg.sum_history[0] == EXITING_TASK_MARKER); +} + +static inline struct sched_cluster *cpu_cluster(int cpu) +{ + return cpu_rq(cpu)->cluster; +} + +static inline int same_cluster(int src_cpu, int dst_cpu) +{ + return cpu_rq(src_cpu)->cluster == cpu_rq(dst_cpu)->cluster; +} + +static inline u64 scale_exec_time(u64 delta, struct rq *rq) +{ + unsigned long capcurr = capacity_curr_of(cpu_of(rq)); + + delta = (delta * capcurr) >> SCHED_CAPACITY_SHIFT; + + return delta; +} + +static inline bool is_new_task(struct task_struct *p) +{ + return p->ravg.active_windows < SCHED_NEW_TASK_WINDOWS; +} + +static inline unsigned int max_task_load(void) +{ + return sched_ravg_window; +} + +static inline void +move_list(struct list_head *dst, struct list_head *src, bool sync_rcu) +{ + struct list_head *first, *last; + + first = src->next; + last = src->prev; + + if (sync_rcu) { + INIT_LIST_HEAD_RCU(src); + synchronize_rcu(); + } + + first->prev = dst; + dst->prev = last; + last->next = dst; + + /* Ensure list sanity before making the head visible to all CPUs. */ + smp_mb(); + dst->next = first; +} + +extern void reset_task_stats(struct task_struct *p); +extern void update_cluster_topology(void); +extern void init_clusters(void); +extern void update_task_ravg(struct task_struct *p, struct rq *rq, int event, + u64 wallclock, u64 irqtime); + +static inline void +fixup_cumulative_runnable_avg(struct walt_sched_stats *stats, + s64 demand_scaled_delta) +{ + if (sched_disable_window_stats) + return; + + stats->cumulative_runnable_avg_scaled += demand_scaled_delta; + BUG_ON((s64)stats->cumulative_runnable_avg_scaled < 0); +} + +static inline void +walt_inc_cumulative_runnable_avg(struct rq *rq, struct task_struct *p) +{ + if (sched_disable_window_stats) + return; + + fixup_cumulative_runnable_avg(&rq->walt_stats, p->ravg.demand_scaled); + + /* + * Add a task's contribution to the cumulative window demand when + * + * (1) task is enqueued with on_rq = 1 i.e migration, + * prio/cgroup/class change. + * (2) task is waking for the first time in this window. + */ + if (p->on_rq || (p->last_sleep_ts < rq->window_start)) + walt_fixup_cum_window_demand(rq, p->ravg.demand_scaled); +} + +static inline void +walt_dec_cumulative_runnable_avg(struct rq *rq, struct task_struct *p) +{ + if (sched_disable_window_stats) + return; + + fixup_cumulative_runnable_avg(&rq->walt_stats, + -(s64)p->ravg.demand_scaled); + + /* + * on_rq will be 1 for sleeping tasks. So check if the task + * is migrating or dequeuing in RUNNING state to change the + * prio/cgroup/class. 
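
scale_exec_time() above is what makes the window sums frequency- and capacity-invariant: raw runtime is multiplied by capacity_curr_of(cpu) and shifted down by SCHED_CAPACITY_SHIFT, so time spent on a down-clocked or smaller CPU contributes proportionally less demand. The standalone sketch below reproduces only that arithmetic; the capacity values used are assumptions for illustration.

#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT 10	/* full capacity == 1024 */

/* Same operation as the kernel helper: delta * capcurr / 1024. */
static uint64_t scale_exec_time_demo(uint64_t delta_ns, unsigned long capcurr)
{
	return (delta_ns * capcurr) >> SCHED_CAPACITY_SHIFT;
}

int main(void)
{
	uint64_t delta = 4000000ULL;	/* 4 ms of raw runtime */

	/* assumed current capacities: big core at fmax, then two slower cases */
	printf("capcurr=1024 -> %llu ns of demand\n",
	       (unsigned long long)scale_exec_time_demo(delta, 1024));
	printf("capcurr=512  -> %llu ns of demand\n",
	       (unsigned long long)scale_exec_time_demo(delta, 512));
	printf("capcurr=256  -> %llu ns of demand\n",
	       (unsigned long long)scale_exec_time_demo(delta, 256));
	return 0;
}

Four milliseconds at half capacity therefore counts as two milliseconds of demand, which is what allows per-window demands from different CPUs and operating points to be compared and summed.
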
+ */ + if (task_on_rq_migrating(p) || p->__state == TASK_RUNNING) + walt_fixup_cum_window_demand(rq, -(s64)p->ravg.demand_scaled); +} +extern void fixup_walt_sched_stats_common(struct rq *rq, struct task_struct *p, + u16 updated_demand_scaled); +extern void inc_rq_walt_stats(struct rq *rq, struct task_struct *p); +extern void dec_rq_walt_stats(struct rq *rq, struct task_struct *p); +extern void fixup_busy_time(struct task_struct *p, int new_cpu); +extern void init_new_task_load(struct task_struct *p); +extern void mark_task_starting(struct task_struct *p); +extern void set_window_start(struct rq *rq); +void account_irqtime(int cpu, struct task_struct *curr, u64 delta, u64 wallclock); + +void walt_irq_work(struct irq_work *irq_work); + +void walt_sched_init_rq(struct rq *rq); + +extern void sched_account_irqtime(int cpu, struct task_struct *curr, + u64 delta, u64 wallclock); + +#define SCHED_HIGH_IRQ_TIMEOUT 3 +static inline u64 sched_irqload(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + s64 delta; + + delta = get_jiffies_64() - rq->irqload_ts; + /* + * Current context can be preempted by irq and rq->irqload_ts can be + * updated by irq context so that delta can be negative. + * But this is okay and we can safely return as this means there + * was recent irq occurrence. + */ + + if (delta < SCHED_HIGH_IRQ_TIMEOUT) + return rq->avg_irqload; + else + return 0; +} + +static inline int sched_cpu_high_irqload(int cpu) +{ + return sched_irqload(cpu) >= sysctl_sched_cpu_high_irqload; +} + +extern int +sysctl_sched_walt_init_task_load_pct_sysctl_handler(struct ctl_table *table, + int write, void __user *buffer, size_t *length, loff_t *ppos); + +static inline unsigned int cpu_cur_freq(int cpu) +{ + return cpu_rq(cpu)->cluster->cur_freq; +} + +static inline void assign_cluster_ids(struct list_head *head) +{ + struct sched_cluster *cluster; + int pos = 0; + + list_for_each_entry(cluster, head, list) { + cluster->id = pos; + sched_cluster[pos++] = cluster; + } +} + +extern void update_cluster_load_subtractions(struct task_struct *p, + int cpu, u64 ws, bool new_task); +#else /* CONFIG_SCHED_WALT */ +static inline void walt_sched_init_rq(struct rq *rq) { } + +static inline void update_task_ravg(struct task_struct *p, struct rq *rq, + int event, u64 wallclock, u64 irqtime) { } + +static inline void walt_inc_cumulative_runnable_avg(struct rq *rq, + struct task_struct *p) { } + +static inline void walt_dec_cumulative_runnable_avg(struct rq *rq, + struct task_struct *p) { } + +static inline void +inc_rq_walt_stats(struct rq *rq, struct task_struct *p) { } + +static inline void +dec_rq_walt_stats(struct rq *rq, struct task_struct *p) { } + +static inline void fixup_busy_time(struct task_struct *p, int new_cpu) { } +static inline void init_new_task_load(struct task_struct *p) { } +static inline void mark_task_starting(struct task_struct *p) { } +static inline void set_window_start(struct rq *rq) { } +static inline void update_cluster_topology(void) { } +static inline void init_clusters(void) { } + +static inline void +fixup_walt_sched_stats_common(struct rq *rq, struct task_struct *p, + u16 updated_demand_scaled) { } + +static inline void sched_account_irqtime(int cpu, struct task_struct *curr, + u64 delta, u64 wallclock) { } + +static inline u64 sched_irqload(int cpu) +{ + return 0; +} +static inline int sched_cpu_high_irqload(int cpu) +{ + return 0; +} +static inline int same_cluster(int src_cpu, int dst_cpu) { return 1; } +#endif /* CONFIG_SCHED_WALT */ + +#endif /* __WALT_H */ diff --git a/kernel/sysctl.c 
b/kernel/sysctl.c index 354a2d294f526ad6688168443913385eda101fa1..b0151dbbd162a758d07a016373bbf9c274831a24 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1623,6 +1623,40 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_SCHED_WALT + { + .procname = "sched_use_walt_cpu_util", + .data = &sysctl_sched_use_walt_cpu_util, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "sched_use_walt_task_util", + .data = &sysctl_sched_use_walt_task_util, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "sched_walt_init_task_load_pct", + .data = &sysctl_sched_walt_init_task_load_pct, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_sched_walt_init_task_load_pct_sysctl_handler, + }, + { + .procname = "sched_cpu_high_irqload", + .data = &sysctl_sched_cpu_high_irqload, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif #ifdef CONFIG_PROC_SYSCTL { .procname = "tainted",
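
The four kern_table entries above appear as files under /proc/sys/kernel/ once the kernel is built with CONFIG_SCHED_WALT. The userspace sketch below just prints their current values; writing works through the same paths (for example, writing 20 to sched_walt_init_task_load_pct, subject to whatever validation the custom handler applies), and none of this code is part of the patch itself.

#include <stdio.h>

/* Print one WALT sysctl; the /proc/sys/kernel/ prefix follows from kern_table. */
static void show(const char *name)
{
	char path[128], buf[64];
	FILE *fp;

	snprintf(path, sizeof(path), "/proc/sys/kernel/%s", name);
	fp = fopen(path, "r");
	if (!fp) {
		printf("%-32s <not available>\n", name);
		return;
	}
	if (fgets(buf, sizeof(buf), fp))
		printf("%-32s %s", name, buf);	/* buf keeps its newline */
	fclose(fp);
}

int main(void)
{
	show("sched_use_walt_cpu_util");
	show("sched_use_walt_task_util");
	show("sched_walt_init_task_load_pct");
	show("sched_cpu_high_irqload");
	return 0;
}
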