diff --git a/fs/proc/base.c b/fs/proc/base.c
index ffd54617c35478e92a9f6bef67013e16e6cd3183..db6bdb6f7de2abc816df8392d4d3a2ee2e7b2355 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3588,6 +3588,64 @@ static const struct inode_operations proc_tid_comm_inode_operations = {
 	.permission = proc_tid_comm_permission,
 };
 
+#ifdef CONFIG_BPF_SCHED
+static ssize_t pid_tag_write(struct file *file, const char __user *buf,
+			     size_t count, loff_t *offset)
+{
+	struct inode *inode = file_inode(file);
+	struct task_struct *tsk;
+	int err = 0;
+	long tag = 0;
+
+	tsk = get_proc_task(inode);
+	if (!tsk)
+		return -ESRCH;
+
+	if (unlikely(tsk->pid == 1)) {
+		err = -EPERM;
+		goto out;
+	}
+
+	err = kstrtol_from_user(buf, count, 0, &tag);
+	if (err)
+		goto out;
+
+	sched_settag(tsk, tag);
+
+out:
+	put_task_struct(tsk);
+	return err < 0 ? err : count;
+}
+
+static int pid_tag_show(struct seq_file *m, void *v)
+{
+	struct inode *inode = m->private;
+	struct task_struct *tsk;
+
+	tsk = get_proc_task(inode);
+	if (!tsk)
+		return -ESRCH;
+
+	seq_printf(m, "%ld\n", tsk->tag);
+	put_task_struct(tsk);
+
+	return 0;
+}
+
+static int pid_tag_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, pid_tag_show, inode);
+}
+
+static const struct file_operations proc_pid_tag_operations = {
+	.open		= pid_tag_open,
+	.read		= seq_read,
+	.write		= pid_tag_write,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif
+
 /*
  * Tasks
  */
@@ -3691,6 +3749,9 @@ static const struct pid_entry tid_base_stuff[] = {
 	ONE("ksm_merging_pages",  S_IRUSR, proc_pid_ksm_merging_pages),
 	ONE("ksm_stat",  S_IRUSR, proc_pid_ksm_stat),
 #endif
+#ifdef CONFIG_BPF_SCHED
+	REG("tag", 0644, proc_pid_tag_operations),
+#endif
 };
 
 static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/include/linux/bpf_sched.h b/include/linux/bpf_sched.h
new file mode 100644
index 0000000000000000000000000000000000000000..9cd2493d2787ce9ea689d9c5d50d539453eb76f0
--- /dev/null
+++ b/include/linux/bpf_sched.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_BPF_SCHED_H
+#define _LINUX_BPF_SCHED_H
+
+#include <linux/bpf.h>
+
+#ifdef CONFIG_BPF_SCHED
+
+#include <linux/jump_label.h>
+
+#define BPF_SCHED_HOOK(RET, DEFAULT, NAME, ...) \
+	RET bpf_sched_##NAME(__VA_ARGS__);
+#include <linux/sched_hook_defs.h>
+#undef BPF_SCHED_HOOK
+
+int bpf_sched_verify_prog(struct bpf_verifier_log *vlog,
+			  const struct bpf_prog *prog);
+
+DECLARE_STATIC_KEY_FALSE(bpf_sched_enabled_key);
+
+static inline bool bpf_sched_enabled(void)
+{
+	return static_branch_unlikely(&bpf_sched_enabled_key);
+}
+
+static inline void bpf_sched_inc(void)
+{
+	static_branch_inc(&bpf_sched_enabled_key);
+}
+
+static inline void bpf_sched_dec(void)
+{
+	static_branch_dec(&bpf_sched_enabled_key);
+}
+
+#else /* !CONFIG_BPF_SCHED */
+
+static inline int bpf_sched_verify_prog(struct bpf_verifier_log *vlog,
+					const struct bpf_prog *prog)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline bool bpf_sched_enabled(void)
+{
+	return false;
+}
+
+#endif /* CONFIG_BPF_SCHED */
+#endif /* _LINUX_BPF_SCHED_H */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index fc0d6f32c68760b37872634fa3a0c0a0870c6066..dd79463eea4e31989cb816009f5d8ca039c75827 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -83,6 +83,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SYSCALL, bpf_syscall,
 BPF_PROG_TYPE(BPF_PROG_TYPE_NETFILTER, netfilter,
 	      struct bpf_nf_ctx, struct bpf_nf_ctx)
 #endif
+#ifdef CONFIG_BPF_SCHED
+BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED, bpf_sched,
+	      void *, void *)
+#endif /* CONFIG_BPF_SCHED */
 
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
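Since the double expansion of sched_hook_defs.h is the heart of the new header, it is worth spelling out what the preprocessor generates. For the first hook declared later in this patch (cfs_select_rq), the two expansions — the prototype in bpf_sched.h above and the attachable nop body in kernel/sched/bpf_sched.c below — come out as follows (an illustration only, not additional patch content):

/* From <linux/bpf_sched.h>: a plain prototype per hook. */
int bpf_sched_cfs_select_rq(struct sched_migrate_ctx *ctx);

/* From kernel/sched/bpf_sched.c: a noinline stub returning the hook's
 * default value (-1, "not handled").  BPF_PROG_TYPE_SCHED programs
 * attach to this symbol through the fmod_ret-style trampoline and
 * replace the return value; noinline keeps the attach point from
 * being optimized away. */
noinline int bpf_sched_cfs_select_rq(struct sched_migrate_ctx *ctx)
{
	return -1;
}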
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3520e3fbaa916670190eea018a4a6a01f78d5010..3e3a5a5ffa3931fdba201ee35bd5e062933f4978 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1537,6 +1537,10 @@ struct task_struct {
 	struct user_event_mm		*user_event_mm;
 #endif
 
+#ifdef CONFIG_BPF_SCHED
+	long				tag;
+#endif
+
 	/*
 	 * New fields for task_struct should be added above here, so that
 	 * they are included in the randomized portion of task_struct.
@@ -2469,4 +2473,38 @@ static inline int sched_qos_cpu_overload(void)
 }
 #endif
 
+#ifdef CONFIG_BPF_SCHED
+extern void sched_settag(struct task_struct *tsk, s64 tag);
+
+struct bpf_sched_cpu_stats {
+	/* nr_running */
+	unsigned int		nr_running;
+	unsigned int		cfs_nr_running;
+	unsigned int		cfs_h_nr_running;
+	unsigned int		cfs_idle_h_nr_running;
+	unsigned int		rt_nr_running;
+	unsigned int		rr_nr_running;
+};
+
+struct sched_migrate_ctx {
+	struct task_struct	*task;
+	struct cpumask		*select_idle_mask;
+	int			prev_cpu;
+	int			curr_cpu;
+	int			is_sync;
+	int			want_affine;
+	int			wake_flags;
+	int			sd_flag;
+	int			new_cpu;
+};
+
+struct sched_migrate_node {
+	int			src_cpu;
+	int			src_node;
+	int			dst_cpu;
+	int			dst_node;
+};
+
+#endif
+
 #endif
diff --git a/include/linux/sched_hook_defs.h b/include/linux/sched_hook_defs.h
new file mode 100644
index 0000000000000000000000000000000000000000..3d9fa7aed10aa9f9ed59228fb940f18f55984b90
--- /dev/null
+++ b/include/linux/sched_hook_defs.h
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+BPF_SCHED_HOOK(int, -1, cfs_select_rq, struct sched_migrate_ctx *ctx)
+BPF_SCHED_HOOK(int, -1, cfs_can_migrate_task, struct task_struct *p, struct sched_migrate_node *migrate_node)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4924f0cde1bcacd9ee3419aec56dad680094637d..87914a0fc2e396d8a49faaaa7ab097d5cd046d2e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -988,6 +988,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SK_LOOKUP,
 	BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
 	BPF_PROG_TYPE_NETFILTER,
+	BPF_PROG_TYPE_SCHED,
 };
 
 enum bpf_attach_type {
@@ -1040,6 +1041,7 @@ enum bpf_attach_type {
 	BPF_TCX_INGRESS,
 	BPF_TCX_EGRESS,
 	BPF_TRACE_UPROBE_MULTI,
+	BPF_SCHED,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -5642,6 +5644,12 @@ union bpf_attr {
 *		0 on success.
 *
 *		**-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * int bpf_sched_cpu_stats_of(int *cpu, struct bpf_sched_cpu_stats *ctx, int len)
+ *	Description
+ *		Get multiple types of *cpu* statistics and store in *ctx*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
 */
 #define ___BPF_FUNC_MAPPER(FN, ctx...)			\
 	FN(unspec, 0, ##ctx)				\
@@ -5856,6 +5864,7 @@ union bpf_attr {
 	FN(user_ringbuf_drain, 209, ##ctx)	\
 	FN(cgrp_storage_get, 210, ##ctx)	\
 	FN(cgrp_storage_delete, 211, ##ctx)	\
+	FN(sched_cpu_stats_of, 212, ##ctx)	\
 	/* */
 
 /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
diff --git a/init/init_task.c b/init/init_task.c
index ff6c4b9bfe6b1c01c0d99d605d5f0c22693470d3..d377a089f002e101622c04ee028fa6e23fea5b95 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -210,6 +210,9 @@ struct task_struct init_task
 #ifdef CONFIG_SECCOMP_FILTER
 	.seccomp	= { .filter_count = ATOMIC_INIT(0) },
 #endif
+#ifdef CONFIG_BPF_SCHED
+	.tag		= 0,
+#endif
 };
 EXPORT_SYMBOL(init_task);
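With the hooks, context structs, and the sched_cpu_stats_of helper now visible, the new program type can be made concrete. Below is a minimal sketch of a scheduler BPF program for the cfs_select_rq hook. It is illustrative only: it assumes a vmlinux.h generated from a kernel built with CONFIG_BPF_SCHED (so that sched_migrate_ctx and bpf_sched_cpu_stats are in BTF), the helper is declared by hand against its id (212) in case bpf_helper_defs.h has not been regenerated, and the file name select_rq.bpf.c is made up for this walkthrough.

// SPDX-License-Identifier: GPL-2.0
/* select_rq.bpf.c - illustrative only, not part of the patch */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* Helper id 212 as added by this patch, declared by hand. */
static int (*bpf_sched_cpu_stats_of)(int *cpu,
				     struct bpf_sched_cpu_stats *ctx,
				     int len) = (void *)212;

SEC("sched/cfs_select_rq")
int BPF_PROG(cfs_select_rq, struct sched_migrate_ctx *c)
{
	struct bpf_sched_cpu_stats stats = {};
	int prev = c->prev_cpu;

	if (bpf_sched_cpu_stats_of(&prev, &stats, sizeof(stats)) < 0)
		return -1;	/* error: fall back to the default path */

	/* Keep the waking task on its previous CPU while that CPU is idle
	 * or only lightly loaded; otherwise return -1 so CFS runs its
	 * normal wakeup path (see the fair.c hunk later in this patch). */
	if (stats.nr_running <= 1)
		return prev;

	return -1;
}

char LICENSE[] SEC("license") = "GPL";

The GPL license string is not optional: bpf_sched_verify_prog(), added below, rejects non-GPL-compatible programs at load time.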
diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig
index 6a906ff930065268dbdf4e5c4ea0bb3f851bcb2f..bf20943ff18caf851152fa1e410126389d15a846 100644
--- a/kernel/bpf/Kconfig
+++ b/kernel/bpf/Kconfig
@@ -100,4 +100,14 @@ config BPF_LSM
 
 	  If you are unsure how to answer this question, answer N.
 
+config BPF_SCHED
+	bool "Sched Instrumentation with BPF"
+	depends on BPF_EVENTS
+	depends on BPF_SYSCALL
+	help
+	  Enables instrumentation of the sched hooks with eBPF programs for
+	  implementing dynamic scheduling policies.
+
+	  If you are unsure how to answer this question, answer N.
+
 endmenu # "BPF subsystem"
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 8090d7fb11ef686019b4734bbc7151057ee78efa..133805b4bc71ebc0a2a783a3e186adbb490fc563 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5982,6 +5982,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 			return true;
 		t = btf_type_by_id(btf, t->type);
 		break;
+	case BPF_SCHED:
 	case BPF_MODIFY_RETURN:
 		/* For now the BPF_MODIFY_RETURN can only be attached to
 		 * functions that return an int.
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d77b2f8b93641b4445645ec2d9397849fe6a5af3..aea7fe557d63562a5d41a15fafad94f7e0ee7bc4 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -36,6 +36,7 @@
 #include
 #include
 #include
+#include <linux/bpf_sched.h>
 
 #include
 
@@ -2412,6 +2413,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
 	case BPF_PROG_TYPE_LSM:
 	case BPF_PROG_TYPE_STRUCT_OPS:
 	case BPF_PROG_TYPE_EXT:
+	case BPF_PROG_TYPE_SCHED:
 		break;
 	default:
 		return -EINVAL;
@@ -2539,6 +2541,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
 	case BPF_PROG_TYPE_LSM:
 	case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */
 	case BPF_PROG_TYPE_EXT: /* extends any prog */
+	case BPF_PROG_TYPE_SCHED:
 		return true;
 	default:
 		return false;
@@ -3025,6 +3028,11 @@ static void bpf_tracing_link_release(struct bpf_link *link)
 	struct bpf_tracing_link *tr_link =
 		container_of(link, struct bpf_tracing_link, link.link);
 
+#ifdef CONFIG_BPF_SCHED
+	if (link->prog->type == BPF_PROG_TYPE_SCHED)
+		bpf_sched_dec();
+#endif
+
 	WARN_ON_ONCE(bpf_trampoline_unlink_prog(&tr_link->link,
 						tr_link->trampoline));
 
@@ -3115,6 +3123,12 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
 			goto out_put_prog;
 		}
 		break;
+	case BPF_PROG_TYPE_SCHED:
+		if (prog->expected_attach_type != BPF_SCHED) {
+			err = -EINVAL;
+			goto out_put_prog;
+		}
+		break;
 	default:
 		err = -EINVAL;
 		goto out_put_prog;
@@ -3234,6 +3248,11 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
 		goto out_unlock;
 	}
 
+#ifdef CONFIG_BPF_SCHED
+	if (prog->type == BPF_PROG_TYPE_SCHED)
+		bpf_sched_inc();
+#endif
+
 	link->tgt_prog = tgt_prog;
 	link->trampoline = tr;
 
@@ -3582,6 +3601,7 @@ static int bpf_raw_tp_link_attach(struct bpf_prog *prog,
 	case BPF_PROG_TYPE_TRACING:
 	case BPF_PROG_TYPE_EXT:
 	case BPF_PROG_TYPE_LSM:
+	case BPF_PROG_TYPE_SCHED:
 		if (user_tp_name)
 			/* The attach point for this category of programs
 			 * should be specified via btf_id during program load.
@@ -3717,6 +3737,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
 	case BPF_TCX_INGRESS:
 	case BPF_TCX_EGRESS:
 		return BPF_PROG_TYPE_SCHED_CLS;
+	case BPF_SCHED:
+		return BPF_PROG_TYPE_SCHED;
 	default:
 		return BPF_PROG_TYPE_UNSPEC;
 	}
@@ -3744,6 +3766,10 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
 		       -EINVAL : 0;
 	case BPF_PROG_TYPE_EXT:
 		return 0;
+	case BPF_PROG_TYPE_SCHED:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		return 0;
 	case BPF_PROG_TYPE_NETFILTER:
 		if (attach_type != BPF_NETFILTER)
 			return -EINVAL;
@@ -4922,6 +4948,7 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
 		ret = cgroup_bpf_link_attach(attr, prog);
 		break;
 	case BPF_PROG_TYPE_EXT:
+	case BPF_PROG_TYPE_SCHED:
 		ret = bpf_tracing_prog_attach(prog,
 					      attr->link_create.target_fd,
 					      attr->link_create.target_btf_id,
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index e97aeda3a86b55522a73e53279543cb5b4df6919..2aa01c26f6a4e021ed8f035632c688baeb40d3d5 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -493,6 +493,7 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
 	switch (prog->expected_attach_type) {
 	case BPF_TRACE_FENTRY:
 		return BPF_TRAMP_FENTRY;
+	case BPF_SCHED:
 	case BPF_MODIFY_RETURN:
 		return BPF_TRAMP_MODIFY_RETURN;
 	case BPF_TRACE_FEXIT:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 824531d4c262a3409dffd3138e86776e8a770ebf..c8da0be7d576c7fac80165af2f4b1ebd1e1fbd92 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include <linux/bpf_sched.h>
 
 #include "disasm.h"
 
@@ -19453,6 +19454,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
 	case BPF_LSM_CGROUP:
 	case BPF_TRACE_FENTRY:
 	case BPF_TRACE_FEXIT:
+	case BPF_SCHED:
 		if (!btf_type_is_func(t)) {
 			bpf_log(log, "attach_btf_id %u is not a function\n",
 				btf_id);
@@ -19629,7 +19631,8 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 
 	if (prog->type != BPF_PROG_TYPE_TRACING &&
 	    prog->type != BPF_PROG_TYPE_LSM &&
-	    prog->type != BPF_PROG_TYPE_EXT)
+	    prog->type != BPF_PROG_TYPE_EXT &&
+	    prog->type != BPF_PROG_TYPE_SCHED)
 		return 0;
 
 	ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
@@ -19673,6 +19676,12 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 			return -EINVAL;
 		}
 
+	if (prog->type == BPF_PROG_TYPE_SCHED) {
+		ret = bpf_sched_verify_prog(&env->log, prog);
+		if (ret < 0)
+			return ret;
+	}
+
 	key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
 	tr = bpf_trampoline_get(key, &tgt_info);
 	if (!tr)
diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c
new file mode 100644
index 0000000000000000000000000000000000000000..1ddff44b6a93e4ae98ea947cc659a1363e09b3b0
--- /dev/null
+++ b/kernel/sched/bpf_sched.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/cgroup.h>
+#include <linux/bpf_verifier.h>
+#include <linux/bpf_sched.h>
+#include <linux/btf_ids.h>
+#include "sched.h"
+
+DEFINE_STATIC_KEY_FALSE(bpf_sched_enabled_key);
+
+/*
+ * For every hook declare a nop function where a BPF program can be attached.
+ */
+#define BPF_SCHED_HOOK(RET, DEFAULT, NAME, ...)	\
+noinline RET bpf_sched_##NAME(__VA_ARGS__)	\
+{						\
+	return DEFAULT;				\
+}
+
+#include <linux/sched_hook_defs.h>
+#undef BPF_SCHED_HOOK
+
+#define BPF_SCHED_HOOK(RET, DEFAULT, NAME, ...) \
+	BTF_ID(func, bpf_sched_##NAME)
+BTF_SET_START(bpf_sched_hooks)
+#include <linux/sched_hook_defs.h>
+#undef BPF_SCHED_HOOK
+BTF_SET_END(bpf_sched_hooks)
+
+int bpf_sched_verify_prog(struct bpf_verifier_log *vlog,
+			  const struct bpf_prog *prog)
+{
+	if (!prog->gpl_compatible) {
+		bpf_log(vlog,
+			"sched programs must have a GPL compatible license\n");
+		return -EINVAL;
+	}
+
+	if (!btf_id_set_contains(&bpf_sched_hooks, prog->aux->attach_btf_id)) {
+		bpf_log(vlog, "attach_btf_id %u points to wrong type name %s\n",
+			prog->aux->attach_btf_id, prog->aux->attach_func_name);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+BPF_CALL_3(bpf_sched_cpu_stats_of, int *, cpuid,
+	   struct bpf_sched_cpu_stats *, ctx,
+	   int, len)
+{
+	struct rq *rq;
+	int cpu = *cpuid;
+
+	if ((unsigned int)cpu >= nr_cpu_ids) {
+		memset(ctx, 0, len);
+		return -EINVAL;
+	}
+
+	rq = cpu_rq(cpu);
+	memset(ctx, 0, len);
+
+	SCHED_WARN_ON(!rcu_read_lock_held());
+	/* nr_running */
+	ctx->nr_running = rq->nr_running;
+	ctx->cfs_nr_running = rq->cfs.nr_running;
+	ctx->cfs_h_nr_running = rq->cfs.h_nr_running;
+	ctx->cfs_idle_h_nr_running = rq->cfs.idle_h_nr_running;
+	ctx->rt_nr_running = rq->rt.rt_nr_running;
+	ctx->rr_nr_running = rq->rt.rr_nr_running;
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_sched_cpu_stats_of_proto = {
+	.func		= bpf_sched_cpu_stats_of,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_INT,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+};
+
+static const struct bpf_func_proto *
+bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_trace_printk:
+		return bpf_get_trace_printk_proto();
+	case BPF_FUNC_sched_cpu_stats_of:
+		return &bpf_sched_cpu_stats_of_proto;
+	default:
+		return bpf_base_func_proto(func_id);
+	}
+}
+
+const struct bpf_prog_ops bpf_sched_prog_ops = {
+};
+
+const struct bpf_verifier_ops bpf_sched_verifier_ops = {
+	.get_func_proto = bpf_sched_func_proto,
+	.is_valid_access = btf_ctx_access,
+};
diff --git a/kernel/sched/build_utility.c b/kernel/sched/build_utility.c
index 99bdd96f454f4eba861b11b0aae6991d348dce0e..d44c584d9bc74a03b11de3cf1bd5f0689b4f6137 100644
--- a/kernel/sched/build_utility.c
+++ b/kernel/sched/build_utility.c
@@ -108,3 +108,7 @@
 #ifdef CONFIG_SCHED_AUTOGROUP
 # include "autogroup.c"
 #endif
+
+#ifdef CONFIG_BPF_SCHED
+# include "bpf_sched.c"
+#endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a1c73dea1f778c4038fef05cab1875335bc3dd17..0898d8c5d9c3498c816e0614019cfd418b41fa60 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2468,7 +2468,11 @@ static inline bool rq_has_pinned_tasks(struct rq *rq)
  * Per-CPU kthreads are allowed to run on !active && online CPUs, see
  * __set_cpus_allowed_ptr() and select_fallback_rq().
  */
+#ifdef CONFIG_BPF_SCHED
+inline bool is_cpu_allowed(struct task_struct *p, int cpu)
+#else
 static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
+#endif
 {
 	/* When not in the task's cpumask, no point in looking further. */
 	if (!cpumask_test_cpu(cpu, p->cpus_ptr))
@@ -4541,6 +4545,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->migration_pending = NULL;
 #endif
 	init_sched_mm_cid(p);
+#ifdef CONFIG_BPF_SCHED
+	p->tag = 0;
+#endif
 }
 
 DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
@@ -9921,6 +9928,10 @@ LIST_HEAD(task_groups);
 static struct kmem_cache *task_group_cache __read_mostly;
 #endif
 
+#ifdef CONFIG_BPF_SCHED
+DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
+#endif
+
 void __init sched_init(void)
 {
 	unsigned long ptr = 0;
@@ -9976,6 +9987,13 @@ void __init sched_init(void)
 			global_rt_period(), global_rt_runtime());
 #endif /* CONFIG_RT_GROUP_SCHED */
 
+#if defined(CONFIG_CPUMASK_OFFSTACK) && defined(CONFIG_BPF_SCHED)
+	for_each_possible_cpu(i) {
+		per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node(
+			cpumask_size(), GFP_KERNEL, cpu_to_node(i));
+	}
+#endif
+
 #ifdef CONFIG_CGROUP_SCHED
 	task_group_cache = KMEM_CACHE(task_group, 0);
 
@@ -10459,6 +10477,13 @@ static void sched_unregister_group(struct task_group *tg)
 	call_rcu(&tg->rcu, sched_free_group_rcu);
 }
 
+#ifdef CONFIG_BPF_SCHED
+static inline void tg_init_tag(struct task_group *tg, struct task_group *ptg)
+{
+	tg->tag = ptg->tag;
+}
+#endif
+
 /* allocate runqueue etc for a new task group */
 struct task_group *sched_create_group(struct task_group *parent)
 {
@@ -10479,6 +10504,10 @@ struct task_group *sched_create_group(struct task_group *parent)
 	if (!alloc_rt_sched_group(tg, parent))
 		goto err;
 
+#ifdef CONFIG_BPF_SCHED
+	tg_init_tag(tg, parent);
+#endif
+
 	alloc_uclamp_sched_group(tg, parent);
 
 	return tg;
@@ -10566,6 +10595,14 @@ static void sched_change_group(struct task_struct *tsk, struct task_group *group
 	sched_change_qos_group(tsk, group);
 #endif
 
+#ifdef CONFIG_BPF_SCHED
+	/*
+	 * sched_move_task() has already dequeued/put the task and will
+	 * re-enqueue it after this call, so no dequeue/enqueue is needed here.
+	 */
+	tsk->tag = group->tag;
+#endif
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	if (tsk->sched_class->task_change_group)
 		tsk->sched_class->task_change_group(tsk);
@@ -11349,12 +11386,86 @@ static int cpu_qos_write(struct cgroup_subsys_state *css,
 done:
 	return 0;
 }
+#endif
 
 static inline s64 cpu_qos_read(struct cgroup_subsys_state *css,
 			       struct cftype *cft)
 {
 	return css_tg(css)->qos_level;
 }
+
+#ifdef CONFIG_BPF_SCHED
+void sched_settag(struct task_struct *tsk, s64 tag)
+{
+	int queued, running, queue_flags =
+		DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+	struct rq_flags rf;
+	struct rq *rq;
+
+	if (tsk->tag == tag)
+		return;
+
+	rq = task_rq_lock(tsk, &rf);
+
+	running = task_current(rq, tsk);
+	queued = task_on_rq_queued(tsk);
+
+	update_rq_clock(rq);
+	if (queued)
+		dequeue_task(rq, tsk, queue_flags);
+	if (running)
+		put_prev_task(rq, tsk);
+
+	tsk->tag = tag;
+
+	if (queued)
+		enqueue_task(rq, tsk, queue_flags);
+	if (running)
+		set_next_task(rq, tsk);
+
+	task_rq_unlock(rq, tsk, &rf);
+}
+
+int tg_change_tag(struct task_group *tg, void *data)
+{
+	struct css_task_iter it;
+	struct task_struct *tsk;
+	s64 tag = *(s64 *)data;
+	struct cgroup_subsys_state *css = &tg->css;
+
+	tg->tag = tag;
+
+	css_task_iter_start(css, 0, &it);
+	while ((tsk = css_task_iter_next(&it)))
+		sched_settag(tsk, tag);
+	css_task_iter_end(&it);
+
+	return 0;
+}
+
+static int cpu_tag_write(struct cgroup_subsys_state *css,
+			 struct cftype *cftype, s64 tag)
+{
+	struct task_group *tg = css_tg(css);
+
+	if (tg == &root_task_group)
+		return -EINVAL;
+
+	if (tg->tag == tag)
+		return 0;
+
+	rcu_read_lock();
+	walk_tg_tree_from(tg, tg_change_tag, tg_nop, (void *)(&tag));
+	rcu_read_unlock();
+
+	return 0;
+}
+
+static inline s64 cpu_tag_read(struct cgroup_subsys_state *css,
+			       struct cftype *cft)
+{
+	return css_tg(css)->tag;
+}
 #endif
 
 static struct cftype cpu_legacy_files[] = {
@@ -11427,6 +11538,13 @@ static struct cftype cpu_legacy_files[] = {
 		.read_s64 = cpu_qos_read,
 		.write_s64 = cpu_qos_write,
 	},
+#endif
+#ifdef CONFIG_BPF_SCHED
+	{
+		.name = "tag",
+		.read_s64 = cpu_tag_read,
+		.write_s64 = cpu_tag_write,
+	},
 #endif
 	{ }	/* Terminate */
 };
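Before moving on to the scheduler hooks themselves, a quick userspace sanity check of the two tag interfaces added above. The snippet is illustrative only and not part of the patch; note that the per-task file is registered in tid_base_stuff, so it lives under /proc/<pid>/task/<tid>/ rather than directly in /proc/<pid>/.

/* tag_demo.c - illustrative only */
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char path[64];
	long tag = 0;
	FILE *f;

	/* "tag" is registered in tid_base_stuff, i.e. per thread. */
	snprintf(path, sizeof(path), "/proc/%d/task/%d/tag",
		 getpid(), (int)gettid());

	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return 1;
	}
	fprintf(f, "42\n");	/* ends up in sched_settag() */
	fclose(f);

	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return 1;
	}
	if (fscanf(f, "%ld", &tag) == 1)
		printf("task tag is now %ld\n", tag);
	fclose(f);
	return 0;
}

The cgroup side is analogous: writing an s64 to the cpu.tag file of a CPU-controller cgroup propagates the value to every descendant group and task via walk_tg_tree_from()/tg_change_tag().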
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0de55884f9dadb7fc976824b16c1ddfd054bf668..2d2db34a69340da3e4a89d6b20edf2f54399a3cf 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -52,6 +52,7 @@
 #include
 #include
+#include <linux/bpf_sched.h>
 
 #include "sched.h"
 #include "stats.h"
@@ -91,6 +92,10 @@ unsigned int sysctl_sched_child_runs_first __read_mostly;
 
 const_debug unsigned int sysctl_sched_migration_cost	= 500000UL;
 
+#ifdef CONFIG_BPF_SCHED
+DEFINE_PER_CPU(cpumask_var_t, select_idle_mask);
+#endif
+
 int sched_thermal_decay_shift;
 static int __init setup_sched_thermal_decay_shift(char *str)
 {
@@ -8094,6 +8099,11 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 	/* SD_flags and WF_flags share the first nibble */
 	int sd_flag = wake_flags & 0xF;
 
+#ifdef CONFIG_BPF_SCHED
+	struct sched_migrate_ctx ctx;
+	int ret;
+#endif
+
 	/*
 	 * required for stable ->cpus_allowed
 	 */
@@ -8116,6 +8126,25 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 	}
 
 	rcu_read_lock();
+#ifdef CONFIG_BPF_SCHED
+	if (bpf_sched_enabled()) {
+		ctx.task = p;
+		ctx.prev_cpu = prev_cpu;
+		ctx.curr_cpu = cpu;
+		ctx.is_sync = sync;
+		ctx.wake_flags = wake_flags;
+		ctx.want_affine = want_affine;
+		ctx.sd_flag = sd_flag;
+		ctx.select_idle_mask = this_cpu_cpumask_var_ptr(select_idle_mask);
+
+		ret = bpf_sched_cfs_select_rq(&ctx);
+		if (ret >= 0 && is_cpu_allowed(p, ret)) {
+			rcu_read_unlock();
+			return ret;
+		}
+	}
+#endif
+
 	for_each_domain(cpu, tmp) {
 		/*
 		 * If both 'cpu' and 'prev_cpu' are part of this domain,
@@ -9149,9 +9178,26 @@ static
 int can_migrate_task(struct task_struct *p, struct lb_env *env)
 {
 	int tsk_cache_hot;
+#ifdef CONFIG_BPF_SCHED
+	struct sched_migrate_node migrate_node;
+	int ret;
+#endif
 
 	lockdep_assert_rq_held(env->src_rq);
 
+#ifdef CONFIG_BPF_SCHED
+	if (bpf_sched_enabled()) {
+		migrate_node.src_cpu = env->src_cpu;
+		migrate_node.src_node = cpu_to_node(env->src_cpu);
+		migrate_node.dst_cpu = env->dst_cpu;
+		migrate_node.dst_node = cpu_to_node(env->dst_cpu);
+
+		ret = bpf_sched_cfs_can_migrate_task(p, &migrate_node);
+		if (!ret)
+			return ret;
+	}
+#endif
+
 	/*
 	 * We do not migrate tasks that are:
 	 * 1) throttled_lb_pair, or
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3de84e95baf1cbee2a0a79d9bdcdb1f4b960b103..53cc14e49acda7d1215500c979987aea833b3712 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -416,6 +416,9 @@ struct task_group {
 	struct uclamp_se	uclamp[UCLAMP_CNT];
 #endif
 
+#ifdef CONFIG_BPF_SCHED
+	long			tag;
+#endif
 };
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -450,6 +453,9 @@ static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
 }
 
 extern int tg_nop(struct task_group *tg, void *data);
+#ifdef CONFIG_BPF_SCHED
+extern int tg_change_tag(struct task_group *tg, void *data);
+#endif
 
 extern void free_fair_sched_group(struct task_group *tg);
 extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
@@ -3540,4 +3546,8 @@ static inline void init_sched_mm_cid(struct task_struct *t) { }
 extern u64 avg_vruntime(struct cfs_rq *cfs_rq);
 extern int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se);
 
+#ifdef CONFIG_BPF_SCHED
+inline bool is_cpu_allowed(struct task_struct *p, int cpu);
+#endif
+
 #endif /* _KERNEL_SCHED_SCHED_H */
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index 61b7dddedc461e2ece91a7b25bcf14987fc98886..359373bc8dab0cb7a86ce1066117aa9845e56909 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -700,6 +700,8 @@ class PrinterHelpers(Printer):
             'struct bpf_dynptr',
             'struct iphdr',
             'struct ipv6hdr',
+            'struct bpf_sched_cpu_stats',
+            'struct sched_migrate_ctx',
     ]
     known_types = {
             '...',
@@ -755,6 +757,8 @@ class PrinterHelpers(Printer):
             'const struct bpf_dynptr',
             'struct iphdr',
             'struct ipv6hdr',
+            'struct bpf_sched_cpu_stats',
+            'struct sched_migrate_ctx',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4924f0cde1bcacd9ee3419aec56dad680094637d..87914a0fc2e396d8a49faaaa7ab097d5cd046d2e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -988,6 +988,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SK_LOOKUP,
 	BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
 	BPF_PROG_TYPE_NETFILTER,
+	BPF_PROG_TYPE_SCHED,
 };
 
 enum bpf_attach_type {
@@ -1040,6 +1041,7 @@ enum bpf_attach_type {
 	BPF_TCX_INGRESS,
 	BPF_TCX_EGRESS,
 	BPF_TRACE_UPROBE_MULTI,
+	BPF_SCHED,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -5642,6 +5644,12 @@ union bpf_attr {
 *		0 on success.
 *
 *		**-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * int bpf_sched_cpu_stats_of(int *cpu, struct bpf_sched_cpu_stats *ctx, int len)
+ *	Description
+ *		Get multiple types of *cpu* statistics and store in *ctx*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
 */
 #define ___BPF_FUNC_MAPPER(FN, ctx...)			\
 	FN(unspec, 0, ##ctx)				\
@@ -5856,6 +5864,7 @@ union bpf_attr {
 	FN(user_ringbuf_drain, 209, ##ctx)	\
 	FN(cgrp_storage_get, 210, ##ctx)	\
 	FN(cgrp_storage_delete, 211, ##ctx)	\
+	FN(sched_cpu_stats_of, 212, ##ctx)	\
 	/* */
 
 /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index b0f1913763a33250987aaf4112682df4fdc6895e..ddbb16be651fa6b19df926693ae02a2940999fe4 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -781,6 +781,7 @@ int bpf_link_create(int prog_fd, int target_fd,
 	case BPF_TRACE_FENTRY:
 	case BPF_TRACE_FEXIT:
 	case BPF_MODIFY_RETURN:
+	case BPF_SCHED:
 	case BPF_LSM_MAC:
 		attr.link_create.tracing.cookie = OPTS_GET(opts, tracing.cookie, 0);
 		if (!OPTS_ZEROED(opts, tracing))
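The second hook can be driven the same way. Below is a minimal sketch for cfs_can_migrate_task, again illustrative only: as the fair.c hunk above shows, returning 0 vetoes the migration, while any other value (including the hook's default -1) lets can_migrate_task() continue with its usual checks.

// SPDX-License-Identifier: GPL-2.0
/* can_migrate.bpf.c - illustrative only, not part of the patch */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

SEC("sched/cfs_can_migrate_task")
int BPF_PROG(cfs_can_migrate_task, struct task_struct *p,
	     struct sched_migrate_node *node)
{
	/* Keep tagged tasks on their current NUMA node: veto any
	 * load-balancer migration that would cross node boundaries. */
	if (p->tag && node->src_node != node->dst_node)
		return 0;

	return -1;	/* defer to the kernel's default checks */
}

char LICENSE[] SEC("license") = "GPL";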
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 96ff1aa4bf6a0ebb49abf2d4f886c66f0d522da3..41697c6274b916e666d570cb09b8e0a046dbb39b 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -121,6 +121,7 @@ static const char * const attach_type_name[] = {
 	[BPF_TCX_INGRESS]		= "tcx_ingress",
 	[BPF_TCX_EGRESS]		= "tcx_egress",
 	[BPF_TRACE_UPROBE_MULTI]	= "trace_uprobe_multi",
+	[BPF_SCHED]			= "sched",
 };
 
 static const char * const link_type_name[] = {
@@ -209,6 +210,7 @@ static const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_SK_LOOKUP]		= "sk_lookup",
 	[BPF_PROG_TYPE_SYSCALL]			= "syscall",
 	[BPF_PROG_TYPE_NETFILTER]		= "netfilter",
+	[BPF_PROG_TYPE_SCHED]			= "sched",
 };
 
 static int __base_pr(enum libbpf_print_level level, const char *format,
@@ -3029,7 +3031,8 @@ static int bpf_object_fixup_btf(struct bpf_object *obj)
 static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
 {
 	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
-	    prog->type == BPF_PROG_TYPE_LSM)
+	    prog->type == BPF_PROG_TYPE_LSM ||
+	    prog->type == BPF_PROG_TYPE_SCHED)
 		return true;
 
 	/* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
@@ -8764,6 +8767,7 @@ static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, stru
 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
+static int attach_sched(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 
 static const struct bpf_sec_def section_defs[] = {
 	SEC_DEF("socket",		SOCKET_FILTER, 0, SEC_NONE),
@@ -8858,6 +8862,7 @@ static const struct bpf_sec_def section_defs[] = {
 	SEC_DEF("struct_ops.s+",	STRUCT_OPS, 0, SEC_SLEEPABLE),
 	SEC_DEF("sk_lookup",		SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
 	SEC_DEF("netfilter",		NETFILTER, BPF_NETFILTER, SEC_NONE),
+	SEC_DEF("sched/",		SCHED, BPF_SCHED, SEC_ATTACH_BTF, attach_sched),
 };
 
 int libbpf_register_prog_handler(const char *sec,
@@ -9237,6 +9242,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
 #define BTF_TRACE_PREFIX "btf_trace_"
 #define BTF_LSM_PREFIX "bpf_lsm_"
 #define BTF_ITER_PREFIX "bpf_iter_"
+#define BTF_SCHED_PREFIX "bpf_sched_"
 #define BTF_MAX_NAME_SIZE 128
 
 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
@@ -9256,6 +9262,10 @@ void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
 		*prefix = BTF_ITER_PREFIX;
 		*kind = BTF_KIND_FUNC;
 		break;
+	case BPF_SCHED:
+		*prefix = BTF_SCHED_PREFIX;
+		*kind = BTF_KIND_FUNC;
+		break;
 	default:
 		*prefix = "";
 		*kind = BTF_KIND_FUNC;
@@ -12113,6 +12123,17 @@ struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog,
 	return link;
 }
 
+struct bpf_link *bpf_program__attach_sched(const struct bpf_program *prog)
+{
+	return bpf_program__attach_btf_id(prog, NULL);
+}
+
+static int attach_sched(const struct bpf_program *prog, long cookie, struct bpf_link **link)
+{
+	*link = bpf_program__attach_sched(prog);
+	return libbpf_get_error(*link);
+}
+
 struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
 {
 	struct bpf_link *link = NULL;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 0e52621cba435ae31feaa500300eb77a74e08f2a..aabdd973c1a589b5c2f89d90a939deeeec30cef6 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -769,6 +769,8 @@ bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex);
 LIBBPF_API struct bpf_link *
 bpf_program__attach_freplace(const struct bpf_program *prog, int target_fd,
 			     const char *attach_func_name);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_sched(const struct bpf_program *prog);
 
 struct bpf_netfilter_opts {
 	/* size of this struct, for forward/backward compatibility */
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 57712321490f8e0d7569b0733d987b678827c6da..228ab00a5e6909d86e5071462c2ff4770420b33b 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -236,6 +236,7 @@ LIBBPF_0.2.0 {
 		perf_buffer__buffer_fd;
 		perf_buffer__epoll_fd;
 		perf_buffer__consume_buffer;
+		bpf_program__attach_sched;
 } LIBBPF_0.1.0;
 
 LIBBPF_0.3.0 {
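Finally, loading and attaching from userspace. With the SEC("sched/") handler registered above, a plain bpf_program__attach() would also work; the sketch below uses the new bpf_program__attach_sched() entry point explicitly. Illustrative only; the object and program names match the earlier select_rq example.

/* load_sched.c - illustrative only */
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <bpf/libbpf.h>

int main(void)
{
	struct bpf_object *obj;
	struct bpf_program *prog;
	struct bpf_link *link;
	int err;

	obj = bpf_object__open_file("select_rq.bpf.o", NULL);
	if (!obj)
		return 1;

	err = bpf_object__load(obj);
	if (err)
		goto out;

	prog = bpf_object__find_program_by_name(obj, "cfs_select_rq");
	if (!prog) {
		err = -ENOENT;
		goto out;
	}

	link = bpf_program__attach_sched(prog);
	err = libbpf_get_error(link);
	if (err) {
		fprintf(stderr, "attach failed: %d\n", err);
		goto out;
	}

	pause();	/* the policy stays active while the link exists */

	bpf_link__destroy(link);
out:
	bpf_object__close(obj);
	return err ? 1 : 0;
}

Attaching requires CAP_SYS_ADMIN (enforced in bpf_prog_attach_check_attach_type() above), and detaching — destroying the link — flips the bpf_sched_enabled_key static branch back off, so the hooks in fair.c revert to their nop defaults.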