diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 34061d75a0d21be9c981d724c4e05c90b4e1000d..745897d5f4d6e372ad718a84b6fbceade12c7c3f 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -162,6 +162,8 @@ CONFIG_FAIR_GROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y CONFIG_QOS_SCHED_DYNAMIC_AFFINITY=y +CONFIG_SCHED_TASK_RELATIONSHIP=y +CONFIG_QOS_SCHED_NUMA_ICON=y CONFIG_QOS_SCHED_SMART_GRID=y CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_RDMA=y @@ -234,7 +236,7 @@ CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_BASE_RELATIVE=y # CONFIG_BPF_LSM is not set -# CONFIG_BPF_SCHED is not set +CONFIG_BPF_SCHED=y CONFIG_BPF_SYSCALL=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_BPF_JIT_ALWAYS_ON=y diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 1835f38f2947ccbf8c017d1d72656c378fa04c54..3c9d3d4e3964c9316db1ee4b1dd00dfcd15f28e6 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -167,6 +167,8 @@ CONFIG_FAIR_GROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y CONFIG_QOS_SCHED_DYNAMIC_AFFINITY=y +# CONFIG_SCHED_TASK_RELATIONSHIP is not set +# CONFIG_QOS_SCHED_NUMA_ICON is not set # CONFIG_QOS_SCHED_SMART_GRID is not set CONFIG_CGROUP_PIDS=y CONFIG_CGROUP_RDMA=y diff --git a/fs/exec.c b/fs/exec.c index 981b3ac90c44e66de934ceefca85c73e0e910fc0..792d62632e92aece555f19f5622a3493c712db03 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -1822,6 +1823,7 @@ static int bprm_execve(struct linux_binprm *bprm, rseq_execve(current); acct_update_integrals(current); task_numa_free(current, false); + task_relationship_free(current, true); return retval; out: diff --git a/include/linux/sched.h b/include/linux/sched.h index b4ab407cab37968510b38e8741c478b79dff4503..fa83018137ce3bf73565d30bb4b25e5335dacea7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -1437,11 +1438,15 @@ struct task_struct { KABI_USE(7, void *pf_io_worker) #if defined(CONFIG_QOS_SCHED_DYNAMIC_AFFINITY) && !defined(__GENKSYMS__) KABI_USE(8, cpumask_t *prefer_cpus) - KABI_USE(9, const cpumask_t *select_cpus) #else KABI_RESERVE(8) +#endif +#if defined(CONFIG_TASK_PLACEMENT_BY_CPU_RANGE) && !defined(__GENKSYMS__) + KABI_USE(9, const cpumask_t *select_cpus) +#else KABI_RESERVE(9) #endif + #if (defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)) && defined(CONFIG_X86) KABI_USE(10, unsigned int sequential_io) KABI_USE(11, unsigned int sequential_io_avg) @@ -1464,7 +1469,11 @@ struct task_struct { #else KABI_RESERVE(13) #endif +#if defined(CONFIG_SCHED_TASK_RELATIONSHIP) && !defined(__GENKSYMS__) + KABI_USE(14, struct task_relationship *rship) +#else KABI_RESERVE(14) +#endif KABI_RESERVE(15) KABI_RESERVE(16) KABI_AUX_PTR(task_struct) @@ -2351,6 +2360,21 @@ struct bpf_sched_cpu_stats { KABI_RESERVE(4) }; +struct bpf_node_stats { + unsigned long util; + unsigned long compute_capacity; + unsigned int weight; + + KABI_RESERVE(1) + KABI_RESERVE(2) + KABI_RESERVE(3) + KABI_RESERVE(4) + KABI_RESERVE(5) + KABI_RESERVE(6) + KABI_RESERVE(7) + KABI_RESERVE(8) +}; + struct cpumask_op_args { unsigned int op_type; void *arg1; @@ -2374,6 +2398,28 @@ enum cpumask_op_type { CPUMASK_CPULIST_PARSE }; +enum nodemask_op_type { + NODEMASK_EMPTY, + NODEMASK_NODE_ISSET, + NODEMASK_NODES_CLEAR, + 
NODEMASK_NODE_SET, + NODEMASK_NODE_CLEAR, + NODEMASK_NODELIST_PARSE, + NODEMASK_TO_CPUMASK, + NODEMASK_NODES_ANDNOT, + NODEMASK_NODES_AND, + NODEMASK_NODES_OR, + NODEMASK_WEIGHT, + NODEMASK_ONLINE +}; + +struct nodemask_op_args { + enum nodemask_op_type op_type; + void *arg1; + void *arg2; + void *arg3; +}; + struct sched_migrate_ctx { struct task_struct *task; struct cpumask *select_idle_mask; @@ -2402,5 +2448,15 @@ struct sched_affine_ctx { KABI_RESERVE(3) KABI_RESERVE(4) }; + +struct sched_migrate_node { + int src_cpu; + int dst_cpu; + + KABI_RESERVE(1) + KABI_RESERVE(2) + KABI_RESERVE(3) + KABI_RESERVE(4) +}; #endif #endif diff --git a/include/linux/sched/relationship.h b/include/linux/sched/relationship.h new file mode 100644 index 0000000000000000000000000000000000000000..43aa3f9706d40521c6147947c3a774adf00186ea --- /dev/null +++ b/include/linux/sched/relationship.h @@ -0,0 +1,202 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SCHED_RELATIONSHIP_H +#define _LINUX_SCHED_RELATIONSHIP_H + +#include +#include +#include +#include + +#define FAULT_NODES_MAX 4 + +struct task_struct; +struct rq; + +#ifdef CONFIG_SCHED_DEBUG +struct seq_file; +#endif + +struct fault_array_info { + int nid; + unsigned long val; +}; + +struct relationship_comm { + int nr_tasks; + int gid; + nodemask_t preferred_node; +}; + +struct bpf_net_relationship { + struct relationship_comm comm; + unsigned long grp_rxtx_bytes; + unsigned long grp_remote_rxtx_bytes; +}; + +struct bpf_mm_relationship { + struct relationship_comm comm; + unsigned long grp_total_faults; + struct fault_array_info grp_faults_ordered[FAULT_NODES_MAX]; + struct fault_array_info grp_score_ordered[FAULT_NODES_MAX]; +}; + +struct bpf_relationship_get_args { + struct bpf_mm_relationship mm; + struct bpf_net_relationship net; +}; + +struct bpf_relationship_set_args { + nodemask_t preferred_node; +}; + +struct relationship_hdr { + refcount_t refcount; + spinlock_t lock; + int nr_tasks; + int gid; + nodemask_t preferred_nid; +}; + +enum net_req_type { + NET_RS_TYPE_INVALID = 0, + NET_RS_TYPE_LOCAL, + NET_RS_TYPE_RX, + NET_RS_TYPE_TX, + NET_RS_TYPE_MAX +}; + +struct net_relationship_req { + enum net_req_type net_rship_type; + pid_t rx_pid; + pid_t tx_pid; + int nic_nid; + int rx_dev_idx; + int rx_dev_queue_idx; + u64 rx_dev_netns_cookie; + unsigned long rxtx_bytes; + + /* reserved */ + unsigned long rxtx_cnt; +}; + +struct net_relationship_callback { + struct callback_head twork; + atomic_t active; + pid_t src_pid; + struct net_relationship_req req; +}; + +struct net_group { + struct rcu_head rcu; + struct relationship_hdr hdr; + unsigned long rxtx_bytes; + + /* reserved */ + unsigned long rxtx_cnt; +}; + +struct numa_fault_ext { + struct fault_array_info faults_ordered[FAULT_NODES_MAX]; +}; + +struct task_relationship { + /* network relationship */ + struct net_group __rcu *net_group; + spinlock_t net_lock; + int nic_nid; + int rx_dev_idx; + int rx_dev_queue_idx; + unsigned long rx_dev_netns_cookie; + unsigned long rxtx_remote_bytes; + unsigned long rxtx_remote_update_next; + unsigned long rxtx_remote_buffer; + unsigned long rxtx_bytes; + unsigned long rxtx_buffer; + unsigned long rxtx_update_next; + struct net_relationship_callback cb; + + /* extras numa fault data */ + struct numa_fault_ext faults; + +#ifdef CONFIG_NUMA_BALANCING + /* preferred nodes adjust */ + u64 node_stamp; + struct callback_head node_work; +#endif +}; + +#ifdef CONFIG_BPF_SCHED +struct sched_preferred_node_ctx { + struct task_struct *tsk; + nodemask_t preferred_node; 
+ + KABI_RESERVE(1) + KABI_RESERVE(2) + KABI_RESERVE(3) + KABI_RESERVE(4) +}; +#endif + +extern void task_relationship_enable(void); +extern void task_relationship_disable(void); + +#ifdef CONFIG_SCHED_DEBUG +extern void sched_show_relationship(struct task_struct *p, struct seq_file *m); +#endif + +#ifdef CONFIG_SCHED_TASK_RELATIONSHIP +extern int sched_relationship_fork(struct task_struct *p); +extern void sched_relationship_free(struct task_struct *p); +void task_relationship_free(struct task_struct *tsk, bool reset); +extern bool task_relationship_supported(struct task_struct *tsk); +extern int sched_net_relationship_submit(struct net_relationship_req *req); +extern void +sctl_sched_get_net_relationship(struct task_struct *tsk, + struct sctl_net_relationship_info *info); +extern void +sctl_sched_get_mem_relationship(struct task_struct *tsk, + struct sctl_mem_relationship_info *info); +extern void sched_get_mm_relationship(struct task_struct *tsk, + struct bpf_relationship_get_args *args); +extern void sched_get_relationship(struct task_struct *tsk, + struct bpf_relationship_get_args *args); +extern void numa_faults_update_and_sort(int nid, int new, + struct fault_array_info *stats); +extern void task_tick_relationship(struct rq *rq, struct task_struct *curr); + +extern void task_preferred_node_work(struct callback_head *work); +extern void +sched_set_curr_preferred_node(struct bpf_relationship_set_args *args); + +DECLARE_STATIC_KEY_FALSE(__relationship_switch); +static inline bool task_relationship_used(void) +{ + return static_branch_unlikely(&__relationship_switch); +} +#else +static inline bool task_relationship_used(void) +{ + return false; +} + +static inline int sched_relationship_fork(struct task_struct *p) +{ + return 0; +} + +static inline void sched_relationship_free(struct task_struct *p) {} + +static inline void +task_relationship_free(struct task_struct *tsk, bool reset) {} + +static inline int +sched_net_relationship_submit(struct net_relationship_req *req) +{ + return 0; +} + +static inline void +task_tick_relationship(struct rq *rq, struct task_struct *curr) {} +#endif + +#endif diff --git a/include/linux/sched_hook_defs.h b/include/linux/sched_hook_defs.h index 818b1244a018f55d774b8929bf4de9c7b0b80224..0a871f728c856ea88c5dabf9c3742d6db947e93e 100644 --- a/include/linux/sched_hook_defs.h +++ b/include/linux/sched_hook_defs.h @@ -10,3 +10,7 @@ BPF_SCHED_HOOK(void, (void) 0, cfs_dequeue_task, struct rq *rq, struct task_stru BPF_SCHED_HOOK(int, -1, cfs_select_rq, struct sched_migrate_ctx *ctx) BPF_SCHED_HOOK(int, -1, cfs_wake_affine, struct sched_affine_ctx *ctx) BPF_SCHED_HOOK(int, -1, cfs_select_rq_exit, struct sched_migrate_ctx *ctx) +BPF_SCHED_HOOK(int, -1, cfs_can_migrate_task, struct task_struct *p, + struct sched_migrate_node *migrate_node) +BPF_SCHED_HOOK(void, (void) 0, cfs_change_preferred_node, + struct sched_preferred_node_ctx *ctx) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2b11202c3439e3e5740bcb28e057c79fe7464ac9..b87934003c407563770aa110aa1b7988f3b22cc4 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3874,6 +3874,52 @@ union bpf_attr { * check src_cpu whether share cache with dst_cpu. * Return * yes 1, no 0. + * + * int bpf_nodemask_op(struct nodemask_op_args *op, int len) + * Description + * A series of nodemask-related operations. Perform different + * operations base on *op*->type. User also need fill other + * *op* field base on *op*->type. 
*op*->type is one of them + * + * **NODEMASK_EMPTY** + * nodes_empty(op->arg1) returned. + * **NODEMASK_NODE_ISSET** + * node_isset(op->arg1, op->arg2) returned + * **NODEMASK_NODES_CLEAR** + * 0 returned + * **NODEMASK_NODE_CLEAR** + * unset op->arg1 from op->arg2, 0 returned + * **NODEMASK_NODE_SET** + * set op->arg1 to op->arg2, 0 returned + * **NODEMASK_WEIGHT** + * nodes_weight(op->arg1) returned + * **NODEMASK_NODELIST_PARSE** + * str *op->arg1* to nodemask_t *op->arg2*, + * 0 on success, or a negative error in case of failure. + * **NODEMASK_TO_CPUMASK** + * nodemask_t *arg1* to cpumask_t *op->arg2*, 0 returned. + * **NODEMASK_ONLINE** + * set online nodes to nodemask_t *op->arg1*, 0 returned. + * Return + * View above. + * + * int bpf_get_task_relationship_stats(struct task_struct *tsk, struct bpf_map *map, struct bpf_relationship_get_args *stats) + * Description + * get relationship statistics of *tsk* and store in *stats*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_sched_set_curr_preferred_node(struct bpf_relationship_set_args *args, int len) + * Description + * set current task preferred node. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_get_node_stats(int nid, struct bpf_node_stats *ctx, int len) + * Description + * get resource statistics of *nid* and store in *ctx*. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4046,6 +4092,10 @@ union bpf_attr { FN(sched_entity_to_tg), \ FN(cpumask_op), \ FN(cpus_share_cache), \ + FN(nodemask_op), \ + FN(get_task_relationship_stats),\ + FN(sched_set_curr_preferred_node),\ + FN(get_node_stats), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/include/uapi/linux/sched_ctrl.h b/include/uapi/linux/sched_ctrl.h new file mode 100644 index 0000000000000000000000000000000000000000..13a4eb182d5e3a37036dce667e53242ba2f0b44d --- /dev/null +++ b/include/uapi/linux/sched_ctrl.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_SCHED_CTRL_H +#define _LINUX_SCHED_CTRL_H + +#include + + +#define SCTL_IOC_MAGIC 'X' + +/* get task relationship */ +#define SCTL_GET_RSHIP \ + _IOR(SCTL_IOC_MAGIC, 0, struct sctl_get_relationship_args) + +#define SCTL_IOC_MAXNR 1 + +#define SCTL_MAX_NUMNODES 16 +#define SCTL_STR_MAX 64 +#define NR_TASK_FAULTS_TYPE 2 + +#define NO_RSHIP (-1) + +struct grp_hdr { + int gid; + char preferred_nid[SCTL_STR_MAX]; + int nr_tasks; +}; + +struct sctl_net_relationship_info { + int valid; + struct grp_hdr grp_hdr; + int nic_nid; + int rx_dev_idx; + int rx_dev_queue_idx; + unsigned long rx_dev_netns_cookie; + unsigned long rxtx_remote_bytes; + unsigned long rxtx_bytes; + unsigned long grp_rxtx_bytes; +}; + +struct sctl_mem_relationship_info { + int valid; + struct grp_hdr grp_hdr; + int nodes_num; + unsigned long total_faults; + unsigned long grp_total_faults; + unsigned long faults[SCTL_MAX_NUMNODES][NR_TASK_FAULTS_TYPE]; + unsigned long faults_cpu[SCTL_MAX_NUMNODES][NR_TASK_FAULTS_TYPE]; + unsigned long grp_faults[SCTL_MAX_NUMNODES][NR_TASK_FAULTS_TYPE]; + unsigned long grp_faults_cpu[SCTL_MAX_NUMNODES][NR_TASK_FAULTS_TYPE]; +}; + +struct sctl_get_relationship_args { + int tid; + struct sctl_net_relationship_info nrsi; + struct sctl_mem_relationship_info mrsi; +}; +#endif /* _LINUX_SCHED_CTRL_H */ diff --git a/init/Kconfig b/init/Kconfig index 
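For reference, the new SCTL_GET_RSHIP interface can be driven from userspace roughly as sketched below (illustrative only, not part of the patch; it assumes the relationship_ctrl misc device registered later in this series shows up as /dev/relationship_ctrl, and the ioctl handler requires CAP_SYS_ADMIN):

	/* Minimal userspace sketch for the SCTL_GET_RSHIP ioctl (illustrative). */
	#include <stdio.h>
	#include <stdlib.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/sched_ctrl.h>

	int main(int argc, char **argv)
	{
		struct sctl_get_relationship_args args = { 0 };
		int fd;

		if (argc < 2)
			return 1;
		args.tid = atoi(argv[1]);

		/* Device name comes from the relationship_ctrl miscdevice below. */
		fd = open("/dev/relationship_ctrl", O_RDWR);
		if (fd < 0 || ioctl(fd, SCTL_GET_RSHIP, &args) < 0) {
			perror("SCTL_GET_RSHIP");
			return 1;
		}

		if (args.nrsi.valid)
			printf("net group %d: rxtx_bytes %lu remote %lu\n",
			       args.nrsi.grp_hdr.gid, args.nrsi.rxtx_bytes,
			       args.nrsi.rxtx_remote_bytes);
		if (args.mrsi.valid)
			printf("numa group %d: task faults %lu group faults %lu\n",
			       args.mrsi.grp_hdr.gid, args.mrsi.total_faults,
			       args.mrsi.grp_total_faults);

		close(fd);
		return 0;
	}
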
69bd400daeb3b03e0c776302a6964e0ff0fe41a5..b722b7a887c1f63af8af7fcd37720b59df5d91e4 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1066,9 +1066,13 @@ config RT_GROUP_SCHED endif #CGROUP_SCHED +config TASK_PLACEMENT_BY_CPU_RANGE + bool "variable cpu range for task placement" + config QOS_SCHED_DYNAMIC_AFFINITY bool "qos dynamic affinity" depends on CPUSETS + select TASK_PLACEMENT_BY_CPU_RANGE default n help This feature lets you allocate preferred cpus to taskgroup. If enabled, @@ -1076,6 +1080,29 @@ config QOS_SCHED_DYNAMIC_AFFINITY of taskgroup is below threshold setted, otherwise make taskgroup to use cpus allowed. +config SCHED_TASK_RELATIONSHIP + bool "task relationship" + depends on NUMA_BALANCING + default n + help + This feature enables the scheduler to identify tasks relationship by + page fault, SPE, socket and other IPC method. + + If in doubt, say N. + +config QOS_SCHED_NUMA_ICON + bool "numa aware schedule" + depends on BPF_SCHED + depends on SCHED_TASK_RELATIONSHIP + default n + help + This feature provides the NUMA Isolation and Consolidationthe + Mechanisms based on ebpf and task relationship. If enabled, scheduler + places related tasks on same numa node when the node has spare + resource. + + If in doubt, say N. + config UCLAMP_TASK_GROUP bool "Utilization clamping per group of tasks" depends on CGROUP_SCHED @@ -1838,6 +1865,7 @@ config BPF_SCHED bool "SCHED Instrumentation with BPF" depends on BPF_EVENTS depends on BPF_SYSCALL + select TASK_PLACEMENT_BY_CPU_RANGE help Enables instrumentation of the sched hooks with eBPF programs for implementing dynamic scheduling policies. diff --git a/init/init_task.c b/init/init_task.c index fa8838c2c203b694b0ef6e9c4c2e83050d9e3e39..3b846f8223d96663354155c05eb6052257a580c0 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -217,6 +217,9 @@ struct task_struct init_task #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY .prefer_cpus = NULL, #endif +#ifdef CONFIG_SCHED_TASK_RELATIONSHIP + .rship = NULL, +#endif #ifdef CONFIG_SECCOMP_FILTER .seccomp = { .filter_count = ATOMIC_INIT(0) }, #endif diff --git a/kernel/fork.c b/kernel/fork.c index 079b718131b064eb2cc875d8ce1d45f5ce2f7969..12db99751381f2a155e4902bfede63231b182c41 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -476,6 +476,8 @@ void free_task(struct task_struct *tsk) #ifdef CONFIG_QOS_SCHED_SMART_GRID sched_grid_qos_free(tsk); #endif + if (task_relationship_used()) + sched_relationship_free(tsk); free_task_struct(tsk); } EXPORT_SYMBOL(free_task); @@ -748,6 +750,7 @@ void __put_task_struct(struct task_struct *tsk) io_uring_free(tsk); cgroup_free(tsk); task_numa_free(tsk, true); + task_relationship_free(tsk, false); security_task_free(tsk); exit_creds(tsk); delayacct_tsk_free(tsk); @@ -949,6 +952,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) tsk->prefer_cpus = NULL; #endif +#ifdef CONFIG_SCHED_TASK_RELATIONSHIP + tsk->rship = NULL; +#endif + setup_thread_stack(tsk, orig); clear_user_return_notifier(tsk); clear_tsk_need_resched(tsk); @@ -2102,6 +2109,12 @@ static __latent_entropy struct task_struct *copy_process( goto bad_fork_cleanup_count; #endif + if (task_relationship_used()) { + retval = sched_relationship_fork(p); + if (retval) + goto bad_fork_cleanup_count; + } + /* * If multiple threads are within copy_process(), then this check * triggers too late. 
This doesn't hurt, the check is only there diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index a6fe0ee09917a9a81a9ecaf2e73f16bd616400fd..ff9ff2c17f79d881430f47b63489b3f6a73f6f5c 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -40,3 +40,5 @@ obj-$(CONFIG_SCHED_CORE) += core_sched.o obj-$(CONFIG_BPF_SCHED) += bpf_sched.o obj-$(CONFIG_BPF_SCHED) += bpf_topology.o obj-$(CONFIG_QOS_SCHED_SMART_GRID) += grid/ +obj-$(CONFIG_SCHED_TASK_RELATIONSHIP) += relationship.o relationship_ioctl.o +obj-$(CONFIG_QOS_SCHED_NUMA_ICON) += numa_icon.o diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c index 220ba83fc5f4a0fae183db3d32b320ad9f30ab66..3e14d1fa911ee2abcee7242e08a4a7de7970e3b8 100644 --- a/kernel/sched/bpf_sched.c +++ b/kernel/sched/bpf_sched.c @@ -260,6 +260,75 @@ static const struct bpf_func_proto bpf_cpumask_op_proto = { .arg2_type = ARG_CONST_SIZE, }; +BPF_CALL_2(bpf_nodemask_op, struct nodemask_op_args *, op, int, len) +{ + struct cpumask *cpumask; + nodemask_t mask; + int nid; + + if (len != sizeof(*op) || !op->arg1) + return -EINVAL; + + switch (op->op_type) { + case NODEMASK_EMPTY: + mask = *(nodemask_t *)op->arg1; + return nodes_empty(mask); + case NODEMASK_NODE_ISSET: + mask = *(nodemask_t *)op->arg2; + return node_isset(*(int *)op->arg1, mask); + case NODEMASK_NODES_CLEAR: + __nodes_clear((nodemask_t *)op->arg1, MAX_NUMNODES); + break; + case NODEMASK_NODE_CLEAR: + __node_clear(*(int *)op->arg1, (nodemask_t *)op->arg2); + break; + case NODEMASK_NODE_SET: + __node_set(*(int *)op->arg1, (nodemask_t *)op->arg2); + break; + case NODEMASK_NODES_AND: + __nodes_and((nodemask_t *)op->arg1, (nodemask_t *)op->arg2, + (nodemask_t *)op->arg3, MAX_NUMNODES); + break; + case NODEMASK_NODES_ANDNOT: + __nodes_andnot((nodemask_t *)op->arg1, (nodemask_t *)op->arg2, + (nodemask_t *)op->arg3, MAX_NUMNODES); + break; + case NODEMASK_NODES_OR: + __nodes_or((nodemask_t *)op->arg1, (nodemask_t *)op->arg2, + (nodemask_t *)op->arg3, MAX_NUMNODES); + break; + case NODEMASK_WEIGHT: + mask = *(nodemask_t *)op->arg1; + return nodes_weight(mask); + case NODEMASK_NODELIST_PARSE: + return __nodelist_parse((const char *)op->arg1, + (nodemask_t *)op->arg2, MAX_NUMNODES); + case NODEMASK_TO_CPUMASK: + mask = *(nodemask_t *)op->arg1; + cpumask = (struct cpumask *)op->arg2; + cpumask_clear(cpumask); + for_each_node_mask(nid, mask) { + cpumask_or(cpumask, cpumask, cpumask_of_node(nid)); + } + break; + case NODEMASK_ONLINE: + *(nodemask_t *)op->arg1 = node_online_map; + break; + default: + return -EINVAL; + } + + return 0; +} + +static const struct bpf_func_proto bpf_nodemask_op_proto = { + .func = bpf_nodemask_op, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_CONST_SIZE, +}; + BPF_CALL_2(bpf_cpus_share_cache, int, src_cpu, int, dst_cpu) { if ((unsigned int)src_cpu >= nr_cpu_ids || @@ -277,6 +346,74 @@ static const struct bpf_func_proto bpf_cpus_share_cache_proto = { .arg2_type = ARG_ANYTHING, }; +#ifdef CONFIG_QOS_SCHED_NUMA_ICON +BPF_CALL_3(bpf_get_node_stats, int, nid, + struct bpf_node_stats *, ctx, + int, len) +{ + if (len != sizeof(*ctx)) + return -EINVAL; + + if ((unsigned int)nid >= nr_node_ids) + return -EINVAL; + + sched_get_node_load(nid, ctx); + return 0; +} + +const struct bpf_func_proto bpf_get_node_stats_proto = { + .func = bpf_get_node_stats, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_PTR_TO_UNINIT_MEM, + .arg3_type = ARG_CONST_SIZE, +}; +#endif + +#ifdef 
CONFIG_SCHED_TASK_RELATIONSHIP +BPF_CALL_3(bpf_get_task_relationship_stats, struct task_struct *, tsk, + struct bpf_map *, map, struct bpf_relationship_get_args *, args) +{ + if (!task_relationship_supported(tsk)) + return -EPERM; + + if (!args) + return -EINVAL; + + sched_get_relationship(tsk, args); + return 0; +} + +const struct bpf_func_proto bpf_get_task_relationship_stats_proto = { + .func = bpf_get_task_relationship_stats, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &btf_sched_task_ids[0], + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, +}; + +BPF_CALL_2(bpf_sched_set_curr_preferred_node, + struct bpf_relationship_set_args *, args, int, len) +{ + if (!args || len != sizeof(*args)) + return -EINVAL; + + sched_set_curr_preferred_node(args); + return 0; +} + +const struct bpf_func_proto bpf_sched_set_curr_preferred_node_proto = { + .func = bpf_sched_set_curr_preferred_node, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE, +}; +#endif + static const struct bpf_func_proto * bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -299,6 +436,18 @@ bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_cpumask_op_proto; case BPF_FUNC_cpus_share_cache: return &bpf_cpus_share_cache_proto; + case BPF_FUNC_nodemask_op: + return &bpf_nodemask_op_proto; +#ifdef CONFIG_QOS_SCHED_NUMA_ICON + case BPF_FUNC_get_node_stats: + return &bpf_get_node_stats_proto; +#endif +#ifdef CONFIG_SCHED_TASK_RELATIONSHIP + case BPF_FUNC_get_task_relationship_stats: + return &bpf_get_task_relationship_stats_proto; + case BPF_FUNC_sched_set_curr_preferred_node: + return &bpf_sched_set_curr_preferred_node_proto; +#endif default: return bpf_base_func_proto(func_id); } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fa71c7c5119641fbd162e0564e85059deb4e8027..d034294c59ceb4a29be7df9caac49c42e6f198fc 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8225,6 +8225,9 @@ static struct kmem_cache *task_group_cache __read_mostly; DECLARE_PER_CPU(cpumask_var_t, load_balance_mask); DECLARE_PER_CPU(cpumask_var_t, select_idle_mask); +#ifdef CONFIG_BPF_SCHED +DECLARE_PER_CPU(cpumask_var_t, select_cpu_mask); +#endif void __init sched_init(void) { @@ -8278,6 +8281,10 @@ void __init sched_init(void) cpumask_size(), GFP_KERNEL, cpu_to_node(i)); per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node( cpumask_size(), GFP_KERNEL, cpu_to_node(i)); +#ifdef CONFIG_BPF_SCHED + per_cpu(select_cpu_mask, i) = (cpumask_var_t)kzalloc_node( + cpumask_size(), GFP_KERNEL, cpu_to_node(i)); +#endif } #endif /* CONFIG_CPUMASK_OFFSTACK */ diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 00f01518bbddc5935dd6029d686266be439ae72f..5233ba9fdc697d776246bee1b96e81d207e045fd 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -1040,6 +1040,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, } sched_show_numa(p, m); + + sched_show_relationship(p, m); } void proc_sched_set_task(struct task_struct *p) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f39e7547523c78bda1af34d28df053b1670296d3..404358af80c74637932f0c6a84f15302babee513 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1086,6 +1086,13 @@ struct numa_group { struct rcu_head rcu; unsigned long total_faults; unsigned long max_faults_cpu; +#ifdef CONFIG_SCHED_TASK_RELATIONSHIP + struct 
fault_array_info score_ordered[FAULT_NODES_MAX]; + struct fault_array_info faults_ordered[FAULT_NODES_MAX]; + nodemask_t preferred_nid; + u64 node_stamp; + u64 nodes_switch_cnt; +#endif /* * Faults_cpu is used to decide whether memory should move * towards the CPU. As a consequence, these stats are weighted @@ -2279,6 +2286,9 @@ static int preferred_group_nid(struct task_struct *p, int nid) { nodemask_t nodes; int dist; +#ifdef CONFIG_SCHED_TASK_RELATIONSHIP + struct numa_group *ng; +#endif /* Direct connections between all NUMA nodes. */ if (sched_numa_topology_type == NUMA_DIRECT) @@ -2301,7 +2311,19 @@ static int preferred_group_nid(struct task_struct *p, int nid) max_score = score; max_node = node; } +#ifdef CONFIG_SCHED_TASK_RELATIONSHIP + if (task_relationship_used()) { + ng = deref_curr_numa_group(p); + if (ng) { + spin_lock_irq(&ng->lock); + numa_faults_update_and_sort(node, score, + ng->score_ordered); + spin_unlock_irq(&ng->lock); + } + } +#endif } + return max_node; } @@ -2451,6 +2473,17 @@ static void task_numa_placement(struct task_struct *p) max_faults = group_faults; max_nid = nid; } + +#ifdef CONFIG_SCHED_TASK_RELATIONSHIP + if (task_relationship_used()) { + numa_faults_update_and_sort(nid, faults, + p->rship->faults.faults_ordered); + + if (ng) + numa_faults_update_and_sort(nid, group_faults, + ng->faults_ordered); + } +#endif } if (ng) { @@ -2512,6 +2545,17 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, grp->nr_tasks++; rcu_assign_pointer(p->numa_group, grp); + +#ifdef CONFIG_SCHED_TASK_RELATIONSHIP + if (task_relationship_used()) { + grp->preferred_nid = NODE_MASK_NONE; + grp->node_stamp = jiffies; + for (i = 0; i < FAULT_NODES_MAX; i++) { + grp->faults_ordered[i].nid = -1; + grp->score_ordered[i].nid = -1; + } + } +#endif } rcu_read_lock(); @@ -2623,6 +2667,15 @@ void task_numa_free(struct task_struct *p, bool final) p->total_numa_faults = 0; for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) numa_faults[i] = 0; + +#ifdef CONFIG_SCHED_TASK_RELATIONSHIP + if (task_relationship_used()) { + for (i = 0; i < FAULT_NODES_MAX; i++) { + p->rship->faults.faults_ordered[i].nid = -1; + p->rship->faults.faults_ordered[i].val = 0; + } + } +#endif } } @@ -2992,6 +3045,91 @@ static inline void update_scan_period(struct task_struct *p, int new_cpu) #endif /* CONFIG_NUMA_BALANCING */ +#ifdef CONFIG_SCHED_TASK_RELATIONSHIP +void sctl_sched_get_mem_relationship(struct task_struct *tsk, + struct sctl_mem_relationship_info *info) +{ +#ifdef CONFIG_NUMA_BALANCING + struct task_relationship *rship = tsk->rship; + int nid, priv, cpu_idx, mem_idx; + struct numa_group *grp; + + info->valid = false; + if (unlikely(!rship) || !tsk->numa_faults) + return; + + memset(info, 0, sizeof(*info)); + info->valid = true; + info->nodes_num = nr_node_ids; + info->grp_hdr.gid = NO_RSHIP; + info->total_faults = tsk->total_numa_faults; + + rcu_read_lock(); + + grp = rcu_dereference(tsk->numa_group); + if (grp) { + info->grp_hdr.gid = grp->gid; + info->grp_hdr.nr_tasks = grp->nr_tasks; + snprintf(info->grp_hdr.preferred_nid, SCTL_STR_MAX, "%*pbl", + nodemask_pr_args(&grp->preferred_nid)); + } + + for_each_online_node(nid) { + if (nid >= SCTL_MAX_NUMNODES) + break; + + for (priv = 0; priv < NR_NUMA_HINT_FAULT_TYPES; priv++) { + cpu_idx = task_faults_idx(NUMA_CPU, nid, priv); + mem_idx = task_faults_idx(NUMA_MEM, nid, priv); + info->faults[nid][priv] = tsk->numa_faults[mem_idx]; + info->faults_cpu[nid][priv] = tsk->numa_faults[cpu_idx]; + + if (grp) { + 
info->grp_faults[nid][priv] = grp->faults[mem_idx]; + info->grp_faults_cpu[nid][priv] = grp->faults_cpu[mem_idx]; + info->grp_total_faults = grp->total_faults; + } + } + } + + rcu_read_unlock(); +#endif +} + +#ifdef CONFIG_BPF_SCHED +void sched_get_mm_relationship(struct task_struct *tsk, + struct bpf_relationship_get_args *args) +{ +#ifdef CONFIG_NUMA_BALANCING + struct numa_group *grp; + + grp = rcu_dereference(tsk->numa_group); + if (grp) { + args->mm.comm.gid = grp->gid; + args->mm.comm.nr_tasks = grp->nr_tasks; + args->mm.grp_total_faults = grp->total_faults; + args->mm.comm.preferred_node = grp->preferred_nid; + memcpy(args->mm.grp_faults_ordered, grp->faults_ordered, + sizeof(args->mm.grp_faults_ordered)); + memcpy(args->mm.grp_score_ordered, grp->score_ordered, + sizeof(args->mm.grp_score_ordered)); + } +#endif +} + +void sched_set_curr_preferred_node(struct bpf_relationship_set_args *args) +{ +#ifdef CONFIG_NUMA_BALANCING + struct numa_group *grp = rcu_dereference_raw(current->numa_group); + + grp->preferred_nid = args->preferred_node; + schedstat_inc(grp->nodes_switch_cnt); +#endif +} +#endif + +#endif + #ifdef CONFIG_QOS_SCHED_PRIO_LB static __always_inline void adjust_rq_cfs_tasks(void (*list_op)(struct list_head *, struct list_head *), @@ -3816,6 +3954,8 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s cfs_rq_util_change(cfs_rq, 0); + numa_load_change(cfs_rq); + trace_pelt_cfs_tp(cfs_rq); } @@ -3846,6 +3986,8 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s cfs_rq_util_change(cfs_rq, 0); + numa_load_change(cfs_rq); + trace_pelt_cfs_tp(cfs_rq); } @@ -3886,6 +4028,7 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s } else if (decayed) { cfs_rq_util_change(cfs_rq, 0); + numa_load_change(cfs_rq); if (flags & UPDATE_TG) update_tg_load_avg(cfs_rq); @@ -6578,6 +6721,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) /* Working cpumask for: load_balance, load_balance_newidle. 
*/ DEFINE_PER_CPU(cpumask_var_t, load_balance_mask); DEFINE_PER_CPU(cpumask_var_t, select_idle_mask); +#ifdef CONFIG_BPF_SCHED +DEFINE_PER_CPU(cpumask_var_t, select_cpu_mask); +#endif #ifdef CONFIG_NO_HZ_COMMON @@ -6838,7 +6984,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this return cpumask_first(sched_group_span(group)); /* Traverse only the allowed CPUs */ -#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +#ifdef CONFIG_TASK_PLACEMENT_BY_CPU_RANGE for_each_cpu_and(i, sched_group_span(group), p->select_cpus) { #else for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) { @@ -6889,7 +7035,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p { int new_cpu = cpu; -#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +#ifdef CONFIG_TASK_PLACEMENT_BY_CPU_RANGE if (!cpumask_intersects(sched_domain_span(sd), p->select_cpus)) #else if (!cpumask_intersects(sched_domain_span(sd), p->cpus_ptr)) @@ -7020,7 +7166,7 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu if (!available_idle_cpu(cpu)) { idle = false; if (*idle_cpu == -1) { -#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +#ifdef CONFIG_TASK_PLACEMENT_BY_CPU_RANGE if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->select_cpus)) { #else if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->cpus_ptr)) { @@ -7080,7 +7226,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t if (!this_sd) return -1; -#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +#ifdef CONFIG_TASK_PLACEMENT_BY_CPU_RANGE cpumask_and(cpus, sched_domain_span(sd), p->select_cpus); #else cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); @@ -7248,7 +7394,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) lockdep_assert_irqs_disabled(); if ((available_idle_cpu(target) || sched_idle_cpu(target)) && -#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +#ifdef CONFIG_TASK_PLACEMENT_BY_CPU_RANGE cpumask_test_cpu(target, p->select_cpus) && #endif asym_fits_capacity(task_util, target)) { @@ -7261,7 +7407,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) */ if (prev != target && cpus_share_cache(prev, target) && (available_idle_cpu(prev) || sched_idle_cpu(prev)) && -#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +#ifdef CONFIG_TASK_PLACEMENT_BY_CPU_RANGE cpumask_test_cpu(prev, p->select_cpus) && #endif asym_fits_capacity(task_util, prev)) { @@ -7297,7 +7443,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) recent_used_cpu != target && cpus_share_cache(recent_used_cpu, target) && (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) && -#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +#ifdef CONFIG_TASK_PLACEMENT_BY_CPU_RANGE cpumask_test_cpu(p->recent_used_cpu, p->select_cpus) && #else cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) && @@ -7897,7 +8043,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING); #ifdef CONFIG_BPF_SCHED struct sched_migrate_ctx ctx; - cpumask_t *cpus_prev = NULL; cpumask_t *cpus; int ret; #endif @@ -7912,8 +8057,11 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f */ lockdep_assert_held(&p->pi_lock); -#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +#ifdef CONFIG_TASK_PLACEMENT_BY_CPU_RANGE p->select_cpus = p->cpus_ptr; +#endif + +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY if (dynamic_affinity_used() || smart_grid_used()) set_task_select_cpus(p, 
&idlest_cpu, sd_flag); #endif @@ -7928,7 +8076,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f new_cpu = prev_cpu; } -#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +#ifdef CONFIG_TASK_PLACEMENT_BY_CPU_RANGE want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->select_cpus); #else want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr); @@ -7945,18 +8093,18 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f ctx.wake_flags = wake_flags; ctx.want_affine = want_affine; ctx.sd_flag = sd_flag; - ctx.select_idle_mask = this_cpu_cpumask_var_ptr(select_idle_mask); + ctx.select_idle_mask = + this_cpu_cpumask_var_ptr(select_cpu_mask); ret = bpf_sched_cfs_select_rq(&ctx); if (ret >= 0) { rcu_read_unlock(); return ret; } else if (ret != -1) { - cpus = this_cpu_cpumask_var_ptr(select_idle_mask); - if (cpumask_subset(cpus, p->cpus_ptr) && + cpus = this_cpu_cpumask_var_ptr(select_cpu_mask); + if (cpumask_subset(cpus, p->select_cpus) && !cpumask_empty(cpus)) { - cpus_prev = (void *)p->cpus_ptr; - p->cpus_ptr = cpus; + p->select_cpus = cpus; } } } @@ -7969,7 +8117,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f */ if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { -#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +#ifdef CONFIG_TASK_PLACEMENT_BY_CPU_RANGE new_cpu = cpu; if (cpu != prev_cpu && cpumask_test_cpu(prev_cpu, p->select_cpus)) @@ -8004,11 +8152,8 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f if (bpf_sched_enabled()) { ctx.new_cpu = new_cpu; ret = bpf_sched_cfs_select_rq_exit(&ctx); - if (ret >= 0) - new_cpu = ret; - - if (cpus_prev) - p->cpus_ptr = cpus_prev; + if (ret > 0 && ret <= nr_cpu_ids) + new_cpu = ret - 1; } #endif @@ -9486,9 +9631,23 @@ static int can_migrate_task(struct task_struct *p, struct lb_env *env) { int tsk_cache_hot; +#ifdef CONFIG_BPF_SCHED + struct sched_migrate_node migrate_node; + int ret; +#endif lockdep_assert_rq_held(env->src_rq); +#ifdef CONFIG_BPF_SCHED + if (bpf_sched_enabled()) { + migrate_node.src_cpu = env->src_cpu; + migrate_node.dst_cpu = env->dst_cpu; + ret = bpf_sched_cfs_can_migrate_task(p, &migrate_node); + if (ret > 0) + return ret - 1; + } +#endif + /* * We do not migrate tasks that are: * 1) throttled_lb_pair, or @@ -10845,7 +11004,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) int local_group; /* Skip over this group if it has no CPUs allowed */ -#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +#ifdef CONFIG_TASK_PLACEMENT_BY_CPU_RANGE if (!cpumask_intersects(sched_group_span(group), p->select_cpus)) #else @@ -13130,6 +13289,10 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) update_overutilized_status(task_rq(curr)); task_tick_core(rq, curr); + + task_tick_relationship(rq, curr); + + update_numa_capacity(rq); } /* @@ -13691,7 +13854,7 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m) struct numa_group *ng; rcu_read_lock(); - ng = rcu_dereference(p->numa_group); + for_each_online_node(node) { if (p->numa_faults) { tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)]; @@ -13706,8 +13869,99 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m) rcu_read_unlock(); } #endif /* CONFIG_NUMA_BALANCING */ + +void sched_show_relationship(struct task_struct *p, struct seq_file *m) +{ +#ifdef CONFIG_SCHED_TASK_RELATIONSHIP + struct net_group *net_grp; + struct numa_group 
*ng; + int node; + + if (!task_relationship_used()) + return; + + rcu_read_lock(); + + ng = rcu_dereference(p->numa_group); + if (ng) { + seq_printf(m, "numa group preferred nid %*pbl switch_cnt %llu\n", + nodemask_pr_args(&ng->preferred_nid), + ng->nodes_switch_cnt); + } + + net_grp = rcu_dereference(p->rship->net_group); + if (net_grp) { + seq_printf(m, "net group gid %d preferred nid %*pbl\n", + net_grp->hdr.gid, + nodemask_pr_args(&net_grp->hdr.preferred_nid)); + } + + rcu_read_unlock(); + + for_each_online_node(node) { + print_node_load_info(m, node); + } +#endif +} #endif /* CONFIG_SCHED_DEBUG */ +#ifdef CONFIG_SCHED_TASK_RELATIONSHIP +void task_preferred_node_work(struct callback_head *work) +{ +#ifdef CONFIG_NUMA_BALANCING + struct task_struct *curr = current; + struct numa_group *numa_grp; +#ifdef CONFIG_BPF_SCHED + struct sched_preferred_node_ctx ctx = {0}; +#endif + + work->next = work; + +#ifdef CONFIG_BPF_SCHED + numa_grp = deref_curr_numa_group(curr); + if (numa_grp) { + + spin_lock_irq(&numa_grp->lock); + ctx.tsk = curr; + ctx.preferred_node = numa_grp->preferred_nid; + bpf_sched_cfs_change_preferred_node(&ctx); + spin_unlock_irq(&numa_grp->lock); + } +#endif +#endif +} + +void task_tick_relationship(struct rq *rq, struct task_struct *curr) +{ +#ifdef CONFIG_NUMA_BALANCING + struct callback_head *work = &curr->rship->node_work; + struct numa_group *numa_grp; + + if (!task_relationship_supported(curr)) + return; + + if (work->next != work) + return; + + numa_grp = deref_curr_numa_group(curr); + if (!numa_grp || numa_grp->nr_tasks <= 1) + return; + + spin_lock(&numa_grp->lock); + + if (time_after(jiffies, + (unsigned long)(numa_grp->node_stamp + msecs_to_jiffies(100)))) { + numa_grp->node_stamp = jiffies; + spin_unlock(&numa_grp->lock); + task_work_add(curr, &curr->rship->node_work, TWA_RESUME); + return; + } + + spin_unlock(&numa_grp->lock); +#endif +} +#endif + __init void init_sched_fair_class(void) { #ifdef CONFIG_QOS_SCHED @@ -13717,6 +13971,8 @@ __init void init_sched_fair_class(void) INIT_LIST_HEAD(&per_cpu(qos_throttled_cfs_rq, i)); #endif + init_sched_numa_icon(); + #ifdef CONFIG_SMP open_softirq(SCHED_SOFTIRQ, run_rebalance_domains); diff --git a/kernel/sched/numa_icon.c b/kernel/sched/numa_icon.c new file mode 100644 index 0000000000000000000000000000000000000000..e9825ac7f866e15414352ec68c69567fa217e2ba --- /dev/null +++ b/kernel/sched/numa_icon.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Common code for task numa isolation consolidation + * + * Copyright (C) 2023-2024 Huawei Technologies Co., Ltd + * + * Author: Hui Tang + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ +#include "sched.h" + +static bool __sched_numa_icon_switch __initdata; +DEFINE_STATIC_KEY_FALSE(sched_numa_icon_switch); + +struct node_load_info *node_load_ptr; + +static void set_numa_icon_switch(bool enabled) +{ + if (enabled) { + static_branch_enable(&sched_numa_icon_switch); + task_relationship_enable(); + } else { + static_branch_disable(&sched_numa_icon_switch); + task_relationship_disable(); + } +} + +static int __init numa_icon_switch_setup(char *str) +{ + int ret = 0; + + if (!str) + goto out; + + /* + * This code is called before jump labels have been set up, so we can't + * change the static branch directly just yet. Instead set a temporary + * variable so init_numa_icon_switch() can do it later. + */ + if (!strcmp(str, "enable")) { + __sched_numa_icon_switch = true; + ret = 1; + } else if (!strcmp(str, "disable")) { + __sched_numa_icon_switch = false; + ret = 1; + } +out: + if (!ret) + pr_warn("Unable to parse numa_icon=\n"); + + return ret; +} +__setup("numa_icon=", numa_icon_switch_setup); + +__init void init_sched_numa_icon(void) +{ + int i; + + set_numa_icon_switch(__sched_numa_icon_switch); + + if (!sched_numa_icon_enabled()) + return; + + node_load_ptr = kcalloc(nr_node_ids, sizeof(struct node_load_info), + GFP_KERNEL); + + for (i = 0; i < nr_node_ids; i++) { + raw_spin_lock_init(&node_load_ptr[i].lock); + node_load_ptr[i].util_avg_last = + kcalloc(nr_cpu_ids, sizeof(struct sched_avg), GFP_KERNEL); + } + + for_each_possible_cpu(i) { + node_load_ptr[cpu_to_node(i)].compute_capacity += + SCHED_CAPACITY_SCALE; + } +} + +void print_node_load_info(struct seq_file *m, int node) +{ + if (!sched_numa_icon_enabled()) + return; + + seq_printf(m, "node %d capacity=%lu util_avg=%lu\n", node, + node_load_ptr[node].compute_capacity, + atomic_long_read(&node_load_ptr[node].util_avg)); +} + +void numa_load_change(struct cfs_rq *cfs_rq) +{ + struct rq *rq = rq_of(cfs_rq); + int cpu = cpu_of(rq); + int nid = cpu_to_node(cpu); + struct sched_avg *avg_old; + long delta; + + if (!sched_numa_icon_enabled()) + return; + + avg_old = &node_load_ptr[nid].util_avg_last[cpu]; + + if (&rq->cfs != cfs_rq) + return; + + delta = cfs_rq->avg.util_avg - avg_old->util_avg; + atomic_long_add(delta, &node_load_ptr[nid].util_avg); + avg_old->util_avg = cfs_rq->avg.util_avg; +} + +void update_numa_capacity(struct rq *rq) +{ + int cpu = cpu_of(rq); + int nid = cpu_to_node(cpu); + unsigned long capacity = 0; + + if (!sched_numa_icon_enabled()) + return; + + if (cpu != cpumask_first(cpumask_of_node(nid))) + return; + + for_each_cpu(cpu, cpumask_of_node(nid)) { + capacity += cpu_rq(cpu)->cpu_capacity; + } + node_load_ptr[nid].compute_capacity = capacity; +} + +#ifdef CONFIG_BPF_SCHED +void sched_get_node_load(int nid, struct bpf_node_stats *ctx) +{ + ctx->util = atomic_long_read(&node_load_ptr[nid].util_avg); + ctx->compute_capacity = node_load_ptr[nid].compute_capacity; + ctx->weight = cpumask_weight(cpumask_of_node(nid)); +} +#endif diff --git a/kernel/sched/numa_icon.h b/kernel/sched/numa_icon.h new file mode 100644 index 0000000000000000000000000000000000000000..adeed53e9f14502e33e83d62852bda9c952ff343 --- /dev/null +++ b/kernel/sched/numa_icon.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SCHED_NUMA_ICON_H +#include + +struct node_load_info { + raw_spinlock_t lock ____cacheline_aligned; + atomic_long_t util_avg; + unsigned long compute_capacity; + struct sched_avg *util_avg_last; +}; + +#ifdef CONFIG_QOS_SCHED_NUMA_ICON +extern struct static_key_false 
sched_numa_icon_switch; +static __always_inline bool sched_numa_icon_enabled(void) +{ + return static_branch_unlikely(&sched_numa_icon_switch); +} + +extern void print_node_load_info(struct seq_file *m, int node); +extern __init void init_sched_numa_icon(void); +extern void sched_get_node_load(int nid, struct bpf_node_stats *ctx); +extern void init_node_load(struct rq *rq); +extern void numa_load_change(struct cfs_rq *cfs_rq); +extern void update_numa_capacity(struct rq *rq); + +#else /* !CONFIG_QOS_SCHED_NUMA_ICON */ +static inline void init_sched_numa_icon(void) {} + +static inline void init_node_load(struct rq *rq) {} + +static inline void numa_load_change(struct cfs_rq *cfs_rq) {} + +static inline void update_numa_capacity(struct rq *rq) {} + +static inline void print_node_load_info(struct seq_file *m, int node) {} + +static __always_inline bool sched_numa_icon_enabled(void) +{ + return false; +} +#endif /* CONFIG_QOS_SCHED_NUMA_ICON */ + +#endif diff --git a/kernel/sched/relationship.c b/kernel/sched/relationship.c new file mode 100644 index 0000000000000000000000000000000000000000..515c913aeb334d66464a7d0147d1d094e4314d77 --- /dev/null +++ b/kernel/sched/relationship.c @@ -0,0 +1,436 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Common code for task relationship aware + * + * Copyright (C) 2023-2024 Huawei Technologies Co., Ltd + * + * Author: Hui Tang + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
+ * + */ +#include +#include + +#include "sched.h" + +#define RXTX_BYTES_PERIOD_MS (1000) +#define RXTX_BYTES_DECAY_RATIO (2) + +DEFINE_STATIC_KEY_FALSE(__relationship_switch); + +void task_relationship_enable(void) +{ + static_branch_enable(&__relationship_switch); +} + +void task_relationship_disable(void) +{ + static_branch_disable(&__relationship_switch); +} + +bool task_relationship_supported(struct task_struct *tsk) +{ + if (!task_relationship_used()) + return false; + + if (!tsk->rship || !tsk->mm || + !cpumask_subset(cpu_online_mask, tsk->cpus_ptr) || + !nodes_subset(node_online_map, tsk->mems_allowed) || + get_task_policy(tsk)->mode == MPOL_BIND || + get_task_policy(tsk)->mode == MPOL_INTERLEAVE) + return false; + + return true; +} + +static inline int get_net_group(struct net_group *grp) +{ + return refcount_inc_not_zero(&grp->hdr.refcount); +} + +static inline void put_net_group(struct net_group *grp) +{ + if (refcount_dec_and_test(&grp->hdr.refcount)) + kfree_rcu(grp, rcu); +} + +static inline void put_task_net_group(struct task_struct *tsk, bool reset) +{ + struct net_group *grp; + unsigned long flags; + + spin_lock_irqsave(&tsk->rship->net_lock, flags); + + grp = rcu_dereference_protected(tsk->rship->net_group, + lockdep_is_held(&tsk->rship->net_lock)); + if (grp) { + spin_lock(&grp->hdr.lock); + grp->rxtx_bytes -= tsk->rship->rxtx_bytes; + grp->hdr.nr_tasks--; + spin_unlock(&grp->hdr.lock); + put_net_group(grp); + RCU_INIT_POINTER(tsk->rship->net_group, NULL); + } + + if (reset) { + tsk->rship->rxtx_bytes = 0; + tsk->rship->rxtx_remote_bytes = 0; + tsk->rship->rx_dev_idx = -1; + tsk->rship->rx_dev_queue_idx = -1; + tsk->rship->nic_nid = -1; + tsk->rship->rx_dev_netns_cookie = 0; + } + + spin_unlock_irqrestore(&tsk->rship->net_lock, flags); +} + +static inline int remote_rxtx_process(struct net_relationship_req *req) +{ + struct task_relationship *rship; + struct task_struct *tsk; + unsigned long flags; + pid_t pid; + long diff; + + rcu_read_lock(); + + pid = req->net_rship_type == NET_RS_TYPE_RX ? 
req->rx_pid : req->tx_pid; + tsk = find_task_by_pid_ns(pid, &init_pid_ns); + if (!tsk || !task_relationship_supported(tsk)) + goto out_unlock; + + rship = tsk->rship; + if (time_after(jiffies, rship->rxtx_remote_update_next)) { + diff = rship->rxtx_remote_buffer - rship->rxtx_remote_bytes / 2; + + spin_lock_irqsave(&rship->net_lock, flags); + rship->nic_nid = req->nic_nid; + if (req->net_rship_type == NET_RS_TYPE_RX) { + rship->rx_dev_idx = req->rx_dev_idx; + rship->rx_dev_queue_idx = req->rx_dev_queue_idx; + rship->rx_dev_netns_cookie = req->rx_dev_netns_cookie; + } + rship->rxtx_remote_bytes += diff; + rship->rxtx_remote_buffer = 0; + spin_unlock_irqrestore(&rship->net_lock, flags); + } + + rship->rxtx_remote_buffer += req->rxtx_bytes; + +out_unlock: + rcu_read_unlock(); + + return 0; +} + +int sched_net_relationship_submit(struct net_relationship_req *req) +{ + struct task_struct *rx_tsk, *tx_tsk, *dst_tsk; + struct net_group *rx_grp, *tx_grp; + int ret; + + if (req->net_rship_type == NET_RS_TYPE_RX || + req->net_rship_type == NET_RS_TYPE_TX) + return remote_rxtx_process(req); + + rcu_read_lock(); + + rx_tsk = find_task_by_pid_ns(req->rx_pid, &init_pid_ns); + tx_tsk = find_task_by_pid_ns(req->tx_pid, &init_pid_ns); + if (!rx_tsk || !tx_tsk) { + ret = -ESRCH; + goto out_unlock; + } + + if (!task_relationship_supported(rx_tsk) || + !task_relationship_supported(tx_tsk)) { + ret = -EPERM; + goto out_unlock; + } + + if (atomic_read(&rx_tsk->rship->cb.active) && + atomic_read(&tx_tsk->rship->cb.active)) { + ret = -EBUSY; + goto out_unlock; + } + + rx_grp = rcu_dereference(rx_tsk->rship->net_group); + tx_grp = rcu_dereference(tx_tsk->rship->net_group); + if (rx_grp && tx_grp) { + dst_tsk = rx_grp->hdr.nr_tasks >= tx_grp->hdr.nr_tasks ? + rx_tsk : tx_tsk; + } else if (rx_grp) { + dst_tsk = rx_tsk; + } else if (tx_grp) { + dst_tsk = tx_tsk; + } else { + dst_tsk = !atomic_read(&rx_tsk->rship->cb.active) ? + rx_tsk : tx_tsk; + } + + if (atomic_cmpxchg(&dst_tsk->rship->cb.active, 0, 1)) { + ret = -EBUSY; + goto out_unlock; + } + + memcpy(&dst_tsk->rship->cb.req, req, sizeof(*req)); + dst_tsk->rship->cb.src_pid = dst_tsk == rx_tsk ? 
+ req->tx_pid : req->rx_pid; + task_work_add(dst_tsk, &dst_tsk->rship->cb.twork, TWA_RESUME); + ret = 0; + +out_unlock: + rcu_read_unlock(); + return ret; +} + +static void task_net_group(struct task_struct *curr, struct task_struct *src) +{ + struct net_group *src_grp, *curr_grp, *grp; + + double_lock_irq(&src->rship->net_lock, &curr->rship->net_lock); + curr_grp = rcu_dereference_protected(curr->rship->net_group, + lockdep_is_held(&curr->rship->net_lock)); + src_grp = rcu_dereference_protected(src->rship->net_group, + lockdep_is_held(&src->rship->net_lock)); + + if (!curr_grp) { + grp = kzalloc(sizeof(*grp), GFP_ATOMIC | __GFP_NOWARN); + if (!grp) + goto out_unlock; + + refcount_set(&grp->hdr.refcount, 1); + spin_lock_init(&grp->hdr.lock); + grp->hdr.gid = curr->pid; + grp->hdr.preferred_nid = NODE_MASK_NONE; + node_set(task_node(curr), grp->hdr.preferred_nid); + grp->hdr.nr_tasks = 1; + rcu_assign_pointer(curr->rship->net_group, grp); + curr_grp = rcu_dereference_protected(curr->rship->net_group, + lockdep_is_held(&curr->rship->net_lock)); + } + + if (curr_grp == src_grp) + goto out_unlock; + + if (!get_net_group(curr_grp)) + goto out_unlock; + + spin_lock(&curr_grp->hdr.lock); + curr_grp->hdr.nr_tasks++; + curr_grp->rxtx_bytes += src->rship->rxtx_bytes; + spin_unlock(&curr_grp->hdr.lock); + + if (src_grp) { + spin_lock(&src_grp->hdr.lock); + src_grp->hdr.nr_tasks--; + src_grp->rxtx_bytes -= src->rship->rxtx_bytes; + spin_unlock(&src_grp->hdr.lock); + put_net_group(src_grp); + } + + rcu_assign_pointer(src->rship->net_group, curr_grp); +out_unlock: + spin_unlock(&src->rship->net_lock); + spin_unlock_irq(&curr->rship->net_lock); +} + +static void task_rxtx_data_update(struct task_struct *tsk) +{ + struct net_group *grp; + long bytes_diff; + + spin_lock_irq(&tsk->rship->net_lock); + bytes_diff = tsk->rship->rxtx_buffer - + tsk->rship->rxtx_bytes / RXTX_BYTES_DECAY_RATIO; + tsk->rship->rxtx_bytes += bytes_diff; + tsk->rship->rxtx_buffer = 0; + tsk->rship->rxtx_update_next = jiffies + + msecs_to_jiffies(RXTX_BYTES_PERIOD_MS); + + grp = rcu_dereference_protected(tsk->rship->net_group, + lockdep_is_held(&tsk->rship->net_lock)); + if (grp) { + spin_lock(&grp->hdr.lock); + grp->rxtx_bytes += bytes_diff; + spin_unlock(&grp->hdr.lock); + } + + spin_unlock_irq(&tsk->rship->net_lock); +} + +static void task_net_relationship_work(struct callback_head *work) +{ + struct net_relationship_callback *ncb; + struct task_struct *curr = current; + struct net_relationship_req req; + struct task_struct *src; + + ncb = container_of(work, struct net_relationship_callback, twork); + req = ncb->req; + atomic_set(&ncb->active, 0); + + rcu_read_lock(); + src = find_task_by_pid_ns(ncb->src_pid, &init_pid_ns); + if (!src) { + rcu_read_unlock(); + return; + } + + if (!task_relationship_supported(src) || + !task_relationship_supported(curr)) { + rcu_read_unlock(); + return; + } + + /* prevent src going away */ + get_task_struct(src); + + rcu_read_unlock(); + + /* build net relationship */ + task_net_group(src, curr); + + if (time_after(jiffies, curr->rship->rxtx_update_next)) + task_rxtx_data_update(curr); + + if (time_after(jiffies, src->rship->rxtx_update_next)) + task_rxtx_data_update(src); + + double_lock_irq(&src->rship->net_lock, &curr->rship->net_lock); + curr->rship->rxtx_buffer += req.rxtx_bytes; + src->rship->rxtx_buffer += req.rxtx_bytes; + spin_unlock(&src->rship->net_lock); + spin_unlock_irq(&curr->rship->net_lock); + + put_task_struct(src); +} + +static int cmp_fault_stats(const void *a, const void *b) +{ 
+ return ((struct fault_array_info *)b)->val - + ((struct fault_array_info *)a)->val; +} + +void numa_faults_update_and_sort(int nid, int new, + struct fault_array_info *stats) +{ + int nodes, i; + + if (!task_relationship_used()) + return; + + if (nid == first_online_node) { + for (i = 0; i < FAULT_NODES_MAX; i++) { + stats[i].nid = -1; + stats[i].val = 0; + } + } + + nodes = min(FAULT_NODES_MAX, num_online_nodes()); + if (new <= stats[nodes - 1].val) + return; + + stats[nodes - 1].nid = nid; + stats[nodes - 1].val = new; + sort(stats, nodes, sizeof(stats[0]), cmp_fault_stats, NULL); +} + +void sched_get_relationship(struct task_struct *tsk, + struct bpf_relationship_get_args *args) +{ + struct net_group *ngrp; + + rcu_read_lock(); + + /* memory relationship */ + sched_get_mm_relationship(tsk, args); + + /* net relationship */ + ngrp = rcu_dereference(tsk->rship->net_group); + if (ngrp) { + args->net.comm.gid = ngrp->hdr.gid; + args->net.comm.nr_tasks = ngrp->hdr.nr_tasks; + args->net.comm.preferred_node = ngrp->hdr.preferred_nid; + args->net.grp_rxtx_bytes = ngrp->rxtx_bytes; + } + + rcu_read_unlock(); +} + +void sctl_sched_get_net_relationship(struct task_struct *tsk, + struct sctl_net_relationship_info *info) +{ + struct task_relationship *rship = tsk->rship; + struct net_group *grp; + + memset(info, 0, sizeof(*info)); + info->valid = true; + info->nic_nid = rship->nic_nid; + info->rx_dev_idx = rship->rx_dev_idx; + info->rx_dev_queue_idx = rship->rx_dev_queue_idx; + info->rx_dev_netns_cookie = rship->rx_dev_netns_cookie; + info->rxtx_remote_bytes = rship->rxtx_remote_bytes; + info->rxtx_bytes = rship->rxtx_bytes; + + info->grp_hdr.gid = NO_RSHIP; + + rcu_read_lock(); + + grp = rcu_dereference(rship->net_group); + if (grp) { + info->grp_hdr.gid = grp->hdr.gid; + info->grp_hdr.nr_tasks = grp->hdr.nr_tasks; + snprintf(info->grp_hdr.preferred_nid, SCTL_STR_MAX, "%*pbl", + nodemask_pr_args(&grp->hdr.preferred_nid)); + info->grp_rxtx_bytes = grp->rxtx_bytes; + } + + rcu_read_unlock(); +} + +void task_relationship_free(struct task_struct *tsk, bool reset) +{ + if (!task_relationship_used()) + return; + + put_task_net_group(tsk, reset); +} + +int sched_relationship_fork(struct task_struct *p) +{ + int i; + + p->rship = kzalloc(sizeof(struct task_relationship), GFP_KERNEL); + if (!p->rship) + return -ENOMEM; + + for (i = 0; i < FAULT_NODES_MAX; i++) + p->rship->faults.faults_ordered[i].nid = -1; + + p->rship->nic_nid = -1; + p->rship->rx_dev_idx = -1; + p->rship->rx_dev_queue_idx = -1; + + spin_lock_init(&p->rship->net_lock); + init_task_work(&p->rship->cb.twork, task_net_relationship_work); +#ifdef CONFIG_NUMA_BALANCING + p->rship->node_work.next = &p->rship->node_work; + init_task_work(&p->rship->node_work, task_preferred_node_work); +#endif + return 0; +} + +void sched_relationship_free(struct task_struct *p) +{ + kfree(p->rship); + p->rship = NULL; +} diff --git a/kernel/sched/relationship_ioctl.c b/kernel/sched/relationship_ioctl.c new file mode 100644 index 0000000000000000000000000000000000000000..229786961ec86ef2de01646a737b5c16bcde3dad --- /dev/null +++ b/kernel/sched/relationship_ioctl.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Common code for support ioctl for schedluler + * + * Copyright (C) 2023-2024 Huawei Technologies Co., Ltd + * + * Author: Hui Tang + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software 
Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#include +#include +#include +#include +#include + +#include "sched.h" + +static int sched_ctl_open(struct inode *inode, struct file *filp) +{ + filp->private_data = NULL; + + return 0; +} + +static int sched_ctl_release(struct inode *inode, struct file *filp) +{ + return 0; +} + +static int sched_ctrl_get_relationship(void __user *arg) +{ + struct sctl_get_relationship_args data; + struct task_struct *tsk; + pid_t pid; + + if (!task_relationship_used()) { + pr_err("task relationship disabled!\n"); + return -EPERM; + } + + if (copy_from_user(&data, arg, sizeof(data))) { + pr_err("fail to copy_from_user!\n"); + return -EFAULT; + } + + pid = data.tid; + + rcu_read_lock(); + + tsk = find_task_by_vpid(pid); + if (!tsk) { + rcu_read_unlock(); + return -ESRCH; + } + + if (!task_relationship_supported(tsk)) { + rcu_read_unlock(); + return -EPERM; + } + + sctl_sched_get_net_relationship(tsk, &data.nrsi); + sctl_sched_get_mem_relationship(tsk, &data.mrsi); + + rcu_read_unlock(); + + if (copy_to_user(arg, &data, sizeof(data))) { + pr_err("fail to copy_to_user!\n"); + return -EFAULT; + } + + return 0; +} + +static long sched_ctl_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + int ret = 0; + struct sched_ctl_data *data; + + if (_IOC_TYPE(cmd) != SCTL_IOC_MAGIC) + return -ENOTTY; + + if (_IOC_NR(cmd) > SCTL_IOC_MAXNR) + return -ENOTTY; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + data = filp->private_data; + + switch (cmd) { + case SCTL_GET_RSHIP: + ret = sched_ctrl_get_relationship((void __user *)(uintptr_t)arg); + break; + default: + ret = -EINVAL; + + } + + return ret; +} + +#ifdef CONFIG_COMPAT +static long +sched_ctl_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + arg = (unsigned long)(uintptr_t)compat_ptr(arg); + return sched_ctl_ioctl(file, cmd, arg); +} +#endif /* CONFIG_COMPAT */ + +static const struct file_operations sched_ctl_fops = { + .open = sched_ctl_open, + .release = sched_ctl_release, + .llseek = no_llseek, + .unlocked_ioctl = sched_ctl_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = sched_ctl_compat_ioctl, +#endif +}; + +static struct miscdevice sched_ctl_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = "relationship_ctrl", + .fops = &sched_ctl_fops, +}; + +static int __init sched_ctl_device_init(void) +{ + return misc_register(&sched_ctl_device); +}; + +device_initcall(sched_ctl_device_init); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e6f934af7062f938c5a67f9b10a77d034e91868d..3b2fc472908a161c28f9fd327ca31e5d575f2715 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -76,6 +76,8 @@ #include "cpupri.h" #include "cpudeadline.h" +#include "numa_icon.h" +#include #include diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index fc51d6f0d447e5d81b1a9c42fca9a11c186a8fca..3afc3e354844b59d7f70daab890813b2623f54a6 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -444,6 +444,12 @@ class PrinterHelpers(Printer): 'struct cpumask_op_args', 'struct sched_migrate_ctx', 'struct sched_affine_ctx', + 'struct sched_migrate_node', + 'struct nodemask_op_args', + 'struct bpf_relationship_get_args', + 'struct bpf_relationship_set_args', + 'struct sched_preferred_node_ctx', + 'struct bpf_node_stats', ] 
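
A note on the relationship_ctrl interface added above in kernel/sched/relationship_ioctl.c: the misc device is driven from user space with the SCTL_GET_RSHIP ioctl. The sketch below is illustrative only; the header name <linux/sched_ctl.h>, the printed field types and the function name query_task_relationship() are assumptions of this example, while SCTL_GET_RSHIP, the /dev/relationship_ctrl node (derived from the misc device name), the .tid/.nrsi/.mrsi members and the CAP_SYS_ADMIN requirement come from the patch itself.

/* Hypothetical user-space caller for the relationship_ctrl ioctl. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <unistd.h>
#include <linux/sched_ctl.h>	/* assumed uapi header exporting SCTL_GET_RSHIP */

static int query_task_relationship(pid_t tid)
{
	struct sctl_get_relationship_args args;
	int fd, ret;

	memset(&args, 0, sizeof(args));
	args.tid = tid;			/* thread to query */

	fd = open("/dev/relationship_ctrl", O_RDWR);
	if (fd < 0)
		return -1;

	/* Requires CAP_SYS_ADMIN, see sched_ctl_ioctl() above. */
	ret = ioctl(fd, SCTL_GET_RSHIP, &args);
	if (!ret)
		printf("tid %d: net group %d, rxtx_bytes %lu\n", (int)tid,
		       args.nrsi.grp_hdr.gid,
		       (unsigned long)args.nrsi.rxtx_bytes);

	close(fd);
	return ret;
}
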
     known_types = {
             '...',
@@ -496,6 +502,12 @@ class PrinterHelpers(Printer):
             'struct cpumask_op_args',
             'struct sched_migrate_ctx',
             'struct sched_affine_ctx',
+            'struct sched_migrate_node',
+            'struct nodemask_op_args',
+            'struct bpf_relationship_get_args',
+            'struct bpf_relationship_set_args',
+            'struct sched_preferred_node_ctx',
+            'struct bpf_node_stats',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e241f8d4becd8553344c5cbb679ee3a6667b122a..5a153a1a8f18a4758864366630384d84b48b1eb0 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3874,6 +3874,52 @@ union bpf_attr {
  *		check src_cpu whether share cache with dst_cpu.
  *	Return
  *		true yes, false no.
+ *
+ * int bpf_nodemask_op(struct nodemask_op_args *op, int len)
+ *	Description
+ *		A set of nodemask-related operations. The operation is
+ *		selected by *op*->op_type; the other *op* fields must be
+ *		filled in according to that type. *op*->op_type is one of:
+ *
+ *		**NODEMASK_EMPTY**
+ *			nodes_empty(op->arg1) returned.
+ *		**NODEMASK_NODE_ISSET**
+ *			node_isset(op->arg1, op->arg2) returned.
+ *		**NODEMASK_NODES_CLEAR**
+ *			clear all nodes in op->arg1, 0 returned.
+ *		**NODEMASK_NODE_CLEAR**
+ *			clear node op->arg1 in op->arg2, 0 returned.
+ *		**NODEMASK_NODE_SET**
+ *			set node op->arg1 in op->arg2, 0 returned.
+ *		**NODEMASK_WEIGHT**
+ *			nodes_weight(op->arg1) returned.
+ *		**NODEMASK_NODELIST_PARSE**
+ *			parse the node list string *op*->arg1 into nodemask_t *op*->arg2,
+ *			0 on success, or a negative error in case of failure.
+ *		**NODEMASK_TO_CPUMASK**
+ *			convert nodemask_t *op*->arg1 to cpumask_t *op*->arg2, 0 returned.
+ *		**NODEMASK_ONLINE**
+ *			copy the online node mask into nodemask_t *op*->arg1, 0 returned.
+ *	Return
+ *		See the description of each operation above.
+ *
+ * int bpf_get_task_relationship_stats(struct task_struct *tsk, struct bpf_map *map, struct bpf_relationship_get_args *stats)
+ *	Description
+ *		Get the relationship statistics of *tsk* and store them in *stats*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_sched_set_curr_preferred_node(struct bpf_relationship_set_args *args, int len)
+ *	Description
+ *		Set the preferred node of the current task.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_node_stats(int nid, struct bpf_node_stats *ctx, int len)
+ *	Description
+ *		Get the resource statistics of node *nid* and store them in *ctx*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
*/ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4046,6 +4092,10 @@ union bpf_attr { FN(sched_entity_to_tg), \ FN(cpumask_op), \ FN(cpus_share_cache), \ + FN(nodemask_op), \ + FN(get_task_relationship_stats),\ + FN(sched_set_curr_preferred_node),\ + FN(get_node_stats), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/tools/lib/bpf/libbpf_sched.h b/tools/lib/bpf/libbpf_sched.h index 04b43c145fcd4d720260104294465a9634b49c82..3e9b41788637b468e4657fcd5b7b8af2eab05cec 100644 --- a/tools/lib/bpf/libbpf_sched.h +++ b/tools/lib/bpf/libbpf_sched.h @@ -16,6 +16,8 @@ #define __LIBBPF_LIBSCHED_H #include +#include +#include #include #include #include @@ -26,7 +28,7 @@ #define INVALID_PTR ((void *)(0UL)) #define getVal(P) \ ({ \ - typeof(P) val = 0; \ + typeof(P) val; \ bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ val; \ }) @@ -78,6 +80,119 @@ struct { __uint(max_entries, 1); } map_cpumask_info SEC(".maps"); +static struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, u32); + __type(value, struct bpf_relationship_get_args); + __uint(max_entries, 1); +} map_rship_stats SEC(".maps"); + +static __always_inline void +libbpf_nodes_and(nodemask_t *dst, nodemask_t *src1, nodemask_t *src2) +{ + struct nodemask_op_args op = {0}; + + op.op_type = NODEMASK_NODES_AND; + op.arg1 = dst; + op.arg2 = src1; + op.arg3 = src2; + bpf_nodemask_op(&op, sizeof(op)); +} + +static __always_inline void +libbpf_nodes_andnot(nodemask_t *dst, nodemask_t *src1, nodemask_t *src2) +{ + struct nodemask_op_args op = {0}; + + op.op_type = NODEMASK_NODES_ANDNOT; + op.arg1 = dst; + op.arg2 = src1; + op.arg3 = src2; + bpf_nodemask_op(&op, sizeof(op)); +} + +static __always_inline void +libbpf_nodes_or(nodemask_t *dst, nodemask_t *src1, nodemask_t *src2) +{ + struct nodemask_op_args op = {0}; + + op.op_type = NODEMASK_NODES_OR; + op.arg1 = dst; + op.arg2 = src1; + op.arg3 = src2; + bpf_nodemask_op(&op, sizeof(op)); +} + +static __always_inline void libbpf_node_set(int nid, + nodemask_t *nodes) +{ + struct nodemask_op_args op = {0}; + + op.op_type = NODEMASK_NODE_SET; + op.arg1 = &nid; + op.arg2 = nodes; + op.arg3 = INVALID_PTR; + bpf_nodemask_op(&op, sizeof(op)); +} + +static __always_inline void libbpf_node_clear(int nid, + nodemask_t *nodes) +{ + struct nodemask_op_args op = {0}; + + op.op_type = NODEMASK_NODE_CLEAR; + op.arg1 = &nid; + op.arg2 = nodes; + op.arg3 = INVALID_PTR; + bpf_nodemask_op(&op, sizeof(op)); +} + +static __always_inline long libbpf_node_isset(int nid, + nodemask_t *nodes) +{ + struct nodemask_op_args op = {0}; + + op.op_type = NODEMASK_NODE_ISSET; + op.arg1 = &nid; + op.arg2 = nodes; + op.arg3 = INVALID_PTR; + return bpf_nodemask_op(&op, sizeof(op)); +} + +static __always_inline long libbpf_nodemask_empty(nodemask_t *nodes) +{ + struct nodemask_op_args op = {0}; + + op.op_type = NODEMASK_EMPTY; + op.arg1 = nodes; + op.arg2 = INVALID_PTR; + op.arg3 = INVALID_PTR; + return bpf_nodemask_op(&op, sizeof(op)); +} + +static __always_inline long libbpf_nodemask_to_cpumask(nodemask_t *nodes, + struct cpumask *cpus) +{ + struct nodemask_op_args op = {0}; + + op.op_type = NODEMASK_TO_CPUMASK; + op.arg1 = nodes; + op.arg2 = cpus; + op.arg3 = INVALID_PTR; + return bpf_nodemask_op(&op, sizeof(op)); +} + +static __always_inline long libbpf_nodes_online(nodemask_t *nodes) +{ + struct nodemask_op_args op = {0}; + + op.op_type = NODEMASK_ONLINE; + op.arg1 = nodes; + op.arg2 = INVALID_PTR; + op.arg3 = INVALID_PTR; + return bpf_nodemask_op(&op, sizeof(op)); +} + 
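
The nodemask wrappers above compose naturally. As a brief sketch (not part of the patch; the helper name is invented for this example), a scheduler BPF program could narrow a preferred-node mask to the nodes that are currently online and convert the result into a cpumask:

static __always_inline long
libbpf_preferred_nodes_to_cpumask(nodemask_t *preferred, struct cpumask *cpus)
{
	nodemask_t online = {};
	nodemask_t allowed = {};

	/* allowed = preferred & online */
	libbpf_nodes_online(&online);
	libbpf_nodes_and(&allowed, preferred, &online);

	if (libbpf_nodemask_empty(&allowed))
		return -1;

	return libbpf_nodemask_to_cpumask(&allowed, cpus);
}

The resulting cpumask can then be handed to the existing cpumask helpers in this header, for example libbpf_cpumask_copy() just below.
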
static __always_inline long libbpf_cpumask_copy(struct cpumask *dst, struct cpumask *src) { @@ -507,4 +622,47 @@ static __always_inline int libbpf_sched_se_tag_of(struct sched_entity *se) return se_tag; } + +static __always_inline unsigned long libbpf_node_cfs_util_of(int nid) +{ + struct bpf_node_stats stats = {0}; + + bpf_get_node_stats(nid, &stats, sizeof(stats)); + return getVal(stats.util); +} + +static __always_inline unsigned long libbpf_node_cfs_capacity_of(int nid) +{ + struct bpf_node_stats stats = {0}; + + bpf_get_node_stats(nid, &stats, sizeof(stats)); + return getVal(stats.compute_capacity); +} + +static __always_inline unsigned int libbpf_node_weight_of(int nid) +{ + struct bpf_node_stats stats = {0}; + + bpf_get_node_stats(nid, &stats, sizeof(stats)); + return getVal(stats.weight); +} + +static __always_inline int +libbpf_mem_preferred_nid(struct task_struct *tsk, nodemask_t *preferred_node) +{ + struct bpf_relationship_get_args *stats; + int key = 0; + int ret; + + stats = bpf_map_lookup_elem(&map_rship_stats, &key); + if (!stats) + return NUMA_NO_NODE; + + ret = bpf_get_task_relationship_stats(tsk, &map_rship_stats, stats); + if (ret) + return NUMA_NO_NODE; + + *preferred_node = getVal(stats->mm.comm.preferred_node); + return 0; +} #endif
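
Finally, the node-statistics and relationship helpers are meant to be used together from a scheduler BPF program. The sketch below is illustrative only: libbpf_pick_preferred_node() and the NR_SCAN_NODES bound are inventions of this example (kept to a small constant so the bounded loop stays verifier-friendly), while libbpf_mem_preferred_nid(), libbpf_node_isset(), libbpf_node_cfs_util_of() and libbpf_node_cfs_capacity_of() are the helpers defined above.

#define NR_SCAN_NODES	8	/* assumed upper bound on NUMA node ids to scan */

/* Pick the preferred node with the most spare CFS capacity. */
static __always_inline int
libbpf_pick_preferred_node(struct task_struct *tsk)
{
	nodemask_t preferred = {};
	unsigned long util, cap, spare, best_spare = 0;
	int nid, best_nid = NUMA_NO_NODE;

	/* libbpf_mem_preferred_nid() returns 0 once *preferred is filled in. */
	if (libbpf_mem_preferred_nid(tsk, &preferred))
		return NUMA_NO_NODE;

	for (nid = 0; nid < NR_SCAN_NODES; nid++) {
		if (!libbpf_node_isset(nid, &preferred))
			continue;

		util = libbpf_node_cfs_util_of(nid);
		cap = libbpf_node_cfs_capacity_of(nid);
		spare = cap > util ? cap - util : 0;

		if (best_nid == NUMA_NO_NODE || spare > best_spare) {
			best_spare = spare;
			best_nid = nid;
		}
	}

	return best_nid;
}

When the program runs on behalf of the current task, the chosen node can be written back by setting it in a struct bpf_relationship_set_args via libbpf_node_set() and calling bpf_sched_set_curr_preferred_node(&args, sizeof(args)), as documented in the uapi comment above.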