From a2c6588cc5d57e8c3f713e5fe6240d555fb03c9c Mon Sep 17 00:00:00 2001 From: Yi Tao Date: Mon, 21 Aug 2023 11:32:26 +0800 Subject: [PATCH 1/8] anolis: bpf, cgroup: Introduce CGROUP_RICH_CONTAINER program type ANBZ: #6193 The functionality of the old rich container cannot be used in cgroup v2 and cannot meet the requirements of different container resource views. To address this issue, the functionality of cgroup v2 ebpf is leveraged, and a new program type is introduced to calculate the resources of the container within the ebpf program to meet the customized requirements of the container. By using BPF_PROG_TYPE_CGROUP_RICH_CONTAINER, the kernel passes a pointer to bpf_rich_container_info to the user ebpf program, which can customize the information to be written into CPU and memory, allowing the processes inside the container to see the specified information. Signed-off-by: Tianchen Ding Signed-off-by: Yi Tao Reviewed-by: Tianchen Ding Reviewed-by: Tony Lu Link: https://gitee.com/anolis/cloud-kernel/pulls/2065 --- include/linux/bpf-cgroup.h | 25 +++++++++++++++++ include/linux/bpf_types.h | 2 ++ include/linux/cgroup-defs.h | 6 ++++ include/uapi/linux/bpf.h | 5 ++++ kernel/bpf/cgroup.c | 56 +++++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 16 +++++++++++ kernel/bpf/verifier.c | 1 + 7 files changed, 111 insertions(+) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 99c0355c3007..202aac60ffde 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -22,6 +22,7 @@ struct bpf_cgroup_storage; struct ctl_table; struct ctl_table_header; struct task_struct; +struct bpf_rich_container_info; #ifdef CONFIG_CGROUP_BPF enum cgroup_bpf_attach_type { @@ -49,6 +50,8 @@ enum cgroup_bpf_attach_type { CGROUP_INET4_GETSOCKNAME, CGROUP_INET6_GETSOCKNAME, CGROUP_INET_SOCK_RELEASE, + CGROUP_RICH_CONTAINER_CPU, + CGROUP_RICH_CONTAINER_MEM, MAX_CGROUP_BPF_ATTACH_TYPE, ANOLIS_KABI_MAX_CG_BPF_ATTACH = 38 }; @@ -89,6 +92,8 @@ 
to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE); + CGROUP_ATYPE(CGROUP_RICH_CONTAINER_CPU); + CGROUP_ATYPE(CGROUP_RICH_CONTAINER_MEM); default: return CGROUP_BPF_ATTACH_TYPE_INVALID; } @@ -225,6 +230,10 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, char **buf, size_t *pcount, loff_t *ppos, enum cgroup_bpf_attach_type atype); +int __cgroup_bpf_run_filter_rich_container( + struct bpf_rich_container_info *info, + enum cgroup_bpf_attach_type atype); + int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level, int *optname, char __user *optval, int *optlen, char **kernel_optval); @@ -486,6 +495,22 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, __ret; \ }) +#define BPF_CGROUP_RUN_PROG_RICH_CONTAINER_CPU(info, retval) \ +({ \ + int __ret = retval; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_run_filter_rich_container(info, CGROUP_RICH_CONTAINER_CPU); \ + __ret; \ +}) + +#define BPF_CGROUP_RUN_PROG_RICH_CONTAINER_MEM(info, retval) \ +({ \ + int __ret = retval; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_run_filter_rich_container(info, CGROUP_RICH_CONTAINER_MEM); \ + __ret; \ +}) + int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, struct bpf_prog *prog); int cgroup_bpf_prog_detach(const union bpf_attr *attr, diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 4cfa1fc39f30..9f1208096527 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -56,6 +56,8 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl, struct bpf_sysctl, struct bpf_sysctl_kern) BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt, struct bpf_sockopt, struct bpf_sockopt_kern) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_RICH_CONTAINER, cg_rich_container, + struct bpf_rich_container_info, struct bpf_rich_container_info) #endif #ifdef 
CONFIG_BPF_LIRC_MODE2 BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2, diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 420dcf460b5c..8562d63c1520 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -671,6 +671,12 @@ struct cftype { CK_KABI_RESERVE(1) }; +struct bpf_rich_container_info { + cpumask_t cpus_mask; + struct sysinfo sysinfo; + struct sysinfo_ext sysinfo_ext; +}; + /* * Control Group subsystem type. * See Documentation/admin-guide/cgroup-v1/cgroups.rst for details diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index df47f25c5ce9..47a04320a614 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -201,6 +201,8 @@ enum bpf_prog_type { BPF_PROG_TYPE_EXT, BPF_PROG_TYPE_LSM, BPF_PROG_TYPE_SK_LOOKUP, + /* ======== anolis own features ======== */ + BPF_PROG_TYPE_CGROUP_RICH_CONTAINER = 0x1000, }; enum bpf_attach_type { @@ -242,6 +244,9 @@ enum bpf_attach_type { BPF_XDP_CPUMAP, BPF_SK_LOOKUP, BPF_XDP, + /* ======== anolis own features ======== */ + BPF_CGROUP_RICH_CONTAINER_CPU = 0x1000, + BPF_CGROUP_RICH_CONTAINER_MEM, __MAX_BPF_ATTACH_TYPE }; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 20fc960daa0c..779399e6a92f 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1313,6 +1313,19 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, return ret == 1 ? 
0 : -EPERM; } +int __cgroup_bpf_run_filter_rich_container(struct bpf_rich_container_info *info, + enum cgroup_bpf_attach_type atype) +{ + struct cgroup *cgrp; + int ret; + + rcu_read_lock(); + cgrp = task_dfl_cgroup(current); + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[atype], info, BPF_PROG_RUN); + rcu_read_unlock(); + return ret; +} + #ifdef CONFIG_NET static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp, enum cgroup_bpf_attach_type attach_type) @@ -1941,3 +1954,46 @@ const struct bpf_verifier_ops cg_sockopt_verifier_ops = { const struct bpf_prog_ops cg_sockopt_prog_ops = { }; + +static const struct bpf_func_proto * +rich_container_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + return cgroup_base_func_proto(func_id, prog) ? : bpf_tracing_func_proto(func_id, prog); +} + +static bool rich_container_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + int start_off, end_off; + + switch (prog->expected_attach_type) { + case BPF_CGROUP_RICH_CONTAINER_CPU: + start_off = offsetof(struct bpf_rich_container_info, cpus_mask); + end_off = offsetofend(struct bpf_rich_container_info, cpus_mask); + break; + case BPF_CGROUP_RICH_CONTAINER_MEM: + start_off = offsetof(struct bpf_rich_container_info, sysinfo); + end_off = offsetofend(struct bpf_rich_container_info, sysinfo_ext); + break; + default: + return false; + } + /* The whole [off, off + size) access must lie inside the allowed window. */ + if (off < start_off || off + size > end_off) + return false; + + if (off % size != 0) + return false; + + return true; +} + +const struct bpf_verifier_ops cg_rich_container_verifier_ops = { + .get_func_proto = rich_container_func_proto, + .is_valid_access = rich_container_is_valid_access, +}; + +const struct bpf_prog_ops cg_rich_container_prog_ops = { +}; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index da57b0322c1e..be9be6d4ddfb 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2096,6 +2096,14 @@ 
bpf_prog_load_check_attach(enum bpf_prog_type prog_type, default: return -EINVAL; } + case BPF_PROG_TYPE_CGROUP_RICH_CONTAINER: + switch (expected_attach_type) { + case BPF_CGROUP_RICH_CONTAINER_CPU: + case BPF_CGROUP_RICH_CONTAINER_MEM: + return 0; + default: + return -EINVAL; + } case BPF_PROG_TYPE_SK_LOOKUP: if (expected_attach_type == BPF_SK_LOOKUP) return 0; @@ -2152,6 +2160,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_LSM: case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */ case BPF_PROG_TYPE_EXT: /* extends any prog */ + case BPF_PROG_TYPE_CGROUP_RICH_CONTAINER: return true; default: return false; @@ -2958,6 +2967,7 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_SK_LOOKUP: + case BPF_PROG_TYPE_CGROUP_RICH_CONTAINER: return attach_type == prog->expected_attach_type ? 0 : -EINVAL; case BPF_PROG_TYPE_CGROUP_SKB: if (!capable(CAP_NET_ADMIN)) @@ -3013,6 +3023,9 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) return BPF_PROG_TYPE_FLOW_DISSECTOR; case BPF_CGROUP_SYSCTL: return BPF_PROG_TYPE_CGROUP_SYSCTL; + case BPF_CGROUP_RICH_CONTAINER_CPU: + case BPF_CGROUP_RICH_CONTAINER_MEM: + return BPF_PROG_TYPE_CGROUP_RICH_CONTAINER; case BPF_CGROUP_GETSOCKOPT: case BPF_CGROUP_SETSOCKOPT: return BPF_PROG_TYPE_CGROUP_SOCKOPT; @@ -3075,6 +3088,7 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: + case BPF_PROG_TYPE_CGROUP_RICH_CONTAINER: ret = cgroup_bpf_prog_attach(attr, ptype, prog); break; default: @@ -3112,6 +3126,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: + case BPF_PROG_TYPE_CGROUP_RICH_CONTAINER: return cgroup_bpf_prog_detach(attr, ptype); default: return -EINVAL; @@ 
-4132,6 +4147,7 @@ static int link_create(union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_CGROUP_SOCKOPT: + case BPF_PROG_TYPE_CGROUP_RICH_CONTAINER: ret = cgroup_bpf_link_attach(attr, prog); break; case BPF_PROG_TYPE_TRACING: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 707b9e8c2084..edd89cc79c87 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8394,6 +8394,7 @@ static int check_return_code(struct bpf_verifier_env *env) case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_CGROUP_SOCKOPT: + case BPF_PROG_TYPE_CGROUP_RICH_CONTAINER: break; case BPF_PROG_TYPE_RAW_TRACEPOINT: if (!env->prog->aux->attach_btf_id) -- Gitee From d2f7c80d52dcdda6f47b906355979099b8729c95 Mon Sep 17 00:00:00 2001 From: Yi Tao Date: Mon, 21 Aug 2023 11:39:36 +0800 Subject: [PATCH 2/8] anolis: x86: cpuinfo: Add cpuinfo support for bpf rich container ANBZ: #6193 Make /proc/cpuinfo container aware. 
Signed-off-by: Yi Tao Reviewed-by: Zelin Deng Link: https://gitee.com/anolis/cloud-kernel/pulls/2065 --- arch/x86/kernel/cpu/proc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index d2b2c19648f8..8a6b6a5d085c 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "cpu.h" @@ -72,8 +73,15 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned int cpu, index, total; int i; bool rich_container = false; + struct bpf_rich_container_info info = {0}; index = cpu = c->cpu_index; + + /* Get cpu mask and check it */ + if (!BPF_CGROUP_RUN_PROG_RICH_CONTAINER_CPU(&info, 1) && + !cpumask_test_cpu(cpu, &info.cpus_mask)) + return 0; + if (check_rich_container(cpu, &index, &rich_container, &total)) return 0; -- Gitee From 51212c992e33b55a1297528c5fad7e903cf2ddf7 Mon Sep 17 00:00:00 2001 From: Yi Tao Date: Mon, 21 Aug 2023 11:40:32 +0800 Subject: [PATCH 3/8] anolis: arm64: cpuinfo: Add cpuinfo support for bpf rich container ANBZ: #6193 Add arm64 cpuinfo support for rich container Signed-off-by: Yi Tao Reviewed-by: Baolin Wang Link: https://gitee.com/anolis/cloud-kernel/pulls/2065 --- arch/arm64/kernel/cpuinfo.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 072d112b1044..fb441954a2e8 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -25,6 +25,7 @@ #include #include #include +#include /* * In case the boot CPU is hotpluggable, we record its initial state and @@ -165,9 +166,15 @@ static int c_show(struct seq_file *m, void *v) for_each_online_cpu(i) { struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i); u32 midr = cpuinfo->reg_midr; + struct bpf_rich_container_info info = {0}; index = cpu = i; + /* Get cpu mask and check it */ + if (!BPF_CGROUP_RUN_PROG_RICH_CONTAINER_CPU(&info, 1) && + !cpumask_test_cpu(cpu, &info.cpus_mask)) 
+ continue; + if (check_rich_container(cpu, &index, &rich_container, &total)) continue; -- Gitee From 524a9b17014915c2d60940cfc70b18ea852e9c16 Mon Sep 17 00:00:00 2001 From: Yi Tao Date: Mon, 21 Aug 2023 11:41:36 +0800 Subject: [PATCH 4/8] anolis: sysfs/cpu: Add online cpus support for bpf rich container ANBZ: #6193 Make /sys/devices/system/cpu/online container aware. Signed-off-by: Yi Tao Reviewed-by: Tony Lu Link: https://gitee.com/anolis/cloud-kernel/pulls/2065 --- drivers/base/cpu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index b7dce8cf9dda..5022aa389a96 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "base.h" @@ -213,6 +214,7 @@ static ssize_t show_cpus_attr(struct device *dev, struct cpumask cpuset_allowed; struct task_struct __maybe_unused *scenario; bool rich_container; + struct bpf_rich_container_info info = {0}; rcu_read_lock(); rich_container = in_rich_container(current); @@ -234,6 +236,9 @@ static ssize_t show_cpus_attr(struct device *dev, } else cpumask_copy(&cpuset_allowed, ca->map); + + if (!BPF_CGROUP_RUN_PROG_RICH_CONTAINER_CPU(&info, 1)) + cpumask_copy(&cpuset_allowed, &info.cpus_mask); return cpumap_print_to_pagebuf(true, buf, &cpuset_allowed); } -- Gitee From 6951902fdc1ee309fc8181d8db4cee41b1832f62 Mon Sep 17 00:00:00 2001 From: Yi Tao Date: Mon, 21 Aug 2023 11:42:19 +0800 Subject: [PATCH 5/8] anolis: meminfo: Add meminfo support for bpf rich container ANBZ: #6193 Make /proc/meminfo container aware. 
Signed-off-by: Yi Tao Reviewed-by: Xu Yu Link: https://gitee.com/anolis/cloud-kernel/pulls/2065 --- fs/proc/meminfo.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 5e1a6be82aeb..2d17084fb26c 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -19,6 +19,7 @@ #include #include "internal.h" #include +#include void __attribute__((weak)) arch_report_meminfo(struct seq_file *m) { @@ -39,6 +40,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) struct mem_cgroup *memcg = NULL; struct sysinfo_ext ext; + struct bpf_rich_container_info info = {0}; #ifdef CONFIG_MEMCG rcu_read_lock(); @@ -81,6 +83,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v) memcg_meminfo(memcg, &i, &ext); } + if (!BPF_CGROUP_RUN_PROG_RICH_CONTAINER_MEM(&info, 1)) { + memcpy(&i, &info.sysinfo, sizeof(i)); + memcpy(&ext, &info.sysinfo_ext, sizeof(ext)); + } + committed = percpu_counter_read_positive(&vm_committed_as); sreclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B); sunreclaim = global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B); -- Gitee From db6de97f6055a246bb4254d77c045653a03e4ae4 Mon Sep 17 00:00:00 2001 From: Yi Tao Date: Mon, 21 Aug 2023 14:50:49 +0800 Subject: [PATCH 6/8] anolis: bpf: Sync linux/bpf.h to tools ANBZ: #6193 Newly added program, context type is used by tests in a subsequent patch. Synchronize the header file. 
Signed-off-by: Tianchen Ding Signed-off-by: Yi Tao Reviewed-by: Tianchen Ding Reviewed-by: Tony Lu Link: https://gitee.com/anolis/cloud-kernel/pulls/2065 --- tools/include/uapi/linux/bpf.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 81a23646a8e0..baa42a75aed4 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -201,6 +201,8 @@ enum bpf_prog_type { BPF_PROG_TYPE_EXT, BPF_PROG_TYPE_LSM, BPF_PROG_TYPE_SK_LOOKUP, + /* ======== anolis own features ======== */ + BPF_PROG_TYPE_CGROUP_RICH_CONTAINER = 0x1000, }; enum bpf_attach_type { @@ -242,6 +244,9 @@ enum bpf_attach_type { BPF_XDP_CPUMAP, BPF_SK_LOOKUP, BPF_XDP, + /* ======== anolis own features ======== */ + BPF_CGROUP_RICH_CONTAINER_CPU = 0x1000, + BPF_CGROUP_RICH_CONTAINER_MEM, __MAX_BPF_ATTACH_TYPE }; -- Gitee From c6b95222523fdf1a6a379a21d68d7267e1e342cb Mon Sep 17 00:00:00 2001 From: Yi Tao Date: Mon, 21 Aug 2023 14:51:29 +0800 Subject: [PATCH 7/8] anolis: libbpf: Add support for CGROUP_RICH_CONTAINER program type ANBZ: #6193 Make libbpf aware of the newly added program type, and assign it a section name. 
Signed-off-by: Tianchen Ding Signed-off-by: Yi Tao Reviewed-by: Tianchen Ding Reviewed-by: Tony Lu Link: https://gitee.com/anolis/cloud-kernel/pulls/2065 --- tools/lib/bpf/libbpf.c | 4 ++++ tools/lib/bpf/libbpf_probes.c | 3 +++ 2 files changed, 7 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2552dae45197..78326b8aa4a4 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8466,6 +8466,10 @@ static const struct bpf_sec_def section_defs[] = { BPF_PROG_SEC("struct_ops", BPF_PROG_TYPE_STRUCT_OPS), BPF_EAPROG_SEC("sk_lookup/", BPF_PROG_TYPE_SK_LOOKUP, BPF_SK_LOOKUP), + BPF_EAPROG_SEC("cgroup/rich_container_cpu",BPF_PROG_TYPE_CGROUP_RICH_CONTAINER, + BPF_CGROUP_RICH_CONTAINER_CPU), + BPF_EAPROG_SEC("cgroup/rich_container_mem",BPF_PROG_TYPE_CGROUP_RICH_CONTAINER, + BPF_CGROUP_RICH_CONTAINER_MEM), }; #undef BPF_PROG_SEC_IMPL diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index cd8c703dde71..cf311cd5da6e 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -84,6 +84,9 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, case BPF_PROG_TYPE_KPROBE: xattr.kern_version = get_kernel_version(); break; + case BPF_PROG_TYPE_CGROUP_RICH_CONTAINER: + xattr.expected_attach_type = BPF_CGROUP_RICH_CONTAINER_CPU; + break; case BPF_PROG_TYPE_UNSPEC: case BPF_PROG_TYPE_SOCKET_FILTER: case BPF_PROG_TYPE_SCHED_CLS: -- Gitee From 4ff67cf9a06612b9c0e7877801f5dbe9c53e0a04 Mon Sep 17 00:00:00 2001 From: Tianchen Ding Date: Mon, 21 Aug 2023 10:16:14 +0800 Subject: [PATCH 8/8] anolis: selftests/bpf: add test for rich_container ANBZ: #6193 Run test_rich_container and test_verifier to check the bpf_rich_container feature. 
Signed-off-by: Tianchen Ding Reviewed-by: Tony Lu Link: https://gitee.com/anolis/cloud-kernel/pulls/2065 --- tools/testing/selftests/bpf/Makefile | 4 +- .../selftests/bpf/progs/rich_container.c | 47 +++++ .../selftests/bpf/test_rich_container.c | 186 ++++++++++++++++++ .../selftests/bpf/verifier/rich_container.c | 79 ++++++++ 4 files changed, 315 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/bpf/progs/rich_container.c create mode 100644 tools/testing/selftests/bpf/test_rich_container.c create mode 100644 tools/testing/selftests/bpf/verifier/rich_container.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 1d9155533360..0919de5fb027 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -37,7 +37,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_cgroup_storage \ test_netcnt test_tcpnotify_user test_sysctl \ test_progs-no_alu32 \ - test_current_pid_tgid_new_ns + test_current_pid_tgid_new_ns test_rich_container # Also test bpf-gcc, if present ifneq ($(BPF_GCC),) @@ -173,6 +173,7 @@ $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c $(OUTPUT)/test_netcnt: cgroup_helpers.c $(OUTPUT)/test_sock_fields: cgroup_helpers.c $(OUTPUT)/test_sysctl: cgroup_helpers.c +$(OUTPUT)/test_rich_container: cgroup_helpers.c BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ @@ -461,6 +462,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \ $(OUTPUT)/bench_ringbufs.o $(call msg,BINARY,,$@) $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS) +$(OUTPUT)/test_rich_container: $(OUTPUT)/rich_container.skel.h EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ diff --git a/tools/testing/selftests/bpf/progs/rich_container.c b/tools/testing/selftests/bpf/progs/rich_container.c new file mode 100644 index 
000000000000..5af39e9d0242 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/rich_container.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Alibaba, Inc. */ + +#include +#include +#include + +#define MAX_NR_CPUS 1024 +#define NR_ARR (MAX_NR_CPUS / 64) + +/* + * 0: no cpus + * 1: only 1 cpu + * 2: all cpus + */ +int test_mode; + +static void set_cpu_masks(cpumask_t *cpumask, u64 mask) +{ + const size_t len = bpf_core_type_size(*cpumask) / 8; + int i; + + #pragma unroll + for (i = 0; i < NR_ARR; i++) { + if (i < len) + cpumask->bits[i] = mask; + } +} + +SEC("cgroup/rich_container_cpu") +int bpf_prog1(struct bpf_rich_container_info *ctx) +{ + cpumask_t *cpumask = &ctx->cpus_mask; + + if (test_mode == 2) { + set_cpu_masks(cpumask, -1UL); + return 0; + } + + set_cpu_masks(cpumask, 0); + if (test_mode) + cpumask->bits[0] = 1; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_rich_container.c b/tools/testing/selftests/bpf/test_rich_container.c new file mode 100644 index 000000000000..baa48ca407ff --- /dev/null +++ b/tools/testing/selftests/bpf/test_rich_container.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2023 Alibaba, Inc. 
+ */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "rich_container.skel.h" +#include "cgroup_helpers.h" +#include "bpf_rlimit.h" + +#define RICH_CONTAINER_PROG "./rich_container.o" +#define TEST_CGROUP "/test-bpf-rich-container/" +#define SYSPATH "/sys/devices/system/cpu/online" + +static char bpf_log_buf[BPF_LOG_BUF_SIZE]; + +static int load_empty_prog(enum bpf_prog_type prog_type, enum bpf_attach_type attach_type, + bool expected_accept) +{ + struct bpf_load_program_attr attr = {}; + int ret; + struct bpf_insn prog[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + + attr.prog_type = prog_type; + attr.expected_attach_type = attach_type; + attr.insns = prog; + attr.insns_cnt = insns_cnt; + attr.license = "GPL"; + attr.log_level = 2; + + ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE); + if (expected_accept && ret < 0) + fprintf(stderr, "%s\n", bpf_log_buf); + + return ret; +} + +static int get_cpu_info(char *buf, size_t buflen) +{ + ssize_t len; + int sysfd; + + sysfd = open(SYSPATH, O_RDONLY); + if (sysfd < 0) { + printf("Failed to open "SYSPATH"\n"); + return -1; /* callers treat ret <= 0 as failure */ + } + + len = read(sysfd, buf, buflen); + close(sysfd); + if (len <= 0) { + printf("Failed to read "SYSPATH"\n"); + return len; + } + + buf[len-1] = '\0'; + return len; +} + +int main(int argc, char **argv) +{ + struct rich_container *skel = NULL; + struct bpf_link *link = NULL; + int error = EXIT_FAILURE; + int ret; + int prog_fd, cgroup_fd; + char origin_cpus[PATH_MAX], now_cpus[PATH_MAX]; + + ret = get_cpu_info(origin_cpus, sizeof(origin_cpus)); + if (ret <= 0) + return 1; + + /* + * For empty prog, we only test REJECT cases here, + * since ACCEPT cases have been tested in test_verifier. 
+ */ + /* test wrong expected_attach_type: the load must fail (fd < 0) with EINVAL */ + ret = load_empty_prog(BPF_PROG_TYPE_CGROUP_RICH_CONTAINER, BPF_CGROUP_INET_INGRESS, false); + if (ret >= 0 || errno != EINVAL) { + printf("Unexpected load result, ret=%d, errno=%d\n", ret, errno); + return 1; + } + + skel = rich_container__open_and_load(); + if (!skel) { + printf("Failed to open and load object\n"); + return 1; + } + + cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); + if (cgroup_fd < 0) { + printf("Failed to create test cgroup\n"); + goto out; + } + + /* test wrong attach_type */ + prog_fd = load_empty_prog(BPF_PROG_TYPE_CGROUP_RICH_CONTAINER, + BPF_CGROUP_RICH_CONTAINER_CPU, true); + if (prog_fd < 0) { + printf("Failed to load empty prog\n"); + goto clean_cg; + } + ret = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_INGRESS, 0); + if (!ret || errno != EINVAL) { + printf("Unexpected attach result, ret=%d, errno=%d\n", ret, errno); + goto clean_cg; + } + + ret = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_RICH_CONTAINER_MEM, 0); + if (!ret || errno != EINVAL) { + printf("Unexpected attach result, ret=%d, errno=%d\n", ret, errno); + goto clean_cg; + } + + /* test wrong prog_type */ + prog_fd = load_empty_prog(BPF_PROG_TYPE_KPROBE, 0, true); + if (prog_fd < 0) { + printf("Failed to load empty prog\n"); + goto clean_cg; + } + ret = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_RICH_CONTAINER_CPU, 0); + if (!ret || errno != EINVAL) { + printf("Unexpected attach result, ret=%d, errno=%d\n", ret, errno); + goto clean_cg; + } + + /* Attach cpu program */ + link = bpf_program__attach_cgroup(skel->progs.bpf_prog1, cgroup_fd); + if (IS_ERR(link)) { + printf("Failed to attach cgroup\n"); + goto clean_cg; + } + + ret = get_cpu_info(now_cpus, sizeof(now_cpus)); + if (ret <= 0) + goto clean_cg; + + if (strcmp(now_cpus, "")) { + printf("Test rich_container_cpu failed! 
Expect empty string but get %s\n", + now_cpus); + goto clean_cg; + } + + skel->bss->test_mode = 1; + ret = get_cpu_info(now_cpus, sizeof(now_cpus)); + if (ret <= 0) + goto clean_cg; + + if (strcmp(now_cpus, "0")) { + printf("Test rich_container_cpu failed! Expect 0 but get %s\n", now_cpus); + goto clean_cg; + } + + skel->bss->test_mode = 2; + ret = get_cpu_info(now_cpus, sizeof(now_cpus)); + if (ret <= 0) + goto clean_cg; + + if (strcmp(now_cpus, origin_cpus)) { + printf("Test rich_container_cpu failed! Expect %s but get %s\n", + origin_cpus, now_cpus); + goto clean_cg; + } + + error = 0; + printf("test_rich_container:PASS\n"); + +clean_cg: + cleanup_cgroup_environment(); + +out: + bpf_link__destroy(link); + rich_container__destroy(skel); + return error; +} diff --git a/tools/testing/selftests/bpf/verifier/rich_container.c b/tools/testing/selftests/bpf/verifier/rich_container.c new file mode 100644 index 000000000000..956d56c01f35 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/rich_container.c @@ -0,0 +1,79 @@ +{ + "rich_container: true expected_attach_type(cpu)", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_RICH_CONTAINER, + .expected_attach_type = BPF_CGROUP_RICH_CONTAINER_CPU, + .result = ACCEPT, +}, +{ + "rich_container: true expected_attach_type(mem)", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_RICH_CONTAINER, + .expected_attach_type = BPF_CGROUP_RICH_CONTAINER_MEM, + .result = ACCEPT, +}, +{ + "rich_container: invalid ctx access(cpu) 1", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 240), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_RICH_CONTAINER, + .expected_attach_type = BPF_CGROUP_RICH_CONTAINER_CPU, + .result = REJECT, + .errstr = "invalid bpf_context access", +}, +{ + "rich_container: invalid ctx access(cpu) 2", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 
10000), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_RICH_CONTAINER, + .expected_attach_type = BPF_CGROUP_RICH_CONTAINER_CPU, + .result = REJECT, + .errstr = "invalid bpf_context access", +}, +{ + "rich_container: invalid ctx access(mem) 1", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_RICH_CONTAINER, + .expected_attach_type = BPF_CGROUP_RICH_CONTAINER_MEM, + .result = REJECT, + .errstr = "invalid bpf_context access", +}, +{ + "rich_container: invalid ctx access(mem) 2", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 10000), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_RICH_CONTAINER, + .expected_attach_type = BPF_CGROUP_RICH_CONTAINER_MEM, + .result = REJECT, + .errstr = "invalid bpf_context access", +}, +{ + "rich_container: invalid return value", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_CGROUP_RICH_CONTAINER, + .expected_attach_type = BPF_CGROUP_RICH_CONTAINER_CPU, + .result = REJECT, + .errstr = "At program exit the register R0", +}, -- Gitee