From cbdbb01be3cef9bc3e85752dd7f57442d5d614de Mon Sep 17 00:00:00 2001 From: "qianlu.kk" Date: Thu, 24 Oct 2024 19:12:17 +0800 Subject: [PATCH 01/24] add var --- CMakeLists.txt | 7 ++++++- src/CMakeLists.txt | 8 ++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c2e6b10..a1ca2ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,11 +14,16 @@ option(BUILD_TESTING "Build test cases" OFF) option(BUILD_EXAMPLE "Build example cases" OFF) option(BUILD_LCC "Build lcc project" OFF) OPTION(ENABLE_GCOV "Enable gcov" OFF) +option(ENABLE_ASAN "Enable asan" OFF) +option(ENABLE_PROFILE "Enable profile" OFF) +option(ENABLE_STATIC_LINK_ELF "Enable static link libelf" OFF) IF (ENABLE_GCOV) SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fprofile-arcs -ftest-coverage") ENDIF() +message(STATUS "coolbpf C compile flags: ${CMAKE_C_FLAGS}") + # compile coolbpf library add_subdirectory(src) # test cases @@ -29,4 +34,4 @@ add_subdirectory(tools) if(BUILD_LCC) add_subdirectory(lcc) add_subdirectory(third) -endif() \ No newline at end of file +endif() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 315f585..ceef4bf 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -19,7 +19,9 @@ else(ELF_LIBRARY) endif(ELF_LIBRARY) add_subdirectory(bpf) -add_subdirectory(profiler) +if (ENABLE_PROFILE) + add_subdirectory(profiler) +endif() configure_file(${CMAKE_CURRENT_SOURCE_DIR}/coolbpf.pc.in ${CMAKE_CURRENT_BINARY_DIR}/coolbpf.pc @ONLY) @@ -73,6 +75,8 @@ install(FILES ${uapi_headers} DESTINATION ${INSTALL_INCLUDE_DIR}/coolbpf/linux) install(FILES ${PROJECT_SOURCE_DIR}/arch/${CMAKE_SYSTEM_PROCESSOR}/vmlinux.h DESTINATION ${INSTALL_INCLUDE_DIR}/coolbpf) +if (ENABLE_PROFILE) # install libprofiler install(FILES ${CMAKE_CURRENT_BINARY_DIR}/profiler/release/libprofiler.so - DESTINATION ${INSTALL_LIB_DIR}) \ No newline at end of file + DESTINATION ${INSTALL_LIB_DIR}) +endif() -- Gitee From b10fa33eebfe05654fa5b0161ecb3a424309f349 Mon Sep 17 00:00:00 2001 From: "qianlu.kk" Date: Thu, 5 Dec 2024 18:03:09 +0800 Subject: [PATCH 02/24] enable cgroup name filter && opt conn stats event report --- src/bpf/net.bpf.c | 246 ++++++++++++++++++++++++++++++++++++++-------- src/net.c | 50 ++++++++++ src/net.h | 24 ++++- 3 files changed, 278 insertions(+), 42 deletions(-) diff --git a/src/bpf/net.bpf.c b/src/bpf/net.bpf.c index dd20695..922cbdf 100644 --- a/src/bpf/net.bpf.c +++ b/src/bpf/net.bpf.c @@ -169,6 +169,22 @@ struct __uint(max_entries, 1); } connect_info_heap SEC(".maps"); +struct +{ + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, u32); + __type(value, struct container_id_key); + __uint(max_entries, 1); +} container_id_heap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LPM_TRIE); + __uint(max_entries, 1024); + __type(key, __u8[sizeof(struct container_id_key)]); // Need to specify as byte array as wouldn't take struct as key type + __type(value, __u8); + __uint(map_flags, BPF_F_NO_PREALLOC); +} enable_container_ids SEC(".maps"); + struct trace_event_raw_sys_enter_comp { struct trace_entry ent; @@ -217,6 +233,51 @@ static __always_inline void set_addr_pair_by_sock(struct sock *sk, struct addr_p ap->dport = bpf_ntohs(ap->dport); } +static __always_inline bool match_container_id(struct connect_info_t* conn_info) +{ + u32 index = ContainerIdIndex; + int64_t *cid_prefix_length = bpf_map_lookup_elem(&config_tgid_map, &index); + if (cid_prefix_length == NULL) { + bpf_printk("cid_prefix_length null! 
pid:%u\n", conn_info->conn_id.tgid); + return true; + } + + u32 trim_len = *cid_prefix_length; + if (trim_len <= 0 || trim_len > KN_NAME_LENGTH) { + bpf_printk("trim_len invalid! pid:%u trim_len:%u\n", conn_info->conn_id.tgid, trim_len); + return false; + } + + if (conn_info->docker_id_length == 0) { + bpf_printk("dockerid length is zero! pid:%u docker_id_length:%u\n", conn_info->conn_id.tgid, conn_info->docker_id_length); + return false; + } + int length = conn_info->docker_id_length >= KN_NAME_LENGTH? KN_NAME_LENGTH : conn_info->docker_id_length; + int real_length = length - trim_len; + if (real_length <=0 ) { + bpf_printk("reallen invalid! pid:%u real_length:%u\n", conn_info->conn_id.tgid, real_length); + return false; + } + if (real_length >= CONTAINER_ID_MAX_LENGTH) real_length = CONTAINER_ID_MAX_LENGTH; + + // check config + u32 zero = 0; + struct container_id_key* prefix = bpf_map_lookup_elem(&container_id_heap, &zero); + if (!prefix) return false; + __builtin_memset(prefix, 0, sizeof(struct container_id_key)); + bpf_printk("after memset! pid:%u, cgroup:%s, real_length:%u \n", conn_info->conn_id.tgid, prefix->data, real_length); + bpf_probe_read(prefix->data, real_length, conn_info->docker_id + trim_len); + prefix->prefixlen = real_length << 3; + __u8* ppass = bpf_map_lookup_elem(&enable_container_ids, prefix); + if (ppass) { + bpf_printk("bingo! pid:%u, cgroup:%s, prefix:%u \n", conn_info->conn_id.tgid, prefix->data, prefix->prefixlen); + // in whitelist + return true; + } + bpf_printk("blacklist! pid:%u, cgroup:%s, prefix:%u \n", conn_info->conn_id.tgid, prefix->data, prefix->prefixlen); + return false; +} + static __always_inline enum support_tgid_e match_tgid(const uint32_t tgid) { u32 index = TgidIndex; @@ -249,6 +310,83 @@ static __always_inline enum support_tgid_e match_tgid(const uint32_t tgid) return TgidUnmatch; } +#ifndef unlikely +# define unlikely(X) __builtin_expect(!!(X), 0) +#endif + +static __always_inline const char *get_cgroup_name(const struct cgroup *cgrp) +{ + const char *name; + + if (unlikely(!cgrp)) + return NULL; + + if (BPF_CORE_READ_INTO(&name, cgrp, kn, name) != 0) + return NULL; + + return name; +} + +#define EVENT_ERROR_CGROUP_NAME 0x010000 +#define EVENT_ERROR_CGROUPS 0x100000 +#define EVENT_ERROR_CGROUP_SUBSYSCGRP 0x040000 +#define EVENT_ERROR_CGROUP_SUBSYS 0x080000 +#define VALID_HEX_LENGTH 64 + +// Function to check if a character is a hex digit [a-f0-9] +static __always_inline bool is_hex_digit(char c) { + return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'); +} + +/* Gather current task cgroup name */ +static __always_inline __u32 __event_get_current_cgroup_name(struct cgroup *cgrp, struct connect_info_t * conn_info) +{ + const char *name; + + name = get_cgroup_name(cgrp); + conn_info->docker_id_length = 0; + if (!name) return EVENT_ERROR_CGROUP_NAME; + + int ret = bpf_probe_read_str(conn_info->docker_id, KN_NAME_LENGTH, name); + bpf_printk("pid:%u docker_id:%s ret:%u \n", conn_info->conn_id.tgid, conn_info->docker_id, ret); + conn_info->docker_id_length = ret; + + return name ? 
0 : EVENT_ERROR_CGROUP_NAME; +} + +static __always_inline struct cgroup * +get_task_cgroup(struct task_struct *task) +{ + __u32 subsys_idx = 0; + __u32 flags = 0; + struct cgroup_subsys_state *subsys; + struct css_set *cgroups; + struct cgroup *cgrp = NULL; + + bpf_probe_read(&cgroups, sizeof(cgroups), __builtin_preserve_access_index(&task->cgroups)); + if (unlikely(!cgroups)) { + flags |= EVENT_ERROR_CGROUPS; + return cgrp; + } + + if (unlikely(subsys_idx > pids_cgrp_id)) { + flags |= EVENT_ERROR_CGROUP_SUBSYS; + return cgrp; + } + + bpf_probe_read(&subsys, sizeof(subsys), __builtin_preserve_access_index(&cgroups->subsys[subsys_idx])); + if (unlikely(!subsys)) { + flags |= EVENT_ERROR_CGROUP_SUBSYS; + return cgrp; + } + + bpf_probe_read(&cgrp, sizeof(cgrp), __builtin_preserve_access_index(&subsys->cgroup)); + if (!cgrp) + flags |= EVENT_ERROR_CGROUP_SUBSYSCGRP; + + return cgrp; +} + static __always_inline uint64_t get_start_time() { struct task_struct *task = (struct task_struct *)bpf_get_current_task(); @@ -258,11 +396,24 @@ static __always_inline uint64_t get_start_time() sizeof(struct task_struct *), (uint8_t *)task + gl_off); - uint64_t st_off = offsetof(struct task_struct, start_time); uint64_t start_time = 0; - bpf_probe_read(&start_time, + + if (bpf_core_field_exists(group_leader_ptr->start_time)) + { + uint64_t st_off = offsetof(struct task_struct, start_time); + bpf_probe_read(&start_time, + sizeof(uint64_t), + (uint8_t *)group_leader_ptr + st_off); + } + else if (bpf_core_field_exists(group_leader_ptr->start_boottime)) + { + uint64_t st_off = offsetof(struct task_struct, start_boottime); + bpf_probe_read(&start_time, sizeof(uint64_t), (uint8_t *)group_leader_ptr + st_off); + } else { + start_time = bpf_ktime_get_ns(); + } return start_time; // return nsec_to_clock_t(start_time); @@ -281,7 +432,7 @@ static __always_inline void init_conn_id(uint32_t tgid, conn_id->fd = fd; // currently use kernel time for connection id. 
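    // bpf_ktime_get_ns() is monotonic since boot, which keeps the start
    // timestamp of a connection id unique for the lifetime of a boot. The
    // commented-out get_start_time() alternative below would instead use the
    // thread-group leader's start_time (falling back to start_boottime via
    // the CO-RE field check added in get_start_time() above).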
conn_id->start = bpf_ktime_get_ns(); - ; + // conn_id->start = get_start_time(); } static __always_inline void init_conn_info(uint32_t tgid, @@ -293,6 +444,11 @@ static __always_inline void init_conn_info(uint32_t tgid, conn_info->addr.sa.sa_family = AF_UNKNOWN; conn_info->is_sample = true; conn_info->protocol = ProtoUnknown; + struct task_struct *task = bpf_get_current_task(); + struct cgroup *cgrp = get_task_cgroup(task); + if (!cgrp) + return; + __event_get_current_cgroup_name(cgrp, conn_info); } static __always_inline int32_t get_buf_32(const char *buf) @@ -912,7 +1068,9 @@ static __always_inline void try_event_output(void *ctx, struct connect_info_t *i struct conn_data_event_t *data = &info->wr_min_ts; data->conn_id = info->conn_id; u64 total_size = (u64)(&data->msg[0]) - (u64)data + info->request_len + info->response_len; - bpf_perf_event_output(ctx, &connect_data_events_map, BPF_F_CURRENT_CPU, data, total_size & (PACKET_MAX_SIZE * 2 - 1)); + if (match_container_id(info)) { + bpf_perf_event_output(ctx, &connect_data_events_map, BPF_F_CURRENT_CPU, data, total_size & (PACKET_MAX_SIZE * 2 - 1)); + } } reset_sock_info(info); } @@ -946,6 +1104,8 @@ static __always_inline struct conn_stats_event_t *add_conn_stats(struct connect_ } event->conn_id = conn_info->conn_id; + event->protocol = conn_info->protocol; + bpf_probe_read_str(event->docker_id, KN_NAME_LENGTH, conn_info->docker_id); event->addr = conn_info->addr; event->role = conn_info->role; event->wr_bytes = conn_info->wr_bytes; @@ -1212,6 +1372,43 @@ static __always_inline enum support_role_e get_sock_role(const struct socket *so return max_ack_backlog == 0 ? IsClient : IsServer; } + +static __always_inline void output_conn_stats(struct trace_event_raw_sys_exit_comp *ctx, + struct connect_info_t *conn_info, + enum support_direction_e direction, + ssize_t return_bytes, bool force) +{ + switch (direction) + { + case DirEgress: + conn_info->wr_bytes += return_bytes; + conn_info->wr_pkts++; + break; + case DirIngress: + conn_info->rd_bytes += return_bytes; + conn_info->rd_pkts++; + break; + } + + uint64_t total_bytes = conn_info->wr_bytes + conn_info->rd_bytes; + uint32_t total_pkts = conn_info->wr_pkts + conn_info->rd_pkts; + + bool real_threshold = (total_bytes >= conn_info->last_output_rd_bytes + conn_info->last_output_wr_bytes + ConnStatsBytesThreshold) || (total_pkts >= conn_info->last_output_rd_pkts + conn_info->last_output_wr_pkts + ConnStatsPacketsThreshold); + if (real_threshold || force || !conn_info->ever_sent) + { + struct conn_stats_event_t *event = add_conn_stats(conn_info); + if (event != NULL) + { + bpf_perf_event_output(ctx, &connect_stats_events_map, BPF_F_CURRENT_CPU, event, sizeof(struct conn_stats_event_t)); + } + conn_info->last_output_wr_bytes = conn_info->wr_bytes; + conn_info->last_output_rd_bytes = conn_info->rd_bytes; + conn_info->last_output_wr_pkts = conn_info->wr_pkts; + conn_info->last_output_rd_pkts = conn_info->rd_pkts; + conn_info->ever_sent = true; + } +} + static __always_inline void add_one_conn(struct trace_event_raw_sys_exit_comp *ctx, const struct sockaddr *addr, const struct socket *socket, @@ -1224,6 +1421,10 @@ static __always_inline void add_one_conn(struct trace_event_raw_sys_exit_comp *c return; } + conn_info->ever_sent = false; + + // __builtin_memset(conn_info, 0, sizeof(struct connect_info_t)); + uint32_t tgid = tg_role->tgid; int32_t fd = tg_role->fd; enum support_role_e role = tg_role->role; @@ -1261,6 +1462,7 @@ static __always_inline void add_one_conn(struct 
trace_event_raw_sys_exit_comp *c uint64_t tgid_fd = combine_tgid_fd(tgid, fd); // net_bpf_print("start ====add_conn\n"); bpf_map_update_elem(&connect_info_map, &tgid_fd, conn_info, BPF_ANY); + output_conn_stats(ctx, conn_info, DirUnknown, 0, true); bpf_map_update_elem(&socket_pidfd_map, &socket, &tgid_fd, BPF_ANY); if (!need_trace_family(conn_info->addr.sa.sa_family)) { @@ -1282,40 +1484,6 @@ static __always_inline void add_one_conn(struct trace_event_raw_sys_exit_comp *c #endif } -static __always_inline void output_conn_stats(struct trace_event_raw_sys_exit_comp *ctx, - struct connect_info_t *conn_info, - enum support_direction_e direction, - ssize_t return_bytes) -{ - switch (direction) - { - case DirEgress: - conn_info->wr_bytes += return_bytes; - conn_info->wr_pkts++; - break; - case DirIngress: - conn_info->rd_bytes += return_bytes; - conn_info->rd_pkts++; - break; - } - - uint64_t total_bytes = conn_info->wr_bytes + conn_info->rd_bytes; - uint32_t total_pkts = conn_info->wr_pkts + conn_info->rd_pkts; - - bool real_threshold = (total_bytes >= conn_info->last_output_rd_bytes + conn_info->last_output_wr_bytes + ConnStatsBytesThreshold) || (total_pkts >= conn_info->last_output_rd_pkts + conn_info->last_output_wr_pkts + ConnStatsPacketsThreshold); - if (real_threshold) - { - struct conn_stats_event_t *event = add_conn_stats(conn_info); - if (event != NULL) - { - bpf_perf_event_output(ctx, &connect_stats_events_map, BPF_F_CURRENT_CPU, event, sizeof(struct conn_stats_event_t)); - } - conn_info->last_output_wr_bytes = conn_info->wr_bytes; - conn_info->last_output_rd_bytes = conn_info->rd_bytes; - conn_info->last_output_wr_pkts = conn_info->wr_pkts; - conn_info->last_output_rd_pkts = conn_info->rd_pkts; - } -} static __always_inline void add_close_event(struct trace_event_raw_sys_exit_comp *ctx, struct connect_info_t *conn_info) { @@ -1641,8 +1809,8 @@ static __always_inline void trace_exit_data(struct trace_event_raw_sys_exit_comp handle_server_recv_request(conn_info); } } + output_conn_stats(ctx, conn_info, direction, return_bytes, false); try_event_output(ctx, conn_info, direction); - output_conn_stats(ctx, conn_info, direction, return_bytes); // if (!conn_info->is_sample) // { // return; diff --git a/src/net.c b/src/net.c index 007316d..398d0c5 100644 --- a/src/net.c +++ b/src/net.c @@ -87,6 +87,7 @@ static struct net_env_t struct perf_buffer *pbs[MAX_HAND]; struct callback_t callback[MAX_HAND]; int32_t page_count[MAX_HAND]; + int32_t cid_prefix_len; struct lost_callback_t lost_callback; net_print_fn_t libbpf_print; char version[64]; @@ -252,6 +253,19 @@ static void handle_lost_stat_event(void *ctx, int cpu, __u64 lost_cnt) } } +static int user_config_cid(int config_fd) +{ + int ret; + uint32_t index = ContainerIdIndex; + ret = bpf_map_update_elem(config_fd, &index, &env.cid_prefix_len, BPF_ANY); + if (ret) + net_log(LOG_TYPE_WARN, "Could not update map for cid prefix len %d: %s\n", env.cid_prefix_len, strerror(-ret)); + else + net_log(LOG_TYPE_INFO, "success to update map for cid prefix len: %d\n", env.cid_prefix_len); + + return ret; +} + static int user_config_tgid(int config_fd) { int ret; @@ -504,6 +518,11 @@ void ebpf_config(int32_t opt1, int32_t opt2, int32_t params_count, value = (int32_t *)(params[0]); env.page_count[opt2] = *value; break; + case CONTAINER_ID_FILTER: + value = (int32_t *)(params[0]); + env.cid_prefix_len = *value; + user_config_cid(bpf_map__fd(obj->maps.config_tgid_map)); + break; defaults: user_config_proto(bpf_map__fd(obj->maps.config_protocol_map)); 
user_config_tgid(bpf_map__fd(obj->maps.config_tgid_map)); @@ -659,3 +678,34 @@ void ebpf_disable_process(uint32_t pid, bool drop) void ebpf_update_conn_role(struct connect_id_t *conn_id, enum support_role_e role_type) { } + +bool ebpf_set_cid_filter(const char* container_id, size_t length, bool update) +{ + struct net_bpf *obj = env.obj; + int map_fd = bpf_map__fd(obj->maps.enable_container_ids); + + // Prepare the key for update/delete + struct container_id_key key = { + .prefixlen = CONTAINER_ID_MAX_LENGTH * 8 // Full length as prefix length in bits + }; + memset(key.data, 0, CONTAINER_ID_MAX_LENGTH); + memcpy(key.data, container_id, length); + __u8 value = 1; + bool ret; + + if (update) { + ret = bpf_map_update_elem(map_fd, &key, &value, BPF_ANY); + if (ret) { + net_log(LOG_TYPE_WARN, "Failed to update element: %s\n", strerror(errno)); + return false; + } + } else { + ret = bpf_map_delete_elem(map_fd, &key); + if (ret) { + net_log(LOG_TYPE_WARN, "Failed to delete element: %s\n", strerror(errno)); + return false; + } + } + + return true; +} diff --git a/src/net.h b/src/net.h index e38dc0a..f0c0384 100644 --- a/src/net.h +++ b/src/net.h @@ -43,6 +43,7 @@ enum support_role_e enum tgid_config_e { TgidIndex = 0, + ContainerIdIndex = 1, TgidNum, }; @@ -151,6 +152,7 @@ union sockaddr_t struct sockaddr_in6 in6; }; +#define KN_NAME_LENGTH 128 struct connect_id_t { int32_t fd; @@ -188,7 +190,9 @@ struct conn_stats_event_t struct connect_id_t conn_id; union sockaddr_t addr; struct socket_info si; + enum support_proto_e protocol; enum support_role_e role; + char docker_id[KN_NAME_LENGTH]; int64_t wr_bytes; int64_t rd_bytes; int32_t wr_pkts; @@ -206,6 +210,7 @@ struct conn_data_event_t uint64_t start_ts; uint64_t end_ts; enum support_proto_e protocol; + enum support_role_e role; uint16_t request_len; uint16_t response_len; #ifdef __VMLINUX_H__ @@ -222,8 +227,9 @@ struct connect_info_t struct connect_id_t conn_id; union sockaddr_t addr; struct socket_info si; - enum support_role_e role; enum support_type_e type; + int32_t docker_id_length; + char docker_id[KN_NAME_LENGTH]; int64_t wr_bytes; int64_t rd_bytes; int32_t wr_pkts; @@ -237,6 +243,7 @@ struct connect_info_t size_t prev_count; char prev_buf[4]; bool try_to_prepend; + bool ever_sent; bool is_sample; uint64_t rt; @@ -247,6 +254,7 @@ struct connect_info_t uint64_t start_ts; uint64_t end_ts; enum support_proto_e protocol; + enum support_role_e role; uint16_t request_len; uint16_t response_len; char msg[PACKET_MAX_SIZE * 3]; @@ -302,13 +310,19 @@ struct config_info_t int32_t data_sample; }; +#define CONTAINER_ID_MAX_LENGTH 64 +struct container_id_key { + uint32_t prefixlen; + uint8_t data[CONTAINER_ID_MAX_LENGTH]; +}; + #ifndef __VMLINUX_H__ enum callback_type_e { - CTRL_HAND = 0, + STAT_HAND = 0, INFO_HANDLE, - STAT_HAND, + CTRL_HAND, #ifdef NET_TEST TEST_HAND, #endif @@ -344,6 +358,7 @@ enum ebpf_config_primary_e // 采样的策略:tcp的包,连接建立的ns时间 % 100, 小于采样率即为需要上传,大于的话对该连接进行标记,不上传Data、Ctrl(统计数据还是要上传) // udp的包,接收到数据包的ns时间 % 100, 小于采样率即为需要上传,大于的话不上传Data(统计数据还是要上传 @note 要注意统计数据Map的清理策略) PERF_BUFFER_PAGE, // ring buffer page count, 默认128个页,也就是512KB, opt2 的类型是 callback_type_e + CONTAINER_ID_FILTER, // container id filter, 不配置则全部采集,如果需要开启,则 value 需要设置为 cgroup name 的前缀长度 }; // opt1 列表: // AddProtocolFilter、RemoveProtocolFilter @@ -425,6 +440,9 @@ void ebpf_update_conn_addr(struct connect_id_t *conn_id, union sockaddr_t *dest_ // 更新process 观察范围,动态增加pid,drop 为true 是进行删除操作。 void ebpf_disable_process(uint32_t pid, bool drop); +// 更新containerid 观察范围,动态增加pid,drop 
为true 是进行删除操作。 +bool ebpf_set_cid_filter(const char* container_id, size_t length, bool update); + // 更新conn对应的角色,某些协议内核态无法判断角色 void ebpf_update_conn_role(struct connect_id_t *conn_id, enum support_role_e role_type); -- Gitee From b48272fd99311ea1c6d579de91c9d27b516d0eb1 Mon Sep 17 00:00:00 2001 From: "qianlu.kk" Date: Thu, 2 Jan 2025 17:41:05 +0800 Subject: [PATCH 03/24] adapt to loongcollector compile --- src/CMakeLists.txt | 10 + src/bpf/net.bpf.c | 2 +- src/security/CMakeLists.txt | 3 + src/security/addr_lpm_maps.h | 71 ++ src/security/api.h | 316 ++++++ src/security/bpf_cgroup.h | 429 ++++++++ src/security/bpf_common.h | 79 ++ src/security/bpf_cred.h | 83 ++ src/security/bpf_elf.h | 56 + src/security/bpf_event.h | 21 + src/security/bpf_exit.h | 89 ++ src/security/bpf_head.h | 141 +++ src/security/bpf_process_event.h | 606 +++++++++++ src/security/bpf_process_event_type.h | 345 +++++++ src/security/bpf_rate.h | 140 +++ src/security/bpf_task.h | 180 ++++ src/security/compiler.h | 17 + src/security/data_event.h | 221 ++++ src/security/data_msg.h | 42 + src/security/environ_conf.h | 40 + src/security/filter.h | 37 + src/security/generic.h | 67 ++ src/security/int_maps.h | 38 + src/security/msg_type.h | 53 + src/security/process.h | 607 +++++++++++ src/security/security.bpf.c | 1357 +++++++++++++++++++++++++ src/security/string_maps.h | 253 +++++ src/security/tailcall_stack.h | 16 + src/security/type.h | 253 +++++ 29 files changed, 5571 insertions(+), 1 deletion(-) create mode 100644 src/security/CMakeLists.txt create mode 100644 src/security/addr_lpm_maps.h create mode 100644 src/security/api.h create mode 100644 src/security/bpf_cgroup.h create mode 100644 src/security/bpf_common.h create mode 100644 src/security/bpf_cred.h create mode 100644 src/security/bpf_elf.h create mode 100644 src/security/bpf_event.h create mode 100644 src/security/bpf_exit.h create mode 100644 src/security/bpf_head.h create mode 100644 src/security/bpf_process_event.h create mode 100644 src/security/bpf_process_event_type.h create mode 100644 src/security/bpf_rate.h create mode 100644 src/security/bpf_task.h create mode 100644 src/security/compiler.h create mode 100644 src/security/data_event.h create mode 100644 src/security/data_msg.h create mode 100644 src/security/environ_conf.h create mode 100644 src/security/filter.h create mode 100644 src/security/generic.h create mode 100644 src/security/int_maps.h create mode 100644 src/security/msg_type.h create mode 100644 src/security/process.h create mode 100644 src/security/security.bpf.c create mode 100644 src/security/string_maps.h create mode 100644 src/security/tailcall_stack.h create mode 100644 src/security/type.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ceef4bf..8a0391a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -19,6 +19,7 @@ else(ELF_LIBRARY) endif(ELF_LIBRARY) add_subdirectory(bpf) +add_subdirectory(security) if (ENABLE_PROFILE) add_subdirectory(profiler) endif() @@ -42,6 +43,7 @@ file(GLOB sources ${PROJECT_SOURCE_DIR}/third/libbpf/src/*.c # share library add_library(coolbpf SHARED ${sources}) add_dependencies(coolbpf net_skel) +add_dependencies(coolbpf security_skel) target_link_libraries(coolbpf PRIVATE ${ELF_LIBRARY} ${Z_LIBRARY} pthread) set_target_properties(coolbpf PROPERTIES VERSION ${VERSION} SOVERSION ${VERSION}) @@ -55,6 +57,14 @@ set_target_properties(coolbpf_static PROPERTIES OUTPUT_NAME "coolbpf") install(TARGETS coolbpf LIBRARY DESTINATION ${INSTALL_LIB_DIR}) install(TARGETS coolbpf_static ARCHIVE 
DESTINATION ${INSTALL_LIB_DIR}) +# install skeleton headers +file(GLOB skel_headers ${CMAKE_CURRENT_BINARY_DIR}/bpf/*.skel.h ${CMAKE_CURRENT_BINARY_DIR}/security/*.skel.h) +install(FILES ${skel_headers} DESTINATION ${INSTALL_INCLUDE_DIR}/coolbpf) + +# install security headers +file(GLOB security_headers security/*.h) +install(FILES ${security_headers} DESTINATION ${INSTALL_INCLUDE_DIR}/coolbpf/security) + # install coolbpf headers file(GLOB headers "*.h") install(FILES ${headers} DESTINATION ${INSTALL_INCLUDE_DIR}/coolbpf) diff --git a/src/bpf/net.bpf.c b/src/bpf/net.bpf.c index 922cbdf..ff4103b 100644 --- a/src/bpf/net.bpf.c +++ b/src/bpf/net.bpf.c @@ -444,7 +444,7 @@ static __always_inline void init_conn_info(uint32_t tgid, conn_info->addr.sa.sa_family = AF_UNKNOWN; conn_info->is_sample = true; conn_info->protocol = ProtoUnknown; - struct task_struct *task = bpf_get_current_task(); + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); struct cgroup *cgrp = get_task_cgroup(task); if (!cgrp) return; diff --git a/src/security/CMakeLists.txt b/src/security/CMakeLists.txt new file mode 100644 index 0000000..dd57f8b --- /dev/null +++ b/src/security/CMakeLists.txt @@ -0,0 +1,3 @@ +include(${PROJECT_SOURCE_DIR}/scripts/cmake/genskel.cmake) + +genskel(security) \ No newline at end of file diff --git a/src/security/addr_lpm_maps.h b/src/security/addr_lpm_maps.h new file mode 100644 index 0000000..78cb3ee --- /dev/null +++ b/src/security/addr_lpm_maps.h @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright Authors of Cilium */ + +#pragma once + +#ifdef __cplusplus +#include +#endif +#include +#include +#include +#include +#include "../coolbpf.h" +#include "type.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, ADDR_LPM_MAPS_OUTER_MAX_ENTRIES); + __uint(key_size, sizeof(__u32)); + __array( + values, struct { + __uint(type, BPF_MAP_TYPE_LPM_TRIE); + __uint(max_entries, 1); + __type(key, __u8[8]); // Need to specify as byte array as wouldn't take struct as key type + __type(value, __u8); + __uint(map_flags, BPF_F_NO_PREALLOC); + }); +} addr4lpm_maps SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, ADDR_LPM_MAPS_OUTER_MAX_ENTRIES); + __uint(key_size, sizeof(__u32)); + __array( + values, struct { + __uint(type, BPF_MAP_TYPE_LPM_TRIE); + __uint(max_entries, 1); + __type(key, __u8[20]); // Need to specify as byte array as wouldn't take struct as key type + __type(value, __u8); + __uint(map_flags, BPF_F_NO_PREALLOC); + }); +} addr6lpm_maps SEC(".maps"); + + +// struct { +// __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); +// __uint(max_entries, ADDR_LPM_MAPS_OUTER_MAX_ENTRIES); +// __uint(key_size, sizeof(__u32)); +// __array( +// values, struct { +// __uint(type, BPF_MAP_TYPE_LPM_TRIE); +// __uint(max_entries, 16); +// __type(key, __u8[8]); // Need to specify as byte array as wouldn't take struct as key type +// __type(value, __u8); +// __uint(map_flags, BPF_F_NO_PREALLOC); +// }); +// } daddr4lpm_maps SEC(".maps"); + +// struct { +// __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); +// __uint(max_entries, ADDR_LPM_MAPS_OUTER_MAX_ENTRIES); +// __uint(key_size, sizeof(__u32)); +// __array( +// values, struct { +// __uint(type, BPF_MAP_TYPE_LPM_TRIE); +// __uint(max_entries, 16); +// __type(key, __u8[20]); // Need to specify as byte array as wouldn't take struct as key type +// __type(value, __u8); +// __uint(map_flags, BPF_F_NO_PREALLOC); +// }); +// } daddr6lpm_maps SEC(".maps"); diff 
--git a/src/security/api.h b/src/security/api.h new file mode 100644 index 0000000..daf9f74 --- /dev/null +++ b/src/security/api.h @@ -0,0 +1,316 @@ +// +// Created by qianlu on 2024/6/16. +// + +#ifndef SYSAK_API_H +#define SYSAK_API_H + +/* Note: + * + * This file can be included into eBPF kernel programs. It contains + * a couple of useful helper functions, map/section ABI (bpf_elf.h), + * misc macros and some eBPF specific LLVM built-ins. + */ +#include "bpf_elf.h" + +#ifndef TC_ACT_OK +#define TC_ACT_OK 0 +#define TC_ACT_RECLASSIFY 1 +#define TC_ACT_SHOT 2 +#define TC_ACT_PIPE 3 +#define TC_ACT_STOLEN 4 +#define TC_ACT_QUEUED 5 +#define TC_ACT_REPEAT 6 +#define TC_ACT_REDIRECT 7 +#endif +#define TC_ACT_UNSPEC -1 + +/** Misc macros. */ + +#ifndef __stringify +# define __stringify(X) #X +#endif + +#ifndef __maybe_unused +# define __maybe_unused __attribute__((__unused__)) +#endif + +#ifndef offsetof +# define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER) +#endif + +#ifndef likely +# define likely(X) __builtin_expect(!!(X), 1) +#endif + +#ifndef unlikely +# define unlikely(X) __builtin_expect(!!(X), 0) +#endif + +#ifndef __inline__ +# define __inline__ __attribute__((always_inline)) +#endif + +/** Section helper macros. */ + +#ifndef __section +# define __section(NAME) \ + __attribute__((section(NAME), used)) +#endif + +#ifndef __section_tail +# define __section_tail(ID, KEY) \ + __section(__stringify(ID) "/" __stringify(KEY)) +#endif + +#ifndef __section_cls_entry +# define __section_cls_entry \ + __section(ELF_SECTION_CLASSIFIER) +#endif + +#ifndef __section_act_entry +# define __section_act_entry \ + __section(ELF_SECTION_ACTION) +#endif + +#ifndef __section_license +# define __section_license \ + __section(ELF_SECTION_LICENSE) +#endif + +#ifndef __section_maps +# define __section_maps \ + __section(ELF_SECTION_MAPS) +#endif + +/** Declaration helper macros. */ + +#ifndef BPF_LICENSE +# define BPF_LICENSE(NAME) \ + char ____license[] __section_license = NAME +#endif + +/** Classifier helper */ + +#ifndef BPF_H_DEFAULT +# define BPF_H_DEFAULT -1 +#endif +// +///** BPF helper functions for tc. Individual flags are in linux/bpf.h */ +// +//#ifndef BPF_FUNC +//# define BPF_FUNC(NAME, ...) \ +// (* NAME)(__VA_ARGS__) __maybe_unused = (void *) bpf_##NAME +//#endif +// +//#ifndef BPF_FUNC2 +//# define BPF_FUNC2(NAME, ...) 
\ +// (* NAME)(__VA_ARGS__) __maybe_unused +//#endif +// +///* Map access/manipulation */ +//static void *BPF_FUNC(map_lookup_elem, void *map, const void *key); +//static int BPF_FUNC(map_update_elem, void *map, const void *key, +// const void *value, uint32_t flags); +//static int BPF_FUNC(map_delete_elem, void *map, const void *key); +// +///* Memory reads */ +//static int BPF_FUNC(bpf_probe_read, void *dst, uint32_t size, const void *src); +//static int BPF_FUNC(bpf_probe_read_str, void *dst, int size, const void *src); +//static int BPF_FUNC(bpf_probe_read_kernel, void *dst, uint32_t size, const void *src); +// +///* Time access */ +//static uint64_t BPF_FUNC(ktime_get_ns); +//static uint64_t BPF_FUNC(ktime_get_boot_ns); +//static uint64_t BPF_FUNC(ktime_get_coarse_ns); +//static uint64_t BPF_FUNC(jiffies64); +// +///* Platform */ +//static uint64_t BPF_FUNC(get_numa_node_id); +// +///* Timer Callbacks */ +//static long BPF_FUNC(timer_init, struct bpf_timer *timer, void *map, uint64_t flags); +//static long BPF_FUNC(timer_set_callback, struct bpf_timer *timer, void *callback_fun); +//static long BPF_FUNC(timer_start, struct bpf_timer *timer, uint64_t nsecs, uint64_t flags); +//static long BPF_FUNC(timer_cancel, struct bpf_timer *timer); +// +///* Sockets */ +//static uint64_t BPF_FUNC(get_socket_cookie, void *ctx); +// +//static struct bpf_sock *BPF_FUNC(sk_lookup_tcp, void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags); +//static struct bpf_sock *BPF_FUNC(sk_lookup_udp, void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags); +//static uint64_t BPF_FUNC(sk_release, void *sock); +//static struct bpf_sock *BPF_FUNC(sk_fullsock, struct bpf_sock *sk); +//static struct bpf_tcp_sock *BPF_FUNC(tcp_sock, struct bpf_sock *sk); +//static struct bpf_sock *BPF_FUNC(get_listener_sock, struct bpf_sock *sk); +//static struct bpf_sock *BPF_FUNC(skc_lookup_tcp, void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags); +//static void *BPF_FUNC(sk_storage_get, struct bpf_map *map, void *sk, void *value, u64 flags); +//static void *BPF_FUNC(sk_storage_delete, struct bpf_map *map, void *sk); +//static struct tcp6_sock *BPF_FUNC(skc_to_tcp6_sock, void *sk); +//static struct tcp_sock *BPF_FUNC(skc_to_tcp_sock, void *sk); +//static struct tcp_timewait_sock *BPF_FUNC(skc_to_tcp_timewait_sock, void *sk); +//static struct tcp_request_sock *BPF_FUNC(skc_to_tcp_request_sock, void *sk); +//static struct udp6_sock *BPF_FUNC(skc_to_udp6_sock, void *sk); +//static struct socket *BPF_FUNC(sock_from_file, struct file *file); +// +///* Debugging */ +//__attribute__((__format__(__printf__, 1, 0))) +//static void BPF_FUNC(trace_printk, const char *fmt, int fmt_size, ...); +//static long BPF_FUNC(trace_vprintk, const char *fmt, __u32 fmt_size, const void *data, __u32 data_len); +// +// +///* Random numbers */ +//static uint32_t BPF_FUNC(get_prandom_u32); +// +///* Tail calls */ +//static void BPF_FUNC(tail_call, void *ctx, void *map, uint32_t index); +// +///* System helpers */ +//static uint32_t BPF_FUNC(get_smp_processor_id); +// +///* Packet misc meta data */ +//static uint32_t BPF_FUNC(get_cgroup_classid, struct __sk_buff *skb); +//static uint32_t BPF_FUNC(get_route_realm, struct __sk_buff *skb); +//static uint32_t BPF_FUNC(get_hash_recalc, struct __sk_buff *skb); +//static uint32_t BPF_FUNC(set_hash_invalid, struct __sk_buff *skb); +// +//static int BPF_FUNC(skb_under_cgroup, void *map, uint32_t index); +// +///* Packet redirection */ +//static 
int BPF_FUNC(redirect, int ifindex, uint32_t flags); +//static int BPF_FUNC(clone_redirect, struct __sk_buff *skb, int ifindex, +// uint32_t flags); +// +///* Packet manipulation */ +//static int BPF_FUNC(skb_load_bytes_relative, struct __sk_buff *skb, uint32_t off, +// void *to, uint32_t len, uint32_t hdr); +//static int BPF_FUNC(skb_load_bytes, struct __sk_buff *skb, uint32_t off, +// void *to, uint32_t len); +//static int BPF_FUNC(skb_store_bytes, struct __sk_buff *skb, uint32_t off, +// const void *from, uint32_t len, uint32_t flags); +// +//static int BPF_FUNC(l3_csum_replace, struct __sk_buff *skb, uint32_t off, +// uint32_t from, uint32_t to, uint32_t flags); +//static int BPF_FUNC(l4_csum_replace, struct __sk_buff *skb, uint32_t off, +// uint32_t from, uint32_t to, uint32_t flags); +//static int BPF_FUNC(csum_diff, void *from, uint32_t from_size, void *to, +// uint32_t to_size, uint32_t seed); +// +//static int BPF_FUNC(skb_change_type, struct __sk_buff *skb, uint32_t type); +//static int BPF_FUNC(skb_change_proto, struct __sk_buff *skb, uint32_t proto, +// uint32_t flags); +//static int BPF_FUNC(skb_change_tail, struct __sk_buff *skb, uint32_t nlen, +// uint32_t flags); +//static int BPF_FUNC(skb_adjust_room, struct __sk_buff *skb, int32_t len_diff, +// uint32_t mode, uint64_t flags); +//static int BPF_FUNC(skb_pull_data, struct __sk_buff *skb, uint32_t len); +// +///* Packet vlan encap/decap */ +//static int BPF_FUNC(skb_vlan_push, struct __sk_buff *skb, uint16_t proto, +// uint16_t vlan_tci); +//static int BPF_FUNC(skb_vlan_pop, struct __sk_buff *skb); +// +///* Packet tunnel encap/decap */ +//static int BPF_FUNC(skb_get_tunnel_key, struct __sk_buff *skb, +// struct bpf_tunnel_key *to, uint32_t size, uint32_t flags); +//static int BPF_FUNC(skb_set_tunnel_key, struct __sk_buff *skb, +// const struct bpf_tunnel_key *from, uint32_t size, +// uint32_t flags); +// +//static int BPF_FUNC(skb_get_tunnel_opt, struct __sk_buff *skb, +// void *to, uint32_t size); +//static int BPF_FUNC(skb_set_tunnel_opt, struct __sk_buff *skb, +// const void *from, uint32_t size); +// +///* Events for user space */ +//static int BPF_FUNC2(skb_event_output, struct __sk_buff *skb, void *map, uint64_t index, +// const void *data, uint32_t size) = (void *)BPF_FUNC_perf_event_output; +// +///* Sockops and SK_MSG helpers */ +//static int BPF_FUNC(sock_map_update, struct bpf_sock_ops *skops, void *map, uint32_t key, uint64_t flags); +//static int BPF_FUNC(sock_hash_update, struct bpf_sock_ops *skops, void *map, void *key, uint64_t flags); +//static int BPF_FUNC(msg_redirect_hash, struct sk_msg_md *md, void *map, void *key, uint64_t flags); +//static int BPF_FUNC(msg_pull_data, struct sk_msg_md *md, __u32 start, __u32 end, __u64 flags); +//static int BPF_FUNC(msg_apply_bytes, struct sk_msg_md *md, __u32 bytes); +//static int BPF_FUNC(msg_cork_bytes, struct sk_msg_md *md, __u32 bytes); +// +//static int BPF_FUNC(fib_lookup, void *ctx, struct bpf_fib_lookup *params, uint32_t plen, uint32_t flags); +// +// +///* Current Process Info */ +//static uint64_t BPF_FUNC(bpf_get_current_task); +//static uint64_t BPF_FUNC(get_current_cgroup_id); +//static uint64_t BPF_FUNC(get_current_ancestor_cgroup_id); +//static uint64_t BPF_FUNC(get_current_uid_gid); +//static uint64_t BPF_FUNC(get_current_pid_tgid); +// +//static int BPF_FUNC(get_current_comm, char *buf, uint32_t size); +// +//static int BPF_FUNC(send_signal, uint32_t sig); +//static int BPF_FUNC(override_return, void *regs, uint64_t rc); +//static long 
BPF_FUNC(get_stackid, void *ctx, void *map, uint64_t flags); +//static long BPF_FUNC(loop, __u32 nr_loops, void *callback_fn, void *callback_ctx, __u64 flags); +//static __u64 BPF_FUNC(get_attach_cookie, void *ctx); +// +///* Perf and Rignbuffer */ +//static int BPF_FUNC(perf_event_output, void *ctx, void *map, uint64_t flags, void *data, uint64_t size); +// +//static int BPF_FUNC(get_stack, void *ctx, void *buf, uint32_t size, uint64_t flags); +//static long BPF_FUNC(ringbuf_output, void *data, uint64_t size, uint64_t flags); +//static void *BPF_FUNC(ringbuf_reserve, void *ringbuf, uint64_t size, uint64_t flags); +//static void BPF_FUNC(ringbuf_submit, void *data, uint64_t flags); +//static void BPF_FUNC(ringbuf_discard, void *data, uint64_t flags); +//static long BPF_FUNC(ringbuf_query, void *ringbuf, uint64_t flags); +// +//static long BPF_FUNC(ringbuf_reserve_dynptr, void *ringbuf, uint32_t size, uint64_t flags, struct bpf_dynptr *ptr); +//static void BPF_FUNC(ringbuf_submit_dynptr, struct bpf_dynptr *ptr, uint64_t flags); +//static void BPF_FUNC(ringbuf_discard_dynptr, struct bpf_dynptr *ptr, uint64_t flags); +// +//static long BPF_FUNC(dynptr_from_mem, void *data, uint32_t size, uint64_t flags, struct bpf_dynptr *ptr); +//static long BPF_FUNC(dynptr_read, void *dst, uint32_t len, const struct bpf_dynptr *src, uint32_t offset, uint64_t flags); +//static long BPF_FUNC(dynptr_write, const struct bpf_dynptr *dst, uint32_t offset, void *src, uint32_t len, uint64_t flags); +//static void BPF_FUNC(dynptr_data, const struct bpf_dynptr *ptr, uint32_t offset, uint32_t len); + +/** LLVM built-ins, mem*() routines work for constant size */ + +#ifndef lock_xadd +# define lock_xadd(ptr, val) ((void) __sync_fetch_and_add(ptr, val)) +#endif + +#ifndef memset +# define memset(s, c, n) __builtin_memset((s), (c), (n)) +#endif + +#ifndef memcpy +# define memcpy(d, s, n) __builtin_memcpy((d), (s), (n)) +#endif + +#ifndef memmove +# define memmove(d, s, n) __builtin_memmove((d), (s), (n)) +#endif + +/* FIXME: __builtin_memcmp() is not yet fully useable unless llvm bug + * https://llvm.org/bugs/show_bug.cgi?id=26218 gets resolved. Also + * this one would generate a reloc entry (non-map), otherwise. + */ +#if 0 +#ifndef memcmp +# define memcmp(a, b, n) __builtin_memcmp((a), (b), (n)) +#endif +#endif + +#include "compiler.h" +FUNC_INLINE void compiler_barrier(void) +{ + asm volatile("" :: + : "memory"); +} + +#ifndef memset +# define memset(s, c, n) __builtin_memset((s), (c), (n)) +#endif + +#define _(P) (__builtin_preserve_access_index(P)) + + +#endif //SYSAK_API_H diff --git a/src/security/bpf_cgroup.h b/src/security/bpf_cgroup.h new file mode 100644 index 0000000..1cd4088 --- /dev/null +++ b/src/security/bpf_cgroup.h @@ -0,0 +1,429 @@ +// +// Created by qianlu on 2024/6/16. +// + +#ifndef SYSAK_BPF_CGROUP_H +#define SYSAK_BPF_CGROUP_H + +#include "../coolbpf.h" +#include + +#include "bpf_event.h" +#include "environ_conf.h" +#include "bpf_common.h" +#include "process.h" + +#define NULL ((void *)0) + +#ifndef CGROUP_SUPER_MAGIC +#define CGROUP_SUPER_MAGIC 0x27e0eb /* Cgroupv1 pseudo FS */ +#endif + +#ifndef CGROUP2_SUPER_MAGIC +#define CGROUP2_SUPER_MAGIC 0x63677270 /* Cgroupv2 pseudo FS */ +#endif + +/* Our kernfs node name length, can be made 256? */ + +/* Max nested cgroups that are tracked. Arbitrary value, nested cgroups + * that are at a level greater than 32 will be attached to the cgroup + * at level 32. 
+ */ +#define CGROUP_MAX_NESTED_LEVEL 32 + +typedef enum { + CGROUP_UNTRACKED = 0, /* Cgroup was created but we did not track it */ + CGROUP_NEW = 1, /* Cgroup was just created */ + CGROUP_RUNNING = 2, /* new => running (fork,exec task inside) */ + CGROUP_RUNNING_PROC = 3, /* Generated from pids of procfs */ +} cgroup_state; + +/* Represent old kernfs node with the kernfs_node_id + * union to read the id in 5.4 kernels and older + */ +//struct kernfs_node___old { +// union kernfs_node_id id; +//}; + +struct cgroup_tracking_value { + /* State of cgroup */ + cgroup_state state; + + /* Unique id for the hierarchy this is mostly for cgroupv1 */ + __u32 hierarchy_id; + + /* The depth this cgroup is at */ + __u32 level; + + __u32 pad; + + /* Cgroup kernfs_node name */ + char name[KN_NAME_LENGTH]; +}; // All fields aligned so no 'packed' attribute. + +struct msg_cgroup_event { + struct msg_common common; + struct msg_execve_key parent; + __u32 cgrp_op; /* Current cgroup operation */ + __u32 pid; + __u32 nspid; + __u32 flags; + __u64 ktime; + __u64 cgrpid_tracker; /* Cgroup ID that is used as a tracker for the current cgroup */ + __u64 cgrpid; /* Current cgroup ID */ + struct cgroup_tracking_value cgrp_data; /* Current cgroup data */ + char path[PATH_MAP_SIZE]; /* Current cgroup path */ +}; // All fields aligned so no 'packed' attribute. + +/* Map to track cgroups per IDs */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 32768); + __type(key, __u64); /* Key is the cgrpid */ + __type(value, struct cgroup_tracking_value); +} tg_cgrps_tracking_map SEC(".maps"); + +/* Heap used to construct a cgroup_tracking_value */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __s32); + __type(value, struct cgroup_tracking_value); +} tg_cgrps_tracking_heap SEC(".maps"); + +/* Heap used to construct a msg_cgroup_event */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct msg_cgroup_event); +} tg_cgrps_msg_heap SEC(".maps"); + +/** + * get_cgroup_kn_name() Returns a pointer to the kernfs node name + * @cgrp: target kernfs node + * + * Returns a pointer to the kernfs node name on success, NULL on failures. + */ +FUNC_INLINE const char *__get_cgroup_kn_name(const struct kernfs_node *kn) +{ + const char *name = NULL; + + if (kn) + bpf_probe_read(&name, sizeof(name), _(&kn->name)); + + return name; +} + +/** + * get_cgroup_kn_id() Returns the kernfs node id + * @cgrp: target kernfs node + * + * Returns the kernfs node id on success, zero on failures. + */ +FUNC_INLINE __u64 __get_cgroup_kn_id(const struct kernfs_node *kn) +{ + __u64 id = 0; + + if (!kn) + return id; + + /* Kernels prior to 5.5 have the kernfs_node_id, but distros (RHEL) + * seem to have kernfs_node_id defined for UAPI reasons even though + * its not used here directly. To resolve this walk struct for id.id + */ +// if (bpf_core_field_exists(((struct kernfs_node___old *)0)->id.id)) { +// struct kernfs_node___old *old_kn; +// +// old_kn = (void *)kn; +// if (BPF_CORE_READ_INTO(&id, old_kn, id.id) != 0) +// return 0; +// } else { +// bpf_probe_read(&id, sizeof(id), _(&kn->id)); +// } + + return id; +} + +/** + * __get_cgroup_kn() Returns the kernfs_node of the cgroup + * @cgrp: target cgroup + * + * Returns the kernfs_node of the cgroup on success, NULL on failures. 
+ */ +FUNC_INLINE struct kernfs_node *__get_cgroup_kn(const struct cgroup *cgrp) +{ + struct kernfs_node *kn = NULL; + + if (cgrp) + bpf_probe_read(&kn, sizeof(cgrp->kn), _(&cgrp->kn)); + + return kn; +} + +/** + * get_cgroup_hierarchy_id() Returns the cgroup hierarchy id + * @cgrp: target cgroup + * + * Returns the cgroup hierarchy id. Make sure you pass a valid + * cgroup, this can not fail. + * + * Returning zero means the cgroup is running on the default + * hierarchy. + */ +FUNC_INLINE __u32 get_cgroup_hierarchy_id(const struct cgroup *cgrp) +{ + __u32 id; + + BPF_CORE_READ_INTO(&id, cgrp, root, hierarchy_id); + + return id; +} + +/** + * get_cgroup_name() Returns a pointer to the cgroup name + * @cgrp: target cgroup + * + * Returns a pointer to the cgroup node name on success that can + * be read with bpf_probe_read(). NULL on failures. + */ +FUNC_INLINE const char *get_cgroup_name(const struct cgroup *cgrp) +{ + const char *name; + + if (unlikely(!cgrp)) + return NULL; + + if (BPF_CORE_READ_INTO(&name, cgrp, kn, name) != 0) + return NULL; + + return name; +} + +/** + * get_cgroup_level() Returns the cgroup level + * @cgrp: target cgroup + * + * Returns the cgroup level, or 0 if it can not be retrieved. + */ +FUNC_INLINE __u32 get_cgroup_level(const struct cgroup *cgrp) +{ + __u32 level = 0; + + bpf_probe_read(&level, sizeof(level), _(&cgrp->level)); + return level; +} + +/** + * get_cgroup_id() Returns cgroup id + * @cgrp: target cgroup + * + * Returns the cgroup id of the target cgroup on success, zero on failures. + */ +FUNC_INLINE __u64 get_cgroup_id(const struct cgroup *cgrp) +{ + struct kernfs_node *kn; + + kn = __get_cgroup_kn(cgrp); + return __get_cgroup_kn_id(kn); +} + +/** + * get_task_cgroup() Returns the accurate or desired cgroup of the css of + * current task that we want to operate on. + * @task: must be current task. + * @subsys_idx: index of the desired cgroup_subsys_state part of css_set. + * Passing a zero as a subsys_idx is fine assuming you want that. + * @error_flags: error flags that will be ORed to indicate errors on + * failures. + * + * Returns the cgroup of the css part of css_set of current task and is + * indexed at subsys_idx on success. NULL on failures, and the error_flags + * will be ORed to indicate the corresponding error. + * + * To get cgroup and kernfs node information we want to operate on the right + * cgroup hierarchy which is setup by user space. However due to the + * incompatibility between cgroup v1 and v2; how user space initialize and + * install cgroup controllers, etc, it can be difficult. + * + * Use this helper and pass the css index that you consider accurate and + * which can be discovered at runtime in user space. + * Usually it is the 'memory' or 'pids' indexes by reading /proc/cgroups + * file where each line number is the index starting from zero without + * counting first comment line. + */ +FUNC_INLINE struct cgroup * +get_task_cgroup(struct task_struct *task, __u32 subsys_idx, __u32 *error_flags) +{ + struct cgroup_subsys_state *subsys; + struct css_set *cgroups; + struct cgroup *cgrp = NULL; + + bpf_probe_read(&cgroups, sizeof(cgroups), _(&task->cgroups)); + if (unlikely(!cgroups)) { + *error_flags |= EVENT_ERROR_CGROUPS; + return cgrp; + } + + /* We are interested only in the cpuset, memory or pids controllers + * which are indexed at 0, 4 and 11 respectively assuming all controllers + * are compiled in. 
+ * When we use the controllers indexes we will first discover these indexes + * dynamically in user space which will work on all setups from reading + * file: /proc/cgroups. If we fail to discover the indexes then passing + * a default index zero should be fine assuming we also want that. + * + * Reference: https://elixir.bootlin.com/linux/v5.19/source/include/linux/cgroup_subsys.h + * + * Notes: + * Newer controllers should be appended at the end. controllers + * that are not upstreamed may mess the calculation here + * especially if they happen to be before the desired subsys_idx, + * we fail. + */ + if (unlikely(subsys_idx > pids_cgrp_id)) { + *error_flags |= EVENT_ERROR_CGROUP_SUBSYS; + return cgrp; + } + + /* Read css from the passed subsys index to ensure that we operate + * on the desired controller. This allows user space to be flexible + * and chose the right per cgroup subsystem to use in order to + * support as much as workload as possible. It also reduces errors + * in a significant way. + */ + bpf_probe_read(&subsys, sizeof(subsys), _(&cgroups->subsys[subsys_idx])); + if (unlikely(!subsys)) { + *error_flags |= EVENT_ERROR_CGROUP_SUBSYS; + return cgrp; + } + + bpf_probe_read(&cgrp, sizeof(cgrp), _(&subsys->cgroup)); + if (!cgrp) + *error_flags |= EVENT_ERROR_CGROUP_SUBSYSCGRP; + + return cgrp; +} + +/** + * __tg_get_current_cgroup_id() Returns the accurate cgroup id of current task. + * @cgrp: cgroup target of current task. + * @cgrpfs_ver: Cgroupfs Magic number either Cgroupv1 or Cgroupv2 + * + * It handles both cgroupv2 and cgroupv1. + * If @cgrpfs_ver is default cgroupv2 hierarchy, then it uses the bpf + * helper bpf_get_current_cgroup_id() to retrieve the cgroup id. Otherwise + * it falls back on using the passed @cgrp + * + * Returns the cgroup id of current task on success, zero on failures. + */ +FUNC_INLINE __u64 +__tg_get_current_cgroup_id(struct cgroup *cgrp, __u64 cgrpfs_ver) +{ + return 0; + /* + * Try the bpf helper on the default hierarchy if available + * and if we are running in unified cgroupv2 + */ +// if (bpf_core_enum_value_exists(enum bpf_func_id, +// bpf_get_current_cgroup_id) && +// cgrpfs_ver == CGROUP2_SUPER_MAGIC) { +// return bpf_get_current_cgroup_id(); +// } else { +// return get_cgroup_id(cgrp); +// } +} + +/** + * tg_get_current_cgroup_id() Returns the accurate cgroup id of current task. + * + * It works similar to __tg_get_current_cgroup_id, but computes the cgrp if it is needed. + * Returns the cgroup id of current task on success, zero on failures. 
+ */ +FUNC_INLINE __u64 tg_get_current_cgroup_id(void) +{ + __u32 error_flags; + struct cgroup *cgrp; + __u64 cgrpfs_magic = 0; + struct task_struct *task; + struct tetragon_conf *conf; + int zero = 0, subsys_idx = 0; + + conf = bpf_map_lookup_elem(&tg_conf_map, &zero); + if (conf) { + /* Select which cgroup version */ + cgrpfs_magic = conf->cgrp_fs_magic; + subsys_idx = conf->tg_cgrp_subsys_idx; + } + + /* + * Try the bpf helper on the default hierarchy if available + * and if we are running in unified cgroupv2 + */ +// if (bpf_core_enum_value_exists(enum bpf_func_id, +// bpf_get_current_cgroup_id) && +// cgrpfs_magic == CGROUP2_SUPER_MAGIC) { +// return bpf_get_current_cgroup_id(); +// } + + task = (struct task_struct *)bpf_get_current_task(); + + // NB: error_flags are ignored for now + cgrp = get_task_cgroup(task, subsys_idx, &error_flags); + if (!cgrp) + return 0; + + return get_cgroup_id(cgrp); +} + +/** + * __get_cgrp_tracking_val_heap() Get a cgroup_tracking_val from the + * tg_cgrps_tracking_heap map while setting its fields. + */ +FUNC_INLINE struct cgroup_tracking_value * +__get_cgrp_tracking_val_heap(cgroup_state state, __u32 hierarchy_id, + __u32 level) +{ + int zero = 0; + struct cgroup_tracking_value *heap; + + heap = bpf_map_lookup_elem(&tg_cgrps_tracking_heap, &zero); + if (!heap) + return heap; + + memset(heap, 0, sizeof(struct cgroup_tracking_value)); + heap->state = state; + heap->hierarchy_id = hierarchy_id; + heap->level = level; + + return heap; +} + +/** + * __init_cgrp_tracking_val_heap() Initialize a cgroup_tracking_val that is + * obtained with __get_cgrp_tracking_val_heap(). It will initialize and + * set the cgroup name too. + */ +FUNC_INLINE struct cgroup_tracking_value * +__init_cgrp_tracking_val_heap(struct cgroup *cgrp, cgroup_state state) +{ + const char *name; + struct kernfs_node *kn; + __u32 level, hierarchy_id; + struct cgroup_tracking_value *heap; + + hierarchy_id = get_cgroup_hierarchy_id(cgrp); + level = get_cgroup_level(cgrp); + heap = __get_cgrp_tracking_val_heap(state, hierarchy_id, level); + if (!heap) + return heap; + + kn = __get_cgroup_kn(cgrp); + name = __get_cgroup_kn_name(kn); + if (name) + bpf_probe_read_str(&heap->name, KN_NAME_LENGTH - 1, name); + + return heap; +} + +#endif //SYSAK_BPF_CGROUP_H diff --git a/src/security/bpf_common.h b/src/security/bpf_common.h new file mode 100644 index 0000000..5551e45 --- /dev/null +++ b/src/security/bpf_common.h @@ -0,0 +1,79 @@ +// +// Created by qianlu on 2024/6/16. 
+// + +#ifndef SYSAK_BPF_COMMON_H +#define SYSAK_BPF_COMMON_H + +#ifdef __cplusplus +#include +#endif + +#define KN_NAME_LENGTH 128 + +/* msg_common internal flags */ +#define MSG_COMMON_FLAG_RETURN BIT(0) +#define MSG_COMMON_FLAG_KERNEL_STACKTRACE BIT(1) +#define MSG_COMMON_FLAG_USER_STACKTRACE BIT(2) + +#define XSTR(s) STR(s) +#define STR(s) #s + +/* Msg Layout */ +struct msg_common { + __u8 op; + __u8 flags; // internal flags not exported + __u8 pad[2]; + __u32 size; + __u64 ktime; +}; + +struct msg_test { + struct msg_common common; + unsigned long arg0; + unsigned long arg1; + unsigned long arg2; + unsigned long arg3; +} __attribute__((packed)); + +#ifndef bpf_ntohs +#define bpf_ntohs(x) __builtin_bswap16(x) +#endif + +#ifndef bpf_htons +#define bpf_htons(x) __builtin_bswap16(x) +#endif + +#ifndef bpf_ntohl +#define bpf_ntohl(x) __builtin_bswap32(x) +#endif + +#ifndef bpf_htonl +#define bpf_htonl(x) __builtin_bswap32(x) +#endif + +//#ifndef bpf_map_def +//struct bpf_map_def { +// unsigned int type; +// unsigned int key_size; +// unsigned int value_size; +// unsigned int max_entries; +// unsigned int map_flags; +//}; +//#endif + +#define BIT(nr) (1 << (nr)) +#define BIT_ULL(nr) (1ULL << (nr)) + +#ifdef TETRAGON_BPF_DEBUG +#include +#define DEBUG(__fmt, ...) bpf_printk(__fmt, ##__VA_ARGS__) +#else +#define DEBUG(__fmt, ...) +#endif + +#ifndef PATH_MAP_SIZE +#define PATH_MAP_SIZE 4096 +#endif + +#endif //SYSAK_BPF_COMMON_H diff --git a/src/security/bpf_cred.h b/src/security/bpf_cred.h new file mode 100644 index 0000000..e9b861a --- /dev/null +++ b/src/security/bpf_cred.h @@ -0,0 +1,83 @@ +// +// Created by qianlu on 2024/6/16. +// + +#ifndef SYSAK_BPF_CRED_H +#define SYSAK_BPF_CRED_H + +#include "compiler.h" + +// NB: in some cases we want to access the capabilities via an array to simplify the BPF code, which is why we define it as a union. +struct msg_capabilities { + union { + struct { + __u64 permitted; + __u64 effective; + __u64 inheritable; + }; + __u64 c[3]; + }; +}; // All fields aligned so no 'packed' attribute. + +// indexes to access msg_capabilities's array (->c) -- should have the same order as the fields above. +enum { + caps_permitted = 0, + caps_effective = 1, + caps_inheritable = 2, +}; + +struct msg_user_namespace { + __s32 level; + __u32 uid; + __u32 gid; + __u32 ns_inum; +}; + +struct msg_cred { + __u32 uid; + __u32 gid; + __u32 suid; + __u32 sgid; + __u32 euid; + __u32 egid; + __u32 fsuid; + __u32 fsgid; + __u32 securebits; + __u32 pad; + struct msg_capabilities caps; + struct msg_user_namespace user_ns; +} __attribute__((packed)); + +/* Execution and cred related flags shared with userspace */ +#define EXEC_SETUID 0x01 /* This is a set-user-id execution */ +#define EXEC_SETGID 0x02 /* This is a set-group-id execution */ +#define EXEC_FILE_CAPS 0x04 /* This binary execution gained new capabilities through file capabilities execution */ +#define EXEC_SETUID_ROOT 0x08 /* This binary execution gained new privileges through setuid to root execution */ +#define EXEC_SETGID_ROOT 0x10 /* This binary execution gained new privileges through setgid to root execution */ + +/* + * Check if "a" is a subset of "set". 
+ * return true if all of the capabilities in "a" are also in "set" + * __cap_issubset(0100, 1111) will return true + * return false if any of the capabilities in "a" are not in "set" + * __cap_issubset(1111, 0100) will return false + */ +FUNC_INLINE bool __cap_issubset(const __u64 a, const __u64 set) +{ + return !(a & ~set); +} + +#define __cap_gained(target, source) \ + !__cap_issubset(target, source) + +/* + * We check if it user id is global root. Right now we do not + * support per user namespace translation, example checking if + * root in user namespace. + */ +FUNC_INLINE bool __is_uid_global_root(__u32 uid) +{ + return uid == 0; +} + +#endif //SYSAK_BPF_CRED_H diff --git a/src/security/bpf_elf.h b/src/security/bpf_elf.h new file mode 100644 index 0000000..0b4ae8f --- /dev/null +++ b/src/security/bpf_elf.h @@ -0,0 +1,56 @@ +// +// Created by qianlu on 2024/6/16. +// + +#ifndef SYSAK_BPF_ELF_H +#define SYSAK_BPF_ELF_H + +/* Note: + * + * Below ELF section names and bpf_elf_map structure definition + * are not (!) kernel ABI. It's rather a "contract" between the + * application and the BPF loader in tc. For compatibility, the + * section names should stay as-is. Introduction of aliases, if + * needed, are a possibility, though. + */ + +/* ELF section names, etc */ +#define ELF_SECTION_LICENSE "license" +#define ELF_SECTION_MAPS "maps" +#define ELF_SECTION_PROG "prog" +#define ELF_SECTION_CLASSIFIER "classifier" +#define ELF_SECTION_ACTION "action" + +#define ELF_MAX_MAPS 64 +#define ELF_MAX_LICENSE_LEN 128 + +/* Object pinning settings */ +#define PIN_NONE 0 +#define PIN_OBJECT_NS 1 +#define PIN_GLOBAL_NS 2 + +/* ELF map definition */ +struct bpf_elf_map { + __u32 type; + __u32 size_key; + __u32 size_value; + __u32 max_elem; + __u32 flags; + __u32 id; + __u32 pinning; + __u32 inner_id; + __u32 inner_idx; +}; + +#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \ + struct ____btf_map_##name { \ + type_key key; \ + type_val value; \ + }; \ + struct ____btf_map_##name \ + __attribute__((section(".maps." #name), used)) \ + ____btf_map_##name = {} + + + +#endif //SYSAK_BPF_ELF_H diff --git a/src/security/bpf_event.h b/src/security/bpf_event.h new file mode 100644 index 0000000..31ae22e --- /dev/null +++ b/src/security/bpf_event.h @@ -0,0 +1,21 @@ +// +// Created by qianlu on 2024/6/16. +// + +#ifndef SYSAK_BPF_EVENT_H +#define SYSAK_BPF_EVENT_H + +#include "../coolbpf.h" +#include + +struct event { + int event; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __type(key, int); + __type(value, struct event); +} tcpmon_map SEC(".maps"); + +#endif //SYSAK_BPF_EVENT_H diff --git a/src/security/bpf_exit.h b/src/security/bpf_exit.h new file mode 100644 index 0000000..786a9f3 --- /dev/null +++ b/src/security/bpf_exit.h @@ -0,0 +1,89 @@ +// +// Created by qianlu on 2024/6/16. +// + +#ifndef SYSAK_BPF_EXIT_H +#define SYSAK_BPF_EXIT_H + + +#include "vmlinux.h" +#include "api.h" + +#include "msg_type.h" +#include "bpf_event.h" +#include "bpf_task.h" +#include "bpf_rate.h" +#include "process.h" +#include "bpf_process_event.h" + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct msg_exit); +} exit_heap_map SEC(".maps"); + +FUNC_INLINE void event_exit_send(void *ctx, __u32 tgid) +{ + struct execve_map_value *enter; + + /* It is safe to do a map_lookup_event() here because + * we must have captured the execve case in order for an + * exit to happen. 
Or in the FGS startup case we pre + * populated it before loading BPF programs. At any rate + * if the entry is _not_ in the execve_map the lookup + * will create an empty entry, the ktime check below will + * catch it and we will quickly delete the entry again. + */ + enter = execve_map_get_noinit(tgid); + if (!enter) + return; + bpf_printk("[kprobe][event_exit_send] pid:%u already enter.", tgid); + if (enter->key.ktime) { + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + size_t size = sizeof(struct msg_exit); + struct msg_exit *exit; + struct msg_k8s kube; + int zero = 0; + + exit = bpf_map_lookup_elem(&exit_heap_map, &zero); + if (!exit) + return; + + exit->common.op = MSG_OP_EXIT; + exit->common.flags = 0; + exit->common.pad[0] = 0; + exit->common.pad[1] = 0; + exit->common.size = size; + exit->common.ktime = bpf_ktime_get_ns(); + + exit->current.pid = tgid; + exit->current.pad[0] = 0; + exit->current.pad[1] = 0; + exit->current.pad[2] = 0; + exit->current.pad[3] = 0; + exit->current.ktime = enter->key.ktime; + + /** + * Per thread tracking rules TID == PID : + * We want the exit event to match the exec one, and since during exec + * we report the thread group leader, do same here as we read the exec + * entry from the execve_map anyway and explicitly set it to the to tgid. + */ + exit->info.tid = tgid; + bpf_probe_read(&exit->info.code, sizeof(exit->info.code), + _(&task->exit_code)); + + __event_get_cgroup_info(task, &kube); + + bpf_printk("[kprobe][event_exit_send] pid:%u prepare to send event.", tgid); + if (cgroup_rate(ctx, &kube, exit->common.ktime)) { + bpf_printk("[kprobe][event_exit_send] pid:%u send event.", tgid); + perf_event_output_metric(ctx, MSG_OP_EXIT, &tcpmon_map, + BPF_F_CURRENT_CPU, exit, size); + } + } + execve_map_delete(tgid); +} + +#endif //SYSAK_BPF_EXIT_H diff --git a/src/security/bpf_head.h b/src/security/bpf_head.h new file mode 100644 index 0000000..3b2c29d --- /dev/null +++ b/src/security/bpf_head.h @@ -0,0 +1,141 @@ +// +// Created by qianlu on 2024/6/16. 
+// + +#ifndef SYSAK_BPF_HEAD_H +#define SYSAK_BPF_HEAD_H + +extern "C" { +#include "../coolbpf.h" +#include +}; + +#ifdef COOLBPF_PERF_THREAD + +#define DEFINE_SEKL_OBJECT(skel_name) \ + struct skel_name##_bpf *skel_name = NULL; \ + static pthread_t perf_thread = 0; \ + int thread_worker(struct beeQ *q, void *arg) \ + { \ + perf_thread_worker(arg); \ + return 0; \ + } \ + void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt) \ + { \ + printf("Lost %llu events on CPU #%d!\n", lost_cnt, cpu); \ + } + +#define LOAD_SKEL_OBJECT(skel_name, perf) \ + ( \ + { \ + __label__ load_bpf_skel_out; \ + int __ret = 0; \ + skel_name = skel_name##_bpf__open(); \ + if (!skel_name) \ + { \ + printf("failed to open BPF object\n"); \ + __ret = -1; \ + goto load_bpf_skel_out; \ + } \ + __ret = skel_name##_bpf__load(skel_name); \ + if (__ret) \ + { \ + printf("failed to load BPF object: %d\n", __ret); \ + DESTORY_SKEL_BOJECT(skel_name); \ + goto load_bpf_skel_out; \ + } \ + struct bpf_program* prog; \ + prog = bpf_object__find_program_by_name(skel_name->obj, "execve_rate"); \ + if (prog) {bpf_program__set_autoload(prog, false); \ + printf("execve_rate found and set not to autoattach"); \ + } \ + else printf("execve_rate not found "); \ + prog = bpf_object__find_program_by_name(skel_name->obj, "execve_send"); \ + if (prog) {bpf_program__set_autoload(prog, false); \ + printf("execve_send found and set not to autoattach"); \ + } \ + else printf("execve_send not found "); \ + __ret = skel_name##_bpf__attach(skel_name); \ + if (__ret) \ + { \ + printf("failed to attach BPF programs: %s\n", strerror(-__ret)); \ + DESTORY_SKEL_BOJECT(skel_name); \ + goto load_bpf_skel_out; \ + } \ + struct perf_thread_arguments *perf_args = calloc(1, sizeof(struct perf_thread_arguments)); \ + if (!perf_args) \ + { \ + __ret = -ENOMEM; \ + printf("failed to allocate memory: %s\n", strerror(-__ret)); \ + DESTORY_SKEL_BOJECT(skel_name); \ + goto load_bpf_skel_out; \ + } \ + perf_args->mapfd = bpf_map__fd(skel_name->maps.perf); \ + perf_args->sample_cb = handle_event; \ + perf_args->lost_cb = handle_lost_events; \ + perf_args->ctx = arg; \ + perf_thread = beeQ_send_thread(arg, perf_args, thread_worker); \ + load_bpf_skel_out: \ + __ret; \ + }) + +#define DESTORY_SKEL_BOJECT(skel_name) \ + if (perf_thread != 0) \ + plugin_thread_stop(perf_thread); \ + skel_name##_bpf__destroy(skel_name); +#else +#define DEFINE_SEKL_OBJECT(skel_name) \ + struct skel_name##_bpf *skel_name = NULL; + +#define LOAD_SKEL_OBJECT(skel_name, perf) \ + ( \ + { \ + __label__ load_bpf_skel_out; \ + int __ret = 0; \ + skel_name = skel_name##_bpf__open(); \ + if (!skel_name) \ + { \ + printf("failed to open BPF object\n"); \ + __ret = -1; \ + goto load_bpf_skel_out; \ + } \ + __ret = skel_name##_bpf__load(skel_name); \ + if (__ret) \ + { \ + printf("failed to load BPF object: %d\n", __ret); \ + DESTORY_SKEL_BOJECT(skel_name); \ + goto load_bpf_skel_out; \ + } \ + struct bpf_program* prog; \ + prog = bpf_object__find_program_by_name(skel_name->obj, "execve_rate"); \ + if (prog) {bpf_program__set_autoload(prog, false); \ + printf("execve_rate found and set not to autoattach\n"); \ + } \ + else printf("execve_rate not found "); \ + prog = bpf_object__find_program_by_name(skel_name->obj, "execve_send"); \ + if (prog) {bpf_program__set_autoload(prog, false); \ + printf("execve_send found and set not to autoattach\n"); \ + } \ + else printf("execve_send not found "); \ + __ret = skel_name##_bpf__attach(skel_name); \ + if (__ret) \ + { \ + printf("failed to attach BPF 
programs: %s\n", strerror(-__ret)); \ + DESTORY_SKEL_BOJECT(skel_name); \ + goto load_bpf_skel_out; \ + } \ + load_bpf_skel_out: \ + __ret; \ + }) + +#define DESTORY_SKEL_BOJECT(skel_name) \ + skel_name##_bpf__destroy(skel_name); +#endif + +#define coobpf_map_find(OBJ, NAME) bpf_object__find_map_fd_by_name(OBJ, NAME) +#define coobpf_key_next(FD, KEY, NEXT) bpf_map_get_next_key(FD, KEY, NEXT) +#define coobpf_key_value(FD, KEY, VALUE) bpf_map_lookup_elem(FD, KEY, VALUE) + + + +#endif //SYSAK_BPF_HEAD_H diff --git a/src/security/bpf_process_event.h b/src/security/bpf_process_event.h new file mode 100644 index 0000000..5d2b659 --- /dev/null +++ b/src/security/bpf_process_event.h @@ -0,0 +1,606 @@ +// +// Created by qianlu on 2024/6/16. +// + +#ifndef SYSAK_BPF_PROCESS_EVENT_H +#define SYSAK_BPF_PROCESS_EVENT_H + +#include "../coolbpf.h" +#include +#include + +#include "bpf_common.h" +#include "api.h" +#include "bpf_cgroup.h" +#include "bpf_cred.h" +#include "compiler.h" + +#define ENAMETOOLONG 36 /* File name too long */ + +#define MAX_BUF_LEN 256 + +struct buffer_heap_map_value { + // Buffer is twice the needed size because of the verifier. In prepend_name + // unit tests, the verifier figures out that 255 is enough and that the + // buffer_offset will not overflow, but in the real use-case it looks like + // it's forgetting about that. + unsigned char buf[MAX_BUF_LEN * 2]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct buffer_heap_map_value); +} buffer_heap_map SEC(".maps"); + +FUNC_INLINE __u64 __get_auid(struct task_struct *task) +{ + // u64 to convince compiler to do 64bit loads early kernels do not + // support 32bit loads from stack, e.g. r1 = *(u32 *)(r10 -8). + __u64 auid = 0; + + if (!task) + return auid; + + if (bpf_core_field_exists(task->loginuid)) { + bpf_probe_read(&auid, sizeof(auid), _(&task->loginuid.val)); + } else { +// struct audit_task_info *audit; +// +// if (bpf_core_field_exists(task->audit)) { +// bpf_probe_read(&audit, sizeof(audit), _(&task->audit)); +// if (audit) { +// bpf_probe_read(&auid, sizeof(__u32), +// _(&audit->loginuid)); +// } +// } + } + + return auid; +} + +FUNC_INLINE __u32 get_auid(void) +{ + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + + return __get_auid(task); +} + +#define offsetof_btf(s, memb) ((size_t)((char *)_(&((s *)0)->memb) - (char *)0)) + +#define container_of_btf(ptr, type, member) \ + ({ \ + void *__mptr = (void *)(ptr); \ + ((type *)(__mptr - offsetof_btf(type, member))); \ + }) + +FUNC_INLINE struct mount *real_mount(struct vfsmount *mnt) +{ + return container_of_btf(mnt, struct mount, mnt); +} + +FUNC_INLINE bool IS_ROOT(struct dentry *dentry) +{ + struct dentry *d_parent; + + bpf_probe_read(&d_parent, sizeof(d_parent), _(&dentry->d_parent)); + return (dentry == d_parent); +} + +FUNC_INLINE bool hlist_bl_unhashed(const struct hlist_bl_node *h) +{ + struct hlist_bl_node **pprev; + + bpf_probe_read(&pprev, sizeof(pprev), _(&h->pprev)); + return !pprev; +} + +FUNC_INLINE int d_unhashed(struct dentry *dentry) +{ + return hlist_bl_unhashed(_(&dentry->d_hash)); +} + +FUNC_INLINE int d_unlinked(struct dentry *dentry) +{ + return d_unhashed(dentry) && !IS_ROOT(dentry); +} + +FUNC_INLINE int +prepend_name(char *buf, char **bufptr, int *buflen, const char *name, u32 namelen) +{ + // contains 1 if the buffer is large enough to contain the whole name and a slash prefix + bool write_slash = 1; + + u64 buffer_offset = (u64)(*bufptr) - 
(u64)buf; + + // Change name and namelen to fit in the buffer. + // We prefer to store the part of it that fits rather than discard it. + if (namelen >= *buflen) { + name += namelen - *buflen; + namelen = *buflen; + write_slash = 0; + } + + *buflen -= (namelen + write_slash); + + // This will not happen as buffer_offset cannot be above 256 and namelen is + // bound to 255. Needed to make the verifier happy in older kernels. + if (namelen + write_slash > buffer_offset) + return -ENAMETOOLONG; + + buffer_offset -= (namelen + write_slash); + + // This will never happen. buffer_offset is the diff of the initial buffer pointer + // with the current buffer pointer. This will be at max 256 bytes (similar to the initial + // size). + // Needed to bound that for bpf_probe_read call. + if (buffer_offset >= MAX_BUF_LEN) + return -ENAMETOOLONG; + + if (write_slash) + buf[buffer_offset] = '/'; + + // This ensures that namelen is < 256, which is aligned with kernel's max dentry name length + // that is 255 (https://elixir.bootlin.com/linux/v5.10/source/include/uapi/linux/limits.h#L12). + // Needed to bound that for bpf_probe_read call. + asm volatile("%[namelen] &= 0xff;\n" ::[namelen] "+r"(namelen) + :); + bpf_probe_read(buf + buffer_offset + write_slash, namelen * sizeof(char), name); + + *bufptr = buf + buffer_offset; + return write_slash ? 0 : -ENAMETOOLONG; +} + +/* + * Only called from path_with_deleted function before any path traversals. + * In the current scenarios, always buflen will be 256 and namelen 10. + * For this reason I will never return -ENAMETOOLONG. + */ +FUNC_INLINE int +prepend(char **buffer, int *buflen, const char *str, int namelen) +{ + *buflen -= namelen; + if (*buflen < 0) // will never happen - check function comment + return -ENAMETOOLONG; + *buffer -= namelen; + memcpy(*buffer, str, namelen); + return 0; +} + +struct cwd_read_data { + struct dentry *root_dentry; + struct vfsmount *root_mnt; + char *bf; + struct dentry *dentry; + struct vfsmount *vfsmnt; + struct mount *mnt; + char *bptr; + int blen; + bool resolved; +}; + +FUNC_INLINE long cwd_read(struct cwd_read_data *data) +{ + struct qstr d_name; + struct dentry *parent; + struct dentry *vfsmnt_mnt_root; + struct dentry *dentry = data->dentry; + struct vfsmount *vfsmnt = data->vfsmnt; + struct mount *mnt = data->mnt; + int error; + + if (!(dentry != data->root_dentry || vfsmnt != data->root_mnt)) { + data->resolved = + true; // resolved all path components successfully + return 1; + } + + bpf_probe_read(&vfsmnt_mnt_root, sizeof(vfsmnt_mnt_root), + _(&vfsmnt->mnt_root)); + if (dentry == vfsmnt_mnt_root || IS_ROOT(dentry)) { + struct mount *parent; + + bpf_probe_read(&parent, sizeof(parent), _(&mnt->mnt_parent)); + + /* Global root? */ + if (data->mnt != parent) { + bpf_probe_read(&data->dentry, sizeof(data->dentry), + _(&mnt->mnt_mountpoint)); + data->mnt = parent; + bpf_probe_read(&data->vfsmnt, sizeof(data->vfsmnt), + _(&mnt->mnt)); + return 0; + } + // resolved all path components successfully + data->resolved = true; + return 1; + } + bpf_probe_read(&parent, sizeof(parent), _(&dentry->d_parent)); + bpf_probe_read(&d_name, sizeof(d_name), _(&dentry->d_name)); + error = prepend_name(data->bf, &data->bptr, &data->blen, + (const char *)d_name.name, d_name.len); + // This will happen where the dentry name does not fit in the buffer. + // We will stop the loop with resolved == false and later we will + // set the proper value in error before function return. 
+ if (error) + return 1; + + data->dentry = parent; + return 0; +} + +#ifdef __V61_BPF_PROG +static long cwd_read_v61(__u32 index, void *data) +{ + return cwd_read(data); +} +#endif +FUNC_INLINE int +prepend_path(const struct path *path, const struct path *root, char *bf, + char **buffer, int *buflen) +{ + struct cwd_read_data data = { + .bf = bf, + .bptr = *buffer, + .blen = *buflen, + }; + int error = 0; + + bpf_probe_read(&data.root_dentry, sizeof(data.root_dentry), + _(&root->dentry)); + bpf_probe_read(&data.root_mnt, sizeof(data.root_mnt), _(&root->mnt)); + bpf_probe_read(&data.dentry, sizeof(data.dentry), _(&path->dentry)); + bpf_probe_read(&data.vfsmnt, sizeof(data.vfsmnt), _(&path->mnt)); + data.mnt = real_mount(data.vfsmnt); + +#ifndef __V61_BPF_PROG +#pragma unroll + for (int i = 0; i < PROBE_CWD_READ_ITERATIONS; ++i) { + if (cwd_read(&data)) + break; + } +#else + loop(PROBE_CWD_READ_ITERATIONS, cwd_read_v61, (void *)&data, 0); +#endif /* __V61_BPF_PROG */ + + if (data.bptr == *buffer) { + *buflen = 0; + return 0; + } + if (!data.resolved) + error = UNRESOLVED_PATH_COMPONENTS; + *buffer = data.bptr; + *buflen = data.blen; + return error; +} + +FUNC_INLINE int +path_with_deleted(const struct path *path, const struct path *root, char *bf, + char **buf, int *buflen) +{ + struct dentry *dentry; + + bpf_probe_read(&dentry, sizeof(dentry), _(&path->dentry)); + if (d_unlinked(dentry)) { + int error = prepend(buf, buflen, " (deleted)", 10); + if (error) // will never happen as prepend will never return a value != 0 + return error; + } + return prepend_path(path, root, bf, buf, buflen); +} + +/* + * This function returns the path of a dentry and works in a similar + * way to Linux d_path function (https://elixir.bootlin.com/linux/v5.10/source/fs/d_path.c#L262). + * + * Input variables: + * - 'path' is a pointer to a dentry path that we want to resolve + * - 'buf' is the buffer where the path will be stored (this should be always the value of 'buffer_heap_map' map) + * - 'buflen' is the available buffer size to store the path (now 256 in all cases, maybe we can increase that further) + * + * Input buffer layout: + * <-- buflen --> + * ----------------------------- + * | | + * ----------------------------- + * ^ + * | + * buf + * + * + * Output variables: + * - 'buf' is where the path is stored (>= compared to the input argument) + * - 'buflen' the size of the resolved path (0 < buflen <= 256). Will not be negative. If buflen == 0 nothing is written to the buffer. + * - 'error' 0 in case of success or UNRESOLVED_PATH_COMPONENTS in the case where the path is larger than the provided buffer. + * + * Output buffer layout: + * <-- buflen --> + * ----------------------------- + * | /etc/passwd| + * ----------------------------- + * ^ + * | + * buf + * + * ps. The size of the path will be (initial value of buflen) - (return value of buflen) if (buflen != 0) + */ +FUNC_INLINE char * +__d_path_local(const struct path *path, char *buf, int *buflen, int *error) +{ + char *res = buf + *buflen; + struct task_struct *task; + struct fs_struct *fs; + + task = (struct task_struct *)bpf_get_current_task(); + bpf_probe_read(&fs, sizeof(fs), _(&task->fs)); + *error = path_with_deleted(path, _(&fs->root), buf, &res, buflen); + // bpf_printk(); + return res; +} + +/* + * Entry point to the codepath used for path resolution. + * + * This function allocates a buffer from 'buffer_heap_map' map and calls + * __d_path_local. 
After __d_path_local returns, it also does the appropriate + * calculations on the buffer size (check __d_path_local comment). + * + * Returns the buffer where the path is stored. 'buflen' is the size of the + * resolved path (0 < buflen <= 256) and will not be negative. If buflen == 0 + * nothing is written to the buffer (still the value to the buffer is valid). + * 'error' is 0 in case of success or UNRESOLVED_PATH_COMPONENTS in the case + * where the path is larger than the provided buffer. + */ +FUNC_INLINE char * +d_path_local(const struct path *path, int *buflen, int *error) +{ + int zero = 0; + char *buffer = 0; + + buffer = bpf_map_lookup_elem(&buffer_heap_map, &zero); + if (!buffer) + return 0; + + *buflen = MAX_BUF_LEN; + buffer = __d_path_local(path, buffer, buflen, error); + if (*buflen > 0) + *buflen = MAX_BUF_LEN - *buflen; + + return buffer; +} + +FUNC_INLINE __u32 +getcwd(struct msg_process *curr, __u32 offset, __u32 proc_pid) +{ + struct task_struct *task = get_task_from_pid(proc_pid); + struct fs_struct *fs; + int flags = 0, size; + char *buffer; + + bpf_probe_read(&fs, sizeof(fs), _(&task->fs)); + if (!fs) { + curr->flags |= EVENT_ERROR_CWD; + return 0; + } + + buffer = d_path_local(_(&fs->pwd), &size, &flags); + if (!buffer) + return 0; + + asm volatile("%[offset] &= 0x3ff;\n" ::[offset] "+r"(offset) + :); + asm volatile("%[size] &= 0xff;\n" ::[size] "+r"(size) + :); + bpf_probe_read((char *)curr + offset, size, buffer); + + // Unfortunate special case for '/' where nothing was added we need + // to truncate with '\n' for parser. + if (size == 0) + curr->flags |= EVENT_ROOT_CWD; + if (flags & UNRESOLVED_PATH_COMPONENTS) + curr->flags |= EVENT_ERROR_PATH_COMPONENTS; + curr->flags = curr->flags & ~(EVENT_NEEDS_CWD | EVENT_ERROR_CWD); + return (__u32)size; +} + +FUNC_INLINE void event_set_clone(struct msg_process *pid) +{ + pid->flags |= EVENT_CLONE; +} + +FUNC_INLINE void +__get_caps(struct msg_capabilities *msg, const struct cred *cred) +{ + bpf_probe_read(&msg->effective, sizeof(__u64), _(&cred->cap_effective)); + bpf_probe_read(&msg->inheritable, sizeof(__u64), _(&cred->cap_inheritable)); + bpf_probe_read(&msg->permitted, sizeof(__u64), _(&cred->cap_permitted)); +} + +/* @get_current_subj_caps: + * Retrieve current task capabilities from the subjective credentials and + * return it into @msg. + * + * Use this function to report current task capabilities that will be used to + * calculate the security access when acting upon other objects. + * + * Special care must be taken to ensure that @task is "current". + * + * From: https://github.com/torvalds/linux/blob/v6.0/include/linux/cred.h#L88 + * " + * The security context of a task + * + * The parts of the context break down into two categories: + * + * (1) The objective context of a task. These parts are used when some other + * task is attempting to affect this one. + * + * (2) The subjective context. These details are used when the task is acting + * upon another object, be that a file, a task, a key or whatever. + * + * A task has two security pointers. task->real_cred points to the objective + * context that defines that task's actual details. The objective part of this + * context is used whenever that task is acted upon. + * + * task->cred points to the subjective context that defines the details of how + * that task is going to act upon another object. This may be overridden + * temporarily to point to another security context, but normally points to the + * same context as task->real_cred. 
+ * " + */ +FUNC_INLINE void +get_current_subj_caps(struct msg_capabilities *msg, struct task_struct *task) +{ + const struct cred *cred; + + /* Get the task's subjective creds */ + bpf_probe_read(&cred, sizeof(cred), _(&task->cred)); + __get_caps(msg, cred); +} + +FUNC_INLINE void +get_current_subj_creds(struct msg_cred *info, struct task_struct *task) +{ + const struct cred *cred; + + /* Get the task's subjective creds */ + bpf_probe_read(&cred, sizeof(cred), _(&task->cred)); + + bpf_probe_read(&info->uid, sizeof(__u32), _(&cred->uid)); + bpf_probe_read(&info->gid, sizeof(__u32), _(&cred->gid)); + bpf_probe_read(&info->euid, sizeof(__u32), _(&cred->euid)); + bpf_probe_read(&info->egid, sizeof(__u32), _(&cred->egid)); + bpf_probe_read(&info->suid, sizeof(__u32), _(&cred->suid)); + bpf_probe_read(&info->sgid, sizeof(__u32), _(&cred->sgid)); + bpf_probe_read(&info->fsuid, sizeof(__u32), _(&cred->fsuid)); + bpf_probe_read(&info->fsgid, sizeof(__u32), _(&cred->fsgid)); + bpf_probe_read(&info->securebits, sizeof(__u32), _(&cred->securebits)); + + /* Get capabilities */ + __get_caps(&info->caps, cred); +} + +FUNC_INLINE void +get_namespaces(struct msg_ns *msg, struct task_struct *task) +{ + struct nsproxy *nsproxy; + struct nsproxy nsp; + + bpf_probe_read(&nsproxy, sizeof(nsproxy), _(&task->nsproxy)); + bpf_probe_read(&nsp, sizeof(nsp), _(nsproxy)); + + bpf_probe_read(&msg->uts_inum, sizeof(msg->uts_inum), + _(&nsp.uts_ns->ns.inum)); + bpf_probe_read(&msg->ipc_inum, sizeof(msg->ipc_inum), + _(&nsp.ipc_ns->ns.inum)); + bpf_probe_read(&msg->mnt_inum, sizeof(msg->mnt_inum), + _(&nsp.mnt_ns->ns.inum)); + { + struct pid *p = 0; + + bpf_probe_read(&p, sizeof(p), _(&task->thread_pid)); + if (p) { + int level = 0; + struct upid up; + + bpf_probe_read(&level, sizeof(level), _(&p->level)); + bpf_probe_read(&up, sizeof(up), _(&p->numbers[level])); + bpf_probe_read(&msg->pid_inum, sizeof(msg->pid_inum), + _(&up.ns->ns.inum)); + } else + msg->pid_inum = 0; + } + bpf_probe_read(&msg->pid_for_children_inum, + sizeof(msg->pid_for_children_inum), + _(&nsp.pid_ns_for_children->ns.inum)); + bpf_probe_read(&msg->net_inum, sizeof(msg->net_inum), + _(&nsp.net_ns->ns.inum)); + + // this also includes time_ns_for_children + if (bpf_core_field_exists(nsproxy->time_ns)) { + bpf_probe_read(&msg->time_inum, sizeof(msg->time_inum), + _(&nsp.time_ns->ns.inum)); + bpf_probe_read(&msg->time_for_children_inum, + sizeof(msg->time_for_children_inum), + _(&nsp.time_ns_for_children->ns.inum)); + } + + bpf_probe_read(&msg->cgroup_inum, sizeof(msg->cgroup_inum), + _(&nsp.cgroup_ns->ns.inum)); + { + struct mm_struct *mm; + struct user_namespace *user_ns; + + bpf_probe_read(&mm, sizeof(mm), _(&task->mm)); + bpf_probe_read(&user_ns, sizeof(user_ns), _(&mm->user_ns)); + bpf_probe_read(&msg->user_inum, sizeof(msg->user_inum), + _(&user_ns->ns.inum)); + } +} + +/* Gather current task cgroup name */ +FUNC_INLINE __u32 +__event_get_current_cgroup_name(struct cgroup *cgrp, struct msg_k8s *kube) +{ + const char *name; + + /* TODO: check if we have Tetragon cgroup configuration and that the + * tracking cgroup ID is set. If so then query the bpf map for + * the corresponding tracking cgroup name. + */ + + /* TODO: we gather current cgroup context, switch to tracker see above, + * and if that fails for any reason or if we don't have the cgroup name + * of tracker, then we can continue with current context. + */ + + name = get_cgroup_name(cgrp); + if (name) + bpf_probe_read_str(kube->docker_id, KN_NAME_LENGTH, name); + + return name ? 
0 : EVENT_ERROR_CGROUP_NAME; +} + +/** + * __event_get_cgroup_info() Collect cgroup info from current task. + * @task: must be current task. + * @msg: the msg_execve_event where to store collected information. + * + * Checks the tg_conf_map BPF map for cgroup and runtime configurations then + * collects cgroup information from current task. This allows to operate on + * different machines and workflows. + */ +FUNC_INLINE __u32 +__event_get_cgroup_info(struct task_struct *task, struct msg_k8s *kube) +{ + __u64 cgrpfs_magic = 0; + int zero = 0, subsys_idx = 0; + struct cgroup *cgrp; + struct tetragon_conf *conf; + __u32 flags = 0; + + /* Clear cgroup info at the beginning, so if we return early we do not pass previous data */ + memset(kube, 0, sizeof(struct msg_k8s)); + + conf = bpf_map_lookup_elem(&tg_conf_map, &zero); + if (conf) { + /* Select which cgroup version */ + cgrpfs_magic = conf->cgrp_fs_magic; + subsys_idx = conf->tg_cgrp_subsys_idx; + } + + cgrp = get_task_cgroup(task, subsys_idx, &flags); + if (!cgrp) + return 0; + + /* Collect event cgroup ID */ + kube->cgrpid = __tg_get_current_cgroup_id(cgrp, cgrpfs_magic); + if (!kube->cgrpid) + flags |= EVENT_ERROR_CGROUP_ID; + + /* Get the cgroup name of this event. */ + flags |= __event_get_current_cgroup_name(cgrp, kube); + return flags; +} + +#endif //SYSAK_BPF_PROCESS_EVENT_H diff --git a/src/security/bpf_process_event_type.h b/src/security/bpf_process_event_type.h new file mode 100644 index 0000000..f5114fd --- /dev/null +++ b/src/security/bpf_process_event_type.h @@ -0,0 +1,345 @@ +// +// Created by qianlu on 2024/6/20. +// + +#ifndef SYSAK_BPF_PROCESS_EVENT_TYPE_H +#define SYSAK_BPF_PROCESS_EVENT_TYPE_H + +#ifdef __cplusplus +#include +#endif +#include "bpf_common.h" +#include "bpf_cred.h" +#include "msg_type.h" + + +/* Max number of args to parse */ +#define MAXARGS 20 +/* Max length of any given arg */ +#define MAXARGLENGTH 256 +/* This is the absolute buffer size for args and filenames including some + * extra head room so we can append last args string to buffer. The extra + * headroom is an unfortunate result of bounds on offset/size in + * event_args_builder(). + * + * For example given an offset bounds + * + * offset <- (0, 100) + * + * We will read into the buffer using this offset giving a max offset + * of eargs + 100. + * + * args[offset] <- (0, 100) + * + * Now we want to read this with call 45 aka bpf_probe_read_str as follows, + * where 'kernel_struct_arg' is the kernel data struct we are reading. + * + * bpf_probe_read_str(args[offset], size, kernel_struct_arg) + * + * But we have a bit of a problem determining if 'size' is out of array + * range. The math would be, + * + * size = length - offset + * + * Giving the remainder of the buffer, + * + * args offset length + * |---------------|------------------| + * + * |-------size-------| + * + * But verifier math works on bounds so bounds analysis of size is the + * following, + * + * length = 1024 + * offset = (0, 100) + * + * size = length - offset + * size = (1024) - (0, 100) + * size <- (924, 1124) + * + * And verifier throws an error because args[offset + size] with bounds + * anaylsis, + * + * args_(max)[100 + 1024] = args_(max)[1124] + * + * To circumvent this, at least until we teach the verifier about + * dependent variables, create a maxarg value and pad arg buffer with + * it. 
Giving a args buffer of size 'length + pad' with above bounds + * analysis, + * + * size = length - offset + * size = (1024) - (0, 100) + * if size > pad goto done + * size <- (924, 1124) // 1124 < length + pad + * + * Phew all clear now? + */ +#define CWD_MAX 256 +#define BUFFER 1024 +#define SIZEOF_EVENT 56 +#define PADDED_BUFFER \ + (BUFFER + MAXARGLENGTH + SIZEOF_EVENT + SIZEOF_EVENT + CWD_MAX) +/* This is the usable buffer size for args and filenames. It is calculated + * as the (BUFFER SIZE - sizeof(parent) - sizeof(curr) but unfortunately + * preprocess doesn't know types so we do it manually without sizeof(). + */ +#define ARGSBUFFER (BUFFER - SIZEOF_EVENT - SIZEOF_EVENT) +#define __ASM_ARGSBUFFER 976 +#define ARGSBUFFERMASK (ARGSBUFFER - 1) +#define MAXARGMASK (MAXARG - 1) +#define PATHNAME_SIZE 256 + +/* Task flags */ +#ifndef PF_KTHREAD +#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ +#endif + +/* Msg flags */ +#define EVENT_UNKNOWN 0x00 +#define EVENT_EXECVE 0x01 +#define EVENT_EXECVEAT 0x02 +#define EVENT_PROCFS 0x04 +#define EVENT_TRUNC_FILENAME 0x08 +#define EVENT_TRUNC_ARGS 0x10 +#define EVENT_TASK_WALK 0x20 +#define EVENT_MISS 0x40 +#define EVENT_NEEDS_AUID 0x80 +#define EVENT_ERROR_FILENAME 0x100 +#define EVENT_ERROR_ARGS 0x200 +#define EVENT_NEEDS_CWD 0x400 +#define EVENT_NO_CWD_SUPPORT 0x800 +#define EVENT_ROOT_CWD 0x1000 +#define EVENT_ERROR_CWD 0x2000 +#define EVENT_CLONE 0x4000 +#define EVENT_ERROR_SOCK 0x8000 +#define EVENT_ERROR_CGROUP_NAME 0x010000 +#define EVENT_ERROR_CGROUP_KN 0x020000 +#define EVENT_ERROR_CGROUP_SUBSYSCGRP 0x040000 +#define EVENT_ERROR_CGROUP_SUBSYS 0x080000 +#define EVENT_ERROR_CGROUPS 0x100000 +#define EVENT_ERROR_CGROUP_ID 0x200000 +#define EVENT_ERROR_PATH_COMPONENTS 0x400000 +#define EVENT_DATA_FILENAME 0x800000 +#define EVENT_DATA_ARGS 0x1000000 + +#define EVENT_COMMON_FLAG_CLONE 0x01 + +/* Docker IDs are unique at first 12 characters, but we want to get + * 12chars plus any extra prefix used by the container environment. + * Minikube for example prepends 'docker-' to the id. So lets copy + * 32B and assume at least 12B of it is ID info. + */ +#define DOCKER_ID_LENGTH 128 + +struct msg_execve_key { + __u32 pid; // Process TGID + __u8 pad[4]; + __u64 ktime; +}; // All fields aligned so no 'packed' attribute. + +/* This is the struct stored in bpf map to share info between + * different execve hooks. + */ +struct execve_info { + /* The secureexec is to reflect the kernel bprm->secureexec that is exposed + * to userspace through auxiliary vector which can be read from + * /proc/self/auxv or https://man7.org/linux/man-pages/man3/getauxval.3.html + * + * The AT_SECURE of auxv can have a value of 1 or 0 and it is set from + * the bprm->secureexec that is a bit field. + * If bprm->secureexec is 1 then it means executable should be treated securely. + * Most commonly, 1 indicates that the process is executing a set-user-ID + * or set-group-ID binary (so that its real and effective UIDs or GIDs differ + * from one another), or that it gained capabilities by executing a binary file + * that has capabilities (see capabilities(7)). + * Alternatively, a nonzero value may be triggered by a Linux Security Module. + * When this value is nonzero, the dynamic linker disables the use of certain + * environment variables. 
+ * + * The secureexec here can have the following bit flags: + * EXEC_SETUID or EXEC_SETGID + */ + __u32 secureexec; + __u32 i_nlink; /* inode links */ + __u64 i_ino; /* inode number */ +}; + +/* process information + * + * Manually linked to ARGSBUFFER and PADDED_BUFFER if this changes then please + * also change SIZEOF_EVENT. + */ +struct msg_process { + __u32 size; + __u32 pid; // Process TGID + __u32 tid; // Process thread + __u32 nspid; + __u32 secureexec; + __u32 uid; + __u32 auid; + __u32 flags; + __u32 i_nlink; + __u32 pad; + __u64 i_ino; + __u64 ktime; + char *args; +}; // All fields aligned so no 'packed' attribute. + +/* msg_clone_event holds only the necessary fields to construct a new entry from + * the parent after a clone() event. + */ +struct msg_clone_event { + struct msg_common common; + struct msg_execve_key parent; + __u32 tgid; + __u32 tid; + __u32 nspid; + __u32 flags; + __u64 ktime; +} __attribute__((packed)); + +struct exit_info { + __u32 code; + __u32 tid; // Thread ID +}; + +struct msg_exit { + struct msg_common common; + struct msg_execve_key current; + struct exit_info info; +}; // All fields aligned so no 'packed' attribute. + +enum { + ns_uts = 0, + ns_ipc = 1, + ns_mnt = 2, + ns_pid = 3, + ns_pid_for_children = 4, + ns_net = 5, + ns_time = 6, + ns_time_for_children = 7, + ns_cgroup = 8, + ns_user = 9, + + // If you update the value of ns_max_types you + // should also update parseMatchNamespaces() + // in kernel.go + ns_max_types = 10, +}; + +struct msg_ns { + union { + struct { + __u32 uts_inum; + __u32 ipc_inum; + __u32 mnt_inum; + __u32 pid_inum; + __u32 pid_for_children_inum; + __u32 net_inum; + __u32 time_inum; + __u32 time_for_children_inum; + __u32 cgroup_inum; + __u32 user_inum; + }; + __u32 inum[ns_max_types]; + }; +}; // All fields aligned so no 'packed' attribute. + +struct msg_k8s { + __u32 net_ns; + __u32 cid; + __u64 cgrpid; + char docker_id[DOCKER_ID_LENGTH]; +}; // All fields aligned so no 'packed' attribute. + +#define BINARY_PATH_MAX_LEN 256 + +struct heap_exe { + // because of verifier limitations, this has to be 2 * 256 bytes while 256 + // should be theoretically sufficient, and actually is, in unit tests. + char buf[BINARY_PATH_MAX_LEN * 2]; + // offset points to the start of the path in the above buffer. Use offset to + // read the path in the buffer since it's written from the end. + char *off; + __u32 len; + __u32 error; +}; // All fields aligned so no 'packed' attribute. + +struct msg_execve_event { + struct msg_common common; + struct msg_k8s kube; + struct msg_execve_key parent; + __u64 parent_flags; + struct msg_cred creds; + struct msg_ns ns; + struct msg_execve_key cleanup_key; + /* if add anything above please also update the args of + * validate_msg_execve_size() in bpf_execve_event.c */ + union { + struct msg_process process; + char buffer[PADDED_BUFFER]; + }; + /* below fields are not part of the event, serve just as + * heap for execve programs + */ +#ifdef __LARGE_BPF_PROG + struct heap_exe exe; +#endif +}; // All fields aligned so no 'packed' attribute. + +// This structure stores the binary path that was recorded on execve. +// Technically PATH_MAX is 4096 but we limit the length we store since we have +// limits on the length of the string to compare: +// - Artificial limits for full string comparison. +// - Technical limits for prefix and postfix, using LPM_TRIE that have a 256 +// bytes size limit. 
+struct binary { + // length of the path stored in path, this should be < BINARY_PATH_MAX_LEN + // but can contain negative value in case of copy error. + // While s16 would be sufficient, 64 bits are handy for alignment. + __s64 path_length; + // BINARY_PATH_MAX_LEN first bytes of the path + char path[BINARY_PATH_MAX_LEN]; +}; // All fields aligned so no 'packed' attribute + +// The execve_map_value is tracked by the TGID of the thread group +// the msg_execve_key.pid. The thread IDs are recorded on the +// fly and sent with every corresponding event. +struct execve_map_value { + struct msg_execve_key key; + struct msg_execve_key pkey; + __u32 flags; + __u32 nspid; + struct msg_ns ns; + struct msg_capabilities caps; + struct binary bin; +} __attribute__((packed)) __attribute__((aligned(8))); + + +struct msg_throttle { + struct msg_common common; + struct msg_k8s kube; +}; + + +struct cgroup_rate_key { + __u64 id; +}; + +struct cgroup_rate_value { + __u64 curr; + __u64 prev; + __u64 time; + __u64 rate; + __u64 throttled; +}; + +struct cgroup_rate_options { + __u64 events; + __u64 interval; +}; + + + +#endif //SYSAK_BPF_PROCESS_EVENT_TYPE_H diff --git a/src/security/bpf_rate.h b/src/security/bpf_rate.h new file mode 100644 index 0000000..c46cebf --- /dev/null +++ b/src/security/bpf_rate.h @@ -0,0 +1,140 @@ +// +// Created by qianlu on 2024/6/16. +// + +#ifndef SYSAK_BPF_RATE_H +#define SYSAK_BPF_RATE_H + +#include "../coolbpf.h" +#include +#include + +#include "msg_type.h" +#include "bpf_process_event_type.h" + + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 32768); + __type(key, struct cgroup_rate_key); + __type(value, struct cgroup_rate_value); +} cgroup_rate_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct cgroup_rate_options); +} cgroup_rate_options_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct msg_throttle); +} throttle_heap_map SEC(".maps"); + +FUNC_INLINE void send_throttle(void *ctx, struct msg_k8s *kube, __u64 time) +{ + struct msg_throttle *msg; + size_t size = sizeof(*msg); + + msg = bpf_map_lookup_elem(&throttle_heap_map, &(__u32){ 0 }); + if (!msg) + return; + + msg->common.size = size; + msg->common.ktime = time; + msg->common.op = MSG_OP_THROTTLE; + msg->common.flags = 0; + + __builtin_memcpy(&msg->kube, kube, sizeof(*kube)); + + perf_event_output_metric(ctx, MSG_OP_THROTTLE, &tcpmon_map, + BPF_F_CURRENT_CPU, msg, size); +} + +FUNC_INLINE bool cgroup_rate(void *ctx, struct msg_k8s *kube, __u64 time) +{ + struct cgroup_rate_options *opt; + struct cgroup_rate_key key = { + .id = kube->cgrpid, + }; + struct cgroup_rate_value *val; + __u64 delta, interval, slide; + __u32 zero = 0; + + opt = bpf_map_lookup_elem(&cgroup_rate_options_map, &zero); + if (!opt) + return true; + + interval = opt->interval; + if (!interval) + return true; + + val = bpf_map_lookup_elem(&cgroup_rate_map, &key); + if (!val) { + struct cgroup_rate_value new_value = { + .time = (time / interval) * interval, + .curr = 1, + }; + + bpf_map_update_elem(&cgroup_rate_map, &key, &new_value, 0); + return true; + } + + /* + * We split the time in interval windows and keep track of events + * of events count in current (val->curr) and previous (val->prev) + * intervals. 
+ */ + + delta = time - val->time; + if (delta > interval) { + if (delta > 2 * interval) { + val->prev = 0; + val->time = (time / interval) * interval; + } else { + val->prev = val->curr; + val->time += interval; + } + val->curr = 0; + } + + val->curr++; + + /* + * We compute the size of the slide window in previous interval and + * based on that we compute partial amount of events from previous + * interval window. Then we add current interval count and we have + * rate value. + * + * val->time + * | + * <--- interval ----->|<--- interval ----->| + * | + * val->prev | val->curr + * |-------------------|----------- + * val->rate + * |-------------------| + * time + */ + + slide = interval - (time - val->time); + val->rate = (slide * val->prev) / interval + val->curr; + + if (!val->throttled && val->rate >= opt->events) { + val->throttled = time; + send_throttle(ctx, kube, time); + } + + return !val->throttled; +} + +FUNC_INLINE void cgroup_rate_del(__u64 cgroupid) +{ + bpf_map_delete_elem(&cgroup_rate_map, &cgroupid); +} + + +#endif //SYSAK_BPF_RATE_H diff --git a/src/security/bpf_task.h b/src/security/bpf_task.h new file mode 100644 index 0000000..550113d --- /dev/null +++ b/src/security/bpf_task.h @@ -0,0 +1,180 @@ +// +// Created by qianlu on 2024/6/16. +// + +#ifndef SYSAK_BPF_TASK_H +#define SYSAK_BPF_TASK_H + +#include "../coolbpf.h" +#include +#include + +#include "compiler.h" +#include "bpf_event.h" +#include "generic.h" + +/* __d_path_local flags */ +// #define UNRESOLVED_MOUNT_POINTS 0x01 // (deprecated) +// this error is returned by __d_path_local in the following cases: +// - the path walk did not conclude (too many dentry) +// - the path was too long to fit in the buffer +#define UNRESOLVED_PATH_COMPONENTS 0x02 + +#ifdef __LARGE_BPF_PROG +#define PROBE_CWD_READ_ITERATIONS 128 +#else +#define PROBE_CWD_READ_ITERATIONS 11 +#endif + +FUNC_INLINE struct task_struct *get_parent(struct task_struct *t) +{ + struct task_struct *task; + + /* Read the real parent */ + bpf_probe_read(&task, sizeof(task), _(&t->real_parent)); + if (!task) + return 0; + return task; +} + +FUNC_INLINE struct task_struct *get_task_from_pid(__u32 pid) +{ + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + __u32 cpid = 0; + int i; + +#define TASK_PID_LOOP 20 +#pragma unroll + for (i = 0; i < TASK_PID_LOOP; i++) { + if (!task) { + i = TASK_PID_LOOP; + continue; + } + bpf_probe_read(&cpid, sizeof(cpid), _(&task->tgid)); + if (cpid == pid) { + i = TASK_PID_LOOP; + continue; + } + task = get_parent(task); + } + if (cpid != pid) + return 0; + return task; +} + +FUNC_INLINE __u32 get_task_pid_vnr(void) +{ + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + int thread_pid_exists; + unsigned int level; + struct upid upid; + struct pid *pid; + int upid_sz; + + thread_pid_exists = bpf_core_field_exists(task->thread_pid); + if (thread_pid_exists) { + bpf_probe_read(&pid, sizeof(pid), _(&task->thread_pid)); + if (!pid) + return 0; + } else { +// struct pid_link link; +// int link_sz = bpf_core_field_size(task->pids); +// +// /* 4.14 verifier did not prune this branch even though we +// * have the if (0) above after BTF exists check. So it will +// * try to run this bpf_probe_read and throw an error. So lets +// * sanitize it for the verifier. 
+// */ +// if (!thread_pid_exists) +// link_sz = +// 24; // voodoo magic, hard-code 24 to init stack +// bpf_probe_read(&link, link_sz, +// (void *)_(&task->pids) + (PIDTYPE_PID * link_sz)); +// pid = link.pid; + } + upid_sz = bpf_core_field_size(pid->numbers[0]); + bpf_probe_read(&level, sizeof(level), _(&pid->level)); + if (level < 1) + return 0; + bpf_probe_read(&upid, upid_sz, + (void *)_(&pid->numbers) + (level * upid_sz)); + return upid.nr; +} + +FUNC_INLINE __u32 event_find_parent_pid(struct task_struct *t) +{ + struct task_struct *task = get_parent(t); + __u32 pid; + + if (!task) + return 0; + bpf_probe_read(&pid, sizeof(pid), _(&task->tgid)); + return pid; +} + +FUNC_INLINE struct execve_map_value * +__event_find_parent(struct task_struct *task) +{ + __u32 pid; + struct execve_map_value *value = 0; + int i; + +#pragma unroll + for (i = 0; i < 4; i++) { + bpf_probe_read(&task, sizeof(task), _(&task->real_parent)); + if (!task) + break; + bpf_probe_read(&pid, sizeof(pid), _(&task->tgid)); + value = execve_map_get_noinit(pid); + if (value && value->key.ktime != 0) + return value; + } + return 0; +} + +FUNC_INLINE struct execve_map_value *event_find_parent(void) +{ + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + + return __event_find_parent(task); +} + +FUNC_INLINE void +event_minimal_parent(struct msg_execve_event *event, struct task_struct *task) +{ + event->parent.pid = event_find_parent_pid(task); + event->parent.ktime = 0; + event->parent_flags = EVENT_MISS; +} + +FUNC_INLINE void event_minimal_curr(struct execve_map_value *event) +{ + event->key.pid = (bpf_get_current_pid_tgid() >> 32); + event->key.ktime = 0; // should we insert a time? + event->flags = EVENT_MISS; +} + +FUNC_INLINE struct execve_map_value *event_find_curr(__u32 *ppid, bool *walked) +{ + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + struct execve_map_value *value = 0; + int i; + __u32 pid; + +#pragma unroll + for (i = 0; i < 4; i++) { + bpf_probe_read(&pid, sizeof(pid), _(&task->tgid)); + value = execve_map_get_noinit(pid); + if (value && value->key.ktime != 0) + break; + value = 0; + *walked = 1; + bpf_probe_read(&task, sizeof(task), _(&task->real_parent)); + if (!task) + break; + } + *ppid = pid; + return value; +} + +#endif //SYSAK_BPF_TASK_H diff --git a/src/security/compiler.h b/src/security/compiler.h new file mode 100644 index 0000000..2b105d6 --- /dev/null +++ b/src/security/compiler.h @@ -0,0 +1,17 @@ +// +// Created by qianlu on 2024/6/16. +// + +#ifndef SYSAK_COMPILER_H +#define SYSAK_COMPILER_H + +#ifdef __V61_BPF_PROG +#define FUNC_LOCAL static __attribute__((noinline)) __attribute__((__unused__)) +#define FUNC_INLINE static inline __attribute__((always_inline)) +#else +/* Older kernels have all functions inlined. */ +#define FUNC_LOCAL static inline __attribute__((always_inline)) +#define FUNC_INLINE static inline __attribute__((always_inline)) +#endif + +#endif //SYSAK_COMPILER_H diff --git a/src/security/data_event.h b/src/security/data_event.h new file mode 100644 index 0000000..ec16add --- /dev/null +++ b/src/security/data_event.h @@ -0,0 +1,221 @@ +// +// Created by qianlu on 2024/6/16. 
+// + +#ifndef SYSAK_DATA_EVENT_H +#define SYSAK_DATA_EVENT_H + +#include +#include "data_msg.h" +#include "bpf_common.h" + +FUNC_LOCAL long +__do_bytes(void *ctx, struct msg_data *msg, unsigned long uptr, size_t bytes) +{ + int err; + + /* Code movement from clang forces us to inline bounds checks here */ + asm volatile goto( + "if %[bytes] < 0 goto %l[b]\n;" + "if %[bytes] < " XSTR(MSG_DATA_ARG_LEN) " goto %l[a]\n;" + : + : [bytes] "+r"(bytes)::a, b); + bytes = MSG_DATA_ARG_LEN; + a: + // < 5.3 verifier still requires value masking like 'val &= xxx' +#ifndef __LARGE_BPF_PROG + asm volatile("%[bytes] &= 0x3fff;\n" + : + : [bytes] "+r"(bytes) + :); +#endif + err = bpf_probe_read(&msg->arg[0], bytes, (char *)uptr); + if (err < 0) + return err; + + msg->common.size = offsetof(struct msg_data, arg) + bytes; + perf_event_output_metric(ctx, MSG_OP_DATA, &tcpmon_map, BPF_F_CURRENT_CPU, msg, msg->common.size); + return bytes; + b: + return -1; +} + +FUNC_LOCAL long +do_bytes(void *ctx, struct msg_data *msg, unsigned long arg, size_t bytes) +{ + size_t rd_bytes = 0; + int err, i __maybe_unused; + +#ifdef __LARGE_BPF_PROG + for (i = 0; i < 10; i++) { + err = __do_bytes(ctx, msg, arg + rd_bytes, bytes - rd_bytes); + if (err < 0) + return err; + rd_bytes += err; + if (rd_bytes == bytes) + return rd_bytes; + } +#else +#define BYTES_COPY \ + err = __do_bytes(ctx, msg, arg + rd_bytes, bytes - rd_bytes); \ + if (err < 0) \ + return err; \ + rd_bytes += err; \ + if (rd_bytes == bytes) \ + return rd_bytes; + +#define BYTES_COPY_5 BYTES_COPY BYTES_COPY BYTES_COPY BYTES_COPY BYTES_COPY + + BYTES_COPY_5 + BYTES_COPY_5 + +#undef BYTES_COPY_5 +#endif /* __LARGE_BPF_PROG */ + + /* leftover */ + return rd_bytes; +} + +FUNC_LOCAL long +__do_str(void *ctx, struct msg_data *msg, unsigned long arg, bool *done) +{ + size_t size, max = sizeof(msg->arg) - 1; + long ret; + + /* Code movement from clang forces us to inline bounds checks here */ + asm volatile("%[max] &= 0x7fff;\n" + "if %[max] < 32736 goto +1\n;" + "%[max] = 32736;\n" + : + : [max] "+r"(max) + :); + + ret = bpf_probe_read_str(&msg->arg[0], max, (char *)arg); + if (ret < 0) + return ret; + + *done = ret != max; + if (ret == 0) + return 0; + /* cut out the zero byte */ + ret -= 1; + + size = ret + offsetof(struct msg_data, arg); + /* Code movement from clang forces us to inline bounds checks here */ + asm volatile("%[size] &= 0x7fff;\n" + : + : [size] "+r"(size) + :); + msg->common.size = size; + perf_event_output_metric(ctx, MSG_OP_DATA, &tcpmon_map, BPF_F_CURRENT_CPU, msg, size); + return ret; +} + +FUNC_LOCAL long +do_str(void *ctx, struct msg_data *msg, unsigned long arg, + size_t bytes __maybe_unused) +{ + size_t rd_bytes = 0; + bool done = false; + long ret; + int i; + +#define __CNT 2 +#pragma unroll + for (i = 0; i < __CNT; i++) { + ret = __do_str(ctx, msg, arg + rd_bytes, &done); + if (ret < 0) + return ret; + rd_bytes += ret; + if (done) + break; + } +#undef __CNT + + /* we have no idea what's string leftover */ + return rd_bytes; +} + +FUNC_INLINE size_t data_event( + void *ctx, struct data_event_desc *desc, unsigned long uptr, + size_t size, struct bpf_map_def *heap, + long (*do_data_event)(void *, struct msg_data *, unsigned long, size_t)) +{ + struct msg_data *msg; + int zero = 0, err; + + msg = bpf_map_lookup_elem(heap, &zero); + if (!msg) + return 0; + + msg->common.op = MSG_OP_DATA; + msg->common.flags = 0; + msg->common.pad[0] = 0; + msg->common.pad[1] = 0; + + msg->id.pid = bpf_get_current_pid_tgid(); + if (msg->id.pid == (__u64)-22) // 
-EINVAL -- current == NULL + msg->id.pid = PT_REGS_FP_CORE((struct pt_regs *)ctx); + + msg->id.time = bpf_ktime_get_ns(); + desc->id = msg->id; + + /* + * Notes: + * The @size argument is valid only for do_bytes, it's -1 * for do_str. + * The do_data_event callback returns size of posted data. + * Leftover for data_event_str is always 0, because we don't know + * how much more was there to copy. + */ + err = do_data_event(ctx, msg, uptr, size); + + if (err < 0) { + desc->error = err; + desc->pad = 0; + desc->leftover = 0; + desc->size = 0; + } else { + desc->error = 0; + desc->pad = 0; + desc->leftover = size == -1 ? 0 : size - err; + desc->size = err; + } + return sizeof(*desc); +} + +/** + * data_event_bytes - sends data event for raw data + * + * @uptr: pointer to data + * @size: size of the data + * + * Sends data event with raw data specified by @uptr and @size and + * writes status values into @desc object. + * + * Returns size of struct @desc object or 0 in case of error. + */ +FUNC_LOCAL size_t +data_event_bytes(void *ctx, struct data_event_desc *desc, unsigned long uptr, + size_t size, struct bpf_map_def *heap) +{ + return data_event(ctx, desc, uptr, size, heap, do_bytes); +} + +/** + * data_event_str - sends data event for string + * + * @uptr: pointer to string + * + * Sends data event with string specified by @uptr and writes status + * values into @desc object. + * + * Returns size of struct @desc object or 0 in case of error. + */ +FUNC_LOCAL size_t +data_event_str(void *ctx, struct data_event_desc *desc, unsigned long uptr, + struct bpf_map_def *heap) +{ + return data_event(ctx, desc, uptr, -1, heap, do_str); +} + +#endif //SYSAK_DATA_EVENT_H diff --git a/src/security/data_msg.h b/src/security/data_msg.h new file mode 100644 index 0000000..bc22848 --- /dev/null +++ b/src/security/data_msg.h @@ -0,0 +1,42 @@ +// +// Created by qianlu on 2024/6/16. +// + +#ifndef SYSAK_DATA_MSG_H +#define SYSAK_DATA_MSG_H + +#ifdef __cplusplus +#include +#endif + +#include "bpf_common.h" + +#define MSG_DATA_ARG_LEN 32736 + +struct data_event_id { + __u64 pid; + __u64 time; +} __attribute__((packed)); + +struct data_event_desc { + __s32 error; + __u32 pad; + __u32 leftover; + __u32 size; + struct data_event_id id; +} __attribute__((packed)); + +struct msg_data { + struct msg_common common; + struct data_event_id id; + /* To have a fast way to check buffer size we use 32736 (MSG_DATA_ARG_LEN) + * as arg size, which is: + * 0x8000 - offsetof(struct msg_kprobe_arg, arg) + * so we can make verifier happy with: + * 'size &= 0x7fff' check + */ + char arg[MSG_DATA_ARG_LEN]; +} __attribute__((packed)); + + +#endif //SYSAK_DATA_MSG_H diff --git a/src/security/environ_conf.h b/src/security/environ_conf.h new file mode 100644 index 0000000..f0560ba --- /dev/null +++ b/src/security/environ_conf.h @@ -0,0 +1,40 @@ +// +// Created by qianlu on 2024/6/16. 
+// + +#ifndef SYSAK_ENVIRON_CONF_H +#define SYSAK_ENVIRON_CONF_H + + +/* bpf runtime log levels that follow Golang logrus levels + * https://pkg.go.dev/github.com/sirupsen/logrus#Level + */ +enum { + LOG_ERROR_LEVEL = 2, + LOG_WARN_LEVEL = 3, + LOG_INFO_LEVEL = 4, + LOG_DEBUG_LEVEL = 5, + LOG_TRACE_LEVEL = 6, +}; + +/* Tetragon runtime configuration */ +struct tetragon_conf { + __u32 loglevel; /* Tetragon log level */ + __u32 pid; /* Tetragon pid for debugging purpose */ + __u32 nspid; /* Tetragon pid in namespace for debugging purpose */ + __u32 tg_cgrp_hierarchy; /* Tetragon tracked hierarchy ID */ + __u32 tg_cgrp_subsys_idx; /* Tetragon tracked cgroup subsystem state index at compile time */ + __u32 tg_cgrp_level; /* Tetragon cgroup level */ + __u64 tg_cgrpid; /* Tetragon current cgroup ID to avoid filtering blocking itself */ + __u64 cgrp_fs_magic; /* Cgroupv1 or Cgroupv2 */ +}; // All fields aligned so no 'packed' attribute. + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __s32); + __type(value, struct tetragon_conf); +} tg_conf_map SEC(".maps"); + + +#endif //SYSAK_ENVIRON_CONF_H diff --git a/src/security/filter.h b/src/security/filter.h new file mode 100644 index 0000000..1705e1d --- /dev/null +++ b/src/security/filter.h @@ -0,0 +1,37 @@ +#pragma once +#include +#include +#include +#include +#include "../coolbpf.h" +#include "type.h" + + +// #define POLICY_FILTER_MAX_FILTERS 128 +// #define FILTER_SIZE 4096 + + +// struct filter_map_value { +// unsigned char buf[FILTER_SIZE]; +// }; + +/* Arrays of size 1 will be rewritten to direct loads in verifier */ +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, SECURE_FUNCS_MAX); + __type(key, int); + __type(value, struct selector_filters); +} filter_map SEC(".maps"); + +// struct { +// __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); +// __uint(max_entries, POLICY_FILTER_MAX_FILTERS); +// __uint(key_size, sizeof(u32)); /* call name id */ +// __array( +// values, struct { +// __uint(type, BPF_MAP_TYPE_ARRAY); +// __uint(max_entries, 1); +// __type(key, __u64); +// __type(value, __u8); +// }); +// } filter_maps SEC(".maps"); \ No newline at end of file diff --git a/src/security/generic.h b/src/security/generic.h new file mode 100644 index 0000000..9960c6b --- /dev/null +++ b/src/security/generic.h @@ -0,0 +1,67 @@ +// +// Created by qianlu on 2024/6/16. 
+// + +#ifndef SYSAK_GENERIC_H +#define SYSAK_GENERIC_H + + +#include "bpf_common.h" +#include "msg_type.h" +#include "process.h" + +/* The namespace and capability changes filters require later kernels */ +#ifdef __LARGE_BPF_PROG +#define __NS_CHANGES_FILTER +#define __CAP_CHANGES_FILTER +#endif + +#define FILTER_SIZE 4096 + +#define MAX_POSSIBLE_ARGS 5 +#define MAX_POSSIBLE_SELECTORS 31 +#define SELECTORS_ACTIVE 31 +#define MAX_CONFIGURED_SELECTORS MAX_POSSIBLE_SELECTORS + 1 + +struct msg_selector_data { + __u64 curr; + bool pass; + bool active[MAX_CONFIGURED_SELECTORS]; +#ifdef __NS_CHANGES_FILTER + __u64 match_ns; +#endif +#ifdef __CAP_CHANGES_FILTER + __u64 match_cap; +#endif + bool is32BitSyscall; +}; + +struct msg_generic_kprobe { + struct msg_common common; + struct msg_execve_key current; + struct msg_ns ns; + struct msg_capabilities caps; + __u64 func_id; + __u64 retprobe_id; + __u64 action; + __u32 action_arg_id; // only one URL or FQDN action can be fired per match + __u32 tid; // Thread ID that triggered the event + __u64 kernel_stack_id; // Kernel stack trace ID on u32 and potential error, see flag in msg_common.flags + __u64 user_stack_id; // User Stack trace ID + /* anything above is shared with the userspace so it should match structs MsgGenericKprobe and MsgGenericTracepoint in Go */ + char args[24000]; + unsigned long a0, a1, a2, a3, a4; + long argsoff[MAX_POSSIBLE_ARGS]; + struct msg_selector_data sel; + __u32 idx; // attach cookie index + __u32 tailcall_index_process; // recursion index for generic_process_event + __u32 tailcall_index_selector; // recursion index for filter_read_arg + int pass; +}; + +FUNC_INLINE size_t generic_kprobe_common_size(void) +{ + return offsetof(struct msg_generic_kprobe, args); +} + +#endif //SYSAK_GENERIC_H diff --git a/src/security/int_maps.h b/src/security/int_maps.h new file mode 100644 index 0000000..40bfdf8 --- /dev/null +++ b/src/security/int_maps.h @@ -0,0 +1,38 @@ +#ifndef INT_MAPS_H__ +#define INT_MAPS_H__ + +#include +#include +#include +#include +#include "../coolbpf.h" +#include "type.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, INT_MAPS_OUTER_MAX_ENTRIES); + __uint(key_size, sizeof(__u32)); + __array( + values, struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, INT_MAPS_INNER_MAX_ENTRIES); + __type(key, __u32); + __type(value, __u8); + }); +} port_maps SEC(".maps"); + + +// struct { +// __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); +// __uint(max_entries, INT_MAPS_OUTER_MAX_ENTRIES); +// __uint(key_size, sizeof(__u32)); +// __array( +// values, struct { +// __uint(type, BPF_MAP_TYPE_HASH); +// __uint(max_entries, 1); +// __type(key, __u32); +// __type(value, __u8); +// }); +// } dport_maps SEC(".maps"); + +#endif // INT_MAPS_H__ diff --git a/src/security/msg_type.h b/src/security/msg_type.h new file mode 100644 index 0000000..c507ab7 --- /dev/null +++ b/src/security/msg_type.h @@ -0,0 +1,53 @@ +// +// Created by qianlu on 2024/6/16. +// + +#ifndef SYSAK_MSG_TYPE_H +#define SYSAK_MSG_TYPE_H + + +/* Msg Types */ +enum msg_ops { + MSG_OP_UNDEF = 0, + MSG_OP_EXECVE = 5, + MSG_OP_EXIT = 7, + MSG_OP_GENERIC_KPROBE = 13, + MSG_OP_GENERIC_TRACEPOINT = 14, + MSG_OP_GENERIC_UPROBE = 15, + + MSG_OP_TEST = 254, + + /* These ops went through a few iterations of experimentation + * and some of those experiments exist in the wild. So just + * bump deprecated space to some large value and start over. + * This way its easy to phase out the old ones. 
And any new + * ops are clear to see in database and logs. + */ + MSG_OP_DEPRECATE_SPACE = 1000, + + MSG_OP_CLONE = 23, + + MSG_OP_DATA = 24, + + MSG_OP_CGROUP = 25, + + MSG_OP_LOADER = 26, + + MSG_OP_THROTTLE = 27, + + MSG_OP_MAX, +}; + +enum msg_cgroup_ops { + MSG_OP_CGROUP_UNDEF = 0, + MSG_OP_CGROUP_MKDIR = + 1, /* cgroup_mkdir tracepoint, used for debugging */ + MSG_OP_CGROUP_RMDIR = + 2, /* cgroup_rmdir tracepoint, used for debugging */ + MSG_OP_CGROUP_RELEASE = + 3, /* cgroup_release tracepoint, used for debugging */ + MSG_OP_CGROUP_ATTACH_TASK = 10, /* cgroup_attach_task tracepoint */ +}; + + +#endif //SYSAK_MSG_TYPE_H diff --git a/src/security/process.h b/src/security/process.h new file mode 100644 index 0000000..733b1c5 --- /dev/null +++ b/src/security/process.h @@ -0,0 +1,607 @@ +// +// Created by qianlu on 2024/6/16. +// + +#ifndef SYSAK_PROCESS_H +#define SYSAK_PROCESS_H + +#include "../coolbpf.h" +#include +#include +#include "bpf_event.h" +#include "bpf_cred.h" +#include "bpf_common.h" +#include "compiler.h" +#include "api.h" + +#include "type.h" +#include "bpf_process_event_type.h" + +// +///* Max number of args to parse */ +//#define MAXARGS 20 +///* Max length of any given arg */ +//#define MAXARGLENGTH 256 +///* This is the absolute buffer size for args and filenames including some +// * extra head room so we can append last args string to buffer. The extra +// * headroom is an unfortunate result of bounds on offset/size in +// * event_args_builder(). +// * +// * For example given an offset bounds +// * +// * offset <- (0, 100) +// * +// * We will read into the buffer using this offset giving a max offset +// * of eargs + 100. +// * +// * args[offset] <- (0, 100) +// * +// * Now we want to read this with call 45 aka bpf_probe_read_str as follows, +// * where 'kernel_struct_arg' is the kernel data struct we are reading. +// * +// * bpf_probe_read_str(args[offset], size, kernel_struct_arg) +// * +// * But we have a bit of a problem determining if 'size' is out of array +// * range. The math would be, +// * +// * size = length - offset +// * +// * Giving the remainder of the buffer, +// * +// * args offset length +// * |---------------|------------------| +// * +// * |-------size-------| +// * +// * But verifier math works on bounds so bounds analysis of size is the +// * following, +// * +// * length = 1024 +// * offset = (0, 100) +// * +// * size = length - offset +// * size = (1024) - (0, 100) +// * size <- (924, 1124) +// * +// * And verifier throws an error because args[offset + size] with bounds +// * anaylsis, +// * +// * args_(max)[100 + 1024] = args_(max)[1124] +// * +// * To circumvent this, at least until we teach the verifier about +// * dependent variables, create a maxarg value and pad arg buffer with +// * it. Giving a args buffer of size 'length + pad' with above bounds +// * analysis, +// * +// * size = length - offset +// * size = (1024) - (0, 100) +// * if size > pad goto done +// * size <- (924, 1124) // 1124 < length + pad +// * +// * Phew all clear now? +// */ +//#define CWD_MAX 256 +//#define BUFFER 1024 +//#define SIZEOF_EVENT 56 +//#define PADDED_BUFFER \ +// (BUFFER + MAXARGLENGTH + SIZEOF_EVENT + SIZEOF_EVENT + CWD_MAX) +///* This is the usable buffer size for args and filenames. It is calculated +// * as the (BUFFER SIZE - sizeof(parent) - sizeof(curr) but unfortunately +// * preprocess doesn't know types so we do it manually without sizeof(). 
+// */ +//#define ARGSBUFFER (BUFFER - SIZEOF_EVENT - SIZEOF_EVENT) +//#define __ASM_ARGSBUFFER 976 +//#define ARGSBUFFERMASK (ARGSBUFFER - 1) +//#define MAXARGMASK (MAXARG - 1) +//#define PATHNAME_SIZE 256 +// +///* Task flags */ +//#ifndef PF_KTHREAD +//#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ +//#endif +// +///* Msg flags */ +//#define EVENT_UNKNOWN 0x00 +//#define EVENT_EXECVE 0x01 +//#define EVENT_EXECVEAT 0x02 +//#define EVENT_PROCFS 0x04 +//#define EVENT_TRUNC_FILENAME 0x08 +//#define EVENT_TRUNC_ARGS 0x10 +//#define EVENT_TASK_WALK 0x20 +//#define EVENT_MISS 0x40 +//#define EVENT_NEEDS_AUID 0x80 +//#define EVENT_ERROR_FILENAME 0x100 +//#define EVENT_ERROR_ARGS 0x200 +//#define EVENT_NEEDS_CWD 0x400 +//#define EVENT_NO_CWD_SUPPORT 0x800 +//#define EVENT_ROOT_CWD 0x1000 +//#define EVENT_ERROR_CWD 0x2000 +//#define EVENT_CLONE 0x4000 +//#define EVENT_ERROR_SOCK 0x8000 +//#define EVENT_ERROR_CGROUP_NAME 0x010000 +//#define EVENT_ERROR_CGROUP_KN 0x020000 +//#define EVENT_ERROR_CGROUP_SUBSYSCGRP 0x040000 +//#define EVENT_ERROR_CGROUP_SUBSYS 0x080000 +//#define EVENT_ERROR_CGROUPS 0x100000 +//#define EVENT_ERROR_CGROUP_ID 0x200000 +//#define EVENT_ERROR_PATH_COMPONENTS 0x400000 +//#define EVENT_DATA_FILENAME 0x800000 +//#define EVENT_DATA_ARGS 0x1000000 +// +//#define EVENT_COMMON_FLAG_CLONE 0x01 +// +///* Docker IDs are unique at first 12 characters, but we want to get +// * 12chars plus any extra prefix used by the container environment. +// * Minikube for example prepends 'docker-' to the id. So lets copy +// * 32B and assume at least 12B of it is ID info. +// */ +//#define DOCKER_ID_LENGTH 128 +// +//struct msg_execve_key { +// __u32 pid; // Process TGID +// __u8 pad[4]; +// __u64 ktime; +//}; // All fields aligned so no 'packed' attribute. +// +///* This is the struct stored in bpf map to share info between +// * different execve hooks. +// */ +//struct execve_info { +// /* The secureexec is to reflect the kernel bprm->secureexec that is exposed +// * to userspace through auxiliary vector which can be read from +// * /proc/self/auxv or https://man7.org/linux/man-pages/man3/getauxval.3.html +// * +// * The AT_SECURE of auxv can have a value of 1 or 0 and it is set from +// * the bprm->secureexec that is a bit field. +// * If bprm->secureexec is 1 then it means executable should be treated securely. +// * Most commonly, 1 indicates that the process is executing a set-user-ID +// * or set-group-ID binary (so that its real and effective UIDs or GIDs differ +// * from one another), or that it gained capabilities by executing a binary file +// * that has capabilities (see capabilities(7)). +// * Alternatively, a nonzero value may be triggered by a Linux Security Module. +// * When this value is nonzero, the dynamic linker disables the use of certain +// * environment variables. +// * +// * The secureexec here can have the following bit flags: +// * EXEC_SETUID or EXEC_SETGID +// */ +// __u32 secureexec; +// __u32 i_nlink; /* inode links */ +// __u64 i_ino; /* inode number */ +//}; +// +///* process information +// * +// * Manually linked to ARGSBUFFER and PADDED_BUFFER if this changes then please +// * also change SIZEOF_EVENT. +// */ +//struct msg_process { +// __u32 size; +// __u32 pid; // Process TGID +// __u32 tid; // Process thread +// __u32 nspid; +// __u32 secureexec; +// __u32 uid; +// __u32 auid; +// __u32 flags; +// __u32 i_nlink; +// __u32 pad; +// __u64 i_ino; +// __u64 ktime; +// char *args; +//}; // All fields aligned so no 'packed' attribute. 
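+//
+// Illustrative sketch only (the EX_* names are invented for this comment and
+// are not used anywhere in this file): the padded-buffer scheme explained
+// above is normally combined with masking the runtime offset and size against
+// compile-time constants, so the verifier sees closed bounds before the
+// probe read, e.g.:
+//
+//	#define EX_BUFFER	1024
+//	#define EX_MAXARGLENGTH	256
+//	struct ex_event { char args[EX_BUFFER + EX_MAXARGLENGTH]; };
+//
+//	static __always_inline long
+//	ex_read_args(struct ex_event *ev, const char *src,
+//		     unsigned long offset, unsigned long len)
+//	{
+//		offset &= EX_MAXARGLENGTH - 1;	/* offset bounded to [0, 255]  */
+//		len &= EX_BUFFER - 1;		/* size bounded to [0, 1023]   */
+//		/* worst case touches args[255 + 1022], still inside the padding */
+//		return bpf_probe_read(&ev->args[offset], len, src);
+//	}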
+// +///* msg_clone_event holds only the necessary fields to construct a new entry from +// * the parent after a clone() event. +// */ +//struct msg_clone_event { +// struct msg_common common; +// struct msg_execve_key parent; +// __u32 tgid; +// __u32 tid; +// __u32 nspid; +// __u32 flags; +// __u64 ktime; +//} __attribute__((packed)); +// +//struct exit_info { +// __u32 code; +// __u32 tid; // Thread ID +//}; +// +//struct msg_exit { +// struct msg_common common; +// struct msg_execve_key current; +// struct exit_info info; +//}; // All fields aligned so no 'packed' attribute. +// +//enum { +// ns_uts = 0, +// ns_ipc = 1, +// ns_mnt = 2, +// ns_pid = 3, +// ns_pid_for_children = 4, +// ns_net = 5, +// ns_time = 6, +// ns_time_for_children = 7, +// ns_cgroup = 8, +// ns_user = 9, +// +// // If you update the value of ns_max_types you +// // should also update parseMatchNamespaces() +// // in kernel.go +// ns_max_types = 10, +//}; +// +//struct msg_ns { +// union { +// struct { +// __u32 uts_inum; +// __u32 ipc_inum; +// __u32 mnt_inum; +// __u32 pid_inum; +// __u32 pid_for_children_inum; +// __u32 net_inum; +// __u32 time_inum; +// __u32 time_for_children_inum; +// __u32 cgroup_inum; +// __u32 user_inum; +// }; +// __u32 inum[ns_max_types]; +// }; +//}; // All fields aligned so no 'packed' attribute. +// +//struct msg_k8s { +// __u32 net_ns; +// __u32 cid; +// __u64 cgrpid; +// char docker_id[DOCKER_ID_LENGTH]; +//}; // All fields aligned so no 'packed' attribute. +// +//#define BINARY_PATH_MAX_LEN 256 +// +//struct heap_exe { +// // because of verifier limitations, this has to be 2 * 256 bytes while 256 +// // should be theoretically sufficient, and actually is, in unit tests. +// char buf[BINARY_PATH_MAX_LEN * 2]; +// // offset points to the start of the path in the above buffer. Use offset to +// // read the path in the buffer since it's written from the end. +// char *off; +// __u32 len; +// __u32 error; +//}; // All fields aligned so no 'packed' attribute. +// +//struct msg_execve_event { +// struct msg_common common; +// struct msg_k8s kube; +// struct msg_execve_key parent; +// __u64 parent_flags; +// struct msg_cred creds; +// struct msg_ns ns; +// struct msg_execve_key cleanup_key; +// /* if add anything above please also update the args of +// * validate_msg_execve_size() in bpf_execve_event.c */ +// union { +// struct msg_process process; +// char buffer[PADDED_BUFFER]; +// }; +// /* below fields are not part of the event, serve just as +// * heap for execve programs +// */ +//#ifdef __LARGE_BPF_PROG +// struct heap_exe exe; +//#endif +//}; // All fields aligned so no 'packed' attribute. +// +//// This structure stores the binary path that was recorded on execve. +//// Technically PATH_MAX is 4096 but we limit the length we store since we have +//// limits on the length of the string to compare: +//// - Artificial limits for full string comparison. +//// - Technical limits for prefix and postfix, using LPM_TRIE that have a 256 +//// bytes size limit. +//struct binary { +// // length of the path stored in path, this should be < BINARY_PATH_MAX_LEN +// // but can contain negative value in case of copy error. +// // While s16 would be sufficient, 64 bits are handy for alignment. +// __s64 path_length; +// // BINARY_PATH_MAX_LEN first bytes of the path +// char path[BINARY_PATH_MAX_LEN]; +//}; // All fields aligned so no 'packed' attribute +// +//// The execve_map_value is tracked by the TGID of the thread group +//// the msg_execve_key.pid. 
The thread IDs are recorded on the +//// fly and sent with every corresponding event. +//struct execve_map_value { +// struct msg_execve_key key; +// struct msg_execve_key pkey; +// __u32 flags; +// __u32 nspid; +// struct msg_ns ns; +// struct msg_capabilities caps; +// struct binary bin; +//} __attribute__((packed)) __attribute__((aligned(8))); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct msg_execve_event); +} execve_msg_heap_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 32768); + __type(key, __u32); + __type(value, struct execve_map_value); +} execve_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 2); + __type(key, __s32); + __type(value, __s64); +} execve_map_stats SEC(".maps"); + +enum { + MAP_STATS_COUNT = 0, + MAP_STATS_ERROR = 1, +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __s32); + __type(value, struct execve_map_value); +} execve_val SEC(".maps"); + +struct execve_heap { + union { + char pathname[PATHNAME_SIZE]; + char maxpath[4096]; + }; + struct execve_info info; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __s32); + __type(value, struct execve_heap); +} execve_heap SEC(".maps"); + +/* The tg_execve_joined_info_map allows to join and combine + * exec info that is gathered during different hooks + * through the execve call. The list of current hooks is: + * 1. kprobe/security_bprm_committing_creds + * For details check tg_kp_bprm_committing_creds bpf program. + * 2. tracepoint/sys_execve + * For details see event_execve bpf program. + * + * Important: the information stored here is complementary + * information only, the core logic should not depend on entries + * of this map to be present. + * + * tgid+tid is key as execve is a complex syscall where failures + * may happen at different levels and hooks, also the thread + * that triggered and succeeded at execve will be the only new + * and main thread. + */ +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 8192); + __type(key, __u64); + __type(value, struct execve_info); +} tg_execve_joined_info_map SEC(".maps"); + +/* The tg_execve_joined_info_map_stats will hold stats about + * entries and map update errors. + */ +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 2); + __type(key, __s32); + __type(value, __s64); +} tg_execve_joined_info_map_stats SEC(".maps"); + +FUNC_INLINE int64_t validate_msg_execve_size(int64_t size) +{ + size_t max = sizeof(struct msg_execve_event); + + /* validate_msg_size() calls need to happen near caller using the + * size. Otherwise, depending on kernel version, the verifier may + * lose track of the size bounds. Place a compiler barrier here + * otherwise clang will likely place this check near other msg + * population calls which can be significant distance away resulting + * in losing bounds on older kernels where bounds are not tracked + * as rigorously. 
+ */ + compiler_barrier(); + if (size > max) + size = max; + if (size < 1) + size = offsetof(struct msg_execve_event, buffer); + compiler_barrier(); + return size; +} + +// execve_map_error() will increment the map error counter +FUNC_INLINE void execve_map_error(void) +{ + int one = MAP_STATS_ERROR; + __s64 *cntr; + + cntr = bpf_map_lookup_elem(&execve_map_stats, &one); + if (cntr) + *cntr = *cntr + 1; +} + +FUNC_INLINE uint64_t get_start_time() +{ + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + uint64_t gl_off = offsetof(struct task_struct, group_leader); + struct task_struct *group_leader_ptr; + bpf_probe_read(&group_leader_ptr, + sizeof(struct task_struct *), + (uint8_t *)task + gl_off); + + uint64_t start_time = 0; + + if (bpf_core_field_exists(group_leader_ptr->start_time)) + { + uint64_t st_off = offsetof(struct task_struct, start_time); + bpf_probe_read(&start_time, + sizeof(uint64_t), + (uint8_t *)group_leader_ptr + st_off); + } + else if (bpf_core_field_exists(group_leader_ptr->start_boottime)) + { + uint64_t st_off = offsetof(struct task_struct, start_boottime); + bpf_probe_read(&start_time, + sizeof(uint64_t), + (uint8_t *)group_leader_ptr + st_off); + } else { + start_time = bpf_ktime_get_ns(); + } + + return start_time; + // return nsec_to_clock_t(start_time); +} + +// execve_map_get will look up if pid exists and return it if it does. If it +// does not, it will create a new one and return it. +FUNC_INLINE struct execve_map_value *execve_map_get(__u32 pid) +{ + struct execve_map_value *event; + + event = bpf_map_lookup_elem(&execve_map, &pid); + if (!event) { + struct execve_map_value *value; + int err, zero = MAP_STATS_COUNT; + __s64 *cntr; + + value = bpf_map_lookup_elem(&execve_val, &zero); + if (!value) + return 0; + + memset(value, 0, sizeof(struct execve_map_value)); + err = bpf_map_update_elem(&execve_map, &pid, value, 0); + if (!err) { + cntr = bpf_map_lookup_elem(&execve_map_stats, &zero); + if (cntr) + *cntr = *cntr + 1; + } else { + execve_map_error(); + } + event = bpf_map_lookup_elem(&execve_map, &pid); + } + return event; +} + +FUNC_INLINE struct execve_map_value *execve_map_get_noinit(__u32 pid) +{ + return bpf_map_lookup_elem(&execve_map, &pid); +} + +FUNC_INLINE void execve_map_delete(__u32 pid) +{ + int err = bpf_map_delete_elem(&execve_map, &pid); + int zero = MAP_STATS_COUNT; + __s64 *cntr; + + if (!err) { + cntr = bpf_map_lookup_elem(&execve_map_stats, &zero); + if (cntr) + *cntr = *cntr - 1; + } else { + execve_map_error(); + } +} + +// execve_joined_info_map_error() will increment the map error counter +FUNC_INLINE void execve_joined_info_map_error(void) +{ + int one = MAP_STATS_ERROR; + __s64 *cntr; + + cntr = bpf_map_lookup_elem(&tg_execve_joined_info_map_stats, &one); + if (cntr) + *cntr = *cntr + 1; +} + +FUNC_INLINE void execve_joined_info_map_set(__u64 tid, struct execve_info *info) +{ + int err, zero = MAP_STATS_COUNT; + __s64 *cntr; + + err = bpf_map_update_elem(&tg_execve_joined_info_map, &tid, info, BPF_ANY); + if (err < 0) { + /* -EBUSY or -ENOMEM with the help of the cntr error + * on the stats map this can be a good indication of + * long running workloads and if we have to make the + * map size bigger for such cases. 
+ */ + execve_joined_info_map_error(); + return; + } + + cntr = bpf_map_lookup_elem(&tg_execve_joined_info_map_stats, &zero); + if (cntr) + *cntr = *cntr + 1; +} + +/* Clear up some space for next threads */ +FUNC_INLINE void execve_joined_info_map_clear(__u64 tid) +{ + int err, zero = MAP_STATS_COUNT; + __s64 *cntr; + + err = bpf_map_delete_elem(&tg_execve_joined_info_map, &tid); + if (!err) { + cntr = bpf_map_lookup_elem(&tg_execve_joined_info_map_stats, &zero); + if (cntr) + *cntr = *cntr - 1; +} +/* We don't care here about -ENOENT as there is no guarantee entries + * will be present anyway. + */ +} + +/* Returns an execve_info if found. A missing entry is perfectly fine as it + * could mean we are not interested into storing more information about this task. + */ +FUNC_INLINE struct execve_info *execve_joined_info_map_get(__u64 tid) +{ + return bpf_map_lookup_elem(&tg_execve_joined_info_map, &tid); +} + +_Static_assert(sizeof(struct execve_map_value) % 8 == 0, + "struct execve_map_value should have size multiple of 8 bytes"); + +struct kernel_stats { + __u64 sent_failed[256]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, __u32); + __type(value, struct kernel_stats); + __uint(max_entries, 1); +} tg_stats_map SEC(".maps"); + +FUNC_INLINE void +perf_event_output_metric(void *ctx, u8 metric, void *map, u64 flags, void *data, u64 size) +{ + struct kernel_stats *valp; + __u32 zero = 0; + long err; + + err = bpf_perf_event_output(ctx, map, flags, data, size); + if (err < 0) { + valp = bpf_map_lookup_elem(&tg_stats_map, &zero); + if (valp) + __sync_fetch_and_add(&valp->sent_failed[metric], 1); + } +} + + +#endif //SYSAK_PROCESS_H diff --git a/src/security/security.bpf.c b/src/security/security.bpf.c new file mode 100644 index 0000000..025d391 --- /dev/null +++ b/src/security/security.bpf.c @@ -0,0 +1,1357 @@ +// +// Created by qianlu on 2024/6/12. 
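+//
+// The perf outputs declared below (sock_secure_output, file_secure_output)
+// are drained from userspace. A minimal, illustrative consumer using the
+// plain libbpf perf_buffer API (libbpf >= 0.8 signature) might look like the
+// sketch below; the object path, the attach loop and the printf-only handler
+// are assumptions made for this example:
+//
+//	#include <bpf/libbpf.h>
+//	#include <stdio.h>
+//
+//	static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+//	{
+//		/* a real consumer would cast data to struct tcp_data_t (type.h) */
+//		printf("cpu %d: %u bytes\n", cpu, size);
+//	}
+//
+//	int main(void)
+//	{
+//		struct bpf_object *obj = bpf_object__open_file("security.bpf.o", NULL);
+//		struct bpf_program *prog;
+//		struct perf_buffer *pb;
+//		int fd;
+//
+//		if (!obj || bpf_object__load(obj))
+//			return 1;
+//		bpf_object__for_each_program(prog, obj)
+//			bpf_program__attach(prog);	/* errors ignored for brevity */
+//		fd = bpf_object__find_map_fd_by_name(obj, "sock_secure_output");
+//		pb = perf_buffer__new(fd, 8 /* pages per CPU */, on_sample, NULL, NULL, NULL);
+//		while (pb && perf_buffer__poll(pb, 100 /* ms */) >= 0)
+//			;
+//		return 0;
+//	}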
+// + +#include +#include +#include +#include +#include "../coolbpf.h" + +#include "int_maps.h" +#include "filter.h" +#include "type.h" +#include "process.h" +#include "addr_lpm_maps.h" +#include "string_maps.h" +#include "bpf_exit.h" +#include "tailcall_stack.h" +//#include "bpf_execve.h" +// map in map + +// struct { +// __uint(type, BPF_MAP_TYPE_LPM_TRIE); +// __uint(max_entries, 10); +// // __uint(key_size, sizeof(__u8) * sizeof(struct string_prefix_lpm_trie)); +// // __uint(value_size, sizeof(__u8)); +// __type(key, __u8[sizeof(struct string_prefix_lpm_trie)]); // Need to specify as byte array as wouldn't take struct as key type +// __type(value, __u8); +// __uint(map_flags, BPF_F_NO_PREALLOC); +// } ql_test_prefix_map SEC(".maps"); + +// [0, SYSAK_SECURE_MAX_CIDR_LIMIT/2) for source addr +// [SYSAK_SECURE_MAX_CIDR_LIMIT/2, SYSAK_SECURE_MAX_CIDR_LIMIT) for dest addr +BPF_ARRAY(cidr_filter_list, struct cidr_entry, SYSAK_SECURE_MAX_CIDR_LIMIT); +// [0, SYSAK_SECURE_MAX_PORT_LIMIT/2) for source port +// [SYSAK_SECURE_MAX_PORT_LIMIT/2, SYSAK_SECURE_MAX_CIDR_LIMIT) for dest port +BPF_ARRAY(port_filter_list, struct port_entry, SYSAK_SECURE_MAX_PORT_LIMIT); + +BPF_HASH(sock_secure_port_filter, u16, struct port_entry, 1024); +BPF_PERF_OUTPUT(sock_secure_output, 1024); +BPF_PERCPU_ARRAY(sock_secure_data_heap, struct tcp_data_t, 1); + +// [0, SYSAK_SECURE_MAX_PATH_LIMIT/2) for file path +BPF_ARRAY(path_filter_list, struct path_entry, SYSAK_SECURE_MAX_PATH_LIMIT); +BPF_PERF_OUTPUT(file_secure_output, 1024); +BPF_PERCPU_ARRAY(file_secure_data_heap, struct file_data_t, 1); +BPF_PERCPU_ARRAY(tailcall_stack, struct secure_tailcall_stack, 1); + +struct +{ + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 3); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} file_path_filter_calls SEC(".maps"); + +struct +{ + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 3); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} secure_tailcall_map SEC(".maps"); +//////////////////////////// process //////////////////////////// +///////////////////////////////////////////////////////////////// + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 2); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} execve_calls SEC(".maps"); + +#include "data_event.h" + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct msg_data); +} data_heap SEC(".maps"); + +FUNC_INLINE __u32 +read_args(void *ctx, struct msg_execve_event *event) +{ + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + struct msg_process *p = &event->process; + unsigned long start_stack, end_stack; + unsigned long free_size, args_size; + __u32 zero = 0, size = 0; + struct execve_heap *heap; + struct mm_struct *mm; + char *args; + long off; + int err; + + bpf_probe_read(&mm, sizeof(mm), _(&task->mm)); + if (!mm) + return 0; + + bpf_probe_read(&start_stack, sizeof(start_stack), + _(&mm->arg_start)); + bpf_probe_read(&end_stack, sizeof(start_stack), _(&mm->arg_end)); + + if (!start_stack || !end_stack) + return 0; + + /* skip first argument - binary path */ + heap = bpf_map_lookup_elem(&execve_heap, &zero); + if (!heap) + return 0; + + /* poor man's strlen */ + off = bpf_probe_read_str(&heap->maxpath, 4096, (char *)start_stack); + if (off < 0) + return 0; + bpf_printk("[read_args] pid:%llu, args:%s", p->pid, heap->maxpath); + + start_stack += off; + + size = 
p->size & 0x1ff /* 2*MAXARGLENGTH - 1*/; + args = (char *)p + size; + + if (args >= (char *)&event->process + BUFFER) + return 0; + + /* Read arguments either to rest of the space in the event, + * or use data event to send it separatelly. + */ + free_size = (char *)&event->process + BUFFER - args; + args_size = end_stack - start_stack; + + if (args_size < BUFFER && args_size < free_size) { + size = args_size & 0x3ff /* BUFFER - 1 */; + err = bpf_probe_read(args, size, (char *)start_stack); + if (err < 0) { + p->flags |= EVENT_ERROR_ARGS; + size = 0; + } + } else { + size = data_event_bytes(ctx, (struct data_event_desc *)args, + (unsigned long)start_stack, + args_size, + (struct bpf_map_def *)&data_heap); + if (size > 0) + p->flags |= EVENT_DATA_ARGS; + } + return size; +} + +FUNC_INLINE __u32 +read_path(void *ctx, struct msg_execve_event *event, void *filename) +{ + struct msg_process *p = &event->process; + __u32 size = 0; + __u32 flags = 0; + char *earg; + + earg = (void *)p + offsetof(struct msg_process, args); + + size = bpf_probe_read_str(earg, MAXARGLENGTH - 1, filename); + bpf_printk("[read_path] pid:%llu, path:%s", p->pid, earg); + if (size < 0) { + flags |= EVENT_ERROR_FILENAME; + size = 0; + } else if (size == MAXARGLENGTH - 1) { + size = data_event_str(ctx, (struct data_event_desc *)earg, + (unsigned long)filename, + (struct bpf_map_def *)&data_heap); + if (size == 0) + flags |= EVENT_ERROR_FILENAME; + else + flags |= EVENT_DATA_FILENAME; + } + + p->flags |= flags; + return size; +} + +FUNC_INLINE __u32 +read_cwd(void *ctx, struct msg_process *p) +{ + if (p->flags & EVENT_ERROR_CWD) + return 0; + return getcwd(p, p->size, p->pid); +} + +FUNC_INLINE void +read_execve_shared_info(void *ctx, struct msg_process *p, __u64 pid) +{ + struct execve_info *info; + + info = execve_joined_info_map_get(pid); + if (!info) { + p->secureexec = 0; + p->i_ino = 0; + p->i_nlink = 0; + return; + } + + p->secureexec = info->secureexec; + p->i_ino = info->i_ino; + p->i_nlink = info->i_nlink; + execve_joined_info_map_clear(pid); +} + +/** + * read_exe() Reads the path from the backing executable file of the current + * process. + * + * The executable file of a process can change using the prctl() system call + * and PR_SET_MM_EXE_FILE. Thus, this function should only be used under the + * execve path since the executable file is locked and usually there is only + * one remaining thread at its exit path. 
+ */ +#ifdef __LARGE_BPF_PROG +FUNC_INLINE __u32 +read_exe(struct task_struct *task, struct heap_exe *exe) +{ + struct file *file = BPF_CORE_READ(task, mm, exe_file); + struct path *path = __builtin_preserve_access_index(&file->f_path); + + exe->len = BINARY_PATH_MAX_LEN; + exe->off = (char *)&exe->buf; + exe->off = __d_path_local(path, exe->off, (int *)&exe->len, (int *)&exe->error); + if (exe->len > 0) + exe->len = BINARY_PATH_MAX_LEN - exe->len; + + return exe->len; +} +#endif + +// int wake_up_process(struct task_struct *p) +SEC("kprobe/wake_up_new_task") +int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task) +{ + __u32 cpid = bpf_get_current_pid_tgid() >> 32; + bpf_printk("[kprobe][event_wake_up_new_task] pid:%u enter~", cpid); + struct execve_map_value *curr, *parent; + struct msg_clone_event msg; + struct msg_capabilities caps; + u64 msg_size = sizeof(struct msg_clone_event); + struct msg_k8s kube; + u32 tgid = 0; + + if (!task) + return 0; + + tgid = BPF_CORE_READ(task, tgid); + bpf_printk("[kprobe][event_wake_up_new_task] pid:%u read tgid:%u ~", cpid, tgid); + + /* Do not try to create any msg or calling execve_map_get + * (that will add a new process in the execve_map) if we + * cannot find it's parent in the execve_map. + */ + parent = __event_find_parent(task); + if (!parent) + return 0; + bpf_printk("[kprobe][event_wake_up_new_task] pid:%u tgid:%u has parent.", cpid, tgid); + curr = execve_map_get(tgid); + if (!curr) + return 0; + bpf_printk("[kprobe][event_wake_up_new_task] pid:%u tgid:%u new event in execve_map.", cpid, tgid); + /* Generate an EVENT_COMMON_FLAG_CLONE event once per process, + * that is, thread group. + */ + if (curr->key.ktime != 0) + return 0; + + bpf_printk("[kprobe][event_wake_up_new_task] pid:%u tgid:%u begin init event.", cpid, tgid); + /* Setup the execve_map entry. */ + curr->flags = EVENT_COMMON_FLAG_CLONE; + curr->key.pid = tgid; + // curr->key.ktime = get_start_time(); + curr->key.ktime = bpf_ktime_get_ns(); + curr->nspid = get_task_pid_vnr(); + memcpy(&curr->bin, &parent->bin, sizeof(curr->bin)); + curr->pkey = parent->key; + + /* Store the thread leader capabilities so we can check later + * before the execve hook point if they changed or not. + * This needs to be converted later to credentials. + */ + get_current_subj_caps(&caps, task); + curr->caps.permitted = caps.permitted; + curr->caps.effective = caps.effective; + curr->caps.inheritable = caps.inheritable; + + /* Setup the msg_clone_event and sent to the user. */ + msg.common.op = MSG_OP_CLONE; + msg.common.size = msg_size; + msg.common.ktime = curr->key.ktime; + msg.parent = curr->pkey; + msg.tgid = curr->key.pid; + /* Per thread tracking rules TID == PID : + * Since we generate one event per thread group, then when this task + * wakes up it will be the only one in the thread group, and it is + * the leader. Ensure to pass TID to user space. 
+ */ + msg.tid = BPF_CORE_READ(task, pid); + msg.ktime = curr->key.ktime; + msg.nspid = curr->nspid; + msg.flags = curr->flags; + + __event_get_cgroup_info(task, &kube); + + bpf_printk("[kprobe][event_wake_up_new_task] pid:%u tgid:%u init event done.", cpid, tgid); + + if (cgroup_rate(ctx, &kube, msg.ktime)) { + bpf_printk("[kprobe][event_wake_up_new_task] pid:%u tgid:%u begin submit clone event.", cpid, tgid); + perf_event_output_metric(ctx, MSG_OP_CLONE, &tcpmon_map, + BPF_F_CURRENT_CPU, &msg, msg_size); + } + + return 0; +} + +////__attribute__((section("tracepoint/sys_execve"), used)) int +SEC("tracepoint/sched/sched_process_exec") +int event_execve(struct trace_event_raw_sched_process_exec *ctx) +{ + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + char *filename = (char *)ctx + (_(ctx->__data_loc_filename) & 0xFFFF); + struct msg_execve_event *event; + struct execve_map_value *parent; + struct msg_process *p; + __u32 zero = 0; + __u64 pid; + + event = bpf_map_lookup_elem(&execve_msg_heap_map, &zero); + if (!event) + return 0; + + pid = bpf_get_current_pid_tgid(); + parent = event_find_parent(); + if (parent) { + event->parent = parent->key; + } else { + event_minimal_parent(event, task); + } + + p = &event->process; + p->flags = EVENT_EXECVE; + /** + * Per thread tracking rules TID == PID : + * At exec all threads other than the calling one are destroyed, so + * current becomes the new thread leader since we hook late during + * execve. + */ + p->pid = pid >> 32; + p->tid = (__u32)pid; + p->nspid = get_task_pid_vnr(); + p->ktime = bpf_ktime_get_ns(); + p->size = offsetof(struct msg_process, args); + p->auid = get_auid(); + p->uid = bpf_get_current_uid_gid(); + read_execve_shared_info(ctx, p, pid); + + p->size += read_path(ctx, event, filename); + p->size += read_args(ctx, event); + p->size += read_cwd(ctx, p); + bpf_printk("[event_execve] enter pid:%llu, filename:%s", p->pid, filename); + + event->common.op = MSG_OP_EXECVE; + event->common.ktime = p->ktime; + event->common.size = offsetof(struct msg_execve_event, process) + p->size; + + BPF_CORE_READ_INTO(&event->kube.net_ns, task, nsproxy, net_ns, ns.inum); + + get_current_subj_creds(&event->creds, task); + get_namespaces(&event->ns, task); + p->flags |= __event_get_cgroup_info(task, &event->kube); + + bpf_tail_call(ctx, &execve_calls, 0); + return 0; +} + +//__attribute__((section("tracepoint/0"), used)) +SEC("tracepoint/0") +int execve_rate(void *ctx) +{ + struct msg_execve_event *msg; + + __u32 zero = 0; + + msg = bpf_map_lookup_elem(&execve_msg_heap_map, &zero); + if (!msg) + return 0; + + if (cgroup_rate(ctx, &msg->kube, msg->common.ktime)) + bpf_tail_call(ctx, &execve_calls, 1); + return 0; +} + +///** +// * execve_send() sends the collected execve event data. +// * +// * This function is the last tail call of the execve event, its sole purpose +// * is to update the pid execve_map entry to reflect the new execve event that +// * has already been collected, then send it to the perf buffer. 
+// */ +////__attribute__((section("tracepoint/1"), used)) int +SEC("tracepoint/1") +int execve_send(void *ctx) +{ + bpf_printk("[execve_send] enter ~"); + struct msg_execve_event *event; + struct execve_map_value *curr; + struct msg_process *p; + __u32 zero = 0; + uint64_t size; + __u32 pid; +#if defined(__NS_CHANGES_FILTER) || defined(__CAP_CHANGES_FILTER) + bool init_curr = 0; +#endif + + event = bpf_map_lookup_elem(&execve_msg_heap_map, &zero); + if (!event) + return 0; + +#ifdef __LARGE_BPF_PROG + // Reading the absolute path of the process exe for matchBinaries. + // Historically we used the filename, a potentially relative path (maybe to + // a symlink) coming from the execve tracepoint. For kernels not supporting + // large BPF prog, we still use the filename. + read_exe((struct task_struct *)bpf_get_current_task(), &event->exe); +#endif + + p = &event->process; + + pid = (bpf_get_current_pid_tgid() >> 32); + + curr = execve_map_get_noinit(pid); + if (curr) { + event->cleanup_key = curr->key; +#if defined(__NS_CHANGES_FILTER) || defined(__CAP_CHANGES_FILTER) + /* if this exec event preceds a clone, initialize capabilities + * and namespaces as well. + */ + if (curr->flags == EVENT_COMMON_FLAG_CLONE) + init_curr = 1; +#endif + curr->key.pid = p->pid; + curr->key.ktime = p->ktime; + curr->nspid = p->nspid; + curr->pkey = event->parent; + if (curr->flags & EVENT_COMMON_FLAG_CLONE) { + event_set_clone(p); + } + curr->flags = 0; +#ifdef __NS_CHANGES_FILTER + if (init_curr) + memcpy(&(curr->ns), &(event->ns), + sizeof(struct msg_ns)); +#endif +#ifdef __CAP_CHANGES_FILTER + if (init_curr) { + curr->caps.permitted = event->creds.caps.permitted; + curr->caps.effective = event->creds.caps.effective; + curr->caps.inheritable = event->creds.caps.inheritable; + } +#endif + // buffer can be written at clone stage with parent's info, if previous + // path is longer than current, we can have leftovers at the end. + memset(&curr->bin, 0, sizeof(curr->bin)); +#ifdef __LARGE_BPF_PROG + // read from proc exe stored at execve time + if (event->exe.len <= BINARY_PATH_MAX_LEN) { + curr->bin.path_length = bpf_probe_read(curr->bin.path, event->exe.len, event->exe.off); + if (curr->bin.path_length == 0) + curr->bin.path_length = event->exe.len; + } +#else + // reuse p->args first string that contains the filename, this can't be + // above 256 in size (otherwise the complete will be send via data msg) + // which is okay because we need the 256 first bytes. 
+ curr->bin.path_length = bpf_probe_read_str(curr->bin.path, BINARY_PATH_MAX_LEN, &p->args); + if (curr->bin.path_length > 1) { + // don't include the NULL byte in the length + curr->bin.path_length--; + } +#endif + } + + event->common.flags = 0; + size = validate_msg_execve_size( + sizeof(struct msg_common) + sizeof(struct msg_k8s) + + sizeof(struct msg_execve_key) + sizeof(__u64) + + sizeof(struct msg_cred) + sizeof(struct msg_ns) + + sizeof(struct msg_execve_key) + p->size); +// bpf_printk("[execve_send] before perf output ~"); + perf_event_output_metric(ctx, MSG_OP_EXECVE, &tcpmon_map, BPF_F_CURRENT_CPU, event, size); +// bpf_printk("[execve_send] after perf output ~"); + return 0; +} + +// +// +// +//// exit +// +////__attribute__((section("kprobe/acct_process"), used)) +SEC("kprobe/acct_process") +int event_exit_acct_process(struct pt_regs *ctx) +{ + __u64 pid_tgid = bpf_get_current_pid_tgid(); + __u32 pid = pid_tgid >> 32; + bpf_printk("[kprobe][event_exit_acct_process] pid:%u enter~", pid); + event_exit_send(ctx, pid_tgid >> 32); + bpf_printk("[kprobe][event_exit_acct_process] pid:%u send done ~", pid); + return 0; +} + +/* + * Hooking on acct_process kernel function, which is called on the task's + * exit path once the task is the last one in the group. It's stable since + * v4.19, so it's safe to hook for us. + * + * It's called with on_exit argument != 0 when called from do_exit + * function with same conditions like for acct_process described above. + */ +//__attribute__((section("kprobe/disassociate_ctty"), used)) int + +SEC("kprobe/disassociate_ctty") +int event_exit_disassociate_ctty(struct pt_regs *ctx) +{ + int on_exit = (int)PT_REGS_PARM1_CORE(ctx); + __u32 pid = bpf_get_current_pid_tgid() >> 32; + bpf_printk("[kprobe][event_exit_disassociate_ctty] pid:%u enter~", pid); + + if (on_exit) + event_exit_send(ctx, pid); + return 0; +} + + +//////////////////////////// filters //////////////////////////// + +#define POLICY_FILTER_MAX_POLICIES 128 + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, POLICY_FILTER_MAX_POLICIES); + __uint(key_size, sizeof(u32)); /* policy id */ + __array( + values, struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u64); /* cgroup id */ + __type(value, __u8); /* empty */ + }); +} policy_filter_maps SEC(".maps"); + +///////////////////////////////////////////////////////////////// + + +//////////////////////////// network //////////////////////////// +///////////////////////////////////////////////////////////////// + +static __always_inline u16 bpf_core_sock_sk_protocol_ak(struct sock *sk) +{ + return (u16)BPF_CORE_READ_BITFIELD_PROBED(sk, sk_protocol); +} + +static inline int cidr_match(__u32 addr, __u32 net, __u32 subnet) { + __u32 mask = subnet == 0 ? 
0 : (0xFFFFFFFF << (32 - subnet)); + return (addr & mask) == (net & mask); +} + +// return value: +// 0 --- pass +// 1 --- reject +// direction: 0 for source addr, 1 for dest addr +int port_filter(__u16 port, int direction) { + int start = 0; + if (direction == 1) { + start = (SYSAK_SECURE_MAX_PORT_LIMIT >> 1); + } + + int key; + struct port_entry *entry; + // 0 for blacklist + // 1 for whitelist + int blacklist = 3; + +#pragma unroll + for (key = 0; key < SYSAK_SECURE_MAX_PORT_LIMIT; key++) { + int tmp = start + key; + entry = bpf_map_lookup_elem(&port_filter_list, &tmp); + if (!entry || entry->inited == 0) { + // need stop + break; + } + blacklist = entry->black; + bpf_printk("[kprobe][port_filter] black:%u, port:%u, income_port:%u", entry->black, entry->port, port); + if (port == entry->port) { + if (blacklist == 1) { + // blacklist + bpf_printk("[kprobe][port_filter] filtered by blacklist port, port:%u : disabled.", + port); + return 1; + } else if (blacklist == 0) { + // whitelist + return 0; + } + } + + return (entry->black == 0) ? 1 : 0; + } + + // blacklist + if (blacklist == 1) return 0; + if (blacklist == 0) { + // whitelist + bpf_printk("[kprobe][port_filter] filtered by whitelist port, port:%u . disabled.", port); + return 1; + } + + // no filters + return 0; +} + +// return value: +// 0 --- pass +// 1 --- reject +// direction: 0 for source addr, 1 for dest addr +int addr_filter(__u32 addr, int direction) { + int start = 0; + if (direction == 1) { + start = (SYSAK_SECURE_MAX_CIDR_LIMIT >> 1); + } + + int key; + + // 0 for blacklist + // 1 for whitelist + int blacklist = 3; +#pragma unroll + for (key = 0; key < SYSAK_SECURE_MAX_CIDR_LIMIT; key++) { + int tmp = start + key; + struct cidr_entry *entry = bpf_map_lookup_elem(&cidr_filter_list, &tmp); + if (!entry || entry->inited == 0) break; + bpf_printk("[kprobe][addr_filter] black:%u, net:%u, mask:%u", entry->black, entry->net, entry->mask); + blacklist = entry->black; + if (cidr_match(addr, entry->net, entry->mask)) { + if (blacklist == 1) { + // bingo black list + bpf_printk("[kprobe][addr_filter] filtered by blacklist cidr, ip:%u net:%u mask:%u: disabled.", + addr, entry->net, entry->mask); + return 1; + } else if (blacklist == 0) { + // bingo white list + return 0; + } + } + } + + // blacklist + if (blacklist == 1) return 0; + if (blacklist == 0) { + // whitelist + bpf_printk("[kprobe][addr_filter] filtered by white cidr, ip:%u disabled.", + addr); + return 1; + } + + // no filters + return 0; +} + +static __always_inline u32 get_netns(struct sock *sk) { + return BPF_CORE_READ(sk, __sk_common.skc_net.net, ns.inum); +} + +// int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +SEC("kprobe/tcp_sendmsg") +int BPF_KPROBE(kprobe_tcp_sendmsg, struct sock *sk, struct msghdr *msg, size_t size) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct execve_map_value *enter; + enter = execve_map_get_noinit(pid); + if (!enter || enter->key.ktime == 0) { + bpf_printk("[kprobe][kprobe_tcp_sendmsg] pid:%u never enter. 
skip collect", pid); + return 0; + } + bpf_printk("[kprobe][kprobe_tcp_sendmsg] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + + // define event + __u32 zero = 0; + struct tcp_data_t* data = NULL; + data = bpf_map_lookup_elem(&sock_secure_data_heap, &zero); + if (!data) return 0; + memset(data, 0, sizeof(data)); + + data->func = TRACEPOINT_FUNC_TCP_SENDMSG; + data->key = enter->key; + data->pkey = enter->pkey; + + struct inet_sock *inet = (struct inet_sock *)sk; + data->timestamp = bpf_ktime_get_ns(); + unsigned int daddr = BPF_CORE_READ(sk, __sk_common.skc_daddr); + data->daddr = bpf_htonl(daddr); + unsigned short dport = BPF_CORE_READ(sk, __sk_common.skc_dport); + data->dport = bpf_htons(dport); + unsigned int saddr = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); + data->saddr = bpf_htonl(saddr); + unsigned short sport = BPF_CORE_READ(inet, inet_sport); + data->sport = bpf_htons(sport); + data->state = BPF_CORE_READ(sk, __sk_common.skc_state); + data->family = BPF_CORE_READ(sk, __sk_common.skc_family); + data->net_ns = get_netns(sk); + data->protocol = bpf_core_sock_sk_protocol_ak(sk); + data->bytes = size; + + struct secure_tailcall_stack* stack = NULL; + stack = bpf_map_lookup_elem(&tailcall_stack, &zero); + if (!stack) return 0; + memset(stack, 0, sizeof(stack)); + stack->func = SECURE_FUNC_TRACEPOINT_FUNC_TCP_SENDMSG; + stack->tcp_data.func = TRACEPOINT_FUNC_TCP_SENDMSG; + stack->tcp_data.key = enter->key; + stack->tcp_data.pkey = enter->pkey; + stack->tcp_data.timestamp = bpf_ktime_get_ns(); + stack->tcp_data.daddr = daddr; + stack->tcp_data.dport = bpf_htons(dport); + stack->tcp_data.saddr = saddr; + stack->tcp_data.sport = bpf_htons(sport); + stack->tcp_data.state = BPF_CORE_READ(sk, __sk_common.skc_state); + stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); + stack->tcp_data.net_ns = get_netns(sk); + stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); + stack->tcp_data.bytes = size; + bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, sport:%u, family:%u", + stack->tcp_data.saddr, stack->tcp_data.sport, data->family); + bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, dport:%u, state:%u", + stack->tcp_data.daddr, stack->tcp_data.dport, data->state); + + + bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); + // do filters + // int sf, df, sp, dp; + // sf = addr_filter(data->saddr, 0); + // df = addr_filter(data->daddr, 1); + // sp = port_filter(data->sport, 0); + // dp = port_filter(data->dport, 1); + // if (sf || df || sp || dp) { + // bpf_printk("[kprobe][kprobe_tcp_sendmsg] skip submit because of filters."); + // return 0; + // } + + // bpf_perf_event_output(ctx, &sock_secure_output, BPF_F_CURRENT_CPU, data, sizeof(struct tcp_data_t)); + // bpf_printk("[kprobe][kprobe_tcp_sendmsg] pid:%u ktime:%llu send to perfbuffer.", pid, enter->key.ktime); + return 0; +} + +// void tcp_close(struct sock *sk, long timeout); +SEC("kprobe/tcp_close") +int BPF_KPROBE(kprobe_tcp_close, struct sock *sk) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct execve_map_value *enter; + enter = execve_map_get_noinit(pid); + if (!enter || enter->key.ktime == 0) { + bpf_printk("[kprobe][kprobe_tcp_close] pid:%u never enter. 
skip collect", pid); + return 0; + } + bpf_printk("[kprobe][kprobe_tcp_close] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + + __u32 zero = 0; + struct tcp_data_t* data = NULL; + data = bpf_map_lookup_elem(&sock_secure_data_heap, &zero); + if (!data) return 0; + memset(data, 0, sizeof(data)); + + data->func = TRACEPOINT_FUNC_TCP_CLOSE; + data->key = enter->key; + data->pkey = enter->pkey; + struct inet_sock *inet = (struct inet_sock *)sk; + data->timestamp = bpf_ktime_get_ns(); + unsigned int daddr = BPF_CORE_READ(sk, __sk_common.skc_daddr); + data->daddr = bpf_htonl(daddr); + unsigned short dport = BPF_CORE_READ(sk, __sk_common.skc_dport); + data->dport = bpf_htons(dport); + unsigned int saddr = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); + data->saddr = bpf_htonl(saddr); + unsigned short sport = BPF_CORE_READ(inet, inet_sport); + data->sport = bpf_htons(sport); + data->state = BPF_CORE_READ(sk, __sk_common.skc_state); + data->family = BPF_CORE_READ(sk, __sk_common.skc_family); + data->net_ns = get_netns(sk); + data->protocol = bpf_core_sock_sk_protocol_ak(sk); + + struct secure_tailcall_stack* stack = NULL; + stack = bpf_map_lookup_elem(&tailcall_stack, &zero); + if (!stack) return 0; + memset(stack, 0, sizeof(stack)); + stack->func = SECURE_FUNC_TRACEPOINT_FUNC_TCP_CLOSE; + stack->tcp_data.func = TRACEPOINT_FUNC_TCP_CLOSE; + stack->tcp_data.key = enter->key; + stack->tcp_data.pkey = enter->pkey; + stack->tcp_data.timestamp = bpf_ktime_get_ns(); + stack->tcp_data.daddr = daddr; + stack->tcp_data.dport = bpf_htons(dport); + stack->tcp_data.saddr = saddr; + stack->tcp_data.sport = bpf_htons(sport); + stack->tcp_data.state = BPF_CORE_READ(sk, __sk_common.skc_state); + stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); + stack->tcp_data.net_ns = get_netns(sk); + stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); + bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, sport:%u, family:%u", + stack->tcp_data.saddr, stack->tcp_data.sport, data->family); + bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, dport:%u, state:%u", + stack->tcp_data.daddr, stack->tcp_data.dport, data->state); + + + bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); + + // do filters +// int sf, df, sp, dp; +// sf = addr_filter(data->saddr, 0); +// df = addr_filter(data->daddr, 1); +// sp = port_filter(data->sport, 0); +// dp = port_filter(data->dport, 1); +// if (sf || df || sp || dp) { +// bpf_printk("[kprobe][kprobe_tcp_close] skip submit because of filters."); +// return 0; +// } + +// // bpf_printk("Packet matched CIDR: %x/%x/%u/%u\n", entry->net, entry->mask, entry->enable, entry->src); +// bpf_perf_event_output(ctx, &sock_secure_output, BPF_F_CURRENT_CPU, data, sizeof(struct tcp_data_t)); +// bpf_printk("[kprobe][kprobe_tcp_close] pid:%u ktime:%llu send to perfbuffer.", pid, enter->key.ktime); + return 0; +} + +// +SEC("kprobe/tcp_connect") +int BPF_KPROBE(kprobe_tcp_connect, struct sock *sk) { + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct execve_map_value *enter; + enter = execve_map_get_noinit(pid); + if (!enter || enter->key.ktime == 0) { + bpf_printk("[kprobe][kprobe_tcp_connect] pid:%u never enter. 
skip collect", pid); + return 0; + } + bpf_printk("[kprobe][kprobe_tcp_connect] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + + __u32 zero = 0; + struct tcp_data_t* data = NULL; + data = bpf_map_lookup_elem(&sock_secure_data_heap, &zero); + if (!data) return 0; + memset(data, 0, sizeof(data)); + + data->func = TRACEPOINT_FUNC_TCP_CONNECT; + data->key = enter->key; + data->pkey = enter->pkey; + // struct inet_sock *inet = (struct inet_sock *)sk; + // data->timestamp = bpf_ktime_get_ns(); + // data->daddr = BPF_CORE_READ(sk, __sk_common.skc_daddr); + // data->daddr = bpf_htonl(data->daddr); + // data->dport = BPF_CORE_READ(sk, __sk_common.skc_dport); + // data->dport = bpf_htons(data->dport); + // data->saddr = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); + // data->saddr = bpf_htonl(data->saddr); + // data->sport = BPF_CORE_READ(inet, inet_sport); + // data->sport = bpf_htons(data->sport); + // data->state = BPF_CORE_READ(sk, __sk_common.skc_state); + // data->family = BPF_CORE_READ(sk, __sk_common.skc_family); + // data->net_ns = get_netns(sk); + // data->protocol = bpf_core_sock_sk_protocol_ak(sk); + + struct inet_sock *inet = (struct inet_sock *)sk; + data->timestamp = bpf_ktime_get_ns(); + unsigned int daddr = BPF_CORE_READ(sk, __sk_common.skc_daddr); + data->daddr = bpf_htonl(daddr); + unsigned short dport = BPF_CORE_READ(sk, __sk_common.skc_dport); + data->dport = bpf_htons(dport); + unsigned int saddr = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); + data->saddr = bpf_htonl(saddr); + unsigned short sport = BPF_CORE_READ(inet, inet_sport); + data->sport = bpf_htons(sport); + data->state = BPF_CORE_READ(sk, __sk_common.skc_state); + data->family = BPF_CORE_READ(sk, __sk_common.skc_family); + data->net_ns = get_netns(sk); + data->protocol = bpf_core_sock_sk_protocol_ak(sk); + + + struct secure_tailcall_stack* stack = NULL; + stack = bpf_map_lookup_elem(&tailcall_stack, &zero); + if (!stack) return 0; + memset(stack, 0, sizeof(stack)); + stack->func = SECURE_FUNC_TRACEPOINT_FUNC_TCP_CONNECT; + stack->tcp_data.func = TRACEPOINT_FUNC_TCP_CONNECT; + stack->tcp_data.key = enter->key; + stack->tcp_data.pkey = enter->pkey; + stack->tcp_data.timestamp = bpf_ktime_get_ns(); + stack->tcp_data.daddr = daddr; + stack->tcp_data.dport = bpf_htons(dport); + stack->tcp_data.saddr = saddr; + stack->tcp_data.sport = bpf_htons(sport); + stack->tcp_data.state = BPF_CORE_READ(sk, __sk_common.skc_state); + stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); + stack->tcp_data.net_ns = get_netns(sk); + stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); + bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, sport:%u, family:%u", + stack->tcp_data.saddr, stack->tcp_data.sport, data->family); + bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, dport:%u, state:%u", + stack->tcp_data.daddr, stack->tcp_data.dport, data->state); + + + bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); + + // do filters +// int sf, df, sp, dp; +// sf = addr_filter(data->saddr, 0); +// df = addr_filter(data->daddr, 1); +// sp = port_filter(data->sport, 0); +// dp = port_filter(data->dport, 1); +// if (sf || df || sp || dp) { +// bpf_printk("[kprobe][kprobe_tcp_connect] skip submit because of filters."); +// return 0; +// } + +// // bpf_printk("Packet matched CIDR: %x/%x/%u/%u\n", entry->net, entry->mask, entry->enable, entry->src); +// bpf_perf_event_output(ctx, &sock_secure_output, BPF_F_CURRENT_CPU, data, sizeof(struct tcp_data_t)); +// 
bpf_printk("[kprobe][kprobe_tcp_connect] pid:%u ktime:%llu send to perfbuffer.", pid, enter->key.ktime); + return 0; +} + + + +////// file ////// +// char _license[] SEC("license") = "GPL"; +// Function to calculate the length of the string +static inline __attribute__((always_inline)) u32 str_len(const char *str) +{ + u32 len = 0; +#pragma unroll + for (int i = 0; i < SYSAK_SECURE_MAX_PATH_LENGTH_LIMIT; i++) + { + if (str[i] == '\0') + break; + len++; + } + return len; +} + +static inline __attribute__((always_inline)) long copy_path(char *args, const struct path *arg) +{ + int *s = (int *)args; + int size = 0, flags = 0; + char *buffer; + void *curr = &args[4]; + umode_t i_mode; + buffer = d_path_local(arg, &size, &flags); + if (!buffer) + return 0; + // tips: path size between 0~255 + asm volatile("%[size] &= 0xff;\n" ::[size] "+r"(size) + :); + bpf_probe_read(curr, size, buffer); + *s = size; + size += 4; + BPF_CORE_READ_INTO(&i_mode, arg, dentry, d_inode, i_mode); + /* + * the format of the path is: + * ----------------------------------------- + * | 4 bytes | N bytes | 4 bytes | 2 bytes | + * | pathlen | path | flags | mode | + * ----------------------------------------- + * Next we set up the flags. + */ + asm volatile goto( + "r1 = *(u64 *)%[pid];\n" + "r7 = *(u32 *)%[offset];\n" + "if r7 s< 0 goto %l[a];\n" + "if r7 s> 1188 goto %l[a];\n" + "r1 += r7;\n" + "r2 = *(u32 *)%[flags];\n" + "*(u32 *)(r1 + 0) = r2;\n" + "r2 = *(u16 *)%[mode];\n" + "*(u16 *)(r1 + 4) = r2;\n" + : + : [pid] "m"(args), [flags] "m"(flags), [offset] "+m"(size), [mode] "m"(i_mode) + : "r0", "r1", "r2", "r7", "memory" + : a); +a: + size += sizeof(u32) + sizeof(u16); // for the flags + i_mode + return size; +} + + +void write_ipv6_addr32(u32 *dest, u32 *src) +{ + dest[0] = src[0]; + dest[1] = src[1]; + dest[2] = src[2]; + dest[3] = src[3]; +} + +SEC("kprobe/tailcall_prog") +int filter_prog(struct pt_regs *ctx) { + bpf_printk("[secure][tailcall] enter filter_prog"); + __u32 zero = 0; + struct secure_tailcall_stack *stack = bpf_map_lookup_elem(&tailcall_stack, &zero); + if (!stack) + return 0; + + int call_name_idx = stack->func; + struct selector_filters* filters = NULL; + filters = bpf_map_lookup_elem(&filter_map, &call_name_idx); + + if (filters == NULL) { + // no filter was set ... + // should send data directly. + bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_SEND); + return 0; + } + + // get data + int i = 0; + int pass = 1; + #pragma unroll + for (; i < MAX_FILTER_FOR_PER_CALLNAME; i ++) { + int idx = i; + struct selector_filter filter = filters->filters[idx]; + if (filter.filter_type != FILTER_TYPE_UNKNOWN) { + bpf_printk("get file prefix filter, type:%u, map index:%u", filter.filter_type, filter.map_idx[0]); + // bpf_printk("get file prefix filter, vallen:%u, plus 8:%u", filter.vallen, filter.vallen << 3); + } + struct addr4_lpm_trie arg4; + struct addr6_lpm_trie arg6; + switch(filter.filter_type) { + case FILTER_TYPE_SADDR: { + uint32_t saddr = stack->tcp_data.saddr; + + struct bpf_map* inner_map4 = NULL; + struct bpf_map* inner_map6 = NULL; + if (filter.map_idx[0] != -1) { + inner_map4 = bpf_map_lookup_elem(&addr4lpm_maps, &filter.map_idx[0]); + } + if (filter.map_idx[1] != -1) { + inner_map6 = bpf_map_lookup_elem(&addr6lpm_maps, &filter.map_idx[1]); + } + if (inner_map4 == NULL) { + bpf_printk("there is something wrong with the lpm maps..."); + bpf_printk("callname idx:%u cannot find inner map for saddr, continue ... 
", call_name_idx); + + continue; + } + arg4.addr = saddr; + arg4.prefix = 32; + // arg6.prefix = 128; + // write the address in as 4 u32s due to alignment + // write_ipv6_addr32(arg6.addr, (__u32 *)stack->tcp_data.saddr); + __u8 *ppass4 = NULL, *ppass6 = NULL; + if (inner_map4 != NULL) ppass4 = bpf_map_lookup_elem(inner_map4, &arg4); + + // ppass6 = bpf_map_lookup_elem(inner_map6, &arg6); + if (filter.op_type == OP_TYPE_IN) { + // not in white list + if (ppass4 == NULL) { + bpf_printk("callname idx:%u arg4 saddr:%u, prefix:%u not in whitelist", call_name_idx, arg4.addr, arg4.prefix); + return 0; + } + } else if (filter.op_type == OP_TYPE_NOT_IN) { + // or in black list + if (ppass4 != NULL) { + bpf_printk("callname idx:%u arg4 saddr:%u, prefix:%u in blacklist", call_name_idx, arg4.addr, arg4.prefix); + return 0; + } + } + break; + } + case FILTER_TYPE_DADDR: { + uint32_t daddr = stack->tcp_data.daddr; + arg4.addr = daddr; + arg4.prefix = 32; + struct bpf_map* inner_map = bpf_map_lookup_elem(&addr4lpm_maps, &filter.map_idx[0]); + if (inner_map == NULL) { + bpf_printk("callname idx:%u cannot find inner map for daddr, continue ... ", call_name_idx); + continue; + } + __u8* ppass = NULL; + ppass = bpf_map_lookup_elem(inner_map, &arg4); + if (filter.op_type == OP_TYPE_IN) { + // not in white list + if (ppass == NULL) { + bpf_printk("callname idx:%u arg4 daddr:%u, prefix:%u not in whitelist", call_name_idx, arg4.addr, arg4.prefix); + return 0; + } + } else if (filter.op_type == OP_TYPE_NOT_IN) { + // or in black list + bpf_printk("callname idx:%u arg4 daddr:%u, prefix:%u in blacklist", call_name_idx, arg4.addr, arg4.prefix); + if (ppass != NULL) return 0; + } + + break; + } + case FILTER_TYPE_SPORT: { + uint32_t sport = stack->tcp_data.sport; + struct bpf_map* inner_map = bpf_map_lookup_elem(&port_maps, &filter.map_idx[0]); + if (inner_map == NULL) { + bpf_printk("callname idx:%u cannot find inner map for sport, continue ... ", call_name_idx); + continue; + } + __u8* ppass = NULL; + ppass = bpf_map_lookup_elem(inner_map, &sport); + if (filter.op_type == OP_TYPE_IN) { + // not in white list + if (ppass == NULL) { + bpf_printk("callname idx:%u arg4 sport:%u not in whitelist", call_name_idx, sport); + return 0; + } + } else if (filter.op_type == OP_TYPE_NOT_IN) { + // or in black list + if (ppass != NULL) { + bpf_printk("callname idx:%u arg4 sport:%u in blacklist", call_name_idx, sport); + return 0; + } + } + } + case FILTER_TYPE_DPORT: { + uint32_t dport = stack->tcp_data.dport; + struct bpf_map* inner_map = bpf_map_lookup_elem(&port_maps, &filter.map_idx[0]); + if (inner_map == NULL) { + bpf_printk("callname idx:%u cannot find inner map for dport, continue ... 
", call_name_idx); + continue; + } + __u8* ppass = NULL; + ppass = bpf_map_lookup_elem(inner_map, &dport); + if (filter.op_type == OP_TYPE_IN) { + // not in white list + if (ppass == NULL) { + bpf_printk("callname idx:%u arg4 dport:%u not in whitelist", call_name_idx, dport); + return 0; + } + } else if (filter.op_type == OP_TYPE_NOT_IN) { + // or in black list + if (ppass != NULL) { + bpf_printk("callname idx:%u arg4 dport:%u in blacklist", call_name_idx, dport); + return 0; + } + } + break; + } + case FILTER_TYPE_FILE_PREFIX: { + struct string_prefix_lpm_trie *prefix = NULL; + int zero = 0; + prefix = bpf_map_lookup_elem(&string_prefix_maps_heap, &zero); + if (prefix == NULL) { + bpf_printk("[kprobe][tailcall] cannot lookup string_prefix_maps_heap"); + break; + } + // to bits + + // struct file_data_t* data = bpf_map_lookup_elem(&file_secure_data_heap, &zero); + // if (data == NULL) break; + // bpf_probe_read(&data, sizeof(struct file_data_t), &stack->file_data); + __u32 path_size = 0; + bpf_probe_read(&path_size, 4, stack->file_data.path); + prefix->prefixlen = path_size * 8; + bpf_probe_read(prefix->data, path_size & (STRING_PREFIX_MAX_LENGTH - 1), stack->file_data.path + 4); + // bpf_probe_read(prefix->data, filter.vallen & (STRING_PREFIX_MAX_LENGTH - 1), stack->file_data.path + 4); + int path_len = *(int *)stack->file_data.path; + bpf_printk("[kprobe][tailcall] begin to query inner map. stack path length:%d", path_len); + bpf_printk("[kprobe][tailcall] begin to query inner map. stack path+4:%s", &stack->file_data.path[4]); + bpf_printk("[kprobe][tailcall] begin to query inner map. prefix path:%s, path size:%u", prefix->data, path_size); + + struct bpf_map* inner_map = bpf_map_lookup_elem(&string_prefix_maps, &filter.map_idx[0]); + __u8* ppass = NULL; + if (inner_map != NULL) { + ppass = bpf_map_lookup_elem(inner_map, prefix); + if (ppass == NULL || *ppass == 0) pass &= 0; + else pass &= 1; + // if (ppass != NULL) { + // bpf_printk("[kprobe][tailcall] bingo~ query for inner map, path:%s, val:%u", prefix->data, (__u32)*ppass); + // pass &= (__u32)(*ppass); + // // TODO @sym @fs + // bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_SEND); + // return 0; + // } + // else { + // pass &= 0; + // bpf_printk("[kprobe][tailcall] query for inner map, got null val"); + // } + } else { + // no filters were set ... 
+ bpf_printk("[kprobe][tailcall] cannot find inner map, no filter set, pass"); + } + break; + } + default: + break; + } + } + + if (pass) { + bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_SEND); + } else { + bpf_printk("[filter_prog] skip submit due to the filter"); + } + + return 0; +} + +SEC("kprobe/secure_data_send") +int secure_data_send(struct pt_regs *ctx) +{ + bpf_printk("[secure][tailcall] enter secure_data_send"); + // the max tail call, just flush event + __u32 zero = 0; + struct secure_tailcall_stack *data = bpf_map_lookup_elem(&tailcall_stack, &zero); + if (!data) + return 0; + + switch (data->func) + { + case SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION: + case SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_MMAP_FILE: + case SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_PATH_TRUNCATE: + case SECURE_FUNC_TRACEPOINT_FUNC_SYS_WRITE: + case SECURE_FUNC_TRACEPOINT_FUNC_SYS_READ:{ + bpf_perf_event_output(ctx, &file_secure_output, BPF_F_CURRENT_CPU, &data->file_data, sizeof(struct file_data_t)); + bpf_printk("[kprobe][secure_data_send][file] pid:%u, ktime:%u send to perfbuffer.\n", data->file_data.key.pid, data->file_data.key.ktime); + break; + } + case SECURE_FUNC_TRACEPOINT_FUNC_TCP_CLOSE: + case SECURE_FUNC_TRACEPOINT_FUNC_TCP_CONNECT: + case SECURE_FUNC_TRACEPOINT_FUNC_TCP_SENDMSG: + bpf_perf_event_output(ctx, &sock_secure_output, BPF_F_CURRENT_CPU, &data->tcp_data, sizeof(struct tcp_data_t)); + bpf_printk("[kprobe][secure_data_send][socket] pid:%u, ktime:%u send to perfbuffer.\n", data->file_data.key.pid, data->file_data.key.ktime); + default: + break; + } + // bpf_perf_event_output(ctx, &file_secure_output, BPF_F_CURRENT_CPU, data, sizeof(struct secure_tailcall_stack)); + // bpf_printk("[kprobe][kprobe_security_file_permission] pid:%u, ktime:%u send to perfbuffer.\n", data->key.pid, data->key.ktime); + return 0; +} + +SEC("kprobe/security_file_permission") +int kprobe_security_file_permission(struct pt_regs *ctx) +{ + bpf_printk("[kprobe][kprobe_security_file_permission] enter security_file_permission."); + __u32 zero = 0; + struct secure_tailcall_stack* stack = NULL; + stack = bpf_map_lookup_elem(&tailcall_stack, &zero); + if (!stack) return 0; + memset(stack, 0, sizeof(stack)); + struct file *file = (struct file *)PT_REGS_PARM1(ctx); + const struct path *path_arg = 0; + path_arg = _(&file->f_path); + long ret = copy_path(stack->file_data.path, path_arg); + int path_len = *(int *)stack->file_data.path; + u32 flag_prefix = 4 + path_len; + int flag = -1; + if (flag_prefix < 2000 && flag_prefix > 0) bpf_probe_read(&flag, 4, stack->file_data.path + flag_prefix); + u32 mode_prefix = 8 + path_len; + short mode = -1; + if (mode_prefix < 2000 && mode_prefix > 0) mode = bpf_probe_read(&mode, 2, stack->file_data.path + mode_prefix); + bpf_printk("[kprobe][tailcall][permission] before ~ stack path length:%d, ret:%lld, flag:%d", path_len, ret, flag); + bpf_printk("[kprobe][tailcall][permission] before ~ stack path+4:%s, mode:%d", &stack->file_data.path[4], mode); + + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct execve_map_value *enter; + enter = execve_map_get_noinit(pid); + if (!enter || enter->key.ktime == 0) + { + bpf_printk("[kprobe][tailcall][permission] no init!!! return! stack path:%s, pid:%u", stack->file_data.path, pid); + bpf_printk("[kprobe][tailcall][permission] no init!!! return! 
stack path+4:%s, pid:%u", &stack->file_data.path[4], pid); + return 0; + } + bpf_printk("[kprobe][kprobe_security_file_permission] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + // __u32 zero = 0; + // struct secure_tailcall_stack* stack = NULL; + // stack = bpf_map_lookup_elem(&tailcall_stack, &zero); + // if (!stack) return 0; + // memset(stack, 0, sizeof(stack)); + stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION; + stack->file_data.func = TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION; + stack->file_data.key = enter->key; + stack->file_data.pkey = enter->pkey; + stack->file_data.timestamp = bpf_ktime_get_ns(); + // struct file *file = (struct file *)PT_REGS_PARM1(ctx); + // const struct path *path_arg = 0; + // path_arg = _(&file->f_path); + // copy_path(stack->file_data.path, path_arg); + bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); + return 0; +} + +SEC("kprobe/security_mmap_file") +int kprobe_security_mmap_file(struct pt_regs *ctx) +{ + bpf_printk("[kprobe][security_mmap_file] enter security_mmap_file."); + __u32 zero = 0; + struct secure_tailcall_stack* stack = NULL; + stack = bpf_map_lookup_elem(&tailcall_stack, &zero); + if (!stack) return 0; + memset(stack, 0, sizeof(stack)); + struct file *file = (struct file *)PT_REGS_PARM1(ctx); + const struct path *path_arg = 0; + path_arg = _(&file->f_path); + long ret = copy_path(stack->file_data.path, path_arg); + int path_len = *(int *)stack->file_data.path; + bpf_printk("[kprobe][tailcall][mmap] before ~ stack path length:%s, ret:%lld", path_len, ret); + bpf_printk("[kprobe][tailcall][mmap] before ~ stack path+4:%s", &stack->file_data.path[4]); + + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct execve_map_value *enter; + enter = execve_map_get_noinit(pid); + if (!enter || enter->key.ktime == 0) + { + return 0; + } + bpf_printk("[kprobe][security_mmap_file] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + + stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_MMAP_FILE; + stack->file_data.func = TRACEPOINT_FUNC_SECURITY_MMAP_FILE; + stack->file_data.key = enter->key; + stack->file_data.pkey = enter->pkey; + stack->file_data.timestamp = bpf_ktime_get_ns(); + + bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); + return 0; +} + +SEC("kprobe/security_path_truncate") +int kprobe_security_path_truncate(struct pt_regs *ctx) +{ + bpf_printk("[kprobe][security_path_truncate] enter security_path_truncate."); + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct execve_map_value *enter; + enter = execve_map_get_noinit(pid); + if (!enter || enter->key.ktime == 0) + { + return 0; + } + bpf_printk("[kprobe][security_path_truncate] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + __u32 zero = 0; + struct secure_tailcall_stack* stack = NULL; + stack = bpf_map_lookup_elem(&tailcall_stack, &zero); + if (!stack) return 0; + memset(stack, 0, sizeof(stack)); + stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_PATH_TRUNCATE; + stack->file_data.func = TRACEPOINT_FUNC_SECURITY_PATH_TRUNCATE; + stack->file_data.key = enter->key; + stack->file_data.pkey = enter->pkey; + stack->file_data.timestamp = bpf_ktime_get_ns(); + struct path *path = (struct path *)PT_REGS_PARM1(ctx); + const struct path *path_arg = 0; + path_arg = _(path); + copy_path(stack->file_data.path, path_arg); + bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); + return 0; +} + +//char _license[] SEC("license") = "GPL"; diff --git a/src/security/string_maps.h b/src/security/string_maps.h new 
file mode 100644 index 0000000..baeedba --- /dev/null +++ b/src/security/string_maps.h @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright Authors of Cilium */ + +#ifndef STRING_MAPS_H__ +#define STRING_MAPS_H__ + +#include +#include +#include +#include +#include "../coolbpf.h" +#include "type.h" + +/* + * To facilitate an arbitrary number of strings that can be matched on, string matching + * uses a hash look up. The problem with this is that the key to a hash has to be a fixed + * size, so if the maximum string length is 128 bytes, then all stored strings will be + * 128 bytes long (padded with 0s) and the string to be looked up also has to be padded + * with 0s to 128 bytes. This means that a short string will be hashed as if it is 128 + * bytes long. + * + * The BPF hash maps use jhash for key hashing. See include/linux/jhash.h. This requires + * approximately 1 CPU cycle per byte, so in the example above, hashing every string, + * regardless of length, will take ~128 cycles, which is clearly inefficient. See + * https://fosdem.org/2023/schedule/event/bpf_hashing/ for details. + * + * jhash hashes in 12 byte blocks (3 x u32). For all lengths >12, a number of 12 byte + * blocks are hashed, and the remainder is hashed using a combination of single byte + * loads/shifts, followed by a final mix. It appears that the most efficient use of + * jhash is with lengths equal to 12k + 1, minimising the number of single byte loads/ + * shifts. + * + * In order to reduce the amount of hashing of padded 0s, we opt to store string matches + * in multiple hashes, with increasing key sizes, where the key size is one more than a + * multiple of 12. Each string to be stored is placed in the hash that has the smallest + * key size that can accommodate it (and is padded to the key size). Strings to be looked + * up are equally padded to the smallest key size that can accommodate them, and then + * looked up in the related map. + * + * The chosen key sizes are 25, 49, 73, 97, 121, 145, 258, 514, 1026, 2050, 4098 (11 maps). + * The first 6 are sized for common uses and to minimise the hashing of empty bytes. The + * following 5 maps notionally double in size, with lengths equal to 2^k + 2. On kernels + * <5.11, the last four maps are replaced with a single map with key size 512. This is due + * to key size limitations on kernels <5.11. + * + * In order to distinguish between character buffers that end in 0s and similar buffers + * that are padded with 0s, each string will be prefixed by its length stored in a + * single byte (for first 6 maps) or as a little endian u16 (latter maps). 
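+ *
+ * For example, a 40 byte string is stored with its 1 byte length prefix (41 bytes
+ * in total), so it goes into the 49 byte map and is padded with 8 trailing zero
+ * bytes; only 49 bytes are hashed instead of the full maximum key size.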
+ */ + + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, STRING_MAPS_OUTER_MAX_ENTRIES); + __uint(key_size, sizeof(__u32)); + __array( + values, struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u8[STRING_MAPS_SIZE_0]); + __type(value, __u8); + }); +} string_maps_0 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, STRING_MAPS_OUTER_MAX_ENTRIES); + __uint(key_size, sizeof(__u32)); + __array( + values, struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u8[STRING_MAPS_SIZE_1]); + __type(value, __u8); + }); +} string_maps_1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, STRING_MAPS_OUTER_MAX_ENTRIES); + __uint(key_size, sizeof(__u32)); + __array( + values, struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u8[STRING_MAPS_SIZE_2]); + __type(value, __u8); + }); +} string_maps_2 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, STRING_MAPS_OUTER_MAX_ENTRIES); + __uint(key_size, sizeof(__u32)); + __array( + values, struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u8[STRING_MAPS_SIZE_3]); + __type(value, __u8); + }); +} string_maps_3 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, STRING_MAPS_OUTER_MAX_ENTRIES); + __uint(key_size, sizeof(__u32)); + __array( + values, struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u8[STRING_MAPS_SIZE_4]); + __type(value, __u8); + }); +} string_maps_4 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, STRING_MAPS_OUTER_MAX_ENTRIES); + __uint(key_size, sizeof(__u32)); + __array( + values, struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u8[STRING_MAPS_SIZE_5]); + __type(value, __u8); + }); +} string_maps_5 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, STRING_MAPS_OUTER_MAX_ENTRIES); + __uint(key_size, sizeof(__u32)); + __array( + values, struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u8[STRING_MAPS_SIZE_6]); + __type(value, __u8); + }); +} string_maps_6 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, STRING_MAPS_OUTER_MAX_ENTRIES); + __uint(key_size, sizeof(__u32)); + __array( + values, struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u8[STRING_MAPS_SIZE_7]); + __type(value, __u8); + }); +} string_maps_7 SEC(".maps"); + +#ifdef __LARGE_MAP_KEYS +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, STRING_MAPS_OUTER_MAX_ENTRIES); + __uint(key_size, sizeof(__u32)); + __array( + values, struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u8[STRING_MAPS_SIZE_8]); + __type(value, __u8); + }); +} string_maps_8 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, STRING_MAPS_OUTER_MAX_ENTRIES); + __uint(key_size, sizeof(__u32)); + __array( + values, struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u8[STRING_MAPS_SIZE_9]); + __type(value, __u8); + }); +} string_maps_9 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, STRING_MAPS_OUTER_MAX_ENTRIES); + __uint(key_size, sizeof(__u32)); + 
__array( + values, struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u8[STRING_MAPS_SIZE_10]); + __type(value, __u8); + }); +} string_maps_10 SEC(".maps"); +#endif + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, STRING_MAPS_HEAP_SIZE); +} string_maps_heap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, STRING_MAPS_HEAP_SIZE); +} string_maps_ro_zero SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, STRING_MAPS_OUTER_MAX_ENTRIES); + __uint(key_size, sizeof(__u32)); + __array( + values, struct { + __uint(type, BPF_MAP_TYPE_LPM_TRIE); + __uint(max_entries, 1); + __type(key, __u8[sizeof(struct string_prefix_lpm_trie)]); // Need to specify as byte array as wouldn't take struct as key type + __type(value, __u8); + __uint(map_flags, BPF_F_NO_PREALLOC); + }); +} string_prefix_maps SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct string_prefix_lpm_trie)); +} string_prefix_maps_heap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, STRING_MAPS_OUTER_MAX_ENTRIES); + __uint(key_size, sizeof(__u32)); + __array( + values, struct { + __uint(type, BPF_MAP_TYPE_LPM_TRIE); + __uint(max_entries, 1); + __type(key, __u8[sizeof(struct string_postfix_lpm_trie)]); // Need to specify as byte array as wouldn't take struct as key type + __type(value, __u8); + __uint(map_flags, BPF_F_NO_PREALLOC); + }); +} string_postfix_maps SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct string_postfix_lpm_trie)); +} string_postfix_maps_heap SEC(".maps"); + +#endif // STRING_MAPS_H__ diff --git a/src/security/tailcall_stack.h b/src/security/tailcall_stack.h new file mode 100644 index 0000000..8d63137 --- /dev/null +++ b/src/security/tailcall_stack.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include +#include +#include +#include "../coolbpf.h" +#include "type.h" + +struct secure_tailcall_stack { + enum secure_funcs func; + union { + struct tcp_data_t tcp_data; + struct file_data_t file_data; + }; +} __attribute__((packed)); \ No newline at end of file diff --git a/src/security/type.h b/src/security/type.h new file mode 100644 index 0000000..86cc671 --- /dev/null +++ b/src/security/type.h @@ -0,0 +1,253 @@ +// +// Created by qianlu on 2024/6/12. 
+// + +#ifndef SYSAK_TYPE_H +#define SYSAK_TYPE_H + +#ifdef __cplusplus +#include +#endif + +#include "bpf_process_event_type.h" + +#ifndef AF_INET +#define AF_INET 2 +#endif +#ifndef AF_INET6 +#define AF_INET6 10 +#endif +#define IPV4LEN 4 +#define IPV6LEN 16 + +struct tuple_type { + __u64 saddr[2]; + __u64 daddr[2]; + __u16 sport; + __u16 dport; + __u16 protocol; + __u16 family; +}; + +/// network etc +enum sock_secure_ctrl_type { + INVALID, + PID, + CONTAINER_ID, + SOURCE_IP, + SOURCE_PORT, + DEST_IP, + DEST_PORT, + /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ + NET_NS, + MAX, +}; + +enum secure_funcs { + + // file + SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION, + SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_MMAP_FILE, + SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_PATH_TRUNCATE, + SECURE_FUNC_TRACEPOINT_FUNC_SYS_WRITE, + SECURE_FUNC_TRACEPOINT_FUNC_SYS_READ, + + // network + SECURE_FUNC_TRACEPOINT_FUNC_TCP_CLOSE, + SECURE_FUNC_TRACEPOINT_FUNC_TCP_CONNECT, + SECURE_FUNC_TRACEPOINT_FUNC_TCP_SENDMSG, + + + // process + + SECURE_FUNCS_MAX, +}; + +enum sock_secure_func { + TRACEPOINT_FUNC_TCP_CLOSE, + TRACEPOINT_FUNC_TCP_CONNECT, + TRACEPOINT_FUNC_TCP_SENDMSG, + TRACEPOINT_FUNC_MAX, +}; + +struct addr_port { + __u32 addr; + __u16 port; +}; + +struct ns_key_t { + __u32 net_ns_inum; +}; + +struct tcp_data_t { + struct msg_execve_key key; + struct msg_execve_key pkey; + enum sock_secure_func func; + __u16 protocol; + __u16 state; + __u16 family; + __u32 pid; + __u32 saddr; // Source address + __u32 daddr; // Destination address + __u16 sport; // Source port + __u16 dport; // Destination port + __u32 net_ns; // Network namespace + __u64 timestamp; + __u64 bytes; +}; + +#define SYSAK_SECURE_MAX_CIDR_LIMIT 20 +#define SYSAK_SECURE_MAX_CIDR_LIMIT_HALF 10 +#define SYSAK_SECURE_MAX_PORT_LIMIT 20 +#define SYSAK_SECURE_MAX_PORT_LIMIT_HALF 10 + +#define SYSAK_SECURE_MAX_PATH_LIMIT 2 +#define SYSAK_SECURE_MAX_PATH_LENGTH_LIMIT 256 + +#define INT_MAPS_OUTER_MAX_ENTRIES 20 +#define INT_MAPS_INNER_MAX_ENTRIES 8 +#define STRING_MAPS_OUTER_MAX_ENTRIES 20 +#define STRING_MAPS_INNER_MAX_ENTRIES 8 + +struct cidr_entry { + int inited; + int black; // black list or not + __u32 net; // Network part of CIDR + __u32 mask; // Network mask +}; + +struct port_entry { + int inited; + int black; // is black list or not + __u16 port; // is src port or not +}; + + + +/// process etc + +// file +enum file_secure_func +{ + TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION, + TRACEPOINT_FUNC_SECURITY_MMAP_FILE, + TRACEPOINT_FUNC_SECURITY_PATH_TRUNCATE, + TRACEPOINT_FUNC_SYS_WRITE, + TRACEPOINT_FUNC_SYS_READ, +}; +struct file_data_t +{ + struct msg_execve_key key; + struct msg_execve_key pkey; + enum file_secure_func func; + __u64 timestamp; + __u32 size; + char path[2000]; +}; +struct path_entry +{ + // todo need updates + int inited; + int length; + char path[2000]; +}; + + +#define STRING_MAPS_KEY_INC_SIZE 24 +#define STRING_MAPS_SIZE_0 (1 * STRING_MAPS_KEY_INC_SIZE + 1) +#define STRING_MAPS_SIZE_1 (2 * STRING_MAPS_KEY_INC_SIZE + 1) +#define STRING_MAPS_SIZE_2 (3 * STRING_MAPS_KEY_INC_SIZE + 1) +#define STRING_MAPS_SIZE_3 (4 * STRING_MAPS_KEY_INC_SIZE + 1) +#define STRING_MAPS_SIZE_4 (5 * STRING_MAPS_KEY_INC_SIZE + 1) +#define STRING_MAPS_SIZE_5 (6 * STRING_MAPS_KEY_INC_SIZE + 1) +#define STRING_MAPS_SIZE_6 (256 + 2) +#ifdef __LARGE_MAP_KEYS +#define STRING_MAPS_SIZE_7 (512 + 2) +#define STRING_MAPS_SIZE_8 (1024 + 2) +#define STRING_MAPS_SIZE_9 (2048 + 2) +#define STRING_MAPS_SIZE_10 (4096 + 2) +#else +#define 
STRING_MAPS_SIZE_7 (512) +#endif +#define STRING_MAPS_HEAP_SIZE 16384 +#define STRING_MAPS_HEAP_MASK (8192 - 1) +#define STRING_MAPS_COPY_MASK 4095 + +#define STRING_PREFIX_MAX_LENGTH 256 + +struct string_prefix_lpm_trie { + __u32 prefixlen; + __u8 data[STRING_PREFIX_MAX_LENGTH]; +}; + +#define STRING_POSTFIX_MAX_LENGTH 128 +#define STRING_POSTFIX_MAX_MASK (STRING_POSTFIX_MAX_LENGTH - 1) +#ifdef __LARGE_BPF_PROG +#define STRING_POSTFIX_MAX_MATCH_LENGTH STRING_POSTFIX_MAX_LENGTH +#else +#define STRING_POSTFIX_MAX_MATCH_LENGTH 95 +#endif + +struct string_postfix_lpm_trie { + __u32 prefixlen; + __u8 data[STRING_POSTFIX_MAX_LENGTH]; +}; + + +#define ADDR_LPM_MAPS_OUTER_MAX_ENTRIES 20 +#define ADDR_LPM_MAPS_INNER_MAX_ENTRIES 8 + + +struct addr4_lpm_trie { + __u32 prefix; + __u32 addr; +}; + +struct addr6_lpm_trie { + __u32 prefix; + __u32 addr[4]; +}; + +enum tailcall_func { + TAILCALL_FILTER_PROG, + TAILCALL_SEND, +}; + + +enum filter_type { + FILTER_TYPE_UNKNOWN, + FILTER_TYPE_SADDR, + FILTER_TYPE_DADDR, + FILTER_TYPE_NOT_SADDR, + FILTER_TYPE_NOT_DADDR, + FILTER_TYPE_SPORT, + FILTER_TYPE_DPORT, + FILTER_TYPE_NOT_SPORT, + FILTER_TYPE_NOT_DPORT, + FILTER_TYPE_FILE_PREFIX, +}; + +enum op_type { + OP_TYPE_IN, + OP_TYPE_NOT_IN, +}; + +#define MAX_FILTER_FOR_PER_CALLNAME 8 + +struct selector_filter { + // __u32 index; + // __u32 op; + __u32 vallen; + enum filter_type filter_type; + enum op_type op_type; + __u32 map_idx[2]; + // __u8 value; +} __attribute__((packed)); + +struct selector_filters { + int filter_count; + struct selector_filter filters[MAX_FILTER_FOR_PER_CALLNAME]; +} __attribute__((packed)); + + +#endif //SYSAK_TYPE_H -- Gitee From 11c827fe5d9b9db0f664117c29af65ef41b07b1e Mon Sep 17 00:00:00 2001 From: "qianlu.kk" Date: Fri, 3 Jan 2025 17:49:55 +0800 Subject: [PATCH 04/24] install skeleton && fix out of bound error --- scripts/cmake/genskel.cmake | 3 +++ src/CMakeLists.txt | 23 ++++++++++++----------- src/bpf/CMakeLists.txt | 1 + src/security/CMakeLists.txt | 1 + src/security/security.bpf.c | 10 +++++----- 5 files changed, 22 insertions(+), 16 deletions(-) diff --git a/scripts/cmake/genskel.cmake b/scripts/cmake/genskel.cmake index 325b581..4b28c68 100644 --- a/scripts/cmake/genskel.cmake +++ b/scripts/cmake/genskel.cmake @@ -41,5 +41,8 @@ macro(genskel name) ${name}_skel DEPENDS ${BPF_S_FILE} ) + # install skeleton headers + message(STATUS "gen skel INSTALL_INCLUDE_DIR: ${INSTALL_INCLUDE_DIR}") + install(FILES ${BPF_S_FILE} DESTINATION ${INSTALL_INCLUDE_DIR}/coolbpf) endmacro() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8a0391a..6ce0349 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,6 +18,14 @@ else(ELF_LIBRARY) message("Not found libelf library: ${ELF_LIBRARY}") endif(ELF_LIBRARY) +if(NOT DEFINED INSTALL_LIB_DIR) + set(INSTALL_LIB_DIR ${CMAKE_INSTALL_PREFIX}/lib) +endif() + +if(NOT DEFINED INSTALL_INCLUDE_DIR) + set(INSTALL_INCLUDE_DIR ${CMAKE_INSTALL_PREFIX}/include) +endif() + add_subdirectory(bpf) add_subdirectory(security) if (ENABLE_PROFILE) @@ -30,14 +38,6 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/coolbpf.pc.in include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/bpf) -if(NOT DEFINED INSTALL_LIB_DIR) - set(INSTALL_LIB_DIR ${CMAKE_INSTALL_PREFIX}/lib) -endif() - -if(NOT DEFINED INSTALL_INCLUDE_DIR) - set(INSTALL_INCLUDE_DIR ${CMAKE_INSTALL_PREFIX}/include) -endif() - file(GLOB sources ${PROJECT_SOURCE_DIR}/third/libbpf/src/*.c ${CMAKE_CURRENT_SOURCE_DIR}/*.c) # share library @@ -57,9 +57,10 
@@ set_target_properties(coolbpf_static PROPERTIES OUTPUT_NAME "coolbpf") install(TARGETS coolbpf LIBRARY DESTINATION ${INSTALL_LIB_DIR}) install(TARGETS coolbpf_static ARCHIVE DESTINATION ${INSTALL_LIB_DIR}) -# install skeleton headers -file(GLOB skel_headers ${CMAKE_CURRENT_BINARY_DIR}/bpf/*.skel.h ${CMAKE_CURRENT_BINARY_DIR}/security/*.skel.h) -install(FILES ${skel_headers} DESTINATION ${INSTALL_INCLUDE_DIR}/coolbpf) +# file(GLOB skel_headers ${CMAKE_CURRENT_BINARY_DIR}/bpf/*.skel.h ${CMAKE_CURRENT_BINARY_DIR}/security/*.skel.h) +# install(FILES ${skel_headers} DESTINATION ${INSTALL_INCLUDE_DIR}/coolbpf) + +message(STATUS "in coolbpf/src INSTALL_INCLUDE_DIR: ${INSTALL_INCLUDE_DIR}") # install security headers file(GLOB security_headers security/*.h) diff --git a/src/bpf/CMakeLists.txt b/src/bpf/CMakeLists.txt index 5b8ef33..bbb9802 100644 --- a/src/bpf/CMakeLists.txt +++ b/src/bpf/CMakeLists.txt @@ -1,3 +1,4 @@ include(${PROJECT_SOURCE_DIR}/scripts/cmake/genskel.cmake) +message(STATUS "net bpf INSTALL_INCLUDE_DIR: ${INSTALL_INCLUDE_DIR}") genskel(net) \ No newline at end of file diff --git a/src/security/CMakeLists.txt b/src/security/CMakeLists.txt index dd57f8b..28906f8 100644 --- a/src/security/CMakeLists.txt +++ b/src/security/CMakeLists.txt @@ -1,3 +1,4 @@ include(${PROJECT_SOURCE_DIR}/scripts/cmake/genskel.cmake) +message(STATUS "security bpf INSTALL_INCLUDE_DIR: ${INSTALL_INCLUDE_DIR}") genskel(security) \ No newline at end of file diff --git a/src/security/security.bpf.c b/src/security/security.bpf.c index 025d391..7524bc6 100644 --- a/src/security/security.bpf.c +++ b/src/security/security.bpf.c @@ -1251,12 +1251,12 @@ int kprobe_security_file_permission(struct pt_regs *ctx) path_arg = _(&file->f_path); long ret = copy_path(stack->file_data.path, path_arg); int path_len = *(int *)stack->file_data.path; - u32 flag_prefix = 4 + path_len; + const u32 flag_prefix = 4 + path_len; int flag = -1; - if (flag_prefix < 2000 && flag_prefix > 0) bpf_probe_read(&flag, 4, stack->file_data.path + flag_prefix); - u32 mode_prefix = 8 + path_len; + if (flag_prefix < 2000 && flag_prefix >= 0) bpf_probe_read(&flag, 4, stack->file_data.path + flag_prefix); + const u32 mode_prefix = 8 + path_len; short mode = -1; - if (mode_prefix < 2000 && mode_prefix > 0) mode = bpf_probe_read(&mode, 2, stack->file_data.path + mode_prefix); + if (mode_prefix < 2000 && mode_prefix >= 0) bpf_probe_read(&mode, 2, stack->file_data.path + mode_prefix); bpf_printk("[kprobe][tailcall][permission] before ~ stack path length:%d, ret:%lld, flag:%d", path_len, ret, flag); bpf_printk("[kprobe][tailcall][permission] before ~ stack path+4:%s, mode:%d", &stack->file_data.path[4], mode); @@ -1354,4 +1354,4 @@ int kprobe_security_path_truncate(struct pt_regs *ctx) return 0; } -//char _license[] SEC("license") = "GPL"; +// char _license[] SEC("license") = "GPL"; -- Gitee From e781f32ff6c58b5cb1feaf6c3856c656222f7c8f Mon Sep 17 00:00:00 2001 From: "qianlu.kk" Date: Wed, 8 Jan 2025 16:12:43 +0800 Subject: [PATCH 05/24] enable timeout --- src/net.c | 4 ++-- src/net.h | 5 +++-- tools/examples/net/net.c | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/net.c b/src/net.c index 398d0c5..2fd39b7 100644 --- a/src/net.c +++ b/src/net.c @@ -531,7 +531,7 @@ void ebpf_config(int32_t opt1, int32_t opt2, int32_t params_count, } } -int32_t ebpf_poll_events(int32_t max_events, int32_t *stop_flag) +int32_t ebpf_poll_events(int32_t max_events, int32_t *stop_flag, int timeout_ms) { int j; /* 100 times one by one ?*/ @@ -540,7 
+540,7 @@ int32_t ebpf_poll_events(int32_t max_events, int32_t *stop_flag) { if (g_poll_callback_count < max_events && !*stop_flag) { - int rst = perf_buffer__poll(env.pbs[j], 0); + int rst = perf_buffer__poll(env.pbs[j], timeout_ms); if (rst < 0 && errno != EINTR) { net_log(LOG_TYPE_WARN, "Error polling perf buffer: %d, hand_type:%d\n", diff --git a/src/net.h b/src/net.h index f0c0384..3ac97f4 100644 --- a/src/net.h +++ b/src/net.h @@ -6,13 +6,14 @@ #ifndef COOLBPF_NET_H #define COOLBPF_NET_H +#if defined(__linux__) #ifndef __VMLINUX_H__ #include #include #include #include #endif - +#endif // request or reponse #define PACKET_MAX_SIZE 8192 @@ -395,7 +396,7 @@ void ebpf_config(int32_t opt1, int32_t opt2, int32_t params_count, void **params * @param stop_flag 是否需要立即退出 * @return int32_t 正数,返回处理的事件数; -100,stop_flag触发;其他,错误码 */ -int32_t ebpf_poll_events(int32_t max_events, int32_t *stop_flag); +int32_t ebpf_poll_events(int32_t max_events, int32_t *stop_flag, int timeout_ms); // 启动时,会调用init,然后调用start /* diff --git a/tools/examples/net/net.c b/tools/examples/net/net.c index 3659e65..6631f37 100644 --- a/tools/examples/net/net.c +++ b/tools/examples/net/net.c @@ -445,7 +445,7 @@ int main(int argc, char **argv) printf("net start end...\n"); while (1) { - err = ebpf_poll_events(100, &stop_flag); + err = ebpf_poll_events(100, &stop_flag, 0); if (exiting) { if (env_para.file != stdout) -- Gitee From c24d3993c16051c00d86eae063958898e435b5a7 Mon Sep 17 00:00:00 2001 From: "qianlu.kk" Date: Thu, 23 Jan 2025 11:10:59 +0800 Subject: [PATCH 06/24] 1. modify skeleton location 2. split security bpf files Signed-off-by: qianlu.kk --- .gitignore | 6 +- scripts/cmake/genskel.cmake | 6 +- src/CMakeLists.txt | 2 +- src/security/file_security.bpf.c | 189 +++++++++++++++++++++++ src/security/network_security.bpf.c | 231 ++++++++++++++++++++++++++++ src/security/security.bpf.c | 45 +++--- 6 files changed, 450 insertions(+), 29 deletions(-) create mode 100644 src/security/file_security.bpf.c create mode 100644 src/security/network_security.bpf.c diff --git a/.gitignore b/.gitignore index 0eb1688..2075465 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,8 @@ docs/_build lwcb/pylwcb/.env lwcb/.vscode lwcb/target -lwcb/pylwcb/target \ No newline at end of file +lwcb/pylwcb/target + +src/bpf/*.skel.h +src/profiler/*.skel.h +src/security/*.skel.h \ No newline at end of file diff --git a/scripts/cmake/genskel.cmake b/scripts/cmake/genskel.cmake index 4b28c68..e6dcfb8 100644 --- a/scripts/cmake/genskel.cmake +++ b/scripts/cmake/genskel.cmake @@ -21,7 +21,7 @@ message(STATUS "Include Directories: ${include_dirs}") macro(genskel name) SET(BPF_C_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${name}.bpf.c) SET(BPF_O_FILE ${CMAKE_CURRENT_BINARY_DIR}/${name}.bpf.o) - SET(BPF_S_FILE ${CMAKE_CURRENT_BINARY_DIR}/${name}.skel.h) + SET(BPF_S_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${name}.skel.h) add_custom_command( OUTPUT ${BPF_O_FILE} @@ -30,6 +30,7 @@ macro(genskel name) COMMENT "Generating BPF object: ${BPF_O_FILE}" ) + message(STATUS "gen skel CMAKE_CURRENT_SOURCE_DIR: ${CMAKE_CURRENT_SOURCE_DIR}") add_custom_command( OUTPUT ${BPF_S_FILE} COMMAND ${BPFTOOL} gen skeleton ${BPF_O_FILE} > ${BPF_S_FILE} @@ -44,5 +45,4 @@ macro(genskel name) # install skeleton headers message(STATUS "gen skel INSTALL_INCLUDE_DIR: ${INSTALL_INCLUDE_DIR}") install(FILES ${BPF_S_FILE} DESTINATION ${INSTALL_INCLUDE_DIR}/coolbpf) -endmacro() - +endmacro() \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6ce0349..f2ddacd 100644 --- 
a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -35,7 +35,7 @@ endif() configure_file(${CMAKE_CURRENT_SOURCE_DIR}/coolbpf.pc.in ${CMAKE_CURRENT_BINARY_DIR}/coolbpf.pc @ONLY) -include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} +include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/bpf ${CMAKE_CURRENT_BINARY_DIR}/bpf) file(GLOB sources ${PROJECT_SOURCE_DIR}/third/libbpf/src/*.c diff --git a/src/security/file_security.bpf.c b/src/security/file_security.bpf.c new file mode 100644 index 0000000..6095185 --- /dev/null +++ b/src/security/file_security.bpf.c @@ -0,0 +1,189 @@ +#include +#include +#include +#include +#include "../coolbpf.h" + +#include "int_maps.h" +#include "filter.h" +#include "type.h" +#include "process.h" +#include "addr_lpm_maps.h" +#include "string_maps.h" +#include "bpf_exit.h" +#include "tailcall_stack.h" + +struct +{ + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 3); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} secure_tailcall_map SEC(".maps"); + +BPF_PERCPU_ARRAY(tailcall_stack, struct secure_tailcall_stack, 1); + +static inline __attribute__((always_inline)) u32 str_len(const char *str) +{ + u32 len = 0; +#pragma unroll + for (int i = 0; i < SYSAK_SECURE_MAX_PATH_LENGTH_LIMIT; i++) + { + if (str[i] == '\0') + break; + len++; + } + return len; +} + +static inline __attribute__((always_inline)) long copy_path(char *args, const struct path *arg) +{ + int *s = (int *)args; + int size = 0, flags = 0; + char *buffer; + void *curr = &args[4]; + umode_t i_mode; + buffer = d_path_local(arg, &size, &flags); + if (!buffer) + return 0; + // tips: path size between 0~255 + asm volatile("%[size] &= 0xff;\n" ::[size] "+r"(size) + :); + bpf_probe_read(curr, size, buffer); + *s = size; + size += 4; + BPF_CORE_READ_INTO(&i_mode, arg, dentry, d_inode, i_mode); + /* + * the format of the path is: + * ----------------------------------------- + * | 4 bytes | N bytes | 4 bytes | 2 bytes | + * | pathlen | path | flags | mode | + * ----------------------------------------- + * Next we set up the flags. 
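+ * With this layout a path of length N leaves the flags at offset 4 + N and the
+ * mode at offset 8 + N, which matches the flag_prefix / mode_prefix offsets used
+ * when kprobe_security_file_permission reads the values back for logging.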
+ */ + asm volatile goto( + "r1 = *(u64 *)%[pid];\n" + "r7 = *(u32 *)%[offset];\n" + "if r7 s< 0 goto %l[a];\n" + "if r7 s> 1188 goto %l[a];\n" + "r1 += r7;\n" + "r2 = *(u32 *)%[flags];\n" + "*(u32 *)(r1 + 0) = r2;\n" + "r2 = *(u16 *)%[mode];\n" + "*(u16 *)(r1 + 4) = r2;\n" + : + : [pid] "m"(args), [flags] "m"(flags), [offset] "+m"(size), [mode] "m"(i_mode) + : "r0", "r1", "r2", "r7", "memory" + : a); +a: + size += sizeof(u32) + sizeof(u16); // for the flags + i_mode + return size; +} + +SEC("kprobe/security_file_permission") +int kprobe_security_file_permission(struct pt_regs *ctx) +{ + bpf_printk("[kprobe][kprobe_security_file_permission] enter security_file_permission."); + __u32 zero = 0; + struct secure_tailcall_stack* stack = NULL; + stack = bpf_map_lookup_elem(&tailcall_stack, &zero); + if (!stack) return 0; + memset(stack, 0, sizeof(stack)); + struct file *file = (struct file *)PT_REGS_PARM1(ctx); + const struct path *path_arg = 0; + path_arg = _(&file->f_path); + long ret = copy_path(stack->file_data.path, path_arg); + int path_len = *(int *)stack->file_data.path; + const u32 flag_prefix = 4 + path_len; + int flag = -1; + if (flag_prefix < 2000 && flag_prefix >= 0) bpf_probe_read(&flag, 4, stack->file_data.path + flag_prefix); + const u32 mode_prefix = 8 + path_len; + short mode = -1; + if (mode_prefix < 2000 && mode_prefix >= 0) bpf_probe_read(&mode, 2, stack->file_data.path + mode_prefix); + bpf_printk("[kprobe][tailcall][permission] before ~ stack path length:%d, ret:%lld, flag:%d", path_len, ret, flag); + bpf_printk("[kprobe][tailcall][permission] before ~ stack path+4:%s, mode:%d", &stack->file_data.path[4], mode); + + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct execve_map_value *enter; + enter = execve_map_get_noinit(pid); + if (!enter || enter->key.ktime == 0) + { + bpf_printk("[kprobe][tailcall][permission] no init!!! return! stack path:%s, pid:%u", stack->file_data.path, pid); + bpf_printk("[kprobe][tailcall][permission] no init!!! return! 
stack path+4:%s, pid:%u", &stack->file_data.path[4], pid); + return 0; + } + bpf_printk("[kprobe][kprobe_security_file_permission] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION; + stack->file_data.func = TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION; + stack->file_data.key = enter->key; + stack->file_data.pkey = enter->pkey; + stack->file_data.timestamp = bpf_ktime_get_ns(); + bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); + return 0; +} + +SEC("kprobe/security_mmap_file") +int kprobe_security_mmap_file(struct pt_regs *ctx) +{ + bpf_printk("[kprobe][security_mmap_file] enter security_mmap_file."); + __u32 zero = 0; + struct secure_tailcall_stack* stack = NULL; + stack = bpf_map_lookup_elem(&tailcall_stack, &zero); + if (!stack) return 0; + memset(stack, 0, sizeof(stack)); + struct file *file = (struct file *)PT_REGS_PARM1(ctx); + const struct path *path_arg = 0; + path_arg = _(&file->f_path); + long ret = copy_path(stack->file_data.path, path_arg); + int path_len = *(int *)stack->file_data.path; + bpf_printk("[kprobe][tailcall][mmap] before ~ stack path length:%s, ret:%lld", path_len, ret); + bpf_printk("[kprobe][tailcall][mmap] before ~ stack path+4:%s", &stack->file_data.path[4]); + + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct execve_map_value *enter; + enter = execve_map_get_noinit(pid); + if (!enter || enter->key.ktime == 0) + { + return 0; + } + bpf_printk("[kprobe][security_mmap_file] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + + stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_MMAP_FILE; + stack->file_data.func = TRACEPOINT_FUNC_SECURITY_MMAP_FILE; + stack->file_data.key = enter->key; + stack->file_data.pkey = enter->pkey; + stack->file_data.timestamp = bpf_ktime_get_ns(); + + bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); + return 0; +} + +SEC("kprobe/security_path_truncate") +int kprobe_security_path_truncate(struct pt_regs *ctx) +{ + bpf_printk("[kprobe][security_path_truncate] enter security_path_truncate."); + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct execve_map_value *enter; + enter = execve_map_get_noinit(pid); + if (!enter || enter->key.ktime == 0) + { + return 0; + } + bpf_printk("[kprobe][security_path_truncate] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + __u32 zero = 0; + struct secure_tailcall_stack* stack = NULL; + stack = bpf_map_lookup_elem(&tailcall_stack, &zero); + if (!stack) return 0; + memset(stack, 0, sizeof(stack)); + stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_PATH_TRUNCATE; + stack->file_data.func = TRACEPOINT_FUNC_SECURITY_PATH_TRUNCATE; + stack->file_data.key = enter->key; + stack->file_data.pkey = enter->pkey; + stack->file_data.timestamp = bpf_ktime_get_ns(); + struct path *path = (struct path *)PT_REGS_PARM1(ctx); + const struct path *path_arg = 0; + path_arg = _(path); + copy_path(stack->file_data.path, path_arg); + bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); + return 0; +} \ No newline at end of file diff --git a/src/security/network_security.bpf.c b/src/security/network_security.bpf.c new file mode 100644 index 0000000..d99b2bd --- /dev/null +++ b/src/security/network_security.bpf.c @@ -0,0 +1,231 @@ +#include +#include +#include +#include +#include "../coolbpf.h" + +#include "int_maps.h" +#include "filter.h" +#include "type.h" +#include "process.h" +#include "addr_lpm_maps.h" +#include "string_maps.h" +#include "bpf_exit.h" +#include "tailcall_stack.h" + 
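+/*
+ * Per-CPU scratch storage and the tail-call program array shared by the TCP
+ * probes below: each kprobe fills tailcall_stack with a tcp_data_t snapshot of
+ * the socket and then tail-calls TAILCALL_FILTER_PROG for filtering and reporting.
+ */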
+BPF_PERCPU_ARRAY(sock_secure_data_heap, struct tcp_data_t, 1); +BPF_PERCPU_ARRAY(tailcall_stack, struct secure_tailcall_stack, 1); + +struct +{ + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 3); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} secure_tailcall_map SEC(".maps"); + +static __always_inline u16 bpf_core_sock_sk_protocol_ak(struct sock *sk) +{ + return (u16)BPF_CORE_READ_BITFIELD_PROBED(sk, sk_protocol); +} + +static __always_inline u32 get_netns(struct sock *sk) { + return BPF_CORE_READ(sk, __sk_common.skc_net.net, ns.inum); +} + +// int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +SEC("kprobe/tcp_sendmsg") +int BPF_KPROBE(kprobe_tcp_sendmsg, struct sock *sk, struct msghdr *msg, size_t size) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct execve_map_value *enter; + enter = execve_map_get_noinit(pid); + if (!enter || enter->key.ktime == 0) { + bpf_printk("[kprobe][kprobe_tcp_sendmsg] pid:%u never enter. skip collect", pid); + return 0; + } + bpf_printk("[kprobe][kprobe_tcp_sendmsg] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + + // define event + __u32 zero = 0; + struct tcp_data_t* data = NULL; + data = bpf_map_lookup_elem(&sock_secure_data_heap, &zero); + if (!data) return 0; + memset(data, 0, sizeof(data)); + + data->func = TRACEPOINT_FUNC_TCP_SENDMSG; + data->key = enter->key; + data->pkey = enter->pkey; + + struct inet_sock *inet = (struct inet_sock *)sk; + data->timestamp = bpf_ktime_get_ns(); + unsigned int daddr = BPF_CORE_READ(sk, __sk_common.skc_daddr); + data->daddr = bpf_htonl(daddr); + unsigned short dport = BPF_CORE_READ(sk, __sk_common.skc_dport); + data->dport = bpf_htons(dport); + unsigned int saddr = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); + data->saddr = bpf_htonl(saddr); + unsigned short sport = BPF_CORE_READ(inet, inet_sport); + data->sport = bpf_htons(sport); + data->state = BPF_CORE_READ(sk, __sk_common.skc_state); + data->family = BPF_CORE_READ(sk, __sk_common.skc_family); + data->net_ns = get_netns(sk); + data->protocol = bpf_core_sock_sk_protocol_ak(sk); + data->bytes = size; + + struct secure_tailcall_stack* stack = NULL; + stack = bpf_map_lookup_elem(&tailcall_stack, &zero); + if (!stack) return 0; + memset(stack, 0, sizeof(stack)); + stack->func = SECURE_FUNC_TRACEPOINT_FUNC_TCP_SENDMSG; + stack->tcp_data.func = TRACEPOINT_FUNC_TCP_SENDMSG; + stack->tcp_data.key = enter->key; + stack->tcp_data.pkey = enter->pkey; + stack->tcp_data.timestamp = bpf_ktime_get_ns(); + stack->tcp_data.daddr = daddr; + stack->tcp_data.dport = bpf_htons(dport); + stack->tcp_data.saddr = saddr; + stack->tcp_data.sport = bpf_htons(sport); + stack->tcp_data.state = BPF_CORE_READ(sk, __sk_common.skc_state); + stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); + stack->tcp_data.net_ns = get_netns(sk); + stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); + stack->tcp_data.bytes = size; + bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, daddr:%u, family:%u", + stack->tcp_data.saddr, stack->tcp_data.daddr, data->family); + bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, sport:%u, state:%u", + stack->tcp_data.daddr, stack->tcp_data.sport, data->state); + + + bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); + return 0; +} + +// void tcp_close(struct sock *sk, long timeout); +SEC("kprobe/tcp_close") +int BPF_KPROBE(kprobe_tcp_close, struct sock *sk) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct execve_map_value *enter; + 
enter = execve_map_get_noinit(pid); + if (!enter || enter->key.ktime == 0) { + bpf_printk("[kprobe][kprobe_tcp_close] pid:%u never enter. skip collect", pid); + return 0; + } + bpf_printk("[kprobe][kprobe_tcp_close] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + + __u32 zero = 0; + struct tcp_data_t* data = NULL; + data = bpf_map_lookup_elem(&sock_secure_data_heap, &zero); + if (!data) return 0; + memset(data, 0, sizeof(data)); + + data->func = TRACEPOINT_FUNC_TCP_CLOSE; + data->key = enter->key; + data->pkey = enter->pkey; + struct inet_sock *inet = (struct inet_sock *)sk; + data->timestamp = bpf_ktime_get_ns(); + unsigned int daddr = BPF_CORE_READ(sk, __sk_common.skc_daddr); + data->daddr = bpf_htonl(daddr); + unsigned short dport = BPF_CORE_READ(sk, __sk_common.skc_dport); + data->dport = bpf_htons(dport); + unsigned int saddr = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); + data->saddr = bpf_htonl(saddr); + unsigned short sport = BPF_CORE_READ(inet, inet_sport); + data->sport = bpf_htons(sport); + data->state = BPF_CORE_READ(sk, __sk_common.skc_state); + data->family = BPF_CORE_READ(sk, __sk_common.skc_family); + data->net_ns = get_netns(sk); + data->protocol = bpf_core_sock_sk_protocol_ak(sk); + + struct secure_tailcall_stack* stack = NULL; + stack = bpf_map_lookup_elem(&tailcall_stack, &zero); + if (!stack) return 0; + memset(stack, 0, sizeof(stack)); + stack->func = SECURE_FUNC_TRACEPOINT_FUNC_TCP_CLOSE; + stack->tcp_data.func = TRACEPOINT_FUNC_TCP_CLOSE; + stack->tcp_data.key = enter->key; + stack->tcp_data.pkey = enter->pkey; + stack->tcp_data.timestamp = bpf_ktime_get_ns(); + stack->tcp_data.daddr = daddr; + stack->tcp_data.dport = bpf_htons(dport); + stack->tcp_data.saddr = saddr; + stack->tcp_data.sport = bpf_htons(sport); + stack->tcp_data.state = BPF_CORE_READ(sk, __sk_common.skc_state); + stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); + stack->tcp_data.net_ns = get_netns(sk); + stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); + bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, sport:%u, family:%u", + stack->tcp_data.saddr, stack->tcp_data.sport, data->family); + bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, dport:%u, state:%u", + stack->tcp_data.daddr, stack->tcp_data.dport, data->state); + + + bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); + return 0; +} + +// +SEC("kprobe/tcp_connect") +int BPF_KPROBE(kprobe_tcp_connect, struct sock *sk) { + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct execve_map_value *enter; + enter = execve_map_get_noinit(pid); + if (!enter || enter->key.ktime == 0) { + bpf_printk("[kprobe][kprobe_tcp_connect] pid:%u never enter. 
skip collect", pid); + return 0; + } + bpf_printk("[kprobe][kprobe_tcp_connect] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + + __u32 zero = 0; + struct tcp_data_t* data = NULL; + data = bpf_map_lookup_elem(&sock_secure_data_heap, &zero); + if (!data) return 0; + memset(data, 0, sizeof(data)); + + data->func = TRACEPOINT_FUNC_TCP_CONNECT; + data->key = enter->key; + data->pkey = enter->pkey; + + struct inet_sock *inet = (struct inet_sock *)sk; + data->timestamp = bpf_ktime_get_ns(); + unsigned int daddr = BPF_CORE_READ(sk, __sk_common.skc_daddr); + data->daddr = bpf_htonl(daddr); + unsigned short dport = BPF_CORE_READ(sk, __sk_common.skc_dport); + data->dport = bpf_htons(dport); + unsigned int saddr = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); + data->saddr = bpf_htonl(saddr); + unsigned short sport = BPF_CORE_READ(inet, inet_sport); + data->sport = bpf_htons(sport); + data->state = BPF_CORE_READ(sk, __sk_common.skc_state); + data->family = BPF_CORE_READ(sk, __sk_common.skc_family); + data->net_ns = get_netns(sk); + data->protocol = bpf_core_sock_sk_protocol_ak(sk); + + + struct secure_tailcall_stack* stack = NULL; + stack = bpf_map_lookup_elem(&tailcall_stack, &zero); + if (!stack) return 0; + memset(stack, 0, sizeof(stack)); + stack->func = SECURE_FUNC_TRACEPOINT_FUNC_TCP_CONNECT; + stack->tcp_data.func = TRACEPOINT_FUNC_TCP_CONNECT; + stack->tcp_data.key = enter->key; + stack->tcp_data.pkey = enter->pkey; + stack->tcp_data.timestamp = bpf_ktime_get_ns(); + stack->tcp_data.daddr = daddr; + stack->tcp_data.dport = bpf_htons(dport); + stack->tcp_data.saddr = saddr; + stack->tcp_data.sport = bpf_htons(sport); + stack->tcp_data.state = BPF_CORE_READ(sk, __sk_common.skc_state); + stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); + stack->tcp_data.net_ns = get_netns(sk); + stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); + bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, sport:%u, family:%u", + stack->tcp_data.saddr, stack->tcp_data.sport, data->family); + bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, dport:%u, state:%u", + stack->tcp_data.daddr, stack->tcp_data.dport, data->state); + + + bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); + return 0; +} diff --git a/src/security/security.bpf.c b/src/security/security.bpf.c index 7524bc6..eb3f4d1 100644 --- a/src/security/security.bpf.c +++ b/src/security/security.bpf.c @@ -730,10 +730,10 @@ int BPF_KPROBE(kprobe_tcp_sendmsg, struct sock *sk, struct msghdr *msg, size_t s stack->tcp_data.net_ns = get_netns(sk); stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); stack->tcp_data.bytes = size; - bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, sport:%u, family:%u", - stack->tcp_data.saddr, stack->tcp_data.sport, data->family); - bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, dport:%u, state:%u", - stack->tcp_data.daddr, stack->tcp_data.dport, data->state); + bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, daddr:%u, family:%u", + stack->tcp_data.saddr, stack->tcp_data.daddr, data->family); + bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, sport:%u, state:%u", + stack->tcp_data.daddr, stack->tcp_data.sport, data->state); bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); @@ -807,10 +807,10 @@ int BPF_KPROBE(kprobe_tcp_close, struct sock *sk) stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); stack->tcp_data.net_ns = get_netns(sk); stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); - 
bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, sport:%u, family:%u", - stack->tcp_data.saddr, stack->tcp_data.sport, data->family); - bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, dport:%u, state:%u", - stack->tcp_data.daddr, stack->tcp_data.dport, data->state); + bpf_printk("[kprobe][kprobe_tcp_close][dump] saddr:%u, daddr:%u, family:%u", + stack->tcp_data.saddr, stack->tcp_data.daddr, data->family); + bpf_printk("[kprobe][kprobe_tcp_close][dump] daddr:%u, sport:%u, state:%u", + stack->tcp_data.daddr, stack->tcp_data.sport, data->state); bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); @@ -901,11 +901,10 @@ int BPF_KPROBE(kprobe_tcp_connect, struct sock *sk) { stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); stack->tcp_data.net_ns = get_netns(sk); stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); - bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, sport:%u, family:%u", - stack->tcp_data.saddr, stack->tcp_data.sport, data->family); - bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, dport:%u, state:%u", - stack->tcp_data.daddr, stack->tcp_data.dport, data->state); - + bpf_printk("[kprobe][kprobe_tcp_connect][dump] saddr:%u, daddr:%u, family:%u", + stack->tcp_data.saddr, stack->tcp_data.daddr, data->family); + bpf_printk("[kprobe][kprobe_tcp_connect][dump] daddr:%u, sport:%u, state:%u", + stack->tcp_data.daddr, stack->tcp_data.sport, data->state); bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); @@ -1024,7 +1023,7 @@ int filter_prog(struct pt_regs *ctx) { int idx = i; struct selector_filter filter = filters->filters[idx]; if (filter.filter_type != FILTER_TYPE_UNKNOWN) { - bpf_printk("get file prefix filter, type:%u, map index:%u", filter.filter_type, filter.map_idx[0]); + bpf_printk("get file prefix filter, callname idx:%u type:%u, map index:%u", call_name_idx, filter.filter_type, filter.map_idx[0]); // bpf_printk("get file prefix filter, vallen:%u, plus 8:%u", filter.vallen, filter.vallen << 3); } struct addr4_lpm_trie arg4; @@ -1042,9 +1041,7 @@ int filter_prog(struct pt_regs *ctx) { inner_map6 = bpf_map_lookup_elem(&addr6lpm_maps, &filter.map_idx[1]); } if (inner_map4 == NULL) { - bpf_printk("there is something wrong with the lpm maps..."); - bpf_printk("callname idx:%u cannot find inner map for saddr, continue ... ", call_name_idx); - + bpf_printk("there is something wrong with the lpm maps... callname idx:%u cannot find inner map for saddr, continue ... ", call_name_idx); continue; } arg4.addr = saddr; @@ -1148,7 +1145,7 @@ int filter_prog(struct pt_regs *ctx) { int zero = 0; prefix = bpf_map_lookup_elem(&string_prefix_maps_heap, &zero); if (prefix == NULL) { - bpf_printk("[kprobe][tailcall] cannot lookup string_prefix_maps_heap"); + bpf_printk("[kprobe][tailcall] callname idx:%u cannot lookup string_prefix_maps_heap", call_name_idx); break; } // to bits @@ -1162,9 +1159,9 @@ int filter_prog(struct pt_regs *ctx) { bpf_probe_read(prefix->data, path_size & (STRING_PREFIX_MAX_LENGTH - 1), stack->file_data.path + 4); // bpf_probe_read(prefix->data, filter.vallen & (STRING_PREFIX_MAX_LENGTH - 1), stack->file_data.path + 4); int path_len = *(int *)stack->file_data.path; - bpf_printk("[kprobe][tailcall] begin to query inner map. stack path length:%d", path_len); - bpf_printk("[kprobe][tailcall] begin to query inner map. stack path+4:%s", &stack->file_data.path[4]); - bpf_printk("[kprobe][tailcall] begin to query inner map. 
prefix path:%s, path size:%u", prefix->data, path_size); + bpf_printk("[kprobe][tailcall] callname idx:%u begin to query inner map. stack path length:%d", call_name_idx, path_len); + bpf_printk("[kprobe][tailcall] callname idx:%u begin to query inner map. stack path+4:%s", call_name_idx, &stack->file_data.path[4]); + bpf_printk("[kprobe][tailcall] callname idx:%u begin to query inner map. prefix path:%s, path size:%u", call_name_idx, prefix->data, path_size); struct bpf_map* inner_map = bpf_map_lookup_elem(&string_prefix_maps, &filter.map_idx[0]); __u8* ppass = NULL; @@ -1185,7 +1182,7 @@ int filter_prog(struct pt_regs *ctx) { // } } else { // no filters were set ... - bpf_printk("[kprobe][tailcall] cannot find inner map, no filter set, pass"); + bpf_printk("[kprobe][tailcall] callname idx:%u cannot find inner map, no filter set, pass", call_name_idx); } break; } @@ -1221,14 +1218,14 @@ int secure_data_send(struct pt_regs *ctx) case SECURE_FUNC_TRACEPOINT_FUNC_SYS_WRITE: case SECURE_FUNC_TRACEPOINT_FUNC_SYS_READ:{ bpf_perf_event_output(ctx, &file_secure_output, BPF_F_CURRENT_CPU, &data->file_data, sizeof(struct file_data_t)); - bpf_printk("[kprobe][secure_data_send][file] pid:%u, ktime:%u send to perfbuffer.\n", data->file_data.key.pid, data->file_data.key.ktime); + bpf_printk("[kprobe][secure_data_send][file] pid:%u, ktime:%u, func:%d send to perfbuffer.\n", data->file_data.key.pid, data->file_data.key.ktime, data->func); break; } case SECURE_FUNC_TRACEPOINT_FUNC_TCP_CLOSE: case SECURE_FUNC_TRACEPOINT_FUNC_TCP_CONNECT: case SECURE_FUNC_TRACEPOINT_FUNC_TCP_SENDMSG: bpf_perf_event_output(ctx, &sock_secure_output, BPF_F_CURRENT_CPU, &data->tcp_data, sizeof(struct tcp_data_t)); - bpf_printk("[kprobe][secure_data_send][socket] pid:%u, ktime:%u send to perfbuffer.\n", data->file_data.key.pid, data->file_data.key.ktime); + bpf_printk("[kprobe][secure_data_send][socket] pid:%u, ktime:%u, func:%d send to perfbuffer.\n", data->file_data.key.pid, data->file_data.key.ktime, data->func); default: break; } -- Gitee From 108f5a8d37880eabec0ffa9ecad6418468ac3283 Mon Sep 17 00:00:00 2001 From: "qianlu.kk" Date: Fri, 14 Feb 2025 19:50:03 +0800 Subject: [PATCH 07/24] support operator < --- src/security/data_msg.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/security/data_msg.h b/src/security/data_msg.h index bc22848..87c68bc 100644 --- a/src/security/data_msg.h +++ b/src/security/data_msg.h @@ -16,6 +16,14 @@ struct data_event_id { __u64 pid; __u64 time; +#ifdef __cplusplus + bool operator<(const data_event_id& other) const { + if (pid != other.pid) { + return pid < other.pid; + } + return time < other.time; + } +#endif } __attribute__((packed)); struct data_event_desc { -- Gitee From 543dba357952309162fda7c41c17b77191698b04 Mon Sep 17 00:00:00 2001 From: "qianlu.kk" Date: Tue, 18 Feb 2025 20:50:59 +0800 Subject: [PATCH 08/24] add common part --- src/security/bpf_process_event_type.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/security/bpf_process_event_type.h b/src/security/bpf_process_event_type.h index f5114fd..6fe580d 100644 --- a/src/security/bpf_process_event_type.h +++ b/src/security/bpf_process_event_type.h @@ -266,14 +266,17 @@ struct heap_exe { __u32 error; }; // All fields aligned so no 'packed' attribute. 
+#define EXECVE_EVENT_COMMON_MEMBERS \ + struct msg_common common; \ + struct msg_k8s kube; \ + struct msg_execve_key parent; \ + __u64 parent_flags; \ + struct msg_cred creds; \ + struct msg_ns ns; \ + struct msg_execve_key cleanup_key; + struct msg_execve_event { - struct msg_common common; - struct msg_k8s kube; - struct msg_execve_key parent; - __u64 parent_flags; - struct msg_cred creds; - struct msg_ns ns; - struct msg_execve_key cleanup_key; + EXECVE_EVENT_COMMON_MEMBERS /* if add anything above please also update the args of * validate_msg_execve_size() in bpf_execve_event.c */ union { -- Gitee From 242d3b0477e615ec00d2d18bdef5aa862e51c7fd Mon Sep 17 00:00:00 2001 From: "qianlu.kk" Date: Wed, 19 Feb 2025 16:46:51 +0800 Subject: [PATCH 09/24] update log macro to avoid conflict with spdlog --- src/coolbpf.c | 8 ++++---- src/coolbpf.h | 8 ++++---- src/log.h | 12 ++++++------ src/security/security.bpf.c | 18 +----------------- 4 files changed, 15 insertions(+), 31 deletions(-) diff --git a/src/coolbpf.c b/src/coolbpf.c index 89cb07f..5dd3101 100644 --- a/src/coolbpf.c +++ b/src/coolbpf.c @@ -68,7 +68,7 @@ struct coolbpf_object *__coolbpf_object_open(skel_open open, skel_load load, void *skel_obj = open(); if (!skel_obj) { free(obj); - error("failed to open skeleton object\n"); + log_error("failed to open skeleton object\n"); return NULL; } @@ -139,14 +139,14 @@ void *perf_thread_worker(void *ctx) err = libbpf_get_error(pb); if (err) { - error("error new perf buffer: %s\n", strerror(-err)); + log_error("error new perf buffer: %s\n", strerror(-err)); return NULL; } if (!pb) { err = -errno; - error("failed to open perf buffer: %d\n", err); + log_error("failed to open perf buffer: %d\n", err); return NULL; } @@ -155,7 +155,7 @@ void *perf_thread_worker(void *ctx) err = perf_buffer__poll(pb, timeout_ms); if (err < 0 && err != -EINTR) { - error("error polling perf buffer: %s\n", strerror(-err)); + log_error("error polling perf buffer: %s\n", strerror(-err)); goto cleanup; } diff --git a/src/coolbpf.h b/src/coolbpf.h index 88b5400..386bd8f 100644 --- a/src/coolbpf.h +++ b/src/coolbpf.h @@ -54,12 +54,12 @@ struct coolbpf_object int __err = 0; \ struct coolbpf_object *__cb = calloc(1, sizeof(struct coolbpf_object)); \ if (!__cb) { \ - error("failed to allocate memory for coolbpf_object\n"); \ + log_error("failed to allocate memory for coolbpf_object\n"); \ goto __real_out; \ } \ struct skel##_bpf *skel_obj = skel##_bpf__open(); \ if (!skel_obj) { \ - error("failed to open CoolBPF object\n"); \ + log_error("failed to open CoolBPF object\n"); \ goto __failed_out; \ } \ __cb->skel_load = skel##_bpf__load; \ @@ -71,13 +71,13 @@ struct coolbpf_object __cb->ctx = _ctx; \ __err = coolbpf_object_load(__cb); \ if (__err) { \ - error("failed to load CoolBPF object: %d\n", __err); \ + log_error("failed to load CoolBPF object: %d\n", __err); \ coolbpf_object_destroy(__cb); \ goto __failed_out; \ } \ __err = coolbpf_object_attach(__cb); \ if (__err) { \ - error("failed to attach CoolBPF object: %d\n", __err); \ + log_error("failed to attach CoolBPF object: %d\n", __err); \ coolbpf_object_destroy(__cb); \ goto __failed_out; \ } \ diff --git a/src/log.h b/src/log.h index 34735c5..656b8ef 100644 --- a/src/log.h +++ b/src/log.h @@ -36,12 +36,12 @@ enum { LOG_TRACE, LOG_DEBUG, LOG_INFO, LOG_WARN, LOG_ERROR, LOG_FATAL }; COOLBPF_API void coolbpf_set_loglevel(int level); -#define trace(...) log_log(LOG_TRACE, __FILE__, __LINE__, __VA_ARGS__) -#define debug(...) 
log_log(LOG_DEBUG, __FILE__, __LINE__, __VA_ARGS__) -#define info(...) log_log(LOG_INFO, __FILE__, __LINE__, __VA_ARGS__) -#define warn(...) log_log(LOG_WARN, __FILE__, __LINE__, __VA_ARGS__) -#define error(...) log_log(LOG_ERROR, __FILE__, __LINE__, __VA_ARGS__) -#define fatal(...) log_log(LOG_FATAL, __FILE__, __LINE__, __VA_ARGS__) +#define log_trace(...) log_log(LOG_TRACE, __FILE__, __LINE__, __VA_ARGS__) +#define log_debug(...) log_log(LOG_DEBUG, __FILE__, __LINE__, __VA_ARGS__) +#define log_info(...) log_log(LOG_INFO, __FILE__, __LINE__, __VA_ARGS__) +#define log_warn(...) log_log(LOG_WARN, __FILE__, __LINE__, __VA_ARGS__) +#define log_error(...) log_log(LOG_ERROR, __FILE__, __LINE__, __VA_ARGS__) +#define log_fatal(...) log_log(LOG_FATAL, __FILE__, __LINE__, __VA_ARGS__) const char* log_level_string(int level); void log_set_lock(log_LockFn fn, void *udata); diff --git a/src/security/security.bpf.c b/src/security/security.bpf.c index eb3f4d1..0b1f344 100644 --- a/src/security/security.bpf.c +++ b/src/security/security.bpf.c @@ -17,30 +17,14 @@ #include "bpf_exit.h" #include "tailcall_stack.h" //#include "bpf_execve.h" -// map in map - -// struct { -// __uint(type, BPF_MAP_TYPE_LPM_TRIE); -// __uint(max_entries, 10); -// // __uint(key_size, sizeof(__u8) * sizeof(struct string_prefix_lpm_trie)); -// // __uint(value_size, sizeof(__u8)); -// __type(key, __u8[sizeof(struct string_prefix_lpm_trie)]); // Need to specify as byte array as wouldn't take struct as key type -// __type(value, __u8); -// __uint(map_flags, BPF_F_NO_PREALLOC); -// } ql_test_prefix_map SEC(".maps"); - -// [0, SYSAK_SECURE_MAX_CIDR_LIMIT/2) for source addr -// [SYSAK_SECURE_MAX_CIDR_LIMIT/2, SYSAK_SECURE_MAX_CIDR_LIMIT) for dest addr + BPF_ARRAY(cidr_filter_list, struct cidr_entry, SYSAK_SECURE_MAX_CIDR_LIMIT); -// [0, SYSAK_SECURE_MAX_PORT_LIMIT/2) for source port -// [SYSAK_SECURE_MAX_PORT_LIMIT/2, SYSAK_SECURE_MAX_CIDR_LIMIT) for dest port BPF_ARRAY(port_filter_list, struct port_entry, SYSAK_SECURE_MAX_PORT_LIMIT); BPF_HASH(sock_secure_port_filter, u16, struct port_entry, 1024); BPF_PERF_OUTPUT(sock_secure_output, 1024); BPF_PERCPU_ARRAY(sock_secure_data_heap, struct tcp_data_t, 1); -// [0, SYSAK_SECURE_MAX_PATH_LIMIT/2) for file path BPF_ARRAY(path_filter_list, struct path_entry, SYSAK_SECURE_MAX_PATH_LIMIT); BPF_PERF_OUTPUT(file_secure_output, 1024); BPF_PERCPU_ARRAY(file_secure_data_heap, struct file_data_t, 1); -- Gitee From ba4d1b14fa156f7a6c22958820269dcbf30ae41d Mon Sep 17 00:00:00 2001 From: Tom Yu Date: Thu, 29 May 2025 08:48:12 +0000 Subject: [PATCH 10/24] Fix data reading limitation for sizes larger than 0x3fff This commit addresses an issue where data larger than 0x3fff bytes could not be fully read, even with multiple read attempts. 
The fix allows reading of data beyond the 0x3fff byte limit by: * Replacing the bitwise AND masking with a conditional jump * Setting a maximum read size of 0x3fff bytes per operation --- src/security/data_event.h | 9 ++++----- src/security/data_msg.h | 8 -------- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/src/security/data_event.h b/src/security/data_event.h index ec16add..24ae14f 100644 --- a/src/security/data_event.h +++ b/src/security/data_event.h @@ -24,10 +24,9 @@ __do_bytes(void *ctx, struct msg_data *msg, unsigned long uptr, size_t bytes) a: // < 5.3 verifier still requires value masking like 'val &= xxx' #ifndef __LARGE_BPF_PROG - asm volatile("%[bytes] &= 0x3fff;\n" - : - : [bytes] "+r"(bytes) - :); + asm volatile goto("if %[bytes] < 0x3fff goto %l[c]\n;" : : [bytes] "+r"(bytes)::c); + bytes = 0x3fff; + c: #endif err = bpf_probe_read(&msg->arg[0], bytes, (char *)uptr); if (err < 0) @@ -177,7 +176,7 @@ FUNC_INLINE size_t data_event( } else { desc->error = 0; desc->pad = 0; - desc->leftover = size == -1 ? 0 : size - err; + desc->leftover = size == (size_t)-1 ? 0 : size - err; desc->size = err; } return sizeof(*desc); diff --git a/src/security/data_msg.h b/src/security/data_msg.h index 87c68bc..bc22848 100644 --- a/src/security/data_msg.h +++ b/src/security/data_msg.h @@ -16,14 +16,6 @@ struct data_event_id { __u64 pid; __u64 time; -#ifdef __cplusplus - bool operator<(const data_event_id& other) const { - if (pid != other.pid) { - return pid < other.pid; - } - return time < other.time; - } -#endif } __attribute__((packed)); struct data_event_desc { -- Gitee From 0a64eb9f0cf72392d44fb7d96e5a45ac07bce45e Mon Sep 17 00:00:00 2001 From: qianlufaceless Date: Tue, 17 Jun 2025 08:57:33 +0000 Subject: [PATCH 11/24] Adapt to 4.19 kernel * wrap bpf_printk => BPF_DEBUG * modify libbpf inner map name to work around 4.19 bug --- scripts/cmake/genskel.cmake | 5 + src/bpf/net.bpf.c | 17 +- src/ebpf_log.h | 16 ++ src/security/bpf_exit.h | 7 +- src/security/bpf_process_event.h | 2 +- src/security/file_security.bpf.c | 25 +-- src/security/network_security.bpf.c | 25 +-- src/security/security.bpf.c | 237 +++++++++++++--------------- third/libbpf/src/libbpf.c | 6 +- 9 files changed, 174 insertions(+), 166 deletions(-) create mode 100644 src/ebpf_log.h diff --git a/scripts/cmake/genskel.cmake b/scripts/cmake/genskel.cmake index e6dcfb8..34e487b 100644 --- a/scripts/cmake/genskel.cmake +++ b/scripts/cmake/genskel.cmake @@ -19,6 +19,11 @@ endforeach() message(STATUS "Include Directories: ${include_dirs}") macro(genskel name) + message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") + if (CMAKE_BUILD_TYPE MATCHES Debug) + add_definitions(-DBPF_DEBUG) + message(STATUS "add definition: -DBPF_DEBUG") + endif () SET(BPF_C_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${name}.bpf.c) SET(BPF_O_FILE ${CMAKE_CURRENT_BINARY_DIR}/${name}.bpf.o) SET(BPF_S_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${name}.skel.h) diff --git a/src/bpf/net.bpf.c b/src/bpf/net.bpf.c index 36cca84..c3b7ebc 100644 --- a/src/bpf/net.bpf.c +++ b/src/bpf/net.bpf.c @@ -1,6 +1,7 @@ #include "vmlinux.h" #include "../coolbpf.h" #include "../net.h" +#include "../ebpf_log.h" #define AF_UNIX 1 #define AF_INET 2 /* Internet IP Protocol */ @@ -238,24 +239,24 @@ static __always_inline bool match_container_id(struct connect_info_t* conn_info) u32 index = ContainerIdIndex; int64_t *cid_prefix_length = bpf_map_lookup_elem(&config_tgid_map, &index); if (cid_prefix_length == NULL) { - bpf_printk("cid_prefix_length null! 
pid:%u\n", conn_info->conn_id.tgid); + BPF_DEBUG("cid_prefix_length null! pid:%u\n", conn_info->conn_id.tgid); return true; } u32 trim_len = *cid_prefix_length; if (trim_len <= 0 || trim_len > KN_NAME_LENGTH) { - bpf_printk("trim_len invalid! pid:%u trim_len:%u\n", conn_info->conn_id.tgid, trim_len); + BPF_DEBUG("trim_len invalid! pid:%u trim_len:%u\n", conn_info->conn_id.tgid, trim_len); return false; } if (conn_info->docker_id_length == 0) { - bpf_printk("dockerid length is zero! pid:%u docker_id_length:%u\n", conn_info->conn_id.tgid, conn_info->docker_id_length); + BPF_DEBUG("dockerid length is zero! pid:%u docker_id_length:%u\n", conn_info->conn_id.tgid, conn_info->docker_id_length); return false; } int length = conn_info->docker_id_length >= KN_NAME_LENGTH? KN_NAME_LENGTH : conn_info->docker_id_length; int real_length = length - trim_len; if (real_length <=0 ) { - bpf_printk("reallen invalid! pid:%u real_length:%u\n", conn_info->conn_id.tgid, real_length); + BPF_DEBUG("reallen invalid! pid:%u real_length:%u\n", conn_info->conn_id.tgid, real_length); return false; } if (real_length >= CONTAINER_ID_MAX_LENGTH) real_length = CONTAINER_ID_MAX_LENGTH; @@ -265,16 +266,16 @@ static __always_inline bool match_container_id(struct connect_info_t* conn_info) struct container_id_key* prefix = bpf_map_lookup_elem(&container_id_heap, &zero); if (!prefix) return false; __builtin_memset(prefix, 0, sizeof(struct container_id_key)); - bpf_printk("after memset! pid:%u, cgroup:%s, real_length:%u \n", conn_info->conn_id.tgid, prefix->data, real_length); + BPF_DEBUG("after memset! pid:%u, cgroup:%s, real_length:%u \n", conn_info->conn_id.tgid, prefix->data, real_length); bpf_probe_read(prefix->data, real_length, conn_info->docker_id + trim_len); prefix->prefixlen = real_length << 3; __u8* ppass = bpf_map_lookup_elem(&enable_container_ids, prefix); if (ppass) { - bpf_printk("bingo! pid:%u, cgroup:%s, prefix:%u \n", conn_info->conn_id.tgid, prefix->data, prefix->prefixlen); + BPF_DEBUG("bingo! pid:%u, cgroup:%s, prefix:%u \n", conn_info->conn_id.tgid, prefix->data, prefix->prefixlen); // in whitelist return true; } - bpf_printk("blacklist! pid:%u, cgroup:%s, prefix:%u \n", conn_info->conn_id.tgid, prefix->data, prefix->prefixlen); + BPF_DEBUG("blacklist! pid:%u, cgroup:%s, prefix:%u \n", conn_info->conn_id.tgid, prefix->data, prefix->prefixlen); return false; } @@ -348,7 +349,7 @@ static __always_inline __u32 __event_get_current_cgroup_name(struct cgroup *cgrp if (!name) return EVENT_ERROR_CGROUP_NAME; int ret = bpf_probe_read_str(conn_info->docker_id, KN_NAME_LENGTH, name); - bpf_printk("pid:%u docker_id:%s ret:%u \n", conn_info->conn_id.tgid, conn_info->docker_id, ret); + BPF_DEBUG("pid:%u docker_id:%s ret:%u \n", conn_info->conn_id.tgid, conn_info->docker_id, ret); conn_info->docker_id_length = ret; return name ? 0 : EVENT_ERROR_CGROUP_NAME; diff --git a/src/ebpf_log.h b/src/ebpf_log.h new file mode 100644 index 0000000..a19324d --- /dev/null +++ b/src/ebpf_log.h @@ -0,0 +1,16 @@ +#pragma once + +#define BPF_NO_GLOBAL_DATA + +/* Macro to output debug logs to /sys/kernel/debug/tracing/trace_pipe + */ +#ifdef BPF_DEBUG +#define BPF_DEBUG(fmt, ...) \ + ({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), ##__VA_ARGS__); \ + }) +#else +// No op +#define BPF_DEBUG(fmt, ...) 
+#endif \ No newline at end of file diff --git a/src/security/bpf_exit.h b/src/security/bpf_exit.h index 786a9f3..50c242e 100644 --- a/src/security/bpf_exit.h +++ b/src/security/bpf_exit.h @@ -15,6 +15,7 @@ #include "bpf_rate.h" #include "process.h" #include "bpf_process_event.h" +#include "../ebpf_log.h" struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); @@ -38,7 +39,7 @@ FUNC_INLINE void event_exit_send(void *ctx, __u32 tgid) enter = execve_map_get_noinit(tgid); if (!enter) return; - bpf_printk("[kprobe][event_exit_send] pid:%u already enter.", tgid); + BPF_DEBUG("[kprobe][event_exit_send] pid:%u already enter.", tgid); if (enter->key.ktime) { struct task_struct *task = (struct task_struct *)bpf_get_current_task(); size_t size = sizeof(struct msg_exit); @@ -76,9 +77,9 @@ FUNC_INLINE void event_exit_send(void *ctx, __u32 tgid) __event_get_cgroup_info(task, &kube); - bpf_printk("[kprobe][event_exit_send] pid:%u prepare to send event.", tgid); + BPF_DEBUG("[kprobe][event_exit_send] pid:%u prepare to send event.", tgid); if (cgroup_rate(ctx, &kube, exit->common.ktime)) { - bpf_printk("[kprobe][event_exit_send] pid:%u send event.", tgid); + BPF_DEBUG("[kprobe][event_exit_send] pid:%u send event.", tgid); perf_event_output_metric(ctx, MSG_OP_EXIT, &tcpmon_map, BPF_F_CURRENT_CPU, exit, size); } diff --git a/src/security/bpf_process_event.h b/src/security/bpf_process_event.h index 5d2b659..4605de8 100644 --- a/src/security/bpf_process_event.h +++ b/src/security/bpf_process_event.h @@ -335,7 +335,7 @@ __d_path_local(const struct path *path, char *buf, int *buflen, int *error) task = (struct task_struct *)bpf_get_current_task(); bpf_probe_read(&fs, sizeof(fs), _(&task->fs)); *error = path_with_deleted(path, _(&fs->root), buf, &res, buflen); - // bpf_printk(); + // log_debug); return res; } diff --git a/src/security/file_security.bpf.c b/src/security/file_security.bpf.c index 6095185..ec9b52b 100644 --- a/src/security/file_security.bpf.c +++ b/src/security/file_security.bpf.c @@ -12,6 +12,7 @@ #include "string_maps.h" #include "bpf_exit.h" #include "tailcall_stack.h" +#include "../ebpf_log.h" struct { @@ -83,7 +84,7 @@ a: SEC("kprobe/security_file_permission") int kprobe_security_file_permission(struct pt_regs *ctx) { - bpf_printk("[kprobe][kprobe_security_file_permission] enter security_file_permission."); + BPF_DEBUG("[kprobe][kprobe_security_file_permission] enter security_file_permission."); __u32 zero = 0; struct secure_tailcall_stack* stack = NULL; stack = bpf_map_lookup_elem(&tailcall_stack, &zero); @@ -100,19 +101,19 @@ int kprobe_security_file_permission(struct pt_regs *ctx) const u32 mode_prefix = 8 + path_len; short mode = -1; if (mode_prefix < 2000 && mode_prefix >= 0) bpf_probe_read(&mode, 2, stack->file_data.path + mode_prefix); - bpf_printk("[kprobe][tailcall][permission] before ~ stack path length:%d, ret:%lld, flag:%d", path_len, ret, flag); - bpf_printk("[kprobe][tailcall][permission] before ~ stack path+4:%s, mode:%d", &stack->file_data.path[4], mode); + BPF_DEBUG("[kprobe][tailcall][permission] before ~ stack path length:%d, ret:%lld, flag:%d", path_len, ret, flag); + BPF_DEBUG("[kprobe][tailcall][permission] before ~ stack path+4:%s, mode:%d", &stack->file_data.path[4], mode); __u32 pid = bpf_get_current_pid_tgid() >> 32; struct execve_map_value *enter; enter = execve_map_get_noinit(pid); if (!enter || enter->key.ktime == 0) { - bpf_printk("[kprobe][tailcall][permission] no init!!! return! 
stack path:%s, pid:%u", stack->file_data.path, pid); - bpf_printk("[kprobe][tailcall][permission] no init!!! return! stack path+4:%s, pid:%u", &stack->file_data.path[4], pid); + BPF_DEBUG("[kprobe][tailcall][permission] no init!!! return! stack path:%s, pid:%u", stack->file_data.path, pid); + BPF_DEBUG("[kprobe][tailcall][permission] no init!!! return! stack path+4:%s, pid:%u", &stack->file_data.path[4], pid); return 0; } - bpf_printk("[kprobe][kprobe_security_file_permission] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + BPF_DEBUG("[kprobe][kprobe_security_file_permission] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION; stack->file_data.func = TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION; stack->file_data.key = enter->key; @@ -125,7 +126,7 @@ int kprobe_security_file_permission(struct pt_regs *ctx) SEC("kprobe/security_mmap_file") int kprobe_security_mmap_file(struct pt_regs *ctx) { - bpf_printk("[kprobe][security_mmap_file] enter security_mmap_file."); + BPF_DEBUG("[kprobe][security_mmap_file] enter security_mmap_file."); __u32 zero = 0; struct secure_tailcall_stack* stack = NULL; stack = bpf_map_lookup_elem(&tailcall_stack, &zero); @@ -136,8 +137,8 @@ int kprobe_security_mmap_file(struct pt_regs *ctx) path_arg = _(&file->f_path); long ret = copy_path(stack->file_data.path, path_arg); int path_len = *(int *)stack->file_data.path; - bpf_printk("[kprobe][tailcall][mmap] before ~ stack path length:%s, ret:%lld", path_len, ret); - bpf_printk("[kprobe][tailcall][mmap] before ~ stack path+4:%s", &stack->file_data.path[4]); + BPF_DEBUG("[kprobe][tailcall][mmap] before ~ stack path length:%s, ret:%lld", path_len, ret); + BPF_DEBUG("[kprobe][tailcall][mmap] before ~ stack path+4:%s", &stack->file_data.path[4]); __u32 pid = bpf_get_current_pid_tgid() >> 32; struct execve_map_value *enter; @@ -146,7 +147,7 @@ int kprobe_security_mmap_file(struct pt_regs *ctx) { return 0; } - bpf_printk("[kprobe][security_mmap_file] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + BPF_DEBUG("[kprobe][security_mmap_file] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_MMAP_FILE; stack->file_data.func = TRACEPOINT_FUNC_SECURITY_MMAP_FILE; @@ -161,7 +162,7 @@ int kprobe_security_mmap_file(struct pt_regs *ctx) SEC("kprobe/security_path_truncate") int kprobe_security_path_truncate(struct pt_regs *ctx) { - bpf_printk("[kprobe][security_path_truncate] enter security_path_truncate."); + BPF_DEBUG("[kprobe][security_path_truncate] enter security_path_truncate."); __u32 pid = bpf_get_current_pid_tgid() >> 32; struct execve_map_value *enter; enter = execve_map_get_noinit(pid); @@ -169,7 +170,7 @@ int kprobe_security_path_truncate(struct pt_regs *ctx) { return 0; } - bpf_printk("[kprobe][security_path_truncate] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + BPF_DEBUG("[kprobe][security_path_truncate] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); __u32 zero = 0; struct secure_tailcall_stack* stack = NULL; stack = bpf_map_lookup_elem(&tailcall_stack, &zero); diff --git a/src/security/network_security.bpf.c b/src/security/network_security.bpf.c index d99b2bd..b934653 100644 --- a/src/security/network_security.bpf.c +++ b/src/security/network_security.bpf.c @@ -12,6 +12,7 @@ #include "string_maps.h" #include "bpf_exit.h" #include "tailcall_stack.h" +#include "../ebpf_log.h" BPF_PERCPU_ARRAY(sock_secure_data_heap, struct tcp_data_t, 1); 
BPF_PERCPU_ARRAY(tailcall_stack, struct secure_tailcall_stack, 1); @@ -41,10 +42,10 @@ int BPF_KPROBE(kprobe_tcp_sendmsg, struct sock *sk, struct msghdr *msg, size_t s struct execve_map_value *enter; enter = execve_map_get_noinit(pid); if (!enter || enter->key.ktime == 0) { - bpf_printk("[kprobe][kprobe_tcp_sendmsg] pid:%u never enter. skip collect", pid); + BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg] pid:%u never enter. skip collect", pid); return 0; } - bpf_printk("[kprobe][kprobe_tcp_sendmsg] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); // define event __u32 zero = 0; @@ -91,9 +92,9 @@ int BPF_KPROBE(kprobe_tcp_sendmsg, struct sock *sk, struct msghdr *msg, size_t s stack->tcp_data.net_ns = get_netns(sk); stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); stack->tcp_data.bytes = size; - bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, daddr:%u, family:%u", + BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, daddr:%u, family:%u", stack->tcp_data.saddr, stack->tcp_data.daddr, data->family); - bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, sport:%u, state:%u", + BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, sport:%u, state:%u", stack->tcp_data.daddr, stack->tcp_data.sport, data->state); @@ -109,10 +110,10 @@ int BPF_KPROBE(kprobe_tcp_close, struct sock *sk) struct execve_map_value *enter; enter = execve_map_get_noinit(pid); if (!enter || enter->key.ktime == 0) { - bpf_printk("[kprobe][kprobe_tcp_close] pid:%u never enter. skip collect", pid); + BPF_DEBUG("[kprobe][kprobe_tcp_close] pid:%u never enter. skip collect", pid); return 0; } - bpf_printk("[kprobe][kprobe_tcp_close] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + BPF_DEBUG("[kprobe][kprobe_tcp_close] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); __u32 zero = 0; struct tcp_data_t* data = NULL; @@ -155,9 +156,9 @@ int BPF_KPROBE(kprobe_tcp_close, struct sock *sk) stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); stack->tcp_data.net_ns = get_netns(sk); stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); - bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, sport:%u, family:%u", + BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, sport:%u, family:%u", stack->tcp_data.saddr, stack->tcp_data.sport, data->family); - bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, dport:%u, state:%u", + BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, dport:%u, state:%u", stack->tcp_data.daddr, stack->tcp_data.dport, data->state); @@ -172,10 +173,10 @@ int BPF_KPROBE(kprobe_tcp_connect, struct sock *sk) { struct execve_map_value *enter; enter = execve_map_get_noinit(pid); if (!enter || enter->key.ktime == 0) { - bpf_printk("[kprobe][kprobe_tcp_connect] pid:%u never enter. skip collect", pid); + BPF_DEBUG("[kprobe][kprobe_tcp_connect] pid:%u never enter. 
skip collect", pid); return 0; } - bpf_printk("[kprobe][kprobe_tcp_connect] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + BPF_DEBUG("[kprobe][kprobe_tcp_connect] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); __u32 zero = 0; struct tcp_data_t* data = NULL; @@ -220,9 +221,9 @@ int BPF_KPROBE(kprobe_tcp_connect, struct sock *sk) { stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); stack->tcp_data.net_ns = get_netns(sk); stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); - bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, sport:%u, family:%u", + BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, sport:%u, family:%u", stack->tcp_data.saddr, stack->tcp_data.sport, data->family); - bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, dport:%u, state:%u", + BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, dport:%u, state:%u", stack->tcp_data.daddr, stack->tcp_data.dport, data->state); diff --git a/src/security/security.bpf.c b/src/security/security.bpf.c index 0b1f344..f167589 100644 --- a/src/security/security.bpf.c +++ b/src/security/security.bpf.c @@ -17,6 +17,7 @@ #include "bpf_exit.h" #include "tailcall_stack.h" //#include "bpf_execve.h" +#include "../ebpf_log.h" BPF_ARRAY(cidr_filter_list, struct cidr_entry, SYSAK_SECURE_MAX_CIDR_LIMIT); BPF_ARRAY(port_filter_list, struct port_entry, SYSAK_SECURE_MAX_PORT_LIMIT); @@ -98,7 +99,7 @@ read_args(void *ctx, struct msg_execve_event *event) off = bpf_probe_read_str(&heap->maxpath, 4096, (char *)start_stack); if (off < 0) return 0; - bpf_printk("[read_args] pid:%llu, args:%s", p->pid, heap->maxpath); + BPF_DEBUG("[read_args] pid:%llu, args:%s", p->pid, heap->maxpath); start_stack += off; @@ -143,7 +144,7 @@ read_path(void *ctx, struct msg_execve_event *event, void *filename) earg = (void *)p + offsetof(struct msg_process, args); size = bpf_probe_read_str(earg, MAXARGLENGTH - 1, filename); - bpf_printk("[read_path] pid:%llu, path:%s", p->pid, earg); + BPF_DEBUG("[read_path] pid:%llu, path:%s", p->pid, earg); if (size < 0) { flags |= EVENT_ERROR_FILENAME; size = 0; @@ -219,7 +220,7 @@ SEC("kprobe/wake_up_new_task") int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task) { __u32 cpid = bpf_get_current_pid_tgid() >> 32; - bpf_printk("[kprobe][event_wake_up_new_task] pid:%u enter~", cpid); + BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u enter~", cpid); struct execve_map_value *curr, *parent; struct msg_clone_event msg; struct msg_capabilities caps; @@ -231,7 +232,7 @@ int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task) return 0; tgid = BPF_CORE_READ(task, tgid); - bpf_printk("[kprobe][event_wake_up_new_task] pid:%u read tgid:%u ~", cpid, tgid); + BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u read tgid:%u ~", cpid, tgid); /* Do not try to create any msg or calling execve_map_get * (that will add a new process in the execve_map) if we @@ -240,18 +241,18 @@ int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task) parent = __event_find_parent(task); if (!parent) return 0; - bpf_printk("[kprobe][event_wake_up_new_task] pid:%u tgid:%u has parent.", cpid, tgid); + BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u has parent.", cpid, tgid); curr = execve_map_get(tgid); if (!curr) return 0; - bpf_printk("[kprobe][event_wake_up_new_task] pid:%u tgid:%u new event in execve_map.", cpid, tgid); + BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u new event in execve_map.", cpid, tgid); /* Generate an EVENT_COMMON_FLAG_CLONE event once 
per process, * that is, thread group. */ if (curr->key.ktime != 0) return 0; - bpf_printk("[kprobe][event_wake_up_new_task] pid:%u tgid:%u begin init event.", cpid, tgid); + BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u begin init event.", cpid, tgid); /* Setup the execve_map entry. */ curr->flags = EVENT_COMMON_FLAG_CLONE; curr->key.pid = tgid; @@ -288,10 +289,10 @@ int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task) __event_get_cgroup_info(task, &kube); - bpf_printk("[kprobe][event_wake_up_new_task] pid:%u tgid:%u init event done.", cpid, tgid); + BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u init event done.", cpid, tgid); if (cgroup_rate(ctx, &kube, msg.ktime)) { - bpf_printk("[kprobe][event_wake_up_new_task] pid:%u tgid:%u begin submit clone event.", cpid, tgid); + BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u begin submit clone event.", cpid, tgid); perf_event_output_metric(ctx, MSG_OP_CLONE, &tcpmon_map, BPF_F_CURRENT_CPU, &msg, msg_size); } @@ -343,7 +344,7 @@ int event_execve(struct trace_event_raw_sched_process_exec *ctx) p->size += read_path(ctx, event, filename); p->size += read_args(ctx, event); p->size += read_cwd(ctx, p); - bpf_printk("[event_execve] enter pid:%llu, filename:%s", p->pid, filename); + BPF_DEBUG("[event_execve] enter pid:%llu, filename:%s", p->pid, filename); event->common.op = MSG_OP_EXECVE; event->common.ktime = p->ktime; @@ -387,7 +388,7 @@ int execve_rate(void *ctx) SEC("tracepoint/1") int execve_send(void *ctx) { - bpf_printk("[execve_send] enter ~"); + BPF_DEBUG("[execve_send] enter ~"); struct msg_execve_event *event; struct execve_map_value *curr; struct msg_process *p; @@ -472,9 +473,9 @@ int execve_send(void *ctx) sizeof(struct msg_execve_key) + sizeof(__u64) + sizeof(struct msg_cred) + sizeof(struct msg_ns) + sizeof(struct msg_execve_key) + p->size); -// bpf_printk("[execve_send] before perf output ~"); +// BPF_DEBUG("[execve_send] before perf output ~"); perf_event_output_metric(ctx, MSG_OP_EXECVE, &tcpmon_map, BPF_F_CURRENT_CPU, event, size); -// bpf_printk("[execve_send] after perf output ~"); +// BPF_DEBUG("[execve_send] after perf output ~"); return 0; } @@ -489,9 +490,9 @@ int event_exit_acct_process(struct pt_regs *ctx) { __u64 pid_tgid = bpf_get_current_pid_tgid(); __u32 pid = pid_tgid >> 32; - bpf_printk("[kprobe][event_exit_acct_process] pid:%u enter~", pid); + BPF_DEBUG("[kprobe][event_exit_acct_process] pid:%u enter~", pid); event_exit_send(ctx, pid_tgid >> 32); - bpf_printk("[kprobe][event_exit_acct_process] pid:%u send done ~", pid); + BPF_DEBUG("[kprobe][event_exit_acct_process] pid:%u send done ~", pid); return 0; } @@ -510,7 +511,7 @@ int event_exit_disassociate_ctty(struct pt_regs *ctx) { int on_exit = (int)PT_REGS_PARM1_CORE(ctx); __u32 pid = bpf_get_current_pid_tgid() >> 32; - bpf_printk("[kprobe][event_exit_disassociate_ctty] pid:%u enter~", pid); + BPF_DEBUG("[kprobe][event_exit_disassociate_ctty] pid:%u enter~", pid); if (on_exit) event_exit_send(ctx, pid); @@ -576,11 +577,11 @@ int port_filter(__u16 port, int direction) { break; } blacklist = entry->black; - bpf_printk("[kprobe][port_filter] black:%u, port:%u, income_port:%u", entry->black, entry->port, port); + BPF_DEBUG("[kprobe][port_filter] black:%u, port:%u, income_port:%u", entry->black, entry->port, port); if (port == entry->port) { if (blacklist == 1) { // blacklist - bpf_printk("[kprobe][port_filter] filtered by blacklist port, port:%u : disabled.", + BPF_DEBUG("[kprobe][port_filter] filtered by blacklist port, 
port:%u : disabled.", port); return 1; } else if (blacklist == 0) { @@ -596,7 +597,7 @@ int port_filter(__u16 port, int direction) { if (blacklist == 1) return 0; if (blacklist == 0) { // whitelist - bpf_printk("[kprobe][port_filter] filtered by whitelist port, port:%u . disabled.", port); + BPF_DEBUG("[kprobe][port_filter] filtered by whitelist port, port:%u . disabled.", port); return 1; } @@ -624,12 +625,12 @@ int addr_filter(__u32 addr, int direction) { int tmp = start + key; struct cidr_entry *entry = bpf_map_lookup_elem(&cidr_filter_list, &tmp); if (!entry || entry->inited == 0) break; - bpf_printk("[kprobe][addr_filter] black:%u, net:%u, mask:%u", entry->black, entry->net, entry->mask); + BPF_DEBUG("[kprobe][addr_filter] black:%u, net:%u, mask:%u", entry->black, entry->net, entry->mask); blacklist = entry->black; if (cidr_match(addr, entry->net, entry->mask)) { if (blacklist == 1) { // bingo black list - bpf_printk("[kprobe][addr_filter] filtered by blacklist cidr, ip:%u net:%u mask:%u: disabled.", + BPF_DEBUG("[kprobe][addr_filter] filtered by blacklist cidr, ip:%u net:%u mask:%u: disabled.", addr, entry->net, entry->mask); return 1; } else if (blacklist == 0) { @@ -643,7 +644,7 @@ int addr_filter(__u32 addr, int direction) { if (blacklist == 1) return 0; if (blacklist == 0) { // whitelist - bpf_printk("[kprobe][addr_filter] filtered by white cidr, ip:%u disabled.", + BPF_DEBUG("[kprobe][addr_filter] filtered by white cidr, ip:%u disabled.", addr); return 1; } @@ -664,10 +665,10 @@ int BPF_KPROBE(kprobe_tcp_sendmsg, struct sock *sk, struct msghdr *msg, size_t s struct execve_map_value *enter; enter = execve_map_get_noinit(pid); if (!enter || enter->key.ktime == 0) { - bpf_printk("[kprobe][kprobe_tcp_sendmsg] pid:%u never enter. skip collect", pid); + BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg] pid:%u never enter. 
skip collect", pid); return 0; } - bpf_printk("[kprobe][kprobe_tcp_sendmsg] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); // define event __u32 zero = 0; @@ -714,9 +715,9 @@ int BPF_KPROBE(kprobe_tcp_sendmsg, struct sock *sk, struct msghdr *msg, size_t s stack->tcp_data.net_ns = get_netns(sk); stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); stack->tcp_data.bytes = size; - bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, daddr:%u, family:%u", + BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, daddr:%u, family:%u", stack->tcp_data.saddr, stack->tcp_data.daddr, data->family); - bpf_printk("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, sport:%u, state:%u", + BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, sport:%u, state:%u", stack->tcp_data.daddr, stack->tcp_data.sport, data->state); @@ -728,12 +729,12 @@ int BPF_KPROBE(kprobe_tcp_sendmsg, struct sock *sk, struct msghdr *msg, size_t s // sp = port_filter(data->sport, 0); // dp = port_filter(data->dport, 1); // if (sf || df || sp || dp) { - // bpf_printk("[kprobe][kprobe_tcp_sendmsg] skip submit because of filters."); + // BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg] skip submit because of filters."); // return 0; // } // bpf_perf_event_output(ctx, &sock_secure_output, BPF_F_CURRENT_CPU, data, sizeof(struct tcp_data_t)); - // bpf_printk("[kprobe][kprobe_tcp_sendmsg] pid:%u ktime:%llu send to perfbuffer.", pid, enter->key.ktime); + // BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg] pid:%u ktime:%llu send to perfbuffer.", pid, enter->key.ktime); return 0; } @@ -745,10 +746,10 @@ int BPF_KPROBE(kprobe_tcp_close, struct sock *sk) struct execve_map_value *enter; enter = execve_map_get_noinit(pid); if (!enter || enter->key.ktime == 0) { - bpf_printk("[kprobe][kprobe_tcp_close] pid:%u never enter. skip collect", pid); + BPF_DEBUG("[kprobe][kprobe_tcp_close] pid:%u never enter. 
skip collect", pid); return 0; } - bpf_printk("[kprobe][kprobe_tcp_close] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + BPF_DEBUG("[kprobe][kprobe_tcp_close] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); __u32 zero = 0; struct tcp_data_t* data = NULL; @@ -791,9 +792,9 @@ int BPF_KPROBE(kprobe_tcp_close, struct sock *sk) stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); stack->tcp_data.net_ns = get_netns(sk); stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); - bpf_printk("[kprobe][kprobe_tcp_close][dump] saddr:%u, daddr:%u, family:%u", + BPF_DEBUG("[kprobe][kprobe_tcp_close][dump] saddr:%u, daddr:%u, family:%u", stack->tcp_data.saddr, stack->tcp_data.daddr, data->family); - bpf_printk("[kprobe][kprobe_tcp_close][dump] daddr:%u, sport:%u, state:%u", + BPF_DEBUG("[kprobe][kprobe_tcp_close][dump] daddr:%u, sport:%u, state:%u", stack->tcp_data.daddr, stack->tcp_data.sport, data->state); @@ -806,13 +807,13 @@ int BPF_KPROBE(kprobe_tcp_close, struct sock *sk) // sp = port_filter(data->sport, 0); // dp = port_filter(data->dport, 1); // if (sf || df || sp || dp) { -// bpf_printk("[kprobe][kprobe_tcp_close] skip submit because of filters."); +// BPF_DEBUG("[kprobe][kprobe_tcp_close] skip submit because of filters."); // return 0; // } -// // bpf_printk("Packet matched CIDR: %x/%x/%u/%u\n", entry->net, entry->mask, entry->enable, entry->src); +// // BPF_DEBUG("Packet matched CIDR: %x/%x/%u/%u\n", entry->net, entry->mask, entry->enable, entry->src); // bpf_perf_event_output(ctx, &sock_secure_output, BPF_F_CURRENT_CPU, data, sizeof(struct tcp_data_t)); -// bpf_printk("[kprobe][kprobe_tcp_close] pid:%u ktime:%llu send to perfbuffer.", pid, enter->key.ktime); +// BPF_DEBUG("[kprobe][kprobe_tcp_close] pid:%u ktime:%llu send to perfbuffer.", pid, enter->key.ktime); return 0; } @@ -823,10 +824,10 @@ int BPF_KPROBE(kprobe_tcp_connect, struct sock *sk) { struct execve_map_value *enter; enter = execve_map_get_noinit(pid); if (!enter || enter->key.ktime == 0) { - bpf_printk("[kprobe][kprobe_tcp_connect] pid:%u never enter. skip collect", pid); + BPF_DEBUG("[kprobe][kprobe_tcp_connect] pid:%u never enter. 
skip collect", pid); return 0; } - bpf_printk("[kprobe][kprobe_tcp_connect] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + BPF_DEBUG("[kprobe][kprobe_tcp_connect] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); __u32 zero = 0; struct tcp_data_t* data = NULL; @@ -885,9 +886,9 @@ int BPF_KPROBE(kprobe_tcp_connect, struct sock *sk) { stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); stack->tcp_data.net_ns = get_netns(sk); stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); - bpf_printk("[kprobe][kprobe_tcp_connect][dump] saddr:%u, daddr:%u, family:%u", + BPF_DEBUG("[kprobe][kprobe_tcp_connect][dump] saddr:%u, daddr:%u, family:%u", stack->tcp_data.saddr, stack->tcp_data.daddr, data->family); - bpf_printk("[kprobe][kprobe_tcp_connect][dump] daddr:%u, sport:%u, state:%u", + BPF_DEBUG("[kprobe][kprobe_tcp_connect][dump] daddr:%u, sport:%u, state:%u", stack->tcp_data.daddr, stack->tcp_data.sport, data->state); bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); @@ -899,13 +900,13 @@ int BPF_KPROBE(kprobe_tcp_connect, struct sock *sk) { // sp = port_filter(data->sport, 0); // dp = port_filter(data->dport, 1); // if (sf || df || sp || dp) { -// bpf_printk("[kprobe][kprobe_tcp_connect] skip submit because of filters."); +// BPF_DEBUG("[kprobe][kprobe_tcp_connect] skip submit because of filters."); // return 0; // } -// // bpf_printk("Packet matched CIDR: %x/%x/%u/%u\n", entry->net, entry->mask, entry->enable, entry->src); +// // BPF_DEBUG("Packet matched CIDR: %x/%x/%u/%u\n", entry->net, entry->mask, entry->enable, entry->src); // bpf_perf_event_output(ctx, &sock_secure_output, BPF_F_CURRENT_CPU, data, sizeof(struct tcp_data_t)); -// bpf_printk("[kprobe][kprobe_tcp_connect] pid:%u ktime:%llu send to perfbuffer.", pid, enter->key.ktime); +// BPF_DEBUG("[kprobe][kprobe_tcp_connect] pid:%u ktime:%llu send to perfbuffer.", pid, enter->key.ktime); return 0; } @@ -982,7 +983,7 @@ void write_ipv6_addr32(u32 *dest, u32 *src) SEC("kprobe/tailcall_prog") int filter_prog(struct pt_regs *ctx) { - bpf_printk("[secure][tailcall] enter filter_prog"); + BPF_DEBUG("[secure][tailcall] enter filter_prog"); __u32 zero = 0; struct secure_tailcall_stack *stack = bpf_map_lookup_elem(&tailcall_stack, &zero); if (!stack) @@ -1006,26 +1007,25 @@ int filter_prog(struct pt_regs *ctx) { for (; i < MAX_FILTER_FOR_PER_CALLNAME; i ++) { int idx = i; struct selector_filter filter = filters->filters[idx]; - if (filter.filter_type != FILTER_TYPE_UNKNOWN) { - bpf_printk("get file prefix filter, callname idx:%u type:%u, map index:%u", call_name_idx, filter.filter_type, filter.map_idx[0]); - // bpf_printk("get file prefix filter, vallen:%u, plus 8:%u", filter.vallen, filter.vallen << 3); - } + // if (filter.filter_type != FILTER_TYPE_UNKNOWN) { + // BPF_DEBUG("get file prefix filter, callname idx:%u type:%u, map index:%u", call_name_idx, filter.filter_type, filter.map_idx[0]); + // // BPF_DEBUG("get file prefix filter, vallen:%u, plus 8:%u", filter.vallen, filter.vallen << 3); + // } struct addr4_lpm_trie arg4; - struct addr6_lpm_trie arg6; + // struct addr6_lpm_trie arg6; switch(filter.filter_type) { case FILTER_TYPE_SADDR: { uint32_t saddr = stack->tcp_data.saddr; - struct bpf_map* inner_map4 = NULL; - struct bpf_map* inner_map6 = NULL; + // struct bpf_map* inner_map6 = NULL; if (filter.map_idx[0] != -1) { inner_map4 = bpf_map_lookup_elem(&addr4lpm_maps, &filter.map_idx[0]); } - if (filter.map_idx[1] != -1) { - inner_map6 = bpf_map_lookup_elem(&addr6lpm_maps, 
&filter.map_idx[1]); - } + // if (filter.map_idx[1] != -1) { + // inner_map6 = bpf_map_lookup_elem(&addr6lpm_maps, &filter.map_idx[1]); + // } if (inner_map4 == NULL) { - bpf_printk("there is something wrong with the lpm maps... callname idx:%u cannot find inner map for saddr, continue ... ", call_name_idx); + BPF_DEBUG("there is something wrong with the lpm maps... callname idx:%u cannot find inner map for saddr, continue ... ", call_name_idx); continue; } arg4.addr = saddr; @@ -1040,13 +1040,13 @@ int filter_prog(struct pt_regs *ctx) { if (filter.op_type == OP_TYPE_IN) { // not in white list if (ppass4 == NULL) { - bpf_printk("callname idx:%u arg4 saddr:%u, prefix:%u not in whitelist", call_name_idx, arg4.addr, arg4.prefix); + BPF_DEBUG("callname idx:%u arg4 saddr:%u, prefix:%u not in whitelist", call_name_idx, arg4.addr, arg4.prefix); return 0; } } else if (filter.op_type == OP_TYPE_NOT_IN) { // or in black list if (ppass4 != NULL) { - bpf_printk("callname idx:%u arg4 saddr:%u, prefix:%u in blacklist", call_name_idx, arg4.addr, arg4.prefix); + BPF_DEBUG("callname idx:%u arg4 saddr:%u, prefix:%u in blacklist", call_name_idx, arg4.addr, arg4.prefix); return 0; } } @@ -1058,7 +1058,7 @@ int filter_prog(struct pt_regs *ctx) { arg4.prefix = 32; struct bpf_map* inner_map = bpf_map_lookup_elem(&addr4lpm_maps, &filter.map_idx[0]); if (inner_map == NULL) { - bpf_printk("callname idx:%u cannot find inner map for daddr, continue ... ", call_name_idx); + BPF_DEBUG("callname idx:%u cannot find inner map for daddr, continue ... ", call_name_idx); continue; } __u8* ppass = NULL; @@ -1066,12 +1066,12 @@ int filter_prog(struct pt_regs *ctx) { if (filter.op_type == OP_TYPE_IN) { // not in white list if (ppass == NULL) { - bpf_printk("callname idx:%u arg4 daddr:%u, prefix:%u not in whitelist", call_name_idx, arg4.addr, arg4.prefix); + BPF_DEBUG("callname idx:%u arg4 daddr:%u, prefix:%u not in whitelist", call_name_idx, arg4.addr, arg4.prefix); return 0; } } else if (filter.op_type == OP_TYPE_NOT_IN) { // or in black list - bpf_printk("callname idx:%u arg4 daddr:%u, prefix:%u in blacklist", call_name_idx, arg4.addr, arg4.prefix); + BPF_DEBUG("callname idx:%u arg4 daddr:%u, prefix:%u in blacklist", call_name_idx, arg4.addr, arg4.prefix); if (ppass != NULL) return 0; } @@ -1081,7 +1081,7 @@ int filter_prog(struct pt_regs *ctx) { uint32_t sport = stack->tcp_data.sport; struct bpf_map* inner_map = bpf_map_lookup_elem(&port_maps, &filter.map_idx[0]); if (inner_map == NULL) { - bpf_printk("callname idx:%u cannot find inner map for sport, continue ... ", call_name_idx); + BPF_DEBUG("callname idx:%u cannot find inner map for sport, continue ... 
", call_name_idx); continue; } __u8* ppass = NULL; @@ -1089,13 +1089,13 @@ int filter_prog(struct pt_regs *ctx) { if (filter.op_type == OP_TYPE_IN) { // not in white list if (ppass == NULL) { - bpf_printk("callname idx:%u arg4 sport:%u not in whitelist", call_name_idx, sport); + BPF_DEBUG("callname idx:%u arg4 sport:%u not in whitelist", call_name_idx, sport); return 0; } } else if (filter.op_type == OP_TYPE_NOT_IN) { // or in black list if (ppass != NULL) { - bpf_printk("callname idx:%u arg4 sport:%u in blacklist", call_name_idx, sport); + BPF_DEBUG("callname idx:%u arg4 sport:%u in blacklist", call_name_idx, sport); return 0; } } @@ -1104,7 +1104,7 @@ int filter_prog(struct pt_regs *ctx) { uint32_t dport = stack->tcp_data.dport; struct bpf_map* inner_map = bpf_map_lookup_elem(&port_maps, &filter.map_idx[0]); if (inner_map == NULL) { - bpf_printk("callname idx:%u cannot find inner map for dport, continue ... ", call_name_idx); + BPF_DEBUG("callname idx:%u cannot find inner map for dport, continue ... ", call_name_idx); continue; } __u8* ppass = NULL; @@ -1112,64 +1112,47 @@ int filter_prog(struct pt_regs *ctx) { if (filter.op_type == OP_TYPE_IN) { // not in white list if (ppass == NULL) { - bpf_printk("callname idx:%u arg4 dport:%u not in whitelist", call_name_idx, dport); + BPF_DEBUG("callname idx:%u arg4 dport:%u not in whitelist", call_name_idx, dport); return 0; } } else if (filter.op_type == OP_TYPE_NOT_IN) { // or in black list if (ppass != NULL) { - bpf_printk("callname idx:%u arg4 dport:%u in blacklist", call_name_idx, dport); + BPF_DEBUG("callname idx:%u arg4 dport:%u in blacklist", call_name_idx, dport); return 0; } } break; } - case FILTER_TYPE_FILE_PREFIX: { - struct string_prefix_lpm_trie *prefix = NULL; - int zero = 0; - prefix = bpf_map_lookup_elem(&string_prefix_maps_heap, &zero); - if (prefix == NULL) { - bpf_printk("[kprobe][tailcall] callname idx:%u cannot lookup string_prefix_maps_heap", call_name_idx); - break; - } - // to bits - - // struct file_data_t* data = bpf_map_lookup_elem(&file_secure_data_heap, &zero); - // if (data == NULL) break; - // bpf_probe_read(&data, sizeof(struct file_data_t), &stack->file_data); - __u32 path_size = 0; - bpf_probe_read(&path_size, 4, stack->file_data.path); - prefix->prefixlen = path_size * 8; - bpf_probe_read(prefix->data, path_size & (STRING_PREFIX_MAX_LENGTH - 1), stack->file_data.path + 4); - // bpf_probe_read(prefix->data, filter.vallen & (STRING_PREFIX_MAX_LENGTH - 1), stack->file_data.path + 4); - int path_len = *(int *)stack->file_data.path; - bpf_printk("[kprobe][tailcall] callname idx:%u begin to query inner map. stack path length:%d", call_name_idx, path_len); - bpf_printk("[kprobe][tailcall] callname idx:%u begin to query inner map. stack path+4:%s", call_name_idx, &stack->file_data.path[4]); - bpf_printk("[kprobe][tailcall] callname idx:%u begin to query inner map. 
prefix path:%s, path size:%u", call_name_idx, prefix->data, path_size); + // case FILTER_TYPE_FILE_PREFIX: { + // struct string_prefix_lpm_trie *prefix = NULL; + // int zero = 0; + // prefix = bpf_map_lookup_elem(&string_prefix_maps_heap, &zero); + // if (prefix == NULL) { + // BPF_DEBUG("[kprobe][tailcall] callname idx:%u cannot lookup string_prefix_maps_heap", call_name_idx); + // break; + // } + // __u32 path_size = 0; + // bpf_probe_read(&path_size, 4, stack->file_data.path); + // prefix->prefixlen = path_size * 8; + // bpf_probe_read(prefix->data, path_size & (STRING_PREFIX_MAX_LENGTH - 1), stack->file_data.path + 4); + // int path_len = *(int *)stack->file_data.path; + // BPF_DEBUG("[kprobe][tailcall] callname idx:%u begin to query inner map. stack path length:%d", call_name_idx, path_len); + // BPF_DEBUG("[kprobe][tailcall] callname idx:%u begin to query inner map. stack path+4:%s", call_name_idx, &stack->file_data.path[4]); + // BPF_DEBUG("[kprobe][tailcall] callname idx:%u begin to query inner map. prefix path:%s, path size:%u", call_name_idx, prefix->data, path_size); - struct bpf_map* inner_map = bpf_map_lookup_elem(&string_prefix_maps, &filter.map_idx[0]); - __u8* ppass = NULL; - if (inner_map != NULL) { - ppass = bpf_map_lookup_elem(inner_map, prefix); - if (ppass == NULL || *ppass == 0) pass &= 0; - else pass &= 1; - // if (ppass != NULL) { - // bpf_printk("[kprobe][tailcall] bingo~ query for inner map, path:%s, val:%u", prefix->data, (__u32)*ppass); - // pass &= (__u32)(*ppass); - // // TODO @sym @fs - // bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_SEND); - // return 0; - // } - // else { - // pass &= 0; - // bpf_printk("[kprobe][tailcall] query for inner map, got null val"); - // } - } else { - // no filters were set ... - bpf_printk("[kprobe][tailcall] callname idx:%u cannot find inner map, no filter set, pass", call_name_idx); - } - break; - } + // struct bpf_map* inner_map = bpf_map_lookup_elem(&string_prefix_maps, &filter.map_idx[0]); + // __u8* ppass = NULL; + // if (inner_map != NULL) { + // ppass = bpf_map_lookup_elem(inner_map, prefix); + // if (ppass == NULL || *ppass == 0) pass &= 0; + // else pass &= 1; + // } else { + // // no filters were set ... 
+ // BPF_DEBUG("[kprobe][tailcall] callname idx:%u cannot find inner map, no filter set, pass", call_name_idx); + // } + // break; + // } default: break; } @@ -1178,7 +1161,7 @@ int filter_prog(struct pt_regs *ctx) { if (pass) { bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_SEND); } else { - bpf_printk("[filter_prog] skip submit due to the filter"); + BPF_DEBUG("[filter_prog] skip submit due to the filter"); } return 0; @@ -1187,7 +1170,7 @@ int filter_prog(struct pt_regs *ctx) { SEC("kprobe/secure_data_send") int secure_data_send(struct pt_regs *ctx) { - bpf_printk("[secure][tailcall] enter secure_data_send"); + BPF_DEBUG("[secure][tailcall] enter secure_data_send"); // the max tail call, just flush event __u32 zero = 0; struct secure_tailcall_stack *data = bpf_map_lookup_elem(&tailcall_stack, &zero); @@ -1202,26 +1185,26 @@ int secure_data_send(struct pt_regs *ctx) case SECURE_FUNC_TRACEPOINT_FUNC_SYS_WRITE: case SECURE_FUNC_TRACEPOINT_FUNC_SYS_READ:{ bpf_perf_event_output(ctx, &file_secure_output, BPF_F_CURRENT_CPU, &data->file_data, sizeof(struct file_data_t)); - bpf_printk("[kprobe][secure_data_send][file] pid:%u, ktime:%u, func:%d send to perfbuffer.\n", data->file_data.key.pid, data->file_data.key.ktime, data->func); + BPF_DEBUG("[kprobe][secure_data_send][file] pid:%u, ktime:%u, func:%d send to perfbuffer.\n", data->file_data.key.pid, data->file_data.key.ktime, data->func); break; } case SECURE_FUNC_TRACEPOINT_FUNC_TCP_CLOSE: case SECURE_FUNC_TRACEPOINT_FUNC_TCP_CONNECT: case SECURE_FUNC_TRACEPOINT_FUNC_TCP_SENDMSG: bpf_perf_event_output(ctx, &sock_secure_output, BPF_F_CURRENT_CPU, &data->tcp_data, sizeof(struct tcp_data_t)); - bpf_printk("[kprobe][secure_data_send][socket] pid:%u, ktime:%u, func:%d send to perfbuffer.\n", data->file_data.key.pid, data->file_data.key.ktime, data->func); + BPF_DEBUG("[kprobe][secure_data_send][socket] pid:%u, ktime:%u, func:%d send to perfbuffer.\n", data->file_data.key.pid, data->file_data.key.ktime, data->func); default: break; } // bpf_perf_event_output(ctx, &file_secure_output, BPF_F_CURRENT_CPU, data, sizeof(struct secure_tailcall_stack)); - // bpf_printk("[kprobe][kprobe_security_file_permission] pid:%u, ktime:%u send to perfbuffer.\n", data->key.pid, data->key.ktime); + // BPF_DEBUG("[kprobe][kprobe_security_file_permission] pid:%u, ktime:%u send to perfbuffer.\n", data->key.pid, data->key.ktime); return 0; } SEC("kprobe/security_file_permission") int kprobe_security_file_permission(struct pt_regs *ctx) { - bpf_printk("[kprobe][kprobe_security_file_permission] enter security_file_permission."); + BPF_DEBUG("[kprobe][kprobe_security_file_permission] enter security_file_permission."); __u32 zero = 0; struct secure_tailcall_stack* stack = NULL; stack = bpf_map_lookup_elem(&tailcall_stack, &zero); @@ -1238,19 +1221,19 @@ int kprobe_security_file_permission(struct pt_regs *ctx) const u32 mode_prefix = 8 + path_len; short mode = -1; if (mode_prefix < 2000 && mode_prefix >= 0) bpf_probe_read(&mode, 2, stack->file_data.path + mode_prefix); - bpf_printk("[kprobe][tailcall][permission] before ~ stack path length:%d, ret:%lld, flag:%d", path_len, ret, flag); - bpf_printk("[kprobe][tailcall][permission] before ~ stack path+4:%s, mode:%d", &stack->file_data.path[4], mode); + BPF_DEBUG("[kprobe][tailcall][permission] before ~ stack path length:%d, ret:%lld, flag:%d", path_len, ret, flag); + BPF_DEBUG("[kprobe][tailcall][permission] before ~ stack path+4:%s, mode:%d", &stack->file_data.path[4], mode); __u32 pid = bpf_get_current_pid_tgid() >> 32; struct 
execve_map_value *enter; enter = execve_map_get_noinit(pid); if (!enter || enter->key.ktime == 0) { - bpf_printk("[kprobe][tailcall][permission] no init!!! return! stack path:%s, pid:%u", stack->file_data.path, pid); - bpf_printk("[kprobe][tailcall][permission] no init!!! return! stack path+4:%s, pid:%u", &stack->file_data.path[4], pid); + BPF_DEBUG("[kprobe][tailcall][permission] no init!!! return! stack path:%s, pid:%u", stack->file_data.path, pid); + BPF_DEBUG("[kprobe][tailcall][permission] no init!!! return! stack path+4:%s, pid:%u", &stack->file_data.path[4], pid); return 0; } - bpf_printk("[kprobe][kprobe_security_file_permission] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + BPF_DEBUG("[kprobe][kprobe_security_file_permission] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); // __u32 zero = 0; // struct secure_tailcall_stack* stack = NULL; // stack = bpf_map_lookup_elem(&tailcall_stack, &zero); @@ -1272,7 +1255,7 @@ int kprobe_security_file_permission(struct pt_regs *ctx) SEC("kprobe/security_mmap_file") int kprobe_security_mmap_file(struct pt_regs *ctx) { - bpf_printk("[kprobe][security_mmap_file] enter security_mmap_file."); + BPF_DEBUG("[kprobe][security_mmap_file] enter security_mmap_file."); __u32 zero = 0; struct secure_tailcall_stack* stack = NULL; stack = bpf_map_lookup_elem(&tailcall_stack, &zero); @@ -1283,8 +1266,8 @@ int kprobe_security_mmap_file(struct pt_regs *ctx) path_arg = _(&file->f_path); long ret = copy_path(stack->file_data.path, path_arg); int path_len = *(int *)stack->file_data.path; - bpf_printk("[kprobe][tailcall][mmap] before ~ stack path length:%s, ret:%lld", path_len, ret); - bpf_printk("[kprobe][tailcall][mmap] before ~ stack path+4:%s", &stack->file_data.path[4]); + BPF_DEBUG("[kprobe][tailcall][mmap] before ~ stack path length:%s, ret:%lld", path_len, ret); + BPF_DEBUG("[kprobe][tailcall][mmap] before ~ stack path+4:%s", &stack->file_data.path[4]); __u32 pid = bpf_get_current_pid_tgid() >> 32; struct execve_map_value *enter; @@ -1293,7 +1276,7 @@ int kprobe_security_mmap_file(struct pt_regs *ctx) { return 0; } - bpf_printk("[kprobe][security_mmap_file] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + BPF_DEBUG("[kprobe][security_mmap_file] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_MMAP_FILE; stack->file_data.func = TRACEPOINT_FUNC_SECURITY_MMAP_FILE; @@ -1308,7 +1291,7 @@ int kprobe_security_mmap_file(struct pt_regs *ctx) SEC("kprobe/security_path_truncate") int kprobe_security_path_truncate(struct pt_regs *ctx) { - bpf_printk("[kprobe][security_path_truncate] enter security_path_truncate."); + BPF_DEBUG("[kprobe][security_path_truncate] enter security_path_truncate."); __u32 pid = bpf_get_current_pid_tgid() >> 32; struct execve_map_value *enter; enter = execve_map_get_noinit(pid); @@ -1316,7 +1299,7 @@ int kprobe_security_path_truncate(struct pt_regs *ctx) { return 0; } - bpf_printk("[kprobe][security_path_truncate] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); + BPF_DEBUG("[kprobe][security_path_truncate] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); __u32 zero = 0; struct secure_tailcall_stack* stack = NULL; stack = bpf_map_lookup_elem(&tailcall_stack, &zero); diff --git a/third/libbpf/src/libbpf.c b/third/libbpf/src/libbpf.c index b57f2de..aa3b743 100644 --- a/third/libbpf/src/libbpf.c +++ b/third/libbpf/src/libbpf.c @@ -2643,7 +2643,7 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, return -EINVAL; } - 
snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name); + snprintf(inner_map_name, sizeof(inner_map_name), "%s_inner", map_name); err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL); if (err) return err; @@ -2871,10 +2871,10 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, if (map->inner_map->fd < 0) return map->inner_map->fd; map->inner_map->sec_idx = sec_idx; - map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1); + map->inner_map->name = malloc(strlen(map_name) + sizeof("_inner") + 1); if (!map->inner_map->name) return -ENOMEM; - sprintf(map->inner_map->name, "%s.inner", map_name); + sprintf(map->inner_map->name, "%s_inner", map_name); fill_map_from_def(map->inner_map, &inner_def); } -- Gitee From dfa41712dd9f9a06ecbe4ab657b567066f6bd585 Mon Sep 17 00:00:00 2001 From: Tom Yu Date: Thu, 19 Jun 2025 13:23:05 +0000 Subject: [PATCH 12/24] Fix unpredictable path truncation when length exceed 255 This patch also adopted several commits from Tetragon to improve bpf program and reduce complexity. [bpf: bump prepend_name underlying buffer size 4096](https://github.com/cilium/tetragon/pull/2764) [bpf: remove unused fields in msg_k8s](https://github.com/cilium/tetragon/pull/3127) [introduce in_init_tree flag for process events](https://github.com/cilium/tetragon/pull/3209) [process:bpf: report euid as the process.uid](https://github.com/cilium/tetragon/pull/2575) --- scripts/cmake/genskel.cmake | 4 +- src/CMakeLists.txt | 12 +- src/ebpf_log.h | 11 +- src/security/bpf_common.h | 7 - src/security/bpf_exit.h | 3 - src/security/bpf_process_event.h | 26 +-- src/security/bpf_process_event_type.h | 27 +-- src/security/data_event.h | 13 +- src/security/file_security.bpf.c | 190 --------------- src/security/network_security.bpf.c | 232 ------------------ src/security/process.h | 325 +------------------------- src/security/security.bpf.c | 86 +++---- 12 files changed, 97 insertions(+), 839 deletions(-) delete mode 100644 src/security/file_security.bpf.c delete mode 100644 src/security/network_security.bpf.c diff --git a/scripts/cmake/genskel.cmake b/scripts/cmake/genskel.cmake index 34e487b..60286d1 100644 --- a/scripts/cmake/genskel.cmake +++ b/scripts/cmake/genskel.cmake @@ -21,8 +21,8 @@ message(STATUS "Include Directories: ${include_dirs}") macro(genskel name) message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") if (CMAKE_BUILD_TYPE MATCHES Debug) - add_definitions(-DBPF_DEBUG) - message(STATUS "add definition: -DBPF_DEBUG") + add_definitions(-COOLBPF_DEBUG) + message(STATUS "add definition: -DCOOLBPF_DEBUG") endif () SET(BPF_C_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${name}.bpf.c) SET(BPF_O_FILE ${CMAKE_CURRENT_BINARY_DIR}/${name}.bpf.o) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f2ddacd..0365b58 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -26,18 +26,18 @@ if(NOT DEFINED INSTALL_INCLUDE_DIR) set(INSTALL_INCLUDE_DIR ${CMAKE_INSTALL_PREFIX}/include) endif() -add_subdirectory(bpf) -add_subdirectory(security) -if (ENABLE_PROFILE) - add_subdirectory(profiler) -endif() - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/coolbpf.pc.in ${CMAKE_CURRENT_BINARY_DIR}/coolbpf.pc @ONLY) include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/bpf ${CMAKE_CURRENT_BINARY_DIR}/bpf) +add_subdirectory(bpf) +add_subdirectory(security) +if (ENABLE_PROFILE) + add_subdirectory(profiler) +endif() + file(GLOB sources ${PROJECT_SOURCE_DIR}/third/libbpf/src/*.c 
${CMAKE_CURRENT_SOURCE_DIR}/*.c) # share library diff --git a/src/ebpf_log.h b/src/ebpf_log.h index a19324d..fdca468 100644 --- a/src/ebpf_log.h +++ b/src/ebpf_log.h @@ -4,13 +4,10 @@ /* Macro to output debug logs to /sys/kernel/debug/tracing/trace_pipe */ -#ifdef BPF_DEBUG -#define BPF_DEBUG(fmt, ...) \ - ({ \ - char ____fmt[] = fmt; \ - bpf_trace_printk(____fmt, sizeof(____fmt), ##__VA_ARGS__); \ - }) +#ifdef COOLBPF_DEBUG +#include +#define BPF_DEBUG(__fmt, ...) bpf_printk(__fmt, ##__VA_ARGS__) #else // No op -#define BPF_DEBUG(fmt, ...) +#define BPF_DEBUG(__fmt, ...) #endif \ No newline at end of file diff --git a/src/security/bpf_common.h b/src/security/bpf_common.h index 5551e45..63accee 100644 --- a/src/security/bpf_common.h +++ b/src/security/bpf_common.h @@ -65,13 +65,6 @@ struct msg_test { #define BIT(nr) (1 << (nr)) #define BIT_ULL(nr) (1ULL << (nr)) -#ifdef TETRAGON_BPF_DEBUG -#include -#define DEBUG(__fmt, ...) bpf_printk(__fmt, ##__VA_ARGS__) -#else -#define DEBUG(__fmt, ...) -#endif - #ifndef PATH_MAP_SIZE #define PATH_MAP_SIZE 4096 #endif diff --git a/src/security/bpf_exit.h b/src/security/bpf_exit.h index 50c242e..d47cbe1 100644 --- a/src/security/bpf_exit.h +++ b/src/security/bpf_exit.h @@ -39,7 +39,6 @@ FUNC_INLINE void event_exit_send(void *ctx, __u32 tgid) enter = execve_map_get_noinit(tgid); if (!enter) return; - BPF_DEBUG("[kprobe][event_exit_send] pid:%u already enter.", tgid); if (enter->key.ktime) { struct task_struct *task = (struct task_struct *)bpf_get_current_task(); size_t size = sizeof(struct msg_exit); @@ -77,9 +76,7 @@ FUNC_INLINE void event_exit_send(void *ctx, __u32 tgid) __event_get_cgroup_info(task, &kube); - BPF_DEBUG("[kprobe][event_exit_send] pid:%u prepare to send event.", tgid); if (cgroup_rate(ctx, &kube, exit->common.ktime)) { - BPF_DEBUG("[kprobe][event_exit_send] pid:%u send event.", tgid); perf_event_output_metric(ctx, MSG_OP_EXIT, &tcpmon_map, BPF_F_CURRENT_CPU, exit, size); } diff --git a/src/security/bpf_process_event.h b/src/security/bpf_process_event.h index 4605de8..48ac76f 100644 --- a/src/security/bpf_process_event.h +++ b/src/security/bpf_process_event.h @@ -5,26 +5,27 @@ #ifndef SYSAK_BPF_PROCESS_EVENT_H #define SYSAK_BPF_PROCESS_EVENT_H -#include "../coolbpf.h" #include #include -#include "bpf_common.h" +#include "../coolbpf.h" #include "api.h" #include "bpf_cgroup.h" +#include "bpf_common.h" #include "bpf_cred.h" #include "compiler.h" +#include "ebpf_log.h" #define ENAMETOOLONG 36 /* File name too long */ -#define MAX_BUF_LEN 256 +#define MAX_BUF_LEN 4096 struct buffer_heap_map_value { // Buffer is twice the needed size because of the verifier. In prepend_name // unit tests, the verifier figures out that 255 is enough and that the // buffer_offset will not overflow, but in the real use-case it looks like // it's forgetting about that. - unsigned char buf[MAX_BUF_LEN * 2]; + unsigned char buf[MAX_BUF_LEN + 256]; }; struct { @@ -124,8 +125,6 @@ prepend_name(char *buf, char **bufptr, int *buflen, const char *name, u32 namele *buflen -= (namelen + write_slash); - // This will not happen as buffer_offset cannot be above 256 and namelen is - // bound to 255. Needed to make the verifier happy in older kernels. 
if (namelen + write_slash > buffer_offset) return -ENAMETOOLONG; @@ -335,7 +334,6 @@ __d_path_local(const struct path *path, char *buf, int *buflen, int *error) task = (struct task_struct *)bpf_get_current_task(); bpf_probe_read(&fs, sizeof(fs), _(&task->fs)); *error = path_with_deleted(path, _(&fs->root), buf, &res, buflen); - // log_debug); return res; } @@ -390,6 +388,8 @@ getcwd(struct msg_process *curr, __u32 offset, __u32 proc_pid) asm volatile("%[offset] &= 0x3ff;\n" ::[offset] "+r"(offset) :); + if (size > 255) + size = 255; asm volatile("%[size] &= 0xff;\n" ::[size] "+r"(size) :); bpf_probe_read((char *)curr + offset, size, buffer); @@ -544,16 +544,6 @@ __event_get_current_cgroup_name(struct cgroup *cgrp, struct msg_k8s *kube) { const char *name; - /* TODO: check if we have Tetragon cgroup configuration and that the - * tracking cgroup ID is set. If so then query the bpf map for - * the corresponding tracking cgroup name. - */ - - /* TODO: we gather current cgroup context, switch to tracker see above, - * and if that fails for any reason or if we don't have the cgroup name - * of tracker, then we can continue with current context. - */ - name = get_cgroup_name(cgrp); if (name) bpf_probe_read_str(kube->docker_id, KN_NAME_LENGTH, name); @@ -603,4 +593,4 @@ __event_get_cgroup_info(struct task_struct *task, struct msg_k8s *kube) return flags; } -#endif //SYSAK_BPF_PROCESS_EVENT_H +#endif // SYSAK_BPF_PROCESS_EVENT_H diff --git a/src/security/bpf_process_event_type.h b/src/security/bpf_process_event_type.h index 6fe580d..13bc124 100644 --- a/src/security/bpf_process_event_type.h +++ b/src/security/bpf_process_event_type.h @@ -122,6 +122,7 @@ #define EVENT_ERROR_PATH_COMPONENTS 0x400000 #define EVENT_DATA_FILENAME 0x800000 #define EVENT_DATA_ARGS 0x1000000 +#define EVENT_IN_INIT_TREE 0x2000000 #define EVENT_COMMON_FLAG_CLONE 0x01 @@ -247,8 +248,6 @@ struct msg_ns { }; // All fields aligned so no 'packed' attribute. struct msg_k8s { - __u32 net_ns; - __u32 cid; __u64 cgrpid; char docker_id[DOCKER_ID_LENGTH]; }; // All fields aligned so no 'packed' attribute. @@ -256,27 +255,19 @@ struct msg_k8s { #define BINARY_PATH_MAX_LEN 256 struct heap_exe { - // because of verifier limitations, this has to be 2 * 256 bytes while 256 - // should be theoretically sufficient, and actually is, in unit tests. - char buf[BINARY_PATH_MAX_LEN * 2]; - // offset points to the start of the path in the above buffer. Use offset to - // read the path in the buffer since it's written from the end. - char *off; + char buf[BINARY_PATH_MAX_LEN]; __u32 len; __u32 error; }; // All fields aligned so no 'packed' attribute. 
-#define EXECVE_EVENT_COMMON_MEMBERS \ - struct msg_common common; \ - struct msg_k8s kube; \ - struct msg_execve_key parent; \ - __u64 parent_flags; \ - struct msg_cred creds; \ - struct msg_ns ns; \ - struct msg_execve_key cleanup_key; - struct msg_execve_event { - EXECVE_EVENT_COMMON_MEMBERS + struct msg_common common; + struct msg_k8s kube; + struct msg_execve_key parent; + __u64 parent_flags; + struct msg_cred creds; + struct msg_ns ns; + struct msg_execve_key cleanup_key; /* if add anything above please also update the args of * validate_msg_execve_size() in bpf_execve_event.c */ union { diff --git a/src/security/data_event.h b/src/security/data_event.h index 24ae14f..e34a956 100644 --- a/src/security/data_event.h +++ b/src/security/data_event.h @@ -33,7 +33,9 @@ __do_bytes(void *ctx, struct msg_data *msg, unsigned long uptr, size_t bytes) return err; msg->common.size = offsetof(struct msg_data, arg) + bytes; - perf_event_output_metric(ctx, MSG_OP_DATA, &tcpmon_map, BPF_F_CURRENT_CPU, msg, msg->common.size); + err = bpf_perf_event_output(ctx, &tcpmon_map, BPF_F_CURRENT_CPU, msg, msg->common.size); + if (err < 0) + return err; return bytes; b: return -1; @@ -43,13 +45,13 @@ FUNC_LOCAL long do_bytes(void *ctx, struct msg_data *msg, unsigned long arg, size_t bytes) { size_t rd_bytes = 0; - int err, i __maybe_unused; + int err = 0, i __maybe_unused; #ifdef __LARGE_BPF_PROG for (i = 0; i < 10; i++) { err = __do_bytes(ctx, msg, arg + rd_bytes, bytes - rd_bytes); if (err < 0) - return err; + goto error; rd_bytes += err; if (rd_bytes == bytes) return rd_bytes; @@ -58,7 +60,7 @@ do_bytes(void *ctx, struct msg_data *msg, unsigned long arg, size_t bytes) #define BYTES_COPY \ err = __do_bytes(ctx, msg, arg + rd_bytes, bytes - rd_bytes); \ if (err < 0) \ - return err; \ + goto error; \ rd_bytes += err; \ if (rd_bytes == bytes) \ return rd_bytes; @@ -73,6 +75,9 @@ do_bytes(void *ctx, struct msg_data *msg, unsigned long arg, size_t bytes) /* leftover */ return rd_bytes; +error: + perf_event_output_update_error_metric(MSG_OP_DATA, err); + return err; } FUNC_LOCAL long diff --git a/src/security/file_security.bpf.c b/src/security/file_security.bpf.c deleted file mode 100644 index ec9b52b..0000000 --- a/src/security/file_security.bpf.c +++ /dev/null @@ -1,190 +0,0 @@ -#include -#include -#include -#include -#include "../coolbpf.h" - -#include "int_maps.h" -#include "filter.h" -#include "type.h" -#include "process.h" -#include "addr_lpm_maps.h" -#include "string_maps.h" -#include "bpf_exit.h" -#include "tailcall_stack.h" -#include "../ebpf_log.h" - -struct -{ - __uint(type, BPF_MAP_TYPE_PROG_ARRAY); - __uint(max_entries, 3); - __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(__u32)); -} secure_tailcall_map SEC(".maps"); - -BPF_PERCPU_ARRAY(tailcall_stack, struct secure_tailcall_stack, 1); - -static inline __attribute__((always_inline)) u32 str_len(const char *str) -{ - u32 len = 0; -#pragma unroll - for (int i = 0; i < SYSAK_SECURE_MAX_PATH_LENGTH_LIMIT; i++) - { - if (str[i] == '\0') - break; - len++; - } - return len; -} - -static inline __attribute__((always_inline)) long copy_path(char *args, const struct path *arg) -{ - int *s = (int *)args; - int size = 0, flags = 0; - char *buffer; - void *curr = &args[4]; - umode_t i_mode; - buffer = d_path_local(arg, &size, &flags); - if (!buffer) - return 0; - // tips: path size between 0~255 - asm volatile("%[size] &= 0xff;\n" ::[size] "+r"(size) - :); - bpf_probe_read(curr, size, buffer); - *s = size; - size += 4; - BPF_CORE_READ_INTO(&i_mode, 
arg, dentry, d_inode, i_mode); - /* - * the format of the path is: - * ----------------------------------------- - * | 4 bytes | N bytes | 4 bytes | 2 bytes | - * | pathlen | path | flags | mode | - * ----------------------------------------- - * Next we set up the flags. - */ - asm volatile goto( - "r1 = *(u64 *)%[pid];\n" - "r7 = *(u32 *)%[offset];\n" - "if r7 s< 0 goto %l[a];\n" - "if r7 s> 1188 goto %l[a];\n" - "r1 += r7;\n" - "r2 = *(u32 *)%[flags];\n" - "*(u32 *)(r1 + 0) = r2;\n" - "r2 = *(u16 *)%[mode];\n" - "*(u16 *)(r1 + 4) = r2;\n" - : - : [pid] "m"(args), [flags] "m"(flags), [offset] "+m"(size), [mode] "m"(i_mode) - : "r0", "r1", "r2", "r7", "memory" - : a); -a: - size += sizeof(u32) + sizeof(u16); // for the flags + i_mode - return size; -} - -SEC("kprobe/security_file_permission") -int kprobe_security_file_permission(struct pt_regs *ctx) -{ - BPF_DEBUG("[kprobe][kprobe_security_file_permission] enter security_file_permission."); - __u32 zero = 0; - struct secure_tailcall_stack* stack = NULL; - stack = bpf_map_lookup_elem(&tailcall_stack, &zero); - if (!stack) return 0; - memset(stack, 0, sizeof(stack)); - struct file *file = (struct file *)PT_REGS_PARM1(ctx); - const struct path *path_arg = 0; - path_arg = _(&file->f_path); - long ret = copy_path(stack->file_data.path, path_arg); - int path_len = *(int *)stack->file_data.path; - const u32 flag_prefix = 4 + path_len; - int flag = -1; - if (flag_prefix < 2000 && flag_prefix >= 0) bpf_probe_read(&flag, 4, stack->file_data.path + flag_prefix); - const u32 mode_prefix = 8 + path_len; - short mode = -1; - if (mode_prefix < 2000 && mode_prefix >= 0) bpf_probe_read(&mode, 2, stack->file_data.path + mode_prefix); - BPF_DEBUG("[kprobe][tailcall][permission] before ~ stack path length:%d, ret:%lld, flag:%d", path_len, ret, flag); - BPF_DEBUG("[kprobe][tailcall][permission] before ~ stack path+4:%s, mode:%d", &stack->file_data.path[4], mode); - - __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct execve_map_value *enter; - enter = execve_map_get_noinit(pid); - if (!enter || enter->key.ktime == 0) - { - BPF_DEBUG("[kprobe][tailcall][permission] no init!!! return! stack path:%s, pid:%u", stack->file_data.path, pid); - BPF_DEBUG("[kprobe][tailcall][permission] no init!!! return! 
stack path+4:%s, pid:%u", &stack->file_data.path[4], pid); - return 0; - } - BPF_DEBUG("[kprobe][kprobe_security_file_permission] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); - stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION; - stack->file_data.func = TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION; - stack->file_data.key = enter->key; - stack->file_data.pkey = enter->pkey; - stack->file_data.timestamp = bpf_ktime_get_ns(); - bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); - return 0; -} - -SEC("kprobe/security_mmap_file") -int kprobe_security_mmap_file(struct pt_regs *ctx) -{ - BPF_DEBUG("[kprobe][security_mmap_file] enter security_mmap_file."); - __u32 zero = 0; - struct secure_tailcall_stack* stack = NULL; - stack = bpf_map_lookup_elem(&tailcall_stack, &zero); - if (!stack) return 0; - memset(stack, 0, sizeof(stack)); - struct file *file = (struct file *)PT_REGS_PARM1(ctx); - const struct path *path_arg = 0; - path_arg = _(&file->f_path); - long ret = copy_path(stack->file_data.path, path_arg); - int path_len = *(int *)stack->file_data.path; - BPF_DEBUG("[kprobe][tailcall][mmap] before ~ stack path length:%s, ret:%lld", path_len, ret); - BPF_DEBUG("[kprobe][tailcall][mmap] before ~ stack path+4:%s", &stack->file_data.path[4]); - - __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct execve_map_value *enter; - enter = execve_map_get_noinit(pid); - if (!enter || enter->key.ktime == 0) - { - return 0; - } - BPF_DEBUG("[kprobe][security_mmap_file] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); - - stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_MMAP_FILE; - stack->file_data.func = TRACEPOINT_FUNC_SECURITY_MMAP_FILE; - stack->file_data.key = enter->key; - stack->file_data.pkey = enter->pkey; - stack->file_data.timestamp = bpf_ktime_get_ns(); - - bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); - return 0; -} - -SEC("kprobe/security_path_truncate") -int kprobe_security_path_truncate(struct pt_regs *ctx) -{ - BPF_DEBUG("[kprobe][security_path_truncate] enter security_path_truncate."); - __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct execve_map_value *enter; - enter = execve_map_get_noinit(pid); - if (!enter || enter->key.ktime == 0) - { - return 0; - } - BPF_DEBUG("[kprobe][security_path_truncate] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); - __u32 zero = 0; - struct secure_tailcall_stack* stack = NULL; - stack = bpf_map_lookup_elem(&tailcall_stack, &zero); - if (!stack) return 0; - memset(stack, 0, sizeof(stack)); - stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_PATH_TRUNCATE; - stack->file_data.func = TRACEPOINT_FUNC_SECURITY_PATH_TRUNCATE; - stack->file_data.key = enter->key; - stack->file_data.pkey = enter->pkey; - stack->file_data.timestamp = bpf_ktime_get_ns(); - struct path *path = (struct path *)PT_REGS_PARM1(ctx); - const struct path *path_arg = 0; - path_arg = _(path); - copy_path(stack->file_data.path, path_arg); - bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); - return 0; -} \ No newline at end of file diff --git a/src/security/network_security.bpf.c b/src/security/network_security.bpf.c deleted file mode 100644 index b934653..0000000 --- a/src/security/network_security.bpf.c +++ /dev/null @@ -1,232 +0,0 @@ -#include -#include -#include -#include -#include "../coolbpf.h" - -#include "int_maps.h" -#include "filter.h" -#include "type.h" -#include "process.h" -#include "addr_lpm_maps.h" -#include "string_maps.h" -#include "bpf_exit.h" -#include "tailcall_stack.h" 
-#include "../ebpf_log.h" - -BPF_PERCPU_ARRAY(sock_secure_data_heap, struct tcp_data_t, 1); -BPF_PERCPU_ARRAY(tailcall_stack, struct secure_tailcall_stack, 1); - -struct -{ - __uint(type, BPF_MAP_TYPE_PROG_ARRAY); - __uint(max_entries, 3); - __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(__u32)); -} secure_tailcall_map SEC(".maps"); - -static __always_inline u16 bpf_core_sock_sk_protocol_ak(struct sock *sk) -{ - return (u16)BPF_CORE_READ_BITFIELD_PROBED(sk, sk_protocol); -} - -static __always_inline u32 get_netns(struct sock *sk) { - return BPF_CORE_READ(sk, __sk_common.skc_net.net, ns.inum); -} - -// int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) -SEC("kprobe/tcp_sendmsg") -int BPF_KPROBE(kprobe_tcp_sendmsg, struct sock *sk, struct msghdr *msg, size_t size) -{ - __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct execve_map_value *enter; - enter = execve_map_get_noinit(pid); - if (!enter || enter->key.ktime == 0) { - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg] pid:%u never enter. skip collect", pid); - return 0; - } - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); - - // define event - __u32 zero = 0; - struct tcp_data_t* data = NULL; - data = bpf_map_lookup_elem(&sock_secure_data_heap, &zero); - if (!data) return 0; - memset(data, 0, sizeof(data)); - - data->func = TRACEPOINT_FUNC_TCP_SENDMSG; - data->key = enter->key; - data->pkey = enter->pkey; - - struct inet_sock *inet = (struct inet_sock *)sk; - data->timestamp = bpf_ktime_get_ns(); - unsigned int daddr = BPF_CORE_READ(sk, __sk_common.skc_daddr); - data->daddr = bpf_htonl(daddr); - unsigned short dport = BPF_CORE_READ(sk, __sk_common.skc_dport); - data->dport = bpf_htons(dport); - unsigned int saddr = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); - data->saddr = bpf_htonl(saddr); - unsigned short sport = BPF_CORE_READ(inet, inet_sport); - data->sport = bpf_htons(sport); - data->state = BPF_CORE_READ(sk, __sk_common.skc_state); - data->family = BPF_CORE_READ(sk, __sk_common.skc_family); - data->net_ns = get_netns(sk); - data->protocol = bpf_core_sock_sk_protocol_ak(sk); - data->bytes = size; - - struct secure_tailcall_stack* stack = NULL; - stack = bpf_map_lookup_elem(&tailcall_stack, &zero); - if (!stack) return 0; - memset(stack, 0, sizeof(stack)); - stack->func = SECURE_FUNC_TRACEPOINT_FUNC_TCP_SENDMSG; - stack->tcp_data.func = TRACEPOINT_FUNC_TCP_SENDMSG; - stack->tcp_data.key = enter->key; - stack->tcp_data.pkey = enter->pkey; - stack->tcp_data.timestamp = bpf_ktime_get_ns(); - stack->tcp_data.daddr = daddr; - stack->tcp_data.dport = bpf_htons(dport); - stack->tcp_data.saddr = saddr; - stack->tcp_data.sport = bpf_htons(sport); - stack->tcp_data.state = BPF_CORE_READ(sk, __sk_common.skc_state); - stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); - stack->tcp_data.net_ns = get_netns(sk); - stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); - stack->tcp_data.bytes = size; - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, daddr:%u, family:%u", - stack->tcp_data.saddr, stack->tcp_data.daddr, data->family); - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, sport:%u, state:%u", - stack->tcp_data.daddr, stack->tcp_data.sport, data->state); - - - bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); - return 0; -} - -// void tcp_close(struct sock *sk, long timeout); -SEC("kprobe/tcp_close") -int BPF_KPROBE(kprobe_tcp_close, struct sock *sk) -{ - __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct 
execve_map_value *enter; - enter = execve_map_get_noinit(pid); - if (!enter || enter->key.ktime == 0) { - BPF_DEBUG("[kprobe][kprobe_tcp_close] pid:%u never enter. skip collect", pid); - return 0; - } - BPF_DEBUG("[kprobe][kprobe_tcp_close] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); - - __u32 zero = 0; - struct tcp_data_t* data = NULL; - data = bpf_map_lookup_elem(&sock_secure_data_heap, &zero); - if (!data) return 0; - memset(data, 0, sizeof(data)); - - data->func = TRACEPOINT_FUNC_TCP_CLOSE; - data->key = enter->key; - data->pkey = enter->pkey; - struct inet_sock *inet = (struct inet_sock *)sk; - data->timestamp = bpf_ktime_get_ns(); - unsigned int daddr = BPF_CORE_READ(sk, __sk_common.skc_daddr); - data->daddr = bpf_htonl(daddr); - unsigned short dport = BPF_CORE_READ(sk, __sk_common.skc_dport); - data->dport = bpf_htons(dport); - unsigned int saddr = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); - data->saddr = bpf_htonl(saddr); - unsigned short sport = BPF_CORE_READ(inet, inet_sport); - data->sport = bpf_htons(sport); - data->state = BPF_CORE_READ(sk, __sk_common.skc_state); - data->family = BPF_CORE_READ(sk, __sk_common.skc_family); - data->net_ns = get_netns(sk); - data->protocol = bpf_core_sock_sk_protocol_ak(sk); - - struct secure_tailcall_stack* stack = NULL; - stack = bpf_map_lookup_elem(&tailcall_stack, &zero); - if (!stack) return 0; - memset(stack, 0, sizeof(stack)); - stack->func = SECURE_FUNC_TRACEPOINT_FUNC_TCP_CLOSE; - stack->tcp_data.func = TRACEPOINT_FUNC_TCP_CLOSE; - stack->tcp_data.key = enter->key; - stack->tcp_data.pkey = enter->pkey; - stack->tcp_data.timestamp = bpf_ktime_get_ns(); - stack->tcp_data.daddr = daddr; - stack->tcp_data.dport = bpf_htons(dport); - stack->tcp_data.saddr = saddr; - stack->tcp_data.sport = bpf_htons(sport); - stack->tcp_data.state = BPF_CORE_READ(sk, __sk_common.skc_state); - stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); - stack->tcp_data.net_ns = get_netns(sk); - stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, sport:%u, family:%u", - stack->tcp_data.saddr, stack->tcp_data.sport, data->family); - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, dport:%u, state:%u", - stack->tcp_data.daddr, stack->tcp_data.dport, data->state); - - - bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); - return 0; -} - -// -SEC("kprobe/tcp_connect") -int BPF_KPROBE(kprobe_tcp_connect, struct sock *sk) { - __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct execve_map_value *enter; - enter = execve_map_get_noinit(pid); - if (!enter || enter->key.ktime == 0) { - BPF_DEBUG("[kprobe][kprobe_tcp_connect] pid:%u never enter. 
skip collect", pid); - return 0; - } - BPF_DEBUG("[kprobe][kprobe_tcp_connect] pid:%u ktime:%llu already enter.", pid, enter->key.ktime); - - __u32 zero = 0; - struct tcp_data_t* data = NULL; - data = bpf_map_lookup_elem(&sock_secure_data_heap, &zero); - if (!data) return 0; - memset(data, 0, sizeof(data)); - - data->func = TRACEPOINT_FUNC_TCP_CONNECT; - data->key = enter->key; - data->pkey = enter->pkey; - - struct inet_sock *inet = (struct inet_sock *)sk; - data->timestamp = bpf_ktime_get_ns(); - unsigned int daddr = BPF_CORE_READ(sk, __sk_common.skc_daddr); - data->daddr = bpf_htonl(daddr); - unsigned short dport = BPF_CORE_READ(sk, __sk_common.skc_dport); - data->dport = bpf_htons(dport); - unsigned int saddr = BPF_CORE_READ(sk, __sk_common.skc_rcv_saddr); - data->saddr = bpf_htonl(saddr); - unsigned short sport = BPF_CORE_READ(inet, inet_sport); - data->sport = bpf_htons(sport); - data->state = BPF_CORE_READ(sk, __sk_common.skc_state); - data->family = BPF_CORE_READ(sk, __sk_common.skc_family); - data->net_ns = get_netns(sk); - data->protocol = bpf_core_sock_sk_protocol_ak(sk); - - - struct secure_tailcall_stack* stack = NULL; - stack = bpf_map_lookup_elem(&tailcall_stack, &zero); - if (!stack) return 0; - memset(stack, 0, sizeof(stack)); - stack->func = SECURE_FUNC_TRACEPOINT_FUNC_TCP_CONNECT; - stack->tcp_data.func = TRACEPOINT_FUNC_TCP_CONNECT; - stack->tcp_data.key = enter->key; - stack->tcp_data.pkey = enter->pkey; - stack->tcp_data.timestamp = bpf_ktime_get_ns(); - stack->tcp_data.daddr = daddr; - stack->tcp_data.dport = bpf_htons(dport); - stack->tcp_data.saddr = saddr; - stack->tcp_data.sport = bpf_htons(sport); - stack->tcp_data.state = BPF_CORE_READ(sk, __sk_common.skc_state); - stack->tcp_data.family = BPF_CORE_READ(sk, __sk_common.skc_family); - stack->tcp_data.net_ns = get_netns(sk); - stack->tcp_data.protocol = bpf_core_sock_sk_protocol_ak(sk); - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] saddr:%u, sport:%u, family:%u", - stack->tcp_data.saddr, stack->tcp_data.sport, data->family); - BPF_DEBUG("[kprobe][kprobe_tcp_sendmsg][dump] daddr:%u, dport:%u, state:%u", - stack->tcp_data.daddr, stack->tcp_data.dport, data->state); - - - bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); - return 0; -} diff --git a/src/security/process.h b/src/security/process.h index 733b1c5..bc5b29a 100644 --- a/src/security/process.h +++ b/src/security/process.h @@ -17,310 +17,6 @@ #include "type.h" #include "bpf_process_event_type.h" -// -///* Max number of args to parse */ -//#define MAXARGS 20 -///* Max length of any given arg */ -//#define MAXARGLENGTH 256 -///* This is the absolute buffer size for args and filenames including some -// * extra head room so we can append last args string to buffer. The extra -// * headroom is an unfortunate result of bounds on offset/size in -// * event_args_builder(). -// * -// * For example given an offset bounds -// * -// * offset <- (0, 100) -// * -// * We will read into the buffer using this offset giving a max offset -// * of eargs + 100. -// * -// * args[offset] <- (0, 100) -// * -// * Now we want to read this with call 45 aka bpf_probe_read_str as follows, -// * where 'kernel_struct_arg' is the kernel data struct we are reading. -// * -// * bpf_probe_read_str(args[offset], size, kernel_struct_arg) -// * -// * But we have a bit of a problem determining if 'size' is out of array -// * range. 
The math would be, -// * -// * size = length - offset -// * -// * Giving the remainder of the buffer, -// * -// * args offset length -// * |---------------|------------------| -// * -// * |-------size-------| -// * -// * But verifier math works on bounds so bounds analysis of size is the -// * following, -// * -// * length = 1024 -// * offset = (0, 100) -// * -// * size = length - offset -// * size = (1024) - (0, 100) -// * size <- (924, 1124) -// * -// * And verifier throws an error because args[offset + size] with bounds -// * anaylsis, -// * -// * args_(max)[100 + 1024] = args_(max)[1124] -// * -// * To circumvent this, at least until we teach the verifier about -// * dependent variables, create a maxarg value and pad arg buffer with -// * it. Giving a args buffer of size 'length + pad' with above bounds -// * analysis, -// * -// * size = length - offset -// * size = (1024) - (0, 100) -// * if size > pad goto done -// * size <- (924, 1124) // 1124 < length + pad -// * -// * Phew all clear now? -// */ -//#define CWD_MAX 256 -//#define BUFFER 1024 -//#define SIZEOF_EVENT 56 -//#define PADDED_BUFFER \ -// (BUFFER + MAXARGLENGTH + SIZEOF_EVENT + SIZEOF_EVENT + CWD_MAX) -///* This is the usable buffer size for args and filenames. It is calculated -// * as the (BUFFER SIZE - sizeof(parent) - sizeof(curr) but unfortunately -// * preprocess doesn't know types so we do it manually without sizeof(). -// */ -//#define ARGSBUFFER (BUFFER - SIZEOF_EVENT - SIZEOF_EVENT) -//#define __ASM_ARGSBUFFER 976 -//#define ARGSBUFFERMASK (ARGSBUFFER - 1) -//#define MAXARGMASK (MAXARG - 1) -//#define PATHNAME_SIZE 256 -// -///* Task flags */ -//#ifndef PF_KTHREAD -//#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ -//#endif -// -///* Msg flags */ -//#define EVENT_UNKNOWN 0x00 -//#define EVENT_EXECVE 0x01 -//#define EVENT_EXECVEAT 0x02 -//#define EVENT_PROCFS 0x04 -//#define EVENT_TRUNC_FILENAME 0x08 -//#define EVENT_TRUNC_ARGS 0x10 -//#define EVENT_TASK_WALK 0x20 -//#define EVENT_MISS 0x40 -//#define EVENT_NEEDS_AUID 0x80 -//#define EVENT_ERROR_FILENAME 0x100 -//#define EVENT_ERROR_ARGS 0x200 -//#define EVENT_NEEDS_CWD 0x400 -//#define EVENT_NO_CWD_SUPPORT 0x800 -//#define EVENT_ROOT_CWD 0x1000 -//#define EVENT_ERROR_CWD 0x2000 -//#define EVENT_CLONE 0x4000 -//#define EVENT_ERROR_SOCK 0x8000 -//#define EVENT_ERROR_CGROUP_NAME 0x010000 -//#define EVENT_ERROR_CGROUP_KN 0x020000 -//#define EVENT_ERROR_CGROUP_SUBSYSCGRP 0x040000 -//#define EVENT_ERROR_CGROUP_SUBSYS 0x080000 -//#define EVENT_ERROR_CGROUPS 0x100000 -//#define EVENT_ERROR_CGROUP_ID 0x200000 -//#define EVENT_ERROR_PATH_COMPONENTS 0x400000 -//#define EVENT_DATA_FILENAME 0x800000 -//#define EVENT_DATA_ARGS 0x1000000 -// -//#define EVENT_COMMON_FLAG_CLONE 0x01 -// -///* Docker IDs are unique at first 12 characters, but we want to get -// * 12chars plus any extra prefix used by the container environment. -// * Minikube for example prepends 'docker-' to the id. So lets copy -// * 32B and assume at least 12B of it is ID info. -// */ -//#define DOCKER_ID_LENGTH 128 -// -//struct msg_execve_key { -// __u32 pid; // Process TGID -// __u8 pad[4]; -// __u64 ktime; -//}; // All fields aligned so no 'packed' attribute. -// -///* This is the struct stored in bpf map to share info between -// * different execve hooks. 
-// */ -//struct execve_info { -// /* The secureexec is to reflect the kernel bprm->secureexec that is exposed -// * to userspace through auxiliary vector which can be read from -// * /proc/self/auxv or https://man7.org/linux/man-pages/man3/getauxval.3.html -// * -// * The AT_SECURE of auxv can have a value of 1 or 0 and it is set from -// * the bprm->secureexec that is a bit field. -// * If bprm->secureexec is 1 then it means executable should be treated securely. -// * Most commonly, 1 indicates that the process is executing a set-user-ID -// * or set-group-ID binary (so that its real and effective UIDs or GIDs differ -// * from one another), or that it gained capabilities by executing a binary file -// * that has capabilities (see capabilities(7)). -// * Alternatively, a nonzero value may be triggered by a Linux Security Module. -// * When this value is nonzero, the dynamic linker disables the use of certain -// * environment variables. -// * -// * The secureexec here can have the following bit flags: -// * EXEC_SETUID or EXEC_SETGID -// */ -// __u32 secureexec; -// __u32 i_nlink; /* inode links */ -// __u64 i_ino; /* inode number */ -//}; -// -///* process information -// * -// * Manually linked to ARGSBUFFER and PADDED_BUFFER if this changes then please -// * also change SIZEOF_EVENT. -// */ -//struct msg_process { -// __u32 size; -// __u32 pid; // Process TGID -// __u32 tid; // Process thread -// __u32 nspid; -// __u32 secureexec; -// __u32 uid; -// __u32 auid; -// __u32 flags; -// __u32 i_nlink; -// __u32 pad; -// __u64 i_ino; -// __u64 ktime; -// char *args; -//}; // All fields aligned so no 'packed' attribute. -// -///* msg_clone_event holds only the necessary fields to construct a new entry from -// * the parent after a clone() event. -// */ -//struct msg_clone_event { -// struct msg_common common; -// struct msg_execve_key parent; -// __u32 tgid; -// __u32 tid; -// __u32 nspid; -// __u32 flags; -// __u64 ktime; -//} __attribute__((packed)); -// -//struct exit_info { -// __u32 code; -// __u32 tid; // Thread ID -//}; -// -//struct msg_exit { -// struct msg_common common; -// struct msg_execve_key current; -// struct exit_info info; -//}; // All fields aligned so no 'packed' attribute. -// -//enum { -// ns_uts = 0, -// ns_ipc = 1, -// ns_mnt = 2, -// ns_pid = 3, -// ns_pid_for_children = 4, -// ns_net = 5, -// ns_time = 6, -// ns_time_for_children = 7, -// ns_cgroup = 8, -// ns_user = 9, -// -// // If you update the value of ns_max_types you -// // should also update parseMatchNamespaces() -// // in kernel.go -// ns_max_types = 10, -//}; -// -//struct msg_ns { -// union { -// struct { -// __u32 uts_inum; -// __u32 ipc_inum; -// __u32 mnt_inum; -// __u32 pid_inum; -// __u32 pid_for_children_inum; -// __u32 net_inum; -// __u32 time_inum; -// __u32 time_for_children_inum; -// __u32 cgroup_inum; -// __u32 user_inum; -// }; -// __u32 inum[ns_max_types]; -// }; -//}; // All fields aligned so no 'packed' attribute. -// -//struct msg_k8s { -// __u32 net_ns; -// __u32 cid; -// __u64 cgrpid; -// char docker_id[DOCKER_ID_LENGTH]; -//}; // All fields aligned so no 'packed' attribute. -// -//#define BINARY_PATH_MAX_LEN 256 -// -//struct heap_exe { -// // because of verifier limitations, this has to be 2 * 256 bytes while 256 -// // should be theoretically sufficient, and actually is, in unit tests. -// char buf[BINARY_PATH_MAX_LEN * 2]; -// // offset points to the start of the path in the above buffer. Use offset to -// // read the path in the buffer since it's written from the end. 
-// char *off; -// __u32 len; -// __u32 error; -//}; // All fields aligned so no 'packed' attribute. -// -//struct msg_execve_event { -// struct msg_common common; -// struct msg_k8s kube; -// struct msg_execve_key parent; -// __u64 parent_flags; -// struct msg_cred creds; -// struct msg_ns ns; -// struct msg_execve_key cleanup_key; -// /* if add anything above please also update the args of -// * validate_msg_execve_size() in bpf_execve_event.c */ -// union { -// struct msg_process process; -// char buffer[PADDED_BUFFER]; -// }; -// /* below fields are not part of the event, serve just as -// * heap for execve programs -// */ -//#ifdef __LARGE_BPF_PROG -// struct heap_exe exe; -//#endif -//}; // All fields aligned so no 'packed' attribute. -// -//// This structure stores the binary path that was recorded on execve. -//// Technically PATH_MAX is 4096 but we limit the length we store since we have -//// limits on the length of the string to compare: -//// - Artificial limits for full string comparison. -//// - Technical limits for prefix and postfix, using LPM_TRIE that have a 256 -//// bytes size limit. -//struct binary { -// // length of the path stored in path, this should be < BINARY_PATH_MAX_LEN -// // but can contain negative value in case of copy error. -// // While s16 would be sufficient, 64 bits are handy for alignment. -// __s64 path_length; -// // BINARY_PATH_MAX_LEN first bytes of the path -// char path[BINARY_PATH_MAX_LEN]; -//}; // All fields aligned so no 'packed' attribute -// -//// The execve_map_value is tracked by the TGID of the thread group -//// the msg_execve_key.pid. The thread IDs are recorded on the -//// fly and sent with every corresponding event. -//struct execve_map_value { -// struct msg_execve_key key; -// struct msg_execve_key pkey; -// __u32 flags; -// __u32 nspid; -// struct msg_ns ns; -// struct msg_capabilities caps; -// struct binary bin; -//} __attribute__((packed)) __attribute__((aligned(8))); - struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 1); @@ -588,20 +284,23 @@ struct { __uint(max_entries, 1); } tg_stats_map SEC(".maps"); -FUNC_INLINE void -perf_event_output_metric(void *ctx, u8 metric, void *map, u64 flags, void *data, u64 size) -{ +FUNC_INLINE void perf_event_output_update_error_metric(u8 msg_op, long err) { struct kernel_stats *valp; __u32 zero = 0; - long err; - err = bpf_perf_event_output(ctx, map, flags, data, size); - if (err < 0) { - valp = bpf_map_lookup_elem(&tg_stats_map, &zero); - if (valp) - __sync_fetch_and_add(&valp->sent_failed[metric], 1); + valp = bpf_map_lookup_elem(&tg_stats_map, &zero); + if (valp) { + __sync_fetch_and_add(&valp->sent_failed[msg_op], 1); } } +FUNC_INLINE void perf_event_output_metric(void *ctx, u8 msg_op, void *map, + u64 flags, void *data, u64 size) { + long err; + + err = bpf_perf_event_output(ctx, map, flags, data, size); + if (err < 0) + perf_event_output_update_error_metric(msg_op, err); +} #endif //SYSAK_PROCESS_H diff --git a/src/security/security.bpf.c b/src/security/security.bpf.c index f167589..f4337bb 100644 --- a/src/security/security.bpf.c +++ b/src/security/security.bpf.c @@ -99,7 +99,6 @@ read_args(void *ctx, struct msg_execve_event *event) off = bpf_probe_read_str(&heap->maxpath, 4096, (char *)start_stack); if (off < 0) return 0; - BPF_DEBUG("[read_args] pid:%llu, args:%s", p->pid, heap->maxpath); start_stack += off; @@ -144,7 +143,6 @@ read_path(void *ctx, struct msg_execve_event *event, void *filename) earg = (void *)p + offsetof(struct msg_process, args); size = 
bpf_probe_read_str(earg, MAXARGLENGTH - 1, filename); - BPF_DEBUG("[read_path] pid:%llu, path:%s", p->pid, earg); if (size < 0) { flags |= EVENT_ERROR_FILENAME; size = 0; @@ -205,11 +203,22 @@ read_exe(struct task_struct *task, struct heap_exe *exe) struct file *file = BPF_CORE_READ(task, mm, exe_file); struct path *path = __builtin_preserve_access_index(&file->f_path); - exe->len = BINARY_PATH_MAX_LEN; - exe->off = (char *)&exe->buf; - exe->off = __d_path_local(path, exe->off, (int *)&exe->len, (int *)&exe->error); - if (exe->len > 0) - exe->len = BINARY_PATH_MAX_LEN - exe->len; + // we need to walk the complete 4096 len dentry in order to have an accurate + // matching on the prefix operators, even if we only keep a subset of that + char *buffer; + + buffer = d_path_local(path, (int *)&exe->len, (int *)&exe->error); + if (!buffer) + return 0; + + // buffer used by d_path_local can contain up to MAX_BUF_LEN i.e. 4096 we + // only keep the first 255 chars for our needs (we sacrifice one char to the + // verifier for the > 0 check) + if (exe->len > 255) + exe->len = 255; + asm volatile("%[len] &= 0xff;\n" + : [len] "+r"(exe->len)); + probe_read(exe->buf, exe->len, buffer); return exe->len; } @@ -219,11 +228,8 @@ read_exe(struct task_struct *task, struct heap_exe *exe) SEC("kprobe/wake_up_new_task") int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task) { - __u32 cpid = bpf_get_current_pid_tgid() >> 32; - BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u enter~", cpid); struct execve_map_value *curr, *parent; struct msg_clone_event msg; - struct msg_capabilities caps; u64 msg_size = sizeof(struct msg_clone_event); struct msg_k8s kube; u32 tgid = 0; @@ -232,7 +238,6 @@ int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task) return 0; tgid = BPF_CORE_READ(task, tgid); - BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u read tgid:%u ~", cpid, tgid); /* Do not try to create any msg or calling execve_map_get * (that will add a new process in the execve_map) if we @@ -241,22 +246,20 @@ int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task) parent = __event_find_parent(task); if (!parent) return 0; - BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u has parent.", cpid, tgid); + curr = execve_map_get(tgid); if (!curr) return 0; - BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u new event in execve_map.", cpid, tgid); + /* Generate an EVENT_COMMON_FLAG_CLONE event once per process, * that is, thread group. */ if (curr->key.ktime != 0) return 0; - BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u begin init event.", cpid, tgid); /* Setup the execve_map entry. */ curr->flags = EVENT_COMMON_FLAG_CLONE; curr->key.pid = tgid; - // curr->key.ktime = get_start_time(); curr->key.ktime = bpf_ktime_get_ns(); curr->nspid = get_task_pid_vnr(); memcpy(&curr->bin, &parent->bin, sizeof(curr->bin)); @@ -266,10 +269,17 @@ int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task) * before the execve hook point if they changed or not. * This needs to be converted later to credentials. */ - get_current_subj_caps(&caps, task); - curr->caps.permitted = caps.permitted; - curr->caps.effective = caps.effective; - curr->caps.inheritable = caps.inheritable; + get_current_subj_caps(&curr->caps, task); + + /* Store the thread leader namespaces so we can check later + * before the execve hook point if they changed or not. 
+ */ + get_namespaces(&curr->ns, task); + + /* Set EVENT_IN_INIT_TREE flag on the process if its parent is in a + * container's init tree or if it has nspid=1. + */ + set_in_init_tree(curr, parent); /* Setup the msg_clone_event and sent to the user. */ msg.common.op = MSG_OP_CLONE; @@ -289,18 +299,13 @@ int BPF_KPROBE(event_wake_up_new_task, struct task_struct *task) __event_get_cgroup_info(task, &kube); - BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u init event done.", cpid, tgid); - if (cgroup_rate(ctx, &kube, msg.ktime)) { - BPF_DEBUG("[kprobe][event_wake_up_new_task] pid:%u tgid:%u begin submit clone event.", cpid, tgid); - perf_event_output_metric(ctx, MSG_OP_CLONE, &tcpmon_map, - BPF_F_CURRENT_CPU, &msg, msg_size); + perf_event_output_metric(ctx, MSG_OP_CLONE, &tcpmon_map, BPF_F_CURRENT_CPU, &msg, msg_size); } return 0; } -////__attribute__((section("tracepoint/sys_execve"), used)) int SEC("tracepoint/sched/sched_process_exec") int event_execve(struct trace_event_raw_sched_process_exec *ctx) { @@ -338,21 +343,24 @@ int event_execve(struct trace_event_raw_sched_process_exec *ctx) p->ktime = bpf_ktime_get_ns(); p->size = offsetof(struct msg_process, args); p->auid = get_auid(); - p->uid = bpf_get_current_uid_gid(); read_execve_shared_info(ctx, p, pid); p->size += read_path(ctx, event, filename); p->size += read_args(ctx, event); p->size += read_cwd(ctx, p); - BPF_DEBUG("[event_execve] enter pid:%llu, filename:%s", p->pid, filename); event->common.op = MSG_OP_EXECVE; event->common.ktime = p->ktime; event->common.size = offsetof(struct msg_execve_event, process) + p->size; - BPF_CORE_READ_INTO(&event->kube.net_ns, task, nsproxy, net_ns, ns.inum); - get_current_subj_creds(&event->creds, task); + /** + * Instead of showing the task owner, we want to display the effective + * uid that is used to calculate the privileges of current task when + * acting upon other objects. This allows to be compatible with the 'ps' + * tool that reports snapshot of current processes. + */ + p->uid = event->creds.euid; get_namespaces(&event->ns, task); p->flags |= __event_get_cgroup_info(task, &event->kube); @@ -388,7 +396,6 @@ int execve_rate(void *ctx) SEC("tracepoint/1") int execve_send(void *ctx) { - BPF_DEBUG("[execve_send] enter ~"); struct msg_execve_event *event; struct execve_map_value *curr; struct msg_process *p; @@ -432,7 +439,13 @@ int execve_send(void *ctx) if (curr->flags & EVENT_COMMON_FLAG_CLONE) { event_set_clone(p); } - curr->flags = 0; + curr->flags &= ~EVENT_COMMON_FLAG_CLONE; + /* Set EVENT_IN_INIT_TREE flag on the process if nspid=1. 
+ */ + set_in_init_tree(curr, NULL); + if (curr->flags & EVENT_IN_INIT_TREE) { + event->process.flags |= EVENT_IN_INIT_TREE; + } #ifdef __NS_CHANGES_FILTER if (init_curr) memcpy(&(curr->ns), &(event->ns), @@ -451,7 +464,7 @@ int execve_send(void *ctx) #ifdef __LARGE_BPF_PROG // read from proc exe stored at execve time if (event->exe.len <= BINARY_PATH_MAX_LEN) { - curr->bin.path_length = bpf_probe_read(curr->bin.path, event->exe.len, event->exe.off); + curr->bin.path_length = bpf_probe_read(curr->bin.path, event->exe.len, event->exe.buf); if (curr->bin.path_length == 0) curr->bin.path_length = event->exe.len; } @@ -473,9 +486,7 @@ int execve_send(void *ctx) sizeof(struct msg_execve_key) + sizeof(__u64) + sizeof(struct msg_cred) + sizeof(struct msg_ns) + sizeof(struct msg_execve_key) + p->size); -// BPF_DEBUG("[execve_send] before perf output ~"); perf_event_output_metric(ctx, MSG_OP_EXECVE, &tcpmon_map, BPF_F_CURRENT_CPU, event, size); -// BPF_DEBUG("[execve_send] after perf output ~"); return 0; } @@ -490,9 +501,7 @@ int event_exit_acct_process(struct pt_regs *ctx) { __u64 pid_tgid = bpf_get_current_pid_tgid(); __u32 pid = pid_tgid >> 32; - BPF_DEBUG("[kprobe][event_exit_acct_process] pid:%u enter~", pid); - event_exit_send(ctx, pid_tgid >> 32); - BPF_DEBUG("[kprobe][event_exit_acct_process] pid:%u send done ~", pid); + event_exit_send(ctx, pid); return 0; } @@ -511,8 +520,6 @@ int event_exit_disassociate_ctty(struct pt_regs *ctx) { int on_exit = (int)PT_REGS_PARM1_CORE(ctx); __u32 pid = bpf_get_current_pid_tgid() >> 32; - BPF_DEBUG("[kprobe][event_exit_disassociate_ctty] pid:%u enter~", pid); - if (on_exit) event_exit_send(ctx, pid); return 0; @@ -939,6 +946,7 @@ static inline __attribute__((always_inline)) long copy_path(char *args, const st if (!buffer) return 0; // tips: path size between 0~255 + if (size > 255) size = 255; asm volatile("%[size] &= 0xff;\n" ::[size] "+r"(size) :); bpf_probe_read(curr, size, buffer); -- Gitee From cf9b9300c81092fd3855df18f8792a9293899689 Mon Sep 17 00:00:00 2001 From: Tom Yu Date: Sat, 21 Jun 2025 17:05:20 +0000 Subject: [PATCH 13/24] add set_in_init_tree --- src/security/bpf_process_event.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/security/bpf_process_event.h b/src/security/bpf_process_event.h index 48ac76f..06b0567 100644 --- a/src/security/bpf_process_event.h +++ b/src/security/bpf_process_event.h @@ -593,4 +593,18 @@ __event_get_cgroup_info(struct task_struct *task, struct msg_k8s *kube) return flags; } +FUNC_INLINE void +set_in_init_tree(struct execve_map_value *curr, struct execve_map_value *parent) +{ + if (parent && parent->flags & EVENT_IN_INIT_TREE) { + curr->flags |= EVENT_IN_INIT_TREE; + BPF_DEBUG("%s: parent in init tree", __func__); + return; + } + + if (curr->nspid == 1) { + curr->flags |= EVENT_IN_INIT_TREE; + BPF_DEBUG("%s: nspid=1", __func__); + } +} #endif // SYSAK_BPF_PROCESS_EVENT_H -- Gitee From bc1bb8641a8050c2d82a351518632d8092c3cdf2 Mon Sep 17 00:00:00 2001 From: xiongyunnnn <16005136+xiongyunnnn@user.noreply.gitee.com> Date: Mon, 14 Jul 2025 20:08:41 +0800 Subject: [PATCH 14/24] fix issues with eBPF file collection --- src/security/bpf_process_event.h | 9 +++- src/security/security.bpf.c | 73 ++++++++++++++++++++------------ src/security/type.h | 12 ++++++ 3 files changed, 64 insertions(+), 30 deletions(-) diff --git a/src/security/bpf_process_event.h b/src/security/bpf_process_event.h index 06b0567..4254f6a 100644 --- a/src/security/bpf_process_event.h +++ b/src/security/bpf_process_event.h @@ 
-179,7 +179,7 @@ struct cwd_read_data { bool resolved; }; -FUNC_INLINE long cwd_read(struct cwd_read_data *data) +FUNC_INLINE long cwd_read(struct cwd_read_data *data, char **buffer) { struct qstr d_name; struct dentry *parent; @@ -212,6 +212,11 @@ FUNC_INLINE long cwd_read(struct cwd_read_data *data) return 0; } // resolved all path components successfully + if (data->bptr == *buffer) { + bpf_probe_read(&d_name, sizeof(d_name), _(&dentry->d_name)); + error = prepend_name(data->bf, &data->bptr, &data->blen, + (const char *)d_name.name, d_name.len); + } data->resolved = true; return 1; } @@ -256,7 +261,7 @@ prepend_path(const struct path *path, const struct path *root, char *bf, #ifndef __V61_BPF_PROG #pragma unroll for (int i = 0; i < PROBE_CWD_READ_ITERATIONS; ++i) { - if (cwd_read(&data)) + if (cwd_read(&data, buffer)) break; } #else diff --git a/src/security/security.bpf.c b/src/security/security.bpf.c index f4337bb..9573cce 100644 --- a/src/security/security.bpf.c +++ b/src/security/security.bpf.c @@ -1132,35 +1132,35 @@ int filter_prog(struct pt_regs *ctx) { } break; } - // case FILTER_TYPE_FILE_PREFIX: { - // struct string_prefix_lpm_trie *prefix = NULL; - // int zero = 0; - // prefix = bpf_map_lookup_elem(&string_prefix_maps_heap, &zero); - // if (prefix == NULL) { - // BPF_DEBUG("[kprobe][tailcall] callname idx:%u cannot lookup string_prefix_maps_heap", call_name_idx); - // break; - // } - // __u32 path_size = 0; - // bpf_probe_read(&path_size, 4, stack->file_data.path); - // prefix->prefixlen = path_size * 8; - // bpf_probe_read(prefix->data, path_size & (STRING_PREFIX_MAX_LENGTH - 1), stack->file_data.path + 4); - // int path_len = *(int *)stack->file_data.path; - // BPF_DEBUG("[kprobe][tailcall] callname idx:%u begin to query inner map. stack path length:%d", call_name_idx, path_len); - // BPF_DEBUG("[kprobe][tailcall] callname idx:%u begin to query inner map. stack path+4:%s", call_name_idx, &stack->file_data.path[4]); - // BPF_DEBUG("[kprobe][tailcall] callname idx:%u begin to query inner map. prefix path:%s, path size:%u", call_name_idx, prefix->data, path_size); + case FILTER_TYPE_FILE_PREFIX: { + struct string_prefix_lpm_trie *prefix = NULL; + int zero = 0; + prefix = bpf_map_lookup_elem(&string_prefix_maps_heap, &zero); + if (prefix == NULL) { + BPF_DEBUG("[kprobe][tailcall] callname idx:%u cannot lookup string_prefix_maps_heap", call_name_idx); + break; + } + __u32 path_size = 0; + bpf_probe_read(&path_size, 4, stack->file_data.path); + prefix->prefixlen = path_size * 8; + bpf_probe_read(prefix->data, path_size & (STRING_PREFIX_MAX_LENGTH - 1), stack->file_data.path + 4); + int path_len = *(int *)stack->file_data.path; + BPF_DEBUG("[kprobe][tailcall] callname idx:%u begin to query inner map. stack path length:%d", call_name_idx, path_len); + BPF_DEBUG("[kprobe][tailcall] callname idx:%u begin to query inner map. stack path+4:%s", call_name_idx, &stack->file_data.path[4]); + BPF_DEBUG("[kprobe][tailcall] callname idx:%u begin to query inner map. prefix path:%s, path size:%u", call_name_idx, prefix->data, path_size); - // struct bpf_map* inner_map = bpf_map_lookup_elem(&string_prefix_maps, &filter.map_idx[0]); - // __u8* ppass = NULL; - // if (inner_map != NULL) { - // ppass = bpf_map_lookup_elem(inner_map, prefix); - // if (ppass == NULL || *ppass == 0) pass &= 0; - // else pass &= 1; - // } else { - // // no filters were set ... 
- // BPF_DEBUG("[kprobe][tailcall] callname idx:%u cannot find inner map, no filter set, pass", call_name_idx); - // } - // break; - // } + struct bpf_map* inner_map = bpf_map_lookup_elem(&string_prefix_maps, &filter.map_idx[0]); + __u8* ppass = NULL; + if (inner_map != NULL) { + ppass = bpf_map_lookup_elem(inner_map, prefix); + if (ppass == NULL || *ppass == 0) pass &= 0; + else pass &= 1; + } else { + // no filters were set ... + BPF_DEBUG("[kprobe][tailcall] callname idx:%u cannot find inner map, no filter set, pass", call_name_idx); + } + break; + } default: break; } @@ -1256,6 +1256,23 @@ int kprobe_security_file_permission(struct pt_regs *ctx) // const struct path *path_arg = 0; // path_arg = _(&file->f_path); // copy_path(stack->file_data.path, path_arg); + // 获得操作类型掩码信息 + int mask = (int) PT_REGS_PARM2(ctx); + switch (mask) { + case MAY_READ: + stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION; + stack->file_data.func = TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION_READ; + break; + case MAY_WRITE: + stack->func = SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION; + stack->file_data.func = TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION_WRITE; + break; + default: + BPF_DEBUG("[kprobe][kprobe_security_file_permission] unknown operation"); + break; + } + + BPF_DEBUG("[kprobe][security_file_permission] after association: pid:%u ktime:%llu path:%s already enter.", pid, enter->key.ktime, &stack->file_data.path[4]); bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); return 0; } diff --git a/src/security/type.h b/src/security/type.h index 86cc671..75bde6f 100644 --- a/src/security/type.h +++ b/src/security/type.h @@ -20,6 +20,16 @@ #define IPV4LEN 4 #define IPV6LEN 16 +#define MAY_EXEC 0x00000001 +#define MAY_WRITE 0x00000002 +#define MAY_READ 0x00000004 +#define MAY_APPEND 0x00000008 +#define MAY_ACCESS 0x00000010 +#define MAY_OPEN 0x00000020 +#define MAY_CHDIR 0x00000040 +/* called from RCU mode, don't block */ +#define MAY_NOT_BLOCK 0x00000080 + struct tuple_type { __u64 saddr[2]; __u64 daddr[2]; @@ -130,6 +140,8 @@ struct port_entry { enum file_secure_func { TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION, + TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION_WRITE, + TRACEPOINT_FUNC_SECURITY_FILE_PERMISSION_READ, TRACEPOINT_FUNC_SECURITY_MMAP_FILE, TRACEPOINT_FUNC_SECURITY_PATH_TRUNCATE, TRACEPOINT_FUNC_SYS_WRITE, -- Gitee From 5c20fd671003f431002325b280f4389b91041828 Mon Sep 17 00:00:00 2001 From: xiongyunnnn <16005136+xiongyunnnn@user.noreply.gitee.com> Date: Mon, 14 Jul 2025 21:00:41 +0800 Subject: [PATCH 15/24] add early exit for completed filter processing --- src/security/security.bpf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/security/security.bpf.c b/src/security/security.bpf.c index 9573cce..08cec66 100644 --- a/src/security/security.bpf.c +++ b/src/security/security.bpf.c @@ -1011,8 +1011,13 @@ int filter_prog(struct pt_regs *ctx) { // get data int i = 0; int pass = 1; + int filter_num = filters->filter_count; #pragma unroll for (; i < MAX_FILTER_FOR_PER_CALLNAME; i ++) { + // Early break if we've processed all actual filters + if (i >= filter_num) { + break; + } int idx = i; struct selector_filter filter = filters->filters[idx]; // if (filter.filter_type != FILTER_TYPE_UNKNOWN) { -- Gitee From cf303c34e23775379499ae75cf8c035b0a139a3e Mon Sep 17 00:00:00 2001 From: xiongyunnnn <16005136+xiongyunnnn@user.noreply.gitee.com> Date: Wed, 16 Jul 2025 18:01:39 +0800 Subject: [PATCH 16/24] fix --- src/security/bpf_process_event.h | 9 ++------- 1 file changed, 2 
insertions(+), 7 deletions(-) diff --git a/src/security/bpf_process_event.h b/src/security/bpf_process_event.h index 4254f6a..06b0567 100644 --- a/src/security/bpf_process_event.h +++ b/src/security/bpf_process_event.h @@ -179,7 +179,7 @@ struct cwd_read_data { bool resolved; }; -FUNC_INLINE long cwd_read(struct cwd_read_data *data, char **buffer) +FUNC_INLINE long cwd_read(struct cwd_read_data *data) { struct qstr d_name; struct dentry *parent; @@ -212,11 +212,6 @@ FUNC_INLINE long cwd_read(struct cwd_read_data *data, char **buffer) return 0; } // resolved all path components successfully - if (data->bptr == *buffer) { - bpf_probe_read(&d_name, sizeof(d_name), _(&dentry->d_name)); - error = prepend_name(data->bf, &data->bptr, &data->blen, - (const char *)d_name.name, d_name.len); - } data->resolved = true; return 1; } @@ -261,7 +256,7 @@ prepend_path(const struct path *path, const struct path *root, char *bf, #ifndef __V61_BPF_PROG #pragma unroll for (int i = 0; i < PROBE_CWD_READ_ITERATIONS; ++i) { - if (cwd_read(&data, buffer)) + if (cwd_read(&data)) break; } #else -- Gitee From 228220c96e26524e573fc378b5965ad1fe5b7163 Mon Sep 17 00:00:00 2001 From: xiongyunnnn <16005136+xiongyunnnn@user.noreply.gitee.com> Date: Wed, 16 Jul 2025 19:01:22 +0800 Subject: [PATCH 17/24] fix --- src/security/security.bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/security/security.bpf.c b/src/security/security.bpf.c index 08cec66..1abeb91 100644 --- a/src/security/security.bpf.c +++ b/src/security/security.bpf.c @@ -1261,7 +1261,7 @@ int kprobe_security_file_permission(struct pt_regs *ctx) // const struct path *path_arg = 0; // path_arg = _(&file->f_path); // copy_path(stack->file_data.path, path_arg); - // 获得操作类型掩码信息 + // obtain operation type mask information int mask = (int) PT_REGS_PARM2(ctx); switch (mask) { case MAY_READ: -- Gitee From b3995be81307342c884b32ac7953148b636a3fdf Mon Sep 17 00:00:00 2001 From: qianlufaceless Date: Tue, 22 Jul 2025 05:39:42 +0000 Subject: [PATCH 18/24] feat: set container_id_key in data_event * update --- src/bpf/net.bpf.c | 17 +++++++++-------- src/net.c | 5 ++--- src/net.h | 6 ++++-- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/bpf/net.bpf.c b/src/bpf/net.bpf.c index c3b7ebc..18401a1 100644 --- a/src/bpf/net.bpf.c +++ b/src/bpf/net.bpf.c @@ -182,7 +182,7 @@ struct { __uint(type, BPF_MAP_TYPE_LPM_TRIE); __uint(max_entries, 1024); __type(key, __u8[sizeof(struct container_id_key)]); // Need to specify as byte array as wouldn't take struct as key type - __type(value, __u8); + __type(value, __u64); __uint(map_flags, BPF_F_NO_PREALLOC); } enable_container_ids SEC(".maps"); @@ -269,10 +269,11 @@ static __always_inline bool match_container_id(struct connect_info_t* conn_info) BPF_DEBUG("after memset! pid:%u, cgroup:%s, real_length:%u \n", conn_info->conn_id.tgid, prefix->data, real_length); bpf_probe_read(prefix->data, real_length, conn_info->docker_id + trim_len); prefix->prefixlen = real_length << 3; - __u8* ppass = bpf_map_lookup_elem(&enable_container_ids, prefix); - if (ppass) { + __u64* cid_key = bpf_map_lookup_elem(&enable_container_ids, prefix); + if (cid_key) { BPF_DEBUG("bingo! pid:%u, cgroup:%s, prefix:%u \n", conn_info->conn_id.tgid, prefix->data, prefix->prefixlen); // in whitelist + conn_info->cid_key = *cid_key; return true; } BPF_DEBUG("blacklist! 
pid:%u, cgroup:%s, prefix:%u \n", conn_info->conn_id.tgid, prefix->data, prefix->prefixlen); @@ -445,6 +446,7 @@ static __always_inline void init_conn_info(uint32_t tgid, conn_info->addr.sa.sa_family = AF_UNKNOWN; conn_info->is_sample = true; conn_info->protocol = ProtoUnknown; + conn_info->cid_key = 0; struct task_struct *task = (struct task_struct *)bpf_get_current_task(); struct cgroup *cgrp = get_task_cgroup(task); if (!cgrp) @@ -1395,7 +1397,7 @@ static __always_inline void output_conn_stats(struct trace_event_raw_sys_exit_co uint32_t total_pkts = conn_info->wr_pkts + conn_info->rd_pkts; bool real_threshold = (total_bytes >= conn_info->last_output_rd_bytes + conn_info->last_output_wr_bytes + ConnStatsBytesThreshold) || (total_pkts >= conn_info->last_output_rd_pkts + conn_info->last_output_wr_pkts + ConnStatsPacketsThreshold); - if (real_threshold || force || !conn_info->ever_sent) + if (match_container_id(conn_info) && (real_threshold || force || !conn_info->ever_sent)) { struct conn_stats_event_t *event = add_conn_stats(conn_info); if (event != NULL) @@ -1494,7 +1496,7 @@ static __always_inline void add_close_event(struct trace_event_raw_sys_exit_comp ctrl_event.conn_id = conn_info->conn_id; ctrl_event.close.rd_bytes = conn_info->rd_bytes; ctrl_event.close.wr_bytes = conn_info->wr_bytes; - if (conn_info->is_sample) + // if (conn_info->is_sample) { bpf_perf_event_output(ctx, &connect_ctrl_events_map, BPF_F_CURRENT_CPU, &ctrl_event, sizeof(struct conn_ctrl_event_t)); @@ -2063,11 +2065,10 @@ int BPF_KPROBE(tcp_close, struct sock *sk) * only family is AF_UNIX and no data will no report, but the bytes will be * recorded in first data event and report to user */ - if (need_trace_family(conn_info->addr.sa.sa_family) || + if (match_container_id(conn_info) && (need_trace_family(conn_info->addr.sa.sa_family) || conn_info->wr_bytes != 0 || - conn_info->rd_bytes != 0) + conn_info->rd_bytes != 0)) { - add_close_event(ctx, conn_info); if (conn_info->last_output_rd_pkts + conn_info->last_output_wr_pkts != conn_info->rd_pkts + conn_info->wr_pkts) { diff --git a/src/net.c b/src/net.c index 5b366f4..6953008 100644 --- a/src/net.c +++ b/src/net.c @@ -680,7 +680,7 @@ void ebpf_update_conn_role(struct connect_id_t *conn_id, enum support_role_e rol { } -bool ebpf_set_cid_filter(const char* container_id, size_t length, bool update) +bool ebpf_set_cid_filter(const char* container_id, size_t length, uint64_t cid_key, bool update) { struct net_bpf *obj = env.obj; int map_fd = bpf_map__fd(obj->maps.enable_container_ids); @@ -691,11 +691,10 @@ bool ebpf_set_cid_filter(const char* container_id, size_t length, bool update) }; memset(key.data, 0, CONTAINER_ID_MAX_LENGTH); memcpy(key.data, container_id, length); - __u8 value = 1; bool ret; if (update) { - ret = bpf_map_update_elem(map_fd, &key, &value, BPF_ANY); + ret = bpf_map_update_elem(map_fd, &key, &cid_key, BPF_ANY); if (ret) { net_log(LOG_TYPE_WARN, "Failed to update element: %s\n", strerror(errno)); return false; diff --git a/src/net.h b/src/net.h index 3ac97f4..47531c4 100644 --- a/src/net.h +++ b/src/net.h @@ -208,6 +208,7 @@ struct conn_stats_event_t struct conn_data_event_t { struct connect_id_t conn_id; + uint64_t cid_key; uint64_t start_ts; uint64_t end_ts; enum support_proto_e protocol; @@ -252,6 +253,7 @@ struct connect_info_t uint64_t rd_max_ts; uint64_t wr_min_ts; uint64_t wr_max_ts; + uint64_t cid_key; uint64_t start_ts; uint64_t end_ts; enum support_proto_e protocol; @@ -441,8 +443,8 @@ void ebpf_update_conn_addr(struct connect_id_t *conn_id, 
union sockaddr_t *dest_ // 更新process 观察范围,动态增加pid,drop 为true 是进行删除操作。 void ebpf_disable_process(uint32_t pid, bool drop); -// 更新containerid 观察范围,动态增加pid,drop 为true 是进行删除操作。 -bool ebpf_set_cid_filter(const char* container_id, size_t length, bool update); +// 更新containerid 观察范围,动态增加 container id,drop 为true 是进行删除操作。 +bool ebpf_set_cid_filter(const char* container_id, size_t length, uint64_t cid_key, bool update); // 更新conn对应的角色,某些协议内核态无法判断角色 void ebpf_update_conn_role(struct connect_id_t *conn_id, enum support_role_e role_type); -- Gitee From 8e9517c8fefce31cc664f9edd9cdf8defef45d40 Mon Sep 17 00:00:00 2001 From: qianlufaceless Date: Tue, 22 Jul 2025 06:29:09 +0000 Subject: [PATCH 19/24] !43 fix: security load failure * fix --- src/security/security.bpf.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/security/security.bpf.c b/src/security/security.bpf.c index 1abeb91..52a2882 100644 --- a/src/security/security.bpf.c +++ b/src/security/security.bpf.c @@ -1013,11 +1013,7 @@ int filter_prog(struct pt_regs *ctx) { int pass = 1; int filter_num = filters->filter_count; #pragma unroll - for (; i < MAX_FILTER_FOR_PER_CALLNAME; i ++) { - // Early break if we've processed all actual filters - if (i >= filter_num) { - break; - } + for (; i < MAX_FILTER_FOR_PER_CALLNAME && i < filter_num; i ++) { int idx = i; struct selector_filter filter = filters->filters[idx]; // if (filter.filter_type != FILTER_TYPE_UNKNOWN) { -- Gitee From edf9ca51bad3f9b6276ad48c939f2341a1eae531 Mon Sep 17 00:00:00 2001 From: qianlufaceless Date: Mon, 18 Aug 2025 08:54:56 +0000 Subject: [PATCH 20/24] feat: support get self runtime info from kernel --- src/bpf/net.bpf.c | 65 +++++++++++++++++++++++++++++++++++++---------- src/net.c | 35 +++++++++++++++++++++++++ src/net.h | 10 ++++++++ 3 files changed, 97 insertions(+), 13 deletions(-) diff --git a/src/bpf/net.bpf.c b/src/bpf/net.bpf.c index 18401a1..1c5d5c9 100644 --- a/src/bpf/net.bpf.c +++ b/src/bpf/net.bpf.c @@ -186,6 +186,22 @@ struct { __uint(map_flags, BPF_F_NO_PREALLOC); } enable_container_ids SEC(".maps"); +struct +{ + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); + __type(value, struct self_runtime_info); + __uint(max_entries, 1); + __uint(map_flags, BPF_F_NO_PREALLOC); +} self_runtime_info_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct self_runtime_info); +} self_runtime_info_heap SEC(".maps"); + struct trace_event_raw_sys_enter_comp { struct trace_entry ent; @@ -253,22 +269,15 @@ static __always_inline bool match_container_id(struct connect_info_t* conn_info) BPF_DEBUG("dockerid length is zero! pid:%u docker_id_length:%u\n", conn_info->conn_id.tgid, conn_info->docker_id_length); return false; } - int length = conn_info->docker_id_length >= KN_NAME_LENGTH? KN_NAME_LENGTH : conn_info->docker_id_length; - int real_length = length - trim_len; - if (real_length <=0 ) { - BPF_DEBUG("reallen invalid! pid:%u real_length:%u\n", conn_info->conn_id.tgid, real_length); - return false; - } - if (real_length >= CONTAINER_ID_MAX_LENGTH) real_length = CONTAINER_ID_MAX_LENGTH; // check config u32 zero = 0; struct container_id_key* prefix = bpf_map_lookup_elem(&container_id_heap, &zero); if (!prefix) return false; __builtin_memset(prefix, 0, sizeof(struct container_id_key)); - BPF_DEBUG("after memset! 
pid:%u, cgroup:%s, real_length:%u \n", conn_info->conn_id.tgid, prefix->data, real_length); - bpf_probe_read(prefix->data, real_length, conn_info->docker_id + trim_len); - prefix->prefixlen = real_length << 3; + BPF_DEBUG("after memset! pid:%u, cgroup:%s, real_length:%u \n", conn_info->conn_id.tgid, prefix->data, conn_info->docker_id_length); + bpf_probe_read(prefix->data, CONTAINER_ID_MAX_LENGTH, conn_info->docker_id + trim_len); + prefix->prefixlen = CONTAINER_ID_MAX_LENGTH << 3; __u64* cid_key = bpf_map_lookup_elem(&enable_container_ids, prefix); if (cid_key) { BPF_DEBUG("bingo! pid:%u, cgroup:%s, prefix:%u \n", conn_info->conn_id.tgid, prefix->data, prefix->prefixlen); @@ -2065,9 +2074,7 @@ int BPF_KPROBE(tcp_close, struct sock *sk) * only family is AF_UNIX and no data will no report, but the bytes will be * recorded in first data event and report to user */ - if (match_container_id(conn_info) && (need_trace_family(conn_info->addr.sa.sa_family) || - conn_info->wr_bytes != 0 || - conn_info->rd_bytes != 0)) + if (match_container_id(conn_info) && need_trace_family(conn_info->addr.sa.sa_family)) { add_close_event(ctx, conn_info); if (conn_info->last_output_rd_pkts + conn_info->last_output_wr_pkts != conn_info->rd_pkts + conn_info->wr_pkts) @@ -2614,6 +2621,38 @@ int tp_sys_enter_recvmsg(struct trace_event_raw_sys_enter_comp *ctx) return 0; } +SEC("uprobe/ebpf_get_self_runtime_info") +int ebpf_get_self_runtime_info(struct pt_regs *ctx) +{ + struct task_struct *task = (struct task_struct *)bpf_get_current_task(); + struct cgroup *cgrp = get_task_cgroup(task); + if (!cgrp) + return 1; + + const char *name; + + name = get_cgroup_name(cgrp); + if (!name) return EVENT_ERROR_CGROUP_NAME; + + int key = 0; + struct self_runtime_info *runtime_info; + runtime_info = bpf_map_lookup_elem(&self_runtime_info_heap, &key); + if (runtime_info == NULL) { + return 1; + } + + int ret = bpf_probe_read_str(runtime_info->docker_id, KN_NAME_LENGTH, name); + runtime_info->docker_id_length = ret; + BPF_DEBUG("[uprobe][ebpf_get_self_runtime_info] docker_id:%s ret:%u \n", cgroup_name, ret); + runtime_info->pid = bpf_get_current_pid_tgid() >> 32; + + + bpf_map_update_elem(&self_runtime_info_map, &key, runtime_info, BPF_ANY); + + // not found + return 0; +} + SEC("uprobe/ebpf_cleanup_dog") int cleanup_dog_probe(struct pt_regs *ctx) { diff --git a/src/net.c b/src/net.c index 6953008..3aadb24 100644 --- a/src/net.c +++ b/src/net.c @@ -338,6 +338,38 @@ static void get_btf_path(void) pclose(fp); } +int32_t ebpf_init_self_runtime_info(char *so, long offset, struct self_runtime_info* info) { + // attach + struct net_bpf *obj = env.obj; + int ret; + + obj->links.ebpf_get_self_runtime_info = bpf_program__attach_uprobe(obj->progs.ebpf_get_self_runtime_info, false, + 0, so, offset); // 0 for self + ret = libbpf_get_error(obj->links.ebpf_get_self_runtime_info); + if (ret != 0) + { + net_log(LOG_TYPE_WARN, "uprobe get_self_runtime_info failed\n"); + return ret; + } + + net_log(LOG_TYPE_INFO, "successfully attach uprobe get_self_runtime_info\n"); + + // trigger + get_self_runtime_info(); + + // read from bpf maps ... 
+ int map_fd = bpf_map__fd(obj->maps.self_runtime_info_map); + + int key = 0; + ret = bpf_map_lookup_elem(map_fd, &key, info); + if (ret && errno != ENOENT) { + net_log(LOG_TYPE_WARN, "failed to lookup element in self_runtime_info_map: %s\n", strerror(errno)); + return ret; + } + + return 0; +} + int32_t ebpf_init(char *btf, int32_t btf_size, char *so, int32_t so_size, long uprobe_offset, long upca_offset, long upps_offset, long upcr_offset) { @@ -369,6 +401,7 @@ int32_t ebpf_init(char *btf, int32_t btf_size, char *so, int32_t so_size, long u bpf_program__set_autoattach(obj->progs.disable_process_probe, false); bpf_program__set_autoattach(obj->progs.update_conn_role_probe, false); bpf_program__set_autoattach(obj->progs.update_conn_addr_probe, false); + bpf_program__set_autoattach(obj->progs.ebpf_get_self_runtime_info, false); err = net_bpf__attach(obj); if (err) { @@ -680,6 +713,8 @@ void ebpf_update_conn_role(struct connect_id_t *conn_id, enum support_role_e rol { } +void get_self_runtime_info() {} + bool ebpf_set_cid_filter(const char* container_id, size_t length, uint64_t cid_key, bool update) { struct net_bpf *obj = env.obj; diff --git a/src/net.h b/src/net.h index 47531c4..dfc4d08 100644 --- a/src/net.h +++ b/src/net.h @@ -222,6 +222,12 @@ struct conn_data_event_t #endif }; +struct self_runtime_info { + uint32_t pid; + char docker_id[KN_NAME_LENGTH]; + int32_t docker_id_length; +}; + #ifdef __VMLINUX_H__ struct connect_info_t @@ -449,5 +455,9 @@ bool ebpf_set_cid_filter(const char* container_id, size_t length, uint64_t cid_k // 更新conn对应的角色,某些协议内核态无法判断角色 void ebpf_update_conn_role(struct connect_id_t *conn_id, enum support_role_e role_type); +void get_self_runtime_info(); + +int32_t ebpf_init_self_runtime_info(char *so, long offset, struct self_runtime_info* info); + #endif #endif -- Gitee From c650063d1034deb02da19c75d869354b7e78f14e Mon Sep 17 00:00:00 2001 From: xiongyunnnn <16005136+xiongyunnnn@user.noreply.gitee.com> Date: Wed, 20 Aug 2025 07:15:29 +0000 Subject: [PATCH 21/24] Optimize perf buffer usage by using dynamic sizing for file events * resize file events for the perf buffer --- src/security/security.bpf.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/security/security.bpf.c b/src/security/security.bpf.c index 52a2882..915f82e 100644 --- a/src/security/security.bpf.c +++ b/src/security/security.bpf.c @@ -1176,6 +1176,10 @@ int filter_prog(struct pt_regs *ctx) { return 0; } +static __always_inline size_t file_data_common_size(void) { + return offsetof(struct file_data_t, path); +} + SEC("kprobe/secure_data_send") int secure_data_send(struct pt_regs *ctx) { @@ -1193,7 +1197,11 @@ int secure_data_send(struct pt_regs *ctx) case SECURE_FUNC_TRACEPOINT_FUNC_SECURITY_PATH_TRUNCATE: case SECURE_FUNC_TRACEPOINT_FUNC_SYS_WRITE: case SECURE_FUNC_TRACEPOINT_FUNC_SYS_READ:{ - bpf_perf_event_output(ctx, &file_secure_output, BPF_F_CURRENT_CPU, &data->file_data, sizeof(struct file_data_t)); + __u32 total = file_data_common_size() + data->file_data.size; + if (total > sizeof(struct file_data_t)) { + total = sizeof(struct file_data_t); + } + bpf_perf_event_output(ctx, &file_secure_output, BPF_F_CURRENT_CPU, &data->file_data, total); BPF_DEBUG("[kprobe][secure_data_send][file] pid:%u, ktime:%u, func:%d send to perfbuffer.\n", data->file_data.key.pid, data->file_data.key.ktime, data->func); break; } @@ -1223,6 +1231,7 @@ int kprobe_security_file_permission(struct pt_regs *ctx) const struct path *path_arg = 0; path_arg = _(&file->f_path); long ret = 
copy_path(stack->file_data.path, path_arg); + stack->file_data.size = ret; int path_len = *(int *)stack->file_data.path; const u32 flag_prefix = 4 + path_len; int flag = -1; @@ -1291,6 +1300,7 @@ int kprobe_security_mmap_file(struct pt_regs *ctx) const struct path *path_arg = 0; path_arg = _(&file->f_path); long ret = copy_path(stack->file_data.path, path_arg); + stack->file_data.size = ret; int path_len = *(int *)stack->file_data.path; BPF_DEBUG("[kprobe][tailcall][mmap] before ~ stack path length:%s, ret:%lld", path_len, ret); BPF_DEBUG("[kprobe][tailcall][mmap] before ~ stack path+4:%s", &stack->file_data.path[4]); @@ -1339,7 +1349,8 @@ int kprobe_security_path_truncate(struct pt_regs *ctx) struct path *path = (struct path *)PT_REGS_PARM1(ctx); const struct path *path_arg = 0; path_arg = _(path); - copy_path(stack->file_data.path, path_arg); + long ret = copy_path(stack->file_data.path, path_arg); + stack->file_data.size = ret; bpf_tail_call(ctx, &secure_tailcall_map, TAILCALL_FILTER_PROG); return 0; } -- Gitee From 7e4f60dd399f68853e5fcbf87b9cebabeea36b61 Mon Sep 17 00:00:00 2001 From: "qianlu.kk" Date: Mon, 15 Sep 2025 17:51:22 +0800 Subject: [PATCH 22/24] update --- CMakeLists.txt | 2 +- scripts/cmake/genskel.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a1ca2ed..46c7e98 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,7 @@ option(BUILD_EXAMPLE "Build example cases" OFF) option(BUILD_LCC "Build lcc project" OFF) OPTION(ENABLE_GCOV "Enable gcov" OFF) option(ENABLE_ASAN "Enable asan" OFF) -option(ENABLE_PROFILE "Enable profile" OFF) +option(ENABLE_PROFILE "Enable profile" ON) option(ENABLE_STATIC_LINK_ELF "Enable static link libelf" OFF) IF (ENABLE_GCOV) diff --git a/scripts/cmake/genskel.cmake b/scripts/cmake/genskel.cmake index 60286d1..32de7dc 100644 --- a/scripts/cmake/genskel.cmake +++ b/scripts/cmake/genskel.cmake @@ -26,7 +26,7 @@ macro(genskel name) endif () SET(BPF_C_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${name}.bpf.c) SET(BPF_O_FILE ${CMAKE_CURRENT_BINARY_DIR}/${name}.bpf.o) - SET(BPF_S_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${name}.skel.h) + SET(BPF_S_FILE ${CMAKE_CURRENT_BINARY_DIR}/${name}.skel.h) add_custom_command( OUTPUT ${BPF_O_FILE} -- Gitee From bc2e5e06529de2832e2d4fee047fd8d29887e67c Mon Sep 17 00:00:00 2001 From: Tom Yu Date: Fri, 26 Sep 2025 07:31:26 +0000 Subject: [PATCH 23/24] !50 fix: cwd_read bug when path is bind mounted This commit fixes a bug in the cwd_read function where the path resolution was incorrect when dealing with bind mounted paths. The issue was in how the vfsmnt was being accessed - we now correctly use the parent's mnt reference to ensure proper path resolution in bind mount scenarios. 
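A simplified userspace illustration of the relationship the fix relies on: struct vfsmount
is embedded inside struct mount, so once the walk steps up to the parent mount, the
vfsmount that stays consistent with the new mount/dentry pair is the parent's embedded
one (&parent->mnt), not one derived from the mount being left. The structs below are
illustrative stand-ins only, not the kernel definitions.

    #include <stdio.h>

    struct vfsmount { const char *tag; };

    struct mount {
        struct mount   *mnt_parent;
        struct vfsmount mnt;              /* embedded vfsmount, as in the kernel */
        const char     *mnt_mountpoint;
    };

    int main(void)
    {
        struct mount root = { .mnt = { "vfsmnt-of-root" }, .mnt_mountpoint = "/" };
        root.mnt_parent = &root;                       /* root is its own parent */

        struct mount bind = { .mnt_parent = &root,
                              .mnt        = { "vfsmnt-of-bind-mount" },
                              .mnt_mountpoint = "/host" };

        struct mount *mnt    = &bind;                  /* mount being stepped out of */
        struct mount *parent = mnt->mnt_parent;

        struct vfsmount *before_fix = &mnt->mnt;       /* roughly what the old read resolved against */
        struct vfsmount *after_fix  = &parent->mnt;    /* fixed code takes the parent's embedded mnt */

        printf("before: %s, after: %s\n", before_fix->tag, after_fix->tag);
        return 0;
    }
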
--- src/security/bpf_process_event.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/security/bpf_process_event.h b/src/security/bpf_process_event.h index 06b0567..1296c94 100644 --- a/src/security/bpf_process_event.h +++ b/src/security/bpf_process_event.h @@ -207,8 +207,7 @@ FUNC_INLINE long cwd_read(struct cwd_read_data *data) bpf_probe_read(&data->dentry, sizeof(data->dentry), _(&mnt->mnt_mountpoint)); data->mnt = parent; - bpf_probe_read(&data->vfsmnt, sizeof(data->vfsmnt), - _(&mnt->mnt)); + data->vfsmnt = _(&parent->mnt); return 0; } // resolved all path components successfully -- Gitee From 111b2f1f45b14a688673bd6bad800cde68781db5 Mon Sep 17 00:00:00 2001 From: wokron <9765558+wokero@user.noreply.gitee.com> Date: Thu, 6 Nov 2025 07:39:19 +0000 Subject: [PATCH 24/24] !49 Enable loongcollector CPU profiling support * check if is go program before find traceId field * update stack format * support profiling with trace_id * check if btf path exists * support change host root path * support load as so * fix memory leak * sync with latest libprofiler --- src/profiler/Cargo.lock | 12 +++ src/profiler/Cargo.toml | 1 + src/profiler/build.rs | 2 + src/profiler/src/bin/heatmap.rs | 2 +- .../src/bpf/interpreter_dispatcher.bpf.c | 21 +++++ src/profiler/src/bpf/native_stack.bpf.c | 31 +++++-- src/profiler/src/bpf/tracemgmt.h | 1 + src/profiler/src/bpf/types.h | 11 +++ src/profiler/src/heatmap.rs | 5 +- src/profiler/src/lib.rs | 32 ++++++- src/profiler/src/probes/event.rs | 1 + src/profiler/src/probes/probes.rs | 40 ++++++-- src/profiler/src/probes/stack_delta.rs | 29 +++--- src/profiler/src/probes/system_config.rs | 12 ++- src/profiler/src/probes/types.rs | 14 +++ src/profiler/src/process/maps.rs | 17 +++- src/profiler/src/profiler.rs | 70 +++++++++++++- src/profiler/src/stack.rs | 9 +- src/profiler/src/symbollizer/elf.rs | 93 +++++++++++++++++++ .../src/symbollizer/lru_process_files.rs | 20 +++- src/profiler/src/symbollizer/symbolizer.rs | 10 +- src/profiler/src/tpbase/libc.rs | 2 +- src/profiler/src/utils/mod.rs | 1 + src/profiler/src/utils/process.rs | 40 ++++++++ src/profiler/src/utils/v2p.rs | 33 +++++++ 25 files changed, 460 insertions(+), 49 deletions(-) create mode 100644 src/profiler/src/utils/v2p.rs diff --git a/src/profiler/Cargo.lock b/src/profiler/Cargo.lock index 9b6b569..8073804 100644 --- a/src/profiler/Cargo.lock +++ b/src/profiler/Cargo.lock @@ -1127,6 +1127,17 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "pagemap" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9b10bd736861cab1e4a800e7547d08f5103929b3549a45a23408aaff0c1ee71" +dependencies = [ + "bitflags 1.3.2", + "libc", + "thiserror", +] + [[package]] name = "parking_lot" version = "0.12.3" @@ -1280,6 +1291,7 @@ dependencies = [ "num_cpus", "object", "once_cell", + "pagemap", "perf-event-open-sys", "procfs", "protobuf", diff --git a/src/profiler/Cargo.toml b/src/profiler/Cargo.toml index 34983b6..02fed57 100644 --- a/src/profiler/Cargo.toml +++ b/src/profiler/Cargo.toml @@ -29,6 +29,7 @@ moka = { version = "0.12.10", features = ["sync"] } num_cpus = "1.16.0" object = "0.36.1" once_cell = "1.19.0" +pagemap = "0.1.0" perf-event-open-sys = "4.0.0" procfs = "0.16.0" protobuf = "3.5.1" diff --git a/src/profiler/build.rs b/src/profiler/build.rs index 5666c0c..abe508d 100644 --- a/src/profiler/build.rs +++ 
b/src/profiler/build.rs @@ -8,6 +8,7 @@ fn generate_skeleton(out: &mut PathBuf, name: &str) { out.push(&rs_name); SkeletonBuilder::new() .source(&c_path) + .clang_args(["-DHAS_APM"]) .build_and_generate(&out) .unwrap(); @@ -22,6 +23,7 @@ fn generate_header(out: &mut PathBuf, name: &str) { out.push(&rs_name); let bindings = bindgen::Builder::default() .header(&header_path) + .clang_args(["-DHAS_APM"]) .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) .generate() .unwrap(); diff --git a/src/profiler/src/bin/heatmap.rs b/src/profiler/src/bin/heatmap.rs index a8ac780..dde5b58 100644 --- a/src/profiler/src/bin/heatmap.rs +++ b/src/profiler/src/bin/heatmap.rs @@ -7,7 +7,7 @@ use std::ffi::CString; use structopt::StructOpt; #[derive(Debug, StructOpt)] -#[structopt(name = "rtrace", about = "Diagnosing tools of kernel network")] +#[structopt(name = "heatmap", about = "show cpu heatmap")] pub struct Command { #[structopt(long, help = "Specify the Pid of the tracking process")] pid: u32, diff --git a/src/profiler/src/bpf/interpreter_dispatcher.bpf.c b/src/profiler/src/bpf/interpreter_dispatcher.bpf.c index 4308cde..12c90fd 100644 --- a/src/profiler/src/bpf/interpreter_dispatcher.bpf.c +++ b/src/profiler/src/bpf/interpreter_dispatcher.bpf.c @@ -34,6 +34,27 @@ void maybe_add_apm_info(Trace *trace) { DEBUG_PRINT("Trace is within a process with APM integration enabled"); + if (proc->tracing_type == TRACE_GO_AGENT) { + const struct task_struct* task_ptr = (struct task_struct*)bpf_get_current_task(); + const void* fs_base; + bpf_probe_read(&fs_base, sizeof(void *), &task_ptr->thread.fsbase); + + size_t g_addr; // address of struct runtime.g + bpf_probe_read_user(&g_addr, sizeof(void*), (void*)(fs_base + (-8))); + + size_t go_string_addr; // address of field traceId in runtime.g + bpf_probe_read_user(&go_string_addr, sizeof(void*), (void*)(g_addr + proc->tracing_field_offset + 8)); + + size_t trace_id_addr; + bpf_probe_read_user(&trace_id_addr, sizeof(void*), (void*)(go_string_addr + 0)); + + const char trace_id[32]; + bpf_probe_read_user(trace_id, sizeof(trace_id), (void*)(trace_id_addr)); + + __builtin_memcpy(trace->trace_id, trace_id, sizeof(trace->trace_id)); + return; + } + u64 tsd_base; if (tsd_get_base((void **)&tsd_base) != 0) { increment_metric(metricID_UnwindApmIntErrReadTsdBase); diff --git a/src/profiler/src/bpf/native_stack.bpf.c b/src/profiler/src/bpf/native_stack.bpf.c index e8c7edb..305f8df 100644 --- a/src/profiler/src/bpf/native_stack.bpf.c +++ b/src/profiler/src/bpf/native_stack.bpf.c @@ -668,10 +668,20 @@ static inline ErrorCode copy_state_regs(UnwindState *state, // Check if the process is running in 32-bit mode on the x86_64 system. // This check follows the Linux kernel implementation of user_64bit_mode() in // arch/x86/include/asm/ptrace.h. - if (regs->cs == __USER32_CS) - { - return ERR_NATIVE_X64_32BIT_COMPAT_MODE; + if (bpf_core_field_size(regs->cs) == 2) { + u16 cs; + bpf_probe_read_kernel(&cs, sizeof(cs), ®s->cs); + if (cs == __USER32_CS) + { + return ERR_NATIVE_X64_32BIT_COMPAT_MODE; + } + } else { + if (regs->cs == __USER32_CS) + { + return ERR_NATIVE_X64_32BIT_COMPAT_MODE; + } } + state->pc = regs->ip; state->sp = regs->sp; state->fp = regs->bp; @@ -731,9 +741,18 @@ static inline bool ptregs_is_usermode(struct pt_regs *regs) { #if defined(__x86_64__) // On x86_64 the user mode SS should always be __USER_DS. 
- if (regs->ss != __USER_DS) - { - return false; + if (bpf_core_field_size(regs->ss) == 2) { + u16 ss; + bpf_probe_read_kernel(&ss, sizeof(ss), ®s->ss); + if (ss != __USER_DS) + { + return false; + } + } else { + if (regs->ss != __USER_DS) + { + return false; + } } return true; #elif defined(__aarch64__) diff --git a/src/profiler/src/bpf/tracemgmt.h b/src/profiler/src/bpf/tracemgmt.h index b222ae7..8b0a5c2 100644 --- a/src/profiler/src/bpf/tracemgmt.h +++ b/src/profiler/src/bpf/tracemgmt.h @@ -248,6 +248,7 @@ static inline PerCPURecord *get_pristine_per_cpu_record() trace->apm_trace_id.as_int.hi = 0; trace->apm_trace_id.as_int.lo = 0; trace->apm_transaction_id.as_int = 0; + __builtin_memset(trace->trace_id, 0, TRACE_ID_LEN); #endif return record; } diff --git a/src/profiler/src/bpf/types.h b/src/profiler/src/bpf/types.h index 1111abc..8b302e4 100644 --- a/src/profiler/src/bpf/types.h +++ b/src/profiler/src/bpf/types.h @@ -501,6 +501,8 @@ typedef struct V8ProcInfo { // COMM_LEN defines the maximum length we will receive for the comm of a task. #define COMM_LEN 16 +#define TRACE_ID_LEN 32 + #ifdef HAS_APM // 128-bit APM trace ID. typedef union ApmTraceID { @@ -550,6 +552,8 @@ typedef struct Trace { ApmSpanID apm_transaction_id; // APM trace ID or all-zero if not present. ApmTraceID apm_trace_id; + // General trace id + unsigned char trace_id[TRACE_ID_LEN]; #endif // The kernel stack ID. s32 kernel_stack_id; @@ -883,8 +887,15 @@ typedef struct SystemConfig { #define PSR_MODE_MASK 0x0000000f #define PSR_MODE_EL0t 0x00000000 +typedef enum TracingType { + TRACE_NONE, + TRACE_GO_AGENT, +} TracingType; + typedef struct ApmIntProcInfo { u64 tls_offset; + TracingType tracing_type; + u64 tracing_field_offset; } ApmIntProcInfo; #endif diff --git a/src/profiler/src/heatmap.rs b/src/profiler/src/heatmap.rs index 369179b..bf632de 100644 --- a/src/profiler/src/heatmap.rs +++ b/src/profiler/src/heatmap.rs @@ -1,3 +1,4 @@ +use crate::get_host_root_path; use std::collections::HashMap; use std::collections::LinkedList; use std::fs::read_to_string; @@ -34,7 +35,7 @@ pub struct ProcessHeatMap { impl ProcessHeatMap { pub fn add_process(&mut self, pid: u32) { self.heat_maps.entry(pid).or_insert_with(|| { - let comm = match read_to_string(format!("/proc/{pid}/comm")) { + let comm = match read_to_string(format!("{}/proc/{pid}/comm", get_host_root_path())) { Ok(mut comm) => { comm.pop(); comm @@ -57,7 +58,7 @@ impl ProcessHeatMap { // find it in previous heatmap if base < heat.base { for single in self.done.iter_mut() { - if single.base == base { + if pid == single.pid && single.base == base { single.inc(slot); return; } diff --git a/src/profiler/src/lib.rs b/src/profiler/src/lib.rs index 308ec0f..2ef76ff 100644 --- a/src/profiler/src/lib.rs +++ b/src/profiler/src/lib.rs @@ -5,6 +5,7 @@ use std::ffi::CString; use std::sync::atomic::AtomicBool; use std::sync::atomic::AtomicU64; use std::sync::atomic::Ordering; +use std::sync::OnceLock; pub mod error; pub mod executable; pub mod interpreter; @@ -29,6 +30,12 @@ pub static ENABLE_SYMBOLIZER: AtomicBool = AtomicBool::new(true); pub static SYMBOL_FILE_MAX_SIZE: AtomicU64 = AtomicU64::new(u64::MAX); pub static LIVETRACE_ENABLE_CPU_INFO: AtomicBool = AtomicBool::new(false); pub static LIVETRACE_ENABLE_FUNCTION_OFFSET: AtomicBool = AtomicBool::new(false); +pub static HOST_ROOT_PATH: OnceLock = OnceLock::new(); +pub static ENABLE_TRACING: AtomicBool = AtomicBool::new(false); + +pub fn get_host_root_path() -> &'static str { + HOST_ROOT_PATH.get().map(|s| 
s.as_str()).unwrap_or("/") +} pub fn is_enable_cpuno() -> bool { LIVETRACE_ENABLE_CPU_INFO.load(Ordering::SeqCst) @@ -50,6 +57,10 @@ pub fn symbol_file_max_size() -> u64 { SYMBOL_FILE_MAX_SIZE.load(Ordering::SeqCst) } +pub fn is_enable_tracing() -> bool { + ENABLE_TRACING.load(Ordering::SeqCst) +} + pub fn symbol_file_max_symbols() -> u64 { let sz = symbol_file_max_size(); if sz == u64::MAX { @@ -102,6 +113,25 @@ pub extern "C" fn livetrace_disable_symbolizer() { ENABLE_SYMBOLIZER.store(false, Ordering::SeqCst); } +#[no_mangle] +pub extern "C" fn livetrace_set_host_root_path(path: *const libc::c_char) -> i32 { + let path = unsafe { CStr::from_ptr(path) }; + let path = match path.to_str() { + Ok(path) => path, + Err(_e) => { + return -1; + } + }; + let path = path.to_string(); + HOST_ROOT_PATH.set(path); + 0 +} + +#[no_mangle] +pub extern "C" fn livetrace_enable_tracing() { + ENABLE_TRACING.store(true, Ordering::SeqCst); +} + #[no_mangle] pub extern "C" fn livetrace_profiler_create() -> *mut Profiler<'static> { Box::into_raw(Box::new(Profiler::new())) @@ -110,7 +140,7 @@ pub extern "C" fn livetrace_profiler_create() -> *mut Profiler<'static> { #[no_mangle] pub extern "C" fn livetrace_profiler_destroy(profiler: *mut Profiler) { if !profiler.is_null() { - unsafe { std::ptr::drop_in_place(profiler) } + unsafe { Box::from_raw(profiler); } } } diff --git a/src/profiler/src/probes/event.rs b/src/profiler/src/probes/event.rs index 4c95145..a51cabb 100644 --- a/src/profiler/src/probes/event.rs +++ b/src/profiler/src/probes/event.rs @@ -68,6 +68,7 @@ pub struct RawStack { pub time: u64, pub kernel: Vec, pub user: RawUserStack, + pub trace_id: Option, } impl RawStack {} diff --git a/src/profiler/src/probes/probes.rs b/src/profiler/src/probes/probes.rs index 9429d7d..0ac4303 100644 --- a/src/profiler/src/probes/probes.rs +++ b/src/profiler/src/probes/probes.rs @@ -1,5 +1,6 @@ use crate::is_system_profiling; use crate::process::maps::ProcessMaps; +use crate::HOST_ROOT_PATH; use super::event::ProbeEvent; use super::event::RawStack; @@ -46,10 +47,12 @@ use perf_event_open_sys::bindings::PERF_TYPE_SOFTWARE; use perf_event_open_sys::perf_event_open; use std::collections::HashMap; use std::env::current_exe; +use std::ffi::CStr; use std::ffi::CString; use std::os::fd::AsFd; use std::os::fd::AsRawFd; use std::path; +use std::path::Path; use std::path::PathBuf; use std::str::FromStr; use std::sync::atomic::AtomicBool; @@ -96,9 +99,12 @@ pub static SYSAK_BTF_PATH: Lazy> = Lazy::new(|| { if info.release.starts_with("5.10") { return None; } - return Some( - CString::new(format!("{}/tools/vmlinux-{}", sysak, info.release)).unwrap(), - ); + let path = format!("{}/tools/vmlinux-{}", sysak, info.release); + if !Path::new(&path).exists() { + log::warn!("failed to find custom btf on path: {}", path); + return None; + } + return Some(CString::new(path).unwrap()); } } None @@ -149,7 +155,7 @@ fn set_thread_need_exit() { fn get_self_path() -> PathBuf { let pid = unsafe { libc::getpid() }; - let pm = ProcessMaps::new(pid as u32).unwrap(); + let pm = ProcessMaps::new_local(pid as u32).unwrap(); if let Some(p) = pm.find_so("libmullprof.so") { return PathBuf::from(p); } @@ -158,6 +164,10 @@ fn get_self_path() -> PathBuf { return PathBuf::from(p); } + if let Some(p) = pm.find_so("libprofiler.so") { + return PathBuf::from(p); + } + current_exe().expect("failed to find executable name") } @@ -172,7 +182,7 @@ pub struct Probes<'a> { sched_skel: sched::SchedMonitorSkel<'a>, pub hotspot_skel: hotspot::HotspotSkel<'a>, pub 
python_skel: python::PythonSkel<'a>, - interpreter_dispatcher_skel: dispatcher::InterpreterDispatcherSkel<'a>, + pub interpreter_dispatcher_skel: dispatcher::InterpreterDispatcherSkel<'a>, links: Vec, pub rx: Receiver, pub pid_maps_info_map: PidMapsInfoMap, @@ -267,7 +277,11 @@ impl<'a> Probes<'a> { let hotspot_skel = load_skel!(maps, hotspot::HotspotSkelBuilder); let python_skel = load_skel!(maps, python::PythonSkelBuilder); - let (tx, rx) = crossbeam_channel::unbounded(); + let ms = profile_period() as usize; + let sample_per_sec = 1000 / ms; + let ten_sec_samples = sample_per_sec * 10 * num_possible_cpus().unwrap_or(1); + log::info!("cache max stack samples: {}", ten_sec_samples); + let (tx, rx) = crossbeam_channel::bounded(ten_sec_samples); let trace_thread_handle = { let mut cloned_tx = tx.clone(); @@ -445,7 +459,7 @@ impl<'a> Probes<'a> { let ret_value = SystemAnalysis::from(value); assert!(ret_value.raw.pid == 0); sc.set_stack_ptregs_offset((ret_value.raw.address - ret_value.code_u64()) as u32); - sc.set_has_pid_namespace(self.pid != self.nspid); + sc.set_has_pid_namespace(self.pid != self.nspid && HOST_ROOT_PATH.get().is_none()); system_config_skel .maps_mut() @@ -562,9 +576,9 @@ fn thread_poll_trace_event(map: &StackMap, tx: &mut Sender, cpu: i32 let user_stackid = (*raw).user_stack_id; let user_stack = if user_stackid == i32::MAX { - RawUserStack::Native((*raw).__bindgen_anon_1.user_stack[..stack_len].to_vec()) + RawUserStack::Native((&(*raw).__bindgen_anon_1.user_stack)[..stack_len].to_vec()) } else { - RawUserStack::Dynamic((*raw).__bindgen_anon_1.frames[..stack_len].to_vec()) + RawUserStack::Dynamic((&(*raw).__bindgen_anon_1.frames)[..stack_len].to_vec()) }; let kernel_stack = if kernel_stackid >= 0 { @@ -573,12 +587,20 @@ fn thread_poll_trace_event(map: &StackMap, tx: &mut Sender, cpu: i32 vec![] }; + let trace_id = if (*raw).trace_id[0] != 0 { + let data = std::slice::from_raw_parts((*raw).trace_id.as_ptr() as *const u8, 32); + Some(unsafe { std::str::from_utf8_unchecked(data) }.to_owned()) + } else { + None + }; + RawStack { cpu: cpu as u32, pid, time: (*raw).ktime, kernel: kernel_stack, user: user_stack, + trace_id, } }; let comm = unsafe { diff --git a/src/profiler/src/probes/stack_delta.rs b/src/profiler/src/probes/stack_delta.rs index a9ca5b3..c2b20fc 100644 --- a/src/profiler/src/probes/stack_delta.rs +++ b/src/profiler/src/probes/stack_delta.rs @@ -131,17 +131,15 @@ impl StackDeltaMap { pub fn update(&self, file_id: FileId64, deltas: Vec) -> Result { let map_id = get_map_id(deltas.len() as u32)?; - let inner = self - .create_inner_map(map_id) - .expect("failed to create inner map"); + let inner = self.create_inner_map(map_id)?; let outer = self.outer_map(map_id); if self.batch { - update_batch_inner_map(&inner, deltas); + update_batch_inner_map(&inner, deltas)?; } else { - update_inner_map(&inner, deltas).expect("failed to update inner map"); + update_inner_map(&inner, deltas)?; } - update_outer_map(outer, file_id, &inner).expect("failed to update outer map"); + update_outer_map(outer, file_id, &inner)?; Ok(map_id) } @@ -194,7 +192,7 @@ fn update_inner_map(inner: &MapHandle, deltas: Vec) -> Result<()> { Ok(()) } -fn update_batch_inner_map(inner: &MapHandle, deltas: Vec) { +fn update_batch_inner_map(inner: &MapHandle, deltas: Vec) -> Result<()> { let mut batch_key = Vec::with_capacity(deltas.len() * 4); let mut batch_val: Vec = Vec::with_capacity(deltas.len() * deltas[0].raw_size()); @@ -203,15 +201,14 @@ fn update_batch_inner_map(inner: &MapHandle, deltas: Vec) { 
batch_key.extend(idx.to_ne_bytes()); batch_val.extend(delta.slice()); } - inner - .update_batch( - &batch_key, - &batch_val, - deltas.len() as u32, - MapFlags::ANY, - MapFlags::ANY, - ) - .expect("failed to update inner map") + inner.update_batch( + &batch_key, + &batch_val, + deltas.len() as u32, + MapFlags::ANY, + MapFlags::ANY, + )?; + Ok(()) } pub fn create_inner_map(map_id: u32) -> Result { diff --git a/src/profiler/src/probes/system_config.rs b/src/profiler/src/probes/system_config.rs index 7aba761..c87f1ea 100644 --- a/src/profiler/src/probes/system_config.rs +++ b/src/profiler/src/probes/system_config.rs @@ -1,5 +1,6 @@ use crate::SYSTEM_PROFILING; +use std::path::Path; use super::types::SystemConfig; use libbpf_rs::btf::types::Composite; use libbpf_rs::btf::types::MemberAttr; @@ -12,10 +13,15 @@ pub fn get_system_config() -> SystemConfig { let btf_path: Option = { if let Ok(sysak) = std::env::var("SYSAK_WORK_PATH") { if let Ok(info) = uname::uname() { - if !info.release.starts_with("5.10") { - Some(format!("{}/tools/vmlinux-{}", sysak, info.release)) - } else { + if info.release.starts_with("5.10") { None + } else { + let path = format!("{}/tools/vmlinux-{}", sysak, info.release); + if !Path::new(&path).exists() { + None + } else { + Some(path) + } } } else { None diff --git a/src/profiler/src/probes/types.rs b/src/profiler/src/probes/types.rs index 19d5ecc..0918448 100644 --- a/src/profiler/src/probes/types.rs +++ b/src/profiler/src/probes/types.rs @@ -156,3 +156,17 @@ pub struct HotspotProcInfo { } impl_default!(HotspotProcInfo); + + +#[repr(u32)] +#[derive(PartialEq)] +pub enum TracingType { + TraceNone = bpf::TracingType_TRACE_NONE, + TraceGoAgent = bpf::TracingType_TRACE_GO_AGENT, +} + +pub struct ApmIntProcInfo { + pub raw: bpf::ApmIntProcInfo, +} + +impl_default!(ApmIntProcInfo); \ No newline at end of file diff --git a/src/profiler/src/process/maps.rs b/src/profiler/src/process/maps.rs index b8bc647..e51400b 100644 --- a/src/profiler/src/process/maps.rs +++ b/src/profiler/src/process/maps.rs @@ -1,4 +1,5 @@ use crate::symbollizer::file_id::FileId64; +use crate::get_host_root_path; use anyhow::Result; use std::cmp::Ordering; use std::collections::HashMap; @@ -9,6 +10,7 @@ use std::io::BufReader; use std::ops::Deref; use std::ops::DerefMut; use std::path::Path; +use std::path::PathBuf; #[derive(Debug, Eq, Hash, PartialEq, Clone, Copy)] pub struct DiskFileKey { @@ -114,7 +116,7 @@ impl ProcessMapsEntry { if self.is_anonymous() || self.is_vdso() { "".to_owned() } else { - format!("/proc/{}/root/{}", pid, self.path.as_ref().unwrap()) + format!("{}/proc/{}/root/{}", get_host_root_path(), pid, self.path.as_ref().unwrap()) } } } @@ -142,8 +144,7 @@ impl DerefMut for ProcessMaps { } impl ProcessMaps { - pub fn new(pid: u32) -> Result { - let maps_path = Path::new("/proc").join(pid.to_string()).join("maps"); + fn new_inner(maps_path: PathBuf) -> Result { let file = File::open(maps_path)?; let reader = BufReader::new(file); @@ -188,6 +189,16 @@ impl ProcessMaps { Ok(Self { entries }) } + pub fn new(pid: u32) -> Result { + let maps_path = Path::new(get_host_root_path()).join("proc").join(pid.to_string()).join("maps"); + Self::new_inner(maps_path) + } + + pub fn new_local(pid: u32) -> Result { + let maps_path = Path::new("/proc").join(pid.to_string()).join("maps"); + Self::new_inner(maps_path) + } + /// Compares two `ProcessMaps` instances and returns the added and removed entries. 
pub fn diff(&self, other: &Self) -> (Vec, Vec) { let mut added = Vec::new(); diff --git a/src/profiler/src/profiler.rs b/src/profiler/src/profiler.rs index 612d098..32156ad 100644 --- a/src/profiler/src/profiler.rs +++ b/src/profiler/src/profiler.rs @@ -1,24 +1,31 @@ use crate::executable::ExecutableCache; +use crate::get_host_root_path; use crate::heatmap::ProcessHeatMap; use crate::heatmap::TenSecHeatMap; use crate::interpreter::Interpreter; use crate::is_enable_symbolizer; +use crate::is_enable_tracing; use crate::is_system_profiling; use crate::probes::event::ProbeEvent; use crate::probes::probes::Probes; +use crate::probes::types::ApmIntProcInfo; +use crate::probes::types::TracingType; use crate::process::maps::ExeMapsEntry; use crate::process::maps::ProcessMaps; use crate::process::process::Process; use crate::stack::Stack; use crate::stack::StackAggregator; use crate::stack::SymbolizedStack; +use crate::symbollizer::elf::ElfFile; use crate::symbollizer::file_cache::FileCache; use crate::symbollizer::symbolizer::Symbolizer; use crate::utils::lpm::Prefix; +use crate::utils::process::get_comm_by_pid; use crate::utils::time::init_tstamp; use crate::utils::time::time_delta; use crate::MIN_PROCESS_SAMPLES; use anyhow::Result; +use libbpf_rs::MapFlags; use std::collections::HashMap; use std::time::Instant; @@ -27,7 +34,7 @@ pub struct Profiler<'a> { probes: Probes<'a>, caches: FileCache, executables: ExecutableCache, - symbolizer: Symbolizer, + pub symbolizer: Symbolizer, interpreters: HashMap, all_system_profiling: bool, @@ -41,7 +48,8 @@ pub struct Profiler<'a> { impl<'a> Profiler<'a> { pub fn new() -> Self { let mut symer = Symbolizer::new(); - symer.add_kernel("/proc/kallsyms"); + let kallsyms_path = format!("{}/proc/kallsyms", get_host_root_path()); + symer.add_kernel(kallsyms_path.as_str()); init_tstamp(); Profiler { pids: HashMap::new(), @@ -169,6 +177,12 @@ impl<'a> Profiler<'a> { proc.exit(&mut self.probes, &mut self.executables)?; } + self.probes + .interpreter_dispatcher_skel + .maps_mut() + .apm_int_procs() + .delete(&pid.to_ne_bytes())?; + if let Some(mut int) = self.interpreters.remove(&pid) { int.exit(&mut self.probes)?; } @@ -241,6 +255,52 @@ impl<'a> Profiler<'a> { } }; + if is_enable_tracing() { + let mut trace_type = TracingType::TraceNone; + let mut field_offset = 0; + if let Ok(true) = ElfFile::check_section_exist(&info.file, ".go.buildinfo") { + log::debug!( + "found .go.buildinfo section in pid: {pid}, exe: {}", + map.file_path(pid) + ); + if let Ok(Some(offset)) = + ElfFile::extract_field_offset(&info.file, "runtime.g", "traceId") + { + log::info!( + "found go traceId field offset: {offset} in pid: {pid}, exe: {}", + map.file_path(pid) + ); + trace_type = TracingType::TraceGoAgent; + field_offset = offset; + } + } + + if trace_type != TracingType::TraceNone { + let mut pinfo = ApmIntProcInfo::default(); + pinfo.raw.tracing_type = trace_type as u32; + pinfo.raw.tracing_field_offset = field_offset as u64; + match self + .probes + .interpreter_dispatcher_skel + .maps_mut() + .apm_int_procs() + .update(&pid.to_ne_bytes(), pinfo.slice(), MapFlags::ANY) + { + Ok(_) => { + log::info!( + "update apm_int_procs map for pid: {pid} with type: {:?}", + pinfo.raw.tracing_type as u32 + ); + } + Err(e) => { + log::warn!( + "failed to update apm_int_procs map for pid: {pid}, error: {e}" + ); + } + } + } + } + let va = match info.file_offset_to_virtual_address(map.offset) { Some(x) => x, None => { @@ -261,7 +321,11 @@ impl<'a> Profiler<'a> { Ok(Some(a)) => a, Ok(None) => continue, 
Err(e) => { - log::error!("failed to get executable: {e}"); + log::error!( + "failed to get executable for comm {}: {:?}, err: {e}", + get_comm_by_pid(pid), + map + ); continue; } }; diff --git a/src/profiler/src/stack.rs b/src/profiler/src/stack.rs index cb6ed96..12539b6 100644 --- a/src/profiler/src/stack.rs +++ b/src/profiler/src/stack.rs @@ -1,4 +1,5 @@ use crate::interpreter::Interpreter; +use crate::is_enable_tracing; use crate::pb::LivetraceCell; use crate::pb::LivetraceList; use crate::pb::Ustack; @@ -60,6 +61,7 @@ pub enum Frame { pub struct Stack { pub count: u32, pub frames: Vec, + pub trace_id: Option, } impl Stack { @@ -105,6 +107,7 @@ impl Stack { } stack.count = cnt; + stack.trace_id = raw.trace_id.clone(); Ok(stack) } @@ -138,7 +141,11 @@ impl ToString for Stack { .collect::>() .join(";"); - format!("{} {}", s, self.count) + if is_enable_tracing() { + return format!("{} {} {}", s, self.count, self.trace_id.as_deref().unwrap_or("null")); + } else { + return format!("{} {}", s, self.count); + } } } diff --git a/src/profiler/src/symbollizer/elf.rs b/src/profiler/src/symbollizer/elf.rs index 66e0a0b..d088339 100644 --- a/src/profiler/src/symbollizer/elf.rs +++ b/src/profiler/src/symbollizer/elf.rs @@ -381,6 +381,99 @@ impl ElfFile { .ok_or(anyhow!("symbol {} not found", name)) } + pub fn extract_field_offset( + file: &File, + struct_name: &str, + field_name: &str, + ) -> Result> { + let mmap_ref = unsafe { memmap2::Mmap::map(file)? }; + let elf = object::File::parse(&*mmap_ref)?; + let endian = if elf.is_little_endian() { + gimli::RunTimeEndian::Little + } else { + gimli::RunTimeEndian::Big + }; + let arena_data = Arena::new(); + let arena_relocations = Arena::new(); + let mut load_section = |id: gimli::SectionId| -> Result<_> { + load_file_section(id, &elf, endian, false, &arena_data, &arena_relocations) + }; + + let mut dwarf = gimli::Dwarf::load(load_section).unwrap(); + + // iterate over all compilation units + let mut iter = dwarf.units(); + while let Some(header) = iter.next()? { + let unit = dwarf.unit(header)?; + + let mut entries = unit.entries(); + while let Some((depth, entry)) = entries.next_dfs()? { + if entry.tag() == gimli::DW_TAG_structure_type { + if let Some(attr) = entry.attr(gimli::DW_AT_name)? { + let string = dwarf.attr_string(&unit, attr.value())?; + let actual_string = string.to_string_lossy()?.into_owned(); + if actual_string == struct_name { + // now iterate over the children to find field + let mut children = entries.clone(); + while let Some((child_depth, child)) = children.next_dfs()? { + if child.tag() == gimli::DW_TAG_member { + if let Some(attr) = child.attr(gimli::DW_AT_name)? { + let string = dwarf.attr_string(&unit, attr.value())?; + let actual_string = string.to_string_lossy()?.into_owned(); + if actual_string == field_name { + // find the field + // extract the offset from DW_AT_data_member_location + if let Some(attr) = + child.attr(gimli::DW_AT_data_member_location)? 
+ { + match attr.value() { + gimli::AttributeValue::Udata(offset) => { + return Ok(Some(offset as usize)); + } + gimli::AttributeValue::Data1(data) => { + return Ok(Some(data as usize)); + } + gimli::AttributeValue::Data2(data) => { + return Ok(Some(data as usize)); + } + gimli::AttributeValue::Data4(data) => { + return Ok(Some(data as usize)); + } + gimli::AttributeValue::Data8(data) => { + return Ok(Some(data as usize)); + } + _ => { + return Ok(None); + } + } + } else { + return Ok(None); + } + } + } + } else { + break; + } + } + break; + } + } + } + } + } + + return Ok(None); + } + + pub fn check_section_exist(file: &File, section_name: &str) -> Result { + let mmap_ref = unsafe { memmap2::Mmap::map(file)? }; + let elf = object::File::parse(&*mmap_ref)?; + if elf.section_by_name(section_name).is_some() { + return Ok(true); + } + Ok(false) + } + // parse eh_frame and return stack_deltas pub fn parse_eh_frame(file: &File) -> Result> { let mmap_ref = unsafe { memmap2::Mmap::map(file)? }; diff --git a/src/profiler/src/symbollizer/lru_process_files.rs b/src/profiler/src/symbollizer/lru_process_files.rs index 7128255..c8e169a 100644 --- a/src/profiler/src/symbollizer/lru_process_files.rs +++ b/src/profiler/src/symbollizer/lru_process_files.rs @@ -87,11 +87,17 @@ impl ProcessFiles { } syms } + + pub fn cache(&self, lru_files: &mut LruFileSymbols) { + for file in &self.files { + let _ = lru_files.symbolize_with_path(file.file_id, 0, &file.path); + } + } } #[derive(Debug)] pub struct LruProcessFiles { - lru: LruCache, + pub(crate) lru: LruCache, } impl LruProcessFiles { @@ -124,6 +130,18 @@ impl LruProcessFiles { } } } + + pub fn cache(&mut self, pid: u32, lru_files: &mut LruFileSymbols) { + match self + .lru + .try_get_or_insert(pid, || -> Result { ProcessFiles::new(pid) }) + { + Ok(pf) => pf.cache(lru_files), + Err(e) => { + log::warn!("failed to add process files for pid {pid}: {e}"); + } + } + } } impl Deref for LruProcessFiles { diff --git a/src/profiler/src/symbollizer/symbolizer.rs b/src/profiler/src/symbollizer/symbolizer.rs index 9ebe1e6..7fe5767 100644 --- a/src/profiler/src/symbollizer/symbolizer.rs +++ b/src/profiler/src/symbollizer/symbolizer.rs @@ -2,6 +2,7 @@ use crate::is_enable_cpuno; use crate::is_enable_function_offset; use crate::process::maps::ProcessMaps; use crate::MAX_NUM_OF_PROCESSES; +use crate::get_host_root_path; use anyhow::bail; use anyhow::Result; use lru::LruCache; @@ -125,7 +126,7 @@ impl Symbolizer { pub fn proc_comm(&mut self, pid: u32) -> Result<&String> { let get_comm = || { - let mut comm = read_to_string(format!("/proc/{pid}/comm"))?; + let mut comm = read_to_string(format!("{}/proc/{pid}/comm", get_host_root_path()))?; comm.pop(); Ok(comm) }; @@ -133,7 +134,7 @@ impl Symbolizer { self.procs .try_get_or_insert(pid, || -> Result { let comm = if let Some(reg) = &self.adb_regex { - let cmdline = read_to_string(format!("/proc/{pid}/cmdline"))?; + let cmdline = read_to_string(format!("{}/proc/{pid}/cmdline", get_host_root_path()))?; reg.find(&cmdline) .map_or_else(|| get_comm(), |x| Ok(x.as_str().to_owned())) } else { @@ -183,6 +184,11 @@ impl Symbolizer { } syms } + + pub fn cache_process(&mut self, pid: u32) { + let _ = self.proc_comm(pid); + self.proc_files.cache(pid, &mut self.file_symbols); + } } #[cfg(test)] diff --git a/src/profiler/src/tpbase/libc.rs b/src/profiler/src/tpbase/libc.rs index d9bb7b6..e416006 100644 --- a/src/profiler/src/tpbase/libc.rs +++ b/src/profiler/src/tpbase/libc.rs @@ -17,7 +17,7 @@ use super::libc_decode::extract_tsd_info_native; 
/// Determines if the DSO filename potentially contains pthread code pub fn is_potential_tsd_dso(filename: &str) -> bool { - let libc_regex: Regex = Regex::new(r".*/(ld-musl|libpthread)([-.].*)?\.so").unwrap(); + let libc_regex: Regex = Regex::new(r".*/(ld-musl|libc|libpthread)([-.].*)?\.so").unwrap(); libc_regex.is_match(filename) } diff --git a/src/profiler/src/utils/mod.rs b/src/profiler/src/utils/mod.rs index 7ce9d57..6dd4976 100644 --- a/src/profiler/src/utils/mod.rs +++ b/src/profiler/src/utils/mod.rs @@ -3,3 +3,4 @@ pub mod process; pub mod remote_reader; pub mod safe_reader; pub mod time; +pub mod v2p; diff --git a/src/profiler/src/utils/process.rs b/src/profiler/src/utils/process.rs index b1b323a..cdd00c3 100644 --- a/src/profiler/src/utils/process.rs +++ b/src/profiler/src/utils/process.rs @@ -14,3 +14,43 @@ pub fn find_processes_by_comm(pat: &str) -> Vec { ret } + +pub fn get_comm_by_pid(pid: u32) -> String { + let path = format!("/proc/{}/comm", pid); + match std::fs::read_to_string(path) { + Ok(mut s) => { + s.pop(); // 去除末尾换行符 + s + } + Err(_) => "Unknown".to_string(), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_valid_pid() { + // 获取当前进程的 PID + let current_pid = std::process::id(); + // 获取当前进程的 comm + let comm = get_comm_by_pid(current_pid); + // 验证 comm 不为空且不为 "Unknown" + assert!( + !comm.is_empty(), + "Expected non-empty comm for current process" + ); + assert_ne!(comm, "Unknown", "Expected valid comm for current process"); + log::info!("Current process (PID {}): {}", current_pid, comm); + } + + #[test] + fn test_invalid_pid() { + // 使用一个不可能存在的 PID + let invalid_pid = u32::MAX; // 4294967295 + let comm = get_comm_by_pid(invalid_pid); + assert_eq!(comm, "Unknown", "Expected 'Unknown' for invalid PID"); + log::info!("Invalid process (PID {}): {}", invalid_pid, comm); + } +} diff --git a/src/profiler/src/utils/v2p.rs b/src/profiler/src/utils/v2p.rs new file mode 100644 index 0000000..6bb6698 --- /dev/null +++ b/src/profiler/src/utils/v2p.rs @@ -0,0 +1,33 @@ +use anyhow::bail; +use anyhow::Result; +use pagemap::MemoryRegion; +use procfs::page_size; + +// user virtual address to page frame number(pfn) +pub fn v2p(pid: u32, virt: u64) -> Result { + let mut maps = pagemap::PageMap::new(pid as u64)?; + let page = page_size(); + + let start = virt & !(page - 1); + let end = start + page; + + let entries = maps.pagemap_region(&MemoryRegion::from((start, end)))?; + + if entries.len() != 1 { + bail!("Number of entries is not 1") + } + + let pfn = entries[0].pfn()?; + Ok(pfn) +} + +// user virtual address to kernel virtual address +// page_kv = (pfn << 12) + page_offset_base +// kv = page_kv + offset in page +pub fn v2kv(pid: u32, virt: u64, page_offset_base: u64) -> Result { + let pfn = v2p(pid, virt)?; + let page_kv = (pfn << 12) + page_offset_base; + let page = page_size(); + let off = virt & (page - 1); + Ok(page_kv + off) +} -- Gitee
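
The address arithmetic added in src/profiler/src/utils/v2p.rs (v2kv) can be checked with a
small standalone program. Every value below is illustrative; page_offset_base in particular
is boot- and kernel-specific and is not provided by the new module (it would have to come
from elsewhere, e.g. kernel symbols), and 4 KiB pages are assumed.

    #include <stdio.h>
    #include <stdint.h>
    #include <inttypes.h>

    /* Worked example of:
     *   page_kv = (pfn << 12) + page_offset_base
     *   kv      = page_kv + (virt & (page_size - 1))
     */
    int main(void)
    {
        const uint64_t page_size        = 4096;                   /* assumes 4 KiB pages */
        const uint64_t page_offset_base = 0xffff888000000000ULL;  /* example value only */

        uint64_t virt = 0x7f1234567abcULL;   /* hypothetical user virtual address */
        uint64_t pfn  = 0x1a2b3cULL;         /* hypothetical PFN from /proc/<pid>/pagemap */

        uint64_t page_kv = (pfn << 12) + page_offset_base;        /* kernel VA of the page */
        uint64_t kv      = page_kv + (virt & (page_size - 1));    /* add the in-page offset */

        printf("pfn=0x%" PRIx64 " -> page kernel va=0x%" PRIx64 ", kernel va=0x%" PRIx64 "\n",
               pfn, page_kv, kv);
        return 0;
    }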